AbrilCVs: CV Catalog from Abril 2020

  • Abril 2020

  • CV catalog on CDS (downloaded to this dir)

  • Michael:

    • number of dimensions relative to number of parameters: maybe a dataset of only 2000 is OK. Make the model smarter (vs. relying on data augmentation)

  • Brett:

    • get ~2000 Gaia stars, combine with these CVs

    • mags, etc.

    • random forest

  • Abril20

    • CMD

      • trends with subtypes and periods

      • population density distributions

  • Questions:

    • Periods are on the order of hours. Could you predict the magnitude based on different periods, and then check whether an alert is consistent? Or are the uncertainties too big? (mags, period)


import os

import pandas as pd
from astropy import units as u
from astropy.coordinates import ICRS, SkyCoord
# GCP project ID, read from the environment (None if the variable is unset).
project_id = os.getenv('GOOGLE_CLOUD_PROJECT')
# Local working directory for this analysis.
# NOTE(review): `dir` shadows the Python builtin of the same name.
dir = "/Users/troyraen/Documents/broker/troy/troy/AbrilCVs"
# Abril CV catalog file; read below with pd.read_fwf.
fcat = f"{dir}/J_MNRAS_492_L40/catalog.dat"
# TNS CSV; read below with pd.read_csv and filtered on 'Disc. Internal Name'.
ftns = f"{dir}/tns_search_cvs.csv"
# ASAS-SN CSV; read below with pd.read_csv (uses 'raj2000', 'dej2000', 'source_id').
fasn = f"{dir}/ASAS-SN/cvs.csv"

# load Abril CV catalog
names = [
abrildf = pd.read_fwf(fcat, names=names, header=None, index=None)

Calculate HEALPix indexes of the CV catalog and use them to cross-match (xmatch)

Calculate indexes

def radec_to_skycoord(row):
    """Build an ICRS ``SkyCoord`` from a row's ``RAdeg`` and ``DEdeg`` values.

    Both values are handed to ``SkyCoord`` with ``unit='deg'``.
    """
    ra = row["RAdeg"]
    dec = row["DEdeg"]
    return SkyCoord(ra, dec, frame='icrs', unit='deg')
def skycoord_to_healpix(row):
    """Return the HEALPix index for the coordinate stored in ``row['SkyCoord']``.

    The lookup is delegated to the module-level ``hp`` object, so ``hp`` must
    be defined before this function is called.
    """
    to_index = hp.skycoord_to_healpix
    return to_index(row['SkyCoord'])

# load Abril CV catalog
# NOTE(review): `index` is not a documented read_fwf parameter -- confirm it is needed.
abrildf = pd.read_fwf(fcat, names=names, header=None, index=None)
# coordinates of every catalog entry as one array-valued SkyCoord
coords = SkyCoord(
    abrildf['RAdeg'].to_numpy(), abrildf['DEdeg'].to_numpy(), frame='icrs', unit='deg'
)

# instantiate pixelization
n = 17
nside = 2**n
frame = ICRS()
order = 'nested'
hp = HEALPix(nside=nside, order=order, frame=frame)

# calculate indexes
# the column name is derived from n so it cannot drift from the list used when saving
hp_col = f'hp_{n}_index'
abrildf['SkyCoord'] = abrildf.apply(radec_to_skycoord, axis=1)
abrildf[hp_col] = abrildf.apply(skycoord_to_healpix, axis=1)

# save csv (only the columns needed for cross-matching)
keep_cols = ['Name', 'RAdeg', 'DEdeg', hp_col]
abrildf[keep_cols].to_csv(fcat_condensed, index=False)


import timeit

# condensed catalog written by the index-calculation step
adf = pd.read_csv(fcat_condensed)

# one hard-coded test alert and the match radius
max_sep = 5.0 * u.arcsec
alert = {'ra': 313.2196851961664, 'dec': -2.6646887231578, 'Name': "J2052-0239"}
alertcoords = SkyCoord(alert['ra'], alert['dec'], frame='icrs', unit='deg')

# --- time without HEALPix ---
# The timed region includes building a SkyCoord for every catalog row.
start = timeit.default_timer()
adf['SkyCoord'] = adf.apply(radec_to_skycoord, axis=1)
matches_wo = {}
for _, cv in adf.iterrows():
    within_radius = alertcoords.separation(cv['SkyCoord']) <= max_sep
    if within_radius:
        matches_wo[alert['Name']] = (alert, cv)
stop = timeit.default_timer()
elapsed = stop - start
print('Time: ', elapsed)

# --- time with HEALPix ---
# Pre-filter on pixel index; a SkyCoord is only built for rows whose pixel
# falls inside the search cone.
start = timeit.default_timer()
idxs = hp.cone_search_skycoord(alertcoords, radius=max_sep)  # all pixels within max_sep
matches_w = {}
for _, cv in adf.iterrows():
    if cv['hp_17_index'] in idxs and alertcoords.separation(radec_to_skycoord(cv)) <= max_sep:
        matches_w[alert['Name']] = (alert, cv)
stop = timeit.default_timer()
elapsed = stop - start
print('Time: ', elapsed)


# get positions of TNS CVs
from broker_utils import gcp_utils

df = pd.read_csv(ftns)
# keep rows whose internal discovery name starts with "ZTF" (missing names are treated as "")
ztfdf = df.loc[df['Disc. Internal Name'].fillna("").str.startswith("ZTF")]
objectIds = list(ztfdf['Disc. Internal Name'].unique())
dataset = 'ztf_alerts'
table = 'DIASource'
# NOTE: objectIds are interpolated directly into the SQL text; this is only
# safe while the CSV is a trusted source.
query = f"""
    SELECT objectId, candid, ra, dec
    FROM `{project_id}.{dataset}.{table}`
    WHERE objectId IN ('{"','".join(objectIds)}')
"""
bqdf = gcp_utils.query_bigquery(query).to_dataframe()
# keep one row per objectId: the one with the largest candid
cleandf = bqdf.sort_values('candid', ascending=False).drop_duplicates(subset='objectId', keep='first')

# xmatch
max_sep = 50.0 * u.arcsec
# Build each alert's SkyCoord once, instead of once per (CV, alert) pair.
alert_rows = [
    (alert, SkyCoord(alert['ra'], alert['dec'], frame='icrs', unit='deg'))
    for _, alert in cleandf.iterrows()
]
# objectId -> (alert row, CV row); a later matching CV overwrites an earlier one
matches = {}
for _, cv in abrildf.iterrows():
    cvcoords = SkyCoord(cv['RAdeg'], cv['DEdeg'], frame='icrs', unit='deg')
    for alert, alertcoords in alert_rows:
        if alertcoords.separation(cvcoords) <= max_sep:
            matches[alert['objectId']] = (alert, cv)

Try some ASAS-SN data

# load asassn
asndf = pd.read_csv(fasn)
max_sep = 1.0 * u.arcsec
# Build each ASAS-SN source's SkyCoord once, instead of once per (CV, source) pair.
asn_rows = [
    (asn, SkyCoord(asn['raj2000'], asn['dej2000'], frame='icrs', unit='deg'))
    for _, asn in asndf.iterrows()
]
# source_id -> (ASAS-SN row, CV row); a later matching CV overwrites an earlier one
matches = {}
for _, cv in abrildf.iterrows():
    cvcoords = SkyCoord(cv['RAdeg'], cv['DEdeg'], frame='icrs', unit='deg')
    for asn, alertcoords in asn_rows:
        if alertcoords.separation(cvcoords) <= max_sep:
            matches[asn['source_id']] = (asn, cv)

Test Cloud Run module

from broker_utils import data_utils, gcp_utils

# Pull from 'ztf-loop' with msg_only=False; the payload is then read from
# `msg.message.data` below.
msgs = gcp_utils.pull_pubsub('ztf-loop', msg_only=False)
# use the first pulled message as the test case
msg = msgs[0]
alert_dict = data_utils.decode_alert(msg.message.data)