# docs/source/working-notes/troyraen/AbrilCVs/README.md

## AbrilCVs: CV Catalog from Abril 2020

- [Abril 2020](https://ui.adsabs.harvard.edu/abs/2020MNRAS.492L..40A/abstract)
- [CV catalog on CDS](https://cdsarc.cds.unistra.fr/viz-bin/cat?J/MNRAS/492/L40) (downloaded to this dir)

- Michael:
    - number of dimensions to parameters. maybe dataset of only 2000 is ok. make the model smarter (vs data augmentation)

- Brett:
    - get ~2000 Gaia stars, combine with these CVs
    - mags, etc.
    - random forest

- Abril20
    - CMD
        - trends with subtypes and periods
        - population density distributions

- Questions:
    - Periods of hours. could you predict the magnitude based on different periods, and then check whether alert is consistent? or are the uncertainties too big? mags, period.


### Catalog

```python
import os
import pandas as pd
from astropy.coordinates import SkyCoord
project_id = os.getenv('GOOGLE_CLOUD_PROJECT')
dir = "/Users/troyraen/Documents/broker/Pitt-Google/troy/docs/source/working-notes/troyraen/AbrilCVs"
fcat = f"{dir}/J_MNRAS_492_L40/catalog.dat"
ftns = f"{dir}/tns_search_cvs.csv"
fasn = f"{dir}/ASAS-SN/cvs.csv"

# load Abril CV catalog
names = [
    "Name",
    "AltName",
    "RAdeg",
    "DEdeg",
    "Type1",
    "Type2",
    "mag1",
    "Orb.Per",
    "Outburst",
    "MagRange",
    "SpType2",
    "SpType1",
    "Source",
    "RAGdeg",
    "e_RAGdeg",
    "DEGdeg",
    "e_DEGdeg",
    "rest",
    "b_rest",
    "B_rest",
    "rlen",
    "plx",
    "e_plx",
    "pmRA",
    "e_pmRA",
    "pmDE",
    "e_pmDE",
    "FG",
    "e_FG",
    "Gmag",
    "GMAG",
    "FBP",
    "e_FBP",
    "BPmag",
    "FRP",
    "e_FRP",
    "RPmag",
    "BP-RP",
    "Teff",
]
abrildf = pd.read_fwf(fcat, names=names, header=None, index=None)

```

### Calculate HEALPix indexes of CV catalog and use to xmatch

Calculate indexes

```python
import os
import pandas as pd
from astropy.coordinates import ICRS, SkyCoord
from astropy_healpix import HEALPix
from astropy import units as u
dir = "/Users/troyraen/Documents/broker/troy/troy/AbrilCVs"
fcat = f"{dir}/J_MNRAS_492_L40/catalog.dat"
fcat_condensed = f"{dir}/J_MNRAS_492_L40/catalog_condensed.dat"

def radec_to_skycoord(row):
    return SkyCoord(row["RAdeg"], row["DEdeg"], frame='icrs', unit='deg')
def skycoord_to_healpix(row):
    return hp.skycoord_to_healpix(row['SkyCoord'])

# load Abril CV catalog
abrildf = pd.read_fwf(fcat, names=names, header=None, index=None)
coords = SkyCoord(cv['RAdeg'], cv['DEdeg'], frame='icrs', unit='deg')
# instantiate pixelization
n = 17
nside = 2**n
frame = ICRS()
order = 'nested'
hp = HEALPix(nside=nside, order=order, frame=frame)

# calculate indexes
abrildf['SkyCoord'] = abrildf.apply(radec_to_skycoord, axis=1)
abrildf[f'hp_{n}_index'] = abrildf.apply(skycoord_to_healpix, axis=1)

# save csv
keep_cols = ['Name', 'RAdeg', 'DEdeg', 'hp_17_index']
abrildf[keep_cols].to_csv(fcat_condensed, index=False)
```

Xmatch

```python
import timeit

adf = pd.read_csv(fcat_condensed)

max_sep = 5.0 * u.arcsec
alert = {'ra': 313.2196851961664, 'dec': -2.6646887231578, 'Name': "J2052-0239"}
alertcoords = SkyCoord(alert['ra'], alert['dec'], frame='icrs', unit='deg')

# time without HEALPix
start = timeit.default_timer()
adf['SkyCoord'] = adf.apply(radec_to_skycoord, axis=1)
matches_wo = {}
for _, cv in adf.iterrows():
    if alertcoords.separation(cv['SkyCoord']) <= max_sep:
        matches_wo[alert['Name']] = (alert, cv)
stop = timeit.default_timer()
print('Time: ', stop - start)

# time with HEALPix
start = timeit.default_timer()
idxs = hp.cone_search_skycoord(alertcoords, radius=max_sep)  # all pixels within max_sep
matches_w = {}
for _, cv in adf.iterrows():
    if cv['hp_17_index'] in idxs:
        if alertcoords.separation(radec_to_skycoord(cv)) <= max_sep:
            matches_w[alert['Name']] = (alert, cv)
stop = timeit.default_timer()
print('Time: ', stop - start)
```

### Try TNS CVs

```python
# get positions of TNS CVs
df = pd.read_csv(ftns)
ztfdf = df.loc[df['Disc. Internal Name'].fillna("").str.startswith("ZTF")]
objectIds = list(ztfdf['Disc. Internal Name'].unique())
dataset = 'ztf_alerts'
table = 'DIASource'
query = f"""
    SELECT objectId, candid, ra, dec
    FROM `{project_id}.{dataset}.{table}`
    WHERE objectId IN ('{"','".join(objectIds)}')
"""
bqdf = gcp_utils.query_bigquery(query).to_dataframe()
cleandf = bqdf.sort_values('candid', ascending=False).drop_duplicates(subset='objectId', keep='first')

# xmatch
max_sep = 50.0 * u.arcsec
matches = {}
for _, cv in abrildf.iterrows():
    cvcoords = SkyCoord(cv['RAdeg'], cv['DEdeg'], frame='icrs', unit='deg')
    for _, alert in cleandf.iterrows():
        alertcoords = SkyCoord(alert['ra'], alert['dec'], frame='icrs', unit='deg')
        if alertcoords.separation(cvcoords) <= max_sep:
            matches[alert['objectId']] = (alert, cv)
```

### Try some ASAS-SN data

```python
# load asassn
asndf = pd.read_csv(fasn)
max_sep = 1.0 * u.arcsec
matches = {}
for _, cv in abrildf.iterrows():
    cvcoords = SkyCoord(cv['RAdeg'], cv['DEdeg'], frame='icrs', unit='deg')
    for _, asn in asndf.iterrows():
        alertcoords = SkyCoord(asn['raj2000'], asn['dej2000'], frame='icrs', unit='deg')
        if alertcoords.separation(cvcoords) <= max_sep:
            matches[asn['source_id']] = (asn, cv)
            break
```


### Test Cloud Run module

```python
from broker_utils import gcp_utils

msgs = gcp_utils.pull_pubsub('ztf-loop', msg_only=False)
msg = msgs[0]
alert_dict = data_utils.decode_alert(msg.message.data)
```