Source code for pbjam.star

"""

The `star' class is the core of PBjam and refers to a single target that is to 
be peakbagged. Each `star' instance is assigned an ID and physical input 
parameters, as well as a time series or power spectrum. 

The different steps in the peakbagging process are then passed the `star' 
instance, updating it with the results of each step. The outputs of each step
are stored in a dedicated directory created with the star ID.

The `session' class wraps one or more star class instances and peakbags them all
sequentially. The recommended use of PBjam is the use the `session' class, and
only use the `star' class for more granular control of the peakbagging process.

"""

import os, warnings, re, time
from .asy_peakbag import asymptotic_fit
from .priors import kde
from .peakbag import peakbag
from .jar import get_priorpath, to_log10, references, isvalid
from .plotting import plotting
import pandas as pd
import numpy as np
from astroquery.mast import ObservationsClass as AsqMastObsCl
from astroquery.mast import Catalogs
from astroquery.simbad import Simbad
import astropy.units as units


[docs]class star(plotting):
    """ Class for each star to be peakbagged

    Additional attributes are added for each step of the peakbagging process

    Note spectrum is flattened (background divided out.)

    Examples
    --------
    Peakbag using the star class. Note that the star class only takes Lightkurve
    periodograms, pg, as spectrum input. 

    >>> st = pbjam.star(ID='KIC4448777', pg=pg, numax=[220.0, 3.0], 
                           dnu=[16.97, 0.01], teff=[4750, 100],
                           bp_rp = [1.34, 0.01])
    >>> st(make_plots=True)

    Parameters
    ----------
    ID : string, int
        Target identifier. If custom timeseries/periodogram is provided, it
        must be resolvable by LightKurve (KIC, TIC, EPIC, HD, etc.).
    pg : lightkurve.periodogram.Periodogram object
        A lightkurve periodogram object containing frequencies in units of
        microhertz and power (in arbitrary units).
    numax : list
        List of the form [numax, numax_error]. 
    dnu : list
        List of the form [dnu, dnu_error]. 
    teff : list
        List of the form [teff, teff_error]. 
    bp_rp : list
        List of the form [bp_rp, bp_rp_error]. 
    path : str, optional
        The path at which to store output. If no path is set but make_plots is
        True, output will be saved in the current working directory. Default is
        the current working directory.
    prior_file : str, optional
        Path to the csv file containing the prior data. Default is
        pbjam/data/prior_data.csv

    Attributes
    ----------
    f : array
        Array of power spectrum frequencies
    s : array
        power spectrum

    """

    def __init__(self, ID, pg, numax, dnu, teff=[None,None], bp_rp=[None,None], 
                 path=None, prior_file=None):

        self.ID = ID

        if numax[0] < 25:
            warnings.warn('The input numax is less than 25. The prior is not well defined here, so be careful with the result.')
        self.numax = numax
        self.dnu = dnu

        self.references = references()
        self.references._addRef(['numpy', 'python', 'lightkurve', 'astropy'])
        
        teff, bp_rp = self._checkTeffBpRp(teff, bp_rp)
        self.teff = teff
        self.bp_rp = bp_rp

        self.pg = pg.flatten()  # in case user supplies unormalized spectrum
        self.f = self.pg.frequency.value
        self.s = self.pg.power.value

        self._obs = {'dnu': self.dnu, 'numax': self.numax, 'teff': self.teff,
                     'bp_rp': self.bp_rp}
        self._log_obs = {x: to_log10(*self._obs[x]) for x in self._obs.keys() if x != 'bp_rp'}

        self._set_outpath(path)

        if prior_file is None:
            self.prior_file = get_priorpath()
        else:
            self.prior_file = prior_file
            

    def _checkTeffBpRp(self, teff, bp_rp):
        """ Set the Teff and/or bp_rp values
        
        Checks the input Teff and Gbp-Grp values to see if any are missing.
        
        If Gbp-Grp is missing it will be looked up online either from the TIC or 
        the Gaia archive.
        
        Teff and Gbp-Grp provide a lot of the same information, so only one of
        them need to be provided to start with. If one is not provided, PBjam
        will assume a wide prior on it.
        
        Parameters
        ----------
        teff : list
            List of the form [teff, teff_error]. For multiple targets, use a list
            of lists.
        bp_rp : list
            List of the form [bp_rp, bp_rp_error]. For multiple targets, use a list
            of lists.
        
        Returns
        -------
        teff : list
            The checked teff value. List of the form [teff, teff_error]. 
        bp_rp : list
            The checked bp_rp value. List of the form [bp_rp, bp_rp_error]. 
        """
        
        if isvalid(bp_rp[0]) is False:
            try:
                bp_rp = [get_bp_rp(self.ID), 0.1]
            except:
                bp_rp = [np.nan, np.nan]
            
            
        if not isvalid(teff[0]) and not isvalid(bp_rp[0]):
            raise ValueError('Must provide either teff or bp_rp arguments when initializing the star class.')
        elif not isvalid(teff[0]):
            teff = [4889, 1500] # these are rough esimates from the prior
        elif not np.isfinite(bp_rp[0]):
            bp_rp = [1.2927, 0.5] # these are rough esimates from the prior
            
        self.references._addRef(['Evans2018'])
        
        return teff, bp_rp

    def _get_outpath(self, fname):
        """  Get basepath or make full file path name.
        
        Convenience function for either setting the base path name for the star,
        or if given fname as input, will append this to the basepath name to 
        create a full path to the file in question. 

        Parameters
        ----------
        fname : str, optional
            If not None, will append this to the pathname of the star. Use this
            to store files such as plots or tables.
        
        Returns
        -------
        path : str
            If fname is None, path is the path name of the star. Otherwise it is
            the full file path name for the file in question.
        """
    
        if fname is None:
            return self.path
        elif isinstance(fname, str):
            path = os.path.join(*[self.path, fname])
        else:
            raise ValueError(f'Unrecognized input {fname}.')
        
        if not os.path.isdir(self.path):
            raise IOError(f'You are trying to access {self.path} which is a directory that does not exist.')
        else:
            return path

    def _set_outpath(self, path):
        """ Sets the path attribute for star

        If path is a string it is assumed to be a path name, if not the
        current working directory will be used.

        Attempts to create an output directory for all the results that PBjam
        produces. A directory is created when a star class instance is
        initialized, so a session might create multiple directories.

        Parameters
        ----------
        path : str
            Directory to place the star subdirectory.

        """

        if isinstance(path, str):
            # If path is str, presume user wants to put stuff somewhere specific.
            self.path = os.path.join(*[path, f'{self.ID}'])
        else:
            # Otherwise just create a subdir in cwd.
            self.path = os.path.join(*[os.getcwd(), f'{self.ID}'])

        # Check if self.path exists, if not try to create it
        if not os.path.isdir(self.path):
            try:
                os.makedirs(self.path)
            except Exception as ex:
                message = "Could not create directory for Star {0} because an exception of type {1} occurred. Arguments:\n{2!r}".format(self.ID, type(ex).__name__, ex.args)
                print(message)



[docs]    def run_kde(self, bw_fac=1.0, make_plots=False, store_chains=False):
        """ Run all steps involving KDE.

        Starts by creating a KDE based on the prior data sample. Then samples
        this KDE for initial starting positions for asy_peakbag.

        Parameters
        ----------
        bw_fac : float
            Scaling factor for the KDE bandwidth. By default the bandwidth is
            automatically set, but may be scaled to adjust for sparsity of the
            prior sample.
        make_plots : bool, optional
            Whether or not to produce plots of the results. Default is False.
        store_chains : bool, optional
            Whether or not to store posterior samples on disk. Default is False.

        """

        print('Starting KDE estimation')
        
        # Init
        kde(self)

        # Call
        self.kde(dnu=self.dnu, numax=self.numax, teff=self.teff,
                 bp_rp=self.bp_rp, bw_fac=bw_fac)

        # Store          
        if make_plots:
            self.kde.plot_corner(path=self.path, ID=self.ID,
                                 savefig=make_plots)
            self.kde.plot_spectrum(pg=self.pg, path=self.path, ID=self.ID,
                                   savefig=make_plots)
            self.kde.plot_echelle(path=self.path, ID=self.ID,
                                  savefig=make_plots)
            
            self.references._addRef('matplotlib')

        if store_chains:
            kde_samps = pd.DataFrame(self.kde.samples, columns=self.kde.par_names)
            kde_samps.to_csv(self._get_outpath(f'kde_chains_{self.ID}.csv'), index=False)
            

[docs]    def run_asy_peakbag(self, norders, make_plots=False,
                        store_chains=False, method='emcee', 
                        developer_mode=False):
        """ Run all steps involving asy_peakbag.

        Performs a fit of the asymptotic relation to the spectrum (l=2,0 only),
        using initial guesses and prior for the fit parameters from KDE.

        Parameters
        ----------
        norders : int
            Number of orders to include in the fits.
        make_plots : bool, optional
            Whether or not to produce plots of the results. Default is False.
        store_chains : bool, optional
            Whether or not to store posterior samples on disk. Default is False.
        method : string
            Method to be used for sampling the posterior. Options are 'emcee' or
            'cpnest. Default method is 'emcee' that will call emcee, alternative
            is 'cpnest' to call nested sampling with CPnest.
        developer_mode : bool
            Run asy_peakbag in developer mode. Currently just retains the input 
            value of dnu and numax as priors, for the purposes of expanding
            the prior sample. Important: This is not good practice for getting 
            science results!
            
        """

        print('Starting asymptotic peakbagging')
        # Init
        asymptotic_fit(self, norders=norders)

        # Call
        self.asy_fit(method, developer_mode)
        self.references._addRef(method)

        # Store
        self.asy_fit.summary.to_csv(self._get_outpath(f'asymptotic_fit_summary_{self.ID}.csv'),
                                    index=True, index_label='name')
        self.asy_fit.modeID.to_csv(self._get_outpath(f'asymptotic_fit_modeID_{self.ID}.csv'),
                                   index=False)
        if make_plots:
            self.asy_fit.plot_spectrum(path=self.path, ID=self.ID,
                                       savefig=make_plots)
            self.asy_fit.plot_corner(path=self.path, ID=self.ID,
                                     savefig=make_plots)
            self.asy_fit.plot_echelle(path=self.path, ID=self.ID,
                                      savefig=make_plots)
            self.references._addRef('matplotlib')

        if store_chains:
            asy_samps = pd.DataFrame(self.asy_fit.samples, columns=self.asy_fit.par_names)
            asy_samps.to_csv(self._get_outpath(f'asymptotic_fit_chains_{self.ID}.csv'), index=False)

    
[docs]    def run_peakbag(self, model_type='simple', tune=1500, nthreads=1,
                    make_plots=False, store_chains=False):
        """  Run all steps involving peakbag.

        Performs fit using simple Lorentzian profile pairs to subsections of 
        the power spectrum, based on results from asy_peakbag.

        Parameters
        ----------
        model_type : str
            Can be either 'simple' or 'model_gp' which sets the type of mode
            width model. Defaults is 'simple'.
        tune : int, optional
            Numer of tuning steps passed to pm.sample. Default is 1500.
        nthreads : int, optional.
            Number of processes to spin up in pymc3. Default is 1.
        make_plots : bool, optional.
            Whether or not to produce plots of the results. Default is False.
        store_chains : bool, optional
            Whether or not to store posterior samples on disk. Default is False.

        """

        print('Starting peakbagging')
        # Init
        peakbag(self)

        # Call
        self.peakbag(model_type=model_type, tune=tune, nthreads=nthreads)

        # Store
        self.peakbag.summary.to_csv(self._get_outpath(f'peakbag_summary_{self.ID}.csv'),
                                    index_label='name')
            
        if make_plots:
            self.peakbag.plot_spectrum(path=self.path, ID=self.ID,
                                       savefig=make_plots)
            self.peakbag.plot_echelle(path=self.path, ID=self.ID, 
                                      savefig=make_plots)
            self.references._addRef('matplotlib')

        if store_chains:
            peakbag_samps = pd.DataFrame(self.peakbag.samples, columns=self.peakbag.par_names)
            peakbag_samps.to_csv(self._get_outpath(f'peakbag_chains_{self.ID}.csv'), index=False)
            

    def __call__(self, bw_fac=1.0, norders=8, model_type='simple', tune=1500,
                 nthreads=1, make_plots=True, store_chains=False, 
                 asy_sampling='emcee', developer_mode=False):
        """ Perform all the PBjam steps

        Starts by running KDE, followed by Asy_peakbag and then finally peakbag.
        
        Parameters
        ----------
        bw_fac : float, optional.
            Scaling factor for the KDE bandwidth. By default the bandwidth is
            automatically set, but may be scaled to adjust for sparsity of the 
            prior sample. Default is 1.
        norders : int, optional.
            Number of orders to include in the fits. Default is 8.
        model_type : str, optional.
            Can be either 'simple' or 'model_gp' which sets the type of mode 
            width model. Defaults is 'simple'. 
        tune : int, optional
            Numer of tuning steps passed to pm.sample. Default is 1500.
        nthreads : int, optional.
            Number of processes to spin up in pymc3. Default is 1.
        make_plots : bool, optional.
            Whether or not to produce plots of the results. Default is False.
        store_chains : bool, optional
            Whether or not to store posterior samples on disk. Default is False.
        asy_sampling : string
            Method to be used for sampling the posterior in asy_peakbag. Options
            are 'emcee' or 'cpnest. Default method is 'emcee' that will call 
            emcee, alternative is 'cpnest' to call nested sampling with CPnest.
        developer_mode : bool
            Run asy_peakbag in developer mode. Currently just retains the input 
            value of dnu and numax as priors, for the purposes of expanding
            the prior sample. Important: This is not good practice for getting 
            science results!    
        """

        self.run_kde(bw_fac=bw_fac, make_plots=make_plots, store_chains=store_chains)

        self.run_asy_peakbag(norders=norders, make_plots=make_plots,
                             store_chains=store_chains, method=asy_sampling,
                             developer_mode=developer_mode)

        self.run_peakbag(model_type=model_type, tune=tune, nthreads=nthreads,
                         make_plots=make_plots, store_chains=store_chains)

        self.references._addRef('pandas')

def _querySimbad(ID):
    """ Query any ID at Simbad for Gaia DR2 source ID.
    
    Looks up the target ID on Simbad to check if it has a Gaia DR2 ID.
    
    The input ID can be any commonly used identifier, such as a Bayer 
    designation, HD number or KIC.
    
    Returns None if there is not a valid cross-match with GDR2 on Simbad.
    
    Notes
    -----
    TIC numbers are note currently listed on Simbad. Do a separate MAST quiry 
    for this.
    
    Parameters
    ----------
    ID : str
        Target identifier. If Simbad can resolve the name then it should work. 
        
    Returns
    -------
    gaiaID : str
        Gaia DR2 source ID. Returns None if no Gaia ID is found.   
    """
    
    print('Querying Simbad for Gaia ID')

    try:
        job = Simbad.query_objectids(ID)
    except:
        print(f'Unable to resolve {ID} with Simbad')
        return None
    
    for line in job['ID']:
        if 'Gaia DR2' in line:
            return line.replace('Gaia DR2 ', '')
    return None

def _queryTIC(ID, radius = 20):
    """ Query TIC for bp-rp value
    
    Queries the TIC at MAST to search for a target ID to return bp-rp value. The
    TIC is already cross-matched with the Gaia catalog, so it contains a bp-rp 
    value for many targets (not all though).
    
    For some reason it does a cone search, which may return more than one 
    target. In which case the target matching the ID is found in the returned
    list. 
    
    Returns None if the target does not have a GDR3 ID.
    
    Parameters
    ----------
    ID : str
        The TIC identifier to search for.
    radius : float, optional
        Radius in arcseconds to use for the sky cone search. Default is 20".
    
    Returns
    -------
    bp_rp : float
        Gaia bp-rp value from the TIC.   
    """
    
    print('Querying TIC for Gaia bp-rp values.')
    job = Catalogs.query_object(objectname=ID, catalog='TIC', objType='STAR', 
                                radius = radius*units.arcsec)

    if len(job) > 0:
        idx = job['ID'] == str(ID.replace('TIC','').replace(' ', ''))
        return float(job['gaiabp'][idx] - job['gaiarp'][idx]) #This should crash if len(result) > 1.
    else:
        return None

def _queryMAST(ID):
    """ Query any ID at MAST
    
    Sends a query for a target ID to MAST which returns an Astropy Skycoords 
    object with the target coordinates.
    
    ID can be any commonly used identifier such as a Bayer designation, HD, KIC,
    2MASS or other name.
    
    Parameters
    ----------
    ID : str
        Target identifier
    
    Returns
    -------
    job : astropy.Skycoords
        An Astropy Skycoords object with the target coordinates.
    
    """

    print(f'Querying MAST for the {ID} coordinates.')
    mastobs = AsqMastObsCl()

    try:            
        return mastobs.resolve_object(objectname=ID)
    except:
        return None

def _queryGaia(ID=None, coords=None, radius=2):
    """ Query Gaia archive for bp-rp
    
    Sends an ADQL query to the Gaia archive to look up a requested target ID or
    set of coordinates. 
        
    If the query is based on coordinates a cone search will be performed and the
    closest target is returned. Provided coordinates must be astropy.Skycoords.
    
    Parameters
    ----------
    ID : str
        Gaia source ID to search for.
    coord : astropy.Skycoords
        An Astropy Skycoords object with the target coordinates. Must only 
        contain one target.
    radius : float, optional
        Radius in arcseconds to use for the sky cone search. Default is 20".
    
    Returns
    -------
    bp_rp : float
        Gaia bp-rp value of the requested target from the Gaia archive.  
    """
    
    from astroquery.gaia import Gaia

    if ID is not None:
        print('Querying Gaia archive for bp-rp values by target ID.')
        adql_query = "select * from gaiadr2.gaia_source where source_id=%s" % (ID)
        try:
            job = Gaia.launch_job(adql_query).get_results()
        except:
            return None
        return float(job['bp_rp'][0])
    
    elif coords is not None:
        print('Querying Gaia archive for bp-rp values by target coordinates.')
        ra = coords.ra.value
        dec = coords.dec.value
        adql_query = f"SELECT DISTANCE(POINT('ICRS', {ra}, {dec}), POINT('ICRS', ra, dec)) AS dist, * FROM gaiaedr3.gaia_source WHERE 1=CONTAINS(  POINT('ICRS', {ra}, {dec}),  CIRCLE('ICRS', ra, dec,{radius})) ORDER BY dist ASC"
        try:
            job = Gaia.launch_job(adql_query).get_results()
        except:
            return None
        return float(job['bp_rp'][0])
    else:
        raise ValueError('No ID or coordinates provided when querying the Gaia archive.')

def _format_name(ID):
    """ Format input ID
    
    Users tend to be inconsistent in naming targets, which is an issue for 
    looking stuff up on, e.g., Simbad. This function formats the ID so that 
    Simbad doesn't throw a fit.
    
    If the name doesn't look like anything in the variant list it will only be 
    changed to a lower-case string.
    
    Parameters
    ----------
    ID : str
        Name to be formatted.
    
    Returns
    -------
    ID : str
        Formatted name
        
    """

    ID = str(ID)
    ID = ID.lower()
    
    # Add naming exceptions here
    variants = {'KIC': ['kic', 'kplr', 'KIC'],
                'Gaia EDR3': ['gaia edr3', 'gedr3', 'edr3', 'Gaia EDR3'],
                'Gaia DR2': ['gaia dr2', 'gdr2', 'dr2', 'Gaia DR2'],
                'Gaia DR1': ['gaia dr1', 'gdr1', 'dr1', 'Gaia DR1'], 
                'EPIC': ['epic', 'ktwo', 'EPIC'],
                'TIC': ['tic', 'tess', 'TIC']
               }
    
    fname = None
    for key in variants:   
        for x in variants[key]:
            if x in ID:
                fname = ID.replace(x,'')
                fname = re.sub(r"\s+", "", fname, flags=re.UNICODE)
                fname = key+' '+str(int(fname))
                return fname 
    return ID

[docs]def get_bp_rp(ID):
    """ Search online for bp_rp values based on ID.
       
    First a check is made to see if the target is a TIC number, in which case 
    the TIC will be queried, since this is already cross-matched with Gaia DR2. 
    
    If it is not a TIC number, Simbad is queried to identify a possible Gaia 
    source ID. 
    
    As a last resort MAST is queried to provide the target coordinates, after 
    which a Gaia query is launched to find the closest target. The default 
    search radius is 20" around the provided coordinates. 
    
    Parameters
    ----------
    ID : str
        Target identifier to search for.
    
    Returns
    -------
    bp_rp : float
        Gaia bp-rp value for the target. Is nan if no result is found or the
        queries failed. 
    """
    
    time.sleep(1) # Sleep timer to prevent temporary blacklisting by CDS servers.
    
    ID = _format_name(ID)
    
    if 'TIC' in ID:
        bp_rp = _queryTIC(ID)          

    else:
        try:
            gaiaID = _querySimbad(ID)
            bp_rp = _queryGaia(ID=gaiaID)
        except:
            try:
                coords = _queryMAST(ID)
                bp_rp = _queryGaia(coords=coords)
            except:
                print(f'Unable to retrieve a bp_rp value for {ID}.')
                bp_rp = np.nan
    return bp_rp
Source code for pbjam.star

PBjam

Navigation

Related Topics