
""" Setup PBjam sessions and perform mode ID and peakbagging

This module contains the input layer for setting up PBjam sessions for 
peakbagging solar-like oscillators. This is the easiest way to handle targets 
in PBjam.

It's possible to manually initialize star class instances and do all the
fitting that way, but it's simpler to use the session class, which handles
everything, including formatting of the inputs.

A PBjam session is started by initializing the session class instance with a
target ID, $\nu_{max}$, a large separation, effective temperature and Gaia bp_rp
color. The class instance is then called to run through all the peakbagging
steps automatically. See the Session class documentation for an example.

Lists of the above can be provided for multiple targets, but it's often simpler
to just provide PBjam with a dictionary or Pandas dataframe. See mytgts.csv
for a template.
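
For example, a minimal target table might look like the following (values
are illustrative; pandas is assumed to be imported as pd):

>>> mytgts = pd.DataFrame({'ID': ['KIC4448777'],
...                        'numax': [220.0], 'numax_err': [3.0],
...                        'dnu': [16.97], 'dnu_err': [0.01]})
>>> jam_sess = pbjam.session(dictlike=mytgts)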

Custom timeseries or periodograms can be provided as either file pathnames,
numpy arrays, or lightkurve.LightCurve/lightkurve.periodogram objects. If
nothing is provided PBjam will download the data automatically using
LightKurve.
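
For example, a manually reduced time series can be passed as an array of
shape (2,N) (the pathname here is hypothetical):

>>> t, f = np.genfromtxt('mylightcurve.csv', delimiter=',').T
>>> jam_sess = pbjam.session(ID='mytarget', numax=[220.0, 3.0],
...                          dnu=[16.97, 0.01], timeseries=np.array([t, f]))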

Specific quarters, campaigns or sectors can be requested with the relevant
keyword (i.e., 'quarter' for KIC, etc.). If none of these are provided, PBjam
will download all available data, using the long cadence versions by default.

Once initialized, the session class contains a list of star class instances
for each requested target, with associated spectra for each.

The next step is to perform mode identification on the spectra. At the moment
PBjam only supports the asymptotic relation mode ID method. Additional methods
may be added in the future.

Finally the peakbagging method takes the output from the modeID and performs
a proper HMC peakbagging run to get the unparameterized mode frequencies.

Plotting the results of each stage is also possible.

Note
----
For automatic downloads the long cadence data set is used by default, so set
exptime to 60 (short cadence) for main-sequence targets.

"""

import lightkurve as lk
from lightkurve.periodogram import Periodogram
import numpy as np
import astropy.units as units
import pandas as pd
import os, pickle, warnings
from .star import star, _format_name
from datetime import datetime
from .jar import references

def _organize_sess_dataframe(vardf):
    """ Takes input dataframe and tidies it up.

    Checks to see if required columns are present in the input dataframe,
    and adds optional columns if they don't exist, containing None values.

    Parameters
    ----------
    vardf : Pandas.DataFrame
        Input dataframe
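
    Examples
    --------
    A sketch with the minimal required columns (values illustrative):

    >>> vardf = pd.DataFrame({'ID': ['KIC4448777'],
    ...                       'numax': [220.0], 'numax_err': [3.0],
    ...                       'dnu': [16.97], 'dnu_err': [0.01]})
    >>> _organize_sess_dataframe(vardf)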
        
    """
    
    keys = ['ID', 'numax', 'dnu', 'numax_err', 'dnu_err']
    if not all(x in vardf.keys() for x in keys):
        raise KeyError('Some of the required keywords were missing.')

    N = len(vardf)
    singles = ['exptime', 'campaign', 'sector', 'month', 'quarter', 'mission']
    doubles = ['teff', 'bp_rp']

    for key in singles:
        if key not in vardf.keys():
            vardf[key] = np.array([None]*N)

    for key in doubles:
        if key not in vardf.keys():
            vardf[key] = np.array([None]*N)
            vardf[key+'_err'] = np.array([None]*N)

    if 'timeseries' not in vardf.keys():
        _format_col(vardf, None, 'timeseries')
    if 'spectrum' not in vardf.keys():
        _format_col(vardf, None, 'spectrum')


def _organize_sess_input(**vardct):
    """ Takes input and organizes them in a dataframe.

    Checks to see if required inputs are present and inserts them into a
    dataframe. Any optional columns that are not included in the input are
    added as empty columns.

    Parameters
    ----------
    vardct : objects
        Variable inputs to Session class to be arranged into a dataframe

    Returns
    -------
    vardf : Pandas.DataFrame
        Dataframe containing the inputs from Session class call.
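
    Examples
    --------
    A minimal sketch for a single target; every keyword must be supplied,
    using None for those not set (values illustrative):

    >>> df = _organize_sess_input(ID='4448777', numax=[220.0, 3.0],
    ...                           dnu=[16.97, 0.01], teff=None, bp_rp=None,
    ...                           exptime=1800, campaign=None, sector=None,
    ...                           month=None, quarter=None, mission=None)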
        
    """
    
    vardf = pd.DataFrame({'ID': np.array(vardct['ID']).reshape((-1, 1)).flatten()})

    N = len(vardf)
    singles = ['exptime', 'campaign', 'sector', 'month', 'quarter', 'mission']
    doubles = ['numax', 'dnu', 'teff', 'bp_rp']

    for key in singles:
        if not vardct[key]:
            vardf[key] = np.array([None]*N)
        else:
            vardf[key] = vardct[key]

    for key in doubles:
        if not vardct[key]:
            vardf[key] = np.array([None]*N)
            vardf[key+'_err'] = np.array([None]*N)
        else:
            vardf[key] = np.array(vardct[key]).reshape((-1, 2))[:, 0].flatten()
            vardf[key+'_err'] = np.array(vardct[key]).reshape((-1, 2))[:, 1].flatten()
    return vardf

def _sort_lc(lc):
    """ Sort a lightcurve in Lightkurve object.

    Lightkurve lightcurves are not necessarily sorted in time, which causes
    an error in periodogram.

    Parameters
    ----------
    lc : Lightkurve.LightCurve instance
        Lightkurve object to be modified

    Returns
    -------
    lc : Lightkurve.LightCurve instance
        The sorted Lightkurve object   
    """

    sidx = np.argsort(lc.time)
    lc.time = lc.time[sidx]
    lc.flux = lc.flux[sidx]
    return lc


def _query_lightkurve(ID, download_dir, use_cached, lkwargs):
    """ Get time series using LightKurve
    
    Performs a search for available fits files on MAST and then downloads
    them if necessary.
    
    The search results are cached with an expiration of 30 days. If a search
    result is found, the fits file cache is searched for a matching file list
    which is then used.
    
    Parameters
    ----------
    ID : str
        ID string of the target
    download_dir : str
        Directory for fits file and search results caches. 
    use_cached : bool, optional
        Whether or not to use the cached time series. Default is True.
    lkwargs : dict
        Dictionary to be passed to LightKurve  
    
    Returns
    -------
    lc : Lightkurve.LightCurve instance
        The concatenated time series for the target.
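
    Examples
    --------
    A sketch of a typical call (requires network access; the target and
    exptime are illustrative):

    >>> lkwargs = {'mission': None, 'exptime': 1800}
    >>> lc = _query_lightkurve('KIC4448777', None, True, lkwargs)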
    """
    
    ID = _format_name(ID)
    
    _set_mission(ID, lkwargs)
    
    ID = _getMASTidentifier(ID, lkwargs)
    
    search = _perform_search(ID, lkwargs, use_cached, download_dir)
    
    fitsFiles = _check_lc_cache(search, lkwargs['mission'], download_dir)
    
    lc = _load_fits(fitsFiles, lkwargs['mission'])
    
    lc = _clean_lc(lc)
    
    return lc
    

def _arr_to_lk(x, y, name, typ):
    """ LightKurve object from input.

    Creates either a lightkurve.LightCurve or lightkurve.periodogram object
    from the input arrays.

    Parameters
    ----------
    x : list-like
        First column of timeseries or periodogram (time/frequency).
    y : list-like
        Second column of timeseries or periodogram (flux/power).
    name : string
        Target ID
    typ : string
        Either timeseries or periodogram.

    Returns
    -------
    lkobj : object
        Either lightkurve.LightCurve or lightkurve.periodogram object
        depending on typ.
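
    Examples
    --------
    A sketch with toy arrays:

    >>> ts = _arr_to_lk([1., 2., 3.], [1.0, 0.9, 1.1], 'mytarget',
    ...                 'timeseries')
    >>> pg = _arr_to_lk([10., 20., 30.], [5., 9., 2.], 'mytarget', 'spectrum')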
    """
    
    if typ == 'timeseries':
        return lk.LightCurve(time=x, flux=y, targetid=name)
    elif typ == 'spectrum':
        return Periodogram(x*units.microhertz,
                           units.Quantity(y, None),
                           targetid=name)
    else:
        raise KeyError("Don't modify anything but spectrum and timeseries cols")


def _format_col(vardf, col, key):
    """ Add timeseries or spectrum column to dataframe based on input

    Based on the contents of col, will try to format col and add it as a column
    to vardf with column name key. col can be many things, so the decision is
    based mainly on the dimensionality of col.

    If dim = 0, it's assumed that col is either None, or a string, (for the
    latter it assumes there is then only one target).

    If dim = 1, it's assumed that col is a list-like object, consisting of
    either None or strings, these are passed along without modification.

    If dim = 2, col is assumed to be either a time series or power spectrum
    of shape (2,M), with time/frequency in 1st row and flux/power in the
    second.

    If dim = 3, it is assumed to be list of (2,M) arrays.

    In both of the latter cases col is converted to Lightkurve object(s).

    Parameters
    ----------
    vardf : pandas.DataFrame instance
        Pandas Dataframe instance to which either a timeseries or spectrum column
        will be added.
    col : object
        Input from Session call, corresponding to key
    key : str
        Name of column to add to vardf
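
    Examples
    --------
    A sketch of the dim = 2 case with a toy spectrum (values illustrative):

    >>> vardf = pd.DataFrame({'ID': ['mytarget']})
    >>> _format_col(vardf, np.array([[10., 20., 30.], [5., 9., 2.]]),
    ...             'spectrum')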

    """
    
    N = np.shape(vardf['ID'])[0]

    col = np.array(col, dtype=object)

    # If dim = 0, it's either none or a string
    if col.ndim == 0:
        if not col:
            # If none, then multiply up to length of ID
            vardf[key] = np.array([None]*N)
        else:
            # If string, then single target
            vardf[key] = np.array(col).reshape((-1, 1)).flatten()

    # If dim = 1, it's either a list of nones, strings or lightkurve objects
    elif col.ndim == 1:
        vardf[key] = col

    # if dim = 2, it's an array or tuple, with time and flux
    elif col.ndim == 2:
        x = np.array(col[0, :], dtype=float)
        y = np.array(col[1, :], dtype=float)
        vardf[key] = [_arr_to_lk(x, y, vardf['ID'][0], key)]

    # If dim = 3, it's a list of arrays or tuples
    elif col.ndim == 3:
        temp = np.array([], dtype=object)
        for i in range(N):
            x = np.array(col[i, 0, :], dtype=float)
            y = np.array(col[i, 1, :], dtype=float)
            temp = np.append(temp,
                             np.array([_arr_to_lk(x, y, vardf.loc[i, 'ID'], key)]))
        vardf[key] = temp
    else:
        raise ValueError('Unhandled input dimensionality in _format_col')

def _lc_to_lk(ID, tsIn, specIn, download_dir, use_cached, lkwargs):
    """ Convert time series column in dataframe to lk.LightCurve object

    Goes through the timeseries column in the dataframe and tries to convert
    it to a Lightkurve.LightCurve object. If string, it's assumed to be a file
    path name, if None, it will query the LightCurve cache locally or MAST if
    nothing is found. Skips column entries which are already LightKurve objects
    or if a spectrum for the star in question exists in the spectrum column.

    Parameters
    ----------
     ID : str
        ID string of the target
    tsIn : Lightkurve.LightCurve object
        Lightkurve.LightCurve with the time series of the target. If specIn is
        None, this will be used to compute the spectrum.
    specIn : str or Lightkurve.periodogram object
        Lightkurve.periodogram object containing the spectrum of the target.
    download_dir : str
        Directory for fits file and search results caches. 
    use_cached : bool
        Whether or not to use the cached time series. 
    lkwargs : dict
        Dictionary with arguments to be passed to lightkurve. 
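
    Examples
    --------
    A sketch of the case where a spectrum is already available, so no time
    series is loaded or downloaded:

    >>> spec = _arr_to_lk([10., 20., 30.], [5., 9., 2.], 'mytarget',
    ...                   'spectrum')
    >>> ts = _lc_to_lk('mytarget', None, spec, None, False, {})
    >>> ts is None
    True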
    """

    # Avoid cases where the light curve median is 0 (Lightkurve doesn't
    # handle this well). This may no longer be necessary.
    tinyoffset = 1
    

    if isinstance(tsIn, str):
        try:
            t, d = np.genfromtxt(tsIn, usecols=(0, 1), delimiter=',').T
        except (ValueError, OSError):
            try:
                t, d = np.genfromtxt(tsIn, usecols=(0, 1), delimiter=' ').T
            except (ValueError, OSError):
                raise IOError('Failed to read the provided ascii file. '
                              'Please check that it has the required '
                              '2-column format, and uses either comma or '
                              'white-space delimiters.')
        
        d += tinyoffset
        
        tsOut = lk.LightCurve(time=t, flux=d, targetid=ID)
        
    elif tsIn is None:
        if specIn:
            tsOut = None
        else:
            tsOut = _query_lightkurve(ID, download_dir, use_cached, lkwargs)

    elif tsIn.__module__ == lk.lightcurve.__name__:
        tsOut = tsIn
    else:
        raise TypeError("Can't handle this type of time series object")

    if tsOut:
        _sort_lc(tsOut)

    return tsOut


def _lk_to_pg(ID, tsIn, specIn):
    """ Convert spectrum column in dataframe to Lightkurve.periodgram objects

    Takes whatever is in the spectrum column of a dataframe and tries to turn it
    into a Lightkurve.periodogram object. If column entry is a string, it
    assumes it's a path name, if None, it will try to take what's in the
    timeseries column and compute periodogram based on that. If a periodogram
    object is already present, it'll just flatten it (to be safe) and then
    continue on.

    Parameters
    ----------
    ID : str
        ID string of the target
    tsIn : Lightkurve.LightCurve object
        Lightkurve.LightCurve with the time series of the target. If specIn is
        None, this will be used to compute the spectrum.
    specIn : str or Lightkurve.periodogram object
        Lightkurve.periodogram object containing the spectrum of the target.

    Returns
    -------
    specOut : Lightkurve.periodogram object
        Lightkurve.periodogram object containing the spectrum of the target.
        Note this is now flattened so that it corresponds to the SNR spectrum.
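
    Examples
    --------
    A sketch using a toy flat spectrum built with _arr_to_lk (values
    illustrative):

    >>> f = np.linspace(1., 300., 1000)
    >>> spec = _arr_to_lk(f, np.ones_like(f), 'mytarget', 'spectrum')
    >>> snr = _lk_to_pg('mytarget', None, spec)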
    """
    
    if isinstance(specIn, str):
        f, s = np.genfromtxt(specIn, usecols=(0, 1)).T
        specOut = lk.periodogram.Periodogram(f*units.microhertz,
                                             units.Quantity(s, None),
                                             targetid=ID).flatten()
        
    elif not specIn:
        specOut = tsIn.to_periodogram(freq_unit=units.microHertz, normalization='psd').flatten()
        
    elif specIn.__module__ == lk.periodogram.__name__:
        specOut = specIn.flatten()
        
    else:
        raise TypeError("Can't handle this type of time series object")

    return specOut



class session():
    """ Main class used to initiate peakbagging.

    Use this class to initialize a star class instance for one or more
    targets.

    Once initialized, calling the session class instance will execute a
    complete peakbagging run.

    Data can be provided in multiple different ways, the simplest of which is
    just to let PBjam query the MAST server. Otherwise arrays of
    timeseries/power spectra, lightkurve.LightCurve/lightkurve.periodogram
    objects, or just path names as strings, are also possible.

    The physical parameters, such as numax, dnu, teff and bp_rp, must each be
    provided at least as a list of length 2 for each star. This should contain
    the parameter value and its error estimate.

    For multiple targets all the above can be provided as lists, but the
    easiest way is to simply provide a dataframe from a csv file.

    By default, PBjam will download all the available data, favoring long
    cadence. Cadence and specific observing seasons (quarter, month, campaign,
    sector) can be specified for more detailed control. The data is downloaded
    using the Lightkurve API, and cleaned on the timeseries level by removing
    NaNs and outliers, and flattening the lightcurve (again using LightKurve's
    API). Note that the flattening process may impact the appearance of the
    granulation background.

    Note
    ----
    1. If you have a large directory of cached lightcurves or power spectra,
       it is best to supply the filenames to the files.
    2. Teff and bp_rp provide much of the same information, so providing both
       is not strictly necessary. However, for best results you should try to
       provide both. If bp_rp is omitted though, PBjam will attempt to find
       this value in online catalogs.
    3. PBjam will not combine time series from different missions.

    Examples
    --------
    Peakbagging run for a single target:

    >>> jam_sess = pbjam.session(ID='4448777', numax=[220.0, 3.0],
    ...                          dnu=[16.97, 0.01], teff=[4750, 100],
    ...                          bp_rp=[1.34, 0.01], exptime=1800)
    >>> jam_sess()

    Peakbagging run for multiple targets:

    >>> jam_sess = pbjam.session(dictlike=mydataframe)
    >>> jam_sess()

    Parameters
    ----------
    ID : string, optional
        Target identifier. Most commonly used identifiers can be used if you
        want PBjam to download the data (KIC, TIC, HD, Bayer etc.). If you
        provide data yourself the name can be any string.
    numax : list, optional
        List of the form [numax, numax_error]; a list of lists for multiple
        targets.
    dnu : list, optional
        List of the form [dnu, dnu_error]; a list of lists for multiple
        targets.
    teff : list, optional
        List of the form [teff, teff_error]; a list of lists for multiple
        targets.
    bp_rp : list, optional
        List of the form [bp_rp, bp_rp_error]; a list of lists for multiple
        targets.
    timeseries : object, optional
        Timeseries input. Leave as None for PBjam to download it
        automatically. Otherwise, arrays of shape (2,N), lightkurve.LightCurve
        objects, or strings for pathnames are accepted.
    spectrum : object, optional
        Spectrum input. Leave as None for PBjam to use the timeseries to
        compute it for you. Otherwise, arrays of shape (2,N),
        lightkurve.periodogram objects, or strings for pathnames are accepted.
    dictlike : pandas.DataFrame or dictionary, optional
        DataFrame, dictionary or record array with a list of targets and their
        properties. If string, PBjam will assume it's a pathname to a csv
        file. Specify timeseries and spectrum columns with file pathnames to
        use manually reduced data.
    store_chains : bool, optional
        Flag for storing the full set of samples from the MCMC run. Warning:
        if running multiple targets, make sure you have enough memory.
    use_cached : bool, optional
        Flag for using cached data. If fitting the same targets multiple
        times, use this to avoid downloading the data every time.
    cadence : string, deprecated
        This was used to specify the observation cadence for downloading via
        LightKurve. It was replaced by exptime in Lightkurve v2.
    exptime : str or int, optional
        Exposure time of the observations in seconds; argument for lightkurve
        to download the correct data type. The default is 1800 (Kepler long
        cadence), so if you're looking at main-sequence stars, make sure to
        manually set 60.
    campaign : int, optional
        Argument for lightkurve when requesting K2 data.
    sector : int, optional
        Argument for lightkurve when requesting TESS data.
    month : int, optional
        Argument for lightkurve when requesting Kepler short cadence data.
    quarter : int, optional
        Argument for lightkurve when requesting Kepler data.
    mission : str, optional
        Which mission to use data from. Default is all, so if your target has
        been observed by, e.g., both Kepler and TESS, LightKurve will throw an
        error.
    make_plots : bool, optional
        Whether or not to automatically generate diagnostic plots for the
        different stages of the peakbagging process.
    path : str, optional
        Path to store the plots and results for the various stages of the
        peakbagging process.
    download_dir : str, optional
        Directory to cache lightkurve downloads. Lightkurve will place the
        fits files in the default lightkurve cache path in your home
        directory.

    Attributes
    ----------
    stars : list
        Session will store star class instances in this list, based on the
        requested targets.
    pb_model_type : str
        Model to use for the mode widths in peakbag.

    """

    def __init__(self, ID=None, numax=None, dnu=None, teff=None, bp_rp=None,
                 timeseries=None, spectrum=None, dictlike=None,
                 use_cached=False, exptime=None, campaign=None, sector=None,
                 month=None, quarter=None, mission=None, path=None,
                 download_dir=None, cadence=None):

        if cadence is not None:
            raise ValueError('cadence is no longer a valid argument for '
                             'LightKurve, use exptime instead. Kepler long '
                             'cadence is 1800, and short is 60.')

        self.stars = []

        self.references = references()
        self.references._addRef(['python', 'pandas', 'numpy', 'astropy',
                                 'lightkurve'])

        if isinstance(dictlike, (dict, np.recarray, pd.DataFrame, str)):
            if isinstance(dictlike, str):
                vardf = pd.read_csv(dictlike)
            else:
                try:
                    vardf = pd.DataFrame.from_records(dictlike)
                except TypeError:
                    print('Unrecognized type in dictlike. Must be able to '
                          'convert to dataframe through '
                          'pandas.DataFrame.from_records()')

            if any([ID, numax, dnu, teff, bp_rp]):
                warnings.warn('Dictlike provided as input, ignoring other '
                              'input fit parameters.')

            _organize_sess_dataframe(vardf)

        elif ID:
            vardf = _organize_sess_input(ID=ID, numax=numax, dnu=dnu,
                                         teff=teff, bp_rp=bp_rp,
                                         exptime=exptime, campaign=campaign,
                                         sector=sector, month=month,
                                         quarter=quarter, mission=mission)
            _format_col(vardf, timeseries, 'timeseries')
            _format_col(vardf, spectrum, 'spectrum')
        else:
            raise TypeError('session.__init__ requires either ID or dictlike')

        if any(np.isnan(vardf['exptime'])) and any(vardf['timeseries']) and any(vardf['spectrum']):
            warnings.warn('If the target may have been observed at more than '
                          'one cadence, it is recommended that you set '
                          'exptime.')

        for i in vardf.index:
            lkwargs = {x: vardf.loc[i, x] for x in ['exptime', 'month',
                                                    'sector', 'campaign',
                                                    'quarter', 'mission']}

            vardf.at[i, 'timeseries'] = _lc_to_lk(vardf.loc[i, 'ID'],
                                                  vardf.loc[i, 'timeseries'],
                                                  vardf.loc[i, 'spectrum'],
                                                  download_dir, use_cached,
                                                  lkwargs)

            vardf.at[i, 'spectrum'] = _lk_to_pg(vardf.loc[i, 'ID'],
                                                vardf.loc[i, 'timeseries'],
                                                vardf.loc[i, 'spectrum'])

            self.stars.append(star(ID=vardf.loc[i, 'ID'],
                                   pg=vardf.loc[i, 'spectrum'],
                                   numax=vardf.loc[i, ['numax', 'numax_err']].values,
                                   dnu=vardf.loc[i, ['dnu', 'dnu_err']].values,
                                   teff=vardf.loc[i, ['teff', 'teff_err']].values,
                                   bp_rp=vardf.loc[i, ['bp_rp', 'bp_rp_err']].values,
                                   path=path))

        for i, st in enumerate(self.stars):
            if st.numax[0] > st.f[-1]:
                warnings.warn("Input numax is greater than the Nyquist "
                              "frequency for %s" % (st.ID))

    def __call__(self, bw_fac=1, norders=8, model_type='simple', tune=1500,
                 nthreads=1, verbose=False, make_plots=False,
                 store_chains=False, asy_sampling='emcee',
                 developer_mode=False):
        """ Call all the star class instances

        Once initialized, calling the session class instance will loop
        through all the stars that it contains, and call each one. This
        performs a full peakbagging run on each star in the session.

        Parameters
        ----------
        bw_fac : float, optional
            Scaling factor for the KDE bandwidth. By default the bandwidth is
            automatically set, but may be scaled to adjust for sparsity of
            the prior sample. Default is 1.
        norders : int, optional
            Number of orders to include in the fits. Default is 8.
        model_type : str, optional
            Can be either 'simple' or 'model_gp', which sets the type of mode
            width model. Default is 'simple'.
        tune : int, optional
            Number of tuning steps passed to pm.sample. Default is 1500.
        nthreads : int, optional
            Number of processes to spin up in pymc3. Default is 1.
        verbose : bool, optional
            Should PBjam say anything? Default is False.
        make_plots : bool, optional
            Whether or not to produce plots of the results. Default is False.
        store_chains : bool, optional
            Whether or not to store MCMC chains on disk. Default is False.
        asy_sampling : str, optional
            Which type of sampler to use for the asymptotic peakbagging. The
            options are 'emcee' and 'cpnest'. Default is 'emcee'.
        developer_mode : bool
            Run asy_peakbag in developer mode. Currently just retains the
            input values of dnu and numax as priors, for the purposes of
            expanding the prior sample. Important: This is not good practice
            for getting science results!

        """

        self.pb_model_type = model_type

        for i, st in enumerate(self.stars):
            # try:
            st(bw_fac=bw_fac, tune=tune, norders=norders,
               model_type=self.pb_model_type, make_plots=make_plots,
               store_chains=store_chains, nthreads=nthreads,
               asy_sampling=asy_sampling, developer_mode=developer_mode)

            self.references._reflist += st.references._reflist

            self.stars[i] = None

            # Crude way to send error messages that occur in star up to
            # Session without ending the session. Is there a better way?
            # except Exception as ex:
            #     message = ("Star {0} produced an exception of type {1}. "
            #                "Arguments:\n{2!r}".format(st.ID,
            #                                           type(ex).__name__,
            #                                           ex.args))
            #     print(message)


def _load_fits(files, mission):
    """ Read fits files into a Lightkurve object

    Parameters
    ----------
    files : list
        List of pathnames to fits files
    mission : str
        Which mission to download the data from.

    Returns
    -------
    lc : lightkurve.lightcurve.KeplerLightCurve object
        Lightkurve light curve object containing the concatenated set of
        quarters.

    """
    if mission in ['Kepler', 'K2']:
        lcs = [lk.lightcurvefile.KeplerLightCurveFile(file) for file in files]
        lcCol = lk.collections.LightCurveCollection(lcs)
        lc = lcCol.stitch()
    elif mission in ['TESS']:
        lcs = [lk.lightcurvefile.TessLightCurveFile(file) for file in files]
        lcCol = lk.collections.LightCurveCollection(lcs)
        lc = lcCol.stitch()
    return lc


def _set_mission(ID, lkwargs):
    """ Set mission keyword in lkwargs.

    If no mission is selected, will attempt to figure it out based on any
    prefixes in the ID string, and add this to the LightKurve keyword
    arguments dictionary.

    Parameters
    ----------
    ID : str
        ID string of the target
    lkwargs : dict
        Dictionary to be passed to LightKurve

    """
    if lkwargs['mission'] is None:
        if 'kic' in ID.lower():
            lkwargs['mission'] = 'Kepler'
        elif 'epic' in ID.lower():
            lkwargs['mission'] = 'K2'
        elif 'tic' in ID.lower():
            lkwargs['mission'] = 'TESS'
        else:
            lkwargs['mission'] = ('Kepler', 'K2', 'TESS')


def _search_and_dump(ID, lkwargs, search_cache):
    """ Get lightkurve search result online.

    Uses the lightkurve search_lightcurve to find the list of available data
    for a target ID. Stores the result in the
    ~/.lightkurve-cache/searchResults directory as a dictionary with the
    search result object and a timestamp.

    Parameters
    ----------
    ID : str
        ID string of the target
    lkwargs : dict
        Dictionary to be passed to LightKurve
    search_cache : str
        Directory to store the search results in.

    Returns
    -------
    resultDict : dict
        Dictionary with the search result object and timestamp.

    """
    current_date = datetime.now().isoformat()
    store_date = current_date[:current_date.index('T')].replace('-', '')

    search = lk.search_lightcurve(ID, exptime=lkwargs['exptime'],
                                  mission=lkwargs['mission'])

    resultDict = {'result': search, 'timestamp': store_date}

    fname = os.path.join(*[search_cache,
                           f"{ID}_{lkwargs['exptime']}.lksearchresult"])

    pickle.dump(resultDict, open(fname, "wb"))

    return resultDict


def _getMASTidentifier(ID, lkwargs):
    """ Return the KIC/TIC/EPIC identifier for a given ID.

    If the input ID is not a KIC/TIC/EPIC identifier, the target is looked up
    on MAST and the identifier is retrieved. If a mission is not specified,
    the set of observations with the most quarters/sectors etc. will be used.

    Parameters
    ----------
    ID : str
        Target ID
    lkwargs : dict
        Dictionary with arguments to be passed to lightkurve. In this case
        mission and exptime.

    Returns
    -------
    ID : str
        The KIC/TIC/EPIC ID of the target.

    """
    if not any([x in ID for x in ['KIC', 'TIC', 'EPIC']]):
        search = lk.search_lightcurve(ID, exptime=lkwargs['exptime'],
                                      mission=lkwargs['mission'])

        if len(search) == 0:
            raise ValueError(f'No results for {ID} found on MAST')

        maxFreqName = max(set(list(search.table['target_name'])),
                          key=list(search.table['target_name']).count)
        maxFreqObsCol = max(set(list(search.table['obs_collection'])),
                            key=list(search.table['obs_collection']).count)

        if maxFreqObsCol == 'TESS':
            prefix = 'TIC'
        else:
            prefix = ''

        temp_id = prefix + maxFreqName

        ID = _format_name(temp_id).replace(' ', '')
        lkwargs['mission'] = maxFreqObsCol
    else:
        ID = ID.replace(' ', '')
    return ID


def _perform_search(ID, lkwargs, use_cached, download_dir=None,
                    cache_expire=30):
    """ Find filenames related to target.

    Preferentially accesses cached search results, otherwise searches the
    MAST archive.

    Parameters
    ----------
    ID : str
        Target ID (must be KIC, TIC, or ktwo prefixed)
    lkwargs : dict
        Dictionary with arguments to be passed to lightkurve. In this case
        mission and exptime.
    use_cached : bool, optional
        Whether or not to use the cached time series. Default is True.
    download_dir : str, optional
        Directory for fits file and search results caches. Default is
        ~/.lightkurve-cache.
    cache_expire : int, optional
        Expiration time for the search cache results, in days. Cached
        searches older than this will be renewed. The default is 30 days.

    Returns
    -------
    search : lightkurve.search.SearchResult
        Search result from MAST.

    """
    # Set default lightkurve cache directory if nothing else is given
    if download_dir is None:
        download_dir = os.path.join(*[os.path.expanduser('~'),
                                      '.lightkurve-cache'])

    # Make the search cache dir if it doesn't exist
    cachepath = os.path.join(*[download_dir, 'searchResults',
                               lkwargs['mission']])
    if not os.path.isdir(cachepath):
        os.makedirs(cachepath)

    filepath = os.path.join(*[cachepath,
                              f"{ID}_{lkwargs['exptime']}.lksearchresult"])

    if os.path.exists(filepath) and use_cached:
        resultDict = pickle.load(open(filepath, "rb"))
        fdate = resultDict['timestamp']
        ddate = datetime.now() - datetime(int(fdate[:4]), int(fdate[4:6]),
                                          int(fdate[6:]))

        # If the file was saved more than cache_expire days ago, a new
        # search is performed
        if ddate.days > cache_expire:
            resultDict = _search_and_dump(ID, lkwargs, cachepath)
    else:
        resultDict = _search_and_dump(ID, lkwargs, cachepath)

    return resultDict['result']


def _check_lc_cache(search, mission, download_dir=None):
    """ Query cache directory or download fits files.

    Searches the Lightkurve cache directory set by download_dir for fits
    files matching the search query, and returns a list of path names of the
    fits files.

    If the cache either doesn't exist or doesn't contain all the files in the
    search, all the fits files will be downloaded again.

    Parameters
    ----------
    search : lightkurve.search.SearchResult
        Search result from MAST.
    mission : str
        Which mission to download the data from.
    download_dir : str, optional
        Top level of the Lightkurve cache directory. Default is
        ~/.lightkurve-cache.

    Returns
    -------
    files_in_cache : list
        List of path names to the fits files in the cache directory.

    """
    if download_dir is None:
        download_dir = os.path.join(*[os.path.expanduser('~'),
                                      '.lightkurve-cache'])

    files_in_cache = []

    for i, row in enumerate(search.table):
        fname = os.path.join(*[download_dir, 'mastDownload', mission,
                               row['obs_id'], row['productFilename']])
        if os.path.exists(fname):
            files_in_cache.append(fname)

    if len(files_in_cache) != len(search):
        search.download_all(download_dir=download_dir)
        files_in_cache = [os.path.join(*[download_dir, 'mastDownload',
                                         mission, row['obs_id'],
                                         row['productFilename']])
                          for row in search.table]

    return files_in_cache


def _clean_lc(lc):
    """ Perform Lightkurve operations on object.

    Performs basic cleaning of a light curve: removing NaNs and outliers, and
    flattening via a median filter.

    Parameters
    ----------
    lc : Lightkurve.LightCurve instance
        Lightkurve object to be cleaned

    Returns
    -------
    lc : Lightkurve.LightCurve instance
        The cleaned Lightkurve object

    """
    lc = lc.remove_nans().flatten().remove_outliers()
    return lc
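

# A minimal end-to-end usage sketch (the target values are illustrative,
# mirroring the session class docstring example; downloading data requires
# network access):
#
#   import pbjam
#
#   jam_sess = pbjam.session(ID='4448777', numax=[220.0, 3.0],
#                            dnu=[16.97, 0.01], teff=[4750, 100],
#                            bp_rp=[1.34, 0.01], exptime=1800)
#   jam_sess(make_plots=True)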