Source code for pbjam.asy_peakbag

"""

This module fits the asymptotic relation to the p-modes in a frequency range
around $\nu_{max}$, the central frequency of the seismic mode envelope,
in a solar-like oscillator. Only $l=0$ and $l=2$ are fit, $l=1$ modes are 
currently ignored.

"""

import numpy as np
import pbjam as pb
import pandas as pd
import scipy.stats as scist
from .plotting import plotting
from .jar import normal
from collections import OrderedDict
import warnings

[docs]class asymp_spec_model():
    """Class for spectrum model using asymptotic relation.
    
    This class is meant to provide initial inputs and mode ID for the final
    fit using peakbag. 
    
    As such the spectrum model is simplified. The mode frequencies are estimated
    by the asymptotic relation, the mode heights by a Gaussian centered at 
    nu_max, and we only use a single width for all modes across the p-mode
    envelope.

    Parameters
    ---------_
    f : ndarray
        Array of frequency bins of the spectrum (in muHz). 
    norders : int
        Number of radial order to fit.
         
    """

    def __init__(self, f, norders):
        self.f = np.array([f]).flatten()
        self.norders = int(norders)

    def _get_nmax(self, dnu, numax, eps):
        """Compute radial order at numax.
    
        Compute the radial order at numax, which in this implimentation of the
        asymptotic relation is not necessarily integer.
    
        Parameters
        ----------
        numax : float
            Frequency of maximum power of the p-mode envelope (muHz).
        dnu : float
            Large separation of l=0 modes (muHz).
        eps : float
            Epsilon phase term in asymptotic relation (muHz).
    
        Returns
        -------
            nmax : float
                non-integer radial order of maximum power of the p-mode envelope
                
        """
    
        return numax / dnu - eps

    def _get_enns(self, nmax, norders):
        """Compute radial order numbers.
    
        Get the enns that will be included in the asymptotic relation fit. 
        These are all integer.
    
        Parameters
        ----------
        nmax : float
            Frequency of maximum power of the p-mode envelope
        norders : int
            Total number of radial orders to consider
    
        Returns
        -------
        enns : ndarray
                Numpy array of norders radial orders (integers) around nu_max 
                (nmax).
                
        """
    
        below = np.floor(nmax - np.floor(norders/2)).astype(int)
        above = np.floor(nmax + np.ceil(norders/2)).astype(int)
    
        # Handling of single input (during fitting), or array input when evaluating
        # the fit result
        if type(below) != np.ndarray:
            return np.arange(below, above)
        else:
            out = np.concatenate([np.arange(x, y) for x, y in zip(below, above)])
            return out.reshape(-1, norders)

    def _asymptotic_relation(self, numax, dnu, eps, alpha, norders):
        """ Compute the l=0 mode frequencies from the asymptotic relation for
        p-modes
    
        Parameters
        ----------
        numax : float
            Frequency of maximum power of the p-mode envelope (muHz).
        dnu : float
            Large separation of l=0 modes (muHz).
        eps : float
            Epsilon phase term in asymptotic relation (unitless).
        alpha : float
            Curvature factor of l=0 ridge (second order term, unitless).
        norders : int
            Number of desired radial orders to calculate frequncies for, 
            centered around numax.
    
        Returns
        -------
        nu0s : ndarray
            Array of l=0 mode frequencies from the asymptotic relation (muHz).
    
        """
        
        nmax = self._get_nmax(dnu, numax, eps)
        enns = self._get_enns(nmax, norders)
        return (enns.T + eps + alpha/2*(enns.T - nmax)**2) * dnu


    def _P_envelope(self, nu, hmax, numax, width):
        """ Power of the seismic p-mode envelope
    
        Computes the power at frequency nu in the p-mode envelope from a Gaussian
        distribution. Used for computing mode heights.
    
        Parameters
        ----------
        nu : float
            Frequency (in muHz).
        hmax : float
            Height of p-mode envelope (in SNR).
        numax : float
            Frequency of maximum power of the p-mode envelope (in muHz).
        width : float
            Width of the p-mode envelope (in muHz).
    
        Returns
        -------
        h : float
            Power at frequency nu (in SNR)
            
        """
    
        return hmax * np.exp(- 0.5 * (nu - numax)**2 / width**2)

    def _lor(self, freq, h, w):
        """ Lorentzian to describe a mode.

        Parameters
        ----------
        freq : float
            Frequency of lorentzian (muHz).
        h : float
            Height of the lorentizan (SNR).
        w : float
            Full width of the lorentzian (muHz).

        Returns
        -------
        mode : ndarray
            The SNR as a function frequency for a lorentzian.
            
        """
       
        return h / (1.0 + 4.0/w**2*(self.f - freq)**2)


    def _pair(self, freq0, h, w, d02, hfac=0.7):
        """Define a pair as the sum of two Lorentzians.

        A pair is assumed to consist of an l=0 and an l=2 mode. The widths are
        assumed to be identical, and the height of the l=2 mode is scaled
        relative to that of the l=0 mode. The frequency of the l=2 mode is the
        l=0 frequency minus the small separation.

        Parameters
        ----------
        freq0 : float
            Frequency of the l=0 (muHz).
        h : float
            Height of the l=0 (SNR).
        w : float
            The mode width (identical for l=2 and l=0) (log10(muHz)).
        d02 : float
            The small separation (muHz).
        hfac : float, optional
            Ratio of the l=2 height to that of l=0 (unitless).

        Returns
        -------
        pair_model : array
            The SNR as a function of frequency of a mode pair.
            
        """
        
        pair_model = self._lor(freq0, h, w)
        pair_model += self._lor(freq0 - d02, h*hfac, w)
        return pair_model

    
[docs]    def model(self, dnu, numax, eps, d02, alpha, hmax, envwidth, modewidth,
              *args):
        """ Constructs a spectrum model from the asymptotic relation.

        The asymptotic relation for p-modes with angular degree, l=0, is
        defined as:

        $nu_nl = (n + \epsilon + \alpha/2(n - nmax)^2) * \log{dnu}$ ,

        where nmax = numax / dnu - epsilon.

        We separate the l=0 and l=2 modes by the small separation d02.

        Parameters
        ----------
        dnu : float
            Large separation log10(muHz)
        lognumax : float
            Frequency of maximum power of the p-mode envelope log10(muHz)
        eps : float
            Phase term of the asymptotic relation (unitless)
        alpha : float
            Curvature of the asymptotic relation log10(unitless)
        d02 : float
            Small separation log10(muHz)
        loghmax : float
            Gaussian height of p-mode envelope log10(SNR)
        logenvwidth : float
            Gaussian width of the p-mode envelope log10(muHz)
        logmodewidth : float
            Width of the modes (log10(muHz))
        *args : array-like
            List of additional parameters (Teff, bp_rp) that aren't actually
            used to construct the spectrum model, but just for evaluating the
            prior.

        Returns
        -------
        model : ndarray
            spectrum model around the p-mode envelope
            
        """

        f0s = self._asymptotic_relation(10**numax, 10**dnu, eps, 10**alpha, self.norders)
        Hs = self._P_envelope(f0s, 10**hmax, 10**numax, 10**envwidth)
        
        modewidth = 10**modewidth # widths are the same for all modes
        d02 = 10**d02
        
        mod = np.ones(len(self.f))
        for n in range(len(f0s)):
            mod += self._pair(f0s[n], Hs[n], modewidth, d02)
        return mod

    def __call__(self, p):
        """ Produce model of the asymptotic relation

        Parameters
        ----------
        p : list
            list of model parameters

        Returns
        -------
        model : array
            spectrum model around the p-mode envelope
            
        """

        return self.model(*p)


[docs]class asymptotic_fit(plotting, asymp_spec_model):
    """ Class for fitting a spectrum based on the asymptotic relation.

    Parameters
    ----------
    st : star class instance.
        The star to fit using the asymptotic relation.
    
    norders : int, optional
        Number of radial orders to fit.

    Attributes
    ----------
    f : ndarray
        Numpy array of frequency bins of the spectrum (muHz).
    s : ndarray
        Numpy array of power in each frequency bin (SNR).
    sel : ndarray, bool
        Numpy array of boolean values specifying the frequency range to be
        considered in the asymptotic relation fit.
    model : asymp_spec_model.model instance
        Function for computing a spectrum model given a set of parameters.
    prior_file : str
        Path to the csv file containing the prior data. Default is 
        pbjam/data/prior_data.csv
    par_names : list
        List of parameters names of the spectrum model.
    _obs : dict
        Dictionary of the observational parameters (input parameters).
    _log_obs : dict
        Dictionary of the observational parametrs in log-scale.
    prior_data : pandas DataFrame
        Dataframe containing the samples used to the generate the KDE prior.
    start_samples : ndarray
        Array of samples drawn from the KDE to set the starting location of the
        asymptotic relation fit.
    kde : statsmodels.kde instance
        KDE function used as a prior in the asymptotic relation fit.
    start : ndarray 
        Median of parameters in start_samples. 
    developer_mode : bool
        Run asy_peakbag in developer mode. Currently just retains the input 
        value of dnu and numax as priors, for the purposes of expanding
        the prior sample. Important: This is not good practice for getting 
        science results!

    """

    def __init__(self, st, norders=None):
        
        self.pg = st.pg
        self.f = st.f
        self.s = st.s
        self.norders = norders

        self._obs = st._obs
        self._log_obs = st._log_obs    
        
        self.par_names = ['dnu', 'numax', 'eps', 'd02', 'alpha', 'env_height',
                          'env_width', 'mode_width', 'teff', 'bp_rp']
        
        self.prior_file = st.prior_file
        self.prior_data = st.kde.prior_data
        self.start_samples = st.kde.samples       
        self.kde = st.kde.kde    
        self.start = self._get_asy_start()
        
        lfreq, ufreq = self._get_freq_range()
        self.sel = (lfreq < self.f) & (self.f < ufreq)
        self.model = asymp_spec_model(self.f[self.sel], self.norders)
        
        self.developer_mode = False
        self.path = st.path
        
        st.asy_fit = self
       
    def __call__(self, method, developer_mode):
        """ Setup, run and parse the asymptotic relation fit.

        Parameters
        ----------
        method : string
            Method to be used for sampling the posterior. Options are 'emcee' or
            'cpnest. Default method is 'emcee' that will call emcee, alternative
            is 'cpnest' to call nested sampling with CPnest.
        
        developer_mode : bool
            Run asy_peakbag in developer mode. Currently just retains the input 
            value of dnu and numax as priors, for the purposes of expanding
            the prior sample. Important: This is not good practice for getting 
            science results!
      
        Returns
        -------
        asy_result : Dict
            A dictionary of the modeID DataFrame and the summary DataFrame.
            
        """
        self.developer_mode = developer_mode
        
        if method not in ['emcee', 'cpnest']:
            warnings.warn(f'Method {method} not found: Using method emcee')
            method = 'emcee'

        if method == 'emcee':
            self.fit = pb.mcmc(np.median(self.start_samples, axis=0),
                               self.likelihood, self.prior)
            self.fit(start_samples=self.start_samples)

        elif method == 'cpnest':
            bounds = [[self.prior_data[key].min(), self.prior_data[key].max()] for key in self.par_names]
            self.fit = pb.nested(self.par_names, bounds, self.likelihood, self.prior, self.path)
            self.fit()
         
        self.modeID = self.get_modeIDs(self.fit, self.norders)
        self.summary  = self._get_summary_stats(self.fit)        
        self.samples = self.fit.flatchain
        self.acceptance = self.fit.acceptance

        return {'modeID': self.modeID, 'summary': self.summary}


[docs]    def prior(self, p):
        """ Calculates the log prior 
        
        Evaluates the KDE for the parameters p. Additional hard/soft priors
        can be added here as needed to, e.g., apply boundaries to the fit.
        
        Hard constraints should be applied at the top so function exits early,
        if necessary.

        Parameters
        ----------
        p : ndarray
            Array of model parameters.

        Returns
        -------
        lp : float
            The log likelihood evaluated at p.

        """
        
        # d02/dnu < 0.2  (np.log10(0.2) ~ -0.7)
        if p[3] - p[0] > -0.7:
            return -np.inf
        
        lp = 0
        
        # Added linewidth constraints
        if (p[7] > self.start[7] + np.log10(1.5)):
            lp += normal(10**p[7], 10**self.start[7]*1.5, 10**self.start[7]*0.1)
        
        # Constraints from KDE
        lp += np.log(self.kde.pdf(p))
        
        return lp
        
[docs]    def likelihood(self, p):
        """ Likelihood function for set of model parameters

        Evaluates the likelihood function for a set of
        model parameters.  This includes the constraint from
        the observed variables.

        The code now includes a penalty to limit very large linewidth
        examples.  The penalty is very basic and in some cases where the
        true linewidth is much larger than the linewidth in the
        prior it will become informative.  For the most part this
        should not be a problem because if you  care about linewidths
        you should be using the outpout from the peakbag model.

        Parameters
        ----------
        p : ndarray
            Array of model parameters

        Returns
        -------
        lnlike : float
            The log likelihood evaluated at p.

        """
        
        lnlike = 0
        
        # Constraint from input obs
        if self.developer_mode:
            lnlike += normal(p[0], *self._log_obs['dnu'])
            lnlike += normal(p[1], *self._log_obs['numax'])
        
        lnlike += normal(p[-2], *self._log_obs['teff'])
        lnlike += normal(p[-1], *self._obs['bp_rp'])
        
        # Constraint from the periodogram
        mod = self.model(p)
        lnlike += -np.sum(np.log(mod) + self.s[self.sel] / mod)
        return lnlike
    

    def _get_summary_stats(self, fit):
        """ Make dataframe with fit summary statistics

        Creates a dataframe that contains various quantities that summarize the
        fit. Note, these are predominantly derived from the marginalized posteriors.

        Parameters
        ----------
        fit : mcmc.mcmc class instance
            mcmc class instances used in the fit

        Returns
        -------
        summary : pandas.DataFrame
            Dataframe with the summary statistics.

        """

        fc = fit.flatchain

        # Append here to add other statistics
        stats = OrderedDict({'mean' : np.mean(fc, axis = 0),
                             'std'  : np.std(fc, axis = 0),
                             'skew' : scist.skew(fc, axis = 0),
                             '2nd'  : np.percentile(fc,  2.27501, axis=0),
                             '16th' : np.percentile(fc, 15.86552, axis=0),
                             '50th' : np.percentile(fc, 50., axis=0),
                             '84th' : np.percentile(fc, 84.13447, axis=0),
                             '97th' : np.percentile(fc, 97.72498, axis=0),
                             'MAD'  : scist.median_absolute_deviation(fc, axis=0)})

        summary = pd.DataFrame(stats, index = self.par_names)

        return summary
       

[docs]    def get_modeIDs(self, fit, norders):
        """ Set mode ID in a dataframe

        Evaluates the asymptotic relation for each walker position from the
        `emcee' sampling. The median values of the resulting set of frequencies 
        are then returned in a pandas DataFrame

        Parameters
        ----------
        fit : mcmc.mcmc class instance
            mcmc class instances used in the fit
        norders : int
            Number of radial orders to output. Note that doesn't have to be
            the same as that used int he fit itself.

        Returns
        -------
        modeID : pandas.DataFrame
            Dataframe of radial order, n (best guess), angular degree, l,
            frequency and frequency error.
            
        """

        fc = fit.flatchain

        nu0_samps = self._asymptotic_relation(10**fc[:, 1], 10**fc[:, 0], fc[:, 2], 10**fc[:, 4], norders)
        nu2_samps = nu0_samps - 10**fc[:, 3]

        nus_med = np.median(np.array([nu0_samps, nu2_samps]), axis=2)
        nus_mad = scist.median_absolute_deviation(np.array([nu0_samps, nu2_samps]), axis=2)

        ells = np.array([2, 0]*norders) 

        modeID = pd.DataFrame({'ell': ells, 'nu_med': np.zeros(len(ells)), 'nu_mad': np.zeros(len(ells))})

        modeID.nu_med.values[::2] = nus_med[1, :]
        modeID.nu_med.values[1::2] = nus_med[0, :]
        
        modeID.nu_mad.values[::2] = nus_mad[1, :]
        modeID.nu_mad.values[1::2] = nus_mad[0, :]

        return modeID

    
    def _get_asy_start(self):
        """ Get start averages for sampling start locations
        """
 
        mu = np.median(self.start_samples, axis=0)
        start = [10**mu[0], 10**mu[1], mu[2], 10**mu[3], 10**mu[4], mu[5], 
                 mu[6], mu[7], 10**mu[8], mu[9]]
        return start


    def _get_freq_range(self):
        """ Get frequency range around numax for model 
        """
        
        dnu, numax, eps = self.start[:3]
        
        nmax = self._get_nmax(dnu, numax, eps)
        enns = self._get_enns(nmax, self.norders)

        # The range is set +/- 25% of the upper and lower mode frequency 
        # estimate
        lfreq = (min(enns) - 1.25 + eps) * dnu
        ufreq = (max(enns) + 1.25 + eps) * dnu
        return lfreq, ufreq    
    
    
    
#    def _start_init(self, verbose=False):
#        """ This is in pre-alpha
#        
#        Bodge a better starting point
#        """
#        
#        like_start = np.ones(len(self.start_samples[:, 0]))
#        for idx, samp in enumerate(self.start_samples):
#            like_start[idx] = self.likelihood(samp)
#        if verbose:
#            print(f'Likelihood at the start : {np.max(like_start)}')
#            print(f'Start params from init : {self.start_samples[np.argmax(like_start), :]}')
Source code for pbjam.asy_peakbag

PBjam

Navigation

Related Topics