Source code for pbjam.asy_peakbag

"""

This module fits the asymptotic relation to the p-modes in a frequency range
around $\nu_{max}$, the central frequency of the seismic mode envelope,
in a solar-like oscillator. Only $l=0$ and $l=2$ are fit, $l=1$ modes are 
currently ignored.

"""

import numpy as np
import pbjam as pb
import pandas as pd
import scipy.stats as scist
from .plotting import plotting
from .jar import normal
from collections import OrderedDict
import warnings

[docs]class asymp_spec_model(): """Class for spectrum model using asymptotic relation. This class is meant to provide initial inputs and mode ID for the final fit using peakbag. As such the spectrum model is simplified. The mode frequencies are estimated by the asymptotic relation, the mode heights by a Gaussian centered at nu_max, and we only use a single width for all modes across the p-mode envelope. Parameters ---------_ f : ndarray Array of frequency bins of the spectrum (in muHz). norders : int Number of radial order to fit. """ def __init__(self, f, norders): self.f = np.array([f]).flatten() self.norders = int(norders) def _get_nmax(self, dnu, numax, eps): """Compute radial order at numax. Compute the radial order at numax, which in this implimentation of the asymptotic relation is not necessarily integer. Parameters ---------- numax : float Frequency of maximum power of the p-mode envelope (muHz). dnu : float Large separation of l=0 modes (muHz). eps : float Epsilon phase term in asymptotic relation (muHz). Returns ------- nmax : float non-integer radial order of maximum power of the p-mode envelope """ return numax / dnu - eps def _get_enns(self, nmax, norders): """Compute radial order numbers. Get the enns that will be included in the asymptotic relation fit. These are all integer. Parameters ---------- nmax : float Frequency of maximum power of the p-mode envelope norders : int Total number of radial orders to consider Returns ------- enns : ndarray Numpy array of norders radial orders (integers) around nu_max (nmax). """ below = np.floor(nmax - np.floor(norders/2)).astype(int) above = np.floor(nmax + np.ceil(norders/2)).astype(int) # Handling of single input (during fitting), or array input when evaluating # the fit result if type(below) != np.ndarray: return np.arange(below, above) else: out = np.concatenate([np.arange(x, y) for x, y in zip(below, above)]) return out.reshape(-1, norders) def _asymptotic_relation(self, numax, dnu, eps, alpha, norders): """ Compute the l=0 mode frequencies from the asymptotic relation for p-modes Parameters ---------- numax : float Frequency of maximum power of the p-mode envelope (muHz). dnu : float Large separation of l=0 modes (muHz). eps : float Epsilon phase term in asymptotic relation (unitless). alpha : float Curvature factor of l=0 ridge (second order term, unitless). norders : int Number of desired radial orders to calculate frequncies for, centered around numax. Returns ------- nu0s : ndarray Array of l=0 mode frequencies from the asymptotic relation (muHz). """ nmax = self._get_nmax(dnu, numax, eps) enns = self._get_enns(nmax, norders) return (enns.T + eps + alpha/2*(enns.T - nmax)**2) * dnu def _P_envelope(self, nu, hmax, numax, width): """ Power of the seismic p-mode envelope Computes the power at frequency nu in the p-mode envelope from a Gaussian distribution. Used for computing mode heights. Parameters ---------- nu : float Frequency (in muHz). hmax : float Height of p-mode envelope (in SNR). numax : float Frequency of maximum power of the p-mode envelope (in muHz). width : float Width of the p-mode envelope (in muHz). Returns ------- h : float Power at frequency nu (in SNR) """ return hmax * np.exp(- 0.5 * (nu - numax)**2 / width**2) def _lor(self, freq, h, w): """ Lorentzian to describe a mode. Parameters ---------- freq : float Frequency of lorentzian (muHz). h : float Height of the lorentizan (SNR). w : float Full width of the lorentzian (muHz). Returns ------- mode : ndarray The SNR as a function frequency for a lorentzian. """ return h / (1.0 + 4.0/w**2*(self.f - freq)**2) def _pair(self, freq0, h, w, d02, hfac=0.7): """Define a pair as the sum of two Lorentzians. A pair is assumed to consist of an l=0 and an l=2 mode. The widths are assumed to be identical, and the height of the l=2 mode is scaled relative to that of the l=0 mode. The frequency of the l=2 mode is the l=0 frequency minus the small separation. Parameters ---------- freq0 : float Frequency of the l=0 (muHz). h : float Height of the l=0 (SNR). w : float The mode width (identical for l=2 and l=0) (log10(muHz)). d02 : float The small separation (muHz). hfac : float, optional Ratio of the l=2 height to that of l=0 (unitless). Returns ------- pair_model : array The SNR as a function of frequency of a mode pair. """ pair_model = self._lor(freq0, h, w) pair_model += self._lor(freq0 - d02, h*hfac, w) return pair_model
[docs] def model(self, dnu, numax, eps, d02, alpha, hmax, envwidth, modewidth, *args): """ Constructs a spectrum model from the asymptotic relation. The asymptotic relation for p-modes with angular degree, l=0, is defined as: $nu_nl = (n + \epsilon + \alpha/2(n - nmax)^2) * \log{dnu}$ , where nmax = numax / dnu - epsilon. We separate the l=0 and l=2 modes by the small separation d02. Parameters ---------- dnu : float Large separation log10(muHz) lognumax : float Frequency of maximum power of the p-mode envelope log10(muHz) eps : float Phase term of the asymptotic relation (unitless) alpha : float Curvature of the asymptotic relation log10(unitless) d02 : float Small separation log10(muHz) loghmax : float Gaussian height of p-mode envelope log10(SNR) logenvwidth : float Gaussian width of the p-mode envelope log10(muHz) logmodewidth : float Width of the modes (log10(muHz)) *args : array-like List of additional parameters (Teff, bp_rp) that aren't actually used to construct the spectrum model, but just for evaluating the prior. Returns ------- model : ndarray spectrum model around the p-mode envelope """ f0s = self._asymptotic_relation(10**numax, 10**dnu, eps, 10**alpha, self.norders) Hs = self._P_envelope(f0s, 10**hmax, 10**numax, 10**envwidth) modewidth = 10**modewidth # widths are the same for all modes d02 = 10**d02 mod = np.ones(len(self.f)) for n in range(len(f0s)): mod += self._pair(f0s[n], Hs[n], modewidth, d02) return mod
def __call__(self, p): """ Produce model of the asymptotic relation Parameters ---------- p : list list of model parameters Returns ------- model : array spectrum model around the p-mode envelope """ return self.model(*p)
[docs]class asymptotic_fit(plotting, asymp_spec_model): """ Class for fitting a spectrum based on the asymptotic relation. Parameters ---------- st : star class instance. The star to fit using the asymptotic relation. norders : int, optional Number of radial orders to fit. Attributes ---------- f : ndarray Numpy array of frequency bins of the spectrum (muHz). s : ndarray Numpy array of power in each frequency bin (SNR). sel : ndarray, bool Numpy array of boolean values specifying the frequency range to be considered in the asymptotic relation fit. model : asymp_spec_model.model instance Function for computing a spectrum model given a set of parameters. prior_file : str Path to the csv file containing the prior data. Default is pbjam/data/prior_data.csv par_names : list List of parameters names of the spectrum model. _obs : dict Dictionary of the observational parameters (input parameters). _log_obs : dict Dictionary of the observational parametrs in log-scale. prior_data : pandas DataFrame Dataframe containing the samples used to the generate the KDE prior. start_samples : ndarray Array of samples drawn from the KDE to set the starting location of the asymptotic relation fit. kde : statsmodels.kde instance KDE function used as a prior in the asymptotic relation fit. start : ndarray Median of parameters in start_samples. developer_mode : bool Run asy_peakbag in developer mode. Currently just retains the input value of dnu and numax as priors, for the purposes of expanding the prior sample. Important: This is not good practice for getting science results! """ def __init__(self, st, norders=None): self.pg = st.pg self.f = st.f self.s = st.s self.norders = norders self._obs = st._obs self._log_obs = st._log_obs self.par_names = ['dnu', 'numax', 'eps', 'd02', 'alpha', 'env_height', 'env_width', 'mode_width', 'teff', 'bp_rp'] self.prior_file = st.prior_file self.prior_data = st.kde.prior_data self.start_samples = st.kde.samples self.kde = st.kde.kde self.start = self._get_asy_start() lfreq, ufreq = self._get_freq_range() self.sel = (lfreq < self.f) & (self.f < ufreq) self.model = asymp_spec_model(self.f[self.sel], self.norders) self.developer_mode = False self.path = st.path st.asy_fit = self def __call__(self, method, developer_mode): """ Setup, run and parse the asymptotic relation fit. Parameters ---------- method : string Method to be used for sampling the posterior. Options are 'emcee' or 'cpnest. Default method is 'emcee' that will call emcee, alternative is 'cpnest' to call nested sampling with CPnest. developer_mode : bool Run asy_peakbag in developer mode. Currently just retains the input value of dnu and numax as priors, for the purposes of expanding the prior sample. Important: This is not good practice for getting science results! Returns ------- asy_result : Dict A dictionary of the modeID DataFrame and the summary DataFrame. """ self.developer_mode = developer_mode if method not in ['emcee', 'cpnest']: warnings.warn(f'Method {method} not found: Using method emcee') method = 'emcee' if method == 'emcee': self.fit = pb.mcmc(np.median(self.start_samples, axis=0), self.likelihood, self.prior) self.fit(start_samples=self.start_samples) elif method == 'cpnest': bounds = [[self.prior_data[key].min(), self.prior_data[key].max()] for key in self.par_names] self.fit = pb.nested(self.par_names, bounds, self.likelihood, self.prior, self.path) self.fit() self.modeID = self.get_modeIDs(self.fit, self.norders) self.summary = self._get_summary_stats(self.fit) self.samples = self.fit.flatchain self.acceptance = self.fit.acceptance return {'modeID': self.modeID, 'summary': self.summary}
[docs] def prior(self, p): """ Calculates the log prior Evaluates the KDE for the parameters p. Additional hard/soft priors can be added here as needed to, e.g., apply boundaries to the fit. Hard constraints should be applied at the top so function exits early, if necessary. Parameters ---------- p : ndarray Array of model parameters. Returns ------- lp : float The log likelihood evaluated at p. """ # d02/dnu < 0.2 (np.log10(0.2) ~ -0.7) if p[3] - p[0] > -0.7: return -np.inf lp = 0 # Added linewidth constraints if (p[7] > self.start[7] + np.log10(1.5)): lp += normal(10**p[7], 10**self.start[7]*1.5, 10**self.start[7]*0.1) # Constraints from KDE lp += np.log(self.kde.pdf(p)) return lp
[docs] def likelihood(self, p): """ Likelihood function for set of model parameters Evaluates the likelihood function for a set of model parameters. This includes the constraint from the observed variables. The code now includes a penalty to limit very large linewidth examples. The penalty is very basic and in some cases where the true linewidth is much larger than the linewidth in the prior it will become informative. For the most part this should not be a problem because if you care about linewidths you should be using the outpout from the peakbag model. Parameters ---------- p : ndarray Array of model parameters Returns ------- lnlike : float The log likelihood evaluated at p. """ lnlike = 0 # Constraint from input obs if self.developer_mode: lnlike += normal(p[0], *self._log_obs['dnu']) lnlike += normal(p[1], *self._log_obs['numax']) lnlike += normal(p[-2], *self._log_obs['teff']) lnlike += normal(p[-1], *self._obs['bp_rp']) # Constraint from the periodogram mod = self.model(p) lnlike += -np.sum(np.log(mod) + self.s[self.sel] / mod) return lnlike
def _get_summary_stats(self, fit): """ Make dataframe with fit summary statistics Creates a dataframe that contains various quantities that summarize the fit. Note, these are predominantly derived from the marginalized posteriors. Parameters ---------- fit : mcmc.mcmc class instance mcmc class instances used in the fit Returns ------- summary : pandas.DataFrame Dataframe with the summary statistics. """ fc = fit.flatchain # Append here to add other statistics stats = OrderedDict({'mean' : np.mean(fc, axis = 0), 'std' : np.std(fc, axis = 0), 'skew' : scist.skew(fc, axis = 0), '2nd' : np.percentile(fc, 2.27501, axis=0), '16th' : np.percentile(fc, 15.86552, axis=0), '50th' : np.percentile(fc, 50., axis=0), '84th' : np.percentile(fc, 84.13447, axis=0), '97th' : np.percentile(fc, 97.72498, axis=0), 'MAD' : scist.median_absolute_deviation(fc, axis=0)}) summary = pd.DataFrame(stats, index = self.par_names) return summary
[docs] def get_modeIDs(self, fit, norders): """ Set mode ID in a dataframe Evaluates the asymptotic relation for each walker position from the `emcee' sampling. The median values of the resulting set of frequencies are then returned in a pandas DataFrame Parameters ---------- fit : mcmc.mcmc class instance mcmc class instances used in the fit norders : int Number of radial orders to output. Note that doesn't have to be the same as that used int he fit itself. Returns ------- modeID : pandas.DataFrame Dataframe of radial order, n (best guess), angular degree, l, frequency and frequency error. """ fc = fit.flatchain nu0_samps = self._asymptotic_relation(10**fc[:, 1], 10**fc[:, 0], fc[:, 2], 10**fc[:, 4], norders) nu2_samps = nu0_samps - 10**fc[:, 3] nus_med = np.median(np.array([nu0_samps, nu2_samps]), axis=2) nus_mad = scist.median_absolute_deviation(np.array([nu0_samps, nu2_samps]), axis=2) ells = np.array([2, 0]*norders) modeID = pd.DataFrame({'ell': ells, 'nu_med': np.zeros(len(ells)), 'nu_mad': np.zeros(len(ells))}) modeID.nu_med.values[::2] = nus_med[1, :] modeID.nu_med.values[1::2] = nus_med[0, :] modeID.nu_mad.values[::2] = nus_mad[1, :] modeID.nu_mad.values[1::2] = nus_mad[0, :] return modeID
def _get_asy_start(self): """ Get start averages for sampling start locations """ mu = np.median(self.start_samples, axis=0) start = [10**mu[0], 10**mu[1], mu[2], 10**mu[3], 10**mu[4], mu[5], mu[6], mu[7], 10**mu[8], mu[9]] return start def _get_freq_range(self): """ Get frequency range around numax for model """ dnu, numax, eps = self.start[:3] nmax = self._get_nmax(dnu, numax, eps) enns = self._get_enns(nmax, self.norders) # The range is set +/- 25% of the upper and lower mode frequency # estimate lfreq = (min(enns) - 1.25 + eps) * dnu ufreq = (max(enns) + 1.25 + eps) * dnu return lfreq, ufreq
# def _start_init(self, verbose=False): # """ This is in pre-alpha # # Bodge a better starting point # """ # # like_start = np.ones(len(self.start_samples[:, 0])) # for idx, samp in enumerate(self.start_samples): # like_start[idx] = self.likelihood(samp) # if verbose: # print(f'Likelihood at the start : {np.max(like_start)}') # print(f'Start params from init : {self.start_samples[np.argmax(like_start), :]}')