diff --git a/bilby/gw/detector/interferometer.py b/bilby/gw/detector/interferometer.py
index ac0c14886..182624adb 100644
--- a/bilby/gw/detector/interferometer.py
+++ b/bilby/gw/detector/interferometer.py
@@ -639,16 +639,15 @@ def matched_filter_snr(self, signal):
     def whiten_frequency_series(self, frequency_series : np.array) -> np.array:
         """Whitens a frequency series with the noise properties of the detector

-        .. math::
-            \\tilde{a}_w(f) = \\tilde{a}(f) \\sqrt{\\frac{4}{T S_n(f)}}
-
-        Such that
+        Given a frequency series :math:`\\tilde{a}(f)`, whiten the data by applying

         .. math::
-            Var(n) = \\frac{1}{N} \\sum_{k=0}^N n_W(f_k)n_W^*(f_k) = 2
+            \\tilde{a}_w(f) = \\tilde{a}(f) \\sqrt{\\frac{4}{T S_n(f)}}\\,,

-        Where the factor of two is due to the independent real and imaginary
-        components.
+        where :math:`S_n(f)` is the Power Spectral Density (PSD). If the PSD correctly
+        describes the properties of the data, the real and imaginary components of the
+        resulting whitened frequency series will be independent standard normal random
+        variables.

         Parameters
         ==========
diff --git a/docs/index.txt b/docs/index.txt
index 3a02a207a..884ea99eb 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -35,12 +35,13 @@ Welcome to bilby's documentation!
 .. toctree::
    :maxdepth: 1
-   :caption: Examples:
+   :caption: Tutorials:

    making_priors
    compare_samplers
    fitting_with_x_and_y_errors
    visualising_the_results
+   power_spectral_density_estimation

 .. currentmodule:: bilby
diff --git a/examples/tutorials/power_spectral_density_estimation.ipynb b/examples/tutorials/power_spectral_density_estimation.ipynb
new file mode 100644
index 000000000..0d4e37a57
--- /dev/null
+++ b/examples/tutorials/power_spectral_density_estimation.ipynb
@@ -0,0 +1,715 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Power spectral density estimation methods\n",
+    "\n",
+    "This notebook introduces and compares in-built methods within `Bilby` for the estimation of the power spectral density (PSD) and discusses some of the theory that goes into estimating PSDs and evaluating their performance.\n",
+    "\n",
+    "## Standard imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"GWPY_RCPARAMS\"] = \"FALSE\"\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\", \"Wswiglal-redir-stdio\")\n",
+    "\n",
+    "import bilby\n",
+    "from gwpy.timeseries import TimeSeries\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy.special\n",
+    "import scipy.interpolate\n",
+    "import scipy.signal\n",
+    "\n",
+    "print(bilby.__version__)\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Construction of helper functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def empirical_cdf(data):\n",
+    "    \"\"\" Compute the empirical cumulative distribution function (ECDF). \"\"\"\n",
+    "    sorted_data = np.sort(data)\n",
+    "    n = len(data)\n",
+    "\n",
+    "    def ecdf(x):\n",
+    "        return np.searchsorted(sorted_data, x, side='right') / n\n",
+    "\n",
+    "    return ecdf, sorted_data\n",
+    "\n",
+    "def anderson_darling_statistic(data):\n",
+    "    \"\"\" Compute the Anderson-Darling test statistic for normality. \"\"\"\n",
\"\"\"\n", + " n = len(data)\n", + " ecdf, sorted_data = empirical_cdf(data)\n", + " \n", + " # Transform data to standard normal quantiles\n", + " mean = np.mean(data)\n", + " std = np.std(data, ddof=1) # Sample standard deviation\n", + " standardized = (sorted_data - mean) / std\n", + " \n", + " # Compute theoretical normal CDF values\n", + " normal_cdf = 0.5 * (1 + scipy.special.erf(standardized / np.sqrt(2))) # Standard normal CDF\n", + " \n", + " # Compute Anderson-Darling test statistic\n", + " s = np.sum((2 * np.arange(1, n + 1) - 1) * (np.log(normal_cdf) + np.log(1 - normal_cdf[::-1])))\n", + " A2 = -n - s / n\n", + " \n", + " return A2\n", + "\n", + "def anderson_p_value(data, freqs=None, fmin=0, fmax=np.inf):\n", + " \"\"\" Approximate the p-value for the Anderson-Darling test for normality. \"\"\"\n", + " \n", + " # If provided, cut the frequencies to a min/max value\n", + " if freqs is not None:\n", + " idxs = (freqs > fmin) & (freqs < fmax)\n", + " data = data[idxs]\n", + "\n", + " # Concatenate the real and imaginary parts together\n", + " data = np.concatenate([data.real, data.imag]) \n", + "\n", + " if len(data) == 0:\n", + " return np.nan\n", + " \n", + " A2 = anderson_darling_statistic(data)\n", + "\n", + " critical_values = [\n", + " 0.200, 0.300, 0.400, 0.500, 0.576, 0.656, 0.787, 0.918, \n", + " 1.092, 1.250, 1.500, 1.750, 2.000, 2.500, 3.000, 3.500, \n", + " 4.000, 4.500, 5.000, 6.000, 7.000, 8.000, 10.000\n", + " ]\n", + " \n", + " significance_levels = [\n", + " 0.90, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, \n", + " 0.30, 0.25, 0.20, 0.15, 0.10, 0.05, 0.01, 0.005, \n", + " 0.0025, 0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00001\n", + " ]\n", + "\n", + " # Approximate p-value using interpolation\n", + " if A2 < critical_values[0]:\n", + " pval = significance_levels[0]\n", + " elif A2 > critical_values[-1]:\n", + " pval = significance_levels[-1]\n", + " else:\n", + " pval = np.interp(A2, critical_values, significance_levels)\n", + "\n", + " return float(pval)\n", + "\n", + "def fbins_anderson_p_value(freq, data, bin_width_Hz=8, fmin=0, fmax=np.inf):\n", + " \"\"\"\n", + " Compute Anderson-Darling p-values for frequency bins.\n", + " \n", + " Parameters\n", + " ----------\n", + " freq : array-like\n", + " Frequency values.\n", + " data : array-like\n", + " Data corresponding to frequencies.\n", + " bin_width_Hz : float, optional\n", + " Width of frequency bins in Hz. Default is 8.\n", + " fmin : float, optional\n", + " Minimum frequency for analysis. Default is 0.\n", + " fmax : float, optional\n", + " Maximum frequency for analysis. Default is np.inf.\n", + " \n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing frequency bins and corresponding p-values.\n", + " \"\"\"\n", + " bin_width = int(bin_width_Hz * duration)\n", + " idxs = np.arange(0, len(data), bin_width)[:-1]\n", + " pvals = [anderson_p_value(data[ii:ii+bin_width], freq[ii:ii+bin_width], fmin=fmin, fmax=fmax) for ii in idxs]\n", + " fbins = [freq[ii + bin_width // 2] for ii in idxs]\n", + " return fbins, pvals\n", + "\n", + "def get_window_like(data, roll_off=0.1):\n", + " \"\"\"\n", + " Generate a Tukey window with a specified roll-off.\n", + " \n", + " Parameters\n", + " ----------\n", + " data : array-like\n", + " Input data to determine window size.\n", + " roll_off : float, optional\n", + " Roll-off factor for the Tukey window. 
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    array\n",
+    "        A Tukey window of the same length as the input data.\n",
+    "    \"\"\"\n",
+    "    psd_alpha = 2 * roll_off / duration\n",
+    "    return scipy.signal.get_window((\"tukey\", psd_alpha), len(data))\n",
+    "\n",
+    "def whiten(timeseries, asd, asd_frequencies, roll_off=0.1):\n",
+    "    \"\"\"\n",
+    "    Whiten a time series by dividing by the amplitude spectral density (ASD).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    timeseries : array-like\n",
+    "        The input time series data.\n",
+    "    asd : array-like\n",
+    "        The amplitude spectral density.\n",
+    "    asd_frequencies : array-like\n",
+    "        Frequencies corresponding to ASD values.\n",
+    "    roll_off : float, optional\n",
+    "        Roll-off factor for windowing. Default is 0.1.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        The original frequency series and the whitened frequency series.\n",
+    "    \"\"\"\n",
+    "    duration = timeseries.duration.value\n",
+    "    timeseries_windowed = timeseries * get_window_like(timeseries, roll_off)\n",
+    "    frequency_series = timeseries_windowed.fft()\n",
+    "    whitened_frequencyseries = frequency_series * np.sqrt(4 / duration) / asd\n",
+    "    return frequency_series, whitened_frequencyseries\n",
+    "\n",
+    "def plot_whitening(frequencies, asd, h_f, wh_f, label=None, bin_width_Hz=8):\n",
+    "    \"\"\"\n",
+    "    Plot whitening analysis, including the frequency domain signal, ASD, p-values, and histogram.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    frequencies : array-like\n",
+    "        Frequency values.\n",
+    "    asd : array-like\n",
+    "        Amplitude spectral density.\n",
+    "    h_f : array-like\n",
+    "        Frequency-domain representation of the original signal.\n",
+    "    wh_f : array-like\n",
+    "        Frequency-domain representation of the whitened signal.\n",
+    "    label : str, optional\n",
+    "        Label for the ASD plot. Default is None.\n",
+    "    bin_width_Hz : float, optional\n",
+    "        Width of frequency bins for p-value computation. Default is 8.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    None\n",
+    "    \"\"\"\n",
+    "    fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, figsize=(8, 5))\n",
+    "    ax1, ax2 = axes[:, 0]\n",
+    "    gs = axes[1, 0].get_gridspec()\n",
+    "    axes[0, 1].remove()\n",
+    "    axes[1, 1].remove()\n",
+    "    ax3 = fig.add_subplot(gs[0:, 1])\n",
+    "\n",
+    "    ax1.semilogy(frequencies, np.abs(h_f), label=r\"$|\\tilde{h}(f)|$\")\n",
+    "    ax1.semilogy(frequencies, asd, label=label)\n",
+    "    ax1.set(ylabel=\"Strain data [Hz$^{-1/2}$]\")\n",
+    "    ax1.legend()\n",
+    "\n",
+    "    fbins, pvals = fbins_anderson_p_value(frequencies, wh_f, bin_width_Hz=bin_width_Hz)\n",
+    "    ax2.scatter(fbins, pvals, s=2)\n",
+    "    ax2.axhline(1e-2, color=\"r\")\n",
+    "    ax2.set(xlabel=\"Frequency [Hz]\", ylabel=\"$p$-value\", yscale=\"log\")\n",
+    "\n",
+    "    bin_width = 0.025\n",
+    "    bins = np.arange(0, 1 + bin_width, bin_width)\n",
+    "    ax3.hist(pvals, bins=bins)\n",
+    "    ax3.set(xlabel=\"p-values\", ylabel=\"Count\", xlim=(0, 1))\n",
+    "    fig.tight_layout()\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we define the `sampling_frequency` and `duration` of data that we will utilise throughout this notebook and construct a set of frequencies."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sampling_frequency = 4096\n",
+    "duration = 4\n",
+    "frequencies = np.fft.rfftfreq(sampling_frequency * duration, d=1/sampling_frequency)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `frequencies` will have a length of `duration * sampling_frequency / 2 + 1` as it includes both the `0` Hz bin and the Nyquist frequency `sampling_frequency/2`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Getting started with a known PSD\n",
+    "\n",
+    "We will begin with a case where we *know* the PSD $S(f)$ a priori by simulating the data (or, equivalently, where we know the amplitude spectral density (ASD) $\\sqrt{S(f)}$). The idea here is to motivate the later sections, where we are faced with real interferometer data without a known PSD and must estimate its properties and evaluate its performance.\n",
+    "\n",
+    "First, let's use `bilby` to simulate some coloured Gaussian noise from a known PSD; this method simulates the noise in the frequency domain directly.\n",
+    "\n",
+    "We start by constructing a known PSD using an estimate of the O4 sensitivity curve. This is provided from a frequency of 10 Hz, so we extrapolate down to 0 Hz."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a bilby PSD object: the argument points to a named ASD file packaged as part of bilby\n",
+    "# This file can be replaced by a path to a specific ASD on file\n",
+    "psd = bilby.gw.detector.PowerSpectralDensity.from_amplitude_spectral_density_file(\"aLIGO_O4_high_asd.txt\")\n",
+    "\n",
+    "psd_array = psd.power_spectral_density_interpolated(frequencies)\n",
+    "# Extrapolate below 10 Hz: replace the infinities with the lowest-frequency PSD value\n",
+    "psd_array[np.isinf(psd_array)] = psd.psd_array[0]\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.loglog(frequencies, psd_array)\n",
+    "ax.set(xlabel=\"Frequency [Hz]\", ylabel=\"PSD [Hz$^{-1}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we construct another `bilby` `psd` object, but this time from the extrapolated PSD array. This ensures that when we simulate the data it will contain low-frequency noise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "psd = bilby.gw.detector.PowerSpectralDensity.from_power_spectral_density_array(frequencies, psd_array)\n",
+    "\n",
+    "# We generate a random noise realization in the frequency domain h_f\n",
+    "h_f_sim, _ = psd.get_noise_realisation(sampling_frequency, duration)\n",
+    "\n",
+    "# Inverse FFT to get the timeseries\n",
+    "h_t_sim = np.fft.irfft(h_f_sim)\n",
+    "time = np.arange(0, duration, 1 / sampling_frequency)\n",
+    "\n",
+    "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))\n",
+    "ax1.loglog(frequencies, np.abs(h_f_sim), label=r\"$|\\tilde{h}(f)|$\")\n",
+    "ax1.loglog(frequencies, psd.asd_array, label=r\"$\\sqrt{S(f)}$\")\n",
+    "ax1.set(xlabel=\"Frequency [Hz]\", ylabel=\"Strain data [Hz$^{-1/2}$]\")\n",
+    "ax1.legend()\n",
+    "\n",
+    "ax2.plot(time, h_t_sim)\n",
+    "ax2.set(xlabel=\"Time [s]\", ylabel=\"Strain []\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note**: In the frequency domain the strain data (and ASD) have units of $\\textrm{Hz}^{-1/2}$ because we use a normalised Fourier transform:\n",
+    "$$ \\tilde{h}(f) = \\frac{1}{f_s} \\textrm{FFT}(h(t))\\, $$\n",
+    "see [Thrane & Talbot (2020)](https://arxiv.org/pdf/1809.02293) for a detailed discussion.\n",
+    "\n",
+    "### Whitening\n",
+    "\n",
+    "Whitening refers to a transformation applied to a time series to make it uncorrelated and have unit variance. The goal is to remove dependencies between variables (decorrelation) and scale them uniformly. Specifically, given a PSD $S(f)$, the whitening filter in the frequency domain is proportional to $1/\\sqrt{S(f)}$; including the relevant normalisation factors, the whitened data is given by\n",
+    "$$ \\tilde{h}_w(f) = \\tilde{h}(f) \\sqrt{\\frac{4}{T S(f)}}\\,, $$\n",
+    "where $T$ is the data duration.\n",
+    "\n",
+    "The end result is that, if the data is properly whitened, the real and imaginary parts of $\\tilde{h}_w(f)$ are standard normal variables. Let's test this out. Below we plot a histogram of the whitened data alongside the probability density function (PDF) of a standard normal distribution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wh_f_sim = h_f_sim * np.sqrt(4 / duration) / np.sqrt(psd.psd_array)\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "kwargs = dict(bins=\"auto\", alpha=0.5, density=True)\n",
+    "ax.hist(wh_f_sim.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_sim.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi), label=\"Standard normal PDF\")\n",
+    "ax.set(xlabel=\"Whitened strain\")\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Testing normality\n",
+    "\n",
+    "Visually, the two histograms agree nicely with the PDF, but to quantify the agreement we need a test of normality. The standard approach used in the field is the [Anderson-Darling](https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test) test. There is a nice [scipy implementation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html); however, the number of critical values it provides is limited, so in the helper function above we have implemented a more involved test. Note that this is equivalent to the $\\mathcal{N}(\\mu, \\sigma)$ test of [Gupta & Cornish (2023)](https://arxiv.org/html/2312.11808v1), in which the mean and variance are estimated from the data.\n",
+    "\n",
+    "Below, we apply our helper function to the whitened frequency-domain data generated above. Note that internally, it combines the real and imaginary components together."
+   ]
+  },
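+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before doing so, as a quick cross-check of the helper implementation (a sketch for validation only), we can compare its test statistic against `scipy.stats.anderson`, which implements the same estimated-parameter form of the test; the two $A^2$ values should agree closely, even though `scipy` only reports a short table of critical values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cross-check: our Anderson-Darling statistic against the scipy implementation\n",
+    "import scipy.stats\n",
+    "\n",
+    "combined = np.concatenate([wh_f_sim.real, wh_f_sim.imag])\n",
+    "scipy_result = scipy.stats.anderson(combined, dist=\"norm\")\n",
+    "print(f\"scipy A^2: {scipy_result.statistic:.3f}\")\n",
+    "print(f\"our A^2:   {anderson_darling_statistic(combined):.3f}\")"
+   ]
+  },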
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(anderson_p_value(wh_f_sim))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The resulting $p$-value indicates that the data is consistent with the null hypothesis (that the data is distributed normally). If the $p$-value is $\\ll 1$, this indicates inconsistency and suggests we can reject the null hypothesis.\n",
+    "\n",
+    "### Frequency-dependent tests of normality\n",
+    "\n",
+    "Finally, following [Gupta & Cornish (2023)](https://arxiv.org/html/2312.11808v1), we can also apply the Anderson-Darling test to separate frequency-domain bins, enabling a study of the whitening across the spectrum.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(psd.frequency_array, psd.asd_array, h_f_sim, wh_f_sim, label=\"True ASD\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "From this figure, we see that across the spectrum the $p$-value is always above 0.01 (we also mark the 0.01 threshold used by Gupta and Cornish for later reference)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Estimating the PSD\n",
+    "In practice, we do not know the PSD of real interferometric strain data; instead, we must estimate it. To demonstrate the various methods to do this, we will consider estimating a PSD for Hanford (H1) data during O3b (note: you may find [this GWOSC tool](https://gwosc.org/timeline/show/O3b_4KHZ_R1/H1_DATA/1256655618/12708000/) useful to discover data availability)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t0 = 1256790000  # A time when the Hanford detector is online during O3b\n",
+    "ifo = \"H1\"  # The detector name as used by `gwpy`\n",
+    "duration = 4  # The duration of data used for analysis\n",
+    "post_trigger_duration = 2  # The end time of the analysis data relative to the trigger time\n",
+    "\n",
+    "# We download a total of 132 seconds of data (128s for PSD estimation followed by 4s of analysis data)\n",
+    "start = t0 + post_trigger_duration - duration - 128\n",
+    "end = t0 + post_trigger_duration\n",
+    "data = TimeSeries.fetch_open_data(ifo, start, end, cache=True)\n",
+    "\n",
+    "# We then create two sets of data\n",
+    "data_psd = data.crop(end=t0 + post_trigger_duration - duration)\n",
+    "data_analysis = data.crop(start=t0 + post_trigger_duration - duration, end=t0 + post_trigger_duration)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Off-source averaging\n",
+    "\n",
+    "The first method we will introduce is [Welch's method](https://en.wikipedia.org/wiki/Welch%27s_method), which averages Fourier transforms of off-source data. Specifically, we will use the `median` method including the bias correction, as introduced in [Allen et al. (2012)](https://arxiv.org/pdf/gr-qc/0509116)."
+   ]
+  },
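+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a brief aside on why a bias correction is needed (a minimal sketch, not part of the estimation workflow): for Gaussian noise, the periodogram value in a given frequency bin is, to a good approximation, exponentially distributed, and the median of an exponential distribution is $\\ln 2$ times its mean, so an uncorrected median estimate is biased low by a factor of $\\ln 2$. The cell below demonstrates this with simulated white noise; the `median` method used below includes this correction internally."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: the median periodogram of white Gaussian noise is biased low by a factor of ln(2)\n",
+    "rng = np.random.default_rng(42)\n",
+    "white_noise = rng.normal(size=(1024, 4096))  # 1024 independent realisations\n",
+    "periodograms = np.abs(np.fft.rfft(white_noise, axis=-1)) ** 2\n",
+    "ratio = np.median(periodograms, axis=0).mean() / periodograms.mean()\n",
+    "print(f\"median / mean = {ratio:.3f}, ln(2) = {np.log(2):.3f}\")"
+   ]
+  },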
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This approach is available in [scipy.signal.welch](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html#scipy.signal.welch) via the `average='median'` argument, and we can access it directly from `gwpy.timeseries.TimeSeries.psd` using `method='median'` ([see docs](https://gwpy.github.io/docs/stable/api/gwpy.timeseries.TimeSeries/#gwpy.timeseries.TimeSeries.psd)).\n",
+    "\n",
+    "Below we provide a function implementing this method. Of specific note, this implements the standard Tukey windowing used within Bilby: it is important that the same window is applied to the analysis data as when estimating the PSD."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def estimate_psd_offsource_averaging(\n",
+    "    psd_data,\n",
+    "    duration,\n",
+    "    psd_fractional_overlap,\n",
+    "    psd_method,\n",
+    "    roll_off,\n",
+    "):\n",
+    "    \"\"\" Estimate a Power Spectral Density (PSD) from averaging off-source strain data\n",
+    "\n",
+    "    Note: this function utilises [gwpy.timeseries.TimeSeries.psd](https://gwpy.github.io/docs/stable/api/gwpy.timeseries.TimeSeries/#gwpy.timeseries.TimeSeries.psd). It is recommended you read the documentation for that function for a detailed understanding of the implementation and options.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    psd_data: gwpy.timeseries.TimeSeries\n",
+    "        A timeseries of the strain data to use for off-source averaging. Note: this method assumes\n",
+    "        the data has been truncated and does not include the signal.\n",
+    "    duration: float\n",
+    "        The duration (in seconds) of each FFT used in the PSD estimation (assumed to be identical\n",
+    "        to the duration of the analysis data).\n",
+    "    psd_fractional_overlap: float [0, 1]\n",
+    "        The fractional amount of overlap between neighbouring FFT estimates.\n",
+    "    psd_method: str\n",
+    "        The averaging method passed to gwpy (e.g. \"median\" or \"welch\"); see the gwpy documentation.\n",
+    "    roll_off: float\n",
+    "        The roll-off (in seconds) of the Tukey window applied to each segment; this should match\n",
+    "        the roll-off used to window the analysis data.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    frequencies, psd: array_like\n",
+    "        The frequencies and estimated PSD\n",
+    "\n",
+    "    \"\"\"\n",
+    "\n",
+    "    psd_alpha = 2 * roll_off / duration\n",
+    "    overlap = psd_fractional_overlap * duration\n",
+    "    window = (\"tukey\", psd_alpha)\n",
+    "\n",
+    "    psd = psd_data.psd(\n",
+    "        fftlength=duration,\n",
+    "        overlap=overlap,\n",
+    "        window=window,\n",
+    "        method=psd_method,\n",
+    "    )\n",
+    "    return psd.frequencies.value, psd.value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And now let's apply the method to our `psd_data`, estimating the PSD from the full 128 s of off-source data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(\n",
+    "    duration=4,\n",
+    "    psd_fractional_overlap=0.5,\n",
+    "    roll_off=0.1,\n",
+    "    psd_method=\"median\"\n",
+    ")\n",
+    "\n",
+    "_, median_psd = estimate_psd_offsource_averaging(\n",
+    "    data_psd, **kwargs\n",
+    ")\n",
+    "median_asd = np.sqrt(median_psd)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And finally, create a plot of the estimated ASD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "kwargs = dict(lw=1, alpha=0.8)\n",
+    "ax.loglog(frequencies, median_asd, label=\"Median: 128s\", **kwargs)\n",
+    "ax.legend()\n",
+    "ax.set(ylim=(1e-24, 1e-19), xlabel='Frequency [Hz]', ylabel=\"ASD [Hz$^{-1/2}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Okay, so now let's test how well this ASD estimate whitens the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(bins=\"auto\", alpha=0.5, density=True)\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "h_f_median, wh_f_median = whiten(data_analysis, median_asd, frequencies)\n",
+    "\n",
+    "ax.hist(wh_f_median.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_median.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.set_title(f\"p-value={anderson_p_value(wh_f_median):0.5f}\")\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi))\n",
+    "ax.set(xlim=(-5, 5))\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(frequencies, median_asd, h_f_median, wh_f_median, bin_width_Hz=8)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## MESA\n",
+    "\n",
+    "Next, we investigate the MESA approach as implemented in [memspectrum](https://pypi.org/project/memspectrum/) (see [Martini et al. (2024)](https://arxiv.org/pdf/2106.09499))."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install memspectrum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def estimate_psd_memspectrum(\n",
+    "    psd_data,\n",
+    "    frequencies,\n",
+    "    mesa_solve_kwargs=None,\n",
+    "):\n",
+    "    \"\"\" Estimate a Power Spectral Density (PSD) using Maximum Entropy Spectral Estimation (MESA)\n",
+    "\n",
+    "    Note: this function utilises [memspectrum](https://maximum-entropy-spectrum.readthedocs.io/en/latest/usage/overview.html).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    psd_data: gwpy.timeseries.TimeSeries\n",
+    "        A timeseries of the strain data to use for MESA.\n",
+    "    frequencies: array_like\n",
+    "        The frequencies at which to evaluate the estimated PSD.\n",
+    "    mesa_solve_kwargs: dict (None)\n",
+    "        A dictionary of optional arguments to pass to [memspectrum.solve](https://maximum-entropy-spectrum.readthedocs.io/en/latest/package_reference/mesa.html#memspectrum.memspectrum.MESA.solve)\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    psd: array_like\n",
+    "        The estimated PSD evaluated at the input frequencies\n",
+    "\n",
+    "    \"\"\"\n",
+    "    import memspectrum\n",
+    "\n",
+    "    if mesa_solve_kwargs is None:\n",
+    "        mesa_solve_kwargs = dict()\n",
+    "\n",
+    "    mesa = memspectrum.MESA()\n",
+    "    mesa.solve(psd_data.value, **mesa_solve_kwargs)\n",
+    "    psd = mesa.spectrum(psd_data.dt.value, frequencies=frequencies)\n",
+    "    # Set the Nyquist bin to infinity so it receives zero weight when whitening\n",
+    "    psd[-1] = np.inf\n",
+    "\n",
+    "    return psd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For MESA, we will use a stretch of data equal in length to the analysis data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_psd_MESA = data_psd.crop(start=data_analysis.t0 - 1 * data_analysis.duration)\n",
+    "print(f\"Data duration is {data_psd_MESA.duration}\")\n",
+    "mesa_psd = estimate_psd_memspectrum(data_psd_MESA, frequencies, mesa_solve_kwargs=dict(method=\"Standard\", verbose=False))\n",
+    "mesa_asd = np.sqrt(mesa_psd)\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "kwargs = dict(lw=1, alpha=0.8)\n",
+    "ax.loglog(frequencies, mesa_asd, label=\"MESA: analysis data\", **kwargs)\n",
+    "ax.legend()\n",
+    "ax.set(ylim=(1e-24, 1e-19), xlabel='Frequency [Hz]', ylabel=\"ASD [Hz$^{-1/2}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(bins=np.linspace(-5, 5, 100), alpha=0.5, density=True)\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "h_f_mesa, wh_f_mesa = whiten(data_analysis, mesa_asd, frequencies)\n",
+    "\n",
+    "ax.hist(wh_f_mesa.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_mesa.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.set_title(f\"p-value={anderson_p_value(wh_f_mesa):0.5f}\")\n",
+    "\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi))\n",
+    "ax.set(xlim=(-5, 5))\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(frequencies, mesa_asd, h_f_mesa, wh_f_mesa, bin_width_Hz=8)"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 2
+}