diff --git a/bilby/gw/detector/interferometer.py b/bilby/gw/detector/interferometer.py
index ac0c14886..182624adb 100644
--- a/bilby/gw/detector/interferometer.py
+++ b/bilby/gw/detector/interferometer.py
@@ -639,16 +639,15 @@ def matched_filter_snr(self, signal):
     def whiten_frequency_series(self, frequency_series : np.array) -> np.array:
         """Whitens a frequency series with the noise properties of the detector

-        .. math::
-            \\tilde{a}_w(f) = \\tilde{a}(f) \\sqrt{\\frac{4}{T S_n(f)}}
-
-        Such that
+        Given a frequency series :math:`\\tilde{a}(f)`, whiten the data by applying

         .. math::
-            Var(n) = \\frac{1}{N} \\sum_{k=0}^N n_W(f_k)n_W^*(f_k) = 2
+            \\tilde{a}_w(f) = \\tilde{a}(f) \\sqrt{\\frac{4}{T S_n(f)}}\\,,

-        Where the factor of two is due to the independent real and imaginary
-        components.
+        where :math:`S_n(f)` is the Power Spectral Density (PSD). If the PSD correctly
+        describes the properties of the data, the real and imaginary components of the
+        resulting whitened frequency series will be independent standard normal random
+        variables.

         Parameters
         ==========
diff --git a/docs/index.txt b/docs/index.txt
index 3a02a207a..884ea99eb 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -35,12 +35,13 @@ Welcome to bilby's documentation!
 .. toctree::
    :maxdepth: 1
-   :caption: Examples:
+   :caption: Tutorials:

    making_priors
    compare_samplers
    fitting_with_x_and_y_errors
    visualising_the_results
+   power_spectral_density_estimation

 .. currentmodule:: bilby
diff --git a/examples/tutorials/power_spectral_density_estimation.ipynb b/examples/tutorials/power_spectral_density_estimation.ipynb
new file mode 100644
index 000000000..0d4e37a57
--- /dev/null
+++ b/examples/tutorials/power_spectral_density_estimation.ipynb
@@ -0,0 +1,715 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Power spectral density estimation methods\n",
+    "\n",
+    "This notebook introduces and compares in-built methods within `Bilby` for the estimation of the power spectral density (PSD) and discusses some of the theory that goes into estimating PSDs and evaluating their performance.\n",
+    "\n",
+    "## Standard imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"GWPY_RCPARAMS\"] = \"FALSE\"\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\", \"Wswiglal-redir-stdio\")\n",
+    "\n",
+    "import bilby\n",
+    "from gwpy.timeseries import TimeSeries\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy.special\n",
+    "import scipy.interpolate\n",
+    "import scipy.signal\n",
+    "\n",
+    "print(bilby.__version__)\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Construction of helper functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def empirical_cdf(data):\n",
+    "    \"\"\" Compute the empirical cumulative distribution function (ECDF). \"\"\"\n",
+    "    sorted_data = np.sort(data)\n",
+    "    n = len(data)\n",
+    "\n",
+    "    def ecdf(x):\n",
+    "        return np.searchsorted(sorted_data, x, side='right') / n\n",
+    "\n",
+    "    return ecdf, sorted_data\n",
+    "\n",
+    "def anderson_darling_statistic(data):\n",
+    "    \"\"\" Compute the Anderson-Darling test statistic for normality. \"\"\"\n",
\"\"\"\n", + " n = len(data)\n", + " ecdf, sorted_data = empirical_cdf(data)\n", + " \n", + " # Transform data to standard normal quantiles\n", + " mean = np.mean(data)\n", + " std = np.std(data, ddof=1) # Sample standard deviation\n", + " standardized = (sorted_data - mean) / std\n", + " \n", + " # Compute theoretical normal CDF values\n", + " normal_cdf = 0.5 * (1 + scipy.special.erf(standardized / np.sqrt(2))) # Standard normal CDF\n", + " \n", + " # Compute Anderson-Darling test statistic\n", + " s = np.sum((2 * np.arange(1, n + 1) - 1) * (np.log(normal_cdf) + np.log(1 - normal_cdf[::-1])))\n", + " A2 = -n - s / n\n", + " \n", + " return A2\n", + "\n", + "def anderson_p_value(data, freqs=None, fmin=0, fmax=np.inf):\n", + " \"\"\" Approximate the p-value for the Anderson-Darling test for normality. \"\"\"\n", + " \n", + " # If provided, cut the frequencies to a min/max value\n", + " if freqs is not None:\n", + " idxs = (freqs > fmin) & (freqs < fmax)\n", + " data = data[idxs]\n", + "\n", + " # Concatenate the real and imaginary parts together\n", + " data = np.concatenate([data.real, data.imag]) \n", + "\n", + " if len(data) == 0:\n", + " return np.nan\n", + " \n", + " A2 = anderson_darling_statistic(data)\n", + "\n", + " critical_values = [\n", + " 0.200, 0.300, 0.400, 0.500, 0.576, 0.656, 0.787, 0.918, \n", + " 1.092, 1.250, 1.500, 1.750, 2.000, 2.500, 3.000, 3.500, \n", + " 4.000, 4.500, 5.000, 6.000, 7.000, 8.000, 10.000\n", + " ]\n", + " \n", + " significance_levels = [\n", + " 0.90, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, \n", + " 0.30, 0.25, 0.20, 0.15, 0.10, 0.05, 0.01, 0.005, \n", + " 0.0025, 0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00001\n", + " ]\n", + "\n", + " # Approximate p-value using interpolation\n", + " if A2 < critical_values[0]:\n", + " pval = significance_levels[0]\n", + " elif A2 > critical_values[-1]:\n", + " pval = significance_levels[-1]\n", + " else:\n", + " pval = np.interp(A2, critical_values, significance_levels)\n", + "\n", + " return float(pval)\n", + "\n", + "def fbins_anderson_p_value(freq, data, bin_width_Hz=8, fmin=0, fmax=np.inf):\n", + " \"\"\"\n", + " Compute Anderson-Darling p-values for frequency bins.\n", + " \n", + " Parameters\n", + " ----------\n", + " freq : array-like\n", + " Frequency values.\n", + " data : array-like\n", + " Data corresponding to frequencies.\n", + " bin_width_Hz : float, optional\n", + " Width of frequency bins in Hz. Default is 8.\n", + " fmin : float, optional\n", + " Minimum frequency for analysis. Default is 0.\n", + " fmax : float, optional\n", + " Maximum frequency for analysis. Default is np.inf.\n", + " \n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing frequency bins and corresponding p-values.\n", + " \"\"\"\n", + " bin_width = int(bin_width_Hz * duration)\n", + " idxs = np.arange(0, len(data), bin_width)[:-1]\n", + " pvals = [anderson_p_value(data[ii:ii+bin_width], freq[ii:ii+bin_width], fmin=fmin, fmax=fmax) for ii in idxs]\n", + " fbins = [freq[ii + bin_width // 2] for ii in idxs]\n", + " return fbins, pvals\n", + "\n", + "def get_window_like(data, roll_off=0.1):\n", + " \"\"\"\n", + " Generate a Tukey window with a specified roll-off.\n", + " \n", + " Parameters\n", + " ----------\n", + " data : array-like\n", + " Input data to determine window size.\n", + " roll_off : float, optional\n", + " Roll-off factor for the Tukey window. 
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    array\n",
+    "        A Tukey window of the same length as the input data.\n",
+    "    \"\"\"\n",
+    "    psd_alpha = 2 * roll_off / duration\n",
+    "    return scipy.signal.get_window((\"tukey\", psd_alpha), len(data))\n",
+    "\n",
+    "def whiten(timeseries, asd, asd_frequencies, roll_off=0.1):\n",
+    "    \"\"\"\n",
+    "    Whiten a time series by dividing by the amplitude spectral density (ASD).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    timeseries : array-like\n",
+    "        The input time series data.\n",
+    "    asd : array-like\n",
+    "        The amplitude spectral density.\n",
+    "    asd_frequencies : array-like\n",
+    "        Frequencies corresponding to ASD values.\n",
+    "    roll_off : float, optional\n",
+    "        Roll-off factor for windowing. Default is 0.1.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        The original frequency series and the whitened frequency series.\n",
+    "    \"\"\"\n",
+    "    duration = timeseries.duration.value\n",
+    "    timeseries_windowed = timeseries * get_window_like(timeseries, roll_off)\n",
+    "    frequency_series = timeseries_windowed.fft()\n",
+    "    whitened_frequencyseries = frequency_series * np.sqrt(4 / duration) / asd\n",
+    "    return frequency_series, whitened_frequencyseries\n",
+    "\n",
+    "def plot_whitening(frequencies, asd, h_f, wh_f, label=None, bin_width_Hz=8):\n",
+    "    \"\"\"\n",
+    "    Plot whitening analysis, including the frequency domain signal, ASD, p-values, and histogram.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    frequencies : array-like\n",
+    "        Frequency values.\n",
+    "    asd : array-like\n",
+    "        Amplitude spectral density.\n",
+    "    h_f : array-like\n",
+    "        Frequency-domain representation of the original signal.\n",
+    "    wh_f : array-like\n",
+    "        Frequency-domain representation of the whitened signal.\n",
+    "    label : str, optional\n",
+    "        Label for the ASD plot. Default is None.\n",
+    "    bin_width_Hz : float, optional\n",
+    "        Width of frequency bins for p-value computation. Default is 8.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    None\n",
+    "    \"\"\"\n",
+    "    fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, figsize=(8, 5))\n",
+    "    ax1, ax2 = axes[:, 0]\n",
+    "    gs = axes[1, 0].get_gridspec()\n",
+    "    axes[0, 1].remove()\n",
+    "    axes[1, 1].remove()\n",
+    "    ax3 = fig.add_subplot(gs[0:, 1])\n",
+    "\n",
+    "    ax1.semilogy(frequencies, np.abs(h_f), label=r\"$|\\tilde{h}(f)|$\")\n",
+    "    ax1.semilogy(frequencies, asd, label=label)\n",
+    "    ax1.set(ylabel=\"Strain data [Hz$^{-1/2}$]\")\n",
+    "    ax1.legend()\n",
+    "\n",
+    "    fbins, pvals = fbins_anderson_p_value(frequencies, wh_f, bin_width_Hz=bin_width_Hz)\n",
+    "    ax2.scatter(fbins, pvals, s=2)\n",
+    "    ax2.axhline(1e-2, color=\"r\")\n",
+    "    ax2.set(xlabel=\"Frequency [Hz]\", ylabel=\"$p$-value\", yscale=\"log\")\n",
+    "\n",
+    "    bin_width = 0.025\n",
+    "    bins = np.arange(0, 1 + bin_width, bin_width)\n",
+    "    ax3.hist(pvals, bins=bins)\n",
+    "    ax3.set(xlabel=\"p-values\", ylabel=\"Count\", xlim=(0, 1))\n",
+    "    fig.tight_layout()\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we define the `sampling_frequency` and `duration` of data that we will utilise throughout this notebook and construct a set of frequencies."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sampling_frequency = 4096\n",
+    "duration = 4\n",
+    "frequencies = np.fft.rfftfreq(sampling_frequency * duration, d=1/sampling_frequency)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `frequencies` will have a length of `duration * sampling_frequency / 2 + 1` as it includes both the `0` Hz bin and the Nyquist frequency `sampling_frequency/2`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Getting started with a known PSD\n",
+    "\n",
+    "We will begin with a case where we *know* the PSD $S(f)$ a priori by simulating the data (or, equivalently, where we know the amplitude spectral density (ASD) $\\sqrt{S(f)}$). The idea here is to motivate the later sections, where we are faced with real interferometer data without a known PSD and must estimate its properties and evaluate its performance.\n",
+    "\n",
+    "First, let's use `bilby` to simulate some coloured Gaussian noise from a known PSD; this method simulates the noise in the frequency domain directly.\n",
+    "\n",
+    "We start by constructing a known PSD using an estimate of the O4 sensitivity curve. This is provided from a frequency of 10 Hz, so we extrapolate down to 0 Hz."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a bilby PSD object: the argument points to a named ASD file packaged as part of bilby\n",
+    "# This file can be replaced by a path to a specific ASD on file\n",
+    "psd = bilby.gw.detector.PowerSpectralDensity.from_amplitude_spectral_density_file(\"aLIGO_O4_high_asd.txt\")\n",
+    "\n",
+    "psd_array = psd.power_spectral_density_interpolated(frequencies)\n",
+    "# Extrapolate below 10 Hz: replace the infinities with the lowest-frequency PSD value\n",
+    "psd_array[np.isinf(psd_array)] = psd.psd_array[0]\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.loglog(frequencies, psd_array)\n",
+    "ax.set(xlabel=\"Frequency [Hz]\", ylabel=\"PSD [Hz$^{-1}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we construct another `bilby` `psd` object, but this time from the extrapolated PSD array. This ensures that when we simulate the data it will contain low-frequency noise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "psd = bilby.gw.detector.PowerSpectralDensity.from_power_spectral_density_array(frequencies, psd_array)\n",
+    "\n",
+    "# We generate a random noise realization in the frequency domain h_f\n",
+    "h_f_sim, _ = psd.get_noise_realisation(sampling_frequency, duration)\n",
+    "\n",
+    "# Inverse FFT to get the timeseries\n",
+    "h_t_sim = np.fft.irfft(h_f_sim)\n",
+    "time = np.arange(0, duration, 1 / sampling_frequency)\n",
+    "\n",
+    "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))\n",
+    "ax1.loglog(frequencies, np.abs(h_f_sim), label=r\"$|\\tilde{h}(f)|$\")\n",
+    "ax1.loglog(frequencies, psd.asd_array, label=r\"$\\sqrt{S(f)}$\")\n",
+    "ax1.set(xlabel=\"Frequency [Hz]\", ylabel=\"Strain data [Hz$^{-1/2}$]\")\n",
+    "ax1.legend()\n",
+    "\n",
+    "ax2.plot(time, h_t_sim)\n",
+    "ax2.set(xlabel=\"Time [s]\", ylabel=\"Strain []\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note**: In the frequency domain the strain data (and ASD) have units of $\\textrm{Hz}^{-1/2}$ because we use a normalised Fourier transform:\n",
+    "$$ \\tilde{h}(f) = \\frac{1}{f_s} \\textrm{FFT}(h(t))\\, $$\n",
+    "see [Thrane & Talbot (2020)](https://arxiv.org/pdf/1809.02293) for a detailed discussion.\n",
+    "\n",
+    "### Whitening\n",
+    "\n",
+    "Whitening refers to a transformation applied to a time series to make it uncorrelated and have unit variance. The goal is to remove dependencies between variables (decorrelation) and scale them uniformly. Specifically, given a PSD $S(f)$, the whitening filter in the frequency domain is proportional to $1/\\sqrt{S(f)}$; including the relevant normalisation factors, the whitened data is given by\n",
+    "$$ \\tilde{h}_w(f) = \\tilde{h}(f) \\sqrt{\\frac{4}{T S(f)}}\\,, $$\n",
+    "where $T$ is the data duration.\n",
+    "\n",
+    "The end result is that, if the data is properly whitened, the real and imaginary parts of $\\tilde{h}_w(f)$ are standard normal variables. Let's test this out. Below we plot a histogram of the whitened data alongside the probability density function (PDF) of a standard normal distribution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wh_f_sim = h_f_sim * np.sqrt(4 / duration) / np.sqrt(psd.psd_array)\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "kwargs = dict(bins=\"auto\", alpha=0.5, density=True)\n",
+    "ax.hist(wh_f_sim.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_sim.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi), label=\"Standard normal PDF\")\n",
+    "ax.set(xlabel=\"Whitened strain\")\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Testing normality\n",
+    "\n",
+    "Visually, the two histograms agree nicely with the PDF, but to quantify the agreement we need a test of normality. The standard approach used in the field is the [Anderson-Darling](https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test) test. There is a nice [scipy implementation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html); however, the number of critical values it provides is limited, so in the helper function above we have implemented a more involved test. Note that this is equivalent to the $\\mathcal{N}(\\mu, \\sigma)$ test of [Gupta & Cornish (2023)](https://arxiv.org/html/2312.11808v1), in which the mean and variance are estimated from the data.\n",
+    "\n",
+    "Below, we apply our helper function to the whitened frequency-domain data generated above. Note that internally, it combines the real and imaginary components together."
+   ]
+  },
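+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before doing so, as a quick cross-check of the helper implementation (a sketch for validation only), we can compare its test statistic against `scipy.stats.anderson`, which implements the same estimated-parameter form of the test; the two $A^2$ values should agree closely, even though `scipy` only reports a short table of critical values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cross-check: our Anderson-Darling statistic against the scipy implementation\n",
+    "import scipy.stats\n",
+    "\n",
+    "combined = np.concatenate([wh_f_sim.real, wh_f_sim.imag])\n",
+    "scipy_result = scipy.stats.anderson(combined, dist=\"norm\")\n",
+    "print(f\"scipy A^2: {scipy_result.statistic:.3f}\")\n",
+    "print(f\"our A^2:   {anderson_darling_statistic(combined):.3f}\")"
+   ]
+  },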
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(anderson_p_value(wh_f_sim))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The resulting $p$-value indicates that the data is consistent with the null hypothesis (that the data is distributed normally). If the $p$-value is $\\ll 1$, this indicates inconsistency and suggests we can reject the null hypothesis.\n",
+    "\n",
+    "### Frequency-dependent tests of normality\n",
+    "\n",
+    "Finally, following [Gupta & Cornish (2023)](https://arxiv.org/html/2312.11808v1), we can also apply the Anderson-Darling test to separate frequency-domain bins, enabling a study of the whitening across the spectrum.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(psd.frequency_array, psd.asd_array, h_f_sim, wh_f_sim, label=\"True ASD\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "From this figure, we see that across the spectrum the $p$-value is always above 0.01 (we also mark the 0.01 threshold used by Gupta and Cornish for later reference)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Estimating the PSD\n",
+    "In practice, we do not know the PSD of real interferometric strain data; instead, we must estimate it. To demonstrate the various methods to do this, we will consider estimating a PSD for Hanford (H1) data during O3b (note: you may find [this GWOSC tool](https://gwosc.org/timeline/show/O3b_4KHZ_R1/H1_DATA/1256655618/12708000/) useful to discover data availability)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t0 = 1256790000  # A time when the Hanford detector is online during O3b\n",
+    "ifo = \"H1\"  # The detector name as used by `gwpy`\n",
+    "duration = 4  # The duration of data used for analysis\n",
+    "post_trigger_duration = 2  # The end time of the analysis data relative to the trigger time\n",
+    "\n",
+    "# We download a total of 132 seconds of data (128s for PSD estimation followed by 4s of analysis data)\n",
+    "start = t0 + post_trigger_duration - duration - 128\n",
+    "end = t0 + post_trigger_duration\n",
+    "data = TimeSeries.fetch_open_data(ifo, start, end, cache=True)\n",
+    "\n",
+    "# We then create two sets of data\n",
+    "data_psd = data.crop(end=t0 + post_trigger_duration - duration)\n",
+    "data_analysis = data.crop(start=t0 + post_trigger_duration - duration, end=t0 + post_trigger_duration)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Off-source averaging\n",
+    "\n",
+    "The first method we will introduce is [Welch's method](https://en.wikipedia.org/wiki/Welch%27s_method), which averages Fourier transforms of off-source data. Specifically, we will use the `median` method including the bias correction, as introduced in [Allen et al. (2012)](https://arxiv.org/pdf/gr-qc/0509116)."
+   ]
+  },
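+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a brief aside on why a bias correction is needed (a minimal sketch, not part of the estimation workflow): for Gaussian noise, the periodogram value in a given frequency bin is, to a good approximation, exponentially distributed, and the median of an exponential distribution is $\\ln 2$ times its mean, so an uncorrected median estimate is biased low by a factor of $\\ln 2$. The cell below demonstrates this with simulated white noise; the `median` method used below includes this correction internally."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: the median periodogram of white Gaussian noise is biased low by a factor of ln(2)\n",
+    "rng = np.random.default_rng(42)\n",
+    "white_noise = rng.normal(size=(1024, 4096))  # 1024 independent realisations\n",
+    "periodograms = np.abs(np.fft.rfft(white_noise, axis=-1)) ** 2\n",
+    "ratio = np.median(periodograms, axis=0).mean() / periodograms.mean()\n",
+    "print(f\"median / mean = {ratio:.3f}, ln(2) = {np.log(2):.3f}\")"
+   ]
+  },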
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This approach is available in [scipy.signal.welch](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html#scipy.signal.welch) via the `average='median'` argument, and we can access it directly from `gwpy.timeseries.TimeSeries.psd` using `method='median'` ([see docs](https://gwpy.github.io/docs/stable/api/gwpy.timeseries.TimeSeries/#gwpy.timeseries.TimeSeries.psd)).\n",
+    "\n",
+    "Below we provide a function implementing this method. Of specific note, this implements the standard Tukey windowing used within Bilby: it is important that the same window is applied to the analysis data as when estimating the PSD."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def estimate_psd_offsource_averaging(\n",
+    "    psd_data,\n",
+    "    duration,\n",
+    "    psd_fractional_overlap,\n",
+    "    psd_method,\n",
+    "    roll_off,\n",
+    "):\n",
+    "    \"\"\" Estimate a Power Spectral Density (PSD) from averaging off-source strain data\n",
+    "\n",
+    "    Note: this function utilises [gwpy.timeseries.TimeSeries.psd](https://gwpy.github.io/docs/stable/api/gwpy.timeseries.TimeSeries/#gwpy.timeseries.TimeSeries.psd). It is recommended you read the documentation for that function for a detailed understanding of the implementation and options.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    psd_data: gwpy.timeseries.TimeSeries\n",
+    "        A timeseries of the strain data to use for off-source averaging. Note: this method assumes\n",
+    "        the data has been truncated and does not include the signal.\n",
+    "    duration: float\n",
+    "        The duration (in seconds) of each FFT used in the PSD estimation (assumed to be identical\n",
+    "        to the duration of the analysis data).\n",
+    "    psd_fractional_overlap: float [0, 1]\n",
+    "        The fractional amount of overlap between neighbouring FFT estimates.\n",
+    "    psd_method: str\n",
+    "        The averaging method passed to gwpy (e.g. \"median\" or \"welch\"); see the gwpy documentation.\n",
+    "    roll_off: float\n",
+    "        The roll-off (in seconds) of the Tukey window applied to each segment; this should match\n",
+    "        the roll-off used to window the analysis data.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    frequencies, psd: array_like\n",
+    "        The frequencies and estimated PSD\n",
+    "\n",
+    "    \"\"\"\n",
+    "\n",
+    "    psd_alpha = 2 * roll_off / duration\n",
+    "    overlap = psd_fractional_overlap * duration\n",
+    "    window = (\"tukey\", psd_alpha)\n",
+    "\n",
+    "    psd = psd_data.psd(\n",
+    "        fftlength=duration,\n",
+    "        overlap=overlap,\n",
+    "        window=window,\n",
+    "        method=psd_method,\n",
+    "    )\n",
+    "    return psd.frequencies.value, psd.value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And now let's apply the method to our `psd_data`, estimating the PSD from the full 128 s of off-source data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(\n",
+    "    duration=4,\n",
+    "    psd_fractional_overlap=0.5,\n",
+    "    roll_off=0.1,\n",
+    "    psd_method=\"median\"\n",
+    ")\n",
+    "\n",
+    "_, median_psd = estimate_psd_offsource_averaging(\n",
+    "    data_psd, **kwargs\n",
+    ")\n",
+    "median_asd = np.sqrt(median_psd)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And finally, create a plot of the estimated ASD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "kwargs = dict(lw=1, alpha=0.8)\n",
+    "ax.loglog(frequencies, median_asd, label=\"Median: 128s\", **kwargs)\n",
+    "ax.legend()\n",
+    "ax.set(ylim=(1e-24, 1e-19), xlabel='Frequency [Hz]', ylabel=\"ASD [Hz$^{-1/2}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Okay, so now let's test how well this ASD estimate whitens the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(bins=\"auto\", alpha=0.5, density=True)\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "h_f_median, wh_f_median = whiten(data_analysis, median_asd, frequencies)\n",
+    "\n",
+    "ax.hist(wh_f_median.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_median.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.set_title(f\"p-value={anderson_p_value(wh_f_median):0.5f}\")\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi))\n",
+    "ax.set(xlim=(-5, 5))\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(frequencies, median_asd, h_f_median, wh_f_median, bin_width_Hz=8)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## MESA\n",
+    "\n",
+    "Next, we investigate the MESA approach as implemented in [memspectrum](https://pypi.org/project/memspectrum/) (see [Martini et al. (2024)](https://arxiv.org/pdf/2106.09499))."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install memspectrum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def estimate_psd_memspectrum(\n",
+    "    psd_data,\n",
+    "    frequencies,\n",
+    "    mesa_solve_kwargs=None,\n",
+    "):\n",
+    "    \"\"\" Estimate a Power Spectral Density (PSD) using Maximum Entropy Spectral Estimation (MESA)\n",
+    "\n",
+    "    Note: this function utilises [memspectrum](https://maximum-entropy-spectrum.readthedocs.io/en/latest/usage/overview.html).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    psd_data: gwpy.timeseries.TimeSeries\n",
+    "        A timeseries of the strain data to use for MESA.\n",
+    "    frequencies: array_like\n",
+    "        The frequencies at which to evaluate the estimated PSD.\n",
+    "    mesa_solve_kwargs: dict (None)\n",
+    "        A dictionary of optional arguments to pass to [memspectrum.solve](https://maximum-entropy-spectrum.readthedocs.io/en/latest/package_reference/mesa.html#memspectrum.memspectrum.MESA.solve)\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    psd: array_like\n",
+    "        The estimated PSD evaluated at the input frequencies\n",
+    "\n",
+    "    \"\"\"\n",
+    "    import memspectrum\n",
+    "\n",
+    "    if mesa_solve_kwargs is None:\n",
+    "        mesa_solve_kwargs = dict()\n",
+    "\n",
+    "    mesa = memspectrum.MESA()\n",
+    "    mesa.solve(psd_data.value, **mesa_solve_kwargs)\n",
+    "    psd = mesa.spectrum(psd_data.dt.value, frequencies=frequencies)\n",
+    "    # Set the Nyquist bin to infinity so it receives zero weight when whitening\n",
+    "    psd[-1] = np.inf\n",
+    "\n",
+    "    return psd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For MESA, we will use a stretch of data equal in length to the analysis data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_psd_MESA = data_psd.crop(start=data_analysis.t0 - 1 * data_analysis.duration)\n",
+    "print(f\"Data duration is {data_psd_MESA.duration}\")\n",
+    "mesa_psd = estimate_psd_memspectrum(data_psd_MESA, frequencies, mesa_solve_kwargs=dict(method=\"Standard\", verbose=False))\n",
+    "mesa_asd = np.sqrt(mesa_psd)\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "kwargs = dict(lw=1, alpha=0.8)\n",
+    "ax.loglog(frequencies, mesa_asd, label=\"MESA: analysis data\", **kwargs)\n",
+    "ax.legend()\n",
+    "ax.set(ylim=(1e-24, 1e-19), xlabel='Frequency [Hz]', ylabel=\"ASD [Hz$^{-1/2}$]\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kwargs = dict(bins=np.linspace(-5, 5, 100), alpha=0.5, density=True)\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "h_f_mesa, wh_f_mesa = whiten(data_analysis, mesa_asd, frequencies)\n",
+    "\n",
+    "ax.hist(wh_f_mesa.real, label=r\"$\\mathrm{Re}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.hist(wh_f_mesa.imag, label=r\"$\\mathrm{Im}(\\tilde{h}_w(f))$\", **kwargs)\n",
+    "ax.set_title(f\"p-value={anderson_p_value(wh_f_mesa):0.5f}\")\n",
+    "\n",
+    "xs = np.linspace(-5, 5, 1000)\n",
+    "ax.plot(xs, np.exp(-xs**2 / 2) / np.sqrt(2 * np.pi))\n",
+    "ax.set(xlim=(-5, 5))\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_whitening(frequencies, mesa_asd, h_f_mesa, wh_f_mesa, bin_width_Hz=8)"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 2
+}