From 0a6a4183a8fc7d28440dc6dd37efcb653587ae19 Mon Sep 17 00:00:00 2001 From: klapo Date: Fri, 16 Aug 2024 08:16:18 +0000 Subject: [PATCH] export tutorials changed in 82e6ee5 --- PyDMD.egg-info/PKG-INFO | 319 + PyDMD.egg-info/SOURCES.txt | 69 + PyDMD.egg-info/dependency_links.txt | 1 + PyDMD.egg-info/requires.txt | 16 + PyDMD.egg-info/top_level.txt | 1 + .../_tutorials/coststutorial_realdata.html | 9793 +++++++++ docs/source/_tutorials/tutorial2advdmd.html | 16853 ++++++++-------- .../tutorial20/costs-tutorial_real-data.py | 821 + 8 files changed, 19242 insertions(+), 8631 deletions(-) create mode 100644 PyDMD.egg-info/PKG-INFO create mode 100644 PyDMD.egg-info/SOURCES.txt create mode 100644 PyDMD.egg-info/dependency_links.txt create mode 100644 PyDMD.egg-info/requires.txt create mode 100644 PyDMD.egg-info/top_level.txt create mode 100644 docs/source/_tutorials/coststutorial_realdata.html create mode 100644 tutorials/tutorial20/costs-tutorial_real-data.py diff --git a/PyDMD.egg-info/PKG-INFO b/PyDMD.egg-info/PKG-INFO new file mode 100644 index 000000000..dfb3729d7 --- /dev/null +++ b/PyDMD.egg-info/PKG-INFO @@ -0,0 +1,319 @@ +Metadata-Version: 2.1 +Name: PyDMD +Version: 0.0.1 +Summary: Python Dynamic Mode Decomposition. +Author: Nicola Demo, Marco Tezzele, Francesco Andreuzzi, Sara Ichinaga, Karl Lapo +Maintainer-email: pydmd.info@gmail.com +License: The MIT License (MIT) + + Copyright (c) 2017-2024 PyDMD contributors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ +Project-URL: Homepage, https://pydmd.github.io/PyDMD +Project-URL: Documentation, https://pydmd.github.io/PyDMD/code.html +Project-URL: Issues, https://github.com/PyDMD/PyDMD/issues +Project-URL: Repository, https://github.com/PyDMD/PyDMD +Keywords: dynamic-mode-decomposition,dmd +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Intended Audience :: Science/Research +Classifier: Topic :: Scientific/Engineering :: Mathematics +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: numpy<2 +Requires-Dist: scipy>=1.6.0 +Requires-Dist: matplotlib +Requires-Dist: scikit-learn +Requires-Dist: xarray +Requires-Dist: h5netcdf +Provides-Extra: test +Requires-Dist: pytest; extra == "test" +Requires-Dist: pytest-cov; extra == "test" +Requires-Dist: pytest-mock; extra == "test" +Requires-Dist: ezyrb>=v1.2.1.post2205; extra == "test" +Provides-Extra: docs +Requires-Dist: sphinx>=1.4; extra == "docs" +Requires-Dist: sphinx_rtd_theme; extra == "docs" + +

+ + Python Dynamic Mode Decomposition + +

+

+ + Docs + + + PyPI version + + + Python versions + + + Software License + +
+ + CI Status + + + + + + Codacy Badge + + + black code style + +
+ + All Contributors + + + GitHub Repo stars + + + PyPI downloads per month + + + JOSS DOI + +

+
+## Table of contents
+* [Description](#description)
+* [Dependencies and installation](#dependencies-and-installation)
+* [Quickstart guide](#quickstart-guide)
+* [Awards](#awards)
+* [Citing PyDMD](#citing-pydmd)
+* [References](#references)
+* [Developers and contributors](#developers-and-contributors)
+* [Funding](#funding)
+* [Affiliations](#affiliations)
+
+## Description
+
+**PyDMD** is a Python package designed for **Dynamic Mode Decomposition (DMD)**, a data-driven method used for analyzing and extracting spatiotemporal coherent structures from time-varying datasets. It provides a comprehensive and user-friendly interface for performing DMD analysis, making it a valuable tool for researchers, engineers, and data scientists working in various fields.
+
+With PyDMD, users can easily decompose complex, high-dimensional datasets into a set of coherent spatial and temporal modes, capturing the underlying dynamics and extracting important features. The package implements both standard DMD algorithms and advanced variations, enabling users to choose the most suitable method for their specific needs. These extensions allow users to deal with noisy data, big datasets, and control variables, or to impose physical structures.
+
+PyDMD offers seamless integration with the scientific Python ecosystem, leveraging popular libraries such as NumPy and SciPy for efficient numerical computations and data manipulation. It also offers a variety of visualization tools, including mode reconstruction, energy spectrum analysis, and time evolution plotting. These capabilities enable users to gain insights into the dominant modes of the system, identify significant features, and understand the temporal evolution of the dynamics.
+
+PyDMD promotes ease of use and customization, providing a well-documented API with intuitive function names and clear parameter descriptions. The package is actively maintained and updated, ensuring compatibility with the latest Python versions and incorporating user feedback to improve functionality and performance. We provide many tutorials showing the characteristics of the software. See the [**Examples**](#examples-and-tutorials) section below and the [**Tutorials**](tutorials/README.md) to get an idea of the potential of this package. Also see the diagram below for a summary of all available tools and functionalities. Currently in-progress contributions are represented by semi-transparent boxes.
+

+ +

+ +## Dependencies and installation + +### Installing via PIP +PyDMD is available on [PyPI](https://pypi.org/project/pydmd), therefore you can install the latest released version with: +```bash +> pip install pydmd +``` + +### Installing from source +To install the bleeding edge version, clone this repository with: +```bash +> git clone https://github.com/PyDMD/PyDMD +``` + +and then install the package in [development mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html): +```bash +> pip install -e . +``` + +### Dependencies +The core features of **PyDMD** depend on `numpy` and `scipy`. In order to use the plotting functionalities you will also need `matplotlib`. + +## Quickstart Guide +To perform DMD, simply begin by initializing a PyDMD module that implements your DMD method of choice. Models may then be fitted by calling the `fit()` method and passing in the necessary data. This step performs the DMD algorithm, after which users may use PyDMD plotting tools in order to visualize their results. + +```python3 +from pydmd import DMD +from pydmd.plotter import plot_summary + +# Build an exact DMD model with 12 spatiotemporal modes. +dmd = DMD(svd_rank=12) + +# Fit the DMD model. +# X = (n, m) numpy array of time-varying snapshot data. +dmd.fit(X) + +# Plot a summary of the key spatiotemporal modes. +plot_summary(dmd) +``` + +PyDMD modules can also be wrapped with data preprocessors if desired. These wrappers will preprocess the data and postprocess data reconstructions automatically. +```python3 +from pydmd import DMD +from pydmd.preprocessing import zero_mean_preprocessing + +# Build and fit an exact DMD model with data centering. +centered_dmd = zero_mean_preprocessing(DMD(svd_rank=12)) +centered_dmd.fit(X) +``` + +Users may also build highly complex DMD models with PyDMD. Below is an example of how one might build and fit a customized Optimized DMD model with bagging, eigenvalue constraints, and custom variable projection arguments. +```python3 +from pydmd import BOPDMD + +# Build a Bagging, Optimized DMD (BOP-DMD) model. +# For Optimized DMD (without bagging), use BOPDMD(svd_rank=12, num_trials=0). +bopdmd = BOPDMD( + svd_rank=12, # Rank of the DMD fit. + num_trials=100, # Number of bagging trials to perform. + trial_size=0.5, # Use 50% of the total number of snapshots per trial. + eig_constraints={"imag", "conjugate_pairs"}, # Eigenvalues must be imaginary and conjugate pairs. + varpro_opts_dict={"tol":0.2, "verbose":True}, # Set convergence tolerance and use verbose updates. +) + +# Fit the BOP-DMD model. +# X = (n, m) numpy array of time-varying snapshot data +# t = (m,) numpy array of times of data collection +bopdmd.fit(X, t) +``` + +PyDMD modules and functions may be parameterized by a variety of inputs for added customization, so we generally recommend that new users refer to our [documentation](https://pydmd.github.io/PyDMD/code.html), as well as to our module-specific [tutorials](tutorials/README.md) for more examples and information. + +Also provided below is an example call to the `plot_summary()` function when given a DMD model fitted to mean-centered flow past a cylinder data available at dmdbook.com/DATA.zip. A rank-12 exact DMD model was used to generate this figure. Eigenvalues, modes, and dynamics are color-coded to indicate associations. Eigenvalue marker sizes also indicate spatiotemporal mode amplitudes or importance. + +Plotting tool documentation can be found [here](https://pydmd.github.io/PyDMD/plotter.html). 
+```python3 +from pydmd.plotter import plot_summary + +plot_summary( + dmd, # <-- Fitted PyDMD model. Can be DMD, BOPDMD, etc. + figsize=(12, 7), + index_modes=(0, 2, 4), + snapshots_shape=(449, 199), + order="F", + mode_cmap="seismic", + dynamics_color="k", + flip_continuous_axes=True, + max_sval_plot=30, +) +``` +

+
+ Sample output of the plot_summary function. +

+ +For users who are unsure of which DMD method is best for them, we provide the following flow chart, which outlines how one might choose an appropriate DMD variant based on specific problem types or data sets. + +

+ +

+ +## Awards + +First prize winner in **DSWeb 2019 Contest** _Tutorials on Dynamical Systems Software_ (Junior Faculty Category). You can read the winner tutorial (PDF format) in the [tutorials](tutorials/tutorial_dsweb.pdf) folder. + +## Citing PyDMD +When citing PyDMD, please cite both of the following references: +* Demo, Tezzele, Rozza. *PyDMD: Python Dynamic Mode Decomposition*. Journal of Open Source Software, 2018. [[DOI](https://doi.org/10.21105/joss.00530)][[bibitem](readme/refs/Demo2018)] +* Ichinaga, Andreuzzi, Demo, Tezzele, Lapo, Rozza, Brunton, Kutz. *PyDMD: A Python package for robust dynamic mode decomposition*. arXiv preprint, 2024. [[arXiv](https://doi.org/10.48550/arXiv.2402.07463)] + +## References +To implement the various versions of the DMD algorithm we follow these works: + +### General DMD References +* Kutz, Brunton, Brunton, Proctor. *Dynamic Mode Decomposition: Data-Driven Modeling of Complex Systems*. SIAM Other Titles in Applied Mathematics, 2016. [[DOI](https://doi.org/10.1137/1.9781611974508)] [[bibitem](readme/refs/Kutz2016_1.bib)]. +* Schmid. *Dynamic mode decomposition of numerical and experimental data*. Journal of Fluid Mechanics, 2010. [[DOI](https://doi.org/10.1017/S0022112010001217)][[bibitem](readme/refs/Schmid2010)] +* Tu, Rowley, Luchtenburg, Brunton, Kutz. *On Dynamic Mode Decomposition: Theory and Applications*. Journal of Computational Dynamics, 2014. [[DOI](https://doi.org/10.3934/jcd.2014.1.391)][[bibitem](readme/refs/Tu2014.bib)] +* Schmid. *Dynamic mode decomposition and its variants*. Annual Review of Fluid Mechanics, 2022. [[DOI](https://doi.org/10.1146/annurev-fluid-030121-015835)][[bibitem](readme/refs/Schmid2022)] + +### DMD Variants: Noise-robust Methods +* **Forward-backward DMD:** Dawson, Hemati, Williams, Rowley. *Characterizing and correcting for the effect of sensor noise in the dynamic mode decomposition*. Experiments in Fluids, 2016. [[DOI](https://doi.org/10.1007/s00348-016-2127-7)] [[bibitem](readme/refs/Dawson2016.bib)]. +* **Total least-squares DMD:** Hemati, Rowley, Deem, Cattafesta. *De-biasing the dynamic mode decomposition for applied Koopman spectral analysis of noisy datasets*. Theoretical and Computational Fluid Dynamics, 2017. [[DOI](https://doi.org/10.1007/s00162-017-0432-2)] [[bibitem](readme/refs/Hemati2017.bib)]. +* **Optimal closed-form DMD:** Héas, Herzet. *Low-rank dynamic mode decomposition: An exact and tractable solution*. Journal of Nonlinear Science, 2022. [[DOI](https://doi.org/10.1007/s00332-021-09770-w)] [[bibitem](readme/refs/Heas2022.bib)]. +* **Subspace DMD:** Takeishi, Kawahara, Yairi. *Subspace dynamic mode decomposition for stochastic Koopman analysis*. Physical Review E, 2017. [[DOI](https://doi.org/10.1103/PhysRevE.96.033310)] [[bibitem](readme/refs/Takeishi2017.bib)]. +* **Physics-informed DMD:** Baddoo, Herrmann, McKeon, Kutz, Brunton. *Physics-informed dynamic mode decomposition*. Proceedings of the Royal Society A, 2023. [[DOI](https://doi.org/10.1098/rspa.2022.0576)] [[bibitem](readme/refs/Baddoo2023.bib)]. +* **Optimized DMD:** Askham, Kutz. *Variable projection methods for an optimized dynamic mode decomposition*. SIAM Journal on Applied Dynamical Systems, 2018. [[DOI](https://doi.org/10.1137/M1124176)] [[bibitem](readme/refs/Askham2018.bib)]. +* **Bagging, optimized DMD:** Sashidhar, Kutz. *Bagging, optimized dynamic mode decomposition for robust, stable forecasting with spatial and temporal uncertainty quantification*. Proceedings of the Royal Society A, 2022. 
[[DOI](https://doi.org/10.1098/rsta.2021.0199)] [[bibitem](readme/refs/Sashidhar2022.bib)]. + +### DMD Variants: Additional Methods and Extensions +* **DMD with Control:** Proctor, Brunton, Kutz. *Dynamic mode decomposition with control*. SIAM Journal on Applied Dynamical Systems, 2016. [[DOI](https://doi.org/10.1137/15M1013857)] [[bibitem](readme/refs/Proctor2016.bib)]. +* **Multiresolution DMD:** Kutz, Fu, Brunton. *Multiresolution dynamic mode decomposition*. SIAM Journal on Applied Dynamical Systems, 2016. [[DOI](https://doi.org/10.1137/15M1023543)] [[bibitem](readme/refs/Kutz2016_2.bib)]. +* **Sparsity-promoting DMD:** Jovanović, Schmid, Nichols *Sparsity-promoting dynamic mode decomposition*. Physics of Fluids, 2014. [[DOI](https://doi.org/10.1063/1.4863670)] [[bibitem](readme/refs/Jovanovic2014.bib)]. +* **Compressed DMD:** Erichson, Brunton, Kutz. *Compressed dynamic mode decomposition for background modeling*. Journal of Real-Time Image Processing, 2016. [[DOI](https://doi.org/10.1007/s11554-016-0655-2)] [[bibitem](readme/refs/Erichson2016.bib)]. +* **Randomized DMD:** Erichson, Mathelin, Kutz, Brunton. *Randomized dynamic mode decomposition*. SIAM Journal on Applied Dynamical Systems, 2019. [[DOI](https://doi.org/10.1137/18M1215013)] [[bibitem](readme/refs/Erichson2019.bib)]. +* **Higher Order DMD:** Le Clainche, Vega. *Higher order dynamic mode decomposition*. Journal on Applied Dynamical Systems, 2017. [[DOI](https://doi.org/10.1137/15M1054924)] [[bibitem](readme/refs/LeClainche2017.bib)]. +* **HAVOK:** Brunton, Brunton, Proctor, Kaiser, Kutz. *Chaos as an intermittently forced linear system*. Nature Communications, 2017. [[DOI](https://doi.org/10.1038/s41467-017-00030-8)] [[bibitem](readme/refs/Brunton2017.bib)]. +* **Parametric DMD:** Andreuzzi, Demo, Rozza. *A dynamic mode decomposition extension for the forecasting of parametric dynamical systems*. SIAM Journal on Applied Dynamical Systems, 2023. [[DOI](https://doi.org/10.1137/22M1481658)] [[bibitem](readme/refs/Andreuzzi2021.bib)]. +* **Extended DMD:** Williams, Rowley, Kevrekidis. *A kernel-based method for data-driven koopman spectral analysis*. Journal of Computational Dynamics, 2015. [[DOI](https://doi.org/10.3934/jcd.2015005)] [[bibitem](readme/refs/Williams2015.bib)]. +* **LANDO:** Baddoo, Herrmann, McKeon, Brunton. *Kernel learning for robust dynamic mode decomposition: linear and nonlinear disambiguation optimization*. Proceedings of the Royal Society A, 2022. [[DOI](https://doi.org/10.1098/rspa.2021.0830)] [[bibitem](readme/refs/Baddoo2022.bib)]. +* **DMD with Centering:** Hirsh, Harris, Kutz, Brunton. *Centering data improves the dynamic mode decomposition*. SIAM Journal on Applied Dynamical Systems, 2020. [[DOI](https://doi.org/10.1137/19M1289881)] [[bibitem](readme/refs/Hirsh2020.bib)] + +### General Implementation Tools +* Gavish, Donoho. *The optimal hard threshold for singular values is 4/sqrt(3)*. IEEE Transactions on Information Theory, 2014. [[DOI](https://doi.org/10.1109/TIT.2014.2323359)] [[bibitem](readme/refs/Gavish2014.bib)]. +* Matsumoto, Indinger. *On-the-fly algorithm for dynamic mode decomposition using incremental singular value decomposition and total least squares*. 2017. [[arXiv](https://arxiv.org/abs/1703.11004)] [[bibitem](readme/refs/Matsumoto2017.bib)]. + +### Recent works using PyDMD +You can find a list of the scientific works using **PyDMD** [here](https://scholar.google.com/scholar?oi=bibs&hl=en&cites=5544023489671534143). + +## Developers and contributors +The main developers are +

+ +

+ +We warmly thank all the contributors that have supported PyDMD! + +Do you want to join the team? Read the [Contributing guidelines](.github/CONTRIBUTING.md) and the [Tutorials for Developers](tutorials#tutorials-for-developers) before starting to play! + + + + + +Made with [contrib.rocks](https://contrib.rocks). + +### Testing +We use `pytest` to run our unit tests. You can run the whole test suite by using the following command in the base directory of the repository: +```bash +pytest +``` + +## Funding +A significant part of PyDMD has been written either as a by-product for other projects people were funded for, or by people on university-funded positions. There are probably many of such projects that have led to some development of PyDMD. We are very grateful for this support! + +Beyond this, PyDMD has also been supported by some dedicated projects that have allowed us to work on extensions, documentation, training and dissemination that would otherwise not have been possible. In particular, we acknowledge the following sources of support with great gratitude: + +* [H2020 ERC CoG 2015 AROMA-CFD project 681447](https://people.sissa.it/~grozza/aroma-cfd/), P.I. Professor [Gianluigi Rozza](https://people.sissa.it/~grozza) at [SISSA mathLab](https://mathlab.sissa.it/). +* FSE HEaD project [Bulbous Bow Shape Optimization through Reduced Order Modelling](https://mathlab.sissa.it/project/ottimizzazione-di-forme-prodiere-e-poppiere-di-carena-mediante-luso-di-algoritmi-parametrici), FVG, Italy. +

+ +

+ +## Affiliations +

+ + + +

diff --git a/PyDMD.egg-info/SOURCES.txt b/PyDMD.egg-info/SOURCES.txt new file mode 100644 index 000000000..d8cb9b49c --- /dev/null +++ b/PyDMD.egg-info/SOURCES.txt @@ -0,0 +1,69 @@ +LICENSE +README.md +pyproject.toml +PyDMD.egg-info/PKG-INFO +PyDMD.egg-info/SOURCES.txt +PyDMD.egg-info/dependency_links.txt +PyDMD.egg-info/requires.txt +PyDMD.egg-info/top_level.txt +pydmd/__init__.py +pydmd/bopdmd.py +pydmd/cdmd.py +pydmd/costs.py +pydmd/dmd.py +pydmd/dmd_modes_tuner.py +pydmd/dmdbase.py +pydmd/dmdc.py +pydmd/dmdoperator.py +pydmd/edmd.py +pydmd/fbdmd.py +pydmd/hankeldmd.py +pydmd/havok.py +pydmd/hodmd.py +pydmd/lando.py +pydmd/mrcosts.py +pydmd/mrdmd.py +pydmd/optdmd.py +pydmd/paramdmd.py +pydmd/pidmd.py +pydmd/pidmd_utils.py +pydmd/plotter.py +pydmd/rdmd.py +pydmd/snapshots.py +pydmd/spdmd.py +pydmd/subspacedmd.py +pydmd/utils.py +pydmd/varprodmd.py +pydmd/preprocessing/__init__.py +pydmd/preprocessing/hankel.py +pydmd/preprocessing/pre_post_processing.py +pydmd/preprocessing/randomized.py +pydmd/preprocessing/svd_projection.py +pydmd/preprocessing/zero_mean.py +tests/test_bopdmd.py +tests/test_cdmd.py +tests/test_costs.py +tests/test_dmd.py +tests/test_dmd_modes_tuner.py +tests/test_dmdbase.py +tests/test_dmdc.py +tests/test_dmdoperator.py +tests/test_edmd.py +tests/test_fbdmd.py +tests/test_hankeldmd.py +tests/test_havok.py +tests/test_hodmd.py +tests/test_lando.py +tests/test_mrcosts.py +tests/test_mrdmd.py +tests/test_optdmd.py +tests/test_package.py +tests/test_paramdmd.py +tests/test_pidmd.py +tests/test_plotter.py +tests/test_rdmd.py +tests/test_snapshots.py +tests/test_spdmd.py +tests/test_subspacedmd.py +tests/test_varprodmd.py +tests/test_xyinput.py \ No newline at end of file diff --git a/PyDMD.egg-info/dependency_links.txt b/PyDMD.egg-info/dependency_links.txt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/PyDMD.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/PyDMD.egg-info/requires.txt b/PyDMD.egg-info/requires.txt new file mode 100644 index 000000000..324dfcbc2 --- /dev/null +++ b/PyDMD.egg-info/requires.txt @@ -0,0 +1,16 @@ +numpy<2 +scipy>=1.6.0 +matplotlib +scikit-learn +xarray +h5netcdf + +[docs] +sphinx>=1.4 +sphinx_rtd_theme + +[test] +pytest +pytest-cov +pytest-mock +ezyrb>=v1.2.1.post2205 diff --git a/PyDMD.egg-info/top_level.txt b/PyDMD.egg-info/top_level.txt new file mode 100644 index 000000000..44d549980 --- /dev/null +++ b/PyDMD.egg-info/top_level.txt @@ -0,0 +1 @@ +pydmd diff --git a/docs/source/_tutorials/coststutorial_realdata.html b/docs/source/_tutorials/coststutorial_realdata.html new file mode 100644 index 000000000..7cd1f25ff --- /dev/null +++ b/docs/source/_tutorials/coststutorial_realdata.html @@ -0,0 +1,9793 @@ + + + + + +costs-tutorial_real-data + + + + + + + + + + + + +
+ + diff --git a/docs/source/_tutorials/tutorial2advdmd.html b/docs/source/_tutorials/tutorial2advdmd.html index 6085c79e9..3eada04fa 100644 --- a/docs/source/_tutorials/tutorial2advdmd.html +++ b/docs/source/_tutorials/tutorial2advdmd.html @@ -7616,7 +7616,7 @@

diff --git a/tutorials/tutorial20/costs-tutorial_real-data.py b/tutorials/tutorial20/costs-tutorial_real-data.py
new file mode 100644
index 000000000..8231efe5c
--- /dev/null
+++ b/tutorials/tutorial20/costs-tutorial_real-data.py
@@ -0,0 +1,821 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # Tutorial 20b: Multi-resolution Coherent Scale separation (mrCOSTS) using real data
+#
+# This tutorial focuses on the real-world application of the mrCOSTS object for multi-resolution coherent scale separation. In the toy-data example, the data include perfect oscillators as well as data engineered to be separable. Real-world data are not so amenable. We provide this example with real data to illustrate how to use the mrCOSTS method on noisy, messy data.
+#
+# The real data also allow us to introduce the reason why mrCOSTS/COSTS operate with a different data model than the rest of PyDMD. Namely, mrCOSTS has _decomposition levels_ and _frequency bands_. The frequency bands are then further subdivided into the concepts of _local_ and _global_ frequency bands.
+#
+# Each decomposition level provides a decomposition of the data by fitting BOP-DMD to a sliding window with a given length. For each decomposition level some number of _local_ frequency bands are found (using the `COSTS` module). The discrete frequency bands are found using a k-means clustering algorithm, introducing a hyperparameter of `n_components`, which can be specified for each level or found through a hyperparameter search. In either case, a frequency band separation results in separating the coherent spatiotemporal features for each decomposition level.
+#
+# mrCOSTS operates in the opposite direction of most other decompositions, fitting the fastest frequency bands first and moving to the slowest. After separating the local frequency bands, all but the slowest frequency band are removed from the data. The slowest frequency band is then given to the next largest decomposition level (with a larger window) to fit.
+#
+# After all decompositions have been completed, it is typical for information in a given frequency band to leak between decomposition levels. For this reason, _global_ frequency bands are found (using the `mrCOSTS` module). These global frequency bands are then used to describe the discrete, coherent spatiotemporal features from the data.
+
+# In[1]:
+
+
+# netcdf/numpy/xray/stats
+import numpy as np
+import xarray as xr
+import pandas as pd
+import copy
+import scipy
+
+# OS interaction
+import os
+import sys
+import glob
+
+# import plotting
+import matplotlib
+import matplotlib.pyplot as plt
+from matplotlib.dates import DateFormatter
+
+# PyDMD
+from pydmd.costs import COSTS
+from pydmd.mrcosts import mrCOSTS
+
+import warnings
+
+
+# We ignore warnings because of the convergence warnings and the condition number warnings in BOP-DMD, which can number in the 100s to 1000s.
+
+# In[2]:
+
+
+warnings.filterwarnings("ignore")
+
+
+# ## Format plots
+
+# In[3]:
+
+
+get_ipython().run_line_magic("matplotlib", "inline")
+# Higher resolution figures within the notebook
+get_ipython().run_line_magic("config", "InlineBackend.figure_format = 'retina'")
+
+
+# # Data
+#
+# These data come from the Large-eddy Observatory, Voitsumra Experiment 2019 (LOVE19)$^{1}$. They are distributed temperature observations at 1 s and 0.127 m resolution along a 12 m tower in a valley bottom. These data are great for demonstrating mrCOSTS because:
+#
+# - They include a wide range of process scales that are not separable by other methods.
+#   Separating these scales is considered one of the biggest open questions in boundary layer meteorology.
+# - There is substantial sensor noise. For the purposes of the tutorial we remove much of this noise using a rolling mean. mrCOSTS would identify the sensor noise and the small-scale turbulent processes as non-coherent and would consequently drop them. However, while these scales have small amplitudes, they are computationally expensive to fit. The purpose of the rolling mean is to enable a quicker fitting process since we can skip right to the scale of the coherent processes.
+# - The data are 1D, which is simpler to visualize.
+#
+#
+# **Data Reference**
+#
+# [1] https://zenodo.org/records/4312976
+
+# In[4]:
+
+
+ds_data = xr.open_dataarray("../data/mrcosts-real-data-example.nc")
+
+# Rolling average allows us to skip the high frequency bands and noise
+# which would have to be removed by a computationally expensive small
+# mrCOSTS window.
+ds_data = (
+    ds_data.rolling(time=45, center=True).mean().dropna(dim="time", how="all")
+)
+ts = ds_data.time
+ts = ts - ts.isel(time=0)
+ts = (ts / 1e9).values.astype(float)
+
+data = ds_data.values
+
+
+# In[5]:
+
+
+ds_data.plot(figsize=(6, 2))
+plt.gca().set_title("Input data for mrCOSTS")
+
+
+# # Scale separation with the mrCOSTS object
+#
+# Here we have a number of important keyword arguments and hyperparameters.
+#
+# ## Keywords:
+#
+# - `window_length_array`: One of the fundamental properties of decompositions is the window length of the data that is decomposed. Unlike most other decompositions, we must manually set these window sizes. In this case dyadic scaling works just fine, so we specify the `window_length_array` keyword as the window lengths (in units of time steps) for each decomposition level. **Critical to note**: mrCOSTS operates in the opposite direction of most decompositions, starting from the smallest scales and moving to the largest scales.
+# - `step_size_array`: For each decomposition we slide the window across the data in the time dimension. While we can do this for each time step, this is often unnecessarily computationally expensive. Instead, we "step" the window by a fixed number of time steps. The `step_size_array` specifies how to slide each window across the data. Here we chose a fixed slide of about 7%. Dylewsky et al. (2019) suggested 4%. Generally the slide width should be a small fraction of the window width.
+# - `svd_rank_array`: Specifies the svd_rank to use when fitting the BOP-DMD for each window. The `svd_rank` controls the number of frequency bands. Odd numbers force one of the fitted eigenvalues to have a large positive component. Even ranks tend towards fits with conjugate pairs. **Note**: one can force conjugate-pair solutions using the BOP-DMD `eig_constraints` keyword argument.
+# - `global_svd_array`: Specifies whether each level should use a global SVD of the entire dataset for the projection basis and initial eigenvalues (`True`) instead of computing them individually for each window that BOP-DMD fits (`False`). Setting this value to `True` forces all of the BOP-DMD solutions towards eigenvalues representative of the entire dataset and not just the specific window being fit. Generally, using the global SVD speeds up the fitting process, with the trade-off of not fitting dynamics that are not present throughout the entire dataset. This is the reason this keyword was set to `True` for the toy data but `False` here.
+# - `cluster_sweep`: Specifies whether mrCOSTS should perform a hyperparameter sweep when clustering the fitted eigenvalues from each window. When `True`, it looks for the optimal number of eigenvalue clusters using the silhouette score. If `False`, the `n_components_array` keyword must be provided, specifying the number of frequency bands to use when clustering the fitted eigenvalues.
+#
+
+# In[6]:
+
+
+# Parameters
+window_lengths = np.array([150, 300, 600, 1200, 2400])
+step_sizes = np.array([10, 20, 40, 80, 160])
+sr = 12
+svd_ranks = [sr] * len(window_lengths)
+global_svd_array = [False] * len(window_lengths)
+
+# Due to the computational expense of mrCOSTS it often makes sense to
+# save the fit results. These strings can be used to identify the particular
+# fit combination in the saved file.
+strategy = "svd-rank={}_dyadic-windows.smoothed-data".format(sr)
+data_name = "mrcosts-example"
+
+
+# The `fit` variable specifies whether the fitting should be performed and the new fit saved (`fit = True`) or whether the previously saved model is used instead (`fit = False`).
+
+# In[7]:
+
+
+fit = False
+
+if fit:
+    mrc = mrCOSTS(
+        svd_rank_array=svd_ranks,
+        window_length_array=window_lengths,
+        step_size_array=step_sizes,
+        global_svd_array=global_svd_array,
+        cluster_sweep=True,
+        transform_method="absolute",
+    )
+
+    mrc.fit(data, np.atleast_2d(ts))
+
+
+# ## I/O
+#
+# Due to the computational expense of the mrCOSTS fitting, it is desirable to execute this step only once, save the results, and only operate on the model offline. For this reason, we built mrCOSTS to be compatible with xarray, enabling storage as a self-describing dataset in netcdf format.
+#
+# ### To netcdf
+
+# In[8]:
+
+
+if fit:
+    filename = ".".join([data_name, strategy])
+    mrc.to_netcdf(os.path.join("./fitted model/", filename))
+
+
+# ### Convert from netcdf
+
+# In[9]:
+
+
+file_list = glob.glob(os.path.join("./fitted model/", "*" + strategy + "*.nc"))
+mrc = mrCOSTS()
+mrc.from_netcdf(file_list)
+
+
+# # Evaluation plots
+
+# ## Individual decomposition level comprehensive plots
+#
+# These plots can be easily iterated through (a sketch of such a loop is given after the list of plot types below). For the purpose of the tutorial, we present only a single decomposition level.
+#
+# The plot types all assume 1D data (as this is the shape required by PyDMD). We chose to not develop other visualization methods since the dimensionality of data can change dramatically between applications. However, if there is a consistent use case, additional evaluation plots can (and should!) be added.
+#
+# Plot types for each local decomposition:
+# 1) Histogram of DMD frequencies ($\omega$). There are several options for how to express $Im(\omega)$: $|Im(\omega)|$, $Im(\omega)^2$, $log(|Im(\omega)|)$. A good choice of transformation will have the $Im(\omega)$ frequency bands well-separated from each other. **Note** this approach to clustering on $Im(\omega)$ is a change from Dylewsky et al., 2019, who clustered on $\omega^2$, which includes the real component dictating exponential growth and decay of the temporal modes.
+# 2) Error in the global reconstruction expressed as a percent.
+# 3) Coherent scale separation with input data and each discrete frequency band.
+# 4) Coherent scale separation with input data, low frequency component, and the high frequency component. The low frequency component is the input for the next decomposition scale.
+# 5) Time series of the scale separation at a single point given by `space_index`.
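+
+# As referenced above, these per-level diagnostics can be looped over every decomposition level. The following sketch is not executed in this tutorial; it simply chains the calls demonstrated in the cells below, and all names come from the fitted `mrc` object used throughout this notebook:
+#
+# ```
+# x_iter = data
+# for n_decomp, mrd in enumerate(mrc.costs_array):
+#     # Per-level diagnostics (same calls as in the cells below).
+#     mrc.plot_local_error(
+#         n_decomp,
+#         data=x_iter,
+#         scale_reconstruction_kwargs=mrc._costs_recon_kwargs,
+#     )
+#     _ = mrc.plot_local_reconstructions(
+#         n_decomp,
+#         data=x_iter,
+#         kwargs={"plot_period": True},
+#         scale_reconstruction_kwargs=mrc._costs_recon_kwargs,
+#     )
+#     # The low-frequency component becomes the input for the next level.
+#     x_iter, _ = mrd.scale_separation(
+#         scale_reconstruction_kwargs=mrc._costs_recon_kwargs
+#     )
+# ```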
+
+# In[10]:
+
+
+# Plot the 3rd decomposition level
+n_decomp = 3
+mrd = mrc.costs_array[n_decomp]
+
+# The data for each decomposition level is built here to avoid
+# reconstructing the data for each plot. If this is not done, the
+# plots can still be rendered but will take slightly longer.
+if n_decomp == 0:
+    x_iter = data
+else:
+    x_iter, _ = mrc.costs_array[n_decomp - 1].scale_separation(
+        scale_reconstruction_kwargs=mrc._costs_recon_kwargs
+    )
+
+
+# The histogram of the fitted eigenvalues with the cluster centroids from the k-means clustering indicated by vertical lines. In this case the cluster centroids are very well-separated and easily identified by the k-means clustering. The lowest frequency band (blue) is withheld for the next decomposition level.
+
+# In[11]:
+
+
+fig1, ax1 = mrd.plot_omega_histogram()
+fig1.suptitle("Window length={}".format(mrd.window_length))
+
+
+# A good check is to look at the error in the global reconstruction of the data. Specifically, we want to examine if there are specific time windows which were poorly fit. These often indicate that a different set of mrCOSTS hyperparameters (e.g., `svd_rank`) may be necessary for this decomposition level.
+
+# In[12]:
+
+
+# Error in global reconstruction
+mrc.plot_local_error(
+    n_decomp, data=x_iter, scale_reconstruction_kwargs=mrc._costs_recon_kwargs
+)
+
+
+# The coherent spatial patterns found for this local decomposition can be plotted. The first plot looks at the scale separation between the high frequency component localized in this decomposition level and the low-frequency component that was given to the next largest decomposition level. The second plot looks at the spatiotemporal patterns of each frequency band.
+
+# In[13]:
+
+
+# Scale separation
+_ = mrc.plot_local_reconstructions(
+    n_decomp,
+    data=x_iter,
+    kwargs={"plot_period": True},
+    scale_reconstruction_kwargs=mrc._costs_recon_kwargs,
+)
+
+# Alternatively you can use the local scale separation to visualize the individual frequency bands
+# seen in the eigenvalue histogram.
+_ = mrc.plot_local_scale_separation(
+    n_decomp, data=x_iter, scale_reconstruction_kwargs=mrc._costs_recon_kwargs
+)
+
+
+# Sometimes it is difficult to visualize the scale separation for spatially explicit data, especially since humans are sometimes bad at seeing small changes in relative color intensity. We can instead view how the decomposition performed for a single point in space.
+#
+# **Important to note:** mrCOSTS is not analogous to normal signal analysis techniques; it finds the spatially coherent signals, not all time scales present in the time series.
+
+# In[14]:
+
+
+# Single points in space
+space_index = 40
+_, _ = mrc.plot_local_time_series(
+    space_index,
+    n_decomp,
+    x_iter,
+    scale_reconstruction_kwargs=mrc._costs_recon_kwargs,
+)
+
+
+# ## Information leaking between decomposition levels
+#
+# To visualize the effect of the information leaking, we can look at the relative amplitudes of the modes.
+
+# In[15]:
+
+
+fig, ax = plt.subplots(1, 1)
+for nm, m in enumerate(mrc.costs_array):
+    # Extract out the amplitudes of the spatial modes
+    b = m.amplitudes_array
+
+    # Ratio of spatial amplitude for each mode relative to the amplitudes for all
+    # high frequency modes (selected using the `omega_classes > 0` indexing)
+    sum_b_ratio = (b.T / np.sum(b, axis=1)).T[m.omega_classes > 0].real
+    mode_periods = m.periods()
+
+    ax.scatter(
+        x=mode_periods,
+        y=sum_b_ratio,
+        alpha=0.1,
+        label="decomposition = {}".format(nm),
+    )
+
+ax.set_ylim(0, 0.25)
+ax.legend()
+ax.set_xscale("log")
+ax.set_ylabel(r"$\frac{b^{d}_j}{\sum_{j}b^{d}_j}$")
+ax.set_xlabel("Period (s)")
+ax.set_title("Normalized mode spatial amplitude for each decomp. level")
+
+
+# Each color indicates a different decomposition level. Some interesting features emerge. First, the largest amplitude modes for these data are largely located at the largest time scales. Second, some decomposition levels seem to overlap in their time scales. Third, most decomposition levels yield discrete frequency bands while the lowest decomposition level (with the smallest time scales) is more of a "mush" of time scales. This is consistent with the physical interpretation of these data.
+#
+# **Information leaking:** Most decomposition levels have an overlap between the largest time scales from window $n$ and the smallest time scales in window $n+1$. Often the amplitudes of the smallest time scales from decomposition level $n+1$ are relatively smaller than the amplitudes from decomposition level $n$. But this information still leaks, and a robust scale separation will need to account for it. We go over how to handle these cases below.
+#
+# ## What do I do if I don't find information leaking?
+#
+# Congratulations! You've completed your scale separation and can start doing your science!
+
+# ## All decomposition levels
+
+# This next plot gives a sense of how mrCOSTS performs across all decomposition levels. For each level the high frequency component, which is removed, and the low-frequency component, which is passed to the next level, are shown. Since the low-frequency component includes the slowly evolving background values, it is plotted in a perceptually uniform color scale. The high frequency component consists of perturbations around zero, so it is plotted in a diverging color scale. All levels are plotted using the same color map scaling.
+
+# In[16]:
+
+
+vscale = 0.5
+fig, axes = plt.subplots(
+    mrc.n_decompositions + 1,
+    2,
+    figsize=(6.25, mrc.n_decompositions * 1.25),
+    sharex=True,
+    sharey=True,
+)
+
+plot_kwargs = {
+    "cmap": "RdBu_r",
+    "vmin": -vscale,
+    "vmax": vscale,
+}
+
+plot_kwargs_lf = {
+    "cmap": "viridis",
+    "vmin": -2,
+    "vmax": 2,
+}
+
+ax = axes[0, 0]
+ax.pcolormesh(ds_data.time.values, ds_data.z.values, data, **plot_kwargs_lf)
+ax.set_title("Input data")
+ax.set_ylabel("Height (m)")
+
+axes[0, 1].axis("off")
+for nm, m in enumerate(mrc.costs_array):
+    xr_low_frequency, xr_high_frequency = m.scale_separation()
+
+    ax = axes[nm + 1, 1]
+    ax.pcolormesh(
+        ds_data.time.values, ds_data.z.values, xr_high_frequency, **plot_kwargs
+    )
+    ax.set_title("High frequency: window = {}s".format(m._window_length))
+
+    ax = axes[nm + 1, 0]
+    ax.pcolormesh(
+        ds_data.time.values,
+        ds_data.z.values,
+        xr_low_frequency,
+        **plot_kwargs_lf,
+    )
+    ax.set_title("Low frequency: window = {}s".format(m._window_length))
+    ax.set_ylabel("Height (m)")
+
+axes[-1, 0].xaxis.set_major_formatter(DateFormatter("%H:%M"))
+
+fig.tight_layout()
+fig.autofmt_xdate()
+
+
+# # Global clustering and frequency band separation
+#
+# The leaking of information between decomposition levels forms a major obstacle since a frequency band found at any given level is missing information that likely leaked to the next level. Generally, the amplitudes are mostly confined to a single decomposition level (see above). But there is often enough leaking that we cannot discard frequency bands from other decomposition levels. The exact cause of this leaking is uncertain for now (bad fits? overfitting? not handling noise?).
+#
+# To deal with the information leaking we perform a final "global" clustering and scale separation on the entire multi-resolution decomposition. This global scale separation should then be used in place of the "local" scale separation shown in the above evaluation plots.
+
+# ## Interpolate to common time step
+#
+# The biggest issue in the global scale separation is the different number of fitted windows for each decomposition level. To get around this issue we need to somehow reconcile the many windows in the first decomposition level (with the smallest windows) with the small number of windows in the last decomposition level (with the largest windows).
+#
+# We chose to interpolate all decomposition levels to the time step of the first decomposition level, which has the smallest time step.
+#
+# In this multi-resolution interpolation step the low frequency cluster is removed and replaced by NaNs.
+
+# In[17]:
+
+
+mrc.multi_res_interp()
+df = mrc._da_omega.to_dataframe()
+
+
+# ## Visualizing the leaking
+#
+# First, let's take a look at the typical spaces used for clustering:
+# - $\frac{\omega^{2}}{2 \pi}$ (squared frequency, as in Dylewsky et al., 2019)
+# - $\frac{\omega}{2 \pi}$ (frequency)
+# - $\frac{2 \pi}{\omega}$ (period)
+#
+# In all of these cases, the colors indicate the decomposition level in which $\omega$ was found. You'll notice that the $\omega$ values frequently overlap between levels.
+
+# In[18]:
+
+
+da = mrc._da_omega
+x = da.values
+x = x.reshape(
+    len(da.window_length), len(da.window_time_means) * len(da.svd_rank)
+)
+
+# Squared frequencies
+x_trans = np.abs(x.imag) ** 2 / (2 * np.pi)
+plt.figure(figsize=(5, 2.5))
+plt.hist(
+    x=x_trans.T,
+    bins=np.linspace(0, 0.005, 100),
+    histtype="barstacked",
+    density=True,
+    label=range(mrc.n_decompositions),
+)
+plt.gca().set_title("Global histogram; Interpolated decomposition levels")
+plt.gca().set_xlabel(r"$Im(\omega)^2 (2 \pi)^{-1}$ (s$^{-2}$)")
+plt.legend(title="decomposition level")
+
+# Frequency
+plt.figure(figsize=(5, 2.5))
+x_trans = np.abs(x.imag) / (2 * np.pi)
+plt.hist(
+    x=x_trans.T,
+    bins=100,
+    histtype="barstacked",
+    density=True,
+    label=range(mrc.n_decompositions),
+)
+plt.gca().set_title("Global histogram; Interpolated decomposition levels")
+plt.gca().set_xlabel(r"$Im(|\omega|) (2 \pi)^{-1}$ (s$^{-1}$)")
+plt.legend(title="decomposition level")
+
+# Period
+plt.figure(figsize=(5, 2.5))
+x_trans = (2 * np.pi) / np.abs(x.imag)
+plt.hist(
+    x=x_trans.T,
+    bins=100,
+    histtype="barstacked",
+    density=True,
+    label=range(mrc.n_decompositions),
+)
+plt.legend(title="decomposition level")
+plt.gca().set_title("Global histogram; Interpolated decomposition levels")
+plt.gca().set_xlabel(r"Period; $(2 \pi) / Im(|\omega|)$ (s)")
+
+
+# In all of these cases we see that processes were fit across multiple decomposition windows. For instance, there is a frequency band around 600 s which was fit in both the 1200 s and 2400 s windows.
+#
+# However, all of the choices for expressing $\omega$ compress some range of the data, either emphasizing the largest or smallest scales. For instance, the squared frequencies effectively compress the top three decomposition levels (600 s to 2400 s) into one super cluster. A physical interpretation of these data suggests we should not expect this.
+#
+# Instead, let us try to cluster in a logarithmic space. From here on out I will be choosing to use periods simply because those quantities are easier to physically interpret for these data. Incidentally, once we transform to a logarithmic space, the choice of quantity (e.g. $\omega^2$ vs $\omega$) is immaterial: since $\log(\omega^2) = 2\log(|\omega|)$, the transformed values differ only by a constant factor and the cluster structure is unchanged.
+
+# ## Cluster in a log scale
+
+# In[19]:
+
+
+plt.figure(figsize=(5, 2.5))
+x_trans = (2 * np.pi) / np.abs(x.imag)
+
+# Provide weights so that the smallest decomposition level and the largest
+# are visually comparable (this also foreshadows the interpolation problem below).
+weights = mrc.window_length_array / mrc.window_length_array[0]
+weights = np.broadcast_to(np.atleast_2d(weights).T, (x_trans.shape))
+
+plt.hist(
+    x=x_trans.T,
+    bins=np.logspace(start=np.log10(20), stop=np.log10(1300), num=100),
+    histtype="barstacked",
+    label=range(mrc.n_decompositions),
+    weights=weights.T,
+)
+plt.xscale("log")
+plt.legend(title="decomposition level")
+plt.gca().set_title("Global histogram; Interpolated decomposition levels")
+plt.gca().set_xlabel(r"Period; $(2 \pi) / Im(|\omega|)$ (s)")
+plt.gca().set_ylabel("Weighted Count (-)")
+
+
+# Now we more nicely see the expected separation of time scales.
+#
+# The problem of processes being fit across multiple scales now becomes more apparent as well. To address the information leaking, a global scale separation is performed.
+
+# ## Global Clustering
+#
+# As with the local clustering, the global clustering makes use of the MiniBatch k-means clustering from sklearn. The free parameter is again the number of components, `n_components`.
+# We could probably make a reasonable guess for the optimal number of clusters to fit, but a hyperparameter sweep function is provided for cases where the optimal number is unclear or to enable objective selection.
+#
+# Notes:
+# - The silhouette scoring is the slowest part of the hyperparameter sweep.
+# - The Calinski-Harabasz score is not useful because it increases (nearly) monotonically with `n_components`. It can be used instead of the silhouette score but is not recommended.
+
+# In[20]:
+
+
+n_components_range = np.arange(12, 22)
+scores, n_optimal = mrc.global_cluster_hyperparameter_sweep(
+    n_components_range,
+    transform_method="log10",
+)
+
+print("Optimal number of components = {}".format(n_optimal))
+plt.figure(figsize=(5, 2.5))
+plt.plot(n_components_range, scores)
+plt.gca().set_xlabel("n_components (-)")
+plt.gca().set_ylabel("Silhouette score (-), (1 is best, -1 is worst)")
+
+
+# In[21]:
+
+
+cluster_centroids, omega_classes, omega_array = mrc.global_cluster_omega(
+    n_optimal, transform_method="log10"
+)
+
+
+# Here we have to do a bit of manipulation to weight each of the bands the same so that they are visually comparable. A simpler approach is to use the seaborn plotting library with a call similar to:
+#
+# ```
+# sns.histplot(
+#     x=(2 * np.pi / 10**omega_array),
+#     hue=omega_classes,
+#     hue_order=hue_order,
+#     common_bins=True,
+#     common_norm=True,
+#     stat="density",
+#     multiple="stack",
+#     bins=100,
+#     palette="mako",
+#     legend=False,
+#     log_scale=True,
+# )
+# ```
+
+# In[22]:
+
+
+x_trans = 2 * np.pi / 10**omega_array
+unique_labels, label_counts = np.unique(omega_classes, return_counts=True)
+weights = label_counts.max() / label_counts
+x_trans_labels = [x_trans[omega_classes == label] for label in unique_labels]
+
+weights_labels = []
+unique_labels = unique_labels.astype(int)
+for label in unique_labels:
+    ind_array = np.flatnonzero(unique_labels == label)
+    ind_list = int(ind_array[0])
+    w = weights[ind_array]
+    x_w = x_trans_labels[ind_list]
+    w_broadcast = np.broadcast_to(w, (x_w.shape))
+    weights_labels.append(w_broadcast)
+
+
+# In[23]:
+
+
+plt.figure(figsize=(5, 2.5))
+
+plt.hist(
+    x=x_trans_labels,
+    bins=np.logspace(start=np.log10(20), stop=np.log10(1300), num=100),
+    histtype="barstacked",
+    weights=weights_labels,
+)
+
+ax = plt.gca()
+ax.set_xscale("log")
+ax.set_title(
+    "Global histogram of frequencies; Interpolated decomposition levels"
+)
+ax.set_xlabel(r"Period; $(2 \pi) / Im(|\omega|)$ (s)")
+ax.set_ylabel("Density (-)")
+[
+    ax.axvline(2 * np.pi / (10**c), color="k", ls="--")
+    for nc, c in enumerate(cluster_centroids)
+]
+
+ylim_bottom, ylim_top = ax.get_ylim()
+[
+    ax.text(
+        2 * np.pi / (10**c),
+        ylim_top - ylim_top * 0.1,
+        "{:4.0f}s".format(1 / (10 ** (c) / (2 * np.pi))),
+        rotation="vertical",
+        va="top",
+        ha="right",
+    )
+    for c in cluster_centroids
+]
+
+
+# We now have discrete frequency bands giving us the coherent spatiotemporal modes found by mrCOSTS. This is not meant to be a dimensionality reduction, since we have gone from 80 spatial points to 17 coherent spatiotemporal modes. Instead, these are the physically meaningful processes resolvable by this observation method, which would otherwise be extremely difficult to infer.
+#
+# Notes:
+#
+# - We have now clearly captured the distinct frequency bands, regardless of which decomposition level they originally came from.
+# - There is still some undesirable behavior as the highest frequency band was split into three.
+#   Physically, we would expect this highest frequency band to be made up of a melange of small-scale processes that are only partially captured by the instrument. The clustering says this melange is three distinct time scales.
+# - Decreasing `n_components` does not address the "over-clustered" high frequencies. Instead, the lower frequency components are merged into neighboring clusters. We have a good physical reason to believe we have distinct frequency bands at the longer time scales and not at the small scales. This example highlights that the frequency band scale separation is not a "fire and forget" process but requires physical interpretation of results _a posteriori_.
+
+# # Visualizing the global scale separation
+
+# ## "Un-interpolate"
+#
+# The first step is to "un-interpolate" the clustering results from the fine time step of the first decomposition level to the original time step for each decomposition level. Here a nearest-neighbor lookup is used to find the cluster classification for $\omega$ for each decomposition level.
+
+# In[24]:
+
+
+omega_classes_list = mrc.multi_res_deterp()
+
+
+# First, the mrCOSTS fit is used to reconstruct the discrete frequency bands. The resulting data is of the dimensions `n_decomp` x `n_frequency_bands` x `n_space` x `n_time`. In other words, the 1D profile now has two more dimensions.
+#
+# The low-frequency background mode was excluded in the last decomposition level, but it is necessary to include this component when reconstructing.
+
+# In[25]:
+
+
+xr_sep = mrc.global_scale_reconstruction()
+xr_background = mrc.get_background()
+
+
+# Now we plot the time-space cross sections for each coherent spatiotemporal mode.
+#
+# As mentioned above, the high frequency component was split up across three clusters. Since processes at these fine time scales are anticipated to be poorly resolved by the instrument as well as being stochastic in nature, we simply exclude them here.
+
+# In[26]:
+
+
+cluster_centers_periods = 1 / (10 ** (cluster_centroids) / (2 * np.pi))
+
+for ncl_center, cl_center in enumerate(cluster_centers_periods):
+    if cl_center < 60:
+        continue
+    plt.figure(figsize=(4, 1.5))
+    plt.pcolormesh(
+        ds_data.time,
+        ds_data.z,
+        xr_sep[:, ncl_center, :, :].sum(axis=0),
+        vmin=-0.5,
+        vmax=0.5,
+        cmap="RdBu_r",
+    )
+    plt.gca().set_title(
+        "Cluster={}, central period={:4.0f}s".format(ncl_center, cl_center)
+    )
+    plt.gca().set_ylabel("Height (m agl)")
+    plt.gcf().autofmt_xdate()
+
+
+# These discrete, coherent modes are a major breakthrough. No other method can separate them out.
+#
+# Some of the modes are clearly very small amplitude, while others make up a dominant process scale.
+#
+# What about the slow background mode?
+
+# In[27]:
+
+
+plt.figure(figsize=(4, 1.5))
+plt.pcolormesh(
+    ds_data.time, ds_data.z, xr_background, vmin=-2, vmax=2, cmap="cividis"
+)
+plt.gca().set_title("mrCOSTS low-frequency background")
+plt.gca().set_ylabel("Height (m agl)")
+plt.gcf().autofmt_xdate()
+
+
+# ## Converting to xarray
+#
+# You may have noted in the above code the somewhat complicated indexing, specifically selecting the correct frequency band in axis one and summing across decomposition levels in axis 0 in order to plot the time-space components held in the last two axes. This indexing problem only gets more complicated when considering non-1D data.
+#
+# It is for precisely this problem that the xarray package was developed. Xarray allows us to name the dimensions and perform operations, such as indexing, using the named dimensions.
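+#
+# For example, the positional selection used in the plotting cell above,
+#
+# ```
+# xr_sep[:, ncl_center, :, :].sum(axis=0)
+# ```
+#
+# can be written with named dimensions (using the `ds_xr_sep` DataArray constructed in the next cell) as
+#
+# ```
+# ds_xr_sep.isel(cluster=ncl_center).sum(dim="decomp_level")
+# ```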
+
+# In[28]:
+
+
+ds_xr_sep = xr.DataArray(
+    xr_sep.real,
+    dims=["decomp_level", "cluster", "z", "time"],
+    coords=[
+        np.arange(len(mrc.costs_array)),
+        np.arange(n_optimal),
+        ds_data.z,
+        ds_data.time,
+    ],
+)
+ds_cluster_centers = xr.DataArray(
+    1 / (10 ** (cluster_centroids) / (2 * np.pi)),
+    dims=["cluster"],
+    coords=[np.arange(n_optimal)],
+)
+ds_cluster_centers.attrs["units"] = "s"
+ds_cluster_centers.attrs["long_name"] = "Central period of each period band"
+
+ds_global_separation = ds_xr_sep.to_dataset(name="frequency_bands")
+ds_global_separation["frequency_bands"].attrs["units"] = "K"
+ds_global_separation["frequency_bands"].attrs["long_name"] = "mrCOSTS"
+
+ds_global_separation.coords["cluster_centers"] = ds_cluster_centers
+
+ds_global_separation["background"] = (("z", "time"), xr_background)
+
+ds_global_separation
+
+
+# ## Global reconstruction
+
+# In[29]:
+
+
+xr_global_reconstruction = mrc.global_reconstruction()
+da_global_reconstruction = xr.DataArray(
+    data=xr_global_reconstruction,
+    coords=[("z", ds_data.z.data), ("time", ds_data.time.data)],
+)
+da_global_reconstruction.attrs["units"] = "K"
+da_global_reconstruction.attrs["long_name"] = r"$\theta - <\theta>$"
+
+
+# In[30]:
+
+
+fig, axes = plt.subplots(3, 1, figsize=(4, 6), sharex=True)
+
+ax = axes[0]
+ds_data.plot(ax=ax)
+ax.set_title("Original Data")
+
+ax = axes[1]
+da_global_reconstruction.plot(ax=ax)
+ax.set_title("mrCOSTS Global Reconstruction")
+
+ax = axes[2]
+(da_global_reconstruction - ds_data).plot(ax=ax)
+ax.set_title("mrCOSTS Fit - Original Data")
+
+fig.tight_layout()
+
+
+# Most of the error is at the edges, where the window reconstruction uncertainty is the largest. It's a very good reconstruction!
+
+# ## At a point
+#
+# Plots of time series are generally easier for intuitive interpretation. For this reason, we can generate plots of the observations, global reconstruction, and scale separation at a couple of different locations.
+
+# In[31]:
+
+
+for z in [2, 6, 10]:
+    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
+    ground_truth = ds_data.sel(z=z, method="nearest")
+    ground_truth.plot(ax=ax, color="k", lw=0.5, label="Observations")
+
+    background = ds_global_separation["background"].sel(z=z, method="nearest")
+
+    for ncl, cl_center in enumerate(ds_global_separation.cluster_centers[::-1]):
+        if cl_center < 60:
+            continue
+        ds_plot = (
+            ds_global_separation["frequency_bands"]
+            .swap_dims({"cluster": "cluster_centers"})
+            .sel(cluster_centers=cl_center)
+            .sel(z=z, method="nearest")
+        ).sum(dim="decomp_level")
+        label = "central period={:4.0f}s".format(cl_center.values)
+        (ds_plot + background).plot(ax=ax, label=label, lw=1, alpha=0.5)
+
+    da_global_reconstruction.sel(z=z, method="nearest").plot(
+        label="global reconstruction"
+    )
+
+    # ax.legend()
+    ax.legend(bbox_to_anchor=(1.05, 0.95))
+    ax.set_title("Global frequency band reconstruction, z={}m".format(z))
+    ax.set_ylabel("amplitude (K)")
+    ax.autoscale(enable=True, axis="x", tight=True)
+
+
+# ## Visualizing the leaked component between levels
+#
+# Here we use the cluster with the 3rd longest period. It is split across the last two decomposition levels.
+
+# In[32]:
+
+
+fig, axes = plt.subplots(3, 1, sharex=True, figsize=(5, 6))
+plot_kwargs = {"vmin": -0.2, "vmax": 0.2, "cmap": "RdBu_r"}
+
+ds_plot = (
+    ds_global_separation["frequency_bands"].isel(cluster=2).sel(decomp_level=4)
+)
+ds_plot.plot(ax=axes[0], **plot_kwargs)
+axes[0].set_title(
+    "cluster period={:4.0f}s, decomp_level={}".format(
+        ds_plot.cluster_centers.values, ds_plot.decomp_level.values
+    )
+)
+
+ds_plot = (
+    ds_global_separation["frequency_bands"].isel(cluster=2).sel(decomp_level=3)
+)
+ds_plot.plot(ax=axes[1], **plot_kwargs)
+axes[1].set_title(
+    "cluster period={:4.0f}s, decomp_level={}".format(
+        ds_plot.cluster_centers.values, ds_plot.decomp_level.values
+    )
+)
+
+ds_plot = ds_global_separation["frequency_bands"].isel(cluster=2)
+ds_plot = ds_plot.sum(dim="decomp_level")
+ds_plot.plot(ax=axes[2], **plot_kwargs)
+axes[2].set_title(
+    "cluster period={:4.0f}s, All decomp levels".format(
+        ds_plot.cluster_centers.values,
+    )
+)
+
+fig.tight_layout()
+
+
+# The leaked component is generally smaller in amplitude than the "unleaked" component. However, not including this component will lead to large errors in reconstructions and can even miss large amplitude features in some cases. A rough way to quantify the leaked fraction per decomposition level is sketched below.
+
+# In[ ]:
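+
+# ## Quantifying the leaked component (optional)
+#
+# As a rough check, the relative size of the leaked component can be quantified by comparing the RMS amplitude of a single global frequency band across decomposition levels. This is only a sketch built from the `ds_global_separation` dataset constructed above; it is not a dedicated mrCOSTS utility:
+#
+# ```
+# band = ds_global_separation["frequency_bands"].isel(cluster=2)
+# # RMS amplitude of this frequency band for each decomposition level.
+# rms_per_level = np.sqrt((band**2).mean(dim=["z", "time"]))
+# # The level with the largest RMS carries most of the band; the remainder is
+# # the portion that leaked into neighboring decomposition levels.
+# print(rms_per_level.values)
+# ```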