From 366f072d451eb94a4d3ea63c2ec17326b6a13adb Mon Sep 17 00:00:00 2001 From: klapo Date: Wed, 29 Mar 2023 15:54:14 +0200 Subject: [PATCH 01/31] Feature: multires discovery added --- pydmd/multires_discovery.py | 412 ++++++++++++++++++++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 pydmd/multires_discovery.py diff --git a/pydmd/multires_discovery.py b/pydmd/multires_discovery.py new file mode 100644 index 000000000..5826941f7 --- /dev/null +++ b/pydmd/multires_discovery.py @@ -0,0 +1,412 @@ +import numpy as np +from pydmd.bopdmd import BOPDMD +import scipy +from sklearn.cluster import KMeans +import matplotlib.pyplot as plt + + +class multi_res_discovery: + def __init__( + self, + window_length, + n_components, + step_size, + svd_rank=None, + global_svd=True, + initialize_artificially=False, + use_last_freq=False, + use_kmean_freqs=False, + init_alpha=None, + pydmd_kwargs=None, + threshhold_percent=1, + cluster_centroids=None, + ): + self._n_components = n_components + self._step_size = step_size + self._svd_rank = svd_rank + self._window_length = window_length + self._global_svd = global_svd + self._initialize_artificially = initialize_artificially + self._use_last_freq = use_last_freq + self._use_kmean_freqs = use_kmean_freqs + self._init_alpha = init_alpha + self._cluster_centroids = cluster_centroids + if pydmd_kwargs is None: + self._pydmd_kwargs = { + 'eig_sort': 'imag', + } + else: + self._pydmd_kwargs = pydmd_kwargs + + # Set the threshold for the number of frequencies to use in clustering, + # where the frequencies have been sorted in order of magnitude. + # Note: this value is not currently implemented (it was always 1 in examples). + self._threshhold_percent = threshhold_percent + + def _compute_svd_rank(self, data, svd_rank=None): + # rank to fit w/ optdmd + _, n_data_vars = self._data_shape(data) + + if svd_rank is None: + svd_rank = n_data_vars + elif svd_rank > n_data_vars: + raise ValueError('svd_rank is greater than the number of spatial dimensions.') + return svd_rank + + def _build_proj_basis(self, data, global_svd=True, svd_rank=None): + + if svd_rank is None: + self._svd_rank = self._compute_svd_rank(data, svd_rank=svd_rank) + + # Use global SVD modes for each DMD rather than individual SVD on + # each window. + if global_svd: + # Recover the first r modes of the global svd + u, _, _ = scipy.linalg.svd(data, full_matrices=False) + return u[:, :self._svd_rank] + + def _build_initizialization(self): + """ Method for making initial guess of DMD eigenvalues. + """ + + # User provided initial eigenvalues. + init_alpha = None + if self._initialize_artificially and self._init_alpha is not None: + init_alpha = self._init_alpha + # Initial eigenvalue guesses from kmeans clustering. + elif self._initialize_artificially and self._init_alpha is None: + if self._use_kmean_freqs: + init_alpha = np.repeat(np.sqrt(self._cluster_centroids) * 1j, + int(self._svd_rank / self._n_components)) + init_alpha = init_alpha * np.tile([1, -1], + int(self._svd_rank / self._n_components)) + + return init_alpha + + def build_windows(self, data, integer_windows=False): + """Calculate how many times to slide the window across the data. 
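+
+        The number of slides is ``floor((n_steps - window_length) / step_size)``,
+        so trailing samples that do not fill a complete window are dropped.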
+ + """ + + if integer_windows: + n_split = np.floor(data.shape[1] / self._window_length).astype(int) + else: + n_split = data.shape[1] / self._window_length + + n_steps = int((self._window_length * n_split)) + + # Number of sliding-window iterations + n_slides = np.floor((n_steps - self._window_length) / self._step_size).astype(int) + + return n_slides + + def calculate_lv_kern(self, window_length, corner_sharpness=None): + """Calculate the kerning window for suppressing real eigenvalues. + + """ + + # Higher = sharper corners + if corner_sharpness is None: + corner_sharpness = 16 + + lv_kern = ( + np.tanh( + corner_sharpness * np.arange(1, window_length + 1) / window_length + ) + - np.tanh( + corner_sharpness * ( + np.arange(1, window_length + 1) - window_length) / window_length) - 1 + ) + + return lv_kern + + def _data_shape(self, data): + n_time_steps = np.shape(data)[1] + n_data_vars = np.shape(data)[0] + return n_time_steps, n_data_vars + + @property + def svd_rank(self): + """ + :return: the rank used for the svd truncation. + :rtype: int or float + """ + return self._svd_rank + + @property + def modes_array(self): + if not hasattr(self, "_modes_array"): + raise ValueError("You need to call fit before") + return self._modes_array + + @property + def amplitudes_array(self): + if not hasattr(self, "_amplitudes_array"): + raise ValueError("You need to call fit before") + return self._amplitudes_array + + @property + def omega_array(self): + if not hasattr(self, "_omega_array"): + raise ValueError("You need to call fit before") + return self._omega_array + + @property + def omega_array(self): + if not hasattr(self, "_omega_array"): + raise ValueError("You need to call fit before") + return self._omega_array + + @property + def time_array(self): + if not hasattr(self, "_time_array"): + raise ValueError("You need to call fit before") + return self._time_array + + @property + def window_means_array(self): + if not hasattr(self, "_window_means_array"): + raise ValueError("You need to call fit before") + return self._window_means_array + + @property + def t_starts_array(self): + if not hasattr(self, "_t_starts_array"): + raise ValueError("You need to call fit before") + return self._t_starts_array + + @property + def time_means_array(self): + if not hasattr(self, "_time_means_array"): + raise ValueError("You need to call fit before") + return self._time_means_array + + def fit(self, data, time, verbose=False, corner_sharpness=None): + self._n_time_steps, self._n_data_vars = self._data_shape(data) + self._n_slides = self.build_windows(data) + self._svd_rank = self._compute_svd_rank(data, svd_rank=self._svd_rank) + + # Check dimensionality/shape of all + # Each element calculate for a window is returned to the user in these array. + data_array = np.zeros((self._n_slides, self._n_data_vars, self._window_length)) + self._time_array = np.zeros((self._n_slides, self._window_length)) + self._modes_array = np.zeros((self._n_slides, self._n_data_vars, self._svd_rank), + np.complex128) + self._omega_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) + self._amplitudes_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) + self._window_means_array = np.zeros((self._n_slides, self._n_data_vars)) + self._t_starts_array = np.zeros((self._n_slides)) + self._time_means_array = np.zeros((self._n_slides)) + + # Round the corners of the window to shrink real components. 
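+        # (The kerning window tapers each end of the window with tanh ramps,
+        # damping edge effects that otherwise leak into spurious real, i.e.
+        # growing or decaying, eigenvalue components; see calculate_lv_kern.)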
+ lv_kern = self.calculate_lv_kern(self._window_length, + corner_sharpness=corner_sharpness) + + # Build the projection basis if using a global svd. If not provided local u is used instead. + if self._global_svd: + u = self._build_proj_basis(data, global_svd=self._global_svd, + svd_rank=self._svd_rank) + self._pydmd_kwargs['proj_basis'] = u + self._pydmd_kwargs['use_proj'] = False + + # Initialize the DMD class. + if self._initialize_artificially: + self._init_alpha = self._build_initizialization() + optdmd = BOPDMD( + svd_rank=self._svd_rank, num_trials=0, init_alpha=self._init_alpha, + **self._pydmd_kwargs + ) + else: + optdmd = BOPDMD( + svd_rank=self._svd_rank, num_trials=0, **self._pydmd_kwargs, + ) + + for k in range(self._n_slides): + if verbose: + if k // 50 == k / 50: + print('{} of {}'.format(k, self._n_slides)) + + sample_start = self._step_size * k + sample_steps = np.arange(sample_start, sample_start + self._window_length) + + data_window = data[:, sample_steps] + time_window = time[:, sample_steps] + data_array[k, :, :] = data_window + self._time_array[k, :] = time_window + self._time_means_array[k] = np.mean(time_window) + + t_start = time_window[:, 0] + time_window = time_window - t_start + self._t_starts_array[k] = t_start + + # Subtract off mean before rounding corners + # https://stackoverflow.com/questions/32030343/ + # subtracting-the-mean-of-each-row-in-numpy-with-broadcasting + c = np.mean(data_window, 1, keepdims=True) + data_window = data_window - c + + # Round corners of the window + data_window = data_window * lv_kern + + # Fit with the desired DMD class + optdmd.fit(data_window, time_window) + + # if use_last_freq == 1: + # e_init = e + + # Assign the results from this window + self._modes_array[k, ::] = optdmd.modes + self._omega_array[k, :] = optdmd.eigs + self._amplitudes_array[k, :] = optdmd.amplitudes + self._window_means_array[k, :] = c.flatten() + + def cluster_omega(self, omega_array, kmeans_kwargs=None): + omega_rshp = omega_array.reshape(self._n_slides * self._svd_rank) + omega_squared = (np.conj(omega_rshp) * omega_rshp).astype('float') + + if kmeans_kwargs is None: + random_state = 0 + kmeans_kwargs = { + "n_init": "auto", + "random_state": random_state, + } + kmeans = KMeans(n_clusters=self._n_components, **kmeans_kwargs) + y_pred = kmeans.fit_predict(np.atleast_2d(omega_squared).T) + omega_classes = y_pred.reshape(self._n_slides, self._svd_rank) + cluster_centroids = kmeans.cluster_centers_.flatten() + + return omega_classes, omega_squared, cluster_centroids + + def plot_omega_squared_histogram(self, omega_squared, cluster_centroids): + fig, ax = plt.subplots(1, 1) + ax.hist(omega_squared, bins=64) + ax.set_xlabel('$|\omega|^{2}$') + ax.set_ylabel('Count') + ax.set_title('$|\omega|^2$ Spectrum & k-Means Centroids') + [ax.axvline(c, color='r') for c in cluster_centroids] + + return fig, ax + + def plot_omega_squared_time_series(self, omega_squared, omega_classes): + fig, ax = plt.subplots(1, 1) + colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] + for ncomponent, component in enumerate(range(self._n_components)): + ax.plot( + self._time_means_array, + np.where( + omega_classes == component, + omega_squared.reshape((self._n_slides, self._svd_rank)), np.nan + ), + color=colors[ncomponent] + ) + ax.set_ylabel('$|\omega|^{2}$') + ax.set_xlabel('Time') + ax.set_title('$|\omega|^2$ Spectrum (Moving Window)') + + return fig, ax + + def global_reconstruction(self, ): + # Container for the reconstructed time series + glbl_reconstruction = np.zeros( 
+ (self._svd_rank, self._n_time_steps)).astype('complex128') + + # Count the number of windows contributing to each step + xn = np.zeros(self._n_time_steps) + + for k in range(self._n_slides): + # Extract out the DMD fit for this window. + w = self._modes_array[k] + b = self._amplitudes_array[k] + omega = np.atleast_2d(self._omega_array[k]).T + c = np.atleast_2d(self._window_means_array[k]).T + + # Compute each segment starting at t=0 + t = self._time_array[k] + t_start = self._t_starts_array[k] + t = t - t_start + + # Perform the global reconstruction. + recon_window = np.linalg.multi_dot([w, np.diag(b), np.exp(omega * t)]) + c + + window_indices = slice(k * self._step_size, + k * self._step_size + self._window_length) + glbl_reconstruction[:, window_indices] += recon_window + xn[window_indices] += 1 + + # Weight xr so all steps are on equal footing + glbl_reconstruction = glbl_reconstruction / xn + + return glbl_reconstruction + + def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth=True, + include_means=True): + """Reconstruct the sliding mrDMD into the constituent components. + + The reconstructed data are convolved with a guassian filter since + points near the middle of the window are more reliable than points + at the edge of the window. Note that this will leave the beginning + and end of time series prone to larger errors. A best practice is + to cut off `window_length` from each end before further analysis. + + + suppress_growth: + Kill positive real components of frequencies + """ + + # Each individual reconstructed window + xr_sep = np.zeros((self._n_components, self._svd_rank, self._n_time_steps)) + + # Track the total contribution from all windows to each time step + xn = np.zeros(self._n_time_steps) + + # Convolve each windowed reconstruction with a gaussian filter. + # Std dev of gaussian filter + recon_filter_sd = self._window_length / 8 + recon_filter = np.exp( + -( + np.arange(self._window_length) + - (self._window_length + 1) / 2 + ) ** 2 / recon_filter_sd ** 2) + + for k in range(self._n_slides): + w = self._modes_array[k] + b = self._amplitudes_array[k] + omega = np.atleast_2d(self._omega_array[k]).T + classification = omega_classes[k] + + if suppress_growth: + omega[omega.real > 0] = 1j * omega[omega.real > 0].imag + + c = np.atleast_2d(self._window_means_array[k]).T + + # Compute each segment of xr starting at "t = 0" + t = self._time_array[k] + t_start = self._t_starts_array[k] + t = t - t_start + + xr_sep_window = np.zeros( + (self._n_components, self._svd_rank, self._window_length)) + for j in np.unique(omega_classes): + xr_sep_window[j, :, :] = np.linalg.multi_dot( + [ + w[:, classification == j], + np.diag(b[classification == j]), + np.exp(omega[classification == j] * t) + ] + ) + + # Add the constant offset to the lowest frequency cluster. 
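+                # (The window means are restored to exactly one band, the one
+                # with the smallest centroid, so that summing the separated
+                # scales recovers the full signal without double counting.)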
+ if include_means and (j == np.argmin(cluster_centroids)): + xr_sep_window[j, :, :] += c + xr_sep_window[j, :, :] = xr_sep_window[j, :, :] * recon_filter + window_indices = slice(k * self._step_size, + k * self._step_size + self._window_length) + xr_sep[j, :, window_indices] = xr_sep[j, :, + window_indices] + xr_sep_window[j, :, :] + + xn[window_indices] += recon_filter + + xr_sep = xr_sep / xn + + return xr_sep + From 66663a725d73eca216445c8009d3bdc6b35a7e8f Mon Sep 17 00:00:00 2001 From: klapo Date: Fri, 31 Mar 2023 11:55:35 +0200 Subject: [PATCH 02/31] Bug fix: Bad int type check in compute_svd --- pydmd/rdmd.py | 2 +- pydmd/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pydmd/rdmd.py b/pydmd/rdmd.py index 8bc1431a0..9b9314114 100644 --- a/pydmd/rdmd.py +++ b/pydmd/rdmd.py @@ -42,7 +42,7 @@ def omega(x): elif 0 < svd_rank < 1: cumulative_energy = np.cumsum(s**2 / (s**2).sum()) rank = np.searchsorted(cumulative_energy, svd_rank) + 1 - elif svd_rank >= 1 and isinstance(svd_rank, int): + elif svd_rank >= 1 and isinstance(svd_rank, (int, np.integer)): rank = min(svd_rank, U.shape[1]) else: rank = min(X.shape) diff --git a/pydmd/utils.py b/pydmd/utils.py index cb9ff8483..3b6454ee2 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -73,7 +73,7 @@ def omega(x): elif 0 < svd_rank < 1: cumulative_energy = np.cumsum(s**2 / (s**2).sum()) rank = np.searchsorted(cumulative_energy, svd_rank) + 1 - elif svd_rank >= 1 and isinstance(svd_rank, int): + elif svd_rank >= 1 and isinstance(svd_rank, (int, np.integer)): rank = min(svd_rank, U.shape[1]) else: rank = X.shape[1] From a14dcf6327873710a75a29db9cb9bf5cbf23ccf1 Mon Sep 17 00:00:00 2001 From: klapo Date: Tue, 4 Apr 2023 15:31:10 +0200 Subject: [PATCH 03/31] General cleanup and bug fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Made several init arguments keyword arguments to better reflect algorithm logic - Made several arguments belong to the fit command instead of the class init to better reflect how the fit command should be used (namely window size and step size). - Some pep8’ing - n_components is now a required argument for the clustering operation - Plotting now uses a circular color scheme for many clusters - Updating the allocation to better reflect not knowing the optimal `svd_rank` a priori. 
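An editor's sketch of the call sequence this change produces (`data` and
`time` stand for the usual snapshot matrix and 2-D time array; all numeric
values are illustrative, not taken from the diff):

    mrd = multi_res_discovery(svd_rank=8, global_svd=True)
    mrd.fit(data, time, window_length=400, step_size=12)
    omega_classes, omega_squared, centroids = mrd.cluster_omega(
        mrd.omega_array, n_components=3
    )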
--- pydmd/multires_discovery.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pydmd/multires_discovery.py b/pydmd/multires_discovery.py index 5826941f7..086108558 100644 --- a/pydmd/multires_discovery.py +++ b/pydmd/multires_discovery.py @@ -8,9 +8,9 @@ class multi_res_discovery: def __init__( self, - window_length, - n_components, - step_size, + window_length=None, + step_size=None, + n_components=None, svd_rank=None, global_svd=True, initialize_artificially=False, @@ -181,7 +181,11 @@ def time_means_array(self): raise ValueError("You need to call fit before") return self._time_means_array - def fit(self, data, time, verbose=False, corner_sharpness=None): + def fit(self, data, time, window_length, step_size, verbose=False, + corner_sharpness=None): + self._window_length = window_length + self._step_size = step_size + self._n_time_steps, self._n_data_vars = self._data_shape(data) self._n_slides = self.build_windows(data) self._svd_rank = self._compute_svd_rank(data, svd_rank=self._svd_rank) @@ -195,8 +199,8 @@ def fit(self, data, time, verbose=False, corner_sharpness=None): self._omega_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) self._amplitudes_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) self._window_means_array = np.zeros((self._n_slides, self._n_data_vars)) - self._t_starts_array = np.zeros((self._n_slides)) - self._time_means_array = np.zeros((self._n_slides)) + self._t_starts_array = np.zeros(self._n_slides) + self._time_means_array = np.zeros(self._n_slides) # Round the corners of the window to shrink real components. lv_kern = self.calculate_lv_kern(self._window_length, @@ -260,7 +264,8 @@ def fit(self, data, time, verbose=False, corner_sharpness=None): self._amplitudes_array[k, :] = optdmd.amplitudes self._window_means_array[k, :] = c.flatten() - def cluster_omega(self, omega_array, kmeans_kwargs=None): + def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): + self._n_components = n_components omega_rshp = omega_array.reshape(self._n_slides * self._svd_rank) omega_squared = (np.conj(omega_rshp) * omega_rshp).astype('float') @@ -297,7 +302,7 @@ def plot_omega_squared_time_series(self, omega_squared, omega_classes): omega_classes == component, omega_squared.reshape((self._n_slides, self._svd_rank)), np.nan ), - color=colors[ncomponent] + color=colors[ncomponent % len(colors)] ) ax.set_ylabel('$|\omega|^{2}$') ax.set_xlabel('Time') @@ -308,7 +313,7 @@ def plot_omega_squared_time_series(self, omega_squared, omega_classes): def global_reconstruction(self, ): # Container for the reconstructed time series glbl_reconstruction = np.zeros( - (self._svd_rank, self._n_time_steps)).astype('complex128') + (self._n_data_vars, self._n_time_steps)).astype('complex128') # Count the number of windows contributing to each step xn = np.zeros(self._n_time_steps) @@ -354,7 +359,7 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth """ # Each individual reconstructed window - xr_sep = np.zeros((self._n_components, self._svd_rank, self._n_time_steps)) + xr_sep = np.zeros((self._n_components, self._n_data_vars, self._n_time_steps)) # Track the total contribution from all windows to each time step xn = np.zeros(self._n_time_steps) @@ -385,7 +390,7 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth t = t - t_start xr_sep_window = np.zeros( - (self._n_components, self._svd_rank, self._window_length)) + (self._n_components, 
self._n_data_vars, self._window_length))
         for j in np.unique(omega_classes):
             xr_sep_window[j, :, :] = np.linalg.multi_dot(
                 [

From 13a9081bf311ce120077fc8b5c9fe05dd8d8b36f Mon Sep 17 00:00:00 2001
From: klapo
Date: Wed, 5 Apr 2023 16:48:17 +0200
Subject: [PATCH 04/31] Further clean up and bug fixing:

- Adapted the svd_rank computation so that, with no global svd and an
  unknown svd_rank, it returns the number of data variables for use in
  allocating the window arrays.
- build_windows now requires positional arguments instead of accessing
  class variables.
- Added logic in `fit` for the case when neither a global svd nor an
  svd_rank is used and the svd_rank may change between windows.
- n_components is specified in the clustering function instead of at
  class creation.
---
 pydmd/multires_discovery.py | 106 ++++++++++++++++++++++++------------
 1 file changed, 70 insertions(+), 36 deletions(-)

diff --git a/pydmd/multires_discovery.py b/pydmd/multires_discovery.py
index 086108558..3bdc13856 100644
--- a/pydmd/multires_discovery.py
+++ b/pydmd/multires_discovery.py
@@ -20,23 +20,27 @@ def __init__(
         pydmd_kwargs=None,
         threshhold_percent=1,
         cluster_centroids=None,
+        reset_alpha_init=False,
     ):
         self._n_components = n_components
         self._step_size = step_size
-        self._svd_rank = svd_rank
         self._window_length = window_length
+        self._svd_rank = svd_rank
         self._global_svd = global_svd
         self._initialize_artificially = initialize_artificially
         self._use_last_freq = use_last_freq
         self._use_kmean_freqs = use_kmean_freqs
         self._init_alpha = init_alpha
         self._cluster_centroids = cluster_centroids
+        self._reset_alpha_init = reset_alpha_init
+
         if pydmd_kwargs is None:
             self._pydmd_kwargs = {
                 'eig_sort': 'imag',
             }
         else:
             self._pydmd_kwargs = pydmd_kwargs
+            self._pydmd_kwargs['eig_sort'] = pydmd_kwargs.get('eig_sort', 'imag')

         # Set the threshold for the number of frequencies to use in clustering,
         # where the frequencies have been sorted in order of magnitude.
@@ -44,13 +48,24 @@ def __init__(
         self._threshhold_percent = threshhold_percent

     def _compute_svd_rank(self, data, svd_rank=None):
-        # rank to fit w/ optdmd
-        _, n_data_vars = self._data_shape(data)
-
-        if svd_rank is None:
-            svd_rank = n_data_vars
-        elif svd_rank > n_data_vars:
-            raise ValueError('svd_rank is greater than the number of spatial dimensions.')
+        def omega(x):
+            return 0.56 * x ** 3 - 0.95 * x ** 2 + 1.82 * x + 1.43
+
+        U, s, _ = np.linalg.svd(data, full_matrices=False)
+
+        if svd_rank == 0:
+            beta = np.divide(*sorted(data.shape))
+            tau = np.median(s) * omega(beta)
+            svd_rank = np.sum(s > tau)
+        elif 0 < svd_rank < 1:
+            cumulative_energy = np.cumsum(s ** 2 / (s ** 2).sum())
+            svd_rank = np.searchsorted(cumulative_energy, svd_rank) + 1
+        elif svd_rank >= 1 and isinstance(svd_rank, (int, np.integer)):
+            svd_rank = min(svd_rank, self._n_data_vars)
+        else:
+            svd_rank = self._n_data_vars
+
         return svd_rank

     def _build_proj_basis(self, data, global_svd=True, svd_rank=None):
@@ -83,20 +98,20 @@ def _build_initizialization(self):

         return init_alpha

-    def build_windows(self, data, integer_windows=False):
+    def build_windows(self, data, window_length, step_size, integer_windows=False):
        """Calculate how many times to slide the window across the data.
""" if integer_windows: - n_split = np.floor(data.shape[1] / self._window_length).astype(int) + n_split = np.floor(data.shape[1] / window_length).astype(int) else: - n_split = data.shape[1] / self._window_length + n_split = data.shape[1] / window_length - n_steps = int((self._window_length * n_split)) + n_steps = int((window_length * n_split)) # Number of sliding-window iterations - n_slides = np.floor((n_steps - self._window_length) / self._step_size).astype(int) + n_slides = np.floor((n_steps - window_length) / step_size).astype(int) return n_slides @@ -183,36 +198,45 @@ def time_means_array(self): def fit(self, data, time, window_length, step_size, verbose=False, corner_sharpness=None): + + # Prepare window and data properties. self._window_length = window_length self._step_size = step_size self._n_time_steps, self._n_data_vars = self._data_shape(data) - self._n_slides = self.build_windows(data) - self._svd_rank = self._compute_svd_rank(data, svd_rank=self._svd_rank) + self._n_slides = self.build_windows(data, self._window_length, self._step_size) + + # Build the projection basis if using a global svd. + if self._global_svd: + u = self._build_proj_basis(data, global_svd=self._global_svd, + svd_rank=self._svd_rank) + self._pydmd_kwargs['proj_basis'] = self._pydmd_kwargs.get('proj_basis', u) + self._pydmd_kwargs['use_proj'] = self._pydmd_kwargs.get('use_proj', False) + + self._svd_rank = self._compute_svd_rank(data, svd_rank=self._svd_rank) + svd_rank_pre_allocate = self._svd_rank + # If not using a global svd, local u from each window is used instead. + else: + # The various arrays the optimal svd_rank may change when using the locally + # optimal svd_rank. To deal with this situation, we give the maximally + # allowed svd_rank for pre-allocation. + svd_rank_pre_allocate = self._n_data_vars - # Check dimensionality/shape of all - # Each element calculate for a window is returned to the user in these array. + # Pre-allocate all elements for the sliding window DMD. + # Each element calculated for a window is returned through these array. data_array = np.zeros((self._n_slides, self._n_data_vars, self._window_length)) self._time_array = np.zeros((self._n_slides, self._window_length)) - self._modes_array = np.zeros((self._n_slides, self._n_data_vars, self._svd_rank), + self._modes_array = np.zeros( + (self._n_slides, self._n_data_vars, svd_rank_pre_allocate), + np.complex128) + self._omega_array = np.zeros((self._n_slides, svd_rank_pre_allocate), np.complex128) - self._omega_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) - self._amplitudes_array = np.zeros((self._n_slides, self._svd_rank), np.complex128) + self._amplitudes_array = np.zeros((self._n_slides, svd_rank_pre_allocate), + np.complex128) self._window_means_array = np.zeros((self._n_slides, self._n_data_vars)) self._t_starts_array = np.zeros(self._n_slides) self._time_means_array = np.zeros(self._n_slides) - # Round the corners of the window to shrink real components. - lv_kern = self.calculate_lv_kern(self._window_length, - corner_sharpness=corner_sharpness) - - # Build the projection basis if using a global svd. If not provided local u is used instead. - if self._global_svd: - u = self._build_proj_basis(data, global_svd=self._global_svd, - svd_rank=self._svd_rank) - self._pydmd_kwargs['proj_basis'] = u - self._pydmd_kwargs['use_proj'] = False - # Initialize the DMD class. 
if self._initialize_artificially:
            self._init_alpha = self._build_initizialization()
            optdmd = BOPDMD(
                svd_rank=self._svd_rank, num_trials=0, init_alpha=self._init_alpha,
                **self._pydmd_kwargs
            )
        else:
            optdmd = BOPDMD(
                svd_rank=self._svd_rank, num_trials=0, **self._pydmd_kwargs,
            )

+        # Round the corners of the window to shrink real components.
+        lv_kern = self.calculate_lv_kern(self._window_length,
+                                         corner_sharpness=corner_sharpness)
+
+        # Perform the sliding window DMD fitting.
         for k in range(self._n_slides):
             if verbose:
                 if k // 50 == k / 50:
                     print('{} of {}'.format(k, self._n_slides))

+            # Get the window indices and data.
             sample_start = self._step_size * k
             sample_steps = np.arange(sample_start, sample_start + self._window_length)
             data_window = data[:, sample_steps]
             time_window = time[:, sample_steps]
             data_array[k, :, :] = data_window
             self._time_array[k, :] = time_window
             self._time_means_array[k] = np.mean(time_window)

+            # All windows are fit with the time array reset to start at t=0.
             t_start = time_window[:, 0]
             time_window = time_window - t_start
             self._t_starts_array[k] = t_start
@@ -259,11 +289,16 @@ def fit(self, data, time, window_length, step_size, verbose=False,
             # e_init = e

             # Assign the results from this window
-            self._modes_array[k, ::] = optdmd.modes
-            self._omega_array[k, :] = optdmd.eigs
-            self._amplitudes_array[k, :] = optdmd.amplitudes
+            self._modes_array[k, :, :optdmd.modes.shape[-1]] = optdmd.modes
+            self._omega_array[k, :optdmd.eigs.shape[0]] = optdmd.eigs
+            self._amplitudes_array[k, :optdmd.eigs.shape[0]] = optdmd.amplitudes
             self._window_means_array[k, :] = c.flatten()

+            # Reset optdmd between iterations
+            optdmd._svd_rank = self._svd_rank
+            optdmd._proj_basis = self._pydmd_kwargs['proj_basis']
+            optdmd._init_alpha = self._init_alpha
+
     def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None):
         self._n_components = n_components
         omega_rshp = omega_array.reshape(self._n_slides * self._svd_rank)

From ed7f20852ce06b8c4ad8b7510255fad206b2d73f Mon Sep 17 00:00:00 2001
From: klapo
Date: Fri, 7 Apr 2023 17:23:11 +0200
Subject: [PATCH 05/31] Fix: Do not reset optdmd state with global_svd=True

---
 pydmd/multires_discovery.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pydmd/multires_discovery.py b/pydmd/multires_discovery.py
index 3bdc13856..300386fd5 100644
--- a/pydmd/multires_discovery.py
+++ b/pydmd/multires_discovery.py
@@ -295,9 +295,10 @@ def fit(self, data, time, window_length, step_size, verbose=False,
             self._window_means_array[k, :] = c.flatten()

             # Reset optdmd between iterations
-            optdmd._svd_rank = self._svd_rank
-            optdmd._proj_basis = self._pydmd_kwargs['proj_basis']
-            optdmd._init_alpha = self._init_alpha
+            if not self._global_svd:
+                optdmd._svd_rank = self._svd_rank
+                optdmd._proj_basis = self._pydmd_kwargs['proj_basis']
+                optdmd._init_alpha = self._init_alpha

From 1539e3a2dd4fe9981562756dc61df5612fc0165d Mon Sep 17 00:00:00 2001
From: klapo
Date: Wed, 12 Apr 2023 17:53:17 +0200
Subject: [PATCH 06/31] Maint: Further building the multi res framework

- Local vs global svd logic should be more complete
- Moved to slice objects for indexing, allowing incomplete windows and
  reconstructing the entire data set, regardless of the number of integer
  windows.
- Fixed bug in the number of slides to make across the data - Fixed bug in the initialization of the optdmd kwargs. - Reblacked the project - Added logic for initializing using the previous windows frequencies. --- pydmd/multires_discovery.py | 362 +++++++++++++++++++++++------------- 1 file changed, 230 insertions(+), 132 deletions(-) diff --git a/pydmd/multires_discovery.py b/pydmd/multires_discovery.py index 300386fd5..8d6a1248d 100644 --- a/pydmd/multires_discovery.py +++ b/pydmd/multires_discovery.py @@ -7,20 +7,19 @@ class multi_res_discovery: def __init__( - self, - window_length=None, - step_size=None, - n_components=None, - svd_rank=None, - global_svd=True, - initialize_artificially=False, - use_last_freq=False, - use_kmean_freqs=False, - init_alpha=None, - pydmd_kwargs=None, - threshhold_percent=1, - cluster_centroids=None, - reset_alpha_init=False, + self, + window_length=None, + step_size=None, + n_components=None, + svd_rank=None, + global_svd=True, + initialize_artificially=False, + use_last_freq=False, + use_kmean_freqs=False, + init_alpha=None, + pydmd_kwargs=None, + cluster_centroids=None, + reset_alpha_init=False, ): self._n_components = n_components self._step_size = step_size @@ -34,23 +33,39 @@ def __init__( self._cluster_centroids = cluster_centroids self._reset_alpha_init = reset_alpha_init + # Initialize variables that are defined in fitting. + self._n_data_vars = None + self._n_time_steps = None + self._window_length = None + self._n_slides = None + self._time_array = None + self._modes_array = None + self._omega_array = None + self._time_means_array = None + self._amplitudes_array = None + self._t_starts_array = None + if pydmd_kwargs is None: self._pydmd_kwargs = { - 'eig_sort': 'imag', + "eig_sort": "imag", + "proj_basis": None, + "use_proj": False, } else: self._pydmd_kwargs = pydmd_kwargs - self._pydmd_kwargs['eig_sort'] = pydmd_kwargs.get('eig_sort', 'imag') - - # Set the threshold for the number of frequencies to use in clustering, - # where the frequencies have been sorted in order of magnitude. - # Note: this value is not currently implemented (it was always 1 in examples). - self._threshhold_percent = threshhold_percent + self._pydmd_kwargs["eig_sort"] = pydmd_kwargs.get( + "eig_sort", "imag" + ) + self._pydmd_kwargs["proj_basis"] = pydmd_kwargs.get( + "proj_basis", None + ) + self._pydmd_kwargs["use_proj"] = pydmd_kwargs.get( + "proj_basis", False + ) def _compute_svd_rank(self, data, svd_rank=None): - def omega(x): - return 0.56 * x ** 3 - 0.95 * x ** 2 + 1.82 * x + 1.43 + return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43 U, s, _ = np.linalg.svd(data, full_matrices=False) @@ -59,7 +74,7 @@ def omega(x): tau = np.median(s) * omega(beta) svd_rank = np.sum(s > tau) elif 0 < svd_rank < 1: - cumulative_energy = np.cumsum(s ** 2 / (s ** 2).sum()) + cumulative_energy = np.cumsum(s**2 / (s**2).sum()) svd_rank = np.searchsorted(cumulative_energy, svd_rank) + 1 elif svd_rank >= 1 and isinstance(svd_rank, (int, np.integer)): svd_rank = min(svd_rank, self._n_data_vars) @@ -68,21 +83,14 @@ def omega(x): return svd_rank - def _build_proj_basis(self, data, global_svd=True, svd_rank=None): - - if svd_rank is None: - self._svd_rank = self._compute_svd_rank(data, svd_rank=svd_rank) - - # Use global SVD modes for each DMD rather than individual SVD on - # each window. 
- if global_svd: - # Recover the first r modes of the global svd - u, _, _ = scipy.linalg.svd(data, full_matrices=False) - return u[:, :self._svd_rank] + def _build_proj_basis(self, data, svd_rank=None): + self._svd_rank = self._compute_svd_rank(data, svd_rank=svd_rank) + # Recover the first r modes of the global svd + u, _, _ = scipy.linalg.svd(data, full_matrices=False) + return u[:, : self._svd_rank] def _build_initizialization(self): - """ Method for making initial guess of DMD eigenvalues. - """ + """Method for making initial guess of DMD eigenvalues.""" # User provided initial eigenvalues. init_alpha = None @@ -91,17 +99,19 @@ def _build_initizialization(self): # Initial eigenvalue guesses from kmeans clustering. elif self._initialize_artificially and self._init_alpha is None: if self._use_kmean_freqs: - init_alpha = np.repeat(np.sqrt(self._cluster_centroids) * 1j, - int(self._svd_rank / self._n_components)) - init_alpha = init_alpha * np.tile([1, -1], - int(self._svd_rank / self._n_components)) + init_alpha = np.repeat( + np.sqrt(self._cluster_centroids) * 1j, + int(self._svd_rank / self._n_components), + ) + init_alpha = init_alpha * np.tile( + [1, -1], int(self._svd_rank / self._n_components) + ) return init_alpha - def build_windows(self, data, window_length, step_size, integer_windows=False): - """Calculate how many times to slide the window across the data. - - """ + @staticmethod + def build_windows(data, window_length, step_size, integer_windows=False): + """Calculate how many times to slide the window across the data.""" if integer_windows: n_split = np.floor(data.shape[1] / window_length).astype(int) @@ -113,24 +123,28 @@ def build_windows(self, data, window_length, step_size, integer_windows=False): # Number of sliding-window iterations n_slides = np.floor((n_steps - window_length) / step_size).astype(int) - return n_slides - - def calculate_lv_kern(self, window_length, corner_sharpness=None): - """Calculate the kerning window for suppressing real eigenvalues. + return n_slides + 1 - """ + @staticmethod + def calculate_lv_kern(window_length, corner_sharpness=None): + """Calculate the kerning window for suppressing real eigenvalues.""" # Higher = sharper corners if corner_sharpness is None: corner_sharpness = 16 lv_kern = ( - np.tanh( - corner_sharpness * np.arange(1, window_length + 1) / window_length - ) - - np.tanh( - corner_sharpness * ( - np.arange(1, window_length + 1) - window_length) / window_length) - 1 + np.tanh( + corner_sharpness + * np.arange(1, window_length + 1) + / window_length + ) + - np.tanh( + corner_sharpness + * (np.arange(1, window_length + 1) - window_length) + / window_length + ) + - 1 ) return lv_kern @@ -166,12 +180,6 @@ def omega_array(self): raise ValueError("You need to call fit before") return self._omega_array - @property - def omega_array(self): - if not hasattr(self, "_omega_array"): - raise ValueError("You need to call fit before") - return self._omega_array - @property def time_array(self): if not hasattr(self, "_time_array"): @@ -196,43 +204,73 @@ def time_means_array(self): raise ValueError("You need to call fit before") return self._time_means_array - def fit(self, data, time, window_length, step_size, verbose=False, - corner_sharpness=None): - + def fit( + self, + data, + time, + window_length, + step_size, + verbose=False, + corner_sharpness=None, + ): # Prepare window and data properties. 
self._window_length = window_length self._step_size = step_size self._n_time_steps, self._n_data_vars = self._data_shape(data) - self._n_slides = self.build_windows(data, self._window_length, self._step_size) + self._n_slides = self.build_windows( + data, self._window_length, self._step_size + ) + + # If the window size and step size do not span the data in an integer + # number of slides, we add one last window that has a smaller step spacing + # relative to the other window spacings. + n_slide_last_window = self._n_time_steps - ( + self._step_size * (self._n_slides - 1) + self._window_length + ) + if n_slide_last_window > 0: + self._n_slides += 1 + self._n_slide_last_window = n_slide_last_window # Build the projection basis if using a global svd. if self._global_svd: - u = self._build_proj_basis(data, global_svd=self._global_svd, - svd_rank=self._svd_rank) - self._pydmd_kwargs['proj_basis'] = self._pydmd_kwargs.get('proj_basis', u) - self._pydmd_kwargs['use_proj'] = self._pydmd_kwargs.get('use_proj', False) + u = self._build_proj_basis(data, svd_rank=self._svd_rank) + # self._pydmd_kwargs['proj_basis'] = self._pydmd_kwargs.get('proj_basis', u) + self._pydmd_kwargs["proj_basis"] = u + self._pydmd_kwargs["use_proj"] = self._pydmd_kwargs.get( + "use_proj", False + ) - self._svd_rank = self._compute_svd_rank(data, svd_rank=self._svd_rank) + self._svd_rank = self._compute_svd_rank( + data, svd_rank=self._svd_rank + ) svd_rank_pre_allocate = self._svd_rank + elif not self._global_svd and self._svd_rank > 0: + svd_rank_pre_allocate = self._compute_svd_rank( + data, svd_rank=self._svd_rank + ) # If not using a global svd, local u from each window is used instead. else: - # The various arrays the optimal svd_rank may change when using the locally - # optimal svd_rank. To deal with this situation, we give the maximally + # The optimal svd_rank may change when using the locally optimal svd_rank. + # To deal with this situation in the pre-allocation we give the maximally # allowed svd_rank for pre-allocation. svd_rank_pre_allocate = self._n_data_vars # Pre-allocate all elements for the sliding window DMD. # Each element calculated for a window is returned through these array. 
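+        # (Arrays are pre-allocated at svd_rank_pre_allocate; windows whose
+        # local rank is smaller leave their trailing entries as zeros.)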
- data_array = np.zeros((self._n_slides, self._n_data_vars, self._window_length)) + # data_array = np.zeros( + # (self._n_slides, self._n_data_vars, self._window_length)) # * np.nan self._time_array = np.zeros((self._n_slides, self._window_length)) self._modes_array = np.zeros( (self._n_slides, self._n_data_vars, svd_rank_pre_allocate), - np.complex128) - self._omega_array = np.zeros((self._n_slides, svd_rank_pre_allocate), - np.complex128) - self._amplitudes_array = np.zeros((self._n_slides, svd_rank_pre_allocate), - np.complex128) + np.complex128, + ) + self._omega_array = np.zeros( + (self._n_slides, svd_rank_pre_allocate), np.complex128 + ) + self._amplitudes_array = np.zeros( + (self._n_slides, svd_rank_pre_allocate), np.complex128 + ) self._window_means_array = np.zeros((self._n_slides, self._n_data_vars)) self._t_starts_array = np.zeros(self._n_slides) self._time_means_array = np.zeros(self._n_slides) @@ -241,30 +279,41 @@ def fit(self, data, time, window_length, step_size, verbose=False, if self._initialize_artificially: self._init_alpha = self._build_initizialization() optdmd = BOPDMD( - svd_rank=self._svd_rank, num_trials=0, init_alpha=self._init_alpha, - **self._pydmd_kwargs + svd_rank=self._svd_rank, + num_trials=0, + init_alpha=self._init_alpha, + **self._pydmd_kwargs, ) else: optdmd = BOPDMD( - svd_rank=self._svd_rank, num_trials=0, **self._pydmd_kwargs, + svd_rank=self._svd_rank, + num_trials=0, + **self._pydmd_kwargs, ) # Round the corners of the window to shrink real components. - lv_kern = self.calculate_lv_kern(self._window_length, - corner_sharpness=corner_sharpness) + lv_kern = self.calculate_lv_kern( + self._window_length, corner_sharpness=corner_sharpness + ) # Perform the sliding window DMD fitting. for k in range(self._n_slides): if verbose: if k // 50 == k / 50: - print('{} of {}'.format(k, self._n_slides)) + print("{} of {}".format(k, self._n_slides)) # Get the window indices and data. 
sample_start = self._step_size * k - sample_steps = np.arange(sample_start, sample_start + self._window_length) - data_window = data[:, sample_steps] - time_window = time[:, sample_steps] - data_array[k, :, :] = data_window + if k == self._n_slides - 1 and self._n_slide_last_window > 0: + sample_slice = slice(-self._window_length, None) + else: + sample_slice = slice( + sample_start, sample_start + self._window_length + ) + # sample_steps = np.arange(sample_start, sample_start + self._window_length) + data_window = data[:, sample_slice] + time_window = time[:, sample_slice] + # data_array[k, :, :] = data_window self._time_array[k, :] = time_window self._time_means_array[k] = np.mean(time_window) @@ -274,8 +323,6 @@ def fit(self, data, time, window_length, step_size, verbose=False, self._t_starts_array[k] = t_start # Subtract off mean before rounding corners - # https://stackoverflow.com/questions/32030343/ - # subtracting-the-mean-of-each-row-in-numpy-with-broadcasting c = np.mean(data_window, 1, keepdims=True) data_window = data_window - c @@ -285,25 +332,39 @@ def fit(self, data, time, window_length, step_size, verbose=False, # Fit with the desired DMD class optdmd.fit(data_window, time_window) - # if use_last_freq == 1: - # e_init = e - # Assign the results from this window - self._modes_array[k, :, :optdmd.modes.shape[-1]] = optdmd.modes - self._omega_array[k, :optdmd.eigs.shape[0]] = optdmd.eigs - self._amplitudes_array[k, :optdmd.eigs.shape[0]] = optdmd.amplitudes + self._modes_array[k, :, : optdmd.modes.shape[-1]] = optdmd.modes + self._omega_array[k, : optdmd.eigs.shape[0]] = optdmd.eigs + self._amplitudes_array[ + k, : optdmd.eigs.shape[0] + ] = optdmd.amplitudes self._window_means_array[k, :] = c.flatten() # Reset optdmd between iterations if not self._global_svd: optdmd._svd_rank = self._svd_rank - optdmd._proj_basis = self._pydmd_kwargs['proj_basis'] - optdmd._init_alpha = self._init_alpha + optdmd._proj_basis = self._pydmd_kwargs["proj_basis"] + + # The default behavior is to reset the optdmd object to use the default + # initial value (None) or the user provided values. + if not self._use_last_freq: + optdmd._init_alpha = self._init_alpha + # Otherwise use the eigenvalues from this window to seed the next window. 
+ elif self._use_last_freq: + optdmd._init_alpha = optdmd.eigs def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): self._n_components = n_components - omega_rshp = omega_array.reshape(self._n_slides * self._svd_rank) - omega_squared = (np.conj(omega_rshp) * omega_rshp).astype('float') + + # Reshape the omega array into a 1d array + n_slides = self._n_slides + if self._svd_rank == 0: + svd_rank = self._n_data_vars + else: + svd_rank = self._svd_rank + omega_rshp = omega_array.reshape(n_slides * svd_rank) + + omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float") if kmeans_kwargs is None: random_state = 0 @@ -311,45 +372,62 @@ def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): "n_init": "auto", "random_state": random_state, } - kmeans = KMeans(n_clusters=self._n_components, **kmeans_kwargs) - y_pred = kmeans.fit_predict(np.atleast_2d(omega_squared).T) - omega_classes = y_pred.reshape(self._n_slides, self._svd_rank) + kmeans = KMeans(n_clusters=n_components, **kmeans_kwargs) + omega_classes = kmeans.fit_predict(np.atleast_2d(omega_squared).T) + omega_classes = omega_classes.reshape(n_slides, svd_rank) cluster_centroids = kmeans.cluster_centers_.flatten() return omega_classes, omega_squared, cluster_centroids - def plot_omega_squared_histogram(self, omega_squared, cluster_centroids): + @staticmethod + def plot_omega_squared_histogram(omega_squared, cluster_centroids): + colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] fig, ax = plt.subplots(1, 1) ax.hist(omega_squared, bins=64) - ax.set_xlabel('$|\omega|^{2}$') - ax.set_ylabel('Count') - ax.set_title('$|\omega|^2$ Spectrum & k-Means Centroids') - [ax.axvline(c, color='r') for c in cluster_centroids] + ax.set_xlabel("$|\omega|^{2}$") + ax.set_ylabel("Count") + ax.set_title("$|\omega|^2$ Spectrum & k-Means Centroids") + [ + ax.axvline(c, color=colors[nc % len(colors)]) + for nc, c in enumerate(cluster_centroids) + ] return fig, ax def plot_omega_squared_time_series(self, omega_squared, omega_classes): fig, ax = plt.subplots(1, 1) colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] + + # Reshape the omega array into a 1d array + n_slides = self._n_slides + if self._svd_rank == 0: + svd_rank = self._n_data_vars + else: + svd_rank = self._svd_rank + for ncomponent, component in enumerate(range(self._n_components)): ax.plot( self._time_means_array, np.where( omega_classes == component, - omega_squared.reshape((self._n_slides, self._svd_rank)), np.nan + omega_squared.reshape((n_slides, svd_rank)), + np.nan, ), - color=colors[ncomponent % len(colors)] + color=colors[ncomponent % len(colors)], ) - ax.set_ylabel('$|\omega|^{2}$') - ax.set_xlabel('Time') - ax.set_title('$|\omega|^2$ Spectrum (Moving Window)') + ax.set_ylabel("$|\omega|^{2}$") + ax.set_xlabel("Time") + ax.set_title("$|\omega|^2$ Spectrum (Moving Window)") return fig, ax - def global_reconstruction(self, ): + def global_reconstruction( + self, + ): # Container for the reconstructed time series glbl_reconstruction = np.zeros( - (self._n_data_vars, self._n_time_steps)).astype('complex128') + (self._n_data_vars, self._n_time_steps) + ).astype("complex128") # Count the number of windows contributing to each step xn = np.zeros(self._n_time_steps) @@ -367,10 +445,13 @@ def global_reconstruction(self, ): t = t - t_start # Perform the global reconstruction. 
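             # (Each window contributes w @ diag(b) @ exp(omega * t) + c; the
             # counter xn tracks window overlap so the sum can be averaged.)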
- recon_window = np.linalg.multi_dot([w, np.diag(b), np.exp(omega * t)]) + c + recon_window = ( + np.linalg.multi_dot([w, np.diag(b), np.exp(omega * t)]) + c + ) - window_indices = slice(k * self._step_size, - k * self._step_size + self._window_length) + window_indices = slice( + k * self._step_size, k * self._step_size + self._window_length + ) glbl_reconstruction[:, window_indices] += recon_window xn[window_indices] += 1 @@ -379,8 +460,13 @@ def global_reconstruction(self, ): return glbl_reconstruction - def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth=True, - include_means=True): + def scale_reconstruction( + self, + omega_classes, + cluster_centroids, + suppress_growth=True, + include_means=True, + ): """Reconstruct the sliding mrDMD into the constituent components. The reconstructed data are convolved with a guassian filter since @@ -395,7 +481,9 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth """ # Each individual reconstructed window - xr_sep = np.zeros((self._n_components, self._n_data_vars, self._n_time_steps)) + xr_sep = np.zeros( + (self._n_components, self._n_data_vars, self._n_time_steps) + ) # Track the total contribution from all windows to each time step xn = np.zeros(self._n_time_steps) @@ -405,9 +493,11 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth recon_filter_sd = self._window_length / 8 recon_filter = np.exp( -( - np.arange(self._window_length) - - (self._window_length + 1) / 2 - ) ** 2 / recon_filter_sd ** 2) + (np.arange(self._window_length) - (self._window_length + 1) / 2) + ** 2 + ) + / recon_filter_sd**2 + ) for k in range(self._n_slides): w = self._modes_array[k] @@ -426,13 +516,14 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth t = t - t_start xr_sep_window = np.zeros( - (self._n_components, self._n_data_vars, self._window_length)) + (self._n_components, self._n_data_vars, self._window_length) + ) for j in np.unique(omega_classes): xr_sep_window[j, :, :] = np.linalg.multi_dot( [ w[:, classification == j], np.diag(b[classification == j]), - np.exp(omega[classification == j] * t) + np.exp(omega[classification == j] * t), ] ) @@ -440,13 +531,20 @@ def scale_reconstruction(self, omega_classes, cluster_centroids, suppress_growth if include_means and (j == np.argmin(cluster_centroids)): xr_sep_window[j, :, :] += c xr_sep_window[j, :, :] = xr_sep_window[j, :, :] * recon_filter - window_indices = slice(k * self._step_size, - k * self._step_size + self._window_length) - xr_sep[j, :, window_indices] = xr_sep[j, :, - window_indices] + xr_sep_window[j, :, :] + + if k == self._n_slides - 1 and self._n_slide_last_window > 0: + window_indices = slice(-self._window_length, None) + else: + window_indices = slice( + k * self._step_size, + k * self._step_size + self._window_length, + ) + xr_sep[j, :, window_indices] = ( + xr_sep[j, :, window_indices] + xr_sep_window[j, :, :] + ) xn[window_indices] += recon_filter xr_sep = xr_sep / xn - return xr_sep \ No newline at end of file + return xr_sep From eee0d2221aed5a904b17b0ce9569d048cc57d5fb Mon Sep 17 00:00:00 2001 From: klapo Date: Thu, 13 Apr 2023 15:39:16 +0200 Subject: [PATCH 07/31] Feat: add setter to initial alpha for multi res disc --- pydmd/bopdmd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pydmd/bopdmd.py b/pydmd/bopdmd.py index 2de8115f5..a7f3b81e5 100644 --- a/pydmd/bopdmd.py +++ b/pydmd/bopdmd.py @@ -755,6 +755,7 @@ def __init__( trial_size=0.2, 
eig_sort="auto", varpro_opts_dict=None, + max_rank=None, ): self._svd_rank = svd_rank self._compute_A = compute_A @@ -764,6 +765,7 @@ def __init__( self._num_trials = num_trials self._trial_size = trial_size self._eig_sort = eig_sort + self._max_rank = max_rank if varpro_opts_dict is None: self._varpro_opts_dict = {} @@ -815,6 +817,10 @@ def init_alpha(self): raise RuntimeError(msg) return self._init_alpha + @init_alpha.setter + def init_alpha(self, value): + self._init_alpha = value + @property def proj_basis(self): """ @@ -898,7 +904,6 @@ def amplitudes_std(self): """ return self.operator.amplitudes_std - @property def eigenvalues_std(self): """ @@ -909,7 +914,6 @@ def eigenvalues_std(self): """ return self.operator.eigenvalues_std - def print_varpro_opts(self): """ Prints a formatted information string that displays all chosen From 5d4d559d05eaab1f9683fd1f5dfd1dad294d4f36 Mon Sep 17 00:00:00 2001 From: klapo Date: Tue, 18 Apr 2023 14:15:26 +0200 Subject: [PATCH 08/31] Feat: added svd_rank setter to bopdmd --- pydmd/bopdmd.py | 6 +- pydmd/{multires_discovery.py => costsdmd.py} | 123 +++++++++++-------- 2 files changed, 75 insertions(+), 54 deletions(-) rename pydmd/{multires_discovery.py => costsdmd.py} (84%) diff --git a/pydmd/bopdmd.py b/pydmd/bopdmd.py index 10e2e6912..21d81132e 100644 --- a/pydmd/bopdmd.py +++ b/pydmd/bopdmd.py @@ -782,7 +782,6 @@ def __init__( trial_size=0.2, eig_sort="auto", varpro_opts_dict=None, - max_rank=None, ): self._svd_rank = svd_rank self._compute_A = compute_A @@ -792,7 +791,6 @@ def __init__( self._num_trials = num_trials self._trial_size = trial_size self._eig_sort = eig_sort - self._max_rank = max_rank if varpro_opts_dict is None: self._varpro_opts_dict = {} @@ -814,6 +812,10 @@ def svd_rank(self): """ return self._svd_rank + @svd_rank.setter + def svd_rank(self, value): + self._svd_rank = value + @property def compute_A(self): """ diff --git a/pydmd/multires_discovery.py b/pydmd/costsdmd.py similarity index 84% rename from pydmd/multires_discovery.py rename to pydmd/costsdmd.py index 8d6a1248d..3515c61b0 100644 --- a/pydmd/multires_discovery.py +++ b/pydmd/costsdmd.py @@ -5,7 +5,15 @@ import matplotlib.pyplot as plt -class multi_res_discovery: +class CospamDMD: + """Coherent Spatiotemporal Scale Separation from Dynamic Mode Decomposition + + :param window_length: Length of the analysis window in number of time steps. + :type window_length: int + :param step_size: Number of time steps to slide each CSM-DMD window. + :type compute_A: bool + """ + def __init__( self, window_length=None, @@ -20,6 +28,8 @@ def __init__( pydmd_kwargs=None, cluster_centroids=None, reset_alpha_init=False, + force_even_eigs=True, + max_rank=None, ): self._n_components = n_components self._step_size = step_size @@ -32,6 +42,8 @@ def __init__( self._init_alpha = init_alpha self._cluster_centroids = cluster_centroids self._reset_alpha_init = reset_alpha_init + self._force_even_eigs = force_even_eigs + self._max_rank = max_rank # Initialize variables that are defined in fitting. 
self._n_data_vars = None @@ -59,9 +71,7 @@ def __init__( self._pydmd_kwargs["proj_basis"] = pydmd_kwargs.get( "proj_basis", None ) - self._pydmd_kwargs["use_proj"] = pydmd_kwargs.get( - "proj_basis", False - ) + self._pydmd_kwargs["use_proj"] = pydmd_kwargs.get("use_proj", False) def _compute_svd_rank(self, data, svd_rank=None): def omega(x): @@ -171,39 +181,45 @@ def modes_array(self): @property def amplitudes_array(self): if not hasattr(self, "_amplitudes_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._amplitudes_array @property def omega_array(self): if not hasattr(self, "_omega_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._omega_array @property def time_array(self): if not hasattr(self, "_time_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._time_array @property def window_means_array(self): if not hasattr(self, "_window_means_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._window_means_array @property def t_starts_array(self): if not hasattr(self, "_t_starts_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._t_starts_array @property def time_means_array(self): if not hasattr(self, "_time_means_array"): - raise ValueError("You need to call fit before") + raise ValueError("You need to call fit first.") return self._time_means_array + @property + def n_components(self): + if not hasattr(self, "_n_components"): + raise ValueError("You need to call `cluster_omega()` first.") + return self._n_components + def fit( self, data, @@ -216,7 +232,6 @@ def fit( # Prepare window and data properties. self._window_length = window_length self._step_size = step_size - self._n_time_steps, self._n_data_vars = self._data_shape(data) self._n_slides = self.build_windows( data, self._window_length, self._step_size @@ -235,12 +250,10 @@ def fit( # Build the projection basis if using a global svd. if self._global_svd: u = self._build_proj_basis(data, svd_rank=self._svd_rank) - # self._pydmd_kwargs['proj_basis'] = self._pydmd_kwargs.get('proj_basis', u) self._pydmd_kwargs["proj_basis"] = u self._pydmd_kwargs["use_proj"] = self._pydmd_kwargs.get( "use_proj", False ) - self._svd_rank = self._compute_svd_rank( data, svd_rank=self._svd_rank ) @@ -249,17 +262,16 @@ def fit( svd_rank_pre_allocate = self._compute_svd_rank( data, svd_rank=self._svd_rank ) - # If not using a global svd, local u from each window is used instead. + # If not using a global svd or a specified svd_rank, local u from each window is + # used instead. The optimal svd_rank may change when using the locally optimal + # svd_rank. To deal with this situation in the pre-allocation we give the + # maximally allowed svd_rank for pre-allocation. + elif self._max_rank is not None: + svd_rank_pre_allocate = self._max_rank else: - # The optimal svd_rank may change when using the locally optimal svd_rank. - # To deal with this situation in the pre-allocation we give the maximally - # allowed svd_rank for pre-allocation. svd_rank_pre_allocate = self._n_data_vars # Pre-allocate all elements for the sliding window DMD. - # Each element calculated for a window is returned through these array. 
- # data_array = np.zeros( - # (self._n_slides, self._n_data_vars, self._window_length)) # * np.nan self._time_array = np.zeros((self._n_slides, self._window_length)) self._modes_array = np.zeros( (self._n_slides, self._n_data_vars, svd_rank_pre_allocate), @@ -275,21 +287,15 @@ def fit( self._t_starts_array = np.zeros(self._n_slides) self._time_means_array = np.zeros(self._n_slides) - # Initialize the DMD class. - if self._initialize_artificially: - self._init_alpha = self._build_initizialization() - optdmd = BOPDMD( - svd_rank=self._svd_rank, - num_trials=0, - init_alpha=self._init_alpha, - **self._pydmd_kwargs, - ) - else: - optdmd = BOPDMD( - svd_rank=self._svd_rank, - num_trials=0, - **self._pydmd_kwargs, - ) + # Get initial values for the eigenvalues. + self._init_alpha = self._build_initizialization() + + # Initialize the BOPDMD object. + optdmd = BOPDMD( + svd_rank=self._svd_rank, + init_alpha=self._init_alpha, + **self._pydmd_kwargs, + ) # Round the corners of the window to shrink real components. lv_kern = self.calculate_lv_kern( @@ -310,48 +316,61 @@ def fit( sample_slice = slice( sample_start, sample_start + self._window_length ) - # sample_steps = np.arange(sample_start, sample_start + self._window_length) data_window = data[:, sample_slice] - time_window = time[:, sample_slice] - # data_array[k, :, :] = data_window - self._time_array[k, :] = time_window - self._time_means_array[k] = np.mean(time_window) + original_time_window = time[:, sample_slice] + time_window_mean = np.mean(original_time_window) # All windows are fit with the time array reset to start at t=0. - t_start = time_window[:, 0] - time_window = time_window - t_start - self._t_starts_array[k] = t_start + t_start = original_time_window[:, 0] + time_window = original_time_window - t_start - # Subtract off mean before rounding corners + # Subtract off the time mean before rounding corners. c = np.mean(data_window, 1, keepdims=True) data_window = data_window - c - # Round corners of the window + # Round the corners of the window. data_window = data_window * lv_kern - # Fit with the desired DMD class + # Reset optdmd between iterations + if not self._global_svd: + # Get the svd rank for this window. Primarily used when svd_rank is not + # fixed, i.e. svd_rank = 0. + _svd_rank = self._compute_svd_rank( + data_window, svd_rank=self._svd_rank + ) + # Force svd rank to be even to allow for conjugate pairs. + if _svd_rank % 2: + _svd_rank += 1 + # Force svd rank to not exceed a user specified amount. + if self._max_rank is not None: + optdmd.svd_rank = min(_svd_rank, self._max_rank) + else: + optdmd.svd_rank = _svd_rank + optdmd._proj_basis = self._pydmd_kwargs["proj_basis"] + + # Fit the window using the optDMD. optdmd.fit(data_window, time_window) - # Assign the results from this window + # Assign the results from this window. self._modes_array[k, :, : optdmd.modes.shape[-1]] = optdmd.modes self._omega_array[k, : optdmd.eigs.shape[0]] = optdmd.eigs self._amplitudes_array[ k, : optdmd.eigs.shape[0] ] = optdmd.amplitudes self._window_means_array[k, :] = c.flatten() + self._time_array[k, :] = original_time_window + self._time_means_array[k] = time_window_mean + self._t_starts_array[k] = t_start # Reset optdmd between iterations if not self._global_svd: - optdmd._svd_rank = self._svd_rank - optdmd._proj_basis = self._pydmd_kwargs["proj_basis"] - # The default behavior is to reset the optdmd object to use the default # initial value (None) or the user provided values. 
if not self._use_last_freq: - optdmd._init_alpha = self._init_alpha + optdmd.init_alpha = self._init_alpha # Otherwise use the eigenvalues from this window to seed the next window. elif self._use_last_freq: - optdmd._init_alpha = optdmd.eigs + optdmd.init_alpha = optdmd.eigs def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): self._n_components = n_components From 423bfdf9c542d6e6b3a4c7c65f3c315c379710a5 Mon Sep 17 00:00:00 2001 From: klapo Date: Tue, 18 Apr 2023 14:16:06 +0200 Subject: [PATCH 09/31] Feat: Refactor class name, added comments, bug fixes - Refactored to have a hopefully more catchy and descriptive name. No promises that this is the last rename. - re-blacked - Added checks for how to initialize the DMD fit. - Fixed global reconstruction --- pydmd/costsdmd.py | 88 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py index 3515c61b0..917f3ea16 100644 --- a/pydmd/costsdmd.py +++ b/pydmd/costsdmd.py @@ -5,20 +5,52 @@ import matplotlib.pyplot as plt -class CospamDMD: - """Coherent Spatiotemporal Scale Separation from Dynamic Mode Decomposition +class CostsDMD: + """Coherent Spatio-Temporal Scale Separation with DMD :param window_length: Length of the analysis window in number of time steps. :type window_length: int :param step_size: Number of time steps to slide each CSM-DMD window. - :type compute_A: bool + :type step_size: int + :param n_components: Number of independent frequency bands for this window length. + :type n_components: int + :param svd_rank: The rank of the BOPDMD fit. + :type svd_rank: int + :param global_svd: Flag indicating whether to find the proj_basis and initial + values using the entire dataset instead of individually for each window. + Generally using the global_svd speeds up the fitting process by not finding a + new initial value for each window. Default is True. + :type global_svd: bool + :param initialize_artificially: Flag indicating whether to initialize the DMD using + imaginary eigenvalues (i.e., the imaginary component of the cluster results from a + previous iteration) through the `cluster_centroids` keyword. Default is False. + :type initialize_artificially: bool + :param pydmd_kwargs: Keyword arguments to pass onto the BOPDMD object. + :type global_svd: dict + :param cluster_centroids: Cluster centroids from a previous fitting iteration to + use for the initial guess of the eigenvalues. Should only be the imaginary + component. + :type cluster_centroids: numpy array + :param reset_alpha_init: Flag indicating whether the initial guess for the BOPDMD + eigenvalues should be reset for each window. Resetting the initial value increases + the computation time due to finding a new initial guess. Default is False. + :type reset_alpha_init: bool + :param force_even_eigs: Flag indicating whether an even svd_rank should be forced + when not specifying the svd_rank directly (i.e., svd_rank=0). Default is True. + :type global_svd: bool + :param max_rank: Maximum svd_rank allowed when the svd_rank is found through rank + truncation (i.e., svd_rank=0). 
+    :type max_rank: int
+    :param use_kmean_freqs:
+    :type use_kmean_freqs: bool
+    :param init_alpha:
+    :type init_alpha: numpy array
     """

     def __init__(
         self,
         window_length=None,
         step_size=None,
-        n_components=None,
         svd_rank=None,
         global_svd=True,
         initialize_artificially=False,
@@ -30,6 +62,7 @@ def __init__(
         reset_alpha_init=False,
         force_even_eigs=True,
         max_rank=None,
+        n_components=None,
     ):
         self._n_components = n_components
         self._step_size = step_size
@@ -102,20 +135,33 @@ def _build_proj_basis(self, data, svd_rank=None):

     def _build_initizialization(self):
         """Method for making initial guess of DMD eigenvalues."""

-        # User provided initial eigenvalues.
+        # If no initial values are provided, return None by default.
         init_alpha = None
+        # User provided initial eigenvalues.
         if self._initialize_artificially and self._init_alpha is not None:
             init_alpha = self._init_alpha
         # Initial eigenvalue guesses from kmeans clustering.
-        elif self._initialize_artificially and self._init_alpha is None:
-            if self._use_kmean_freqs:
-                init_alpha = np.repeat(
-                    np.sqrt(self._cluster_centroids) * 1j,
-                    int(self._svd_rank / self._n_components),
-                )
-                init_alpha = init_alpha * np.tile(
-                    [1, -1], int(self._svd_rank / self._n_components)
-                )
+        elif (
+            self._initialize_artificially
+            and self._init_alpha is None
+            and self._cluster_centroids is not None
+        ):
+            init_alpha = np.repeat(
+                np.sqrt(self._cluster_centroids) * 1j,
+                int(self._svd_rank / self._n_components),
+            )
+            init_alpha = init_alpha * np.tile(
+                [1, -1], int(self._svd_rank / self._n_components)
+            )
+        # The user accidentally provided both methods of initializing the eigenvalues.
+        elif (
+            self._initialize_artificially
+            and self._init_alpha is not None
+            and self._cluster_centroids is not None
+        ):
+            raise ValueError(
+                "Only one of `init_alpha` and `cluster_centroids` can be provided"
+            )

         return init_alpha

@@ -333,8 +379,8 @@ def fit(
             # Reset optdmd between iterations
             if not self._global_svd:
-                # Get the svd rank for this window. Primarily used when svd_rank is not
-                # fixed, i.e. svd_rank = 0.
+                # Get the svd rank for this window. Uses rank truncation when svd_rank is
+                # not fixed, i.e. svd_rank = 0, otherwise uses the specified rank.
                 _svd_rank = self._compute_svd_rank(
                     data_window, svd_rank=self._svd_rank
                 )
@@ -468,9 +514,13 @@ def global_reconstruction(
                 np.linalg.multi_dot([w, np.diag(b), np.exp(omega * t)]) + c
             )

-            window_indices = slice(
-                k * self._step_size, k * self._step_size + self._window_length
-            )
+            if k == self._n_slides - 1 and self._n_slide_last_window > 0:
+                window_indices = slice(-self._window_length, None)
+            else:
+                window_indices = slice(
+                    k * self._step_size,
+                    k * self._step_size + self._window_length,
+                )
             glbl_reconstruction[:, window_indices] += recon_window
             xn[window_indices] += 1

From a9de7e8f514ec040201e1cba63bcdcb7687a8101 Mon Sep 17 00:00:00 2001
From: klapo
Date: Fri, 21 Apr 2023 10:59:22 +0200
Subject: [PATCH 10/31] Update: Clustering and reconstruction as native to
 object.

- Added `cluster_centroids`, `n_components`, `omega_classes` as properties.
- Changed detection of array shapes to use object derived values.
- Added flag to square the frequency or not.
- Sort the frequency band clusters according to centroid magnitude.
- Reconstructions, scale-separation, and plots according to object values
  instead of externally provided arguments.
- Mode thresholding for discarding insignificant frequency bands.
- Scale separation for iterative decomposition.
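
Note on the centroid sorting in the diff below: the relabeling reduces to an
argsort plus a lookup table. A minimal standalone sketch, assuming
`omega_classes` and `cluster_centroids` are the label array and centroid
array returned by scikit-learn's KMeans (toy values; names illustrative):

    import numpy as np

    omega_classes = np.array([[2, 0], [2, 1], [0, 1]])  # toy k-means labels
    cluster_centroids = np.array([5.0, 1.0, 0.2])       # unsorted centroids

    # Relabel so class 0 is the lowest-magnitude band, class 1 the next, etc.
    idx = np.argsort(cluster_centroids)
    lut = np.zeros_like(idx)
    lut[idx] = np.arange(cluster_centroids.size)
    omega_classes = lut[omega_classes]          # labels now sorted by magnitude
    cluster_centroids = cluster_centroids[idx]  # centroids in ascending order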
--- pydmd/costsdmd.py | 211 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 185 insertions(+), 26 deletions(-) diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py index 917f3ea16..3f4ddc0c4 100644 --- a/pydmd/costsdmd.py +++ b/pydmd/costsdmd.py @@ -89,6 +89,8 @@ def __init__( self._time_means_array = None self._amplitudes_array = None self._t_starts_array = None + self._cluster_centroids = None + self._omega_classes = None if pydmd_kwargs is None: self._pydmd_kwargs = { @@ -266,6 +268,18 @@ def n_components(self): raise ValueError("You need to call `cluster_omega()` first.") return self._n_components + @property + def cluster_centroids(self): + if not hasattr(self, "_cluster_centroids"): + raise ValueError("You need to call `cluster_omega()` first.") + return self._cluster_centroids + + @property + def omega_classes(self): + if not hasattr(self, "_omega_classes"): + raise ValueError("You need to call `cluster_omega()` first.") + return self._omega_classes + def fit( self, data, @@ -418,18 +432,20 @@ def fit( elif self._use_last_freq: optdmd.init_alpha = optdmd.eigs - def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): - self._n_components = n_components + def cluster_omega( + self, n_components, kmeans_kwargs=None, square_frequencies=True + ): # Reshape the omega array into a 1d array - n_slides = self._n_slides - if self._svd_rank == 0: - svd_rank = self._n_data_vars - else: - svd_rank = self._svd_rank + omega_array = self.omega_array + n_slides = omega_array.shape[0] + svd_rank = omega_array.shape[1] omega_rshp = omega_array.reshape(n_slides * svd_rank) - omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float") + if square_frequencies: + omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float") + else: + omega_squared = np.abs(omega_rshp.imag.astype("float")) if kmeans_kwargs is None: random_state = 0 @@ -442,16 +458,44 @@ def cluster_omega(self, omega_array, n_components, kmeans_kwargs=None): omega_classes = omega_classes.reshape(n_slides, svd_rank) cluster_centroids = kmeans.cluster_centers_.flatten() - return omega_classes, omega_squared, cluster_centroids + # Sort the clusters by the centroid magnitude. + idx = np.argsort(cluster_centroids) + lut = np.zeros_like(idx) + lut[idx] = np.arange(n_components) + omega_classes = lut[omega_classes] + cluster_centroids = cluster_centroids[idx] + + # Assign the results to the object. 
+ self._cluster_centroids = cluster_centroids + self._omega_classes = omega_classes + self._square_frequencies = square_frequencies + self._n_components = n_components + + return self + + def plot_omega_squared_histogram(self): + + # Reshape the omega array into a 1d array + omega_array = self.omega_array + n_slides = omega_array.shape[0] + svd_rank = omega_array.shape[1] + omega_rshp = omega_array.reshape(n_slides * svd_rank) + + if self._square_frequencies: + omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float") + label = "$|\omega|^{2}$" + else: + omega_squared = np.abs(omega_rshp.imag.astype("float")) + label = "$|\omega|$" + + cluster_centroids = self._cluster_centroids - @staticmethod - def plot_omega_squared_histogram(omega_squared, cluster_centroids): colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] fig, ax = plt.subplots(1, 1) ax.hist(omega_squared, bins=64) - ax.set_xlabel("$|\omega|^{2}$") + ax.set_xlabel(label) ax.set_ylabel("Count") - ax.set_title("$|\omega|^2$ Spectrum & k-Means Centroids") + ax.set_title("$\omega$ Spectrum & k-Means Centroids") [ ax.axvline(c, color=colors[nc % len(colors)]) for nc, c in enumerate(cluster_centroids) @@ -459,30 +503,36 @@ def plot_omega_squared_histogram(omega_squared, cluster_centroids): return fig, ax - def plot_omega_squared_time_series(self, omega_squared, omega_classes): + def plot_omega_squared_time_series(self): fig, ax = plt.subplots(1, 1) colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] # Reshape the omega array into a 1d array - n_slides = self._n_slides - if self._svd_rank == 0: - svd_rank = self._n_data_vars + omega_array = self.omega_array + n_slides = omega_array.shape[0] + svd_rank = omega_array.shape[1] + omega_rshp = omega_array.reshape(n_slides * svd_rank) + + if self._square_frequencies: + omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float") + label = "$|\omega|^{2}$" else: - svd_rank = self._svd_rank + omega_squared = np.abs(omega_rshp.imag.astype("float")) + label = "$|\omega|$" for ncomponent, component in enumerate(range(self._n_components)): ax.plot( self._time_means_array, np.where( - omega_classes == component, + self._omega_classes == component, omega_squared.reshape((n_slides, svd_rank)), np.nan, ), color=colors[ncomponent % len(colors)], ) - ax.set_ylabel("$|\omega|^{2}$") + ax.set_ylabel(label) ax.set_xlabel("Time") - ax.set_title("$|\omega|^2$ Spectrum (Moving Window)") + ax.set_title("$\omega$ Time Series") return fig, ax @@ -531,10 +581,10 @@ def global_reconstruction( def scale_reconstruction( self, - omega_classes, - cluster_centroids, suppress_growth=True, include_means=True, + magnitude_threshold=False, + data=None, ): """Reconstruct the sliding mrDMD into the constituent components. @@ -572,7 +622,7 @@ def scale_reconstruction( w = self._modes_array[k] b = self._amplitudes_array[k] omega = np.atleast_2d(self._omega_array[k]).T - classification = omega_classes[k] + classification = self._omega_classes[k] if suppress_growth: omega[omega.real > 0] = 1j * omega[omega.real > 0].imag @@ -587,7 +637,7 @@ def scale_reconstruction( xr_sep_window = np.zeros( (self._n_components, self._n_data_vars, self._window_length) ) - for j in np.unique(omega_classes): + for j in np.unique(self._omega_classes): xr_sep_window[j, :, :] = np.linalg.multi_dot( [ w[:, classification == j], @@ -597,7 +647,7 @@ def scale_reconstruction( ) # Add the constant offset to the lowest frequency cluster. 
-                if include_means and (j == np.argmin(cluster_centroids)):
+                if include_means and (j == np.argmin(self._cluster_centroids)):
                     xr_sep_window[j, :, :] += c
                 xr_sep_window[j, :, :] = xr_sep_window[j, :, :] * recon_filter
@@ -616,4 +666,113 @@
         xr_sep = xr_sep / xn

+        if magnitude_threshold:
+            if data is None:
+                raise ValueError(
+                    "The data must be provided when doing a magnitude cut-off of modes."
+                )
+            xr_sep = self.threshold_modes(data, xr_sep)
+
         return xr_sep
+
+    def threshold_modes(self, data, xr_sep):
+        """Remove frequency bands that do not contribute significantly to the magnitude of the reconstruction."""
+
+        # Remove scales that do not contribute significantly to the magnitude of the signal
+        n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))
+        magnitude_threshold = np.nanmedian(np.abs(data.real)) / 10
+
+        # Trim frequency bands that do not meet the magnitude threshold.
+        xr_sep = xr_sep[n > magnitude_threshold, ::]
+        self._cluster_centroids = self._cluster_centroids[
+            n > magnitude_threshold
+        ]
+
+        return xr_sep
+
+    def scale_separation(
+        self,
+        scale_reconstruction_kwargs=None,
+    ):
+        """Separate the lowest frequency band from the high frequency bands.
+
+        The lowest frequency band should contain the window means and can be passed on as the data for the next
+        decomposition level. The high-frequency bands should have periods shorter than the window length
+        (i.e., frequencies higher than 1 / window_length).
+
+        """
+
+        if scale_reconstruction_kwargs is None:
+            scale_reconstruction_kwargs = {}
+
+        xr_sep = self.scale_reconstruction(**scale_reconstruction_kwargs)
+        xr_low_frequency = xr_sep[0, :, :]
+        xr_high_frequency = xr_sep[1:, :, :].sum(axis=0)
+
+        return xr_low_frequency, xr_high_frequency
+
+    def plot_scale_separation(self, data, scale_reconstruction_kwargs=None):
+        """Plot the scale-separated low and high frequency bands."""
+        xr_low_frequency, xr_high_frequency = self.scale_separation(
+            data=data, **scale_reconstruction_kwargs
+        )
+
+        fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
+
+        ax = axes[0]
+        ax.pcolormesh(data, cmap="RdBu_r", vmin=-2, vmax=2)
+        ax.set_title(
+            "Input Data at decomposition window length = {}".format(
+                self._window_length
+            )
+        )
+
+        ax = axes[2]
+        ax.set_title("Reconstruction, high frequency")
+        ax.pcolormesh(xr_high_frequency, cmap="RdBu_r", vmin=-2, vmax=2)
+        ax.set_ylabel("Space (-)")
+        ax.set_xlabel("Time (-)")
+
+        ax = axes[1]
+        ax.set_title("Reconstruction, low frequency")
+        ax.pcolormesh(xr_low_frequency, cmap="RdBu_r", vmin=-2, vmax=2)
+        ax.set_ylabel("Space (-)")
+        ax.set_xlabel("Time (-)")
+
+        fig.tight_layout()
+
+    def plot_reconstructions(
+        self, data, plot_period=False, scale_reconstruction_kwargs=None
+    ):
+        if scale_reconstruction_kwargs is None:
+            scale_reconstruction_kwargs = {}
+
+        xr_sep = self.scale_reconstruction(
+            data=data, **scale_reconstruction_kwargs
+        )
+
+        fig, axes = plt.subplots(
+            len(self._cluster_centroids) + 1, 1, sharex=True, figsize=(10, 10)
+        )
+
+        ax = axes[0]
+        ax.pcolormesh(data.real, cmap="RdBu_r", vmin=-2, vmax=2)
+        ax.set_ylabel("Space (-)")
+        ax.set_xlabel("Time (-)")
+        ax.set_title("Input Data")
+
+        for n_cluster, cluster in enumerate(self._cluster_centroids):
+            if plot_period:
+                x = 2 * np.pi / cluster
+                title = "Reconstruction, central period={:.2f}"
+            else:
+                x = cluster
+                title = "Reconstruction, central eig={:.2f}"

+            ax = axes[n_cluster + 1]
+            xr_scale = xr_sep[n_cluster, :, :]
+            ax.pcolormesh(xr_scale, cmap="RdBu_r", vmin=-2, vmax=2)
+            ax.set_ylabel("Space (-)")
+            ax.set_xlabel("Time (-)")
+            ax.set_title(title.format(x))
+
+        fig.tight_layout()

From c6bf12a4aa46fde4394e23b3621156531a28096e Mon Sep 17 00:00:00 2001
From: klapo
Date: Fri, 21 Apr 2023 14:53:21 +0200
Subject: [PATCH 11/31] Fix: mode trimming and clustering results

- Add a sorted list of cluster label values, useful for trimming
- Fixed mode trimming and added a flag for resetting trimming.
---
 pydmd/costsdmd.py | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 3f4ddc0c4..16e19f456 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -470,6 +470,8 @@ def cluster_omega(
         self._omega_classes = omega_classes
         self._square_frequencies = square_frequencies
         self._n_components = n_components
+        self._class_values = np.unique(omega_classes)[idx]
+        self._trimmed = False

         return self

@@ -678,15 +680,22 @@ def scale_reconstruction(
     def threshold_modes(self, data, xr_sep):
         """Remove frequency bands that do not contribute significantly to the magnitude of the reconstruction."""

-        # Remove scales that do not contribute significantly to the magnitude of the signal
-        n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))
-        magnitude_threshold = np.nanmedian(np.abs(data.real)) / 10
+        if not self._trimmed:
+            # Remove scales that do not contribute significantly to the magnitude of the signal
+            n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))
+            magnitude_threshold = np.nanmedian(np.abs(data.real)) / 100

-        # Trim frequency bands that do not meet the magnitude threshold.
-        xr_sep = xr_sep[n > magnitude_threshold, ::]
-        self._cluster_centroids = self._cluster_centroids[
-            n > magnitude_threshold
-        ]
+            # Trim frequency bands that do not meet the magnitude threshold.
+            xr_sep = xr_sep[n > magnitude_threshold, ::]
+            self._cluster_centroids = self._cluster_centroids[
+                n > magnitude_threshold
+            ]
+            num_modes_to_keep = np.sum(n > magnitude_threshold)
+            self._class_values = self._class_values[
+                self._class_values < num_modes_to_keep
+            ]
+
+            self._trimmed = True

         return xr_sep

@@ -721,8 +730,14 @@ def scale_separation(
     def plot_scale_separation(self, data, scale_reconstruction_kwargs=None):
         """Plot the scale-separated low and high frequency bands."""
+        if scale_reconstruction_kwargs is None:
+            scale_reconstruction_kwargs = {}
+        scale_reconstruction_kwargs["data"] = scale_reconstruction_kwargs.get(
+            "data", data
+        )
+
         xr_low_frequency, xr_high_frequency = self.scale_separation(
-            data=data, **scale_reconstruction_kwargs
+            scale_reconstruction_kwargs
         )

         fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
@@ -746,9 +761,7 @@ def plot_reconstructions(
         if scale_reconstruction_kwargs is None:
             scale_reconstruction_kwargs = {}

-        xr_sep = self.scale_reconstruction(
-            data=data, **scale_reconstruction_kwargs
-        )
+        xr_sep = self.scale_reconstruction(scale_reconstruction_kwargs)

From 532d8760db297a34ebf7f3c70f692f846d9eca55 Mon Sep 17 00:00:00 2001
From: klapo
Date: Mon, 24 Apr 2023 16:42:10 +0200
Subject: [PATCH 12/31] Doc: Added to docstring, reorganized class, added
 residuals plot

- Documented several new additions to the doc string
- Reorganized the class to have properties first.
- Added the ability to include residuals to the reconstruction plots.
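
For context on the residual panels: the plotted residual is simply the input
minus the summed reconstruction. A minimal sketch, assuming `costs` is a
fitted CostsDMD instance and `data` is the original space-by-time snapshot
matrix (names illustrative):

    import numpy as np

    # Low/high frequency bands from the fitted decomposition.
    xr_low, xr_high = costs.scale_separation()

    # The residual panel shows what the two bands together fail to capture.
    residual = data - xr_low - xr_high
    print("median |residual|:", np.median(np.abs(residual)))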
---
 pydmd/costsdmd.py | 215 ++++++++++++++++++++++++++++------------
 1 file changed, 130 insertions(+), 85 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 16e19f456..cd733bd92 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -6,7 +6,7 @@

 class CostsDMD:
-    """Coherent Spatio-Temporal Scale Separation with DMD
+    """Coherent Spatio-Temporal Scale Separation with DMD.

     :param window_length: Length of the analysis window in number of time steps.
     :type window_length: int
@@ -26,7 +26,7 @@ class CostsDMD:
     previous iteration) through the `cluster_centroids` keyword. Default is False.
     :type initialize_artificially: bool
     :param pydmd_kwargs: Keyword arguments to pass onto the BOPDMD object.
-    :type global_svd: dict
+    :type pydmd_kwargs: dict
     :param cluster_centroids: Cluster centroids from a previous fitting iteration to
     use for the initial guess of the eigenvalues. Should only be the imaginary
     component.
     :type cluster_centroids: numpy array
@@ -41,10 +41,22 @@ class CostsDMD:
     :param max_rank: Maximum svd_rank allowed when the svd_rank is found through rank
     truncation (i.e., svd_rank=0).
     :type max_rank: int
-    :param use_kmean_freqs:
+    :param use_kmean_freqs: Flag specifying if the BOPDMD fit should use initial values
    taken from cluster centroids, e.g., from a previous iteration.
     :type use_kmean_freqs: bool
-    :param init_alpha:
+    :param init_alpha: Initial guess for the eigenvalues provided to BOPDMD. Must have
    length equal to the `svd_rank`.
     :type init_alpha: numpy array
+    :param max_rank: Maximum allowed `svd_rank`. Overrides the optimal rank truncation if
+    `svd_rank=0`.
+    :type max_rank: int
+    :param n_components: Number of frequency bands to use for clustering.
+    :type n_components: int
+    :param force_even_eigs: Flag specifying if the `svd_rank` should be forced to be even.
+    :type force_even_eigs: bool
+    :param reset_alpha_init: Flag specifying if the initial eigenvalue guess should be reset
+    between windows.
+    :type reset_alpha_init: bool
     """

     def __init__(
@@ -92,6 +104,7 @@ def __init__(
         self._cluster_centroids = None
         self._omega_classes = None

+        # Specify default keywords to hand to BOPDMD.
         if pydmd_kwargs is None:
             self._pydmd_kwargs = {
                 "eig_sort": "imag",
@@ -108,6 +121,74 @@ def __init__(
             )
             self._pydmd_kwargs["use_proj"] = pydmd_kwargs.get("use_proj", False)

+    @property
+    def svd_rank(self):
+        """
+        :return: the rank used for the svd truncation.
+        :rtype: int or float
+        """
+        return self._svd_rank
+
+    @property
+    def modes_array(self):
+        if not hasattr(self, "_modes_array"):
+            raise ValueError("You need to call fit first.")
+        return self._modes_array
+
+    @property
+    def amplitudes_array(self):
+        if not hasattr(self, "_amplitudes_array"):
+            raise ValueError("You need to call fit first.")
+        return self._amplitudes_array
+
+    @property
+    def omega_array(self):
+        if not hasattr(self, "_omega_array"):
+            raise ValueError("You need to call fit first.")
+        return self._omega_array
+
+    @property
+    def time_array(self):
+        if not hasattr(self, "_time_array"):
+            raise ValueError("You need to call fit first.")
+        return self._time_array
+
+    @property
+    def window_means_array(self):
+        if not hasattr(self, "_window_means_array"):
+            raise ValueError("You need to call fit first.")
+        return self._window_means_array
+
+    @property
+    def t_starts_array(self):
+        if not hasattr(self, "_t_starts_array"):
+            raise ValueError("You need to call fit first.")
+        return self._t_starts_array
+
+    @property
+    def time_means_array(self):
+        if not hasattr(self, "_time_means_array"):
+            raise ValueError("You need to call fit first.")
+        return self._time_means_array
+
+    @property
+    def n_components(self):
+        if not hasattr(self, "_n_components"):
+            raise ValueError("You need to call `cluster_omega()` first.")
+        return self._n_components
+
+    @property
+    def cluster_centroids(self):
+        if not hasattr(self, "_cluster_centroids"):
+            raise ValueError("You need to call `cluster_omega()` first.")
+        return self._cluster_centroids
+
+    @property
+    def omega_classes(self):
+        if not hasattr(self, "_omega_classes"):
+            raise ValueError("You need to call `cluster_omega()` first.")
+        return self._omega_classes
+
     def _compute_svd_rank(self, data, svd_rank=None):
         def omega(x):
             return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43
@@ -212,74 +293,6 @@ def _data_shape(self, data):
         n_time_steps = np.shape(data)[1]
         n_data_vars = np.shape(data)[0]
         return n_time_steps, n_data_vars

-    @property
-    def svd_rank(self):
-        """
-        :return: the rank used for the svd truncation.
- :rtype: int or float - """ - return self._svd_rank - - @property - def modes_array(self): - if not hasattr(self, "_modes_array"): - raise ValueError("You need to call fit before") - return self._modes_array - - @property - def amplitudes_array(self): - if not hasattr(self, "_amplitudes_array"): - raise ValueError("You need to call fit first.") - return self._amplitudes_array - - @property - def omega_array(self): - if not hasattr(self, "_omega_array"): - raise ValueError("You need to call fit first.") - return self._omega_array - - @property - def time_array(self): - if not hasattr(self, "_time_array"): - raise ValueError("You need to call fit first.") - return self._time_array - - @property - def window_means_array(self): - if not hasattr(self, "_window_means_array"): - raise ValueError("You need to call fit first.") - return self._window_means_array - - @property - def t_starts_array(self): - if not hasattr(self, "_t_starts_array"): - raise ValueError("You need to call fit first.") - return self._t_starts_array - - @property - def time_means_array(self): - if not hasattr(self, "_time_means_array"): - raise ValueError("You need to call fit first.") - return self._time_means_array - - @property - def n_components(self): - if not hasattr(self, "_n_components"): - raise ValueError("You need to call `cluster_omega()` first.") - return self._n_components - - @property - def cluster_centroids(self): - if not hasattr(self, "_cluster_centroids"): - raise ValueError("You need to call `cluster_omega()` first.") - return self._cluster_centroids - - @property - def omega_classes(self): - if not hasattr(self, "_omega_classes"): - raise ValueError("You need to call `cluster_omega()` first.") - return self._omega_classes - def fit( self, data, @@ -719,7 +732,9 @@ def scale_separation( return xr_low_frequency, xr_high_frequency - def plot_scale_separation(self, data, scale_reconstruction_kwargs=None): + def plot_scale_separation( + self, data, scale_reconstruction_kwargs=None, plot_residual=False + ): """Plot the scale-separated low and high frequency bands.""" if scale_reconstruction_kwargs is None: scale_reconstruction_kwargs = {} @@ -731,7 +746,10 @@ def plot_scale_separation(self, data, scale_reconstruction_kwargs=None): scale_reconstruction_kwargs ) - fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 8)) + if plot_residual: + fig, axes = plt.subplots(4, 1, sharex=True, figsize=(6, 4)) + else: + fig, axes = plt.subplots(3, 1, sharex=True, figsize=(6, 4)) ax = axes[0] ax.pcolormesh(data, cmap="RdBu_r", vmin=-2, vmax=2) @@ -740,23 +758,36 @@ def plot_scale_separation(self, data, scale_reconstruction_kwargs=None): self._window_length ) ) + ax = axes[1] + ax.set_title("Reconstruction, low frequency") + ax.pcolormesh(xr_low_frequency, cmap="RdBu_r", vmin=-2, vmax=2) + ax.set_ylabel("Space (-)") ax = axes[2] ax.set_title("Reconstruction, high frequency") ax.pcolormesh(xr_high_frequency, cmap="RdBu_r", vmin=-2, vmax=2) ax.set_ylabel("Space (-)") - ax.set_xlabel("Time (-)") - ax = axes[1] - ax.set_title("Reconstruction, low frequency") - ax.pcolormesh(xr_low_frequency, cmap="RdBu_r", vmin=-2, vmax=2) - ax.set_ylabel("Space (-)") - ax.set_xlabel("Time (-)") + if plot_residual: + ax = axes[3] + ax.set_title("Residual") + ax.pcolormesh( + data - xr_high_frequency - xr_low_frequency, + cmap="RdBu_r", + vmin=-2, + vmax=2, + ) + ax.set_ylabel("Space (-)") + axes[-1].set_xlabel("Time (-)") fig.tight_layout() def plot_reconstructions( - self, data, plot_period=False, scale_reconstruction_kwargs=None + self, 
+        data,
+        plot_period=False,
+        scale_reconstruction_kwargs=None,
+        plot_residual=False,
     ):
         if scale_reconstruction_kwargs is None:
             scale_reconstruction_kwargs = {}
@@ -764,15 +795,18 @@ def plot_reconstructions(
         xr_sep = self.scale_reconstruction(scale_reconstruction_kwargs)

         fig, axes = plt.subplots(
-            len(self._cluster_centroids) + 1, 1, sharex=True, figsize=(10, 10)
+            len(self._cluster_centroids) + 1, 1, sharex=True, figsize=(6, 6)
         )

         ax = axes[0]
         ax.pcolormesh(data.real, cmap="RdBu_r", vmin=-2, vmax=2)
         ax.set_ylabel("Space (-)")
         ax.set_xlabel("Time (-)")
-        ax.set_title("Input Data")
-
+        ax.set_title(
+            "Input Data at decomposition window length = {}".format(
+                self._window_length
+            )
+        )
         for n_cluster, cluster in enumerate(self._cluster_centroids):
             if plot_period:
                 x = 2 * np.pi / cluster
                 title = "Reconstruction, central period={:.2f}"
             else:
                 x = cluster
                 title = "Reconstruction, central eig={:.2f}"
@@ -785,7 +819,18 @@ def plot_reconstructions(
             ax = axes[n_cluster + 1]
             xr_scale = xr_sep[n_cluster, :, :]
             ax.pcolormesh(xr_scale, cmap="RdBu_r", vmin=-2, vmax=2)
             ax.set_ylabel("Space (-)")
-            ax.set_xlabel("Time (-)")
             ax.set_title(title.format(x))

+        if plot_residual:
+            ax = axes[-1]
+            ax.set_title("Residual")
+            ax.pcolormesh(
+                data - xr_sep.sum(axis=0),
+                cmap="RdBu_r",
+                vmin=-2,
+                vmax=2,
+            )
+            ax.set_ylabel("Space (-)")
+
+        axes[-1].set_xlabel("Time (-)")
         fig.tight_layout()

From ad91181da62ee62fd3a22b3146b372b6f70f70aa Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 25 Apr 2023 16:04:47 +0200
Subject: [PATCH 13/31] Plot fix for scale separated reconstruction

---
 pydmd/costsdmd.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index cd733bd92..3309d0a22 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -795,7 +795,10 @@ def plot_reconstructions(
         xr_sep = self.scale_reconstruction(scale_reconstruction_kwargs)

         fig, axes = plt.subplots(
-            len(self._cluster_centroids) + 1, 1, sharex=True, figsize=(6, 6)
+            len(self._cluster_centroids) + 1,
+            1,
+            sharex=True,
+            figsize=(6, 1.5 * len(self._cluster_centroids) + 1),
         )

From a11133351213b9ec66646c00e94bd0a29001f357 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 25 Apr 2023 16:10:54 +0200
Subject: [PATCH 14/31] Fix: Global reconstruction uses correct Gaussian
 weighting

- The MATLAB code and previous versions did not correctly weight each
  window by a Gaussian, as described in the paper and as implemented for
  the scale-separated reconstruction (e.g., high vs. low frequency). The
  global reconstruction now simply sums the individual reconstructed
  components, which already use the Gaussian weighting.
---
 pydmd/costsdmd.py | 49 +++++++----------------------------------------
 1 file changed, 7 insertions(+), 42 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 3309d0a22..edcd98fed 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -551,48 +551,13 @@ def plot_omega_squared_time_series(self):

         return fig, ax

-    def global_reconstruction(
-        self,
-    ):
-        # Container for the reconstructed time series
-        glbl_reconstruction = np.zeros(
-            (self._n_data_vars, self._n_time_steps)
-        ).astype("complex128")
-
-        # Count the number of windows contributing to each step
-        xn = np.zeros(self._n_time_steps)
-
-        for k in range(self._n_slides):
-            # Extract out the DMD fit for this window.
-            w = self._modes_array[k]
-            b = self._amplitudes_array[k]
-            omega = np.atleast_2d(self._omega_array[k]).T
-            c = np.atleast_2d(self._window_means_array[k]).T
-
-            # Compute each segment starting at t=0
-            t = self._time_array[k]
-            t_start = self._t_starts_array[k]
-            t = t - t_start
-
-            # Perform the global reconstruction.
- recon_window = ( - np.linalg.multi_dot([w, np.diag(b), np.exp(omega * t)]) + c - ) - - if k == self._n_slides - 1 and self._n_slide_last_window > 0: - window_indices = slice(-self._window_length, None) - else: - window_indices = slice( - k * self._step_size, - k * self._step_size + self._window_length, - ) - glbl_reconstruction[:, window_indices] += recon_window - xn[window_indices] += 1 - - # Weight xr so all steps are on equal footing - glbl_reconstruction = glbl_reconstruction / xn - - return glbl_reconstruction + def global_reconstruction(self, kwargs=None): + """Helper function for generating the global reconstruction.""" + if kwargs is None: + kwargs = {} + xr_sep = self.scale_reconstruction(**kwargs) + x_global_recon = xr_sep.sum(axis=0) + return x_global_recon def scale_reconstruction( self, From f5d7e04413a93317cd8b16a1ecb5bf5bc781f550 Mon Sep 17 00:00:00 2001 From: klapo Date: Tue, 25 Apr 2023 18:40:09 +0200 Subject: [PATCH 15/31] Renamed plotting function --- pydmd/costsdmd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py index edcd98fed..4b7e73298 100644 --- a/pydmd/costsdmd.py +++ b/pydmd/costsdmd.py @@ -103,6 +103,7 @@ def __init__( self._t_starts_array = None self._cluster_centroids = None self._omega_classes = None + self._square_frequencies = None # Specify default keywords to hand to BOPDMD. if pydmd_kwargs is None: @@ -488,7 +489,7 @@ def cluster_omega( return self - def plot_omega_squared_histogram(self): + def plot_omega_histogram(self): # Reshape the omega array into a 1d array omega_array = self.omega_array From 9eb521e461a6eecb915965dbadedc1047e91bacf Mon Sep 17 00:00:00 2001 From: klapo Date: Wed, 26 Apr 2023 11:53:46 +0200 Subject: [PATCH 16/31] Feat: hyperparameter sweep for n clusters - Hyperparameter sweep function for finding the optimal number of clusters in the kmeans clustering of window frequencies. Uses the silhouette score. 
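
The selection criterion can be reproduced in isolation with scikit-learn; a
minimal sketch on synthetic one-dimensional frequencies (all names and
values illustrative):

    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    rng = np.random.default_rng(0)
    # Synthetic |omega| values drawn from three well-separated bands.
    freqs = np.concatenate([rng.normal(m, 0.1, 200) for m in (1.0, 4.0, 9.0)])
    X = np.atleast_2d(freqs).T

    # Score each candidate number of clusters; the silhouette score peaks
    # when the clusters match the underlying bands.
    scores = {}
    for n in range(2, 7):
        labels = KMeans(n_clusters=n, n_init=10, random_state=0).fit_predict(X)
        scores[n] = silhouette_score(X, labels)

    best_n = max(scores, key=scores.get)  # expected: 3 for this toy data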
---
 pydmd/costsdmd.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 4b7e73298..3ce39e04f 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -2,6 +2,7 @@
 from pydmd.bopdmd import BOPDMD
 import scipy
 from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score
 import matplotlib.pyplot as plt

@@ -489,6 +490,35 @@ def cluster_omega(

         return self

+    def cluster_hyperparameter_sweep(
+        self, n_components_range=None, square_frequencies=False
+    ):
+        """Performs a hyperparameter search for the number of components in the kmeans clustering."""
+        if n_components_range is None:
+            n_components_range = np.arange(self.svd_rank // 4, self.svd_rank)
+        score = np.zeros_like(n_components_range, float)
+
+        # Reshape the omega array into a 1d array
+        omega_array = self.omega_array
+        n_slides = omega_array.shape[0]
+        svd_rank = omega_array.shape[1]
+        omega_rshp = omega_array.reshape(n_slides * svd_rank)
+        if square_frequencies:
+            omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        else:
+            omega_squared = np.abs(omega_rshp.imag.astype("float"))
+
+        for nind, n in enumerate(n_components_range):
+            _ = self.cluster_omega(n_components=n, square_frequencies=False)
+
+            classes_reshape = self.omega_classes.reshape(n_slides * svd_rank)
+
+            score[nind] = silhouette_score(
+                np.atleast_2d(omega_squared).T, np.atleast_2d(classes_reshape).T
+            )
+
+        return n_components_range[np.argmax(score)]
+
     def plot_omega_histogram(self):

From 1e64ed4664f172b7537fab66c7020e065189bbf6 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 2 May 2023 17:31:10 +0200
Subject: [PATCH 17/31] Feat: Added `to_xarray` method for class

- to_xarray allows converting results to an xarray Dataset, enabling
  reading and writing netCDF files and xarray's powerful alignment tools
  for comparing between decomposition levels.
---
 pydmd/costsdmd.py | 89 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 80 insertions(+), 9 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 3ce39e04f..961203649 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -4,6 +4,7 @@
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 import matplotlib.pyplot as plt
+import xarray as xr

@@ -334,6 +335,10 @@ def fit(
             )
             svd_rank_pre_allocate = self._svd_rank
         elif not self._global_svd and self._svd_rank > 0:
+            if self._force_even_eigs and self._svd_rank % 2:
+                raise ValueError(
+                    "svd_rank is odd, but force_even_eigs is True."
+                )
             svd_rank_pre_allocate = self._compute_svd_rank(
                 data, svd_rank=self._svd_rank
             )
@@ -383,14 +388,7 @@ def fit(
             if k // 50 == k / 50:
                 print("{} of {}".format(k, self._n_slides))

-            # Get the window indices and data.
-            sample_start = self._step_size * k
-            if k == self._n_slides - 1 and self._n_slide_last_window > 0:
-                sample_slice = slice(-self._window_length, None)
-            else:
-                sample_slice = slice(
-                    sample_start, sample_start + self._window_length
-                )
+            sample_slice = self.get_window_indices(k)
             data_window = data[:, sample_slice]
             original_time_window = time[:, sample_slice]
             time_window_mean = np.mean(original_time_window)
@@ -414,7 +412,7 @@ def fit(
                     data_window, svd_rank=self._svd_rank
                 )
                 # Force svd rank to be even to allow for conjugate pairs.
-                if _svd_rank % 2:
+                if self._force_even_eigs and _svd_rank % 2:
                     _svd_rank += 1
                 # Force svd rank to not exceed a user specified amount.
if self._max_rank is not None: @@ -447,6 +445,21 @@ def fit( elif self._use_last_freq: optdmd.init_alpha = optdmd.eigs + def get_window_indices(self, k): + """ + + @return: + """ + # Get the window indices and data. + sample_start = self._step_size * k + if k == self._n_slides - 1 and self._n_slide_last_window > 0: + sample_slice = slice(-self._window_length, None) + else: + sample_slice = slice( + sample_start, sample_start + self._window_length + ) + return sample_slice + def cluster_omega( self, n_components, kmeans_kwargs=None, square_frequencies=True ): @@ -833,3 +846,61 @@ def plot_reconstructions( axes[-1].set_xlabel("Time (-)") fig.tight_layout() + + return fig, axes + + def to_xarray(self): + """Build an xarray dataset from the fitted CoSTS object. + + The CoSTS object is converted to an xarray dataset, which allows + saving the computationally expensive results, e.g., between iterations. + + The reconstructed data are not included since their size can rapidly + explode to unexpected sizes. e.g., a 30MB dataset, decomposed at 6 + levels with an average number of frequency bands across decomposition + levels equal to 8 becomes 1.3GB once reconstructed for each band. + + """ + ds = xr.Dataset( + { + "omega": (("window_time_means", "rank"), self.omega_array), + "omega_classes": ( + ("window_time_means", "rank"), + self.omega_classes, + ), + "amplitudes": ( + ("window_time_means", "rank"), + self.amplitudes_array, + ), + "modes": ( + ("window_time_means", "space", "rank"), + self.modes_array, + ), + "window_means": ( + ("window_time_means", "space"), + self.window_means_array, + ), + "cluster_centroids": ( + "frequency_band", + self._cluster_centroids, + ), + }, + coords={ + "window_time_means": self.time_means_array, + "slide": ("window_time_means", np.arange(self._n_slides)), + "rank": np.arange(self.svd_rank), + "space": np.arange(self._n_data_vars), + "frequency_band": np.arange(self.n_components), + }, + attrs={ + "svd_rank": self.svd_rank, + "square_frequencies": self._square_frequencies, + "n_slides": self._n_slides, + "window_length": self._window_length, + "num_frequency_bands": self.n_components, + "n_data_vars": self._n_data_vars, + "n_time_steps": self._n_time_steps, + }, + ) + + return ds From fad681315241912cd52452a1c488619f73ec650b Mon Sep 17 00:00:00 2001 From: klapo Date: Tue, 9 May 2023 13:33:02 +0200 Subject: [PATCH 18/31] Fix: Force n_components > 1 and plot kwargs - In the hyperparameter sweep for the optimal number of clusters, a cluster size of 1 causes an error. For the case with a small svd_rank the automatic selection of clusters is forced to be larger than one. - Plotting functions now accept plot_kwargs and fig_kwargs controlling the appearance of the plots and the figures, respectively. 
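
The kwargs handling in these plotting functions follows a defaulting pattern:
user-supplied values win, unset keys fall back to defaults. A minimal sketch
of the same idea using dict.setdefault (hypothetical helper, not part of the
class):

    import numpy as np

    def merge_plot_kwargs(data, plot_kwargs=None):
        # User-supplied values take precedence; unset keys fall back to
        # symmetric color limits and a diverging colormap.
        if plot_kwargs is None:
            plot_kwargs = {}
        plot_kwargs.setdefault("vmin", -np.abs(data).max())
        plot_kwargs.setdefault("vmax", np.abs(data).max())
        plot_kwargs.setdefault("cmap", "RdBu_r")
        return plot_kwargs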
--- pydmd/costsdmd.py | 66 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py index 961203649..ccd52badb 100644 --- a/pydmd/costsdmd.py +++ b/pydmd/costsdmd.py @@ -508,7 +508,9 @@ def cluster_hyperparameter_sweep( ): """Performs a hyperparameter search for the number of components in the kmeans clustering.""" if n_components_range is None: - n_components_range = np.arange(self.svd_rank // 4, self.svd_rank) + n_components_range = np.arange( + np.max((self.svd_rank // 4, 2)), self.svd_rank + ) score = np.zeros_like(n_components_range, float) # Reshape the omega array into a 1d array @@ -742,7 +744,12 @@ def scale_separation( return xr_low_frequency, xr_high_frequency def plot_scale_separation( - self, data, scale_reconstruction_kwargs=None, plot_residual=False + self, + data, + scale_reconstruction_kwargs=None, + plot_residual=False, + fig_kwargs=None, + plot_kwargs=None, ): """Plot the scale-separated low and high frequency bands.""" if scale_reconstruction_kwargs is None: @@ -755,13 +762,24 @@ def plot_scale_separation( scale_reconstruction_kwargs ) + if fig_kwargs is None: + fig_kwargs = {} + fig_kwargs["sharex"] = fig_kwargs.get("sharex", True) + fig_kwargs["figsize"] = fig_kwargs.get("figsize", (6, 4)) + + if plot_kwargs is None: + plot_kwargs = {} + plot_kwargs["vmin"] = plot_kwargs.get("vmin", -np.abs(data).max()) + plot_kwargs["vmax"] = plot_kwargs.get("vmax", np.abs(data).max()) + plot_kwargs["cmap"] = plot_kwargs.get("cmap", "RdBu_r") + if plot_residual: - fig, axes = plt.subplots(4, 1, sharex=True, figsize=(6, 4)) + fig, axes = plt.subplots(4, 1, **fig_kwargs) else: - fig, axes = plt.subplots(3, 1, sharex=True, figsize=(6, 4)) + fig, axes = plt.subplots(3, 1, **fig_kwargs) ax = axes[0] - ax.pcolormesh(data, cmap="RdBu_r", vmin=-2, vmax=2) + ax.pcolormesh(data, **plot_kwargs) ax.set_title( "Input Data at decomposition window length = {}".format( self._window_length @@ -769,22 +787,19 @@ def plot_scale_separation( ) ax = axes[1] ax.set_title("Reconstruction, low frequency") - ax.pcolormesh(xr_low_frequency, cmap="RdBu_r", vmin=-2, vmax=2) + ax.pcolormesh(xr_low_frequency, **plot_kwargs) ax.set_ylabel("Space (-)") ax = axes[2] ax.set_title("Reconstruction, high frequency") - ax.pcolormesh(xr_high_frequency, cmap="RdBu_r", vmin=-2, vmax=2) + ax.pcolormesh(xr_high_frequency, **plot_kwargs) ax.set_ylabel("Space (-)") if plot_residual: ax = axes[3] ax.set_title("Residual") ax.pcolormesh( - data - xr_high_frequency - xr_low_frequency, - cmap="RdBu_r", - vmin=-2, - vmax=2, + data - xr_high_frequency - xr_low_frequency, **plot_kwargs ) ax.set_ylabel("Space (-)") @@ -797,21 +812,35 @@ def plot_reconstructions( plot_period=False, scale_reconstruction_kwargs=None, plot_residual=False, + fig_kwargs=None, + plot_kwargs=None, ): if scale_reconstruction_kwargs is None: scale_reconstruction_kwargs = {} xr_sep = self.scale_reconstruction(scale_reconstruction_kwargs) + if fig_kwargs is None: + fig_kwargs = {} + fig_kwargs["sharex"] = fig_kwargs.get("sharex", True) + fig_kwargs["figsize"] = fig_kwargs.get( + "figsize", (6, 1.5 * len(self._cluster_centroids) + 1) + ) + + if plot_kwargs is None: + plot_kwargs = {} + plot_kwargs["vmin"] = plot_kwargs.get("vmin", -np.abs(data).max()) + plot_kwargs["vmax"] = plot_kwargs.get("vmax", np.abs(data).max()) + plot_kwargs["cmap"] = plot_kwargs.get("cmap", "RdBu_r") + fig, axes = plt.subplots( len(self._cluster_centroids) + 1, 1, - sharex=True, - figsize=(6, 1.5 * 
len(self._cluster_centroids) + 1),
+            **fig_kwargs,
         )

         ax = axes[0]
-        ax.pcolormesh(data.real, cmap="RdBu_r", vmin=-2, vmax=2)
+        ax.pcolormesh(data.real, **plot_kwargs)
         ax.set_ylabel("Space (-)")
         ax.set_xlabel("Time (-)")
         ax.set_title(
@@ -829,19 +858,14 @@ def plot_reconstructions(
             ax = axes[n_cluster + 1]
             xr_scale = xr_sep[n_cluster, :, :]
-            ax.pcolormesh(xr_scale, cmap="RdBu_r", vmin=-2, vmax=2)
+            ax.pcolormesh(xr_scale, **plot_kwargs)
             ax.set_ylabel("Space (-)")
             ax.set_title(title.format(x))

         if plot_residual:
             ax = axes[-1]
             ax.set_title("Residual")
-            ax.pcolormesh(
-                data - xr_sep.sum(axis=0),
-                cmap="RdBu_r",
-                vmin=-2,
-                vmax=2,
-            )
+            ax.pcolormesh(data - xr_sep.sum(axis=0), **plot_kwargs)
             ax.set_ylabel("Space (-)")

         axes[-1].set_xlabel("Time (-)")
         fig.tight_layout()

From 7f0e922ad82157c8e950223bbb5659bc0e17ddcf Mon Sep 17 00:00:00 2001
From: klapo
Date: Wed, 10 May 2023 16:35:58 +0200
Subject: [PATCH 20/31] Feat: added conversion from xarray, cleaned up object

- Added the from_xarray() method which converts an xarray Dataset (e.g.,
  one stored on disk as a netcdf) into a costs object. Costs methods,
  such as scale separation and reconstruction, can be run from this
  converted object.
- Cleaned up unnecessary variables in the object related to time
  keeping. Somewhat simplifies the time keeping.
---
 pydmd/costsdmd.py | 73 +++++++++++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 27 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index ccd52badb..bc4c1e40f 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -100,12 +100,12 @@ def __init__(
         self._time_array = None
         self._modes_array = None
         self._omega_array = None
-        self._time_means_array = None
         self._amplitudes_array = None
-        self._t_starts_array = None
         self._cluster_centroids = None
         self._omega_classes = None
         self._square_frequencies = None
+        self._window_means_array = None
+        self._non_integer_n_slide = None

         # Specify default keywords to hand to BOPDMD.
         if pydmd_kwargs is None:
@@ -162,18 +162,6 @@ def window_means_array(self):
             raise ValueError("You need to call fit first.")
         return self._window_means_array

-    @property
-    def t_starts_array(self):
-        if not hasattr(self, "_t_starts_array"):
-            raise ValueError("You need to call fit first.")
-        return self._t_starts_array
-
-    @property
-    def time_means_array(self):
-        if not hasattr(self, "_time_means_array"):
-            raise ValueError("You need to call fit first.")
-        return self._time_means_array
-
     @property
     def n_components(self):
         if not hasattr(self, "_n_components"):
@@ -321,7 +309,9 @@ def fit(
         )
         if n_slide_last_window > 0:
             self._n_slides += 1
-            self._n_slide_last_window = n_slide_last_window
+            self._non_integer_n_slide = True
+        else:
+            self._non_integer_n_slide = False

         # Build the projection basis if using a global svd.
@@ -364,8 +354,6 @@ def fit(
             (self._n_slides, svd_rank_pre_allocate), np.complex128
         )
         self._window_means_array = np.zeros((self._n_slides, self._n_data_vars))
-        self._t_starts_array = np.zeros(self._n_slides)
-        self._time_means_array = np.zeros(self._n_slides)

         # Get initial values for the eigenvalues.
         self._init_alpha = self._build_initizialization()
@@ -391,7 +379,6 @@ def fit(
             sample_slice = self.get_window_indices(k)
             data_window = data[:, sample_slice]
             original_time_window = time[:, sample_slice]
-            time_window_mean = np.mean(original_time_window)

             # All windows are fit with the time array reset to start at t=0.
             t_start = original_time_window[:, 0]
             time_window = original_time_window - t_start
@@ -432,8 +419,6 @@ def fit(
             ] = optdmd.amplitudes
             self._window_means_array[k, :] = c.flatten()
             self._time_array[k, :] = original_time_window
-            self._time_means_array[k] = time_window_mean
-            self._t_starts_array[k] = t_start

             # Reset optdmd between iterations
@@ -446,13 +431,16 @@ def fit(
             optdmd.init_alpha = optdmd.eigs

     def get_window_indices(self, k):
-        """
+        """Returns the window indices for slide `k`.

-        @return:
-        """
+        Handles non-integer number of slides by making the last slide
+        correspond to `slice(-window_length, None)`.
+
+        :return: slice indexing the time steps of window `k`.
+        """
         # Get the window indices and data.
         sample_start = self._step_size * k
-        if k == self._n_slides - 1 and self._n_slide_last_window > 0:
+        if k == self._n_slides - 1 and self._non_integer_n_slide:
             sample_slice = slice(-self._window_length, None)
         else:
             sample_slice = slice(
                 sample_start, sample_start + self._window_length
             )
         return sample_slice
@@ -583,7 +571,7 @@ def plot_omega_squared_time_series(self):
         for ncomponent, component in enumerate(range(self._n_components)):
             ax.plot(
-                self._time_means_array,
+                np.mean(self.time_array, axis=1),
                 np.where(
                     self._omega_classes == component,
                     omega_squared.reshape((n_slides, svd_rank)),
                     np.nan,
                 ),
                 color=colors[ncomponent % len(colors)],
             )
@@ -655,9 +643,9 @@ def scale_reconstruction(
             c = np.atleast_2d(self._window_means_array[k]).T

-            # Compute each segment of xr starting at "t = 0"
+            # Compute each segment of the reconstructed data starting at "t = 0"
             t = self._time_array[k]
-            t_start = self._t_starts_array[k]
+            t_start = t.min()
             t = t - t_start

             xr_sep_window = np.zeros(
@@ -677,7 +665,7 @@ def scale_reconstruction(
                     xr_sep_window[j, :, :] += c
                 xr_sep_window[j, :, :] = xr_sep_window[j, :, :] * recon_filter

-            if k == self._n_slides - 1 and self._non_integer_n_slide > 0:
+            if k == self._n_slides - 1 and self._non_integer_n_slide:
                 window_indices = slice(-self._window_length, None)
             else:
                 window_indices = slice(
@@ -910,11 +898,16 @@ def to_xarray(self):
                 ),
             },
             coords={
-                "window_time_means": self.time_means_array,
+                "window_time_means": np.mean(self.time_array, axis=1),
                 "slide": ("window_time_means", np.arange(self._n_slides)),
                 "rank": np.arange(self.svd_rank),
                 "space": np.arange(self._n_data_vars),
                 "frequency_band": np.arange(self.n_components),
+                "window_index": np.arange(self._window_length),
+                "time": (
+                    ("window_time_means", "window_index"),
+                    self.time_array,
+                ),
             },
             attrs={
                 "svd_rank": self.svd_rank,
@@ -924,7 +917,33 @@ def to_xarray(self):
                 "num_frequency_bands": self.n_components,
                 "n_data_vars": self._n_data_vars,
                 "n_time_steps": self._n_time_steps,
+                "step_size": self._step_size,
+                "non_integer_n_slide": self._non_integer_n_slide,
             },
         )

         return ds
+
+    def from_xarray(self, ds):
+        """Convert an xarray Dataset into a fitted CoSTS object.
+
+        :return: The CoSTS object with attributes populated from `ds`.
+        """
+
+        self._omega_array = ds.omega.values
+        self._omega_classes = ds.omega_classes.values
+        self._amplitudes_array = ds.amplitudes.values
+        self._modes_array = ds.modes.values
+        self._window_means_array = ds.window_means.values
+        self._cluster_centroids = ds.cluster_centroids.values
+        self._time_array = ds.time.values
+        self._n_slides = ds.attrs["n_slides"]
+        self._svd_rank = ds.attrs["svd_rank"]
+        self._n_data_vars = ds.attrs["n_data_vars"]
+        self._n_time_steps = ds.attrs["n_time_steps"]
+        self._n_components = ds.attrs["num_frequency_bands"]
+        self._non_integer_n_slide = ds.attrs["non_integer_n_slide"]
+        self._step_size = ds.attrs["step_size"]
+        self._window_length = ds.attrs["window_length"]
+
+        return self

From 740088d721e6c14f2b7523f02eabce8b3b6cd99e Mon Sep 17 00:00:00 2001
From: klapo
Date: Sun, 16 Jul 2023 13:30:00 -0400
Subject: [PATCH 21/31] Feat: transform methods for omega

- Replaced the `square_frequencies` keyword with a `transform_method`
  keyword, which transforms the imaginary components of the omega array
  depending on several options (no transformation, squared frequencies,
  or log10-scaled frequencies) for the frequency band clustering.
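
The three transform options amount to simple mappings of the flattened
eigenvalue array; a standalone sketch of the intended behavior (hypothetical
helper, not part of the class):

    import numpy as np

    def transform_omega(omega_array, transform_method=None):
        # Flatten, then map: squared magnitude, log10 of |imag(omega)|, or
        # plain |imag(omega)| (the default, "absolute_value").
        w = omega_array.ravel()
        if transform_method == "squared":
            return (np.conj(w) * w).astype(float)
        if transform_method == "log10":
            return np.log10(np.abs(w.imag).astype(float))
        return np.abs(w.imag).astype(float)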
---
 pydmd/costsdmd.py | 72 +++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 25 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index bc4c1e40f..bb8db986f 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -103,7 +103,7 @@ def __init__(
         self._amplitudes_array = None
         self._cluster_centroids = None
         self._omega_classes = None
-        self._square_frequencies = None
+        self._transform_method = None
         self._window_means_array = None
         self._non_integer_n_slide = None

@@ -449,19 +449,22 @@ def get_window_indices(self, k):
         return sample_slice

     def cluster_omega(
-        self, n_components, kmeans_kwargs=None, square_frequencies=True
+        self, n_components, kmeans_kwargs=None, transform_method=None
     ):
-        # Reshape the omega array into a 1d array
         omega_array = self.omega_array
         n_slides = omega_array.shape[0]
         svd_rank = omega_array.shape[1]
         omega_rshp = omega_array.reshape(n_slides * svd_rank)

-        if square_frequencies:
-            omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        # Apply a transformation to omega to (maybe) better separate frequency bands
+        if transform_method == "squared":
+            omega_transform = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        elif transform_method == "log10":
+            omega_transform = np.log10(np.abs(omega_rshp.imag.astype("float")))
         else:
-            omega_squared = np.abs(omega_rshp.imag.astype("float"))
+            transform_method = "absolute_value"
+            omega_transform = np.abs(omega_rshp.imag.astype("float"))

         if kmeans_kwargs is None:
             random_state = 0
@@ -473,7 +476,7 @@ def cluster_omega(
             "random_state": random_state,
         }
         kmeans = KMeans(n_clusters=n_components, **kmeans_kwargs)
-        omega_classes = kmeans.fit_predict(np.atleast_2d(omega_squared).T)
+        omega_classes = kmeans.fit_predict(np.atleast_2d(omega_transform).T)
         omega_classes = omega_classes.reshape(n_slides, svd_rank)
         cluster_centroids = kmeans.cluster_centers_.flatten()

         # Assign the results to the object.
         self._cluster_centroids = cluster_centroids
         self._omega_classes = omega_classes
-        self._square_frequencies = square_frequencies
+        self._transform_method = transform_method
         self._n_components = n_components
         self._class_values = np.unique(omega_classes)[idx]
         self._trimmed = False
@@ -492,7 +495,7 @@ def cluster_omega(
         return self

     def cluster_hyperparameter_sweep(
-        self, n_components_range=None, square_frequencies=False
+        self, n_components_range=None, transform_method=None
     ):
         """Performs a hyperparameter search for the number of components in the kmeans clustering."""
         if n_components_range is None:
@@ -506,42 +509,56 @@
         n_slides = omega_array.shape[0]
         svd_rank = omega_array.shape[1]
         omega_rshp = omega_array.reshape(n_slides * svd_rank)

-        if square_frequencies:
-            omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        # Apply a transformation to omega to (maybe) better separate frequency bands
+        if transform_method == "squared":
+            omega_transform = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        elif transform_method == "log10":
+            omega_transform = np.log10(np.abs(omega_rshp.imag.astype("float")))
         else:
-            omega_squared = np.abs(omega_rshp.imag.astype("float"))
+            omega_transform = np.abs(omega_rshp.imag.astype("float"))

         for nind, n in enumerate(n_components_range):
-            _ = self.cluster_omega(n_components=n, square_frequencies=False)
+            _ = self.cluster_omega(
+                n_components=n, transform_method=transform_method
+            )

             classes_reshape = self.omega_classes.reshape(n_slides * svd_rank)

             score[nind] = silhouette_score(
-                np.atleast_2d(omega_squared).T, np.atleast_2d(classes_reshape).T
+                np.atleast_2d(omega_transform).T,
+                np.atleast_2d(classes_reshape).T,
             )

         return n_components_range[np.argmax(score)]

     def plot_omega_histogram(self):
-        # Reshape the omega array into a 1d array
         omega_array = self.omega_array
         n_slides = omega_array.shape[0]
         svd_rank = omega_array.shape[1]
         omega_rshp = omega_array.reshape(n_slides * svd_rank)

+        hist_kwargs = {"bins": 64}
+        # Apply a transformation to omega to (maybe) better separate frequency bands
+        if self._transform_method == "squared":
+            omega_transform = (np.conj(omega_rshp) * omega_rshp).astype("float")
             label = "$|\omega|^{2}$"
+        elif self._transform_method == "log10":
+            omega_rshp = np.abs(omega_rshp.imag)
+            omega_transform = np.log10(omega_rshp.astype("float"))
+            label = "$log_{10}(|\omega|)$"
+            hist_kwargs["bins"] = np.linspace(
+                np.min(omega_transform[omega_rshp > 0]),
+                np.max(omega_transform[omega_rshp > 0]),
+            )
         else:
+            omega_transform = np.abs(omega_rshp.imag.astype("float"))
+            label = "$|\omega|$"

         cluster_centroids = self._cluster_centroids

         colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
         fig, ax = plt.subplots(1, 1)
-        ax.hist(omega_squared, bins=64)
+        ax.hist(omega_transform, **hist_kwargs)
         ax.set_xlabel(label)
         ax.set_ylabel("Count")
         ax.set_title("$\omega$ Spectrum & k-Means Centroids")
@@ -562,11 +579,15 @@ def plot_omega_squared_time_series(self):
         svd_rank = omega_array.shape[1]
         omega_rshp = omega_array.reshape(n_slides * svd_rank)

-        if self._square_frequencies:
-            omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        # Apply a transformation to omega to (maybe) better separate frequency bands
+        if self._transform_method == "squared":
+            omega_transform = (np.conj(omega_rshp) * omega_rshp).astype("float")
            label = "$|\omega|^{2}$"
@@ -562,11 +579,15 @@ def plot_omega_squared_time_series(self):
        svd_rank = omega_array.shape[1]
        omega_rshp = omega_array.reshape(n_slides * svd_rank)

-        if self._square_frequencies:
-            omega_squared = (np.conj(omega_rshp) * omega_rshp).astype("float")
+        # Apply a transformation to omega to (maybe) better separate the
+        # frequency bands.
+        if self._transform_method == "squared":
+            omega_transform = (np.conj(omega_rshp) * omega_rshp).astype("float")
            label = "$|\omega|^{2}$"
+        elif self._transform_method == "log10":
+            omega_transform = np.log10(np.abs(omega_rshp.imag.astype("float")))
+            label = "$\log_{10}(|\omega|)$"
        else:
-            omega_squared = np.abs(omega_rshp.imag.astype("float"))
+            omega_transform = np.abs(omega_rshp.imag.astype("float"))
            label = "$|\omega|$"

        for ncomponent, component in enumerate(range(self._n_components)):
@@ -574,7 +595,7 @@
                np.mean(self.time_array, axis=1),
                np.where(
                    self._omega_classes == component,
-                    omega_squared.reshape((n_slides, svd_rank)),
+                    omega_transform.reshape((n_slides, svd_rank)),
                    np.nan,
                ),
                color=colors[ncomponent % len(colors)],
@@ -691,7 +712,8 @@ def scale_reconstruction(

    def threshold_modes(self, data, xr_sep):
        """Remove frequency bands that do not contribute significantly to the magnitude of the reconstruction."""
-
+        # @ToDo: rename truncate and return the object, or remove, since it
+        # relies on poorly understood thresholds.
        if not self._trimmed:
            # Remove scales that do not contribute significantly to the magnitude of the signal
            n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))
@@ -911,7 +933,7 @@ def to_xarray(self):
            },
            attrs={
                "svd_rank": self.svd_rank,
-                "square_frequencies": self._square_frequencies,
+                "square_frequencies": self._transform_method,
                "n_slides": self._n_slides,
                "window_length": self._window_length,
                "num_frequency_bands": self.n_components,
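The silhouette sweep in the patch above is a standard model-selection recipe: cluster the 1-D frequency features for each candidate number of bands and keep the candidate with the best silhouette score. A self-contained sketch of the same idea on synthetic data (the names and the three synthetic bands are illustrative):

    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    rng = np.random.default_rng(0)
    # Synthetic |Im(omega)| samples drawn from three well separated bands.
    freqs = np.concatenate([
        rng.normal(0.5, 0.05, 200),
        rng.normal(2.0, 0.10, 200),
        rng.normal(8.0, 0.30, 200),
    ])
    features = np.atleast_2d(freqs).T

    scores = {}
    for n in range(2, 7):
        labels = KMeans(n_clusters=n, n_init=10, random_state=0).fit_predict(features)
        scores[n] = silhouette_score(features, labels)

    best_n = max(scores, key=scores.get)  # 3 for this toy example

Because the score is computed on the transformed frequencies, sweeping with a different `transform_method` can legitimately prefer a different number of bands.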
From 442a0c9c0705c7f38ddb441919bd804022e3863d Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 11:30:04 -0400
Subject: [PATCH 22/31] Bug fix: Catch incompatible svd_ranks

---
 pydmd/costsdmd.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index bb8db986f..e0779f7b0 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -329,6 +329,10 @@ def fit(
                raise ValueError(
                    "svd_rank is odd, but force_even_eigs is True."
                )
+            if self._svd_rank > self._n_data_vars:
+                raise ValueError(
+                    "Rank is larger than the data spatial dimension."
+                )
            svd_rank_pre_allocate = self._compute_svd_rank(
                data, svd_rank=self._svd_rank
            )

From 45b2227aa29969aa52f25515d14bb73b1b99ac58 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 14:26:49 -0400
Subject: [PATCH 23/31] Fix: Use utils.compute_rank. Added dependencies to
 setup.py

---
 pydmd/costsdmd.py | 35 +++++------------------------------
 setup.py          |  3 +++
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index e0779f7b0..12cf7447c 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -1,5 +1,6 @@
 import numpy as np
 from pydmd.bopdmd import BOPDMD
+from .utils import compute_rank
 import scipy
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
@@ -180,28 +181,8 @@ def omega_classes(self):
            raise ValueError("You need to call `cluster_omega()` first.")
        return self._omega_classes

-    def _compute_svd_rank(self, data, svd_rank=None):
-        def omega(x):
-            return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43
-
-        U, s, _ = np.linalg.svd(data, full_matrices=False)
-
-        if svd_rank == 0:
-            beta = np.divide(*sorted(data.shape))
-            tau = np.median(s) * omega(beta)
-            svd_rank = np.sum(s > tau)
-        elif 0 < svd_rank < 1:
-            cumulative_energy = np.cumsum(s**2 / (s**2).sum())
-            svd_rank = np.searchsorted(cumulative_energy, svd_rank) + 1
-        elif svd_rank >= 1 and isinstance(svd_rank, (int, np.integer)):
-            svd_rank = min(svd_rank, self._n_data_vars)
-        else:
-            svd_rank = self._n_data_vars
-
-        return svd_rank
-
    def _build_proj_basis(self, data, svd_rank=None):
-        self._svd_rank = self._compute_svd_rank(data, svd_rank=svd_rank)
+        self._svd_rank = compute_rank(data, svd_rank=svd_rank)
        # Recover the first r modes of the global svd
        u, _, _ = scipy.linalg.svd(data, full_matrices=False)
        return u[:, : self._svd_rank]
@@ -320,9 +301,7 @@ def fit(
            self._pydmd_kwargs["use_proj"] = self._pydmd_kwargs.get(
                "use_proj", False
            )
-            self._svd_rank = self._compute_svd_rank(
-                data, svd_rank=self._svd_rank
-            )
+            self._svd_rank = compute_rank(data, svd_rank=self._svd_rank)
            svd_rank_pre_allocate = self._svd_rank
        elif not self._global_svd and self._svd_rank > 0:
            if self._force_even_eigs and self._svd_rank % 2:
@@ -333,9 +312,7 @@ def fit(
                raise ValueError(
                    "Rank is larger than the data spatial dimension."
                )
-            svd_rank_pre_allocate = self._compute_svd_rank(
-                data, svd_rank=self._svd_rank
-            )
+            svd_rank_pre_allocate = compute_rank(data, svd_rank=self._svd_rank)
        # If not using a global svd or a specified svd_rank, local u from each window is
        # used instead. The optimal svd_rank may change when using the locally optimal
        # svd_rank. To deal with this situation in the pre-allocation we give the
@@ -399,9 +376,7 @@ def fit(
        if not self._global_svd:
            # Get the svd rank for this window. Uses rank truncation when svd_rank is
            # not fixed, i.e. svd_rank = 0, otherwise uses the specified rank.
-            _svd_rank = self._compute_svd_rank(
-                data_window, svd_rank=self._svd_rank
-            )
+            _svd_rank = compute_rank(data_window, svd_rank=self._svd_rank)
            # Force svd rank to be even to allow for conjugate pairs.
            if self._force_even_eigs and _svd_rank % 2:
                _svd_rank += 1

diff --git a/setup.py b/setup.py
index 33cb633b4..05a2dd746 100644
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,9 @@
 KEYWORDS = "dynamic-mode-decomposition dmd"

 REQUIRED = ["numpy", "scipy", "matplotlib"]
+EXTRAS_REQUIRE = {
+    "costs": ["scikit-learn", "xarray"],
+}

 EXTRAS = {
     "docs": ["Sphinx>=1.4", "sphinx_rtd_theme"],

From 3338d1963308aca39e18470bf463112e59826f5f Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 14:47:44 -0400
Subject: [PATCH 24/31] Use utils.compute_svd

---
 pydmd/costsdmd.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 12cf7447c..09b255f05 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -1,6 +1,6 @@
 import numpy as np
 from pydmd.bopdmd import BOPDMD
-from .utils import compute_rank
+from .utils import compute_rank, compute_svd
 import scipy
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
@@ -184,8 +184,9 @@ def omega_classes(self):
    def _build_proj_basis(self, data, svd_rank=None):
        self._svd_rank = compute_rank(data, svd_rank=svd_rank)
        # Recover the first r modes of the global svd
-        u, _, _ = scipy.linalg.svd(data, full_matrices=False)
-        return u[:, : self._svd_rank]
+        # u, _, _ = scipy.linalg.svd(data, full_matrices=False)
+        u, _, _ = compute_svd(data, svd_rank=svd_rank)
+        return u
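For reference, the helper deleted in PATCH 23 encodes three rank-selection rules that the shared `pydmd.utils.compute_rank` is expected to reproduce: `svd_rank=0` applies the optimal hard threshold of Gavish and Donoho (2014), a fractional `svd_rank` keeps the smallest rank reaching that cumulative energy, and an integer fixes the rank outright. A standalone sketch of the same rules (the function name is illustrative; consult `pydmd.utils` for the maintained implementation):

    import numpy as np

    def select_rank(X, svd_rank=0):
        """Rank selection matching the rules of the deleted helper above."""
        s = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        if svd_rank == 0:
            # Optimal hard threshold for the matrix aspect ratio beta <= 1.
            beta = np.divide(*sorted(X.shape))
            omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43
            return int(np.sum(s > np.median(s) * omega))
        if 0 < svd_rank < 1:
            # Smallest rank capturing the requested cumulative energy.
            cumulative_energy = np.cumsum(s**2 / (s**2).sum())
            return int(np.searchsorted(cumulative_energy, svd_rank) + 1)
        # An integer rank is used as given, capped at the data dimension.
        return int(min(svd_rank, min(X.shape)))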
From 88db3168cbf843438ced4373232b5609a8196a53 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 14:48:06 -0400
Subject: [PATCH 25/31] Updated extras_require to use h5netcdf

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 05a2dd746..d10928df7 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 REQUIRED = ["numpy", "scipy", "matplotlib"]

 EXTRAS_REQUIRE = {
-    "costs": ["scikit-learn", "xarray"],
+    "costs": ["scikit-learn", "xarray", "h5netcdf"],
 }

From 10fe92d6851aaf5a319bb864c6ef9c10b72e94de Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 18:24:28 -0400
Subject: [PATCH 26/31] Cleanup of `to_xarray` method

---
 pydmd/costsdmd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 09b255f05..86b850ba8 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -913,7 +913,7 @@ def to_xarray(self):
            },
            attrs={
                "svd_rank": self.svd_rank,
-                "square_frequencies": self._transform_method,
+                "omega_transformation": self._transform_method,
                "n_slides": self._n_slides,
                "window_length": self._window_length,
                "num_frequency_bands": self.n_components,

From 137dd7d86ea533eaba7cf397e5838f55f7fd9640 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 18 Jul 2023 18:45:10 -0400
Subject: [PATCH 27/31] Feat: property getter for commonly accessed values

---
 pydmd/costsdmd.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index 86b850ba8..c75d2a67b 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -133,6 +133,22 @@ def svd_rank(self):
        """
        return self._svd_rank

+    @property
+    def window_length(self):
+        """
+        :return: the length of the windows used for this decomposition level.
+        :rtype: int or float
+        """
+        return self._window_length
+
+    @property
+    def n_slides(self):
+        """
+        :return: number of window slides for this decomposition level.
+        :rtype: int
+        """
+        return self._n_slides
+
    @property
    def modes_array(self):
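A note on the color-scale convention adopted by the plotting changes in the following patch: a diverging colormap such as `RdBu_r` only reads correctly when its limits are symmetric about zero, which is why the keyword defaults below pin `vmin` and `vmax` to the same magnitude. A minimal sketch of that defaulting pattern (the array and names are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    field = np.random.default_rng(2).normal(size=(32, 64))

    plot_kwargs = {}
    # User-supplied values win; otherwise center the color scale on zero.
    plot_kwargs["vmin"] = plot_kwargs.get("vmin", -np.abs(field).max())
    plot_kwargs["vmax"] = plot_kwargs.get("vmax", np.abs(field).max())
    plot_kwargs["cmap"] = plot_kwargs.get("cmap", "RdBu_r")

    fig, ax = plt.subplots()
    ax.pcolormesh(field, **plot_kwargs)

A sequential map like `cividis` suits the slowly varying low-frequency band, while the zero-centered `RdBu_r` suits the oscillatory high-frequency bands.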
From 9c48f2253fd20e80f3ea4f2a45ecce37db350a3a Mon Sep 17 00:00:00 2001
From: klapo
Date: Thu, 20 Jul 2023 16:57:02 +0200
Subject: [PATCH 28/31] Updates to plotting routine for high and low
 frequencies

- The low frequency/input data component is now in `cividis` by default,
  with its own color scale.
- The high frequency components are now in `RdBu_r`, with a separately
  defined color scale.
- Added keyword arguments for interacting with both sets of options.
- Low frequency/input data can now optionally include black contours.
---
 pydmd/costsdmd.py | 59 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index c75d2a67b..f04560a91 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -756,6 +756,8 @@ def plot_scale_separation(
        plot_residual=False,
        fig_kwargs=None,
        plot_kwargs=None,
+        hf_plot_kwargs=None,
+        plot_contours=False,
    ):
        """Plot the scale-separated low and high frequency bands."""
        if scale_reconstruction_kwargs is None:
@@ -777,7 +779,17 @@
            plot_kwargs = {}
        plot_kwargs["vmin"] = plot_kwargs.get("vmin", -np.abs(data).max())
        plot_kwargs["vmax"] = plot_kwargs.get("vmax", np.abs(data).max())
-        plot_kwargs["cmap"] = plot_kwargs.get("cmap", "RdBu_r")
+        plot_kwargs["cmap"] = plot_kwargs.get("cmap", "cividis")
+
+        if hf_plot_kwargs is None:
+            hf_plot_kwargs = {}
+        hf_plot_kwargs["vmin"] = hf_plot_kwargs.get(
+            "vmin", -np.abs(xr_high_frequency).max()
+        )
+        hf_plot_kwargs["vmax"] = hf_plot_kwargs.get(
+            "vmax", np.abs(xr_high_frequency).max()
+        )
+        hf_plot_kwargs["cmap"] = hf_plot_kwargs.get("cmap", "RdBu_r")

        if plot_residual:
            fig, axes = plt.subplots(4, 1, **fig_kwargs)
@@ -786,6 +798,8 @@
        ax = axes[0]
        ax.pcolormesh(data, **plot_kwargs)
+        if plot_contours:
+            ax.contour(data, colors=["k"])
        ax.set_title(
            "Input Data at decomposition window length = {}".format(
                self._window_length
@@ -794,24 +808,28 @@
        ax = axes[1]
        ax.set_title("Reconstruction, low frequency")
        ax.pcolormesh(xr_low_frequency, **plot_kwargs)
+        if plot_contours:
+            ax.contour(data, colors=["k"])
        ax.set_ylabel("Space (-)")

        ax = axes[2]
        ax.set_title("Reconstruction, high frequency")
-        ax.pcolormesh(xr_high_frequency, **plot_kwargs)
+        ax.pcolormesh(xr_high_frequency, **hf_plot_kwargs)
        ax.set_ylabel("Space (-)")

        if plot_residual:
            ax = axes[3]
            ax.set_title("Residual")
            ax.pcolormesh(
-                data - xr_high_frequency - xr_low_frequency, **plot_kwargs
+                data - xr_high_frequency - xr_low_frequency, **hf_plot_kwargs
            )
            ax.set_ylabel("Space (-)")

        axes[-1].set_xlabel("Time (-)")
        fig.tight_layout()

+        return fig, axes
+
    def plot_reconstructions(
        self,
        data,
@@ -820,6 +838,8 @@
        plot_residual=False,
        fig_kwargs=None,
        plot_kwargs=None,
+        hf_plot_kwargs=None,
+        plot_contours=False,
    ):
        if scale_reconstruction_kwargs is None:
            scale_reconstruction_kwargs = {}
@@ -833,20 +853,40 @@
            "figsize", (6, 1.5 * len(self._cluster_centroids) + 1)
        )

+        # Low frequency and input data often require separate plotting
+        # parameters.
        if plot_kwargs is None:
            plot_kwargs = {}
        plot_kwargs["vmin"] = plot_kwargs.get("vmin", -np.abs(data).max())
        plot_kwargs["vmax"] = plot_kwargs.get("vmax", np.abs(data).max())
-        plot_kwargs["cmap"] = plot_kwargs.get("cmap", "RdBu_r")
+        plot_kwargs["cmap"] = plot_kwargs.get("cmap", "cividis")

+        # High frequency components often require separate plotting parameters.
+        if hf_plot_kwargs is None:
+            hf_plot_kwargs = {}
+        hf_plot_kwargs["vmin"] = hf_plot_kwargs.get(
+            "vmin", -np.abs(xr_sep[1:, :, :]).max()
+        )
+        hf_plot_kwargs["vmax"] = hf_plot_kwargs.get(
+            "vmax", np.abs(xr_sep[1:, :, :]).max()
+        )
+        hf_plot_kwargs["cmap"] = hf_plot_kwargs.get("cmap", "RdBu_r")
+
+        # Determine the number of plotting elements, which changes depending
+        # on whether the residual is included.
+        if plot_residual:
+            num_plot_elements = len(self._cluster_centroids) + 2
+        else:
+            num_plot_elements = len(self._cluster_centroids) + 1

        fig, axes = plt.subplots(
-            len(self._cluster_centroids) + 1,
+            num_plot_elements,
            1,
            **fig_kwargs,
        )

        ax = axes[0]
        ax.pcolormesh(data.real, **plot_kwargs)
+        if plot_contours:
+            ax.contour(data.real, colors=["k"])
        ax.set_ylabel("Space (-)")
        ax.set_xlabel("Time (-)")
        ax.set_title(
@@ -864,14 +904,19 @@
            ax = axes[n_cluster + 1]
            xr_scale = xr_sep[n_cluster, :, :]
-            ax.pcolormesh(xr_scale, **plot_kwargs)
+            if n_cluster == 0:
+                ax.pcolormesh(xr_scale, **plot_kwargs)
+                if plot_contours:
+                    ax.contour(xr_scale, colors=["k"])
+            else:
+                ax.pcolormesh(xr_scale, **hf_plot_kwargs)
            ax.set_ylabel("Space (-)")
            ax.set_title(title.format(x))

        if plot_residual:
            ax = axes[-1]
            ax.set_title("Residual")
-            ax.pcolormesh(data - xr_sep.sum(axis=0), **plot_kwargs)
+            ax.pcolormesh(data - xr_sep.sum(axis=0), **hf_plot_kwargs)
            ax.set_ylabel("Space (-)")

        axes[-1].set_xlabel("Time (-)")

From 63a5dbc282ee56ca326926b3d11952c7ed9c3d80 Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 25 Jul 2023 17:55:57 +0200
Subject: [PATCH 29/31] Update: cleaning code and comments

- Dealt with some overlong comment lines.
- Moved the gaussian kernel construction into a dedicated function.
---
 pydmd/costsdmd.py | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index f04560a91..f376dd36f 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -277,6 +277,15 @@ calculate_lv_kern(window_length, corner_sharpness=None):

        return lv_kern

+    @staticmethod
+    def build_kern(window_length):
+        recon_filter_sd = window_length / 8
+        recon_filter = np.exp(
+            -((np.arange(window_length) - (window_length + 1) / 2) ** 2)
+            / recon_filter_sd**2
+        )
+        return recon_filter
+
    def _data_shape(self, data):
        n_time_steps = np.shape(data)[1]
        n_data_vars = np.shape(data)[0]
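The `build_kern` filter above is what makes the sliding-window reconstructions blend smoothly: each window's reconstruction is weighted by a Gaussian that decays toward the window edges, and overlapping windows are then summed and renormalized. A toy sketch of that overlap-add pattern, assuming a constant per-window signal (sizes and names are illustrative; the class divides by the accumulated weight `xn` in the same spirit):

    import numpy as np

    window_length, step_size, n_time = 64, 16, 256
    kern = np.exp(
        -((np.arange(window_length) - (window_length + 1) / 2) ** 2)
        / (window_length / 8) ** 2
    )

    recon = np.zeros(n_time)
    weight = np.zeros(n_time)
    for start in range(0, n_time - window_length + 1, step_size):
        sl = slice(start, start + window_length)
        recon[sl] += kern * 1.0  # each window adds its kernel-weighted reconstruction
        weight[sl] += kern

    recon /= weight  # renormalize; interior points recover the constant exactly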
@@ -640,14 +649,15 @@ def scale_reconstruction(
        # Convolve each windowed reconstruction with a gaussian filter.
        # Std dev of gaussian filter
-        recon_filter_sd = self._window_length / 8
-        recon_filter = np.exp(
-            -(
-                (np.arange(self._window_length) - (self._window_length + 1) / 2)
-                ** 2
-            )
-            / recon_filter_sd**2
-        )
+        recon_filter = self.build_kern(self._window_length)
+        # recon_filter_sd = self._window_length / 8
+        # recon_filter = np.exp(
+        #     -(
+        #         (np.arange(self._window_length) - (self._window_length + 1) / 2)
+        #         ** 2
+        #     )
+        #     / recon_filter_sd**2
+        # )

        for k in range(self._n_slides):
            w = self._modes_array[k]
@@ -707,11 +717,13 @@
        return xr_sep

    def threshold_modes(self, data, xr_sep):
-        """Remove frequency bands that do not contribute significantly to the magnitude of the reconstruction."""
+        """Remove frequency bands that do not contribute significantly to the magnitude
+        of the reconstruction."""
        # @ToDo: rename truncate and return the object or remove since it relies on
        # poorly understood thresholds.
        if not self._trimmed:
-            # Remove scales that do not contribute significantly to the magnitude of the signal
+            # Remove scales that do not contribute significantly to the magnitude of
+            # the signal
            n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))

            magnitude_threshold = np.nanmedian(np.abs(data.real)) / 100
@@ -735,8 +747,9 @@ def scale_separation(
    ):
        """Separate the lowest frequency band from the high frequency bands.

-        The lowest frequency band should contain the window means and can be passed on as the data for the next
-        decomposition level. The high frequencies should have frequencies shorter than 1 / window_length.
+        The lowest frequency band should contain the window means and can be
+        passed on as the data for the next decomposition level. The high
+        frequency bands should have frequencies higher than 1 / window_length.
        """
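The docstring above pins down the contract used throughout the multi-resolution loop: band 0 of the separated reconstruction carries the window means and becomes the input to the next, shorter-window decomposition level, while the remaining bands carry everything faster. A sketch of the split, assuming a band-separated array `xr_sep` of shape `(n_bands, n_space, n_time)` with band 0 the slowest (names are illustrative):

    import numpy as np

    n_bands, n_space, n_time = 4, 32, 512
    xr_sep = np.random.default_rng(1).normal(size=(n_bands, n_space, n_time))

    xr_low_frequency = xr_sep[0]                 # window means, slowest band
    xr_high_frequency = xr_sep[1:].sum(axis=0)   # all faster bands combined

    # The next decomposition level would fit xr_low_frequency with a
    # shorter window_length, recursing down the scales.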
From 02931248dfa9ff8e14b3b3e649f1cfa8431215ef Mon Sep 17 00:00:00 2001
From: klapo
Date: Tue, 25 Jul 2023 18:27:49 +0200
Subject: [PATCH 30/31] Big bug fix for reconstruction

- Reconstructing the scale-separated data can no longer inadvertently modify
  omega, fixing a major bug when growth is not suppressed.
- Removed the scale truncation method, as it no longer functions.
- Converted `scale_reconstruction` to use the `get_window_indices` method.
---
 pydmd/costsdmd.py | 60 ++++------------------------------------------
 1 file changed, 5 insertions(+), 55 deletions(-)

diff --git a/pydmd/costsdmd.py b/pydmd/costsdmd.py
index f376dd36f..a10ce9ee1 100644
--- a/pydmd/costsdmd.py
+++ b/pydmd/costsdmd.py
@@ -1,7 +1,7 @@
 import numpy as np
 from pydmd.bopdmd import BOPDMD
 from .utils import compute_rank, compute_svd
-import scipy
+import copy
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 import matplotlib.pyplot as plt
@@ -623,8 +623,6 @@ def scale_reconstruction(
        self,
        suppress_growth=True,
        include_means=True,
-        magnitude_threshold=False,
-        data=None,
    ):
        """Reconstruct the sliding mrDMD into the constituent components.

        and end of time series prone to larger errors. A best practice is to cut off
        `window_length` from each end before further analysis.

-        suppress_growth: Kill positive real components of frequencies
        """

        # Convolve each windowed reconstruction with a gaussian filter.
        # Std dev of gaussian filter
        recon_filter = self.build_kern(self._window_length)

        for k in range(self._n_slides):
+            window_indices = self.get_window_indices(k)
+
            w = self._modes_array[k]
            b = self._amplitudes_array[k]
-            omega = np.atleast_2d(self._omega_array[k]).T
+            # @ToDo: global flag for suppressing growth?
+            omega = copy.deepcopy(np.atleast_2d(self._omega_array[k]).T)
            classification = self._omega_classes[k]

            if suppress_growth:
@@ -692,13 +684,6 @@
                    xr_sep_window[j, :, :] += c
                xr_sep_window[j, :, :] = xr_sep_window[j, :, :] * recon_filter

-                if k == self._n_slides - 1 and self._non_integer_n_slide:
-                    window_indices = slice(-self._window_length, None)
-                else:
-                    window_indices = slice(
-                        k * self._step_size,
-                        k * self._step_size + self._window_length,
-                    )
                xr_sep[j, :, window_indices] = (
                    xr_sep[j, :, window_indices] + xr_sep_window[j, :, :]
                )
@@ -707,38 +692,6 @@
        xr_sep = xr_sep / xn

-        if magnitude_threshold:
-            if data is None:
-                raise ValueError(
-                    "The data must be provided when doing a magnitude cut-off of modes."
-                )
-            xr_sep = self.threshold_modes(data, xr_sep)
-
-        return xr_sep
-
-    def threshold_modes(self, data, xr_sep):
-        """Remove frequency bands that do not contribute significantly to the magnitude
-        of the reconstruction."""
-        # @ToDo: rename truncate and return the object or remove since it relies on
-        # poorly understood thresholds.
-        if not self._trimmed:
-            # Remove scales that do not contribute significantly to the magnitude of
-            # the signal
-            n = np.nanmedian(np.abs(xr_sep.real), axis=(1, 2))
-            magnitude_threshold = np.nanmedian(np.abs(data.real)) / 100
-
-            # Trim frequencies bands that do not meet the magnitude threshold.
-            xr_sep = xr_sep[n > magnitude_threshold, ::]
-            self._cluster_centroids = self._cluster_centroids[
-                n > magnitude_threshold
-            ]
-            num_modes_to_keep = np.sum(n > magnitude_threshold)
-            self._class_values = self._class_values[
-                self._class_values < num_modes_to_keep
-            ]
-
-            self._trimmed = True
-        return xr_sep

        return xr_sep

    def scale_separation(

        """Plot the scale-separated low and high frequency bands."""
        if scale_reconstruction_kwargs is None:
            scale_reconstruction_kwargs = {}
-        scale_reconstruction_kwargs["data"] = scale_reconstruction_kwargs.get(
-            "data", data
-        )

        xr_low_frequency, xr_high_frequency = self.scale_separation(
            scale_reconstruction_kwargs
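The `copy.deepcopy` added above is the heart of this fix: `np.atleast_2d` on a row of `self._omega_array` returns a view, so zeroing positive growth rates in place would silently rewrite the stored eigenvalues on every reconstruction call. A small demonstration of the failure mode and the fix (the in-place growth suppression shown is illustrative of what `suppress_growth` does):

    import numpy as np

    omega_array = np.array([[0.3 + 2.0j, -0.1 + 5.0j]])

    # Without a copy: the transposed 2-D omega is a view into omega_array.
    omega = np.atleast_2d(omega_array[0]).T
    omega.real[omega.real > 0] = 0       # suppress growth in place
    print(omega_array[0, 0])             # 2j: the stored eigenvalue changed!

    omega_array = np.array([[0.3 + 2.0j, -0.1 + 5.0j]])
    # With a copy (np.ndarray.copy is equivalent to deepcopy here), the
    # stored eigenvalues survive repeated reconstructions.
    omega = np.atleast_2d(omega_array[0]).T.copy()
    omega.real[omega.real > 0] = 0
    print(omega_array[0, 0])             # (0.3+2j): original preserved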
From 18d815dcc1942ceae230f54cbebb561be8eea03b Mon Sep 17 00:00:00 2001
From: klapo
Date: Mon, 31 Jul 2023 09:07:10 +0000
Subject: [PATCH 31/31] export tutorials changed in cce021a

---
 docs/source/_tutorials/tutorial16rdmd.html | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/source/_tutorials/tutorial16rdmd.html b/docs/source/_tutorials/tutorial16rdmd.html
index bc8c100a8..b03515415 100644
--- a/docs/source/_tutorials/tutorial16rdmd.html
+++ b/docs/source/_tutorials/tutorial16rdmd.html

[Five hunks (around lines 7661-8043) replace re-rendered notebook output
embedded in the exported HTML tutorial; only the section headings survive
extraction: "Exact DMD", "Compressed DMD", "Randomized DMD: Varying
Oversampling", "Randomized DMD: Varying Power", and "Runtime Comparison".
The HTML/image payload itself is omitted.]