From ac1c613638040497edb81769e5c8cbf13ab7adb2 Mon Sep 17 00:00:00 2001 From: statefb Date: Mon, 18 May 2020 23:24:39 +0900 Subject: [PATCH 01/20] add qresiduals cost implementation --- ruptures/costs/__init__.py | 2 + ruptures/costs/costqresiduals.py | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 ruptures/costs/costqresiduals.py diff --git a/ruptures/costs/__init__.py b/ruptures/costs/__init__.py index fa205684..b5ebbd74 100644 --- a/ruptures/costs/__init__.py +++ b/ruptures/costs/__init__.py @@ -17,6 +17,7 @@ costautoregressive costml costrank + costqresid costcustom """ @@ -31,3 +32,4 @@ from .costautoregressive import CostAR from .costml import CostMl from .costrank import CostRank +from .costqresiduals import CostQResiduals \ No newline at end of file diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py new file mode 100644 index 00000000..eac6f450 --- /dev/null +++ b/ruptures/costs/costqresiduals.py @@ -0,0 +1,64 @@ +import numpy as np +from numpy.linalg import svd, eig +from sklearn.decomposition import PCA +from sklearn.preprocessing import scale + +from ruptures.base import BaseCost +from ruptures.costs import NotEnoughPoints + +class CostQResiduals(BaseCost): + + """Q residuals.""" + + model = "qresid" + + def __init__(self, n_components=2): + self.min_size = 2 + self.signal = None + self.n_components = n_components + + def fit(self, signal): + """Set parameters of the instance. + + Args: + signal (array): signal. Shape (n_samples,) or (n_samples, n_features) + + Returns: + self + """ + if signal.ndim == 1 or (signal.ndim == 2 and signal.shape[1] == 1): + raise ValueError("The signal must be multivariate.") + else: + self.signal = signal + + return self + + def error(self, start, end): + """Return the approximation cost on the segment [start:end]. + + Args: + start (int): start of the segment + end (int): end of the segment + + Returns: + float: segment cost + + Raises: + NotEnoughPoints: when the segment is too short (less than ``'min_size'`` samples). + """ + if end - start < self.min_size: + raise NotEnoughPoints + sub = self.signal[start:end] + + pca = PCA(self.n_components).fit(sub) + + # loading vectors + P = pca.components_.T + # compute residuals + X = sub - np.mean(sub, axis=0) + Q = X @ (np.eye(sub.shape[1]) - P @ P.T) @ X.T + + return np.sum(np.diag(Q)) + + + From a158e036f7e0225b1d768e4ef1a7270a599213a1 Mon Sep 17 00:00:00 2001 From: statefb Date: Fri, 22 May 2020 02:00:43 +0900 Subject: [PATCH 02/20] add hotelling t2 cost implementation add hotelling t2 cost implementation --- ruptures/costs/__init__.py | 6 ++-- ruptures/costs/costt2.py | 57 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 ruptures/costs/costt2.py diff --git a/ruptures/costs/__init__.py b/ruptures/costs/__init__.py index b5ebbd74..573b8287 100644 --- a/ruptures/costs/__init__.py +++ b/ruptures/costs/__init__.py @@ -17,7 +17,8 @@ costautoregressive costml costrank - costqresid + costqresiduals + costt2 costcustom """ @@ -32,4 +33,5 @@ from .costautoregressive import CostAR from .costml import CostMl from .costrank import CostRank -from .costqresiduals import CostQResiduals \ No newline at end of file +from .costqresiduals import CostQResiduals +from .costt2 import CostTSquared \ No newline at end of file diff --git a/ruptures/costs/costt2.py b/ruptures/costs/costt2.py new file mode 100644 index 00000000..f64dc720 --- /dev/null +++ b/ruptures/costs/costt2.py @@ -0,0 +1,57 @@ +import numpy as np +from sklearn.decomposition import PCA + +from ruptures.base import BaseCost +from ruptures.costs import NotEnoughPoints + +class CostTSquared(BaseCost): + + """Hotelling's T-Squared.""" + + model = "t2" + + def __init__(self, n_components=2): + self.min_size = 2 + self.signal = None + self.n_components = n_components + + def fit(self, signal): + """Set parameters of the instance. + + Args: + signal (array): signal. Shape (n_samples,) or (n_samples, n_features) + + Returns: + self + """ + if signal.ndim == 1 or (signal.ndim == 2 and signal.shape[1] == 1): + raise ValueError("The signal must be multivariate.") + else: + self.signal = signal + + return self + + def error(self, start, end): + """Return the approximation cost on the segment [start:end]. + + Args: + start (int): start of the segment + end (int): end of the segment + + Returns: + float: segment cost + + Raises: + NotEnoughPoints: when the segment is too short (less than ``'min_size'`` samples). + """ + if end - start < self.min_size: + raise NotEnoughPoints + sub = self.signal[start:end] + + pca = PCA(self.n_components) + sub_tr = pca.fit_transform(sub) + + return np.sum(np.diag(sub_tr @ sub_tr.T)) + + + From 5eaf7147f4b38c320c58ce23ec95859c2f4223a3 Mon Sep 17 00:00:00 2001 From: statefb Date: Sat, 23 May 2020 18:29:11 +0900 Subject: [PATCH 03/20] add noisy features generation to pw_normal func --- ruptures/datasets/pw_normal.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ruptures/datasets/pw_normal.py b/ruptures/datasets/pw_normal.py index d1c18ae7..30e1b369 100644 --- a/ruptures/datasets/pw_normal.py +++ b/ruptures/datasets/pw_normal.py @@ -47,12 +47,15 @@ from ruptures.utils import draw_bkps -def pw_normal(n_samples=200, n_bkps=3): +def pw_normal(n_samples=200, n_bkps=3, n_noisy_features=0): """Return a 2D piecewise Gaussian signal and the associated changepoints. Args: n_samples (int, optional): signal length n_bkps (int, optional): number of change points + add_noise (int, optional): number of noisy dimensions. If not zero, + total dimensions of the signal will be 2 + `n_noisy_features` and + the last `n_noisy_features` demensions will be noise. Returns: tuple: signal of shape (n_samples, 2), list of breakpoints @@ -68,4 +71,8 @@ def pw_normal(n_samples=200, n_bkps=3): n_sub, _ = sub.shape sub += rd.multivariate_normal([0, 0], cov, size=n_sub) + # Add noise + if n_noisy_features > 0: + signal = np.hstack((signal, np.random.randn(n_samples, n_noisy_features))) + return signal, bkps From 01a991ed2ce333a89dff150084502bc32b7389b9 Mon Sep 17 00:00:00 2001 From: statefb Date: Sat, 23 May 2020 19:24:15 +0900 Subject: [PATCH 04/20] add scikit-learn to dependencies --- requirements.txt | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5576e19f..0f3b3811 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy -scipy \ No newline at end of file +scipy +scikit-learn \ No newline at end of file diff --git a/setup.py b/setup.py index cf7c40c0..99e5b0a0 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name='ruptures', version='1.0.3', packages=find_packages(exclude=['docs', 'tests*', 'images']), - install_requires=['numpy', 'scipy'], + install_requires=['numpy', 'scipy', 'scikit-learn'], extras_require={ 'display': ['matplotlib'] }, From 86a6a1f2364b45ec81384b66b0ce03a2a70dbeb5 Mon Sep 17 00:00:00 2001 From: statefb Date: Sun, 24 May 2020 00:04:18 +0900 Subject: [PATCH 05/20] add docstring --- ruptures/costs/costqresiduals.py | 71 +++++++++++++++++++++++++++++++ ruptures/costs/costt2.py | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index eac6f450..84acd616 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -1,3 +1,74 @@ +r""" + +.. _sec-costqresiduals: + +Q reconstruction error based cost function +==================================================================================================== + +Description +---------------------------------------------------------------------------------------------------- + +This cost function detects change of the correlation between the variables with ignoring noisy demension. +Formally, for a signal :math:`\{y_t\}_t` on an interval :math:`I`, the cost function is equal to + + .. math:: c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y_t}\|_2 + +where :math:`\bar{y_t}` is the PCA based reconstructed value of :math:`\{y_t\}_{t\in I}` computed by + + .. math:: \bar{y_t} = {U_p}^T U_p y_t + +where :math:`U_p` is singular vectors belong to the most important :math:`p` singular values of :math:`y_t`. + +Usage +---------------------------------------------------------------------------------------------------- + +Start with the usual imports and create a signal. + +.. code-block:: python + + import numpy as np + import matplotlib.pylab as plt + import ruptures as rpt + # creation of data + n = 200 # number of samples + n_bkps = 3 # number of change points + n_noisy_features = 3 # number of noisy features + signal, bkps = rpt.pw_normal(n, n_bkps, n_noisy_features) + +Then create a :class:`CostQResiduals` instance and print the cost of the sub-signal :code:`signal[50:150]`. + +.. code-block:: python + + c = rpt.costs.CostQResiduals(n_components=1).fit(signal) + print(c.error(50, 150)) + +You can also compute the sum of costs for a given list of change points. + +.. code-block:: python + + print(c.sum_of_costs(bkps)) + print(c.sum_of_costs([10, 100, 200, 250, n])) + + +In order to use this cost class in a change point detection algorithm (inheriting from :class:`BaseEstimator`), either pass a :class:`CostNormal` instance (through the argument ``'custom_cost'``) or set :code:`model="qresid"`. + +.. code-block:: python + + c = rpt.costs.CostQResiduals(); algo = rpt.Dynp(custom_cost=c) + # is equivalent to + algo = rpt.Dynp(model="qresid") + + +Code explanation +---------------------------------------------------------------------------------------------------- + +.. autoclass:: ruptures.costs.CostQResiduals + :members: + :special-members: __init__ + +""" + + import numpy as np from numpy.linalg import svd, eig from sklearn.decomposition import PCA diff --git a/ruptures/costs/costt2.py b/ruptures/costs/costt2.py index f64dc720..56056277 100644 --- a/ruptures/costs/costt2.py +++ b/ruptures/costs/costt2.py @@ -1,3 +1,75 @@ +r""" + +.. _sec_costt2 + +Hotelling's T2 statistic based cost function + +==================================================================================================== + +Description +---------------------------------------------------------------------------------------------------- + +This cost function detects drift of the center of the signal with ignoring noisy demension. +Formally, for a signal :math:`\{y_t\}_t` on an interval :math:`I`, the cost function is equal to + + .. math:: c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y_t}\|_2 + +where :math:`\bar{y_t}` is the deviation from the center of the data on lower dimensional representation of :math:`\{y_t\}_{t\in I}`, + + .. math:: \bar{y_t} = {U_p}^T y_t {y_t}^T {U_p} + +where :math:`U_p` is singular vectors belong to the most important :math:`p` singular values of :math:`\{y_t\}_{t\in I}`. + +Usage +---------------------------------------------------------------------------------------------------- + +Start with the usual imports and create a signal. + +.. code-block:: python + + import numpy as np + import matplotlib.pylab as plt + import ruptures as rpt + # creation of data + n, dim = 500, 3 # number of samples, dimension + n_bkps, sigma = 3, 5 # number of change points, noise standard deviation + signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) + +Then create a :class:`CostTSquared` instance and print the cost of the sub-signal :code:`signal[50:150]`. + +.. code-block:: python + + c = rpt.costs.CostTSquared(n_components=2).fit(signal) + print(c.error(50, 150)) + + +You can also compute the sum of costs for a given list of change points. + +.. code-block:: python + + print(c.sum_of_costs(bkps)) + print(c.sum_of_costs([10, 100, 200, 250, n])) + + +In order to use this cost class in a change point detection algorithm (inheriting from :class:`BaseEstimator`), either pass a :class:`CostTSquared` instance (through the argument ``'custom_cost'``) or set :code:`model="rank"`. + +.. code-block:: python + + c = rpt.costs.CostTSquared(); algo = rpt.Dynp(custom_cost=c) + # is equivalent to + algo = rpt.Dynp(model="t2") + + +Code explanation +---------------------------------------------------------------------------------------------------- + +.. autoclass:: ruptures.costs.CostTSquared + :members: + :special-members: __init__ + +""" + + import numpy as np from sklearn.decomposition import PCA From 948586d4f873523893c048ae905343852a783979 Mon Sep 17 00:00:00 2001 From: statefb Date: Sun, 24 May 2020 00:09:53 +0900 Subject: [PATCH 06/20] fix Q statistics implementation --- ruptures/costs/costqresiduals.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index 84acd616..83d8d22b 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -122,14 +122,9 @@ def error(self, start, end): sub = self.signal[start:end] pca = PCA(self.n_components).fit(sub) - - # loading vectors - P = pca.components_.T - # compute residuals - X = sub - np.mean(sub, axis=0) - Q = X @ (np.eye(sub.shape[1]) - P @ P.T) @ X.T + sub_reconstruct = pca.inverse_transform(pca.transform(sub)) - return np.sum(np.diag(Q)) + return np.sum((sub - sub_reconstruct)**2) From d1060a1bba7bdfde78a6cf5307465b2775c3bde3 Mon Sep 17 00:00:00 2001 From: statefb Date: Sun, 28 Jun 2020 21:43:19 +0900 Subject: [PATCH 07/20] format scripts --- ruptures/costs/costqresiduals.py | 12 ++++-------- ruptures/costs/costt2.py | 10 ++++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index 83d8d22b..ec90e994 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -70,17 +70,16 @@ import numpy as np -from numpy.linalg import svd, eig from sklearn.decomposition import PCA -from sklearn.preprocessing import scale from ruptures.base import BaseCost from ruptures.costs import NotEnoughPoints + class CostQResiduals(BaseCost): - + """Q residuals.""" - + model = "qresid" def __init__(self, n_components=2): @@ -123,8 +122,5 @@ def error(self, start, end): pca = PCA(self.n_components).fit(sub) sub_reconstruct = pca.inverse_transform(pca.transform(sub)) - - return np.sum((sub - sub_reconstruct)**2) - - + return np.sum((sub - sub_reconstruct)**2) diff --git a/ruptures/costs/costt2.py b/ruptures/costs/costt2.py index 56056277..f61b0172 100644 --- a/ruptures/costs/costt2.py +++ b/ruptures/costs/costt2.py @@ -76,10 +76,11 @@ from ruptures.base import BaseCost from ruptures.costs import NotEnoughPoints + class CostTSquared(BaseCost): - + """Hotelling's T-Squared.""" - + model = "t2" def __init__(self, n_components=2): @@ -122,8 +123,5 @@ def error(self, start, end): pca = PCA(self.n_components) sub_tr = pca.fit_transform(sub) - - return np.sum(np.diag(sub_tr @ sub_tr.T)) - - + return np.sum(np.diag(sub_tr @ sub_tr.T)) From b5c5cca2607c284192d221f77ab6059872e24058 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 08:50:28 +0200 Subject: [PATCH 08/20] small typos + formatting --- ruptures/datasets/pw_normal.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ruptures/datasets/pw_normal.py b/ruptures/datasets/pw_normal.py index 30e1b369..94c37e43 100644 --- a/ruptures/datasets/pw_normal.py +++ b/ruptures/datasets/pw_normal.py @@ -53,9 +53,10 @@ def pw_normal(n_samples=200, n_bkps=3, n_noisy_features=0): Args: n_samples (int, optional): signal length n_bkps (int, optional): number of change points - add_noise (int, optional): number of noisy dimensions. If not zero, - total dimensions of the signal will be 2 + `n_noisy_features` and - the last `n_noisy_features` demensions will be noise. + n_noisy_features (int, optional): number of noisy dimensions. If not + zero, total dimensions of the signal will be 2 + `n_noisy_features` + and the last `n_noisy_features` dimensions will be white noise only + (no change point). Returns: tuple: signal of shape (n_samples, 2), list of breakpoints @@ -73,6 +74,6 @@ def pw_normal(n_samples=200, n_bkps=3, n_noisy_features=0): # Add noise if n_noisy_features > 0: - signal = np.hstack((signal, np.random.randn(n_samples, n_noisy_features))) + signal = np.c_[signal, np.random.randn(n_samples, n_noisy_features)] return signal, bkps From 42a8b5aeca9916c8158dff1d341eee3e0a52cd10 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 09:36:03 +0200 Subject: [PATCH 09/20] fix doc --- ruptures/datasets/pw_normal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ruptures/datasets/pw_normal.py b/ruptures/datasets/pw_normal.py index 94c37e43..939bbc8c 100644 --- a/ruptures/datasets/pw_normal.py +++ b/ruptures/datasets/pw_normal.py @@ -8,6 +8,7 @@ ---------------------------------------------------------------------------------------------------- This function simulates a 2D signal of Gaussian i.i.d. random variables with zero mean and covariance matrix alternating between :math:`[[1, 0.9], [0.9, 1]]` and :math:`[[1, -0.9], [-0.9, 1]]` at every change point. +Users can also append `noisy` dimensions (i.e. without change points). .. figure:: /images/correlation_shift.png :scale: 50 % @@ -49,6 +50,7 @@ def pw_normal(n_samples=200, n_bkps=3, n_noisy_features=0): """Return a 2D piecewise Gaussian signal and the associated changepoints. + Users can also append `noisy` dimensions (i.e. without change points). Args: n_samples (int, optional): signal length From 568b27bd0b29ab4bb0e9780b2f4b2066c41493bb Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 09:37:43 +0200 Subject: [PATCH 10/20] replace norm computation by numpy equivalent --- ruptures/costs/costt2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruptures/costs/costt2.py b/ruptures/costs/costt2.py index f61b0172..4e2b2b62 100644 --- a/ruptures/costs/costt2.py +++ b/ruptures/costs/costt2.py @@ -124,4 +124,4 @@ def error(self, start, end): pca = PCA(self.n_components) sub_tr = pca.fit_transform(sub) - return np.sum(np.diag(sub_tr @ sub_tr.T)) + return np.linalg.norm(sub_tr)**2 From e5588fcdc024d976e3e044989c1ea7e6a2877c7f Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 10:41:05 +0200 Subject: [PATCH 11/20] change filename --- ruptures/costs/costt2.py | 127 --------------------------------------- 1 file changed, 127 deletions(-) delete mode 100644 ruptures/costs/costt2.py diff --git a/ruptures/costs/costt2.py b/ruptures/costs/costt2.py deleted file mode 100644 index 4e2b2b62..00000000 --- a/ruptures/costs/costt2.py +++ /dev/null @@ -1,127 +0,0 @@ -r""" - -.. _sec_costt2 - -Hotelling's T2 statistic based cost function - -==================================================================================================== - -Description ----------------------------------------------------------------------------------------------------- - -This cost function detects drift of the center of the signal with ignoring noisy demension. -Formally, for a signal :math:`\{y_t\}_t` on an interval :math:`I`, the cost function is equal to - - .. math:: c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y_t}\|_2 - -where :math:`\bar{y_t}` is the deviation from the center of the data on lower dimensional representation of :math:`\{y_t\}_{t\in I}`, - - .. math:: \bar{y_t} = {U_p}^T y_t {y_t}^T {U_p} - -where :math:`U_p` is singular vectors belong to the most important :math:`p` singular values of :math:`\{y_t\}_{t\in I}`. - -Usage ----------------------------------------------------------------------------------------------------- - -Start with the usual imports and create a signal. - -.. code-block:: python - - import numpy as np - import matplotlib.pylab as plt - import ruptures as rpt - # creation of data - n, dim = 500, 3 # number of samples, dimension - n_bkps, sigma = 3, 5 # number of change points, noise standard deviation - signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) - -Then create a :class:`CostTSquared` instance and print the cost of the sub-signal :code:`signal[50:150]`. - -.. code-block:: python - - c = rpt.costs.CostTSquared(n_components=2).fit(signal) - print(c.error(50, 150)) - - -You can also compute the sum of costs for a given list of change points. - -.. code-block:: python - - print(c.sum_of_costs(bkps)) - print(c.sum_of_costs([10, 100, 200, 250, n])) - - -In order to use this cost class in a change point detection algorithm (inheriting from :class:`BaseEstimator`), either pass a :class:`CostTSquared` instance (through the argument ``'custom_cost'``) or set :code:`model="rank"`. - -.. code-block:: python - - c = rpt.costs.CostTSquared(); algo = rpt.Dynp(custom_cost=c) - # is equivalent to - algo = rpt.Dynp(model="t2") - - -Code explanation ----------------------------------------------------------------------------------------------------- - -.. autoclass:: ruptures.costs.CostTSquared - :members: - :special-members: __init__ - -""" - - -import numpy as np -from sklearn.decomposition import PCA - -from ruptures.base import BaseCost -from ruptures.costs import NotEnoughPoints - - -class CostTSquared(BaseCost): - - """Hotelling's T-Squared.""" - - model = "t2" - - def __init__(self, n_components=2): - self.min_size = 2 - self.signal = None - self.n_components = n_components - - def fit(self, signal): - """Set parameters of the instance. - - Args: - signal (array): signal. Shape (n_samples,) or (n_samples, n_features) - - Returns: - self - """ - if signal.ndim == 1 or (signal.ndim == 2 and signal.shape[1] == 1): - raise ValueError("The signal must be multivariate.") - else: - self.signal = signal - - return self - - def error(self, start, end): - """Return the approximation cost on the segment [start:end]. - - Args: - start (int): start of the segment - end (int): end of the segment - - Returns: - float: segment cost - - Raises: - NotEnoughPoints: when the segment is too short (less than ``'min_size'`` samples). - """ - if end - start < self.min_size: - raise NotEnoughPoints - sub = self.signal[start:end] - - pca = PCA(self.n_components) - sub_tr = pca.fit_transform(sub) - - return np.linalg.norm(sub_tr)**2 From fbe03451a7008dacad6cad7a2c367e8da718777d Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 10:41:27 +0200 Subject: [PATCH 12/20] change filename and correct doc --- ruptures/costs/costtsquared.py | 132 +++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 ruptures/costs/costtsquared.py diff --git a/ruptures/costs/costtsquared.py b/ruptures/costs/costtsquared.py new file mode 100644 index 00000000..13be1e61 --- /dev/null +++ b/ruptures/costs/costtsquared.py @@ -0,0 +1,132 @@ +r""" +.. _sec-costtsquared: + +Hotelling's T2 statistic based cost function +==================================================================================================== + +Description +---------------------------------------------------------------------------------------------------- + +This cost function detects drift of the center of the signal :cite:`tsquared-Banko2011`. +Formally, for a signal :math:`\{y_t\}_t` on an interval :math:`I`, the cost function is equal to + + .. math:: c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y_t}\|_2 + +where :math:`\bar{y_t}` is the deviation from the center of the data on lower dimensional representation of :math:`\{y_t\}_{t\in I}`, + + .. math:: \bar{y_t} = {U_p}^T y_t {y_t}^T {U_p} + +where :math:`U_p` is singular vectors belong to the most important :math:`p` singular values of :math:`\{y_t\}_{t\in I}`. + +Usage +---------------------------------------------------------------------------------------------------- + +Start with the usual imports and create a signal. + +.. code-block:: python + + import numpy as np + import matplotlib.pylab as plt + import ruptures as rpt + # creation of data + n, dim = 500, 3 # number of samples, dimension + n_bkps, sigma = 3, 5 # number of change points, noise standard deviation + signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) + +Then create a :class:`CostTSquared` instance and print the cost of the sub-signal :code:`signal[50:150]`. + +.. code-block:: python + + c = rpt.costs.CostTSquared(n_components=2).fit(signal) + print(c.error(50, 150)) + + +You can also compute the sum of costs for a given list of change points. + +.. code-block:: python + + print(c.sum_of_costs(bkps)) + print(c.sum_of_costs([10, 100, 200, 250, n])) + + +In order to use this cost class in a change point detection algorithm (inheriting from :class:`BaseEstimator`), either pass a :class:`CostTSquared` instance (through the argument ``'custom_cost'``) or set :code:`model="rank"`. + +.. code-block:: python + + c = rpt.costs.CostTSquared(); algo = rpt.Dynp(custom_cost=c) + # is equivalent to + algo = rpt.Dynp(model="t2") + + +Code explanation +---------------------------------------------------------------------------------------------------- + +.. autoclass:: ruptures.costs.CostTSquared + :members: + :special-members: __init__ + +.. rubric:: References + +.. bibliography:: ../biblio.bib + :style: alpha + :cited: + :labelprefix: RA + :keyprefix: tsquared- +""" + + +import numpy as np +from sklearn.decomposition import PCA + +from ruptures.base import BaseCost +from ruptures.costs import NotEnoughPoints + + +class CostTSquared(BaseCost): + + """Hotelling's T-Squared.""" + + model = "t2" + + def __init__(self, n_components=2): + self.min_size = 2 + self.signal = None + self.n_components = n_components + + def fit(self, signal): + """Set parameters of the instance. + + Args: + signal (array): signal. Shape (n_samples,) or (n_samples, n_features) + + Returns: + self + """ + if signal.ndim == 1 or (signal.ndim == 2 and signal.shape[1] == 1): + raise ValueError("The signal must be multivariate.") + else: + self.signal = signal + + return self + + def error(self, start, end): + """Return the approximation cost on the segment [start:end]. + + Args: + start (int): start of the segment + end (int): end of the segment + + Returns: + float: segment cost + + Raises: + NotEnoughPoints: when the segment is too short (less than ``'min_size'`` samples). + """ + if end - start < self.min_size: + raise NotEnoughPoints + sub = self.signal[start:end] + + pca = PCA(self.n_components) + sub_tr = pca.fit_transform(sub) + + return np.linalg.norm(sub_tr)**2 From a9c1f22cc911b25180972af1119a054c9ab52676 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 10:41:44 +0200 Subject: [PATCH 13/20] add citation article --- docs/source/biblio.bib | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/biblio.bib b/docs/source/biblio.bib index 4a15fd48..9b1438d6 100644 --- a/docs/source/biblio.bib +++ b/docs/source/biblio.bib @@ -2198,3 +2198,12 @@ @article{TRUONG2020107299 volume = {167}, year = {2020} } +@article{Banko2011, +author = {Banko, Z. and Dobos, L. and Abonyi, J.}, +journal = {Conservation, Information, Evolution - towards a sustainable engineering and economy}, +number = {1}, +pages = {11--24}, +title = {{Dynamic principal component analysis in multivariate time-series segmentation}}, +volume = {1}, +year = {2011} +} From 03b0fe6a493730bcbceeb968c27a6c58e413a489 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 10:43:14 +0200 Subject: [PATCH 14/20] correct filename --- ruptures/costs/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ruptures/costs/__init__.py b/ruptures/costs/__init__.py index 573b8287..8570da12 100644 --- a/ruptures/costs/__init__.py +++ b/ruptures/costs/__init__.py @@ -18,7 +18,7 @@ costml costrank costqresiduals - costt2 + costtsquared costcustom """ @@ -34,4 +34,4 @@ from .costml import CostMl from .costrank import CostRank from .costqresiduals import CostQResiduals -from .costt2 import CostTSquared \ No newline at end of file +from .costtsquared import CostTSquared \ No newline at end of file From a1465d74fee68dcb43e7422d0f0157e28892aae0 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Tue, 21 Jul 2020 10:43:40 +0200 Subject: [PATCH 15/20] add section --- docs/source/costs/costtsquared.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/source/costs/costtsquared.rst diff --git a/docs/source/costs/costtsquared.rst b/docs/source/costs/costtsquared.rst new file mode 100644 index 00000000..4fa91929 --- /dev/null +++ b/docs/source/costs/costtsquared.rst @@ -0,0 +1 @@ +.. automodule:: ruptures.costs.costtsquared From 62437ef04efa4a651c6aa4238426e2e93d383704 Mon Sep 17 00:00:00 2001 From: statefb Date: Sun, 2 Aug 2020 14:37:39 +0900 Subject: [PATCH 16/20] add `n_component` argument explanation --- ruptures/costs/costqresiduals.py | 8 ++++++++ ruptures/costs/costtsquared.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index ec90e994..cf740006 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -83,6 +83,14 @@ class CostQResiduals(BaseCost): model = "qresid" def __init__(self, n_components=2): + """Create a new instance. + + Args: + n_components (int, optional): number of components to keep in signal in each segment. + + Returns: + self + """ self.min_size = 2 self.signal = None self.n_components = n_components diff --git a/ruptures/costs/costtsquared.py b/ruptures/costs/costtsquared.py index 13be1e61..6ac84331 100644 --- a/ruptures/costs/costtsquared.py +++ b/ruptures/costs/costtsquared.py @@ -89,6 +89,14 @@ class CostTSquared(BaseCost): model = "t2" def __init__(self, n_components=2): + """Create a new instance. + + Args: + n_components (int, optional): number of components to keep in signal in each segment. + + Returns: + self + """ self.min_size = 2 self.signal = None self.n_components = n_components From 9280d25685c48d2fdc1e533bdb214d7a7da40bd4 Mon Sep 17 00:00:00 2001 From: statefb Date: Sun, 2 Aug 2020 21:48:45 +0900 Subject: [PATCH 17/20] add citation to qresiduals --- ruptures/costs/costqresiduals.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index cf740006..eb56d572 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -8,7 +8,7 @@ Description ---------------------------------------------------------------------------------------------------- -This cost function detects change of the correlation between the variables with ignoring noisy demension. +This cost function detects change of the correlation between the variables with ignoring noisy demension :cite:`qresiduals-Banko2011`. Formally, for a signal :math:`\{y_t\}_t` on an interval :math:`I`, the cost function is equal to .. math:: c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y_t}\|_2 @@ -66,6 +66,13 @@ :members: :special-members: __init__ +.. rubric:: References + +.. bibliography:: ../biblio.bib + :style: alpha + :cited: + :labelprefix: RA + :keyprefix: qresiduals- """ From c6e496b764359da89b2dc4e3c0a4ab5b695e46a2 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Thu, 13 Aug 2020 15:40:14 +0200 Subject: [PATCH 18/20] Fix citation label (documentation) --- ruptures/costs/costqresiduals.py | 2 +- ruptures/costs/costtsquared.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ruptures/costs/costqresiduals.py b/ruptures/costs/costqresiduals.py index eb56d572..f4a79d9c 100644 --- a/ruptures/costs/costqresiduals.py +++ b/ruptures/costs/costqresiduals.py @@ -71,7 +71,7 @@ .. bibliography:: ../biblio.bib :style: alpha :cited: - :labelprefix: RA + :labelprefix: Q :keyprefix: qresiduals- """ diff --git a/ruptures/costs/costtsquared.py b/ruptures/costs/costtsquared.py index 6ac84331..dea24f53 100644 --- a/ruptures/costs/costtsquared.py +++ b/ruptures/costs/costtsquared.py @@ -70,7 +70,7 @@ .. bibliography:: ../biblio.bib :style: alpha :cited: - :labelprefix: RA + :labelprefix: T :keyprefix: tsquared- """ From 24d664f163ddbbecc9f5b2916e1d55e1695ae139 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Thu, 13 Aug 2020 15:40:29 +0200 Subject: [PATCH 19/20] add link to python file in documentation --- docs/source/costs/costqresiduals.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/source/costs/costqresiduals.rst diff --git a/docs/source/costs/costqresiduals.rst b/docs/source/costs/costqresiduals.rst new file mode 100644 index 00000000..d913c8bb --- /dev/null +++ b/docs/source/costs/costqresiduals.rst @@ -0,0 +1 @@ +.. automodule:: ruptures.costs.costqresiduals From 22305ea06471e5418bd3ff19bd12447a338c8544 Mon Sep 17 00:00:00 2001 From: Charles Truong Date: Thu, 13 Aug 2020 15:53:47 +0200 Subject: [PATCH 20/20] add dependency (scikit-learn) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a8590319..b8fda62d 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name="ruptures", version="1.0.5", packages=find_packages(exclude=["docs", "tests*", "images"]), - install_requires=["numpy", "scipy"], + install_requires=["numpy", "scikit-learn", "scipy"], extras_require={"display": ["matplotlib"]}, python_requires=">=3", # url='ctruong.perso.math.cnrs.fr/ruptures',