Merge pull request #14 from BuildingEnergySimulationTools/SkProphet

BaptisteDE · web-flow · commit 1c7b314f50cf · 2025-01-31T10:22:08.000+01:00
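Sk prophet: add SkProphet regressor with its prophet dependency and tests; extract TideBaseMixin from BaseProcessing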
diff --git a/setup.py b/setup.py
@@ -40,6 +40,7 @@
         "plotly>=5.3.1",
         "requests>=2.32.3",
         "influxdb-client>=1.48.0",
+        "prophet>=1.1.6",
     ],
     packages=find_packages(exclude=["tests*"]),
     include_package_data=True,
diff --git a/tests/test_regressors.py b/tests/test_regressors.py
@@ -2,45 +2,74 @@
 
 import pandas as pd
 import numpy as np
+import pytest
 
-from tide.regressors import SkSTLForecast
+from tide.regressors import SkSTLForecast, SkProphet
 
 
 class TestRegressors:
-    def test_stl_forecaster(self):
+    @pytest.fixture
+    def toy_data(self):
         index = pd.date_range("2009-01-01", "2009-12-31 23:00:00", freq="h", tz="UTC")
         cumsum_second = np.arange(
-            start=0, stop=(index[-1] - index[0]).total_seconds() + 1, step=3600
+            0, (index[-1] - index[0]).total_seconds() + 1, step=3600
         )
+
         annual = 5 * -np.cos(
             2 * np.pi / dt.timedelta(days=360).total_seconds() * cumsum_second
         )
         daily = 5 * np.sin(
             2 * np.pi / dt.timedelta(days=1).total_seconds() * cumsum_second
         )
-        toy_series = pd.Series(annual + daily + 5, index=index)
 
-        toy_df = pd.DataFrame({"Temp_1": toy_series, "Temp_2": toy_series * 1.25 + 2})
+        toy_series = pd.Series(annual + daily + 5, index=index)
+        toy_df = pd.DataFrame(
+            {"Temp_1__°C": toy_series, "Temp_2__°C": toy_series * 1.25 + 2}
+        )
+        return toy_df
 
+    def test_stl_forecaster(self, toy_data):
         forecaster = SkSTLForecast(
             period="24h",
             trend="15d",
             ar_kwargs=dict(order=(1, 1, 0), trend="t"),
             backcast=False,
         )
 
-        forecaster.fit(toy_df["2009-01-24":"2009-07-24"])
-
+        forecaster.fit(toy_data["2009-01-24":"2009-07-24"])
         reg_score = forecaster.score(
-            toy_df["2009-07-27":"2009-07-30"], toy_df["2009-07-27":"2009-07-30"]
+            toy_data["2009-07-27":"2009-07-30"], toy_data["2009-07-27":"2009-07-30"]
         )
         assert reg_score > 0.99
 
         backcaster = SkSTLForecast(backcast=True)
 
-        backcaster.fit(toy_df["2009-01-24":"2009-07-24"])
+        backcaster.fit(toy_data["2009-01-24":"2009-07-24"])
 
         reg_score = backcaster.score(
-            toy_df["2009-01-20":"2009-01-22"], toy_df["2009-01-20":"2009-01-22"]
+            toy_data["2009-01-20":"2009-01-22"], toy_data["2009-01-20":"2009-01-22"]
+        )
+
+        assert reg_score > 0.99
+
+    def test_prophet_forecaster(self, toy_data):
+        forecaster = SkProphet()
+        forecaster.fit(toy_data["2009-01-24":"2009-07-24"])
+
+        reg_score = forecaster.score(
+            toy_data["2009-07-27":"2009-07-30"], toy_data["2009-07-27":"2009-07-30"]
         )
         assert reg_score > 0.99
+
+        reg_score = forecaster.score(
+            toy_data["2009-01-20":"2009-01-22"], toy_data["2009-01-20":"2009-01-22"]
+        )
+
+        assert reg_score > 0.99
+
+        forecaster = SkProphet(return_upper_lower_bounds=True)
+        forecaster.fit(toy_data["2009-01-24":"2009-07-24"])
+        feat_out = list(forecaster.get_feature_names_out())
+        predictions = forecaster.predict(toy_data["2009-07-27":"2009-07-30"])
+
+        assert np.all([feat in predictions.columns for feat in feat_out])
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -2,6 +2,7 @@
 
 import pandas as pd
 import numpy as np
+import pytest
 
 from tide.utils import (
     get_data_blocks,
@@ -13,6 +14,7 @@
     timedelta_to_int,
     NamedList,
     get_series_bloc,
+    edit_tag_name_by_level,
 )
 
 DF_COLUMNS = pd.DataFrame(
@@ -29,6 +31,18 @@
 
 
 class TestUtils:
+    def test_edit_tag_name_by_level(self):
+        col_name = "temp__°C__bloc1"
+        new_name = edit_tag_name_by_level(col_name, 0, "temp_1")
+
+        assert new_name == "temp_1__°C__bloc1"
+
+        with pytest.raises(
+            ValueError,
+            match=r"Cannot edit tag name at level index 3. Columns have only 3 tag levels.",
+        ):
+            edit_tag_name_by_level(col_name, 3, "temp_1")
+
     def test_named_list(self):
         test = NamedList(["a", "b", "c", "d"])
 
diff --git a/tide/base.py b/tide/base.py
@@ -25,38 +25,12 @@
 from tide.meteo import get_oikolab_df
 
 
-def _ensure_list(item):
+class TideBaseMixin:
     """
-    Ensures the input is returned as a list.
-
-    Parameters
-    ----------
-    item : any
-        The input item to be converted to a list if it is not already one.
-        If the input is `None`, an empty list is returned.
-
-    Returns
-    -------
-    list
-        - If `item` is `None`, returns an empty list.
-        - If `item` is already a list, it is returned as is.
-        - Otherwise, wraps the `item` in a list and returns it.
-    """
-    if item is None:
-        return []
-    return item if isinstance(item, list) else [item]
-
-
-class BaseProcessing(ABC, TransformerMixin, BaseEstimator):
-    """
-    Abstract base class for processing pipelines with feature checks and
-    transformation logic.
-
-    This class is designed to facilitate transformations by checking input data
-    (DataFrame or Series with DatetimeIndex), ensuring the presence
-    of required features, tracking added and removed features, and enabling
-    seamless integration with scikit-learn's API through fit and transform
-    methods.
+    This class is designed to provide Tide base functionalities including :
+    - checking features in and out
+    - checking mandatory features
+    - Modifying features names according to tide's tags
 
     Parameters
     ----------
@@ -79,15 +53,6 @@ class BaseProcessing(ABC, TransformerMixin, BaseEstimator):
         columns.
     get_feature_names_in():
         Returns the names of the features as initially fitted.
-    fit(X, y=None):
-        Fits the transformer to the input data.
-    transform(X):
-        Applies the transformation to the input data.
-    _fit_implementation(X, y=None):
-        Abstract method for the fitting logic. Must be implemented by subclasses.
-    _transform_implementation(X):
-        Abstract method for the transformation logic. Must be implemented by
-        subclasses.
     """
 
     def __init__(
@@ -100,6 +65,15 @@ def __init__(
         self.removed_columns = removed_columns
         self.added_columns = added_columns
 
+    def check_required_features(self, X):
+        if self.required_columns is not None:
+            if not set(self.required_columns).issubset(X.columns):
+                raise ValueError("One or several required columns are missing")
+
+    def fit_check_features(self, X):
+        self.check_required_features(X)
+        self.feature_names_in_ = list(X.columns)
+
     def get_set_tags_values_columns(self, X, tag_level: int, value: str):
         nb_tags = get_tag_levels(X.columns)
         if tag_level > nb_tags - 1:
@@ -119,15 +93,6 @@ def get_set_tags_values_columns(self, X, tag_level: int, value: str):
     def set_tags_values(self, X, tag_level: int, value: str):
         X.columns = self.get_set_tags_values_columns(X, tag_level, value)
 
-    def check_features(self, X):
-        if self.required_columns is not None:
-            if not set(self.required_columns).issubset(X.columns):
-                raise ValueError("One or several required columns are missing")
-
-    def fit_check_features(self, X):
-        self.check_features(X)
-        self.feature_names_in_ = list(X.columns)
-
     def get_feature_names_out(self, input_features=None):
         if input_features is None:
             check_is_fitted(self, attributes=["feature_names_in_"])
@@ -146,14 +111,73 @@ def get_feature_names_in(self):
         check_is_fitted(self, attributes=["feature_names_in_"])
         return self.feature_names_in_
 
+
+class BaseProcessing(ABC, TransformerMixin, BaseEstimator, TideBaseMixin):
+    """
+    Abstract base class for processing pipelines with feature checks and
+    transformation logic.
+
+    This class is designed to facilitate transformations by checking input data
+    (DataFrame or Series with DatetimeIndex), ensuring the presence
+    of required features, tracking added and removed features, and enabling
+    seamless integration with scikit-learn's API through fit and transform
+    methods.
+
+    Parameters
+    ----------
+    required_columns : str or list[str], optional
+        Column names that must be present in the input data. Defaults to None.
+    removed_columns : str or list[str], optional
+        Column that will be removed during the transform process. Defaults to None.
+    added_columns : str or list[str], optional
+        Column that will be added to the output feature set during transform
+        process. Defaults to None.
+
+    Methods
+    -------
+    check_required_features(X):
+        Ensures that the required columns are present in the input DataFrame.
+    fit_check_features(X):
+        Checks required columns and stores the initial feature names.
+    get_feature_names_out():
+        Computes the final set of feature names, accounting for added and removed
+        columns.
+    get_feature_names_in():
+        Returns the names of the features as initially fitted.
+    fit(X, y=None):
+        Fits the transformer to the input data.
+    transform(X):
+        Applies the transformation to the input data.
+    _fit_implementation(X, y=None):
+        Abstract method for the fitting logic. Must be implemented by subclasses.
+    _transform_implementation(X):
+        Abstract method for the transformation logic. Must be implemented by
+        subclasses.
+    """
+
+    def __init__(
+        self,
+        required_columns: str | list[str] = None,
+        removed_columns: str | list[str] = None,
+        added_columns: str | list[str] = None,
+    ):
+        TideBaseMixin.__init__(
+            self,
+            required_columns=required_columns,
+            removed_columns=removed_columns,
+            added_columns=added_columns,
+        )
+        TransformerMixin.__init__(self)
+        BaseEstimator.__init__(self)
+
     def fit(self, X: pd.Series | pd.DataFrame, y=None):
         X = check_and_return_dt_index_df(X)
         self.fit_check_features(X)
         self._fit_implementation(X, y)
         return self
 
     def transform(self, X: pd.Series | pd.DataFrame):
-        self.check_features(X)
         X = check_and_return_dt_index_df(X)
+        self.check_required_features(X)  # same order as fit(): needs X.columns
         return self._transform_implementation(X)
 
diff --git a/tide/regressors.py b/tide/regressors.py
diff --git a/tide/utils.py b/tide/utils.py