BuildingEnergySimulationTools · BaptisteDE · Apr 16, 2025 · Apr 16, 2025
diff --git a/tests/test_processing.py b/tests/test_processing.py
@@ -963,19 +963,19 @@ def test_drop_columns(self):
             index=pd.date_range("2009", freq="h", periods=2, tz="UTC"),
         )
 
-        col_dropper = KeepColumns()
+        col_dropper = DropColumns()
         col_dropper.fit(df)
         res = col_dropper.transform(df.copy())
-        pd.testing.assert_frame_equal(df, res)
-        check_feature_names_out(col_dropper, res)
+        assert res.shape == (2, 0)
+        check_feature_names_out(col_dropper, pd.DataFrame(index=df.index))
 
         col_dropper = DropColumns(columns="a")
         col_dropper.fit(df)
         res = col_dropper.transform(df.copy())
         pd.testing.assert_frame_equal(df[["b", "c"]], res)
         check_feature_names_out(col_dropper, res)
 
-        col_dropper = DropColumns(columns=["a", "b", "c"])
+        col_dropper = DropColumns(columns=["a|b", "c"])
         col_dropper.fit(df)
         res = col_dropper.transform(df.copy())
         assert res.shape == (2, 0)

diff --git a/tide/processing.py b/tide/processing.py
@@ -2639,8 +2639,8 @@ class DropColumns(BaseProcessing):
     ----------
     columns : str | list[str], optional (default=None)
         The column name or a list of column names to be dropped.
-        If None, no columns are dropped and the DataFrame is returned unchanged.
-        Example: 'temp__°C' or ['temp__°C', 'humid__%']
+        If None, ALL columns are dropped and only the index is kept.
+        Example: 'temp__°C' or ['temp__°C', 'humid__%'] or '°C|%'
 
     Attributes
     ----------
@@ -2673,7 +2673,7 @@ class DropColumns(BaseProcessing):
     2024-01-01 00:01:00+00:00     50.0     1010.0
     2024-01-01 00:02:00+00:00     55.0     1020.0
     >>> # Drop multiple columns
-    >>> dropper_multi = DropColumns(columns=["temp__°C", "humid__%"])
+    >>> dropper_multi = DropColumns(columns="°C|%")
     >>> result_multi = dropper_multi.fit_transform(df)
     >>> print(result_multi)
                            press__Pa
@@ -2686,8 +2686,8 @@ class DropColumns(BaseProcessing):
     - If a specified column doesn't exist in the DataFrame, it will be silently
       ignored
     - The order of remaining columns is preserved
-    - If no columns are specified (columns=None), the DataFrame is returned
-      unchanged
+    - If no columns are specified (columns=None), all columns are dropped and
+      an empty DataFrame that keeps only the index is returned
 
     Returns
     -------
@@ -2702,85 +2702,82 @@ def __init__(self, columns: str | list[str] = None):
         BaseProcessing.__init__(self)
 
     def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None):
-        self.required_columns = self.columns
-        if self.columns is not None:
-            self.feature_names_out_ = list(X.columns.drop(self.columns))
+        self.required_columns = tide_request(X, self.columns)
+        self.feature_names_out_ = list(
+            X.drop(self.required_columns, axis="columns").columns
+        )
 
     def _transform_implementation(self, X: pd.Series | pd.DataFrame):
-        return (
-            X.drop(self.required_columns, axis="columns")
-            if self.columns is not None
-            else X
-        )
+        return X.drop(self.required_columns, axis="columns")
 
 
 class KeepColumns(BaseProcessing):
     """
-        A transformer that keeps specified columns from a pandas DataFrame.
-
-        It is particularly useful at the final step of data preprocessing.
-        When only some columns are passed to a model
-
-        Parameters
-        ----------
-        columns : str | list[str], optional (default=None)
-            The column name or a list of column names to be dropped.
-            If None, no columns are dropped and the DataFrame is returned unchanged.
-            Example: 'temp__°C' or ['temp__°C', 'humid__%'] or '°C|%'
-
-        Attributes
-        ----------
-        feature_names_in_ : list[str]
-            Names of input columns (set during fit).
-        feature_names_out_ : list[str]
-            Names of output columns (input columns minus dropped columns).
-
-        Examples
-        --------
-        >>> import pandas as pd
-        >>> # Create DataFrame with DateTimeIndex
-        >>> dates = pd.date_range(
-        ...     start="2024-01-01 00:00:00", end="2024-01-01 00:02:00", freq="1min"
-        ... ).tz_localize("UTC")
-        >>> df = pd.DataFrame(
-        ...     {
-        ...         "temp__°C": [20, 21, 22],
-        ...         "humid__%": [45, 50, 55],
-        ...         "press__Pa": [1000, 1010, 1020],
-        ...     },
-        ...     index=dates,
-        ... )
-        >>> # Keep a single column
-        >>> keeper = KeepColumns(columns="temp__°C")
-        >>> result = keeper.fit_transform(df)
-        >>> print(result)
-                                   temp__°C
-        2024-01-01 00:00:00+00:00        20
-        2024-01-01 00:01:00+00:00        21
-        2024-01-01 00:02:00+00:00        22
-        >>> # Keep multiple columns
-        >>> keeper_multi = KeepColumns(columns="°C|%")
-        >>> result_multi = keeper_multi.fit_transform(df)
-        >>> print(result_multi)
-                                   temp__°C  humid__%
-        2024-01-01 00:00:00+00:00        20        45
-        2024-01-01 00:01:00+00:00        21        50
-        2024-01-01 00:02:00+00:00        22        55
-
-        Notes
-        -----
-        - If a specified column doesn't exist in the DataFrame, it will be silently
-          ignored
-        - The order of selected columns is preserved
-        - If no columns are specified (columns=None), the DataFrame is returned
-          unchanged
-
-        Returns
-        -------
-        pd.DataFrame
-            The DataFrame with specified columns removed. The output maintains
-            the same DateTimeIndex as the input, with only the specified columns
-            removed.
+    A transformer that keeps specified columns from a pandas DataFrame.
+
+    It is particularly useful at the final step of data preprocessing,
+    when only some columns are passed to a model.
+
+    Parameters
+    ----------
+    columns : str | list[str], optional (default=None)
+        The column name or a list of column names to be kept.
+        If None, no columns are dropped and the DataFrame is returned unchanged.
+        Example: 'temp__°C' or ['temp__°C', 'humid__%'] or '°C|%'
+
+    Attributes
+    ----------
+    feature_names_in_ : list[str]
+        Names of input columns (set during fit).
+    feature_names_out_ : list[str]
+        Names of output columns (the input columns that are kept).
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> # Create DataFrame with DateTimeIndex
+    >>> dates = pd.date_range(
+    ...     start="2024-01-01 00:00:00", end="2024-01-01 00:02:00", freq="1min"
+    ... ).tz_localize("UTC")
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "temp__°C": [20, 21, 22],
+    ...         "humid__%": [45, 50, 55],
+    ...         "press__Pa": [1000, 1010, 1020],
+    ...     },
+    ...     index=dates,
+    ... )
+    >>> # Keep a single column
+    >>> keeper = KeepColumns(columns="temp__°C")
+    >>> result = keeper.fit_transform(df)
+    >>> print(result)
+                               temp__°C
+    2024-01-01 00:00:00+00:00        20
+    2024-01-01 00:01:00+00:00        21
+    2024-01-01 00:02:00+00:00        22
+    >>> # Keep multiple columns
+    >>> keeper_multi = KeepColumns(columns="°C|%")
+    >>> result_multi = keeper_multi.fit_transform(df)
+    >>> print(result_multi)
+                               temp__°C  humid__%
+    2024-01-01 00:00:00+00:00        20        45
+    2024-01-01 00:01:00+00:00        21        50
+    2024-01-01 00:02:00+00:00        22        55
+
+    Notes
+    -----
+    - If a specified column doesn't exist in the DataFrame, it will be silently
+      ignored
+    - The order of selected columns is preserved
+    - If no columns are specified (columns=None), the DataFrame is returned
+      unchanged
+
+    Returns
+    -------
+    pd.DataFrame
+        The DataFrame containing only the specified columns. The output
+        maintains the same DateTimeIndex as the input, with all other columns
+        removed.
     """
 
     def __init__(self, columns: str | list[str] = None):