From 61f3b12e5b6102a8c4dd0faa9824600b322828de Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Tue, 20 May 2025 18:41:49 -0400
Subject: [PATCH 1/9] TutorTask527: Create a downloader script identical to
 FRED in interface - badly linted

---
 causal_automl/download_gsio_data.py | 101 ++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 causal_automl/download_gsio_data.py

diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py
new file mode 100644
index 0000000000..454cab33ac
--- /dev/null
+++ b/causal_automl/download_gsio_data.py
@@ -0,0 +1,101 @@
+import logging as log
+import os
+import time
+from typing import Dict, Optional
+
+import gridstatusio
+import pandas as pd
+import ratelimit
+
+_LOG = log.getLogger(__name__)
+
+# #############################################################################
+# GridStatusDataDownloader
+# #############################################################################
+
+
+class GridStatusDataDownloader:
+    """
+    Download historical data from GridStatus.io.
+    """
+
+    def __init__(self, api_key: Optional[str] = None) -> None:
+        """
+        Initialize the GridStatus data downloader with the API key.
+
+        If no API key is passed as a parameter, it is read from the
+        GRIDSTATUS_API_KEY environment variable.
+
+        :param api_key: GridStatus API key
+        """
+        key = api_key or os.getenv("GRIDSTATUS_API_KEY")
+        if not key:
+            raise ValueError("GridStatus API key is required")
+        self._client = gridstatusio.GridStatusClient(api_key=key)
+
+    @ratelimit.sleep_and_retry
+    @ratelimit.limits(calls=60, period=60)
+    def download_series(
+        self,
+        id_: str,
+        start_timestamp: Optional[pd.Timestamp] = None,
+        end_timestamp: Optional[pd.Timestamp] = None,
+    ) -> Optional[pd.DataFrame]:
+        """
+        Download historical series data.
+
+        When no start and end timestamps are passed, the entire time series is downloaded.
+
+        Example of timestamp - "2010-01-01 08:00:00+00:00"
+
+        Example of a returned series:
+
+        ```
+        interval_start_utc          interval_end_utc            region          market  non_spinning_reserves   regulation_down regulation_mileage_down regulation_mileage_up   regulation_up   spinning_reserves
+        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO        DAM     0.0                     0.00            NaN                     NaN                     0.00000         0.00
+        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO_EXP    DAM     0.5                     2.25            NaN                     NaN                     10.00089        2.08
+        ```
+
+        :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices")
+        :param start_timestamp: first observation timestamp
+        :param end_timestamp: last observation timestamp (non inclusive)
+        :return: relevant GridStatus series data
+        """
+        # Build request parameters.
+        request_kwargs: Dict[str, str] = {}
+        if start_timestamp is not None:
+            request_kwargs["start"] = start_timestamp
+        if end_timestamp is not None:
+            request_kwargs["end"] = end_timestamp
+        # Start attempts.
+        attempt = 1
+        max_attempts = 4
+        err_msgs: Dict[str, str] = {}
+        while attempt <= max_attempts:
+            try:
+                # Download the data for the dataset.
+                df = self._client.get_dataset(
+                    dataset=id_,
+                    **request_kwargs,
+                )
+            except Exception as err:
+                msg = str(err)
+                if msg.startswith("Error 5"):
+                    _LOG.error("Attempt %d: %s Retrying...", attempt, msg)
+                    # Wait before retrying.
+                    time.sleep(10)
+                else:
+                    raise
+                err_msgs[f"Attempt {attempt}"] = msg
+                attempt += 1
+                continue
+            # Log success and return.
+            _LOG.info(
+                "Downloaded dataset %s with %d records",
+                id_,
+                len(df),
+            )
+            return df
+        raise RuntimeError(
+            f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}"
+        )

From 746f72b5dd837fd0ac538620f1af167ca5cc250c Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Tue, 20 May 2025 18:46:01 -0400
Subject: [PATCH 2/9] TutorTask527: Minor changes

---
 causal_automl/download_gsio_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py
index 454cab33ac..569d5f4050 100644
--- a/causal_automl/download_gsio_data.py
+++ b/causal_automl/download_gsio_data.py
@@ -46,7 +46,7 @@ def download_series(
 
         When no start and end timestamps are passed, the entire time series is downloaded.
 
-        Example of timestamp - "2010-01-01 08:00:00+00:00"
+        Example of a timestamp - "2010-01-01 08:00:00+00:00"
 
         Example of a returned series:
 
@@ -58,7 +58,7 @@ def download_series(
 
         :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices")
         :param start_timestamp: first observation timestamp
-        :param end_timestamp: last observation timestamp (non inclusive)
+        :param end_timestamp: last observation timestamp
         :return: relevant GridStatus series data
         """
         # Build request parameters.

From ff273199eb24ad94f80ccd4505d3c717a5400d0d Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Wed, 28 May 2025 19:16:25 -0400
Subject: [PATCH 3/9] TutorTask527: Remove download_gsio_data.py from remote
 repo (to be renamed)

---
 causal_automl/download_gsio_data.py | 101 ----------------------------
 1 file changed, 101 deletions(-)
 delete mode 100644 causal_automl/download_gsio_data.py

diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py
deleted file mode 100644
index 569d5f4050..0000000000
--- a/causal_automl/download_gsio_data.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import logging as log
-import os
-import time
-from typing import Dict, Optional
-
-import gridstatusio
-import pandas as pd
-import ratelimit
-
-_LOG = log.getLogger(__name__)
-
-# #############################################################################
-# GridStatusDataDownloader
-# #############################################################################
-
-
-class GridStatusDataDownloader:
-    """
-    Download historical data from GridStatus.io.
-    """
-
-    def __init__(self, api_key: Optional[str] = None) -> None:
-        """
-        Initialize the GridStatus data downloader with the API key.
-
-        If no API key is passed as a parameter, it is read from the
-        GRIDSTATUS_API_KEY environment variable.
-
-        :param api_key: GridStatus API key
-        """
-        key = api_key or os.getenv("GRIDSTATUS_API_KEY")
-        if not key:
-            raise ValueError("GridStatus API key is required")
-        self._client = gridstatusio.GridStatusClient(api_key=key)
-
-    @ratelimit.sleep_and_retry
-    @ratelimit.limits(calls=60, period=60)
-    def download_series(
-        self,
-        id_: str,
-        start_timestamp: Optional[pd.Timestamp] = None,
-        end_timestamp: Optional[pd.Timestamp] = None,
-    ) -> Optional[pd.DataFrame]:
-        """
-        Download historical series data.
-
-        When no start and end timestamps are passed, the entire time series is downloaded.
-
-        Example of a timestamp - "2010-01-01 08:00:00+00:00"
-
-        Example of a returned series:
-
-        ```
-        interval_start_utc          interval_end_utc            region          market  non_spinning_reserves   regulation_down regulation_mileage_down regulation_mileage_up   regulation_up   spinning_reserves
-        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO        DAM     0.0                     0.00            NaN                     NaN                     0.00000         0.00
-        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO_EXP    DAM     0.5                     2.25            NaN                     NaN                     10.00089        2.08
-        ```
-
-        :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices")
-        :param start_timestamp: first observation timestamp
-        :param end_timestamp: last observation timestamp
-        :return: relevant GridStatus series data
-        """
-        # Build request parameters.
-        request_kwargs: Dict[str, str] = {}
-        if start_timestamp is not None:
-            request_kwargs["start"] = start_timestamp
-        if end_timestamp is not None:
-            request_kwargs["end"] = end_timestamp
-        # Start attempts.
-        attempt = 1
-        max_attempts = 4
-        err_msgs: Dict[str, str] = {}
-        while attempt <= max_attempts:
-            try:
-                # Download the data for the dataset.
-                df = self._client.get_dataset(
-                    dataset=id_,
-                    **request_kwargs,
-                )
-            except Exception as err:
-                msg = str(err)
-                if msg.startswith("Error 5"):
-                    _LOG.error("Attempt %d: %s Retrying...", attempt, msg)
-                    # Wait before retrying.
-                    time.sleep(10)
-                else:
-                    raise
-                err_msgs[f"Attempt {attempt}"] = msg
-                attempt += 1
-                continue
-            # Log success and return.
-            _LOG.info(
-                "Downloaded dataset %s with %d records",
-                id_,
-                len(df),
-            )
-            return df
-        raise RuntimeError(
-            f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}"
-        )

From 3b68ff04fd604c423d51a605cae03c91ad4ce28e Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Wed, 28 May 2025 19:24:52 -0400
Subject: [PATCH 4/9] TutorTask527: Reviewer Changes, changes made with v2.0 in
 mind

---
 causal_automl/download_gridstatus_data.py | 119 ++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 causal_automl/download_gridstatus_data.py

diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
new file mode 100644
index 0000000000..fffc294124
--- /dev/null
+++ b/causal_automl/download_gridstatus_data.py
@@ -0,0 +1,119 @@
+"""
+Import as:
+
+import causal_automl.download_gridstatus_data as cadogrda
+"""
+
+import logging
+import os
+import time
+from typing import Dict, Optional
+
+import gridstatusio
+import helpers.hdbg as hdbg
+import pandas as pd
+import ratelimit
+
+_LOG = logging.getLogger(__name__)
+
+
+# #############################################################################
+# GridstatusDataDownloader
+# #############################################################################
+
+
+class GridstatusDataDownloader:
+    """
+    Download historical data from GridStatus.io.
+    """
+
+    def __init__(self) -> None:
+        """
+        Initialize the GridStatus data downloader with the API key.
+
+        If no API key is passed as a parameter, it is read from the
+        GRIDSTATUS_API_KEY environment variable.
+
+        :param api_key: GridStatus API key
+        """
+        hdbg.dassert_in(
+            "GRIDSTATUS_API_KEY",
+            os.environ,
+            msg="GRIDSTATUS_API_KEY is not found in environment variables",
+        )
+        api_key = os.getenv("GRIDSTATUS_API_KEY")
+        key = api_key or os.getenv("GRIDSTATUS_API_KEY")
+        if not key:
+            raise ValueError("GridStatus API key is required")
+        self._client = gridstatusio.GridStatusClient(api_key=key)
+
+    @ratelimit.sleep_and_retry
+    @ratelimit.limits(calls=60, period=60)
+    def download_series(
+        self,
+        id_: str,
+        start_timestamp: Optional[pd.Timestamp] = None,
+        end_timestamp: Optional[pd.Timestamp] = None,
+    ) -> Optional[pd.DataFrame]:
+        """
+        Download historical series data.
+
+        When no start and end timestamps are passed, the entire time series is downloaded.
+
+        Example of a returned series:
+
+        ```
+        interval_start_utc          interval_end_utc            region          market
+        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO        DAM
+        2010-01-01 08:00:00+00:00   2010-01-01 09:00:00+00:00   AS_CAISO_EXP    DAM
+        /
+        non_spinning_reserves
+        0.0
+        0.5
+        ```
+
+        :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices.spinning_reserves")
+        :param start_timestamp: first observation timestamp (e.g., "2010-01-01 08:00:00+00:00")
+        :param end_timestamp: last observation timestamp
+        :return: relevant GridStatus series data
+        """
+        # Build request parameters.
+        id_series, name_series = id_.split(".", 1)
+        request_kwargs: Dict[str, str] = {}
+        if start_timestamp is not None:
+            request_kwargs["start"] = start_timestamp
+        if end_timestamp is not None:
+            request_kwargs["end"] = end_timestamp
+        # Start attempts.
+        attempt = 1
+        max_attempts = 4
+        err_msgs: Dict[str, str] = {}
+        while attempt <= max_attempts:
+            try:
+                # Download the data for the dataset.
+                df = self._client.get_dataset(
+                    dataset=id_series,
+                    columns=[name_series],
+                    **request_kwargs,
+                )
+            except Exception as err:
+                msg = str(err)
+                if msg.startswith("Error 5"):
+                    _LOG.error("Attempt %d: %s Retrying...", attempt, msg)
+                    # Wait before retrying.
+                    time.sleep(10)
+                else:
+                    raise
+                err_msgs[f"Attempt {attempt}"] = msg
+                attempt += 1
+                continue
+            # Log success and return.
+            _LOG.info(
+                "Downloaded dataset %s with %d records",
+                id_,
+                len(df),
+            )
+            return df
+        raise RuntimeError(
+            f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}"
+        )

From 977cd9156295c0f06dfc4db1727e14f3dc7da485 Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Wed, 28 May 2025 19:31:18 -0400
Subject: [PATCH 5/9] TutorTask527: Made changes to env variable assertion

---
 causal_automl/download_fred_data.py       | 13 ++++++++-----
 causal_automl/download_gridstatus_data.py |  2 --
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/causal_automl/download_fred_data.py b/causal_automl/download_fred_data.py
index ad72055a79..b3c06044fe 100644
--- a/causal_automl/download_fred_data.py
+++ b/causal_automl/download_fred_data.py
@@ -27,7 +27,7 @@ class FredDataDownloader:
     Download historical data from FRED.
     """
 
-    def __init__(self, api_key: Optional[str] = None) -> None:
+    def __init__(self) -> None:
         """
         Initialize the FRED data downloader with the API key.
 
@@ -36,10 +36,13 @@ def __init__(self, api_key: Optional[str] = None) -> None:
 
         :param api_key: FRED API key
         """
-        key = api_key or os.getenv("FRED_API_KEY")
-        if not key:
-            raise ValueError("FRED API key is required")
-        self._client = fredapi.Fred(api_key=key)
+        hdbg.dassert_in(
+            "FRED_API_KEY",
+            os.environ,
+            msg="FRED_API_KEY is not found in environment variables",
+        )
+        api_key = os.getenv("FRED_API_KEY")
+        self._client = fredapi.Fred(api_key=api_key)
 
     @ratelimit.sleep_and_retry
     @ratelimit.limits(calls=60, period=60)
diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
index fffc294124..e16b80b539 100644
--- a/causal_automl/download_gridstatus_data.py
+++ b/causal_automl/download_gridstatus_data.py
@@ -43,8 +43,6 @@ def __init__(self) -> None:
         )
         api_key = os.getenv("GRIDSTATUS_API_KEY")
         key = api_key or os.getenv("GRIDSTATUS_API_KEY")
-        if not key:
-            raise ValueError("GridStatus API key is required")
         self._client = gridstatusio.GridStatusClient(api_key=key)
 
     @ratelimit.sleep_and_retry

From 0093139182132a734635a8402d3a3c2756c3d3c4 Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indro@Indrayudds-MacBook-Air.local>
Date: Wed, 28 May 2025 19:32:58 -0400
Subject: [PATCH 6/9] TutorTask527: Syntax fixed in gridstatus downloader

---
 causal_automl/download_gridstatus_data.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
index e16b80b539..903057177c 100644
--- a/causal_automl/download_gridstatus_data.py
+++ b/causal_automl/download_gridstatus_data.py
@@ -42,8 +42,7 @@ def __init__(self) -> None:
             msg="GRIDSTATUS_API_KEY is not found in environment variables",
         )
         api_key = os.getenv("GRIDSTATUS_API_KEY")
-        key = api_key or os.getenv("GRIDSTATUS_API_KEY")
-        self._client = gridstatusio.GridStatusClient(api_key=key)
+        self._client = gridstatusio.GridStatusClient(api_key=api_key)
 
     @ratelimit.sleep_and_retry
     @ratelimit.limits(calls=60, period=60)

From fc935ccc380b1f79c7dc92e67cefd30bd32cc45f Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indrayudd1@gmail.com>
Date: Fri, 30 May 2025 18:04:39 -0400
Subject: [PATCH 7/9] TutorTask527: Reviewer changes + minor equivalent change
 in the FRED downloader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 causal_automl/download_fred_data.py       |  5 --
 causal_automl/download_gridstatus_data.py | 65 ++++++++++++++++++-----
 2 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/causal_automl/download_fred_data.py b/causal_automl/download_fred_data.py
index b3c06044fe..9a44bdbce0 100644
--- a/causal_automl/download_fred_data.py
+++ b/causal_automl/download_fred_data.py
@@ -30,11 +30,6 @@ class FredDataDownloader:
     def __init__(self) -> None:
         """
         Initialize the FRED data downloader with the API key.
-
-        If no FRED API key is passed as a parameter, it is read from the
-        environment variable.
-
-        :param api_key: FRED API key
         """
         hdbg.dassert_in(
             "FRED_API_KEY",
diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
index 903057177c..f0adab67dc 100644
--- a/causal_automl/download_gridstatus_data.py
+++ b/causal_automl/download_gridstatus_data.py
@@ -7,7 +7,7 @@
 import logging
 import os
 import time
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
 import gridstatusio
 import helpers.hdbg as hdbg
@@ -30,11 +30,6 @@ class GridstatusDataDownloader:
     def __init__(self) -> None:
         """
         Initialize the GridStatus data downloader with the API key.
-
-        If no API key is passed as a parameter, it is read from the
-        GRIDSTATUS_API_KEY environment variable.
-
-        :param api_key: GridStatus API key
         """
         hdbg.dassert_in(
             "GRIDSTATUS_API_KEY",
@@ -49,8 +44,8 @@ def __init__(self) -> None:
     def download_series(
         self,
         id_: str,
-        start_timestamp: Optional[pd.Timestamp] = None,
-        end_timestamp: Optional[pd.Timestamp] = None,
+        start_timestamp: Optional[Union[str, pd.Timestamp]] = None,
+        end_timestamp: Optional[Union[str, pd.Timestamp]] = None,
     ) -> Optional[pd.DataFrame]:
         """
         Download historical series data.
@@ -69,13 +64,14 @@ def download_series(
         0.5
         ```
 
-        :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices.spinning_reserves")
-        :param start_timestamp: first observation timestamp (e.g., "2010-01-01 08:00:00+00:00")
+        :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves")
+        :param start_timestamp: first observation timestamp
+            (e.g., "2010-01-01 08:00:00+00:00" or pd.Timestamp("2023-04-01 01:00:00"))
         :param end_timestamp: last observation timestamp
-        :return: relevant GridStatus series data
+        :return: relevant Gridstatus series data
         """
         # Build request parameters.
-        id_series, name_series = id_.split(".", 1)
+        id_dataset, name_series = id_.split(".", 1)
         request_kwargs: Dict[str, str] = {}
         if start_timestamp is not None:
             request_kwargs["start"] = start_timestamp
@@ -89,7 +85,7 @@ def download_series(
             try:
                 # Download the data for the dataset.
                 df = self._client.get_dataset(
-                    dataset=id_series,
+                    dataset=id_dataset,
                     columns=[name_series],
                     **request_kwargs,
                 )
@@ -106,7 +102,7 @@ def download_series(
                 continue
             # Log success and return.
             _LOG.info(
-                "Downloaded dataset %s with %d records",
+                "Downloaded series %s with %d records",
                 id_,
                 len(df),
             )
@@ -114,3 +110,44 @@ def download_series(
         raise RuntimeError(
             f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}"
         )
+
+    def filter_series(
+        self,
+        df: pd.DataFrame,
+        id_: str,
+        filters: Dict[str, str],
+    ) -> Optional[pd.DataFrame]:
+        """
+        Filter out a single time series from a Gridstatus dataset.
+
+        Apply single filters across columns (e.g., `region`, `market`),
+        drop missing rows and return end timestamp-indexed single series.
+
+        :param df: Gridstatus data series to filter
+        :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves")
+        :param filters: filters to apply on the dataset
+            (e.g., {"region":"AS_CAISO_EXP", "market":"DAM"})
+        :return: filtered Gridstatus series
+        """
+        # Filter data.
+        filtered_data = df.copy()
+        for k, v in filters.items():
+            hdbg.dassert_in(
+                k,
+                filtered_data.columns,
+                "%s not found in columns: %s",
+                k,
+                list(filtered_data.columns),
+            )
+            filtered_data = filtered_data[filtered_data[k] == v]
+        # Find the series name.
+        name_series = id_.split(".", 1)[1]
+        # Drop missing value rows.
+        filtered_data = filtered_data.dropna(subset=[name_series])
+        if filtered_data.empty:
+            _LOG.warning("No data remaining after applying filters")
+            return None
+        filtered_data = filtered_data[["interval_end_utc", name_series]]
+        filtered_data = filtered_data.set_index("interval_end_utc")
+        filtered_data = filtered_data.sort_index()
+        return filtered_data

From 7dc0efd00436c4d579c55159115e7fd0d8bb404f Mon Sep 17 00:00:00 2001
From: Indrayudd Roy Chowdhury <indrayudd1@gmail.com>
Date: Mon, 2 Jun 2025 15:39:03 -0400
Subject: [PATCH 8/9] TutorTask527: Reviewer Changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 causal_automl/download_gridstatus_data.py | 48 ++++++++++++++++++++---
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
index f0adab67dc..1e7104264d 100644
--- a/causal_automl/download_gridstatus_data.py
+++ b/causal_automl/download_gridstatus_data.py
@@ -116,17 +116,52 @@ def filter_series(
         df: pd.DataFrame,
         id_: str,
         filters: Dict[str, str],
-    ) -> Optional[pd.DataFrame]:
+    ) -> pd.DataFrame:
         """
         Filter out a single time series from a Gridstatus dataset.
 
-        Apply single filters across columns (e.g., `region`, `market`),
-        drop missing rows and return end timestamp-indexed single series.
+        - Apply single filters across columns (e.g., `region`, `market`)
+        - drop missing rows
+        - return end timestamp-indexed single series
+
+        E.g.,
+
+        Input series (caiso_as_prices.non_spinning_reserves):
+        ```
+        interval_start_utc          interval_end_utc            region          market
+        2022-01-01 08:00:00+00:00   2022-01-01 09:00:00+00:00   AS_CAISO        DAM
+        2022-01-01 08:00:00+00:00   2022-01-01 09:00:00+00:00   AS_CAISO_EXP    DAM
+        2022-01-01 08:00:00+00:00   2022-01-01 09:00:00+00:00   AS_NP26         DAM
+        2022-01-01 08:00:00+00:00   2022-01-01 09:00:00+00:00   AS_NP26_EXP     DAM
+        2022-01-01 08:00:00+00:00   2022-01-01 09:00:00+00:00   AS_SP26         DAM
+        ...                         ...                         ...             ...
+        /
+        non_spinning_reserves
+        0.00
+        0.15
+        0.00
+        0.00
+        0.00
+        ...
+        ```
+        Output series (with filters - {"region": "AS_CAISO_EXP", "market": "DAM"}):
+        ```
+                                        non_spinning_reserves
+        interval_end_utc
+        2022-01-01 09:00:00+00:00                   0.15
+        2022-01-01 10:00:00+00:00                   0.15
+        2022-01-01 11:00:00+00:00                   0.15
+        2022-01-01 12:00:00+00:00                   0.15
+        2022-01-01 13:00:00+00:00                   0.15
+        ...                                          ...
+        ```
+
+
 
         :param df: Gridstatus data series to filter
         :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves")
         :param filters: filters to apply on the dataset
-            (e.g., {"region":"AS_CAISO_EXP", "market":"DAM"})
+            (e.g., {"region": "AS_CAISO_EXP", "market": "DAM"})
         :return: filtered Gridstatus series
         """
         # Filter data.
@@ -140,13 +175,14 @@ def filter_series(
                 list(filtered_data.columns),
             )
             filtered_data = filtered_data[filtered_data[k] == v]
+        if filtered_data.empty:
+            _LOG.warning("No data remaining after applying filters")
         # Find the series name.
         name_series = id_.split(".", 1)[1]
         # Drop missing value rows.
         filtered_data = filtered_data.dropna(subset=[name_series])
         if filtered_data.empty:
-            _LOG.warning("No data remaining after applying filters")
-            return None
+            _LOG.warning("No data remaining after dropping NaN values")
         filtered_data = filtered_data[["interval_end_utc", name_series]]
         filtered_data = filtered_data.set_index("interval_end_utc")
         filtered_data = filtered_data.sort_index()

From f6d494996d12223fb82f8da9e2f48c2c7567f675 Mon Sep 17 00:00:00 2001
From: Sonya Nikiforova <son.nik@mail.ru>
Date: Mon, 2 Jun 2025 21:56:00 +0200
Subject: [PATCH 9/9] TutorTask527: Fix style

---
 causal_automl/download_gridstatus_data.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py
index 1e7104264d..8a3e6b9212 100644
--- a/causal_automl/download_gridstatus_data.py
+++ b/causal_automl/download_gridstatus_data.py
@@ -121,8 +121,8 @@ def filter_series(
         Filter out a single time series from a Gridstatus dataset.
 
         - Apply single filters across columns (e.g., `region`, `market`)
-        - drop missing rows
-        - return end timestamp-indexed single series
+        - Drop NaN values
+        - Set the end timestamp as index
 
         E.g.,
 
@@ -156,13 +156,11 @@ def filter_series(
         ...                                          ...
         ```
 
-
-
-        :param df: Gridstatus data series to filter
-        :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves")
+        :param df: data series to filter
+        :param id_: series identifier (e.g., "caiso_as_prices.spinning_reserves")
         :param filters: filters to apply on the dataset
             (e.g., {"region": "AS_CAISO_EXP", "market": "DAM"})
-        :return: filtered Gridstatus series
+        :return: filtered series
         """
         # Filter data.
         filtered_data = df.copy()