From 61f3b12e5b6102a8c4dd0faa9824600b322828de Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Tue, 20 May 2025 18:41:49 -0400 Subject: [PATCH 1/9] TutorTask527: Create a downloader script identical to FRED in interface - badly linted --- causal_automl/download_gsio_data.py | 101 ++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 causal_automl/download_gsio_data.py diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py new file mode 100644 index 0000000000..454cab33ac --- /dev/null +++ b/causal_automl/download_gsio_data.py @@ -0,0 +1,101 @@ +import logging as log +import os +import time +from typing import Dict, Optional + +import gridstatusio +import pandas as pd +import ratelimit + +_LOG = log.getLogger(__name__) + +# ############################################################################# +# GridStatusDataDownloader +# ############################################################################# + + +class GridStatusDataDownloader: + """ + Download historical data from GridStatus.io. + """ + + def __init__(self, api_key: Optional[str] = None) -> None: + """ + Initialize the GridStatus data downloader with the API key. + + If no API key is passed as a parameter, it is read from the + GRIDSTATUS_API_KEY environment variable. + + :param api_key: GridStatus API key + """ + key = api_key or os.getenv("GRIDSTATUS_API_KEY") + if not key: + raise ValueError("GridStatus API key is required") + self._client = gridstatusio.GridStatusClient(api_key=key) + + @ratelimit.sleep_and_retry + @ratelimit.limits(calls=60, period=60) + def download_series( + self, + id_: str, + start_timestamp: Optional[pd.Timestamp] = None, + end_timestamp: Optional[pd.Timestamp] = None, + ) -> Optional[pd.DataFrame]: + """ + Download historical series data. + + When no start and end timestamps are passed, the entire time series is downloaded. + + Example of timestamp - "2010-01-01 08:00:00+00:00" + + Example of a returned series: + + ``` + interval_start_utc interval_end_utc region market non_spinning_reserves regulation_down regulation_mileage_down regulation_mileage_up regulation_up spinning_reserves + 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO DAM 0.0 0.00 NaN NaN 0.00000 0.00 + 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO_EXP DAM 0.5 2.25 NaN NaN 10.00089 2.08 + ``` + + :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices") + :param start_timestamp: first observation timestamp + :param end_timestamp: last observation timestamp (non inclusive) + :return: relevant GridStatus series data + """ + # Build request parameters. + request_kwargs: Dict[str, str] = {} + if start_timestamp is not None: + request_kwargs["start"] = start_timestamp + if end_timestamp is not None: + request_kwargs["end"] = end_timestamp + # Start attempts. + attempt = 1 + max_attempts = 4 + err_msgs: Dict[str, str] = {} + while attempt <= max_attempts: + try: + # Download the data for the dataset. + df = self._client.get_dataset( + dataset=id_, + **request_kwargs, + ) + except Exception as err: + msg = str(err) + if msg.startswith("Error 5"): + _LOG.error("Attempt %d: %s Retrying...", attempt, msg) + # Wait before retrying. + time.sleep(10) + else: + raise + err_msgs[f"Attempt {attempt}"] = msg + attempt += 1 + continue + # Log success and return. + _LOG.info( + "Downloaded dataset %s with %d records", + id_, + len(df), + ) + return df + raise RuntimeError( + f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}" + ) From 746f72b5dd837fd0ac538620f1af167ca5cc250c Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Tue, 20 May 2025 18:46:01 -0400 Subject: [PATCH 2/9] TutorTask527: Minor changes --- causal_automl/download_gsio_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py index 454cab33ac..569d5f4050 100644 --- a/causal_automl/download_gsio_data.py +++ b/causal_automl/download_gsio_data.py @@ -46,7 +46,7 @@ def download_series( When no start and end timestamps are passed, the entire time series is downloaded. - Example of timestamp - "2010-01-01 08:00:00+00:00" + Example of a timestamp - "2010-01-01 08:00:00+00:00" Example of a returned series: @@ -58,7 +58,7 @@ def download_series( :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices") :param start_timestamp: first observation timestamp - :param end_timestamp: last observation timestamp (non inclusive) + :param end_timestamp: last observation timestamp :return: relevant GridStatus series data """ # Build request parameters. From ff273199eb24ad94f80ccd4505d3c717a5400d0d Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Wed, 28 May 2025 19:16:25 -0400 Subject: [PATCH 3/9] TutorTask527: Remove download_gsio_data.py from remote repo (to be renamed) --- causal_automl/download_gsio_data.py | 101 ---------------------------- 1 file changed, 101 deletions(-) delete mode 100644 causal_automl/download_gsio_data.py diff --git a/causal_automl/download_gsio_data.py b/causal_automl/download_gsio_data.py deleted file mode 100644 index 569d5f4050..0000000000 --- a/causal_automl/download_gsio_data.py +++ /dev/null @@ -1,101 +0,0 @@ -import logging as log -import os -import time -from typing import Dict, Optional - -import gridstatusio -import pandas as pd -import ratelimit - -_LOG = log.getLogger(__name__) - -# ############################################################################# -# GridStatusDataDownloader -# ############################################################################# - - -class GridStatusDataDownloader: - """ - Download historical data from GridStatus.io. - """ - - def __init__(self, api_key: Optional[str] = None) -> None: - """ - Initialize the GridStatus data downloader with the API key. - - If no API key is passed as a parameter, it is read from the - GRIDSTATUS_API_KEY environment variable. - - :param api_key: GridStatus API key - """ - key = api_key or os.getenv("GRIDSTATUS_API_KEY") - if not key: - raise ValueError("GridStatus API key is required") - self._client = gridstatusio.GridStatusClient(api_key=key) - - @ratelimit.sleep_and_retry - @ratelimit.limits(calls=60, period=60) - def download_series( - self, - id_: str, - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, - ) -> Optional[pd.DataFrame]: - """ - Download historical series data. - - When no start and end timestamps are passed, the entire time series is downloaded. - - Example of a timestamp - "2010-01-01 08:00:00+00:00" - - Example of a returned series: - - ``` - interval_start_utc interval_end_utc region market non_spinning_reserves regulation_down regulation_mileage_down regulation_mileage_up regulation_up spinning_reserves - 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO DAM 0.0 0.00 NaN NaN 0.00000 0.00 - 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO_EXP DAM 0.5 2.25 NaN NaN 10.00089 2.08 - ``` - - :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices") - :param start_timestamp: first observation timestamp - :param end_timestamp: last observation timestamp - :return: relevant GridStatus series data - """ - # Build request parameters. - request_kwargs: Dict[str, str] = {} - if start_timestamp is not None: - request_kwargs["start"] = start_timestamp - if end_timestamp is not None: - request_kwargs["end"] = end_timestamp - # Start attempts. - attempt = 1 - max_attempts = 4 - err_msgs: Dict[str, str] = {} - while attempt <= max_attempts: - try: - # Download the data for the dataset. - df = self._client.get_dataset( - dataset=id_, - **request_kwargs, - ) - except Exception as err: - msg = str(err) - if msg.startswith("Error 5"): - _LOG.error("Attempt %d: %s Retrying...", attempt, msg) - # Wait before retrying. - time.sleep(10) - else: - raise - err_msgs[f"Attempt {attempt}"] = msg - attempt += 1 - continue - # Log success and return. - _LOG.info( - "Downloaded dataset %s with %d records", - id_, - len(df), - ) - return df - raise RuntimeError( - f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}" - ) From 3b68ff04fd604c423d51a605cae03c91ad4ce28e Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Wed, 28 May 2025 19:24:52 -0400 Subject: [PATCH 4/9] TutorTask527: Reviewer Changes, changes made with v2.0 in mind --- causal_automl/download_gridstatus_data.py | 119 ++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 causal_automl/download_gridstatus_data.py diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py new file mode 100644 index 0000000000..fffc294124 --- /dev/null +++ b/causal_automl/download_gridstatus_data.py @@ -0,0 +1,119 @@ +""" +Import as: + +import causal_automl.download_gridstatus_data as cadogrda +""" + +import logging +import os +import time +from typing import Dict, Optional + +import gridstatusio +import helpers.hdbg as hdbg +import pandas as pd +import ratelimit + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# GridstatusDataDownloader +# ############################################################################# + + +class GridstatusDataDownloader: + """ + Download historical data from GridStatus.io. + """ + + def __init__(self) -> None: + """ + Initialize the GridStatus data downloader with the API key. + + If no API key is passed as a parameter, it is read from the + GRIDSTATUS_API_KEY environment variable. + + :param api_key: GridStatus API key + """ + hdbg.dassert_in( + "GRIDSTATUS_API_KEY", + os.environ, + msg="GRIDSTATUS_API_KEY is not found in environment variables", + ) + api_key = os.getenv("GRIDSTATUS_API_KEY") + key = api_key or os.getenv("GRIDSTATUS_API_KEY") + if not key: + raise ValueError("GridStatus API key is required") + self._client = gridstatusio.GridStatusClient(api_key=key) + + @ratelimit.sleep_and_retry + @ratelimit.limits(calls=60, period=60) + def download_series( + self, + id_: str, + start_timestamp: Optional[pd.Timestamp] = None, + end_timestamp: Optional[pd.Timestamp] = None, + ) -> Optional[pd.DataFrame]: + """ + Download historical series data. + + When no start and end timestamps are passed, the entire time series is downloaded. + + Example of a returned series: + + ``` + interval_start_utc interval_end_utc region market + 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO DAM + 2010-01-01 08:00:00+00:00 2010-01-01 09:00:00+00:00 AS_CAISO_EXP DAM + / + non_spinning_reserves + 0.0 + 0.5 + ``` + + :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices.spinning_reserves") + :param start_timestamp: first observation timestamp (e.g., "2010-01-01 08:00:00+00:00") + :param end_timestamp: last observation timestamp + :return: relevant GridStatus series data + """ + # Build request parameters. + id_series, name_series = id_.split(".", 1) + request_kwargs: Dict[str, str] = {} + if start_timestamp is not None: + request_kwargs["start"] = start_timestamp + if end_timestamp is not None: + request_kwargs["end"] = end_timestamp + # Start attempts. + attempt = 1 + max_attempts = 4 + err_msgs: Dict[str, str] = {} + while attempt <= max_attempts: + try: + # Download the data for the dataset. + df = self._client.get_dataset( + dataset=id_series, + columns=[name_series], + **request_kwargs, + ) + except Exception as err: + msg = str(err) + if msg.startswith("Error 5"): + _LOG.error("Attempt %d: %s Retrying...", attempt, msg) + # Wait before retrying. + time.sleep(10) + else: + raise + err_msgs[f"Attempt {attempt}"] = msg + attempt += 1 + continue + # Log success and return. + _LOG.info( + "Downloaded dataset %s with %d records", + id_, + len(df), + ) + return df + raise RuntimeError( + f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}" + ) From 977cd9156295c0f06dfc4db1727e14f3dc7da485 Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Wed, 28 May 2025 19:31:18 -0400 Subject: [PATCH 5/9] TutorTask527: Made changes to env variable assertation --- causal_automl/download_fred_data.py | 13 ++++++++----- causal_automl/download_gridstatus_data.py | 2 -- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/causal_automl/download_fred_data.py b/causal_automl/download_fred_data.py index ad72055a79..b3c06044fe 100644 --- a/causal_automl/download_fred_data.py +++ b/causal_automl/download_fred_data.py @@ -27,7 +27,7 @@ class FredDataDownloader: Download historical data from FRED. """ - def __init__(self, api_key: Optional[str] = None) -> None: + def __init__(self) -> None: """ Initialize the FRED data downloader with the API key. @@ -36,10 +36,13 @@ def __init__(self, api_key: Optional[str] = None) -> None: :param api_key: FRED API key """ - key = api_key or os.getenv("FRED_API_KEY") - if not key: - raise ValueError("FRED API key is required") - self._client = fredapi.Fred(api_key=key) + hdbg.dassert_in( + "FRED_API_KEY", + os.environ, + msg="FRED_API_KEY is not found in environment variables", + ) + api_key = os.getenv("FRED_API_KEY") + self._client = fredapi.Fred(api_key=api_key) @ratelimit.sleep_and_retry @ratelimit.limits(calls=60, period=60) diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py index fffc294124..e16b80b539 100644 --- a/causal_automl/download_gridstatus_data.py +++ b/causal_automl/download_gridstatus_data.py @@ -43,8 +43,6 @@ def __init__(self) -> None: ) api_key = os.getenv("GRIDSTATUS_API_KEY") key = api_key or os.getenv("GRIDSTATUS_API_KEY") - if not key: - raise ValueError("GridStatus API key is required") self._client = gridstatusio.GridStatusClient(api_key=key) @ratelimit.sleep_and_retry From 0093139182132a734635a8402d3a3c2756c3d3c4 Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Wed, 28 May 2025 19:32:58 -0400 Subject: [PATCH 6/9] TutorTask527: Syntax fixed in gridstatus downloader --- causal_automl/download_gridstatus_data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py index e16b80b539..903057177c 100644 --- a/causal_automl/download_gridstatus_data.py +++ b/causal_automl/download_gridstatus_data.py @@ -42,8 +42,7 @@ def __init__(self) -> None: msg="GRIDSTATUS_API_KEY is not found in environment variables", ) api_key = os.getenv("GRIDSTATUS_API_KEY") - key = api_key or os.getenv("GRIDSTATUS_API_KEY") - self._client = gridstatusio.GridStatusClient(api_key=key) + self._client = gridstatusio.GridStatusClient(api_key=api_key) @ratelimit.sleep_and_retry @ratelimit.limits(calls=60, period=60) From fc935ccc380b1f79c7dc92e67cefd30bd32cc45f Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Fri, 30 May 2025 18:04:39 -0400 Subject: [PATCH 7/9] TutorTask527: Reviewer changes + minor equivalent change in the FRED downloader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- causal_automl/download_fred_data.py | 5 -- causal_automl/download_gridstatus_data.py | 65 ++++++++++++++++++----- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/causal_automl/download_fred_data.py b/causal_automl/download_fred_data.py index b3c06044fe..9a44bdbce0 100644 --- a/causal_automl/download_fred_data.py +++ b/causal_automl/download_fred_data.py @@ -30,11 +30,6 @@ class FredDataDownloader: def __init__(self) -> None: """ Initialize the FRED data downloader with the API key. - - If no FRED API key is passed as a parameter, it is read from the - environment variable. - - :param api_key: FRED API key """ hdbg.dassert_in( "FRED_API_KEY", diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py index 903057177c..f0adab67dc 100644 --- a/causal_automl/download_gridstatus_data.py +++ b/causal_automl/download_gridstatus_data.py @@ -7,7 +7,7 @@ import logging import os import time -from typing import Dict, Optional +from typing import Dict, Optional, Union import gridstatusio import helpers.hdbg as hdbg @@ -30,11 +30,6 @@ class GridstatusDataDownloader: def __init__(self) -> None: """ Initialize the GridStatus data downloader with the API key. - - If no API key is passed as a parameter, it is read from the - GRIDSTATUS_API_KEY environment variable. - - :param api_key: GridStatus API key """ hdbg.dassert_in( "GRIDSTATUS_API_KEY", @@ -49,8 +44,8 @@ def __init__(self) -> None: def download_series( self, id_: str, - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, + start_timestamp: Optional[Union[str, pd.Timestamp]] = None, + end_timestamp: Optional[Union[str, pd.Timestamp]] = None, ) -> Optional[pd.DataFrame]: """ Download historical series data. @@ -69,13 +64,14 @@ def download_series( 0.5 ``` - :param id_: GridStatus dataset identifier (e.g., "caiso_as_prices.spinning_reserves") - :param start_timestamp: first observation timestamp (e.g., "2010-01-01 08:00:00+00:00") + :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves") + :param start_timestamp: first observation timestamp + (e.g., "2010-01-01 08:00:00+00:00" or pd.Timestamp("2023-04-01 01:00:00")) :param end_timestamp: last observation timestamp - :return: relevant GridStatus series data + :return: relevant Gridstatus series data """ # Build request parameters. - id_series, name_series = id_.split(".", 1) + id_dataset, name_series = id_.split(".", 1) request_kwargs: Dict[str, str] = {} if start_timestamp is not None: request_kwargs["start"] = start_timestamp @@ -89,7 +85,7 @@ def download_series( try: # Download the data for the dataset. df = self._client.get_dataset( - dataset=id_series, + dataset=id_dataset, columns=[name_series], **request_kwargs, ) @@ -106,7 +102,7 @@ def download_series( continue # Log success and return. _LOG.info( - "Downloaded dataset %s with %d records", + "Downloaded series %s with %d records", id_, len(df), ) @@ -114,3 +110,44 @@ def download_series( raise RuntimeError( f"Failed to fetch after {max_attempts} attempts. Errors per run: {err_msgs}" ) + + def filter_series( + self, + df: pd.DataFrame, + id_: str, + filters: Dict[str, str], + ) -> Optional[pd.DataFrame]: + """ + Filter out a single time series from a Gridstatus dataset. + + Apply single filters across columns (e.g., `region`, `market`), + drop missing rows and return end timestamp-indexed single series. + + :param df: Gridstatus data series to filter + :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves") + :param filters: filters to apply on the dataset + (e.g., {"region":"AS_CAISO_EXP", "market":"DAM"}) + :return: filtered Gridstatus series + """ + # Filter data. + filtered_data = df.copy() + for k, v in filters.items(): + hdbg.dassert_in( + k, + filtered_data.columns, + "%s not found in columns: %s", + k, + list(filtered_data.columns), + ) + filtered_data = filtered_data[filtered_data[k] == v] + # Find the series name. + name_series = id_.split(".", 1)[1] + # Drop missing value rows. + filtered_data = filtered_data.dropna(subset=[name_series]) + if filtered_data.empty: + _LOG.warning("No data remaining after applying filters") + return None + filtered_data = filtered_data[["interval_end_utc", name_series]] + filtered_data = filtered_data.set_index("interval_end_utc") + filtered_data = filtered_data.sort_index() + return filtered_data From 7dc0efd00436c4d579c55159115e7fd0d8bb404f Mon Sep 17 00:00:00 2001 From: Indrayudd Roy Chowdhury Date: Mon, 2 Jun 2025 15:39:03 -0400 Subject: [PATCH 8/9] TutorTask527: Reviewer Changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- causal_automl/download_gridstatus_data.py | 48 ++++++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py index f0adab67dc..1e7104264d 100644 --- a/causal_automl/download_gridstatus_data.py +++ b/causal_automl/download_gridstatus_data.py @@ -116,17 +116,52 @@ def filter_series( df: pd.DataFrame, id_: str, filters: Dict[str, str], - ) -> Optional[pd.DataFrame]: + ) -> pd.DataFrame: """ Filter out a single time series from a Gridstatus dataset. - Apply single filters across columns (e.g., `region`, `market`), - drop missing rows and return end timestamp-indexed single series. + - Apply single filters across columns (e.g., `region`, `market`) + - drop missing rows + - return end timestamp-indexed single series + + E.g., + + Input series (caiso_as_prices.non_spinning_reserves): + ``` + interval_start_utc interval_end_utc region market + 2022-01-01 08:00:00+00:00 2022-01-01 09:00:00+00:00 AS_CAISO DAM + 2022-01-01 08:00:00+00:00 2022-01-01 09:00:00+00:00 AS_CAISO_EXP DAM + 2022-01-01 08:00:00+00:00 2022-01-01 09:00:00+00:00 AS_NP26 DAM + 2022-01-01 08:00:00+00:00 2022-01-01 09:00:00+00:00 AS_NP26_EXP DAM + 2022-01-01 08:00:00+00:00 2022-01-01 09:00:00+00:00 AS_SP26 DAM + ... ... ... ... + / + non_spinning_reserves + 0.00 + 0.15 + 0.00 + 0.00 + 0.00 + ... + ``` + Output series (with filters - {"region": "AS_CAISO_EXP", "market": "DAM"})): + ``` + non_spinning_reserves + interval_end_utc + 2022-01-01 09:00:00+00:00 0.15 + 2022-01-01 10:00:00+00:00 0.15 + 2022-01-01 11:00:00+00:00 0.15 + 2022-01-01 12:00:00+00:00 0.15 + 2022-01-01 13:00:00+00:00 0.15 + ... ... + ``` + + :param df: Gridstatus data series to filter :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves") :param filters: filters to apply on the dataset - (e.g., {"region":"AS_CAISO_EXP", "market":"DAM"}) + (e.g., {"region": "AS_CAISO_EXP", "market": "DAM"}) :return: filtered Gridstatus series """ # Filter data. @@ -140,13 +175,14 @@ def filter_series( list(filtered_data.columns), ) filtered_data = filtered_data[filtered_data[k] == v] + if filtered_data.empty: + _LOG.warning("No data remaining after applying filters") # Find the series name. name_series = id_.split(".", 1)[1] # Drop missing value rows. filtered_data = filtered_data.dropna(subset=[name_series]) if filtered_data.empty: - _LOG.warning("No data remaining after applying filters") - return None + _LOG.warning("No data remaining after dropping NaN values") filtered_data = filtered_data[["interval_end_utc", name_series]] filtered_data = filtered_data.set_index("interval_end_utc") filtered_data = filtered_data.sort_index() From f6d494996d12223fb82f8da9e2f48c2c7567f675 Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova Date: Mon, 2 Jun 2025 21:56:00 +0200 Subject: [PATCH 9/9] TutorTask527: Fix style --- causal_automl/download_gridstatus_data.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/causal_automl/download_gridstatus_data.py b/causal_automl/download_gridstatus_data.py index 1e7104264d..8a3e6b9212 100644 --- a/causal_automl/download_gridstatus_data.py +++ b/causal_automl/download_gridstatus_data.py @@ -121,8 +121,8 @@ def filter_series( Filter out a single time series from a Gridstatus dataset. - Apply single filters across columns (e.g., `region`, `market`) - - drop missing rows - - return end timestamp-indexed single series + - Drop NaN values + - Set the end timestamp as index E.g., @@ -156,13 +156,11 @@ def filter_series( ... ... ``` - - - :param df: Gridstatus data series to filter - :param id_: Gridstatus series identifier (e.g., "caiso_as_prices.spinning_reserves") + :param df: data series to filter + :param id_: series identifier (e.g., "caiso_as_prices.spinning_reserves") :param filters: filters to apply on the dataset (e.g., {"region": "AS_CAISO_EXP", "market": "DAM"}) - :return: filtered Gridstatus series + :return: filtered series """ # Filter data. filtered_data = df.copy()