From 61243ab58e5ab32712dc1563b5af3a958f15e79a Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Thu, 5 Dec 2024 18:00:58 -0700 Subject: [PATCH 1/4] Adding Critscript and Timeseries functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Added critscript to Init • Added Update_timeseries_groups • Added timeseries_group_df_to_json --- cwms/__init__.py | 1 + cwms/timeseries/critscript.py | 111 +++++++++++++++++++++++++++++++++ cwms/timeseries/timeseries.py | 113 ++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 cwms/timeseries/critscript.py diff --git a/cwms/__init__.py b/cwms/__init__.py index 12870b40..d6a65a57 100644 --- a/cwms/__init__.py +++ b/cwms/__init__.py @@ -17,6 +17,7 @@ from cwms.ratings.ratings_spec import * from cwms.ratings.ratings_template import * from cwms.standard_text.standard_text import * +from cwms.timeseries.critscript import * from cwms.timeseries.timerseries_identifier import * from cwms.timeseries.timeseries import * from cwms.timeseries.timeseries_bin import * diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py new file mode 100644 index 00000000..16ddfb95 --- /dev/null +++ b/cwms/timeseries/critscript.py @@ -0,0 +1,111 @@ +import re + +import pandas as pd + +import cwms + + +def crit_script( + file_path, + office_id, + group_id="SHEF Data Acquisition", + category_id="Data Aquisition", + group_office_id="CWMS", +): + def parse_crit_file(file_path): + """ + Parses a .crit file into a dictionary containing timeseries ID and Alias. + + Parameters + ---------- + file_path : str + Path to the .crit file. + office_id : str + The ID of the office associated with the specified timeseries. + group_id : str + The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". + category_id: string + The category id that contains the timeseries group. Defaults to "Data Acquisition". 
+ group_office_id : str + The specified office group associated with the timeseries data. Defaults to "CWMS". + Returns + ------- + list of dict + A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID. + """ + parsed_data = [] + with open(file_path, "r") as file: + for line in file: + # Ignore comment lines and empty lines + if line.startswith("#") or not line.strip(): + continue + + # Extract alias, timeseries ID, and TZ + match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip()) + if match: + alias = match.group(1).strip() + timeseries_id = match.group(2).strip() + alias2 = match.group(3).strip() + + parsed_data.append( + { + "Alias": alias + ":" + alias2, + "Timeseries ID": timeseries_id, + } + ) + + return parsed_data + + def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str): + """ + Appends a row to the DataFrame. + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame to append to. + office_id : str + The ID of the office associated with the specified timeseries. + tsId : str + The timeseries ID from the file. + alias : str + The alias from the file. + Returns + ------- + pandas.DataFrame + The updated DataFrame. 
+ """ + data = { + "officeId": [office_id], + "timeseriesId": [tsId], + "aliasId": [alias], + "tsCode": ["none"], # Default value for ts-code + "attribute": [0], # Default value for attribute + } + df = pd.concat([df, pd.DataFrame(data)]) + return df + + # Parse the file and get the parsed data + parsed_data = parse_crit_file(file_path) + + df = pd.DataFrame() + + for data in parsed_data: + # Create DataFrame for the current row + df = append_df(df, office_id, data["Timeseries ID"], data["Alias"]) + + # Generate JSON dictionary + json_dict = cwms.timeseries_group_df_to_json( + df, group_id, group_office_id, category_id + ) + + # Print DataFrame for verification + pd.set_option("display.max_columns", None) + + cwms.update_timeseries_groups( + group_id=group_id, + office_id=office_id, + category_id=category_id, + replace_assigned_ts=None, + data=json_dict, + ) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index f58deb62..c5e37c3a 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -9,6 +9,119 @@ from cwms.cwms_types import JSON, Data +def update_timeseries_groups( + group_id: str, + office_id: str, + category_id: str, + replace_assigned_ts: Optional[bool], + data: JSON, +) -> None: + """ + Updates the timeseries groups with the provided group ID and office ID. + + Parameters + ---------- + group_id : str + The new specified timeseries ID that will replace the old ID. + office_id : str + The ID of the office associated with the specified timeseries. + category_id: string + The category id that contains the timeseries group. + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. + data: JSON dictionary + Time Series data to be stored. 
+ + Returns + ------- + None + """ + if not group_id: + raise ValueError("Cannot update a specified level without an id") + if not office_id: + raise ValueError("Cannot update a specified level without an office id") + + endpoint = f"timeseries/group/{group_id}" + params = { + "replace-assigned-ts": replace_assigned_ts, + "office": office_id, + "category-id": category_id, + } + + api.patch(endpoint=endpoint, data=data, params=params) + + +def timeseries_group_df_to_json( + data: pd.DataFrame, + group_id: str, + office_id: str, + category_id: str, +) -> JSON: + """ + Converts a dataframe to a json dictionary in the correct format. + + Parameters + ---------- + data: pd.DataFrame + Dataframe containing timeseries information. + group_id: str + The group ID for the timeseries. + office_id: str + The ID of the office associated with the specified timeseries. + category_id: str + The ID of the category associated with the group + + Returns + ------- + JSON + JSON dictionary of the timeseries data. 
+ """ + required_columns = ["officeId", "timeseriesId"] + for column in required_columns: + if column not in data.columns: + raise TypeError( + f"{column} is a required column in data when posting as a dataframe" + ) + + if data.isnull().values.any(): + raise ValueError("Null/NaN data must be removed from the dataframe") + + # Check if 'alias' column exists, if not create it and set to None + if "aliasId" not in data.columns: + data["aliasId"] = None + + # Check if 'attribute' column exists, if not create it and set to 0 + if "attribute" not in data.columns: + data["attribute"] = 0 + + json_dict = { + "office-id": office_id, + "id": group_id, + "time-series-category": {"office-id": office_id, "id": category_id}, + "assigned-time-series": [], + } + + # Convert DataFrame to a list of dictionaries with each row becoming a dict + entries = data.to_dict(orient="records") + + # Iterate through each record and add it to the JSON dictionary + for entry in entries: + ts_dict = { + "office-id": entry["officeId"], + "timeseries-id": entry["timeseriesId"], + "alias-id": entry["aliasId"], + "attribute": entry["attribute"], + } + + # Only include 'ts-code' if it exists and is not NaN + if entry.get("tsCode") and pd.notna(entry["tsCode"]): + ts_dict["tsCode"] = entry["tsCode"] + + json_dict["assigned-time-series"].append(ts_dict) + + return json_dict + + def get_timeseries_group(group_id: str, category_id: str, office_id: str) -> Data: """Retreives time series stored in the requested time series group From 6ea25e7db88365f8c14faa46fa8f6afca2466c06 Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Thu, 5 Dec 2024 18:20:05 -0700 Subject: [PATCH 2/4] Updated to fix some poetry test. 
Still fails: poetry run mypy --strict cwms/ --- cwms/timeseries/critscript.py | 53 +++++++++++++++++++++++------------ cwms/timeseries/timeseries.py | 8 +++--- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py index 16ddfb95..5338dbaa 100644 --- a/cwms/timeseries/critscript.py +++ b/cwms/timeseries/critscript.py @@ -1,4 +1,5 @@ import re +from typing import Dict, List import pandas as pd @@ -6,13 +7,34 @@ def crit_script( - file_path, - office_id, - group_id="SHEF Data Acquisition", - category_id="Data Aquisition", - group_office_id="CWMS", -): - def parse_crit_file(file_path): + file_path: str, + office_id: str, + group_id: str = "SHEF Data Acquisition", + category_id: str = "Data Aquisition", + group_office_id: str = "CWMS", +) -> None: + """ + Processes a .crit file, updates the timeseries groups, and generates a JSON dictionary. + + Parameters + ---------- + file_path : str + Path to the .crit file. + office_id : str + The ID of the office associated with the specified timeseries. + group_id : str, optional + The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". + category_id : str, optional + The category ID that contains the timeseries group. Defaults to "Data Acquisition". + group_office_id : str, optional + The specified office group associated with the timeseries data. Defaults to "CWMS". + + Returns + ------- + None + """ + + def parse_crit_file(file_path: str) -> List[Dict[str, str]]: """ Parses a .crit file into a dictionary containing timeseries ID and Alias. @@ -20,18 +42,11 @@ def parse_crit_file(file_path): ---------- file_path : str Path to the .crit file. - office_id : str - The ID of the office associated with the specified timeseries. - group_id : str - The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". - category_id: string - The category id that contains the timeseries group. 
Defaults to "Data Acquisition". - group_office_id : str - The specified office group associated with the timeseries data. Defaults to "CWMS". + Returns ------- - list of dict - A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID. + List[Dict[str, str]] + A list of dictionaries with "Alias" and "Timeseries ID" as keys. """ parsed_data = [] with open(file_path, "r") as file: @@ -56,7 +71,9 @@ def parse_crit_file(file_path): return parsed_data - def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str): + def append_df( + df: pd.DataFrame, office_id: str, tsId: str, alias: str + ) -> pd.DataFrame: """ Appends a row to the DataFrame. diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index c5e37c3a..b8888fba 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -113,11 +113,11 @@ def timeseries_group_df_to_json( "attribute": entry["attribute"], } - # Only include 'ts-code' if it exists and is not NaN - if entry.get("tsCode") and pd.notna(entry["tsCode"]): - ts_dict["tsCode"] = entry["tsCode"] + # Only include 'ts-code' if it exists and is not NaN + if entry.get("tsCode") and pd.notna(entry["tsCode"]): + ts_dict["tsCode"] = entry["tsCode"] - json_dict["assigned-time-series"].append(ts_dict) + json_dict["assigned-time-series"].append(ts_dict) return json_dict From 5d68ab4943b99d574e599369bfe85de937fd1bcf Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Fri, 6 Dec 2024 13:51:27 -0700 Subject: [PATCH 3/4] Passes poetry. 
Need to test --- cwms/timeseries/timeseries.py | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index b8888fba..71c00b7d 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -94,31 +94,30 @@ def timeseries_group_df_to_json( if "attribute" not in data.columns: data["attribute"] = 0 + # Build the list of time-series entries + assigned_time_series = data.apply( + lambda entry: { + "office-id": entry["officeId"], + "timeseries-id": entry["timeseriesId"], + "alias-id": entry["aliasId"], + "attribute": entry["attribute"], + **( + {"tsCode": entry["tsCode"]} + if "tsCode" in entry and pd.notna(entry["tsCode"]) + else {} + ), + }, + axis=1, + ).tolist() + + # Construct the final JSON dictionary json_dict = { "office-id": office_id, "id": group_id, "time-series-category": {"office-id": office_id, "id": category_id}, - "assigned-time-series": [], + "assigned-time-series": assigned_time_series, } - # Convert DataFrame to a list of dictionaries with each row becoming a dict - entries = data.to_dict(orient="records") - - # Iterate through each record and add it to the JSON dictionary - for entry in entries: - ts_dict = { - "office-id": entry["officeId"], - "timeseries-id": entry["timeseriesId"], - "alias-id": entry["aliasId"], - "attribute": entry["attribute"], - } - - # Only include 'ts-code' if it exists and is not NaN - if entry.get("tsCode") and pd.notna(entry["tsCode"]): - ts_dict["tsCode"] = entry["tsCode"] - - json_dict["assigned-time-series"].append(ts_dict) - return json_dict From b043f5e914c189812902cec1129f8e486620e263 Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Fri, 20 Dec 2024 14:09:26 -0800 Subject: [PATCH 4/4] Added test_timeseries_group_df_to_json_valid_data() Passes all poetry tests --- cwms/timeseries/critscript.py | 1 - cwms/timeseries/timeseries.py | 50 +++++++++++----------- tests/timeseries/timeseries_test.py 
| 66 +++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 25 deletions(-) diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py index 5338dbaa..40751d5f 100644 --- a/cwms/timeseries/critscript.py +++ b/cwms/timeseries/critscript.py @@ -122,7 +122,6 @@ def append_df( cwms.update_timeseries_groups( group_id=group_id, office_id=office_id, - category_id=category_id, replace_assigned_ts=None, data=json_dict, ) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index 71c00b7d..64bbc033 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -12,29 +12,26 @@ def update_timeseries_groups( group_id: str, office_id: str, - category_id: str, replace_assigned_ts: Optional[bool], data: JSON, ) -> None: """ - Updates the timeseries groups with the provided group ID and office ID. + Updates the timeseries groups with the provided group ID and office ID. - Parameters - ---------- - group_id : str - The new specified timeseries ID that will replace the old ID. - office_id : str - The ID of the office associated with the specified timeseries. - category_id: string - The category id that contains the timeseries group. - replace_assigned_ts : bool, optional - Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. - data: JSON dictionary - Time Series data to be stored. - - Returns - ------- - None + Parameters + ---------- + group_id : str + The new specified timeseries ID that will replace the old ID. + office_id : str + The ID of the office associated with the specified timeseries. + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. + data: JSON dictionary + Time Series data to be stored. 
+ + Returns + ------- + None """ if not group_id: raise ValueError("Cannot update a specified level without an id") @@ -45,7 +42,6 @@ def update_timeseries_groups( params = { "replace-assigned-ts": replace_assigned_ts, "office": office_id, - "category-id": category_id, } api.patch(endpoint=endpoint, data=data, params=params) @@ -77,23 +73,29 @@ def timeseries_group_df_to_json( JSON dictionary of the timeseries data. """ required_columns = ["officeId", "timeseriesId"] + optional_columns = ["aliasId", "attribute", "tsCode"] for column in required_columns: if column not in data.columns: raise TypeError( f"{column} is a required column in data when posting as a dataframe" ) - if data.isnull().values.any(): - raise ValueError("Null/NaN data must be removed from the dataframe") + if data[required_columns].isnull().any().any(): + raise ValueError( + f"Null/NaN values found in required columns: {required_columns}. " + ) - # Check if 'alias' column exists, if not create it and set to None + # Fill optional columns with default values if missing if "aliasId" not in data.columns: data["aliasId"] = None - - # Check if 'attribute' column exists, if not create it and set to 0 if "attribute" not in data.columns: data["attribute"] = 0 + # Replace NaN with None for optional columns + for column in optional_columns: + if column in data.columns: + data[column] = data[column].where(pd.notnull(data[column]), None) + # Build the list of time-series entries assigned_time_series = data.apply( lambda entry: { diff --git a/tests/timeseries/timeseries_test.py b/tests/timeseries/timeseries_test.py index 6f05b033..580db3a6 100644 --- a/tests/timeseries/timeseries_test.py +++ b/tests/timeseries/timeseries_test.py @@ -29,6 +29,72 @@ def init_session(): cwms.api.init_session(api_root=_MOCK_ROOT) +def test_update_timeseries_groups(requests_mock): + group_id = "USGS TS Data Acquisition" + office_id = "CWMS" + replace_assigned_ts = True + data = _TS_GROUP + + 
requests_mock.patch( + f"{_MOCK_ROOT}/timeseries/group/USGS%20TS%20Data%20Acquisition?replace-assigned-ts=True&office=CWMS", + status_code=200, + ) + + timeseries.update_timeseries_groups(group_id, office_id, replace_assigned_ts, data) + + assert requests_mock.called + assert requests_mock.call_count == 1 + + +def test_timeseries_group_df_to_json_valid_data(): + data = pd.DataFrame( + { + "officeId": ["office123", "office456"], + "timeseriesId": ["ts1", "ts2"], + "aliasId": [None, "alias2"], + "attribute": [0, 10], + "tsCode": ["code1", None], + } + ) + + # Clean DataFrame by removing NaN from required columns and fix optional ones + required_columns = ["officeId", "timeseriesId"] + data = data.dropna(subset=required_columns) + optional_columns = ["aliasId", "tsCode"] + for col in optional_columns: + if col in data.columns: + data[col] = data[col].where(pd.notnull(data[col]), None) + + expected_json = { + "office-id": "office123", + "id": "group123", + "time-series-category": { + "office-id": "office123", + "id": "cat123", + }, + "assigned-time-series": [ + { + "office-id": "office123", + "timeseries-id": "ts1", + "alias-id": None, + "attribute": 0, + "tsCode": "code1", + }, + { + "office-id": "office456", + "timeseries-id": "ts2", + "alias-id": "alias2", + "attribute": 10, + }, + ], + } + + result = timeseries.timeseries_group_df_to_json( + data, "group123", "office123", "cat123" + ) + assert result == expected_json + + def test_timeseries_df_to_json(): test_json = { "name": "TestLoc.Stage.Inst.1Hour.0.Testing",