From 61243ab58e5ab32712dc1563b5af3a958f15e79a Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Thu, 5 Dec 2024 18:00:58 -0700 Subject: [PATCH 1/7] Adding Critscript and Timeseries functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Added critscript to Init • Added Update_timeseries_groups • Added timeseries_group_df_to_json --- cwms/__init__.py | 1 + cwms/timeseries/critscript.py | 111 +++++++++++++++++++++++++++++++++ cwms/timeseries/timeseries.py | 113 ++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 cwms/timeseries/critscript.py diff --git a/cwms/__init__.py b/cwms/__init__.py index 12870b40..d6a65a57 100644 --- a/cwms/__init__.py +++ b/cwms/__init__.py @@ -17,6 +17,7 @@ from cwms.ratings.ratings_spec import * from cwms.ratings.ratings_template import * from cwms.standard_text.standard_text import * +from cwms.timeseries.critscript import * from cwms.timeseries.timerseries_identifier import * from cwms.timeseries.timeseries import * from cwms.timeseries.timeseries_bin import * diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py new file mode 100644 index 00000000..16ddfb95 --- /dev/null +++ b/cwms/timeseries/critscript.py @@ -0,0 +1,111 @@ +import re + +import pandas as pd + +import cwms + + +def crit_script( + file_path, + office_id, + group_id="SHEF Data Acquisition", + category_id="Data Aquisition", + group_office_id="CWMS", +): + def parse_crit_file(file_path): + """ + Parses a .crit file into a dictionary containing timeseries ID and Alias. + + Parameters + ---------- + file_path : str + Path to the .crit file. + office_id : str + The ID of the office associated with the specified timeseries. + group_id : str + The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". + category_id: string + The category id that contains the timeseries group. Defaults to "Data Acquisition". + group_office_id : str + The specified office group associated with the timeseries data. Defaults to "CWMS". + Returns + ------- + list of dict + A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID. + """ + parsed_data = [] + with open(file_path, "r") as file: + for line in file: + # Ignore comment lines and empty lines + if line.startswith("#") or not line.strip(): + continue + + # Extract alias, timeseries ID, and TZ + match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip()) + if match: + alias = match.group(1).strip() + timeseries_id = match.group(2).strip() + alias2 = match.group(3).strip() + + parsed_data.append( + { + "Alias": alias + ":" + alias2, + "Timeseries ID": timeseries_id, + } + ) + + return parsed_data + + def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str): + """ + Appends a row to the DataFrame. + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame to append to. + office_id : str + The ID of the office associated with the specified timeseries. + tsId : str + The timeseries ID from the file. + alias : str + The alias from the file. + Returns + ------- + pandas.DataFrame + The updated DataFrame. + """ + data = { + "officeId": [office_id], + "timeseriesId": [tsId], + "aliasId": [alias], + "tsCode": ["none"], # Default value for ts-code + "attribute": [0], # Default value for attribute + } + df = pd.concat([df, pd.DataFrame(data)]) + return df + + # Parse the file and get the parsed data + parsed_data = parse_crit_file(file_path) + + df = pd.DataFrame() + + for data in parsed_data: + # Create DataFrame for the current row + df = append_df(df, office_id, data["Timeseries ID"], data["Alias"]) + + # Generate JSON dictionary + json_dict = cwms.timeseries_group_df_to_json( + df, group_id, group_office_id, category_id + ) + + # Print DataFrame for verification + pd.set_option("display.max_columns", None) + + cwms.update_timeseries_groups( + group_id=group_id, + office_id=office_id, + category_id=category_id, + replace_assigned_ts=None, + data=json_dict, + ) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index f58deb62..c5e37c3a 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -9,6 +9,119 @@ from cwms.cwms_types import JSON, Data +def update_timeseries_groups( + group_id: str, + office_id: str, + category_id: str, + replace_assigned_ts: Optional[bool], + data: JSON, +) -> None: + """ + Updates the timeseries groups with the provided group ID and office ID. + + Parameters + ---------- + group_id : str + The new specified timeseries ID that will replace the old ID. + office_id : str + The ID of the office associated with the specified timeseries. + category_id: string + The category id that contains the timeseries group. + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. + data: JSON dictionary + Time Series data to be stored. + + Returns + ------- + None + """ + if not group_id: + raise ValueError("Cannot update a specified level without an id") + if not office_id: + raise ValueError("Cannot update a specified level without an office id") + + endpoint = f"timeseries/group/{group_id}" + params = { + "replace-assigned-ts": replace_assigned_ts, + "office": office_id, + "category-id": category_id, + } + + api.patch(endpoint=endpoint, data=data, params=params) + + +def timeseries_group_df_to_json( + data: pd.DataFrame, + group_id: str, + office_id: str, + category_id: str, +) -> JSON: + """ + Converts a dataframe to a json dictionary in the correct format. + + Parameters + ---------- + data: pd.DataFrame + Dataframe containing timeseries information. + group_id: str + The group ID for the timeseries. + office_id: str + The ID of the office associated with the specified timeseries. + category_id: str + The ID of the category associated with the group + + Returns + ------- + JSON + JSON dictionary of the timeseries data. + """ + required_columns = ["officeId", "timeseriesId"] + for column in required_columns: + if column not in data.columns: + raise TypeError( + f"{column} is a required column in data when posting as a dataframe" + ) + + if data.isnull().values.any(): + raise ValueError("Null/NaN data must be removed from the dataframe") + + # Check if 'alias' column exists, if not create it and set to None + if "aliasId" not in data.columns: + data["aliasId"] = None + + # Check if 'attribute' column exists, if not create it and set to 0 + if "attribute" not in data.columns: + data["attribute"] = 0 + + json_dict = { + "office-id": office_id, + "id": group_id, + "time-series-category": {"office-id": office_id, "id": category_id}, + "assigned-time-series": [], + } + + # Convert DataFrame to a list of dictionaries with each row becoming a dict + entries = data.to_dict(orient="records") + + # Iterate through each record and add it to the JSON dictionary + for entry in entries: + ts_dict = { + "office-id": entry["officeId"], + "timeseries-id": entry["timeseriesId"], + "alias-id": entry["aliasId"], + "attribute": entry["attribute"], + } + + # Only include 'ts-code' if it exists and is not NaN + if entry.get("tsCode") and pd.notna(entry["tsCode"]): + ts_dict["tsCode"] = entry["tsCode"] + + json_dict["assigned-time-series"].append(ts_dict) + + return json_dict + + def get_timeseries_group(group_id: str, category_id: str, office_id: str) -> Data: """Retreives time series stored in the requested time series group From 6ea25e7db88365f8c14faa46fa8f6afca2466c06 Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Thu, 5 Dec 2024 18:20:05 -0700 Subject: [PATCH 2/7] Updated to fix some poetry test. Still fails: poetry run mypy --strict cwms/ --- cwms/timeseries/critscript.py | 53 +++++++++++++++++++++++------------ cwms/timeseries/timeseries.py | 8 +++--- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py index 16ddfb95..5338dbaa 100644 --- a/cwms/timeseries/critscript.py +++ b/cwms/timeseries/critscript.py @@ -1,4 +1,5 @@ import re +from typing import Dict, List import pandas as pd @@ -6,13 +7,34 @@ def crit_script( - file_path, - office_id, - group_id="SHEF Data Acquisition", - category_id="Data Aquisition", - group_office_id="CWMS", -): - def parse_crit_file(file_path): + file_path: str, + office_id: str, + group_id: str = "SHEF Data Acquisition", + category_id: str = "Data Aquisition", + group_office_id: str = "CWMS", +) -> None: + """ + Processes a .crit file, updates the timeseries groups, and generates a JSON dictionary. + + Parameters + ---------- + file_path : str + Path to the .crit file. + office_id : str + The ID of the office associated with the specified timeseries. + group_id : str, optional + The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". + category_id : str, optional + The category ID that contains the timeseries group. Defaults to "Data Acquisition". + group_office_id : str, optional + The specified office group associated with the timeseries data. Defaults to "CWMS". + + Returns + ------- + None + """ + + def parse_crit_file(file_path: str) -> List[Dict[str, str]]: """ Parses a .crit file into a dictionary containing timeseries ID and Alias. @@ -20,18 +42,11 @@ def parse_crit_file(file_path): ---------- file_path : str Path to the .crit file. - office_id : str - The ID of the office associated with the specified timeseries. - group_id : str - The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition". - category_id: string - The category id that contains the timeseries group. Defaults to "Data Acquisition". - group_office_id : str - The specified office group associated with the timeseries data. Defaults to "CWMS". + Returns ------- - list of dict - A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID. + List[Dict[str, str]] + A list of dictionaries with "Alias" and "Timeseries ID" as keys. """ parsed_data = [] with open(file_path, "r") as file: @@ -56,7 +71,9 @@ def parse_crit_file(file_path): return parsed_data - def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str): + def append_df( + df: pd.DataFrame, office_id: str, tsId: str, alias: str + ) -> pd.DataFrame: """ Appends a row to the DataFrame. diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index c5e37c3a..b8888fba 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -113,11 +113,11 @@ def timeseries_group_df_to_json( "attribute": entry["attribute"], } - # Only include 'ts-code' if it exists and is not NaN - if entry.get("tsCode") and pd.notna(entry["tsCode"]): - ts_dict["tsCode"] = entry["tsCode"] + # Only include 'ts-code' if it exists and is not NaN + if entry.get("tsCode") and pd.notna(entry["tsCode"]): + ts_dict["tsCode"] = entry["tsCode"] - json_dict["assigned-time-series"].append(ts_dict) + json_dict["assigned-time-series"].append(ts_dict) return json_dict From 5d68ab4943b99d574e599369bfe85de937fd1bcf Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Fri, 6 Dec 2024 13:51:27 -0700 Subject: [PATCH 3/7] Passes poetry. Need to test --- cwms/timeseries/timeseries.py | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index b8888fba..71c00b7d 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -94,31 +94,30 @@ def timeseries_group_df_to_json( if "attribute" not in data.columns: data["attribute"] = 0 + # Build the list of time-series entries + assigned_time_series = data.apply( + lambda entry: { + "office-id": entry["officeId"], + "timeseries-id": entry["timeseriesId"], + "alias-id": entry["aliasId"], + "attribute": entry["attribute"], + **( + {"tsCode": entry["tsCode"]} + if "tsCode" in entry and pd.notna(entry["tsCode"]) + else {} + ), + }, + axis=1, + ).tolist() + + # Construct the final JSON dictionary json_dict = { "office-id": office_id, "id": group_id, "time-series-category": {"office-id": office_id, "id": category_id}, - "assigned-time-series": [], + "assigned-time-series": assigned_time_series, } - # Convert DataFrame to a list of dictionaries with each row becoming a dict - entries = data.to_dict(orient="records") - - # Iterate through each record and add it to the JSON dictionary - for entry in entries: - ts_dict = { - "office-id": entry["officeId"], - "timeseries-id": entry["timeseriesId"], - "alias-id": entry["aliasId"], - "attribute": entry["attribute"], - } - - # Only include 'ts-code' if it exists and is not NaN - if entry.get("tsCode") and pd.notna(entry["tsCode"]): - ts_dict["tsCode"] = entry["tsCode"] - - json_dict["assigned-time-series"].append(ts_dict) - return json_dict From b043f5e914c189812902cec1129f8e486620e263 Mon Sep 17 00:00:00 2001 From: Treyson Le Date: Fri, 20 Dec 2024 14:09:26 -0800 Subject: [PATCH 4/7] Added test_timeseries_group_df_to_json_valid_data() Passes all poetry tests --- cwms/timeseries/critscript.py | 1 - cwms/timeseries/timeseries.py | 50 +++++++++++----------- tests/timeseries/timeseries_test.py | 66 +++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 25 deletions(-) diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py index 5338dbaa..40751d5f 100644 --- a/cwms/timeseries/critscript.py +++ b/cwms/timeseries/critscript.py @@ -122,7 +122,6 @@ def append_df( cwms.update_timeseries_groups( group_id=group_id, office_id=office_id, - category_id=category_id, replace_assigned_ts=None, data=json_dict, ) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index 71c00b7d..64bbc033 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -12,29 +12,26 @@ def update_timeseries_groups( group_id: str, office_id: str, - category_id: str, replace_assigned_ts: Optional[bool], data: JSON, ) -> None: """ - Updates the timeseries groups with the provided group ID and office ID. + Updates the timeseries groups with the provided group ID and office ID. - Parameters - ---------- - group_id : str - The new specified timeseries ID that will replace the old ID. - office_id : str - The ID of the office associated with the specified timeseries. - category_id: string - The category id that contains the timeseries group. - replace_assigned_ts : bool, optional - Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. - data: JSON dictionary - Time Series data to be stored. - - Returns - ------- - None + Parameters + ---------- + group_id : str + The new specified timeseries ID that will replace the old ID. + office_id : str + The ID of the office associated with the specified timeseries. + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. + data: JSON dictionary + Time Series data to be stored. + ```````````````````````````````````````` + Returns + ------- + None """ if not group_id: raise ValueError("Cannot update a specified level without an id") @@ -45,7 +42,6 @@ def update_timeseries_groups( params = { "replace-assigned-ts": replace_assigned_ts, "office": office_id, - "category-id": category_id, } api.patch(endpoint=endpoint, data=data, params=params) @@ -77,23 +73,29 @@ def timeseries_group_df_to_json( JSON dictionary of the timeseries data. """ required_columns = ["officeId", "timeseriesId"] + optional_columns = ["aliasId", "attribute", "tsCode"] for column in required_columns: if column not in data.columns: raise TypeError( f"{column} is a required column in data when posting as a dataframe" ) - if data.isnull().values.any(): - raise ValueError("Null/NaN data must be removed from the dataframe") + if data[required_columns].isnull().any().any(): + raise ValueError( + f"Null/NaN values found in required columns: {required_columns}. " + ) - # Check if 'alias' column exists, if not create it and set to None + # Fill optional columns with default values if missing if "aliasId" not in data.columns: data["aliasId"] = None - - # Check if 'attribute' column exists, if not create it and set to 0 if "attribute" not in data.columns: data["attribute"] = 0 + # Replace NaN with None for optional columns + for column in optional_columns: + if column in data.columns: + data[column] = data[column].where(pd.notnull(data[column]), None) + # Build the list of time-series entries assigned_time_series = data.apply( lambda entry: { diff --git a/tests/timeseries/timeseries_test.py b/tests/timeseries/timeseries_test.py index 6f05b033..580db3a6 100644 --- a/tests/timeseries/timeseries_test.py +++ b/tests/timeseries/timeseries_test.py @@ -29,6 +29,72 @@ def init_session(): cwms.api.init_session(api_root=_MOCK_ROOT) +def test_update_timeseries_groups(requests_mock): + group_id = "USGS TS Data Acquisition" + office_id = "CWMS" + replace_assigned_ts = True + data = _TS_GROUP + + requests_mock.patch( + f"{_MOCK_ROOT}/timeseries/group/USGS%20TS%20Data%20Acquisition?replace-assigned-ts=True&office=CWMS", + status_code=200, + ) + + timeseries.update_timeseries_groups(group_id, office_id, replace_assigned_ts, data) + + assert requests_mock.called + assert requests_mock.call_count == 1 + + +def test_timeseries_group_df_to_json_valid_data(): + data = pd.DataFrame( + { + "officeId": ["office123", "office456"], + "timeseriesId": ["ts1", "ts2"], + "aliasId": [None, "alias2"], + "attribute": [0, 10], + "tsCode": ["code1", None], + } + ) + + # Clean DataFrame by removing NaN from required columns and fix optional ones + required_columns = ["officeId", "timeseriesId"] + data = data.dropna(subset=required_columns) + optional_columns = ["aliasId", "tsCode"] + for col in optional_columns: + if col in data.columns: + data[col] = data[col].where(pd.notnull(data[col]), None) + + expected_json = { + "office-id": "office123", + "id": "group123", + "time-series-category": { + "office-id": "office123", + "id": "cat123", + }, + "assigned-time-series": [ + { + "office-id": "office123", + "timeseries-id": "ts1", + "alias-id": None, + "attribute": 0, + "tsCode": "code1", + }, + { + "office-id": "office456", + "timeseries-id": "ts2", + "alias-id": "alias2", + "attribute": 10, + }, + ], + } + + result = timeseries.timeseries_group_df_to_json( + data, "group123", "office123", "cat123" + ) + assert result == expected_json + + def test_timeseries_df_to_json(): test_json = { "name": "TestLoc.Stage.Inst.1Hour.0.Testing", From 12caa59eb2a472d305b2ae4cd044a90e73973d8a Mon Sep 17 00:00:00 2001 From: Novotny <1533907136121002@mil> Date: Fri, 3 Jan 2025 14:37:27 -0600 Subject: [PATCH 5/7] updates to get crit script to work and reorganization --- cwms/__init__.py | 2 +- .../shef_critfile_import.py} | 60 +++++++++---------- cwms/timeseries/timeseries.py | 39 ++++++------ tests/timeseries/timeseries_test.py | 21 ++++--- 4 files changed, 64 insertions(+), 58 deletions(-) rename cwms/{timeseries/critscript.py => datafile_imports/shef_critfile_import.py} (65%) diff --git a/cwms/__init__.py b/cwms/__init__.py index d6a65a57..e2d9f159 100644 --- a/cwms/__init__.py +++ b/cwms/__init__.py @@ -2,6 +2,7 @@ from cwms.api import * from cwms.catalog.catalog import * +from cwms.datafile_imports.shef_critfile_import import * from cwms.forecast.forecast_instance import * from cwms.forecast.forecast_spec import * from cwms.levels.location_levels import * @@ -17,7 +18,6 @@ from cwms.ratings.ratings_spec import * from cwms.ratings.ratings_template import * from cwms.standard_text.standard_text import * -from cwms.timeseries.critscript import * from cwms.timeseries.timerseries_identifier import * from cwms.timeseries.timeseries import * from cwms.timeseries.timeseries_bin import * diff --git a/cwms/timeseries/critscript.py b/cwms/datafile_imports/shef_critfile_import.py similarity index 65% rename from cwms/timeseries/critscript.py rename to cwms/datafile_imports/shef_critfile_import.py index 40751d5f..92b55773 100644 --- a/cwms/timeseries/critscript.py +++ b/cwms/datafile_imports/shef_critfile_import.py @@ -1,20 +1,25 @@ +import json import re from typing import Dict, List import pandas as pd -import cwms +from cwms.timeseries.timeseries import ( + timeseries_group_df_to_json, + update_timeseries_groups, +) -def crit_script( +def import_critfile_to_ts_group( file_path: str, office_id: str, group_id: str = "SHEF Data Acquisition", - category_id: str = "Data Aquisition", + category_id: str = "Data Acquisition", group_office_id: str = "CWMS", + replace_assigned_ts: bool = False, ) -> None: """ - Processes a .crit file, updates the timeseries groups, and generates a JSON dictionary. + Processes a .crit file and saves the information to the SHEF Data Acquisition time series group. Parameters ---------- @@ -28,6 +33,8 @@ def crit_script( The category ID that contains the timeseries group. Defaults to "Data Acquisition". group_office_id : str, optional The specified office group associated with the timeseries data. Defaults to "CWMS". + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False. Returns ------- @@ -55,19 +62,20 @@ def parse_crit_file(file_path: str) -> List[Dict[str, str]]: if line.startswith("#") or not line.strip(): continue - # Extract alias, timeseries ID, and TZ - match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip()) - if match: - alias = match.group(1).strip() - timeseries_id = match.group(2).strip() - alias2 = match.group(3).strip() + # Extract alias, timeseries ID, and TZ + match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip()) - parsed_data.append( - { - "Alias": alias + ":" + alias2, - "Timeseries ID": timeseries_id, - } - ) + if match: + alias = match.group(1).strip() + timeseries_id = match.group(2).strip() + alias2 = match.group(3).strip() + + parsed_data.append( + { + "Alias": alias + ":" + alias2, + "Timeseries ID": timeseries_id, + } + ) return parsed_data @@ -93,11 +101,9 @@ def append_df( The updated DataFrame. """ data = { - "officeId": [office_id], - "timeseriesId": [tsId], - "aliasId": [alias], - "tsCode": ["none"], # Default value for ts-code - "attribute": [0], # Default value for attribute + "office-id": [office_id], + "timeseries-id": [tsId], + "alias-id": [alias], } df = pd.concat([df, pd.DataFrame(data)]) return df @@ -106,22 +112,16 @@ def append_df( parsed_data = parse_crit_file(file_path) df = pd.DataFrame() - for data in parsed_data: # Create DataFrame for the current row df = append_df(df, office_id, data["Timeseries ID"], data["Alias"]) # Generate JSON dictionary - json_dict = cwms.timeseries_group_df_to_json( - df, group_id, group_office_id, category_id - ) - - # Print DataFrame for verification - pd.set_option("display.max_columns", None) + json_dict = timeseries_group_df_to_json(df, group_id, group_office_id, category_id) - cwms.update_timeseries_groups( + update_timeseries_groups( group_id=group_id, office_id=office_id, - replace_assigned_ts=None, + replace_assigned_ts=replace_assigned_ts, data=json_dict, ) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index 64bbc033..e82a21f9 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -10,10 +10,10 @@ def update_timeseries_groups( + data: JSON, group_id: str, office_id: str, - replace_assigned_ts: Optional[bool], - data: JSON, + replace_assigned_ts: Optional[bool] = False, ) -> None: """ Updates the timeseries groups with the provided group ID and office ID. @@ -44,7 +44,7 @@ def update_timeseries_groups( "office": office_id, } - api.patch(endpoint=endpoint, data=data, params=params) + api.patch(endpoint=endpoint, data=data, params=params, api_version=1) def timeseries_group_df_to_json( @@ -72,40 +72,41 @@ def timeseries_group_df_to_json( JSON JSON dictionary of the timeseries data. """ - required_columns = ["officeId", "timeseriesId"] - optional_columns = ["aliasId", "attribute", "tsCode"] + df = data.copy() + required_columns = ["office-id", "timeseries-id"] + optional_columns = ["alias-id", "attribute", "ts-code"] for column in required_columns: - if column not in data.columns: + if column not in df.columns: raise TypeError( f"{column} is a required column in data when posting as a dataframe" ) - if data[required_columns].isnull().any().any(): + if df[required_columns].isnull().any().any(): raise ValueError( f"Null/NaN values found in required columns: {required_columns}. " ) # Fill optional columns with default values if missing - if "aliasId" not in data.columns: - data["aliasId"] = None - if "attribute" not in data.columns: - data["attribute"] = 0 + if "alias-id" not in df.columns: + df["alias-id"] = None + if "attribute" not in df.columns: + df["attribute"] = 0 # Replace NaN with None for optional columns for column in optional_columns: - if column in data.columns: - data[column] = data[column].where(pd.notnull(data[column]), None) + if column in df.columns: + data[column] = df[column].where(pd.notnull(df[column]), None) # Build the list of time-series entries - assigned_time_series = data.apply( + assigned_time_series = df.apply( lambda entry: { - "office-id": entry["officeId"], - "timeseries-id": entry["timeseriesId"], - "alias-id": entry["aliasId"], + "office-id": entry["office-id"], + "timeseries-id": entry["timeseries-id"], + "alias-id": entry["alias-id"], "attribute": entry["attribute"], **( - {"tsCode": entry["tsCode"]} - if "tsCode" in entry and pd.notna(entry["tsCode"]) + {"ts-code": entry["ts-code"]} + if "ts-code" in entry and pd.notna(entry["ts-code"]) else {} ), }, diff --git a/tests/timeseries/timeseries_test.py b/tests/timeseries/timeseries_test.py index 580db3a6..2ca21f76 100644 --- a/tests/timeseries/timeseries_test.py +++ b/tests/timeseries/timeseries_test.py @@ -40,7 +40,12 @@ def test_update_timeseries_groups(requests_mock): status_code=200, ) - timeseries.update_timeseries_groups(group_id, office_id, replace_assigned_ts, data) + timeseries.update_timeseries_groups( + data=data, + group_id=group_id, + office_id=office_id, + replace_assigned_ts=replace_assigned_ts, + ) assert requests_mock.called assert requests_mock.call_count == 1 @@ -49,18 +54,18 @@ def test_update_timeseries_groups(requests_mock): def test_timeseries_group_df_to_json_valid_data(): data = pd.DataFrame( { - "officeId": ["office123", "office456"], - "timeseriesId": ["ts1", "ts2"], - "aliasId": [None, "alias2"], + "office-id": ["office123", "office456"], + "timeseries-id": ["ts1", "ts2"], + "alias-id": [None, "alias2"], "attribute": [0, 10], - "tsCode": ["code1", None], + "ts-code": ["code1", None], } ) # Clean DataFrame by removing NaN from required columns and fix optional ones - required_columns = ["officeId", "timeseriesId"] + required_columns = ["office-id", "timeseries-id"] data = data.dropna(subset=required_columns) - optional_columns = ["aliasId", "tsCode"] + optional_columns = ["alias-id", "ts-code"] for col in optional_columns: if col in data.columns: data[col] = data[col].where(pd.notnull(data[col]), None) @@ -78,7 +83,7 @@ def test_timeseries_group_df_to_json_valid_data(): "timeseries-id": "ts1", "alias-id": None, "attribute": 0, - "tsCode": "code1", + "ts-code": "code1", }, { "office-id": "office456", From c86c77b16a8615aa6423a3c8acd3685046f32eea Mon Sep 17 00:00:00 2001 From: Novotny <1533907136121002@mil> Date: Fri, 3 Jan 2025 14:43:30 -0600 Subject: [PATCH 6/7] update for sonarQube check --- cwms/datafile_imports/shef_critfile_import.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cwms/datafile_imports/shef_critfile_import.py b/cwms/datafile_imports/shef_critfile_import.py index 92b55773..d8862bb3 100644 --- a/cwms/datafile_imports/shef_critfile_import.py +++ b/cwms/datafile_imports/shef_critfile_import.py @@ -80,7 +80,7 @@ def parse_crit_file(file_path: str) -> List[Dict[str, str]]: return parsed_data def append_df( - df: pd.DataFrame, office_id: str, tsId: str, alias: str + df: pd.DataFrame, office_id: str, ts_id: str, alias: str ) -> pd.DataFrame: """ Appends a row to the DataFrame. @@ -102,7 +102,7 @@ def append_df( """ data = { "office-id": [office_id], - "timeseries-id": [tsId], + "timeseries-id": [ts_id], "alias-id": [alias], } df = pd.concat([df, pd.DataFrame(data)]) From c0f29aa0d4cf132a52c5fc2a460e806c1350bdad Mon Sep 17 00:00:00 2001 From: Novotny <1533907136121002@mil> Date: Fri, 3 Jan 2025 14:46:48 -0600 Subject: [PATCH 7/7] remove json package --- cwms/datafile_imports/shef_critfile_import.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cwms/datafile_imports/shef_critfile_import.py b/cwms/datafile_imports/shef_critfile_import.py index d8862bb3..b0b5593c 100644 --- a/cwms/datafile_imports/shef_critfile_import.py +++ b/cwms/datafile_imports/shef_critfile_import.py @@ -1,4 +1,3 @@ -import json import re from typing import Dict, List