From 61243ab58e5ab32712dc1563b5af3a958f15e79a Mon Sep 17 00:00:00 2001
From: Treyson Le <treysonle11@gmail.com>
Date: Thu, 5 Dec 2024 18:00:58 -0700
Subject: [PATCH 1/4] Adding Critscript and Timeseries functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Added critscript import to cwms/__init__.py
• Added update_timeseries_groups
• Added timeseries_group_df_to_json
---
 cwms/__init__.py              |   1 +
 cwms/timeseries/critscript.py | 111 +++++++++++++++++++++++++++++++++
 cwms/timeseries/timeseries.py | 113 ++++++++++++++++++++++++++++++++++
 3 files changed, 225 insertions(+)
 create mode 100644 cwms/timeseries/critscript.py

diff --git a/cwms/__init__.py b/cwms/__init__.py
index 12870b40..d6a65a57 100644
--- a/cwms/__init__.py
+++ b/cwms/__init__.py
@@ -17,6 +17,7 @@
 from cwms.ratings.ratings_spec import *
 from cwms.ratings.ratings_template import *
 from cwms.standard_text.standard_text import *
+from cwms.timeseries.critscript import *
 from cwms.timeseries.timerseries_identifier import *
 from cwms.timeseries.timeseries import *
 from cwms.timeseries.timeseries_bin import *
diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py
new file mode 100644
index 00000000..16ddfb95
--- /dev/null
+++ b/cwms/timeseries/critscript.py
@@ -0,0 +1,111 @@
+import re
+
+import pandas as pd
+
+import cwms
+
+
+def crit_script(
+    file_path,
+    office_id,
+    group_id="SHEF Data Acquisition",
+    category_id="Data Aquisition",
+    group_office_id="CWMS",
+):
+    def parse_crit_file(file_path):
+        """
+        Parses a .crit file into a dictionary containing timeseries ID and Alias.
+
+        Parameters
+        ----------
+            file_path : str
+                   Path to the .crit file.
+            office_id : str
+                The ID of the office associated with the specified timeseries.
+            group_id : str
+                The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition".
+            category_id: string
+                The category id that contains the timeseries group. Defaults to "Data Acquisition".
+            group_office_id : str
+                The specified office group associated with the timeseries data. Defaults to "CWMS".
+        Returns
+        -------
+        list of dict
+            A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID.
+        """
+        parsed_data = []
+        with open(file_path, "r") as file:
+            for line in file:
+                # Ignore comment lines and empty lines
+                if line.startswith("#") or not line.strip():
+                    continue
+
+                # Extract alias, timeseries ID, and TZ
+                match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip())
+                if match:
+                    alias = match.group(1).strip()
+                    timeseries_id = match.group(2).strip()
+                    alias2 = match.group(3).strip()
+
+                    parsed_data.append(
+                        {
+                            "Alias": alias + ":" + alias2,
+                            "Timeseries ID": timeseries_id,
+                        }
+                    )
+
+        return parsed_data
+
+    def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str):
+        """
+        Appends a row to the DataFrame.
+
+        Parameters
+        ----------
+            df : pandas.DataFrame
+                The DataFrame to append to.
+            office_id : str
+                The ID of the office associated with the specified timeseries.
+            tsId : str
+                The timeseries ID from the file.
+            alias : str
+                The alias from the file.
+        Returns
+        -------
+        pandas.DataFrame
+            The updated DataFrame.
+        """
+        data = {
+            "officeId": [office_id],
+            "timeseriesId": [tsId],
+            "aliasId": [alias],
+            "tsCode": ["none"],  # Default value for ts-code
+            "attribute": [0],  # Default value for attribute
+        }
+        df = pd.concat([df, pd.DataFrame(data)])
+        return df
+
+    # Parse the file and get the parsed data
+    parsed_data = parse_crit_file(file_path)
+
+    df = pd.DataFrame()
+
+    for data in parsed_data:
+        # Create DataFrame for the current row
+        df = append_df(df, office_id, data["Timeseries ID"], data["Alias"])
+
+    # Generate JSON dictionary
+    json_dict = cwms.timeseries_group_df_to_json(
+        df, group_id, group_office_id, category_id
+    )
+
+    # Print DataFrame for verification
+    pd.set_option("display.max_columns", None)
+
+    cwms.update_timeseries_groups(
+        group_id=group_id,
+        office_id=office_id,
+        category_id=category_id,
+        replace_assigned_ts=None,
+        data=json_dict,
+    )
diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py
index f58deb62..c5e37c3a 100644
--- a/cwms/timeseries/timeseries.py
+++ b/cwms/timeseries/timeseries.py
@@ -9,6 +9,119 @@
 from cwms.cwms_types import JSON, Data
 
 
+def update_timeseries_groups(
+    group_id: str,
+    office_id: str,
+    category_id: str,
+    replace_assigned_ts: Optional[bool],
+    data: JSON,
+) -> None:
+    """
+    Updates the timeseries groups with the provided group ID and office ID.
+
+    Parameters
+    ----------
+        group_id : str
+            The new specified timeseries ID that will replace the old ID.
+        office_id : str
+            The ID of the office associated with the specified timeseries.
+        category_id: string
+            The category id that contains the timeseries group.
+        replace_assigned_ts : bool, optional
+            Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False.
+        data: JSON dictionary
+            Time Series data to be stored.
+
+    Returns
+    -------
+    None
+    """
+    if not group_id:
+        raise ValueError("Cannot update a specified level without an id")
+    if not office_id:
+        raise ValueError("Cannot update a specified level without an office id")
+
+    endpoint = f"timeseries/group/{group_id}"
+    params = {
+        "replace-assigned-ts": replace_assigned_ts,
+        "office": office_id,
+        "category-id": category_id,
+    }
+
+    api.patch(endpoint=endpoint, data=data, params=params)
+
+
+def timeseries_group_df_to_json(
+    data: pd.DataFrame,
+    group_id: str,
+    office_id: str,
+    category_id: str,
+) -> JSON:
+    """
+    Converts a dataframe to a json dictionary in the correct format.
+
+    Parameters
+    ----------
+        data: pd.DataFrame
+            Dataframe containing timeseries information.
+        group_id: str
+            The group ID for the timeseries.
+        office_id: str
+            The ID of the office associated with the specified timeseries.
+        category_id: str
+            The ID of the category associated with the group
+
+    Returns
+    -------
+    JSON
+        JSON dictionary of the timeseries data.
+    """
+    required_columns = ["officeId", "timeseriesId"]
+    for column in required_columns:
+        if column not in data.columns:
+            raise TypeError(
+                f"{column} is a required column in data when posting as a dataframe"
+            )
+
+    if data.isnull().values.any():
+        raise ValueError("Null/NaN data must be removed from the dataframe")
+
+    # Check if 'alias' column exists, if not create it and set to None
+    if "aliasId" not in data.columns:
+        data["aliasId"] = None
+
+    # Check if 'attribute' column exists, if not create it and set to 0
+    if "attribute" not in data.columns:
+        data["attribute"] = 0
+
+    json_dict = {
+        "office-id": office_id,
+        "id": group_id,
+        "time-series-category": {"office-id": office_id, "id": category_id},
+        "assigned-time-series": [],
+    }
+
+    # Convert DataFrame to a list of dictionaries with each row becoming a dict
+    entries = data.to_dict(orient="records")
+
+    # Iterate through each record and add it to the JSON dictionary
+    for entry in entries:
+        ts_dict = {
+            "office-id": entry["officeId"],
+            "timeseries-id": entry["timeseriesId"],
+            "alias-id": entry["aliasId"],
+            "attribute": entry["attribute"],
+        }
+
+    # Only include 'ts-code' if it exists and is not NaN
+    if entry.get("tsCode") and pd.notna(entry["tsCode"]):
+        ts_dict["tsCode"] = entry["tsCode"]
+
+    json_dict["assigned-time-series"].append(ts_dict)
+
+    return json_dict
+
+
 def get_timeseries_group(group_id: str, category_id: str, office_id: str) -> Data:
     """Retreives time series stored in the requested time series group
 

From 6ea25e7db88365f8c14faa46fa8f6afca2466c06 Mon Sep 17 00:00:00 2001
From: Treyson Le <treysonle11@gmail.com>
Date: Thu, 5 Dec 2024 18:20:05 -0700
Subject: [PATCH 2/4] Updated to fix some poetry test.

Still fails:
poetry run mypy --strict cwms/
---
 cwms/timeseries/critscript.py | 53 +++++++++++++++++++++++------------
 cwms/timeseries/timeseries.py |  8 +++---
 2 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py
index 16ddfb95..5338dbaa 100644
--- a/cwms/timeseries/critscript.py
+++ b/cwms/timeseries/critscript.py
@@ -1,4 +1,5 @@
 import re
+from typing import Dict, List
 
 import pandas as pd
 
@@ -6,13 +7,34 @@
 
 
 def crit_script(
-    file_path,
-    office_id,
-    group_id="SHEF Data Acquisition",
-    category_id="Data Aquisition",
-    group_office_id="CWMS",
-):
-    def parse_crit_file(file_path):
+    file_path: str,
+    office_id: str,
+    group_id: str = "SHEF Data Acquisition",
+    category_id: str = "Data Acquisition",
+    group_office_id: str = "CWMS",
+) -> None:
+    """
+    Processes a .crit file, updates the timeseries groups, and generates a JSON dictionary.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the .crit file.
+    office_id : str
+        The ID of the office associated with the specified timeseries.
+    group_id : str, optional
+        The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition".
+    category_id : str, optional
+        The category ID that contains the timeseries group. Defaults to "Data Acquisition".
+    group_office_id : str, optional
+        The specified office group associated with the timeseries data. Defaults to "CWMS".
+
+    Returns
+    -------
+    None
+    """
+
+    def parse_crit_file(file_path: str) -> List[Dict[str, str]]:
         """
         Parses a .crit file into a dictionary containing timeseries ID and Alias.
 
@@ -20,18 +42,11 @@ def parse_crit_file(file_path):
         ----------
             file_path : str
                    Path to the .crit file.
-            office_id : str
-                The ID of the office associated with the specified timeseries.
-            group_id : str
-                The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition".
-            category_id: string
-                The category id that contains the timeseries group. Defaults to "Data Acquisition".
-            group_office_id : str
-                The specified office group associated with the timeseries data. Defaults to "CWMS".
+
         Returns
         -------
-        list of dict
-            A list of dictionaries containing the parsed key-value pairs, each with Alias and Timeseries ID.
+        List[Dict[str, str]]
+            A list of dictionaries with "Alias" and "Timeseries ID" as keys.
         """
         parsed_data = []
         with open(file_path, "r") as file:
@@ -56,7 +71,9 @@ def parse_crit_file(file_path):
 
         return parsed_data
 
-    def append_df(df: pd.DataFrame, office_id: str, tsId: str, alias: str):
+    def append_df(
+        df: pd.DataFrame, office_id: str, tsId: str, alias: str
+    ) -> pd.DataFrame:
         """
         Appends a row to the DataFrame.
 
diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py
index c5e37c3a..b8888fba 100644
--- a/cwms/timeseries/timeseries.py
+++ b/cwms/timeseries/timeseries.py
@@ -113,11 +113,11 @@ def timeseries_group_df_to_json(
             "attribute": entry["attribute"],
         }
 
-    # Only include 'ts-code' if it exists and is not NaN
-    if entry.get("tsCode") and pd.notna(entry["tsCode"]):
-        ts_dict["tsCode"] = entry["tsCode"]
+        # Only include 'ts-code' if it exists and is not NaN
+        if entry.get("tsCode") and pd.notna(entry["tsCode"]):
+            ts_dict["tsCode"] = entry["tsCode"]
 
-    json_dict["assigned-time-series"].append(ts_dict)
+        json_dict["assigned-time-series"].append(ts_dict)
 
     return json_dict
 

From 5d68ab4943b99d574e599369bfe85de937fd1bcf Mon Sep 17 00:00:00 2001
From: Treyson Le <treysonle11@gmail.com>
Date: Fri, 6 Dec 2024 13:51:27 -0700
Subject: [PATCH 3/4] Passes poetry. Need to test

---
 cwms/timeseries/timeseries.py | 37 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py
index b8888fba..71c00b7d 100644
--- a/cwms/timeseries/timeseries.py
+++ b/cwms/timeseries/timeseries.py
@@ -94,31 +94,30 @@ def timeseries_group_df_to_json(
     if "attribute" not in data.columns:
         data["attribute"] = 0
 
+    # Build the list of time-series entries
+    assigned_time_series = data.apply(
+        lambda entry: {
+            "office-id": entry["officeId"],
+            "timeseries-id": entry["timeseriesId"],
+            "alias-id": entry["aliasId"],
+            "attribute": entry["attribute"],
+            **(
+                {"tsCode": entry["tsCode"]}
+                if "tsCode" in entry and pd.notna(entry["tsCode"])
+                else {}
+            ),
+        },
+        axis=1,
+    ).tolist()
+
+    # Construct the final JSON dictionary
     json_dict = {
         "office-id": office_id,
         "id": group_id,
         "time-series-category": {"office-id": office_id, "id": category_id},
-        "assigned-time-series": [],
+        "assigned-time-series": assigned_time_series,
     }
 
-    # Convert DataFrame to a list of dictionaries with each row becoming a dict
-    entries = data.to_dict(orient="records")
-
-    # Iterate through each record and add it to the JSON dictionary
-    for entry in entries:
-        ts_dict = {
-            "office-id": entry["officeId"],
-            "timeseries-id": entry["timeseriesId"],
-            "alias-id": entry["aliasId"],
-            "attribute": entry["attribute"],
-        }
-
-        # Only include 'ts-code' if it exists and is not NaN
-        if entry.get("tsCode") and pd.notna(entry["tsCode"]):
-            ts_dict["tsCode"] = entry["tsCode"]
-
-        json_dict["assigned-time-series"].append(ts_dict)
-
     return json_dict
 
 

From b043f5e914c189812902cec1129f8e486620e263 Mon Sep 17 00:00:00 2001
From: Treyson Le <treysonle11@gmail.com>
Date: Fri, 20 Dec 2024 14:09:26 -0800
Subject: [PATCH 4/4] Added test_timeseries_group_df_to_json_valid_data()

Passes all poetry tests
---
 cwms/timeseries/critscript.py       |  1 -
 cwms/timeseries/timeseries.py       | 50 +++++++++++-----------
 tests/timeseries/timeseries_test.py | 66 +++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/cwms/timeseries/critscript.py b/cwms/timeseries/critscript.py
index 5338dbaa..40751d5f 100644
--- a/cwms/timeseries/critscript.py
+++ b/cwms/timeseries/critscript.py
@@ -122,7 +122,6 @@ def append_df(
     cwms.update_timeseries_groups(
         group_id=group_id,
         office_id=office_id,
-        category_id=category_id,
         replace_assigned_ts=None,
         data=json_dict,
     )
diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py
index 71c00b7d..64bbc033 100644
--- a/cwms/timeseries/timeseries.py
+++ b/cwms/timeseries/timeseries.py
@@ -12,29 +12,26 @@
 def update_timeseries_groups(
     group_id: str,
     office_id: str,
-    category_id: str,
     replace_assigned_ts: Optional[bool],
     data: JSON,
 ) -> None:
     """
-    Updates the timeseries groups with the provided group ID and office ID.
+        Updates the timeseries groups with the provided group ID and office ID.
 
-    Parameters
-    ----------
-        group_id : str
-            The new specified timeseries ID that will replace the old ID.
-        office_id : str
-            The ID of the office associated with the specified timeseries.
-        category_id: string
-            The category id that contains the timeseries group.
-        replace_assigned_ts : bool, optional
-            Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False.
-        data: JSON dictionary
-            Time Series data to be stored.
-
-    Returns
-    -------
-    None
+        Parameters
+        ----------
+            group_id : str
+                The new specified timeseries ID that will replace the old ID.
+            office_id : str
+                The ID of the office associated with the specified timeseries.
+            replace_assigned_ts : bool, optional
+                Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False.
+            data: JSON dictionary
+                Time Series data to be stored.
+
+        Returns
+        -------
+        None
     """
     if not group_id:
         raise ValueError("Cannot update a specified level without an id")
@@ -45,7 +42,6 @@ def update_timeseries_groups(
     params = {
         "replace-assigned-ts": replace_assigned_ts,
         "office": office_id,
-        "category-id": category_id,
     }
 
     api.patch(endpoint=endpoint, data=data, params=params)
@@ -77,23 +73,32 @@ def timeseries_group_df_to_json(
         JSON dictionary of the timeseries data.
     """
     required_columns = ["officeId", "timeseriesId"]
+    optional_columns = ["aliasId", "attribute", "tsCode"]
     for column in required_columns:
         if column not in data.columns:
             raise TypeError(
                 f"{column} is a required column in data when posting as a dataframe"
             )
 
-    if data.isnull().values.any():
-        raise ValueError("Null/NaN data must be removed from the dataframe")
+    if data[required_columns].isnull().any().any():
+        raise ValueError(
+            f"Null/NaN values found in required columns: {required_columns}. "
+        )
 
-    # Check if 'alias' column exists, if not create it and set to None
+    # Work on a copy so the caller's DataFrame is not modified in place
+    data = data.copy()
+
+    # Fill optional columns with default values if missing
     if "aliasId" not in data.columns:
         data["aliasId"] = None
-
-    # Check if 'attribute' column exists, if not create it and set to 0
     if "attribute" not in data.columns:
         data["attribute"] = 0
 
+    # Replace NaN with None for optional columns
+    for column in optional_columns:
+        if column in data.columns:
+            data[column] = data[column].where(pd.notnull(data[column]), None)
+
     # Build the list of time-series entries
     assigned_time_series = data.apply(
         lambda entry: {
diff --git a/tests/timeseries/timeseries_test.py b/tests/timeseries/timeseries_test.py
index 6f05b033..580db3a6 100644
--- a/tests/timeseries/timeseries_test.py
+++ b/tests/timeseries/timeseries_test.py
@@ -29,6 +29,72 @@ def init_session():
     cwms.api.init_session(api_root=_MOCK_ROOT)
 
 
+def test_update_timeseries_groups(requests_mock):
+    group_id = "USGS TS Data Acquisition"
+    office_id = "CWMS"
+    replace_assigned_ts = True
+    data = _TS_GROUP
+
+    requests_mock.patch(
+        f"{_MOCK_ROOT}/timeseries/group/USGS%20TS%20Data%20Acquisition?replace-assigned-ts=True&office=CWMS",
+        status_code=200,
+    )
+
+    timeseries.update_timeseries_groups(group_id, office_id, replace_assigned_ts, data)
+
+    assert requests_mock.called
+    assert requests_mock.call_count == 1
+
+
+def test_timeseries_group_df_to_json_valid_data():
+    data = pd.DataFrame(
+        {
+            "officeId": ["office123", "office456"],
+            "timeseriesId": ["ts1", "ts2"],
+            "aliasId": [None, "alias2"],
+            "attribute": [0, 10],
+            "tsCode": ["code1", None],
+        }
+    )
+
+    # Clean DataFrame by removing NaN from required columns and fix optional ones
+    required_columns = ["officeId", "timeseriesId"]
+    data = data.dropna(subset=required_columns)
+    optional_columns = ["aliasId", "tsCode"]
+    for col in optional_columns:
+        if col in data.columns:
+            data[col] = data[col].where(pd.notnull(data[col]), None)
+
+    expected_json = {
+        "office-id": "office123",
+        "id": "group123",
+        "time-series-category": {
+            "office-id": "office123",
+            "id": "cat123",
+        },
+        "assigned-time-series": [
+            {
+                "office-id": "office123",
+                "timeseries-id": "ts1",
+                "alias-id": None,
+                "attribute": 0,
+                "tsCode": "code1",
+            },
+            {
+                "office-id": "office456",
+                "timeseries-id": "ts2",
+                "alias-id": "alias2",
+                "attribute": 10,
+            },
+        ],
+    }
+
+    result = timeseries.timeseries_group_df_to_json(
+        data, "group123", "office123", "cat123"
+    )
+    assert result == expected_json
+
+
 def test_timeseries_df_to_json():
     test_json = {
         "name": "TestLoc.Stage.Inst.1Hour.0.Testing",