From 354372a723f669b6f0140ca99f3aa6e5dfd5c8ff Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Sun, 21 Dec 2025 14:25:46 -0500 Subject: [PATCH 01/14] remove h5netcdf as test --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2739ec94..796229be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,6 @@ protobuf = "5.28.3" psutil = ">=7.0.0,<8" baseflow = "^0.1.0" apache-sedona = ">=1.8.0,<2" -h5netcdf = "^1.7.3" [tool.poetry.group.test.dependencies] pytest = ">=7.4.3" From 38285b801b926b8791eedf824c1b742f76f7343a Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Mon, 22 Dec 2025 17:46:32 -0500 Subject: [PATCH 02/14] change protobuf version and add h5netcdf back --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 796229be..9c5c925f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,10 +39,11 @@ scoringrules = ">=0.7.1,<1" pyiceberg = ">=0.9.1,<1" lxml = ">=5.4.0,<6" bokeh = ">=3.7.3,<4" -protobuf = "5.28.3" +protobuf = ">=5.28.3" psutil = ">=7.0.0,<8" baseflow = "^0.1.0" apache-sedona = ">=1.8.0,<2" +h5netcdf = "^1.7.3" [tool.poetry.group.test.dependencies] pytest = ">=7.4.3" From f05f238883ba7109dc9218bc93e2639739495f7c Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Mon, 22 Dec 2025 20:02:36 -0500 Subject: [PATCH 03/14] force pyspark to 4.0.x --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9c5c925f..fcf5d7ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ h5py = ">=3.12.1,<4" pyarrow = ">=15.0.0,<21" httpx = ">=0.25.1,<1" pandas = ">=2.2.0,<3" -pyspark = {extras = ["pandas-on-spark"], version = ">=4,<5"} +pyspark = {extras = ["pandas-on-spark"], version = ">=4.0,<4.1"} dataretrieval = ">=1.0.9,<2" numba = ">=0.60.0,<1" arch = ">=7.0.0,<8" From da7766a523fb1c8cc7f5ad59bbeda78eb3e88620 Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Tue, 23 Dec 2025 21:55:20 -0500 Subject: [PATCH 04/14] fix pyspark at 4.0.0 and protobuf at 5.28.3 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fcf5d7ff..0059fdb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ h5py = ">=3.12.1,<4" pyarrow = ">=15.0.0,<21" httpx = ">=0.25.1,<1" pandas = ">=2.2.0,<3" -pyspark = {extras = ["pandas-on-spark"], version = ">=4.0,<4.1"} +pyspark = {extras = ["pandas-on-spark"], version = "4.0.0"} dataretrieval = ">=1.0.9,<2" numba = ">=0.60.0,<1" arch = ">=7.0.0,<8" @@ -39,7 +39,7 @@ scoringrules = ">=0.7.1,<1" pyiceberg = ">=0.9.1,<1" lxml = ">=5.4.0,<6" bokeh = ">=3.7.3,<4" -protobuf = ">=5.28.3" +protobuf = "5.28.3" psutil = ">=7.0.0,<8" baseflow = "^0.1.0" apache-sedona = ">=1.8.0,<2" From 13a0875d2a3400450c3d5587a2bb74c9e085fd95 Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Tue, 23 Dec 2025 22:16:16 -0500 Subject: [PATCH 05/14] add some logging --- src/teehr/evaluation/metrics.py | 2 ++ src/teehr/evaluation/tables/base_table.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py index a353ac9c..65b0d478 100644 --- a/src/teehr/evaluation/metrics.py +++ b/src/teehr/evaluation/metrics.py @@ -65,6 +65,8 @@ def __call__( >>> ev = teehr.Evaluation() >>> metrics = ev.metrics(table_name="primary_timeseries") """ + logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") + self.table_name = table_name self.table = self._ev.table( table_name=table_name, diff --git a/src/teehr/evaluation/tables/base_table.py b/src/teehr/evaluation/tables/base_table.py index e1e29a4a..a255ae6e 100644 --- a/src/teehr/evaluation/tables/base_table.py +++ b/src/teehr/evaluation/tables/base_table.py @@ -53,6 +53,7 @@ def __call__( catalog_name: Union[str, None] = None ) -> "Table": """Initialize the Table class.""" + logger.info(f"Initializing Table for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") self.table_name = table_name self.sdf = None tbl_props = TBLPROPERTIES.get(table_name) From c920ee9b004ac8e76e21ef843cad0dffce2244fd Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Tue, 23 Dec 2025 22:40:25 -0500 Subject: [PATCH 06/14] try changing the __call__ method to return a new object, --- src/teehr/evaluation/metrics.py | 69 +++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py index 65b0d478..d872c2ff 100644 --- a/src/teehr/evaluation/metrics.py +++ b/src/teehr/evaluation/metrics.py @@ -36,37 +36,59 @@ def __init__(self, ev) -> None: self.locations = ev.locations self._write = ev.write + # def __call__( + # self, + # table_name: str = "joined_timeseries", + # namespace_name: Union[str, None] = None, + # catalog_name: Union[str, None] = None, + # ) -> "Metrics": + # """Initialize the Metrics class. + + # Parameters + # ---------- + # table_name : str, optional + # The name of the table to use for metrics calculations, + # by default "joined_timeseries" + # namespace_name : Union[str, None], optional + # The namespace of the table, by default None in which case the + # namespace_name of the active catalog is used. + # catalog_name : Union[str, None], optional + # The catalog of the table, by default None in which case the + # catalog_name of the active catalog is used. + + # Example + # ------- + # By default, the Metrics class operates on the "joined_timeseries" table. + # This can be changed by specifying a different table name. + + # >>> import teehr + # >>> ev = teehr.Evaluation() + # >>> metrics = ev.metrics(table_name="primary_timeseries") + # """ + # logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") + + # self.table_name = table_name + # self.table = self._ev.table( + # table_name=table_name, + # namespace_name=namespace_name, + # catalog_name=catalog_name, + # ) + # self.sdf = self.table.to_sdf() + + # return self + def __call__( self, table_name: str = "joined_timeseries", namespace_name: Union[str, None] = None, catalog_name: Union[str, None] = None, ) -> "Metrics": - """Initialize the Metrics class. + return Metrics( + self._ev + )._configure(table_name, namespace_name, catalog_name) - Parameters - ---------- - table_name : str, optional - The name of the table to use for metrics calculations, - by default "joined_timeseries" - namespace_name : Union[str, None], optional - The namespace of the table, by default None in which case the - namespace_name of the active catalog is used. - catalog_name : Union[str, None], optional - The catalog of the table, by default None in which case the - catalog_name of the active catalog is used. - - Example - ------- - By default, the Metrics class operates on the "joined_timeseries" table. - This can be changed by specifying a different table name. - - >>> import teehr - >>> ev = teehr.Evaluation() - >>> metrics = ev.metrics(table_name="primary_timeseries") - """ + def _configure(self, table_name, namespace_name, catalog_name): logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") - self.table_name = table_name self.table = self._ev.table( table_name=table_name, @@ -74,7 +96,6 @@ def __call__( catalog_name=catalog_name, ) self.sdf = self.table.to_sdf() - return self def query( From d011e0f192a99668105cd0ea6b258de9b143bbdb Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Tue, 23 Dec 2025 23:05:35 -0500 Subject: [PATCH 07/14] revert creating a new class and remove default value --- src/teehr/evaluation/metrics.py | 71 ++++++++++++--------------------- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py index d872c2ff..acf6d987 100644 --- a/src/teehr/evaluation/metrics.py +++ b/src/teehr/evaluation/metrics.py @@ -36,59 +36,37 @@ def __init__(self, ev) -> None: self.locations = ev.locations self._write = ev.write - # def __call__( - # self, - # table_name: str = "joined_timeseries", - # namespace_name: Union[str, None] = None, - # catalog_name: Union[str, None] = None, - # ) -> "Metrics": - # """Initialize the Metrics class. - - # Parameters - # ---------- - # table_name : str, optional - # The name of the table to use for metrics calculations, - # by default "joined_timeseries" - # namespace_name : Union[str, None], optional - # The namespace of the table, by default None in which case the - # namespace_name of the active catalog is used. - # catalog_name : Union[str, None], optional - # The catalog of the table, by default None in which case the - # catalog_name of the active catalog is used. - - # Example - # ------- - # By default, the Metrics class operates on the "joined_timeseries" table. - # This can be changed by specifying a different table name. - - # >>> import teehr - # >>> ev = teehr.Evaluation() - # >>> metrics = ev.metrics(table_name="primary_timeseries") - # """ - # logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") - - # self.table_name = table_name - # self.table = self._ev.table( - # table_name=table_name, - # namespace_name=namespace_name, - # catalog_name=catalog_name, - # ) - # self.sdf = self.table.to_sdf() - - # return self - def __call__( self, - table_name: str = "joined_timeseries", + table_name: str, namespace_name: Union[str, None] = None, catalog_name: Union[str, None] = None, ) -> "Metrics": - return Metrics( - self._ev - )._configure(table_name, namespace_name, catalog_name) + """Initialize the Metrics class. - def _configure(self, table_name, namespace_name, catalog_name): + Parameters + ---------- + table_name : str, optional + The name of the table to use for metrics calculations, + by default "joined_timeseries" + namespace_name : Union[str, None], optional + The namespace of the table, by default None in which case the + namespace_name of the active catalog is used. + catalog_name : Union[str, None], optional + The catalog of the table, by default None in which case the + catalog_name of the active catalog is used. + + Example + ------- + By default, the Metrics class operates on the "joined_timeseries" table. + This can be changed by specifying a different table name. + + >>> import teehr + >>> ev = teehr.Evaluation() + >>> metrics = ev.metrics(table_name="primary_timeseries") + """ logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") + self.table_name = table_name self.table = self._ev.table( table_name=table_name, @@ -96,6 +74,7 @@ def _configure(self, table_name, namespace_name, catalog_name): catalog_name=catalog_name, ) self.sdf = self.table.to_sdf() + return self def query( From d64998e3fccb031cb3c569dff4913fd57646c53e Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Tue, 23 Dec 2025 23:26:50 -0500 Subject: [PATCH 08/14] add default back but change the call in evaluation --- src/teehr/evaluation/evaluation.py | 3 +-- src/teehr/evaluation/metrics.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/teehr/evaluation/evaluation.py b/src/teehr/evaluation/evaluation.py index 618fa6fa..f1833637 100644 --- a/src/teehr/evaluation/evaluation.py +++ b/src/teehr/evaluation/evaluation.py @@ -193,8 +193,7 @@ def fetch(self) -> Fetch: @property def metrics(self) -> Metrics: """The metrics component class for calculating performance metrics.""" - cls = Metrics(self) - return cls() + return Metrics(self) @property def units(self) -> UnitTable: diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py index acf6d987..65b0d478 100644 --- a/src/teehr/evaluation/metrics.py +++ b/src/teehr/evaluation/metrics.py @@ -38,7 +38,7 @@ def __init__(self, ev) -> None: def __call__( self, - table_name: str, + table_name: str = "joined_timeseries", namespace_name: Union[str, None] = None, catalog_name: Union[str, None] = None, ) -> "Metrics": From b634945c60e63b30ba0dd7a9e48030672186d1da Mon Sep 17 00:00:00 2001 From: Matt Denno Date: Mon, 5 Jan 2026 13:41:41 -0500 Subject: [PATCH 09/14] update tests but they don't pass due to hard coded path. --- tests/query/test_metrics_query.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/query/test_metrics_query.py b/tests/query/test_metrics_query.py index aef1f024..773f7352 100644 --- a/tests/query/test_metrics_query.py +++ b/tests/query/test_metrics_query.py @@ -51,7 +51,7 @@ def test_executing_deterministic_metrics(tmpdir): # Get the currently available fields to use in the query. flds = ev.joined_timeseries.field_enum() - metrics_df = ev.metrics.query( + metrics_df = ev.metrics().query( include_metrics=include_nonconditional_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -75,7 +75,7 @@ def test_executing_deterministic_metrics(tmpdir): if callable(func) and func().attrs.get('requires_threshold_field', True) # noqa ] - metrics_df = ev.metrics.add_calculated_fields([ + metrics_df = ev.metrics().add_calculated_fields([ tcf.AbovePercentileEventDetection( skip_event_id=True, add_quantile_field=True, @@ -105,7 +105,7 @@ def test_executing_signatures(tmpdir): # Get the currently available fields to use in the query. flds = ev.joined_timeseries.field_enum() - metrics_df = ev.metrics.query( + metrics_df = ev.metrics().query( include_metrics=include_all_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -142,7 +142,7 @@ def test_metrics_filter_and_geometry(tmpdir): ) ] - metrics_df = ev.metrics.query( + metrics_df = ev.metrics().query( include_metrics=include_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -161,7 +161,7 @@ def test_metric_chaining(tmpdir): ev = setup_v0_3_study(tmpdir) # Test chaining. - metrics_df = ev.metrics.query( + metrics_df = ev.metrics().query( order_by=["primary_location_id", "month"], group_by=["primary_location_id", "month"], include_metrics=[ @@ -302,7 +302,7 @@ def test_ensemble_metrics(tmpdir): crps.reference_configuration = "benchmark_forecast_hourly_normals" include_metrics = [crps] - metrics_df = ev.metrics.query( + metrics_df = ev.metrics().query( include_metrics=include_metrics, group_by=[ "primary_location_id", @@ -341,21 +341,21 @@ def test_metrics_transforms(tmpdir): mvtd_t.transform = 'log' # get metrics_df - metrics_df_tansformed_e = test_eval.metrics.query( + metrics_df_tansformed_e = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge_t_e, mvtd_t ] ).to_pandas() - metrics_df_transformed = test_eval.metrics.query( + metrics_df_transformed = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge_t, mvtd_t ] ).to_pandas() - metrics_df = test_eval.metrics.query( + metrics_df = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge, @@ -396,7 +396,7 @@ def test_metrics_transforms(tmpdir): ) # get metrics df control and assert divide by zero occurs - metrics_df_e_control = test_eval.metrics.query( + metrics_df_e_control = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2, @@ -407,7 +407,7 @@ def test_metrics_transforms(tmpdir): assert np.isnan(metrics_df_e_control.pearson_correlation.values).all() # get metrics df test and ensure no divide by zero occurs - metrics_df_e_test = test_eval.metrics.query( + metrics_df_e_test = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2_e, @@ -436,7 +436,7 @@ def test_metrics_transforms(tmpdir): ) # get metrics df control and assert divide by zero occurs - metrics_df_e_control = test_eval.metrics.query( + metrics_df_e_control = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2, @@ -447,7 +447,7 @@ def test_metrics_transforms(tmpdir): assert np.isnan(metrics_df_e_control.pearson_correlation.values).all() # get metrics df test and ensure no divide by zero occurs - metrics_df_e_test = test_eval.metrics.query( + metrics_df_e_test = test_eval.metrics().query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2_e, From 86411704e760db38b3a6b576866325d0c358dc96 Mon Sep 17 00:00:00 2001 From: samlamont Date: Mon, 5 Jan 2026 14:16:45 -0500 Subject: [PATCH 10/14] rollback setup_v0_3_study to start from scratch --- tests/data/setup_v0_3_study.py | 124 ++++++++++++++++++++++++++++----- 1 file changed, 106 insertions(+), 18 deletions(-) diff --git a/tests/data/setup_v0_3_study.py b/tests/data/setup_v0_3_study.py index e171e48c..62e9489f 100644 --- a/tests/data/setup_v0_3_study.py +++ b/tests/data/setup_v0_3_study.py @@ -1,36 +1,124 @@ """Fixtures for v0.3 study tests.""" from pathlib import Path -from teehr import Evaluation +from teehr import Evaluation, Configuration, Attribute -import tarfile -import os -import shutil import logging logger = logging.getLogger(__name__) -TEST_DATA_FILE = Path("tests", "data", "v0_3_study_test.tar.gz") +TEST_DATA_DIR = Path("tests", "data", "v0_3_test_study") +GEOJSON_GAGES_FILEPATH = Path(TEST_DATA_DIR, "geo", "gages.geojson") +PRIMARY_TIMESERIES_FILEPATH = Path( + TEST_DATA_DIR, "timeseries", "test_short_obs.parquet" +) +CROSSWALK_FILEPATH = Path(TEST_DATA_DIR, "geo", "crosswalk.csv") +SECONDARY_TIMESERIES_FILEPATH = Path( + TEST_DATA_DIR, "timeseries", "test_short_fcast.parquet" +) +GEO_FILEPATH = Path(TEST_DATA_DIR, "geo") def setup_v0_3_study(tmpdir): """Set up a v0.3 study.""" - shutil.copyfile(TEST_DATA_FILE, Path(tmpdir, "v0_3_study_test.tar.gz")) + ev = Evaluation(dir_path=tmpdir, create_dir=True) - logger.info("Extracting archive...") - with tarfile.open(Path(tmpdir, "v0_3_study_test.tar.gz"), 'r:gz') as tar: - tar.extractall(path=tmpdir) - logger.info("✅ Extraction complete") + # Enable logging + ev.enable_logging() - os.remove(Path(tmpdir, "v0_3_study_test.tar.gz")) - logger.info(f"✅ Removed archive {tmpdir}") + # Clone the template + ev.clone_template() - ev = Evaluation( - dir_path=Path(tmpdir, "v0_3_study_test"), - create_dir=False + # Load the location data + ev.locations.load_spatial(in_path=GEOJSON_GAGES_FILEPATH) + + ev.configurations.add( + Configuration( + name="usgs_observations", + type="primary", + description="setup_v0_3_study primary configuration" + ) ) - return ev + # Load the timeseries data and map over the fields and set constants + ev.primary_timeseries.load_parquet( + in_path=PRIMARY_TIMESERIES_FILEPATH, + field_mapping={ + "reference_time": "reference_time", + "value_time": "value_time", + "configuration": "configuration_name", + "measurement_unit": "unit_name", + "variable_name": "variable_name", + "value": "value", + "location_id": "location_id" + }, + constant_field_values={ + "unit_name": "m^3/s", + "variable_name": "streamflow_hourly_inst", + "configuration_name": "usgs_observations" + } + ) + + # Load the crosswalk data + ev.location_crosswalks.load_csv( + in_path=CROSSWALK_FILEPATH + ) + + ev.configurations.add( + Configuration( + name="nwm30_retrospective", + type="secondary", + description="setup_v0_3_study secondary configuration" + ) + ) + # Load the secondary timeseries data and map over the fields and set constants + ev.secondary_timeseries.load_parquet( + in_path=SECONDARY_TIMESERIES_FILEPATH, + field_mapping={ + "reference_time": "reference_time", + "value_time": "value_time", + "configuration": "configuration_name", + "measurement_unit": "unit_name", + "variable_name": "variable_name", + "value": "value", + "location_id": "location_id" + }, + constant_field_values={ + "unit_name": "m^3/s", + "variable_name": "streamflow_hourly_inst", + "configuration_name": "nwm30_retrospective" + } + ) + + # Add some attributes + ev.attributes.add( + [ + Attribute( + name="drainage_area", + type="continuous", + description="Drainage area in square kilometers" + ), + Attribute( + name="ecoregion", + type="categorical", + description="Ecoregion" + ), + Attribute( + name="year_2_discharge", + type="continuous", + description="2-yr discharge in cubic meters per second" + ), + ] + ) -if __name__ == "__main__": - setup_v0_3_study("/home/slamont/temp/v0_3_study_test") \ No newline at end of file + # Load the location attribute data + ev.location_attributes.load_parquet( + in_path=GEO_FILEPATH, + field_mapping={"attribute_value": "value"}, + pattern="test_attr_*.parquet", + ) + + # Create the joined timeseries + ev.joined_timeseries.create(add_attrs=True, execute_scripts=True) + + return ev From e235eb596685874e2ab3d9eb646aed64a301686c Mon Sep 17 00:00:00 2001 From: samlamont Date: Mon, 5 Jan 2026 16:28:26 -0500 Subject: [PATCH 11/14] update pyspark to 4.0.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0059fdb7..6de4f08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ h5py = ">=3.12.1,<4" pyarrow = ">=15.0.0,<21" httpx = ">=0.25.1,<1" pandas = ">=2.2.0,<3" -pyspark = {extras = ["pandas-on-spark"], version = "4.0.0"} +pyspark = {extras = ["pandas-on-spark"], version = "4.0.1"} dataretrieval = ">=1.0.9,<2" numba = ">=0.60.0,<1" arch = ">=7.0.0,<8" From 122bf26c6b36a1705b2c237e923aabb224792d73 Mon Sep 17 00:00:00 2001 From: samlamont Date: Tue, 6 Jan 2026 10:10:25 -0500 Subject: [PATCH 12/14] revert metrics initialization for testing --- src/teehr/evaluation/evaluation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/teehr/evaluation/evaluation.py b/src/teehr/evaluation/evaluation.py index f1833637..577c5093 100644 --- a/src/teehr/evaluation/evaluation.py +++ b/src/teehr/evaluation/evaluation.py @@ -193,7 +193,9 @@ def fetch(self) -> Fetch: @property def metrics(self) -> Metrics: """The metrics component class for calculating performance metrics.""" - return Metrics(self) + cls = Metrics(self) + return cls() + # return Metrics(self) @property def units(self) -> UnitTable: From dbe65288c182425c41f63ba754feee44899df958 Mon Sep 17 00:00:00 2001 From: samlamont Date: Tue, 6 Jan 2026 17:10:04 -0500 Subject: [PATCH 13/14] update tests --- src/teehr/evaluation/evaluation.py | 1 - tests/query/test_metrics_query.py | 42 ++++++++++++++++++------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/teehr/evaluation/evaluation.py b/src/teehr/evaluation/evaluation.py index 577c5093..618fa6fa 100644 --- a/src/teehr/evaluation/evaluation.py +++ b/src/teehr/evaluation/evaluation.py @@ -195,7 +195,6 @@ def metrics(self) -> Metrics: """The metrics component class for calculating performance metrics.""" cls = Metrics(self) return cls() - # return Metrics(self) @property def units(self) -> UnitTable: diff --git a/tests/query/test_metrics_query.py b/tests/query/test_metrics_query.py index 773f7352..9dc0b8f2 100644 --- a/tests/query/test_metrics_query.py +++ b/tests/query/test_metrics_query.py @@ -51,7 +51,7 @@ def test_executing_deterministic_metrics(tmpdir): # Get the currently available fields to use in the query. flds = ev.joined_timeseries.field_enum() - metrics_df = ev.metrics().query( + metrics_df = ev.metrics.query( include_metrics=include_nonconditional_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -66,7 +66,7 @@ def test_executing_deterministic_metrics(tmpdir): assert metrics_df.equals(metrics_df2) assert isinstance(metrics_df, pd.DataFrame) assert metrics_df.index.size == 3 - assert metrics_df.columns.size == 21 + assert metrics_df.columns.size == 20 # Test all the conditional metrics. include_conditional_metrics = [ @@ -75,7 +75,7 @@ def test_executing_deterministic_metrics(tmpdir): if callable(func) and func().attrs.get('requires_threshold_field', True) # noqa ] - metrics_df = ev.metrics().add_calculated_fields([ + metrics_df = ev.metrics.add_calculated_fields([ tcf.AbovePercentileEventDetection( skip_event_id=True, add_quantile_field=True, @@ -105,7 +105,7 @@ def test_executing_signatures(tmpdir): # Get the currently available fields to use in the query. flds = ev.joined_timeseries.field_enum() - metrics_df = ev.metrics().query( + metrics_df = ev.metrics.query( include_metrics=include_all_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -142,7 +142,7 @@ def test_metrics_filter_and_geometry(tmpdir): ) ] - metrics_df = ev.metrics().query( + metrics_df = ev.metrics.query( include_metrics=include_metrics, group_by=[flds.primary_location_id], order_by=[flds.primary_location_id], @@ -152,6 +152,10 @@ def test_metrics_filter_and_geometry(tmpdir): assert isinstance(metrics_df, gpd.GeoDataFrame) assert metrics_df.index.size == 1 assert metrics_df.columns.size == 6 + + + tbl = ev.metrics(table_name="primary_timeseries") + ev.spark.stop() @@ -161,7 +165,7 @@ def test_metric_chaining(tmpdir): ev = setup_v0_3_study(tmpdir) # Test chaining. - metrics_df = ev.metrics().query( + metrics_df = ev.metrics.query( order_by=["primary_location_id", "month"], group_by=["primary_location_id", "month"], include_metrics=[ @@ -302,7 +306,7 @@ def test_ensemble_metrics(tmpdir): crps.reference_configuration = "benchmark_forecast_hourly_normals" include_metrics = [crps] - metrics_df = ev.metrics().query( + metrics_df = ev.metrics.query( include_metrics=include_metrics, group_by=[ "primary_location_id", @@ -341,21 +345,21 @@ def test_metrics_transforms(tmpdir): mvtd_t.transform = 'log' # get metrics_df - metrics_df_tansformed_e = test_eval.metrics().query( + metrics_df_tansformed_e = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge_t_e, mvtd_t ] ).to_pandas() - metrics_df_transformed = test_eval.metrics().query( + metrics_df_transformed = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge_t, mvtd_t ] ).to_pandas() - metrics_df = test_eval.metrics().query( + metrics_df = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ kge, @@ -396,7 +400,7 @@ def test_metrics_transforms(tmpdir): ) # get metrics df control and assert divide by zero occurs - metrics_df_e_control = test_eval.metrics().query( + metrics_df_e_control = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2, @@ -407,7 +411,7 @@ def test_metrics_transforms(tmpdir): assert np.isnan(metrics_df_e_control.pearson_correlation.values).all() # get metrics df test and ensure no divide by zero occurs - metrics_df_e_test = test_eval.metrics().query( + metrics_df_e_test = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2_e, @@ -436,7 +440,7 @@ def test_metrics_transforms(tmpdir): ) # get metrics df control and assert divide by zero occurs - metrics_df_e_control = test_eval.metrics().query( + metrics_df_e_control = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2, @@ -447,7 +451,7 @@ def test_metrics_transforms(tmpdir): assert np.isnan(metrics_df_e_control.pearson_correlation.values).all() # get metrics df test and ensure no divide by zero occurs - metrics_df_e_test = test_eval.metrics().query( + metrics_df_e_test = test_eval.metrics.query( group_by=["primary_location_id", "configuration_name"], include_metrics=[ r2_e, @@ -508,13 +512,19 @@ def test_table_based_metrics(tmpdir): include_metrics=[primary_avg], group_by=["location_id"], order_by=["location_id"], - # filters="season = 'winter'", ).to_pandas() assert isinstance(sigs_df, pd.DataFrame) assert sigs_df.index.size == 3 assert "location_id" in sigs_df.columns + sigs_df2 = ev.metrics(table_name="primary_timeseries").query( + include_metrics=[primary_avg], + group_by=["location_id"], + order_by=["location_id"], + ).to_pandas() + + assert sigs_df.sort_index().equals(sigs_df2.sort_index()) ev.spark.stop() @@ -546,14 +556,12 @@ def test_table_based_metrics(tmpdir): dir=tempdir ) ) - # High memory usage? test_ensemble_metrics( tempfile.mkdtemp( prefix="5-", dir=tempdir ) ) - # High memory usage? test_metrics_transforms( tempfile.mkdtemp( prefix="6-", From 773fc8baff8c445b1f64f3920e0be55b528b2308 Mon Sep 17 00:00:00 2001 From: samlamont Date: Wed, 7 Jan 2026 08:55:18 -0500 Subject: [PATCH 14/14] update lock file --- poetry.lock | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 75d41294..93338a03 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -5253,13 +5253,13 @@ files = [ [[package]] name = "pyspark" -version = "4.0.0" +version = "4.0.1" description = "Apache Spark Python API" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyspark-4.0.0.tar.gz", hash = "sha256:38db1b4f6095a080d7605e578d775528990e66dc326311d93e94a71cfc24e5a5"}, + {file = "pyspark-4.0.1.tar.gz", hash = "sha256:9d1f22d994f60369228397e3479003ffe2dd736ba79165003246ff7bd48e2c73"}, ] [package.dependencies] @@ -5901,6 +5901,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"}, + {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"}, @@ -5909,6 +5910,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"}, + {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"}, @@ -5917,6 +5919,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"}, + {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"}, @@ -5925,6 +5928,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"}, + {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"}, @@ -5933,6 +5937,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"}, + {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"}, {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, @@ -7505,4 +7510,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "9832104431d55c01478d9e9229b2009ce9b8e342f00e7922453cc3a996b245c1" +content-hash = "24d082d3cf45058da4f8a217f85ec85c0ce081dca351f3469eebae4dbddc1511"