diff --git a/poetry.lock b/poetry.lock index 75d41294..93338a03 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -5253,13 +5253,13 @@ files = [ [[package]] name = "pyspark" -version = "4.0.0" +version = "4.0.1" description = "Apache Spark Python API" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyspark-4.0.0.tar.gz", hash = "sha256:38db1b4f6095a080d7605e578d775528990e66dc326311d93e94a71cfc24e5a5"}, + {file = "pyspark-4.0.1.tar.gz", hash = "sha256:9d1f22d994f60369228397e3479003ffe2dd736ba79165003246ff7bd48e2c73"}, ] [package.dependencies] @@ -5901,6 +5901,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"}, + {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"}, @@ -5909,6 +5910,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"}, + {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"}, @@ -5917,6 +5919,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"}, + {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"}, @@ -5925,6 +5928,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"}, + {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"}, @@ -5933,6 +5937,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"}, + {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"}, {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, @@ -7505,4 +7510,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "9832104431d55c01478d9e9229b2009ce9b8e342f00e7922453cc3a996b245c1" +content-hash = "24d082d3cf45058da4f8a217f85ec85c0ce081dca351f3469eebae4dbddc1511" diff --git a/pyproject.toml b/pyproject.toml index 2739ec94..63d73a24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "teehr" -version = "0.6.0dev5" +version = "0.6.0dev6" description = "Tools for Exploratory Evaluation in Hydrologic Research" authors = [ "RTI International", @@ -29,7 +29,7 @@ h5py = ">=3.12.1,<4" pyarrow = ">=15.0.0,<21" httpx = ">=0.25.1,<1" pandas = ">=2.2.0,<3" -pyspark = {extras = ["pandas-on-spark"], version = ">=4,<5"} +pyspark = {extras = ["pandas-on-spark"], version = "4.0.1"} dataretrieval = ">=1.0.9,<2" numba = ">=0.60.0,<1" arch = ">=7.0.0,<8" diff --git a/src/teehr/__init__.py b/src/teehr/__init__.py index c7524ae6..5a003527 100644 --- a/src/teehr/__init__.py +++ b/src/teehr/__init__.py @@ -1,7 +1,7 @@ """Initialize the TEEHR package.""" import warnings -__version__ = "0.6.0dev5" +__version__ = "0.6.0dev6" with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py index a353ac9c..65b0d478 100644 --- a/src/teehr/evaluation/metrics.py +++ b/src/teehr/evaluation/metrics.py @@ -65,6 +65,8 @@ def __call__( >>> ev = teehr.Evaluation() >>> metrics = ev.metrics(table_name="primary_timeseries") """ + logger.info(f"Initializing Metrics for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") + self.table_name = table_name self.table = self._ev.table( table_name=table_name, diff --git a/src/teehr/evaluation/tables/base_table.py b/src/teehr/evaluation/tables/base_table.py index e1e29a4a..a255ae6e 100644 --- a/src/teehr/evaluation/tables/base_table.py +++ b/src/teehr/evaluation/tables/base_table.py @@ -53,6 +53,7 @@ def __call__( catalog_name: Union[str, None] = None ) -> "Table": """Initialize the Table class.""" + logger.info(f"Initializing Table for table: {table_name}.{namespace_name or ''}{'.' if namespace_name else ''}{catalog_name or ''}") self.table_name = table_name self.sdf = None tbl_props = TBLPROPERTIES.get(table_name) diff --git a/tests/data/setup_v0_3_study.py b/tests/data/setup_v0_3_study.py index e171e48c..62e9489f 100644 --- a/tests/data/setup_v0_3_study.py +++ b/tests/data/setup_v0_3_study.py @@ -1,36 +1,124 @@ """Fixtures for v0.3 study tests.""" from pathlib import Path -from teehr import Evaluation +from teehr import Evaluation, Configuration, Attribute -import tarfile -import os -import shutil import logging logger = logging.getLogger(__name__) -TEST_DATA_FILE = Path("tests", "data", "v0_3_study_test.tar.gz") +TEST_DATA_DIR = Path("tests", "data", "v0_3_test_study") +GEOJSON_GAGES_FILEPATH = Path(TEST_DATA_DIR, "geo", "gages.geojson") +PRIMARY_TIMESERIES_FILEPATH = Path( + TEST_DATA_DIR, "timeseries", "test_short_obs.parquet" +) +CROSSWALK_FILEPATH = Path(TEST_DATA_DIR, "geo", "crosswalk.csv") +SECONDARY_TIMESERIES_FILEPATH = Path( + TEST_DATA_DIR, "timeseries", "test_short_fcast.parquet" +) +GEO_FILEPATH = Path(TEST_DATA_DIR, "geo") def setup_v0_3_study(tmpdir): """Set up a v0.3 study.""" - shutil.copyfile(TEST_DATA_FILE, Path(tmpdir, "v0_3_study_test.tar.gz")) + ev = Evaluation(dir_path=tmpdir, create_dir=True) - logger.info("Extracting archive...") - with tarfile.open(Path(tmpdir, "v0_3_study_test.tar.gz"), 'r:gz') as tar: - tar.extractall(path=tmpdir) - logger.info("✅ Extraction complete") + # Enable logging + ev.enable_logging() - os.remove(Path(tmpdir, "v0_3_study_test.tar.gz")) - logger.info(f"✅ Removed archive {tmpdir}") + # Clone the template + ev.clone_template() - ev = Evaluation( - dir_path=Path(tmpdir, "v0_3_study_test"), - create_dir=False + # Load the location data + ev.locations.load_spatial(in_path=GEOJSON_GAGES_FILEPATH) + + ev.configurations.add( + Configuration( + name="usgs_observations", + type="primary", + description="setup_v0_3_study primary configuration" + ) ) - return ev + # Load the timeseries data and map over the fields and set constants + ev.primary_timeseries.load_parquet( + in_path=PRIMARY_TIMESERIES_FILEPATH, + field_mapping={ + "reference_time": "reference_time", + "value_time": "value_time", + "configuration": "configuration_name", + "measurement_unit": "unit_name", + "variable_name": "variable_name", + "value": "value", + "location_id": "location_id" + }, + constant_field_values={ + "unit_name": "m^3/s", + "variable_name": "streamflow_hourly_inst", + "configuration_name": "usgs_observations" + } + ) + + # Load the crosswalk data + ev.location_crosswalks.load_csv( + in_path=CROSSWALK_FILEPATH + ) + + ev.configurations.add( + Configuration( + name="nwm30_retrospective", + type="secondary", + description="setup_v0_3_study secondary configuration" + ) + ) + # Load the secondary timeseries data and map over the fields and set constants + ev.secondary_timeseries.load_parquet( + in_path=SECONDARY_TIMESERIES_FILEPATH, + field_mapping={ + "reference_time": "reference_time", + "value_time": "value_time", + "configuration": "configuration_name", + "measurement_unit": "unit_name", + "variable_name": "variable_name", + "value": "value", + "location_id": "location_id" + }, + constant_field_values={ + "unit_name": "m^3/s", + "variable_name": "streamflow_hourly_inst", + "configuration_name": "nwm30_retrospective" + } + ) + + # Add some attributes + ev.attributes.add( + [ + Attribute( + name="drainage_area", + type="continuous", + description="Drainage area in square kilometers" + ), + Attribute( + name="ecoregion", + type="categorical", + description="Ecoregion" + ), + Attribute( + name="year_2_discharge", + type="continuous", + description="2-yr discharge in cubic meters per second" + ), + ] + ) -if __name__ == "__main__": - setup_v0_3_study("/home/slamont/temp/v0_3_study_test") \ No newline at end of file + # Load the location attribute data + ev.location_attributes.load_parquet( + in_path=GEO_FILEPATH, + field_mapping={"attribute_value": "value"}, + pattern="test_attr_*.parquet", + ) + + # Create the joined timeseries + ev.joined_timeseries.create(add_attrs=True, execute_scripts=True) + + return ev diff --git a/tests/query/test_metrics_query.py b/tests/query/test_metrics_query.py index aef1f024..9dc0b8f2 100644 --- a/tests/query/test_metrics_query.py +++ b/tests/query/test_metrics_query.py @@ -66,7 +66,7 @@ def test_executing_deterministic_metrics(tmpdir): assert metrics_df.equals(metrics_df2) assert isinstance(metrics_df, pd.DataFrame) assert metrics_df.index.size == 3 - assert metrics_df.columns.size == 21 + assert metrics_df.columns.size == 20 # Test all the conditional metrics. include_conditional_metrics = [ @@ -152,6 +152,10 @@ def test_metrics_filter_and_geometry(tmpdir): assert isinstance(metrics_df, gpd.GeoDataFrame) assert metrics_df.index.size == 1 assert metrics_df.columns.size == 6 + + + tbl = ev.metrics(table_name="primary_timeseries") + ev.spark.stop() @@ -508,13 +512,19 @@ def test_table_based_metrics(tmpdir): include_metrics=[primary_avg], group_by=["location_id"], order_by=["location_id"], - # filters="season = 'winter'", ).to_pandas() assert isinstance(sigs_df, pd.DataFrame) assert sigs_df.index.size == 3 assert "location_id" in sigs_df.columns + sigs_df2 = ev.metrics(table_name="primary_timeseries").query( + include_metrics=[primary_avg], + group_by=["location_id"], + order_by=["location_id"], + ).to_pandas() + + assert sigs_df.sort_index().equals(sigs_df2.sort_index()) ev.spark.stop() @@ -546,14 +556,12 @@ def test_table_based_metrics(tmpdir): dir=tempdir ) ) - # High memory usage? test_ensemble_metrics( tempfile.mkdtemp( prefix="5-", dir=tempdir ) ) - # High memory usage? test_metrics_transforms( tempfile.mkdtemp( prefix="6-",