From 7e7f72f2f6723e5af1563cd35db451e96c94c80d Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 1 Jul 2025 14:55:41 -0600 Subject: [PATCH 01/10] Add support for reading 0802 binary data This dataset is currently a prototype, but is expected to be released soon. --- pm_tb_data/fetch/nsidc_0007.py | 36 +---------------- pm_tb_data/fetch/nsidc_0802.py | 60 ++++++++++++++++++++++++++++ pm_tb_data/fetch/nsidc_binary.py | 41 +++++++++++++++++++ tests/integration/test_nsidc_0802.py | 18 +++++++++ 4 files changed, 120 insertions(+), 35 deletions(-) create mode 100644 pm_tb_data/fetch/nsidc_0802.py create mode 100644 pm_tb_data/fetch/nsidc_binary.py create mode 100644 tests/integration/test_nsidc_0802.py diff --git a/pm_tb_data/fetch/nsidc_0007.py b/pm_tb_data/fetch/nsidc_0007.py index 5720787..e10db9c 100644 --- a/pm_tb_data/fetch/nsidc_0007.py +++ b/pm_tb_data/fetch/nsidc_0007.py @@ -12,44 +12,10 @@ import re from pathlib import Path -import numpy as np -import numpy.typing as npt import xarray as xr from pm_tb_data._types import Hemisphere - - -def read_binary_tb_file( - *, filepath: Path, hemisphere: Hemisphere -) -> npt.NDArray[np.float64]: - """Read 25km binary NSIDC0007 data from disk. - - Returns data in Kelvins. No/missing data areas are masked with `np.nan`. - """ - grid_shape = dict( - north=(448, 304), - south=(332, 316), - )[hemisphere] - - try: - tb_data = np.fromfile(filepath, np.dtype(" xr.Dataset: + """Return TB data from NSIDC-0802.""" + # This assumes `data_dir` points to the "nsidc0007_smmr_radiance_seaice_v01" + # directory. E.g., /projects/DATASETS/nsidc0007_smmr_radiance_seaice_v01/. + + # Get all of the files containing TB data and match the expected format + # (e.g., the file `800929S.37H` contains Sept. 29, 1980 SH Tbs for the + # horizontal 37GHz channel. 
+ fn_glob = f"tb_as2_{date:%Y%m%d}_nrt_{hemisphere[0].lower()}*.bin" + results = list(data_dir.rglob(fn_glob)) + if not results: + raise FileNotFoundError(f"No NSIDC-0007 TBs found for {date=} {hemisphere=}") + + tb_data_mapping = {} + tb_fn_re = re.compile( + r".*_" + hemisphere[0].lower() + r"(?P\d{2})(?Ph|v).bin" + ) + for tb_fp in results: + match = tb_fn_re.match(tb_fp.name) + assert match is not None + + data = read_binary_tb_file( + filepath=tb_fp, + hemisphere=hemisphere, + ) + + tb_data_mapping[ + f"{match.group('polarization').lower()}{match.group('channel')}" + ] = xr.DataArray( + data, + dims=("fake_y", "fake_x"), + attrs={ + "source_filename": tb_fp.name, + }, + ) + + normalized = xr.Dataset(tb_data_mapping) + + return normalized diff --git a/pm_tb_data/fetch/nsidc_binary.py b/pm_tb_data/fetch/nsidc_binary.py new file mode 100644 index 0000000..345e070 --- /dev/null +++ b/pm_tb_data/fetch/nsidc_binary.py @@ -0,0 +1,41 @@ +from pathlib import Path + +import numpy as np +import numpy.typing as npt + +from pm_tb_data._types import Hemisphere + + +def read_binary_tb_file( + *, + filepath: Path, + hemisphere: Hemisphere, +) -> npt.NDArray[np.float64]: + """Read 25km NSIDC binary data from disk. + + Returns data in Kelvins. No/missing data areas are masked with `np.nan`. + """ + grid_shape = dict( + north=(448, 304), + south=(332, 316), + )[hemisphere] + + try: + tb_data = np.fromfile(filepath, np.dtype(" Date: Tue, 1 Jul 2025 14:56:51 -0600 Subject: [PATCH 02/10] Prepare CHANGELOG for v0.6.0 release --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 968473a..1fff62d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.6.0 + +* Add support for fetching data from NSIDC0802 from disk. + ## 0.5.0 * Add support for fetching data from NSIDC0080 from disk. 
From b3ae89c34a78265e712ef2e5c9a8ad08be98ff9e Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 1 Jul 2025 14:58:00 -0600 Subject: [PATCH 03/10] Bumpversion for v0.6.0 --- pm_tb_data/__init__.py | 2 +- pyproject.toml | 4 ++-- recipe/meta.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pm_tb_data/__init__.py b/pm_tb_data/__init__.py index 1c7d256..458018d 100644 --- a/pm_tb_data/__init__.py +++ b/pm_tb_data/__init__.py @@ -1 +1 @@ -__version__ = "v0.5.0" +__version__ = "v0.6.0" diff --git a/pyproject.toml b/pyproject.toml index 49ff991..165bfd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pm_tb_data" -version = "0.5.0" +version = "0.6.0" [tool.setuptools] include-package-data = true @@ -63,7 +63,7 @@ ignore_missing_imports = true [tool.bumpversion] -current_version = "0.5.0" +current_version = "0.6.0" commit = false tag = false diff --git a/recipe/meta.yaml b/recipe/meta.yaml index cf1961f..d67682a 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: pm_tb_data - version: "0.5.0" + version: "0.6.0" source: path: ../ From 434a287177503d678c7fdc4992d7a67d76c659eb Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 2 Jul 2025 11:43:09 -0600 Subject: [PATCH 04/10] Move nsidc-0802 fetch code into amsr submodule --- pm_tb_data/fetch/{ => amsr}/nsidc_0802.py | 0 tests/integration/test_nsidc_0802.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pm_tb_data/fetch/{ => amsr}/nsidc_0802.py (100%) diff --git a/pm_tb_data/fetch/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py similarity index 100% rename from pm_tb_data/fetch/nsidc_0802.py rename to pm_tb_data/fetch/amsr/nsidc_0802.py diff --git a/tests/integration/test_nsidc_0802.py b/tests/integration/test_nsidc_0802.py index 731aaa5..89cfae1 100644 --- a/tests/integration/test_nsidc_0802.py +++ b/tests/integration/test_nsidc_0802.py @@ -1,7 +1,7 @@ import datetime as dt from pathlib import Path -from 
pm_tb_data.fetch.nsidc_0802 import get_nsidc_0802_tbs_from_disk +from pm_tb_data.fetch.amsr.nsidc_0802 import get_nsidc_0802_tbs_from_disk def test_get_nsidc_0802_tbs_from_disk(): From 3506e22702feb6e41424f81068cf5d4aedc31a5c Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 7 Jul 2025 10:46:54 -0600 Subject: [PATCH 05/10] Update path and fn expectations for 0802 based on newly published data 0802 is now officially published: https://nsidc.org/data/nsidc-0802/versions/1 --- pm_tb_data/fetch/amsr/nsidc_0802.py | 14 +++++++------- tests/integration/test_nsidc_0802.py | 4 +--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py index 8ea280b..ba8d782 100644 --- a/pm_tb_data/fetch/amsr/nsidc_0802.py +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -21,20 +21,20 @@ def get_nsidc_0802_tbs_from_disk( data_dir: Path, ) -> xr.Dataset: """Return TB data from NSIDC-0802.""" - # This assumes `data_dir` points to the "nsidc0007_smmr_radiance_seaice_v01" - # directory. E.g., /projects/DATASETS/nsidc0007_smmr_radiance_seaice_v01/. + # This assumes `data_dir` points to the "nsidc0802_polar_stereo_tbs" + # directory. E.g., /disks/sidads_ftp/DATASETS/nsidc0802_polar_stereo_tbs/. - # Get all of the files containing TB data and match the expected format - # (e.g., the file `800929S.37H` contains Sept. 29, 1980 SH Tbs for the - # horizontal 37GHz channel. 
- fn_glob = f"tb_as2_{date:%Y%m%d}_nrt_{hemisphere[0].lower()}*.bin" + # Example fn: NSIDC-0802_TB_AMSR2_N_37V_20250702_V1.0.bin + fn_glob = f"NSIDC-0802_TB_AMSR2_{hemisphere[0].upper()}_*_{date:%Y%m%d}_*.bin" results = list(data_dir.rglob(fn_glob)) if not results: raise FileNotFoundError(f"No NSIDC-0007 TBs found for {date=} {hemisphere=}") tb_data_mapping = {} tb_fn_re = re.compile( - r".*_" + hemisphere[0].lower() + r"(?P\d{2})(?Ph|v).bin" + r"NSIDC-0802_TB_AMSR2_" + + hemisphere[0].upper() + + r"_(?P\d{2})(?PH|V)_.*.bin" ) for tb_fp in results: match = tb_fn_re.match(tb_fp.name) diff --git a/tests/integration/test_nsidc_0802.py b/tests/integration/test_nsidc_0802.py index 89cfae1..21afffb 100644 --- a/tests/integration/test_nsidc_0802.py +++ b/tests/integration/test_nsidc_0802.py @@ -5,9 +5,7 @@ def test_get_nsidc_0802_tbs_from_disk(): - data_dir = Path( - "/disks/sidads_staging/DATASETS/nsidc0739_AS2_nrt_polar_stereo_tbs_v1/" - ) + data_dir = Path("/disks/sidads_ftp/DATASETS/nsidc0802_polar_stereo_tbs") data = get_nsidc_0802_tbs_from_disk( date=dt.date(2025, 1, 1), hemisphere="north", From 44b7fcb8337a41767785b950cd2428147b45be19 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 5 Aug 2025 16:54:12 -0600 Subject: [PATCH 06/10] NSIDC-0802: Support reading 37h from prototype location --- pm_tb_data/fetch/amsr/nsidc_0802.py | 27 +++++++++++++++++++++++++++ tests/integration/test_nsidc_0802.py | 9 +++++++++ 2 files changed, 36 insertions(+) diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py index ba8d782..3c58d72 100644 --- a/pm_tb_data/fetch/amsr/nsidc_0802.py +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -9,6 +9,7 @@ from pathlib import Path import xarray as xr +from loguru import logger from pm_tb_data._types import Hemisphere from pm_tb_data.fetch.nsidc_binary import read_binary_tb_file @@ -19,6 +20,7 @@ def get_nsidc_0802_tbs_from_disk( date: dt.date, hemisphere: Hemisphere, data_dir: Path, + prototype_37h_data_dir: 
Path, ) -> xr.Dataset: """Return TB data from NSIDC-0802.""" # This assumes `data_dir` points to the "nsidc0802_polar_stereo_tbs" @@ -30,7 +32,17 @@ def get_nsidc_0802_tbs_from_disk( if not results: raise FileNotFoundError(f"No NSIDC-0007 TBs found for {date=} {hemisphere=}") + # Example fn: tb_as2_20240108_sfm_s37h.dat + prototype_37h_fn_glob = f"tb_as2_{date:%Y%m%d}_sfm_{hemisphere[0].lower()}37h.dat" + prototype_37h_results = list(prototype_37h_data_dir.rglob(prototype_37h_fn_glob)) + prototype_37h_fp: None | Path = None + if len(prototype_37h_results) == 1: + prototype_37h_fp = prototype_37h_results[0] + else: + logger.warning(f"Failed to find 37h data for {date=}") + tb_data_mapping = {} + # Published binary files tb_fn_re = re.compile( r"NSIDC-0802_TB_AMSR2_" + hemisphere[0].upper() @@ -55,6 +67,21 @@ def get_nsidc_0802_tbs_from_disk( }, ) + # prototype 37h + if prototype_37h_fp is not None: + prototype_37h_data = read_binary_tb_file( + filepath=prototype_37h_fp, + hemisphere=hemisphere, + ) + + tb_data_mapping["h37"] = xr.DataArray( + prototype_37h_data, + dims=("fake_y", "fake_x"), + attrs={ + "source_filename": tb_fp.name, + }, + ) + normalized = xr.Dataset(tb_data_mapping) return normalized diff --git a/tests/integration/test_nsidc_0802.py b/tests/integration/test_nsidc_0802.py index 21afffb..ed20ff3 100644 --- a/tests/integration/test_nsidc_0802.py +++ b/tests/integration/test_nsidc_0802.py @@ -6,11 +6,20 @@ def test_get_nsidc_0802_tbs_from_disk(): data_dir = Path("/disks/sidads_ftp/DATASETS/nsidc0802_polar_stereo_tbs") + # TODO: this path should be updated to something like + # `/share/apps/G02202_V6` once created and prototype data are staged there. + # eventually, the code to handle the prototype 37h data will be replaced + # with release ov 0802 v2, which will have all the necessary tbs in nc files. 
+ prototype_37h_data_dir = Path("/home/vagrant/seaice_ecdr/nise_at_tbs") data = get_nsidc_0802_tbs_from_disk( date=dt.date(2025, 1, 1), hemisphere="north", data_dir=data_dir, + prototype_37h_data_dir=prototype_37h_data_dir, ) assert "h19" in data.variables + assert "h37" in data.variables + assert not data["h19"].isnull().all() + assert not data["h37"].isnull().all() From 0e930955fdbfe180d92147841d5bc4000b014c6c Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Fri, 15 Aug 2025 10:44:35 -0600 Subject: [PATCH 07/10] NSIDC-0802: support prototype v2 data --- CHANGELOG.md | 2 +- pm_tb_data/fetch/amsr/nsidc_0802.py | 76 +++++----------------------- tests/integration/test_nsidc_0802.py | 17 +++---- 3 files changed, 21 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fff62d..3d2a406 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 0.6.0 -* Add support for fetching data from NSIDC0802 from disk. +* Add support for fetching data from NSIDC0802 v2 from disk. ## 0.5.0 diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py index 3c58d72..f43e279 100644 --- a/pm_tb_data/fetch/amsr/nsidc_0802.py +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -2,17 +2,23 @@ TODO: link to dataset landing page. Product is currently a prototype, but expected to be published soon. + +NOTE: most of the data products incldued in `pm_tb_data` "normalize" tb names to +be something like `h19`. This is not currently done with nsidc0802, in part +because there are "calibrated" versions of each channel (e.g., +`tb_19h_calibrated`). We could drop the `tb_` and remap `19h_` to `h19_`, but it +does not seem necessary for this dataset. The nc dataset is already nicely +formatted and contains all the metadata it needs. Ideally, `pm_tb_data` +structures poorly structured data into a better format, and this one doesn't +really need it. 
""" import datetime as dt -import re from pathlib import Path import xarray as xr -from loguru import logger from pm_tb_data._types import Hemisphere -from pm_tb_data.fetch.nsidc_binary import read_binary_tb_file def get_nsidc_0802_tbs_from_disk( @@ -20,68 +26,14 @@ def get_nsidc_0802_tbs_from_disk( date: dt.date, hemisphere: Hemisphere, data_dir: Path, - prototype_37h_data_dir: Path, ) -> xr.Dataset: """Return TB data from NSIDC-0802.""" - # This assumes `data_dir` points to the "nsidc0802_polar_stereo_tbs" - # directory. E.g., /disks/sidads_ftp/DATASETS/nsidc0802_polar_stereo_tbs/. - - # Example fn: NSIDC-0802_TB_AMSR2_N_37V_20250702_V1.0.bin - fn_glob = f"NSIDC-0802_TB_AMSR2_{hemisphere[0].upper()}_*_{date:%Y%m%d}_*.bin" + fn_glob = f"NSIDC-0802_TB_AMSR2_{hemisphere[0].upper()}_{date:%Y%m%d}_*.nc" results = list(data_dir.rglob(fn_glob)) - if not results: + if not len(results) == 1: raise FileNotFoundError(f"No NSIDC-0007 TBs found for {date=} {hemisphere=}") - # Example fn: tb_as2_20240108_sfm_s37h.dat - prototype_37h_fn_glob = f"tb_as2_{date:%Y%m%d}_sfm_{hemisphere[0].lower()}37h.dat" - prototype_37h_results = list(prototype_37h_data_dir.rglob(prototype_37h_fn_glob)) - prototype_37h_fp: None | Path = None - if len(prototype_37h_results) == 1: - prototype_37h_fp = prototype_37h_results[0] - else: - logger.warning(f"Failed to find 37h data for {date=}") - - tb_data_mapping = {} - # Published binary files - tb_fn_re = re.compile( - r"NSIDC-0802_TB_AMSR2_" - + hemisphere[0].upper() - + r"_(?P\d{2})(?PH|V)_.*.bin" - ) - for tb_fp in results: - match = tb_fn_re.match(tb_fp.name) - assert match is not None - - data = read_binary_tb_file( - filepath=tb_fp, - hemisphere=hemisphere, - ) - - tb_data_mapping[ - f"{match.group('polarization').lower()}{match.group('channel')}" - ] = xr.DataArray( - data, - dims=("fake_y", "fake_x"), - attrs={ - "source_filename": tb_fp.name, - }, - ) - - # prototype 37h - if prototype_37h_fp is not None: - prototype_37h_data = 
read_binary_tb_file( - filepath=prototype_37h_fp, - hemisphere=hemisphere, - ) - - tb_data_mapping["h37"] = xr.DataArray( - prototype_37h_data, - dims=("fake_y", "fake_x"), - attrs={ - "source_filename": tb_fp.name, - }, - ) - - normalized = xr.Dataset(tb_data_mapping) + matching_filepath = results[0] + ds = xr.open_dataset(matching_filepath) - return normalized + return ds diff --git a/tests/integration/test_nsidc_0802.py b/tests/integration/test_nsidc_0802.py index ed20ff3..7d0d7b0 100644 --- a/tests/integration/test_nsidc_0802.py +++ b/tests/integration/test_nsidc_0802.py @@ -5,21 +5,16 @@ def test_get_nsidc_0802_tbs_from_disk(): - data_dir = Path("/disks/sidads_ftp/DATASETS/nsidc0802_polar_stereo_tbs") - # TODO: this path should be updated to something like - # `/share/apps/G02202_V6` once created and prototype data are staged there. - # eventually, the code to handle the prototype 37h data will be replaced - # with release ov 0802 v2, which will have all the necessary tbs in nc files. - prototype_37h_data_dir = Path("/home/vagrant/seaice_ecdr/nise_at_tbs") + # TODO: this data path is temporary! 
+ data_dir = Path("/share/apps/nsidc0802/dev/scotts/output/") data = get_nsidc_0802_tbs_from_disk( date=dt.date(2025, 1, 1), hemisphere="north", data_dir=data_dir, - prototype_37h_data_dir=prototype_37h_data_dir, ) - assert "h19" in data.variables - assert "h37" in data.variables + assert "tb_19h_calibrated" in data.variables + assert "tb_37h_calibrated" in data.variables - assert not data["h19"].isnull().all() - assert not data["h37"].isnull().all() + assert not data["tb_19h_calibrated"].isnull().all() + assert not data["tb_37h_calibrated"].isnull().all() From 603d67c8da9e3adeec74998fe64f234f1b64f9bb Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Fri, 15 Aug 2025 11:02:51 -0600 Subject: [PATCH 08/10] Squeeze time dim from 0802 v2 data --- pm_tb_data/fetch/amsr/nsidc_0802.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py index f43e279..0fbbd3a 100644 --- a/pm_tb_data/fetch/amsr/nsidc_0802.py +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -11,6 +11,9 @@ formatted and contains all the metadata it needs. Ideally, `pm_tb_data` structures poorly structured data into a better format, and this one doesn't really need it. + +The one exception is that the `time` dimension is dropped from the variables, as +it is of length 1 and the `seaice_ecdr` expects no explicit time dim. Just x/y. """ import datetime as dt @@ -36,4 +39,8 @@ def get_nsidc_0802_tbs_from_disk( matching_filepath = results[0] ds = xr.open_dataset(matching_filepath) + # Squeeze the dataset, dropping the time dim (of length 1) from the + # variables, which is expected from code that imports this package. 
+ ds = ds.squeeze() + return ds From fcf0a683c16d47cd2497bc9faaa588e1c3e9aa9e Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Fri, 22 Aug 2025 11:04:56 -0600 Subject: [PATCH 09/10] Update 0802 path in test --- tests/integration/test_nsidc_0802.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_nsidc_0802.py b/tests/integration/test_nsidc_0802.py index 7d0d7b0..d7ef9a3 100644 --- a/tests/integration/test_nsidc_0802.py +++ b/tests/integration/test_nsidc_0802.py @@ -5,8 +5,7 @@ def test_get_nsidc_0802_tbs_from_disk(): - # TODO: this data path is temporary! - data_dir = Path("/share/apps/nsidc0802/dev/scotts/output/") + data_dir = Path("/disks/sidads_ftp/DATASETS/nsidc0802_daily_a2_tb_v2/") data = get_nsidc_0802_tbs_from_disk( date=dt.date(2025, 1, 1), hemisphere="north", From 56380421acdc83b604e5fc0fc22e35eb86432045 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Fri, 22 Aug 2025 11:07:54 -0600 Subject: [PATCH 10/10] Update 0802 module docstring w/ dataset landing page url --- pm_tb_data/fetch/amsr/nsidc_0802.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py index 0fbbd3a..b8b4430 100644 --- a/pm_tb_data/fetch/amsr/nsidc_0802.py +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -1,7 +1,6 @@ """Functions to read tbs from NSIDC-0802 binary files. -TODO: link to dataset landing page. Product is currently a prototype, but -expected to be published soon. +See https://nsidc.org/data/nsidc-0802/versions/2 for more information. NOTE: most of the data products incldued in `pm_tb_data` "normalize" tb names to be something like `h19`. This is not currently done with nsidc0802, in part