diff --git a/CHANGELOG.md b/CHANGELOG.md index 968473a..3d2a406 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.6.0 + +* Add support for fetching data from NSIDC0802 v2 from disk. + ## 0.5.0 * Add support for fetching data from NSIDC0080 from disk. diff --git a/pm_tb_data/__init__.py b/pm_tb_data/__init__.py index 1c7d256..458018d 100644 --- a/pm_tb_data/__init__.py +++ b/pm_tb_data/__init__.py @@ -1 +1 @@ -__version__ = "v0.5.0" +__version__ = "v0.6.0" diff --git a/pm_tb_data/fetch/amsr/nsidc_0802.py b/pm_tb_data/fetch/amsr/nsidc_0802.py new file mode 100644 index 0000000..b8b4430 --- /dev/null +++ b/pm_tb_data/fetch/amsr/nsidc_0802.py @@ -0,0 +1,45 @@ +"""Functions to read tbs from NSIDC-0802 NetCDF files. + +See https://nsidc.org/data/nsidc-0802/versions/2 for more information. + +NOTE: most of the data products included in `pm_tb_data` "normalize" tb names to +be something like `h19`. This is not currently done with nsidc0802, in part +because there are "calibrated" versions of each channel (e.g., +`tb_19h_calibrated`). We could drop the `tb_` and remap `19h_` to `h19_`, but it +does not seem necessary for this dataset. The nc dataset is already nicely +formatted and contains all the metadata it needs. Ideally, `pm_tb_data` +structures poorly structured data into a better format, and this one doesn't +really need it. + +The one exception is that the `time` dimension is dropped from the variables, as +it is of length 1 and the `seaice_ecdr` expects no explicit time dim. Just x/y. 
def get_nsidc_0802_tbs_from_disk(
    *,
    date: dt.date,
    hemisphere: Hemisphere,
    data_dir: Path,
) -> xr.Dataset:
    """Return TB data from NSIDC-0802 for a single date and hemisphere.

    Recursively searches `data_dir` for a file whose name matches
    `NSIDC-0802_TB_AMSR2_<H>_<YYYYMMDD>_*.nc`, where `<H>` is the upper-cased
    first letter of `hemisphere`, and opens it with `xarray`.

    Args:
        date: Date of the data file to read.
        hemisphere: Hemisphere of the data file to read. Only its first
            character is used to build the filename pattern.
        data_dir: Directory that is searched (including subdirectories) for
            the data file.

    Returns:
        The opened dataset after `squeeze()`, i.e. with every length-1
        dimension removed from the variables.

    Raises:
        FileNotFoundError: If the search does not yield exactly one file. The
            same exception type is used for both "no match" and "more than one
            match" so that existing callers keep working; the message tells the
            two cases apart.
    """
    fn_glob = f"NSIDC-0802_TB_AMSR2_{hemisphere[0].upper()}_{date:%Y%m%d}_*.nc"
    # Sorted so that the "multiple matches" error message is deterministic.
    matches = sorted(data_dir.rglob(fn_glob))

    if not matches:
        raise FileNotFoundError(f"No NSIDC-0802 TBs found for {date=} {hemisphere=}")
    if len(matches) > 1:
        found = ", ".join(str(path) for path in matches)
        raise FileNotFoundError(
            f"Expected exactly one NSIDC-0802 TB file for {date=} {hemisphere=},"
            f" but found {len(matches)}: {found}"
        )

    ds = xr.open_dataset(matches[0])

    # Squeeze the dataset, dropping the time dim (of length 1) from the
    # variables, which is expected from code that imports this package.
    # NOTE(review): `squeeze()` without arguments removes *every* length-1
    # dimension, not only `time` -- confirm that is intended.
    return ds.squeeze()