Skip to content
Merged
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.6.0

* Add support for fetching data from NSIDC0802 v2 from disk.

## 0.5.0

* Add support for fetching data from NSIDC0080 from disk.
Expand Down
2 changes: 1 addition & 1 deletion pm_tb_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "v0.5.0"
__version__ = "v0.6.0"
45 changes: 45 additions & 0 deletions pm_tb_data/fetch/amsr/nsidc_0802.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Functions to read tbs from NSIDC-0802 binary files.

See https://nsidc.org/data/nsidc-0802/versions/2 for more information.

NOTE: most of the data products incldued in `pm_tb_data` "normalize" tb names to
be something like `h19`. This is not currently done with nsidc0802, in part
because there are "calibrated" versions of each channel (e.g.,
`tb_19h_calibrated`). We could drop the `tb_` and remap `19h_` to `h19_`, but it
does not seem necessary for this dataset. The nc dataset is already nicely
formatted and contains all the metadata it needs. Ideally, `pm_tb_data`
structures poorly structured data into a better format, and this one doesn't
really need it.

The one exception is that the `time` dimension is dropped from the variables, as
it is of length 1 and the `seaice_ecdr` expects no explicit time dim. Just x/y.
"""

import datetime as dt
from pathlib import Path

import xarray as xr

from pm_tb_data._types import Hemisphere


def get_nsidc_0802_tbs_from_disk(
*,
date: dt.date,
hemisphere: Hemisphere,
data_dir: Path,
) -> xr.Dataset:
"""Return TB data from NSIDC-0802."""
fn_glob = f"NSIDC-0802_TB_AMSR2_{hemisphere[0].upper()}_{date:%Y%m%d}_*.nc"
results = list(data_dir.rglob(fn_glob))
if not len(results) == 1:
raise FileNotFoundError(f"No NSIDC-0007 TBs found for {date=} {hemisphere=}")

matching_filepath = results[0]
ds = xr.open_dataset(matching_filepath)

# Squeeze the dataset, dropping the time dim (of length 1) from the
# variables, which is expected from code that imports this package.
ds = ds.squeeze()

return ds
36 changes: 1 addition & 35 deletions pm_tb_data/fetch/nsidc_0007.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,10 @@
import re
from pathlib import Path

import numpy as np
import numpy.typing as npt
import xarray as xr

from pm_tb_data._types import Hemisphere


def read_binary_tb_file(
*, filepath: Path, hemisphere: Hemisphere
) -> npt.NDArray[np.float64]:
"""Read 25km binary NSIDC0007 data from disk.

Returns data in Kelvins. No/missing data areas are masked with `np.nan`.
"""
grid_shape = dict(
north=(448, 304),
south=(332, 316),
)[hemisphere]

try:
tb_data = np.fromfile(filepath, np.dtype("<i2")).reshape(grid_shape)
except ValueError as e:
# TODO: we want a log statement and a warning here!
# NOTE: This occurs for file:
# /projects/DATASETS/nsidc0007_smmr_radiance_seaice_v01/TBS/1985/AUG/850804S.37H
print(f"ValueError trying to read from 0007 file\n{e}")
print(f"file: {filepath}")
tb_data = np.zeros(grid_shape, np.dtype("<i2"))

# Radiances are in 0.1 kelvins, stored as 2-byte integers, with the least
# significant byte (lsb) first (lower address) and msb second (higher
# address). Thus, a value of 1577 represents a Tb of 157.7 kelvins.
tb_data_kelvins = tb_data * 0.1

# Mask out values of 0 (missing data):
tb_data_kelvins[tb_data == 0] = np.nan

return tb_data_kelvins
from pm_tb_data.fetch.nsidc_binary import read_binary_tb_file


def get_nsidc_0007_tbs_from_disk(
Expand Down
41 changes: 41 additions & 0 deletions pm_tb_data/fetch/nsidc_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from pathlib import Path

import numpy as np
import numpy.typing as npt

from pm_tb_data._types import Hemisphere


def read_binary_tb_file(
*,
filepath: Path,
hemisphere: Hemisphere,
) -> npt.NDArray[np.float64]:
"""Read 25km NSIDC binary data from disk.

Returns data in Kelvins. No/missing data areas are masked with `np.nan`.
"""
grid_shape = dict(
north=(448, 304),
south=(332, 316),
)[hemisphere]

try:
tb_data = np.fromfile(filepath, np.dtype("<i2")).reshape(grid_shape)
except ValueError as e:
# TODO: we want a log statement and a warning here!
# NOTE: This occurs for file:
# /projects/DATASETS/nsidc0007_smmr_radiance_seaice_v01/TBS/1985/AUG/850804S.37H
print(f"ValueError trying to read from binary file\n{e}")
print(f"file: {filepath}")
tb_data = np.zeros(grid_shape, np.dtype("<i2"))

# Radiances are in 0.1 kelvins, stored as 2-byte integers, with the least
# significant byte (lsb) first (lower address) and msb second (higher
# address). Thus, a value of 1577 represents a Tb of 157.7 kelvins.
tb_data_kelvins = tb_data * 0.1

# Mask out values of 0 (missing data):
tb_data_kelvins[tb_data == 0] = np.nan

return tb_data_kelvins
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pm_tb_data"
version = "0.5.0"
version = "0.6.0"

[tool.setuptools]
include-package-data = true
Expand Down Expand Up @@ -63,7 +63,7 @@ ignore_missing_imports = true


[tool.bumpversion]
current_version = "0.5.0"
current_version = "0.6.0"
commit = false
tag = false

Expand Down
2 changes: 1 addition & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: pm_tb_data
version: "0.5.0"
version: "0.6.0"

source:
path: ../
Expand Down
19 changes: 19 additions & 0 deletions tests/integration/test_nsidc_0802.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import datetime as dt
from pathlib import Path

from pm_tb_data.fetch.amsr.nsidc_0802 import get_nsidc_0802_tbs_from_disk


def test_get_nsidc_0802_tbs_from_disk():
data_dir = Path("/disks/sidads_ftp/DATASETS/nsidc0802_daily_a2_tb_v2/")
data = get_nsidc_0802_tbs_from_disk(
date=dt.date(2025, 1, 1),
hemisphere="north",
data_dir=data_dir,
)

assert "tb_19h_calibrated" in data.variables
assert "tb_37h_calibrated" in data.variables

assert not data["tb_19h_calibrated"].isnull().all()
assert not data["tb_37h_calibrated"].isnull().all()
Loading