Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ install_requires =
pyhdf>=0.10.5
numpy>=2.2.1
netCDF4>=1.6.0
h5py>=3.8.0
xarray>=2022.6.0
pystare>=0.8.9
distributed>=2022.7.1
Expand Down
58 changes: 54 additions & 4 deletions staremaster/products/xcal.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import netCDF4
import h5py
import numpy
from staremaster.sidecar import Sidecar
import staremaster.conversions
import pystare
import os


class XCAL:
Expand All @@ -11,17 +13,51 @@ class XCAL:

def __init__(self, file_path):
    """Open ``file_path`` and prepare empty per-scan coordinate stores.

    Args:
        file_path: Path of the granule to read.
    """
    self.file_path = file_path
    # NOTE(review): this handle is opened unconditionally with netCDF4, in
    # addition to the format-aware ``data_file`` below, and nothing visible
    # here closes it. It looks like the pre-change line of the diff --
    # confirm whether ``self.netcdf`` is still needed before removing it.
    self.netcdf = netCDF4.Dataset(file_path, 'r', format='NETCDF4')
    # The format must be known before the matching reader can be chosen.
    self.file_format = self._detect_file_format()
    self.data_file = self._open_file()
    # Filled by get_latlon(), keyed by scan name.
    self.lats, self.lons = {}, {}

def _detect_file_format(self):
    """Detect whether ``self.file_path`` is a NetCDF4 or an HDF5 file.

    The (case-insensitive) file extension decides first. Only when the
    extension is not recognised is the file probed by opening it, first
    with netCDF4 and then with h5py.

    Returns:
        ``'netcdf4'`` or ``'hdf5'``.

    Raises:
        ValueError: If the extension is unknown and neither library can
            open the file.
    """
    file_ext = os.path.splitext(self.file_path)[1].lower()

    if file_ext in ('.nc', '.nc4', '.netcdf'):
        return 'netcdf4'
    if file_ext in ('.h5', '.hdf5', '.hdf'):
        return 'hdf5'

    # Unknown extension: probe by attempting to open with each library.
    # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
    # and SystemExit propagate instead of being mistaken for a bad file.
    try:
        with netCDF4.Dataset(self.file_path, 'r'):
            return 'netcdf4'
    except Exception:
        # Not readable through netCDF4; fall through to the HDF5 probe.
        pass

    try:
        with h5py.File(self.file_path, 'r'):
            return 'hdf5'
    except Exception as err:
        raise ValueError(
            f"Could not determine file format for {self.file_path}"
        ) from err

def _open_file(self):
    """Return a read-only handle for ``self.file_path``.

    The library is selected by ``self.file_format``: netCDF4 for
    ``'netcdf4'`` and h5py for ``'hdf5'``.

    Raises:
        ValueError: If ``self.file_format`` is any other value.
    """
    if self.file_format == 'netcdf4':
        handle = netCDF4.Dataset(self.file_path, 'r', format='NETCDF4')
        return handle
    if self.file_format == 'hdf5':
        handle = h5py.File(self.file_path, 'r')
        return handle
    # Guard clause: anything else is a format this class cannot read.
    raise ValueError(f"Unsupported file format: {self.file_format}")

def load(self):
    """Load the geolocation data by delegating to ``get_latlon``."""
    self.get_latlon()

def get_latlon(self):
    """Read latitude and longitude of every scan as double-precision arrays.

    For each name in ``self.scans`` the ``'Latitude'`` and ``'Longitude'``
    datasets are read from ``self.data_file`` and stored in ``self.lats``
    and ``self.lons`` under that scan name.

    Each scan is read exactly once, through the handle that matches
    ``self.file_format``. The earlier duplicate read through
    ``self.netcdf`` has been dropped: its results were overwritten straight
    away by the format-specific branch.
    """
    for scan in self.scans:
        if self.file_format == 'netcdf4':
            group = self.data_file.groups[scan]
            # ``.data`` takes the underlying array of the sliced variable
            # (presumably a masked array from netCDF4 -- TODO confirm).
            self.lats[scan] = group['Latitude'][:].data.astype(numpy.double)
            self.lons[scan] = group['Longitude'][:].data.astype(numpy.double)
        elif self.file_format == 'hdf5':
            group = self.data_file[scan]
            self.lats[scan] = group['Latitude'][:].astype(numpy.double)
            self.lons[scan] = group['Longitude'][:].astype(numpy.double)

def create_sidecar(self, n_workers=1, cover_res=None, out_path=None):

Expand All @@ -37,6 +73,8 @@ def create_sidecar(self, n_workers=1, cover_res=None, out_path=None):
# Need to drop the resolution to make the cover less sparse
cover_res = staremaster.conversions.min_resolution(sids)
cover_res = cover_res - 2
# Clamp cover_res to [0, 27]
cover_res = max(0, min(27, cover_res))

sids_adapted = pystare.spatial_coerce_resolution(sids, cover_res)

Expand All @@ -58,8 +96,20 @@ def create_sidecar(self, n_workers=1, cover_res=None, out_path=None):

cover_all = numpy.concatenate(cover_all)
cover_all = staremaster.conversions.merge_stare(cover_all, n_workers=n_workers)
sidecar.write_dimension('l', cover_all.size)

# Only create the 'l' dimension if it does not already exist
with netCDF4.Dataset(sidecar.file_path, 'a', format='NETCDF4') as ncfile:
if 'l' not in ncfile.dimensions:
sidecar.write_dimension('l', cover_all.size)
sidecar.write_cover(cover_all, nom_res=nom_res)

return sidecar

def __del__(self):
    """Close the data file handle when the object is destroyed.

    Cleanup is best-effort: a handle that is missing (``__init__`` failed
    before it was assigned), ``None``, or that fails to close must not
    raise from a finalizer.
    """
    handle = getattr(self, 'data_file', None)
    if handle is None:
        return
    try:
        handle.close()
    except Exception:
        # Deliberately ignored: there is nothing useful to do about a
        # failed close during destruction. ``Exception`` rather than a
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        pass

4 changes: 3 additions & 1 deletion staremaster/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ def write_cover(self, cover, nom_res=None, group=None, fill_value=None):
grp = rootgrp.createGroup(group)
else:
grp = rootgrp
grp.createDimension(l_name, l)
# Only create the 'l' dimension if it does not already exist
if l_name not in grp.dimensions:
grp.createDimension(l_name, l)
cover_netcdf = grp.createVariable(varname=varname,
datatype='u8',
dimensions=(l_name),
Expand Down
Binary file modified tests/data/mod05/MOD05_L2.A2005349.2125.061.2017294065400_stare.nc
Binary file not shown.
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Binary file not shown.
Git LFS file not shown
130 changes: 129 additions & 1 deletion tests/test_ssmis.py
Original file line number Diff line number Diff line change
@@ -1 +1,129 @@

import os
import pytest
from staremaster.products.ssmis import SSMIS
import tempfile
import shutil


def test_ssmis_hdf5_support():
    """SSMIS opens an HDF5 granule and loads lat/lon for every scan."""
    test_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5"

    # The granule may not be checked out; skip rather than fail.
    if not os.path.exists(test_file):
        pytest.skip(f"Test file {test_file} not found")

    granule = SSMIS(test_file)

    # Format detection
    assert granule.file_format == 'hdf5'

    # Loading populates the per-scan coordinate dictionaries
    granule.load()

    expected_scans = ['S1', 'S2', 'S3', 'S4']
    assert granule.scans == expected_scans

    # Every scan has matching latitude/longitude arrays
    for scan in expected_scans:
        assert scan in granule.lats
        assert scan in granule.lons
        lat, lon = granule.lats[scan], granule.lons[scan]
        assert lat is not None
        assert lon is not None
        assert lat.shape == lon.shape

    # Drop the reference so the file handle is released
    del granule


def test_ssmis_hdf5_support_new_file():
    """SSMIS opens the newer HDF5 granule and loads lat/lon for every scan."""
    test_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5"

    # The granule may not be checked out; skip rather than fail.
    if not os.path.exists(test_file):
        pytest.skip(f"Test file {test_file} not found")

    granule = SSMIS(test_file)

    # Format detection
    assert granule.file_format == 'hdf5'

    # Loading populates the per-scan coordinate dictionaries
    granule.load()

    expected_scans = ['S1', 'S2', 'S3', 'S4']
    assert granule.scans == expected_scans

    # Every scan has matching latitude/longitude arrays
    for scan in expected_scans:
        assert scan in granule.lats
        assert scan in granule.lons
        lat, lon = granule.lats[scan], granule.lons[scan]
        assert lat is not None
        assert lon is not None
        assert lat.shape == lon.shape

    # Drop the reference so the file handle is released
    del granule


def test_xcal_file_format_detection():
    """XCAL detects the HDF5 format of the original test granule."""
    from staremaster.products.xcal import XCAL

    hdf5_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5"
    if not os.path.exists(hdf5_file):
        # Report a skip instead of passing without asserting anything,
        # matching the other tests in this module.
        pytest.skip(f"Test file {hdf5_file} not found")

    xcal = XCAL(hdf5_file)
    assert xcal.file_format == 'hdf5'
    # Drop the reference so the file handle is released
    del xcal


def test_xcal_file_format_detection_new_file():
    """XCAL detects the HDF5 format of the newer test granule."""
    from staremaster.products.xcal import XCAL

    hdf5_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5"
    if not os.path.exists(hdf5_file):
        # Report a skip instead of passing without asserting anything,
        # matching the other tests in this module.
        pytest.skip(f"Test file {hdf5_file} not found")

    xcal = XCAL(hdf5_file)
    assert xcal.file_format == 'hdf5'
    # Drop the reference so the file handle is released
    del xcal


def test_ssmis_create_sidecar():
    """create_sidecar writes a non-empty sidecar for the original granule."""
    test_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5"
    if not os.path.exists(test_file):
        pytest.skip(f"Test file {test_file} not found")

    granule = SSMIS(test_file)
    granule.load()

    # Write into a throwaway directory so no artefacts are left behind
    with tempfile.TemporaryDirectory() as tmpdir:
        sidecar = granule.create_sidecar(out_path=os.path.join(tmpdir, "sidecar.nc"))
        assert sidecar is not None
        written = sidecar.file_path
        assert os.path.exists(written)
        # The sidecar must actually contain data
        assert os.path.getsize(written) > 0

    del granule


def test_ssmis_create_sidecar_new_file():
    """create_sidecar writes a non-empty sidecar for the newer granule."""
    test_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5"
    if not os.path.exists(test_file):
        pytest.skip(f"Test file {test_file} not found")

    granule = SSMIS(test_file)
    granule.load()

    # Write into a throwaway directory so no artefacts are left behind
    with tempfile.TemporaryDirectory() as tmpdir:
        sidecar = granule.create_sidecar(out_path=os.path.join(tmpdir, "sidecar.nc"))
        assert sidecar is not None
        written = sidecar.file_path
        assert os.path.exists(written)
        assert os.path.getsize(written) > 0

    del granule