diff --git a/setup.cfg b/setup.cfg index 94c9b22..8f4899a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,7 @@ install_requires = pyhdf>=0.10.5 numpy>=2.2.1 netCDF4>=1.6.0 + h5py>=3.8.0 xarray>=2022.6.0 pystare>=0.8.9 distributed>=2022.7.1 diff --git a/staremaster/products/xcal.py b/staremaster/products/xcal.py index 91bead9..17002be 100644 --- a/staremaster/products/xcal.py +++ b/staremaster/products/xcal.py @@ -1,8 +1,10 @@ import netCDF4 +import h5py import numpy from staremaster.sidecar import Sidecar import staremaster.conversions import pystare +import os class XCAL: @@ -11,17 +13,51 @@ class XCAL: def __init__(self, file_path): self.file_path = file_path - self.netcdf = netCDF4.Dataset(file_path, 'r', format='NETCDF4') + self.file_format = self._detect_file_format() + self.data_file = self._open_file() self.lats = {} self.lons = {} + def _detect_file_format(self): + """Detect if the file is NetCDF4 or HDF5 based on extension and content.""" + file_ext = os.path.splitext(self.file_path)[1].lower() + + if file_ext in ['.nc', '.nc4', '.netcdf']: + return 'netcdf4' + elif file_ext in ['.h5', '.hdf5', '.hdf']: + return 'hdf5' + else: + # Try to detect by attempting to open with each format + try: + with netCDF4.Dataset(self.file_path, 'r') as test_file: + return 'netcdf4' + except: + try: + with h5py.File(self.file_path, 'r') as test_file: + return 'hdf5' + except: + raise ValueError(f"Could not determine file format for {self.file_path}") + + def _open_file(self): + """Open the file with the appropriate library.""" + if self.file_format == 'netcdf4': + return netCDF4.Dataset(self.file_path, 'r', format='NETCDF4') + elif self.file_format == 'hdf5': + return h5py.File(self.file_path, 'r') + else: + raise ValueError(f"Unsupported file format: {self.file_format}") + def load(self): self.get_latlon() def get_latlon(self): for scan in self.scans: - self.lats[scan] = self.netcdf.groups[scan]['Latitude'][:].data.astype(numpy.double) - self.lons[scan] = self.netcdf.groups[scan]['Longitude'][:].data.astype(numpy.double) + if self.file_format == 'netcdf4': + self.lats[scan] = self.data_file.groups[scan]['Latitude'][:].data.astype(numpy.double) + self.lons[scan] = self.data_file.groups[scan]['Longitude'][:].data.astype(numpy.double) + elif self.file_format == 'hdf5': + self.lats[scan] = self.data_file[scan]['Latitude'][:].astype(numpy.double) + self.lons[scan] = self.data_file[scan]['Longitude'][:].astype(numpy.double) def create_sidecar(self, n_workers=1, cover_res=None, out_path=None): @@ -37,6 +73,8 @@ def create_sidecar(self, n_workers=1, cover_res=None, out_path=None): # Need to drop the resolution to make the cover less sparse cover_res = staremaster.conversions.min_resolution(sids) cover_res = cover_res - 2 + # Clamp cover_res to [0, 27] + cover_res = max(0, min(27, cover_res)) sids_adapted = pystare.spatial_coerce_resolution(sids, cover_res) @@ -58,8 +96,20 @@ def create_sidecar(self, n_workers=1, cover_res=None, out_path=None): cover_all = numpy.concatenate(cover_all) cover_all = staremaster.conversions.merge_stare(cover_all, n_workers=n_workers) - sidecar.write_dimension('l', cover_all.size) + + # Only create the 'l' dimension if it does not already exist + with netCDF4.Dataset(sidecar.file_path, 'a', format='NETCDF4') as ncfile: + if 'l' not in ncfile.dimensions: + sidecar.write_dimension('l', cover_all.size) sidecar.write_cover(cover_all, nom_res=nom_res) return sidecar + def __del__(self): + """Clean up file handles when the object is destroyed.""" + if hasattr(self, 'data_file') and self.data_file is not None: + try: + self.data_file.close() + except: + pass + diff --git a/staremaster/sidecar.py b/staremaster/sidecar.py index 2cdebbb..0de1ee5 100644 --- a/staremaster/sidecar.py +++ b/staremaster/sidecar.py @@ -128,7 +128,9 @@ def write_cover(self, cover, nom_res=None, group=None, fill_value=None): grp = rootgrp.createGroup(group) else: grp = rootgrp - grp.createDimension(l_name, l) + # Only create the 'l' dimension if it does not already exist + if l_name not in grp.dimensions: + grp.createDimension(l_name, l) cover_netcdf = grp.createVariable(varname=varname, datatype='u8', dimensions=(l_name), diff --git a/tests/data/mod05/MOD05_L2.A2005349.2125.061.2017294065400_stare.nc b/tests/data/mod05/MOD05_L2.A2005349.2125.061.2017294065400_stare.nc index 2cd17f4..9226585 100644 Binary files a/tests/data/mod05/MOD05_L2.A2005349.2125.061.2017294065400_stare.nc and b/tests/data/mod05/MOD05_L2.A2005349.2125.061.2017294065400_stare.nc differ diff --git a/tests/data/mod09/MOD09.A2002299.0710.006.2015151173939_stare.nc b/tests/data/mod09/MOD09.A2002299.0710.006.2015151173939_stare.nc index 7af6210..26d89fa 100644 --- a/tests/data/mod09/MOD09.A2002299.0710.006.2015151173939_stare.nc +++ b/tests/data/mod09/MOD09.A2002299.0710.006.2015151173939_stare.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b23c54fff30aebab8f1fbe21b740ad7c28c89d298d771d2dedf813754511b326 -size 94570973 +oid sha256:c6ffc98a281ef968f7e0fab2e751980eea661dc93422c164815811bdb320b498 +size 94571444 diff --git a/tests/data/mod09/MOD09.A2020032.1940.006.2020034015024_stare.nc b/tests/data/mod09/MOD09.A2020032.1940.006.2020034015024_stare.nc new file mode 100644 index 0000000..de6c43d --- /dev/null +++ b/tests/data/mod09/MOD09.A2020032.1940.006.2020034015024_stare.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628db78899f7db01b6ba3d99f3e9b509911bff5b3c61cb928e8557bf9bd04ed1 +size 95625607 diff --git a/tests/data/viirs/VNP03IMG.A2022308.1930.002.2022309041547_stare.nc b/tests/data/viirs/VNP03IMG.A2022308.1930.002.2022309041547_stare.nc index a652f5c..af06b13 100644 --- a/tests/data/viirs/VNP03IMG.A2022308.1930.002.2022309041547_stare.nc +++ b/tests/data/viirs/VNP03IMG.A2022308.1930.002.2022309041547_stare.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04ca498b11d2501b66cf9147f1e7ec9f29a0c52cf0d0cffb17beba4c73ae1647 -size 281820306 +oid sha256:3437853e2c9a61a8c98bbd8d9d40adbddf6f76af7e3e872a826407352d9b2095 +size 281819697 diff --git a/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5 b/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5 new file mode 100644 index 0000000..fe49635 Binary files /dev/null and b/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5 differ diff --git a/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B_stare.nc b/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B_stare.nc new file mode 100644 index 0000000..b25ef22 --- /dev/null +++ b/tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B_stare.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e1e5949863229ffb62c939faa3ebae9af62727767958e181fb5a29fb35c843 +size 16429837 diff --git a/tests/test_ssmis.py b/tests/test_ssmis.py index 8d1c8b6..0bf69cd 100644 --- a/tests/test_ssmis.py +++ b/tests/test_ssmis.py @@ -1 +1,129 @@ - +import os +import pytest +from staremaster.products.ssmis import SSMIS +import tempfile +import shutil + + +def test_ssmis_hdf5_support(): + """Test that SSMIS can handle HDF5 files.""" + # Use one of the test HDF5 files + test_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5" + + if not os.path.exists(test_file): + pytest.skip(f"Test file {test_file} not found") + + # Test that we can instantiate SSMIS with HDF5 file + ssmis = SSMIS(test_file) + + # Test that file format is detected correctly + assert ssmis.file_format == 'hdf5' + + # Test that we can load the data + ssmis.load() + + # Test that we have the expected scans + expected_scans = ['S1', 'S2', 'S3', 'S4'] + assert ssmis.scans == expected_scans + + # Test that we have latitude and longitude data for each scan + for scan in expected_scans: + assert scan in ssmis.lats + assert scan in ssmis.lons + assert ssmis.lats[scan] is not None + assert ssmis.lons[scan] is not None + assert ssmis.lats[scan].shape == ssmis.lons[scan].shape + + # Clean up + del ssmis + + +def test_ssmis_hdf5_support_new_file(): + """Test that SSMIS can handle the new HDF5 file.""" + # Use the new test HDF5 file + test_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5" + + if not os.path.exists(test_file): + pytest.skip(f"Test file {test_file} not found") + + # Test that we can instantiate SSMIS with HDF5 file + ssmis = SSMIS(test_file) + + # Test that file format is detected correctly + assert ssmis.file_format == 'hdf5' + + # Test that we can load the data + ssmis.load() + + # Test that we have the expected scans + expected_scans = ['S1', 'S2', 'S3', 'S4'] + assert ssmis.scans == expected_scans + + # Test that we have latitude and longitude data for each scan + for scan in expected_scans: + assert scan in ssmis.lats + assert scan in ssmis.lons + assert ssmis.lats[scan] is not None + assert ssmis.lons[scan] is not None + assert ssmis.lats[scan].shape == ssmis.lons[scan].shape + + # Clean up + del ssmis + + +def test_xcal_file_format_detection(): + """Test that file format detection works correctly.""" + from staremaster.products.xcal import XCAL + + # Test HDF5 file + hdf5_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5" + if os.path.exists(hdf5_file): + xcal = XCAL(hdf5_file) + assert xcal.file_format == 'hdf5' + del xcal + + +def test_xcal_file_format_detection_new_file(): + """Test that file format detection works correctly for the new file.""" + from staremaster.products.xcal import XCAL + + # Test new HDF5 file + hdf5_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5" + if os.path.exists(hdf5_file): + xcal = XCAL(hdf5_file) + assert xcal.file_format == 'hdf5' + del xcal + + +def test_ssmis_create_sidecar(): + """Test the create_sidecar function for the original SSMIS HDF5 file.""" + test_file = "tests/data/xcal/1C.F16.SSMIS.XCAL2016-V.20210201-S004436-E022630.089218.V05A.HDF5" + if not os.path.exists(test_file): + pytest.skip(f"Test file {test_file} not found") + ssmis = SSMIS(test_file) + ssmis.load() + # Use a temporary directory for output + with tempfile.TemporaryDirectory() as tmpdir: + out_path = os.path.join(tmpdir, "sidecar.nc") + sidecar = ssmis.create_sidecar(out_path=out_path) + assert sidecar is not None + assert os.path.exists(sidecar.file_path) + # Optionally, check file size > 0 + assert os.path.getsize(sidecar.file_path) > 0 + del ssmis + + +def test_ssmis_create_sidecar_new_file(): + """Test the create_sidecar function for the new SSMIS HDF5 file.""" + test_file = "tests/data/xcal/1C.F18.SSMIS.XCAL2021-V.20250105-S222535-E000725.078504.V07B.HDF5" + if not os.path.exists(test_file): + pytest.skip(f"Test file {test_file} not found") + ssmis = SSMIS(test_file) + ssmis.load() + with tempfile.TemporaryDirectory() as tmpdir: + out_path = os.path.join(tmpdir, "sidecar.nc") + sidecar = ssmis.create_sidecar(out_path=out_path) + assert sidecar is not None + assert os.path.exists(sidecar.file_path) + assert os.path.getsize(sidecar.file_path) > 0 + del ssmis