diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7fba72..01ae19f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,7 +63,7 @@ repos: additional_dependencies: - pytest - xarray - - adios2 + - adios2py - repo: https://github.com/codespell-project/codespell rev: "v2.3.0" diff --git a/pyproject.toml b/pyproject.toml index 46002a5..43296e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ description = "Python utilities for reading PSC data" readme = "README.md" license.file = "LICENSE" -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "Development Status :: 1 - Planning", "Intended Audience :: Science/Research", @@ -23,7 +23,7 @@ classifiers = [ "Typing :: Typed", ] dynamic = ["version"] -dependencies = ["xarray", "adios2", "typing-extensions"] +dependencies = ["xarray", "xarray-adios2", "typing-extensions"] [project.optional-dependencies] test = ["pytest >=6", "pytest-cov >=3"] @@ -42,9 +42,6 @@ Homepage = "https://github.com/psc-code/pscpy" Discussions = "https://github.com/psc-code/pscpy/discussions" Changelog = "https://github.com/psc-code/pscpy/releases" -[project.entry-points."xarray.backends"] -pscadios2_engine = "pscpy.pscadios2:PscAdios2BackendEntrypoint" - [build-system] build-backend = "setuptools.build_meta" requires = ["setuptools>=42", "setuptools-scm>=7"] @@ -81,7 +78,7 @@ report.exclude_also = ['\.\.\.', 'if typing.TYPE_CHECKING:'] [tool.mypy] files = ["src", "tests"] -python_version = "3.8" +python_version = "3.10" warn_unused_configs = true strict = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] @@ -147,3 +144,6 @@ messages_control.disable = [ "missing-function-docstring", "wrong-import-position", ] + +[tool.uv] +reinstall-package = ["pscpy"] diff --git a/src/pscpy/__init__.py b/src/pscpy/__init__.py index cf141e9..684402b 100644 --- a/src/pscpy/__init__.py +++ b/src/pscpy/__init__.py @@ -8,11 +8,14 @@ import pathlib -from pscpy import pscadios2 # noqa: F401 - from ._version import version as __version__ +from .psc import decode_psc sample_dir = pathlib.Path(__file__).parent / "sample" -__all__ = ["__version__"] +__all__ = [ + "__version__", + "decode_psc", + "sample_dir", +] diff --git a/src/pscpy/_core.pyi b/src/pscpy/_core.pyi deleted file mode 100644 index 537c611..0000000 --- a/src/pscpy/_core.pyi +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import annotations - -def add(_x: int, _y: int) -> int: ... -def subtract(_x: int, _y: int) -> int: ... 
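With the pscadios2 backend entrypoint removed from pyproject.toml, `.bp` files are now opened through the xarray-adios2 backend and post-processed with the newly exported `pscpy.decode_psc`. A minimal sketch of that workflow, mirroring the `_open_dataset` helper in `tests/test_xarray_adios2.py` further down in this diff (species list and domain extents are the values used in the tests, not defaults read from the file):

```python
import xarray as xr

import pscpy

# Open the sample output via the xarray-adios2 backend (no engine= argument,
# as in the tests), then decode PSC-specific metadata: dimension names,
# per-component fields, and x/y/z coordinates built from length/corner.
ds = xr.open_dataset(pscpy.sample_dir / "pfd.000000400.bp")
ds = pscpy.decode_psc(
    ds,
    species_names=["e", "i"],
    length=[1, 12.8, 51.2],   # test values, not read from the file
    corner=[0, -6.4, -25.6],
)
print(ds.jx_ec.sizes)  # expected per the tests: x=1, y=128, z=512
```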
diff --git a/src/pscpy/adios2py/__init__.py b/src/pscpy/adios2py/__init__.py deleted file mode 100644 index 1096959..0000000 --- a/src/pscpy/adios2py/__init__.py +++ /dev/null @@ -1,196 +0,0 @@ -from __future__ import annotations - -import itertools -import logging -import os -from collections.abc import Collection -from types import TracebackType -from typing import Any, SupportsInt - -import adios2 # type: ignore[import-untyped] -import adios2.stream # type: ignore[import-untyped] -import numpy as np -from adios2.adios import Adios # type: ignore[import-untyped] -from numpy.typing import NDArray -from typing_extensions import TypeGuard - -logger = logging.getLogger(__name__) - - -class Variable: - """Wrapper for an `adios2.Variable` object to facilitate loading and indexing into it.""" - - def __init__(self, var: adios2.Variable, engine: adios2.Engine) -> None: - self._var = var - self._engine = engine - self.name = self._name() - self.shape = self._shape() - self.dtype = self._dtype() - logger.debug("variable __init__ var %s engine %s", var, engine) - - def close(self) -> None: - logger.debug("adios2py.variable close") - self._var = None - self._engine = None - - def _assert_not_closed(self) -> None: - if not self._var: - error_message = "adios2py: variable is closed" - raise ValueError(error_message) - - def _set_selection( - self, start: NDArray[np.integer[Any]], count: NDArray[np.integer[Any]] - ) -> None: - self._assert_not_closed() - - self._var.set_selection((start[::-1], count[::-1])) - - def _shape(self) -> tuple[int, ...]: - self._assert_not_closed() - - return tuple(self._var.shape())[::-1] - - def _name(self) -> str: - self._assert_not_closed() - - return self._var.name() # type: ignore[no-any-return] - - def _dtype(self) -> np.dtype[Any]: - self._assert_not_closed() - - return np.dtype(adios2.type_adios_to_numpy(self._var.type())) # type: ignore[no-any-return] - - def __getitem__( - self, args: SupportsInt | slice | tuple[SupportsInt | slice, ...] - ) -> NDArray[Any]: - self._assert_not_closed() - - if not isinstance(args, tuple): - args = (args,) - - shape = self.shape - sel_start = np.zeros_like(shape) - sel_count = np.zeros_like(shape) - arr_shape = [] - - for d, arg in enumerate(args): - if isinstance(arg, slice): - start, stop, step = arg.indices(shape[d]) - assert stop > start - assert step == 1 - sel_start[d] = start - sel_count[d] = stop - start - arr_shape.append(sel_count[d]) - continue - - try: - idx = int(arg) - except ValueError: - pass - else: - if idx < 0: - idx += shape[d] - sel_start[d] = idx - sel_count[d] = 1 - continue - - error_message = f"invalid args to __getitem__: {args}" - raise RuntimeError(error_message) - - for d in range(len(args), len(shape)): - sel_start[d] = 0 - sel_count[d] = shape[d] - arr_shape.append(sel_count[d]) - - self._set_selection(sel_start, sel_count) - - arr = np.empty( - arr_shape, dtype=self.dtype, order="F" - ) # FIXME is column-major correct? 
- self._engine.get(self._var, arr, adios2.bindings.Mode.Sync) - return arr - - def __repr__(self) -> str: - return f"{self.__class__.__module__}.{self.__class__.__name__}(name={self.name}, shape={self.shape}, dtype={self.dtype}" - - -class FileState: - """Collects the state of a `File` to reflect the fact that they are coupled.""" - - _ad = Adios() - _io_count = itertools.count() - - def __init__(self, filename: str | os.PathLike[Any]) -> None: - self.io_name = f"io-adios2py-{next(self._io_count)}" - logger.debug("io_name = %s", self.io_name) - self.io = self._ad.declare_io(self.io_name) - self.engine = self.io.open(str(filename), adios2.bindings.Mode.Read) - - def close(self) -> None: - self.engine.close() - self._ad.remove_io(self.io_name) - - @staticmethod - def is_open(maybe_state: FileState | None) -> TypeGuard[FileState]: - return maybe_state is not None - - -class File: - """Wrapper for an `adios2.IO` object to facilitate variable and attribute reading.""" - - _state: FileState | None - - def __init__(self, filename: str | os.PathLike[Any], mode: str = "r") -> None: - logger.debug("File.__init__(%s, %s)", filename, mode) - assert mode == "r" - self._state = FileState(filename) - self._open_vars: dict[str, Variable] = {} - - self.variable_names: Collection[str] = ( - self._state.io.available_variables().keys() - ) - self.attribute_names: Collection[str] = ( - self._state.io.available_attributes().keys() - ) - - def __enter__(self) -> File: - logger.debug("File.__enter__()") - return self - - def __exit__( - self, - exception_type: type[BaseException] | None, - exception: BaseException | None, - traceback: TracebackType | None, - ) -> None: - logger.debug("File.__exit__()") - self.close() - - def __del__(self) -> None: - logger.debug("File.__del__()") - if FileState.is_open(self._state): - self.close() - - def close(self) -> None: - assert FileState.is_open(self._state) - - logger.debug("File.close(): open vars %s", self._open_vars) - for var in self._open_vars.values(): - var.close() - - self._state.close() - self._state = None - - def get_variable(self, variable_name: str) -> Variable: - assert FileState.is_open(self._state) - - var = Variable( - self._state.io.inquire_variable(variable_name), self._state.engine - ) - self._open_vars[variable_name] = var - return var - - def get_attribute(self, attribute_name: str) -> Any: - assert FileState.is_open(self._state) - - return self._state.io.inquire_attribute(attribute_name).data() diff --git a/src/pscpy/psc.py b/src/pscpy/psc.py index 8816784..36dd00d 100644 --- a/src/pscpy/psc.py +++ b/src/pscpy/psc.py @@ -1,23 +1,12 @@ from __future__ import annotations -from typing import Any, Iterable +from collections.abc import Iterable +from typing import Any import numpy as np +import xarray as xr from numpy.typing import ArrayLike, NDArray -from .adios2py import File - - -def _get_array_attribute( - file: File, attribute_name: str, default: ArrayLike | None -) -> NDArray[np.floating[Any]]: - if attribute_name in file.attribute_names: - return np.asarray(file.get_attribute(attribute_name)) - if default is not None: - return np.asarray(default) - error_messsage = f"Missing attribute '{attribute_name}' with no default specified." 
- raise KeyError(error_messsage) - class RunInfo: """Global information about the PSC run @@ -28,23 +17,22 @@ class RunInfo: def __init__( self, - file: File, + ds: xr.Dataset, length: ArrayLike | None = None, corner: ArrayLike | None = None, ) -> None: - assert len(file.variable_names) > 0 - var = next(iter(file.variable_names)) - self.gdims = np.asarray(file.get_variable(var).shape)[0:3] + first_var = ds[next(iter(ds))] + self.gdims = np.asarray(first_var.shape)[::-1][:3] - self.length = _get_array_attribute(file, "length", length) - self.corner = _get_array_attribute(file, "corner", corner) + self.length = ds.attrs.get("length", length) + self.corner = ds.attrs.get("corner", corner) self.x = self._get_coord(0) self.y = self._get_coord(1) self.z = self._get_coord(2) - def _get_coord(self, coord_idx: int) -> NDArray[np.floating[Any]]: - return np.linspace( + def _get_coord(self, coord_idx: int) -> NDArray[Any]: + return np.linspace( # type: ignore[no-any-return] start=self.corner[coord_idx], stop=self.corner[coord_idx] + self.length[coord_idx], num=self.gdims[coord_idx], @@ -101,3 +89,44 @@ def get_field_to_component(species_names: Iterable[str]) -> dict[str, dict[str, ) return field_to_component + + +def decode_psc( + ds: xr.Dataset, + species_names: Iterable[str], + length: ArrayLike | None = None, + corner: ArrayLike | None = None, +) -> xr.Dataset: + da = ds[next(iter(ds))] # first dataset + if da.dims[0] == "dim_0_1": + # for compatibility, if dimensions weren't saved as attribute in the .bp file, + # fix them up here + ds = ds.rename_dims( + { + da.dims[0]: "step", + da.dims[1]: f"comp_{da.name}", + da.dims[2]: "z", + da.dims[3]: "y", + da.dims[4]: "x", + } + ) + ds = ds.squeeze("step") + field_to_component = get_field_to_component(species_names) + + data_vars = {} + for var_name in ds: + if var_name in field_to_component: + for field, component in field_to_component[var_name].items(): # type: ignore[index] + data_vars[field] = ds[var_name].isel({f"comp_{var_name}": component}) + ds = ds.assign(data_vars) + + if length is not None: + run_info = RunInfo(ds, length=length, corner=corner) + coords = { + "x": ("x", run_info.x), + "y": ("y", run_info.y), + "z": ("z", run_info.z), + } + ds = ds.assign_coords(coords) + + return ds diff --git a/src/pscpy/pscadios2.py b/src/pscpy/pscadios2.py deleted file mode 100644 index aec99d8..0000000 --- a/src/pscpy/pscadios2.py +++ /dev/null @@ -1,230 +0,0 @@ -from __future__ import annotations - -import os -import pathlib -from typing import Any, Iterable, Protocol, SupportsInt - -import numpy as np -import xarray -from numpy.typing import ArrayLike, NDArray -from typing_extensions import Never, override -from xarray.backends import CachingFileManager -from xarray.backends.common import ( - AbstractDataStore, - BackendArray, - BackendEntrypoint, - _normalize_path, -) -from xarray.backends.locks import ( - SerializableLock, - combine_locks, - ensure_lock, - get_write_lock, -) -from xarray.core import indexing -from xarray.core.datatree import DataTree -from xarray.core.types import ReadBuffer -from xarray.core.utils import Frozen, FrozenDict - -from .adios2py import File, Variable -from .psc import RunInfo, get_field_to_component - - -class Lock(Protocol): - """Provides duck typing for xarray locks, which do not inherit from a common base class.""" - - def acquire(self, blocking: bool = True) -> bool: ... - def release(self) -> None: ... - def __enter__(self) -> None: ... - def __exit__(self, *args: Any) -> None: ... - def locked(self) -> bool: ... 
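The `decode_psc` helper added above takes over, eagerly, the per-component splitting that the lazily indexed backend being deleted here used to perform. A toy sketch of that transformation and of the coordinate grid `RunInfo` builds; the component index and the endpoint=False grid match the assertions in the tests, while the toy Dataset stands in for the renamed/squeezed state of a real `.bp` file:

```python
import numpy as np
import xarray as xr

# Toy stand-in for a PSC field file after dimension renaming and step
# squeezing: "jeh" stored as one array with a leading component dimension.
raw = xr.Dataset(
    {"jeh": (("comp_jeh", "z", "y", "x"), np.zeros((9, 512, 128, 1), dtype=np.float32))}
)

# Subset of get_field_to_component(["e", "i"]); only the jx_ec entry is shown,
# the full mapping comes from that function.
field_to_component = {"jeh": {"jx_ec": 0}}
data_vars = {
    field: raw["jeh"].isel(comp_jeh=comp)
    for field, comp in field_to_component["jeh"].items()
}
decoded = raw.assign(data_vars)
assert decoded["jx_ec"].dims == ("z", "y", "x")

# RunInfo._get_coord: grid built from corner/length/gdims, e.g. the z
# coordinate asserted in test_open_dataset below.
z = np.linspace(-25.6, -25.6 + 51.2, 512, endpoint=False)
```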
- - -# adios2 is not thread safe -ADIOS2_LOCK = SerializableLock() - - -class PscAdios2Array(BackendArray): - """Lazy evaluation of a variable stored in PSC's adios2 field output. - - This also takes care of slicing out the specific component of the data stored as 4-d array. - """ - - def __init__( - self, - variable_name: str, - datastore: PscAdios2Store, - orig_varname: str, - component: int, - ) -> None: - self.variable_name = variable_name - self.datastore = datastore - self._orig_varname = orig_varname - self._component = component - array = self.get_array() - self.shape = array.shape[:-1] - self.dtype = array.dtype - - def get_array(self, needs_lock: bool = True) -> Variable: - return self.datastore.acquire(needs_lock).get_variable(self._orig_varname) - - def __getitem__(self, key: indexing.ExplicitIndexer) -> Any: - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem - ) - - def _getitem( - self, args: tuple[SupportsInt | slice, ...] - ) -> NDArray[np.floating[Any]]: - with self.datastore.lock: - return self.get_array(needs_lock=False)[ - (*args, self._component) - ] # FIXME add ... in between - - -class PscAdios2Store(AbstractDataStore): - """DataStore to facilitate loading an Adios2 file.""" - - def __init__( - self, - manager: CachingFileManager, - species_names: Iterable[str], - mode: str | None = None, - lock: Lock = ADIOS2_LOCK, - length: ArrayLike | None = None, - corner: ArrayLike | None = None, - ) -> None: - self._manager = manager - self._mode = mode - self.lock: Lock = ensure_lock(lock) # type: ignore[no-untyped-call] - self.psc = RunInfo(self.ds, length=length, corner=corner) - self._species_names = species_names - - @staticmethod - def open( - filename: str, - species_names: Iterable[str], - mode: str = "r", - lock: Lock | None = None, - length: ArrayLike | None = None, - corner: ArrayLike | None = None, - ) -> PscAdios2Store: - if lock is None: - if mode == "r": - lock = ADIOS2_LOCK - else: - lock = combine_locks([ADIOS2_LOCK, get_write_lock(filename)]) # type: ignore[no-untyped-call] - - manager = CachingFileManager(File, filename, mode=mode) - return PscAdios2Store( - manager, species_names, mode=mode, lock=lock, length=length, corner=corner - ) - - def acquire(self, needs_lock: bool = True) -> File: - with self._manager.acquire_context(needs_lock) as root: - ds = root - assert isinstance(ds, File) - return ds - - @property - def ds(self) -> File: - return self.acquire() - - @override - def get_variables(self) -> Frozen[str, xarray.DataArray]: - field_to_component = get_field_to_component(self._species_names) - - variables: dict[str, tuple[str, int]] = {} - for orig_varname in self.ds.variable_names: - for field, component in field_to_component[orig_varname].items(): - variables[field] = (orig_varname, component) - - return FrozenDict( - (field, self.open_store_variable(field, *tup)) - for field, tup in variables.items() - ) - - def open_store_variable( - self, field: str, orig_varname: str, component: int - ) -> xarray.DataArray: - data = indexing.LazilyIndexedArray( - PscAdios2Array(field, self, orig_varname, component) - ) - dims = ["x", "y", "z"] - coords = {"x": self.psc.x, "y": self.psc.y, "z": self.psc.z} - return xarray.DataArray(data, dims=dims, coords=coords) - - @override - def get_attrs(self) -> Frozen[str, Any]: - return FrozenDict( - (name, self.ds.get_attribute(name)) for name in self.ds.attribute_names - ) - - @override - def get_dimensions(self) -> Never: - raise NotImplementedError() - - -def 
psc_open_dataset( - filename_or_obj: Any, - species_names: Iterable[str], - length: ArrayLike | None = None, - corner: ArrayLike | None = None, -) -> xarray.Dataset: - filename = _normalize_path(filename_or_obj) - store = PscAdios2Store.open(filename, species_names, length=length, corner=corner) - - data_vars, attrs = store.load() # type: ignore[no-untyped-call] - ds = xarray.Dataset(data_vars=data_vars, attrs=attrs) - ds.set_close(store.close) - return ds - - -class PscAdios2BackendEntrypoint(BackendEntrypoint): - """Entrypoint that lets xarray recognize and read (PSC's) Adios2 output.""" - - open_dataset_parameters = ("filename_or_obj", "drop_variables") - available = True - - @override - def open_dataset( - self, - filename_or_obj: str | os.PathLike[Any] | ReadBuffer[Any] | AbstractDataStore, - *, - drop_variables: str | Iterable[str] | None = None, - length: ArrayLike | None = None, - corner: ArrayLike | None = None, - species_names: Iterable[str] - | None = None, # e.g. ['e', 'i']; FIXME should be readable from file - ) -> xarray.Dataset: - if not isinstance(filename_or_obj, (str, os.PathLike)): - raise NotImplementedError() - - if species_names is None: - error_message = f"Missing required keyword argument: '{species_names=}'" - raise ValueError(error_message) - - return psc_open_dataset( - filename_or_obj, - species_names, - length=length, - corner=corner, - ) - - @override - def guess_can_open( - self, - filename_or_obj: str | os.PathLike[Any] | ReadBuffer[Any] | AbstractDataStore, - ) -> bool: - if isinstance(filename_or_obj, (str, os.PathLike)): - ext = pathlib.Path(filename_or_obj).suffix - return ext in {".bp"} - return False - - @override - def open_datatree( - self, - filename_or_obj: str | os.PathLike[Any] | ReadBuffer[Any] | AbstractDataStore, - **kwargs: Any, - ) -> DataTree: - raise NotImplementedError() diff --git a/src/pscpy/sample/pfd_moments.000000400.bp/data.0 b/src/pscpy/sample/pfd_moments.000000400.bp/data.0 new file mode 100644 index 0000000..f2a2fbf Binary files /dev/null and b/src/pscpy/sample/pfd_moments.000000400.bp/data.0 differ diff --git a/src/pscpy/sample/pfd_moments.000000400.bp/md.0 b/src/pscpy/sample/pfd_moments.000000400.bp/md.0 new file mode 100644 index 0000000..476dce9 Binary files /dev/null and b/src/pscpy/sample/pfd_moments.000000400.bp/md.0 differ diff --git a/src/pscpy/sample/pfd_moments.000000400.bp/md.idx b/src/pscpy/sample/pfd_moments.000000400.bp/md.idx new file mode 100644 index 0000000..ef89707 Binary files /dev/null and b/src/pscpy/sample/pfd_moments.000000400.bp/md.idx differ diff --git a/src/pscpy/sample/pfd_moments.000000400.bp/profiling.json b/src/pscpy/sample/pfd_moments.000000400.bp/profiling.json new file mode 100644 index 0000000..c906f04 --- /dev/null +++ b/src/pscpy/sample/pfd_moments.000000400.bp/profiling.json @@ -0,0 +1,6 @@ +[ +{ "rank": 0, "start": "Tue_Sep_27_10:16:06_2022", "threads": 1, "bytes": 1704384, "mkdir_mus": 159, "memcpy_mus": 1709727, "aggregation_mus": 0, "meta_sort_merge_mus": 485, "minmax_mus": 667801, "buffering_mus": 2425146, "transport_0": { "type": "File_POSIX", "close_mus": 529, "write_mus": 2476, "open_mus": 152 },"transport_1": { "type": "File_POSIX", "close_mus": 0, "write_mus": 73, "open_mus": 185 } }, +{ "rank": 1, "start": "Tue_Sep_27_10:16:06_2022", "threads": 1, "bytes": 1704317, "mkdir_mus": 154, "memcpy_mus": 1729125, "aggregation_mus": 0, "meta_sort_merge_mus": 720, "minmax_mus": 566461, "buffering_mus": 2343236, }, +{ "rank": 2, "start": "Tue_Sep_27_10:16:06_2022", "threads": 1, "bytes": 
1704317, "mkdir_mus": 167, "memcpy_mus": 1693762, "aggregation_mus": 0, "meta_sort_merge_mus": 679, "minmax_mus": 686806, "buffering_mus": 2426737, }, +{ "rank": 3, "start": "Tue_Sep_27_10:16:06_2022", "threads": 1, "bytes": 1704317, "mkdir_mus": 169, "memcpy_mus": 1735177, "aggregation_mus": 0, "meta_sort_merge_mus": 162, "minmax_mus": 695790, "buffering_mus": 2477657, } +] diff --git a/tests/test_Adios2Store.py b/tests/test_Adios2Store.py new file mode 100644 index 0000000..a317de1 --- /dev/null +++ b/tests/test_Adios2Store.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import adios2py +import numpy as np +import pytest +import xarray as xr +from xarray_adios2 import Adios2Store + +import pscpy + + +# FIXME, duplicated +@pytest.fixture +def test_filename(tmp_path): + filename = tmp_path / "test_file.bp" + with adios2py.File(filename, mode="w") as file: + for n, step in zip(range(5), file.steps, strict=False): + step["scalar"] = n + step["scalar"].attrs["dimensions"] = "steps" + step["arr1d"] = np.arange(10) + step["arr1d"].attrs["dimensions"] = "steps x" + + return filename + + +@pytest.fixture +def test_store(test_filename): + return Adios2Store.open(test_filename, mode="r") + + +def test_open_close(test_store): + test_store.close() + + +def test_open_with_parameters(test_store): + filename = test_store.ds.filename + test_store.close() + + params = {"OpenTimeoutSecs": "20"} + with Adios2Store.open(filename, parameters=params) as store: + assert store.ds.parameters == params + + +def test_open_with_engine(): + with Adios2Store.open( + str(pscpy.sample_dir / "pfd.000000400.bp"), engine_type="BP4" + ) as store: + assert store.ds.engine_type == "BP4" + + +def test_vars_attrs(test_store): + vars, attrs = test_store.load() + assert vars == {} + assert attrs == {} + + for step in test_store.ds: + vars, attrs = step.load() + assert vars.keys() == set({"scalar", "arr1d"}) + assert attrs == {} + + +def test_rra(test_filename): + with Adios2Store.open(test_filename, mode="rra") as store: + vars, _ = store.load() + assert np.all(vars["scalar"] == np.arange(5)) + + +def test_dump_to_store(tmp_path): + ds = xr.Dataset( + data_vars={"var": xr.DataArray(np.arange(10) * 10.0, dims=("x",))}, + coords={"x": np.arange(10)}, + ) + + filename = tmp_path / "test_store1.bp" + with adios2py.File(filename, "w") as file: + store = Adios2Store(file, mode="w") + ds.dump_to_store(store) + + with adios2py.File(filename, "rra") as file: + ds_read = xr.open_dataset(Adios2Store(file.steps[0])) + assert ds == ds_read + assert ds.coords.keys() == ds_read.coords.keys() diff --git a/tests/test_adios2py.py b/tests/test_adios2py.py index b48fa2b..905c4d2 100644 --- a/tests/test_adios2py.py +++ b/tests/test_adios2py.py @@ -1,38 +1,377 @@ from __future__ import annotations +import adios2 +import adios2py import numpy as np +import pytest import pscpy -from pscpy import adios2py -def test_open_close(): - file = adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") - file.close() +@pytest.fixture +def pfd_file(): + return adios2py.File(pscpy.sample_dir / "pfd.000000400.bp", mode="rra") -def test_with(): - with adios2py.File(pscpy.sample_dir / "pfd.000000400.bp"): +@pytest.fixture +def test_filename(tmp_path): + filename = tmp_path / "test_file.bp" + with adios2.Stream(str(filename), mode="w") as file: + for step, _ in enumerate(file.steps(5)): + file.write("scalar", step) + arr1d = np.arange(10) + file.write("arr1d", arr1d, arr1d.shape, [0], arr1d.shape) + arr2d = (np.arange(12) + step).reshape(3, 4) + 
file.write("arr2d", arr2d, arr2d.shape, [0, 0], arr2d.shape) + + return filename + + +@pytest.fixture +def test_file(test_filename): + return adios2py.File(test_filename, mode="r") + + +def test_open_close(pfd_file): + assert pfd_file # is open + pfd_file.close() + assert not pfd_file # is closed + + +def test_open_twice(): + file1 = adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") # noqa: F841 + file2 = adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") # noqa: F841 + + +def test_open_with_parameters(): + params = {"OpenTimeoutSecs": "15"} + with adios2py.File( + pscpy.sample_dir / "pfd.000000400.bp", parameters=params + ) as file: + assert file.parameters == params + + +def test_open_with_engine(): + with adios2py.File( + pscpy.sample_dir / "pfd.000000400.bp", engine_type="BP4" + ) as file: + assert file.engine_type == "BP4" + + +def test_with(pfd_file): + with pfd_file: pass -def test_variable_names(): - with adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") as file: - assert file.variable_names == set({"jeh"}) - assert file.attribute_names == set({"ib", "im", "step", "time"}) +def test_file_repr(pfd_file): + assert repr(pfd_file) + + +def test_keys(pfd_file): + assert pfd_file.keys() == set({"jeh"}) + + +def test_attrs_keys(pfd_file): + assert pfd_file.attrs.keys() == set({"ib", "im", "step", "time"}) + + +def test_attrs_contains(pfd_file): + assert "ib" in pfd_file.attrs + assert "ix" not in pfd_file.attrs + + +def test_attrs_iter(pfd_file): + assert set(pfd_file.attrs) == set({"ib", "im", "step", "time"}) + + +def test_get_variable(pfd_file): + var = pfd_file["jeh"] + assert var.name == "jeh" + assert var.shape == (1, 9, 512, 128, 1) + assert var.dtype == np.float32 + +def test_get_variable_not_found(pfd_file): + with pytest.raises(KeyError): + pfd_file["xyz"] -def test_get_variable(): - with adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") as file: - var = file.get_variable("jeh") + +def test_variable_bool(pfd_file): + with pfd_file: + var = pfd_file["jeh"] + assert var + assert var.shape == (1, 9, 512, 128, 1) + + assert not var + + +def test_variable_shape(pfd_file): + with pfd_file: + var = pfd_file["jeh"] + assert var.shape == (1, 9, 512, 128, 1) + + +def test_variable_name(pfd_file): + with pfd_file: + var = pfd_file["jeh"] assert var.name == "jeh" - assert var.shape == (1, 128, 512, 9) + + +def test_variable_dtype(pfd_file): + with pfd_file: + var = pfd_file["jeh"] assert var.dtype == np.float32 -def test_get_attribute(): - with adios2py.File(pscpy.sample_dir / "pfd.000000400.bp") as file: - assert all(file.get_attribute("ib") == (0, 0, 0)) - assert all(file.get_attribute("im") == (1, 128, 128)) - assert np.isclose(file.get_attribute("time"), 109.38) - assert file.get_attribute("step") == 400 +def test_variable_repr(pfd_file): + with pfd_file: + var = pfd_file["jeh"] + assert "name=jeh" in repr(var) + + +@pytest.mark.skip +def test_variable_is_reverse_dims(pfd_file): + var = pfd_file["jeh"] + assert not var._is_reverse_dims() + + # with adios2py.File( + # "/workspaces/openggcm/ggcm-gitm-coupling-tools/data/iono_to_sigmas.bp" + # ) as file: + # var = file["pot"] + # assert var.is_reverse_dims + + +def test_variable_getitem_scalar(test_file): + for step in test_file.steps: + var = step["scalar"] + assert var[()] == step.step() + + +def test_variable_getitem_arr1d(test_file): + for step in test_file.steps: + var = step["arr1d"] + assert np.all(var[()] == np.arange(10)) + + +def test_variable_getitem_arr1d_indexing(test_file): + for step in test_file.steps: + var = 
step["arr1d"] + assert var[2] == 2 + assert np.all(var[2:4] == np.arange(10)[2:4]) + assert np.all(var[:] == np.arange(10)[:]) + + +@pytest.mark.xfail +def test_variable_getitem_arr1d_indexing_step(test_file): + for step in test_file.steps: + var = step["arr1d"] + assert np.all(var[::2] == np.arange(10)[::2]) + + +@pytest.mark.xfail +def test_variable_getitem_arr1d_indexing_reverse(test_file): + for step in test_file.steps: + var = step["arr1d"] + assert np.all(var[::-1] == np.arange(10)[::-1]) + + +def test_variable_array(test_file): + for step in test_file.steps: + scalar = np.asarray(step["scalar"]) + assert np.array_equal(scalar, step.step()) + arr1d = np.asarray(step["arr1d"]) + assert np.array_equal(arr1d, np.arange(10)) + + +def test_get_attribute(pfd_file): + assert all(pfd_file.attrs["ib"] == (0, 0, 0)) + assert all(pfd_file.attrs["im"] == (1, 128, 128)) + assert np.isclose(pfd_file.attrs["time"], 109.38) + assert pfd_file.attrs["step"] == 400 + + +def test_write_streaming(tmp_path): + with adios2.Stream(str(tmp_path / "test_streaming.bp"), mode="w") as file: + for step, _ in enumerate(file.steps(5)): + file.write("scalar", step) + + +def test_read_streaming_adios2(tmp_path): + test_write_streaming(tmp_path) # type: ignore[no-untyped-call] + with adios2.Stream(str(tmp_path / "test_streaming.bp"), mode="r") as file: + for n, step in enumerate(file): + scalar = step.read("scalar") + assert scalar == n + assert n == 4 + + +def test_read_streaming_adios2_step_persist(tmp_path): + test_write_streaming(tmp_path) # type: ignore[no-untyped-call] + with adios2.Stream(str(tmp_path / "test_streaming.bp"), mode="r") as file: + for n, step in enumerate(file): + if n == 1: + step1 = step + + # This may be confusing, but behaves as designed + assert step1.read("scalar") == 4 + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_read_adios2py(test_filename, mode): + with adios2py.File(test_filename, mode=mode) as file: + for n, step in enumerate(file.steps): + scalar = step["scalar"][()] + assert scalar == n + assert n == 4 + + +def test_read_streaming_adios2py_resume(test_file): + # do 0th iteration + for step in test_file.steps: + scalar = step["scalar"][()] + assert step.step() == 0 + assert scalar == 0 + break + + # then do the rest separately + for n, step in enumerate(test_file.steps): + scalar = step["scalar"][()] + assert step.step() == n + 1 + assert scalar == n + 1 + assert n == 3 + + +def test_read_streaming_adios2py_next(test_file): + # do 0th iteration + with test_file.steps.next() as step: + scalar = step["scalar"][()] + assert scalar == 0 + + # then do the rest separately + for n, step in enumerate(test_file.steps): + scalar = step["scalar"][()] + assert step.step() == n + 1 + assert scalar == n + 1 + assert n == 3 + + +@pytest.mark.parametrize("mode", ["rra", "r"]) +def test_read_adios2py_step_persist(test_filename, mode): + with adios2py.File(test_filename, mode=mode) as file: + for n, step in enumerate(file.steps): + if n == 1: + step1 = step + + assert step1.step() == 1 + if mode == "rra": + assert step1["scalar"][()] == 1 + else: + with pytest.raises(IndexError): + step1["scalar"][()] + + +@pytest.mark.parametrize("mode", ["rra", "r"]) +def test_read_adios2py_var_persist_r(test_filename, mode): + with adios2py.File(test_filename, mode=mode) as file: + for n, step in enumerate(file.steps): + if n == 1: + var1 = step["scalar"] + + if mode == "rra": + assert var1[()] == 1 + else: + with pytest.raises(IndexError): + var1[()] + + +@pytest.mark.parametrize("mode", ["r", 
"rra"]) +def test_read_adios2py_steps_len(test_filename, mode): + with adios2py.File(test_filename, mode=mode) as file: + assert len(file.steps) == 5 + + +def test_read_adios2py_steps_getitem_rra(test_filename): + with adios2py.File(test_filename, mode="rra") as file: + step = file.steps[2] + assert step["scalar"][()] == 2 + + +def test_read_adios2py_steps_getitem_r(test_filename): + with adios2py.File(test_filename, mode="r") as file: # noqa: SIM117 + with pytest.raises(IndexError): + file.steps[2] + + +def test_read_streaming_adios2py_current_step_outside_step(test_file): + assert not test_file.in_step() + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_write_read_mixed(tmp_path, mode): + filename = tmp_path / "mixed.bp" + with adios2.Stream(str(filename), mode="w") as file: + file.write("scalar", -1) + arr1d = np.arange(5) + file.write("arr1d", arr1d, arr1d.shape, [0], arr1d.shape) + + # these steps just get lost... + for step, _ in enumerate(file.steps(5)): + file.write("scalar", step) + arr1d = np.arange(10) + file.write("arr1d", arr1d, arr1d.shape, [0], arr1d.shape) + + with adios2.Stream(str(filename), mode=mode) as file: + assert file.num_steps() == 1 + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_write_read(tmp_path, mode): + filename = tmp_path / "steps.bp" + with adios2.Stream(str(filename), mode="w") as file: + for step, _ in enumerate(file.steps(5)): + file.write("scalar", step) + arr1d = np.arange(10) + file.write("arr1d", arr1d, arr1d.shape, [0], arr1d.shape) + + with adios2.Stream(str(filename), mode=mode) as file: + assert file.num_steps() == 5 + + +def test_read_steps(test_filename): + with adios2py.File(test_filename, mode="rra") as file: + assert len(file.steps) == 5 + + var_scalar = file["scalar"] + assert var_scalar.shape == (5,) + assert np.all(var_scalar == np.arange(5)) + + var_arr1d = file["arr1d"] + assert var_arr1d.shape == (5, 10) + assert np.all(var_arr1d == np.arange(10)) + + var_arr2d = file["arr2d"] + assert var_arr2d.shape == (5, 3, 4) + + for n in range(len(file.steps)): + ref2d = (np.arange(12) + n).reshape(3, 4) + assert np.all(var_arr2d[n] == ref2d) + + +# def test_read_steps_r(): +# with adios2py.File( +# "../ggcm-gitm-coupling-tools/data/coupling0001/ref/iono_to_sigmas.bp", mode="r" +# ) as file: +# for n, step in enumerate(file.steps): +# var = step["pot"] +# assert var.shape == (61, 181) +# assert np.any(var[:]) + + +# def test_single_value(): +# with adios2py.File( +# "/workspaces/openggcm/ggcm-gitm-coupling-tools/data/iono_to_sigmas.bp" +# ) as file: +# assert "dacttime" in file.variable_names +# var = file["dacttime"] +# val = var[()] +# assert np.isclose(val, 1.4897556e09) diff --git a/tests/test_xarray_adios2.py b/tests/test_xarray_adios2.py index 36cba8e..74f0513 100644 --- a/tests/test_xarray_adios2.py +++ b/tests/test_xarray_adios2.py @@ -1,19 +1,179 @@ from __future__ import annotations +import os +from typing import Any + +import adios2py import numpy as np +import pytest import xarray as xr +from xarray_adios2 import Adios2Store import pscpy -def test_open_dataset(): - ds = xr.open_dataset( - pscpy.sample_dir / "pfd.000000400.bp", - species_names=[], +@pytest.fixture +def test_filename(tmp_path): + filename = tmp_path / "test_file.bp" + with adios2py.File(filename, mode="w") as file: + file.attrs["step_dimension"] = "step" + for n, step in zip(range(5), file.steps, strict=False): + step["scalar"] = n + step["arr1d"] = np.arange(10) + step["arr1d"].attrs["dimensions"] = "x" + + return filename + + 
+@pytest.fixture +def test_filename_2(tmp_path): + filename = tmp_path / "test_file_2.bp" + with adios2py.File(filename, mode="w") as file: + file.attrs["step_dimension"] = "time" + for n, step in zip(range(5), file.steps, strict=False): + step["step"] = n + step["time"] = 10.0 * n + + step["x"] = np.linspace(0, 1, 10) + step["x"].attrs["dimensions"] = "x" + + step["arr1d"] = np.arange(10) + step["arr1d"].attrs["dimensions"] = "x" + + return filename + + +@pytest.fixture +def test_filename_3(tmp_path): + filename = tmp_path / "test_file_3.bp" + with adios2py.File(filename, mode="w") as file: + file.attrs["step_dimension"] = "time" + for n, step in zip(range(5), file.steps, strict=False): + step["step"] = n + # step["step"].attrs["dimensions"] = "step" + + step["time"] = 100.0 + 10 * n + step["time"].attrs["units"] = "second since 2020-01-01" + + return filename + + +@pytest.fixture +def test_filename_4(tmp_path): + filename = tmp_path / "test_file_4.bp" + with adios2py.File(filename, mode="w") as file: + file.attrs["step_dimension"] = "time" + for n, step in zip(range(5), file.steps, strict=False): + step["time"] = n + step["time"].attrs["units"] = "seconds since 1970-01-01" + + return filename + + +def _open_dataset(filename: os.PathLike[Any]) -> xr.Dataset: + ds = xr.open_dataset(filename) + return pscpy.decode_psc( + ds, + species_names=["e", "i"], length=[1, 12.8, 51.2], corner=[0, -6.4, -25.6], ) + + +def test_open_dataset(): + ds = _open_dataset(pscpy.sample_dir / "pfd.000000400.bp") assert "jx_ec" in ds assert ds.coords.keys() == set({"x", "y", "z"}) - assert ds.jx_ec.shape == (1, 128, 512) + assert ds.jx_ec.sizes == dict(x=1, y=128, z=512) # noqa: C408 assert np.allclose(ds.jx_ec.z, np.linspace(-25.6, 25.6, 512, endpoint=False)) + + +def test_component(): + ds = _open_dataset(pscpy.sample_dir / "pfd.000000400.bp") + assert ds.jeh.sizes == dict(x=1, y=128, z=512, comp_jeh=9) # noqa: C408 + assert np.all(ds.jeh.isel(comp_jeh=0) == ds.jx_ec) + + +def test_selection(): + ds = _open_dataset(pscpy.sample_dir / "pfd.000000400.bp") + assert ds.jeh.sizes == dict(x=1, y=128, z=512, comp_jeh=9) # noqa: C408 + assert np.all( + ds.jeh.isel(comp_jeh=0, y=slice(0, 10), z=slice(0, 40)) + == ds.jx_ec.isel(y=slice(0, 10), z=slice(0, 40)) + ) + + +def test_computed(): + ds = _open_dataset(pscpy.sample_dir / "pfd.000000400.bp") + ds = ds.assign(jx=ds.jeh.isel(comp_jeh=0)) + assert np.all(ds.jx == ds.jx_ec) + + +def test_computed_via_lambda(): + ds = _open_dataset(pscpy.sample_dir / "pfd.000000400.bp") + ds = ds.assign(jx=lambda ds: ds.jeh.isel(comp_jeh=0)) + assert np.all(ds.jx == ds.jx_ec) + + +def test_pfd_moments(): + ds = _open_dataset(pscpy.sample_dir / "pfd_moments.000000400.bp") + assert "all_1st" in ds + assert ds.all_1st.sizes == dict(x=1, y=128, z=512, comp_all_1st=26) # noqa: C408 + assert "rho_i" in ds + assert np.all(ds.rho_i == ds.all_1st.isel(comp_all_1st=13)) + + +def test_open_dataset_steps(test_filename): + ds = xr.open_dataset(test_filename) + assert ds.keys() == set({"scalar", "arr1d"}) + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_open_dataset_steps_from_Step(test_filename, mode): + with adios2py.File(test_filename, mode) as file: + for n, step in enumerate(file.steps): + store = Adios2Store(step) + ds = xr.open_dataset(store) + assert ds.keys() == set({"scalar", "arr1d"}) + assert ds["scalar"] == n + + +def test_open_dataset_2(test_filename_2): + ds = xr.open_dataset(test_filename_2) + assert ds.keys() == set({"step", "arr1d"}) + assert ds.step.shape == (5,) + 
assert ds.arr1d.shape == (5, 10) + assert ds.coords.keys() == set({"time", "x"}) + assert ds.time.shape == (5,) + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_open_dataset_2_step(test_filename_2, mode): + with adios2py.File(test_filename_2, mode=mode) as file: + for _, step in enumerate(file.steps): + ds = xr.open_dataset(Adios2Store(step)) + assert ds.keys() == set({"step", "time", "arr1d"}) + assert ds.coords.keys() == set({"x"}) + + +def test_open_dataset_3(test_filename_3): + ds = xr.open_dataset(test_filename_3) + assert ds.time.shape == (5,) + assert ds.time[0] == np.datetime64("2020-01-01T00:01:40") + assert ds.time[1] == np.datetime64("2020-01-01T00:01:50") + + +@pytest.mark.parametrize("mode", ["r", "rra"]) +def test_open_dataset_3_step(test_filename_3, mode): + with adios2py.File(test_filename_3, mode=mode) as file: + for n, step in enumerate(file.steps): + ds = xr.open_dataset(Adios2Store(step)) + assert ds.time == np.datetime64("2020-01-01T00:01:40") + np.timedelta64( + 10 * n, "s" + ) + + +def test_open_dataset_4(test_filename_4): + ds = xr.open_dataset(test_filename_4) + assert ds.time[0] == np.datetime64("1970-01-01T00:00:00.000") + assert ds.time[1] == np.datetime64("1970-01-01T00:00:01.000")
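Taken together with the new pfd_moments sample added in this diff, the decode path also covers species-dependent moment fields. A closing sketch, with the species list and domain extents copied from the tests rather than read from the file:

```python
import numpy as np
import xarray as xr

import pscpy

# Open the new sample moments file through the xarray-adios2 backend and
# decode it; species-dependent fields such as rho_i are split out of the
# 26-component "all_1st" variable (see test_pfd_moments above).
ds = xr.open_dataset(pscpy.sample_dir / "pfd_moments.000000400.bp")
ds = pscpy.decode_psc(
    ds,
    species_names=["e", "i"],
    length=[1, 12.8, 51.2],
    corner=[0, -6.4, -25.6],
)
assert np.all(ds.rho_i == ds.all_1st.isel(comp_all_1st=13))
```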