From c5150c9de20af6d73a36c75b5f92042bf4f2b757 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 26 Feb 2026 15:17:05 -0500 Subject: [PATCH 1/2] tests: add scratch to available backends --- tests/_utils.py | 61 ++++++++++++++++------------ tests/conftest.py | 2 +- tests/test_acquire_zarr_streaming.py | 2 +- tests/test_examples.py | 3 +- tests/test_integration.py | 27 ++++++++++-- tests/test_schema.py | 4 +- 6 files changed, 65 insertions(+), 34 deletions(-) diff --git a/tests/_utils.py b/tests/_utils.py index a04b6c2e..9ab2ab94 100644 --- a/tests/_utils.py +++ b/tests/_utils.py @@ -2,55 +2,64 @@ from __future__ import annotations +import json import time from contextlib import suppress -from typing import TYPE_CHECKING, Any +from pathlib import Path +from typing import Any -if TYPE_CHECKING: - from pathlib import Path +import numpy as np - import numpy as np +def read_array_data(root: Path | str, position_index: int = 0) -> np.ndarray: + """Read array data from a Zarr, TIFF, or scratch output root. -def read_array_data(path: Path | str) -> np.ndarray: - """Read array data from either Zarr or TIFF file. + Handles format-specific position/resolution path logic internally. Parameters ---------- - path : Path | str - Path to either a Zarr array directory or TIFF file. - - Returns - ------- - np.ndarray - The array data. + root : Path | str + Root output path (e.g. the OME-Zarr store, TIFF file, or scratch dir). + position_index : int + Position index to read (default 0). """ - # path = Path(path) - # Detect format by checking if it's a directory (Zarr) or file (TIFF) - if str(path).endswith((".tif", ".tiff")): - # TIFF format - import numpy as np - import tifffile + root = Path(root) - return np.asarray(tifffile.imread(path)) + # TIFF format + if str(root).endswith((".tif", ".tiff")): + import tifffile - # Zarr format - try tensorstore first, fall back to zarr + return np.asarray(tifffile.imread(root)) + + # Scratch format - has manifest.json with position shapes + manifest = root / "manifest.json" + if manifest.exists(): + meta = json.loads(manifest.read_text()) + shape = tuple(meta["position_shapes"][position_index]) + dtype = np.dtype(meta["dtype"]) + arr = np.memmap( + root / f"pos_{position_index}.dat", dtype=dtype, mode="r", shape=shape + ) + return np.array(arr) + + # Zarr format - resolve to resolution level 0 array + array_path = root / "0" try: import tensorstore as ts ts_array = ts.open( - {"driver": "zarr3", "kvstore": {"driver": "file", "path": str(path)}}, + { + "driver": "zarr3", + "kvstore": {"driver": "file", "path": str(array_path)}, + }, open=True, ).result() - import numpy as np - return np.asarray(ts_array.read().result()) except ImportError: - import numpy as np import zarr - return np.asarray(zarr.open_array(path)) + return np.asarray(zarr.open_array(array_path)) def wait_for_frames( diff --git a/tests/conftest.py b/tests/conftest.py index 0f38cb33..82616455 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ ZARR_BACKENDS.append(name) elif meta.format.endswith("tiff"): TIFF_BACKENDS.append(name) -AVAILABLE_BACKENDS = ZARR_BACKENDS + TIFF_BACKENDS +AVAILABLE_BACKENDS = ZARR_BACKENDS + TIFF_BACKENDS + INTERNAL_BACKENDS @pytest.fixture(params=AVAILABLE_BACKENDS) diff --git a/tests/test_acquire_zarr_streaming.py b/tests/test_acquire_zarr_streaming.py index 7a0cb3c8..361ee246 100644 --- a/tests/test_acquire_zarr_streaming.py +++ b/tests/test_acquire_zarr_streaming.py @@ -43,7 +43,7 @@ def test_acquire_zarr_full_streaming_support(tmp_path: Path) -> None: for bit in append_bits: stream.append(bit) - output_data = read_array_data(f"{settings.output_path}/0") + output_data = read_array_data(settings.output_path) assert output_data.shape == (18, 128, 128) assert output_data.dtype == np.dtype(settings.dtype) assert np.array_equal(output_data.flatten(), flat_data) diff --git a/tests/test_examples.py b/tests/test_examples.py index 00ef6ba8..8ce7457a 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -40,5 +40,6 @@ def test_example_runs(example_path: Path, tmp_path: Path, any_backend: str) -> N raise # Validate that example created output files (use rglob for subdirectories) - output_files = list(tmp_path.rglob("*.ome.*")) + # By convention, all examples should create outputs starting with "example_" + output_files = list(tmp_path.rglob("example_*")) assert output_files, f"Example {example_path.name} did not create output files" diff --git a/tests/test_integration.py b/tests/test_integration.py index 32d5394b..63300aa8 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -274,8 +274,10 @@ def test_cases( if settings.format.name == "ome-tiff": _assert_valid_ome_tiff(settings) - else: + elif settings.format.name == "ome-zarr": _assert_valid_ome_zarr(settings) + elif settings.format.name == "scratch": + _assert_valid_scratch(settings) @pytest.mark.parametrize("fmt", ["tiff", "zarr"]) @@ -579,6 +581,25 @@ def _assert_valid_ome_zarr(case: AcquisitionSettings) -> None: _assert_array_valid(data, dims, case.dtype, i) +def _assert_valid_scratch(case: AcquisitionSettings) -> None: + root = Path(case.output_path) + manifest = json.loads((root / "manifest.json").read_text()) + + # Manifest should round-trip the key settings + assert manifest["dtype"] == case.dtype + assert len(manifest["position_shapes"]) == len(case.positions) + + dims = case.array_storage_dimensions + expected_shape = tuple(d.count or UNBOUNDED_FRAME_COUNT for d in dims) + + for i in range(len(case.positions)): + data = read_array_data(root, position_index=i) + assert data.shape == expected_shape + assert data.dtype == np.dtype(case.dtype) + storage_names = [d.name for d in dims[:-2]] + validate_encoded_frame_values(data, storage_names, i) + + def test_skip_frames(tmp_path: Path, any_backend: str) -> None: """Test frame skipping with OMEStream.skip().""" root_path = tmp_path / f"skip_test{BACKEND_TO_EXT[any_backend]}" @@ -609,9 +630,7 @@ def test_skip_frames(tmp_path: Path, any_backend: str) -> None: stream.append(frame_value) # Verify skipped frames are zeros - is_zarr = settings.format.name == "ome-zarr" - array_path = root_path / "0" if is_zarr else root_path - data = read_array_data(array_path) + data = read_array_data(root_path) empty_frame = np.zeros(frame_shape, dtype="uint16") # Check written frames diff --git a/tests/test_schema.py b/tests/test_schema.py index c6bc0bf9..22d9b81d 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -545,7 +545,9 @@ def test_storage_order_ome(any_backend: str) -> None: assert len(settings.storage_index_dimensions) == 2 if settings.format.name == "ome-tiff": assert not settings.storage_index_permutation # CTYX is already correct - else: + elif settings.format.name == "scratch": + assert not settings.storage_index_permutation # scratch uses acquisition order + elif settings.format.name == "ome-zarr": assert settings.storage_index_permutation == (1, 0) # CTYX -> TCYX From f7c498426e88d6f26f4b4aa653083dccae1a5f2f Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 26 Feb 2026 17:01:23 -0500 Subject: [PATCH 2/2] feat: enhance ScratchBackend with array re-opening and manifest handling --- src/ome_writers/_backends/_backend.py | 4 +-- src/ome_writers/_backends/_scratch.py | 41 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/ome_writers/_backends/_backend.py b/src/ome_writers/_backends/_backend.py index ac540ec8..f0665d08 100644 --- a/src/ome_writers/_backends/_backend.py +++ b/src/ome_writers/_backends/_backend.py @@ -66,9 +66,7 @@ class ArrayLike(Protocol): @property def shape(self) -> tuple[int, ...]: ... - - def __getitem__(self, key: Any) -> Any: ... - + def __getitem__(self, key: Any, /) -> Any: ... @property def dtype(self) -> Any: ... diff --git a/src/ome_writers/_backends/_scratch.py b/src/ome_writers/_backends/_scratch.py index 83670e52..52e3d5fe 100644 --- a/src/ome_writers/_backends/_scratch.py +++ b/src/ome_writers/_backends/_scratch.py @@ -10,7 +10,7 @@ import warnings from contextlib import suppress from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING import numpy as np @@ -21,11 +21,14 @@ if TYPE_CHECKING: from collections.abc import Sequence from io import IOBase + from typing import Any, Final, Literal from ome_writers._backends._backend import ArrayLike from ome_writers._router import FrameRouter from ome_writers._schema import AcquisitionSettings +MANIFEST: Final = "manifest.json" + class ScratchBackend(ArrayBackend): """Backend that stores frames in numpy arrays (or memmap for crash recovery).""" @@ -162,10 +165,32 @@ def finalize(self) -> None: if isinstance(arr, np.memmap): arr.flush() self._write_manifest() + # Release all memmap file handles so files can be deleted/overwritten. + # Critical on Windows where open memmaps prevent file deletion. + # NOTE: this is inside of _root_path check, so purely in-memory mode + # retains arrays for get_arrays() after finalize. + self._arrays.clear() def get_arrays(self) -> Sequence[ArrayLike]: + if self._finalized and not self._arrays and self._root_path: + return self._reopen_arrays() return [_ScratchArrayView(self, i) for i in range(len(self._arrays))] + def _reopen_arrays(self) -> Sequence[ArrayLike]: + """Re-open memmap files read-only from disk after finalization.""" + assert self._root_path is not None + manifest = json.loads((self._root_path / MANIFEST).read_text()) + dtype = np.dtype(manifest["dtype"]) + return [ + np.memmap( + self._root_path / f"pos_{i}.dat", + dtype=dtype, + mode="r", + shape=tuple(shape), + ) + for i, shape in enumerate(manifest["position_shapes"]) + ] + # ------------------------------------------------------------------ # Internal helpers # ------------------------------------------------------------------ @@ -249,10 +274,8 @@ def _write_manifest(self) -> None: """Write manifest.json alongside memmap files.""" if self._root_path is None: return - self._settings_dump["position_shapes"] = [ - self._logical_shapes[i] for i in range(len(self._arrays)) - ] - (self._root_path / "manifest.json").write_text(json.dumps(self._settings_dump)) + self._settings_dump["position_shapes"] = list(self._logical_shapes) + (self._root_path / MANIFEST).write_text(json.dumps(self._settings_dump)) class _ScratchArrayView: @@ -270,12 +293,18 @@ def shape(self) -> tuple[int, ...]: @property def dtype(self) -> np.dtype: - return self._backend._arrays[self._pos_idx].dtype + if self._backend._arrays: + return self._backend._arrays[self._pos_idx].dtype + # After finalize, arrays are cleared; re-open to get dtype. + return self._backend._reopen_arrays()[self._pos_idx].dtype def __setitem__(self, key: Any, value: Any) -> None: raise TypeError("_ScratchArrayView is read-only") def __getitem__(self, key: Any) -> Any: + if not self._backend._arrays: + # After finalize, arrays are cleared; delegate to re-opened view. + return self._backend._reopen_arrays()[self._pos_idx][key] arr = self._backend._arrays[self._pos_idx] if self._backend._unbounded_axes: # Clip to logical shape so over-allocated backing storage is hidden