From 7b1b945a5c8a083f58d895e65532782d6e34e4c3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:00:05 -0500 Subject: [PATCH 1/7] feat: populate per-asset dataStandard for BIDS, HED, NWB, and OME/NGFF - BIDSDatasetDescriptionAsset.get_metadata(): always sets BIDS standard (with BIDSVersion), adds HED when HEDVersion present in JSON, warns on read failure - NWBAsset.get_metadata(): sets NWB standard - ZarrBIDSAsset.get_metadata(): sets OME/NGFF for .ome.zarr assets - Guard for older dandischema without dataStandard on BareAsset; RuntimeError if dandischema >= 0.12.2 lacks it - Register ai_generated pytest marker in tox.ini Requires dandischema with per-asset dataStandard support (0.12.2+). Works silently with older dandischema. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 8 ++++- dandi/files/bids.py | 64 +++++++++++++++++++++++++++++++++++++-- dandi/tests/test_files.py | 63 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 15bc616c4..1ba0bb7b6 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -18,7 +18,7 @@ from dandischema.digests.dandietag import DandiETag from dandischema.models import BareAsset, CommonModel from dandischema.models import Dandiset as DandisetMeta -from dandischema.models import get_schema_version +from dandischema.models import StandardsType, get_schema_version, nwb_standard from packaging.version import Version from pydantic import ValidationError from pydantic_core import ErrorDetails @@ -504,6 +504,12 @@ def get_metadata( else: raise metadata.path = self.path + if "dataStandard" in BareAsset.model_fields: + nwb = StandardsType(**nwb_standard) + if metadata.dataStandard is None: + metadata.dataStandard = [nwb] + elif nwb not in metadata.dataStandard: + metadata.dataStandard.append(nwb) return metadata # TODO: @validate_cache.memoize_path diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 7e43163ec..17573d5e9 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -3,12 +3,22 @@ from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime +import json from pathlib import Path from threading import Lock import weakref -from dandischema.models import BareAsset +from dandischema import __version__ as dandischema_version +from dandischema.models import ( + BareAsset, + StandardsType, + bids_standard, + hed_standard, + ome_ngff_standard, +) +from packaging.version import Version +import dandi from dandi.bids_validator_deno import bids_validate from .bases import GenericAsset, LocalFileAsset, NWBAsset @@ -23,9 +33,36 @@ ValidationResult, ) +lgr = dandi.get_logger() + BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) +_HAS_DATA_STANDARD = "dataStandard" in BareAsset.model_fields +if not _HAS_DATA_STANDARD and Version(dandischema_version) >= Version("0.12.2"): + raise RuntimeError( + f"dandischema {dandischema_version} should have " + f"'dataStandard' field on BareAsset" + ) + + +def _add_standard( + metadata: BareAsset, + standard_dict: dict, + version: str | None = None, +) -> None: + """Add a data standard to asset metadata if the field is available.""" + if not _HAS_DATA_STANDARD: + return + kwargs = dict(standard_dict) + if version and "version" in StandardsType.model_fields: + kwargs["version"] = version + standard = StandardsType(**kwargs) + if metadata.dataStandard is None: + metadata.dataStandard = [standard] + elif standard not in metadata.dataStandard: + metadata.dataStandard.append(standard) + @dataclass class BIDSDatasetDescriptionAsset(LocalFileAsset): @@ -192,7 +229,28 @@ def get_validation_errors( assert self._dataset_errors is not None return self._dataset_errors.copy() - # get_metadata(): inherit use of default metadata from LocalFileAsset + def get_metadata( + self, + digest: Digest | None = None, + ignore_errors: bool = True, + ) -> BareAsset: + metadata = super().get_metadata(digest=digest, ignore_errors=ignore_errors) + try: + with open(self.filepath) as f: + desc = json.load(f) + except (OSError, json.JSONDecodeError) as e: + lgr.warning("Failed to read %s: %s", self.filepath, e) + _add_standard(metadata, bids_standard) + return metadata + _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) + if hed_version := desc.get("HEDVersion"): + # HEDVersion can be a string or list; use first element as version + if isinstance(hed_version, list): + version = hed_version[0] if hed_version else None + else: + version = hed_version + _add_standard(metadata, hed_standard, version=version) + return metadata @dataclass @@ -312,6 +370,8 @@ def get_metadata( add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata.path = self.path metadata.encodingFormat = ZARR_MIME_TYPE + if Path(self.path).suffixes == [".ome", ".zarr"]: + _add_standard(metadata, ome_ngff_standard) return metadata diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index cef6c8ac3..fcc51d268 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -608,3 +608,66 @@ def test_validate_invalid_zarr3(path: str, expected_result_ids: set[str]) -> Non result_ids = {r.id for r in zf.get_validation_errors()} assert result_ids == expected_result_ids + + +@pytest.mark.ai_generated +class TestBIDSDatasetDescriptionDataStandard: + """Tests for per-asset dataStandard population from dataset_description.json""" + + @staticmethod + def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: + import json + + dd_path = tmp_path / "dataset_description.json" + dd_path.write_text(json.dumps(content)) + return BIDSDatasetDescriptionAsset( + filepath=dd_path, + path="dataset_description.json", + dandiset_path=tmp_path, + ) + + @staticmethod + def _standard_names(metadata): # type: ignore[no-untyped-def] + from dandischema.models import BareAsset + + if "dataStandard" not in BareAsset.model_fields: + pytest.skip("dandischema too old, no dataStandard on BareAsset") + return [s.name for s in (metadata.dataStandard or [])] + + def test_bids_always_set(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Brain Imaging Data Structure (BIDS)" in names + + def test_hed_detected_when_hedversion_present(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0", "HEDVersion": "8.2.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" in names + assert "Brain Imaging Data Structure (BIDS)" in names + + def test_hed_not_detected_when_hedversion_absent(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" not in names + + def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: + """HEDVersion can be a list of strings per BIDS spec.""" + asset = self._make_bids_dd( + tmp_path, + { + "Name": "Test", + "BIDSVersion": "1.9.0", + "HEDVersion": ["8.2.0", "sc:1.0.0"], + }, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" in names From 423f902de0c161f689dc69ca6d3f4ef69536969a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:03:21 -0500 Subject: [PATCH 2/7] ai: fix up CLAUDE.md as to pytest markers placement --- CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index ec4a22d19..da7d65421 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Test Markers - When adding AI-generated tests, mark them with `@pytest.mark.ai_generated` -- Any new pytest markers must be registered in `tox.ini` under `[pytest]` section in the `markers` list +- Any new pytest markers must be registered in `pytest_configure` function of `dandi/pytest_plugin.py` ## Code Style - Code is formatted with Black (line length 100) From df4c01e7b89ebf5d56979338589017701503ef5e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:09:37 -0500 Subject: [PATCH 3/7] feat: extract NWB extensions and HED library schemas into dataStandard - Add get_nwb_extensions() to pynwb_utils that reads h5file["specifications"] to discover ndx-* namespaces and their versions (filtering out core/hdmf) - NWBAsset.get_metadata() populates StandardsType.extensions with ndx-* extensions found in the NWB file - BIDSDatasetDescriptionAsset.get_metadata() extracts HED library schemas from list-valued HEDVersion (e.g. ["8.2.0", "sc:1.0.0"]) into extensions - Add tests for both NWB extension extraction and HED library schema parsing Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 23 +++++++++++++++++- dandi/files/bids.py | 29 ++++++++++++++++++++-- dandi/pynwb_utils.py | 43 +++++++++++++++++++++++++++++++++ dandi/tests/test_files.py | 30 +++++++++++++++++++++++ dandi/tests/test_pynwb_utils.py | 39 ++++++++++++++++++++++++++++++ 5 files changed, 161 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 1ba0bb7b6..48e49bcdd 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -505,7 +505,28 @@ def get_metadata( raise metadata.path = self.path if "dataStandard" in BareAsset.model_fields: - nwb = StandardsType(**nwb_standard) + kwargs: dict[str, Any] = dict(nwb_standard) + # Populate NWB extensions (ndx-*) if the schema supports it + if "extensions" in StandardsType.model_fields: + from dandi.pynwb_utils import get_nwb_extensions + + try: + nwb_exts = get_nwb_extensions(self.filepath) + except Exception: + lgr.debug( + "Failed to extract NWB extensions from %s", + self.filepath, + exc_info=True, + ) + nwb_exts = {} + if nwb_exts: + kwargs["extensions"] = [ + StandardsType(name=name, version=ver).model_dump( + mode="json", exclude_none=True + ) + for name, ver in sorted(nwb_exts.items()) + ] + nwb = StandardsType(**kwargs) if metadata.dataStandard is None: metadata.dataStandard = [nwb] elif nwb not in metadata.dataStandard: diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 17573d5e9..1e8319acf 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -244,12 +244,37 @@ def get_metadata( return metadata _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) if hed_version := desc.get("HEDVersion"): - # HEDVersion can be a string or list; use first element as version + # HEDVersion can be a string or list. + # List form: ["8.2.0", "sc:1.0.0"] where first element is base + # HED version and subsequent "prefix:version" entries are library + # schemas recorded as extensions. if isinstance(hed_version, list): version = hed_version[0] if hed_version else None + library_entries = hed_version[1:] else: version = hed_version - _add_standard(metadata, hed_standard, version=version) + library_entries = [] + kwargs: dict = dict(hed_standard) + if version and "version" in StandardsType.model_fields: + kwargs["version"] = version + if ( + library_entries + and "extensions" in StandardsType.model_fields + ): + extensions = [] + for entry in library_entries: + # Format is "prefix:version" (e.g. "sc:1.0.0") + if ":" in str(entry): + lib_name, lib_ver = str(entry).split(":", 1) + else: + lib_name, lib_ver = str(entry), None + ext = StandardsType(name=lib_name, version=lib_ver) + extensions.append( + ext.model_dump(mode="json", exclude_none=True) + ) + if extensions: + kwargs["extensions"] = extensions + _add_standard(metadata, kwargs) return metadata diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 7ffe552a8..ff41bbe6b 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -152,6 +152,49 @@ def _sanitize(v: Any) -> str: return None +# Namespaces bundled with NWB/HDMF core — not extensions +_NWB_CORE_NAMESPACES = frozenset({"core", "hdmf-common", "hdmf-experimental"}) + + +def get_nwb_extensions(filepath: str | Path | Readable) -> dict[str, str]: + """Return NWB extensions embedded in an HDF5 file. + + Reads the ``specifications`` group of an NWB HDF5 file and returns a + mapping of extension namespace names to their latest embedded version, + excluding core NWB/HDMF namespaces. + + Parameters + ---------- + filepath + Path to an NWB ``.nwb`` HDF5 file, or a :class:`Readable`. + + Returns + ------- + dict[str, str] + ``{namespace_name: latest_version}`` for each non-core namespace + found in the file's ``specifications`` group. Empty dict if no + extensions are present or the group does not exist. + """ + extensions: dict[str, str] = {} + with open_readable(filepath) as fp, h5py.File(fp, "r") as h5file: + specs = h5file.get("specifications") + if specs is None: + return extensions + for name in specs: + if name in _NWB_CORE_NAMESPACES: + continue + ns_group = specs[name] + if not isinstance(ns_group, h5py.Group): + continue + try: + sorted_versions = sorted(ns_group, key=Version) + if sorted_versions: + extensions[name] = sorted_versions[-1] + except Exception: + lgr.debug("Failed to parse versions for NWB extension %s", name) + return extensions + + def get_neurodata_types_to_modalities_map() -> dict[str, str]: """Return a dict to map neurodata types known to pynwb to "modalities" diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index fcc51d268..00e4bad4e 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -671,3 +671,33 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: ) names = self._standard_names(asset.get_metadata()) assert "Hierarchical Event Descriptors (HED)" in names + + def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: + """HED library schemas in list HEDVersion populate extensions.""" + from dandischema.models import BareAsset, StandardsType + + if "extensions" not in StandardsType.model_fields: + pytest.skip("dandischema too old, no extensions on StandardsType") + if "dataStandard" not in BareAsset.model_fields: + pytest.skip("dandischema too old, no dataStandard on BareAsset") + asset = self._make_bids_dd( + tmp_path, + { + "Name": "Test", + "BIDSVersion": "1.9.0", + "HEDVersion": ["8.2.0", "sc:1.0.0", "lang:1.1.0"], + }, + ) + metadata = asset.get_metadata() + hed_standards = [ + s for s in (metadata.dataStandard or []) + if s.name == "Hierarchical Event Descriptors (HED)" + ] + assert len(hed_standards) == 1 + hed = hed_standards[0] + assert hed.version == "8.2.0" + assert hed.extensions is not None + ext_names = {e.name for e in hed.extensions} + assert ext_names == {"sc", "lang"} + ext_map = {e.name: e.version for e in hed.extensions} + assert ext_map == {"sc": "1.0.0", "lang": "1.1.0"} diff --git a/dandi/tests/test_pynwb_utils.py b/dandi/tests/test_pynwb_utils.py index 0de33d555..d47f294f7 100644 --- a/dandi/tests/test_pynwb_utils.py +++ b/dandi/tests/test_pynwb_utils.py @@ -103,3 +103,42 @@ def test_nwb_has_external_links(tmp_path): assert not nwb_has_external_links(filename1) assert nwb_has_external_links(filename4) + + +def test_get_nwb_extensions(tmp_path: Path) -> None: + """Test extraction of NWB extensions from HDF5 specifications group.""" + import h5py + + from ..pynwb_utils import get_nwb_extensions + + h5path = tmp_path / "test.nwb" + with h5py.File(h5path, "w") as f: + specs = f.create_group("specifications") + # Core namespaces should be excluded + core_grp = specs.create_group("core") + core_grp.create_group("2.7.0") + hdmf_grp = specs.create_group("hdmf-common") + hdmf_grp.create_group("1.8.0") + # An extension namespace should be included, latest version used + ndx_ecog = specs.create_group("ndx-ecog") + ndx_ecog.create_group("0.1.0") + ndx_ecog.create_group("0.2.0") + # Another extension + ndx_events = specs.create_group("ndx-events") + ndx_events.create_group("0.3.0") + + result = get_nwb_extensions(h5path) + assert result == {"ndx-ecog": "0.2.0", "ndx-events": "0.3.0"} + + +def test_get_nwb_extensions_no_specs(tmp_path: Path) -> None: + """No specifications group returns empty dict.""" + import h5py + + from ..pynwb_utils import get_nwb_extensions + + h5path = tmp_path / "test.nwb" + with h5py.File(h5path, "w") as f: + f.attrs["nwb_version"] = "2.7.0" + + assert get_nwb_extensions(h5path) == {} From 5d5cae5683156e499218ba0a460fd0dbedbca747 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:23:10 -0500 Subject: [PATCH 4/7] chore: git ignore uv.lock --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d0f2d8d8f..a2de93108 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ sandbox/ venv/ venvs/ .DS_Store +uv.lock From 906603e9a4d6a93757187705871f1f8648a08a0f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:29:30 -0500 Subject: [PATCH 5/7] refactor: centralize dandischema compat guard as _SCHEMA_BAREASSET_HAS_DATASTANDARD Single bool in bases.py guards all dataStandard/version/extensions features that ship together in dandischema >= 0.12.2, replacing scattered "field in Model.model_fields" checks. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 51 ++++++++++++++++++++++++--------------- dandi/files/bids.py | 24 +++++------------- dandi/tests/test_files.py | 10 +++----- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 48e49bcdd..b926bd8c7 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -21,6 +21,20 @@ from dandischema.models import StandardsType, get_schema_version, nwb_standard from packaging.version import Version from pydantic import ValidationError + +# True when the installed dandischema exposes the per-asset dataStandard field +# and related StandardsType enhancements (version, extensions). All these +# fields ship together starting with dandischema 0.12.2. +# TODO: remove this guard (and all branches that check it) once the minimum +# required dandischema version is >= 0.12.2. +_SCHEMA_BAREASSET_HAS_DATASTANDARD = "dataStandard" in BareAsset.model_fields +if not _SCHEMA_BAREASSET_HAS_DATASTANDARD and Version( + dandischema.__version__ +) >= Version("0.12.2"): + raise RuntimeError( + f"dandischema {dandischema.__version__} should have " + f"'dataStandard' field on BareAsset" + ) from pydantic_core import ErrorDetails import requests @@ -504,28 +518,27 @@ def get_metadata( else: raise metadata.path = self.path - if "dataStandard" in BareAsset.model_fields: + if _SCHEMA_BAREASSET_HAS_DATASTANDARD: kwargs: dict[str, Any] = dict(nwb_standard) - # Populate NWB extensions (ndx-*) if the schema supports it - if "extensions" in StandardsType.model_fields: - from dandi.pynwb_utils import get_nwb_extensions + # Populate NWB extensions (ndx-*) from the h5 specifications group + from dandi.pynwb_utils import get_nwb_extensions - try: - nwb_exts = get_nwb_extensions(self.filepath) - except Exception: - lgr.debug( - "Failed to extract NWB extensions from %s", - self.filepath, - exc_info=True, + try: + nwb_exts = get_nwb_extensions(self.filepath) + except Exception: + lgr.debug( + "Failed to extract NWB extensions from %s", + self.filepath, + exc_info=True, + ) + nwb_exts = {} + if nwb_exts: + kwargs["extensions"] = [ + StandardsType(name=name, version=ver).model_dump( + mode="json", exclude_none=True ) - nwb_exts = {} - if nwb_exts: - kwargs["extensions"] = [ - StandardsType(name=name, version=ver).model_dump( - mode="json", exclude_none=True - ) - for name, ver in sorted(nwb_exts.items()) - ] + for name, ver in sorted(nwb_exts.items()) + ] nwb = StandardsType(**kwargs) if metadata.dataStandard is None: metadata.dataStandard = [nwb] diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1e8319acf..30a8f3baf 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -8,7 +8,6 @@ from threading import Lock import weakref -from dandischema import __version__ as dandischema_version from dandischema.models import ( BareAsset, StandardsType, @@ -16,12 +15,11 @@ hed_standard, ome_ngff_standard, ) -from packaging.version import Version import dandi from dandi.bids_validator_deno import bids_validate -from .bases import GenericAsset, LocalFileAsset, NWBAsset +from .bases import GenericAsset, LocalFileAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD from .zarr import ZarrAsset from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..metadata.core import add_common_metadata, prepare_metadata @@ -38,24 +36,17 @@ BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) -_HAS_DATA_STANDARD = "dataStandard" in BareAsset.model_fields -if not _HAS_DATA_STANDARD and Version(dandischema_version) >= Version("0.12.2"): - raise RuntimeError( - f"dandischema {dandischema_version} should have " - f"'dataStandard' field on BareAsset" - ) - def _add_standard( - metadata: BareAsset, + metadata, # type: ignore[no-untyped-def] standard_dict: dict, version: str | None = None, ) -> None: """Add a data standard to asset metadata if the field is available.""" - if not _HAS_DATA_STANDARD: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: return kwargs = dict(standard_dict) - if version and "version" in StandardsType.model_fields: + if version: kwargs["version"] = version standard = StandardsType(**kwargs) if metadata.dataStandard is None: @@ -255,12 +246,9 @@ def get_metadata( version = hed_version library_entries = [] kwargs: dict = dict(hed_standard) - if version and "version" in StandardsType.model_fields: + if version: kwargs["version"] = version - if ( - library_entries - and "extensions" in StandardsType.model_fields - ): + if library_entries: extensions = [] for entry in library_entries: # Format is "prefix:version" (e.g. "sc:1.0.0") diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 00e4bad4e..c8bbcae08 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -628,9 +628,9 @@ def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: @staticmethod def _standard_names(metadata): # type: ignore[no-untyped-def] - from dandischema.models import BareAsset + from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if "dataStandard" not in BareAsset.model_fields: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") return [s.name for s in (metadata.dataStandard or [])] @@ -674,11 +674,9 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: """HED library schemas in list HEDVersion populate extensions.""" - from dandischema.models import BareAsset, StandardsType + from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if "extensions" not in StandardsType.model_fields: - pytest.skip("dandischema too old, no extensions on StandardsType") - if "dataStandard" not in BareAsset.model_fields: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") asset = self._make_bids_dd( tmp_path, From 53757db355254ab994f5bc5f454ba5428fc59c2a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:39:57 -0500 Subject: [PATCH 6/7] refactor: move imports to top level, document import policy in CLAUDE.md Move json, h5py, get_nwb_extensions, and _SCHEMA_BAREASSET_HAS_DATASTANDARD imports to module top level in tests. Keep pynwb_utils import deferred in bases.py (heavy transitive deps: h5py/pynwb/hdmf/numpy) per existing convention. Add import guidance to CLAUDE.md. Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 5 +++++ dandi/files/bases.py | 2 +- dandi/tests/test_files.py | 8 ++------ dandi/tests/test_pynwb_utils.py | 11 ++--------- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index da7d65421..e6d47b9f2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,6 +31,11 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - Prefer specific exceptions over generic ones - For CLI, use click library patterns - Imports organized: stdlib, third-party, local (alphabetical within groups) +- **Imports must be at the top of the file** — do NOT place imports inside + functions or methods unless there is a concrete reason (circular dependency, + or heavy transitive imports like `pynwb`/`h5py`/`nwbinspector` that would + slow down module load for unrelated code paths). When deferring an import + for weight, add the comment `# Avoid heavy import by importing within function:`. ## Documentation - Keep docstrings updated when changing function signatures diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b926bd8c7..6ade4d09b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -520,7 +520,7 @@ def get_metadata( metadata.path = self.path if _SCHEMA_BAREASSET_HAS_DATASTANDARD: kwargs: dict[str, Any] = dict(nwb_standard) - # Populate NWB extensions (ndx-*) from the h5 specifications group + # Avoid heavy import by importing within function: from dandi.pynwb_utils import get_nwb_extensions try: diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index c8bbcae08..b25d86ffe 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from operator import attrgetter import os from pathlib import Path @@ -16,6 +17,7 @@ from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..dandiapi import AssetType, RemoteZarrAsset from ..exceptions import UnknownAssetError +from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD from ..files import ( BIDSDatasetDescriptionAsset, DandisetMetadataFile, @@ -616,8 +618,6 @@ class TestBIDSDatasetDescriptionDataStandard: @staticmethod def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: - import json - dd_path = tmp_path / "dataset_description.json" dd_path.write_text(json.dumps(content)) return BIDSDatasetDescriptionAsset( @@ -628,8 +628,6 @@ def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: @staticmethod def _standard_names(metadata): # type: ignore[no-untyped-def] - from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") return [s.name for s in (metadata.dataStandard or [])] @@ -674,8 +672,6 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: """HED library schemas in list HEDVersion populate extensions.""" - from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") asset = self._make_bids_dd( diff --git a/dandi/tests/test_pynwb_utils.py b/dandi/tests/test_pynwb_utils.py index d47f294f7..47bac6fa4 100644 --- a/dandi/tests/test_pynwb_utils.py +++ b/dandi/tests/test_pynwb_utils.py @@ -6,10 +6,11 @@ import re from typing import Any, NoReturn +import h5py import numpy as np from pynwb import NWBHDF5IO, NWBFile, TimeSeries -from ..pynwb_utils import _sanitize_nwb_version, nwb_has_external_links +from ..pynwb_utils import _sanitize_nwb_version, get_nwb_extensions, nwb_has_external_links def test_pynwb_io(simple1_nwb: Path) -> None: @@ -107,10 +108,6 @@ def test_nwb_has_external_links(tmp_path): def test_get_nwb_extensions(tmp_path: Path) -> None: """Test extraction of NWB extensions from HDF5 specifications group.""" - import h5py - - from ..pynwb_utils import get_nwb_extensions - h5path = tmp_path / "test.nwb" with h5py.File(h5path, "w") as f: specs = f.create_group("specifications") @@ -133,10 +130,6 @@ def test_get_nwb_extensions(tmp_path: Path) -> None: def test_get_nwb_extensions_no_specs(tmp_path: Path) -> None: """No specifications group returns empty dict.""" - import h5py - - from ..pynwb_utils import get_nwb_extensions - h5path = tmp_path / "test.nwb" with h5py.File(h5path, "w") as f: f.attrs["nwb_version"] = "2.7.0" From d000be8eb6729120bea08cd413c8dd7ea9e2895f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 22:34:11 -0500 Subject: [PATCH 7/7] fix: guard hed_standard import for compat with released dandischema hed_standard does not exist in dandischema < 0.12.2, so gate the import on _SCHEMA_BAREASSET_HAS_DATASTANDARD (all new symbols ship together). HED detection is skipped when unavailable. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bids.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 30a8f3baf..e9507aac8 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -12,7 +12,6 @@ BareAsset, StandardsType, bids_standard, - hed_standard, ome_ngff_standard, ) @@ -20,6 +19,11 @@ from dandi.bids_validator_deno import bids_validate from .bases import GenericAsset, LocalFileAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD + +if _SCHEMA_BAREASSET_HAS_DATASTANDARD: + from dandischema.models import hed_standard +else: + hed_standard = None # type: ignore[assignment] from .zarr import ZarrAsset from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..metadata.core import add_common_metadata, prepare_metadata @@ -234,7 +238,7 @@ def get_metadata( _add_standard(metadata, bids_standard) return metadata _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) - if hed_version := desc.get("HEDVersion"): + if hed_standard and (hed_version := desc.get("HEDVersion")): # HEDVersion can be a string or list. # List form: ["8.2.0", "sc:1.0.0"] where first element is base # HED version and subsequent "prefix:version" entries are library