diff --git a/src/atdata/__init__.py b/src/atdata/__init__.py
index 511c006..dbcd266 100644
--- a/src/atdata/__init__.py
+++ b/src/atdata/__init__.py
@@ -100,6 +100,10 @@
     create_repository as create_repository,
 )
 
+from .dataset_meta import (
+    DatasetMeta as DatasetMeta,
+)
+
 from .index import (
     Index as Index,
 )
diff --git a/src/atdata/atmosphere/records.py b/src/atdata/atmosphere/records.py
index b91465f..b9ce764 100644
--- a/src/atdata/atmosphere/records.py
+++ b/src/atdata/atmosphere/records.py
@@ -22,6 +22,7 @@
     BlobEntry,
     ShardChecksum,
 )
+from ..dataset_meta import DatasetMeta, _resolve_meta
 
 # Import for type checking only to avoid circular imports
 from typing import TYPE_CHECKING
@@ -157,7 +158,8 @@ def publish(
         self,
         dataset: "Dataset[ST]",
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         schema_uri: Optional[str] = None,
         description: Optional[str] = None,
         tags: Optional[list[str]] = None,
@@ -172,7 +174,11 @@
 
         Args:
             dataset: The Dataset to publish.
-            name: Human-readable dataset name.
+            name: Human-readable dataset name. Can be provided via
+                *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             schema_uri: AT URI of the schema record. If not provided and
                 auto_publish_schema is True, the schema will be published.
             description: Human-readable description.
@@ -194,7 +200,21 @@
         Raises:
             ValueError: If schema_uri is not provided and
                 auto_publish_schema is False.
+            TypeError: If neither *name* nor *meta* is provided.
         """
+        resolved = _resolve_meta(
+            meta,
+            name=name,
+            schema_ref=schema_uri,
+            description=description,
+            tags=tags,
+            license=license,
+        )
+        name = resolved.name
+        schema_uri = resolved.schema_ref if schema_uri is None else schema_uri
+        description = resolved.description
+        tags = resolved.tags
+        license = resolved.license
         if schema_uri is None:
             if not auto_publish_schema:
                 raise ValueError(
@@ -253,7 +273,8 @@ def publish_with_urls(
         urls: list[str],
         schema_uri: str,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         description: Optional[str] = None,
         tags: Optional[list[str]] = None,
         license: Optional[str] = None,
@@ -272,7 +293,11 @@
         Args:
             urls: List of individual shard URLs.
             schema_uri: AT URI of the schema record.
-            name: Human-readable dataset name.
+            name: Human-readable dataset name. Can be provided via
+                *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             description: Human-readable description.
             tags: Searchable tags for discovery.
             license: SPDX license identifier.
@@ -285,7 +310,23 @@
 
         Returns:
             The AT URI of the created dataset record.
+
+        Raises:
+            TypeError: If neither *name* nor *meta* is provided.
         """
+        resolved = _resolve_meta(
+            meta,
+            name=name,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = resolved.name
+        description = resolved.description
+        tags = resolved.tags
+        license = resolved.license
+        metadata = resolved.metadata
         if checksums and len(checksums) != len(urls):
             raise ValueError(
                 f"checksums length ({len(checksums)}) must match "
@@ -319,7 +360,8 @@ def publish_with_s3(
         keys: list[str],
         schema_uri: str,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         region: Optional[str] = None,
         endpoint: Optional[str] = None,
         description: Optional[str] = None,
@@ -337,7 +379,11 @@
             bucket: S3 bucket name.
             keys: List of S3 object keys for shard files.
             schema_uri: AT URI of the schema record.
-            name: Human-readable dataset name.
+            name: Human-readable dataset name. Can be provided via
+                *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             region: AWS region (e.g., 'us-east-1').
             endpoint: Custom S3-compatible endpoint URL.
             description: Human-readable description.
@@ -351,7 +397,23 @@
 
         Returns:
             The AT URI of the created dataset record.
+
+        Raises:
+            TypeError: If neither *name* nor *meta* is provided.
         """
+        resolved = _resolve_meta(
+            meta,
+            name=name,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = resolved.name
+        description = resolved.description
+        tags = resolved.tags
+        license = resolved.license
+        metadata = resolved.metadata
         if checksums and len(checksums) != len(keys):
             raise ValueError(
                 f"checksums length ({len(checksums)}) must match "
@@ -384,7 +446,8 @@ def publish_with_blob_refs(
         blob_refs: list[dict],
         schema_uri: str,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         description: Optional[str] = None,
         tags: Optional[list[str]] = None,
         license: Optional[str] = None,
@@ -406,7 +469,11 @@
                 ``Atmosphere.upload_blob()``. Each dict must contain
                 ``$type``, ``ref`` (with ``$link``), ``mimeType``, and ``size``.
             schema_uri: AT URI of the schema record.
-            name: Human-readable dataset name.
+            name: Human-readable dataset name. Can be provided via
+                *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             description: Human-readable description.
             tags: Searchable tags for discovery.
             license: SPDX license identifier.
@@ -419,7 +486,23 @@
 
         Returns:
             The AT URI of the created dataset record.
+
+        Raises:
+            TypeError: If neither *name* nor *meta* is provided.
         """
+        resolved = _resolve_meta(
+            meta,
+            name=name,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = resolved.name
+        description = resolved.description
+        tags = resolved.tags
+        license = resolved.license
+        metadata = resolved.metadata
         if checksums and len(checksums) != len(blob_refs):
             raise ValueError(
                 f"checksums length ({len(checksums)}) must match "
@@ -452,7 +535,8 @@ def publish_with_blobs(
         blobs: list[bytes],
         schema_uri: str,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         description: Optional[str] = None,
         tags: Optional[list[str]] = None,
         license: Optional[str] = None,
@@ -471,7 +555,11 @@
         Args:
             blobs: List of binary data (e.g., tar shards) to upload as blobs.
             schema_uri: AT URI of the schema record.
-            name: Human-readable dataset name.
+            name: Human-readable dataset name. Can be provided via
+                *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             description: Human-readable description.
             tags: Searchable tags for discovery.
             license: SPDX license identifier.
@@ -484,10 +572,26 @@
         Returns:
             The AT URI of the created dataset record.
 
+        Raises:
+            TypeError: If neither *name* nor *meta* is provided.
+
         Note:
             Blobs are only retained by the PDS when referenced in a
             committed record. This method handles that automatically.
         """
+        resolved = _resolve_meta(
+            meta,
+            name=name,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = resolved.name
+        description = resolved.description
+        tags = resolved.tags
+        license = resolved.license
+        metadata = resolved.metadata
         import hashlib
 
         blob_entries = []
diff --git a/src/atdata/dataset_meta.py b/src/atdata/dataset_meta.py
new file mode 100644
index 0000000..a8027e9
--- /dev/null
+++ b/src/atdata/dataset_meta.py
@@ -0,0 +1,103 @@
+"""DatasetMeta parameter object for bundling shared metadata fields.
+
+Reduces parameter explosion across ``Index.insert_dataset``,
+``Index.write_samples``, and ``DatasetPublisher.publish*`` by collecting
+the six metadata fields (name, schema_ref, description, tags, license,
+metadata) into a single dataclass.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+from dataclasses import dataclass
+
+
+@dataclass
+class DatasetMeta:
+    """Metadata for publishing or indexing a dataset.
+
+    Bundle common fields shared across ``write_samples``,
+    ``insert_dataset``, and atmosphere publication.
+
+    Args:
+        name: Human-readable name for the dataset.
+        schema_ref: Optional schema reference (AT URI or local ref).
+        description: Optional dataset description.
+        tags: Optional tags for discovery.
+        license: Optional SPDX license identifier.
+        metadata: Optional arbitrary metadata dict.
+
+    Examples:
+        >>> meta = DatasetMeta(name="mnist", tags=["vision"])
+        >>> meta.name
+        'mnist'
+    """
+
+    name: str
+    schema_ref: str | None = None
+    description: str | None = None
+    tags: list[str] | None = None
+    license: str | None = None
+    metadata: dict | None = None
+
+
+def _resolve_meta(
+    meta: DatasetMeta | None = None,
+    *,
+    name: str | None = None,
+    schema_ref: str | None = None,
+    description: str | None = None,
+    tags: list[str] | None = None,
+    license: str | None = None,
+    metadata: dict | None = None,
+) -> DatasetMeta:
+    """Normalize a ``DatasetMeta`` from either a meta object or flat kwargs.
+
+    When both *meta* and explicit kwargs are provided, explicit kwargs
+    override the corresponding fields in *meta* (explicit wins).
+
+    Args:
+        meta: Optional pre-built metadata object.
+        name: Dataset name (required if *meta* is ``None``).
+        schema_ref: Optional schema reference override.
+        description: Optional description override.
+        tags: Optional tags override.
+        license: Optional license override.
+        metadata: Optional metadata dict override.
+
+    Returns:
+        Resolved ``DatasetMeta`` instance.
+
+    Raises:
+        TypeError: If neither *name* nor *meta* is provided.
+    """
+    if meta is None:
+        if name is None:
+            raise TypeError("Either 'meta' or 'name' must be provided.")
+        return DatasetMeta(
+            name=name,
+            schema_ref=schema_ref,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+
+    # Build overrides from explicit kwargs (only non-None values win)
+    overrides: dict = {}
+    if name is not None:
+        overrides["name"] = name
+    if schema_ref is not None:
+        overrides["schema_ref"] = schema_ref
+    if description is not None:
+        overrides["description"] = description
+    if tags is not None:
+        overrides["tags"] = tags
+    if license is not None:
+        overrides["license"] = license
+    if metadata is not None:
+        overrides["metadata"] = metadata
+
+    if overrides:
+        return dataclasses.replace(meta, **overrides)
+    return meta
diff --git a/src/atdata/index/_index.py b/src/atdata/index/_index.py
index f0b1ece..bab6dfc 100644
--- a/src/atdata/index/_index.py
+++ b/src/atdata/index/_index.py
@@ -6,6 +6,7 @@
     Dataset,
 )
 from atdata._protocols import AbstractDataStore, Packable
+from atdata.dataset_meta import DatasetMeta, _resolve_meta
 
 from atdata.index._entry import LocalDatasetEntry
 from atdata.index._schema import (
@@ -696,7 +697,8 @@ def insert_dataset(
         self,
         ds: Dataset,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         schema_ref: str | None = None,
         description: str | None = None,
         tags: list[str] | None = None,
@@ -729,7 +731,11 @@
         Args:
             ds: The Dataset to register.
             name: Human-readable name for the dataset, optionally prefixed
-                with a repository name (e.g. ``"lab/mnist"``).
+                with a repository name (e.g. ``"lab/mnist"``). Can be
+                provided via *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             schema_ref: Optional schema reference.
             description: Optional dataset description (atmosphere only).
             tags: Optional tags for discovery (atmosphere only).
@@ -750,7 +756,23 @@
             ValueError: If atmosphere limits are exceeded (when *force* is
                 ``False``), or if a credentialed source targets the
                 atmosphere without *copy*.
+            TypeError: If neither *name* nor *meta* is provided.
         """
+        meta = _resolve_meta(
+            meta,
+            name=name,
+            schema_ref=schema_ref,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = meta.name
+        schema_ref = meta.schema_ref
+        description = meta.description
+        tags = meta.tags
+        license = meta.license
+        metadata = meta.metadata
         from atdata.atmosphere.store import PDS_TOTAL_DATASET_LIMIT_BYTES
 
         backend_key, resolved_name, handle_or_did = self._resolve_prefix(name)
@@ -875,7 +897,8 @@ def write_samples(
         self,
         samples: Iterable,
         *,
-        name: str,
+        name: str | None = None,
+        meta: DatasetMeta | None = None,
         schema_ref: str | None = None,
         description: str | None = None,
         tags: list[str] | None = None,
@@ -909,7 +932,11 @@
 
         Args:
             samples: Iterable of ``Packable`` samples. Must be non-empty.
-            name: Dataset name, optionally prefixed with target.
+            name: Dataset name, optionally prefixed with target. Can be
+                provided via *meta* instead.
+            meta: Optional :class:`~atdata.DatasetMeta` bundling name,
+                schema_ref, description, tags, license, and metadata.
+                Explicit keyword arguments override fields in *meta*.
             schema_ref: Optional schema reference. Auto-generated if ``None``.
             description: Optional dataset description (atmosphere only).
             tags: Optional tags for discovery (atmosphere only).
@@ -933,12 +960,28 @@
         Raises:
             ValueError: If *samples* is empty, or if atmosphere size limits
                 are exceeded (when *force* is ``False``).
+            TypeError: If neither *name* nor *meta* is provided.
 
         Examples:
            >>> index = Index()
            >>> samples = [MySample(key="0", text="hello")]
            >>> entry = index.write_samples(samples, name="my-dataset")
         """
+        meta = _resolve_meta(
+            meta,
+            name=name,
+            schema_ref=schema_ref,
+            description=description,
+            tags=tags,
+            license=license,
+            metadata=metadata,
+        )
+        name = meta.name
+        schema_ref = meta.schema_ref
+        description = meta.description
+        tags = meta.tags
+        license = meta.license
+        metadata = meta.metadata
         import tempfile
 
         from atdata.dataset import write_samples as _write_samples
diff --git a/src/atdata/repository.py b/src/atdata/repository.py
index 65c9a59..4893e6d 100644
--- a/src/atdata/repository.py
+++ b/src/atdata/repository.py
@@ -27,6 +27,7 @@
 from typing import Any, Iterator, Optional, TYPE_CHECKING
 
 from ._protocols import AbstractDataStore
+from .dataset_meta import DatasetMeta
 
 if TYPE_CHECKING:
     from .providers._base import IndexProvider
@@ -238,7 +239,7 @@
                 ``PDSBlobStore``. Takes precedence over *data_urls*.
             checksums: Per-shard ``ShardChecksum`` objects. Forwarded to the
                 publisher so each storage entry gets the correct digest.
-            **kwargs: Additional options (description, tags, license).
+            **kwargs: Additional options (description, tags, license, metadata).
 
         Returns:
             AtmosphereIndexEntry for the inserted dataset.
@@ -246,6 +247,16 @@
         self._ensure_loaders()
         from .atmosphere import AtmosphereIndexEntry
 
+        # Build a DatasetMeta to pass through to publishers
+        pub_meta = DatasetMeta(
+            name=name,
+            schema_ref=schema_ref,
+            description=kwargs.get("description"),
+            tags=kwargs.get("tags"),
+            license=kwargs.get("license"),
+            metadata=kwargs.get("metadata"),
+        )
+
         if blob_refs is not None or data_urls is not None:
             # Ensure schema is published first
             if schema_ref is None:
@@ -258,7 +269,7 @@
             )
             schema_ref = str(schema_uri_obj)
 
-        metadata = kwargs.get("metadata")
+        metadata = pub_meta.metadata
         if metadata is None and hasattr(ds, "_metadata"):
             metadata = ds._metadata
 
@@ -267,9 +278,9 @@
                 blob_refs=blob_refs,
                 schema_uri=schema_ref,
                 name=name,
-                description=kwargs.get("description"),
-                tags=kwargs.get("tags"),
-                license=kwargs.get("license"),
+                description=pub_meta.description,
+                tags=pub_meta.tags,
+                license=pub_meta.license,
                 metadata=metadata,
                 checksums=checksums,
             )
@@ -278,9 +289,9 @@
                 urls=data_urls,
                 schema_uri=schema_ref,
                 name=name,
-                description=kwargs.get("description"),
-                tags=kwargs.get("tags"),
-                license=kwargs.get("license"),
+                description=pub_meta.description,
+                tags=pub_meta.tags,
+                license=pub_meta.license,
                 metadata=metadata,
             )
         else:
@@ -288,9 +299,9 @@
                 ds,
                 name=name,
                 schema_uri=schema_ref,
-                description=kwargs.get("description"),
-                tags=kwargs.get("tags"),
-                license=kwargs.get("license"),
+                description=pub_meta.description,
+                tags=pub_meta.tags,
+                license=pub_meta.license,
                 auto_publish_schema=(schema_ref is None),
             )
 
diff --git a/tests/test_dataset_meta.py b/tests/test_dataset_meta.py
new file mode 100644
index 0000000..877b93c
--- /dev/null
+++ b/tests/test_dataset_meta.py
@@ -0,0 +1,275 @@
+"""Tests for DatasetMeta dataclass and _resolve_meta helper."""
+
+import dataclasses
+
+import pytest
+
+from atdata.dataset_meta import DatasetMeta, _resolve_meta
+
+
+class TestDatasetMeta:
+    """Tests for the DatasetMeta dataclass."""
+
+    def test_create_with_name_only(self):
+        meta = DatasetMeta(name="mnist")
+        assert meta.name == "mnist"
+        assert meta.schema_ref is None
+        assert meta.description is None
+        assert meta.tags is None
+        assert meta.license is None
+        assert meta.metadata is None
+
+    def test_create_with_all_fields(self):
+        meta = DatasetMeta(
+            name="mnist",
+            schema_ref="local://schemas/Mnist@1.0.0",
+            description="Handwritten digits",
+            tags=["vision", "classification"],
+            license="MIT",
+            metadata={"source": "yann.lecun.com"},
+        )
+        assert meta.name == "mnist"
+        assert meta.schema_ref == "local://schemas/Mnist@1.0.0"
+        assert meta.description == "Handwritten digits"
+        assert meta.tags == ["vision", "classification"]
+        assert meta.license == "MIT"
+        assert meta.metadata == {"source": "yann.lecun.com"}
+
+    def test_is_dataclass(self):
+        meta = DatasetMeta(name="test")
+        assert dataclasses.is_dataclass(meta)
+
+    def test_replace(self):
+        meta = DatasetMeta(name="mnist", description="original")
+        updated = dataclasses.replace(meta, description="updated")
+        assert updated.name == "mnist"
+        assert updated.description == "updated"
+        assert meta.description == "original"
+
+    def test_equality(self):
+        a = DatasetMeta(name="mnist", tags=["vision"])
+        b = DatasetMeta(name="mnist", tags=["vision"])
+        assert a == b
+
+    def test_importable_from_atdata(self):
+        import atdata
+
+        assert hasattr(atdata, "DatasetMeta")
+        assert atdata.DatasetMeta is DatasetMeta
+
+
+class TestResolveMeta:
+    """Tests for the _resolve_meta helper."""
+
+    def test_from_flat_kwargs(self):
+        meta = _resolve_meta(
+            name="mnist",
+            description="digits",
+            tags=["vision"],
+        )
+        assert meta.name == "mnist"
+        assert meta.description == "digits"
+        assert meta.tags == ["vision"]
+
+    def test_from_meta_object(self):
+        original = DatasetMeta(
+            name="mnist",
+            description="digits",
+            tags=["vision"],
+        )
+        meta = _resolve_meta(original)
+        assert meta is original
+
+    def test_raises_when_neither_name_nor_meta(self):
+        with pytest.raises(TypeError, match="Either 'meta' or 'name' must be provided"):
+            _resolve_meta()
+
+    def test_explicit_kwargs_override_meta(self):
+        original = DatasetMeta(
+            name="mnist",
+            description="original",
+            tags=["old"],
+            license="MIT",
+        )
+        meta = _resolve_meta(
+            original,
+            description="overridden",
+            tags=["new"],
+        )
+        assert meta.name == "mnist"
+        assert meta.description == "overridden"
+        assert meta.tags == ["new"]
+        assert meta.license == "MIT"
+
+    def test_name_override(self):
+        original = DatasetMeta(name="original")
+        meta = _resolve_meta(original, name="overridden")
+        assert meta.name == "overridden"
+
+    def test_none_kwargs_do_not_override(self):
+        original = DatasetMeta(
+            name="mnist",
+            description="digits",
+            tags=["vision"],
+        )
+        meta = _resolve_meta(
+            original,
+            description=None,
+            tags=None,
+        )
+        assert meta.description == "digits"
+        assert meta.tags == ["vision"]
+
+    def test_override_returns_new_instance(self):
+        original = DatasetMeta(name="mnist", description="old")
+        meta = _resolve_meta(original, description="new")
+        assert meta is not original
+        assert original.description == "old"
+
+    def test_no_override_returns_same_instance(self):
+        original = DatasetMeta(name="mnist")
+        meta = _resolve_meta(original)
+        assert meta is original
+
+    def test_all_fields_from_kwargs(self):
+        meta = _resolve_meta(
+            name="ds",
+            schema_ref="ref",
+            description="desc",
+            tags=["t"],
+            license="Apache-2.0",
+            metadata={"k": "v"},
+        )
+        assert meta == DatasetMeta(
+            name="ds",
+            schema_ref="ref",
+            description="desc",
+            tags=["t"],
+            license="Apache-2.0",
+            metadata={"k": "v"},
+        )
+
+    def test_metadata_dict_override(self):
+        original = DatasetMeta(name="ds", metadata={"a": 1})
+        meta = _resolve_meta(original, metadata={"b": 2})
+        assert meta.metadata == {"b": 2}
+
+
+class TestDatasetMetaWithIndex:
+    """Tests for DatasetMeta integration with Index methods."""
+
+    def test_write_samples_with_meta(self, tmp_path):
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from atdata.stores import LocalDiskStore
+        from conftest import SharedBasicSample
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        store = LocalDiskStore(root=tmp_path / "store")
+        index = Index(provider=provider, data_store=store, atmosphere=None)
+
+        samples = [SharedBasicSample(name=f"s{i}", value=i) for i in range(5)]
+        meta = DatasetMeta(name="meta-ds", metadata={"source": "test"})
+
+        entry = index.write_samples(samples, meta=meta)
+        assert entry.name == "meta-ds"
+
+    def test_write_samples_meta_with_kwargs_override(self, tmp_path):
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from atdata.stores import LocalDiskStore
+        from conftest import SharedBasicSample
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        store = LocalDiskStore(root=tmp_path / "store")
+        index = Index(provider=provider, data_store=store, atmosphere=None)
+
+        samples = [SharedBasicSample(name=f"s{i}", value=i) for i in range(5)]
+        meta = DatasetMeta(name="original-name")
+
+        entry = index.write_samples(samples, meta=meta, name="overridden-name")
+        assert entry.name == "overridden-name"
+
+    def test_write_samples_flat_kwargs_still_work(self, tmp_path):
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from atdata.stores import LocalDiskStore
+        from conftest import SharedBasicSample
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        store = LocalDiskStore(root=tmp_path / "store")
+        index = Index(provider=provider, data_store=store, atmosphere=None)
+
+        samples = [SharedBasicSample(name=f"s{i}", value=i) for i in range(3)]
+
+        entry = index.write_samples(samples, name="flat-ds")
+        assert entry.name == "flat-ds"
+
+    def test_insert_dataset_with_meta(self, tmp_path):
+        import atdata
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from conftest import SharedBasicSample, create_tar_with_samples
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        index = Index(provider=provider, atmosphere=None)
+
+        tar_path = tmp_path / "data-000000.tar"
+        samples = [SharedBasicSample(name=f"s{i}", value=i) for i in range(3)]
+        create_tar_with_samples(tar_path, samples)
+
+        ds = atdata.Dataset[SharedBasicSample](str(tar_path))
+        meta = DatasetMeta(name="insert-meta-ds")
+
+        entry = index.insert_dataset(ds, meta=meta)
+        assert entry.name == "insert-meta-ds"
+
+    def test_insert_dataset_meta_with_kwargs_override(self, tmp_path):
+        import atdata
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from conftest import SharedBasicSample, create_tar_with_samples
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        index = Index(provider=provider, atmosphere=None)
+
+        tar_path = tmp_path / "data-000000.tar"
+        samples = [SharedBasicSample(name=f"s{i}", value=i) for i in range(3)]
+        create_tar_with_samples(tar_path, samples)
+
+        ds = atdata.Dataset[SharedBasicSample](str(tar_path))
+        meta = DatasetMeta(name="original-name", description="from meta")
+
+        entry = index.insert_dataset(ds, meta=meta, name="overridden-name")
+        assert entry.name == "overridden-name"
+
+    def test_write_samples_requires_name_or_meta(self, tmp_path):
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from conftest import SharedBasicSample
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        index = Index(provider=provider, atmosphere=None)
+
+        samples = [SharedBasicSample(name="s0", value=0)]
+
+        with pytest.raises(TypeError, match="Either 'meta' or 'name'"):
+            index.write_samples(samples)
+
+    def test_insert_dataset_requires_name_or_meta(self, tmp_path):
+        import atdata
+        from atdata.index import Index
+        from atdata.providers._sqlite import SqliteProvider
+        from conftest import SharedBasicSample, create_tar_with_samples
+
+        provider = SqliteProvider(path=tmp_path / "test.db")
+        index = Index(provider=provider, atmosphere=None)
+
+        tar_path = tmp_path / "data-000000.tar"
+        samples = [SharedBasicSample(name="s0", value=0)]
+        create_tar_with_samples(tar_path, samples)
+
+        ds = atdata.Dataset[SharedBasicSample](str(tar_path))
+
+        with pytest.raises(TypeError, match="Either 'meta' or 'name'"):
+            index.insert_dataset(ds)
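
Reviewer note: a minimal sketch of the intended call pattern, distilled from the docstrings and tests in this diff. It exercises only atdata.dataset_meta, which the diff adds in full; the dataset name and description strings are illustrative.

from atdata.dataset_meta import DatasetMeta, _resolve_meta

# Bundle the six shared metadata fields once instead of threading them
# through every publish/insert call.
meta = DatasetMeta(
    name="mnist",
    description="Handwritten digits",
    tags=["vision"],
    license="MIT",
)

# Explicit keyword arguments win over fields carried by `meta`;
# fields that are not overridden pass through unchanged.
resolved = _resolve_meta(meta, description="MNIST training split")
assert resolved.name == "mnist"
assert resolved.description == "MNIST training split"
assert resolved.tags == ["vision"]

# With no overrides, the same instance comes back (no copy is made).
assert _resolve_meta(meta) is meta

# Omitting both `meta` and `name` raises, matching the Raises sections
# added across the publish/insert APIs.
try:
    _resolve_meta()
except TypeError as exc:
    print(exc)  # Either 'meta' or 'name' must be provided.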