From edb7d7999579ddd7aa1d4616e13c4a9828df826e Mon Sep 17 00:00:00 2001
From: Maxine Levesque <220467675+maxine-at-forecast@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:15:51 -0700
Subject: [PATCH 1/3] Add configurable generic AppView, lexicon drift test,
 vector runner, and ecosystem coordination

- Make generic AppView URL configurable via ATDATA_GENERIC_APPVIEW env var
  (default: https://bsky.social) for Tier 1 unauthenticated cross-account
  reads
- Add lexicon drift-detection test that verifies every lexicon JSON property
  has a corresponding Python dataclass field
- Add test vector runner for cross-repo shard-roundtrip verification
- Add .atdata-ecosystem.json declaring SDK role and capabilities
- Add compat-check workflow triggered by lexicon dispatch events

Part of ecosystem coordination initiative (#17).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .atdata-ecosystem.json             |  17 +++
 .github/workflows/compat-check.yml |  32 ++++++
 src/atdata/atmosphere/client.py    |  23 +++-
 tests/test_lexicon_drift.py        | 101 ++++++++++++++++++
 tests/test_vectors.py              | 165 +++++++++++++++++++++++++++++
 5 files changed, 335 insertions(+), 3 deletions(-)
 create mode 100644 .atdata-ecosystem.json
 create mode 100644 .github/workflows/compat-check.yml
 create mode 100644 tests/test_lexicon_drift.py
 create mode 100644 tests/test_vectors.py

diff --git a/.atdata-ecosystem.json b/.atdata-ecosystem.json
new file mode 100644
index 0000000..1824fcd
--- /dev/null
+++ b/.atdata-ecosystem.json
@@ -0,0 +1,17 @@
+{
+  "role": "sdk",
+  "language": "python",
+  "lexicon_sync": {"method": "tarball", "ref": "v1.2.0"},
+  "capabilities": [
+    "shard_read_write",
+    "schema_publish",
+    "schema_resolve_xrpc",
+    "dataset_search_xrpc",
+    "lens_transforms",
+    "manifest_queries",
+    "load_dataset_hf_api",
+    "atmosphere_crud",
+    "label_resolve_xrpc",
+    "blob_resolve_xrpc"
+  ]
+}
diff --git a/.github/workflows/compat-check.yml b/.github/workflows/compat-check.yml
new file mode 100644
index 0000000..49f8830
--- /dev/null
+++ b/.github/workflows/compat-check.yml
@@ -0,0 +1,37 @@
+# Lexicon compatibility check.
+#
+# Triggered by atdata-lexicon's dispatch-consumers workflow when a PR
+# proposes lexicon changes.  Syncs lexicons to the proposed ref and runs
+# the full test suite to detect breaking changes.
+
+name: Lexicon Compat Check
+
+on:
+  repository_dispatch:
+    types: [lexicon-compat-check]
+
+jobs:
+  compat:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python
+        run: uv python install
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Sync lexicons to proposed ref
+        # The ref comes from an external dispatch payload, so pass it through
+        # the environment instead of expanding it into the script text; this
+        # keeps shell metacharacters in the ref from being executed.
+        env:
+          LEXICON_REF: ${{ github.event.client_payload.lexicon_ref }}
+        run: just sync-lexicons ref="$LEXICON_REF"
+
+      - name: Run tests
+        run: uv run pytest tests/
diff --git a/src/atdata/atmosphere/client.py b/src/atdata/atmosphere/client.py
index d49fbbe..29a87d2 100644
--- a/src/atdata/atmosphere/client.py
+++ b/src/atdata/atmosphere/client.py
@@ -475,10 +475,25 @@ def xrpc_procedure(
             raise AppViewUnavailableError(self._appview_url, str(exc)) from exc
 
     # ------------------------------------------------------------------ #
-    # Cross-account reads via bsky.social AppView (existing behavior)
+    # Cross-account reads via generic AppView (Tier 1)
     # ------------------------------------------------------------------ #
 
-    _APPVIEW_URL = "https://bsky.social"
+    _GENERIC_APPVIEW_URL: str | None = None
+
+    @classmethod
+    def _get_generic_appview_url(cls) -> str:
+        """Return the generic AppView URL for unauthenticated cross-account reads.
+
+        Resolved once from the ``ATDATA_GENERIC_APPVIEW`` environment variable and
+        cached on the class; unset or blank falls back to ``https://bsky.social``.
+        """
+        if cls._GENERIC_APPVIEW_URL is None:
+            import os
+
+            configured = os.environ.get("ATDATA_GENERIC_APPVIEW", "").strip()
+            # An empty or blank value is treated the same as an unset one.
+            cls._GENERIC_APPVIEW_URL = configured or "https://bsky.social"
+        return cls._GENERIC_APPVIEW_URL
 
     def _get_appview_client(self) -> Any:
         """Return a shared, unauthenticated client pointed at the public AppView.
@@ -488,7 +503,9 @@ def _get_appview_client(self) -> Any:
         """
         if not hasattr(self, "_appview_client") or self._appview_client is None:
             Client = _get_atproto_client_class()
-            self._appview_client = Client(base_url=self._APPVIEW_URL)
+            self._appview_client = Client(
+                base_url=self._get_generic_appview_url()
+            )
         return self._appview_client
 
     # Low-level record operations
diff --git a/tests/test_lexicon_drift.py b/tests/test_lexicon_drift.py
new file mode 100644
index 0000000..d69ff11
--- /dev/null
+++ b/tests/test_lexicon_drift.py
@@ -0,0 +1,101 @@
+"""Lexicon drift detection tests.
+
+Ensures every property defined in the lexicon JSON files has a corresponding
+field in the Python dataclass types in ``_lexicon_types.py``.  When a new
+property is added to a lexicon but not to the Python type, this test fails.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+
+import pytest
+
+from atdata.atmosphere._lexicon_types import (
+    LexDatasetEntry,
+    LexLabelRecord,
+    LexLensRecord,
+    LexLensVerification,
+    LexSchemaRecord,
+)
+
+LEXICON_DIR = (
+    Path(__file__).resolve().parent.parent
+    / "src"
+    / "atdata"
+    / "lexicons"
+    / "science"
+    / "alt"
+    / "dataset"
+)
+
+# Map lexicon JSON file stems to their corresponding Python types.
+RECORD_TYPES: dict[str, type] = {
+    "entry": LexDatasetEntry,
+    "schema": LexSchemaRecord,
+    "lens": LexLensRecord,
+    "label": LexLabelRecord,
+    "lensVerification": LexLensVerification,
+}
+
+
+def _camel_to_snake(name: str) -> str:
+    """Convert camelCase to snake_case."""
+    s1 = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", name)
+    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
+
+
+def _get_lexicon_properties(lexicon_path: Path) -> set[str]:
+    """Return the property names of a UTF-8 lexicon file's main record."""
+    data = json.loads(lexicon_path.read_text(encoding="utf-8"))
+    main_def = data.get("defs", {}).get("main", {})
+    record = main_def.get("record", {})
+    return set(record.get("properties", {}).keys())
+
+
+def _get_dataclass_fields(cls: type) -> set[str]:
+    """Get field names from a dataclass, including inherited fields."""
+    import dataclasses
+
+    return {f.name for f in dataclasses.fields(cls)}
+
+
+def _build_test_cases() -> list[tuple[str, Path, type]]:
+    """Build parametrized test cases for each lexicon/type pair."""
+    cases = []
+    for stem, py_type in RECORD_TYPES.items():
+        lexicon_path = LEXICON_DIR / f"{stem}.json"
+        if lexicon_path.exists():
+            cases.append((stem, lexicon_path, py_type))
+    return cases
+
+
+@pytest.mark.parametrize(
+    "stem,lexicon_path,py_type",
+    _build_test_cases(),
+    ids=[c[0] for c in _build_test_cases()],
+)
+def test_lexicon_properties_covered_by_python_type(
+    stem: str, lexicon_path: Path, py_type: type
+) -> None:
+    """Every lexicon record property must have a Python dataclass field."""
+    lexicon_props = _get_lexicon_properties(lexicon_path)
+    python_fields = _get_dataclass_fields(py_type)
+
+    # Build mapping: camelCase lexicon prop -> expected snake_case Python field
+    missing = []
+    for prop in sorted(lexicon_props):
+        # Skip $type — it's not a stored field
+        if prop.startswith("$"):
+            continue
+        expected_field = _camel_to_snake(prop)
+        if expected_field not in python_fields:
+            missing.append(f"  {prop} -> {expected_field}")
+
+    assert not missing, (
+        f"Lexicon '{stem}' has properties not covered by {py_type.__name__}:\n"
+        + "\n".join(missing)
+        + "\nAdd the missing fields to the Python dataclass."
+    )
diff --git a/tests/test_vectors.py b/tests/test_vectors.py
new file mode 100644
index 0000000..9a60fdd
--- /dev/null
+++ b/tests/test_vectors.py
@@ -0,0 +1,165 @@
+"""Cross-repo test vector runner.
+
+Executes shard-roundtrip test vectors from ``test-vectors/shard-roundtrip/``
+in the vendored lexicons directory.  Each vector specifies a schema, a set of
+input samples, and expected outputs.  The runner writes samples to a shard,
+reads them back, and verifies against expected values.
+
+Skips gracefully if the test-vectors directory does not exist yet.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pytest
+
+import atdata
+
+# Try two locations: sibling test-vectors dir from atdata-lexicon, or vendored
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+_CANDIDATE_PATHS = [
+    _REPO_ROOT / "test-vectors" / "shard-roundtrip",
+    _REPO_ROOT / "src" / "atdata" / "lexicons" / ".." / ".." / ".." / ".."
+    / "test-vectors" / "shard-roundtrip",
+]
+
+VECTORS_DIR: Path | None = None
+for _p in _CANDIDATE_PATHS:
+    _resolved = _p.resolve()
+    if _resolved.is_dir():
+        VECTORS_DIR = _resolved
+        break
+
+
+def _collect_vector_files() -> list[Path]:
+    """Collect all vector JSON files, or return empty if dir missing."""
+    if VECTORS_DIR is None:
+        return []
+    return sorted(VECTORS_DIR.glob("*.json"))
+
+
+def _make_packable_class(
+    schema_def: dict[str, Any],
+) -> type:
+    """Dynamically create a packable dataclass from a vector schema definition."""
+    fields_def = schema_def["fields"]
+    annotations: dict[str, type] = {}
+    for f in fields_def:
+        type_str = f["type"]
+        if type_str == "str":
+            annotations[f["name"]] = str
+        elif type_str == "int":
+            annotations[f["name"]] = int
+        elif type_str == "float":
+            annotations[f["name"]] = float
+        elif type_str == "dict":
+            annotations[f["name"]] = dict
+        elif type_str == "ndarray":
+            annotations[f["name"]] = np.ndarray
+        else:
+            annotations[f["name"]] = Any
+
+    ns: dict[str, Any] = {"__annotations__": annotations}
+    cls = type("VectorSample", (), ns)
+    cls = dataclass(cls)
+    cls = atdata.packable(cls)
+    return cls
+
+
+def _make_sample(cls: type, sample_data: dict[str, Any], schema_def: dict) -> Any:
+    """Create a sample instance from vector data."""
+    kwargs: dict[str, Any] = {}
+    field_types = {f["name"]: f for f in schema_def["fields"]}
+
+    for fname, fdef in field_types.items():
+        val = sample_data.get(fname)
+        if fdef["type"] == "ndarray" and val is not None:
+            dtype = fdef.get("dtype", "float32")
+            val = np.array(val, dtype=dtype)
+        kwargs[fname] = val
+
+    kwargs["__key__"] = sample_data["__key__"]
+    return cls(**kwargs)
+
+
+def _check_field_value(
+    actual: Any, expected: Any, field_name: str, key: str
+) -> None:
+    """Assert a single field value matches the expected value."""
+    if isinstance(expected, dict) and "dtype" in expected and "values" in expected:
+        # NDArray check
+        assert isinstance(actual, np.ndarray), (
+            f"Sample {key}.{field_name}: expected ndarray, got {type(actual)}"
+        )
+        np.testing.assert_array_almost_equal(
+            actual,
+            np.array(expected["values"], dtype=expected["dtype"]),
+            err_msg=f"Sample {key}.{field_name} values mismatch",
+        )
+        assert list(actual.shape) == expected["shape"], (
+            f"Sample {key}.{field_name} shape mismatch: "
+            f"{list(actual.shape)} != {expected['shape']}"
+        )
+    else:
+        assert actual == expected, (
+            f"Sample {key}.{field_name}: {actual!r} != {expected!r}"
+        )
+
+
+_vector_files = _collect_vector_files()
+
+
+@pytest.mark.skipif(
+    not _vector_files,
+    reason="No test vectors found (test-vectors/shard-roundtrip/ not present)",
+)
+@pytest.mark.parametrize(
+    "vector_path",
+    _vector_files,
+    ids=[p.stem for p in _vector_files],
+)
+def test_shard_roundtrip_vector(vector_path: Path, tmp_path: Path) -> None:
+    """Write samples from a test vector to a shard, read back, verify."""
+    vector = json.loads(vector_path.read_text(encoding="utf-8"))
+
+    schema_def = vector["inputs"]["schema"]
+    sample_cls = _make_packable_class(schema_def)
+
+    # Create sample instances
+    samples = [
+        _make_sample(sample_cls, s, schema_def)
+        for s in vector["inputs"]["samples"]
+    ]
+
+    # Write to shard
+    shard_path = tmp_path / "test-shard-000000.tar"
+    atdata.write_samples(samples, str(shard_path))
+
+    # Read back
+    ds = atdata.Dataset[sample_cls](str(shard_path))
+    read_samples = list(ds)
+
+    # Verify expected sample count
+    expected = vector["expected"]
+    assert len(read_samples) == expected["sample_count"], (
+        f"Expected {expected['sample_count']} samples, got {len(read_samples)}"
+    )
+
+    # Verify keys
+    read_keys = [s.__key__ for s in read_samples]
+    assert read_keys == expected["keys"], (
+        f"Key mismatch: {read_keys} != {expected['keys']}"
+    )
+
+    # Verify field values
+    samples_by_key = {s.__key__: s for s in read_samples}
+    for key, field_checks in expected.get("field_checks", {}).items():
+        sample = samples_by_key[key]
+        for field_name, expected_val in field_checks.items():
+            actual_val = getattr(sample, field_name)
+            _check_field_value(actual_val, expected_val, field_name, key)

From 92daeaa9f0860e91f79bfbec09de57a54a47e077 Mon Sep 17 00:00:00 2001
From: Maxine Levesque <220467675+maxine-at-forecast@users.noreply.github.com>
Date: Mon, 6 Apr 2026 13:40:31 -0700
Subject: [PATCH 2/3] style: remove unused import in test_vectors.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/test_vectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_vectors.py b/tests/test_vectors.py
index 9a60fdd..69a59b8 100644
--- a/tests/test_vectors.py
+++ b/tests/test_vectors.py
@@ -11,7 +11,7 @@
 from __future__ import annotations
 
 import json
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 

From ea3116501c81b4806eb36e97b6ddb1ea168e0135 Mon Sep 17 00:00:00 2001
From: Maxine Levesque <220467675+maxine-at-forecast@users.noreply.github.com>
Date: Tue, 7 Apr 2026 11:03:58 -0700
Subject: [PATCH 3/3] style: apply ruff formatting to client.py and
 test_vectors.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/atdata/atmosphere/client.py |  4 +---
 tests/test_vectors.py           | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/atdata/atmosphere/client.py b/src/atdata/atmosphere/client.py
index 29a87d2..9d42841 100644
--- a/src/atdata/atmosphere/client.py
+++ b/src/atdata/atmosphere/client.py
@@ -503,9 +503,7 @@ def _get_appview_client(self) -> Any:
         """
         if not hasattr(self, "_appview_client") or self._appview_client is None:
             Client = _get_atproto_client_class()
-            self._appview_client = Client(
-                base_url=self._get_generic_appview_url()
-            )
+            self._appview_client = Client(base_url=self._get_generic_appview_url())
         return self._appview_client
 
     # Low-level record operations
diff --git a/tests/test_vectors.py b/tests/test_vectors.py
index 69a59b8..fea0fc0 100644
--- a/tests/test_vectors.py
+++ b/tests/test_vectors.py
@@ -24,8 +24,16 @@
 _REPO_ROOT = Path(__file__).resolve().parent.parent
 _CANDIDATE_PATHS = [
     _REPO_ROOT / "test-vectors" / "shard-roundtrip",
-    _REPO_ROOT / "src" / "atdata" / "lexicons" / ".." / ".." / ".." / ".."
-    / "test-vectors" / "shard-roundtrip",
+    _REPO_ROOT
+    / "src"
+    / "atdata"
+    / "lexicons"
+    / ".."
+    / ".."
+    / ".."
+    / ".."
+    / "test-vectors"
+    / "shard-roundtrip",
 ]
 
 VECTORS_DIR: Path | None = None
@@ -87,9 +95,7 @@ def _make_sample(cls: type, sample_data: dict[str, Any], schema_def: dict) -> An
     return cls(**kwargs)
 
 
-def _check_field_value(
-    actual: Any, expected: Any, field_name: str, key: str
-) -> None:
+def _check_field_value(actual: Any, expected: Any, field_name: str, key: str) -> None:
     """Assert a single field value matches the expected value."""
     if isinstance(expected, dict) and "dtype" in expected and "values" in expected:
         # NDArray check
@@ -132,8 +138,7 @@ def test_shard_roundtrip_vector(vector_path: Path, tmp_path: Path) -> None:
 
     # Create sample instances
     samples = [
-        _make_sample(sample_cls, s, schema_def)
-        for s in vector["inputs"]["samples"]
+        _make_sample(sample_cls, s, schema_def) for s in vector["inputs"]["samples"]
     ]
 
     # Write to shard