
Commit 82c78a9

ci: modernize GitHub Actions with caching, concurrency, lint checks, and trusted publishing

- Add uv caching (enable-cache: true) for faster CI runs
- Add concurrency control to cancel in-progress runs on new commits
- Add ruff lint job (check + format) targeting src/ and tests/
- Switch to --locked flag for reproducible dependency resolution
- Add fail-fast: false to test matrix to see all failures
- Enable Codecov coverage upload
- Switch PyPI publishing to trusted publishing (OIDC)
- Split publish workflow into build and publish jobs with artifacts
- Fix all ruff lint issues (unused imports, undefined names)
- Format entire codebase with ruff format (42 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 926f4ab commit 82c78a9


48 files changed: +2081 −1217 lines changed

.chainlink/issues.db

0 Bytes
Binary file not shown.
Lines changed: 40 additions & 21 deletions

@@ -1,5 +1,3 @@
-#
-
 name: Build and upload package to PyPI
 
 on:
@@ -9,38 +7,59 @@ on:
 
 permissions:
   contents: read
+  id-token: write
 
-jobs:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
 
-  uv-build-release-pypi-publish:
-    name: "Build release distribution and publish to PyPI"
+jobs:
+  build:
+    name: Build release distribution
     runs-on: ubuntu-latest
-    environment:
-      name: pypi
-
+
     steps:
       - uses: actions/checkout@v5
-
-      - name: "Set up Python"
+
+      - name: Set up Python
         uses: actions/setup-python@v5
         with:
          python-version-file: "pyproject.toml"
 
       - name: Install uv
         uses: astral-sh/setup-uv@v6
-
+        with:
+          enable-cache: true
+
       - name: Install project
-        run: uv sync --all-extras --dev
-        # TODO Better to use --locked for author control over versions?
-        # run: uv sync --locked --all-extras --dev
-
+        run: uv sync --locked --all-extras --dev
+
       - name: Build release distributions
         run: uv build
-
-      - name: Publish to PyPI
-        env:
-          UV_PUBLISH_TOKEN: ${{ secrets.UV_PUBLISH_TOKEN }}
-        run: uv publish
 
+      - name: Upload dist artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
 
-##
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: build
+    environment:
+      name: pypi
+      url: https://pypi.org/project/atdata/
+
+    steps:
+      - name: Download dist artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Publish to PyPI
+        run: uv publish --trusted-publishing always dist/*
.github/workflows/uv-test.yml

Lines changed: 40 additions & 19 deletions

@@ -1,5 +1,3 @@
-#
-
 name: Run tests with `uv`
 
 on:
@@ -11,33 +9,60 @@ on:
     branches:
       - main
 
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
-  uv-test:
-    name: Run tests
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --dev
+
+      - name: Run ruff check
+        run: uv run ruff check src/ tests/
+
+      - name: Run ruff format check
+        run: uv run ruff format --check src/ tests/
+
+  test:
+    name: Test (py${{ matrix.python-version }}, redis${{ matrix.redis-version }})
     runs-on: ubuntu-latest
     environment:
       name: test
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.12, 3.13, 3.14]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         redis-version: [6, 7]
 
     steps:
       - uses: actions/checkout@v5
 
-      - name: "Set up Python"
+      - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          # python-version-file: "pyproject.toml"
 
       - name: Install uv
         uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
 
       - name: Install the project
-        run: uv sync --all-extras --dev
-        # TODO Better to use --locked for author control over versions?
-        # run: uv sync --locked --all-extras --dev
+        run: uv sync --locked --all-extras --dev
 
       - name: Start Redis
         uses: supercharge/redis-github-action@1.8.1
@@ -47,12 +72,8 @@ jobs:
       - name: Run tests with coverage
         run: uv run pytest --cov=atdata --cov-report=xml --cov-report=term
 
-      # - name: Upload coverage to Codecov
-      #   uses: codecov/codecov-action@v5
-      #   with:
-      #     # file: ./coverage.xml # Claude hallucination -- fascinating!
-      #     fail_ci_if_error: false
-      #     token: ${{ secrets.CODECOV_TOKEN }}
-
-
-#
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          fail_ci_if_error: false
+          token: ${{ secrets.CODECOV_TOKEN }}

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - **Comprehensive integration test suite**: 593 tests covering E2E flows, error handling, edge cases
 
 ### Changed
+- Review GitHub workflows and recommend CI improvements (#405)
 - Fix type signatures for Dataset.ordered and Dataset.shuffled (GH#28) (#404)
 - Investigate quartodoc Example section rendering - missing CSS classes on pre/code tags (#401)
 - Update all docstrings from Example: to Examples: format (#403)

src/atdata/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -88,4 +88,4 @@
 from . import atmosphere as atmosphere
 
 # CLI entry point
-from .cli import main as main
+from .cli import main as main

src/atdata/_cid.py

Lines changed: 6 additions & 2 deletions

@@ -64,7 +64,9 @@ def generate_cid(data: Any) -> str:
     # Build raw CID bytes:
     # CIDv1 = version(1) + codec(dag-cbor) + multihash
     # Multihash = code(sha256) + size(32) + digest
-    raw_cid_bytes = bytes([CID_VERSION_1, CODEC_DAG_CBOR, HASH_SHA256, SHA256_SIZE]) + sha256_hash
+    raw_cid_bytes = (
+        bytes([CID_VERSION_1, CODEC_DAG_CBOR, HASH_SHA256, SHA256_SIZE]) + sha256_hash
+    )
 
     # Encode to base32 multibase string
     return libipld.encode_cid(raw_cid_bytes)
@@ -87,7 +89,9 @@ def generate_cid_from_bytes(data_bytes: bytes) -> str:
     >>> cid = generate_cid_from_bytes(cbor_bytes)
     """
     sha256_hash = hashlib.sha256(data_bytes).digest()
-    raw_cid_bytes = bytes([CID_VERSION_1, CODEC_DAG_CBOR, HASH_SHA256, SHA256_SIZE]) + sha256_hash
+    raw_cid_bytes = (
+        bytes([CID_VERSION_1, CODEC_DAG_CBOR, HASH_SHA256, SHA256_SIZE]) + sha256_hash
+    )
     return libipld.encode_cid(raw_cid_bytes)
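The CID layout described in the comments above (version byte + codec byte + multihash) can be sketched without `libipld`. This is a hypothetical stand-in, not the project's actual encoder: it replaces `libipld.encode_cid` with the standard multibase base32 encoding for CIDv1 (`b` prefix, lowercase, padding stripped), built from Python's stdlib only.

```python
import base64
import hashlib

# Constants mirroring those used in _cid.py
CID_VERSION_1 = 0x01   # CIDv1 version byte
CODEC_DAG_CBOR = 0x71  # dag-cbor multicodec
HASH_SHA256 = 0x12     # sha2-256 multihash code
SHA256_SIZE = 0x20     # 32-byte digest length


def cid_from_bytes(data_bytes: bytes) -> str:
    """Build a CIDv1 string: version + codec + multihash(code, size, digest)."""
    digest = hashlib.sha256(data_bytes).digest()
    raw = bytes([CID_VERSION_1, CODEC_DAG_CBOR, HASH_SHA256, SHA256_SIZE]) + digest
    # Multibase base32: 'b' prefix, lowercase alphabet, '=' padding removed
    return "b" + base64.b32encode(raw).decode("ascii").lower().rstrip("=")


cid = cid_from_bytes(b"hello")
```

The raw CID is 36 bytes (4 header bytes + 32-byte digest), so the multibase string is always 59 characters.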

src/atdata/_helpers.py

Lines changed: 7 additions & 5 deletions

@@ -22,7 +22,8 @@
 
 ##
 
-def array_to_bytes( x: np.ndarray ) -> bytes:
+
+def array_to_bytes(x: np.ndarray) -> bytes:
     """Convert a numpy array to bytes for msgpack serialization.
 
     Uses numpy's native ``save()`` format to preserve array dtype and shape.
@@ -37,10 +38,11 @@ def array_to_bytes( x: np.ndarray ) -> bytes:
         Uses ``allow_pickle=True`` to support object dtypes.
     """
     np_bytes = BytesIO()
-    np.save( np_bytes, x, allow_pickle = True )
+    np.save(np_bytes, x, allow_pickle=True)
     return np_bytes.getvalue()
 
-def bytes_to_array( b: bytes ) -> np.ndarray:
+
+def bytes_to_array(b: bytes) -> np.ndarray:
     """Convert serialized bytes back to a numpy array.
 
     Reverses the serialization performed by ``array_to_bytes()``.
@@ -54,5 +56,5 @@ def bytes_to_array( b: bytes ) -> np.ndarray:
     Note:
         Uses ``allow_pickle=True`` to support object dtypes.
     """
-    np_bytes = BytesIO( b )
-    return np.load( np_bytes, allow_pickle = True )
+    np_bytes = BytesIO(b)
+    return np.load(np_bytes, allow_pickle=True)

src/atdata/_hf_api.py

Lines changed: 8 additions & 4 deletions

@@ -46,7 +46,6 @@
 
 if TYPE_CHECKING:
     from ._protocols import AbstractIndex
-    from .local import S3DataStore
 
 ##
 # Type variables
@@ -77,6 +76,7 @@ class DatasetDict(Generic[ST], dict):
     >>> for split_name, dataset in ds_dict.items():
     ...     print(f"{split_name}: {len(dataset.shard_list)} shards")
     """
+
     # TODO The above has a line for "Parameters:" that should be "Type Parameters:"; this is a temporary fix for `quartodoc` auto-generation bugs.
 
     def __init__(
@@ -464,7 +464,7 @@ def _resolve_indexed_path(
         data_urls = entry.data_urls
 
         # Check if index has a data store
-        if hasattr(index, 'data_store') and index.data_store is not None:
+        if hasattr(index, "data_store") and index.data_store is not None:
            store = index.data_store
 
            # Import here to avoid circular imports at module level
@@ -638,7 +638,9 @@ def load_dataset(
        source, schema_ref = _resolve_indexed_path(path, index)
 
        # Resolve sample_type from schema if not provided
-       resolved_type: Type = sample_type if sample_type is not None else index.decode_schema(schema_ref)
+       resolved_type: Type = (
+           sample_type if sample_type is not None else index.decode_schema(schema_ref)
+       )
 
        # Create dataset from the resolved source (includes credentials if S3)
        ds = Dataset[resolved_type](source)
@@ -647,7 +649,9 @@ def load_dataset(
            # Indexed datasets are single-split by default
            return ds
 
-       return DatasetDict({"train": ds}, sample_type=resolved_type, streaming=streaming)
+       return DatasetDict(
+           {"train": ds}, sample_type=resolved_type, streaming=streaming
+       )
 
    # Use DictSample as default when no type specified
    resolved_type = sample_type if sample_type is not None else DictSample

src/atdata/_protocols.py

Lines changed: 0 additions & 1 deletion

@@ -32,7 +32,6 @@
 from typing import (
     IO,
     Any,
-    ClassVar,
     Iterator,
     Optional,
     Protocol,

src/atdata/_schema_codec.py

Lines changed: 6 additions & 2 deletions

@@ -203,7 +203,9 @@ def schema_to_type(
         namespace={
             "__post_init__": lambda self: PackableSample.__post_init__(self),
             "__schema_version__": version,
-            "__schema_ref__": schema.get("$ref", None),  # Store original ref if available
+            "__schema_ref__": schema.get(
+                "$ref", None
+            ),  # Store original ref if available
         },
     )
 
@@ -239,7 +241,9 @@ def _field_type_to_stub_str(field_type: dict, optional: bool = False) -> str:
 
     if kind == "primitive":
         primitive = field_type.get("primitive", "str")
-        py_type = primitive  # str, int, float, bool, bytes are all valid Python type names
+        py_type = (
+            primitive  # str, int, float, bool, bytes are all valid Python type names
+        )
     elif kind == "ndarray":
         py_type = "NDArray[Any]"
     elif kind == "array":
