From 1359aec774387c58c0a715c2ea0909fbdb1286d8 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Wed, 30 Apr 2025 21:45:37 -0400
Subject: [PATCH 01/21] Add performance test of partial shard reads

---
 tests/test_codecs/test_sharding.py | 65 ++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index 403fd80e81..6efc3a251c 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -197,6 +197,71 @@ def test_sharding_partial_read(
     assert np.all(read_data == 1)
 
 
+@pytest.mark.slow_hypothesis
+@pytest.mark.parametrize("store", ["local"], indirect=["store"])
+def test_partial_shard_read_performance(store: Store) -> None:
+    import asyncio
+    import json
+    from functools import partial
+    from itertools import product
+    from timeit import timeit
+    from unittest.mock import AsyncMock
+
+    # The whole test array is a single shard to keep runtime manageable while
+    # using a realistic shard size (256 MiB uncompressed, ~115 MiB compressed).
+    # In practice, the array is likely to be much larger with many shards of this
+    # rough order of magnitude. There are 512 chunks per shard in this example.
+    array_shape = (512, 512, 512)
+    shard_shape = (512, 512, 512)  # 256 MiB uncompressed uint16s
+    chunk_shape = (64, 64, 64)  # 512 KiB uncompressed uint16s
+    dtype = np.uint16
+
+    a = zarr.create_array(
+        StorePath(store),
+        shape=array_shape,
+        chunks=chunk_shape,
+        shards=shard_shape,
+        compressors=BloscCodec(cname="zstd"),
+        dtype=dtype,
+        fill_value=np.iinfo(dtype).max,
+    )
+    # Narrow range of values lets zstd compress to about 1/2 of uncompressed size
+    a[:] = np.random.default_rng(123).integers(low=0, high=50, size=array_shape, dtype=dtype)
+
+    num_calls = 20
+    experiments = []
+    for concurrency, get_latency, statement in product(
+        [1, 10, 100], [0.0, 0.01], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]
+    ):
+        zarr.config.set({"async.concurrency": concurrency})
+
+        async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any:
+            await asyncio.sleep(get_latency)
+            return await store.get(*args, **kwargs)
+
+        store_mock = AsyncMock(wraps=store, spec=store.__class__)
+        store_mock.get.side_effect = partial(get_with_latency, get_latency=get_latency)
+
+        a = zarr.open_array(StorePath(store_mock))
+
+        store_mock.reset_mock()
+
+        # Each timeit call accesses a 512x512 slice covering 64 chunks
+        time = timeit(statement, number=num_calls, globals={"a": a}) / num_calls
+        experiments.append(
+            {
+                "concurrency": concurrency,
+                "statement": statement,
+                "get_latency": get_latency,
+                "time": time,
+                "store_get_calls": store_mock.get.call_count,
+            }
+        )
+
+    with open("zarr-python-partial-shard-read-performance-no-coalesce.json", "w") as f:
+        json.dump(experiments, f)
+
+
 @pytest.mark.parametrize(
     "array_fixture",
     [

From c7269944fde1bd9675771096992043fd0c95b715 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Sat, 19 Apr 2025 00:57:46 -0400
Subject: [PATCH 02/21] WIP Consolidate reads of multiple chunks in the same
 shard

Add test and make max gap and max coalesce size config options

Code clarity and comments

Test that chunk request coalescing reduces calls to store

Profile a few values for coalesce_max_gap

Update [doc]tests to include new sharding.read.* values

document sharded read config options in user-guide/config.rst

tweak logic: start new coalesced group if coalescing would exceed `coalesce_max_bytes`

previous logic only started a new group if the existing group's
size already exceeded coalesce_max_bytes.

set `mypy_path = "src"` to help pre-commit mypy find imported classes

Reorder methods in sharding.py, add docstring + commenting

wording docs fix

docstring clarification

trigger precommit on all python files changed in this pull request

trying to get the ruff format that's happening locally during pre-commit
to match the pre-commit run that is failing on CI.

revert trigger for pre-commit ruff format
---
 docs/user-guide/config.rst         |   8 +-
 pyproject.toml                     |   1 +
 src/zarr/codecs/sharding.py        | 169 ++++++++++++++++++++++++-----
 src/zarr/core/config.py            |   6 +
 tests/test_codecs/test_sharding.py |  88 ++++++++++++++-
 tests/test_config.py               |   8 ++
 6 files changed, 246 insertions(+), 34 deletions(-)

diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst
index 5a9d26f2b9..06b1e79473 100644
--- a/docs/user-guide/config.rst
+++ b/docs/user-guide/config.rst
@@ -33,6 +33,10 @@ Configuration options include the following:
 - Async and threading options, e.g. ``async.concurrency`` and ``threading.max_workers``
 - Selections of implementations of codecs, codec pipelines and buffers
 - Enabling GPU support with ``zarr.config.enable_gpu()``. See :ref:`user-guide-gpu` for more.
+- Tuning reads from sharded zarrs. When reading less than a complete shard, reads of nearby chunks
+  within the same shard will be combined into a single request if they are less than
+  ``sharding.read.coalesce_max_gap_bytes`` apart and the combined request size is less than
+  ``sharding.read.coalesce_max_bytes``.
 
 For selecting custom implementations of codecs, pipelines, buffers and ndbuffers,
 first register the implementations in the registry and then select them in the config.
@@ -79,4 +83,6 @@ This is the current default configuration::
    'default_zarr_format': 3,
    'json_indent': 2,
    'ndbuffer': 'zarr.buffer.cpu.NDBuffer',
-   'threading': {'max_workers': None}}
+   'sharding': {'read': {'coalesce_max_bytes': 104857600,
+                         'coalesce_max_gap_bytes': 1048576}},
+   'threading': {'max_workers': None}}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index a48a5eea25..cd5de2115e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -354,6 +354,7 @@ ignore = [
 [tool.mypy]
 python_version = "3.11"
 ignore_missing_imports = true
+mypy_path = "src"
 namespace_packages = false
 
 strict = true
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index cd8676b4d1..e7499f726f 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -38,11 +38,13 @@
 from zarr.core.common import (
     ChunkCoords,
     ChunkCoordsLike,
+    concurrent_map,
     parse_enum,
     parse_named_configuration,
     parse_shapelike,
     product,
 )
+from zarr.core.config import config
 from zarr.core.dtype.npy.int import UInt64
 from zarr.core.indexing import (
     BasicIndexer,
@@ -198,7 +200,9 @@ async def from_bytes(
 
     @classmethod
     def create_empty(
-        cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None
+        cls,
+        chunks_per_shard: ChunkCoords,
+        buffer_prototype: BufferPrototype | None = None,
     ) -> _ShardReader:
         if buffer_prototype is None:
             buffer_prototype = default_buffer_prototype()
@@ -248,7 +252,9 @@ def merge_with_morton_order(
 
     @classmethod
     def create_empty(
-        cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None
+        cls,
+        chunks_per_shard: ChunkCoords,
+        buffer_prototype: BufferPrototype | None = None,
     ) -> _ShardBuilder:
         if buffer_prototype is None:
             buffer_prototype = default_buffer_prototype()
@@ -329,9 +335,18 @@ async def finalize(
         return await shard_builder.finalize(index_location, index_encoder)
 
 
+class _ChunkCoordsByteSlice(NamedTuple):
+    """Holds a chunk's coordinates and its byte range in a serialized shard."""
+
+    coords: ChunkCoords
+    byte_slice: slice
+
+
 @dataclass(frozen=True)
 class ShardingCodec(
-    ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin
+    ArrayBytesCodec,
+    ArrayBytesCodecPartialDecodeMixin,
+    ArrayBytesCodecPartialEncodeMixin,
 ):
     chunk_shape: ChunkCoords
     codecs: tuple[Codec, ...]
@@ -508,32 +523,21 @@ async def _decode_partial_single(
         all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks}
 
         # reading bytes of all requested chunks
-        shard_dict: ShardMapping = {}
+        shard_dict_maybe: ShardMapping | None = {}
         if self._is_total_shard(all_chunk_coords, chunks_per_shard):
             # read entire shard
             shard_dict_maybe = await self._load_full_shard_maybe(
-                byte_getter=byte_getter,
-                prototype=chunk_spec.prototype,
-                chunks_per_shard=chunks_per_shard,
+                byte_getter, chunk_spec.prototype, chunks_per_shard
             )
-            if shard_dict_maybe is None:
-                return None
-            shard_dict = shard_dict_maybe
         else:
             # read some chunks within the shard
-            shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard)
-            if shard_index is None:
-                return None
-            shard_dict = {}
-            for chunk_coords in all_chunk_coords:
-                chunk_byte_slice = shard_index.get_chunk_slice(chunk_coords)
-                if chunk_byte_slice:
-                    chunk_bytes = await byte_getter.get(
-                        prototype=chunk_spec.prototype,
-                        byte_range=RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1]),
-                    )
-                    if chunk_bytes:
-                        shard_dict[chunk_coords] = chunk_bytes
+            shard_dict_maybe = await self._load_partial_shard_maybe(
+                byte_getter, chunk_spec.prototype, chunks_per_shard, all_chunk_coords
+            )
+
+        if shard_dict_maybe is None:
+            return None
+        shard_dict = shard_dict_maybe
 
         # decoding chunks and writing them into the output buffer
         await self.codec_pipeline.read(
@@ -615,7 +619,9 @@ async def _encode_partial_single(
 
         indexer = list(
             get_indexer(
-                selection, shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape)
+                selection,
+                shape=shard_shape,
+                chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape),
             )
         )
 
@@ -689,7 +695,8 @@ def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int:
             get_pipeline_class()
             .from_codecs(self.index_codecs)
             .compute_encoded_size(
-                16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard)
+                16 * product(chunks_per_shard),
+                self._get_index_chunk_spec(chunks_per_shard),
             )
         )
 
@@ -734,7 +741,8 @@ async def _load_shard_index_maybe(
             )
         else:
             index_bytes = await byte_getter.get(
-                prototype=numpy_buffer_prototype(), byte_range=SuffixByteRequest(shard_index_size)
+                prototype=numpy_buffer_prototype(),
+                byte_range=SuffixByteRequest(shard_index_size),
             )
         if index_bytes is not None:
             return await self._decode_shard_index(index_bytes, chunks_per_shard)
@@ -748,7 +756,10 @@ async def _load_shard_index(
         ) or _ShardIndex.create_empty(chunks_per_shard)
 
     async def _load_full_shard_maybe(
-        self, byte_getter: ByteGetter, prototype: BufferPrototype, chunks_per_shard: ChunkCoords
+        self,
+        byte_getter: ByteGetter,
+        prototype: BufferPrototype,
+        chunks_per_shard: ChunkCoords,
     ) -> _ShardReader | None:
         shard_bytes = await byte_getter.get(prototype=prototype)
 
@@ -758,6 +769,110 @@ async def _load_full_shard_maybe(
             else None
         )
 
+    async def _load_partial_shard_maybe(
+        self,
+        byte_getter: ByteGetter,
+        prototype: BufferPrototype,
+        chunks_per_shard: ChunkCoords,
+        all_chunk_coords: set[ChunkCoords],
+    ) -> ShardMapping | None:
+        """
+        Read chunks from `byte_getter` for the case where the read is less than a full shard.
+        Returns a mapping of chunk coordinates to bytes.
+        """
+        shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard)
+        if shard_index is None:
+            return None
+
+        chunks = [
+            _ChunkCoordsByteSlice(chunk_coords, slice(*chunk_byte_slice))
+            for chunk_coords in all_chunk_coords
+            # Drop chunks where index lookup fails
+            if (chunk_byte_slice := shard_index.get_chunk_slice(chunk_coords))
+        ]
+        if len(chunks) == 0:
+            return {}
+
+        groups = self._coalesce_chunks(chunks)
+
+        shard_dicts = await concurrent_map(
+            [(group, byte_getter, prototype) for group in groups],
+            self._get_group_bytes,
+            config.get("async.concurrency"),
+        )
+
+        shard_dict: ShardMutableMapping = {}
+        for d in shard_dicts:
+            shard_dict.update(d)
+
+        return shard_dict
+
+    def _coalesce_chunks(
+        self,
+        chunks: list[_ChunkCoordsByteSlice],
+    ) -> list[list[_ChunkCoordsByteSlice]]:
+        """
+        Combine chunks from a single shard into groups that should be read together
+        in a single request.
+
+        Respects the following configuration options:
+        - `sharding.read.coalesce_max_gap_bytes`: The maximum gap between
+          chunks to coalesce into a single group.
+        - `sharding.read.coalesce_max_bytes`: The maximum number of bytes in a group.
+        """
+        max_gap_bytes = config.get("sharding.read.coalesce_max_gap_bytes")
+        coalesce_max_bytes = config.get("sharding.read.coalesce_max_bytes")
+
+        sorted_chunks = sorted(chunks, key=lambda c: c.byte_slice.start)
+
+        groups = []
+        current_group = [sorted_chunks[0]]
+
+        for chunk in sorted_chunks[1:]:
+            gap_to_chunk = chunk.byte_slice.start - current_group[-1].byte_slice.stop
+            size_if_coalesced = chunk.byte_slice.stop - current_group[0].byte_slice.start
+            if gap_to_chunk < max_gap_bytes and size_if_coalesced < coalesce_max_bytes:
+                current_group.append(chunk)
+            else:
+                groups.append(current_group)
+                current_group = [chunk]
+
+        groups.append(current_group)
+
+        return groups
+
+    async def _get_group_bytes(
+        self,
+        group: list[_ChunkCoordsByteSlice],
+        byte_getter: ByteGetter,
+        prototype: BufferPrototype,
+    ) -> ShardMapping:
+        """
+        Reads a possibly coalesced group of one or more chunks from a shard.
+        Returns a mapping of chunk coordinates to bytes.
+        """
+        group_start = group[0].byte_slice.start
+        group_end = group[-1].byte_slice.stop
+
+        # A single call to retrieve the bytes for the entire group.
+        group_bytes = await byte_getter.get(
+            prototype=prototype,
+            byte_range=RangeByteRequest(group_start, group_end),
+        )
+        if group_bytes is None:
+            return {}
+
+        # Extract the bytes corresponding to each chunk in group from group_bytes.
+        shard_dict = {}
+        for chunk in group:
+            chunk_slice = slice(
+                chunk.byte_slice.start - group_start,
+                chunk.byte_slice.stop - group_start,
+            )
+            shard_dict[chunk.coords] = group_bytes[chunk_slice]
+
+        return shard_dict
+
     def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int:
         chunks_per_shard = self._get_chunks_per_shard(shard_spec)
         return input_byte_length + self._shard_index_size(chunks_per_shard)
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 05d048ef74..993eaa919d 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -111,6 +111,12 @@ def enable_gpu(self) -> ConfigSet:
             },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
+            "sharding": {
+                "read": {
+                    "coalesce_max_bytes": 100 * 2**20,  # 100MiB
+                    "coalesce_max_gap_bytes": 2**20,  # 1MiB
+                }
+            },
             "json_indent": 2,
             "codec_pipeline": {
                 "path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index 6efc3a251c..cb14ee97dc 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -1,5 +1,6 @@
 import pickle
 from typing import Any
+from unittest.mock import AsyncMock
 
 import numpy as np
 import numpy.typing as npt
@@ -9,7 +10,7 @@
 import zarr.api
 import zarr.api.asynchronous
 from zarr import Array
-from zarr.abc.store import Store
+from zarr.abc.store import RangeByteRequest, Store, SuffixByteRequest
 from zarr.codecs import (
     BloscCodec,
     ShardingCodec,
@@ -197,6 +198,7 @@ def test_sharding_partial_read(
     assert np.all(read_data == 1)
 
 
+@pytest.mark.skip("This is profiling rather than a test")
 @pytest.mark.slow_hypothesis
 @pytest.mark.parametrize("store", ["local"], indirect=["store"])
 def test_partial_shard_read_performance(store: Store) -> None:
@@ -230,10 +232,18 @@ def test_partial_shard_read_performance(store: Store) -> None:
 
     num_calls = 20
     experiments = []
-    for concurrency, get_latency, statement in product(
-        [1, 10, 100], [0.0, 0.01], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]
+    for concurrency, get_latency, coalesce_max_gap, statement in product(
+        [1, 10, 100],
+        [0.0, 0.01],
+        [-1, 2**20, 10 * 2**20],
+        ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"],
     ):
-        zarr.config.set({"async.concurrency": concurrency})
+        zarr.config.set(
+            {
+                "async.concurrency": concurrency,
+                "sharding.read.coalesce_max_gap_bytes": coalesce_max_gap,
+            }
+        )
 
         async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any:
             await asyncio.sleep(get_latency)
@@ -251,17 +261,83 @@ async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any
         experiments.append(
             {
                 "concurrency": concurrency,
-                "statement": statement,
+                "coalesce_max_gap": coalesce_max_gap,
                 "get_latency": get_latency,
+                "statement": statement,
                 "time": time,
                 "store_get_calls": store_mock.get.call_count,
             }
         )
 
-    with open("zarr-python-partial-shard-read-performance-no-coalesce.json", "w") as f:
+    with open("zarr-python-partial-shard-read-performance-with-coalesce.json", "w") as f:
         json.dump(experiments, f)
 
 
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+@pytest.mark.parametrize("coalesce_reads", [True, False])
+def test_sharding_multiple_chunks_partial_shard_read(
+    store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
+) -> None:
+    array_shape = (16, 64)
+    shard_shape = (8, 32)
+    chunk_shape = (2, 4)
+    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
+
+    if coalesce_reads:
+        # 1MiB, enough to coalesce all chunks within a shard in this example
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
+    else:
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})  # disable coalescing
+
+    store_mock = AsyncMock(wraps=store, spec=store.__class__)
+    a = zarr.create_array(
+        StorePath(store_mock),
+        shape=data.shape,
+        chunks=chunk_shape,
+        shards={"shape": shard_shape, "index_location": index_location},
+        compressors=BloscCodec(cname="lz4"),
+        dtype=data.dtype,
+        fill_value=1,
+    )
+    a[:] = data
+
+    store_mock.reset_mock()  # ignore store calls during array creation
+
+    # Reads 3 (2 full, 1 partial) chunks each from 2 shards (a subset of both shards)
+    # for a total of 6 chunks accessed
+    assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32"))
+
+    if coalesce_reads:
+        # 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
+        assert store_mock.get.call_count == 4
+    else:
+        # 2 shard index requests + 6 chunks
+        assert store_mock.get.call_count == 8
+
+    for method, args, kwargs in store_mock.method_calls:
+        assert method == "get"
+        assert args[0].startswith("c/")  # get from a chunk
+        assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
+
+    store_mock.reset_mock()
+
+    # Reads 4 chunks from both shards along dimension 0 for a total of 8 chunks accessed
+    assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32"))
+
+    if coalesce_reads:
+        # 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
+        assert store_mock.get.call_count == 4
+    else:
+        # 2 shard index requests + 8 chunks
+        assert store_mock.get.call_count == 10
+
+    for method, args, kwargs in store_mock.method_calls:
+        assert method == "get"
+        assert args[0].startswith("c/")  # get from a chunk
+        assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
+
+
 @pytest.mark.parametrize(
     "array_fixture",
     [
diff --git a/tests/test_config.py b/tests/test_config.py
index e267601272..0c941ee62e 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -103,6 +103,12 @@ def test_config_defaults_set() -> None:
                 },
                 "buffer": "zarr.buffer.cpu.Buffer",
                 "ndbuffer": "zarr.buffer.cpu.NDBuffer",
+                "sharding": {
+                    "read": {
+                        "coalesce_max_bytes": 100 * 2**20,  # 100 MiB
+                        "coalesce_max_gap_bytes": 2**20,  # 1 MiB
+                    }
+                },
             }
         ]
     )
@@ -111,6 +117,8 @@ def test_config_defaults_set() -> None:
     assert config.get("async.timeout") is None
     assert config.get("codec_pipeline.batch_size") == 1
     assert config.get("json_indent") == 2
+    assert config.get("sharding.read.coalesce_max_bytes") == 100 * 2**20  # 100 MiB
+    assert config.get("sharding.read.coalesce_max_gap_bytes") == 2**20  # 1 MiB
 
 
 @pytest.mark.parametrize(

From 44d9ce4d8c655588ee12beb79085ac809fc7cfcb Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 2 Jun 2025 14:16:05 -0400
Subject: [PATCH 03/21] Add changes/3004.feature.rst

---
 changes/3004.feature.rst | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 changes/3004.feature.rst

diff --git a/changes/3004.feature.rst b/changes/3004.feature.rst
new file mode 100644
index 0000000000..b15a5ec943
--- /dev/null
+++ b/changes/3004.feature.rst
@@ -0,0 +1,3 @@
+Optimizes reading more than one, but not all, chunks from a shard. Chunks are now read in parallel
+and reads of nearby chunks within the same shard are combined to reduce the number of calls to the store.
+See :ref:`user-guide-config` for more details.
\ No newline at end of file

From 009ce6a1abfd9d1d6b32bd51c2e7dde4ecbaa592 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 2 Jun 2025 22:10:56 -0400
Subject: [PATCH 04/21] Consistently return None on failure and test partial
 shard read failure modes

Use a range of integers, not a slice, as out_selection in CoordinateIndexer
to fix an issue when using vindex with repeated indexes in the indexer

test: improve formatting and add debugging breakpoint in array property tests

test: disable hypothesis deadline for test_array_roundtrip to prevent timeout

fix: initialize decode buffers with shard_spec.fill_value instead of 0 to fix partial shard holes

style: reformat code for improved readability and consistency in sharding.py

fix: revert incorrect RangeByteRequest length fix in sharding byte retrieval
---
 src/zarr/codecs/sharding.py        |  72 +++++++++++-----
 src/zarr/core/indexing.py          |   2 +-
 tests/test_codecs/test_sharding.py | 128 +++++++++++++++++++++++++++--
 tests/test_properties.py           |  49 +++++++----
 4 files changed, 206 insertions(+), 45 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index e7499f726f..ec4fe476f6 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -90,9 +90,9 @@ async def get(
         self, prototype: BufferPrototype, byte_range: ByteRequest | None = None
     ) -> Buffer | None:
         assert byte_range is None, "byte_range is not supported within shards"
-        assert prototype == default_buffer_prototype(), (
-            f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}"
-        )
+        assert (
+            prototype == default_buffer_prototype()
+        ), f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}"
         return self.shard_dict.get(self.chunk_coords)
 
 
@@ -124,7 +124,9 @@ def chunks_per_shard(self) -> ChunkCoords:
     def _localize_chunk(self, chunk_coords: ChunkCoords) -> ChunkCoords:
         return tuple(
             chunk_i % shard_i
-            for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape, strict=False)
+            for chunk_i, shard_i in zip(
+                chunk_coords, self.offsets_and_lengths.shape, strict=False
+            )
         )
 
     def is_all_empty(self) -> bool:
@@ -141,7 +143,9 @@ def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None:
         else:
             return (int(chunk_start), int(chunk_start + chunk_len))
 
-    def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None:
+    def set_chunk_slice(
+        self, chunk_coords: ChunkCoords, chunk_slice: slice | None
+    ) -> None:
         localized_chunk = self._localize_chunk(chunk_coords)
         if chunk_slice is None:
             self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64)
@@ -163,7 +167,11 @@ def is_dense(self, chunk_byte_length: int) -> bool:
 
         # Are all non-empty offsets unique?
         if len(
-            {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64}
+            {
+                offset
+                for offset, _ in sorted_offsets_and_lengths
+                if offset != MAX_UINT_64
+            }
         ) != len(sorted_offsets_and_lengths):
             return False
 
@@ -267,7 +275,9 @@ def __setitem__(self, chunk_coords: ChunkCoords, value: Buffer) -> None:
         chunk_start = len(self.buf)
         chunk_length = len(value)
         self.buf += value
-        self.index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length))
+        self.index.set_chunk_slice(
+            chunk_coords, slice(chunk_start, chunk_start + chunk_length)
+        )
 
     def __delitem__(self, chunk_coords: ChunkCoords) -> None:
         raise NotImplementedError
@@ -281,7 +291,9 @@ async def finalize(
         if index_location == ShardingCodecIndexLocation.start:
             empty_chunks_mask = self.index.offsets_and_lengths[..., 0] == MAX_UINT_64
             self.index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes)
-            index_bytes = await index_encoder(self.index)  # encode again with corrected offsets
+            index_bytes = await index_encoder(
+                self.index
+            )  # encode again with corrected offsets
             out_buf = index_bytes + self.buf
         else:
             out_buf = self.buf + index_bytes
@@ -359,7 +371,8 @@ def __init__(
         chunk_shape: ChunkCoordsLike,
         codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),),
         index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()),
-        index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end,
+        index_location: ShardingCodecIndexLocation
+        | str = ShardingCodecIndexLocation.end,
     ) -> None:
         chunk_shape_parsed = parse_shapelike(chunk_shape)
         codecs_parsed = parse_codecs(codecs)
@@ -389,7 +402,9 @@ def __setstate__(self, state: dict[str, Any]) -> None:
         object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"]))
         object.__setattr__(self, "codecs", parse_codecs(config["codecs"]))
         object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"]))
-        object.__setattr__(self, "index_location", parse_index_location(config["index_location"]))
+        object.__setattr__(
+            self, "index_location", parse_index_location(config["index_location"])
+        )
 
         # Use instance-local lru_cache to avoid memory leaks
         # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec))
@@ -418,7 +433,9 @@ def to_dict(self) -> dict[str, JSON]:
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         shard_spec = self._get_chunk_spec(array_spec)
-        evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs)
+        evolved_codecs = tuple(
+            c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs
+        )
         if evolved_codecs != self.codecs:
             return replace(self, codecs=evolved_codecs)
         return self
@@ -469,7 +486,7 @@ async def _decode_single(
             shape=shard_shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=0,
+            fill_value=shard_spec.fill_value,
         )
         shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)
 
@@ -516,7 +533,7 @@ async def _decode_partial_single(
             shape=indexer.shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=0,
+            fill_value=shard_spec.fill_value,
         )
 
         indexed_chunks = list(indexer)
@@ -593,7 +610,9 @@ async def _encode_single(
             shard_array,
         )
 
-        return await shard_builder.finalize(self.index_location, self._encode_shard_index)
+        return await shard_builder.finalize(
+            self.index_location, self._encode_shard_index
+        )
 
     async def _encode_partial_single(
         self,
@@ -653,7 +672,8 @@ def _is_total_shard(
         self, all_chunk_coords: set[ChunkCoords], chunks_per_shard: ChunkCoords
     ) -> bool:
         return len(all_chunk_coords) == product(chunks_per_shard) and all(
-            chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard)
+            chunk_coords in all_chunk_coords
+            for chunk_coords in c_order_iter(chunks_per_shard)
         )
 
     async def _decode_shard_index(
@@ -679,7 +699,9 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
                 .encode(
                     [
                         (
-                            get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths),
+                            get_ndbuffer_class().from_numpy_array(
+                                index.offsets_and_lengths
+                            ),
                             self._get_index_chunk_spec(index.chunks_per_shard),
                         )
                     ],
@@ -790,8 +812,8 @@ async def _load_partial_shard_maybe(
             # Drop chunks where index lookup fails
             if (chunk_byte_slice := shard_index.get_chunk_slice(chunk_coords))
         ]
-        if len(chunks) == 0:
-            return {}
+        if len(chunks) < len(all_chunk_coords):
+            return None
 
         groups = self._coalesce_chunks(chunks)
 
@@ -803,6 +825,8 @@ async def _load_partial_shard_maybe(
 
         shard_dict: ShardMutableMapping = {}
         for d in shard_dicts:
+            if d is None:
+                return None
             shard_dict.update(d)
 
         return shard_dict
@@ -830,7 +854,9 @@ def _coalesce_chunks(
 
         for chunk in sorted_chunks[1:]:
             gap_to_chunk = chunk.byte_slice.start - current_group[-1].byte_slice.stop
-            size_if_coalesced = chunk.byte_slice.stop - current_group[0].byte_slice.start
+            size_if_coalesced = (
+                chunk.byte_slice.stop - current_group[0].byte_slice.start
+            )
             if gap_to_chunk < max_gap_bytes and size_if_coalesced < coalesce_max_bytes:
                 current_group.append(chunk)
             else:
@@ -846,7 +872,7 @@ async def _get_group_bytes(
         group: list[_ChunkCoordsByteSlice],
         byte_getter: ByteGetter,
         prototype: BufferPrototype,
-    ) -> ShardMapping:
+    ) -> ShardMapping | None:
         """
         Reads a possibly coalesced group of one or more chunks from a shard.
         Returns a mapping of chunk coordinates to bytes.
@@ -860,7 +886,7 @@ async def _get_group_bytes(
             byte_range=RangeByteRequest(group_start, group_end),
         )
         if group_bytes is None:
-            return {}
+            return None
 
         # Extract the bytes corresponding to each chunk in group from group_bytes.
         shard_dict = {}
@@ -873,7 +899,9 @@ async def _get_group_bytes(
 
         return shard_dict
 
-    def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int:
+    def compute_encoded_size(
+        self, input_byte_length: int, shard_spec: ArraySpec
+    ) -> int:
         chunks_per_shard = self._get_chunks_per_shard(shard_spec)
         return input_byte_length + self._shard_index_size(chunks_per_shard)
 
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index c11889f7f4..0e0bb664d8 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -1193,7 +1193,7 @@ def __iter__(self) -> Iterator[ChunkProjection]:
             stop = self.chunk_nitems_cumsum[chunk_rix]
             out_selection: slice | npt.NDArray[np.intp]
             if self.sel_sort is None:
-                out_selection = slice(start, stop)
+                out_selection = np.arange(start, stop)
             else:
                 out_selection = self.sel_sort[start:stop]
 
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index cb14ee97dc..dbe64a32d5 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -111,7 +111,9 @@ def test_sharding_scalar(
     indirect=["array_fixture"],
 )
 def test_sharding_partial(
-    store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation
+    store: Store,
+    array_fixture: npt.NDArray[Any],
+    index_location: ShardingCodecIndexLocation,
 ) -> None:
     data = array_fixture
     spath = StorePath(store)
@@ -147,7 +149,9 @@ def test_sharding_partial(
     indirect=["array_fixture"],
 )
 def test_sharding_partial_readwrite(
-    store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation
+    store: Store,
+    array_fixture: npt.NDArray[Any],
+    index_location: ShardingCodecIndexLocation,
 ) -> None:
     data = array_fixture
     spath = StorePath(store)
@@ -179,7 +183,9 @@ def test_sharding_partial_readwrite(
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
 def test_sharding_partial_read(
-    store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation
+    store: Store,
+    array_fixture: npt.NDArray[Any],
+    index_location: ShardingCodecIndexLocation,
 ) -> None:
     data = array_fixture
     spath = StorePath(store)
@@ -338,6 +344,114 @@ def test_sharding_multiple_chunks_partial_shard_read(
         assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
 
 
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+def test_sharding_partial_shard_read__index_load_fails(
+    store: Store, index_location: ShardingCodecIndexLocation
+) -> None:
+    """Test fill value is returned when the call to the store to load the bytes of the shard's chunk index fails."""
+    array_shape = (16,)
+    shard_shape = (16,)
+    chunk_shape = (8,)
+    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
+    fill_value = -999
+
+    store_mock = AsyncMock(wraps=store, spec=store.__class__)
+    # loading the index is the first call to .get() so returning None will simulate an index load failure
+    store_mock.get.return_value = None
+
+    a = zarr.create_array(
+        StorePath(store_mock),
+        shape=data.shape,
+        chunks=chunk_shape,
+        shards={"shape": shard_shape, "index_location": index_location},
+        compressors=BloscCodec(cname="lz4"),
+        dtype=data.dtype,
+        fill_value=fill_value,
+    )
+    a[:] = data
+
+    # Read from one of two chunks in a shard to test the partial shard read path
+    assert a[0] == fill_value
+    assert a[0] != data[0]
+
+
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+def test_sharding_partial_shard_read__index_chunk_slice_fails(
+    store: Store,
+    index_location: ShardingCodecIndexLocation,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Test fill value is returned when looking up a chunk's byte slice within a shard fails."""
+    array_shape = (16,)
+    shard_shape = (16,)
+    chunk_shape = (8,)
+    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
+    fill_value = -999
+
+    monkeypatch.setattr(
+        "zarr.codecs.sharding._ShardIndex.get_chunk_slice",
+        lambda self, chunk_coords: None,
+    )
+
+    a = zarr.create_array(
+        StorePath(store),
+        shape=data.shape,
+        chunks=chunk_shape,
+        shards={"shape": shard_shape, "index_location": index_location},
+        compressors=BloscCodec(cname="lz4"),
+        dtype=data.dtype,
+        fill_value=fill_value,
+    )
+    a[:] = data
+
+    # Read from one of two chunks in a shard to test the partial shard read path
+    assert a[0] == fill_value
+    assert a[0] != data[0]
+
+
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+def test_sharding_partial_shard_read__chunk_load_fails(
+    store: Store, index_location: ShardingCodecIndexLocation
+) -> None:
+    """Test fill value is returned when the call to the store to load a chunk's bytes fails."""
+    array_shape = (16,)
+    shard_shape = (16,)
+    chunk_shape = (8,)
+    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
+    fill_value = -999
+
+    store_mock = AsyncMock(wraps=store, spec=store.__class__)
+
+    a = zarr.create_array(
+        StorePath(store_mock),
+        shape=data.shape,
+        chunks=chunk_shape,
+        shards={"shape": shard_shape, "index_location": index_location},
+        compressors=BloscCodec(cname="lz4"),
+        dtype=data.dtype,
+        fill_value=fill_value,
+    )
+    a[:] = data
+
+    # Set up store mock after array creation to only modify calls during array indexing
+    # Succeed on first call (index load), fail on subsequent calls (chunk loads)
+    async def first_success_then_fail(*args: Any, **kwargs: Any) -> Any:
+        if store_mock.get.call_count == 1:
+            return await store.get(*args, **kwargs)
+        else:
+            return None
+
+    store_mock.get.reset_mock()
+    store_mock.get.side_effect = first_success_then_fail
+
+    # Read from one of two chunks in a shard to test the partial shard read path
+    assert a[0] == fill_value
+    assert a[0] != data[0]
+
+
 @pytest.mark.parametrize(
     "array_fixture",
     [
@@ -348,7 +462,9 @@ def test_sharding_multiple_chunks_partial_shard_read(
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
 def test_sharding_partial_overwrite(
-    store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation
+    store: Store,
+    array_fixture: npt.NDArray[Any],
+    index_location: ShardingCodecIndexLocation,
 ) -> None:
     data = array_fixture[:10, :10, :10]
     spath = StorePath(store)
@@ -578,7 +694,9 @@ async def test_sharding_with_empty_inner_chunk(
 )
 @pytest.mark.parametrize("chunks_per_shard", [(5, 2), (2, 5), (5, 5)])
 async def test_sharding_with_chunks_per_shard(
-    store: Store, index_location: ShardingCodecIndexLocation, chunks_per_shard: tuple[int]
+    store: Store,
+    index_location: ShardingCodecIndexLocation,
+    chunks_per_shard: tuple[int],
 ) -> None:
     chunk_shape = (2, 1)
     shape = tuple(x * y for x, y in zip(chunks_per_shard, chunk_shape, strict=False))
diff --git a/tests/test_properties.py b/tests/test_properties.py
index b8d50ef0b1..de302c56b0 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -79,8 +79,14 @@ def deep_equal(a: Any, b: Any) -> bool:
 @given(data=st.data(), zarr_format=zarr_formats)
 def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None:
     nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format)))
-    zarray = data.draw(arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format)))
-    assert_array_equal(nparray, zarray[:])
+    zarray = data.draw(
+        arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))
+    )
+    try:
+        assert_array_equal(nparray, zarray[:])
+    except Exception as e:
+        breakpoint()
+        raise e
 
 
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@@ -92,12 +98,20 @@ def test_array_creates_implicit_groups(array):
         parent = "/".join(ancestry[: i + 1])
         if array.metadata.zarr_format == 2:
             assert (
-                sync(array.store.get(f"{parent}/.zgroup", prototype=default_buffer_prototype()))
+                sync(
+                    array.store.get(
+                        f"{parent}/.zgroup", prototype=default_buffer_prototype()
+                    )
+                )
                 is not None
             )
         elif array.metadata.zarr_format == 3:
             assert (
-                sync(array.store.get(f"{parent}/zarr.json", prototype=default_buffer_prototype()))
+                sync(
+                    array.store.get(
+                        f"{parent}/zarr.json", prototype=default_buffer_prototype()
+                    )
+                )
                 is not None
             )
 
@@ -115,7 +129,9 @@ def test_basic_indexing(data: st.DataObject) -> None:
     actual = zarray[indexer]
     assert_array_equal(nparray[indexer], actual)
 
-    new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype))
+    new_data = data.draw(
+        numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)
+    )
     zarray[indexer] = new_data
     nparray[indexer] = new_data
     assert_array_equal(nparray, zarray[:])
@@ -137,7 +153,9 @@ def test_oindex(data: st.DataObject) -> None:
         if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size:
             # behaviour of setitem with repeated indices is not guaranteed in practice
             assume(False)
-    new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype))
+    new_data = data.draw(
+        numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)
+    )
     nparray[npindexer] = new_data
     zarray.oindex[zindexer] = new_data
     assert_array_equal(nparray, zarray[:])
@@ -152,20 +170,13 @@ def test_vindex(data: st.DataObject) -> None:
 
     indexer = data.draw(
         npst.integer_array_indices(
-            shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None)
+            shape=nparray.shape,
+            result_shape=npst.array_shapes(min_side=1, max_dims=None),
         )
     )
     actual = zarray.vindex[indexer]
     assert_array_equal(nparray[indexer], actual)
 
-    # FIXME!
-    # when the indexer is such that a value gets overwritten multiple times,
-    # I think the output depends on chunking.
-    # new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
-    # nparray[indexer] = new_data
-    # zarray.vindex[indexer] = new_data
-    # assert_array_equal(nparray, zarray[:])
-
 
 @given(store=stores, meta=array_metadata())  # type: ignore[misc]
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@@ -220,7 +231,9 @@ def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: in
     orig = metadata.to_dict()
     rt = metadata_roundtripped.to_dict()
 
-    assert deep_equal(orig, rt), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}"
+    assert deep_equal(
+        orig, rt
+    ), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}"
 
 
 # @st.composite
@@ -320,7 +333,9 @@ def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> N
     # version-specific validations
     if isinstance(meta, ArrayV2Metadata):
         assert asdict_dict["filters"] != ()
-        assert asdict_dict["filters"] is None or isinstance(asdict_dict["filters"], tuple)
+        assert asdict_dict["filters"] is None or isinstance(
+            asdict_dict["filters"], tuple
+        )
         assert asdict_dict["zarr_format"] == 2
     else:
         assert asdict_dict["zarr_format"] == 3

From c65cf828eef63a563ada3ea7f1797c0f8f7b4439 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 16:47:46 -0400
Subject: [PATCH 05/21] Fix and test for case where some chunks in shard are
 all fill

---
 src/zarr/codecs/sharding.py        | 61 +++++++-----------------
 tests/test_codecs/test_sharding.py | 76 ++++++++++++++++++++++++++++--
 tests/test_properties.py           | 68 +++++---------------------
 3 files changed, 102 insertions(+), 103 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index ec4fe476f6..8b64e68130 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -90,9 +90,9 @@ async def get(
         self, prototype: BufferPrototype, byte_range: ByteRequest | None = None
     ) -> Buffer | None:
         assert byte_range is None, "byte_range is not supported within shards"
-        assert (
-            prototype == default_buffer_prototype()
-        ), f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}"
+        assert prototype == default_buffer_prototype(), (
+            f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}"
+        )
         return self.shard_dict.get(self.chunk_coords)
 
 
@@ -124,9 +124,7 @@ def chunks_per_shard(self) -> ChunkCoords:
     def _localize_chunk(self, chunk_coords: ChunkCoords) -> ChunkCoords:
         return tuple(
             chunk_i % shard_i
-            for chunk_i, shard_i in zip(
-                chunk_coords, self.offsets_and_lengths.shape, strict=False
-            )
+            for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape, strict=False)
         )
 
     def is_all_empty(self) -> bool:
@@ -143,9 +141,7 @@ def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None:
         else:
             return (int(chunk_start), int(chunk_start + chunk_len))
 
-    def set_chunk_slice(
-        self, chunk_coords: ChunkCoords, chunk_slice: slice | None
-    ) -> None:
+    def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None:
         localized_chunk = self._localize_chunk(chunk_coords)
         if chunk_slice is None:
             self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64)
@@ -167,11 +163,7 @@ def is_dense(self, chunk_byte_length: int) -> bool:
 
         # Are all non-empty offsets unique?
         if len(
-            {
-                offset
-                for offset, _ in sorted_offsets_and_lengths
-                if offset != MAX_UINT_64
-            }
+            {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64}
         ) != len(sorted_offsets_and_lengths):
             return False
 
@@ -275,9 +267,7 @@ def __setitem__(self, chunk_coords: ChunkCoords, value: Buffer) -> None:
         chunk_start = len(self.buf)
         chunk_length = len(value)
         self.buf += value
-        self.index.set_chunk_slice(
-            chunk_coords, slice(chunk_start, chunk_start + chunk_length)
-        )
+        self.index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length))
 
     def __delitem__(self, chunk_coords: ChunkCoords) -> None:
         raise NotImplementedError
@@ -291,9 +281,7 @@ async def finalize(
         if index_location == ShardingCodecIndexLocation.start:
             empty_chunks_mask = self.index.offsets_and_lengths[..., 0] == MAX_UINT_64
             self.index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes)
-            index_bytes = await index_encoder(
-                self.index
-            )  # encode again with corrected offsets
+            index_bytes = await index_encoder(self.index)  # encode again with corrected offsets
             out_buf = index_bytes + self.buf
         else:
             out_buf = self.buf + index_bytes
@@ -371,8 +359,7 @@ def __init__(
         chunk_shape: ChunkCoordsLike,
         codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),),
         index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()),
-        index_location: ShardingCodecIndexLocation
-        | str = ShardingCodecIndexLocation.end,
+        index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end,
     ) -> None:
         chunk_shape_parsed = parse_shapelike(chunk_shape)
         codecs_parsed = parse_codecs(codecs)
@@ -402,9 +389,7 @@ def __setstate__(self, state: dict[str, Any]) -> None:
         object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"]))
         object.__setattr__(self, "codecs", parse_codecs(config["codecs"]))
         object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"]))
-        object.__setattr__(
-            self, "index_location", parse_index_location(config["index_location"])
-        )
+        object.__setattr__(self, "index_location", parse_index_location(config["index_location"]))
 
         # Use instance-local lru_cache to avoid memory leaks
         # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec))
@@ -433,9 +418,7 @@ def to_dict(self) -> dict[str, JSON]:
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         shard_spec = self._get_chunk_spec(array_spec)
-        evolved_codecs = tuple(
-            c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs
-        )
+        evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs)
         if evolved_codecs != self.codecs:
             return replace(self, codecs=evolved_codecs)
         return self
@@ -610,9 +593,7 @@ async def _encode_single(
             shard_array,
         )
 
-        return await shard_builder.finalize(
-            self.index_location, self._encode_shard_index
-        )
+        return await shard_builder.finalize(self.index_location, self._encode_shard_index)
 
     async def _encode_partial_single(
         self,
@@ -672,8 +653,7 @@ def _is_total_shard(
         self, all_chunk_coords: set[ChunkCoords], chunks_per_shard: ChunkCoords
     ) -> bool:
         return len(all_chunk_coords) == product(chunks_per_shard) and all(
-            chunk_coords in all_chunk_coords
-            for chunk_coords in c_order_iter(chunks_per_shard)
+            chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard)
         )
 
     async def _decode_shard_index(
@@ -699,9 +679,7 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
                 .encode(
                     [
                         (
-                            get_ndbuffer_class().from_numpy_array(
-                                index.offsets_and_lengths
-                            ),
+                            get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths),
                             self._get_index_chunk_spec(index.chunks_per_shard),
                         )
                     ],
@@ -810,9 +788,10 @@ async def _load_partial_shard_maybe(
             _ChunkCoordsByteSlice(chunk_coords, slice(*chunk_byte_slice))
             for chunk_coords in all_chunk_coords
             # Drop chunks where index lookup fails
+            # e.g. when write_empty_chunks = False and the chunk is empty
             if (chunk_byte_slice := shard_index.get_chunk_slice(chunk_coords))
         ]
-        if len(chunks) < len(all_chunk_coords):
+        if len(chunks) == 0:
             return None
 
         groups = self._coalesce_chunks(chunks)
@@ -854,9 +833,7 @@ def _coalesce_chunks(
 
         for chunk in sorted_chunks[1:]:
             gap_to_chunk = chunk.byte_slice.start - current_group[-1].byte_slice.stop
-            size_if_coalesced = (
-                chunk.byte_slice.stop - current_group[0].byte_slice.start
-            )
+            size_if_coalesced = chunk.byte_slice.stop - current_group[0].byte_slice.start
             if gap_to_chunk < max_gap_bytes and size_if_coalesced < coalesce_max_bytes:
                 current_group.append(chunk)
             else:
@@ -899,9 +876,7 @@ async def _get_group_bytes(
 
         return shard_dict
 
-    def compute_encoded_size(
-        self, input_byte_length: int, shard_spec: ArraySpec
-    ) -> int:
+    def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int:
         chunks_per_shard = self._get_chunks_per_shard(shard_spec)
         return input_byte_length + self._shard_index_size(chunks_per_shard)
 
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index dbe64a32d5..35940feb47 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -344,6 +344,79 @@ def test_sharding_multiple_chunks_partial_shard_read(
         assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
 
 
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(
+    store: Store, index_location: ShardingCodecIndexLocation
+) -> None:
+    """
+    Case where
+        - some, but not all, chunks in the last shard are empty
+        - the last shard is not complete (array length is not a multiple of shard shape),
+          this takes us down the partial shard read path
+        - write_empty_chunks=False so the shard index will have fewer entries than chunks in the shard
+    """
+    # array with mixed empty and non-empty chunks in second shard
+    data = np.array([
+        # shard 0. full 8 elements, all chunks have some non-fill data
+        0, 1, 2, 3, 4, 5, 6, 7,
+        # shard 1. 6 elements (< shard shape)
+        2, 0, # chunk 0, written
+        0, 0, # chunk 1, all fill, not written
+        4, 5  # chunk 2, written
+    ], dtype="int32")  # fmt: off
+
+    spath = StorePath(store)
+    a = zarr.create_array(
+        spath,
+        shape=(14,),
+        chunks=(2,),
+        shards={"shape": (8,), "index_location": index_location},
+        dtype="int32",
+        fill_value=0,
+        filters=None,
+        compressors=None,
+        config={"write_empty_chunks": False},
+    )
+    a[:] = data
+
+    assert np.array_equal(a[:], data)
+
+
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+def test_sharding_read_empty_chunks_within_empty_shard_write_empty_false(
+    store: Store, index_location: ShardingCodecIndexLocation
+) -> None:
+    """
+    Case where
+        - all chunks in last shard are empty
+        - the last shard is not complete (array length is not a multiple of shard shape),
+          this takes us down the partial shard read path
+        - write_empty_chunks=False so the shard index will have no entries
+    """
+    fill_value = -99
+    shard_size = 8
+    data = np.arange(14, dtype="int32")
+    data[shard_size:] = fill_value  # 2nd shard is all fill value
+
+    spath = StorePath(store)
+    a = zarr.create_array(
+        spath,
+        shape=(14,),
+        chunks=(2,),
+        shards={"shape": (shard_size,), "index_location": index_location},
+        dtype="int32",
+        fill_value=fill_value,
+        filters=None,
+        compressors=None,
+        config={"write_empty_chunks": False},
+    )
+    a[:] = data
+
+    assert np.array_equal(a[:], data)
+
+
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
 def test_sharding_partial_shard_read__index_load_fails(
@@ -577,7 +650,6 @@ def test_nested_sharding_create_array(
         filters=None,
         compressors=None,
     )
-    print(a.metadata.to_dict())
 
     a[:, :, :] = data
 
@@ -637,7 +709,6 @@ async def test_delete_empty_shards(store: Store) -> None:
         compressors=None,
         fill_value=1,
     )
-    print(a.metadata.to_dict())
     await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16)))
     await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16)))
     await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8)))
@@ -682,7 +753,6 @@ async def test_sharding_with_empty_inner_chunk(
     )
     data[:4, :4] = fill_value
     await a.setitem(..., data)
-    print("read data")
     data_read = await a.getitem(...)
     assert np.array_equal(data_read, data)
 
diff --git a/tests/test_properties.py b/tests/test_properties.py
index de302c56b0..e941250872 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -76,17 +76,11 @@ def deep_equal(a: Any, b: Any) -> bool:
 
 
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
-@given(data=st.data(), zarr_format=zarr_formats)
-def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None:
-    nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format)))
-    zarray = data.draw(
-        arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))
-    )
-    try:
-        assert_array_equal(nparray, zarray[:])
-    except Exception as e:
-        breakpoint()
-        raise e
+@given(data=st.data())
+def test_array_roundtrip(data: st.DataObject) -> None:
+    nparray = data.draw(numpy_arrays())
+    zarray = data.draw(arrays(arrays=st.just(nparray)))
+    assert_array_equal(nparray, zarray[:])
 
 
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@@ -98,20 +92,12 @@ def test_array_creates_implicit_groups(array):
         parent = "/".join(ancestry[: i + 1])
         if array.metadata.zarr_format == 2:
             assert (
-                sync(
-                    array.store.get(
-                        f"{parent}/.zgroup", prototype=default_buffer_prototype()
-                    )
-                )
+                sync(array.store.get(f"{parent}/.zgroup", prototype=default_buffer_prototype()))
                 is not None
             )
         elif array.metadata.zarr_format == 3:
             assert (
-                sync(
-                    array.store.get(
-                        f"{parent}/zarr.json", prototype=default_buffer_prototype()
-                    )
-                )
+                sync(array.store.get(f"{parent}/zarr.json", prototype=default_buffer_prototype()))
                 is not None
             )
 
@@ -129,9 +115,7 @@ def test_basic_indexing(data: st.DataObject) -> None:
     actual = zarray[indexer]
     assert_array_equal(nparray[indexer], actual)
 
-    new_data = data.draw(
-        numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)
-    )
+    new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype))
     zarray[indexer] = new_data
     nparray[indexer] = new_data
     assert_array_equal(nparray, zarray[:])
@@ -153,9 +137,7 @@ def test_oindex(data: st.DataObject) -> None:
         if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size:
             # behaviour of setitem with repeated indices is not guaranteed in practice
             assume(False)
-    new_data = data.draw(
-        numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)
-    )
+    new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype))
     nparray[npindexer] = new_data
     zarray.oindex[zindexer] = new_data
     assert_array_equal(nparray, zarray[:])
@@ -231,33 +213,7 @@ def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: in
     orig = metadata.to_dict()
     rt = metadata_roundtripped.to_dict()
 
-    assert deep_equal(
-        orig, rt
-    ), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}"
-
-
-# @st.composite
-# def advanced_indices(draw, *, shape):
-#     basic_idxr = draw(
-#         basic_indices(
-#             shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False
-#         ).filter(lambda x: isinstance(x, tuple))
-#     )
-
-#     int_idxr = draw(
-#         npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1))
-#     )
-#     args = tuple(
-#         st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None))
-#     )
-#     return draw(st.tuples(*args))
-
-
-# @given(st.data())
-# def test_roundtrip_object_array(data):
-#     nparray = data.draw(np_arrays)
-#     zarray = data.draw(arrays(arrays=st.just(nparray)))
-#     assert_array_equal(nparray, zarray[:])
+    assert deep_equal(orig, rt), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}"
 
 
 def serialized_complex_float_is_valid(
@@ -333,9 +289,7 @@ def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> N
     # version-specific validations
     if isinstance(meta, ArrayV2Metadata):
         assert asdict_dict["filters"] != ()
-        assert asdict_dict["filters"] is None or isinstance(
-            asdict_dict["filters"], tuple
-        )
+        assert asdict_dict["filters"] is None or isinstance(asdict_dict["filters"], tuple)
         assert asdict_dict["zarr_format"] == 2
     else:
         assert asdict_dict["zarr_format"] == 3

From 501e7a570dccde02b5e88a7b44f5036f5ff9395e Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 21:42:51 -0400
Subject: [PATCH 06/21] Self review

---
 src/zarr/codecs/sharding.py        | 11 ++++++-----
 tests/test_codecs/test_sharding.py |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 8b64e68130..073320f2f1 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -778,7 +778,7 @@ async def _load_partial_shard_maybe(
     ) -> ShardMapping | None:
         """
         Read chunks from `byte_getter` for the case where the read is less than a full shard.
-        Returns a mapping of chunk coordinates to bytes.
+        Returns a mapping of chunk coordinates to bytes or None.
         """
         shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard)
         if shard_index is None:
@@ -788,11 +788,9 @@ async def _load_partial_shard_maybe(
             _ChunkCoordsByteSlice(chunk_coords, slice(*chunk_byte_slice))
             for chunk_coords in all_chunk_coords
             # Drop chunks where index lookup fails
-            # e.g. when write_empty_chunks = False and the chunk is empty
+            # e.g. empty chunks when write_empty_chunks = False
             if (chunk_byte_slice := shard_index.get_chunk_slice(chunk_coords))
         ]
-        if len(chunks) == 0:
-            return None
 
         groups = self._coalesce_chunks(chunks)
 
@@ -816,7 +814,7 @@ def _coalesce_chunks(
     ) -> list[list[_ChunkCoordsByteSlice]]:
         """
         Combine chunks from a single shard into groups that should be read together
-        in a single request.
+        in a single request to the store.
 
         Respects the following configuration options:
         - `sharding.read.coalesce_max_gap_bytes`: The maximum gap between
@@ -828,6 +826,9 @@ def _coalesce_chunks(
 
         sorted_chunks = sorted(chunks, key=lambda c: c.byte_slice.start)
 
+        if len(sorted_chunks) == 0:
+            return []
+
         groups = []
         current_group = [sorted_chunks[0]]
 
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index 35940feb47..5df25d4754 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -354,7 +354,7 @@ def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(
         - some, but not all, chunks in the last shard are empty
         - the last shard is not complete (array length is not a multiple of shard shape),
           this takes us down the partial shard read path
-        - write_empty_chunks=False so the shard index will have less entries than chunks in the shard
+        - write_empty_chunks=False so the shard index will have fewer entries than chunks in the shard
     """
     # array with mixed empty and non-empty chunks in second shard
     data = np.array([

From 12c3308452db4c56de9a13011cea0a0a77b8c5a8 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 21:44:09 -0400
Subject: [PATCH 07/21] Removing profiling code masquerading as a skipped test

---
 tests/test_codecs/test_sharding.py | 75 ------------------------------
 1 file changed, 75 deletions(-)

diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index 5df25d4754..f4c2361a1d 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -204,81 +204,6 @@ def test_sharding_partial_read(
     assert np.all(read_data == 1)
 
 
-@pytest.mark.skip("This is profiling rather than a test")
-@pytest.mark.slow_hypothesis
-@pytest.mark.parametrize("store", ["local"], indirect=["store"])
-def test_partial_shard_read_performance(store: Store) -> None:
-    import asyncio
-    import json
-    from functools import partial
-    from itertools import product
-    from timeit import timeit
-    from unittest.mock import AsyncMock
-
-    # The whole test array is a single shard to keep runtime manageable while
-    # using a realistic shard size (256 MiB uncompressed, ~115 MiB compressed).
-    # In practice, the array is likely to be much larger with many shards of this
-    # rough order of magnitude. There are 512 chunks per shard in this example.
-    array_shape = (512, 512, 512)
-    shard_shape = (512, 512, 512)  # 256 MiB uncompressed unit16s
-    chunk_shape = (64, 64, 64)  # 512 KiB uncompressed unit16s
-    dtype = np.uint16
-
-    a = zarr.create_array(
-        StorePath(store),
-        shape=array_shape,
-        chunks=chunk_shape,
-        shards=shard_shape,
-        compressors=BloscCodec(cname="zstd"),
-        dtype=dtype,
-        fill_value=np.iinfo(dtype).max,
-    )
-    # Narrow range of values lets zstd compress to about 1/2 of uncompressed size
-    a[:] = np.random.default_rng(123).integers(low=0, high=50, size=array_shape, dtype=dtype)
-
-    num_calls = 20
-    experiments = []
-    for concurrency, get_latency, coalesce_max_gap, statement in product(
-        [1, 10, 100],
-        [0.0, 0.01],
-        [-1, 2**20, 10 * 2**20],
-        ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"],
-    ):
-        zarr.config.set(
-            {
-                "async.concurrency": concurrency,
-                "sharding.read.coalesce_max_gap_bytes": coalesce_max_gap,
-            }
-        )
-
-        async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any:
-            await asyncio.sleep(get_latency)
-            return await store.get(*args, **kwargs)
-
-        store_mock = AsyncMock(wraps=store, spec=store.__class__)
-        store_mock.get.side_effect = partial(get_with_latency, get_latency=get_latency)
-
-        a = zarr.open_array(StorePath(store_mock))
-
-        store_mock.reset_mock()
-
-        # Each timeit call accesses a 512x512 slice covering 64 chunks
-        time = timeit(statement, number=num_calls, globals={"a": a}) / num_calls
-        experiments.append(
-            {
-                "concurrency": concurrency,
-                "coalesce_max_gap": coalesce_max_gap,
-                "get_latency": get_latency,
-                "statement": statement,
-                "time": time,
-                "store_get_calls": store_mock.get.call_count,
-            }
-        )
-
-    with open("zarr-python-partial-shard-read-performance-with-coalesce.json", "w") as f:
-        json.dump(experiments, f)
-
-
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
 @pytest.mark.parametrize("coalesce_reads", [True, False])

From 6322ca63ec4cec2fd26644183c04bf8c86932d17 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 22:54:56 -0400
Subject: [PATCH 08/21] revert change to indexing.py, not needed

---
 src/zarr/core/indexing.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index 0e0bb664d8..b95c8c642e 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -76,7 +76,9 @@ def err_too_many_indices(selection: Any, shape: ChunkCoords) -> None:
     raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}")
 
 
-def _zarr_array_to_int_or_bool_array(arr: Array) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]:
+def _zarr_array_to_int_or_bool_array(
+    arr: Array,
+) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]:
     if arr.dtype.kind in ("i", "b"):
         return np.asarray(arr)
     else:
@@ -1193,7 +1195,7 @@ def __iter__(self) -> Iterator[ChunkProjection]:
             stop = self.chunk_nitems_cumsum[chunk_rix]
             out_selection: slice | npt.NDArray[np.intp]
             if self.sel_sort is None:
-                out_selection = np.arange(start, stop)
+                out_selection = slice(start, stop)
             else:
                 out_selection = self.sel_sort[start:stop]
 
@@ -1318,7 +1320,8 @@ def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection
         fields = fields[0] if len(fields) == 1 else fields
         selection_tuple = tuple(s for s in selection if not isinstance(s, str))
         selection = cast(
-            "Selection", selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple
+            "Selection",
+            selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple,
         )
         return fields, selection
 

From d9a7842537a33482249284626a55dd4c4aeefcea Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 23:05:41 -0400
Subject: [PATCH 09/21] Add test for duplicate integer indexing into a
 coalesced group

---
 tests/test_codecs/test_sharding.py | 51 +++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index f4c2361a1d..c24be7d0d3 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -219,7 +219,8 @@ def test_sharding_multiple_chunks_partial_shard_read(
         # 1MiB, enough to coalesce all chunks within a shard in this example
         zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
     else:
-        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})  # disable coalescing
+        # disable coalescing
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})
 
     store_mock = AsyncMock(wraps=store, spec=store.__class__)
     a = zarr.create_array(
@@ -269,6 +270,54 @@ def test_sharding_multiple_chunks_partial_shard_read(
         assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
 
 
+@pytest.mark.parametrize("index_location", ["start", "end"])
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+@pytest.mark.parametrize("coalesce_reads", [True, False])
+def test_sharding_duplicate_read_indexes(
+    store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
+) -> None:
+    """
+    Check that coalesce optimization parses the grouped reads back out correctly
+    when there are multiple reads for the same index.
+    """
+    array_shape = (15,)
+    shard_shape = (8,)
+    chunk_shape = (2,)
+    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
+
+    if coalesce_reads:
+        # 1MiB, enough to coalesce all chunks within a shard in this example
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
+    else:
+        # disable coalescing
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})
+
+    store_mock = AsyncMock(wraps=store, spec=store.__class__)
+    a = zarr.create_array(
+        StorePath(store_mock),
+        shape=data.shape,
+        chunks=chunk_shape,
+        shards={"shape": shard_shape, "index_location": index_location},
+        compressors=BloscCodec(cname="lz4"),
+        dtype=data.dtype,
+        fill_value=-1,
+    )
+    a[:] = data
+
+    store_mock.reset_mock()  # ignore store calls during array creation
+
+    # Read the same index multiple times, do that from two chunks which can be coalesced
+    indexer = [8, 8, 12, 12]
+    assert np.array_equal(a[indexer], data[indexer])
+
+    if coalesce_reads:
+        # 1 shard index request + 1 coalesced read
+        assert store_mock.get.call_count == 2
+    else:
+        # 1 shard index request + 2 chunks
+        assert store_mock.get.call_count == 3
+
+
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
 def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(

From 8469e9c0cb5b9ae222e15ccde6a75e18dfb65a84 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 23:15:00 -0400
Subject: [PATCH 10/21] Undo change to fill value when initializing shard
 arrays

---
 src/zarr/codecs/sharding.py        | 4 ++--
 tests/test_codecs/test_sharding.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 073320f2f1..cda60589fc 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -469,7 +469,7 @@ async def _decode_single(
             shape=shard_shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=shard_spec.fill_value,
+            fill_value=0,
         )
         shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)
 
@@ -516,7 +516,7 @@ async def _decode_partial_single(
             shape=indexer.shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=shard_spec.fill_value,
+            fill_value=0,
         )
 
         indexed_chunks = list(indexer)
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
index c24be7d0d3..f124e14675 100644
--- a/tests/test_codecs/test_sharding.py
+++ b/tests/test_codecs/test_sharding.py
@@ -335,9 +335,9 @@ def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(
         # shard 0. full 8 elements, all chunks have some non-fill data
         0, 1, 2, 3, 4, 5, 6, 7,
         # shard 1. 6 elements (< shard shape)
-        2, 0, # chunk 0, written
-        0, 0, # chunk 1, all fill, not written
-        4, 5  # chunk 2, written
+         2,  0, # chunk 0, written
+        -9, -9, # chunk 1, all fill, not written
+         4,  5  # chunk 2, written
     ], dtype="int32")  # fmt: off
 
     spath = StorePath(store)
@@ -347,7 +347,7 @@ def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(
         chunks=(2,),
         shards={"shape": (8,), "index_location": index_location},
         dtype="int32",
-        fill_value=0,
+        fill_value=-9,
         filters=None,
         compressors=None,
         config={"write_empty_chunks": False},

From baf1062b625583750e4b77519b15aa49c1cdb4a9 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 23:17:51 -0400
Subject: [PATCH 11/21] Undo change to set mypy_path = "src"

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 624af6ab4a..0b7cb9f856 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -352,7 +352,6 @@ ignore = [
 [tool.mypy]
 python_version = "3.11"
 ignore_missing_imports = true
-mypy_path = "src"
 namespace_packages = false
 
 strict = true

From 50d8822ab62ca347a63861dc32155d3bdcc4401d Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <aldenkeefesampson@gmail.com>
Date: Mon, 21 Jul 2025 23:35:47 -0400
Subject: [PATCH 12/21] Commenting and revert unnecessary changes to files for
 smaller diff

---
 src/zarr/codecs/sharding.py |  1 +
 src/zarr/core/indexing.py   |  7 ++-----
 tests/test_properties.py    | 35 +++++++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index cda60589fc..b29e24cfb9 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -855,6 +855,7 @@ async def _get_group_bytes(
         Reads a possibly coalesced group of one or more chunks from a shard.
         Returns a mapping of chunk coordinates to bytes.
         """
+        # _coalesce_chunks ensures that the group is not empty.
         group_start = group[0].byte_slice.start
         group_end = group[-1].byte_slice.stop
 
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index b95c8c642e..c11889f7f4 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -76,9 +76,7 @@ def err_too_many_indices(selection: Any, shape: ChunkCoords) -> None:
     raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}")
 
 
-def _zarr_array_to_int_or_bool_array(
-    arr: Array,
-) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]:
+def _zarr_array_to_int_or_bool_array(arr: Array) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]:
     if arr.dtype.kind in ("i", "b"):
         return np.asarray(arr)
     else:
@@ -1320,8 +1318,7 @@ def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection
         fields = fields[0] if len(fields) == 1 else fields
         selection_tuple = tuple(s for s in selection if not isinstance(s, str))
         selection = cast(
-            "Selection",
-            selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple,
+            "Selection", selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple
         )
         return fields, selection
 
diff --git a/tests/test_properties.py b/tests/test_properties.py
index e941250872..27f847fa69 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -152,13 +152,20 @@ def test_vindex(data: st.DataObject) -> None:
 
     indexer = data.draw(
         npst.integer_array_indices(
-            shape=nparray.shape,
-            result_shape=npst.array_shapes(min_side=1, max_dims=None),
+            shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None)
         )
     )
     actual = zarray.vindex[indexer]
     assert_array_equal(nparray[indexer], actual)
 
+    # FIXME!
+    # when the indexer is such that a value gets overwritten multiple times,
+    # I think the output depends on chunking.
+    # new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
+    # nparray[indexer] = new_data
+    # zarray.vindex[indexer] = new_data
+    # assert_array_equal(nparray, zarray[:])
+
 
 @given(store=stores, meta=array_metadata())  # type: ignore[misc]
 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@@ -216,6 +223,30 @@ def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: in
     assert deep_equal(orig, rt), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}"
 
 
+# @st.composite
+# def advanced_indices(draw, *, shape):
+#     basic_idxr = draw(
+#         basic_indices(
+#             shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False
+#         ).filter(lambda x: isinstance(x, tuple))
+#     )
+
+#     int_idxr = draw(
+#         npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1))
+#     )
+#     args = tuple(
+#         st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None))
+#     )
+#     return draw(st.tuples(*args))
+
+
+# @given(st.data())
+# def test_roundtrip_object_array(data):
+#     nparray = data.draw(np_arrays)
+#     zarray = data.draw(arrays(arrays=st.just(nparray)))
+#     assert_array_equal(nparray, zarray[:])
+
+
 def serialized_complex_float_is_valid(
     serialized: tuple[numbers.Real | str, numbers.Real | str],
 ) -> bool:

From 904240bf26bbefb67e4cec1b0d5881c1ae887bff Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Thu, 4 Dec 2025 22:34:27 -0500
Subject: [PATCH 13/21] remove now redundant cast

---
 src/zarr/codecs/sharding.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index ebbe9f4e0f..cf6111d72f 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -116,9 +116,7 @@ class _ShardIndex(NamedTuple):
 
     @property
     def chunks_per_shard(self) -> tuple[int, ...]:
-        result = tuple(self.offsets_and_lengths.shape[0:-1])
-        # The cast is required until https://github.com/numpy/numpy/pull/27211 is merged
-        return cast("tuple[int, ...]", result)
+        return tuple(self.offsets_and_lengths.shape[0:-1])
 
     def _localize_chunk(self, chunk_coords: tuple[int, ...]) -> tuple[int, ...]:
         return tuple(

From 5283f1adcf6bfe555dc5a1de4fe53fb0eda33436 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Thu, 4 Dec 2025 22:46:16 -0500
Subject: [PATCH 14/21] Document runtime config keys

---
 docs/user-guide/config.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md
index 21fe9b5def..572a9db12c 100644
--- a/docs/user-guide/config.md
+++ b/docs/user-guide/config.md
@@ -33,6 +33,7 @@ Configuration options include the following:
 - Async and threading options, e.g. `async.concurrency` and `threading.max_workers`
 - Selections of implementations of codecs, codec pipelines and buffers
 - Enabling GPU support with `zarr.config.enable_gpu()`. See GPU support for more.
+- Control request merging when reading multiple chunks from the same shard with `sharding.read.coalesce_max_gap_bytes` and `sharding.read.coalesce_max_bytes`
 
 For selecting custom implementations of codecs, pipelines, buffers and ndbuffers,
 first register the implementations in the registry and then select them in the config.

From f7d5de39b4792f9fe26c1da46885612ea5f1df9f Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Thu, 4 Dec 2025 23:01:25 -0500
Subject: [PATCH 15/21] Improve changelog entry and .rst -> .md

---
 changes/3004.feature.md  | 7 +++++++
 changes/3004.feature.rst | 3 ---
 2 files changed, 7 insertions(+), 3 deletions(-)
 create mode 100644 changes/3004.feature.md
 delete mode 100644 changes/3004.feature.rst

diff --git a/changes/3004.feature.md b/changes/3004.feature.md
new file mode 100644
index 0000000000..7c78a2fae2
--- /dev/null
+++ b/changes/3004.feature.md
@@ -0,0 +1,7 @@
+Optimizes reading multiple chunks from a shard. Reads of nearby chunks within
+the same shard are coalesced to reduce the number of calls to the store.
+After any coalescing, the resulting byte ranges are read in parallel.
+
+Coalescing respects two config options. Reads are coalesced if there are fewer
+than `sharding.read.coalesce_max_gap_bytes` bytes between chunks and the total
+size of the coalesced read is no more than `sharding.read.coalesce_max_bytes`. 
\ No newline at end of file
diff --git a/changes/3004.feature.rst b/changes/3004.feature.rst
deleted file mode 100644
index b15a5ec943..0000000000
--- a/changes/3004.feature.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-Optimizes reading more than one, but not all, chunks from a shard. Chunks are now read in parallel
-and reads of nearby chunks within the same shard are combined to reduce the number of calls to the store.
-See :ref:`user-guide-config` for more details.
\ No newline at end of file

From f4b2bccb6fde09b4af72c4fd915fb7b961feaf3c Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Thu, 4 Dec 2025 23:26:21 -0500
Subject: [PATCH 16/21] .coords -> .chunk_coords in _ChunkCoordsByteSlice
 dataclass

---
 changes/3004.feature.md     |  2 +-
 src/zarr/codecs/sharding.py | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/changes/3004.feature.md b/changes/3004.feature.md
index 7c78a2fae2..1855acec89 100644
--- a/changes/3004.feature.md
+++ b/changes/3004.feature.md
@@ -4,4 +4,4 @@ After any coalescing, the resulting byte ranges are read in parallel.
 
 Coalescing respects two config options. Reads are coalesced if there are fewer
 than `sharding.read.coalesce_max_gap_bytes` bytes between chunks and the total
-size of the coalesced read is no more than `sharding.read.coalesce_max_bytes`. 
\ No newline at end of file
+size of the coalesced read is no more than `sharding.read.coalesce_max_bytes`.
\ No newline at end of file
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index cf6111d72f..f40f814472 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -5,7 +5,7 @@
 from enum import Enum
 from functools import lru_cache
 from operator import itemgetter
-from typing import TYPE_CHECKING, Any, NamedTuple, cast
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 import numpy as np
 import numpy.typing as npt
@@ -220,10 +220,11 @@ def __iter__(self) -> Iterator[tuple[int, ...]]:
         return c_order_iter(self.index.offsets_and_lengths.shape[:-1])
 
 
-class _ChunkCoordsByteSlice(NamedTuple):
-    """Holds a chunk's coordinates and its byte range in a serialized shard."""
+@dataclass(frozen=True)
+class _ChunkCoordsByteSlice:
+    """Holds a core.indexing.ChunkProjection.chunk_coords and its byte range in a serialized shard."""
 
-    coords: tuple[int, ...]
+    chunk_coords: tuple[int, ...]
     byte_slice: slice
 
 
@@ -800,7 +801,7 @@ async def _get_group_bytes(
                 chunk.byte_slice.start - group_start,
                 chunk.byte_slice.stop - group_start,
             )
-            shard_dict[chunk.coords] = group_bytes[chunk_slice]
+            shard_dict[chunk.chunk_coords] = group_bytes[chunk_slice]
 
         return shard_dict
 

From 04c5cde67c3b65c4b0f7340aaad5f987422b7298 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Mon, 19 Jan 2026 12:02:18 -0500
Subject: [PATCH 17/21] Update test env in docs/contributing.md

---
 docs/contributing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/contributing.md b/docs/contributing.md
index e42ba0edf1..cb7de3ed07 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -64,7 +64,7 @@ hatch env show  # list all available environments
 To verify that your development environment is working, you can run the unit tests for one of the test environments, e.g.:
 
 ```bash
-hatch env run --env test.py3.12-2.2-optional run-pytest
+hatch env run --env test.py3.13-optional run-pytest
 ```
 
 ### Creating a branch

From 43e326fcb30d2749909855852bd1491f4540b56c Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Mon, 19 Jan 2026 12:05:54 -0500
Subject: [PATCH 18/21] Move `config.get` calls up into
 `_decode_partial_single`

---
 src/zarr/codecs/sharding.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index f40f814472..1f900fb7e8 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -418,8 +418,18 @@ async def _decode_partial_single(
             )
         else:
             # read some chunks within the shard
+            max_gap_bytes = config.get("sharding.read.coalesce_max_gap_bytes")
+            coalesce_max_bytes = config.get("sharding.read.coalesce_max_bytes")
+            async_concurrency = config.get("async.concurrency")
+
             shard_dict_maybe = await self._load_partial_shard_maybe(
-                byte_getter, chunk_spec.prototype, chunks_per_shard, all_chunk_coords
+                byte_getter,
+                chunk_spec.prototype,
+                chunks_per_shard,
+                all_chunk_coords,
+                max_gap_bytes,
+                coalesce_max_bytes,
+                async_concurrency,
             )
 
         if shard_dict_maybe is None:
@@ -702,10 +712,16 @@ async def _load_partial_shard_maybe(
         prototype: BufferPrototype,
         chunks_per_shard: tuple[int, ...],
         all_chunk_coords: set[tuple[int, ...]],
+        max_gap_bytes: int,
+        coalesce_max_bytes: int,
+        async_concurrency: int,
     ) -> ShardMapping | None:
         """
         Read chunks from `byte_getter` for the case where the read is less than a full shard.
         Returns a mapping of chunk coordinates to bytes or None.
+
+        Reads are coalesced if there are fewer than `max_gap_bytes` bytes between chunks
+        and the total size of the coalesced read is no more than `coalesce_max_bytes`.
         """
         shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard)
         if shard_index is None:
@@ -719,12 +735,12 @@ async def _load_partial_shard_maybe(
             if (chunk_byte_slice := shard_index.get_chunk_slice(chunk_coords))
         ]
 
-        groups = self._coalesce_chunks(chunks)
+        groups = self._coalesce_chunks(chunks, max_gap_bytes, coalesce_max_bytes)
 
         shard_dicts = await concurrent_map(
             [(group, byte_getter, prototype) for group in groups],
             self._get_group_bytes,
-            config.get("async.concurrency"),
+            async_concurrency,
         )
 
         shard_dict: ShardMutableMapping = {}
@@ -738,19 +754,13 @@ async def _load_partial_shard_maybe(
     def _coalesce_chunks(
         self,
         chunks: list[_ChunkCoordsByteSlice],
+        max_gap_bytes: int,
+        coalesce_max_bytes: int,
     ) -> list[list[_ChunkCoordsByteSlice]]:
         """
         Combine chunks from a single shard into groups that should be read together
         in a single request to the store.
-
-        Respects the following configuration options:
-        - `sharding.read.coalesce_max_gap_bytes`: The maximum gap between
-          chunks to coalesce into a single group.
-        - `sharding.read.coalesce_max_bytes`: The maximum number of bytes in a group.
         """
-        max_gap_bytes = config.get("sharding.read.coalesce_max_gap_bytes")
-        coalesce_max_bytes = config.get("sharding.read.coalesce_max_bytes")
-
         sorted_chunks = sorted(chunks, key=lambda c: c.byte_slice.start)
 
         if len(sorted_chunks) == 0:

From b71302f16409b359a3890ee05641d06ab5bbb509 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Mon, 19 Jan 2026 12:06:53 -0500
Subject: [PATCH 19/21] Ensure no change in behavior when ByteGetter.get
 returns None + comment

---
 src/zarr/codecs/sharding.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 1f900fb7e8..86ef8dcd85 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -725,7 +725,7 @@ async def _load_partial_shard_maybe(
         """
         shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard)
         if shard_index is None:
-            return None
+            return None  # shard index read failure, the ByteGetter returned None
 
         chunks = [
             _ChunkCoordsByteSlice(chunk_coords, slice(*chunk_byte_slice))
@@ -745,9 +745,9 @@ async def _load_partial_shard_maybe(
 
         shard_dict: ShardMutableMapping = {}
         for d in shard_dicts:
-            if d is None:
-                return None
-            shard_dict.update(d)
+            # can be None if the ByteGetter returned None when reading chunk data
+            if d is not None:
+                shard_dict.update(d)
 
         return shard_dict
 

From 24f6f1c5aa08fd810bb33fc3f4010fa38ad41694 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Mon, 19 Jan 2026 12:28:49 -0500
Subject: [PATCH 20/21] Add test_sharding_unit.py, focusing on coalesce
 behavior, but with basic tests for other components

---
 tests/test_codecs/test_sharding_unit.py | 655 ++++++++++++++++++++++++
 1 file changed, 655 insertions(+)
 create mode 100644 tests/test_codecs/test_sharding_unit.py

diff --git a/tests/test_codecs/test_sharding_unit.py b/tests/test_codecs/test_sharding_unit.py
new file mode 100644
index 0000000000..32d3e4b4af
--- /dev/null
+++ b/tests/test_codecs/test_sharding_unit.py
@@ -0,0 +1,655 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pytest
+
+from zarr.codecs.sharding import (
+    MAX_UINT_64,
+    ShardingCodec,
+    _ChunkCoordsByteSlice,
+    _ShardIndex,
+    _ShardReader,
+)
+from zarr.core.buffer import default_buffer_prototype
+from zarr.core.buffer.cpu import Buffer
+
+if TYPE_CHECKING:
+    from zarr.abc.store import RangeByteRequest
+    from zarr.core.buffer import BufferPrototype
+
+
+# ============================================================================
+# _ShardIndex tests
+# ============================================================================
+
+
+def test_shard_index_create_empty() -> None:
+    """Test that create_empty creates an index filled with MAX_UINT_64."""
+    chunks_per_shard = (2, 3)
+    index = _ShardIndex.create_empty(chunks_per_shard)
+
+    assert index.chunks_per_shard == chunks_per_shard
+    assert index.offsets_and_lengths.shape == (2, 3, 2)
+    assert index.offsets_and_lengths.dtype == np.dtype("<u8")
+    assert np.all(index.offsets_and_lengths == MAX_UINT_64)
+
+
+def test_shard_index_create_empty_1d() -> None:
+    """Test create_empty with 1D chunks_per_shard."""
+    chunks_per_shard = (4,)
+    index = _ShardIndex.create_empty(chunks_per_shard)
+
+    assert index.chunks_per_shard == chunks_per_shard
+    assert index.offsets_and_lengths.shape == (4, 2)
+
+
+def test_shard_index_is_all_empty_true() -> None:
+    """Test is_all_empty returns True for a freshly created empty index."""
+    index = _ShardIndex.create_empty((2, 2))
+    assert index.is_all_empty() is True
+
+
+def test_shard_index_is_all_empty_false() -> None:
+    """Test is_all_empty returns False when at least one chunk is set."""
+    index = _ShardIndex.create_empty((2, 2))
+    index.set_chunk_slice((0, 0), slice(0, 100))
+    assert index.is_all_empty() is False
+
+
+def test_shard_index_get_chunk_slice_empty() -> None:
+    """Test get_chunk_slice returns None for empty chunks."""
+    index = _ShardIndex.create_empty((2, 2))
+    assert index.get_chunk_slice((0, 0)) is None
+    assert index.get_chunk_slice((1, 1)) is None
+
+
+def test_shard_index_get_chunk_slice_set() -> None:
+    """Test get_chunk_slice returns correct (start, end) tuple after setting."""
+    index = _ShardIndex.create_empty((2, 2))
+    index.set_chunk_slice((0, 1), slice(100, 200))
+
+    result = index.get_chunk_slice((0, 1))
+    assert result == (100, 200)
+
+
+def test_shard_index_set_chunk_slice() -> None:
+    """Test set_chunk_slice correctly sets offset and length."""
+    index = _ShardIndex.create_empty((3, 3))
+
+    # Set a chunk slice
+    index.set_chunk_slice((1, 2), slice(50, 150))
+
+    # Verify the underlying array
+    assert index.offsets_and_lengths[1, 2, 0] == 50  # offset
+    assert index.offsets_and_lengths[1, 2, 1] == 100  # length (150 - 50)
+
+
+def test_shard_index_set_chunk_slice_none() -> None:
+    """Test set_chunk_slice with None marks chunk as empty."""
+    index = _ShardIndex.create_empty((2, 2))
+
+    # First set a value
+    index.set_chunk_slice((0, 0), slice(0, 100))
+    assert index.get_chunk_slice((0, 0)) == (0, 100)
+
+    # Then clear it
+    index.set_chunk_slice((0, 0), None)
+    assert index.get_chunk_slice((0, 0)) is None
+    assert index.offsets_and_lengths[0, 0, 0] == MAX_UINT_64
+    assert index.offsets_and_lengths[0, 0, 1] == MAX_UINT_64
+
+
+def test_shard_index_get_full_chunk_map() -> None:
+    """Test get_full_chunk_map returns correct boolean array."""
+    index = _ShardIndex.create_empty((2, 3))
+
+    # Set some chunks
+    index.set_chunk_slice((0, 0), slice(0, 10))
+    index.set_chunk_slice((1, 2), slice(10, 20))
+
+    chunk_map = index.get_full_chunk_map()
+
+    assert chunk_map.shape == (2, 3)
+    assert chunk_map.dtype == np.bool_
+    assert chunk_map[0, 0] is np.True_
+    assert chunk_map[0, 1] is np.False_
+    assert chunk_map[0, 2] is np.False_
+    assert chunk_map[1, 0] is np.False_
+    assert chunk_map[1, 1] is np.False_
+    assert chunk_map[1, 2] is np.True_
+
+
+def test_shard_index_localize_chunk() -> None:
+    """Test _localize_chunk maps global coords to local shard coords via modulo."""
+    index = _ShardIndex.create_empty((2, 3))
+
+    # Within bounds - should return same coords
+    assert index._localize_chunk((0, 0)) == (0, 0)
+    assert index._localize_chunk((1, 2)) == (1, 2)
+
+    # Out of bounds - should wrap via modulo
+    assert index._localize_chunk((2, 0)) == (0, 0)  # 2 % 2 = 0
+    assert index._localize_chunk((3, 5)) == (1, 2)  # 3 % 2 = 1, 5 % 3 = 2
+    assert index._localize_chunk((4, 6)) == (0, 0)  # 4 % 2 = 0, 6 % 3 = 0
+
+
+def test_shard_index_is_dense_true() -> None:
+    """Test is_dense returns True when chunks are contiguously packed."""
+    index = _ShardIndex.create_empty((2,))
+    chunk_byte_length = 100
+
+    # Set chunks contiguously: [0-100), [100-200)
+    index.set_chunk_slice((0,), slice(0, 100))
+    index.set_chunk_slice((1,), slice(100, 200))
+
+    assert index.is_dense(chunk_byte_length) is True
+
+
+def test_shard_index_is_dense_false_duplicate_offsets() -> None:
+    """Test is_dense returns False when chunks have duplicate offsets."""
+    index = _ShardIndex.create_empty((2,))
+    chunk_byte_length = 100
+
+    # Set both chunks to same offset (duplicate)
+    index.set_chunk_slice((0,), slice(0, 100))
+    index.set_chunk_slice((1,), slice(0, 100))
+
+    assert index.is_dense(chunk_byte_length) is False
+
+
+def test_shard_index_is_dense_false_wrong_alignment() -> None:
+    """Test is_dense returns False when chunks are not aligned to chunk_byte_length."""
+    index = _ShardIndex.create_empty((2,))
+    chunk_byte_length = 100
+
+    # Set chunks not aligned: [0-100), [150-250)
+    index.set_chunk_slice((0,), slice(0, 100))
+    index.set_chunk_slice((1,), slice(150, 250))
+
+    assert index.is_dense(chunk_byte_length) is False
+
+
+def test_shard_index_is_dense_with_empty_chunks() -> None:
+    """Test is_dense handles empty chunks correctly."""
+    index = _ShardIndex.create_empty((3,))
+    chunk_byte_length = 100
+
+    # Only set first and third chunk, skip middle
+    index.set_chunk_slice((0,), slice(0, 100))
+    # (1,) is empty
+    index.set_chunk_slice((2,), slice(100, 200))
+
+    # Should still be dense since only non-empty chunks are considered
+    assert index.is_dense(chunk_byte_length) is True
+
+
+# ============================================================================
+# _coalesce_chunks tests
+# ============================================================================
+
+
+def test_coalesce_chunks_empty_list() -> None:
+    """Test _coalesce_chunks returns empty list for empty input."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    result = codec._coalesce_chunks([], max_gap_bytes=100, coalesce_max_bytes=1000)
+    assert result == []
+
+
+def test_coalesce_chunks_single_chunk() -> None:
+    """Test _coalesce_chunks returns single group for single chunk."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+
+    result = codec._coalesce_chunks([chunk], max_gap_bytes=100, coalesce_max_bytes=1000)
+
+    assert len(result) == 1
+    assert len(result[0]) == 1
+    assert result[0][0] == chunk
+
+
+def test_coalesce_chunks_adjacent_small_gap() -> None:
+    """Test adjacent chunks with small gap are coalesced."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(110, 210))  # 10 byte gap
+
+    result = codec._coalesce_chunks([chunk0, chunk1], max_gap_bytes=20, coalesce_max_bytes=1000)
+
+    assert len(result) == 1
+    assert len(result[0]) == 2
+    assert result[0][0] == chunk0
+    assert result[0][1] == chunk1
+
+
+def test_coalesce_chunks_distant_large_gap() -> None:
+    """Test chunks with large gap are not coalesced."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(500, 600))  # 400 byte gap
+
+    result = codec._coalesce_chunks([chunk0, chunk1], max_gap_bytes=100, coalesce_max_bytes=1000)
+
+    assert len(result) == 2
+    assert result[0] == [chunk0]
+    assert result[1] == [chunk1]
+
+
+def test_coalesce_chunks_disabled_negative_gap() -> None:
+    """Test coalescing is disabled when max_gap_bytes is negative (like -1)."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(100, 200))  # Adjacent!
+
+    result = codec._coalesce_chunks([chunk0, chunk1], max_gap_bytes=-1, coalesce_max_bytes=1000)
+
+    # Even adjacent chunks should not be coalesced
+    assert len(result) == 2
+
+
+def test_coalesce_chunks_exceeds_max_bytes() -> None:
+    """Test chunks are split when total size exceeds coalesce_max_bytes."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(100, 200))
+    chunk2 = _ChunkCoordsByteSlice(chunk_coords=(2,), byte_slice=slice(200, 300))
+
+    # Total would be 300 bytes, but max is 250
+    result = codec._coalesce_chunks(
+        [chunk0, chunk1, chunk2], max_gap_bytes=100, coalesce_max_bytes=250
+    )
+
+    # First two chunks (200 bytes) should be coalesced, third separate
+    assert len(result) == 2
+    assert len(result[0]) == 2
+    assert result[0][0] == chunk0
+    assert result[0][1] == chunk1
+    assert result[1] == [chunk2]
+
+
+def test_coalesce_chunks_unsorted_input() -> None:
+    """Test chunks are sorted by byte_slice.start before coalescing."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(200, 300))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(0, 100))
+    chunk2 = _ChunkCoordsByteSlice(chunk_coords=(2,), byte_slice=slice(100, 200))
+
+    # Input is out of order
+    result = codec._coalesce_chunks(
+        [chunk0, chunk1, chunk2], max_gap_bytes=100, coalesce_max_bytes=1000
+    )
+
+    # All should be coalesced and in sorted order
+    assert len(result) == 1
+    assert len(result[0]) == 3
+    assert result[0][0] == chunk1  # slice(0, 100)
+    assert result[0][1] == chunk2  # slice(100, 200)
+    assert result[0][2] == chunk0  # slice(200, 300)
+
+
+def test_coalesce_chunks_mixed_coalescing() -> None:
+    """Test mixed scenario with some chunks coalesced and some separate."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    # Group 1: chunks at 0-100, 100-200 (adjacent)
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(100, 200))
+    # Gap of 300 bytes
+    # Group 2: chunks at 500-600, 600-700 (adjacent)
+    chunk2 = _ChunkCoordsByteSlice(chunk_coords=(2,), byte_slice=slice(500, 600))
+    chunk3 = _ChunkCoordsByteSlice(chunk_coords=(3,), byte_slice=slice(600, 700))
+
+    result = codec._coalesce_chunks(
+        [chunk0, chunk1, chunk2, chunk3], max_gap_bytes=100, coalesce_max_bytes=1000
+    )
+
+    assert len(result) == 2
+    assert len(result[0]) == 2
+    assert result[0][0] == chunk0
+    assert result[0][1] == chunk1
+    assert len(result[1]) == 2
+    assert result[1][0] == chunk2
+    assert result[1][1] == chunk3
+
+
+def test_coalesce_chunks_boundary_gap_equals_max() -> None:
+    """Test boundary condition where gap equals max_gap_bytes (should NOT coalesce)."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(150, 250))  # 50 byte gap
+
+    # Gap is exactly max_gap_bytes, condition is `gap < max_gap_bytes` so should NOT coalesce
+    result = codec._coalesce_chunks([chunk0, chunk1], max_gap_bytes=50, coalesce_max_bytes=1000)
+
+    assert len(result) == 2
+
+
+def test_coalesce_chunks_boundary_gap_less_than_max() -> None:
+    """Test boundary condition where gap is just under max_gap_bytes (should coalesce)."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(149, 249))  # 49 byte gap
+
+    result = codec._coalesce_chunks([chunk0, chunk1], max_gap_bytes=50, coalesce_max_bytes=1000)
+
+    assert len(result) == 1
+
+
+# ============================================================================
+# _get_group_bytes tests
+# ============================================================================
+
+
+@dataclass
+class MockByteGetter:
+    """Mock ByteGetter for testing _get_group_bytes."""
+
+    data: bytes
+    return_none: bool = False
+
+    async def get(
+        self, prototype: BufferPrototype, byte_range: RangeByteRequest | None = None
+    ) -> Buffer | None:
+        if self.return_none:
+            return None
+        if byte_range is None:
+            return Buffer.from_bytes(self.data)
+        return Buffer.from_bytes(self.data[byte_range.start : byte_range.end])
+
+
+@pytest.mark.asyncio
+async def test_get_group_bytes_single_chunk() -> None:
+    """Test _get_group_bytes extracts single chunk correctly."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    data = b"0123456789" * 10  # 100 bytes
+    byte_getter = MockByteGetter(data=data)
+
+    chunk = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(10, 30))
+    group = [chunk]
+
+    result = await codec._get_group_bytes(group, byte_getter, default_buffer_prototype())
+
+    assert result is not None
+    assert (0,) in result
+    assert result[(0,)].as_numpy_array().tobytes() == data[10:30]
+
+
+@pytest.mark.asyncio
+async def test_get_group_bytes_multiple_chunks() -> None:
+    """Test _get_group_bytes extracts multiple chunks with correct offsets."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    data = b"0123456789" * 10  # 100 bytes
+    byte_getter = MockByteGetter(data=data)
+
+    # Two chunks: [10, 30) and [30, 50)
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(10, 30))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(30, 50))
+    group = [chunk0, chunk1]
+
+    result = await codec._get_group_bytes(group, byte_getter, default_buffer_prototype())
+
+    assert result is not None
+    assert len(result) == 2
+    assert result[(0,)].as_numpy_array().tobytes() == data[10:30]
+    assert result[(1,)].as_numpy_array().tobytes() == data[30:50]
+
+
+@pytest.mark.asyncio
+async def test_get_group_bytes_with_gap() -> None:
+    """Test _get_group_bytes handles chunks with gaps correctly."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    data = b"0123456789" * 10  # 100 bytes
+    byte_getter = MockByteGetter(data=data)
+
+    # Two chunks with a gap: [10, 20) and [40, 60)
+    chunk0 = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(10, 20))
+    chunk1 = _ChunkCoordsByteSlice(chunk_coords=(1,), byte_slice=slice(40, 60))
+    group = [chunk0, chunk1]
+
+    result = await codec._get_group_bytes(group, byte_getter, default_buffer_prototype())
+
+    assert result is not None
+    assert len(result) == 2
+    # The byte_getter.get is called with range [10, 60), then sliced
+    assert result[(0,)].as_numpy_array().tobytes() == data[10:20]
+    assert result[(1,)].as_numpy_array().tobytes() == data[40:60]
+
+
+@pytest.mark.asyncio
+async def test_get_group_bytes_returns_none_on_failed_read() -> None:
+    """Test _get_group_bytes returns None when ByteGetter.get returns None."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    byte_getter = MockByteGetter(data=b"", return_none=True)
+
+    chunk = _ChunkCoordsByteSlice(chunk_coords=(0,), byte_slice=slice(0, 100))
+    group = [chunk]
+
+    result = await codec._get_group_bytes(group, byte_getter, default_buffer_prototype())
+
+    assert result is None
+
+
+# ============================================================================
+# _load_partial_shard_maybe tests
+# ============================================================================
+
+
+@dataclass
+class MockByteGetterWithIndex:
+    """Mock ByteGetter that can return a shard index and chunk data."""
+
+    index_data: bytes | None
+    chunk_data: bytes | None
+    call_count: int = 0
+
+    async def get(
+        self, prototype: BufferPrototype, byte_range: RangeByteRequest | None = None
+    ) -> Buffer | None:
+        self.call_count += 1
+        # First call is typically for the index
+        if self.call_count == 1:
+            if self.index_data is None:
+                return None
+            return Buffer.from_bytes(self.index_data)
+        # Subsequent calls are for chunk data
+        if self.chunk_data is None:
+            return None
+        if byte_range is None:
+            return Buffer.from_bytes(self.chunk_data)
+        return Buffer.from_bytes(self.chunk_data[byte_range.start : byte_range.end])
+
+
+@pytest.mark.asyncio
+async def test_load_partial_shard_maybe_index_load_fails() -> None:
+    """Test _load_partial_shard_maybe returns None when index load fails."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    byte_getter = MockByteGetterWithIndex(index_data=None, chunk_data=None)
+
+    chunks_per_shard = (2,)
+    all_chunk_coords = {(0,)}
+
+    result = await codec._load_partial_shard_maybe(
+        byte_getter=byte_getter,
+        prototype=default_buffer_prototype(),
+        chunks_per_shard=chunks_per_shard,
+        all_chunk_coords=all_chunk_coords,
+        max_gap_bytes=100,
+        coalesce_max_bytes=1000,
+        async_concurrency=1,
+    )
+
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_load_partial_shard_maybe_with_empty_chunks(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Test _load_partial_shard_maybe skips chunks where get_chunk_slice returns None."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (4,)
+
+    # Create an index where chunk (1,) is empty (returns None from get_chunk_slice)
+    index = _ShardIndex.create_empty(chunks_per_shard)
+    index.set_chunk_slice((0,), slice(0, 100))
+    # (1,) is intentionally left empty
+    index.set_chunk_slice((2,), slice(100, 200))
+    index.set_chunk_slice((3,), slice(200, 300))
+
+    # Mock _load_shard_index_maybe on the class to return our custom index
+    async def mock_load_index(
+        self: ShardingCodec, byte_getter: MockByteGetter, cps: tuple[int, ...]
+    ) -> _ShardIndex:
+        return index
+
+    monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index)
+
+    # Create byte getter with chunk data
+    chunk_data = b"x" * 300
+    byte_getter = MockByteGetter(data=chunk_data)
+
+    # Request chunks including the empty one
+    all_chunk_coords = {(0,), (1,), (2,)}
+
+    result = await codec._load_partial_shard_maybe(
+        byte_getter=byte_getter,
+        prototype=default_buffer_prototype(),
+        chunks_per_shard=chunks_per_shard,
+        all_chunk_coords=all_chunk_coords,
+        max_gap_bytes=1000,
+        coalesce_max_bytes=10000,
+        async_concurrency=1,
+    )
+
+    assert result is not None
+    # Only chunks (0,) and (2,) should be in result, (1,) is empty and skipped
+    assert (0,) in result
+    assert (1,) not in result  # Empty chunk should be skipped
+    assert (2,) in result
+
+
+@pytest.mark.asyncio
+async def test_load_partial_shard_maybe_all_chunks_empty(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Test _load_partial_shard_maybe returns empty dict when all requested chunks are empty."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (4,)
+
+    # Create an empty index (all chunks empty)
+    index = _ShardIndex.create_empty(chunks_per_shard)
+
+    # Mock _load_shard_index_maybe on the class to return our empty index
+    async def mock_load_index(
+        self: ShardingCodec, byte_getter: MockByteGetter, cps: tuple[int, ...]
+    ) -> _ShardIndex:
+        return index
+
+    monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index)
+
+    byte_getter = MockByteGetter(data=b"")
+
+    # Request some chunks - all will be empty
+    all_chunk_coords = {(0,), (1,), (2,)}
+
+    result = await codec._load_partial_shard_maybe(
+        byte_getter=byte_getter,
+        prototype=default_buffer_prototype(),
+        chunks_per_shard=chunks_per_shard,
+        all_chunk_coords=all_chunk_coords,
+        max_gap_bytes=1000,
+        coalesce_max_bytes=10000,
+        async_concurrency=1,
+    )
+
+    assert result is not None
+    assert len(result) == 0  # All chunks were empty, so result is empty dict
+
+
+# ============================================================================
+# Supporting class tests (_ShardReader, _is_total_shard, _ChunkCoordsByteSlice)
+# ============================================================================
+
+
+def test_chunk_coords_byte_slice() -> None:
+    """Test _ChunkCoordsByteSlice dataclass."""
+    chunk = _ChunkCoordsByteSlice(chunk_coords=(1, 2, 3), byte_slice=slice(100, 200))
+
+    assert chunk.chunk_coords == (1, 2, 3)
+    assert chunk.byte_slice == slice(100, 200)
+    assert chunk.byte_slice.start == 100
+    assert chunk.byte_slice.stop == 200
+
+
+def test_shard_reader_create_empty() -> None:
+    """Test _ShardReader.create_empty creates reader with empty index."""
+    chunks_per_shard = (2, 3)
+    reader = _ShardReader.create_empty(chunks_per_shard)
+
+    assert reader.index.is_all_empty()
+    assert len(reader.buf) == 0
+    assert len(reader) == 6  # 2 * 3
+
+
+def test_shard_reader_iteration() -> None:
+    """Test _ShardReader iteration yields all chunk coordinates."""
+    chunks_per_shard = (2, 2)
+    reader = _ShardReader.create_empty(chunks_per_shard)
+
+    coords = list(reader)
+
+    assert len(coords) == 4
+    assert (0, 0) in coords
+    assert (0, 1) in coords
+    assert (1, 0) in coords
+    assert (1, 1) in coords
+
+
+def test_shard_reader_getitem_raises_for_empty() -> None:
+    """Test _ShardReader.__getitem__ raises KeyError for empty chunks."""
+    chunks_per_shard = (2,)
+    reader = _ShardReader.create_empty(chunks_per_shard)
+
+    with pytest.raises(KeyError):
+        _ = reader[(0,)]
+
+
+def test_is_total_shard_full() -> None:
+    """Test _is_total_shard returns True when all chunk coords are present."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (2, 2)
+    all_chunk_coords = {(0, 0), (0, 1), (1, 0), (1, 1)}
+
+    assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True
+
+
+def test_is_total_shard_partial() -> None:
+    """Test _is_total_shard returns False for partial chunk coords."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (2, 2)
+    all_chunk_coords = {(0, 0), (1, 1)}  # Missing (0, 1) and (1, 0)
+
+    assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is False
+
+
+def test_is_total_shard_empty() -> None:
+    """Test _is_total_shard returns False for empty chunk coords."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (2, 2)
+    all_chunk_coords: set[tuple[int, ...]] = set()
+
+    assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is False
+
+
+def test_is_total_shard_1d() -> None:
+    """Test _is_total_shard works with 1D shards."""
+    codec = ShardingCodec(chunk_shape=(8,))
+    chunks_per_shard = (4,)
+    all_chunk_coords = {(0,), (1,), (2,), (3,)}
+
+    assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True
+
+    # Partial
+    partial_coords = {(0,), (2,)}
+    assert codec._is_total_shard(partial_coords, chunks_per_shard) is False

From 6437fb6acb48c14a01d94188d43a0b8276945c00 Mon Sep 17 00:00:00 2001
From: Alden Keefe Sampson <alden@dynamical.org>
Date: Mon, 19 Jan 2026 12:50:54 -0500
Subject: [PATCH 21/21] Fix typing errors in test_sharding_unit.py

---
 tests/test_codecs/test_sharding_unit.py | 57 ++++++++++++++-----------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/tests/test_codecs/test_sharding_unit.py b/tests/test_codecs/test_sharding_unit.py
index 32d3e4b4af..cc5df22b67 100644
--- a/tests/test_codecs/test_sharding_unit.py
+++ b/tests/test_codecs/test_sharding_unit.py
@@ -17,7 +17,7 @@
 from zarr.core.buffer.cpu import Buffer
 
 if TYPE_CHECKING:
-    from zarr.abc.store import RangeByteRequest
+    from zarr.abc.store import ByteRequest
     from zarr.core.buffer import BufferPrototype
 
 
@@ -349,16 +349,18 @@ class MockByteGetter:
     return_none: bool = False
 
     async def get(
-        self, prototype: BufferPrototype, byte_range: RangeByteRequest | None = None
+        self, prototype: BufferPrototype, byte_range: ByteRequest | None = None
     ) -> Buffer | None:
         if self.return_none:
             return None
         if byte_range is None:
             return Buffer.from_bytes(self.data)
-        return Buffer.from_bytes(self.data[byte_range.start : byte_range.end])
+        # For RangeByteRequest, extract start and end
+        start = getattr(byte_range, "start", 0)
+        end = getattr(byte_range, "end", len(self.data))
+        return Buffer.from_bytes(self.data[start:end])
 
 
-@pytest.mark.asyncio
 async def test_get_group_bytes_single_chunk() -> None:
     """Test _get_group_bytes extracts single chunk correctly."""
     codec = ShardingCodec(chunk_shape=(8,))
@@ -372,10 +374,11 @@ async def test_get_group_bytes_single_chunk() -> None:
 
     assert result is not None
     assert (0,) in result
-    assert result[(0,)].as_numpy_array().tobytes() == data[10:30]
+    chunk_buf = result[(0,)]
+    assert chunk_buf is not None
+    assert chunk_buf.as_numpy_array().tobytes() == data[10:30]
 
 
-@pytest.mark.asyncio
 async def test_get_group_bytes_multiple_chunks() -> None:
     """Test _get_group_bytes extracts multiple chunks with correct offsets."""
     codec = ShardingCodec(chunk_shape=(8,))
@@ -391,11 +394,14 @@ async def test_get_group_bytes_multiple_chunks() -> None:
 
     assert result is not None
     assert len(result) == 2
-    assert result[(0,)].as_numpy_array().tobytes() == data[10:30]
-    assert result[(1,)].as_numpy_array().tobytes() == data[30:50]
+    chunk0_buf = result[(0,)]
+    chunk1_buf = result[(1,)]
+    assert chunk0_buf is not None
+    assert chunk1_buf is not None
+    assert chunk0_buf.as_numpy_array().tobytes() == data[10:30]
+    assert chunk1_buf.as_numpy_array().tobytes() == data[30:50]
 
 
-@pytest.mark.asyncio
 async def test_get_group_bytes_with_gap() -> None:
     """Test _get_group_bytes handles chunks with gaps correctly."""
     codec = ShardingCodec(chunk_shape=(8,))
@@ -412,11 +418,14 @@ async def test_get_group_bytes_with_gap() -> None:
     assert result is not None
     assert len(result) == 2
     # The byte_getter.get is called with range [10, 60), then sliced
-    assert result[(0,)].as_numpy_array().tobytes() == data[10:20]
-    assert result[(1,)].as_numpy_array().tobytes() == data[40:60]
+    chunk0_buf = result[(0,)]
+    chunk1_buf = result[(1,)]
+    assert chunk0_buf is not None
+    assert chunk1_buf is not None
+    assert chunk0_buf.as_numpy_array().tobytes() == data[10:20]
+    assert chunk1_buf.as_numpy_array().tobytes() == data[40:60]
 
 
-@pytest.mark.asyncio
 async def test_get_group_bytes_returns_none_on_failed_read() -> None:
     """Test _get_group_bytes returns None when ByteGetter.get returns None."""
     codec = ShardingCodec(chunk_shape=(8,))
@@ -444,7 +453,7 @@ class MockByteGetterWithIndex:
     call_count: int = 0
 
     async def get(
-        self, prototype: BufferPrototype, byte_range: RangeByteRequest | None = None
+        self, prototype: BufferPrototype, byte_range: ByteRequest | None = None
     ) -> Buffer | None:
         self.call_count += 1
         # First call is typically for the index
@@ -457,17 +466,19 @@ async def get(
             return None
         if byte_range is None:
             return Buffer.from_bytes(self.chunk_data)
-        return Buffer.from_bytes(self.chunk_data[byte_range.start : byte_range.end])
+        # For RangeByteRequest, extract start and end
+        start = getattr(byte_range, "start", 0)
+        end = getattr(byte_range, "end", len(self.chunk_data))
+        return Buffer.from_bytes(self.chunk_data[start:end])
 
 
-@pytest.mark.asyncio
 async def test_load_partial_shard_maybe_index_load_fails() -> None:
     """Test _load_partial_shard_maybe returns None when index load fails."""
     codec = ShardingCodec(chunk_shape=(8,))
     byte_getter = MockByteGetterWithIndex(index_data=None, chunk_data=None)
 
     chunks_per_shard = (2,)
-    all_chunk_coords = {(0,)}
+    all_chunk_coords: set[tuple[int, ...]] = {(0,)}
 
     result = await codec._load_partial_shard_maybe(
         byte_getter=byte_getter,
@@ -482,7 +493,6 @@ async def test_load_partial_shard_maybe_index_load_fails() -> None:
     assert result is None
 
 
-@pytest.mark.asyncio
 async def test_load_partial_shard_maybe_with_empty_chunks(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
@@ -510,7 +520,7 @@ async def mock_load_index(
     byte_getter = MockByteGetter(data=chunk_data)
 
     # Request chunks including the empty one
-    all_chunk_coords = {(0,), (1,), (2,)}
+    all_chunk_coords: set[tuple[int, ...]] = {(0,), (1,), (2,)}
 
     result = await codec._load_partial_shard_maybe(
         byte_getter=byte_getter,
@@ -529,7 +539,6 @@ async def mock_load_index(
     assert (2,) in result
 
 
-@pytest.mark.asyncio
 async def test_load_partial_shard_maybe_all_chunks_empty(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
@@ -551,7 +560,7 @@ async def mock_load_index(
     byte_getter = MockByteGetter(data=b"")
 
     # Request some chunks - all will be empty
-    all_chunk_coords = {(0,), (1,), (2,)}
+    all_chunk_coords: set[tuple[int, ...]] = {(0,), (1,), (2,)}
 
     result = await codec._load_partial_shard_maybe(
         byte_getter=byte_getter,
@@ -619,7 +628,7 @@ def test_is_total_shard_full() -> None:
     """Test _is_total_shard returns True when all chunk coords are present."""
     codec = ShardingCodec(chunk_shape=(8,))
     chunks_per_shard = (2, 2)
-    all_chunk_coords = {(0, 0), (0, 1), (1, 0), (1, 1)}
+    all_chunk_coords: set[tuple[int, ...]] = {(0, 0), (0, 1), (1, 0), (1, 1)}
 
     assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True
 
@@ -628,7 +637,7 @@ def test_is_total_shard_partial() -> None:
     """Test _is_total_shard returns False for partial chunk coords."""
     codec = ShardingCodec(chunk_shape=(8,))
     chunks_per_shard = (2, 2)
-    all_chunk_coords = {(0, 0), (1, 1)}  # Missing (0, 1) and (1, 0)
+    all_chunk_coords: set[tuple[int, ...]] = {(0, 0), (1, 1)}  # Missing (0, 1) and (1, 0)
 
     assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is False
 
@@ -646,10 +655,10 @@ def test_is_total_shard_1d() -> None:
     """Test _is_total_shard works with 1D shards."""
     codec = ShardingCodec(chunk_shape=(8,))
     chunks_per_shard = (4,)
-    all_chunk_coords = {(0,), (1,), (2,), (3,)}
+    all_chunk_coords: set[tuple[int, ...]] = {(0,), (1,), (2,), (3,)}
 
     assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True
 
     # Partial
-    partial_coords = {(0,), (2,)}
+    partial_coords: set[tuple[int, ...]] = {(0,), (2,)}
     assert codec._is_total_shard(partial_coords, chunks_per_shard) is False