Commits (24)
44b4473  Begin examining how to best add structured array support to Zarr v3 d… (BrianMichell, Nov 24, 2025)
e6df164  Merge branch 'google:master' into v3_structs (BrianMichell, Nov 24, 2025)
187f424  Updates to have proper reads (BrianMichell, Nov 24, 2025)
c2e73cd  Local testing and examples (BrianMichell, Nov 24, 2025)
9e8ed94  Begin adding support for opening struct arrays as void and add suppor… (BrianMichell, Nov 25, 2025)
44c765e  Fix failing tests (BrianMichell, Nov 25, 2025)
547642d  Resolve issues with opening struct as void (BrianMichell, Nov 25, 2025)
2a4c3d8  Remove debug print (BrianMichell, Nov 26, 2025)
b0abb94  Add field for open as void (BrianMichell, Dec 2, 2025)
fff0a5b  Add a shim for new open_as_void flag open option (BrianMichell, Dec 3, 2025)
b6c24f9  Revert some formatting changes (BrianMichell, Dec 3, 2025)
488b160  revert gitignore changes (BrianMichell, Dec 3, 2025)
537d309  Merge pull request #1 from BrianMichell/v3_structs_compatibility (BrianMichell, Dec 3, 2025)
54941a0  V3 structs remove shim (#2) (BrianMichell, Dec 3, 2025)
c9f58f9  Fix structured fill value population (BrianMichell, Dec 3, 2025)
7655cfd  V3 examples merge (#3) (BrianMichell, Dec 4, 2025)
8c4c4ca  Remove vestigial example build (BrianMichell, Dec 4, 2025)
4b590f8  V3 structs fix fills (#4) (BrianMichell, Dec 4, 2025)
7691c83  Merge branch 'google:master' into v3_structs (BrianMichell, Dec 4, 2025)
c0082a0  Add new options to schema (BrianMichell, Dec 4, 2025)
9a46c82  Fix copyright header date (BrianMichell, Dec 4, 2025)
b9b5e41  Cleanup (#5) (BrianMichell, Dec 4, 2025)
d5f6201  Merge zarr3 structured dtype support (BrianMichell, Jan 5, 2026)
31e55ec  Remove default values (BrianMichell, Jan 5, 2026)
21 changes: 19 additions & 2 deletions tensorstore/driver/zarr3/BUILD

@@ -94,8 +94,8 @@ tensorstore_cc_library(

 tensorstore_cc_library(
     name = "metadata",
-    srcs = ["metadata.cc"],
-    hdrs = ["metadata.h"],
+    srcs = ["metadata.cc", "dtype.cc"],
+    hdrs = ["metadata.h", "dtype.h"],
     deps = [
         ":default_nan",
         ":name_configuration_json_binder",
@@ -145,6 +145,23 @@ tensorstore_cc_library(
     ],
 )

+tensorstore_cc_test(
+    name = "dtype_test",
+    size = "small",
+    srcs = ["dtype_test.cc"],
+    deps = [
+        ":metadata",
+        "//tensorstore:data_type",
+        "//tensorstore:index",
+        "//tensorstore/internal/testing:json_gtest",
+        "//tensorstore/util:status_testutil",
+        "//tensorstore/util:str_cat",
+        "@abseil-cpp//absl/status",
+        "@googletest//:gtest_main",
+        "@nlohmann_json//:json",
+    ],
+)
+
 tensorstore_cc_test(
     name = "driver_test",
     size = "small",
87 changes: 72 additions & 15 deletions tensorstore/driver/zarr3/chunk_cache.cc

@@ -18,6 +18,8 @@

 #include <algorithm>
 #include <cassert>
+#include <cstddef>
+#include <cstring>
 #include <functional>
 #include <memory>
 #include <string>
@@ -73,15 +75,19 @@ ZarrChunkCache::~ZarrChunkCache() = default;

 ZarrLeafChunkCache::ZarrLeafChunkCache(
     kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state,
-    internal::CachePool::WeakPtr /*data_cache_pool*/)
-    : Base(std::move(store)), codec_state_(std::move(codec_state)) {}
+    ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/,
+    bool open_as_void)
+    : Base(std::move(store)),
+      codec_state_(std::move(codec_state)),
+      dtype_(std::move(dtype)),
+      open_as_void_(open_as_void) {}

 void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request,
                               AnyFlowReceiver<absl::Status, internal::ReadChunk,
                                               IndexTransform<>>&& receiver) {
   return internal::ChunkCache::Read(
       {static_cast<internal::DriverReadRequest&&>(request),
-       /*component_index=*/0, request.staleness_bound,
+       request.component_index, request.staleness_bound,
        request.fill_missing_data_reads},
       std::move(receiver));
 }
@@ -92,7 +98,7 @@ void ZarrLeafChunkCache::Write(
     receiver) {
   return internal::ChunkCache::Write(
       {static_cast<internal::DriverWriteRequest&&>(request),
-       /*component_index=*/0, request.store_data_equal_to_fill_value},
+       request.component_index, request.store_data_equal_to_fill_value},
       std::move(receiver));
 }

@@ -149,12 +155,59 @@ std::string ZarrLeafChunkCache::GetChunkStorageKey(
 Result<absl::InlinedVector<SharedArray<const void>, 1>>
 ZarrLeafChunkCache::DecodeChunk(span<const Index> chunk_indices,
                                 absl::Cord data) {
+  const size_t num_fields = dtype_.fields.size();
+  absl::InlinedVector<SharedArray<const void>, 1> field_arrays(num_fields);
+
+  // Special case: void access - return raw bytes directly
+  if (open_as_void_) {
+    TENSORSTORE_ASSIGN_OR_RETURN(
+        field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(),
+                                                   std::move(data)));
+    return field_arrays;
+  }
+
+  // For single non-structured field, decode directly
+  if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) {
+    TENSORSTORE_ASSIGN_OR_RETURN(
+        field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(),
+                                                   std::move(data)));
+    return field_arrays;
+  }
+
+  // For structured types, decode byte array then extract fields
+  // Build decode shape: [chunk_dims..., bytes_per_outer_element]
+  const auto& chunk_shape = grid().chunk_shape;
+  std::vector<Index> decode_shape(chunk_shape.begin(), chunk_shape.end());
+  decode_shape.push_back(dtype_.bytes_per_outer_element);
+
   TENSORSTORE_ASSIGN_OR_RETURN(
-      auto array,
-      codec_state_->DecodeArray(grid().components[0].shape(), std::move(data)));
-  absl::InlinedVector<SharedArray<const void>, 1> components;
-  components.push_back(std::move(array));
-  return components;
+      auto byte_array, codec_state_->DecodeArray(decode_shape, std::move(data)));
+
+  // Extract each field from the byte array
+  const Index num_elements = byte_array.num_elements() /
+                             dtype_.bytes_per_outer_element;
+  const auto* src_bytes = static_cast<const std::byte*>(byte_array.data());
+
+  for (size_t field_i = 0; field_i < num_fields; ++field_i) {
+    const auto& field = dtype_.fields[field_i];
+    // Use the component's shape (from the grid) for the result array
+    const auto& component_shape = grid().components[field_i].shape();
+    auto result_array =
+        AllocateArray(component_shape, c_order, default_init, field.dtype);
+    auto* dst = static_cast<std::byte*>(result_array.data());
+    const Index field_size = field.dtype->size;
+
+    // Copy field data from each struct element
+    for (Index i = 0; i < num_elements; ++i) {
+      std::memcpy(dst + i * field_size,
+                  src_bytes + i * dtype_.bytes_per_outer_element +
+                      field.byte_offset,
+                  field_size);
+    }
+    field_arrays[field_i] = std::move(result_array);
+  }
+
+  return field_arrays;
 }

 Result<absl::Cord> ZarrLeafChunkCache::EncodeChunk(
@@ -170,10 +223,13 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() {

 ZarrShardedChunkCache::ZarrShardedChunkCache(
     kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state,
-    internal::CachePool::WeakPtr data_cache_pool)
+    ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool,
+    bool open_as_void)
     : base_kvstore_(std::move(store)),
       codec_state_(std::move(codec_state)),
-      data_cache_pool_(std::move(data_cache_pool)) {}
+      dtype_(std::move(dtype)),
+      data_cache_pool_(std::move(data_cache_pool)),
+      open_as_void_(open_as_void) {}

 Result<IndexTransform<>> TranslateCellToSourceTransformForShard(
     IndexTransform<> transform, span<const Index> grid_cell_indices,
@@ -326,6 +382,7 @@ void ZarrShardedChunkCache::Read(
       *this, std::move(request.transform), std::move(receiver),
       [transaction = std::move(request.transaction),
        batch = std::move(request.batch),
+       component_index = request.component_index,
        staleness_bound = request.staleness_bound,
        fill_missing_data_reads = request.fill_missing_data_reads](auto entry) {
         Batch shard_batch = batch;
@@ -339,8 +396,7 @@
                              IndexTransform<>>&& receiver) {
           entry->sub_chunk_cache.get()->Read(
               {{transaction, std::move(transform), shard_batch},
-               staleness_bound,
-               fill_missing_data_reads},
+               component_index, staleness_bound, fill_missing_data_reads},
               std::move(receiver));
         };
       });
@@ -354,6 +410,7 @@ void ZarrShardedChunkCache::Write(
           &ZarrArrayToArrayCodec::PreparedState::Write>(
       *this, std::move(request.transform), std::move(receiver),
       [transaction = std::move(request.transaction),
+       component_index = request.component_index,
        store_data_equal_to_fill_value =
            request.store_data_equal_to_fill_value](auto entry) {
         internal::OpenTransactionPtr shard_transaction = transaction;
@@ -366,7 +423,7 @@
           AnyFlowReceiver<absl::Status, internal::WriteChunk,
                           IndexTransform<>>&& receiver) {
           entry->sub_chunk_cache.get()->Write(
-              {{shard_transaction, std::move(transform)},
+              {{shard_transaction, std::move(transform)}, component_index,
                store_data_equal_to_fill_value},
               std::move(receiver));
         };
@@ -481,7 +538,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() {
               *sharding_state.sub_chunk_codec_chain,
               std::move(sharding_kvstore), cache.executor(),
               ZarrShardingCodec::PreparedState::Ptr(&sharding_state),
-              cache.data_cache_pool_);
+              cache.dtype_, cache.data_cache_pool_, cache.open_as_void_);
           zarr_chunk_cache = new_cache.release();
           return std::unique_ptr<internal::Cache>(&zarr_chunk_cache->cache());
         })
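The heart of the new DecodeChunk path is a strided de-interleave: the codec chain decodes the chunk into a flat array-of-structs byte buffer, and each field is then copied out into its own contiguous per-component array. A minimal standalone sketch of that copy loop follows; the FieldLayout struct and ExtractFields helper are hypothetical stand-ins for ZarrDType::Field and the in-cache loop, not names from the PR.

#include <cstddef>
#include <cstring>
#include <vector>

// Hypothetical stand-in for ZarrDType::Field: where a field lives inside one
// packed struct element, and how many bytes it occupies.
struct FieldLayout {
  size_t byte_offset;  // offset of the field within one element
  size_t size;         // size of the field's scalar type in bytes
};

// De-interleave an array-of-structs buffer into one contiguous buffer per
// field, mirroring the memcpy loop in ZarrLeafChunkCache::DecodeChunk.
std::vector<std::vector<std::byte>> ExtractFields(
    const std::byte* src, size_t num_elements, size_t bytes_per_element,
    const std::vector<FieldLayout>& fields) {
  std::vector<std::vector<std::byte>> out(fields.size());
  for (size_t f = 0; f < fields.size(); ++f) {
    out[f].resize(num_elements * fields[f].size);
    for (size_t i = 0; i < num_elements; ++i) {
      // Field f of element i starts at the element's base plus the field's
      // byte offset within the packed struct.
      std::memcpy(out[f].data() + i * fields[f].size,
                  src + i * bytes_per_element + fields[f].byte_offset,
                  fields[f].size);
    }
  }
  return out;
}

A per-element memcpy is the simplest correct form of this transpose; the PR's loop is identical in structure, with sizes and offsets taken from the parsed zarr dtype.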
21 changes: 17 additions & 4 deletions tensorstore/driver/zarr3/chunk_cache.h

@@ -31,6 +31,7 @@
 #include "tensorstore/driver/read_request.h"
 #include "tensorstore/driver/write_request.h"
 #include "tensorstore/driver/zarr3/codec/codec.h"
+#include "tensorstore/driver/zarr3/dtype.h"
 #include "tensorstore/index.h"
 #include "tensorstore/index_space/index_transform.h"
 #include "tensorstore/internal/cache/cache.h"
@@ -72,6 +73,7 @@ class ZarrChunkCache {
   virtual const Executor& executor() const = 0;

   struct ReadRequest : internal::DriverReadRequest {
+    size_t component_index = 0;
     absl::Time staleness_bound;
     bool fill_missing_data_reads;
   };
@@ -81,6 +83,7 @@
                     IndexTransform<>>&& receiver) = 0;

   struct WriteRequest : internal::DriverWriteRequest {
+    size_t component_index = 0;
     bool store_data_equal_to_fill_value;
   };

@@ -154,7 +157,9 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache,

   explicit ZarrLeafChunkCache(kvstore::DriverPtr store,
                               ZarrCodecChain::PreparedState::Ptr codec_state,
-                              internal::CachePool::WeakPtr data_cache_pool);
+                              ZarrDType dtype,
+                              internal::CachePool::WeakPtr data_cache_pool,
+                              bool open_as_void);

   void Read(ZarrChunkCache::ReadRequest request,
             AnyFlowReceiver<absl::Status, internal::ReadChunk,
@@ -181,6 +186,8 @@
   kvstore::Driver* GetKvStoreDriver() override;

   ZarrCodecChain::PreparedState::Ptr codec_state_;
+  ZarrDType dtype_;
+  bool open_as_void_;
 };

 /// Chunk cache for a Zarr array where each chunk is a shard.
@@ -190,7 +197,9 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache {
  public:
   explicit ZarrShardedChunkCache(kvstore::DriverPtr store,
                                  ZarrCodecChain::PreparedState::Ptr codec_state,
-                                 internal::CachePool::WeakPtr data_cache_pool);
+                                 ZarrDType dtype,
+                                 internal::CachePool::WeakPtr data_cache_pool,
+                                 bool open_as_void);

   const ZarrShardingCodec::PreparedState& sharding_codec_state() const {
     return static_cast<const ZarrShardingCodec::PreparedState&>(
@@ -239,6 +248,8 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache {

   kvstore::DriverPtr base_kvstore_;
   ZarrCodecChain::PreparedState::Ptr codec_state_;
+  ZarrDType dtype_;
+  bool open_as_void_;

   // Data cache pool, if it differs from `this->pool()` (which is equal to the
   // metadata cache pool).
@@ -253,11 +264,13 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl {
   explicit ZarrShardSubChunkCache(
       kvstore::DriverPtr store, Executor executor,
       ZarrShardingCodec::PreparedState::Ptr sharding_state,
-      internal::CachePool::WeakPtr data_cache_pool)
+      ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool,
+      bool open_as_void)
       : ChunkCacheImpl(std::move(store),
                        ZarrCodecChain::PreparedState::Ptr(
                            sharding_state->sub_chunk_codec_state),
-                       std::move(data_cache_pool)),
+                       std::move(dtype), std::move(data_cache_pool),
+                       open_as_void),
         sharding_state_(std::move(sharding_state)),
         executor_(std::move(executor)) {}
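With more than one field, each struct field maps to its own chunk-cache component, which is why ReadRequest and WriteRequest now carry a component_index in place of the previously hard-coded /*component_index=*/0. The open_as_void path instead exposes the whole packed element as raw bytes through a single component. Opening in that mode would presumably look like the sketch below; the "open_as_void" spec member and its top-level placement are assumptions inferred from commits fff0a5b and c0082a0, not confirmed API.

#include "tensorstore/open.h"

// Sketch: open an existing zarr3 structured array with each element exposed
// as raw bytes. "open_as_void" is a hypothetical spelling of the new open
// option; the actual schema member added by this PR may differ.
auto OpenStructArrayAsVoid() {
  return tensorstore::Open({
      {"driver", "zarr3"},
      {"kvstore", {{"driver", "file"}, {"path", "/tmp/struct_array/"}}},
      {"open_as_void", true},  // hypothetical option name
  });
}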