Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Continuous integration: builds and tests each language implementation in its
# own job, then runs a single gate job that fails unless every suite succeeded.
name: CI

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  # Go implementation (sources under go/).
  go:
    name: Go
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.23'
      - run: cd go && go build ./shard/...
      - run: cd go && go test ./shard/...

  # Python implementation (sources under py/), installed with its test extras.
  python:
    name: Python
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - run: cd py && pip install -e ".[test]"
      - run: cd py && pytest tests/

  # TypeScript implementation (sources under ts/).
  typescript:
    name: TypeScript
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - run: cd ts && npm ci
      - run: cd ts && npm test

  # Rust implementation (sources under rs/).
  rust:
    name: Rust
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - run: cd rs && cargo build
      - run: cd rs && cargo test

  # C implementation (sources under c/); installs the zstd and lz4 dev packages first.
  c:
    name: C
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: sudo apt-get update && sudo apt-get install -y libzstd-dev liblz4-dev
      - run: cd c && make clean && make test

  # Aggregate gate over all five language jobs.
  publish-gate:
    name: Publish Gate
    needs: [go, python, typescript, rust, c]
    runs-on: ubuntu-latest
    # always() keeps this job running even when a needed job failed, so the
    # step below can report each result instead of the job being skipped.
    if: always()
    steps:
      - name: Check all language tests passed
        run: |
          if [[ "${{ needs.go.result }}" != "success" || \
          "${{ needs.python.result }}" != "success" || \
          "${{ needs.typescript.result }}" != "success" || \
          "${{ needs.rust.result }}" != "success" || \
          "${{ needs.c.result }}" != "success" ]]; then
          echo "One or more language test suites failed:"
          echo " Go: ${{ needs.go.result }}"
          echo " Python: ${{ needs.python.result }}"
          echo " TypeScript: ${{ needs.typescript.result }}"
          echo " Rust: ${{ needs.rust.result }}"
          echo " C: ${{ needs.c.result }}"
          exit 1
          fi
          echo "All 5 language test suites passed. Safe to publish."
34 changes: 28 additions & 6 deletions c/shard_v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,21 @@ static inline void write_le64(uint8_t* p, uint64_t v) {
p[7] = (uint8_t)(v >> 56);
}

/*
 * Add two uint64_t values without allowing the sum to wrap.
 *
 * On success stores a + b in *out and returns true. Returns false, leaving
 * *out untouched, when out is NULL or when a + b would exceed UINT64_MAX.
 */
static bool checked_u64_add(uint64_t a, uint64_t b, uint64_t* out) {
    /* b > UINT64_MAX - a is the wrap test, written so it cannot itself overflow. */
    if (out == NULL || b > UINT64_MAX - a) {
        return false;
    }
    *out = a + b;
    return true;
}

/*
 * Check that the byte range [offset, offset + len) lies inside a buffer of
 * `total` bytes.
 *
 * Returns false when offset + len overflows uint64_t or when the range ends
 * past `total`. On success returns true and, if end_out is non-NULL, writes
 * the exclusive end (offset + len) to *end_out. *end_out is not written on
 * failure.
 */
static bool range_within_size(size_t total, uint64_t offset, uint64_t len, uint64_t* end_out) {
    uint64_t range_end = 0;
    bool fits = checked_u64_add(offset, len, &range_end) && range_end <= (uint64_t)total;

    if (fits && end_out != NULL) {
        *end_out = range_end;
    }
    return fits;
}

/* ============================================================
* CRC32C (Castagnoli, polynomial 0x82F63B78)
* ============================================================ */
Expand Down Expand Up @@ -358,6 +373,9 @@ static shard_v2_reader_t* reader_parse(shard_v2_reader_t* r) {
if (r->header.total_file_size != (uint64_t)r->data_len) {
goto fail;
}
if (r->header.schema_offset > (uint64_t)r->data_len) {
goto fail;
}

n = r->header.entry_count;
r->entry_count = n;
Expand Down Expand Up @@ -389,8 +407,11 @@ static shard_v2_reader_t* reader_parse(shard_v2_reader_t* r) {
for (uint32_t i = 0; i < n; i++) {
uint32_t name_off = r->entries[i].name_offset;
uint16_t name_len = r->entries[i].name_len;
uint64_t abs_off = st_off + name_off;
if (abs_off + name_len > r->data_len) goto fail;
uint64_t abs_off = 0;
if (!checked_u64_add(st_off, name_off, &abs_off) ||
!range_within_size(r->data_len, abs_off, name_len, NULL)) {
goto fail;
}
r->names[i] = (char*)malloc(name_len + 1);
if (!r->names[i]) goto fail;
memcpy(r->names[i], r->data + abs_off, name_len);
Expand All @@ -404,10 +425,11 @@ static shard_v2_reader_t* reader_parse(shard_v2_reader_t* r) {
for (uint32_t i = 0; i < n; i++) {
uint64_t off = r->entries[i].data_offset;
uint64_t size = r->entries[i].disk_size;
uint64_t end = 0;
if (off < ds_off) goto fail;
if (off + size > (uint64_t)r->data_len) goto fail;
if (!range_within_size(r->data_len, off, size, &end)) goto fail;
if (i > 0 && off < prev_end) goto fail;
prev_end = off + size;
prev_end = end;
}
}

Expand Down Expand Up @@ -606,7 +628,7 @@ const uint8_t* shard_v2_read_entry(const shard_v2_reader_t* r, uint32_t i, size_
if (!r || i >= r->entry_count) return NULL;
const shard_v2_index_entry_t* e = &r->entries[i];

if (e->data_offset + e->disk_size > r->data_len) return NULL;
if (!range_within_size(r->data_len, e->data_offset, e->disk_size, NULL)) return NULL;

if (shard_v2_is_compressed(e)) {
/* Compressed path: decompress into cache on first access. */
Expand Down Expand Up @@ -691,7 +713,7 @@ const uint8_t* shard_v2_read_entry_prefix(const shard_v2_reader_t* r, uint32_t i
if (shard_v2_is_compressed(e)) return NULL;
size_t sz = (size_t)e->disk_size;
if (max_bytes < sz) sz = max_bytes;
if (e->data_offset + sz > r->data_len) return NULL;
if (!range_within_size(r->data_len, e->data_offset, sz, NULL)) return NULL;
if (out_size) *out_size = sz;
return r->data + e->data_offset;
}
Expand Down
40 changes: 24 additions & 16 deletions c/test_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,18 @@ TEST(join_path_multi) {
* list_children tests
* ============================================================ */

/* make_layered_reader builds a four-entry shard ("layer.0/weight", "layer.0/bias",
 * "layer.1/weight", "embed") and opens a reader over it.
 *
 * The backing buffer is handed back through *out_buf. Caller must free it AFTER
 * closing the reader, because shard_v2_from_buffer borrows the pointer
 * (owns_buf=false). Returns whatever shard_v2_from_buffer returns. */
static shard_v2_reader_t* make_layered_reader(uint8_t** out_buf) {
    test_entry_t entries[] = {
        {"layer.0/weight", (const uint8_t*)"w0", 2},
        {"layer.0/bias", (const uint8_t*)"b0", 2},
        {"layer.1/weight", (const uint8_t*)"w1", 2},
        {"embed", (const uint8_t*)"tok", 3},
    };
    size_t len = 0;
    *out_buf = build_shard(entries, 4, 64, &len);
    return shard_v2_from_buffer(*out_buf, len);
}

/* Helper: check that array `arr` of `n` strings contains `needle`. */
Expand All @@ -286,7 +286,8 @@ static bool arr_contains(char** arr, uint32_t n, const char* needle) {
}

TEST(list_children_exact_prefix_with_slash) {
shard_v2_reader_t* r = make_layered_reader();
uint8_t* buf;
shard_v2_reader_t* r = make_layered_reader(&buf);
assert(r != NULL);
uint32_t count = 0;
char** children = shard_v2_list_children(r, "layer.0/", &count);
Expand All @@ -296,10 +297,12 @@ TEST(list_children_exact_prefix_with_slash) {
assert(arr_contains(children, count, "layer.0/bias"));
shard_v2_list_children_free(children, count);
shard_v2_close(r);
free(buf);
}

TEST(list_children_empty_prefix_returns_top_level) {
shard_v2_reader_t* r = make_layered_reader();
uint8_t* buf;
shard_v2_reader_t* r = make_layered_reader(&buf);
assert(r != NULL);
uint32_t count = 0;
char** children = shard_v2_list_children(r, "", &count);
Expand All @@ -311,10 +314,12 @@ TEST(list_children_empty_prefix_returns_top_level) {
assert(count == 3);
shard_v2_list_children_free(children, count);
shard_v2_close(r);
free(buf);
}

TEST(list_children_partial_prefix) {
shard_v2_reader_t* r = make_layered_reader();
uint8_t* buf;
shard_v2_reader_t* r = make_layered_reader(&buf);
assert(r != NULL);
uint32_t count = 0;
char** children = shard_v2_list_children(r, "layer.", &count);
Expand All @@ -324,17 +329,20 @@ TEST(list_children_partial_prefix) {
assert(arr_contains(children, count, "layer.1/"));
shard_v2_list_children_free(children, count);
shard_v2_close(r);
free(buf);
}

/* Listing a prefix that matches no entry must return a non-NULL result with
 * count == 0. */
TEST(list_children_nonexistent_prefix) {
    uint8_t* buf = NULL;
    shard_v2_reader_t* r = make_layered_reader(&buf);
    assert(r != NULL);
    uint32_t count = 0;
    char** children = shard_v2_list_children(r, "nonexistent/", &count);
    assert(children != NULL);
    assert(count == 0);
    shard_v2_list_children_free(children, count);
    /* Close the reader before releasing the buffer it borrows. */
    shard_v2_close(r);
    free(buf);
}

TEST(list_children_deduplicated_directories) {
Expand All @@ -346,7 +354,6 @@ TEST(list_children_deduplicated_directories) {
size_t len;
uint8_t* buf = build_shard(entries, 3, 64, &len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, len);
free(buf);
assert(r != NULL);

uint32_t count = 0;
Expand All @@ -360,6 +367,7 @@ TEST(list_children_deduplicated_directories) {
assert(a_count == 1);
shard_v2_list_children_free(children, count);
shard_v2_close(r);
free(buf);
}

TEST(list_children_hierarchical_three_levels) {
Expand All @@ -372,7 +380,6 @@ TEST(list_children_hierarchical_three_levels) {
size_t len;
uint8_t* buf = build_shard(entries, 4, 64, &len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, len);
free(buf);
assert(r != NULL);

/* Top-level */
Expand All @@ -399,6 +406,7 @@ TEST(list_children_hierarchical_three_levels) {
shard_v2_list_children_free(under_ab, count);

shard_v2_close(r);
free(buf);
}

/* ============================================================
Expand All @@ -415,7 +423,6 @@ TEST(read_entry_prefix_first_n_bytes) {
size_t buf_len;
uint8_t* buf = build_shard(entries, 1, 64, &buf_len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, buf_len);
free(buf);
assert(r != NULL);

size_t out_size = 0;
Expand All @@ -424,6 +431,7 @@ TEST(read_entry_prefix_first_n_bytes) {
assert(out_size == 5);
assert(memcmp(got, "Hello", 5) == 0);
shard_v2_close(r);
free(buf);
}

TEST(read_entry_prefix_full_entry) {
Expand All @@ -433,7 +441,6 @@ TEST(read_entry_prefix_full_entry) {
size_t buf_len;
uint8_t* buf = build_shard(entries, 1, 64, &buf_len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, buf_len);
free(buf);
assert(r != NULL);

size_t out_size = 0;
Expand All @@ -442,6 +449,7 @@ TEST(read_entry_prefix_full_entry) {
assert(out_size == PAYLOAD_LEN);
assert(memcmp(got, PAYLOAD, PAYLOAD_LEN) == 0);
shard_v2_close(r);
free(buf);
}

TEST(read_entry_prefix_exceeds_entry_length) {
Expand All @@ -451,7 +459,6 @@ TEST(read_entry_prefix_exceeds_entry_length) {
size_t buf_len;
uint8_t* buf = build_shard(entries, 1, 64, &buf_len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, buf_len);
free(buf);
assert(r != NULL);

size_t out_size = 0;
Expand All @@ -460,6 +467,7 @@ TEST(read_entry_prefix_exceeds_entry_length) {
assert(out_size == PAYLOAD_LEN);
assert(memcmp(got, PAYLOAD, PAYLOAD_LEN) == 0);
shard_v2_close(r);
free(buf);
}

TEST(read_entry_prefix_zero_bytes) {
Expand All @@ -469,14 +477,14 @@ TEST(read_entry_prefix_zero_bytes) {
size_t buf_len;
uint8_t* buf = build_shard(entries, 1, 64, &buf_len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, buf_len);
free(buf);
assert(r != NULL);

size_t out_size = 99;
const uint8_t* got = shard_v2_read_entry_prefix(r, 0, 0, &out_size);
assert(got != NULL);
assert(out_size == 0);
shard_v2_close(r);
free(buf);
}

TEST(read_entry_prefix_one_byte) {
Expand All @@ -486,7 +494,6 @@ TEST(read_entry_prefix_one_byte) {
size_t buf_len;
uint8_t* buf = build_shard(entries, 1, 64, &buf_len);
shard_v2_reader_t* r = shard_v2_from_buffer(buf, buf_len);
free(buf);
assert(r != NULL);

size_t out_size = 0;
Expand All @@ -495,6 +502,7 @@ TEST(read_entry_prefix_one_byte) {
assert(out_size == 1);
assert(got[0] == 'H');
shard_v2_close(r);
free(buf);
}

/* ============================================================
Expand Down
Loading
Loading