From 8100a4045784f0ec4bf0d1ace256e133b6253e7d Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Mon, 5 Jan 2026 15:01:27 -0500 Subject: [PATCH 1/9] feat: add serialization support with SerDe interface --- sampling/reservoir_items_sketch.go | 90 ++++++++++++++++ sampling/serde.go | 165 +++++++++++++++++++++++++++++ sampling/serde_test.go | 119 +++++++++++++++++++++ 3 files changed, 374 insertions(+) create mode 100644 sampling/serde.go create mode 100644 sampling/serde_test.go diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index 49aaf22..eee001b 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -18,6 +18,7 @@ package sampling import ( + "encoding/binary" "errors" "math/rand" ) @@ -111,3 +112,92 @@ func (s *ReservoirItemsSketch[T]) Reset() { s.n = 0 s.data = s.data[:0] } + +// Preamble constants for serialization +const ( + preambleIntsShort = 1 // empty sketch + preambleIntsLong = 3 // non-empty sketch + serVer = 2 // serialization version + familyID = 13 // reservoir items family +) + +// ToByteArray serializes the sketch to a byte array using the provided SerDe. +func (s *ReservoirItemsSketch[T]) ToByteArray(serde ItemsSerDe[T]) ([]byte, error) { + if s.IsEmpty() { + // Empty sketch: minimal preamble + buf := make([]byte, 8) + buf[0] = preambleIntsShort + buf[1] = serVer + buf[2] = familyID + // bytes 4-7: k as int32 + binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) + return buf, nil + } + + // Serialize items + itemsBytes := serde.SerializeToBytes(s.data) + + // Non-empty sketch + preambleBytes := preambleIntsLong * 8 + buf := make([]byte, preambleBytes+len(itemsBytes)) + + // Preamble + buf[0] = preambleIntsLong + buf[1] = serVer + buf[2] = familyID + // byte 3: reserved + binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) + binary.LittleEndian.PutUint64(buf[8:], uint64(s.n)) + binary.LittleEndian.PutUint32(buf[16:], uint32(len(s.data))) + // bytes 20-23 reserved + + // Data + copy(buf[preambleBytes:], itemsBytes) + + return buf, nil +} + +// NewReservoirItemsSketchFromSlice deserializes a sketch from a byte array using the provided SerDe. +func NewReservoirItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*ReservoirItemsSketch[T], error) { + if len(data) < 8 { + return nil, errors.New("data too short") + } + + preambleInts := int(data[0]) + ver := data[1] + family := data[2] + k := int(binary.LittleEndian.Uint32(data[4:])) + + if ver != serVer { + return nil, errors.New("unsupported serialization version") + } + if family != familyID { + return nil, errors.New("wrong sketch family") + } + + if preambleInts == preambleIntsShort { + // Empty sketch + return NewReservoirItemsSketch[T](k) + } + + if len(data) < preambleIntsLong*8 { + return nil, errors.New("data too short for non-empty sketch") + } + + n := int64(binary.LittleEndian.Uint64(data[8:])) + numSamples := int(binary.LittleEndian.Uint32(data[16:])) + + preambleBytes := preambleIntsLong * 8 + itemsData := data[preambleBytes:] + + items, err := serde.DeserializeFromBytes(itemsData, numSamples) + if err != nil { + return nil, err + } + + return &ReservoirItemsSketch[T]{ + k: k, + n: n, + data: items, + }, nil +} diff --git a/sampling/serde.go b/sampling/serde.go new file mode 100644 index 0000000..e812494 --- /dev/null +++ b/sampling/serde.go @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "encoding/binary" + "errors" + "math" +) + +// ItemsSerDe defines the interface for serializing and deserializing items. +// Users must implement this interface for custom types. +// Built-in implementations are provided for common types (int64, int32, string, float64). +type ItemsSerDe[T any] interface { + // SerializeToBytes converts items to a byte slice. + SerializeToBytes(items []T) []byte + + // DeserializeFromBytes converts bytes back to items. + // numItems specifies how many items to read from the data. + DeserializeFromBytes(data []byte, numItems int) ([]T, error) + + // SizeOfItem returns the size in bytes for a single item. + // Returns -1 for variable-length types (like string). + SizeOfItem() int +} + +// Int64SerDe provides serialization for int64 (8 bytes per item). +type Int64SerDe struct{} + +func (s Int64SerDe) SerializeToBytes(items []int64) []byte { + buf := make([]byte, len(items)*8) + for i, v := range items { + binary.LittleEndian.PutUint64(buf[i*8:], uint64(v)) + } + return buf +} + +func (s Int64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int64, error) { + if len(data) < numItems*8 { + return nil, errors.New("data too short for int64 deserialization") + } + items := make([]int64, numItems) + for i := 0; i < numItems; i++ { + items[i] = int64(binary.LittleEndian.Uint64(data[i*8:])) + } + return items, nil +} + +func (s Int64SerDe) SizeOfItem() int { + return 8 +} + +// Int32SerDe provides serialization for int32 (4 bytes per item). +type Int32SerDe struct{} + +func (s Int32SerDe) SerializeToBytes(items []int32) []byte { + buf := make([]byte, len(items)*4) + for i, v := range items { + binary.LittleEndian.PutUint32(buf[i*4:], uint32(v)) + } + return buf +} + +func (s Int32SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int32, error) { + if len(data) < numItems*4 { + return nil, errors.New("data too short for int32 deserialization") + } + items := make([]int32, numItems) + for i := 0; i < numItems; i++ { + items[i] = int32(binary.LittleEndian.Uint32(data[i*4:])) + } + return items, nil +} + +func (s Int32SerDe) SizeOfItem() int { + return 4 +} + +// Float64SerDe provides serialization for float64 (8 bytes per item). +type Float64SerDe struct{} + +func (s Float64SerDe) SerializeToBytes(items []float64) []byte { + buf := make([]byte, len(items)*8) + for i, v := range items { + binary.LittleEndian.PutUint64(buf[i*8:], math.Float64bits(v)) + } + return buf +} + +func (s Float64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]float64, error) { + if len(data) < numItems*8 { + return nil, errors.New("data too short for float64 deserialization") + } + items := make([]float64, numItems) + for i := 0; i < numItems; i++ { + bits := binary.LittleEndian.Uint64(data[i*8:]) + items[i] = math.Float64frombits(bits) + } + return items, nil +} + +func (s Float64SerDe) SizeOfItem() int { + return 8 +} + +// StringSerDe provides serialization for string (variable length: 4-byte length prefix + content). +type StringSerDe struct{} + +func (s StringSerDe) SerializeToBytes(items []string) []byte { + // Calculate total size + totalSize := 0 + for _, str := range items { + totalSize += 4 + len(str) // 4 bytes for length + string bytes + } + + buf := make([]byte, totalSize) + offset := 0 + for _, str := range items { + // Write length + binary.LittleEndian.PutUint32(buf[offset:], uint32(len(str))) + offset += 4 + // Write string content + copy(buf[offset:], str) + offset += len(str) + } + return buf +} + +func (s StringSerDe) DeserializeFromBytes(data []byte, numItems int) ([]string, error) { + items := make([]string, numItems) + offset := 0 + for i := 0; i < numItems; i++ { + if offset+4 > len(data) { + return nil, errors.New("data too short for string length") + } + length := int(binary.LittleEndian.Uint32(data[offset:])) + offset += 4 + + if offset+length > len(data) { + return nil, errors.New("data too short for string content") + } + items[i] = string(data[offset : offset+length]) + offset += length + } + return items, nil +} + +func (s StringSerDe) SizeOfItem() int { + return -1 // Variable length +} diff --git a/sampling/serde_test.go b/sampling/serde_test.go new file mode 100644 index 0000000..03af2c0 --- /dev/null +++ b/sampling/serde_test.go @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInt64SerDe(t *testing.T) { + serde := Int64SerDe{} + items := []int64{1, 2, 3, 42, -100, 1000000} + + bytes := serde.SerializeToBytes(items) + assert.Equal(t, len(items)*8, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestInt32SerDe(t *testing.T) { + serde := Int32SerDe{} + items := []int32{1, 2, 3, 42, -100, 1000000} + + bytes := serde.SerializeToBytes(items) + assert.Equal(t, len(items)*4, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestFloat64SerDe(t *testing.T) { + serde := Float64SerDe{} + items := []float64{1.5, 2.5, 3.14159, -100.5} + + bytes := serde.SerializeToBytes(items) + assert.Equal(t, len(items)*8, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Len(t, restored, len(items)) +} + +func TestStringSerDe(t *testing.T) { + serde := StringSerDe{} + items := []string{"hello", "world", "", "testing 123", "日本語"} + + bytes := serde.SerializeToBytes(items) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestSketchSerializationInt64(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](10) + + for i := int64(1); i <= 5; i++ { + sketch.Update(i) + } + + bytes, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + assert.NotNil(t, bytes) + + restored, err := NewReservoirItemsSketchFromSlice[int64](bytes, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} + +func TestSketchSerializationString(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](5) + + sketch.Update("apple") + sketch.Update("banana") + sketch.Update("cherry") + + bytes, err := sketch.ToByteArray(StringSerDe{}) + assert.NoError(t, err) + + restored, err := NewReservoirItemsSketchFromSlice[string](bytes, StringSerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} + +func TestSketchSerializationEmpty(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](10) + + bytes, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 8, len(bytes)) // Minimal preamble + + restored, err := NewReservoirItemsSketchFromSlice[int64](bytes, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, restored.IsEmpty()) + assert.Equal(t, 10, restored.K()) +} From e12addd566eb7c496def2024a782f84e0f7df62d Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 6 Jan 2026 23:21:49 -0500 Subject: [PATCH 2/9] refactor: SerializeToBytes returns ([]byte, error) for custom type support --- sampling/reservoir_items_sketch.go | 5 ++++- sampling/serde.go | 18 +++++++++--------- sampling/serde_test.go | 12 ++++++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index eee001b..9b729b5 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -135,7 +135,10 @@ func (s *ReservoirItemsSketch[T]) ToByteArray(serde ItemsSerDe[T]) ([]byte, erro } // Serialize items - itemsBytes := serde.SerializeToBytes(s.data) + itemsBytes, err := serde.SerializeToBytes(s.data) + if err != nil { + return nil, err + } // Non-empty sketch preambleBytes := preambleIntsLong * 8 diff --git a/sampling/serde.go b/sampling/serde.go index e812494..503b442 100644 --- a/sampling/serde.go +++ b/sampling/serde.go @@ -28,7 +28,7 @@ import ( // Built-in implementations are provided for common types (int64, int32, string, float64). type ItemsSerDe[T any] interface { // SerializeToBytes converts items to a byte slice. - SerializeToBytes(items []T) []byte + SerializeToBytes(items []T) ([]byte, error) // DeserializeFromBytes converts bytes back to items. // numItems specifies how many items to read from the data. @@ -42,12 +42,12 @@ type ItemsSerDe[T any] interface { // Int64SerDe provides serialization for int64 (8 bytes per item). type Int64SerDe struct{} -func (s Int64SerDe) SerializeToBytes(items []int64) []byte { +func (s Int64SerDe) SerializeToBytes(items []int64) ([]byte, error) { buf := make([]byte, len(items)*8) for i, v := range items { binary.LittleEndian.PutUint64(buf[i*8:], uint64(v)) } - return buf + return buf, nil } func (s Int64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int64, error) { @@ -68,12 +68,12 @@ func (s Int64SerDe) SizeOfItem() int { // Int32SerDe provides serialization for int32 (4 bytes per item). type Int32SerDe struct{} -func (s Int32SerDe) SerializeToBytes(items []int32) []byte { +func (s Int32SerDe) SerializeToBytes(items []int32) ([]byte, error) { buf := make([]byte, len(items)*4) for i, v := range items { binary.LittleEndian.PutUint32(buf[i*4:], uint32(v)) } - return buf + return buf, nil } func (s Int32SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int32, error) { @@ -94,12 +94,12 @@ func (s Int32SerDe) SizeOfItem() int { // Float64SerDe provides serialization for float64 (8 bytes per item). type Float64SerDe struct{} -func (s Float64SerDe) SerializeToBytes(items []float64) []byte { +func (s Float64SerDe) SerializeToBytes(items []float64) ([]byte, error) { buf := make([]byte, len(items)*8) for i, v := range items { binary.LittleEndian.PutUint64(buf[i*8:], math.Float64bits(v)) } - return buf + return buf, nil } func (s Float64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]float64, error) { @@ -121,7 +121,7 @@ func (s Float64SerDe) SizeOfItem() int { // StringSerDe provides serialization for string (variable length: 4-byte length prefix + content). type StringSerDe struct{} -func (s StringSerDe) SerializeToBytes(items []string) []byte { +func (s StringSerDe) SerializeToBytes(items []string) ([]byte, error) { // Calculate total size totalSize := 0 for _, str := range items { @@ -138,7 +138,7 @@ func (s StringSerDe) SerializeToBytes(items []string) []byte { copy(buf[offset:], str) offset += len(str) } - return buf + return buf, nil } func (s StringSerDe) DeserializeFromBytes(data []byte, numItems int) ([]string, error) { diff --git a/sampling/serde_test.go b/sampling/serde_test.go index 03af2c0..2ad61aa 100644 --- a/sampling/serde_test.go +++ b/sampling/serde_test.go @@ -27,7 +27,8 @@ func TestInt64SerDe(t *testing.T) { serde := Int64SerDe{} items := []int64{1, 2, 3, 42, -100, 1000000} - bytes := serde.SerializeToBytes(items) + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) assert.Equal(t, len(items)*8, len(bytes)) restored, err := serde.DeserializeFromBytes(bytes, len(items)) @@ -39,7 +40,8 @@ func TestInt32SerDe(t *testing.T) { serde := Int32SerDe{} items := []int32{1, 2, 3, 42, -100, 1000000} - bytes := serde.SerializeToBytes(items) + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) assert.Equal(t, len(items)*4, len(bytes)) restored, err := serde.DeserializeFromBytes(bytes, len(items)) @@ -51,7 +53,8 @@ func TestFloat64SerDe(t *testing.T) { serde := Float64SerDe{} items := []float64{1.5, 2.5, 3.14159, -100.5} - bytes := serde.SerializeToBytes(items) + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) assert.Equal(t, len(items)*8, len(bytes)) restored, err := serde.DeserializeFromBytes(bytes, len(items)) @@ -63,7 +66,8 @@ func TestStringSerDe(t *testing.T) { serde := StringSerDe{} items := []string{"hello", "world", "", "testing 123", "日本語"} - bytes := serde.SerializeToBytes(items) + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) restored, err := serde.DeserializeFromBytes(bytes, len(items)) assert.NoError(t, err) From 9f822e89106fdc4d596c3b77531d679dcd86558e Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Wed, 7 Jan 2026 20:46:58 -0500 Subject: [PATCH 3/9] feat: add reservoir serialization test data files --- sampling/compatibility_test.go | 101 ++++++++++++++++++ sampling/generate_reservoir_test_data.go | 100 +++++++++++++++++ .../reservoir_long_n0_k10_go.sk | Bin 0 -> 8 bytes .../reservoir_long_n100_k10_go.sk | Bin 0 -> 104 bytes .../reservoir_long_n10_k100_go.sk | Bin 0 -> 104 bytes .../reservoir_long_n10_k10_go.sk | Bin 0 -> 104 bytes 6 files changed, 201 insertions(+) create mode 100644 sampling/compatibility_test.go create mode 100644 sampling/generate_reservoir_test_data.go create mode 100644 serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_long_n10_k100_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go new file mode 100644 index 0000000..536d35d --- /dev/null +++ b/sampling/compatibility_test.go @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +const testDataPath = "../serialization_test_data/go_generated_files" + +// TestSerializationCompatibilityEmpty tests deserialization of an empty sketch. +func TestSerializationCompatibilityEmpty(t *testing.T) { + data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n0_k10_go.sk")) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, sketch.IsEmpty()) + assert.Equal(t, 10, sketch.K()) + assert.Equal(t, int64(0), sketch.N()) +} + +// TestSerializationCompatibilityBelowK tests deserialization of a sketch with items below k. +func TestSerializationCompatibilityBelowK(t *testing.T) { + data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n10_k100_go.sk")) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 100, sketch.K()) + assert.Equal(t, int64(10), sketch.N()) + assert.Equal(t, 10, sketch.NumSamples()) +} + +// TestSerializationCompatibilityAtK tests deserialization of a sketch at capacity. +func TestSerializationCompatibilityAtK(t *testing.T) { + data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n10_k10_go.sk")) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 10, sketch.K()) + assert.Equal(t, int64(10), sketch.N()) + assert.Equal(t, 10, sketch.NumSamples()) +} + +// TestSerializationCompatibilityWithSampling tests deserialization of a sketch with sampling. +func TestSerializationCompatibilityWithSampling(t *testing.T) { + data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n100_k10_go.sk")) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 10, sketch.K()) + assert.Equal(t, int64(100), sketch.N()) + assert.Equal(t, 10, sketch.NumSamples()) // Only k items kept after sampling +} + +// TestSerializationRoundTrip tests serialization and deserialization round-trip. +func TestSerializationRoundTrip(t *testing.T) { + // Create sketch and add items + sketch, _ := NewReservoirItemsSketch[int64](10) + for i := int64(1); i <= 5; i++ { + sketch.Update(i) + } + + // Serialize + data, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + + // Verify preamble structure + assert.Equal(t, byte(3), data[0]) // preamble_longs = 3 for non-empty + assert.Equal(t, byte(2), data[1]) // serVer = 2 + assert.Equal(t, byte(13), data[2]) // familyID = 13 + + // Deserialize and verify + restored, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} diff --git a/sampling/generate_reservoir_test_data.go b/sampling/generate_reservoir_test_data.go new file mode 100644 index 0000000..ef66c8b --- /dev/null +++ b/sampling/generate_reservoir_test_data.go @@ -0,0 +1,100 @@ +//go:build ignore +// +build ignore + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This program generates serialization test data for ReservoirItemsSketch. +// Run with: go run generate_reservoir_test_data.go +package main + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/apache/datasketches-go/sampling" +) + +func main() { + outputDir := filepath.Join("..", "serialization_test_data", "go_generated_files") + + // Generate empty sketch: k=10, n=0 + generateEmptySketch(outputDir, 10) + + // Generate sketch with items below k: k=100, n=10 + generateSketchBelowK(outputDir, 100, 10) + + // Generate sketch at capacity: k=10, n=10 + generateSketchAtK(outputDir, 10, 10) + + // Generate sketch with sampling: k=10, n=100 + generateSketchWithSampling(outputDir, 10, 100) + + fmt.Println("All reservoir test data files generated successfully!") +} + +func generateEmptySketch(dir string, k int) { + sketch, _ := sampling.NewReservoirItemsSketch[int64](k) + data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) + + filename := fmt.Sprintf("reservoir_long_n0_k%d_go.sk", k) + writeFile(dir, filename, data) +} + +func generateSketchBelowK(dir string, k, n int) { + sketch, _ := sampling.NewReservoirItemsSketch[int64](k) + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) + + filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) + writeFile(dir, filename, data) +} + +func generateSketchAtK(dir string, k, n int) { + sketch, _ := sampling.NewReservoirItemsSketch[int64](k) + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) + + filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) + writeFile(dir, filename, data) +} + +func generateSketchWithSampling(dir string, k, n int) { + sketch, _ := sampling.NewReservoirItemsSketch[int64](k) + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) + + filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) + writeFile(dir, filename, data) +} + +func writeFile(dir, filename string, data []byte) { + path := filepath.Join(dir, filename) + err := os.WriteFile(path, data, 0644) + if err != nil { + fmt.Printf("Error writing %s: %v\n", filename, err) + return + } + fmt.Printf("Generated: %s (%d bytes)\n", filename, len(data)) +} diff --git a/serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..46ee4f6704ac363ea032fbcaf40ec0e95364f9e4 GIT binary patch literal 8 PcmZQ%;$`4sU|;|M0F(e5 literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..60392a0270676978c73bc6077b575b158c38ba58 GIT binary patch literal 104 xcmZQ(;$`4sU|>iAVh{kaK|mTpF(g1~ODL@brGucf1eA`4(i~7a4oYi7X#kXr0=obJ literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_long_n10_k100_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n10_k100_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..972c61d51745b91021404cba29422afc7462ebe8 GIT binary patch literal 104 vcmZQ(;$=u-U|`?^Vi17Qj1VpZ6O?9#(kxJ#6-u)~X?7^h0i`*iG|W5zJiGwH literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..d203fa2efbbe2931edd71b631900cc5e83226fae GIT binary patch literal 104 wcmZQ(;$`4sU;twv1*aK793WtVPz=mangvR;LTNTA%?_nGpfo3x2AK^402!13XaE2J literal 0 HcmV?d00001 From ad81f10765f82b129c3fdefbb0830a51eb8d9711 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Fri, 9 Jan 2026 00:53:55 -0500 Subject: [PATCH 4/9] refactor: address review feedback - use FamilyEnum and GoPath --- internal/family.go | 5 ++ sampling/compatibility_test.go | 96 ++++++++++++++++++++-- sampling/generate_reservoir_test_data.go | 100 ----------------------- sampling/reservoir_items_sketch.go | 15 ++-- 4 files changed, 101 insertions(+), 115 deletions(-) delete mode 100644 sampling/generate_reservoir_test_data.go diff --git a/internal/family.go b/internal/family.go index 83cc73d..c57a344 100644 --- a/internal/family.go +++ b/internal/family.go @@ -31,6 +31,7 @@ type families struct { BloomFilter family Tuple family Theta family + ReservoirItems family } var FamilyEnum = &families{ @@ -66,4 +67,8 @@ var FamilyEnum = &families{ Id: 3, MaxPreLongs: 3, }, + ReservoirItems: family{ + Id: 11, + MaxPreLongs: 2, + }, } diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index 536d35d..7810eb7 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -18,18 +18,98 @@ package sampling import ( + "fmt" "os" "path/filepath" "testing" + "github.com/apache/datasketches-go/internal" "github.com/stretchr/testify/assert" ) -const testDataPath = "../serialization_test_data/go_generated_files" +// TestGenerateGoBinariesForCompatibilityTesting generates serialization test data. +// This test is skipped unless DSKETCH_TEST_GENERATE_GO environment variable is set. +// Run with: DSKETCH_TEST_GENERATE_GO=1 go test -v -run TestGenerateGoBinaries +func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { + if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 { + t.Skipf("%s not set", internal.DSketchTestGenerateGo) + } + + err := os.MkdirAll(internal.GoPath, os.ModePerm) + assert.NoError(t, err) + + t.Run("reservoir empty", func(t *testing.T) { + k := 10 + sketch, err := NewReservoirItemsSketch[int64](k) + assert.NoError(t, err) + + data, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + + filename := fmt.Sprintf("%s/reservoir_long_n0_k%d_go.sk", internal.GoPath, k) + err = os.WriteFile(filename, data, 0644) + assert.NoError(t, err) + t.Logf("Generated: %s (%d bytes)", filename, len(data)) + }) + + t.Run("reservoir below k", func(t *testing.T) { + k, n := 100, 10 + sketch, err := NewReservoirItemsSketch[int64](k) + assert.NoError(t, err) + + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + + data, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + + filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) + err = os.WriteFile(filename, data, 0644) + assert.NoError(t, err) + t.Logf("Generated: %s (%d bytes)", filename, len(data)) + }) + + t.Run("reservoir at k", func(t *testing.T) { + k, n := 10, 10 + sketch, err := NewReservoirItemsSketch[int64](k) + assert.NoError(t, err) + + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + + data, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + + filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) + err = os.WriteFile(filename, data, 0644) + assert.NoError(t, err) + t.Logf("Generated: %s (%d bytes)", filename, len(data)) + }) + + t.Run("reservoir with sampling", func(t *testing.T) { + k, n := 10, 100 + sketch, err := NewReservoirItemsSketch[int64](k) + assert.NoError(t, err) + + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + + data, err := sketch.ToByteArray(Int64SerDe{}) + assert.NoError(t, err) + + filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) + err = os.WriteFile(filename, data, 0644) + assert.NoError(t, err) + t.Logf("Generated: %s (%d bytes)", filename, len(data)) + }) +} // TestSerializationCompatibilityEmpty tests deserialization of an empty sketch. func TestSerializationCompatibilityEmpty(t *testing.T) { - data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n0_k10_go.sk")) + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n0_k10_go.sk")) assert.NoError(t, err) sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) @@ -41,7 +121,7 @@ func TestSerializationCompatibilityEmpty(t *testing.T) { // TestSerializationCompatibilityBelowK tests deserialization of a sketch with items below k. func TestSerializationCompatibilityBelowK(t *testing.T) { - data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n10_k100_go.sk")) + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n10_k100_go.sk")) assert.NoError(t, err) sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) @@ -53,7 +133,7 @@ func TestSerializationCompatibilityBelowK(t *testing.T) { // TestSerializationCompatibilityAtK tests deserialization of a sketch at capacity. func TestSerializationCompatibilityAtK(t *testing.T) { - data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n10_k10_go.sk")) + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n10_k10_go.sk")) assert.NoError(t, err) sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) @@ -65,7 +145,7 @@ func TestSerializationCompatibilityAtK(t *testing.T) { // TestSerializationCompatibilityWithSampling tests deserialization of a sketch with sampling. func TestSerializationCompatibilityWithSampling(t *testing.T) { - data, err := os.ReadFile(filepath.Join(testDataPath, "reservoir_long_n100_k10_go.sk")) + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n100_k10_go.sk")) assert.NoError(t, err) sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) @@ -88,9 +168,9 @@ func TestSerializationRoundTrip(t *testing.T) { assert.NoError(t, err) // Verify preamble structure - assert.Equal(t, byte(3), data[0]) // preamble_longs = 3 for non-empty - assert.Equal(t, byte(2), data[1]) // serVer = 2 - assert.Equal(t, byte(13), data[2]) // familyID = 13 + assert.Equal(t, byte(3), data[0]) // preamble_longs = 3 for non-empty + assert.Equal(t, byte(2), data[1]) // serVer = 2 + assert.Equal(t, byte(internal.FamilyEnum.ReservoirItems.Id), data[2]) // familyID // Deserialize and verify restored, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) diff --git a/sampling/generate_reservoir_test_data.go b/sampling/generate_reservoir_test_data.go deleted file mode 100644 index ef66c8b..0000000 --- a/sampling/generate_reservoir_test_data.go +++ /dev/null @@ -1,100 +0,0 @@ -//go:build ignore -// +build ignore - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This program generates serialization test data for ReservoirItemsSketch. -// Run with: go run generate_reservoir_test_data.go -package main - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/apache/datasketches-go/sampling" -) - -func main() { - outputDir := filepath.Join("..", "serialization_test_data", "go_generated_files") - - // Generate empty sketch: k=10, n=0 - generateEmptySketch(outputDir, 10) - - // Generate sketch with items below k: k=100, n=10 - generateSketchBelowK(outputDir, 100, 10) - - // Generate sketch at capacity: k=10, n=10 - generateSketchAtK(outputDir, 10, 10) - - // Generate sketch with sampling: k=10, n=100 - generateSketchWithSampling(outputDir, 10, 100) - - fmt.Println("All reservoir test data files generated successfully!") -} - -func generateEmptySketch(dir string, k int) { - sketch, _ := sampling.NewReservoirItemsSketch[int64](k) - data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) - - filename := fmt.Sprintf("reservoir_long_n0_k%d_go.sk", k) - writeFile(dir, filename, data) -} - -func generateSketchBelowK(dir string, k, n int) { - sketch, _ := sampling.NewReservoirItemsSketch[int64](k) - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) - } - data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) - - filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) - writeFile(dir, filename, data) -} - -func generateSketchAtK(dir string, k, n int) { - sketch, _ := sampling.NewReservoirItemsSketch[int64](k) - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) - } - data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) - - filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) - writeFile(dir, filename, data) -} - -func generateSketchWithSampling(dir string, k, n int) { - sketch, _ := sampling.NewReservoirItemsSketch[int64](k) - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) - } - data, _ := sketch.ToByteArray(sampling.Int64SerDe{}) - - filename := fmt.Sprintf("reservoir_long_n%d_k%d_go.sk", n, k) - writeFile(dir, filename, data) -} - -func writeFile(dir, filename string, data []byte) { - path := filepath.Join(dir, filename) - err := os.WriteFile(path, data, 0644) - if err != nil { - fmt.Printf("Error writing %s: %v\n", filename, err) - return - } - fmt.Printf("Generated: %s (%d bytes)\n", filename, len(data)) -} diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index 9b729b5..3941cd1 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -21,6 +21,8 @@ import ( "encoding/binary" "errors" "math/rand" + + "github.com/apache/datasketches-go/internal" ) // ResizeFactor controls how the internal array grows. @@ -115,10 +117,9 @@ func (s *ReservoirItemsSketch[T]) Reset() { // Preamble constants for serialization const ( - preambleIntsShort = 1 // empty sketch - preambleIntsLong = 3 // non-empty sketch - serVer = 2 // serialization version - familyID = 13 // reservoir items family + preambleIntsShort = 1 // empty sketch + preambleIntsLong = 3 // non-empty sketch + serVer = 2 // serialization version ) // ToByteArray serializes the sketch to a byte array using the provided SerDe. @@ -128,7 +129,7 @@ func (s *ReservoirItemsSketch[T]) ToByteArray(serde ItemsSerDe[T]) ([]byte, erro buf := make([]byte, 8) buf[0] = preambleIntsShort buf[1] = serVer - buf[2] = familyID + buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) // bytes 4-7: k as int32 binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) return buf, nil @@ -147,7 +148,7 @@ func (s *ReservoirItemsSketch[T]) ToByteArray(serde ItemsSerDe[T]) ([]byte, erro // Preamble buf[0] = preambleIntsLong buf[1] = serVer - buf[2] = familyID + buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) // byte 3: reserved binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) binary.LittleEndian.PutUint64(buf[8:], uint64(s.n)) @@ -174,7 +175,7 @@ func NewReservoirItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) ( if ver != serVer { return nil, errors.New("unsupported serialization version") } - if family != familyID { + if family != byte(internal.FamilyEnum.ReservoirItems.Id) { return nil, errors.New("wrong sketch family") } From a21cfc22cd588845f9572b6ae409821971bb863c Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Fri, 9 Jan 2026 23:32:34 -0500 Subject: [PATCH 5/9] fix(sampling): correct ReservoirItems Family ID from 13 to 11 --- .../reservoir_long_n0_k10_go.sk | Bin 8 -> 8 bytes .../reservoir_long_n100_k10_go.sk | Bin 104 -> 104 bytes .../reservoir_long_n10_k100_go.sk | Bin 104 -> 104 bytes .../reservoir_long_n10_k10_go.sk | Bin 104 -> 104 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk index 46ee4f6704ac363ea032fbcaf40ec0e95364f9e4..bf817b8213672f357ee88c73b4f59cea8eee3ce2 100644 GIT binary patch literal 8 PcmZQ%;%4AtU|;|M0Ehq? literal 8 PcmZQ%;$`4sU|;|M0F(e5 diff --git a/serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk index 60392a0270676978c73bc6077b575b158c38ba58..68ab9b15f70e015dc1787913cb5fa76913fe74cd 100644 GIT binary patch literal 104 xcmZQ(;%4AtU|>iAVh{kaK_C%AG5AAi2`DWLr6r+s0F)Mm((X{26H3cKX#kEu0!{z` literal 104 xcmZQ(;$`4sU|>iAVh{kaK|mTpF(g1~ODL@brGucf1eA`4(i~7a4oYi7X#kXr0=obJ diff --git a/serialization_test_data/go_generated_files/reservoir_long_n10_k100_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n10_k100_go.sk index 972c61d51745b91021404cba29422afc7462ebe8..15433a393000fd50661f6d29f88a9359df047a7b 100644 GIT binary patch delta 9 Qcmd1EU}k3Gp2(aE010dX-2eap delta 9 Qcmd1EU}k3GoyeRD010#f-v9sr diff --git a/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk index d203fa2efbbe2931edd71b631900cc5e83226fae..a66ca3bb62b93e0e6aecc8609feccc384d90d25f 100644 GIT binary patch delta 9 Qcmd1EU}k3Gp2(aE010dX-2eap delta 9 Qcmd1EU}k3GoyeRD010#f-v9sr From 709bab03d2def1d450af72feeb3b7c28497395af Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Mon, 12 Jan 2026 16:00:15 -0500 Subject: [PATCH 6/9] refactor: rename ToByteArray to ToSlice for naming consistency --- sampling/compatibility_test.go | 10 +++++----- sampling/reservoir_items_sketch.go | 4 ++-- sampling/serde_test.go | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index 7810eb7..8908545 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -43,7 +43,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { sketch, err := NewReservoirItemsSketch[int64](k) assert.NoError(t, err) - data, err := sketch.ToByteArray(Int64SerDe{}) + data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) filename := fmt.Sprintf("%s/reservoir_long_n0_k%d_go.sk", internal.GoPath, k) @@ -61,7 +61,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { sketch.Update(i) } - data, err := sketch.ToByteArray(Int64SerDe{}) + data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) @@ -79,7 +79,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { sketch.Update(i) } - data, err := sketch.ToByteArray(Int64SerDe{}) + data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) @@ -97,7 +97,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { sketch.Update(i) } - data, err := sketch.ToByteArray(Int64SerDe{}) + data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) @@ -164,7 +164,7 @@ func TestSerializationRoundTrip(t *testing.T) { } // Serialize - data, err := sketch.ToByteArray(Int64SerDe{}) + data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) // Verify preamble structure diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index 3941cd1..32d6aba 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -122,8 +122,8 @@ const ( serVer = 2 // serialization version ) -// ToByteArray serializes the sketch to a byte array using the provided SerDe. -func (s *ReservoirItemsSketch[T]) ToByteArray(serde ItemsSerDe[T]) ([]byte, error) { +// ToSlice serializes the sketch to a byte slice using the provided SerDe. +func (s *ReservoirItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { if s.IsEmpty() { // Empty sketch: minimal preamble buf := make([]byte, 8) diff --git a/sampling/serde_test.go b/sampling/serde_test.go index 2ad61aa..9d4d8fa 100644 --- a/sampling/serde_test.go +++ b/sampling/serde_test.go @@ -81,7 +81,7 @@ func TestSketchSerializationInt64(t *testing.T) { sketch.Update(i) } - bytes, err := sketch.ToByteArray(Int64SerDe{}) + bytes, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) assert.NotNil(t, bytes) @@ -99,7 +99,7 @@ func TestSketchSerializationString(t *testing.T) { sketch.Update("banana") sketch.Update("cherry") - bytes, err := sketch.ToByteArray(StringSerDe{}) + bytes, err := sketch.ToSlice(StringSerDe{}) assert.NoError(t, err) restored, err := NewReservoirItemsSketchFromSlice[string](bytes, StringSerDe{}) @@ -112,7 +112,7 @@ func TestSketchSerializationString(t *testing.T) { func TestSketchSerializationEmpty(t *testing.T) { sketch, _ := NewReservoirItemsSketch[int64](10) - bytes, err := sketch.ToByteArray(Int64SerDe{}) + bytes, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) assert.Equal(t, 8, len(bytes)) // Minimal preamble From f9f53d3226a056bea946c725201a5f56d09809e8 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Mon, 12 Jan 2026 16:31:36 -0500 Subject: [PATCH 7/9] feat: add Java compatibility tests for reservoir sampling --- sampling/compatibility_test.go | 106 +++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index 8908545..f9804e6 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -179,3 +179,109 @@ func TestSerializationRoundTrip(t *testing.T) { assert.Equal(t, sketch.N(), restored.N()) assert.Equal(t, sketch.Samples(), restored.Samples()) } + +// TestReservoirItemsSketch_JavaCompat tests deserialization of Java-generated reservoir sketch files. +// These tests verify cross-language compatibility with files generated by datasketches-java. +func TestReservoirItemsSketch_JavaCompat(t *testing.T) { + // Test cases based on Java PR #714: ReservoirCrossLanguageTest.java (36 total files) + testCases := []struct { + name string + filename string + k int + n int64 + isEmpty bool + }{ + // ReservoirLongsSketch - Empty (1) + {"longs_empty_k128", "reservoir_longs_empty_k128_java.sk", 128, 0, true}, + + // ReservoirLongsSketch - Exact (5) + {"longs_exact_n1_k128", "reservoir_longs_exact_n1_k128_java.sk", 128, 1, false}, + {"longs_exact_n10_k128", "reservoir_longs_exact_n10_k128_java.sk", 128, 10, false}, + {"longs_exact_n32_k128", "reservoir_longs_exact_n32_k128_java.sk", 128, 32, false}, + {"longs_exact_n100_k128", "reservoir_longs_exact_n100_k128_java.sk", 128, 100, false}, + {"longs_exact_n128_k128", "reservoir_longs_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirLongsSketch - Sampling (3) + {"longs_sampling_n1000_k32", "reservoir_longs_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"longs_sampling_n1000_k64", "reservoir_longs_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"longs_sampling_n1000_k128", "reservoir_longs_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_long_empty_k128", "reservoir_items_long_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_long_exact_n1_k128", "reservoir_items_long_exact_n1_k128_java.sk", 128, 1, false}, + {"items_long_exact_n10_k128", "reservoir_items_long_exact_n10_k128_java.sk", 128, 10, false}, + {"items_long_exact_n32_k128", "reservoir_items_long_exact_n32_k128_java.sk", 128, 32, false}, + {"items_long_exact_n100_k128", "reservoir_items_long_exact_n100_k128_java.sk", 128, 100, false}, + {"items_long_exact_n128_k128", "reservoir_items_long_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_long_sampling_n1000_k32", "reservoir_items_long_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_long_sampling_n1000_k64", "reservoir_items_long_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_long_sampling_n1000_k128", "reservoir_items_long_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_double_empty_k128", "reservoir_items_double_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_double_exact_n1_k128", "reservoir_items_double_exact_n1_k128_java.sk", 128, 1, false}, + {"items_double_exact_n10_k128", "reservoir_items_double_exact_n10_k128_java.sk", 128, 10, false}, + {"items_double_exact_n32_k128", "reservoir_items_double_exact_n32_k128_java.sk", 128, 32, false}, + {"items_double_exact_n100_k128", "reservoir_items_double_exact_n100_k128_java.sk", 128, 100, false}, + {"items_double_exact_n128_k128", "reservoir_items_double_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_double_sampling_n1000_k32", "reservoir_items_double_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_double_sampling_n1000_k64", "reservoir_items_double_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_double_sampling_n1000_k128", "reservoir_items_double_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_string_empty_k128", "reservoir_items_string_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_string_exact_n1_k128", "reservoir_items_string_exact_n1_k128_java.sk", 128, 1, false}, + {"items_string_exact_n10_k128", "reservoir_items_string_exact_n10_k128_java.sk", 128, 10, false}, + {"items_string_exact_n32_k128", "reservoir_items_string_exact_n32_k128_java.sk", 128, 32, false}, + {"items_string_exact_n100_k128", "reservoir_items_string_exact_n100_k128_java.sk", 128, 100, false}, + {"items_string_exact_n128_k128", "reservoir_items_string_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_string_sampling_n1000_k32", "reservoir_items_string_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_string_sampling_n1000_k64", "reservoir_items_string_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_string_sampling_n1000_k128", "reservoir_items_string_sampling_n1000_k128_java.sk", 128, 1000, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + filepath := filepath.Join(internal.JavaPath, tc.filename) + + // Skip if Java file not yet available + if _, err := os.Stat(filepath); os.IsNotExist(err) { + t.Skipf("Java file not found: %s (waiting for sync from datasketches-java)", tc.filename) + return + } + + data, err := os.ReadFile(filepath) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + + assert.Equal(t, tc.k, sketch.K(), "k mismatch") + assert.Equal(t, tc.n, sketch.N(), "n mismatch") + assert.Equal(t, tc.isEmpty, sketch.IsEmpty(), "isEmpty mismatch") + + if !tc.isEmpty { + samples := sketch.Samples() + if tc.n <= int64(tc.k) { + // Exact mode: should have exactly n samples + assert.Equal(t, int(tc.n), len(samples), "sample count mismatch in exact mode") + } else { + // Sampling mode: should have exactly k samples + assert.Equal(t, tc.k, len(samples), "sample count mismatch in sampling mode") + } + } + }) + } +} From f3ca0709c6b0eacae8d93ffc6224f6ae0b5b2764 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 13 Jan 2026 01:37:41 -0500 Subject: [PATCH 8/9] feat(sampling): align test data with Java PR #714 conditions - Generate 27 cross-language compatibility test files: - reservoir_items_long_*_go.sk (9 files) - reservoir_items_double_*_go.sk (9 files) - reservoir_items_string_*_go.sk (9 files) - Use k=128 for empty/exact, k=32/64/128 for sampling (n=1000) - Skip reservoir_longs_* (per issue #90: longs is Java legacy) - Update compatibility tests to match new file naming --- sampling/compatibility_test.go | 234 +++++++++++------- .../reservoir_items_double_empty_k128_go.sk | Bin 0 -> 8 bytes ...servoir_items_double_exact_n100_k128_go.sk | Bin 0 -> 824 bytes ...eservoir_items_double_exact_n10_k128_go.sk | Bin 0 -> 104 bytes ...servoir_items_double_exact_n128_k128_go.sk | Bin 0 -> 1048 bytes ...reservoir_items_double_exact_n1_k128_go.sk | Bin 0 -> 32 bytes ...eservoir_items_double_exact_n32_k128_go.sk | Bin 0 -> 280 bytes ...oir_items_double_sampling_n1000_k128_go.sk | Bin 0 -> 1048 bytes ...voir_items_double_sampling_n1000_k32_go.sk | Bin 0 -> 280 bytes ...voir_items_double_sampling_n1000_k64_go.sk | Bin 0 -> 536 bytes .../reservoir_items_long_empty_k128_go.sk | Bin 0 -> 8 bytes ...reservoir_items_long_exact_n100_k128_go.sk | Bin 0 -> 824 bytes ...reservoir_items_long_exact_n10_k128_go.sk} | Bin 104 -> 104 bytes ...reservoir_items_long_exact_n128_k128_go.sk | Bin 0 -> 1048 bytes .../reservoir_items_long_exact_n1_k128_go.sk | Bin 0 -> 32 bytes .../reservoir_items_long_exact_n32_k128_go.sk | Bin 0 -> 280 bytes ...rvoir_items_long_sampling_n1000_k128_go.sk | Bin 0 -> 1048 bytes ...ervoir_items_long_sampling_n1000_k32_go.sk | Bin 0 -> 280 bytes ...ervoir_items_long_sampling_n1000_k64_go.sk | Bin 0 -> 536 bytes .../reservoir_items_string_empty_k128_go.sk | Bin 0 -> 8 bytes ...servoir_items_string_exact_n100_k128_go.sk | Bin 0 -> 1016 bytes ...eservoir_items_string_exact_n10_k128_go.sk | Bin 0 -> 115 bytes ...servoir_items_string_exact_n128_k128_go.sk | Bin 0 -> 1324 bytes ...reservoir_items_string_exact_n1_k128_go.sk | Bin 0 -> 33 bytes ...eservoir_items_string_exact_n32_k128_go.sk | Bin 0 -> 335 bytes ...oir_items_string_sampling_n1000_k128_go.sk | Bin 0 -> 1415 bytes ...voir_items_string_sampling_n1000_k32_go.sk | Bin 0 -> 372 bytes ...voir_items_string_sampling_n1000_k64_go.sk | Bin 0 -> 720 bytes .../reservoir_long_n0_k10_go.sk | Bin 8 -> 0 bytes .../reservoir_long_n100_k10_go.sk | Bin 104 -> 0 bytes .../reservoir_long_n10_k10_go.sk | Bin 104 -> 0 bytes 31 files changed, 146 insertions(+), 88 deletions(-) create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_exact_n32_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk rename serialization_test_data/go_generated_files/{reservoir_long_n10_k100_go.sk => reservoir_items_long_exact_n10_k128_go.sk} (91%) create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_exact_n128_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_exact_n1_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_exact_n32_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k32_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k64_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_empty_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k128_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk create mode 100644 serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk delete mode 100644 serialization_test_data/go_generated_files/reservoir_long_n0_k10_go.sk delete mode 100644 serialization_test_data/go_generated_files/reservoir_long_n100_k10_go.sk delete mode 100644 serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index f9804e6..e0ccdfb 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -30,6 +30,9 @@ import ( // TestGenerateGoBinariesForCompatibilityTesting generates serialization test data. // This test is skipped unless DSKETCH_TEST_GENERATE_GO environment variable is set. // Run with: DSKETCH_TEST_GENERATE_GO=1 go test -v -run TestGenerateGoBinaries +// Generates 27 files for cross-language compatibility testing. +// Note: Go only has generic ReservoirItemsSketch[T], no separate ReservoirLongsSketch. +// See https://github.com/apache/datasketches-go/issues/90 for context. func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 { t.Skipf("%s not set", internal.DSketchTestGenerateGo) @@ -38,121 +41,176 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { err := os.MkdirAll(internal.GoPath, os.ModePerm) assert.NoError(t, err) - t.Run("reservoir empty", func(t *testing.T) { - k := 10 - sketch, err := NewReservoirItemsSketch[int64](k) - assert.NoError(t, err) + exactNValues := []int{1, 10, 32, 100, 128} + samplingKValues := []int{32, 64, 128} - data, err := sketch.ToSlice(Int64SerDe{}) - assert.NoError(t, err) - - filename := fmt.Sprintf("%s/reservoir_long_n0_k%d_go.sk", internal.GoPath, k) - err = os.WriteFile(filename, data, 0644) - assert.NoError(t, err) - t.Logf("Generated: %s (%d bytes)", filename, len(data)) - }) - - t.Run("reservoir below k", func(t *testing.T) { - k, n := 100, 10 - sketch, err := NewReservoirItemsSketch[int64](k) - assert.NoError(t, err) - - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_long", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](128) + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](128) + for i := int64(1); i <= int64(n); i++ { + sketch.Update(i) + } + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](k) + for i := int64(1); i <= 1000; i++ { + sketch.Update(i) + } + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) } - - data, err := sketch.ToSlice(Int64SerDe{}) - assert.NoError(t, err) - - filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) - err = os.WriteFile(filename, data, 0644) - assert.NoError(t, err) - t.Logf("Generated: %s (%d bytes)", filename, len(data)) }) - t.Run("reservoir at k", func(t *testing.T) { - k, n := 10, 10 - sketch, err := NewReservoirItemsSketch[int64](k) - assert.NoError(t, err) - - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_double", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](128) + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](128) + for i := 1; i <= n; i++ { + sketch.Update(float64(i)) + } + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](k) + for i := 1; i <= 1000; i++ { + sketch.Update(float64(i)) + } + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) } - - data, err := sketch.ToSlice(Int64SerDe{}) - assert.NoError(t, err) - - filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) - err = os.WriteFile(filename, data, 0644) - assert.NoError(t, err) - t.Logf("Generated: %s (%d bytes)", filename, len(data)) }) - t.Run("reservoir with sampling", func(t *testing.T) { - k, n := 10, 100 - sketch, err := NewReservoirItemsSketch[int64](k) - assert.NoError(t, err) - - for i := int64(1); i <= int64(n); i++ { - sketch.Update(i) + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_string", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](128) + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](128) + for i := 1; i <= n; i++ { + sketch.Update(fmt.Sprintf("item%d", i)) + } + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](k) + for i := 1; i <= 1000; i++ { + sketch.Update(fmt.Sprintf("item%d", i)) + } + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) } - - data, err := sketch.ToSlice(Int64SerDe{}) - assert.NoError(t, err) - - filename := fmt.Sprintf("%s/reservoir_long_n%d_k%d_go.sk", internal.GoPath, n, k) - err = os.WriteFile(filename, data, 0644) - assert.NoError(t, err) - t.Logf("Generated: %s (%d bytes)", filename, len(data)) }) } // TestSerializationCompatibilityEmpty tests deserialization of an empty sketch. func TestSerializationCompatibilityEmpty(t *testing.T) { - data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n0_k10_go.sk")) + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_items_long_empty_k128_go.sk")) assert.NoError(t, err) sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) assert.NoError(t, err) assert.True(t, sketch.IsEmpty()) - assert.Equal(t, 10, sketch.K()) + assert.Equal(t, 128, sketch.K()) assert.Equal(t, int64(0), sketch.N()) } -// TestSerializationCompatibilityBelowK tests deserialization of a sketch with items below k. -func TestSerializationCompatibilityBelowK(t *testing.T) { - data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n10_k100_go.sk")) - assert.NoError(t, err) - - sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) - assert.NoError(t, err) - assert.Equal(t, 100, sketch.K()) - assert.Equal(t, int64(10), sketch.N()) - assert.Equal(t, 10, sketch.NumSamples()) -} +// TestSerializationCompatibilityExact tests deserialization of sketches in exact mode. +func TestSerializationCompatibilityExact(t *testing.T) { + testCases := []struct { + filename string + k int + n int64 + }{ + {"reservoir_items_long_exact_n1_k128_go.sk", 128, 1}, + {"reservoir_items_long_exact_n10_k128_go.sk", 128, 10}, + {"reservoir_items_long_exact_n32_k128_go.sk", 128, 32}, + {"reservoir_items_long_exact_n100_k128_go.sk", 128, 100}, + {"reservoir_items_long_exact_n128_k128_go.sk", 128, 128}, + } -// TestSerializationCompatibilityAtK tests deserialization of a sketch at capacity. -func TestSerializationCompatibilityAtK(t *testing.T) { - data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n10_k10_go.sk")) - assert.NoError(t, err) + for _, tc := range testCases { + t.Run(tc.filename, func(t *testing.T) { + data, err := os.ReadFile(filepath.Join(internal.GoPath, tc.filename)) + assert.NoError(t, err) - sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) - assert.NoError(t, err) - assert.Equal(t, 10, sketch.K()) - assert.Equal(t, int64(10), sketch.N()) - assert.Equal(t, 10, sketch.NumSamples()) + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, tc.k, sketch.K()) + assert.Equal(t, tc.n, sketch.N()) + assert.Equal(t, int(tc.n), sketch.NumSamples()) + }) + } } -// TestSerializationCompatibilityWithSampling tests deserialization of a sketch with sampling. +// TestSerializationCompatibilityWithSampling tests deserialization of sketches in sampling mode. func TestSerializationCompatibilityWithSampling(t *testing.T) { - data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_long_n100_k10_go.sk")) - assert.NoError(t, err) + testCases := []struct { + filename string + k int + n int64 + }{ + {"reservoir_items_long_sampling_n1000_k32_go.sk", 32, 1000}, + {"reservoir_items_long_sampling_n1000_k64_go.sk", 64, 1000}, + {"reservoir_items_long_sampling_n1000_k128_go.sk", 128, 1000}, + } - sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) - assert.NoError(t, err) - assert.Equal(t, 10, sketch.K()) - assert.Equal(t, int64(100), sketch.N()) - assert.Equal(t, 10, sketch.NumSamples()) // Only k items kept after sampling + for _, tc := range testCases { + t.Run(tc.filename, func(t *testing.T) { + data, err := os.ReadFile(filepath.Join(internal.GoPath, tc.filename)) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, tc.k, sketch.K()) + assert.Equal(t, tc.n, sketch.N()) + assert.Equal(t, tc.k, sketch.NumSamples()) // Only k items kept after sampling + }) + } } // TestSerializationRoundTrip tests serialization and deserialization round-trip. diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..8bff7be517ff23d8742141fce1bd166805cf4a8a GIT binary patch literal 8 PcmZQ%;$~=IU|;|M0$>1- literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..c5c75c349f29c0d7e66789f80b4f80f810502ba4 GIT binary patch literal 824 zcmXxZH%die6ot`^HiC_Xg@uKPh=_$~QhbMHXT(EcGg1eLoya9Dio;i1t~lCR(Y zyMO!o9`vT`9pMS|UJKW_S_j$kpyWV0B zHFKnyW6hjs=2SChnmO0Zg=Q`_bETPU&D?0_Rx@{+x!24?-yMsMd9<_Gm?t}njd`}S f*q9ePi;a1;v)Gt7JBy8Zx3k!o4?ByE`RslG(RE7k literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..1b0a059726b633a0af0b0629db49814df571a4af GIT binary patch literal 104 ycmZQ(;$~=IU|`?^Vi17Q5Z(uS2+iOCp*f(m0F)Mi(h^Ww21+YHX(cGF;s5~ivjo}z literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..d32659473d52ee2d74002f7d8a819bcd7048d85f GIT binary patch literal 1048 zcmX}hH%^{m5QWhzXe`lCP*Au)$U+uEh>XF6kDQZD&P$o4y-Q*Vg#UfMgHLum-&|~T;&?q z+4U@DLo=J2+0x9mW_C2QtC>B`>}%#gGl!Zv(#)}DPBe3>nKR9t_qt=TF&B0g8}rZ3 zVq^Z>S!~RuoyEpn*;#DNwVlPr+}K%c%&ncp#@yLiY|Oo##l}3?S!~RsoyEpH*;#DN Pvz^7pyx3W6%xm`!DWqn= literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..5a37e16779ea7ffacb73384033944b82c47de285 GIT binary patch literal 32 YcmZQ(;$~=IU|?VbVi17Q5Z(uS01{6D!Ts)JoD+G ohdu@vqQVGcOfba^b1bmL3Ttez#SVKMaKs5`TyVt=chqRi1+nlC5C8xG literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..107ec6bbf53d663656f64ef44cb31ed75276baf5 GIT binary patch literal 1048 zcmXw&DNh4I7=|YxG5mxTh{_Fwq>eEpC;|zhLBNuba3qA~ObOStT;*Q5a%JU8L%L^CgD*JAxiEZ@bU z6T^{c7C1i_9e%cr#-3eCn^+8-n?>wPVtIpJ7Q?MLx)(F*HRu!_(@%6DzXA3U`OS*# zeqRh1Vou)_yv(t5hq}U{^akr|Rdok-eucPbuTlSJs_o(WZzQ(V*XEwz#`P`Yby%RI~&B)tiexCjI z*_Y-12){1z*M7)@y=0z#i}?7Z*vI%6%rkt;&r?sq+>)w)>=AqJvJac+UJXBA^+$Z) z(0TMr9pulcCn2uqyNIg$+wxDV_l)>4Y_vIN-h(gcK6x8ed>B{r<~=E~d-{*4!|o`K TgW*v+`4H_c=di{c{#y1Q6xW-+ literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..43e2636ed1b63976452ef00d167e82834e46a32c GIT binary patch literal 280 zcmZQ(;$~1_U|@K`3?zU+0Zc-PhBgN<%b^}ZE0jR!1?3Q0qa8w@fYJ}@Abf!u2yFn> zZ_y3mdo)1k2Ne){K{JHzfY}oS;Tu#!=mu{Hy#uPhpa8<3(F>syIw16gRtVit4WUm! i%}pqV@D*~P>dGMWhEfRq0&4Gv76|`GBZRhSaR2}cSt$?z literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..9df39223bceb06d287983020a36acc6d0dfb5c07 GIT binary patch literal 536 zcmXZYF-`(O6vpw1G$uBbl+;#Sz+{d<5E3;pnMk1ogNvw<0v!-B5P>Z%r=;Wva|Dhs zCFLFg;{V&?{qnsxJG(QblQUzDG5gZ793T5%RqP#cJHpBY{WZoWx-YyMV_2b8-`tS% zKBg&}bNu_pjrz%vcMFUyEVQl|YW{<%ht+4a4cSGXVS2(s|6wjPf5EEu^S5LxE1v8+ zXeJnSzIV-c+PC(>Jhahi|MW>t_qd;7@rH4U*&OS-4}VFv>Sy|1c+h&?OFkgyI!AaV ThexURPS@o4i%$3EWqE23t|U)8 literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..8bff7be517ff23d8742141fce1bd166805cf4a8a GIT binary patch literal 8 PcmZQ%;$~=IU|;|M0$>1- literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..1b6e11d3331b660cc3b5c65270f5180a83b3839c GIT binary patch literal 824 zcmXxj1yTY~00hu4*cb?Qi-m#RiGq#Yod@yg>iF21oi}@cjYeYOJB0A{+rJbo`JW-e zC`(z!a#paCRjg)=HLPVF>ltSQ6HKy^O>AZhTiM2TcCeFO>}C&p*~b+7Ilw^C#F`N;MF`or2WD$#5 z!cvy8oS#|2N>=d;t69UZtmQY>v7QZVWD}d&!dAAio!{BPPIj@IJ?v#4`#Hct4snCyo{o}dY|s4F`+k)3;1Q;W3NR%#CNe% zZ_0cLudy4q^zZbe{@0#7ftMK7+vnTZEq}&U_dncQ!3O;z9Ml`eCwbDHl)TCQn|VKG zFb})1S-;l)yL`vpc6kP$&C}Rw|C~?oxcx=>A)d)!utWdC{98WbdzE|vPw*aJ?N{hu z;*PwYAL$qH4fA^(z?j~t|FbSHvsWyi=DmE}-XPENF};L4bLJzwkZ9Y4TtF`)FGcph-JMl&B1S|W z5rbkWh(EKUKZLcItt zphI=mwVN)|V4e*Bm-l|+&s;f{l6jfI_*zaCVUySeTmQHz}(pWm3|`^ z{0Dy|EB(8N<%e>_-{TyA+E4X0e&K6*!ymi1- literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..4b6d4434c7eb0aefe9cbacbc0cb21ed0f69792d5 GIT binary patch literal 1016 zcmX}rF-}A=3_!sz5KwTLUB^zG`#?cK#{oDxOOc6W`Bk1O&rk2y?{!@tkNJORzdZWu z=kxo1B2J7Gd7@6t6YE5u*stf{P2E+fp;DEaYN=L?U<4x=!3ah$f)R{h1Y-$Gn&zi&Df01*o@8CjLq1L&Di_RAH4km=!4UW literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..a659f5057eca96f7dedc39824d77c571482bc4fb GIT binary patch literal 115 zcmZQ(;$~=IU|`?^Vi17QtUz{VNouYkj9~<07{eGQFor3NVFqKE!x$DYh9w)!Km!1r C;S=lt literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..1831ea8d3994b8400af1c1a1835cefe64a621083 GIT binary patch literal 1324 zcmYMzJ5D4(3`Nn_pcw)-)Bd{_L@)sv3t;IWq`EJ#d?TO8FMr-1UysM{{4Dn0pZ@>( z{$V%#M%>67b)#?0jlFUH-GficD5ru-s;H)hS_;E49K$gj!!aDgF&x7&93wCSBQOFZ zFajem0wXX2BQX*qF%ly&5+gAZBQX-AFbbnE3ZpOzqc94iFbbnF8ly29qcIwzF&d*W z8e=d9V=x9|Fa~2V24gS=V=)$EF&1Mn7Gp6MV=)%vU>uBtaWD?X!8jNP<6xZg$`3x3 vrgUW}Q(4MZj&haKOqxkEX(r92nKYAT(oC93b7?NkrMWbh=F(i6OY{8(5n&WB literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..4c935a5d0f9a4394872c118f391bc1413581f427 GIT binary patch literal 33 ccmZQ(;$~=IU|?VbVi17QtUz{VNouYk02c=WcmMzZ literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..2cccdc3c4d5f80c84859caa001014bd7a2563ab6 GIT binary patch literal 335 zcmX}lxeb6o3`9{21PM)00*pD9f(Qu6D1g#Ar1>j({UZOm>f(-w{+;jj{~yu3`?&UV yG-RYP8MCn%t5J-t^}zv+NGQ>SX0)Ic6+}ZcL_;)0Lo`G~G(`)TvcbCyP_&?~IV$u<6GJ)8nWXFKCjd=_8fVyMfI@*g6&S+eu7qNy z!kKo0%q&jXgdao0oEv6bs~)tgLNy7pO7zg&iYZbAhF+*8UPnwJKY0k0&W)hm;8HaD zQbno8&d`Bivr4y`>r=!k&qdSDD6VxP3_9CX*aS$yqi!t9aO;95hN7Xi)_ zoA*QV&Y8xXgg1c*C}Ic@_~?ev=)O~#2P&xJz=&cz&BG;3Guo$=J_2{l5GpSK(f$LZ CCMb6R literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..ea9d9f028a1ce3d763c221051a0536f0dce0cc91 GIT binary patch literal 372 zcmYL^OA-Px2t+BT)P3bi;Oe&`w0(ZIyNQeD&MVNYMgRx#hT>S&Xo>#Czw?1|$&1f^XUhDaaKA fkPfMV5u$-9?duqqdtB~?Ibp^I*4x33tkqP0f1FCL literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..08b8ec9b96dafcebc8bc128490518da0b26960a5 GIT binary patch literal 720 zcmYjPF>(Yk2=nA-E=4|LKw#+ZGbvJBmq+Ir*~tWPTX;1#2ub$s?PERGT0ggIxxUBW zy4L&k|N8!XDzc$D8s@2nQq~YS4b|fQ&v544q|ZH~q2z9u!*vWzAVUz&O=nXQ#!MB; zj;&n^uv9j+!YNBcdqB}FCW1Jm1z6yqnJm&)62fWmfDpiAVh{kaK_C%AG5AAi2`DWLr6r+s0F)Mm((X{26H3cKX#kEu0!{z` diff --git a/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk b/serialization_test_data/go_generated_files/reservoir_long_n10_k10_go.sk deleted file mode 100644 index a66ca3bb62b93e0e6aecc8609feccc384d90d25f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 104 wcmZQ(;%4AtU;twv1*aK793WtVPz=mangvR;LTNTA%?_nGpfo3x2AK^402s&sW&i*H From a9c736c7f654b19fb2b33145b1d5d061c92cbfbd Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 13 Jan 2026 18:11:05 -0500 Subject: [PATCH 9/9] fix(sampling): align serialization format with Java for cross-language compatibility - Update ToSlice to produce Java-compatible binary format: - Use 2-long (16 bytes) preamble instead of 3-long (24 bytes) - Encode ResizeFactor X8 in high 2 bits of byte 0 - Set EMPTY flag (0x04) in byte 3 for empty sketches - Remove explicit numSamples storage (implicit as min(n, k)) - Simplify NewReservoirItemsSketchFromSlice to only support Java format - Add 27 Java-generated .sk files for cross-language compatibility tests - Update Go-generated .sk files to match Java format exactly - Update test generator to use 0-based indexing like Java Verified: Go and Java produce identical bytes for empty and exact mode sketches --- sampling/compatibility_test.go | 17 +++---- sampling/reservoir_items_sketch.go | 43 ++++++++---------- .../reservoir_items_double_empty_k128_go.sk | Bin 8 -> 8 bytes ...servoir_items_double_exact_n100_k128_go.sk | Bin 824 -> 816 bytes ...eservoir_items_double_exact_n10_k128_go.sk | Bin 104 -> 96 bytes ...servoir_items_double_exact_n128_k128_go.sk | Bin 1048 -> 1040 bytes ...reservoir_items_double_exact_n1_k128_go.sk | Bin 32 -> 24 bytes ...eservoir_items_double_exact_n32_k128_go.sk | Bin 280 -> 272 bytes ...oir_items_double_sampling_n1000_k128_go.sk | Bin 1048 -> 1040 bytes ...voir_items_double_sampling_n1000_k32_go.sk | Bin 280 -> 272 bytes ...voir_items_double_sampling_n1000_k64_go.sk | Bin 536 -> 528 bytes .../reservoir_items_long_empty_k128_go.sk | Bin 8 -> 8 bytes ...reservoir_items_long_exact_n100_k128_go.sk | Bin 824 -> 816 bytes .../reservoir_items_long_exact_n10_k128_go.sk | Bin 104 -> 96 bytes ...reservoir_items_long_exact_n128_k128_go.sk | Bin 1048 -> 1040 bytes .../reservoir_items_long_exact_n1_k128_go.sk | Bin 32 -> 24 bytes .../reservoir_items_long_exact_n32_k128_go.sk | Bin 280 -> 272 bytes ...rvoir_items_long_sampling_n1000_k128_go.sk | Bin 1048 -> 1040 bytes ...ervoir_items_long_sampling_n1000_k32_go.sk | Bin 280 -> 272 bytes ...ervoir_items_long_sampling_n1000_k64_go.sk | Bin 536 -> 528 bytes .../reservoir_items_string_empty_k128_go.sk | Bin 8 -> 8 bytes ...servoir_items_string_exact_n100_k128_go.sk | Bin 1016 -> 1006 bytes ...eservoir_items_string_exact_n10_k128_go.sk | Bin 115 -> 106 bytes ...servoir_items_string_exact_n128_k128_go.sk | Bin 1324 -> 1314 bytes ...reservoir_items_string_exact_n1_k128_go.sk | Bin 33 -> 25 bytes ...eservoir_items_string_exact_n32_k128_go.sk | Bin 335 -> 326 bytes ...oir_items_string_sampling_n1000_k128_go.sk | Bin 1415 -> 1410 bytes ...voir_items_string_sampling_n1000_k32_go.sk | Bin 372 -> 365 bytes ...voir_items_string_sampling_n1000_k64_go.sk | Bin 720 -> 713 bytes .../reservoir_items_double_empty_k128_java.sk | Bin 0 -> 8 bytes ...rvoir_items_double_exact_n100_k128_java.sk | Bin 0 -> 816 bytes ...ervoir_items_double_exact_n10_k128_java.sk | Bin 0 -> 96 bytes ...rvoir_items_double_exact_n128_k128_java.sk | Bin 0 -> 1040 bytes ...servoir_items_double_exact_n1_k128_java.sk | Bin 0 -> 24 bytes ...ervoir_items_double_exact_n32_k128_java.sk | Bin 0 -> 272 bytes ...r_items_double_sampling_n1000_k128_java.sk | Bin 0 -> 1040 bytes ...ir_items_double_sampling_n1000_k32_java.sk | Bin 0 -> 272 bytes ...ir_items_double_sampling_n1000_k64_java.sk | Bin 0 -> 528 bytes .../reservoir_items_long_empty_k128_java.sk | Bin 0 -> 8 bytes ...servoir_items_long_exact_n100_k128_java.sk | Bin 0 -> 816 bytes ...eservoir_items_long_exact_n10_k128_java.sk | Bin 0 -> 96 bytes ...servoir_items_long_exact_n128_k128_java.sk | Bin 0 -> 1040 bytes ...reservoir_items_long_exact_n1_k128_java.sk | Bin 0 -> 24 bytes ...eservoir_items_long_exact_n32_k128_java.sk | Bin 0 -> 272 bytes ...oir_items_long_sampling_n1000_k128_java.sk | Bin 0 -> 1040 bytes ...voir_items_long_sampling_n1000_k32_java.sk | Bin 0 -> 272 bytes ...voir_items_long_sampling_n1000_k64_java.sk | Bin 0 -> 528 bytes .../reservoir_items_string_empty_k128_java.sk | Bin 0 -> 8 bytes ...rvoir_items_string_exact_n100_k128_java.sk | Bin 0 -> 1006 bytes ...ervoir_items_string_exact_n10_k128_java.sk | Bin 0 -> 106 bytes ...rvoir_items_string_exact_n128_k128_java.sk | Bin 0 -> 1314 bytes ...servoir_items_string_exact_n1_k128_java.sk | Bin 0 -> 25 bytes ...ervoir_items_string_exact_n32_k128_java.sk | Bin 0 -> 326 bytes ...r_items_string_sampling_n1000_k128_java.sk | Bin 0 -> 1369 bytes ...ir_items_string_sampling_n1000_k32_java.sk | Bin 0 -> 331 bytes ...ir_items_string_sampling_n1000_k64_java.sk | Bin 0 -> 665 bytes 56 files changed, 28 insertions(+), 32 deletions(-) create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_empty_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_exact_n100_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_exact_n10_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_exact_n128_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_exact_n1_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_exact_n32_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k64_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_exact_n1_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_exact_n32_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k32_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k64_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_exact_n32_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k128_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k32_java.sk create mode 100644 serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index e0ccdfb..237c0d6 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -57,7 +57,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { n := n t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[int64](128) - for i := int64(1); i <= int64(n); i++ { + for i := int64(0); i < int64(n); i++ { sketch.Update(i) } data, _ := sketch.ToSlice(Int64SerDe{}) @@ -69,7 +69,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { k := k t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[int64](k) - for i := int64(1); i <= 1000; i++ { + for i := int64(0); i < 1000; i++ { sketch.Update(i) } data, _ := sketch.ToSlice(Int64SerDe{}) @@ -91,7 +91,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { n := n t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[float64](128) - for i := 1; i <= n; i++ { + for i := 0; i < n; i++ { sketch.Update(float64(i)) } data, _ := sketch.ToSlice(Float64SerDe{}) @@ -103,7 +103,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { k := k t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[float64](k) - for i := 1; i <= 1000; i++ { + for i := 0; i < 1000; i++ { sketch.Update(float64(i)) } data, _ := sketch.ToSlice(Float64SerDe{}) @@ -125,7 +125,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { n := n t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[string](128) - for i := 1; i <= n; i++ { + for i := 0; i < n; i++ { sketch.Update(fmt.Sprintf("item%d", i)) } data, _ := sketch.ToSlice(StringSerDe{}) @@ -137,7 +137,7 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { k := k t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { sketch, _ := NewReservoirItemsSketch[string](k) - for i := 1; i <= 1000; i++ { + for i := 0; i < 1000; i++ { sketch.Update(fmt.Sprintf("item%d", i)) } data, _ := sketch.ToSlice(StringSerDe{}) @@ -225,8 +225,9 @@ func TestSerializationRoundTrip(t *testing.T) { data, err := sketch.ToSlice(Int64SerDe{}) assert.NoError(t, err) - // Verify preamble structure - assert.Equal(t, byte(3), data[0]) // preamble_longs = 3 for non-empty + // Verify preamble structure (Java-compatible format) + // Byte 0: 0xC0 (ResizeFactor X8) | 0x02 (preamble_longs) = 0xC2 + assert.Equal(t, byte(0xC2), data[0]) // preamble_longs = 2 for non-empty + ResizeFactor bits assert.Equal(t, byte(2), data[1]) // serVer = 2 assert.Equal(t, byte(internal.FamilyEnum.ReservoirItems.Id), data[2]) // familyID diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index 32d6aba..e642f6a 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -115,61 +115,57 @@ func (s *ReservoirItemsSketch[T]) Reset() { s.data = s.data[:0] } -// Preamble constants for serialization +// Serialization constants const ( - preambleIntsShort = 1 // empty sketch - preambleIntsLong = 3 // non-empty sketch - serVer = 2 // serialization version + preambleIntsEmpty = 1 + preambleIntsNonEmpty = 2 + serVer = 2 + flagEmpty = 0x04 + resizeFactorBits = 0xC0 // ResizeFactor X8 ) -// ToSlice serializes the sketch to a byte slice using the provided SerDe. +// ToSlice serializes the sketch to a byte slice. func (s *ReservoirItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { if s.IsEmpty() { - // Empty sketch: minimal preamble buf := make([]byte, 8) - buf[0] = preambleIntsShort + buf[0] = resizeFactorBits | preambleIntsEmpty buf[1] = serVer buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) - // bytes 4-7: k as int32 + buf[3] = flagEmpty binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) return buf, nil } - // Serialize items itemsBytes, err := serde.SerializeToBytes(s.data) if err != nil { return nil, err } - // Non-empty sketch - preambleBytes := preambleIntsLong * 8 + preambleBytes := preambleIntsNonEmpty * 8 buf := make([]byte, preambleBytes+len(itemsBytes)) - // Preamble - buf[0] = preambleIntsLong + buf[0] = resizeFactorBits | preambleIntsNonEmpty buf[1] = serVer buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) - // byte 3: reserved + buf[3] = 0 binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) binary.LittleEndian.PutUint64(buf[8:], uint64(s.n)) - binary.LittleEndian.PutUint32(buf[16:], uint32(len(s.data))) - // bytes 20-23 reserved - // Data copy(buf[preambleBytes:], itemsBytes) return buf, nil } -// NewReservoirItemsSketchFromSlice deserializes a sketch from a byte array using the provided SerDe. +// NewReservoirItemsSketchFromSlice deserializes a sketch from a byte slice. func NewReservoirItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*ReservoirItemsSketch[T], error) { if len(data) < 8 { return nil, errors.New("data too short") } - preambleInts := int(data[0]) + preambleInts := int(data[0] & 0x3F) ver := data[1] family := data[2] + flags := data[3] k := int(binary.LittleEndian.Uint32(data[4:])) if ver != serVer { @@ -179,19 +175,18 @@ func NewReservoirItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) ( return nil, errors.New("wrong sketch family") } - if preambleInts == preambleIntsShort { - // Empty sketch + if (flags&flagEmpty) != 0 || preambleInts == preambleIntsEmpty { return NewReservoirItemsSketch[T](k) } - if len(data) < preambleIntsLong*8 { + preambleBytes := preambleIntsNonEmpty * 8 + if len(data) < preambleBytes { return nil, errors.New("data too short for non-empty sketch") } n := int64(binary.LittleEndian.Uint64(data[8:])) - numSamples := int(binary.LittleEndian.Uint32(data[16:])) + numSamples := int(min(n, int64(k))) - preambleBytes := preambleIntsLong * 8 itemsData := data[preambleBytes:] items, err := serde.DeserializeFromBytes(itemsData, numSamples) diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk index 8bff7be517ff23d8742141fce1bd166805cf4a8a..cfe18e23bd29ea9aaa4b23a6f540762b76342225 100644 GIT binary patch literal 8 PcmX@e#Ld#cz`y_i2y_8c literal 8 PcmZQ%;$~=IU|;|M0$>1- diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk index c5c75c349f29c0d7e66789f80b4f80f810502ba4..90cf41e535d93edf35dc99a06405c6196de4367c 100644 GIT binary patch delta 26 ccmdnNwt-FX5EC~;0|Ns?3J`Py+)C6My diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk index 1b0a059726b633a0af0b0629db49814df571a4af..691a9e0578e64032df58ed1c971bd46522a61e47 100644 GIT binary patch delta 23 Zcmc~u5In@h&CtNWz`zB>U@%e89{?@H10(&+~{rdm!&6_uGreA%7#u;O}{eNHnla;N0=41cN-=y;^vH2usM`E%~{6I|0 zV)q6AUJR>Zb}c4*VzDls-HHwJ?5lKeCx#o4^)Y_sSiYr>p|3_oes(Ix^kGJ%&8QgR zC!Evm$aj`J;`UK`+vklFpAZY2H)21BPQ1^2f(zxlN!CxH;m?h9%zR?z(Ze_EiWj+u zwv!I%FJeBebNObP{C%-W{|5b2lmAWqG?vb(bM`Df#Ql`~JZ9vloGYZSY)yWNdbTFr zpX;?!!Q!QKS`$rK4Cuej`7-~`w-mR_;wSIto4f+^3Yddi cQC{x*-IVT8&oDQ``cM8>c2#+G=F+O#UwOEavj6}9 literal 1048 zcmXw&DNh4I7=|YxG5mxTh{_Fwq>eEpC;|zhLBNuba3qA~ObOStT;*Q5a%JU8L%L^CgD*JAxiEZ@bU z6T^{c7C1i_9e%cr#-3eCn^+8-n?>wPVtIpJ7Q?MLx)(F*HRu!_(@%6DzXA3U`OS*# zeqRh1Vou)_yv(t5hq}U{^akr|Rdok-eucPbuTlSJs_o(WZzQ(V*XEwz#`P`Yby%RI~&B)tiexCjI z*_Y-12){1z*M7)@y=0z#i}?7Z*vI%6%rkt;&r?sq+>)w)>=AqJvJac+UJXBA^+$Z) z(0TMr9pulcCn2uqyNIg$+wxDV_l)>4Y_vIN-h(gcK6x8ed>B{r<~=E~d-{*4!|o`K TgW*v+`4H_c=di{c{#y1Q6xW-+ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk index 43e2636ed1b63976452ef00d167e82834e46a32c..6b36222d186dba9be0dc69adbdb3dcec6318f6df 100644 GIT binary patch literal 272 zcmX@a#Lb|U_zeC~bx`;0fZD$x*#Q80eI+yi literal 280 zcmZQ(;$~1_U|@K`3?zU+0Zc-PhBgN<%b^}ZE0jR!1?3Q0qa8w@fYJ}@Abf!u2yFn> zZ_y3mdo)1k2Ne){K{JHzfY}oS;Tu#!=mu{Hy#uPhpa8<3(F>syIw16gRtVit4WUm! i%}pqV@D*~P>dGMWhEfRq0&4Gv76|`GBZRhSaR2}cSt$?z diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk index 9df39223bceb06d287983020a36acc6d0dfb5c07..05fdf31383f247f294f4828a8df9470b78281232 100644 GIT binary patch literal 528 zcmXxgJ8Hu~6ouhS(g?DJ6e?Y61!0OnRHbv{#=;?B0x^bwoe*QjvYq#930ZbFzgypZP~=oe@wSZL0)q;@wL zZqcqWKBLzh^^`jC7)tG*;#hMOnrowTwr1bXFl+y(=JGjtb%*&QM%@b!)WsEk>;99z VFBIfI>KA9!p^yF?zuJBmZVzvYPe}j( literal 536 zcmXZYF-`(O6vpw1G$uBbl+;#Sz+{d<5E3;pnMk1ogNvw<0v!-B5P>Z%r=;Wva|Dhs zCFLFg;{V&?{qnsxJG(QblQUzDG5gZ793T5%RqP#cJHpBY{WZoWx-YyMV_2b8-`tS% zKBg&}bNu_pjrz%vcMFUyEVQl|YW{<%ht+4a4cSGXVS2(s|6wjPf5EEu^S5LxE1v8+ zXeJnSzIV-c+PC(>Jhahi|MW>t_qd;7@rH4U*&OS-4}VFv>Sy|1c+h&?OFkgyI!AaV ThexURPS@o4i%$3EWqE23t|U)8 diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk index 8bff7be517ff23d8742141fce1bd166805cf4a8a..cfe18e23bd29ea9aaa4b23a6f540762b76342225 100644 GIT binary patch literal 8 PcmX@e#Ld#cz`y_i2y_8c literal 8 PcmZQ%;$~=IU|;|M0$>1- diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk index 1b6e11d3331b660cc3b5c65270f5180a83b3839c..f4502c0d59e95a6cfbe2a294435b16d243b3909a 100644 GIT binary patch delta 26 ccmdnNwt-FX5EC~;0|Ns?3J`U@%e89{?@H10(v9Ml^-Y4PU4uN6$8gi` zMgN@Vd7Scv=jRqsVGhR^yE`yP3# z`w4x7SK_PvGC#8m^K$(LR$#MTw_V(O1^kB3+xN>)@JN5bbND3oVTb;hFY*lUhujbA z-}yC`%IEPL+x)&BTQL{oSYyBE-79&E`wMuEw>V_K#mD%h-}hh^cG_+8d;5gESRUaA zyxsr%v>%g)?RNCNyjI^R4`Cga>F?Y-lt0J^+?kNi@M*rn^Y}0y!6J9p^j$crfAUV$ Hu3P^P>1ReZ literal 1048 zcmXZWJt%|$9LMp8^D^9a3Rz^n$VQ6gq^_itB%9ZT<&rw48yG~XL-Lx%BAZOag1ihT zNlA>Cyo{o}dY|s4F`+k)3;1Q;W3NR%#CNe% zZ_0cLudy4q^zZbe{@0#7ftMK7+vnTZEq}&U_dncQ!3O;z9Ml`eCwbDHl)TCQn|VKG zFb})1S-;l)yL`vpc6kP$&C}Rw|C~?oxcx=>A)d)!utWdC{98WbdzE|vPw*aJ?N{hu z;*PwYAL$qH4fA^(z?j~t|FbSHvsWyi=DmE}-XPENF};L4bLJzwkZKCIg|zf`^6HR literal 280 zcmXYpF%m#96huEMReF<&GpG~}!70+}oJE@h7+Ss2&>9Y4TtF`)FGcph-JMl&B1S|W z5rbkWh(EKUKZLcItt zphI=mwVN)|V4e*Bm-l|+&s;f{l6jfI_*zaCVUySeTmQHz}(pWm3|`^ z{0Dy|EB(8N<%e>_-{TyA+E4X0e&K6*!ymi1- diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk index 4b6d4434c7eb0aefe9cbacbc0cb21ed0f69792d5..85d73bd902769d4b9457476ce30cc01f9618437e 100644 GIT binary patch delta 34 kcmeyt{*GPp5EC~;0|Ns?3J`+;D-dUvq~;oIl(@C_MgRZ+ delta 45 pcmaFI{)1hDnTeaBfq{V`1&BcaMsJk7%FN9U6v!+|%{4SI005`42U!3B diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk index a659f5057eca96f7dedc39824d77c571482bc4fb..54b50047bfd61db97d1e291e70a0cd374ab39739 100644 GIT binary patch delta 31 gcmXTUl03x3&CtNWz`zB>AixU5nI);Y1`{O$0ZzmPE&u=k delta 41 kcmc~QmSAS$W@unwVBi8`5P;DWB?GzGfc(so)LcUY08!%vwEzGB diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk index 1831ea8d3994b8400af1c1a1835cefe64a621083..c21ebf268f5e1ab85842993d549f64796760387c 100644 GIT binary patch delta 34 jcmZ3(wTMgd5EC~;0|NsX11SbpAkHjF%{AC4!O02$Z}|oW delta 45 ncmZ3)wT4TAnTeaBfq?;xffStHD9Opn%?=dEEJ@8ZG_n8yiLnMh diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk index 4c935a5d0f9a4394872c118f391bc1413581f427..28742cadd37a8ee8e538b1708fbf1df83b365dae 100644 GIT binary patch literal 25 acmX@a#Ldvaz`(!=#2~;5#F-_jxds3w36qnTeaBfq{WR0f<3B0ZcM5Y?Rbx74H3v$|s`)TvcbCyP_&?~IV$u<6GJ)8nWXFKCjd=_8fVyMfI@*g6&S+eu7qNy z!kKo0%q&jXgdao0oEv6bs~)tgLNy7pO7zg&iYZbAhF+*8UPnwJKY0k0&W)hm;8HaD zQbno8&d`Bivr4y`>r=!k&qdSDD6VxP3_9CX*aS$yqi!t9aO;95hN7Xi)_ zoA*QV&Y8xXgg1c*C}Ic@_~?ev=)O~#2P&xJz=&cz&BG;3Guo$=J_2{l5GpSK(f$LZ CCMb6R diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk index ea9d9f028a1ce3d763c221051a0536f0dce0cc91..10ebc00f3e15b89747b336ed4f714721a2663c03 100644 GIT binary patch literal 365 zcmYL^u?<2o3`8T4pkN6`h-07a3`9Xe_b2?*Fa{~`B~qd%>z?hLmq&ZH*1p&EtaD%Y zeY`$qSvX*Y*=L~-Eerz@y%3#R7&VAgAaoR_6UFTI-vPAda7HKRT4yyh%95z4s3bi;Oe&`w0(ZIyNQeD&MVNYMgRx#hT>S&Xo>#Czw?1|$&1f^XUhDaaKA fkPfMV5u$-9?duqqdtB~?Ibp^I*4x33tkqP0f1FCL diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk index 08b8ec9b96dafcebc8bc128490518da0b26960a5..a4895c8f7261e5a11539707406f73d9b0eb2b7f5 100644 GIT binary patch literal 713 zcmYjOxoyJ$5EE=9DZI@6PnTs9<9R**&fmxR z{62o}G=RTejCBtTPQa#tRXWfe(9(v@fXxWDW*}9Lcj$g=&N$5l(8T@$ DZ;zE5 literal 720 zcmYjPF>(Yk2=nA-E=4|LKw#+ZGbvJBmq+Ir*~tWPTX;1#2ub$s?PERGT0ggIxxUBW zy4L&k|N8!XDzc$D8s@2nQq~YS4b|fQ&v544q|ZH~q2z9u!*vWzAVUz&O=nXQ#!MB; zj;&n^uv9j+!YNBcdqB}FCW1Jm1z6yqnJm&)62fWmfDpQHiqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%SAuH%bLi6h+a4m0)9GVPPR6A|hhW;hS^Lc`A2k4@cw>6#uGx@L247zIysn$>0Cw z-}~Nj=ydI6KL!_Rz5RC^KKtoo nh!Mt^pu!Y0%u!>3C01BtgDrO0{KnPg~A@q~Mv*w(s%az zz>idtkHN3+`|J9rAr5ndqx{J+j&p*O{KY9wbB43*ILCP|aFI(~<_cH2#@}4$1~<9I zZSM3fW>+(Nn%URPfoA?_=C5WBHFKnyW6hjs=2SChn)#=h|C%}1%tgOD78`SEXR$F? zb`~3RZD+ADH+B{qb8Ba@F?V(r8*^`Gu`v&J78~iJj8`HD1*m~V3ao=sb literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..fafccd47344c083b040f456f1a8b355e6880a1a2 GIT binary patch literal 272 zcmZX|I}Sil6h+Y+N{v$a3WY);A|fv$*Z4@A+FXsxkc6k>7U#)(e`}4D zB!BheoL@P;nr!icZFbn@C9l|HpVu64$Po+2oN&q;&Uni^&Uw!VKJtkRF8R!tlLzKi mGjEz%X=bgNjb^r*dDqN`W(zBTisnZ0HzU6p?ow=0_f literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..cfe18e23bd29ea9aaa4b23a6f540762b76342225 GIT binary patch literal 8 PcmX@e#Ld#cz`y_i2y_8c literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..f4502c0d59e95a6cfbe2a294435b16d243b3909a GIT binary patch literal 816 zcmYkvRZ@Zh0EN+COhiQ#8wCRcJFyF}8#^#?A+C*;>BD*O&U|y`Rzi{BCkTS?zy7bv z39*_rtYsbR*}yOx8DSHf*}^DW8DksU8D|GO*~M=5u$O&Iu%81=GQ~j-ahM}abChEo z=L9pH#uDH8!B2j%%wNIJ6$$_V literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..d5edac6fac2112b1b77a81a8942b425d04516547 GIT binary patch literal 96 rcmX@a#Ldvaz`(!-#K?dV%3*@i%ut#IO0z;~HYm*wr8%H9CzJ*Ngn$8u literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..6442791de6ad3d8851444628f576bb9087c6156f GIT binary patch literal 1040 zcmX}e1y2BuSDaNs=TV;$?+!0&A24>qxx zE&Rz=wy~WZ>|__a`HMmJu$O)8=Ku#e#9@wblw%y{1SdJgY0hw#bDZY_7rDe`u5guW zT;~Qi`I}qZ<_>qc$9*2~4-a|7zdYs%PkF|3Uhp6P^O7N6@tQZh%vl3iR^(`9T2&q3e8`+V0> zBIpJ|a0~zEZXrLyC}WH>$rRIkV1`-dnCBy(Sl}~@EV0ZAt9)UNbvD@KD_d-{!!CP# qW1j;K`OXo?oN&q+=Ui~f71#XWCpX-3$2|``@{1>a^M_|%_{%@I4-vZn literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..cfe18e23bd29ea9aaa4b23a6f540762b76342225 GIT binary patch literal 8 PcmX@e#Ld#cz`y_i2y_8c literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..85d73bd902769d4b9457476ce30cc01f9618437e GIT binary patch literal 1006 zcmX}lu}wrl3`Nl|BuHpx*Pii=`+=XU+|M`AXcNHqtP*as!YAZ%Cf)R{h1S1&12u3i1k&I*{ zBN@p^MlzC-jARVPU<}4!48~v##$XJ_U`)njOvYqP#$-&!WK70n6r&i$C`K`gQH){~ yqZo^^7>ltOi?JAsu^5Z77@M&fo3R<2u^F4O8Jn>g&1gn5n$e7AG@}{K=>7$g7}J6P literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..54b50047bfd61db97d1e291e70a0cd374ab39739 GIT binary patch literal 106 zcmX@a#Ldvaz`(!-#2~;5#F-_jxdt$XA&g-JV;I92CNPF6j9~_2n8O$rFoq=n&A=3I literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..c21ebf268f5e1ab85842993d549f64796760387c GIT binary patch literal 1314 zcmYMzyG=zw3`Nl^Bp{%f>;Fta1O#LhKy6gU6JquWEbquG^3T)T^?hA`^H|sOV?Ms# zKkpNE!cWABJW(h5#GF_s_RIfeyD6ib3M#3hni^_R7>?l>j^P-N;TVqL7>?l>fe{#i z5g36H7=aNOfe{#qkr;`Q7>SV>iIEtIkr;(h7==+7g;5xVQ5c0$7>&^wjnNp5(HM=< z7>&^wgE1I`F&Kj}7=tkwgE1HjV___eg|RRe#==+_3u9qyjE%7|Hpa%-7#m|_Y>d5M x`N3OhN>_$5m8ERuD3?;2Ni%6C&7_$$lV;LPnn^QhF3qL6G?(VmT$)Ss`2`x|6afGL literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..28742cadd37a8ee8e538b1708fbf1df83b365dae GIT binary patch literal 25 acmX@a#Ldvaz`(!=#2~;5#F-_jxds3w3Y6Z xhKyt^MmCC3jb^OF|2fVmBB6v9lu(tYI6r zVH>t#RH9*~VWwfGVW#2RL?xmUQHiKTR3a)7m5542C882hiKs+WA}SG;h)P5yq7qSw Ls6>Aq?RNM>xU}jtE2`0uhKnBqH?#`P@WT literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..206c5d7a08aa0646b86587d493ec6d4785cab9a0 GIT binary patch literal 665 zcmYMxu?+$-3`XGy1QaZDaU93#ZW#&+IwoWq#^4I_{z$f7;jhZ)bT4};<$FHX@h-X}Xb49*!V!*eL`QT)M|4C-1R@ZD z2t*(TVju=$AO<24iAY2u5-|}IF%c6n^TrQuN>jQrlwwt^idC^HR>e%r#7xZM2W1nA A>;M1& literal 0 HcmV?d00001