diff --git a/internal/family.go b/internal/family.go index 83cc73d..c57a344 100644 --- a/internal/family.go +++ b/internal/family.go @@ -31,6 +31,7 @@ type families struct { BloomFilter family Tuple family Theta family + ReservoirItems family } var FamilyEnum = &families{ @@ -66,4 +67,8 @@ var FamilyEnum = &families{ Id: 3, MaxPreLongs: 3, }, + ReservoirItems: family{ + Id: 11, + MaxPreLongs: 2, + }, } diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go new file mode 100644 index 0000000..237c0d6 --- /dev/null +++ b/sampling/compatibility_test.go @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/apache/datasketches-go/internal" + "github.com/stretchr/testify/assert" +) + +// TestGenerateGoBinariesForCompatibilityTesting generates serialization test data. +// This test is skipped unless DSKETCH_TEST_GENERATE_GO environment variable is set. +// Run with: DSKETCH_TEST_GENERATE_GO=1 go test -v -run TestGenerateGoBinaries +// Generates 27 files for cross-language compatibility testing. +// Note: Go only has generic ReservoirItemsSketch[T], no separate ReservoirLongsSketch. 
+// See https://github.com/apache/datasketches-go/issues/90 for context. +func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { + if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 { + t.Skipf("%s not set", internal.DSketchTestGenerateGo) + } + + err := os.MkdirAll(internal.GoPath, os.ModePerm) + assert.NoError(t, err) + + exactNValues := []int{1, 10, 32, 100, 128} + samplingKValues := []int{32, 64, 128} + + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_long", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](128) + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](128) + for i := int64(0); i < int64(n); i++ { + sketch.Update(i) + } + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](k) + for i := int64(0); i < 1000; i++ { + sketch.Update(i) + } + data, _ := sketch.ToSlice(Int64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_long_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) + } + }) + + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_double", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](128) + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + 
t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](128) + for i := 0; i < n; i++ { + sketch.Update(float64(i)) + } + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[float64](k) + for i := 0; i < 1000; i++ { + sketch.Update(float64(i)) + } + data, _ := sketch.ToSlice(Float64SerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_double_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) + } + }) + + // ========== ReservoirItemsSketch (9 files) ========== + t.Run("items_string", func(t *testing.T) { + // Empty + t.Run("empty_k128", func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](128) + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_empty_k128_go.sk", internal.GoPath), data, 0644) + }) + // Exact + for _, n := range exactNValues { + n := n + t.Run(fmt.Sprintf("exact_n%d_k128", n), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](128) + for i := 0; i < n; i++ { + sketch.Update(fmt.Sprintf("item%d", i)) + } + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_exact_n%d_k128_go.sk", internal.GoPath, n), data, 0644) + }) + } + // Sampling + for _, k := range samplingKValues { + k := k + t.Run(fmt.Sprintf("sampling_n1000_k%d", k), func(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](k) + for i := 0; i < 1000; i++ { + sketch.Update(fmt.Sprintf("item%d", i)) + } + data, _ := sketch.ToSlice(StringSerDe{}) + os.WriteFile(fmt.Sprintf("%s/reservoir_items_string_sampling_n1000_k%d_go.sk", internal.GoPath, k), data, 0644) + }) + } + }) +} + +// TestSerializationCompatibilityEmpty tests 
deserialization of an empty sketch. +func TestSerializationCompatibilityEmpty(t *testing.T) { + data, err := os.ReadFile(filepath.Join(internal.GoPath, "reservoir_items_long_empty_k128_go.sk")) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, sketch.IsEmpty()) + assert.Equal(t, 128, sketch.K()) + assert.Equal(t, int64(0), sketch.N()) +} + +// TestSerializationCompatibilityExact tests deserialization of sketches in exact mode. +func TestSerializationCompatibilityExact(t *testing.T) { + testCases := []struct { + filename string + k int + n int64 + }{ + {"reservoir_items_long_exact_n1_k128_go.sk", 128, 1}, + {"reservoir_items_long_exact_n10_k128_go.sk", 128, 10}, + {"reservoir_items_long_exact_n32_k128_go.sk", 128, 32}, + {"reservoir_items_long_exact_n100_k128_go.sk", 128, 100}, + {"reservoir_items_long_exact_n128_k128_go.sk", 128, 128}, + } + + for _, tc := range testCases { + t.Run(tc.filename, func(t *testing.T) { + data, err := os.ReadFile(filepath.Join(internal.GoPath, tc.filename)) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, tc.k, sketch.K()) + assert.Equal(t, tc.n, sketch.N()) + assert.Equal(t, int(tc.n), sketch.NumSamples()) + }) + } +} + +// TestSerializationCompatibilityWithSampling tests deserialization of sketches in sampling mode. 
+func TestSerializationCompatibilityWithSampling(t *testing.T) { + testCases := []struct { + filename string + k int + n int64 + }{ + {"reservoir_items_long_sampling_n1000_k32_go.sk", 32, 1000}, + {"reservoir_items_long_sampling_n1000_k64_go.sk", 64, 1000}, + {"reservoir_items_long_sampling_n1000_k128_go.sk", 128, 1000}, + } + + for _, tc := range testCases { + t.Run(tc.filename, func(t *testing.T) { + data, err := os.ReadFile(filepath.Join(internal.GoPath, tc.filename)) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, tc.k, sketch.K()) + assert.Equal(t, tc.n, sketch.N()) + assert.Equal(t, tc.k, sketch.NumSamples()) // Only k items kept after sampling + }) + } +} + +// TestSerializationRoundTrip tests serialization and deserialization round-trip. +func TestSerializationRoundTrip(t *testing.T) { + // Create sketch and add items + sketch, _ := NewReservoirItemsSketch[int64](10) + for i := int64(1); i <= 5; i++ { + sketch.Update(i) + } + + // Serialize + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + // Verify preamble structure (Java-compatible format) + // Byte 0: 0xC0 (ResizeFactor X8) | 0x02 (preamble_longs) = 0xC2 + assert.Equal(t, byte(0xC2), data[0]) // preamble_longs = 2 for non-empty + ResizeFactor bits + assert.Equal(t, byte(2), data[1]) // serVer = 2 + assert.Equal(t, byte(internal.FamilyEnum.ReservoirItems.Id), data[2]) // familyID + + // Deserialize and verify + restored, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} + +// TestReservoirItemsSketch_JavaCompat tests deserialization of Java-generated reservoir sketch files. +// These tests verify cross-language compatibility with files generated by datasketches-java. 
+func TestReservoirItemsSketch_JavaCompat(t *testing.T) { + // Test cases based on Java PR #714: ReservoirCrossLanguageTest.java (36 total files) + testCases := []struct { + name string + filename string + k int + n int64 + isEmpty bool + }{ + // ReservoirLongsSketch - Empty (1) + {"longs_empty_k128", "reservoir_longs_empty_k128_java.sk", 128, 0, true}, + + // ReservoirLongsSketch - Exact (5) + {"longs_exact_n1_k128", "reservoir_longs_exact_n1_k128_java.sk", 128, 1, false}, + {"longs_exact_n10_k128", "reservoir_longs_exact_n10_k128_java.sk", 128, 10, false}, + {"longs_exact_n32_k128", "reservoir_longs_exact_n32_k128_java.sk", 128, 32, false}, + {"longs_exact_n100_k128", "reservoir_longs_exact_n100_k128_java.sk", 128, 100, false}, + {"longs_exact_n128_k128", "reservoir_longs_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirLongsSketch - Sampling (3) + {"longs_sampling_n1000_k32", "reservoir_longs_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"longs_sampling_n1000_k64", "reservoir_longs_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"longs_sampling_n1000_k128", "reservoir_longs_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_long_empty_k128", "reservoir_items_long_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_long_exact_n1_k128", "reservoir_items_long_exact_n1_k128_java.sk", 128, 1, false}, + {"items_long_exact_n10_k128", "reservoir_items_long_exact_n10_k128_java.sk", 128, 10, false}, + {"items_long_exact_n32_k128", "reservoir_items_long_exact_n32_k128_java.sk", 128, 32, false}, + {"items_long_exact_n100_k128", "reservoir_items_long_exact_n100_k128_java.sk", 128, 100, false}, + {"items_long_exact_n128_k128", "reservoir_items_long_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_long_sampling_n1000_k32", "reservoir_items_long_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_long_sampling_n1000_k64", 
"reservoir_items_long_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_long_sampling_n1000_k128", "reservoir_items_long_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_double_empty_k128", "reservoir_items_double_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_double_exact_n1_k128", "reservoir_items_double_exact_n1_k128_java.sk", 128, 1, false}, + {"items_double_exact_n10_k128", "reservoir_items_double_exact_n10_k128_java.sk", 128, 10, false}, + {"items_double_exact_n32_k128", "reservoir_items_double_exact_n32_k128_java.sk", 128, 32, false}, + {"items_double_exact_n100_k128", "reservoir_items_double_exact_n100_k128_java.sk", 128, 100, false}, + {"items_double_exact_n128_k128", "reservoir_items_double_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_double_sampling_n1000_k32", "reservoir_items_double_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_double_sampling_n1000_k64", "reservoir_items_double_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_double_sampling_n1000_k128", "reservoir_items_double_sampling_n1000_k128_java.sk", 128, 1000, false}, + + // ReservoirItemsSketch - Empty (1) + {"items_string_empty_k128", "reservoir_items_string_empty_k128_java.sk", 128, 0, true}, + + // ReservoirItemsSketch - Exact (5) + {"items_string_exact_n1_k128", "reservoir_items_string_exact_n1_k128_java.sk", 128, 1, false}, + {"items_string_exact_n10_k128", "reservoir_items_string_exact_n10_k128_java.sk", 128, 10, false}, + {"items_string_exact_n32_k128", "reservoir_items_string_exact_n32_k128_java.sk", 128, 32, false}, + {"items_string_exact_n100_k128", "reservoir_items_string_exact_n100_k128_java.sk", 128, 100, false}, + {"items_string_exact_n128_k128", "reservoir_items_string_exact_n128_k128_java.sk", 128, 128, false}, + + // ReservoirItemsSketch - Sampling (3) + {"items_string_sampling_n1000_k32", 
"reservoir_items_string_sampling_n1000_k32_java.sk", 32, 1000, false}, + {"items_string_sampling_n1000_k64", "reservoir_items_string_sampling_n1000_k64_java.sk", 64, 1000, false}, + {"items_string_sampling_n1000_k128", "reservoir_items_string_sampling_n1000_k128_java.sk", 128, 1000, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + filepath := filepath.Join(internal.JavaPath, tc.filename) + + // Skip if Java file not yet available + if _, err := os.Stat(filepath); os.IsNotExist(err) { + t.Skipf("Java file not found: %s (waiting for sync from datasketches-java)", tc.filename) + return + } + + data, err := os.ReadFile(filepath) + assert.NoError(t, err) + + sketch, err := NewReservoirItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + + assert.Equal(t, tc.k, sketch.K(), "k mismatch") + assert.Equal(t, tc.n, sketch.N(), "n mismatch") + assert.Equal(t, tc.isEmpty, sketch.IsEmpty(), "isEmpty mismatch") + + if !tc.isEmpty { + samples := sketch.Samples() + if tc.n <= int64(tc.k) { + // Exact mode: should have exactly n samples + assert.Equal(t, int(tc.n), len(samples), "sample count mismatch in exact mode") + } else { + // Sampling mode: should have exactly k samples + assert.Equal(t, tc.k, len(samples), "sample count mismatch in sampling mode") + } + } + }) + } +} diff --git a/sampling/reservoir_items_sketch.go b/sampling/reservoir_items_sketch.go index 49aaf22..e642f6a 100644 --- a/sampling/reservoir_items_sketch.go +++ b/sampling/reservoir_items_sketch.go @@ -18,8 +18,11 @@ package sampling import ( + "encoding/binary" "errors" "math/rand" + + "github.com/apache/datasketches-go/internal" ) // ResizeFactor controls how the internal array grows. 
@@ -111,3 +114,89 @@ func (s *ReservoirItemsSketch[T]) Reset() { s.n = 0 s.data = s.data[:0] } + +// Serialization constants +const ( + preambleIntsEmpty = 1 + preambleIntsNonEmpty = 2 + serVer = 2 + flagEmpty = 0x04 + resizeFactorBits = 0xC0 // ResizeFactor X8 +) + +// ToSlice serializes the sketch to a byte slice. +func (s *ReservoirItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { + if s.IsEmpty() { + buf := make([]byte, 8) + buf[0] = resizeFactorBits | preambleIntsEmpty + buf[1] = serVer + buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) + buf[3] = flagEmpty + binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) + return buf, nil + } + + itemsBytes, err := serde.SerializeToBytes(s.data) + if err != nil { + return nil, err + } + + preambleBytes := preambleIntsNonEmpty * 8 + buf := make([]byte, preambleBytes+len(itemsBytes)) + + buf[0] = resizeFactorBits | preambleIntsNonEmpty + buf[1] = serVer + buf[2] = byte(internal.FamilyEnum.ReservoirItems.Id) + buf[3] = 0 + binary.LittleEndian.PutUint32(buf[4:], uint32(s.k)) + binary.LittleEndian.PutUint64(buf[8:], uint64(s.n)) + + copy(buf[preambleBytes:], itemsBytes) + + return buf, nil +} + +// NewReservoirItemsSketchFromSlice deserializes a sketch from a byte slice. 
+func NewReservoirItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*ReservoirItemsSketch[T], error) { + if len(data) < 8 { + return nil, errors.New("data too short") + } + + preambleInts := int(data[0] & 0x3F) + ver := data[1] + family := data[2] + flags := data[3] + k := int(binary.LittleEndian.Uint32(data[4:])) + + if ver != serVer { + return nil, errors.New("unsupported serialization version") + } + if family != byte(internal.FamilyEnum.ReservoirItems.Id) { + return nil, errors.New("wrong sketch family") + } + + if (flags&flagEmpty) != 0 || preambleInts == preambleIntsEmpty { + return NewReservoirItemsSketch[T](k) + } + + preambleBytes := preambleIntsNonEmpty * 8 + if len(data) < preambleBytes { + return nil, errors.New("data too short for non-empty sketch") + } + + n := int64(binary.LittleEndian.Uint64(data[8:])) + numSamples := int(min(n, int64(k))) + + itemsData := data[preambleBytes:] + + items, err := serde.DeserializeFromBytes(itemsData, numSamples) + if err != nil { + return nil, err + } + + return &ReservoirItemsSketch[T]{ + k: k, + n: n, + data: items, + }, nil +} diff --git a/sampling/serde.go b/sampling/serde.go new file mode 100644 index 0000000..503b442 --- /dev/null +++ b/sampling/serde.go @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// ItemsSerDe defines the interface for serializing and deserializing items.
// Users must implement this interface for custom types.
// Built-in implementations are provided for common types (int64, int32, string, float64).
type ItemsSerDe[T any] interface {
	// SerializeToBytes converts items to a byte slice.
	SerializeToBytes(items []T) ([]byte, error)

	// DeserializeFromBytes converts bytes back to items.
	// numItems specifies how many items to read from the data.
	DeserializeFromBytes(data []byte, numItems int) ([]T, error)

	// SizeOfItem returns the size in bytes for a single item.
	// Returns -1 for variable-length types (like string).
	SizeOfItem() int
}

// Int64SerDe provides serialization for int64 (8 bytes per item).
// Values are stored back to back as little-endian 64-bit words.
type Int64SerDe struct{}

// SerializeToBytes encodes items into len(items)*8 bytes. It never fails.
func (s Int64SerDe) SerializeToBytes(items []int64) ([]byte, error) {
	buf := make([]byte, len(items)*8)
	for i, v := range items {
		binary.LittleEndian.PutUint64(buf[i*8:], uint64(v))
	}
	return buf, nil
}

// DeserializeFromBytes decodes the first numItems values from data.
// It returns an error when numItems is negative or data holds fewer than
// numItems*8 bytes; bytes beyond that are ignored.
func (s Int64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int64, error) {
	if numItems < 0 {
		return nil, errors.New("negative item count for int64 deserialization")
	}
	// Divide instead of multiplying numItems: the product can overflow int
	// and slip past the length check.
	if numItems > len(data)/8 {
		return nil, errors.New("data too short for int64 deserialization")
	}
	items := make([]int64, numItems)
	for i := range items {
		items[i] = int64(binary.LittleEndian.Uint64(data[i*8:]))
	}
	return items, nil
}

// SizeOfItem reports the fixed encoded size of one int64: 8 bytes.
func (s Int64SerDe) SizeOfItem() int {
	return 8
}

// Int32SerDe provides serialization for int32 (4 bytes per item).
// Values are stored back to back as little-endian 32-bit words.
type Int32SerDe struct{}

// SerializeToBytes encodes items into len(items)*4 bytes. It never fails.
func (s Int32SerDe) SerializeToBytes(items []int32) ([]byte, error) {
	buf := make([]byte, len(items)*4)
	for i, v := range items {
		binary.LittleEndian.PutUint32(buf[i*4:], uint32(v))
	}
	return buf, nil
}

// DeserializeFromBytes decodes the first numItems values from data.
// It returns an error when numItems is negative or data holds fewer than
// numItems*4 bytes; bytes beyond that are ignored.
func (s Int32SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int32, error) {
	if numItems < 0 {
		return nil, errors.New("negative item count for int32 deserialization")
	}
	// Divide instead of multiplying numItems: the product can overflow int
	// and slip past the length check.
	if numItems > len(data)/4 {
		return nil, errors.New("data too short for int32 deserialization")
	}
	items := make([]int32, numItems)
	for i := range items {
		items[i] = int32(binary.LittleEndian.Uint32(data[i*4:]))
	}
	return items, nil
}

// SizeOfItem reports the fixed encoded size of one int32: 4 bytes.
func (s Int32SerDe) SizeOfItem() int {
	return 4
}

// Float64SerDe provides serialization for float64 (8 bytes per item).
// Each value is stored as its IEEE 754 bit pattern in a little-endian 64-bit word.
type Float64SerDe struct{}

// SerializeToBytes encodes items into len(items)*8 bytes. It never fails.
func (s Float64SerDe) SerializeToBytes(items []float64) ([]byte, error) {
	buf := make([]byte, len(items)*8)
	for i, v := range items {
		binary.LittleEndian.PutUint64(buf[i*8:], math.Float64bits(v))
	}
	return buf, nil
}

// DeserializeFromBytes decodes the first numItems values from data.
// It returns an error when numItems is negative or data holds fewer than
// numItems*8 bytes; bytes beyond that are ignored.
func (s Float64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]float64, error) {
	if numItems < 0 {
		return nil, errors.New("negative item count for float64 deserialization")
	}
	// Divide instead of multiplying numItems: the product can overflow int
	// and slip past the length check.
	if numItems > len(data)/8 {
		return nil, errors.New("data too short for float64 deserialization")
	}
	items := make([]float64, numItems)
	for i := range items {
		items[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[i*8:]))
	}
	return items, nil
}

// SizeOfItem reports the fixed encoded size of one float64: 8 bytes.
func (s Float64SerDe) SizeOfItem() int {
	return 8
}

// StringSerDe provides serialization for string (variable length: 4-byte length prefix + content).
// The length prefix is a little-endian uint32 byte count; the string's bytes
// follow it unchanged.
type StringSerDe struct{}

// SerializeToBytes encodes every string as a 4-byte length followed by its
// bytes. It returns an error for a string whose length does not fit the
// 4-byte prefix.
func (s StringSerDe) SerializeToBytes(items []string) ([]byte, error) {
	// Calculate total size
	totalSize := 0
	for _, str := range items {
		if uint64(len(str)) > math.MaxUint32 {
			return nil, errors.New("string too long for 4-byte length prefix")
		}
		totalSize += 4 + len(str) // 4 bytes for length + string bytes
	}

	buf := make([]byte, totalSize)
	offset := 0
	for _, str := range items {
		// Write length
		binary.LittleEndian.PutUint32(buf[offset:], uint32(len(str)))
		offset += 4
		// Write string content
		offset += copy(buf[offset:], str)
	}
	return buf, nil
}

// DeserializeFromBytes decodes numItems length-prefixed strings from the
// start of data; trailing bytes are ignored. It returns an error when
// numItems is negative or data ends before all items have been read.
func (s StringSerDe) DeserializeFromBytes(data []byte, numItems int) ([]string, error) {
	if numItems < 0 {
		return nil, errors.New("negative item count for string deserialization")
	}
	// Every item needs at least its 4-byte prefix, so reject impossible
	// counts before allocating the result slice.
	if numItems > len(data)/4 {
		return nil, errors.New("data too short for string length")
	}

	items := make([]string, numItems)
	offset := 0
	for i := range items {
		if len(data)-offset < 4 {
			return nil, errors.New("data too short for string length")
		}
		length := binary.LittleEndian.Uint32(data[offset:])
		offset += 4

		// Compare as uint64 against the bytes that remain, so a large prefix
		// cannot wrap when converted to int.
		if uint64(length) > uint64(len(data)-offset) {
			return nil, errors.New("data too short for string content")
		}
		end := offset + int(length)
		items[i] = string(data[offset:end])
		offset = end
	}
	return items, nil
}

// SizeOfItem returns -1 because encoded strings have no fixed size.
func (s StringSerDe) SizeOfItem() int {
	return -1 // Variable length
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInt64SerDe(t *testing.T) { + serde := Int64SerDe{} + items := []int64{1, 2, 3, 42, -100, 1000000} + + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) + assert.Equal(t, len(items)*8, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestInt32SerDe(t *testing.T) { + serde := Int32SerDe{} + items := []int32{1, 2, 3, 42, -100, 1000000} + + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) + assert.Equal(t, len(items)*4, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestFloat64SerDe(t *testing.T) { + serde := Float64SerDe{} + items := []float64{1.5, 2.5, 3.14159, -100.5} + + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) + assert.Equal(t, len(items)*8, len(bytes)) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Len(t, restored, len(items)) +} + +func TestStringSerDe(t *testing.T) { + serde := StringSerDe{} + items := []string{"hello", "world", "", "testing 123", "日本語"} + + bytes, err := serde.SerializeToBytes(items) + assert.NoError(t, err) + + restored, err := serde.DeserializeFromBytes(bytes, len(items)) + assert.NoError(t, err) + assert.Equal(t, items, restored) +} + +func TestSketchSerializationInt64(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](10) + + for i := int64(1); i <= 5; i++ { + sketch.Update(i) + } + + bytes, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + assert.NotNil(t, bytes) + + restored, err := NewReservoirItemsSketchFromSlice[int64](bytes, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 
sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} + +func TestSketchSerializationString(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[string](5) + + sketch.Update("apple") + sketch.Update("banana") + sketch.Update("cherry") + + bytes, err := sketch.ToSlice(StringSerDe{}) + assert.NoError(t, err) + + restored, err := NewReservoirItemsSketchFromSlice[string](bytes, StringSerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.Samples(), restored.Samples()) +} + +func TestSketchSerializationEmpty(t *testing.T) { + sketch, _ := NewReservoirItemsSketch[int64](10) + + bytes, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 8, len(bytes)) // Minimal preamble + + restored, err := NewReservoirItemsSketchFromSlice[int64](bytes, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, restored.IsEmpty()) + assert.Equal(t, 10, restored.K()) +} diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_empty_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk new file mode 100644 index 0000000..90cf41e Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n100_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk new file mode 100644 index 0000000..691a9e0 Binary 
files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n10_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk new file mode 100644 index 0000000..3740b20 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n128_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk new file mode 100644 index 0000000..94accd5 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n1_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_exact_n32_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n32_k128_go.sk new file mode 100644 index 0000000..2133d99 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_exact_n32_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk new file mode 100644 index 0000000..fbe2000 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk new file mode 100644 index 0000000..6b36222 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k32_go.sk differ diff --git 
a/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk new file mode 100644 index 0000000..05fdf31 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_double_sampling_n1000_k64_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_empty_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk new file mode 100644 index 0000000..f4502c0 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n100_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n10_k128_go.sk new file mode 100644 index 0000000..d5edac6 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n10_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n128_k128_go.sk new file mode 100644 index 0000000..6442791 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n128_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n1_k128_go.sk new file mode 100644 index 0000000..94accd5 Binary files 
/dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n1_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_exact_n32_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n32_k128_go.sk new file mode 100644 index 0000000..8df355c Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_exact_n32_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k128_go.sk new file mode 100644 index 0000000..fbcccda Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k32_go.sk new file mode 100644 index 0000000..6e336ed Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k32_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k64_go.sk new file mode 100644 index 0000000..01a7c61 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_long_sampling_n1000_k64_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_empty_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_empty_k128_go.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_empty_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk 
b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk new file mode 100644 index 0000000..85d73bd Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n100_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk new file mode 100644 index 0000000..54b5004 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n10_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk new file mode 100644 index 0000000..c21ebf2 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n128_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk new file mode 100644 index 0000000..28742ca Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n1_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk new file mode 100644 index 0000000..e71a29f Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_exact_n32_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k128_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k128_go.sk new file mode 100644 index 0000000..c3741f0 Binary files /dev/null and 
b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k128_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk new file mode 100644 index 0000000..10ebc00 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k32_go.sk differ diff --git a/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk new file mode 100644 index 0000000..a4895c8 Binary files /dev/null and b/serialization_test_data/go_generated_files/reservoir_items_string_sampling_n1000_k64_go.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_empty_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_empty_k128_java.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_empty_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_exact_n100_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n100_k128_java.sk new file mode 100644 index 0000000..90cf41e Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n100_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_exact_n10_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n10_k128_java.sk new file mode 100644 index 0000000..691a9e0 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n10_k128_java.sk differ diff --git 
a/serialization_test_data/java_generated_files/reservoir_items_double_exact_n128_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n128_k128_java.sk new file mode 100644 index 0000000..3740b20 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n128_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_exact_n1_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n1_k128_java.sk new file mode 100644 index 0000000..94accd5 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n1_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_exact_n32_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n32_k128_java.sk new file mode 100644 index 0000000..2133d99 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_exact_n32_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k128_java.sk new file mode 100644 index 0000000..be8a23a Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk new file mode 100644 index 0000000..fafccd4 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k32_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k64_java.sk 
b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k64_java.sk new file mode 100644 index 0000000..05fcde9 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_double_sampling_n1000_k64_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_empty_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk new file mode 100644 index 0000000..f4502c0 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n100_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk new file mode 100644 index 0000000..d5edac6 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n10_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk new file mode 100644 index 0000000..6442791 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n128_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n1_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n1_k128_java.sk new file mode 100644 index 0000000..94accd5 Binary files /dev/null and 
b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n1_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_exact_n32_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n32_k128_java.sk new file mode 100644 index 0000000..8df355c Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_exact_n32_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k128_java.sk new file mode 100644 index 0000000..4c44fbe Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k32_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k32_java.sk new file mode 100644 index 0000000..8400703 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k32_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k64_java.sk b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k64_java.sk new file mode 100644 index 0000000..b112866 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_long_sampling_n1000_k64_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk new file mode 100644 index 0000000..cfe18e2 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_empty_k128_java.sk differ diff --git 
a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk new file mode 100644 index 0000000..85d73bd Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n100_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk new file mode 100644 index 0000000..54b5004 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n10_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk new file mode 100644 index 0000000..c21ebf2 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n128_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk new file mode 100644 index 0000000..28742ca Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n1_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_exact_n32_k128_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n32_k128_java.sk new file mode 100644 index 0000000..e71a29f Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_exact_n32_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k128_java.sk 
b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k128_java.sk new file mode 100644 index 0000000..618b395 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k128_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k32_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k32_java.sk new file mode 100644 index 0000000..807667f Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k32_java.sk differ diff --git a/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk new file mode 100644 index 0000000..206c5d7 Binary files /dev/null and b/serialization_test_data/java_generated_files/reservoir_items_string_sampling_n1000_k64_java.sk differ