diff --git a/build.gradle b/build.gradle index 33b1645..b5f9126 100644 --- a/build.gradle +++ b/build.gradle @@ -42,7 +42,7 @@ dependencies { // Suppress SLF4J warnings jmhImplementation 'org.slf4j:slf4j-nop:1.7.36' - // Competitor libraries for benchmarking (JMH only) + // Other serialization libraries for benchmarking (JMH only) jmhImplementation 'com.google.protobuf:protobuf-java:3.25.1' jmhImplementation 'org.apache.avro:avro:1.11.3' jmhImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.0' @@ -50,6 +50,8 @@ dependencies { jmhImplementation 'com.esotericsoftware:kryo:5.4.0' jmhImplementation 'org.msgpack:msgpack-core:0.9.8' jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' + jmhImplementation 'org.apache.thrift:libthrift:0.19.0' + jmhImplementation 'javax.annotation:javax.annotation-api:1.3.2' } protobuf { @@ -137,11 +139,64 @@ tasks.register('generateFlatBuffers', Exec) { } } +// Task to download the Thrift compiler +tasks.register('downloadThrift', Exec) { + description = 'Download Thrift compiler' + group = 'build setup' + + def thriftVersion = "0.19.0" + def thriftExecutable = file("${buildDir}/thrift/thrift.exe") + def thriftUrl = "https://archive.apache.org/dist/thrift/${thriftVersion}/thrift-${thriftVersion}.exe" + + outputs.file(thriftExecutable) + + onlyIf { + !thriftExecutable.exists() && System.getProperty('os.name').toLowerCase().contains('windows') + } + + doFirst { + println "Downloading Thrift compiler for Windows from $thriftUrl..." + thriftExecutable.parentFile.mkdirs() + } + + commandLine 'curl', '-L', '-o', thriftExecutable.absolutePath, thriftUrl + + doLast { + println "Thrift compiler downloaded to: ${thriftExecutable}" + } +} + +// Task to generate Java code from Thrift IDL files for JMH benchmarks +tasks.register('generateJmhThrift', Exec) { + dependsOn tasks.downloadThrift + description = 'Generate Java classes from Thrift schema' + group = 'build' + + def thriftExecutable = file("${buildDir}/thrift/thrift.exe") + def schemaFile = file('src/jmh/thrift/test_record.thrift') + def outputDir = file('build/generated-src/thrift/jmh/java') + + // Only run if the thrift executable exists (i.e., on Windows) + onlyIf { thriftExecutable.exists() } + + commandLine thriftExecutable.absolutePath, '-r', '--gen', 'java', '-o', outputDir.absolutePath, schemaFile.absolutePath + + inputs.file(schemaFile) + outputs.dir(outputDir) + + doFirst { + outputDir.mkdirs() + } +} + + // Add generated FlatBuffers sources to JMH source set sourceSets { jmh { java { srcDir 'build/generated/source/flatbuffers/jmh/java' + srcDir 'build/generated-src/thrift/jmh/java' + srcDir 'build/generated/sbe/java' } proto { srcDir 'src/jmh/proto' @@ -149,8 +204,9 @@ sourceSets { } } -// Make JMH compilation depend on FlatBuffers generation +// Make JMH compilation depend on generation tasks compileJmhJava.dependsOn generateFlatBuffers +compileJmhJava.dependsOn generateJmhThrift // Handle duplicate proto files tasks.named('processJmhResources') { diff --git a/src/jmh/flatbuffers/test_record.fbs b/src/jmh/flatbuffers/test_record.fbs index ccc31d0..698bd81 100644 --- a/src/jmh/flatbuffers/test_record.fbs +++ b/src/jmh/flatbuffers/test_record.fbs @@ -1,15 +1,14 @@ -namespace com.imprint.benchmark; +namespace com.imprint.benchmark.flatbuffers; -table TestRecordFB { - id: int; - name: string; - price: double; +table TestRecord { + id: string; + timestamp: long; + flags: int; active: bool; - category: string; - tags: [string]; - metadata_keys: [string]; - metadata_values: [string]; - 
extra_data: [string]; + value: double; + data: [ubyte]; + tags: [int]; + metadata: [string]; // Representing map as a flat list of key/value strings for simplicity } -root_type TestRecordFB; \ No newline at end of file +root_type TestRecord; \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 6a6a958..f47da20 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -1,22 +1,7 @@ package com.imprint.benchmark; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.flatbuffers.FlatBufferBuilder; -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; -import com.imprint.core.SchemaId; -import com.imprint.types.MapKey; -import com.imprint.types.Value; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.*; -import org.msgpack.jackson.dataformat.MessagePackFactory; +import com.imprint.benchmark.serializers.*; +import com.imprint.benchmark.serializers.SerializingBenchmark; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -24,847 +9,76 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.nio.ByteBuffer; -import java.util.*; +import java.util.List; import java.util.concurrent.TimeUnit; -/** - * Head-to-head benchmarks comparing Imprint against other serialization libraries. - * Tests the performance claims made in the documentation. 
- */ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -@SuppressWarnings("unused") +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 7, time = 1) +@Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { - // Test data - private TestRecord testData; + private static final List FRAMEWORKS = List.of( + new ImprintSerializingBenchmark(), + new JacksonSerializingBenchmark(), + new ProtobufSerializingBenchmark(), + new FlatBuffersSerializingBenchmark(), + new AvroSerializingBenchmark(), + new ThriftSerializingBenchmark(), + new KryoSerializingBenchmark(), + new MessagePackSerializingBenchmark()); - // Serialized formats - private ByteBuffer imprintBytesBuffer; - private byte[] jacksonJsonBytes; - private byte[] kryoBytes; - private byte[] messagePackBytes; - private byte[] avroBytes; - private byte[] protobufBytes; - private ByteBuffer flatbuffersBytes; + @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack", "CapnProto"}) + public String framework; - // Library instances - private Schema avroSchema; - private DatumWriter avroWriter; - private DatumReader avroReader; - private ObjectMapper jacksonJsonMapper; - private Kryo kryo; - private ObjectMapper messagePackMapper; + private SerializingBenchmark serializingBenchmark; - @Setup - public void setup() throws Exception { - testData = createTestRecord(); + @Setup(Level.Trial) + public void setup() { + serializingBenchmark = FRAMEWORKS.stream() + .filter(c -> c.name().equals(framework)) + .findFirst() + .orElseThrow(() -> new IllegalStateException("Unknown framework: " + framework)); - // Initialize libraries - jacksonJsonMapper = new ObjectMapper(); - kryo = new Kryo(); - kryo.register(TestRecord.class); - kryo.register(ArrayList.class); - kryo.register(HashMap.class); - kryo.register(Arrays.asList().getClass()); + // Create the test data + DataGenerator.TestRecord testRecord1 = DataGenerator.createTestRecord(); + DataGenerator.TestRecord testRecord2 = DataGenerator.createTestRecord(); - // Initialize MessagePack ObjectMapper - messagePackMapper = new ObjectMapper(new MessagePackFactory()); - setupAvro(); - - // Pre-serialize for deserialization benchmarks - imprintBytesBuffer = serializeWithImprint(testData); - jacksonJsonBytes = serializeWithJacksonJson(testData); - kryoBytes = serializeWithKryo(testData); - messagePackBytes = serializeWithMessagePack(testData); - avroBytes = serializeWithAvro(testData); - protobufBytes = serializeWithProtobuf(testData); - flatbuffersBytes = serializeWithFlatBuffers(testData); - } - - // ===== SERIALIZATION BENCHMARKS ===== - - @Benchmark - public void serializeImprint(Blackhole bh) throws Exception { - ByteBuffer result = serializeWithImprint(testData); - bh.consume(result); - } - - @Benchmark - public void serializeJacksonJson(Blackhole bh) throws Exception { - byte[] result = serializeWithJacksonJson(testData); - bh.consume(result); - } - - @Benchmark - public void serializeKryo(Blackhole bh) { - byte[] result = serializeWithKryo(testData); - bh.consume(result); - } - - @Benchmark - public void serializeMessagePack(Blackhole bh) throws Exception { - byte[] result = serializeWithMessagePack(testData); - bh.consume(result); - } - - @Benchmark - public void serializeAvro(Blackhole bh) throws Exception { - byte[] result = 
serializeWithAvro(testData); - bh.consume(result); - } - - @Benchmark - public void serializeProtobuf(Blackhole bh) { - byte[] result = serializeWithProtobuf(testData); - bh.consume(result); - } - - @Benchmark - public void serializeFlatBuffers(Blackhole bh) { - ByteBuffer result = serializeWithFlatBuffers(testData); - bh.consume(result); - } - - // ===== SETUP ONLY ===== - - @Benchmark - public void deserializeSetupImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - bh.consume(result); - } - - @Benchmark - public void deserializeSetupFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(result); + // Setup the framework with the data + serializingBenchmark.setup(testRecord1, testRecord2); } - // ===== FULL DESERIALIZATION BENCHMARKS ===== - @Benchmark - public void deserializeJacksonJson(Blackhole bh) throws Exception { - TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - bh.consume(result); + public void serialize(Blackhole bh) { + serializingBenchmark.serialize(bh); } - @Benchmark - public void deserializeKryo(Blackhole bh) { - Input input = new Input(new ByteArrayInputStream(kryoBytes)); - TestRecord result = kryo.readObject(input, TestRecord.class); - input.close(); - bh.consume(result); - } - - @Benchmark - public void deserializeMessagePack(Blackhole bh) throws Exception { - TestRecord result = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(result); - } - - @Benchmark - public void deserializeAvro(Blackhole bh) throws Exception { - GenericRecord result = deserializeWithAvro(avroBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord result = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - // Access all fields to force full deserialization - result.getInt32(1); // id - result.getString(2); // name - result.getFloat64(3); // price - result.getBoolean(4); // active - result.getString(5); // category - result.getArray(6); // tags - result.getMap(7); // metadata - for (int i = 8; i < 21; i++) { - result.getString(i); // extraData fields - } - - bh.consume(result); - } - - @Benchmark - public void deserializeFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - - // Access all fields - result.id(); - result.name(); - result.price(); - result.active(); - result.category(); - // Access all tags - for (int i = 0; i < result.tagsLength(); i++) { - result.tags(i); - } - // Access all metadata - for (int i = 0; i < result.metadataKeysLength(); i++) { - result.metadataKeys(i); - result.metadataValues(i); - } - // Access all extra data - for (int i = 0; i < result.extraDataLength(); i++) { - result.extraData(i); - } - - bh.consume(result); - } - - // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a record - - @Benchmark - public void singleFieldAccessImprint(Blackhole bh) throws Exception { - ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - var field15 = record.getString(15); - bh.consume(field15); - } - - @Benchmark - public void 
singleFieldAccessJacksonJson(Blackhole bh) throws Exception { - TestRecord record = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessKryo(Blackhole bh) { - Input input = new Input(new ByteArrayInputStream(kryoBytes)); - TestRecord record = kryo.readObject(input, TestRecord.class); - input.close(); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { - TestRecord record = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessAvro(Blackhole bh) throws Exception { - GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); - } - - @Benchmark - public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); - } - - @Benchmark - public void singleFieldAccessFlatBuffers(Blackhole bh) { - TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); - } - - // ===== SIZE COMPARISON ===== - - @Benchmark - public void measureImprintSize(Blackhole bh) { - bh.consume(imprintBytesBuffer.remaining()); - } - - @Benchmark - public void measureJacksonJsonSize(Blackhole bh) { - bh.consume(jacksonJsonBytes.length); - } - - @Benchmark - public void measureKryoSize(Blackhole bh) { - bh.consume(kryoBytes.length); - } - - @Benchmark - public void measureMessagePackSize(Blackhole bh) { - bh.consume(messagePackBytes.length); - } - - @Benchmark - public void measureAvroSize(Blackhole bh) { - bh.consume(avroBytes.length); + //@Benchmark + public void deserialize(Blackhole bh) { + serializingBenchmark.deserialize(bh); } @Benchmark - public void measureProtobufSize(Blackhole bh) { - bh.consume(protobufBytes.length); + public void projectAndSerialize(Blackhole bh) { + serializingBenchmark.projectAndSerialize(bh); } @Benchmark - public void measureFlatBuffersSize(Blackhole bh) { - bh.consume(flatbuffersBytes.remaining()); - } - - // ===== MERGE SIMULATION BENCHMARKS ===== - - //@Benchmark - public void mergeImprint(Blackhole bh) throws Exception { - var record1Buffer = imprintBytesBuffer.duplicate(); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithImprint(record2Data); - - var deserialized1 = ImprintRecord.deserialize(record1Buffer); - var deserialized2 = ImprintRecord.deserialize(record2Buffer); - var merged = simulateMerge(deserialized1, deserialized2); - - bh.consume(merged); - } - - //@Benchmark - public void mergeJacksonJson(Blackhole bh) throws Exception { - var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithJacksonJson(record2Data); - var record2 = jacksonJsonMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = jacksonJsonMapper.writeValueAsBytes(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeKryo(Blackhole bh) { - Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); - var record1 = kryo.readObject(input1, TestRecord.class); - input1.close(); - - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithKryo(record2Data); - Input input2 = new 
Input(new ByteArrayInputStream(record2Bytes)); - var record2 = kryo.readObject(input2, TestRecord.class); - input2.close(); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = serializeWithKryo(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeMessagePack(Blackhole bh) throws Exception { - var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithMessagePack(record2Data); - var record2 = messagePackMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = messagePackMapper.writeValueAsBytes(mergedPojo); - bh.consume(result); + public void mergeAndSerialize(Blackhole bh) { + serializingBenchmark.mergeAndSerialize(bh); } //@Benchmark - public void mergeAvro(Blackhole bh) throws Exception { - var record1 = deserializeWithAvro(avroBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithAvro(record2Data); - var record2 = deserializeWithAvro(record2Bytes); - - var merged = mergeAvroRecords(record1, record2); - byte[] result = serializeAvroRecord(merged); - bh.consume(result); + public void accessField(Blackhole bh) { + serializingBenchmark.accessField(bh); } - //@Benchmark - public void mergeProtobuf(Blackhole bh) throws Exception { - var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithProtobuf(record2Data); - var record2 = TestRecordProto.TestRecord.parseFrom(record2Bytes); - - var merged = mergeProtobufRecords(record1, record2); - byte[] result = merged.toByteArray(); - bh.consume(result); - } - - //@Benchmark - public void mergeFlatBuffers(Blackhole bh) { - var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithFlatBuffers(record2Data); - var record2 = TestRecordFB.getRootAsTestRecordFB(record2Buffer); - - var merged = mergeFlatBuffersRecords(record1, record2); - bh.consume(merged); - } - - // ===== MAIN METHOD TO RUN BENCHMARKS ===== - public static void main(String[] args) throws RunnerException { - runFieldAccessBenchmarks(); - // Or, uncomment specific runner methods to execute subsets: - // runSerializationBenchmarks(); - // runDeserializationBenchmarks(); - // runFieldAccessBenchmarks(); - // runSizeComparisonBenchmarks(); - // runMergeBenchmarks(); - // runMessagePackBenchmarks(); - } - - public static void runAll() throws RunnerException { Options opt = new OptionsBuilder() .include(ComparisonBenchmark.class.getSimpleName()) .build(); new Runner(opt).run(); } - - public static void runSerializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".serialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runDeserializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".deserialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runFieldAccessBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".singleFieldAccess.*") - .build(); - new Runner(opt).run(); - } - - public static void runSizeComparisonBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - 
.include(ComparisonBenchmark.class.getSimpleName() + ".measure.*") - .build(); - new Runner(opt).run(); - } - - public static void runMergeBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".merge.*") - .build(); - new Runner(opt).run(); - } - - public static void runMessagePackBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*MessagePack.*") - .build(); - new Runner(opt).run(); - } - - public static void runAvroBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*Avro.*") - .build(); - new Runner(opt).run(); - } - - public static void runProtobufBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*Protobuf.*") - .build(); - new Runner(opt).run(); - } - - public static void runFlatBuffersBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*FlatBuffers.*") - .build(); - new Runner(opt).run(); - } - - // ===== HELPER METHODS ===== - - private void setupAvro() { - String schemaJson = "{\n" + - " \"type\": \"record\",\n" + - " \"name\": \"TestRecord\",\n" + - " \"fields\": [\n" + - " {\"name\": \"id\", \"type\": \"int\"},\n" + - " {\"name\": \"name\", \"type\": \"string\"},\n" + - " {\"name\": \"price\", \"type\": \"double\"},\n" + - " {\"name\": \"active\", \"type\": \"boolean\"},\n" + - " {\"name\": \"category\", \"type\": \"string\"},\n" + - " {\"name\": \"tags\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" + - " {\"name\": \"metadata\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},\n" + - " {\"name\": \"extraData0\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData1\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData2\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData3\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData4\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData5\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData6\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData7\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData8\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData9\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData10\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData11\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData12\", \"type\": \"string\"}\n" + - " ]\n" + - "}"; - - avroSchema = new Schema.Parser().parse(schemaJson); - avroWriter = new GenericDatumWriter<>(avroSchema); - avroReader = new GenericDatumReader<>(avroSchema); - } - - private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - writer.addField(1, Value.fromInt32(data.id)); - writer.addField(2, Value.fromString(data.name)); - writer.addField(3, Value.fromFloat64(data.price)); - writer.addField(4, Value.fromBoolean(data.active)); - writer.addField(5, Value.fromString(data.category)); - - var tagValues = new ArrayList(); - if (data.tags != null) { - for (String tag : data.tags) { - tagValues.add(Value.fromString(tag)); - } - } - writer.addField(6, Value.fromArray(tagValues)); - - var metadataMap = new HashMap(); - if (data.metadata != null) { - for (var entry : data.metadata.entrySet()) { - 
metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); - } - } - writer.addField(7, Value.fromMap(metadataMap)); - - if (data.extraData != null) { - for (int i = 0; i < data.extraData.size(); i++) { - writer.addField(8 + i, Value.fromString(data.extraData.get(i))); - } - } - - return writer.build().serializeToBuffer(); - } - - private byte[] serializeWithJacksonJson(TestRecord data) throws Exception { - return jacksonJsonMapper.writeValueAsBytes(data); - } - - private byte[] serializeWithKryo(TestRecord data) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Output output = new Output(baos); - kryo.writeObject(output, data); - output.close(); - return baos.toByteArray(); - } - - private byte[] serializeWithMessagePack(TestRecord data) throws Exception { - return messagePackMapper.writeValueAsBytes(data); - } - - private byte[] serializeWithAvro(TestRecord data) throws Exception { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("id", data.id); - record.put("name", data.name); - record.put("price", data.price); - record.put("active", data.active); - record.put("category", data.category); - record.put("tags", data.tags); - record.put("metadata", data.metadata); - - for (int i = 0; i < data.extraData.size(); i++) { - record.put("extraData" + i, data.extraData.get(i)); - } - - return serializeAvroRecord(record); - } - - private byte[] serializeAvroRecord(GenericRecord record) throws Exception { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null); - avroWriter.write(record, encoder); - encoder.flush(); - return baos.toByteArray(); - } - - private GenericRecord deserializeWithAvro(byte[] data) throws Exception { - Decoder decoder = DecoderFactory.get().binaryDecoder(data, null); - return avroReader.read(null, decoder); - } - - private byte[] serializeWithProtobuf(TestRecord data) { - var builder = TestRecordProto.TestRecord.newBuilder() - .setId(data.id) - .setName(data.name) - .setPrice(data.price) - .setActive(data.active) - .setCategory(data.category) - .addAllTags(data.tags) - .putAllMetadata(data.metadata); - - for (String extraData : data.extraData) { - builder.addExtraData(extraData); - } - - return builder.build().toByteArray(); - } - - private ByteBuffer serializeWithFlatBuffers(TestRecord data) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); - - // Create strings (must be created before the object that uses them) - int nameOffset = builder.createString(data.name); - int categoryOffset = builder.createString(data.category); - - // Create tags array - int[] tagOffsets = new int[data.tags.size()]; - for (int i = 0; i < data.tags.size(); i++) { - tagOffsets[i] = builder.createString(data.tags.get(i)); - } - int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); - - // Create metadata (as parallel arrays for keys and values) - String[] metadataKeys = data.metadata.keySet().toArray(new String[0]); - String[] metadataValues = new String[metadataKeys.length]; - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - metadataValues[i] = data.metadata.get(metadataKeys[i]); - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = builder.createString(metadataValues[i]); - } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = 
TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - // Create extra data array - int[] extraDataOffsets = new int[data.extraData.size()]; - for (int i = 0; i < data.extraData.size(); i++) { - extraDataOffsets[i] = builder.createString(data.extraData.get(i)); - } - int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the main object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, data.id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, data.price); - TestRecordFB.addActive(builder, data.active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); - - // Finish and return - builder.finish(recordOffset); - return builder.dataBuffer().slice(); - } - - private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var writer = new ImprintWriter(first.getHeader().getSchemaId()); - var usedFieldIds = new HashSet(); - - copyFieldsToWriter(first, writer, usedFieldIds); - copyFieldsToWriter(second, writer, usedFieldIds); - - return writer.build(); - } - - private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set usedFieldIds) throws Exception { - for (var entry : record.getDirectory()) { - int fieldId = entry.getId(); - if (!usedFieldIds.contains(fieldId)) { - var value = record.getValue(fieldId); - if (value != null) { - writer.addField(fieldId, value); - usedFieldIds.add(fieldId); - } - } - } - } - - private TestRecord mergeTestRecords(TestRecord first, TestRecord second) { - var merged = new TestRecord(); - merged.id = first.id; - merged.name = first.name != null ? first.name : second.name; - merged.price = first.price != 0.0 ? first.price : second.price; - merged.active = first.active; - merged.category = first.category != null ? first.category : second.category; - - merged.tags = new ArrayList<>(first.tags); - merged.tags.addAll(second.tags); - - merged.metadata = new HashMap<>(first.metadata); - merged.metadata.putAll(second.metadata); - - return merged; - } - - private GenericRecord mergeAvroRecords(GenericRecord first, GenericRecord second) { - GenericRecord merged = new GenericData.Record(avroSchema); - - // Copy all fields from first record - for (Schema.Field field : avroSchema.getFields()) { - merged.put(field.name(), first.get(field.name())); - } - - // Override with non-null values from second record - for (Schema.Field field : avroSchema.getFields()) { - Object secondValue = second.get(field.name()); - if (secondValue != null && !secondValue.toString().isEmpty()) { - merged.put(field.name(), secondValue); - } - } - - return merged; - } - - private TestRecordProto.TestRecord mergeProtobufRecords(TestRecordProto.TestRecord first, TestRecordProto.TestRecord second) { - return TestRecordProto.TestRecord.newBuilder() - .mergeFrom(first) - .mergeFrom(second) - .build(); - } - - private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB second) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); - - // Use second record's values if they exist, otherwise first record's values - String name = second.name() != null && !second.name().isEmpty() ? 
second.name() : first.name(); - String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); - double price = second.price() != 0.0 ? second.price() : first.price(); - boolean active = second.active(); // Use second's boolean value - int id = first.id(); // Keep first record's ID - - // Create merged strings - int nameOffset = builder.createString(name); - int categoryOffset = builder.createString(category); - - // Merge tags (combine both arrays) - List mergedTags = new ArrayList<>(); - for (int i = 0; i < first.tagsLength(); i++) { - mergedTags.add(first.tags(i)); - } - for (int i = 0; i < second.tagsLength(); i++) { - mergedTags.add(second.tags(i)); - } - - int[] tagOffsets = new int[mergedTags.size()]; - for (int i = 0; i < mergedTags.size(); i++) { - tagOffsets[i] = builder.createString(mergedTags.get(i)); - } - int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); - - // Merge metadata (second overwrites first) - Map mergedMetadata = new HashMap<>(); - for (int i = 0; i < first.metadataKeysLength(); i++) { - mergedMetadata.put(first.metadataKeys(i), first.metadataValues(i)); - } - for (int i = 0; i < second.metadataKeysLength(); i++) { - mergedMetadata.put(second.metadataKeys(i), second.metadataValues(i)); - } - - String[] metadataKeys = mergedMetadata.keySet().toArray(new String[0]); - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = builder.createString(mergedMetadata.get(metadataKeys[i])); - } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - // Use first record's extra data (or could merge both) - int[] extraDataOffsets = new int[first.extraDataLength()]; - for (int i = 0; i < first.extraDataLength(); i++) { - extraDataOffsets[i] = builder.createString(first.extraData(i)); - } - int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the merged object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, price); - TestRecordFB.addActive(builder, active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); - - builder.finish(recordOffset); - return builder.dataBuffer().slice(); - } - - private TestRecord createTestRecord() { - var record = new TestRecord(); - record.id = 12345; - record.name = "Test Product"; - record.price = 99.99; - record.active = true; - record.category = "Electronics"; - - record.tags = Arrays.asList("popular", "trending", "bestseller"); - - record.metadata = new HashMap<>(); - record.metadata.put("manufacturer", "TechCorp"); - record.metadata.put("model", "TC-2024"); - record.metadata.put("warranty", "2 years"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value_" + (1000 + i)); - } - - return record; - } - - private TestRecord createTestRecord2() { - var record = new TestRecord(); - 
record.id = 67890; - record.name = "Test Product 2"; - record.price = 149.99; - record.active = false; - record.category = "Software"; - - record.tags = Arrays.asList("new", "premium"); - - record.metadata = new HashMap<>(); - record.metadata.put("vendor", "SoftCorp"); - record.metadata.put("version", "2.1"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value2_" + (2000 + i)); - } - - return record; - } - - // Test data class for other serialization libraries - public static class TestRecord { - public int id; - public String name; - public double price; - public boolean active; - public String category; - public List tags = new ArrayList<>(); - public Map metadata = new HashMap<>(); - public List extraData = new ArrayList<>(); // Fields 8-20 for large record test - - public TestRecord() {} // Required for deserialization - } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/DataGenerator.java b/src/jmh/java/com/imprint/benchmark/DataGenerator.java new file mode 100644 index 0000000..7dd65b2 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/DataGenerator.java @@ -0,0 +1,67 @@ +package com.imprint.benchmark; + +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DataGenerator { + + /** + * A standard record used for serialization benchmarks. + * Contains a mix of common data types. + */ + public static class TestRecord { + public String id; + public long timestamp; + public int flags; + public boolean active; + public double value; + public byte[] data; + public List tags; + public Map metadata; + } + + /** + * A smaller record representing a projection of the full TestRecord. 
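+     * Keeps only the {@code id}, {@code timestamp}, and {@code tags} fields of the full record.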
+     */
+    public static class ProjectedRecord {
+        public String id;
+        public long timestamp;
+        public List<Integer> tags;
+    }
+
+    public static TestRecord createTestRecord() {
+        var record = new TestRecord();
+        record.id = "ID" + System.nanoTime();
+        record.timestamp = System.currentTimeMillis();
+        record.flags = 0xDEADBEEF;
+        record.active = true;
+        record.value = Math.PI;
+        record.data = createBytes(128);
+        record.tags = createIntList(20);
+        record.metadata = createStringMap(10);
+        return record;
+    }
+
+    public static byte[] createBytes(int size) {
+        byte[] bytes = new byte[size];
+        new Random(0).nextBytes(bytes);
+        return bytes;
+    }
+
+    public static List<Integer> createIntList(int size) {
+        return IntStream.range(0, size).boxed().collect(Collectors.toList());
+    }
+
+    public static Map<String, String> createStringMap(int size) {
+        Map<String, String> map = new HashMap<>();
+        for (int i = 0; i < size; i++) {
+            map.put("key" + i, "value" + i);
+        }
+        return map;
+    }
+}
\ No newline at end of file
diff --git a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java
index 1ead21f..06a7717 100644
--- a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java
+++ b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java
@@ -1,7 +1,7 @@
 package com.imprint.benchmark;
 
 import com.imprint.core.ImprintRecord;
-import com.imprint.core.ImprintWriter;
+import com.imprint.core.ImprintRecordBuilder;
 import com.imprint.core.SchemaId;
 import com.imprint.types.MapKey;
 import com.imprint.types.Value;
@@ -20,8 +20,8 @@
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.NANOSECONDS)
 @State(Scope.Benchmark)
-@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
-@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
 @Fork(1)
 public class FieldAccessBenchmark {
 
@@ -196,83 +196,80 @@ public void accessDenseRecord(Blackhole bh) throws Exception {
     * This should be replaced with actual project API when available.
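     * Only the requested field ids are copied; every other field from the source record is dropped.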
*/ private ImprintRecord simulateProject(ImprintRecord source, int[] fieldIds) throws Exception { - var writer = new ImprintWriter(source.getHeader().getSchemaId()); + var builder = ImprintRecord.builder(source.getHeader().getSchemaId()); for (int fieldId : fieldIds) { var value = source.getValue(fieldId); if (value != null) { - writer.addField(fieldId, value); + builder.field(fieldId, value); } } - return writer.build(); + return builder.build(); } private ImprintRecord createSparseRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - // Sparse record with large field IDs and few fields - writer.addField(1000, Value.fromString("sparse_field_1")); - writer.addField(5000, Value.fromInt32(42)); - writer.addField(10000, Value.fromFloat64(3.14159)); - writer.addField(15000, Value.fromBoolean(true)); - writer.addField(20000, Value.fromString("sparse_field_5")); - - return writer.build(); + return ImprintRecord.builder(new SchemaId(1, 0x12345678)) + .field(1000, Value.fromString("sparse_field_1")) + .field(5000, Value.fromInt32(42)) + .field(10000, Value.fromFloat64(3.14159)) + .field(15000, Value.fromBoolean(true)) + .field(20000, Value.fromString("sparse_field_5")) + .build(); } private ImprintRecord createDenseRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(2, 0x87654321)); + var builder = ImprintRecord.builder(new SchemaId(2, 0x87654321)); // Dense record with 100 sequential fields for (int i = 1; i <= 100; i++) { switch (i % 5) { case 0: - writer.addField(i, Value.fromString("string_field_" + i)); + builder.field(i, Value.fromString("string_field_" + i)); break; case 1: - writer.addField(i, Value.fromInt32(i * 10)); + builder.field(i, Value.fromInt32(i * 10)); break; case 2: - writer.addField(i, Value.fromFloat64(i * 1.5)); + builder.field(i, Value.fromFloat64(i * 1.5)); break; case 3: - writer.addField(i, Value.fromBoolean(i % 2 == 0)); + builder.field(i, Value.fromBoolean(i % 2 == 0)); break; case 4: - writer.addField(i, Value.fromInt64(i * 1000L)); + builder.field(i, Value.fromInt64(i * 1000L)); break; } } - return writer.build(); + return builder.build(); } private ImprintRecord createLargeRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(3, 0x11223344)); + var builder = ImprintRecord.builder(new SchemaId(3, 0xABCDEF12)); - // Large record with complex data types - writer.addField(1, Value.fromString("LargeRecord")); + // Large record with complex fields (arrays, maps) + builder.field(1, Value.fromString("Large record with complex data")); - // Large array field - var largeArray = new ArrayList(); - for (int i = 0; i < 1000; i++) { - largeArray.add(Value.fromString("array_item_" + i)); + // Add a large array + var list = new ArrayList(); + for (int i = 0; i < 200; i++) { + list.add(Value.fromInt32(i)); } - writer.addField(2, Value.fromArray(largeArray)); + builder.field(2, Value.fromArray(list)); - // Large map field - var largeMap = new HashMap(); + // Add a large map + var map = new HashMap(); for (int i = 0; i < 100; i++) { - largeMap.put(MapKey.fromString("key_" + i), Value.fromString("map_value_" + i)); + map.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); } - writer.addField(3, Value.fromMap(largeMap)); + builder.field(3, Value.fromMap(map)); - // Many regular fields + // Add more fields for (int i = 4; i <= 50; i++) { - writer.addField(i, Value.fromString("large_record_field_" + i + "_with_substantial_content")); + builder.field(i, Value.fromBytes(new byte[1024])); // 
1KB byte arrays } - return writer.build(); + return builder.build(); } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java deleted file mode 100644 index f93092a..0000000 --- a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java +++ /dev/null @@ -1,163 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; -import com.imprint.core.SchemaId; -import com.imprint.types.Value; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; - -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -/** - * Benchmarks for ImprintRecord merge operations. - * NOTE: These benchmarks simulate merge operations until the actual merge API is implemented. - */ -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -public class MergeBenchmark { - - private ImprintRecord productRecord; - private ImprintRecord orderRecord; - private ImprintRecord customerRecord; - - @Setup - public void setup() throws Exception { - productRecord = createProductRecord(); - orderRecord = createOrderRecord(); - customerRecord = createCustomerRecord(); - } - - // ===== SIMULATED MERGE BENCHMARKS ===== - // These will be replaced with actual merge API when implemented - - @Benchmark - public void mergeProductAndOrder(Blackhole bh) throws Exception { - // Simulate merge by creating a new record with fields from both - ImprintRecord result = simulateMerge(productRecord, orderRecord); - bh.consume(result); - } - - @Benchmark - public void mergeProductAndCustomer(Blackhole bh) throws Exception { - ImprintRecord result = simulateMerge(productRecord, customerRecord); - bh.consume(result); - } - - @Benchmark - public void mergeOrderAndCustomer(Blackhole bh) throws Exception { - ImprintRecord result = simulateMerge(orderRecord, customerRecord); - bh.consume(result); - } - - @Benchmark - public void mergeThreeRecords(Blackhole bh) throws Exception { - // Test merging multiple records - var temp = simulateMerge(productRecord, orderRecord); - ImprintRecord result = simulateMerge(temp, customerRecord); - bh.consume(result); - } - - // ===== MERGE CONFLICT HANDLING ===== - - @Benchmark - public void mergeWithConflicts(Blackhole bh) throws Exception { - // Create records with overlapping field IDs to test conflict resolution - var record1 = createRecordWithFields(1, 50, "record1_"); - var record2 = createRecordWithFields(25, 75, "record2_"); - - ImprintRecord result = simulateMerge(record1, record2); - bh.consume(result); - } - - // ===== HELPER METHODS ===== - - /** - * Simulates merge operation by manually copying fields. - * This should be replaced with actual merge API when available. 
- */ - private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var writer = new ImprintWriter(first.getHeader().getSchemaId()); - var usedFieldIds = new HashSet(); - - // Copy fields from first record (takes precedence) - copyFieldsToWriter(first, writer, usedFieldIds); - - // Copy non-conflicting fields from second record - copyFieldsToWriter(second, writer, usedFieldIds); - - return writer.build(); - } - - private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set usedFieldIds) throws Exception { - for (var entry : record.getDirectory()) { - int fieldId = entry.getId(); - if (!usedFieldIds.contains(fieldId)) { - var value = record.getValue(fieldId); - if (value != null) { - writer.addField(fieldId, value); - usedFieldIds.add(fieldId); - } - } - } - } - - private ImprintRecord createProductRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromString("Laptop")); - writer.addField(4, Value.fromFloat64(999.99)); - writer.addField(5, Value.fromString("Electronics")); - writer.addField(6, Value.fromInt32(50)); // stock - writer.addField(7, Value.fromString("TechCorp")); - writer.addField(8, Value.fromBoolean(true)); // available - - return writer.build(); - } - - private ImprintRecord createOrderRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(2, 0x87654321)); - - writer.addField(10, Value.fromString("Order")); - writer.addField(11, Value.fromInt32(67890)); - writer.addField(12, Value.fromInt32(12345)); // product_id (overlaps with product) - writer.addField(13, Value.fromInt32(2)); // quantity - writer.addField(14, Value.fromFloat64(1999.98)); // total - writer.addField(15, Value.fromString("2024-01-15")); // order_date - writer.addField(16, Value.fromString("shipped")); // status - - return writer.build(); - } - - private ImprintRecord createCustomerRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(3, 0x11223344)); - - writer.addField(20, Value.fromString("Customer")); - writer.addField(21, Value.fromInt32(555)); - writer.addField(22, Value.fromString("John Doe")); - writer.addField(23, Value.fromString("john.doe@email.com")); - writer.addField(24, Value.fromString("123 Main St")); - writer.addField(25, Value.fromString("premium")); // tier - writer.addField(26, Value.fromBoolean(true)); // active - - return writer.build(); - } - - private ImprintRecord createRecordWithFields(int startId, int endId, String prefix) throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - for (int i = startId; i <= endId; i++) { - writer.addField(i, Value.fromString(prefix + "field_" + i)); - } - - return writer.build(); - } -} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java index 3275843..51c9f48 100644 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java @@ -1,12 +1,16 @@ package com.imprint.benchmark; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; +import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; 
+import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -21,7 +25,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 7, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) public class SerializationBenchmark { @@ -35,10 +39,10 @@ public class SerializationBenchmark { @Setup public void setup() throws Exception { - // Create test records of varying sizes - smallRecord = createSmallRecord(); - mediumRecord = createMediumRecord(); - largeRecord = createLargeRecord(); + // Create test records of varying sizes for deserialization benchmarks + smallRecord = createSmallRecord().build(); + mediumRecord = createMediumRecord().build(); + largeRecord = createLargeRecord().build(); // Pre-serialize for deserialization benchmarks smallRecordBytes = smallRecord.serializeToBuffer(); @@ -49,20 +53,20 @@ public void setup() throws Exception { // ===== SERIALIZATION BENCHMARKS ===== @Benchmark - public void serializeSmallRecord(Blackhole bh) { - ByteBuffer result = smallRecord.serializeToBuffer(); + public void buildAndSerializeSmallRecord(Blackhole bh) throws Exception { + ByteBuffer result = createSmallRecord().buildToBuffer(); bh.consume(result); } @Benchmark - public void serializeMediumRecord(Blackhole bh) { - ByteBuffer result = mediumRecord.serializeToBuffer(); + public void buildAndSerializeMediumRecord(Blackhole bh) throws Exception { + ByteBuffer result = createMediumRecord().buildToBuffer(); bh.consume(result); } @Benchmark - public void serializeLargeRecord(Blackhole bh) { - ByteBuffer result = largeRecord.serializeToBuffer(); + public void buildAndSerializeLargeRecord(Blackhole bh) throws Exception { + ByteBuffer result = createLargeRecord().buildToBuffer(); bh.consume(result); } @@ -88,79 +92,89 @@ public void deserializeLargeRecord(Blackhole bh) throws Exception { // ===== HELPER METHODS ===== - private ImprintRecord createSmallRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - + private ImprintRecordBuilder createSmallRecord() throws Exception { // Small record: ~10 fields, simple types - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); - writer.addField(4, Value.fromBoolean(true)); - writer.addField(5, Value.fromString("Electronics")); - - return writer.build(); + return ImprintRecord.builder(new SchemaId(1, 0x12345678)) + .field(1, "Product") + .field(2, 12345) + .field(3, 99.99) + .field(4, true) + .field(5, "Electronics"); } - private ImprintRecord createMediumRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); + private ImprintRecordBuilder createMediumRecord() throws Exception { + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Medium record: ~50 fields, mixed types including arrays - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); - writer.addField(4, Value.fromBoolean(true)); - writer.addField(5, Value.fromString("Electronics")); + builder.field(1, "Product"); + builder.field(2, 12345); + builder.field(3, 99.99); + builder.field(4, true); + 
builder.field(5, "Electronics"); // Add array field var tags = Arrays.asList( - Value.fromString("popular"), - Value.fromString("trending"), - Value.fromString("bestseller") + "popular", + "trending", + "bestseller" ); - writer.addField(6, Value.fromArray(tags)); + builder.field(6, tags); // Add map field (all string values for consistency) - var metadata = new HashMap(); - metadata.put(MapKey.fromString("manufacturer"), Value.fromString("TechCorp")); - metadata.put(MapKey.fromString("model"), Value.fromString("TC-2024")); - metadata.put(MapKey.fromString("year"), Value.fromString("2024")); - writer.addField(7, Value.fromMap(metadata)); + var metadata = new HashMap(); + metadata.put("manufacturer", "TechCorp"); + metadata.put("model", "TC-2024"); + metadata.put("year", "2024"); + builder.field(7, metadata); // Add more fields for medium size for (int i = 8; i <= 50; i++) { - writer.addField(i, Value.fromString("field_" + i + "_value")); + builder.field(i, "field_" + i + "_value"); } - return writer.build(); + return builder; } - private ImprintRecord createLargeRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); + private ImprintRecordBuilder createLargeRecord() throws Exception { + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Large record: ~200 fields, complex nested structures - writer.addField(1, Value.fromString("LargeProduct")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); + builder.field(1, "LargeProduct"); + builder.field(2, 12345); + builder.field(3, 99.99); // Large array - var largeArray = new ArrayList(); + var largeArray = new ArrayList(); for (int i = 0; i < 100; i++) { - largeArray.add(Value.fromString("item_" + i)); + largeArray.add("item_" + i); } - writer.addField(4, Value.fromArray(largeArray)); + builder.field(4, largeArray); // Large map - var largeMap = new HashMap(); + var largeMap = new HashMap(); for (int i = 0; i < 50; i++) { - largeMap.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); + largeMap.put("key_" + i, "value_" + i); } - writer.addField(5, Value.fromMap(largeMap)); + builder.field(5, largeMap); // Many string fields for (int i = 6; i <= 200; i++) { - writer.addField(i, Value.fromString("this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size")); + builder.field(i, "this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size"); } - return writer.build(); + return builder; + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(SerializationBenchmark.class.getSimpleName()) + .forks(1) + .warmupIterations(5) + .measurementIterations(5) + .mode(Mode.AverageTime) + .timeUnit(TimeUnit.NANOSECONDS) + .build(); + + new Runner(opt).run(); } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java new file mode 100644 index 0000000..4f53203 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java @@ -0,0 +1,34 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +/** + * A minimal base class for serialization frameworks to compare against, holding the test data. 
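+ * <p>Concrete subclasses generally pre-serialize the two generated records in {@code setup}
+ * and implement the operations that {@code ComparisonBenchmark} measures. A rough usage
+ * sketch, for illustration only (bh is the JMH-provided {@code Blackhole}):
+ * <pre>{@code
+ * SerializingBenchmark b = new AvroSerializingBenchmark();
+ * b.setup(DataGenerator.createTestRecord(), DataGenerator.createTestRecord());
+ * b.serialize(bh);
+ * b.projectAndSerialize(bh);
+ * }</pre>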
+ */ +public abstract class AbstractSerializingBenchmark implements SerializingBenchmark { + + protected final String name; + protected DataGenerator.TestRecord testData; + protected DataGenerator.TestRecord testData2; + + protected AbstractSerializingBenchmark(String name) { + this.name = name; + } + + @Override + public String name() { + return name; + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + this.testData = testRecord; + this.testData2 = testRecord2; + } + + @Override + public void accessField(Blackhole bh) { + // Default implementation is a no-op + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java new file mode 100644 index 0000000..f3e5b8a --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java @@ -0,0 +1,178 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +public class AvroSerializingBenchmark extends AbstractSerializingBenchmark { + + private final Schema schema; + private final Schema projectedSchema; + private final DatumWriter writer; + private final DatumReader reader; + private final DatumWriter projectedWriter; + private byte[] serializedRecord1; + private byte[] serializedRecord2; + + public AvroSerializingBenchmark() { + super("Avro-Generic"); + String schemaDefinition = "{\"type\":\"record\",\"name\":\"TestRecord\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"string\"}," + + "{\"name\":\"timestamp\",\"type\":\"long\"}," + + "{\"name\":\"flags\",\"type\":\"int\"}," + + "{\"name\":\"active\",\"type\":\"boolean\"}," + + "{\"name\":\"value\",\"type\":\"double\"}," + + "{\"name\":\"data\",\"type\":\"bytes\"}," + + "{\"name\":\"tags\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}," + + "{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"string\"}}" + + "]}"; + this.schema = new Schema.Parser().parse(schemaDefinition); + this.writer = new GenericDatumWriter<>(schema); + this.reader = new GenericDatumReader<>(schema); + + String projectedSchemaDef = "{\"type\":\"record\",\"name\":\"ProjectedRecord\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"string\"}," + + "{\"name\":\"timestamp\",\"type\":\"long\"}," + + "{\"name\":\"tags\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}" + + "]}"; + this.projectedSchema = new Schema.Parser().parse(projectedSchemaDef); + this.projectedWriter = new GenericDatumWriter<>(projectedSchema); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord1 = buildRecord(testRecord); + this.serializedRecord2 = buildRecord(testRecord2); + } + + private byte[] buildRecord(DataGenerator.TestRecord pojo) { + GenericRecord record = new GenericData.Record(schema); + record.put("id", pojo.id); + record.put("timestamp", pojo.timestamp); + record.put("flags", pojo.flags); + record.put("active", pojo.active); + record.put("value", pojo.value); + 
record.put("data", ByteBuffer.wrap(pojo.data)); + record.put("tags", pojo.tags); + record.put("metadata", pojo.metadata); + + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + return out.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData)); + } + + @Override + public void deserialize(Blackhole bh) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord1, null); + bh.consume(reader.read(null, decoder)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + // Full round trip: deserialize, project to a new object, re-serialize + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord1, null); + GenericRecord original = reader.read(null, decoder); + + // With generic records, we can project by building a new record with the projected schema + GenericRecord projected = new GenericData.Record(projectedSchema); + projected.put("id", original.get("id")); + projected.put("timestamp", original.get("timestamp")); + projected.put("tags", ((java.util.List)original.get("tags")).subList(0, 5)); + + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + projectedWriter.write(projected, encoder); + encoder.flush(); + bh.consume(out.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // No direct merge in Avro. Must deserialize, merge manually, and re-serialize. 
+ GenericRecord r1 = buildAvroRecordFromBytes(this.serializedRecord1); + GenericRecord r2 = buildAvroRecordFromBytes(this.serializedRecord2); + + GenericRecord merged = new GenericData.Record(schema); + // Simplified merge logic: take most fields from r1, some from r2 + merged.put("id", r1.get("id")); + merged.put("timestamp", System.currentTimeMillis()); + merged.put("flags", r1.get("flags")); + merged.put("active", false); + merged.put("value", r1.get("value")); + merged.put("data", r1.get("data")); + merged.put("tags", r2.get("tags")); + merged.put("metadata", r2.get("metadata")); + + bh.consume(buildBytes(merged)); + } + + private GenericRecord buildAvroRecord(DataGenerator.TestRecord pojo) { + GenericRecord record = new GenericData.Record(schema); + record.put("id", pojo.id); + record.put("timestamp", pojo.timestamp); + record.put("flags", pojo.flags); + record.put("active", pojo.active); + record.put("value", pojo.value); + record.put("data", ByteBuffer.wrap(pojo.data)); + record.put("tags", pojo.tags); + record.put("metadata", pojo.metadata); + return record; + } + + private GenericRecord buildAvroRecordFromBytes(byte[] bytes) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); + return reader.read(null, decoder); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private byte[] buildBytes(GenericRecord record) { + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + return out.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord1, null); + GenericRecord record = reader.read(null, decoder); + bh.consume(record.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java new file mode 100644 index 0000000..846b15c --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java @@ -0,0 +1,145 @@ +package com.imprint.benchmark.serializers; + +import com.google.flatbuffers.FlatBufferBuilder; +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.flatbuffers.TestRecord; +import org.openjdk.jmh.infra.Blackhole; + +import java.nio.ByteBuffer; + +public class FlatBuffersSerializingBenchmark extends AbstractSerializingBenchmark { + + private ByteBuffer serializedRecord1; + private ByteBuffer serializedRecord2; + + public FlatBuffersSerializingBenchmark() { + super("FlatBuffers"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord1 = buildRecord(testRecord); + this.serializedRecord2 = buildRecord(testRecord2); + } + + private ByteBuffer buildRecord(DataGenerator.TestRecord pojo) { + FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + int idOffset = builder.createString(pojo.id); + int[] tagsOffsets = pojo.tags.stream().mapToInt(i -> i).toArray(); + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsOffsets); + + int[] metadataKeysOffsets = 
pojo.metadata.keySet().stream().mapToInt(builder::createString).toArray(); + // This is not correct FlatBuffers map creation, it's a placeholder. + // A proper implementation would require a table for each entry. + // For this benchmark, we'll just serialize the keys vector. + int metadataVectorOffset = TestRecord.createMetadataVector(builder, metadataKeysOffsets); + + int dataOffset = TestRecord.createDataVector(builder, pojo.data); + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, pojo.timestamp); + TestRecord.addFlags(builder, pojo.flags); + TestRecord.addActive(builder, pojo.active); + TestRecord.addValue(builder, pojo.value); + TestRecord.addData(builder, dataOffset); + TestRecord.addTags(builder, tagsVectorOffset); + TestRecord.addMetadata(builder, metadataVectorOffset); + + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + + return builder.dataBuffer(); + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData)); + } + + @Override + public void deserialize(Blackhole bh) { + bh.consume(TestRecord.getRootAsTestRecord(serializedRecord1)); + } + + @Override + public void projectAndSerialize(Blackhole bh) { + + FlatBufferBuilder builder = new FlatBufferBuilder(256); + var original = TestRecord.getRootAsTestRecord(serializedRecord1); + + int idOffset = builder.createString(original.id()); + + // Manual sublist + int[] tagsOffsets = new int[5]; + for (int i = 0; i < 5; i++) { + tagsOffsets[i] = original.tags(i); + } + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsOffsets); + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, original.timestamp()); + TestRecord.addTags(builder, tagsVectorOffset); + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + + bh.consume(builder.dataBuffer()); + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // No direct merge operation. Must read both, build a new one. 
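+ // Finished FlatBuffers are immutable, so strings and vectors are re-created through a fresh builder.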
+ var r1 = TestRecord.getRootAsTestRecord(serializedRecord1); + var r2 = TestRecord.getRootAsTestRecord(serializedRecord2); + + FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + int idOffset = builder.createString(r1.id()); + + // Correctly read and rebuild the tags vector + // For this benchmark, we'll just take tags from the second record + int[] tagsArray = new int[r2.tagsLength()]; + for (int i = 0; i < r2.tagsLength(); i++) { + tagsArray[i] = r2.tags(i); + } + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsArray); + + // Correctly read and rebuild the metadata vector + // For this benchmark, we'll just take metadata from the second record + int[] metadataOffsets = new int[r2.metadataLength()]; + for (int i = 0; i < r2.metadataLength(); i++) { + metadataOffsets[i] = builder.createString(r2.metadata(i)); + } + int metadataVectorOffset = TestRecord.createMetadataVector(builder, metadataOffsets); + + + // Correctly read and rebuild the data vector from r1 + ByteBuffer dataBuffer = r1.dataAsByteBuffer(); + byte[] dataArray = new byte[dataBuffer.remaining()]; + dataBuffer.get(dataArray); + int dataOffset = TestRecord.createDataVector(builder, dataArray); + + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, System.currentTimeMillis()); // new value + TestRecord.addFlags(builder, r1.flags()); + TestRecord.addActive(builder, false); // new value + TestRecord.addValue(builder, r1.value()); + TestRecord.addData(builder, dataOffset); + TestRecord.addTags(builder, tagsVectorOffset); + TestRecord.addMetadata(builder, metadataVectorOffset); + + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + bh.consume(builder.dataBuffer()); + } + + @Override + public void accessField(Blackhole bh) { + bh.consume(TestRecord.getRootAsTestRecord(serializedRecord1).timestamp()); + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java new file mode 100644 index 0000000..e71a5c0 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java @@ -0,0 +1,104 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.core.ImprintRecord; +import com.imprint.core.ImprintRecordBuilder; +import com.imprint.core.SchemaId; +import com.imprint.error.ImprintException; +import org.openjdk.jmh.infra.Blackhole; + +import java.nio.ByteBuffer; + +public class ImprintSerializingBenchmark extends AbstractSerializingBenchmark { + + private ImprintRecord imprintRecord1; + private byte[] serializedRecord1; + private byte[] serializedRecord2; + private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); + + public ImprintSerializingBenchmark() { + super("Imprint"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + this.imprintRecord1 = buildRecord(testRecord); + ImprintRecord imprintRecord2 = buildRecord(testRecord2); + + ByteBuffer buf1 = this.imprintRecord1.serializeToBuffer(); + this.serializedRecord1 = new byte[buf1.remaining()]; + buf1.get(this.serializedRecord1); + + ByteBuffer buf2 = imprintRecord2.serializeToBuffer(); + this.serializedRecord2 = new byte[buf2.remaining()]; + buf2.get(this.serializedRecord2); + } catch (ImprintException e) { + throw 
new RuntimeException(e); + } + } + + private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintException { + var builder = ImprintRecord.builder(SCHEMA_ID); + builder.field(0, pojo.id); + builder.field(1, pojo.timestamp); + builder.field(2, pojo.flags); + builder.field(3, pojo.active); + builder.field(4, pojo.value); + builder.field(5, pojo.data); + builder.field(6, pojo.tags); + builder.field(7, pojo.metadata); + return builder.build(); + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(buildRecord(DataGenerator.createTestRecord()).serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(ImprintRecord.deserialize(this.serializedRecord1)); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Should use zero-copy projection directly from existing record + ImprintRecord projected = this.imprintRecord1.project(0, 1, 6); + bh.consume(projected.serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + // Use zero-copy merge - keep one record, deserialize the other + var r2 = ImprintRecord.deserialize(this.serializedRecord2); + var merged = this.imprintRecord1.merge(r2); + bh.consume(merged.serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + bh.consume(imprintRecord1.getInt64(1)); // Access timestamp by field ID + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java new file mode 100644 index 0000000..d58bc19 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java @@ -0,0 +1,96 @@ +package com.imprint.benchmark.serializers; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +public class JacksonSerializingBenchmark extends AbstractSerializingBenchmark { + + private final ObjectMapper mapper; + private byte[] serializedRecord; + private byte[] serializedRecord2; + + public JacksonSerializingBenchmark() { + super("Jackson-JSON"); + this.mapper = new ObjectMapper(); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + this.serializedRecord = mapper.writeValueAsBytes(testRecord); + this.serializedRecord2 = mapper.writeValueAsBytes(testRecord2); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(mapper.writeValueAsBytes(this.testData)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(mapper.readValue(serializedRecord, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = 
mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + + // Simulate by creating the projected object and serializing it + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = original.tags.subList(0, 5); + + bh.consume(mapper.writeValueAsBytes(projected)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + var r1 = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + var r2 = mapper.readValue(serializedRecord2, DataGenerator.TestRecord.class); + // Simulate by creating a new merged object and serializing it + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); // new value + merged.flags = r1.flags; + merged.active = false; // new value + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + + bh.consume(mapper.writeValueAsBytes(merged)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + var map = mapper.readValue(serializedRecord, java.util.Map.class); + bh.consume(map.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java new file mode 100644 index 0000000..6780513 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java @@ -0,0 +1,148 @@ +package com.imprint.benchmark.serializers; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +public class KryoSerializingBenchmark extends AbstractSerializingBenchmark { + + private final Kryo kryo; + private byte[] serializedRecord1; + private byte[] serializedRecord2; + + public KryoSerializingBenchmark() { + super("Kryo"); + this.kryo = new Kryo(); + this.kryo.register(DataGenerator.TestRecord.class); + this.kryo.register(DataGenerator.ProjectedRecord.class); + this.kryo.register(byte[].class); + kryo.register(ArrayList.class); + kryo.register(HashMap.class); + kryo.register(Arrays.asList().getClass()); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + + // Fix 1: Create fresh streams for each record + this.serializedRecord1 = serializeRecord(testRecord); + this.serializedRecord2 = serializeRecord(testRecord2); + } + + // Helper method to properly serialize a record + private byte[] serializeRecord(DataGenerator.TestRecord record) { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, record); + output.flush(); // Important: flush before getting bytes + return baos.toByteArray(); + } catch (Exception e) { + throw new RuntimeException("Failed to serialize record", e); + } + } + + @Override + public void serialize(Blackhole bh) { + // Fix 2: Create fresh output stream each time + try (ByteArrayOutputStream 
baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, this.testData); + output.flush(); // Ensure data is written + bh.consume(baos.toByteArray()); + } catch (Exception e) { + throw new RuntimeException("Serialize failed", e); + } + } + + @Override + public void deserialize(Blackhole bh) { + // Fix 3: Create fresh input each time + try (Input input = new Input(serializedRecord1)) { + bh.consume(kryo.readObject(input, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException("Deserialize failed", e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Step 1: Deserialize with fresh input + DataGenerator.TestRecord original; + try (Input input = new Input(serializedRecord1)) { + original = kryo.readObject(input, DataGenerator.TestRecord.class); + } + + // Step 2: Create projected record + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = new ArrayList<>(original.tags.subList(0, Math.min(5, original.tags.size()))); + + // Step 3: Serialize with fresh output + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, projected); + output.flush(); + bh.consume(baos.toByteArray()); + } + + } catch (Exception e) { + throw new RuntimeException("ProjectAndSerialize failed", e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + // Step 1: Deserialize both records with fresh inputs + DataGenerator.TestRecord r1, r2; + try (Input input1 = new Input(serializedRecord1)) { + r1 = kryo.readObject(input1, DataGenerator.TestRecord.class); + } + try (Input input2 = new Input(serializedRecord2)) { + r2 = kryo.readObject(input2, DataGenerator.TestRecord.class); + } + + // Step 2: Create merged record + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = r1.flags; + merged.active = false; + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + + // Step 3: Serialize with fresh output + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, merged); + output.flush(); + bh.consume(baos.toByteArray()); + } + } catch (Exception e) { + throw new RuntimeException("MergeAndSerialize failed", e); + } + } + + @Override + public void accessField(Blackhole bh) { + // Fix 4: Create fresh input for each access + try (Input input = new Input(serializedRecord1)) { + DataGenerator.TestRecord record = kryo.readObject(input, DataGenerator.TestRecord.class); + bh.consume(record.timestamp); + } catch (Exception e) { + throw new RuntimeException("AccessField failed", e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java new file mode 100644 index 0000000..9dd275f --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java @@ -0,0 +1,96 @@ +package com.imprint.benchmark.serializers; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.imprint.benchmark.DataGenerator; +import org.msgpack.jackson.dataformat.MessagePackFactory; +import org.openjdk.jmh.infra.Blackhole; + +public class 
MessagePackSerializingBenchmark extends AbstractSerializingBenchmark { + + private final ObjectMapper mapper; + private byte[] serializedRecord; + private byte[] serializedRecord2; + + public MessagePackSerializingBenchmark() { + super("MessagePack"); + this.mapper = new ObjectMapper(new MessagePackFactory()); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + this.serializedRecord = mapper.writeValueAsBytes(testRecord); + this.serializedRecord2 = mapper.writeValueAsBytes(testRecord2); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(mapper.writeValueAsBytes(this.testData)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(mapper.readValue(serializedRecord, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = original.tags.subList(0, 5); + + bh.consume(mapper.writeValueAsBytes(projected)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + var r1 = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + var r2 = mapper.readValue(serializedRecord2, DataGenerator.TestRecord.class); + + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = r1.flags; + merged.active = false; + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + + bh.consume(mapper.writeValueAsBytes(merged)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + var map = mapper.readValue(serializedRecord, java.util.Map.class); + bh.consume(map.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java new file mode 100644 index 0000000..1f6239e --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java @@ -0,0 +1,89 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.proto.TestRecordOuterClass; +import org.openjdk.jmh.infra.Blackhole; + +public class ProtobufSerializingBenchmark extends AbstractSerializingBenchmark { + + private byte[] serializedRecord1; + private byte[] serializedRecord2; + + public ProtobufSerializingBenchmark() { + super("Protobuf"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord1 = buildRecord(testRecord).toByteArray(); + this.serializedRecord2 = buildRecord(testRecord2).toByteArray(); + } + + private 
TestRecordOuterClass.TestRecord buildRecord(DataGenerator.TestRecord pojo) { + return TestRecordOuterClass.TestRecord.newBuilder() + .setId(pojo.id) + .setTimestamp(pojo.timestamp) + .setFlags(pojo.flags) + .setActive(pojo.active) + .setValue(pojo.value) + .setData(com.google.protobuf.ByteString.copyFrom(pojo.data)) + .addAllTags(pojo.tags) + .putAllMetadata(pojo.metadata) + .build(); + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData).toByteArray()); + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1); + + TestRecordOuterClass.TestRecord projected = TestRecordOuterClass.TestRecord.newBuilder() + .setId(original.getId()) + .setTimestamp(original.getTimestamp()) + .addAllTags(original.getTagsList().subList(0, 5)) + .build(); + bh.consume(projected.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + // Protobuf's `mergeFrom` is a natural fit here. + var record1 = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1); + var record2 = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord2); + + var merged = record1.toBuilder().mergeFrom(record2).build(); + bh.consume(merged.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1).getTimestamp()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java new file mode 100644 index 0000000..a6358b8 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java @@ -0,0 +1,17 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Defines the contract for a serialization framework in the benchmark. 
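+ * Each implementation is set up once with two generated records and then measured on
+ * serialize, deserialize, project+serialize, merge+serialize, and single-field access.
+ * Illustrative driver sequence (record1, record2 and bh are supplied by the JMH harness):
+ * <pre>{@code
+ *   SerializingBenchmark bench = new ImprintSerializingBenchmark();
+ *   bench.setup(record1, record2);
+ *   bench.serialize(bh);
+ * }</pre>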
+ */ +public interface SerializingBenchmark { + String name(); + void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2); + void serialize(Blackhole bh); + void deserialize(Blackhole bh); + void projectAndSerialize(Blackhole bh); + void mergeAndSerialize(Blackhole bh); + void accessField(Blackhole bh); +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java new file mode 100644 index 0000000..18cf9bb --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java @@ -0,0 +1,135 @@ +package com.imprint.benchmark.serializers; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.thrift.ProjectedRecord; +import com.imprint.benchmark.thrift.TestRecord; +import org.apache.thrift.TDeserializer; +import org.apache.thrift.TException; +import org.apache.thrift.TSerializer; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.openjdk.jmh.infra.Blackhole; + +import java.nio.ByteBuffer; +import java.util.stream.Collectors; + +public class ThriftSerializingBenchmark extends AbstractSerializingBenchmark { + + private final TSerializer serializer; + private final TDeserializer deserializer; + private byte[] serializedRecord1; + private byte[] serializedRecord2; + + public ThriftSerializingBenchmark() { + super("Thrift"); + try { + this.serializer = new TSerializer(new TBinaryProtocol.Factory()); + this.deserializer = new TDeserializer(new TBinaryProtocol.Factory()); + } catch (Exception e) { + throw new RuntimeException("Failed to initialize Thrift competitor", e); + } + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + var record1 = buildThriftRecord(testRecord); + this.serializedRecord1 = serializer.serialize(record1); + var record2 = buildThriftRecord(testRecord2); + this.serializedRecord2 = serializer.serialize(record2); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + private TestRecord buildThriftRecord(DataGenerator.TestRecord pojo) { + var record = new TestRecord(); + record.setId(pojo.id); + record.setTimestamp(pojo.timestamp); + record.setFlags(pojo.flags); + record.setActive(pojo.active); + record.setValue(pojo.value); + record.setData(ByteBuffer.wrap(pojo.data)); + record.setTags(pojo.tags); + record.setMetadata(pojo.metadata); + return record; + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(serializer.serialize(buildThriftRecord(this.testData))); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + var record = new TestRecord(); + deserializer.deserialize(record, this.serializedRecord1); + bh.consume(record); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = new TestRecord(); + deserializer.deserialize(original, this.serializedRecord1); + + var projected = new ProjectedRecord(); + projected.setId(original.getId()); + projected.setTimestamp(original.getTimestamp()); + projected.setTags(original.getTags().stream().limit(5).collect(Collectors.toList())); + bh.consume(serializer.serialize(projected)); + } catch (TException e) { + 
throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + var r1 = new TestRecord(); + deserializer.deserialize(r1, this.serializedRecord1); + var r2 = new TestRecord(); + deserializer.deserialize(r2, this.serializedRecord2); + + var merged = new TestRecord(); + merged.setId(r1.id); + merged.setTimestamp(System.currentTimeMillis()); + merged.setFlags(r1.flags | r2.flags); + merged.setActive(false); + merged.setValue((r1.value + r2.value) / 2); + merged.setData(r1.data); // Keep r1's data + merged.setTags(r1.tags); + r2.tags.forEach(t -> { + if (!merged.tags.contains(t)) { + merged.tags.add(t); + } + }); + merged.setMetadata(r1.metadata); + r2.metadata.forEach(merged.metadata::putIfAbsent); + + bh.consume(serializer.serialize(merged)); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + var record = new TestRecord(); + deserializer.deserialize(record, this.serializedRecord1); + bh.consume(record.getTimestamp()); + } catch (TException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/proto/test_record.proto b/src/jmh/proto/test_record.proto index 7a76f71..1187260 100644 --- a/src/jmh/proto/test_record.proto +++ b/src/jmh/proto/test_record.proto @@ -1,15 +1,18 @@ syntax = "proto3"; -option java_package = "com.imprint.benchmark"; -option java_outer_classname = "TestRecordProto"; +package com.imprint.benchmark.proto; + +option java_package = "com.imprint.benchmark.proto"; +option java_outer_classname = "TestRecordOuterClass"; +option java_multiple_files = false; message TestRecord { - int32 id = 1; - string name = 2; - double price = 3; + string id = 1; + int64 timestamp = 2; + int32 flags = 3; bool active = 4; - string category = 5; - repeated string tags = 6; - map metadata = 7; - repeated string extra_data = 8; + double value = 5; + bytes data = 6; + repeated int32 tags = 7; + map metadata = 8; } \ No newline at end of file diff --git a/src/jmh/thrift/test_record.thrift b/src/jmh/thrift/test_record.thrift new file mode 100644 index 0000000..8af2939 --- /dev/null +++ b/src/jmh/thrift/test_record.thrift @@ -0,0 +1,18 @@ +namespace java com.imprint.benchmark.thrift + +struct TestRecord { + 1: required string id; + 2: required i64 timestamp; + 3: required i32 flags; + 4: required bool active; + 5: required double value; + 6: required binary data; + 7: required list tags; + 8: required map metadata; +} + +struct ProjectedRecord { + 1: required string id; + 2: required i64 timestamp; + 3: required list tags; +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/Directory.java b/src/main/java/com/imprint/core/Directory.java new file mode 100644 index 0000000..ddaf208 --- /dev/null +++ b/src/main/java/com/imprint/core/Directory.java @@ -0,0 +1,79 @@ +package com.imprint.core; + +import com.imprint.types.TypeCode; +import lombok.Getter; +import lombok.Setter; +import lombok.Value; + +import java.util.List; +import java.util.Objects; + +/** + * Represents the common interface for a directory entry in an Imprint record. + * A directory entry provides metadata about a field, such as its ID, type, and location in the payload. + */ +public interface Directory { + /** + * @return The field's unique identifier. + */ + short getId(); + + /** + * @return The {@link TypeCode} of the field's value. 
+ */ + TypeCode getTypeCode(); + + /** + * @return The starting position (offset) of the field's data within the payload buffer. + */ + int getOffset(); + + /** + * A view interface for accessing directory entries efficiently. + * Provides both access to individual entries and full directory materialization. + */ + interface DirectoryView { + /** + * Find a directory entry by field ID. + * @param fieldId The field ID to search for + * @return The directory entry if found, null otherwise + */ + Directory findEntry(int fieldId); + + /** + * Get all directory entries as a list, with full eager deserialization if necessary. + * @return List of all directory entries in field ID order + */ + List toList(); + + /** + * Get the count of directory entries without parsing all entries. + * @return Number of entries in the directory + */ + int size(); + + /** + * Create an iterator for lazy directory traversal. + * For buffer-backed views, this avoids parsing the entire directory upfront. + * @return Iterator over directory entries in field ID order + */ + java.util.Iterator iterator(); + } + + /** + * Immutable representation of the Imprint Directory used for deserialization, + * merging, and field projections + */ + @Value + class Entry implements Directory { + short id; + TypeCode typeCode; + int offset; + + public Entry(short id, TypeCode typeCode, int offset) { + this.id = id; + this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); + this.offset = offset; + } + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/DirectoryEntry.java b/src/main/java/com/imprint/core/DirectoryEntry.java deleted file mode 100644 index 9556256..0000000 --- a/src/main/java/com/imprint/core/DirectoryEntry.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.imprint.core; - -import com.imprint.types.TypeCode; -import lombok.Value; - -import java.util.Objects; - -/** - * A directory entry describing a single field in an Imprint record. - * Each entry has a fixed size of 7 bytes. - */ -@Value -public class DirectoryEntry { - short id; - TypeCode typeCode; - int offset; - - public DirectoryEntry(int id, TypeCode typeCode, int offset) { - this.id = (short) id; - this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); - this.offset = offset; - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/Header.java b/src/main/java/com/imprint/core/Header.java index 388d491..aec0e9b 100644 --- a/src/main/java/com/imprint/core/Header.java +++ b/src/main/java/com/imprint/core/Header.java @@ -1,7 +1,10 @@ package com.imprint.core; +import com.imprint.Constants; import lombok.Value; +import java.nio.ByteBuffer; + /** * The header of an Imprint record. */ @@ -10,4 +13,24 @@ public class Header { Flags flags; SchemaId schemaId; int payloadSize; + + /** + * Serialize this header to a ByteBuffer. + * Follows the Imprint header format: magic(1) + version(1) + flags(1) + fieldSpaceId(4) + schemaHash(4) + payloadSize(4). + */ + public void serialize(ByteBuffer buffer) { + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put(flags.getValue()); + buffer.putInt(schemaId.getFieldSpaceId()); + buffer.putInt(schemaId.getSchemaHash()); + buffer.putInt(payloadSize); + } + + /** + * Static helper for serializing any header to a ByteBuffer. 
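+ * Equivalent to calling header.serialize(buffer) directly.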
+ */ + public static void serialize(Header header, ByteBuffer buffer) { + header.serialize(buffer); + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java deleted file mode 100644 index c14d6df..0000000 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ /dev/null @@ -1,305 +0,0 @@ -package com.imprint.core; - -import com.imprint.Constants; -import com.imprint.error.ErrorType; -import com.imprint.error.ImprintException; -import com.imprint.types.TypeCode; -import com.imprint.util.VarInt; -import lombok.Getter; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.TreeMap; - -/** - * Manages the raw buffers for an Imprint record with lazy directory parsing. - * Encapsulates all buffer operations and provides zero-copy field access. - * - *
Buffer Layout Overview:
- * directoryBuffer: [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
- * payload:         [Field 1 data][Field 2 data]...[Field N data]
- *
- * Each DirectoryEntry contains: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
- */ -@Getter -public final class ImprintBuffers { - private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) - private final ByteBuffer payload; // Read-only payload view - - // Lazy-loaded directory state. Needs to maintain ordering so that we can binary search the endOffset - private TreeMap parsedDirectory; - private boolean directoryParsed = false; - - /** - * Creates buffers from raw data (used during deserialization). - */ - public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { - this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); - this.payload = payload.asReadOnlyBuffer(); - } - - /** - * Creates buffers from pre-parsed directory (used during construction). - */ - public ImprintBuffers(List directory, ByteBuffer payload) { - this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); - this.directoryParsed = true; - this.payload = payload.asReadOnlyBuffer(); - this.directoryBuffer = createDirectoryBuffer(directory); - } - - /** - * Get a zero-copy ByteBuffer view of a field's data. - * Optimized for the most common use case - single field access. - */ - public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { - var entry = findDirectoryEntry(fieldId); - if (entry == null) - return null; - - int startOffset = entry.getOffset(); - int endOffset = findEndOffset(entry); - - if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || - endOffset > payload.limit() || startOffset > endOffset) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); - } - - var fieldBuffer = payload.duplicate(); - fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer; - } - - /** - * Find a directory entry for the given field ID using the most efficient method. - *

- * Strategy: - * - If parsed: TreeMap lookup - * - If raw: Binary search on raw bytes to avoid full unwinding of the directory - */ - public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) - return parsedDirectory.get(fieldId); - else - return findFieldEntryInRawDirectory(fieldId); - } - - /** - * Get the full directory, parsing it if necessary. - * Returns the values in fieldId order thanks to TreeMap. - */ - public List getDirectory() { - ensureDirectoryParsed(); - return new ArrayList<>(parsedDirectory.values()); - } - - /** - * Get directory count without parsing. - */ - public int getDirectoryCount() { - if (directoryParsed) - return parsedDirectory.size(); - try { - var countBuffer = directoryBuffer.duplicate(); - return VarInt.decode(countBuffer).getValue(); - } catch (Exception e) { - return 0; - } - } - - /** - * Create a new buffer containing the serialized directory. - */ - public ByteBuffer serializeDirectory() { - ensureDirectoryParsed(); - return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values())); - } - - // ========== PRIVATE METHODS ========== - - /** - * Binary search on raw directory bytes to find a specific field. - * This avoids parsing the entire directory for single field lookups. - */ - private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { - var searchBuffer = directoryBuffer.duplicate(); - searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int directoryCount = VarInt.decode(searchBuffer).getValue(); - if (directoryCount == 0) - return null; - - int directoryStartPos = searchBuffer.position(); - int low = 0; - int high = directoryCount - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - - if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Directory entry at position " + entryPos + " exceeds buffer limit"); - } - - searchBuffer.position(entryPos); - short midFieldId = searchBuffer.getShort(); - - if (midFieldId < fieldId) { - low = mid + 1; - } else if (midFieldId > fieldId) { - high = mid - 1; - } else { - // Found it - read the complete entry - searchBuffer.position(entryPos); - return deserializeDirectoryEntry(searchBuffer); - } - } - - return null; - } - - /** - * Find the end offset for a field by looking at the next field's offset. - */ - private int findEndOffset(DirectoryEntry entry) throws ImprintException { - if (directoryParsed) { - return findNextOffsetInParsedDirectory(entry.getId()); - } else { - return findNextOffsetInRawDirectory(entry.getId()); - } - } - - /** - * Find the end offset using TreeMap's efficient navigation methods. - */ - private int findNextOffsetInParsedDirectory(int currentFieldId) { - var nextEntry = parsedDirectory.higherEntry(currentFieldId); - return nextEntry != null ? 
nextEntry.getValue().getOffset() : payload.limit(); - } - - private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { - var scanBuffer = directoryBuffer.duplicate(); - scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int count = VarInt.decode(scanBuffer).getValue(); - if (count == 0) - return payload.limit(); - - int directoryStartPos = scanBuffer.position(); - int low = 0; - int high = count - 1; - int nextOffset = payload.limit(); - - // Binary search for the first field with fieldId > currentFieldId - while (low <= high) { - int mid = (low + high) >>> 1; - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - - if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) - break; - - scanBuffer.position(entryPos); - short fieldId = scanBuffer.getShort(); - scanBuffer.get(); // skip type - int offset = scanBuffer.getInt(); - - if (fieldId > currentFieldId) { - nextOffset = offset; - high = mid - 1; - } else { - low = mid + 1; - } - } - - return nextOffset; - } - - /** - * Parse the full directory if not already parsed. - * Creates TreeMap for both fast lookup and ordering needed for binary search on offSets. - */ - private void ensureDirectoryParsed() { - if (directoryParsed) - return; - try { - var parseBuffer = directoryBuffer.duplicate(); - parseBuffer.order(ByteOrder.LITTLE_ENDIAN); - - var countResult = VarInt.decode(parseBuffer); - int count = countResult.getValue(); - - this.parsedDirectory = new TreeMap<>(); - for (int i = 0; i < count; i++) { - var entry = deserializeDirectoryEntry(parseBuffer); - parsedDirectory.put((int)entry.getId(), entry); - } - - this.directoryParsed = true; - } catch (ImprintException e) { - throw new RuntimeException("Failed to parse directory", e); - } - } - - /** - * Create a TreeMap from directory list field lookup with ordering. - */ - private TreeMap createDirectoryMap(List directory) { - var map = new TreeMap(); - for (var entry : directory) { - map.put((int)entry.getId(), entry); - } - return map; - } - - /** - * Create directory buffer from parsed entries. - */ - private ByteBuffer createDirectoryBuffer(List directory) { - try { - int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(bufferSize); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.encode(directory.size(), buffer); - for (var entry : directory) - serializeDirectoryEntry(entry, buffer); - - buffer.flip(); - return buffer.asReadOnlyBuffer(); - } catch (Exception e) { - return ByteBuffer.allocate(0).asReadOnlyBuffer(); - } - } - - /** - * Serialize a single directory entry to the buffer. - * Format: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - */ - private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { - buffer.putShort(entry.getId()); - buffer.put(entry.getTypeCode().getCode()); - buffer.putInt(entry.getOffset()); - } - - /** - * Deserialize a single directory entry from the buffer. 
- * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - */ - private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); - - short id = buffer.getShort(); - var typeCode = TypeCode.fromByte(buffer.get()); - int offset = buffer.getInt(); - - return new DirectoryEntry(id, typeCode, offset); - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java new file mode 100644 index 0000000..d104317 --- /dev/null +++ b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java @@ -0,0 +1,309 @@ +package com.imprint.core; + +import java.util.Arrays; +import java.util.stream.IntStream; + +/** + * Specialized short→object map optimized for ImprintRecordBuilder field IDs. + * Basically a copy of EclipseCollections's primitive map: + * - No key-value boxing/unboxing + * - Primitive int16 keys + * - Open addressing with linear probing + * - Sort values in place and return without allocation (subsequently poisons the map) + */ +final class ImprintFieldObjectMap { + private static final int DEFAULT_CAPACITY = 512; + private static final float LOAD_FACTOR = 0.75f; + private static final short EMPTY_KEY = -1; // Reserved empty marker (field IDs are >= 0) + + private short[] keys; + private Object[] values; + private int size; + private int threshold; + private boolean poisoned = false; + + public ImprintFieldObjectMap() { + this(DEFAULT_CAPACITY); + } + + public ImprintFieldObjectMap(int initialCapacity) { + int capacity = nextPowerOfTwo(Math.max(4, initialCapacity)); + this.keys = new short[capacity]; + this.values = new Object[capacity]; + this.threshold = (int) (capacity * LOAD_FACTOR); + Arrays.fill(keys, EMPTY_KEY); + } + + public void put(short key, T value) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + putValue(key, value); + } + + public void put(int key, T value) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key > Short.MAX_VALUE) + throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key); + putValue((short) key, value); + } + + private void putValue(short key, T value) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key < 0) + throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key); + + if (size >= threshold) + resize(); + int index = findSlot(key); + if (keys[index] == EMPTY_KEY) { + size++; + } + keys[index] = key; + values[index] = value; + } + + @SuppressWarnings("unchecked") + public T get(int key) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key < 0 || key > Short.MAX_VALUE) + return null; + short shortKey = (short) key; + int index = findSlot(shortKey); + return keys[index] == shortKey ? 
(T) values[index] : null; + } + + public boolean containsKey(int key) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key < 0 || key > Short.MAX_VALUE) return false; + short shortKey = (short) key; + + int index = findSlot(shortKey); + return keys[index] == shortKey; + } + + public int size() { + return size; + } + + public boolean isEmpty() { + return size == 0; + } + + /** + * Get all keys (non-destructive). + */ + public int[] getKeys() { + return IntStream.range(0, keys.length) + .filter(i -> keys[i] != EMPTY_KEY) + .map(i -> keys[i]).toArray(); + } + + /** + * Stream all keys without allocation. + * Non-destructive operation that can be called multiple times. + * + * @return IntStream of all keys in the map + */ + public IntStream streamKeys() { + if (poisoned) { + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + } + + return IntStream.range(0, keys.length) + .filter(i -> keys[i] != EMPTY_KEY) + .map(i -> keys[i]); + } + + /** + * Result holder for in-place sorted values - avoids allocation by returning + * array reference and valid count. + */ + public static final class SortedValuesResult { + public final Object[] values; + public final int count; + + SortedValuesResult(Object[] values, int count) { + this.values = values; + this.count = count; + } + } + + /** + * Get values sorted by key order with zero allocation by left-side compacting the value set. + * WARNING: Modifies internal state, and renders map operations unstable and in an illegal state. Only invoke this + * if you plan to discard the map afterward. + * (e.g., at the end of builder lifecycle before build()). + * + * @return SortedValuesResult containing the internal values array and valid count. + * Caller should iterate from 0 to result.count-1 only. + */ + public SortedValuesResult getSortedValues() { + if (size == 0) { + // Poison the map even when empty, even if just for consistency + poisoned = true; + return new SortedValuesResult(values, 0); + } + + // left side compaction of all entries to the front of the arrays + compactEntries(); + + // Sort the compacted entries by key in-place + sortEntriesByKey(size); + + // Poison the map - no further operations allowed + poisoned = true; + + // Return the internal array w/ count + return new SortedValuesResult(values, size); + } + + /** + * Get values sorted by key order. + * Does not modify internal state and can be invoked repeatedly. + * + * @param resultArray Array to store results (will be resized if needed) + * @return Sorted array of values + */ + @SuppressWarnings("unchecked") + public T[] getSortedValuesCopy(T[] resultArray) { + if (poisoned) + throw new IllegalStateException("Map is poisoned after destructive sort - cannot perform operations"); + if (size == 0) + return resultArray.length == 0 ? 
resultArray : Arrays.copyOf(resultArray, 0); + + // Create temporary arrays for non-destructive sort + var tempKeys = new short[size]; + var tempValues = new Object[size]; + + // Copy valid entries to temporary arrays + int writeIndex = 0; + for (int readIndex = 0; readIndex < keys.length; readIndex++) { + if (keys[readIndex] != EMPTY_KEY) { + tempKeys[writeIndex] = keys[readIndex]; + tempValues[writeIndex] = values[readIndex]; + writeIndex++; + } + } + + // Sort the temporary arrays by key + for (int i = 1; i < size; i++) { + short key = tempKeys[i]; + Object value = tempValues[i]; + int j = i - 1; + + while (j >= 0 && tempKeys[j] > key) { + tempKeys[j + 1] = tempKeys[j]; + tempValues[j + 1] = tempValues[j]; + j--; + } + + tempKeys[j + 1] = key; + tempValues[j + 1] = value; + } + + // Copy sorted values to result array + if (resultArray.length != size) + resultArray = Arrays.copyOf(resultArray, size); + + for (int i = 0; i < size; i++) + resultArray[i] = (T) tempValues[i]; + + return resultArray; + } + + /** + * Compact all non-empty entries to the front of keys/values arrays. + */ + private void compactEntries() { + int writeIndex = 0; + + for (int readIndex = 0; readIndex < keys.length; readIndex++) { + if (keys[readIndex] != EMPTY_KEY) { + if (writeIndex != readIndex) { + keys[writeIndex] = keys[readIndex]; + values[writeIndex] = values[readIndex]; + + // Clear the old slot + keys[readIndex] = EMPTY_KEY; + values[readIndex] = null; + } + writeIndex++; + } + } + } + + /** + * Sort the first 'count' entries by key using insertion sort (should be fast for small arrays). + */ + private void sortEntriesByKey(int count) { + for (int i = 1; i < count; i++) { + short key = keys[i]; + Object value = values[i]; + int j = i - 1; + + while (j >= 0 && keys[j] > key) { + keys[j + 1] = keys[j]; + values[j + 1] = values[j]; + j--; + } + + keys[j + 1] = key; + values[j + 1] = value; + } + } + + + private int findSlot(short key) { + int mask = keys.length - 1; + int index = hash(key) & mask; + + // Linear probing + while (keys[index] != EMPTY_KEY && keys[index] != key) { + index = (index + 1) & mask; + } + + return index; + } + + private void resize() { + short[] oldKeys = keys; + Object[] oldValues = values; + + int newCapacity = keys.length * 2; + keys = new short[newCapacity]; + values = new Object[newCapacity]; + threshold = (int) (newCapacity * LOAD_FACTOR); + Arrays.fill(keys, EMPTY_KEY); + + int oldSize = size; + size = 0; + + // Rehash all entries + for (int i = 0; i < oldKeys.length; i++) { + if (oldKeys[i] != EMPTY_KEY) { + @SuppressWarnings("unchecked") + T value = (T) oldValues[i]; + put(oldKeys[i], value); + } + } + + // Verify size didn't change during rehash + assert size == oldSize; + } + + private static int hash(short key) { + // Simple but effective hash for short keys + int intKey = key & 0xFFFF; // Convert to unsigned int + intKey ^= intKey >>> 8; + return intKey; + } + + private static int nextPowerOfTwo(int n) { + if (n <= 1) return 1; + return Integer.highestOneBit(n - 1) << 1; + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e720df5..e6f9de6 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -3,123 +3,216 @@ import com.imprint.Constants; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import com.imprint.types.MapKey; +import com.imprint.ops.ImprintOperations; import 
com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; + +import lombok.AccessLevel; +import lombok.EqualsAndHashCode; import lombok.Getter; +import lombok.ToString; +import lombok.experimental.NonFinal; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Iterator; import java.util.List; -import java.util.Map; +import java.util.NoSuchElementException; import java.util.Objects; /** - * An Imprint record containing a header and buffer management. - * Delegates all buffer operations to ImprintBuffers for cleaner separation. + * Imprint Record + *

+ * This is the primary way to work with Imprint records, providing: + * - Zero-copy field access via binary search + * - Direct bytes-to-bytes operations (merge, project) + * - Lazy deserializing operations */ -@Getter -public final class ImprintRecord { - private final Header header; - private final ImprintBuffers buffers; - +@lombok.Value +@EqualsAndHashCode(of = "serializedBytes") +@ToString(of = {"header"}) +public class ImprintRecord { + ByteBuffer serializedBytes; + + @Getter(AccessLevel.PUBLIC) + Header header; + + @Getter(AccessLevel.PACKAGE) + // Raw directory bytes (read-only) + ByteBuffer directoryBuffer; + + @Getter(AccessLevel.PACKAGE) + // Raw payload bytes + ByteBuffer payload; + + @NonFinal + @Getter(AccessLevel.NONE) + //Directory View cache to allow for easier mutable operations needed for lazy initialization + Directory.DirectoryView directoryView; + /** - * Creates a record from deserialized components. + * Package-private constructor for @Value that creates immutable ByteBuffer views. */ - private ImprintRecord(Header header, ImprintBuffers buffers) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = Objects.requireNonNull(buffers, "Buffers cannot be null"); + ImprintRecord(ByteBuffer serializedBytes, Header header, ByteBuffer directoryBuffer, ByteBuffer payload) { + this.serializedBytes = serializedBytes.asReadOnlyBuffer(); + this.header = Objects.requireNonNull(header); + this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); + this.payload = payload.asReadOnlyBuffer(); + this.directoryView = null; } - + + // ========== STATIC FACTORY METHODS ========== + /** - * Creates a record from pre-parsed directory (used by ImprintWriter). + * Create a builder for constructing new ImprintRecord instances. */ - ImprintRecord(Header header, List directory, ByteBuffer payload) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(directory, payload); + public static ImprintRecordBuilder builder(SchemaId schemaId) { + return new ImprintRecordBuilder(schemaId); } - // ========== FIELD ACCESS METHODS ========== + public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { + return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); + } /** - * Get a value by field ID, deserializing it on demand. - * Returns null if the field is not found. + * Deserialize an ImprintRecord from bytes. */ - public Value getValue(int fieldId) throws ImprintException { - var entry = buffers.findDirectoryEntry(fieldId); - if (entry == null) - return null; - - var fieldBuffer = buffers.getFieldBuffer(fieldId); - if (fieldBuffer == null) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get buffer for field " + fieldId); - - return deserializeValue(entry.getTypeCode(), fieldBuffer); + public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { + return fromBytes(ByteBuffer.wrap(bytes)); } + public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { + return fromBytes(buffer); + } + + /** + * Create a ImprintRecord from complete serialized bytes. 
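// A minimal usage sketch of the byte-oriented API described above (builder -> bytes ->
// zero-copy merge/project). Field ids and the schema values are made up for illustration;
// only methods that appear in this diff are used, and exceptions are simply propagated.
import com.imprint.core.ImprintRecord;
import com.imprint.core.SchemaId;
import com.imprint.error.ImprintException;

public class ImprintRecordUsageSketch {
    public static void main(String[] args) throws ImprintException {
        var user = ImprintRecord.builder(new SchemaId(1, 0xCAFE))
                .field(1, "alice")
                .field(2, 42)
                .field(3, 123456789L)
                .build();

        // Round-trip through bytes: getSerializedBytes() hands out a duplicate view and
        // fromBytes() re-wraps it, so no payload copying happens here.
        var copy = ImprintRecord.fromBytes(user.getSerializedBytes());

        // Bytes-to-bytes operations: project keeps a subset of fields, merge keeps this
        // record's value when the same field id exists on both sides.
        var slim = copy.project(1, 3);
        var merged = slim.merge(user);

        System.out.println(merged.getString(1) + " / " + merged.getInt32(2));
    }
}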
+ */ + public static ImprintRecord fromBytes(ByteBuffer serializedBytes) throws ImprintException { + Objects.requireNonNull(serializedBytes, "Serialized bytes cannot be null"); + + var buffer = serializedBytes.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header + var header = parseHeader(buffer); + + // Extract directory and payload sections + var parsedBuffers = parseBuffersFromSerialized(serializedBytes); + + return new ImprintRecord(serializedBytes, header, parsedBuffers.directoryBuffer, parsedBuffers.payload); + } + + + // ========== ZERO-COPY OPERATIONS ========== + + /** + * Merge with another ImprintRecord using pure byte operations. + * Results in a new ImprintRecord without any object creation. + */ + public ImprintRecord merge(ImprintRecord other) throws ImprintException { + var mergedBytes = ImprintOperations.mergeBytes(this.serializedBytes, other.serializedBytes); + return fromBytes(mergedBytes); + } + + /** + * Project fields using pure byte operations. + * Results in a new ImprintRecord without any object creation. + */ + public ImprintRecord project(int... fieldIds) throws ImprintException { + var projectedBytes = ImprintOperations.projectBytes(this.serializedBytes, fieldIds); + return fromBytes(projectedBytes); + } + + /** + * Chain multiple operations efficiently. + * Each operation works on bytes without creating intermediate objects. + */ + public ImprintRecord projectAndMerge(ImprintRecord other, int... projectFields) throws ImprintException { + return this.project(projectFields).merge(other); + } + + /** + * Get the raw serialized bytes. + * This is the most efficient way to pass the record around. + */ + public ByteBuffer getSerializedBytes() { + return serializedBytes.duplicate(); + } + + /** + * Get a DirectoryView for straight through directory access. + */ + public Directory.DirectoryView getDirectoryView() { + if (directoryView == null) { + directoryView = new ImprintDirectoryView(); + } + return directoryView; + } + + /** + * Get the directory list. + */ + public List getDirectory() { + return getDirectoryView().toList(); + } + /** * Get raw bytes for a field without deserializing. */ public ByteBuffer getRawBytes(int fieldId) { try { - return buffers.getFieldBuffer(fieldId); + return getFieldBuffer(fieldId); } catch (ImprintException e) { return null; } } - + /** - * Project a subset of fields from this record. - * - * @param fieldIds Array of field IDs to include in the projection - * @return New ImprintRecord containing only the requested fields + * Get raw bytes for a field by short ID. */ - public ImprintRecord project(int... fieldIds) { - return ImprintOperations.project(this, fieldIds); + public ByteBuffer getRawBytes(short fieldId) { + return getRawBytes((int) fieldId); } - + /** - * Merge another record into this one. - * For duplicate fields, this record's values take precedence. - * - * @param other The record to merge with this one - * @return New ImprintRecord containing merged fields - * @throws ImprintException if merge fails + * Estimate the serialized size of this record. */ - public ImprintRecord merge(ImprintRecord other) throws ImprintException { - return ImprintOperations.merge(this, other); + public int estimateSerializedSize() { + return serializedBytes.remaining(); } - + /** - * Get the directory (parsing it if necessary). + * Get a field value by ID. + * Uses zero-copy binary search to locate the field. 
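// Sketch of the directory lookup idea behind getValue(): entries are fixed-width
// (short id, byte type code, int offset = 7 bytes) and sorted by id, so a field can be
// located by binary searching absolute positions in the buffer without allocating.
// The 7-byte layout mirrors how Constants.DIR_ENTRY_BYTES is used in this diff; this
// helper is illustrative, not the library implementation.
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

final class DirectorySearchSketch {
    static final int ENTRY_BYTES = 7; // 2 (id) + 1 (type) + 4 (offset)

    /**
     * Returns the payload offset of fieldId, or -1 if absent.
     * Assumes 'directory' is positioned at the first entry (just past the varint count).
     */
    static int findOffset(ByteBuffer directory, int entryCount, int fieldId) {
        var buf = directory.duplicate().order(ByteOrder.LITTLE_ENDIAN);
        int base = buf.position();
        int low = 0, high = entryCount - 1;
        while (low <= high) {
            int mid = (low + high) >>> 1;
            int pos = base + mid * ENTRY_BYTES;
            short midId = buf.getShort(pos);            // absolute read, position untouched
            if (midId < fieldId)       low = mid + 1;
            else if (midId > fieldId)  high = mid - 1;
            else                       return buf.getInt(pos + 3); // skip id (2) + type (1)
        }
        return -1;
    }
}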
*/ - public List getDirectory() { - return buffers.getDirectory(); - } - - // ========== TYPED GETTERS ========== - - public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); - } - - public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); - } - - public long getInt64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + public Value getValue(int fieldId) throws ImprintException { + var entry = getDirectoryView().findEntry(fieldId); + if (entry == null) return null; + + var fieldBuffer = getFieldBuffer(fieldId); + if (fieldBuffer == null) return null; + + return deserializeValue(entry.getTypeCode(), fieldBuffer); } - - public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + + /** + * Check if a field exists without deserializing it. + */ + public boolean hasField(int fieldId) { + return getDirectoryView().findEntry(fieldId) != null; } - - public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + + /** + * Get the number of fields without parsing the directory. + */ + public int getFieldCount() { + return getDirectoryCount(); } - + + // ========== TYPED GETTERS ========== + public String getString(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "STRING"); if (value instanceof Value.StringValue) @@ -128,7 +221,27 @@ public String getString(int fieldId) throws ImprintException { return ((Value.StringBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); } - + + public int getInt32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + } + + public long getInt64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + } + + public boolean getBoolean(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + } + + public float getFloat32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + } + + public double getFloat64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + } + public byte[] getBytes(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "BYTES"); if (value instanceof Value.BytesValue) @@ -137,121 +250,402 @@ public byte[] getBytes(int fieldId) throws ImprintException { return ((Value.BytesBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); } - - public List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + + public java.util.List getArray(int fieldId) 
throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); } - - public Map getMap(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + + public java.util.Map getMap(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); } - + public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } - // ========== SERIALIZATION ========== - /** - * Serialize this record to a ByteBuffer. + * Returns a copy of the bytes. */ public ByteBuffer serializeToBuffer() { - var buffer = ByteBuffer.allocate(estimateSerializedSize()); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - // Write header - serializeHeader(buffer); - - // Write directory - var directoryBuffer = buffers.serializeDirectory(); - buffer.put(directoryBuffer); - - // Write payload - var payload = buffers.getPayload(); - var payloadCopy = payload.duplicate(); - buffer.put(payloadCopy); - - buffer.flip(); - return buffer; + return serializedBytes.duplicate(); } - public int estimateSerializedSize() { - int size = Constants.HEADER_BYTES; // header - size += buffers.serializeDirectory().remaining(); // directory - size += buffers.getPayload().remaining(); // payload - return size; + /** + * Get the schema ID from the header. + */ + public SchemaId getSchemaId() { + return header.getSchemaId(); + } + + /** + * Estimate the memory footprint of this record. + */ + public int getSerializedSize() { + return serializedBytes.remaining(); } - // ========== STATIC FACTORY METHODS ========== - - public static ImprintRecordBuilder builder(SchemaId schemaId) { - return new ImprintRecordBuilder(schemaId); + + /** + * Get and validate a value exists and is not null. + */ + private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { + var value = getValue(fieldId); + if (value == null) + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); + if (value.getTypeCode() == com.imprint.types.TypeCode.NULL) + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); + return value; } - public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { - return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); + private T getTypedValueOrThrow(int fieldId, com.imprint.types.TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) + throws ImprintException { + var value = getValidatedValue(fieldId, expectedTypeName); + if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) + return expectedValueClass.cast(value); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + } + + /** + * Parse buffers from serialized record bytes. 
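// Sketch of the section arithmetic behind parsing a serialized record:
// [header][varint count][count * 7-byte directory entries][payload]. The 15-byte header
// size here is inferred from the fields parseHeader() reads (magic, version, flags, two
// schema-id ints, payload size); it is an assumption for this sketch, not a quote of
// Constants.HEADER_BYTES.
final class RecordLayoutSketch {
    static void describe(int directoryCount, int varintLenOfCount, int payloadSize) {
        int headerBytes = 1 + 1 + 1 + 4 + 4 + 4;                     // assumed 15
        int directoryBytes = varintLenOfCount + directoryCount * 7;  // cf. calculateDirectorySize
        int directoryStart = headerBytes;
        int payloadStart = directoryStart + directoryBytes;
        int totalSize = payloadStart + payloadSize;
        System.out.printf("directory @%d (%d bytes), payload @%d (%d bytes), total %d%n",
                directoryStart, directoryBytes, payloadStart, payloadSize, totalSize);
    }
}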
+ */ + private static ParsedBuffers parseBuffersFromSerialized(ByteBuffer serializedRecord) throws ImprintException { + var buffer = serializedRecord.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header and extract sections using shared utility + var header = parseHeaderFromBuffer(buffer); + var sections = extractBufferSections(buffer, header); + + return new ParsedBuffers(sections.directoryBuffer, sections.payloadBuffer); + } + + private static class ParsedBuffers { + final ByteBuffer directoryBuffer; + final ByteBuffer payload; + + ParsedBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { + this.directoryBuffer = directoryBuffer; + this.payload = payload; + } + } + + private int getDirectoryCount() { + try { + return VarInt.decode(directoryBuffer.duplicate()).getValue(); + } catch (ImprintException e) { + return 0; // Cache as 0 on error + } + } + + /** + * Gets ByteBuffer view of a field's data. + */ + private ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { + var entry = findDirectoryEntry(fieldId); + if (entry == null) + return null; + + int startOffset = entry.getOffset(); + int endOffset = findEndOffset(entry.getId()); + + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || + endOffset > payload.limit() || startOffset > endOffset) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset); + } + + var fieldBuffer = payload.duplicate(); + fieldBuffer.position(startOffset).limit(endOffset); + return fieldBuffer; + } + + private Directory findDirectoryEntry(int fieldId) throws ImprintException { + var searchBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + int count = getDirectoryCount(); + if (count == 0) return null; + + // Advance past varint to entries + VarInt.decode(searchBuffer); + int directoryStartPos = searchBuffer.position(); + + int low = 0; + int high = count - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Directory entry exceeds buffer"); + + searchBuffer.position(entryPos); + short midFieldId = searchBuffer.getShort(); + + if (midFieldId < fieldId) { + low = mid + 1; + } else if (midFieldId > fieldId) { + high = mid - 1; + } else { + // Found it - read complete entry + searchBuffer.position(entryPos); + return deserializeDirectoryEntry(searchBuffer); + } + } + + return null; } + + private int findEndOffset(int currentFieldId) throws ImprintException { + var scanBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + int count = getDirectoryCount(); + if (count == 0) return payload.limit(); + + // Advance past varint + VarInt.decode(scanBuffer); + int directoryStartPos = scanBuffer.position(); + + int low = 0; + int high = count - 1; + int nextOffset = payload.limit(); + + // Binary search for first field with fieldId > currentFieldId + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) break; + + scanBuffer.position(entryPos); + short fieldId = scanBuffer.getShort(); + scanBuffer.get(); // skip type + int offset = scanBuffer.getInt(); + + if (fieldId > currentFieldId) { + nextOffset = offset; + high = mid - 1; + } else { + low = mid + 1; + } + } + + return 
nextOffset; + } + + private Directory deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); + + short id = buffer.getShort(); + var typeCode = TypeCode.fromByte(buffer.get()); + int offset = buffer.getInt(); + + return new Directory.Entry(id, typeCode, offset); + } + + /** + * DirectoryView + */ + private class ImprintDirectoryView implements Directory.DirectoryView { + + @Override + public Directory findEntry(int fieldId) { + try { + return findDirectoryEntry(fieldId); + } catch (ImprintException e) { + return null; + } + } - public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { - return deserialize(ByteBuffer.wrap(bytes)); + /** + * List out all directories in the buffer. This operation unpacks any directories not already deserialized + * so proceed only if eager evaluation is intended. + */ + @Override + public List toList() { + var list = new ArrayList(getDirectoryCount()); + var iterator = iterator(); + while (iterator.hasNext()) { + list.add(iterator.next()); + } + return list; + } + + @Override + public int size() { + return getDirectoryCount(); + } + + @Override + public Iterator iterator() { + return new ImprintDirectoryIterator(); + } } + + /** + * Iterator that parses directory entries lazily from raw bytes. + */ + private class ImprintDirectoryIterator implements Iterator { + private final ByteBuffer iterBuffer; + private final int totalCount; + private int currentIndex; + + ImprintDirectoryIterator() { + this.iterBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + this.totalCount = getDirectoryCount(); + + try { + // Skip past varint to first entry + VarInt.decode(iterBuffer); + } catch (ImprintException e) { + throw new RuntimeException("Failed to initialize directory iterator", e); + } + this.currentIndex = 0; + } + + @Override + public boolean hasNext() { + return currentIndex < totalCount; + } + + @Override + public Directory next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + try { + var entry = deserializeDirectoryEntry(iterBuffer); + currentIndex++; + return entry; + } catch (ImprintException e) { + throw new RuntimeException("Failed to parse directory entry at index " + currentIndex, e); + } + } + } + + /** + * Used by {@link ImprintRecordBuilder} with sorted field data. + * Creates directory buffer from field data and calculated offsets. + * + * @param sortedFields Array of FieldData objects sorted by ID + * @param offsets Array of payload offsets corresponding to each field + * @param fieldCount Number of valid fields to process + */ + static ByteBuffer createDirectoryBufferFromSorted(Object[] sortedFields, int[] offsets, int fieldCount) { + if (fieldCount == 0) + return createEmptyDirectoryBuffer(); - public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { - buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + int size = calculateDirectorySize(fieldCount); + var buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); + VarInt.encode(fieldCount, buffer); + + //this ends up being kind of a hotspot for some reason, probably boundary checking. 
+ //Direct writes might help a bit it could get difficult since pretty much all the other + //frameworks just go straight for Unsafe + for (int i = 0; i < fieldCount; i++) { + var fieldData = (ImprintRecordBuilder.FieldData) sortedFields[i]; + buffer.putShort(fieldData.id); + buffer.put(fieldData.value.getTypeCode().getCode()); + buffer.putInt(offsets[i]); + } - // Read header - var header = deserializeHeader(buffer); + buffer.flip(); + return buffer; + } - // Calculate directory size + private static ByteBuffer createEmptyDirectoryBuffer() { + ByteBuffer buffer = ByteBuffer.allocate(1); + VarInt.encode(0, buffer); + buffer.flip(); + return buffer; + } + + /** + * Parse a header from a ByteBuffer without advancing the buffer position. + * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. + */ + public static Header parseHeaderFromBuffer(ByteBuffer buffer) throws ImprintException { + int startPos = buffer.position(); + try { + return parseHeader(buffer); + } finally { + buffer.position(startPos); + } + } + + /** + * Calculate the size needed to store a directory with the given entry count. + */ + public static int calculateDirectorySize(int entryCount) { + return VarInt.encodedLength(entryCount) + (entryCount * Constants.DIR_ENTRY_BYTES); + } + + /** + * Container for separated directory and payload buffer sections. + * Utility class shared between {@link ImprintRecord} and {@link ImprintOperations}. + */ + public static class BufferSections { + public final ByteBuffer directoryBuffer; + public final ByteBuffer payloadBuffer; + public final int directoryCount; + + public BufferSections(ByteBuffer directoryBuffer, ByteBuffer payloadBuffer, int directoryCount) { + this.directoryBuffer = directoryBuffer; + this.payloadBuffer = payloadBuffer; + this.directoryCount = directoryCount; + } + } + + /** + * Extract directory and payload sections from a serialized buffer. + * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. + */ + public static BufferSections extractBufferSections(ByteBuffer buffer, Header header) throws ImprintException { + // Skip header + buffer.position(buffer.position() + Constants.HEADER_BYTES); + + // Parse directory section int directoryStartPos = buffer.position(); var countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - + // Create directory buffer buffer.position(directoryStartPos); var directoryBuffer = buffer.slice(); directoryBuffer.limit(directorySize); - - // Advance past directory + + // Advance to payload buffer.position(buffer.position() + directorySize); - - // Create payload buffer - var payload = buffer.slice(); - payload.limit(header.getPayloadSize()); - - // Create buffers wrapper - var buffers = new ImprintBuffers(directoryBuffer, payload); - - return new ImprintRecord(header, buffers); + var payloadBuffer = buffer.slice(); + payloadBuffer.limit(header.getPayloadSize()); + + return new BufferSections(directoryBuffer, payloadBuffer, directoryCount); } + + private static Header parseHeader(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.HEADER_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - // ========== PRIVATE HELPER METHODS ========== - - /** - * Get and validate a value exists and is not null. 
- */ - private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { - var value = getValue(fieldId); - if (value == null) - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); - if (value.getTypeCode() == TypeCode.NULL) - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); - return value; - } + byte magic = buffer.get(); + byte version = buffer.get(); - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) - throws ImprintException { - var value = getValidatedValue(fieldId, expectedTypeName); - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) - return expectedValueClass.cast(value); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + if (magic != Constants.MAGIC) + throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte"); + if (version != Constants.VERSION) + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); + + var flags = new Flags(buffer.get()); + int fieldSpaceId = buffer.getInt(); + int schemaHash = buffer.getInt(); + int payloadSize = buffer.getInt(); + + return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } - - private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - var valueBuffer = buffer.duplicate(); - valueBuffer.order(ByteOrder.LITTLE_ENDIAN); - + + private Value deserializeValue(com.imprint.types.TypeCode typeCode, ByteBuffer buffer) throws ImprintException { + var valueBuffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { case NULL: case BOOL: @@ -271,43 +665,4 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - - private void serializeHeader(ByteBuffer buffer) { - buffer.put(Constants.MAGIC); - buffer.put(Constants.VERSION); - buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldSpaceId()); - buffer.putInt(header.getSchemaId().getSchemaHash()); - buffer.putInt(header.getPayloadSize()); - } - - private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.HEADER_BYTES) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - - - byte magic = buffer.get(); - if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); - } - - byte version = buffer.get(); - if (version != Constants.VERSION) { - throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); - } - - var flags = new Flags(buffer.get()); - int fieldSpaceId = buffer.getInt(); - int schemaHash = buffer.getInt(); - int payloadSize = buffer.getInt(); - - return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); - } - - @Override - public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, buffers.getDirectoryCount(), buffers.getPayload().remaining()); - } } \ No newline at end of file diff --git 
a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 51a3525..8e1dfa0 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -1,9 +1,14 @@ package com.imprint.core; +import com.imprint.Constants; +import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; import com.imprint.types.Value; +import lombok.SneakyThrows; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.*; /** @@ -30,7 +35,20 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final Map fields = new TreeMap<>(); + // Custom int→object map optimized for primitive keys + private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); + private int estimatedPayloadSize = 0; + + static final class FieldData { + final short id; + final Value value; + + FieldData(short id, Value value) { + this.id = id; + this.value = value; + } + } + ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); @@ -66,7 +84,7 @@ public ImprintRecordBuilder field(int id, byte[] value) { } // Collections with automatic conversion - public ImprintRecordBuilder field(int id, List values) { + public ImprintRecordBuilder field(int id, List values) { var convertedValues = new ArrayList(values.size()); for (var item : values) { convertedValues.add(convertToValue(item)); @@ -74,7 +92,7 @@ public ImprintRecordBuilder field(int id, List values) { return addField(id, Value.fromArray(convertedValues)); } - public ImprintRecordBuilder field(int id, Map map) { + public ImprintRecordBuilder field(int id, Map map) { var convertedMap = new HashMap(map.size()); for (var entry : map.entrySet()) { var key = convertToMapKey(entry.getKey()); @@ -129,19 +147,50 @@ public int fieldCount() { } public Set fieldIds() { - return new TreeSet<>(fields.keySet()); + var ids = new HashSet(fields.size()); + var keys = fields.getKeys(); + for (var key : keys) { + ids.add(key); + } + return ids; } // Build the final record public ImprintRecord build() throws ImprintException { - var writer = new ImprintWriter(schemaId); - for (var entry : fields.entrySet()) { - writer.addField(entry.getKey(), entry.getValue()); + // Build to bytes and then create ImprintRecord from bytes for consistency + var serializedBytes = buildToBuffer(); + return ImprintRecord.fromBytes(serializedBytes); + } + + /** + * Builds the record and serializes it directly to a ByteBuffer. + * + * @return A read-only ByteBuffer containing the fully serialized record. + * @throws ImprintException if serialization fails. + */ + public ByteBuffer buildToBuffer() throws ImprintException { + // 1. Sort fields by ID for directory ordering (zero allocation) + var sortedFieldsResult = getSortedFieldsResult(); + var sortedFields = sortedFieldsResult.values; + var fieldCount = sortedFieldsResult.count; + + // 2. 
Serialize payload and calculate offsets + var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); + payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); + + int[] offsets = new int[fieldCount]; + for (int i = 0; i < fieldCount; i++) { + var fieldData = (FieldData) sortedFields[i]; + offsets[i] = payloadBuffer.position(); + serializeValue(fieldData.value, payloadBuffer); } - return writer.build(); + payloadBuffer.flip(); + var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); + + // 3. Create directory buffer and serialize to final buffer + return serializeToBuffer(schemaId, sortedFields, offsets, fieldCount, payloadView); } - // Internal helper methods /** * Adds or overwrites a field in the record being built. * If a field with the given ID already exists, it will be replaced. @@ -152,7 +201,17 @@ public ImprintRecord build() throws ImprintException { */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - fields.put(id, value); + var newEntry = new FieldData((short) id, value); + + // Check if replacing an existing field - O(1) lookup without boxing! + var oldEntry = fields.get(id); + if (oldEntry != null) { + estimatedPayloadSize -= estimateValueSize(oldEntry.value); + } + + // Add or replace field - O(1) operation without boxing! + fields.put(id, newEntry); + estimatedPayloadSize += estimateValueSize(newEntry.value); return this; } @@ -188,7 +247,6 @@ private Value convertToValue(Object obj) { return Value.fromBytes((byte[]) obj); } if (obj instanceof List) { - //test @SuppressWarnings("unchecked") List list = (List) obj; var convertedValues = new ArrayList(list.size()); @@ -212,8 +270,7 @@ private Value convertToValue(Object obj) { return Value.fromRow((ImprintRecord) obj); } - throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + - " to Imprint Value. Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); + throw new IllegalArgumentException("Unsupported type for auto-conversion: " + obj.getClass().getName()); } private MapKey convertToMapKey(Object obj) { @@ -230,12 +287,102 @@ private MapKey convertToMapKey(Object obj) { return MapKey.fromBytes((byte[]) obj); } - throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + - ". Map keys must be int, long, String, or byte[]"); + throw new IllegalArgumentException("Unsupported map key type: " + obj.getClass().getName()); } - @Override - public String toString() { - return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); + private int estimatePayloadSize() { + // Add 25% buffer to reduce reallocations and handle VarInt encoding fluctuations. + return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), fields.size() * 16); + } + + /** + * Estimates the serialized size in bytes for a given value. 
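// Worked example of the string case of this size estimate: a string payload is a varint
// length prefix followed by its UTF-8 bytes, so the estimate is encodedLength(n) + n (as
// in the STRING handler elsewhere in this diff). The 7-bits-per-byte length calculation
// below assumes a standard LEB128-style varint, which is how VarInt.encodedLength() is
// being used here; it is a stand-in, not the project's VarInt class.
import java.nio.charset.StandardCharsets;

final class StringSizeSketch {
    static int varintLength(int value) {
        int length = 1;
        while ((value & ~0x7F) != 0) { // each varint byte carries 7 payload bits
            value >>>= 7;
            length++;
        }
        return length;
    }

    static int estimateStringSize(String s) {
        int n = s.getBytes(StandardCharsets.UTF_8).length;
        return varintLength(n) + n;
    }

    public static void main(String[] args) {
        System.out.println(estimateStringSize("Hello, 世界!")); // length prefix + UTF-8 bytes
    }
}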
+ * + * @param value the value to estimate size for + * @return estimated size in bytes including type-specific overhead + */ + @SneakyThrows + private int estimateValueSize(Value value) { + // Use TypeHandler for simple types + switch (value.getTypeCode()) { + case NULL: + case BOOL: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + case BYTES: + case STRING: + case ARRAY: + case MAP: + return value.getTypeCode().getHandler().estimateSize(value); + + case ROW: + Value.RowValue rowValue = (Value.RowValue) value; + return rowValue.getValue().estimateSerializedSize(); + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + } + } + + private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { + // Use TypeHandler for simple types + switch (value.getTypeCode()) { + case NULL: + case BOOL: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + case BYTES: + case STRING: + case ARRAY: + case MAP: + value.getTypeCode().getHandler().serialize(value, buffer); + break; + //TODO eliminate this switch entirely by implementing a ROW TypeHandler + case ROW: + Value.RowValue rowValue = (Value.RowValue) value; + var serializedRow = rowValue.getValue().serializeToBuffer(); + buffer.put(serializedRow); + break; + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + } + } + + /** + * Get fields sorted by ID from the map. + * Returns internal map array reference + count to avoid any copying but sacrifices the map structure in the process. + */ + private ImprintFieldObjectMap.SortedValuesResult getSortedFieldsResult() { + return fields.getSortedValues(); + } + + /** + * Serialize components into a single ByteBuffer. + */ + private static ByteBuffer serializeToBuffer(SchemaId schemaId, Object[] sortedFields, int[] offsets, int fieldCount, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + var directoryBuffer = ImprintRecord.createDirectoryBufferFromSorted(sortedFields, offsets, fieldCount); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header + finalBuffer.put(Constants.MAGIC); + finalBuffer.put(Constants.VERSION); + finalBuffer.put(header.getFlags().getValue()); + finalBuffer.putInt(header.getSchemaId().getFieldSpaceId()); + finalBuffer.putInt(header.getSchemaId().getSchemaHash()); + finalBuffer.putInt(header.getPayloadSize()); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintWriter.java b/src/main/java/com/imprint/core/ImprintWriter.java deleted file mode 100644 index b1d5f53..0000000 --- a/src/main/java/com/imprint/core/ImprintWriter.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.imprint.core; - -import com.imprint.error.ErrorType; -import com.imprint.error.ImprintException; -import com.imprint.types.Value; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Objects; -import java.util.TreeMap; - -/** - * A writer for constructing ImprintRecords by adding fields sequentially. 
- */ -public final class ImprintWriter { - private final SchemaId schemaId; - private final TreeMap fields; // keep fields in sorted order - - public ImprintWriter(SchemaId schemaId) { - this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); - this.fields = new TreeMap<>(); - } - - /** - * Adds a field to the record being built. - */ - public ImprintWriter addField(int id, Value value) { - Objects.requireNonNull(value, "Value cannot be null"); - this.fields.put(id, value); - return this; - } - - /** - * Consumes the writer and builds an ImprintRecord. - */ - public ImprintRecord build() throws ImprintException { - var directory = new ArrayList(fields.size()); - var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); - payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - - for (var entry : fields.entrySet()) { - int fieldId = entry.getKey(); - var value = entry.getValue(); - - directory.add(new DirectoryEntry(fieldId, value.getTypeCode(), payloadBuffer.position())); - serializeValue(value, payloadBuffer); - } - - // Create read-only view of the payload without copying - payloadBuffer.flip(); // limit = position, position = 0 - var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - - var header = new Header(new Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, directory, payloadView); - } - - private int estimatePayloadSize() throws ImprintException { - // More accurate estimation to reduce allocations - int estimatedSize = 0; - for (var value : fields.values()) { - estimatedSize += estimateValueSize(value); - } - // Add 25% buffer to reduce reallocations - return Math.max(estimatedSize + (estimatedSize / 4), fields.size() * 16); - } - - /** - * Estimates the serialized size in bytes for a given value. - * This method provides size estimates for payload buffer allocation, - * supporting both array-based and ByteBuffer-based value types. 
- * - * @param value the value to estimate size for - * @return estimated size in bytes including type-specific overhead - */ - private int estimateValueSize(Value value) throws ImprintException { - // Use TypeHandler for simple types - switch (value.getTypeCode()) { - case NULL: - case BOOL: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - case BYTES: - case STRING: - case ARRAY: - case MAP: - return value.getTypeCode().getHandler().estimateSize(value); - - case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - return rowValue.getValue().estimateSerializedSize(); - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); - } - } - - - private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { - switch (value.getTypeCode()) { - case NULL: - case BOOL: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - case BYTES: - case STRING: - case ARRAY: - case MAP: - value.getTypeCode().getHandler().serialize(value, buffer); - break; - //TODO eliminate this switch entirely by implementing a ROW TypeHandler - case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - var serializedRow = rowValue.getValue().serializeToBuffer(); - buffer.put(serializedRow); - break; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/error/ErrorType.java b/src/main/java/com/imprint/error/ErrorType.java index 49784ef..63a8c60 100644 --- a/src/main/java/com/imprint/error/ErrorType.java +++ b/src/main/java/com/imprint/error/ErrorType.java @@ -13,6 +13,7 @@ public enum ErrorType { MALFORMED_VARINT, TYPE_MISMATCH, INVALID_TYPE_CODE, + INVALID_BUFFER, SERIALIZATION_ERROR, DESERIALIZATION_ERROR, INTERNAL_ERROR diff --git a/src/main/java/com/imprint/ops/ImprintOperations.java b/src/main/java/com/imprint/ops/ImprintOperations.java new file mode 100644 index 0000000..f15e6a1 --- /dev/null +++ b/src/main/java/com/imprint/ops/ImprintOperations.java @@ -0,0 +1,378 @@ +package com.imprint.ops; + +import com.imprint.Constants; +import com.imprint.core.*; +import com.imprint.error.ErrorType; +import com.imprint.error.ImprintException; +import com.imprint.util.VarInt; +import lombok.Value; +import lombok.experimental.UtilityClass; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.*; + +@UtilityClass +public class ImprintOperations { + + /** + * Pure bytes-to-bytes merge operation that avoids all object creation. + * Performs merge directly on serialized Imprint record buffers. 
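// Sketch of the duplicate-handling rule in this merge: both directories are sorted by
// field id, so one two-pointer pass emits ids in order and, when the same id appears on
// both sides, keeps the first record's field and skips the second's. Shown on plain int
// arrays for clarity; the real code walks raw 7-byte directory entries instead.
import java.util.ArrayList;
import java.util.List;

final class MergeOrderSketch {
    static List<Integer> mergeIds(int[] first, int[] second) {
        var out = new ArrayList<Integer>(first.length + second.length);
        int i = 0, j = 0;
        while (i < first.length || j < second.length) {
            if (j >= second.length || (i < first.length && first[i] <= second[j])) {
                out.add(first[i]);
                if (j < second.length && second[j] == first[i]) j++; // drop duplicate from second
                i++;
            } else {
                out.add(second[j]);
                j++;
            }
        }
        return out;
    }

    public static void main(String[] args) {
        // first wins on id 2; id 4 only exists in the second record
        System.out.println(mergeIds(new int[]{1, 2, 3}, new int[]{2, 4})); // [1, 2, 3, 4]
    }
}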
+ * + * @param firstBuffer Complete serialized Imprint record + * @param secondBuffer Complete serialized Imprint record + * @return Merged record as serialized bytes + * @throws ImprintException if merge fails + */ + public static ByteBuffer mergeBytes(ByteBuffer firstBuffer, ByteBuffer secondBuffer) throws ImprintException { + validateImprintBuffer(firstBuffer, "firstBuffer"); + validateImprintBuffer(secondBuffer, "secondBuffer"); + + // Work on duplicates to avoid affecting original positions + var first = firstBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + var second = secondBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse headers + var firstHeader = parseHeaderOnly(first); + var secondHeader = parseHeaderOnly(second); + + // Extract directory and payload sections + var firstSections = extractSections(first, firstHeader); + var secondSections = extractSections(second, secondHeader); + + // Perform raw merge + return mergeRawSections(firstHeader, firstSections, secondSections); + } + + /** + * Parse just the header without advancing buffer past it + */ + private static Header parseHeaderOnly(ByteBuffer buffer) throws ImprintException { + return ImprintRecord.parseHeaderFromBuffer(buffer); + } + + /** + * Extract directory and payload sections from a buffer + */ + private static ImprintRecord.BufferSections extractSections(ByteBuffer buffer, Header header) throws ImprintException { + return ImprintRecord.extractBufferSections(buffer, header); + } + + /** + * Merge raw directory and payload sections without object creation + */ + private static ByteBuffer mergeRawSections(Header firstHeader, ImprintRecord.BufferSections firstSections, ImprintRecord.BufferSections secondSections) throws ImprintException { + // Prepare directory iterators + var firstDirIter = new RawDirectoryIterator(firstSections.directoryBuffer); + var secondDirIter = new RawDirectoryIterator(secondSections.directoryBuffer); + + // Pre-allocate - worst case is sum of both directory counts + int maxEntries = firstSections.directoryCount + secondSections.directoryCount; + var mergedDirectoryEntries = new ArrayList(maxEntries); + var mergedChunks = new ArrayList(maxEntries); + + int totalMergedPayloadSize = 0; + int currentMergedOffset = 0; + + RawDirectoryEntry firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; + RawDirectoryEntry secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; + + // Merge directories and collect payload chunks + while (firstEntry != null || secondEntry != null) { + RawDirectoryEntry currentEntry; + ByteBuffer sourcePayload; + + if (firstEntry != null && (secondEntry == null || firstEntry.fieldId <= secondEntry.fieldId)) { + // Take from first + currentEntry = firstEntry; + sourcePayload = getFieldPayload(firstSections.payloadBuffer, firstEntry, firstDirIter); + + // Skip duplicate in second if present + if (secondEntry != null && firstEntry.fieldId == secondEntry.fieldId) { + secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; + } + firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; + } else { + // Take from second + currentEntry = secondEntry; + sourcePayload = getFieldPayload(secondSections.payloadBuffer, secondEntry, secondDirIter); + secondEntry = secondDirIter.hasNext() ? 
secondDirIter.next() : null; + } + + // Add to merged directory with adjusted offset + var adjustedEntry = new RawDirectoryEntry(currentEntry.fieldId, currentEntry.typeCode, currentMergedOffset); + mergedDirectoryEntries.add(adjustedEntry); + + // Collect payload chunk + mergedChunks.add(sourcePayload.duplicate()); + currentMergedOffset += sourcePayload.remaining(); + totalMergedPayloadSize += sourcePayload.remaining(); + } + + // Build final merged buffer + return buildSerializedBuffer(firstHeader, mergedDirectoryEntries, mergedChunks, totalMergedPayloadSize); + } + + /** + * Get payload bytes for a specific field using iterator state + */ + private static ByteBuffer getFieldPayload(ByteBuffer payload, RawDirectoryEntry entry, RawDirectoryIterator iterator) { + int startOffset = entry.offset; + int endOffset = iterator.getNextEntryOffset(payload.limit()); + + var fieldPayload = payload.duplicate(); + fieldPayload.position(startOffset); + fieldPayload.limit(endOffset); + return fieldPayload.slice(); + } + + + /** + * Pure bytes-to-bytes projection operation that avoids all object creation. + * Projects a subset of fields directly from a serialized Imprint record. + * + * @param sourceBuffer Complete serialized Imprint record + * @param fieldIds Array of field IDs to include in projection + * @return Projected record as serialized bytes + * @throws ImprintException if projection fails + */ + public static ByteBuffer projectBytes(ByteBuffer sourceBuffer, int... fieldIds) throws ImprintException { + validateImprintBuffer(sourceBuffer, "sourceBuffer"); + + if (fieldIds == null || fieldIds.length == 0) { + return createEmptyRecordBytes(); + } + + // Sort field IDs for efficient merge algorithm (duplicates handled naturally) + var sortedFieldIds = fieldIds.clone(); + Arrays.sort(sortedFieldIds); + + // Work on duplicate to avoid affecting original position + var source = sourceBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header + var header = parseHeaderOnly(source); + + // Extract sections + var sections = extractSections(source, header); + + // Perform raw projection + return projectRawSections(header, sections, sortedFieldIds); + } + + /** + * Project raw sections without object creation using optimized merge algorithm. + * Uses direct array operations and optimized memory access for maximum performance. + */ + private static ByteBuffer projectRawSections(Header originalHeader, ImprintRecord.BufferSections sections, int[] sortedRequestedFields) throws ImprintException { + + if (sortedRequestedFields.length == 0) { + return buildSerializedBuffer(originalHeader, new RawDirectoryEntry[0], new ByteBuffer[0]); + } + + // Use pre-sized ArrayLists to avoid System.arraycopy but still be efficient + var projectedEntries = new ArrayList(sortedRequestedFields.length); + var payloadChunks = new ArrayList(sortedRequestedFields.length); + int totalProjectedPayloadSize = 0; + int currentOffset = 0; + int requestedIndex = 0; + + // Optimize: Cache payload buffer reference to avoid getter calls + var payloadBuffer = sections.payloadBuffer; + + // Merge algorithm: two-pointer approach through sorted sequences + var dirIterator = new RawDirectoryIterator(sections.directoryBuffer); + RawDirectoryEntry currentEntry = dirIterator.hasNext() ? 
dirIterator.next() : null; + + while (currentEntry != null && requestedIndex < sortedRequestedFields.length) { + int fieldId = currentEntry.fieldId; + int targetFieldId = sortedRequestedFields[requestedIndex]; + + if (fieldId == targetFieldId) { + var fieldPayload = getFieldPayload(payloadBuffer, currentEntry, dirIterator); + + // Add to projection with adjusted offset + projectedEntries.add(new RawDirectoryEntry(currentEntry.fieldId, currentEntry.typeCode, currentOffset)); + + // Collect payload chunk here (fieldPayload is already sliced) + payloadChunks.add(fieldPayload); + + int payloadSize = fieldPayload.remaining(); + currentOffset += payloadSize; + totalProjectedPayloadSize += payloadSize; + + // Advance both pointers (handle dupes by advancing to next unique field) + do { + requestedIndex++; + } while (requestedIndex < sortedRequestedFields.length && sortedRequestedFields[requestedIndex] == targetFieldId); + + currentEntry = dirIterator.hasNext() ? dirIterator.next() : null; + } else if (fieldId < targetFieldId) { + // Directory field is smaller, advance directory pointer + currentEntry = dirIterator.hasNext() ? dirIterator.next() : null; + } else { + // fieldId > targetFieldId - implies requested field isn't in the directory so advance requested pointer + requestedIndex++; + } + } + + return buildSerializedBuffer(originalHeader, projectedEntries, payloadChunks, totalProjectedPayloadSize); + } + + /** + * Build a serialized Imprint record buffer from header, directory entries, and payload chunks. + */ + private static ByteBuffer buildSerializedBuffer(Header originalHeader, RawDirectoryEntry[] directoryEntries, ByteBuffer[] payloadChunks) { + return buildSerializedBuffer(originalHeader, Arrays.asList(directoryEntries), Arrays.asList(payloadChunks), 0); + } + + private static ByteBuffer buildSerializedBuffer(Header originalHeader, List directoryEntries, List payloadChunks, int totalPayloadSize) { + int directorySize = ImprintRecord.calculateDirectorySize(directoryEntries.size()); + int totalSize = Constants.HEADER_BYTES + directorySize + totalPayloadSize; + var finalBuffer = ByteBuffer.allocate(totalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header (preserve original schema) + finalBuffer.put(Constants.MAGIC); + finalBuffer.put(Constants.VERSION); + finalBuffer.put(originalHeader.getFlags().getValue()); + finalBuffer.putInt(originalHeader.getSchemaId().getFieldSpaceId()); + finalBuffer.putInt(originalHeader.getSchemaId().getSchemaHash()); + finalBuffer.putInt(totalPayloadSize); + + // Write directory + VarInt.encode(directoryEntries.size(), finalBuffer); + for (var entry : directoryEntries) { + finalBuffer.putShort(entry.fieldId); + finalBuffer.put(entry.typeCode); + finalBuffer.putInt(entry.offset); + } + + // Write payload + for (var chunk : payloadChunks) + finalBuffer.put(chunk); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + + + /** + * Create an empty record as serialized bytes + */ + private static ByteBuffer createEmptyRecordBytes() { + // Minimal header + empty directory + empty payload + var buffer = ByteBuffer.allocate(Constants.HEADER_BYTES + 1); // +1 for varint 0 + buffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header for empty record + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put((byte) 0x01); + buffer.putInt(0); + buffer.putInt(0); + buffer.putInt(0); + + // Write empty directory + VarInt.encode(0, buffer); + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } + + /** + * Validates 
that a ByteBuffer contains valid Imprint data by checking magic bytes and basic structure. + * + * @param buffer Buffer to validate + * @param paramName Parameter name for error messages + * @throws ImprintException if buffer is invalid + */ + private static void validateImprintBuffer(ByteBuffer buffer, String paramName) throws ImprintException { + if (buffer == null) { + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " cannot be null"); + } + + if (buffer.remaining() < Constants.HEADER_BYTES) { + throw new ImprintException(ErrorType.INVALID_BUFFER, + paramName + " too small to contain valid Imprint header (minimum " + Constants.HEADER_BYTES + " bytes)"); + } + + // Check invariants without advancing buffer position + var duplicate = buffer.duplicate(); + byte magic = duplicate.get(); + byte version = duplicate.get(); + if (magic != Constants.MAGIC) + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " does not contain valid Imprint magic byte"); + if (version != Constants.VERSION) + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " contains unsupported Imprint version: " + version); + } + + /** + * Directory entry container used for raw byte operations + */ + @Value + private static class RawDirectoryEntry { + short fieldId; + byte typeCode; + int offset; + } + + /** + * Iterator that parses directory entries directly from raw bytes + */ + private static class RawDirectoryIterator { + private final ByteBuffer buffer; + private final int totalCount; + private final int directoryStartPos; + private int currentIndex; + + RawDirectoryIterator(ByteBuffer directoryBuffer) throws ImprintException { + this.buffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Read count and advance to first entry + var countResult = VarInt.decode(buffer); + this.totalCount = countResult.getValue(); + this.directoryStartPos = buffer.position(); + this.currentIndex = 0; + } + + boolean hasNext() { + return currentIndex < totalCount; + } + + RawDirectoryEntry next() throws ImprintException { + if (!hasNext()) + throw new RuntimeException("No more directory entries"); + + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); + + short fieldId = buffer.getShort(); + byte typeCode = buffer.get(); + int offset = buffer.getInt(); + + currentIndex++; + return new RawDirectoryEntry(fieldId, typeCode, offset); + } + + /** + * Get the offset of the next entry without state overhead. + * Returns the provided fallback if this is the last entry. 
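// Sketch of how a field's byte extent is derived: a directory entry stores only the start
// offset, so the end is the next entry's offset, with the payload limit as the fallback
// for the last field (the fallback described in the getNextEntryOffset() javadoc above).
// Plain arrays stand in for the raw directory here.
final class FieldExtentSketch {
    /** offsets are the directory offsets in id order; returns {start, end} for index i. */
    static int[] extent(int[] offsets, int i, int payloadLimit) {
        int start = offsets[i];
        int end = (i + 1 < offsets.length) ? offsets[i + 1] : payloadLimit;
        return new int[]{start, end};
    }

    public static void main(String[] args) {
        int[] offsets = {0, 8, 20};          // three fields packed back to back
        int[] last = extent(offsets, 2, 32); // last field runs to the payload limit
        System.out.println(last[0] + ".." + last[1]); // 20..32
    }
}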
+ */ + int getNextEntryOffset(int fallbackOffset) { + if (currentIndex >= totalCount) + return fallbackOffset; + + // Calculate position of next entry directly + int nextEntryPos = directoryStartPos + (currentIndex * Constants.DIR_ENTRY_BYTES); + + // Bounds check - optimized to single comparison + if (nextEntryPos + 7 > buffer.limit()) { // DIR_ENTRY_BYTES = 7 + return fallbackOffset; + } + + // Read just the offset field (skip fieldId and typeCode) + return buffer.getInt(nextEntryPos + 3); // 2 bytes fieldId + 1 byte typeCode = 3 offset + } + } +} diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index a81b199..3447f8b 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -19,11 +19,19 @@ public enum TypeCode { ARRAY(0x8, TypeHandler.ARRAY), MAP(0x9, TypeHandler.MAP), ROW(0xA, null); // TODO: implement (basically a placeholder for user-defined type) - + @Getter private final byte code; private final TypeHandler handler; - + + private static final TypeCode[] LOOKUP = new TypeCode[11]; + + static { + for (var type : values()) { + LOOKUP[type.code] = type; + } + } + TypeCode(int code, TypeHandler handler) { this.code = (byte) code; this.handler = handler; @@ -35,14 +43,13 @@ public TypeHandler getHandler() { } return handler; } - + public static TypeCode fromByte(byte code) throws ImprintException { - for (TypeCode type : values()) { - if (type.code == code) { - return type; - } + if (code >= 0 && code < LOOKUP.length) { + var type = LOOKUP[code]; + if (type != null) return type; } - throw new ImprintException(ErrorType.INVALID_TYPE_CODE, - "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); + throw new ImprintException(ErrorType.INVALID_TYPE_CODE, + "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 634867b..dbc875f 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -218,7 +218,7 @@ public void serialize(Value value, ByteBuffer buffer) { buffer.put(stringBytes); } } - + @Override public int estimateSize(Value value) { if (value instanceof Value.StringBufferValue) { @@ -227,8 +227,8 @@ public int estimateSize(Value value) { return VarInt.encodedLength(length) + length; } else { Value.StringValue stringValue = (Value.StringValue) value; - byte[] utf8Bytes = stringValue.getUtf8Bytes(); - return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; + int utf8Length = stringValue.getUtf8Length(); // Uses cached bytes + return VarInt.encodedLength(utf8Length) + utf8Length; } } }; diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index bfa9958..070c497 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -57,10 +57,11 @@ public static Value fromBytesBuffer(ByteBuffer value) { public static Value fromString(String value) { return new StringValue(value); } - + public static Value fromStringBuffer(ByteBuffer value) { return new StringBufferValue(value); } + public static Value fromArray(List value) { return new ArrayValue(value); @@ -284,20 +285,21 @@ public String toString() { public static class StringValue extends Value { @Getter private final String value; - private volatile byte[] cachedUtf8Bytes; // Cache UTF-8 encoding + private 
byte[] utf8BytesCache; public StringValue(String value) { this.value = Objects.requireNonNull(value, "String cannot be null"); } public byte[] getUtf8Bytes() { - var cached = cachedUtf8Bytes; - if (cached == null) { - // UTF8 is idempotent so no need to synchronize - cached = value.getBytes(StandardCharsets.UTF_8); - cachedUtf8Bytes = cached; + if (utf8BytesCache == null) { + utf8BytesCache = value.getBytes(StandardCharsets.UTF_8); } - return cached; // Return computed value + return utf8BytesCache; + } + + public int getUtf8Length() { + return getUtf8Bytes().length; } @Override @@ -332,7 +334,7 @@ public String toString() { // String Value (ByteBuffer-based) public static class StringBufferValue extends Value { private final ByteBuffer value; - private volatile String cachedString; + private String cachedString; private static final int THREAD_LOCAL_BUFFER_SIZE = 1024; private static final ThreadLocal DECODE_BUFFER_CACHE = diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index f43683b..70c9095 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -70,13 +70,10 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { int bytesRead = 0; while (true) { - if (bytesRead >= MAX_VARINT_LEN) { + if (bytesRead >= MAX_VARINT_LEN) throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt too long"); - } - if (!buffer.hasRemaining()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Unexpected end of data while reading VarInt"); - } + if (!buffer.hasRemaining()) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Unexpected end of data while reading VarInt"); byte b = buffer.get(); bytesRead++; diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index ee1d426..e066f01 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -623,4 +623,246 @@ void testTypeGetterRow() throws ImprintException { assertEquals(999L, retrievedRow.getInt64(102)); assertEquals("outer field", deserializedWithRow.getString(202)); } + + @Test + @DisplayName("Boundary Values: Numeric limits and special floating point values") + void testNumericBoundaryValues() throws ImprintException { + var schemaId = new SchemaId(60, 0xB0DA12); + var record = ImprintRecord.builder(schemaId) + .field(1, Integer.MAX_VALUE) + .field(2, Integer.MIN_VALUE) + .field(3, Long.MAX_VALUE) + .field(4, Long.MIN_VALUE) + .field(5, Float.MAX_VALUE) + .field(6, Float.MIN_VALUE) + .field(7, Float.NaN) + .field(8, Float.POSITIVE_INFINITY) + .field(9, Float.NEGATIVE_INFINITY) + .field(10, Double.MAX_VALUE) + .field(11, Double.MIN_VALUE) + .field(12, Double.NaN) + .field(13, Double.POSITIVE_INFINITY) + .field(14, Double.NEGATIVE_INFINITY) + .field(15, -0.0f) + .field(16, -0.0) + .build(); + + var deserialized = serializeAndDeserialize(record); + + assertEquals(Integer.MAX_VALUE, deserialized.getInt32(1)); + assertEquals(Integer.MIN_VALUE, deserialized.getInt32(2)); + assertEquals(Long.MAX_VALUE, deserialized.getInt64(3)); + assertEquals(Long.MIN_VALUE, deserialized.getInt64(4)); + assertEquals(Float.MAX_VALUE, deserialized.getFloat32(5)); + assertEquals(Float.MIN_VALUE, deserialized.getFloat32(6)); + assertTrue(Float.isNaN(deserialized.getFloat32(7))); + assertTrue(Float.isInfinite(deserialized.getFloat32(8)) && deserialized.getFloat32(8) > 0); + 
assertTrue(Float.isInfinite(deserialized.getFloat32(9)) && deserialized.getFloat32(9) < 0); + assertEquals(Double.MAX_VALUE, deserialized.getFloat64(10)); + assertEquals(Double.MIN_VALUE, deserialized.getFloat64(11)); + assertTrue(Double.isNaN(deserialized.getFloat64(12))); + assertTrue(Double.isInfinite(deserialized.getFloat64(13)) && deserialized.getFloat64(13) > 0); + assertTrue(Double.isInfinite(deserialized.getFloat64(14)) && deserialized.getFloat64(14) < 0); + assertEquals(-0.0f, deserialized.getFloat32(15)); + assertEquals(-0.0, deserialized.getFloat64(16)); + } + + @Test + @DisplayName("Unicode and Special Strings: International character support") + void testUnicodeAndSpecialStrings() throws ImprintException { + var schemaId = new SchemaId(61, 0x04100DE); + var record = ImprintRecord.builder(schemaId) + .field(1, "") // Empty string + .field(2, " ") // Single space + .field(3, "\n\t\r") // Whitespace characters + .field(4, "Hello, 世界! 🌍🚀") // Unicode: CJK + Emoji + .field(5, "مرحبا بالعالم") // Arabic (RTL) + .field(6, "Здравствуй мир") // Cyrillic + .field(7, "こんにちは世界") // Japanese + .field(8, "\u0000\u0001\u001F") // Control characters + .field(9, "A".repeat(10000)) // Large string + .build(); + + var deserialized = serializeAndDeserialize(record); + + assertEquals("", deserialized.getString(1)); + assertEquals(" ", deserialized.getString(2)); + assertEquals("\n\t\r", deserialized.getString(3)); + assertEquals("Hello, 世界! 🌍🚀", deserialized.getString(4)); + assertEquals("مرحبا بالعالم", deserialized.getString(5)); + assertEquals("Здравствуй мир", deserialized.getString(6)); + assertEquals("こんにちは世界", deserialized.getString(7)); + assertEquals("\u0000\u0001\u001F", deserialized.getString(8)); + assertEquals("A".repeat(10000), deserialized.getString(9)); + } + + @Test + @DisplayName("Deep Nesting: Multiple levels of nested records") + void testDeepNesting() throws ImprintException { + // Create 5 levels of nesting + var level5 = ImprintRecord.builder(new SchemaId(65, 5)) + .field(1, "deepest level") + .build(); + + var level4 = ImprintRecord.builder(new SchemaId(64, 4)) + .field(1, level5) + .field(2, "level 4") + .build(); + + var level3 = ImprintRecord.builder(new SchemaId(63, 3)) + .field(1, level4) + .field(2, "level 3") + .build(); + + var level2 = ImprintRecord.builder(new SchemaId(62, 2)) + .field(1, level3) + .field(2, "level 2") + .build(); + + var level1 = ImprintRecord.builder(new SchemaId(61, 1)) + .field(1, level2) + .field(2, "level 1") + .build(); + + var deserialized = serializeAndDeserialize(level1); + + // Navigate through all levels + assertEquals("level 1", deserialized.getString(2)); + var l2 = deserialized.getRow(1); + assertEquals("level 2", l2.getString(2)); + var l3 = l2.getRow(1); + assertEquals("level 3", l3.getString(2)); + var l4 = l3.getRow(1); + assertEquals("level 4", l4.getString(2)); + var l5 = l4.getRow(1); + assertEquals("deepest level", l5.getString(1)); + } + + @Test + @DisplayName("Map Key Types: All supported map key types") + void testMapKeyTypeVariations() throws ImprintException { + var schemaId = new SchemaId(70, 0xAAB5E75); + + // Create maps with different key types + var stringKeyMap = new HashMap(); + stringKeyMap.put(MapKey.fromString("string_key"), Value.fromString("string_value")); + + var intKeyMap = new HashMap(); + intKeyMap.put(MapKey.fromInt32(42), Value.fromString("int_value")); + + var longKeyMap = new HashMap(); + longKeyMap.put(MapKey.fromInt64(9876543210L), Value.fromString("long_value")); + + var bytesKeyMap = new 
HashMap(); + bytesKeyMap.put(MapKey.fromBytes(new byte[]{1, 2, 3}), Value.fromString("bytes_value")); + + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromMap(stringKeyMap)) + .field(2, Value.fromMap(intKeyMap)) + .field(3, Value.fromMap(longKeyMap)) + .field(4, Value.fromMap(bytesKeyMap)) + .build(); + + var deserialized = serializeAndDeserialize(record); + + // Verify all map key types work correctly + assertEquals(Value.fromString("string_value"), + deserialized.getMap(1).get(MapKey.fromString("string_key"))); + assertEquals(Value.fromString("int_value"), + deserialized.getMap(2).get(MapKey.fromInt32(42))); + assertEquals(Value.fromString("long_value"), + deserialized.getMap(3).get(MapKey.fromInt64(9876543210L))); + assertEquals(Value.fromString("bytes_value"), + deserialized.getMap(4).get(MapKey.fromBytes(new byte[]{1, 2, 3}))); + } + + @Test + @DisplayName("Large Data: Memory efficiency with large payloads") + void testLargeDataHandling() throws ImprintException { + var schemaId = new SchemaId(80, 0xB16DA7A); + + // Create large byte arrays + byte[] largeBytes1 = new byte[100_000]; // 100KB + byte[] largeBytes2 = new byte[500_000]; // 500KB + Arrays.fill(largeBytes1, (byte) 0xAA); + Arrays.fill(largeBytes2, (byte) 0xBB); + + // Create large string + String largeString = "Large data test: " + "X".repeat(50_000); + + var record = ImprintRecord.builder(schemaId) + .field(1, largeBytes1) + .field(2, largeBytes2) + .field(3, largeString) + .field(4, "small field") + .build(); + + // Verify large record can be serialized and deserialized + var deserialized = serializeAndDeserialize(record); + + assertArrayEquals(largeBytes1, deserialized.getBytes(1)); + assertArrayEquals(largeBytes2, deserialized.getBytes(2)); + assertEquals(largeString, deserialized.getString(3)); + assertEquals("small field", deserialized.getString(4)); + + // Test projection still works with large data + var projected = record.project(4); + assertEquals(1, projected.getDirectory().size()); + assertEquals("small field", projected.getString(4)); + + // Verify original large data is excluded from projection + assertTrue(projected.getSerializedSize() < record.getSerializedSize() / 10); + } + + @Test + @DisplayName("Error Handling: Empty data detection") + void testEmptyDataHandling() { + // Empty data should throw exception + assertThrows(Exception.class, () -> ImprintRecord.deserialize(new byte[0])); + + // Null data should throw exception + assertThrows(Exception.class, () -> ImprintRecord.deserialize((byte[]) null)); + } + + @Test + @DisplayName("Complex Operations: Bytes-to-bytes vs object operations equivalence") + void testBytesToBytesEquivalence() throws ImprintException { + var schemaId = new SchemaId(100, 0xB17E5); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, "record1 field1") + .field(3, 100) + .field(5, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, "record2 field2") + .field(4, 200L) + .field(6, 3.14) + .build(); + + // Test merge equivalence + var objectMerged = record1.merge(record2); + var bytesMerged = com.imprint.ops.ImprintOperations.mergeBytes( + record1.serializeToBuffer(), + record2.serializeToBuffer() + ); + var bytesMergedRecord = ImprintRecord.deserialize(bytesMerged); + + assertEquals(objectMerged.getDirectory().size(), bytesMergedRecord.getDirectory().size()); + assertEquals(objectMerged.getString(1), bytesMergedRecord.getString(1)); + assertEquals(objectMerged.getString(2), bytesMergedRecord.getString(2)); + 
assertEquals(objectMerged.getInt32(3), bytesMergedRecord.getInt32(3)); + + // Test project equivalence + var objectProjected = record1.project(1, 3); + var bytesProjected = com.imprint.ops.ImprintOperations.projectBytes( + record1.serializeToBuffer(), 1, 3 + ); + var bytesProjectedRecord = ImprintRecord.deserialize(bytesProjected); + + assertEquals(objectProjected.getDirectory().size(), bytesProjectedRecord.getDirectory().size()); + assertEquals(objectProjected.getString(1), bytesProjectedRecord.getString(1)); + assertEquals(objectProjected.getInt32(3), bytesProjectedRecord.getInt32(3)); + } } \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintFieldObjectMapTest.java b/src/test/java/com/imprint/core/ImprintFieldObjectMapTest.java new file mode 100644 index 0000000..cb6637f --- /dev/null +++ b/src/test/java/com/imprint/core/ImprintFieldObjectMapTest.java @@ -0,0 +1,318 @@ +package com.imprint.core; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeEach; +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for IntObjectMap - specialized short→object map optimized for field IDs. + */ +class ImprintFieldObjectMapTest { + + private ImprintFieldObjectMap map; + + @BeforeEach + void setUp() { + map = new ImprintFieldObjectMap<>(); + } + + @Test + void shouldPutAndGetBasicOperations() { + map.put(1, "one"); + map.put(5, "five"); + map.put(10, "ten"); + + assertEquals("one", map.get(1)); + assertEquals("five", map.get(5)); + assertEquals("ten", map.get(10)); + assertNull(map.get(99)); + assertEquals(3, map.size()); + } + + @Test + void shouldHandleKeyValidation() { + // Valid keys (0 to Short.MAX_VALUE) + map.put(0, "zero"); + map.put(Short.MAX_VALUE, "max"); + + // Invalid keys + assertThrows(IllegalArgumentException.class, () -> map.put(-1, "negative")); + assertThrows(IllegalArgumentException.class, () -> map.put(Short.MAX_VALUE + 1, "too_large")); + } + + @Test + void shouldHandleContainsKey() { + map.put(1, "one"); + map.put(5, "five"); + + assertTrue(map.containsKey(1)); + assertTrue(map.containsKey(5)); + assertFalse(map.containsKey(99)); + assertFalse(map.containsKey(-1)); + assertFalse(map.containsKey(Short.MAX_VALUE + 1)); + } + + @Test + void shouldOverwriteExistingKeys() { + map.put(1, "original"); + assertEquals("original", map.get(1)); + assertEquals(1, map.size()); + + map.put(1, "updated"); + assertEquals("updated", map.get(1)); + assertEquals(1, map.size()); // Size should not increase + } + + @Test + void shouldGetKeysArray() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + + int[] keys = map.getKeys(); + assertEquals(3, keys.length); + + // Convert to set for order-independent comparison + var keySet = java.util.Arrays.stream(keys).boxed() + .collect(java.util.stream.Collectors.toSet()); + + assertTrue(keySet.contains(1)); + assertTrue(keySet.contains(3)); + assertTrue(keySet.contains(7)); + } + + @Test + void shouldSortValuesNonDestructively() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + map.put(2, "two"); + + // Test non-destructive sort + String[] sorted = map.getSortedValuesCopy(new String[0]); + + assertEquals(4, sorted.length); + assertEquals("one", sorted[0]); // key 1 + assertEquals("two", sorted[1]); // key 2 + assertEquals("three", sorted[2]); // key 3 + assertEquals("seven", sorted[3]); // key 7 + + // Verify map is still functional after non-destructive sort + assertEquals("three", map.get(3)); + assertEquals("one", map.get(1)); + assertEquals(4, 
map.size()); + + // Should be able to call multiple times + String[] sorted2 = map.getSortedValuesCopy(new String[0]); + assertArrayEquals(sorted, sorted2); + } + + @Test + void shouldSortValuesDestructively() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + map.put(2, "two"); + + // Test destructive sort + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + + assertEquals(4, result.count); + assertEquals("one", result.values[0]); // key 1 + assertEquals("two", result.values[1]); // key 2 + assertEquals("three", result.values[2]); // key 3 + assertEquals("seven", result.values[3]); // key 7 + } + + @Test + void shouldPoisonMapAfterDestructiveSort() { + map.put(1, "one"); + map.put(2, "two"); + + // Perform destructive sort + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + assertNotNull(result); + + // All operations should throw IllegalStateException after poisoning + assertThrows(IllegalStateException.class, () -> map.put(3, "three")); + assertThrows(IllegalStateException.class, () -> map.get(1)); + assertThrows(IllegalStateException.class, () -> map.containsKey(1)); + assertThrows(IllegalStateException.class, () -> map.getSortedValuesCopy(new String[0])); + + // Size and isEmpty should still work (they don't check poisoned state) + assertEquals(2, map.size()); + assertFalse(map.isEmpty()); + } + + @Test + void shouldHandleEmptyMapSorting() { + // Test non-destructive sort on empty map + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(0, sorted.length); + + // Test destructive sort on empty map + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + assertEquals(0, result.count); + + // Map should be poisoned even after empty destructive sort + assertThrows(IllegalStateException.class, () -> map.put(1, "one")); + } + + @Test + void shouldHandleSingleElementSorting() { + map.put(42, "answer"); + + // Test non-destructive sort + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(1, sorted.length); + assertEquals("answer", sorted[0]); + + // Test destructive sort on fresh map + ImprintFieldObjectMap map2 = new ImprintFieldObjectMap<>(); + map2.put(42, "answer"); + + ImprintFieldObjectMap.SortedValuesResult result = map2.getSortedValues(); + assertEquals(1, result.count); + assertEquals("answer", result.values[0]); + } + + @Test + void shouldHandleHashCollisions() { + // Add many entries to trigger collisions and resizing + for (int i = 0; i < 1000; i++) { + map.put(i, "value_" + i); + } + + // Verify all entries are accessible + for (int i = 0; i < 1000; i++) { + assertEquals("value_" + i, map.get(i)); + assertTrue(map.containsKey(i)); + } + + assertEquals(1000, map.size()); + + // Test sorting with many entries + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(1000, sorted.length); + + // Verify sorting is correct + for (int i = 0; i < 1000; i++) { + assertEquals("value_" + i, sorted[i]); + } + } + + @Test + void shouldReuseResultArrayForNonDestructiveSort() { + map.put(1, "one"); + map.put(2, "two"); + + String[] reusableArray = new String[2]; + String[] result = map.getSortedValuesCopy(reusableArray); + + assertSame(reusableArray, result); // Should reuse the same array + assertEquals("one", result[0]); + assertEquals("two", result[1]); + + // Test with wrong size array - should create new array + String[] wrongSizeArray = new String[5]; + String[] result2 = map.getSortedValuesCopy(wrongSizeArray); + + 
assertNotSame(wrongSizeArray, result2); // Should create new array + assertEquals(2, result2.length); + assertEquals("one", result2[0]); + assertEquals("two", result2[1]); + } + + @Test + void shouldHandleMaxShortValue() { + int maxKey = Short.MAX_VALUE; + map.put(maxKey, "max_value"); + map.put(0, "zero"); + map.put(maxKey - 1, "almost_max"); + + assertEquals("max_value", map.get(maxKey)); + assertEquals("zero", map.get(0)); + assertEquals("almost_max", map.get(maxKey - 1)); + + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals("zero", sorted[0]); + assertEquals("almost_max", sorted[1]); + assertEquals("max_value", sorted[2]); + } + + @Test + void shouldMaintainSizeCorrectlyWithOverwrites() { + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + + map.put(1, "first"); + assertEquals(1, map.size()); + assertFalse(map.isEmpty()); + + map.put(1, "overwrite"); + assertEquals(1, map.size()); // Size should not change + + map.put(2, "second"); + assertEquals(2, map.size()); + + map.put(1, "overwrite_again"); + assertEquals(2, map.size()); // Size should not change + } + + @Test + void shouldStreamKeysWithoutAllocation() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + + // Stream keys without allocation + java.util.Set streamedKeys = map.streamKeys() + .boxed() + .collect(java.util.stream.Collectors.toSet()); + + assertEquals(3, streamedKeys.size()); + assertTrue(streamedKeys.contains(1)); + assertTrue(streamedKeys.contains(3)); + assertTrue(streamedKeys.contains(7)); + + // Should be able to stream multiple times + long count = map.streamKeys().count(); + assertEquals(3, count); + + // Test operations on stream + int sum = map.streamKeys().sum(); + assertEquals(11, sum); // 1 + 3 + 7 + + // Test filtering + long evenKeys = map.streamKeys().filter(k -> k % 2 == 0).count(); + assertEquals(0, evenKeys); + + long oddKeys = map.streamKeys().filter(k -> k % 2 == 1).count(); + assertEquals(3, oddKeys); + } + + @Test + void shouldThrowOnStreamKeysAfterPoisoning() { + map.put(1, "one"); + map.put(2, "two"); + + // Stream should work before poisoning + assertEquals(2, map.streamKeys().count()); + + // Poison the map + map.getSortedValues(); + + // Stream should throw after poisoning + assertThrows(IllegalStateException.class, () -> map.streamKeys()); + } + + @Test + void shouldStreamEmptyMapKeys() { + // Empty map should produce empty stream + assertEquals(0, map.streamKeys().count()); + + // Operations on empty stream should work + assertEquals(0, map.streamKeys().sum()); + assertEquals(java.util.OptionalInt.empty(), map.streamKeys().findFirst()); + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordTest.java b/src/test/java/com/imprint/core/ImprintRecordTest.java index 3e37473..562f5fd 100644 --- a/src/test/java/com/imprint/core/ImprintRecordTest.java +++ b/src/test/java/com/imprint/core/ImprintRecordTest.java @@ -1,232 +1,289 @@ package com.imprint.core; import com.imprint.error.ImprintException; -import com.imprint.error.ErrorType; -import com.imprint.types.Value; -import com.imprint.types.MapKey; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import java.util.*; -import static org.assertj.core.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.*; + +@DisplayName("ImprintRecord") class ImprintRecordTest { - - // Helper method to extract string value from either StringValue or 
StringBufferValue - private String getStringValue(Value value) { - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } else if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } else { - throw new IllegalArgumentException("Expected string value, got: " + value.getClass()); - } - } - - @Test - void shouldCreateSimpleRecord() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("hello")); - - var record = writer.build(); - - assertThat(record.getHeader().getSchemaId()).isEqualTo(schemaId); - assertThat(record.getDirectory()).hasSize(2); - - Value field1 = record.getValue(1); - Value field2 = record.getValue(2); - - assertThat(field1).isNotNull(); - assertThat(field1).isInstanceOf(Value.Int32Value.class); - assertThat(((Value.Int32Value) field1).getValue()).isEqualTo(42); - - assertThat(field2).isNotNull(); - assertThat(field2.getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); - String stringValue = getStringValue(field2); - assertThat(stringValue).isEqualTo("hello"); - - // Non-existent field should return null - assertThat(record.getValue(999)).isNull(); + + private SchemaId testSchema; + private ImprintRecord testRecord; + private ImprintRecord serializedRecord; + + @BeforeEach + void setUp() throws ImprintException { + testSchema = new SchemaId(1, 0x12345678); + testRecord = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(2, "hello") + .field(3, true) + .field(4, 3.14159) + .field(5, new byte[]{1, 2, 3, 4, 5}) + .build(); + serializedRecord = testRecord; } - - @Test - void shouldRoundtripThroughSerialization() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.nullValue()) - .addField(2, Value.fromBoolean(true)) - .addField(3, Value.fromInt32(42)) - .addField(4, Value.fromInt64(123456789L)) - .addField(5, Value.fromFloat32(3.14f)) - .addField(6, Value.fromFloat64(2.718281828)) - .addField(7, Value.fromBytes(new byte[]{1, 2, 3, 4})) - .addField(8, Value.fromString("test string")); - - var original = writer.build(); - - // Serialize and deserialize - var buffer = original.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - // Verify metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); - assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); - assertThat(deserialized.getDirectory()).hasSize(8); - - // Verify all values - assertThat(deserialized.getValue(1)).isEqualTo(Value.nullValue()); - assertThat(deserialized.getValue(2)).isEqualTo(Value.fromBoolean(true)); - assertThat(deserialized.getValue(3)).isEqualTo(Value.fromInt32(42)); - assertThat(deserialized.getValue(4)).isEqualTo(Value.fromInt64(123456789L)); - assertThat(deserialized.getValue(5)).isEqualTo(Value.fromFloat32(3.14f)); - assertThat(deserialized.getValue(6)).isEqualTo(Value.fromFloat64(2.718281828)); - assertThat(deserialized.getValue(7)).isEqualTo(Value.fromBytes(new byte[]{1, 2, 3, 4})); - assertThat(deserialized.getValue(8)).isEqualTo(Value.fromString("test string")); - - // Non-existent field - assertThat(deserialized.getValue(999)).isNull(); + + @Nested + @DisplayName("Creation") + class Creation { 
+ + @Test + @DisplayName("should create from ImprintRecord") + void shouldCreateFromImprintRecord() { + var serialized = testRecord; + + assertNotNull(serialized); + assertEquals(testRecord.getDirectory().size(), serialized.getFieldCount()); + assertEquals(testSchema, serialized.getSchemaId()); + } + + @Test + @DisplayName("should create from serialized bytes") + void shouldCreateFromSerializedBytes() throws ImprintException { + var bytes = testRecord.serializeToBuffer(); + var serialized = ImprintRecord.fromBytes(bytes); + + assertNotNull(serialized); + assertEquals(testRecord.getDirectory().size(), serialized.getFieldCount()); + assertEquals(testSchema, serialized.getSchemaId()); + } + + @Test + @DisplayName("should reject null bytes") + void shouldRejectNullBytes() { + assertThrows(NullPointerException.class, () -> ImprintRecord.fromBytes(null)); + } } - - @Test - void shouldHandleArrays() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - List intArray = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - - writer.addField(1, Value.fromArray(intArray)); - ImprintRecord record = writer.build(); - - // Serialize and deserialize - var buffer = record.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - Value arrayValue = deserialized.getValue(1); - assertThat(arrayValue).isNotNull(); - assertThat(arrayValue).isInstanceOf(Value.ArrayValue.class); - - List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); - assertThat(deserializedArray).hasSize(3); - assertThat(deserializedArray.get(0)).isEqualTo(Value.fromInt32(1)); - assertThat(deserializedArray.get(1)).isEqualTo(Value.fromInt32(2)); - assertThat(deserializedArray.get(2)).isEqualTo(Value.fromInt32(3)); + + @Nested + @DisplayName("Field Access") + class FieldAccess { + + @Test + @DisplayName("should access fields with correct types") + void shouldAccessFieldsWithCorrectTypes() throws ImprintException { + assertEquals(Integer.valueOf(42), serializedRecord.getInt32(1)); + assertEquals("hello", serializedRecord.getString(2)); + assertEquals(Boolean.TRUE, serializedRecord.getBoolean(3)); + assertEquals(Double.valueOf(3.14159), serializedRecord.getFloat64(4)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, serializedRecord.getBytes(5)); + } + + @Test + @DisplayName("should handle non-existent fields correctly") + void shouldHandleNonExistentFields() throws ImprintException { + // getValue should return null for non-existent fields + assertNull(serializedRecord.getValue(99)); + + // Typed getters should throw exceptions for non-existent fields + assertThrows(ImprintException.class, () -> serializedRecord.getString(99)); + assertThrows(ImprintException.class, () -> serializedRecord.getInt32(100)); + + // hasField should return false + assertFalse(serializedRecord.hasField(99)); + } + + @Test + @DisplayName("should check field existence efficiently") + void shouldCheckFieldExistenceEfficiently() { + assertTrue(serializedRecord.hasField(1)); + assertTrue(serializedRecord.hasField(2)); + assertTrue(serializedRecord.hasField(3)); + assertFalse(serializedRecord.hasField(99)); + } + + @Test + @DisplayName("should return correct field count") + void shouldReturnCorrectFieldCount() { + assertEquals(5, serializedRecord.getFieldCount()); + } } - - @Test - void shouldHandleMaps() throws ImprintException { - var schemaId = new 
SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - var map = new HashMap(); - map.put(MapKey.fromString("key1"), Value.fromInt32(1)); - map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - - writer.addField(1, Value.fromMap(map)); - var record = writer.build(); - - // Serialize and deserialize - var buffer = record.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - Value mapValue = deserialized.getValue(1); - assertThat(mapValue).isNotNull(); - assertThat(mapValue).isInstanceOf(Value.MapValue.class); - - Map deserializedMap = ((Value.MapValue) mapValue).getValue(); - assertThat(deserializedMap).hasSize(2); - assertThat(deserializedMap.get(MapKey.fromString("key1"))).isEqualTo(Value.fromInt32(1)); - assertThat(deserializedMap.get(MapKey.fromString("key2"))).isEqualTo(Value.fromInt32(2)); + + @Nested + @DisplayName("Zero-Copy Operations") + class ZeroCopyOperations { + + @Test + @DisplayName("should merge with another ImprintRecord") + void shouldMergeWithAnotherImprintRecord() throws ImprintException { + // Create another record + var otherRecord = ImprintRecord.builder(testSchema) + .field(6, "additional") + .field(7, 999L) + .build(); + + // Merge + var merged = serializedRecord.merge(otherRecord); + + // Verify merged result + assertEquals(7, merged.getFieldCount()); + assertEquals(Integer.valueOf(42), merged.getInt32(1)); + assertEquals("hello", merged.getString(2)); + assertEquals("additional", merged.getString(6)); + assertEquals(Long.valueOf(999L), merged.getInt64(7)); + } + + @Test + @DisplayName("should project subset of fields") + void shouldProjectSubsetOfFields() throws ImprintException { + var projected = serializedRecord.project(1, 3, 5); + + assertEquals(3, projected.getFieldCount()); + assertEquals(Integer.valueOf(42), projected.getInt32(1)); + assertEquals(Boolean.TRUE, projected.getBoolean(3)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, projected.getBytes(5)); + + // Should not have other fields + assertFalse(projected.hasField(2)); + assertFalse(projected.hasField(4)); + } + + @Test + @DisplayName("should chain project and merge operations") + void shouldChainProjectAndMergeOperations() throws ImprintException { + // Create another record + var otherSerialized = ImprintRecord.builder(testSchema) + .field(10, "chained") + .build(); + + // Chain operations: project this record, then merge with other + var result = serializedRecord.projectAndMerge(otherSerialized, 1, 2); + + // Should have projected fields plus other record + assertEquals(3, result.getFieldCount()); + assertEquals(Integer.valueOf(42), result.getInt32(1)); + assertEquals("hello", result.getString(2)); + assertEquals("chained", result.getString(10)); + + // Should not have non-projected fields + assertFalse(result.hasField(3)); + assertFalse(result.hasField(4)); + assertFalse(result.hasField(5)); + } } - - @Test - void shouldHandleNestedRecords() throws ImprintException { - // Create inner record - var innerSchemaId = new SchemaId(2, 0xcafebabe); - var innerWriter = new ImprintWriter(innerSchemaId); - innerWriter.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("nested")); - var innerRecord = innerWriter.build(); - - // Create outer record containing inner record - var outerSchemaId = new SchemaId(1, 0xdeadbeef); - var outerWriter = new ImprintWriter(outerSchemaId); - outerWriter.addField(1, Value.fromRow(innerRecord)) - .addField(2, 
Value.fromInt64(123L)); - var outerRecord = outerWriter.build(); - - // Serialize and deserialize - var buffer = outerRecord.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - // Verify outer record metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); - assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); - - // Verify nested record - Value rowValue = deserialized.getValue(1); - assertThat(rowValue).isNotNull(); - assertThat(rowValue).isInstanceOf(Value.RowValue.class); - - var nestedRecord = ((Value.RowValue) rowValue).getValue(); - assertThat(nestedRecord.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(2); - assertThat(nestedRecord.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xcafebabe); - - assertThat(nestedRecord.getValue(1)).isEqualTo(Value.fromInt32(42)); - assertThat(nestedRecord.getValue(2)).isEqualTo(Value.fromString("nested")); - - // Verify outer record field - assertThat(deserialized.getValue(2)).isEqualTo(Value.fromInt64(123L)); + + @Nested + @DisplayName("Conversion") + class Conversion { + + @Test + @DisplayName("should serialize and deserialize consistently") + void shouldSerializeAndDeserializeConsistently() throws ImprintException { + var serializedBytes = serializedRecord.serializeToBuffer(); + var deserialized = ImprintRecord.fromBytes(serializedBytes); + + assertEquals(testRecord.getDirectory().size(), deserialized.getDirectory().size()); + assertEquals(testRecord.getInt32(1), deserialized.getInt32(1)); + assertEquals(testRecord.getString(2), deserialized.getString(2)); + assertEquals(testRecord.getBoolean(3), deserialized.getBoolean(3)); + } + + @Test + @DisplayName("should preserve serialized bytes") + void shouldPreserveSerializedBytes() { + var originalBytes = testRecord.serializeToBuffer(); + var preservedBytes = serializedRecord.getSerializedBytes(); + + assertEquals(originalBytes.remaining(), preservedBytes.remaining()); + + // Compare byte content + var original = originalBytes.duplicate(); + var preserved = preservedBytes.duplicate(); + + while (original.hasRemaining() && preserved.hasRemaining()) { + assertEquals(original.get(), preserved.get()); + } + } } - - @Test - void shouldRejectInvalidMagic() { - byte[] invalidData = new byte[15]; - invalidData[0] = 0x00; // wrong magic - - assertThatThrownBy(() -> ImprintRecord.deserialize(invalidData)) - .isInstanceOf(ImprintException.class) - .extracting("errorType") - .isEqualTo(ErrorType.INVALID_MAGIC); + + @Nested + @DisplayName("Performance Characteristics") + class PerformanceCharacteristics { + + @Test + @DisplayName("should have minimal memory footprint") + void shouldHaveMinimalMemoryFootprint() { + var originalSize = testRecord.serializeToBuffer().remaining(); + var serializedSize = serializedRecord.getSerializedSize(); + + assertEquals(originalSize, serializedSize); + + // ImprintRecord should not significantly increase memory usage + // (just the wrapper object itself) + assertTrue(serializedSize > 0); + } + + @Test + @DisplayName("should support repeated operations efficiently") + void shouldSupportRepeatedOperationsEfficiently() throws ImprintException { + // Multiple field access should not cause performance degradation + for (int i = 0; i < 100; i++) { + assertEquals(Integer.valueOf(42), serializedRecord.getInt32(1)); + assertEquals("hello", serializedRecord.getString(2)); + 
assertTrue(serializedRecord.hasField(3)); + } + } } - - @Test - void shouldRejectUnsupportedVersion() { - byte[] invalidData = new byte[15]; - invalidData[0] = (byte) 0x49; // correct magic - invalidData[1] = (byte) 0xFF; // wrong version - - assertThatThrownBy(() -> ImprintRecord.deserialize(invalidData)) - .isInstanceOf(ImprintException.class) - .extracting("errorType") - .isEqualTo(ErrorType.UNSUPPORTED_VERSION); + + @Nested + @DisplayName("Edge Cases") + class EdgeCases { + + @Test + @DisplayName("should handle empty projection") + void shouldHandleEmptyProjection() throws ImprintException { + var projected = serializedRecord.project(); + assertEquals(0, projected.getFieldCount()); + } + + @Test + @DisplayName("should handle projection with non-existent fields") + void shouldHandleProjectionWithNonExistentFields() throws ImprintException { + var projected = serializedRecord.project(1, 99, 100); + assertEquals(1, projected.getFieldCount()); + assertEquals(Integer.valueOf(42), projected.getInt32(1)); + assertFalse(projected.hasField(99)); + assertFalse(projected.hasField(100)); + } + + @Test + @DisplayName("should handle merge with empty record") + void shouldHandleMergeWithEmptyRecord() throws ImprintException { + var emptySerialized = ImprintRecord.builder(testSchema).build(); + + var merged = serializedRecord.merge(emptySerialized); + assertEquals(serializedRecord.getFieldCount(), merged.getFieldCount()); + assertEquals(Integer.valueOf(42), merged.getInt32(1)); + } } - - @Test - void shouldHandleDuplicateFieldIds() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - // Add duplicate field IDs - last one should win - writer.addField(1, Value.fromInt32(42)) - .addField(1, Value.fromInt32(43)); - - var record = writer.build(); - - assertThat(record.getDirectory()).hasSize(1); - assertThat(record.getValue(1)).isEqualTo(Value.fromInt32(43)); + + @Nested + @DisplayName("Equality and Hashing") + class EqualityAndHashing { + + @Test + @DisplayName("should be equal for same serialized data") + void shouldBeEqualForSameSerializedData() { + var other = testRecord; + + assertEquals(serializedRecord, other); + assertEquals(serializedRecord.hashCode(), other.hashCode()); + } + + @Test + @DisplayName("should not be equal for different data") + void shouldNotBeEqualForDifferentData() throws ImprintException { + // Different value + var differentSerialized = ImprintRecord.builder(testSchema) + .field(1, 999) // Different value + .build(); + + assertNotEquals(serializedRecord, differentSerialized); + } } } \ No newline at end of file diff --git a/src/test/java/com/imprint/ops/ImprintOperationsTest.java b/src/test/java/com/imprint/ops/ImprintOperationsTest.java new file mode 100644 index 0000000..292f8f3 --- /dev/null +++ b/src/test/java/com/imprint/ops/ImprintOperationsTest.java @@ -0,0 +1,679 @@ +package com.imprint.ops; + +import com.imprint.core.Directory; +import com.imprint.core.ImprintRecord; +import com.imprint.core.SchemaId; +import com.imprint.error.ImprintException; +import com.imprint.types.Value; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +@DisplayName("ImprintOperations") +class ImprintOperationsTest { + + private SchemaId testSchema; + private ImprintRecord multiFieldRecord; + private ImprintRecord 
emptyRecord; + + @BeforeEach + void setUp() throws ImprintException { + testSchema = new SchemaId(1, 0xdeadbeef); + multiFieldRecord = createTestRecord(); + emptyRecord = createEmptyTestRecord(); + } + + private ImprintRecord createTestRecord() throws ImprintException { + return ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .field(5, true) + .field(7, new byte[]{1, 2, 3}) + .build(); + } + + private ImprintRecord createEmptyTestRecord() throws ImprintException { + return ImprintRecord.builder(testSchema).build(); + } + + @Nested + @DisplayName("Project Operations") + class ProjectOperations { + + @Test + @DisplayName("should project subset of fields") + void shouldProjectSubsetOfFields() throws ImprintException { + // When projecting a subset of fields + ImprintRecord projected = multiFieldRecord.project(1, 5); + + // Then only the requested fields should be present + assertEquals(2, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertTrue(projected.getBoolean(5)); + + // And non-requested fields should be absent + assertNull(projected.getValue(3)); + assertNull(projected.getValue(7)); + } + + @Test + @DisplayName("should maintain field order regardless of input order") + void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException { + // When projecting fields in arbitrary order + ImprintRecord projected = multiFieldRecord.project(7, 1, 5, 3); + + // Then all requested fields should be present + assertEquals(4, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertEquals("hello", projected.getString(3)); + assertTrue(projected.getBoolean(5)); + assertArrayEquals(new byte[]{1, 2, 3}, projected.getBytes(7)); + + // And directory should maintain sorted order + List directory = projected.getDirectory(); + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), + "Directory entries should be sorted by field id"); + } + } + + @Test + @DisplayName("should handle single field projection") + void shouldHandleSingleFieldProjection() throws ImprintException { + // When projecting a single field + ImprintRecord projected = multiFieldRecord.project(3); + + // Then only that field should be present + assertEquals(1, projected.getDirectory().size()); + assertEquals("hello", projected.getString(3)); + } + + @Test + @DisplayName("should preserve all fields when projecting all") + void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { + // Given all field IDs from the original record + int[] allFields = multiFieldRecord.getDirectory().stream() + .mapToInt(Directory::getId) + .toArray(); + + // When projecting all fields + ImprintRecord projected = multiFieldRecord.project(allFields); + + // Then all fields should be present with matching values + assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size()); + + for (Directory entry : multiFieldRecord.getDirectory()) { + Value originalValue = multiFieldRecord.getValue(entry.getId()); + Value projectedValue = projected.getValue(entry.getId()); + assertEquals(originalValue, projectedValue, + "Field " + entry.getId() + " should have matching value"); + } + } + + @Test + @DisplayName("should handle empty projection") + void shouldHandleEmptyProjection() throws ImprintException { + // When projecting no fields + ImprintRecord projected = multiFieldRecord.project(); + + // Then result should be empty but valid + assertEquals(0, 
projected.getDirectory().size()); + assertEquals(0, projected.getFieldCount()); + } + + @Test + @DisplayName("should ignore nonexistent fields") + void shouldIgnoreNonexistentFields() throws ImprintException { + // When projecting mix of existing and non-existing fields + ImprintRecord projected = multiFieldRecord.project(1, 99, 100); + + // Then only existing fields should be included + assertEquals(1, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertNull(projected.getValue(99)); + assertNull(projected.getValue(100)); + } + + @Test + @DisplayName("should deduplicate requested fields") + void shouldDeduplicateRequestedFields() throws ImprintException { + // When projecting the same field multiple times + ImprintRecord projected = multiFieldRecord.project(1, 1, 1); + + // Then field should only appear once + assertEquals(1, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + } + + @Test + @DisplayName("should handle projection from empty record") + void shouldHandleProjectionFromEmptyRecord() throws ImprintException { + // When projecting any fields from empty record + ImprintRecord projected = emptyRecord.project(1, 2, 3); + + // Then result should be empty but valid + assertEquals(0, projected.getDirectory().size()); + assertEquals(0, projected.getFieldCount()); + } + + @Test + @DisplayName("should preserve exact byte representation") + void shouldPreserveExactByteRepresentation() throws ImprintException { + // Given a field's original bytes + byte[] originalBytes = multiFieldRecord.getBytes(7); + + // When projecting that field + ImprintRecord projected = multiFieldRecord.project(7); + + // Then the byte representation should be exactly preserved + byte[] projectedBytes = projected.getBytes(7); + assertArrayEquals(originalBytes, projectedBytes, + "Byte representation should be identical"); + } + + @Test + @DisplayName("should reduce payload size when projecting subset") + void shouldReducePayloadSizeWhenProjectingSubset() throws ImprintException { + // Given a record with large and small fields + ImprintRecord largeRecord = ImprintRecord.builder(testSchema) + .field(1, 42) // 4 bytes + .field(2, "x".repeat(1000)) // ~1000+ bytes + .field(3, 123L) // 8 bytes + .field(4, new byte[500]) // 500+ bytes + .build(); + + int originalPayloadSize = largeRecord.getSerializedSize(); + + // When projecting only the small fields + ImprintRecord projected = largeRecord.project(1, 3); + + // Then the payload size should be significantly smaller + assertTrue(projected.getSerializedSize() < originalPayloadSize, + "Projected payload should be smaller than original"); + + // And the values should still be correct + assertEquals(42, projected.getInt32(1)); + assertEquals(123L, projected.getInt64(3)); + } + } + + @Nested + @DisplayName("Merge Operations") + class MergeOperations { + + @Test + @DisplayName("should merge records with distinct fields") + void shouldMergeRecordsWithDistinctFields() throws ImprintException { + // Given two records with different fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) + .field(4, 123L) + .build(); + + // When merging the records + ImprintRecord merged = record1.merge(record2); + + // Then all fields should be present + assertEquals(4, merged.getDirectory().size()); + assertEquals(42, merged.getInt32(1)); + assertTrue(merged.getBoolean(2)); + assertEquals("hello", 
merged.getString(3)); + assertEquals(123L, merged.getInt64(4)); + + // And directory should be sorted + List directory = merged.getDirectory(); + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), + "Directory entries should be sorted by field id"); + } + } + + @Test + @DisplayName("should merge records with overlapping fields") + void shouldMergeRecordsWithOverlappingFields() throws ImprintException { + // Given two records with overlapping fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(2, "first") + .field(3, 42) + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(1, true) + .field(2, "second") // Overlapping field + .build(); + + // When merging the records + ImprintRecord merged = record1.merge(record2); + + // Then first record's values should take precedence for duplicates + assertEquals(3, merged.getDirectory().size()); + assertTrue(merged.getBoolean(1)); + assertEquals("first", merged.getString(2)); // First record wins + assertEquals(42, merged.getInt32(3)); + } + + @Test + @DisplayName("should preserve schema id from first record") + void shouldPreserveSchemaIdFromFirstRecord() throws ImprintException { + // Given two records with different schema IDs + SchemaId schema1 = new SchemaId(1, 0xdeadbeef); + SchemaId schema2 = new SchemaId(1, 0xcafebabe); + + ImprintRecord record1 = ImprintRecord.builder(schema1) + .field(1, 42) + .build(); + + ImprintRecord record2 = ImprintRecord.builder(schema2) + .field(2, true) + .build(); + + // When merging the records + ImprintRecord merged = record1.merge(record2); + + // Then schema ID from first record should be preserved + assertEquals(schema1, merged.getHeader().getSchemaId()); + } + + @Test + @DisplayName("should handle merge with empty record") + void shouldHandleMergeWithEmptyRecord() throws ImprintException { + // When merging with empty record + ImprintRecord merged1 = multiFieldRecord.merge(emptyRecord); + ImprintRecord merged2 = emptyRecord.merge(multiFieldRecord); + + // Then results should contain all original fields + assertEquals(multiFieldRecord.getDirectory().size(), merged1.getDirectory().size()); + assertEquals(multiFieldRecord.getDirectory().size(), merged2.getDirectory().size()); + + // And values should be preserved + for (Directory entry : multiFieldRecord.getDirectory()) { + Value originalValue = multiFieldRecord.getValue(entry.getId()); + assertEquals(originalValue, merged1.getValue(entry.getId())); + assertEquals(originalValue, merged2.getValue(entry.getId())); + } + } + + @Test + @DisplayName("should handle merge of two empty records") + void shouldHandleMergeOfTwoEmptyRecords() throws ImprintException { + // When merging two empty records + ImprintRecord merged = emptyRecord.merge(emptyRecord); + + // Then result should be empty but valid + assertEquals(0, merged.getDirectory().size()); + assertEquals(0, merged.getFieldCount()); + } + + @Test + @DisplayName("should maintain correct payload offsets after merge") + void shouldMaintainCorrectPayloadOffsetsAfterMerge() throws ImprintException { + // Given records with different field sizes + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) // 4 bytes + .field(3, "hello") // 5+ bytes + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) // 1 byte + .field(4, new byte[]{1, 2, 3, 4, 5}) // 5+ bytes + .build(); + + // When merging + ImprintRecord merged = record1.merge(record2); + + // Then 
all fields should be accessible with correct values + assertEquals(42, merged.getInt32(1)); + assertTrue(merged.getBoolean(2)); + assertEquals("hello", merged.getString(3)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, merged.getBytes(4)); + + // And directory offsets should be sequential + List directory = merged.getDirectory(); + int expectedOffset = 0; + for (Directory entry : directory) { + assertEquals(expectedOffset, entry.getOffset(), + "Field " + entry.getId() + " should have correct offset"); + + // Calculate next offset + var fieldData = merged.getRawBytes(entry.getId()); + assertNotNull(fieldData); + expectedOffset += fieldData.remaining(); + } + } + + @Test + @DisplayName("should handle large records efficiently") + void shouldHandleLargeRecordsEfficiently() throws ImprintException { + // Given records with many fields + var builder1 = ImprintRecord.builder(testSchema); + var builder2 = ImprintRecord.builder(testSchema); + + // Add 100 fields to each record (no overlap) + for (int i = 1; i <= 100; i++) { + builder1.field(i, i * 10); + } + + for (int i = 101; i <= 200; i++) { + builder2.field(i, i * 10); + } + + ImprintRecord record1 = builder1.build(); + ImprintRecord record2 = builder2.build(); + + // When merging large records + ImprintRecord merged = record1.merge(record2); + + // Then all 200 fields should be present and accessible + assertEquals(200, merged.getDirectory().size()); + + // Spot check a bunch of random values just to make sure I guess + assertEquals(10, merged.getInt32(1)); + assertEquals(500, merged.getInt32(50)); + assertEquals(1000, merged.getInt32(100)); + assertEquals(1010, merged.getInt32(101)); + assertEquals(1500, merged.getInt32(150)); + assertEquals(2000, merged.getInt32(200)); + } + } + + @Nested + @DisplayName("Bytes-to-Bytes Operations") + class BytesToBytesOperations { + + @Test + @DisplayName("should merge bytes with same result as object merge") + void shouldMergeBytesWithSameResultAsObjectMerge() throws ImprintException { + // Given two records with distinct fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) + .field(4, 123L) + .build(); + + // When merging using both approaches + var objectMerged = record1.merge(record2); + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + var bytesMerged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + + // Then results should be functionally equivalent + var deserializedBytes = ImprintRecord.deserialize(bytesMerged); + + assertEquals(objectMerged.getDirectory().size(), deserializedBytes.getDirectory().size()); + assertEquals(42, deserializedBytes.getInt32(1)); + assertTrue(deserializedBytes.getBoolean(2)); + assertEquals("hello", deserializedBytes.getString(3)); + assertEquals(123L, deserializedBytes.getInt64(4)); + } + + @Test + @DisplayName("should handle overlapping fields in byte merge") + void shouldHandleOverlappingFieldsInByteMerge() throws ImprintException { + // Given two records with overlapping fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, "first") + .field(2, 42) + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(1, "second") // Overlapping field + .field(3, true) + .build(); + + // When merging using bytes + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + var merged = 
ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + + // Then first record's values should take precedence + var result = ImprintRecord.deserialize(merged); + assertEquals(3, result.getDirectory().size()); + assertEquals("first", result.getString(1)); // First record wins + assertEquals(42, result.getInt32(2)); + assertTrue(result.getBoolean(3)); + } + + @Test + @DisplayName("should merge empty records correctly") + void shouldMergeEmptyRecordsCorrectly() throws ImprintException { + // Given an empty record and a non-empty record + var emptyRecord = ImprintRecord.builder(testSchema).build(); + var nonEmptyRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + + // When merging using bytes + var emptyBytes = emptyRecord.serializeToBuffer(); + var nonEmptyBytes = nonEmptyRecord.serializeToBuffer(); + + var merged1 = ImprintOperations.mergeBytes(emptyBytes, nonEmptyBytes); + var merged2 = ImprintOperations.mergeBytes(nonEmptyBytes, emptyBytes); + + // Then both should contain the non-empty record's data + var result1 = ImprintRecord.deserialize(merged1); + var result2 = ImprintRecord.deserialize(merged2); + + assertEquals(1, result1.getDirectory().size()); + assertEquals(1, result2.getDirectory().size()); + assertEquals("test", result1.getString(1)); + assertEquals("test", result2.getString(1)); + } + + @Test + @DisplayName("should project bytes with same result as object project") + void shouldProjectBytesWithSameResultAsObjectProject() throws ImprintException { + // Given a record with multiple fields + ImprintRecord record = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(2, "hello") + .field(3, true) + .field(4, 123L) + .field(5, new byte[]{1, 2, 3}) + .build(); + + // When projecting using both approaches + var objectProjected = record.project(2, 4); + + var recordBytes = record.serializeToBuffer(); + var bytesProjected = ImprintOperations.projectBytes(recordBytes, 2, 4); + + // Then results should be functionally equivalent + var deserializedBytes = ImprintRecord.deserialize(bytesProjected); + + assertEquals(objectProjected.getDirectory().size(), deserializedBytes.getDirectory().size()); + assertEquals("hello", deserializedBytes.getString(2)); + assertEquals(123L, deserializedBytes.getInt64(4)); + + // Should not have the other fields + assertNull(deserializedBytes.getValue(1)); + assertNull(deserializedBytes.getValue(3)); + assertNull(deserializedBytes.getValue(5)); + } + + @Test + @DisplayName("should handle empty projection in bytes") + void shouldHandleEmptyProjectionInBytes() throws ImprintException { + // Given a record with fields + var record = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + + // When projecting no fields + var recordBytes = record.serializeToBuffer(); + var projected = ImprintOperations.projectBytes(recordBytes); + + // Then result should be empty but valid + var result = ImprintRecord.deserialize(projected); + assertEquals(0, result.getDirectory().size()); + } + + @Test + @DisplayName("should handle nonexistent fields in byte projection") + void shouldHandleNonexistentFieldsInByteProjection() throws ImprintException { + // Given a record with some fields + var record = ImprintRecord.builder(testSchema) + .field(1, "exists") + .field(3, 42) + .build(); + + // When projecting mix of existing and non-existing fields + var recordBytes = record.serializeToBuffer(); + var projected = ImprintOperations.projectBytes(recordBytes, 1, 99, 100); + + // Then only existing fields should be included + var result = 
ImprintRecord.deserialize(projected); + assertEquals(1, result.getDirectory().size()); + assertEquals("exists", result.getString(1)); + assertNull(result.getValue(99)); + assertNull(result.getValue(100)); + } + + @Test + @DisplayName("should handle null buffers gracefully") + void shouldHandleNullBuffersGracefully() throws ImprintException { + var validRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + var validBuffer = validRecord.serializeToBuffer(); + + // Test null buffer scenarios + assertThrows(Exception.class, () -> + ImprintOperations.mergeBytes(null, validBuffer)); + assertThrows(Exception.class, () -> + ImprintOperations.mergeBytes(validBuffer, null)); + assertThrows(Exception.class, () -> + ImprintOperations.projectBytes(null, 1, 2, 3)); + } + + @Test + @DisplayName("should validate buffer format and reject invalid data") + void shouldValidateBufferFormatAndRejectInvalidData() throws ImprintException { + var validRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + var validBuffer = validRecord.serializeToBuffer(); + + // Test invalid magic byte + var invalidMagic = ByteBuffer.allocate(20); + invalidMagic.put((byte) 0x99); // Invalid magic + invalidMagic.put((byte) 0x01); // Valid version + invalidMagic.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(invalidMagic, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(invalidMagic, 1)); + + // Test buffer too small + var tooSmall = ByteBuffer.allocate(5); + tooSmall.put(new byte[]{1, 2, 3, 4, 5}); + tooSmall.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(tooSmall, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(tooSmall, 1)); + + // Test invalid version + var invalidVersion = ByteBuffer.allocate(20); + invalidVersion.put((byte) 0x49); // Valid magic + invalidVersion.put((byte) 0x99); // Invalid version + invalidVersion.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(invalidVersion, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(invalidVersion, 1)); + } + + @Test + @DisplayName("should handle large records efficiently in bytes operations") + void shouldHandleLargeRecordsEfficientlyInBytesOperations() throws ImprintException { + // Create records with many fields + var builder1 = ImprintRecord.builder(testSchema); + var builder2 = ImprintRecord.builder(testSchema); + + // Add many fields + for (int i = 1; i <= 50; i++) { + builder1.field(i, "field_" + i); + } + for (int i = 51; i <= 100; i++) { + builder2.field(i, "field_" + i); + } + + var record1 = builder1.build(); + var record2 = builder2.build(); + + // Test bytes-to-bytes merge with many fields + var merged = ImprintOperations.mergeBytes( + record1.serializeToBuffer(), + record2.serializeToBuffer() + ); + var mergedRecord = ImprintRecord.deserialize(merged); + + assertEquals(100, mergedRecord.getDirectory().size()); + assertEquals("field_1", mergedRecord.getString(1)); + assertEquals("field_100", mergedRecord.getString(100)); + + // Test bytes-to-bytes projection with many fields + int[] projectFields = {1, 25, 50, 75, 100}; + var projected = ImprintOperations.projectBytes(merged, projectFields); + var projectedRecord = ImprintRecord.deserialize(projected); + + assertEquals(5, projectedRecord.getDirectory().size()); + assertEquals("field_1", projectedRecord.getString(1)); + 
assertEquals("field_25", projectedRecord.getString(25)); + assertEquals("field_100", projectedRecord.getString(100)); + } + + @Test + @DisplayName("should preserve field order in bytes operations") + void shouldPreserveFieldOrderInBytesOperations() throws ImprintException { + var record = ImprintRecord.builder(testSchema) + .field(5, "field5") + .field(1, "field1") + .field(3, "field3") + .field(2, "field2") + .field(4, "field4") + .build(); + + // Project in random order + var projected = ImprintOperations.projectBytes( + record.serializeToBuffer(), 4, 1, 3, 5, 2 + ); + var projectedRecord = ImprintRecord.deserialize(projected); + + // Verify fields are still accessible and directory is sorted + var directory = projectedRecord.getDirectory(); + assertEquals(5, directory.size()); + + // Directory should be sorted by field ID + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId()); + } + + // All fields should be accessible + assertEquals("field1", projectedRecord.getString(1)); + assertEquals("field2", projectedRecord.getString(2)); + assertEquals("field3", projectedRecord.getString(3)); + assertEquals("field4", projectedRecord.getString(4)); + assertEquals("field5", projectedRecord.getString(5)); + } + } + +} diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 64be931..79882d9 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -1,228 +1,437 @@ package com.imprint.profile; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; +import com.imprint.ops.ImprintOperations; import com.imprint.types.Value; import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.util.Random; +import java.util.stream.IntStream; + -/** - * A test designed for profiling hotspots during development. - *

- * To use with a profiler: - * 1. Remove @Disabled annotation - * 2. Run with JProfiler, VisualVM, or async-profiler: - * - JProfiler: Attach to test JVM - * - VisualVM: jvisualvm, attach to process - * - async-profiler: java -jar async-profiler.jar -d 30 -f profile.html - * 3. Look for hotspots in CPU sampling - *

- * Key areas to examine: - * - Object allocation (memory profiling) - * - Method call frequency (CPU sampling) - * - GC pressure (memory profiling) - * - String operations and UTF-8 encoding - * - ByteBuffer operations - */ -//@Disabled("Enable manually for profiling") +@Disabled public class ProfilerTest { - - private static final int ITERATIONS = 1_000_000; + private static final int RECORD_SIZE = 50; - + private static final int LARGE_RECORD_SIZE = 200; + @Test - void profileFieldAccess() throws Exception { - System.out.println("Starting profiler test - attach profiler now..."); - Thread.sleep(5000); // Give time to attach profiler - - // Create a representative record - var record = createTestRecord(); - - System.out.println("Beginning field access profiling..."); + @Tag("merge") + void profileMergeOperations() throws Exception { + System.out.println("Starting merge profiler test - attach profiler now..."); + Thread.sleep(3000); + + profileSmallMerges(); + profileLargeMerges(); + profileOverlappingMerges(); + profileDisjointMerges(); + } + + /** + * Profile small merges (20-field records) + */ + private void profileSmallMerges() throws Exception { + System.out.println("\\n--- Small Merges (20-field records) ---"); + + var record1 = createTestRecord(20); + var record2 = createTestRecord(20); + int iterations = 500_000; + + System.out.printf("Beginning small merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); - - // Simulate real-world access patterns - Random random = new Random(42); - int hits = 0; - - for (int i = 0; i < ITERATIONS; i++) { - // Random field access (hotspot) - int fieldId = random.nextInt(RECORD_SIZE) + 1; - var value = record.getValue(fieldId); - if (value != null) { - hits++; - - // Trigger string decoding (potential hotspot) - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } else { - ((Value.StringValue) value).getValue(); - } - } - } - - // Some raw access (zero-copy path) - if (i % 10 == 0) { - record.getRawBytes(fieldId); + + for (int i = 0; i < iterations; i++) { + // This is the hotspot we want to profile + var merged = record1.merge(record2); + + // Simulate some usage to prevent dead code elimination + if (i % 10_000 == 0) { + merged.getValue(1); // Trigger value decoding + merged.getRawBytes(5); // Trigger raw access } + merged.serializeToBuffer(); } - + + long duration = System.nanoTime() - start; + System.out.printf("Small merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + + /** + * Profile large merges (100-field records) + */ + private void profileLargeMerges() throws Exception { + System.out.println("\\n--- Large Merges (100-field records) ---"); + + var record1 = createTestRecord(100); + var record2 = createTestRecord(100); + int iterations = 100_000; + + System.out.printf("Beginning large merge profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = record1.merge(record2); + merged.serializeToBuffer(); + } + long duration = System.nanoTime() - start; - System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", - ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); + System.out.printf("Large merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - + + 
/** + * Profile overlapping merges (records with many duplicate field IDs) + */ + private void profileOverlappingMerges() throws Exception { + System.out.println("\\n--- Overlapping Merges (50%% field overlap) ---"); + + var record1 = createTestRecordWithFieldIds(new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + var record2 = createTestRecordWithFieldIds(new int[]{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + int iterations = 200_000; + + System.out.printf("Beginning overlapping merge profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = record1.merge(record2); + merged.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Overlapping merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + + /** + * Profile disjoint merges (no overlapping field IDs) + */ + private void profileDisjointMerges() throws Exception { + System.out.println("\\n--- Disjoint Merges (no field overlap) ---"); + + // Create records with completely separate field IDs + var record1 = createTestRecordWithFieldIds(new int[]{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}); + var record2 = createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}); + int iterations = 200_000; + + System.out.printf("Beginning disjoint merge profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = record1.merge(record2); + merged.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Disjoint merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + @Test - void profileSerialization() throws Exception { - System.out.println("Starting serialization profiler test..."); + @Tag("serialization") + @Tag("small-records") + void profileSmallRecordSerialization() throws Exception { + profileSerialization("small records", RECORD_SIZE, 100_000); + } + + @Test + @Tag("serialization") + @Tag("large-records") + /* + It's usually better to change DEFAULT_CAPACITY in ImprintFieldObjectMap to ensure resizing doesn't happen + unless you specifically want to profile resizing costs (should happen rarely in reality). + */ + void profileLargeRecordSerialization() throws Exception { + profileSerialization("large records", LARGE_RECORD_SIZE, 500_000); + } + + @Test + @Tag("projection") + void profileProjectionOperations() throws Exception { Thread.sleep(3000); - - var schemaId = new SchemaId(1, 0x12345678); + profileSmallProjections(); + profileLargeProjections(); + profileSelectiveProjections(); + } + + // Rest of the methods remain the same... 
+ private void profileSmallProjections() throws Exception { + System.out.println("\\n--- Small Projections (2-5 fields from 20-field records) ---"); - System.out.println("Beginning serialization profiling..."); + var sourceRecord = createTestRecord(20); + int[] projectFields = {1, 5, 10, 15}; // 4 fields + int iterations = 500_000; + + System.out.printf("Beginning small projection profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); - - // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 500_000; i++) { - var writer = new ImprintWriter(schemaId); - - // Add various field types - writer.addField(1, Value.fromInt32(i)) - .addField(2, Value.fromString("test-string-" + i)) - .addField(3, Value.fromFloat64(i * 3.14159)) - .addField(4, Value.fromBytes(("bytes-" + i).getBytes())); - - var record = writer.build(); - var serialized = record.serializeToBuffer(); // Potential hotspot - - // Trigger some deserialization - if (i % 1000 == 0) { - var deserialized = ImprintRecord.deserialize(serialized); - deserialized.getValue(2); // String decoding hotspot + + for (int i = 0; i < iterations; i++) { + // This is the hotspot we want to profile + var projected = sourceRecord.project(projectFields); + + // Simulate some usage to prevent dead code elimination + if (i % 10_000 == 0) { + projected.getValue(1); // Trigger value decoding + projected.getRawBytes(5); // Trigger raw access } + projected.serializeToBuffer(); } - + long duration = System.nanoTime() - start; - System.out.printf("Completed serialization test in %.2f ms%n", duration / 1_000_000.0); + System.out.printf("Small projections: %.2f ms (avg: %.1f μs/projection)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - - @Test - void profileProjection() throws Exception { - System.out.println("Starting projection profiler test..."); - Thread.sleep(3000); - - var record = createLargeRecord(); - - System.out.println("Beginning projection profiling..."); + + private void profileLargeProjections() throws Exception { + System.out.println("\\n--- Large Projections (50 fields from 200-field records) ---"); + + var sourceRecord = createTestRecord(200); + // Select every 4th field for projection + int[] projectFields = IntStream.range(0, 50) + .map(i -> (i * 4) + 1) + .toArray(); + int iterations = 200_000; + + System.out.printf("Beginning large projection profiling (%,d iterations, %d->%d fields)...%n", + iterations, 200, projectFields.length); long start = System.nanoTime(); - - // Simulate analytical workload - project subset of fields repeatedly - for (int i = 0; i < 50_000; i++) { - // Project 10 fields out of 100 (common analytical pattern) - for (int fieldId = 1; fieldId <= 10; fieldId++) { - var value = record.getValue(fieldId); - if (value != null) { - // Force materialization of string values - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } - } - } + + for (int i = 0; i < iterations; i++) { + var projected = sourceRecord.project(projectFields); + + // Periodically access some fields to simulate real usage + if (i % 1_000 == 0) { + projected.getValue(1); + projected.getValue(25); + projected.getValue(49); } + projected.serializeToBuffer(); } - + long duration = System.nanoTime() - start; - System.out.printf("Completed projection test in %.2f ms%n", duration / 1_000_000.0); + System.out.printf("Large projections: %.2f ms (avg: %.1f μs/projection)%n", + 
duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - - @Test - void profileMemoryAllocation() throws Exception { - System.out.println("Starting allocation profiler test..."); + + private void profileSelectiveProjections() throws Exception { + System.out.println("\\n--- Selective Projections (various patterns) ---"); + + var sourceRecord = createTestRecord(100); + Random random = new Random(42); + int iterations = 200_000; + + // Test different projection patterns + var patterns = new ProjectionPattern[]{ + new ProjectionPattern("First few fields", new int[]{1, 2, 3, 4, 5}), + new ProjectionPattern("Last few fields", new int[]{96, 97, 98, 99, 100}), + new ProjectionPattern("Scattered fields", new int[]{1, 15, 33, 67, 89, 100}), + new ProjectionPattern("Random fields", generateRandomFields(random, 100, 10)) + }; + + for (var pattern : patterns) { + System.out.printf("Testing pattern: %s (%d fields)%n", + pattern.name, pattern.fields.length); + + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var projected = sourceRecord.project(pattern.fields); + + // Simulate field access + if (i % 5_000 == 0) { + projected.getValue(pattern.fields[0]); + } + projected.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf(" %s: %.2f ms (avg: %.1f μs/projection)%n", + pattern.name, duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + } + + private void profileSerialization(String testName, int recordSize, int iterations) throws Exception { + System.out.printf("Starting %s serialization profiler test...%n", testName); Thread.sleep(3000); - - System.out.println("Beginning allocation profiling - watch for GC events..."); - - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var writer = new ImprintWriter(schemaId); - - // Create strings of varying sizes (allocation pressure) - writer.addField(1, Value.fromString("small")) - .addField(2, Value.fromString("medium-length-string-" + i)) - .addField(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .addField(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = writer.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); + + var schemaId = new SchemaId(1, 0x12345678); + + System.out.printf("Beginning %s serialization profiling (%,d iterations, %d fields)...%n", testName, iterations, recordSize); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var builder = ImprintRecord.builder(schemaId); + + // Add various field types based on recordSize + for (int fieldId = 1; fieldId <= recordSize; fieldId++) { + switch (fieldId % 7) { + case 0: + builder.field(fieldId, Value.fromInt32(i + fieldId)); + break; + case 1: + builder.field(fieldId, Value.fromInt64(i * 1000L + fieldId)); + break; + case 2: + builder.field(fieldId, Value.fromString("test-string-" + i + "-" + fieldId)); + break; + case 3: + builder.field(fieldId, Value.fromString("longer-descriptive-text-for-field-" + fieldId + "-iteration-" + i)); + break; + case 4: + builder.field(fieldId, Value.fromFloat64(i * 3.14159 + fieldId)); + break; + case 5: + builder.field(fieldId, Value.fromBytes(("bytes-" + i + "-" + fieldId).getBytes())); + break; + case 6: + builder.field(fieldId, Value.fromBoolean((i + fieldId) % 2 
== 0)); + break; + } + } + + var record = builder.build(); + var serialized = record.serializeToBuffer(); + + // Trigger some deserialization periodically + if (i % Math.max(1, iterations / 100) == 0) { + var deserialized = ImprintRecord.deserialize(serialized); + // Access a few random fields to trigger value decoding + for (int fieldId = 1; fieldId <= Math.min(5, recordSize); fieldId++) { + deserialized.getValue(fieldId); // String decoding hotspot + } } - - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); + + // Progress indicator for long-running tests + if (i > 0 && i % Math.max(1, iterations / 10) == 0) { + System.out.printf("Completed %,d/%,d iterations (%.1f%%)%n", + i, iterations, (double) i / iterations * 100); } } - - System.out.println("Allocation test complete - check GC logs and memory profiler"); + + long duration = System.nanoTime() - start; + System.out.printf("Completed %s serialization test in %.2f ms (avg: %.1f μs/record)%n", + testName, duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - - private ImprintRecord createTestRecord() throws Exception { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - for (int i = 1; i <= RECORD_SIZE; i++) { + + private ImprintRecord createTestRecord(int recordSize) throws Exception { + var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); + + for (int i = 1; i <= recordSize; i++) { switch (i % 4) { case 0: - writer.addField(i, Value.fromInt32(i * 100)); + builder.field(i, Value.fromInt32(i * 100)); break; case 1: - writer.addField(i, Value.fromString("field-value-" + i)); + builder.field(i, Value.fromString("field-value-" + i)); break; case 2: - writer.addField(i, Value.fromFloat64(i * 3.14159)); + builder.field(i, Value.fromFloat64(i * 3.14159)); break; case 3: - writer.addField(i, Value.fromBytes(("bytes-" + i).getBytes())); + builder.field(i, Value.fromBytes(("bytes-" + i).getBytes())); break; } } - - return writer.build(); + + return builder.build(); } - - private ImprintRecord createLargeRecord() throws Exception { - var schemaId = new SchemaId(2, 0xcafebabe); - var writer = new ImprintWriter(schemaId); - - // Create 100 fields with realistic data - for (int i = 1; i <= 100; i++) { - switch (i % 5) { + + private ImprintRecord createTestRecordWithFieldIds(int[] fieldIds) throws Exception { + var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); + for (int fieldId : fieldIds) { + switch (fieldId % 4) { case 0: - writer.addField(i, Value.fromInt32(i)); + builder.field(fieldId, Value.fromInt32(fieldId * 100)); break; case 1: - writer.addField(i, Value.fromString("user-name-" + i + "@example.com")); + builder.field(fieldId, Value.fromString("field-value-" + fieldId)); break; case 2: - writer.addField(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); + builder.field(fieldId, Value.fromFloat64(fieldId * 3.14159)); break; case 3: - writer.addField(i, Value.fromFloat64(i * 2.718281828)); - break; - case 4: - writer.addField(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); + builder.field(fieldId, Value.fromBytes(("bytes-" + fieldId).getBytes())); break; } } + + return builder.build(); + } + + private static class ProjectionPattern { + final String name; + final int[] fields; + + ProjectionPattern(String name, int[] fields) { + this.name = name; + this.fields = fields; + } + } + + private int[] generateRandomFields(Random random, int 
maxField, int count) { + return random.ints(count, 1, maxField + 1) + .distinct() + .sorted() + .toArray(); + } + + @Test + @Tag("profiling") + void profileBytesToBytesVsObjectMerge() throws Exception { + System.out.println("=== Bytes-to-Bytes vs Object Merge Comparison ==="); + + // Create test records + var record1 = createTestRecordWithFieldIds(new int[]{1, 3, 5, 7, 9, 11, 13, 15}); + var record2 = createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16}); + + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + + int iterations = 50_000; + + // Warm up + for (int i = 0; i < 1000; i++) { + record1.merge(record2).serializeToBuffer(); + ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + } + + System.out.printf("Profiling %,d merge operations...%n", iterations); + + // Test object merge + serialize + long startObjectMerge = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + var merged = record1.merge(record2); + var serialized = merged.serializeToBuffer(); + // Consume result to prevent optimization + if (serialized.remaining() == 0) throw new RuntimeException("Empty result"); + } + long objectMergeTime = System.nanoTime() - startObjectMerge; + + // Test bytes merge + long startBytesMerge = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + var merged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + // Consume result to prevent optimization + if (merged.remaining() == 0) throw new RuntimeException("Empty result"); + } + long bytesMergeTime = System.nanoTime() - startBytesMerge; + + double objectAvg = (double) objectMergeTime / iterations / 1000.0; // microseconds + double bytesAvg = (double) bytesMergeTime / iterations / 1000.0; // microseconds + double speedup = objectAvg / bytesAvg; + + System.out.printf("Object merge + serialize: %.2f ms (avg: %.1f μs/op)%n", + objectMergeTime / 1_000_000.0, objectAvg); + System.out.printf("Bytes-to-bytes merge: %.2f ms (avg: %.1f μs/op)%n", + bytesMergeTime / 1_000_000.0, bytesAvg); + System.out.printf("Speedup: %.1fx faster%n", speedup); - return writer.build(); + // Assert that the bytes approach is not slower than object merge + serialize + assertTrue(speedup > 1.0, String.format("Bytes merge should be faster. Got %.1fx speedup", speedup)); } } \ No newline at end of file
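For reference, a minimal usage sketch of the byte-level operations these tests exercise, using only the API surface visible in the patch (ImprintRecord.builder/serializeToBuffer/deserialize and ImprintOperations.mergeBytes/projectBytes); the schema id, class name, and field values below are illustrative placeholders, not part of the library:

import com.imprint.core.ImprintRecord;
import com.imprint.core.SchemaId;
import com.imprint.ops.ImprintOperations;

public class BytesOpsSketch {
    public static void main(String[] args) throws Exception {
        var schema = new SchemaId(1, 0x12345678); // placeholder schema id

        // Two records that overlap on field 1; per the precedence test above,
        // the first argument to mergeBytes wins on conflicting field ids.
        var left = ImprintRecord.builder(schema)
                .field(1, "first")
                .field(2, 42)
                .build();
        var right = ImprintRecord.builder(schema)
                .field(1, "second")
                .field(3, true)
                .build();

        // Merge directly on serialized buffers, without materializing merged objects.
        var merged = ImprintOperations.mergeBytes(left.serializeToBuffer(), right.serializeToBuffer());
        var mergedRecord = ImprintRecord.deserialize(merged);
        System.out.println(mergedRecord.getString(1));           // "first"
        System.out.println(mergedRecord.getDirectory().size());  // 3 fields: 1, 2, 3

        // Project a subset of fields, again at the byte level.
        var projected = ImprintOperations.projectBytes(merged, 1, 3);
        var projectedRecord = ImprintRecord.deserialize(projected);
        System.out.println(projectedRecord.getDirectory().size()); // 2 fields: 1, 3
    }
}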