From 89642689594aea2fa9e388d8766c9748473552b9 Mon Sep 17 00:00:00 2001 From: Brent Johnson Date: Sun, 1 Jun 2025 13:23:15 -0400 Subject: [PATCH 01/53] Update .gitignore --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3f1edb4..09ea567 100644 --- a/.gitignore +++ b/.gitignore @@ -61,8 +61,4 @@ buildNumber.properties # JAR files (unless they're dependencies) *.jar -!gradle/wrapper/gradle-wrapper.jar !lombok.jar - -# Benchmark Results (keep timestamped results in repo for tracking) -# benchmark-results/ - Commented out to keep results in repo From 434d9d583453074922a3895134963fdfa5448808 Mon Sep 17 00:00:00 2001 From: expanded-for-real Date: Sun, 1 Jun 2025 14:02:51 -0400 Subject: [PATCH 02/53] initial commit for imprint-java --- .../benchmark/ComparisonBenchmark.java | 756 +++--------------- .../benchmark/FieldAccessBenchmark.java | 4 +- .../com/imprint/benchmark/MergeBenchmark.java | 4 +- .../benchmark/SerializationBenchmark.java | 4 +- src/main/java/com/imprint/Constants.java | 4 +- .../java/com/imprint/core/ImprintRecord.java | 500 ++++++------ .../imprint/core/ImprintRecordBuilder.java | 100 ++- .../java/com/imprint/core/ImprintWriter.java | 155 +++- src/main/java/com/imprint/core/SchemaId.java | 2 +- .../java/com/imprint/error/ErrorType.java | 3 +- src/main/java/com/imprint/types/TypeCode.java | 4 +- .../java/com/imprint/types/TypeHandler.java | 314 ++------ src/main/java/com/imprint/types/Value.java | 131 +-- src/main/java/com/imprint/util/VarInt.java | 62 +- .../imprint/ByteBufferIntegrationTest.java | 87 ++ .../java/com/imprint/ComprehensiveTest.java | 208 +++++ .../java/com/imprint/IntegrationTest.java | 712 +++-------------- .../com/imprint/benchmark/ProfilerTest.java | 226 ++++++ .../core/ImprintRecordBuilderTest.java | 234 ++++++ .../com/imprint/core/ImprintRecordTest.java | 76 +- .../com/imprint/types/TypeHandlerTest.java | 125 +-- .../java/com/imprint/types/ValueTest.java | 169 +--- 22 files changed, 1747 
insertions(+), 2133 deletions(-) create mode 100644 src/test/java/com/imprint/ByteBufferIntegrationTest.java create mode 100644 src/test/java/com/imprint/ComprehensiveTest.java create mode 100644 src/test/java/com/imprint/benchmark/ProfilerTest.java create mode 100644 src/test/java/com/imprint/core/ImprintRecordBuilderTest.java diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 6a6a958..1293478 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -4,25 +4,13 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.flatbuffers.FlatBufferBuilder; import com.imprint.core.ImprintRecord; import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.*; -import org.msgpack.jackson.dataformat.MessagePackFactory; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -38,55 +26,37 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) -@SuppressWarnings("unused") public 
class ComparisonBenchmark { // Test data private TestRecord testData; - + // Serialized formats - private ByteBuffer imprintBytesBuffer; - private byte[] jacksonJsonBytes; + private ByteBuffer imprintBytes; + private byte[] jacksonBytes; private byte[] kryoBytes; - private byte[] messagePackBytes; - private byte[] avroBytes; - private byte[] protobufBytes; - private ByteBuffer flatbuffersBytes; - + // Library instances - private Schema avroSchema; - private DatumWriter avroWriter; - private DatumReader avroReader; - private ObjectMapper jacksonJsonMapper; + private ObjectMapper jackson; private Kryo kryo; - private ObjectMapper messagePackMapper; @Setup public void setup() throws Exception { testData = createTestRecord(); - + // Initialize libraries - jacksonJsonMapper = new ObjectMapper(); + jackson = new ObjectMapper(); kryo = new Kryo(); kryo.register(TestRecord.class); kryo.register(ArrayList.class); kryo.register(HashMap.class); - kryo.register(Arrays.asList().getClass()); - - // Initialize MessagePack ObjectMapper - messagePackMapper = new ObjectMapper(new MessagePackFactory()); - setupAvro(); - + // Pre-serialize for deserialization benchmarks - imprintBytesBuffer = serializeWithImprint(testData); - jacksonJsonBytes = serializeWithJacksonJson(testData); + imprintBytes = serializeWithImprint(testData); + jacksonBytes = serializeWithJackson(testData); kryoBytes = serializeWithKryo(testData); - messagePackBytes = serializeWithMessagePack(testData); - avroBytes = serializeWithAvro(testData); - protobufBytes = serializeWithProtobuf(testData); - flatbuffersBytes = serializeWithFlatBuffers(testData); } // ===== SERIALIZATION BENCHMARKS ===== @@ -98,60 +68,28 @@ public void serializeImprint(Blackhole bh) throws Exception { } @Benchmark - public void serializeJacksonJson(Blackhole bh) throws Exception { - byte[] result = serializeWithJacksonJson(testData); + public void serializeJackson(Blackhole bh) throws Exception { + byte[] result = 
serializeWithJackson(testData); bh.consume(result); } @Benchmark - public void serializeKryo(Blackhole bh) { + public void serializeKryo(Blackhole bh) throws Exception { byte[] result = serializeWithKryo(testData); bh.consume(result); } - @Benchmark - public void serializeMessagePack(Blackhole bh) throws Exception { - byte[] result = serializeWithMessagePack(testData); - bh.consume(result); - } - - @Benchmark - public void serializeAvro(Blackhole bh) throws Exception { - byte[] result = serializeWithAvro(testData); - bh.consume(result); - } - - @Benchmark - public void serializeProtobuf(Blackhole bh) { - byte[] result = serializeWithProtobuf(testData); - bh.consume(result); - } + // ===== DESERIALIZATION BENCHMARKS ===== @Benchmark - public void serializeFlatBuffers(Blackhole bh) { - ByteBuffer result = serializeWithFlatBuffers(testData); - bh.consume(result); - } - - // ===== SETUP ONLY ===== - - @Benchmark - public void deserializeSetupImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + public void deserializeImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytes.duplicate()); bh.consume(result); } @Benchmark - public void deserializeSetupFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(result); - } - - // ===== FULL DESERIALIZATION BENCHMARKS ===== - - @Benchmark - public void deserializeJacksonJson(Blackhole bh) throws Exception { - TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); + public void deserializeJackson(Blackhole bh) throws Exception { + TestRecord result = jackson.readValue(jacksonBytes, TestRecord.class); bh.consume(result); } @@ -163,399 +101,147 @@ public void deserializeKryo(Blackhole bh) { bh.consume(result); } - @Benchmark - public void deserializeMessagePack(Blackhole bh) throws Exception { - TestRecord 
result = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(result); - } - - @Benchmark - public void deserializeAvro(Blackhole bh) throws Exception { - GenericRecord result = deserializeWithAvro(avroBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord result = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - // Access all fields to force full deserialization - result.getInt32(1); // id - result.getString(2); // name - result.getFloat64(3); // price - result.getBoolean(4); // active - result.getString(5); // category - result.getArray(6); // tags - result.getMap(7); // metadata - for (int i = 8; i < 21; i++) { - result.getString(i); // extraData fields - } - - bh.consume(result); - } - - @Benchmark - public void deserializeFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - - // Access all fields - result.id(); - result.name(); - result.price(); - result.active(); - result.category(); - // Access all tags - for (int i = 0; i < result.tagsLength(); i++) { - result.tags(i); - } - // Access all metadata - for (int i = 0; i < result.metadataKeysLength(); i++) { - result.metadataKeys(i); - result.metadataValues(i); - } - // Access all extra data - for (int i = 0; i < result.extraDataLength(); i++) { - result.extraData(i); - } - - bh.consume(result); - } - // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a record @Benchmark - public void singleFieldAccessImprint(Blackhole bh) throws Exception { - ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - var field15 = record.getString(15); - bh.consume(field15); + public void 
fieldAccessImprint(Blackhole bh) throws Exception { + ImprintRecord record = ImprintRecord.deserialize(imprintBytes.duplicate()); + + // Access multiple fields without full deserialization + var id = record.getValue(1); + var name = record.getValue(2); + var price = record.getValue(3); + var active = record.getValue(4); + var category = record.getValue(5); + + bh.consume(id); + bh.consume(name); + bh.consume(price); + bh.consume(active); + bh.consume(category); } @Benchmark - public void singleFieldAccessJacksonJson(Blackhole bh) throws Exception { - TestRecord record = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); + public void fieldAccessJackson(Blackhole bh) throws Exception { + // Jackson requires full deserialization to access fields + TestRecord record = jackson.readValue(jacksonBytes, TestRecord.class); + + bh.consume(record.id); + bh.consume(record.name); + bh.consume(record.price); + bh.consume(record.active); + bh.consume(record.category); } @Benchmark - public void singleFieldAccessKryo(Blackhole bh) { + public void fieldAccessKryo(Blackhole bh) { + // Kryo requires full deserialization to access fields Input input = new Input(new ByteArrayInputStream(kryoBytes)); TestRecord record = kryo.readObject(input, TestRecord.class); input.close(); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { - TestRecord record = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessAvro(Blackhole bh) throws Exception { - GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); - } - - @Benchmark - public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); - } 
- - @Benchmark - public void singleFieldAccessFlatBuffers(Blackhole bh) { - TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); + + bh.consume(record.id); + bh.consume(record.name); + bh.consume(record.price); + bh.consume(record.active); + bh.consume(record.category); } // ===== SIZE COMPARISON ===== @Benchmark - public void measureImprintSize(Blackhole bh) { - bh.consume(imprintBytesBuffer.remaining()); + public void measureImprintSize(Blackhole bh) throws Exception { + ByteBuffer serialized = serializeWithImprint(testData); + bh.consume(serialized.remaining()); } @Benchmark - public void measureJacksonJsonSize(Blackhole bh) { - bh.consume(jacksonJsonBytes.length); + public void measureJacksonSize(Blackhole bh) throws Exception { + byte[] serialized = serializeWithJackson(testData); + bh.consume(serialized.length); } @Benchmark - public void measureKryoSize(Blackhole bh) { - bh.consume(kryoBytes.length); - } - - @Benchmark - public void measureMessagePackSize(Blackhole bh) { - bh.consume(messagePackBytes.length); - } - - @Benchmark - public void measureAvroSize(Blackhole bh) { - bh.consume(avroBytes.length); - } - - @Benchmark - public void measureProtobufSize(Blackhole bh) { - bh.consume(protobufBytes.length); - } - - @Benchmark - public void measureFlatBuffersSize(Blackhole bh) { - bh.consume(flatbuffersBytes.remaining()); + public void measureKryoSize(Blackhole bh) throws Exception { + byte[] serialized = serializeWithKryo(testData); + bh.consume(serialized.length); } // ===== MERGE SIMULATION BENCHMARKS ===== - //@Benchmark + @Benchmark public void mergeImprint(Blackhole bh) throws Exception { - var record1Buffer = imprintBytesBuffer.duplicate(); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithImprint(record2Data); - - var deserialized1 = ImprintRecord.deserialize(record1Buffer); - var deserialized2 = ImprintRecord.deserialize(record2Buffer); + // Simulate merge 
with Imprint (O(1) with proper API) + var record1 = serializeWithImprint(testData); + var record2 = serializeWithImprint(createTestRecord2()); + + // Current simulation - will be O(1) with actual merge API + var deserialized1 = ImprintRecord.deserialize(record1); + var deserialized2 = ImprintRecord.deserialize(record2); var merged = simulateMerge(deserialized1, deserialized2); - + bh.consume(merged); } - //@Benchmark - public void mergeJacksonJson(Blackhole bh) throws Exception { - var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithJacksonJson(record2Data); - var record2 = jacksonJsonMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = jacksonJsonMapper.writeValueAsBytes(mergedPojo); + @Benchmark + public void mergeJackson(Blackhole bh) throws Exception { + // Jackson merge requires full deserialization + merge + serialization + var record1 = jackson.readValue(jacksonBytes, TestRecord.class); + var record2 = jackson.readValue(serializeWithJackson(createTestRecord2()), TestRecord.class); + + var merged = mergeTestRecords(record1, record2); + byte[] result = jackson.writeValueAsBytes(merged); + bh.consume(result); } - //@Benchmark - public void mergeKryo(Blackhole bh) { + @Benchmark + public void mergeKryo(Blackhole bh) throws Exception { + // Kryo merge requires full deserialization + merge + serialization Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); var record1 = kryo.readObject(input1, TestRecord.class); input1.close(); - - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithKryo(record2Data); - Input input2 = new Input(new ByteArrayInputStream(record2Bytes)); + + Input input2 = new Input(new ByteArrayInputStream(serializeWithKryo(createTestRecord2()))); var record2 = kryo.readObject(input2, TestRecord.class); input2.close(); - - var mergedPojo = 
mergeTestRecords(record1, record2); - byte[] result = serializeWithKryo(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeMessagePack(Blackhole bh) throws Exception { - var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithMessagePack(record2Data); - var record2 = messagePackMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = messagePackMapper.writeValueAsBytes(mergedPojo); + + var merged = mergeTestRecords(record1, record2); + byte[] result = serializeWithKryo(merged); + bh.consume(result); } - //@Benchmark - public void mergeAvro(Blackhole bh) throws Exception { - var record1 = deserializeWithAvro(avroBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithAvro(record2Data); - var record2 = deserializeWithAvro(record2Bytes); - - var merged = mergeAvroRecords(record1, record2); - byte[] result = serializeAvroRecord(merged); - bh.consume(result); - } - - //@Benchmark - public void mergeProtobuf(Blackhole bh) throws Exception { - var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithProtobuf(record2Data); - var record2 = TestRecordProto.TestRecord.parseFrom(record2Bytes); - - var merged = mergeProtobufRecords(record1, record2); - byte[] result = merged.toByteArray(); - bh.consume(result); - } - - //@Benchmark - public void mergeFlatBuffers(Blackhole bh) { - var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithFlatBuffers(record2Data); - var record2 = TestRecordFB.getRootAsTestRecordFB(record2Buffer); - - var merged = mergeFlatBuffersRecords(record1, record2); - bh.consume(merged); - } - - // ===== MAIN METHOD TO RUN BENCHMARKS ===== - - public static void 
main(String[] args) throws RunnerException { - runFieldAccessBenchmarks(); - // Or, uncomment specific runner methods to execute subsets: - // runSerializationBenchmarks(); - // runDeserializationBenchmarks(); - // runFieldAccessBenchmarks(); - // runSizeComparisonBenchmarks(); - // runMergeBenchmarks(); - // runMessagePackBenchmarks(); - } - - public static void runAll() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName()) - .build(); - new Runner(opt).run(); - } - - public static void runSerializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".serialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runDeserializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".deserialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runFieldAccessBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".singleFieldAccess.*") - .build(); - new Runner(opt).run(); - } - - public static void runSizeComparisonBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".measure.*") - .build(); - new Runner(opt).run(); - } - - public static void runMergeBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".merge.*") - .build(); - new Runner(opt).run(); - } - - public static void runMessagePackBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*MessagePack.*") - .build(); - new Runner(opt).run(); - } - - public static void runAvroBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - 
.include(ComparisonBenchmark.class.getSimpleName() + ".*Avro.*") - .build(); - new Runner(opt).run(); - } - - public static void runProtobufBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*Protobuf.*") - .build(); - new Runner(opt).run(); - } - - public static void runFlatBuffersBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*FlatBuffers.*") - .build(); - new Runner(opt).run(); - } - // ===== HELPER METHODS ===== - private void setupAvro() { - String schemaJson = "{\n" + - " \"type\": \"record\",\n" + - " \"name\": \"TestRecord\",\n" + - " \"fields\": [\n" + - " {\"name\": \"id\", \"type\": \"int\"},\n" + - " {\"name\": \"name\", \"type\": \"string\"},\n" + - " {\"name\": \"price\", \"type\": \"double\"},\n" + - " {\"name\": \"active\", \"type\": \"boolean\"},\n" + - " {\"name\": \"category\", \"type\": \"string\"},\n" + - " {\"name\": \"tags\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" + - " {\"name\": \"metadata\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},\n" + - " {\"name\": \"extraData0\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData1\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData2\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData3\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData4\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData5\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData6\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData7\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData8\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData9\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData10\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData11\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData12\", \"type\": \"string\"}\n" + - " ]\n" + - "}"; - - avroSchema = new 
Schema.Parser().parse(schemaJson); - avroWriter = new GenericDatumWriter<>(avroSchema); - avroReader = new GenericDatumReader<>(avroSchema); - } - private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - + writer.addField(1, Value.fromInt32(data.id)); writer.addField(2, Value.fromString(data.name)); writer.addField(3, Value.fromFloat64(data.price)); writer.addField(4, Value.fromBoolean(data.active)); writer.addField(5, Value.fromString(data.category)); - + + // Convert tags list var tagValues = new ArrayList(); - if (data.tags != null) { - for (String tag : data.tags) { - tagValues.add(Value.fromString(tag)); - } + for (String tag : data.tags) { + tagValues.add(Value.fromString(tag)); } writer.addField(6, Value.fromArray(tagValues)); - + + // Convert metadata map var metadataMap = new HashMap(); - if (data.metadata != null) { - for (var entry : data.metadata.entrySet()) { - metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); - } + for (var entry : data.metadata.entrySet()) { + metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); } writer.addField(7, Value.fromMap(metadataMap)); - - if (data.extraData != null) { - for (int i = 0; i < data.extraData.size(); i++) { - writer.addField(8 + i, Value.fromString(data.extraData.get(i))); - } - } - + return writer.build().serializeToBuffer(); } - private byte[] serializeWithJacksonJson(TestRecord data) throws Exception { - return jacksonJsonMapper.writeValueAsBytes(data); + private byte[] serializeWithJackson(TestRecord data) throws Exception { + return jackson.writeValueAsBytes(data); } private byte[] serializeWithKryo(TestRecord data) { @@ -566,117 +252,14 @@ private byte[] serializeWithKryo(TestRecord data) { return baos.toByteArray(); } - private byte[] serializeWithMessagePack(TestRecord data) throws Exception { - return messagePackMapper.writeValueAsBytes(data); - } - - 
private byte[] serializeWithAvro(TestRecord data) throws Exception { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("id", data.id); - record.put("name", data.name); - record.put("price", data.price); - record.put("active", data.active); - record.put("category", data.category); - record.put("tags", data.tags); - record.put("metadata", data.metadata); - - for (int i = 0; i < data.extraData.size(); i++) { - record.put("extraData" + i, data.extraData.get(i)); - } - - return serializeAvroRecord(record); - } - - private byte[] serializeAvroRecord(GenericRecord record) throws Exception { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null); - avroWriter.write(record, encoder); - encoder.flush(); - return baos.toByteArray(); - } - - private GenericRecord deserializeWithAvro(byte[] data) throws Exception { - Decoder decoder = DecoderFactory.get().binaryDecoder(data, null); - return avroReader.read(null, decoder); - } - - private byte[] serializeWithProtobuf(TestRecord data) { - var builder = TestRecordProto.TestRecord.newBuilder() - .setId(data.id) - .setName(data.name) - .setPrice(data.price) - .setActive(data.active) - .setCategory(data.category) - .addAllTags(data.tags) - .putAllMetadata(data.metadata); - - for (String extraData : data.extraData) { - builder.addExtraData(extraData); - } - - return builder.build().toByteArray(); - } - - private ByteBuffer serializeWithFlatBuffers(TestRecord data) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); - - // Create strings (must be created before the object that uses them) - int nameOffset = builder.createString(data.name); - int categoryOffset = builder.createString(data.category); - - // Create tags array - int[] tagOffsets = new int[data.tags.size()]; - for (int i = 0; i < data.tags.size(); i++) { - tagOffsets[i] = builder.createString(data.tags.get(i)); - } - int tagsOffset = TestRecordFB.createTagsVector(builder, 
tagOffsets); - - // Create metadata (as parallel arrays for keys and values) - String[] metadataKeys = data.metadata.keySet().toArray(new String[0]); - String[] metadataValues = new String[metadataKeys.length]; - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - metadataValues[i] = data.metadata.get(metadataKeys[i]); - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = builder.createString(metadataValues[i]); - } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - // Create extra data array - int[] extraDataOffsets = new int[data.extraData.size()]; - for (int i = 0; i < data.extraData.size(); i++) { - extraDataOffsets[i] = builder.createString(data.extraData.get(i)); - } - int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the main object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, data.id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, data.price); - TestRecordFB.addActive(builder, data.active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); - - // Finish and return - builder.finish(recordOffset); - return builder.dataBuffer().slice(); - } - private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { var writer = new ImprintWriter(first.getHeader().getSchemaId()); var usedFieldIds = new HashSet(); - + + // Copy fields from first record (takes precedence) 
copyFieldsToWriter(first, writer, usedFieldIds); copyFieldsToWriter(second, writer, usedFieldIds); - + return writer.build(); } @@ -685,8 +268,8 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); - if (value != null) { - writer.addField(fieldId, value); + if (value.isPresent()) { + writer.addField(fieldId, value.get()); usedFieldIds.add(fieldId); } } @@ -694,121 +277,23 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< } private TestRecord mergeTestRecords(TestRecord first, TestRecord second) { + // Simple merge logic - first record takes precedence var merged = new TestRecord(); merged.id = first.id; merged.name = first.name != null ? first.name : second.name; merged.price = first.price != 0.0 ? first.price : second.price; merged.active = first.active; merged.category = first.category != null ? first.category : second.category; - + merged.tags = new ArrayList<>(first.tags); merged.tags.addAll(second.tags); - + merged.metadata = new HashMap<>(first.metadata); merged.metadata.putAll(second.metadata); - + return merged; } - private GenericRecord mergeAvroRecords(GenericRecord first, GenericRecord second) { - GenericRecord merged = new GenericData.Record(avroSchema); - - // Copy all fields from first record - for (Schema.Field field : avroSchema.getFields()) { - merged.put(field.name(), first.get(field.name())); - } - - // Override with non-null values from second record - for (Schema.Field field : avroSchema.getFields()) { - Object secondValue = second.get(field.name()); - if (secondValue != null && !secondValue.toString().isEmpty()) { - merged.put(field.name(), secondValue); - } - } - - return merged; - } - - private TestRecordProto.TestRecord mergeProtobufRecords(TestRecordProto.TestRecord first, TestRecordProto.TestRecord second) { - return TestRecordProto.TestRecord.newBuilder() - 
.mergeFrom(first) - .mergeFrom(second) - .build(); - } - - private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB second) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); - - // Use second record's values if they exist, otherwise first record's values - String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); - String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); - double price = second.price() != 0.0 ? second.price() : first.price(); - boolean active = second.active(); // Use second's boolean value - int id = first.id(); // Keep first record's ID - - // Create merged strings - int nameOffset = builder.createString(name); - int categoryOffset = builder.createString(category); - - // Merge tags (combine both arrays) - List mergedTags = new ArrayList<>(); - for (int i = 0; i < first.tagsLength(); i++) { - mergedTags.add(first.tags(i)); - } - for (int i = 0; i < second.tagsLength(); i++) { - mergedTags.add(second.tags(i)); - } - - int[] tagOffsets = new int[mergedTags.size()]; - for (int i = 0; i < mergedTags.size(); i++) { - tagOffsets[i] = builder.createString(mergedTags.get(i)); - } - int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); - - // Merge metadata (second overwrites first) - Map mergedMetadata = new HashMap<>(); - for (int i = 0; i < first.metadataKeysLength(); i++) { - mergedMetadata.put(first.metadataKeys(i), first.metadataValues(i)); - } - for (int i = 0; i < second.metadataKeysLength(); i++) { - mergedMetadata.put(second.metadataKeys(i), second.metadataValues(i)); - } - - String[] metadataKeys = mergedMetadata.keySet().toArray(new String[0]); - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = 
builder.createString(mergedMetadata.get(metadataKeys[i])); - } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - // Use first record's extra data (or could merge both) - int[] extraDataOffsets = new int[first.extraDataLength()]; - for (int i = 0; i < first.extraDataLength(); i++) { - extraDataOffsets[i] = builder.createString(first.extraData(i)); - } - int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the merged object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, price); - TestRecordFB.addActive(builder, active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); - - builder.finish(recordOffset); - return builder.dataBuffer().slice(); - } - private TestRecord createTestRecord() { var record = new TestRecord(); record.id = 12345; @@ -816,19 +301,14 @@ var record = new TestRecord(); record.price = 99.99; record.active = true; record.category = "Electronics"; - + record.tags = Arrays.asList("popular", "trending", "bestseller"); - + record.metadata = new HashMap<>(); record.metadata.put("manufacturer", "TechCorp"); record.metadata.put("model", "TC-2024"); record.metadata.put("warranty", "2 years"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value_" + (1000 + i)); - } - + return record; } @@ -839,18 +319,13 @@ var record = new TestRecord(); record.price = 149.99; record.active = false; record.category = "Software"; - + 
record.tags = Arrays.asList("new", "premium"); - + record.metadata = new HashMap<>(); record.metadata.put("vendor", "SoftCorp"); record.metadata.put("version", "2.1"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value2_" + (2000 + i)); - } - + return record; } @@ -863,8 +338,7 @@ public static class TestRecord { public String category; public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); - public List extraData = new ArrayList<>(); // Fields 8-20 for large record test - + public TestRecord() {} // Required for deserialization } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java index 1ead21f..f3abb7e 100644 --- a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java @@ -200,9 +200,7 @@ private ImprintRecord simulateProject(ImprintRecord source, int[] fieldIds) thro for (int fieldId : fieldIds) { var value = source.getValue(fieldId); - if (value != null) { - writer.addField(fieldId, value); - } + value.ifPresent(value1 -> writer.addField(fieldId, value1)); } return writer.build(); diff --git a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java index f93092a..5c52908 100644 --- a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java @@ -100,8 +100,8 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); - if (value != null) { - writer.addField(fieldId, value); + if (value.isPresent()) { + writer.addField(fieldId, value.get()); usedFieldIds.add(fieldId); } } diff --git 
a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java index 3275843..2544b88 100644 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java @@ -119,11 +119,11 @@ private ImprintRecord createMediumRecord() throws Exception { ); writer.addField(6, Value.fromArray(tags)); - // Add map field (all string values for consistency) + // Add map field var metadata = new HashMap(); metadata.put(MapKey.fromString("manufacturer"), Value.fromString("TechCorp")); metadata.put(MapKey.fromString("model"), Value.fromString("TC-2024")); - metadata.put(MapKey.fromString("year"), Value.fromString("2024")); + metadata.put(MapKey.fromString("year"), Value.fromInt32(2024)); writer.addField(7, Value.fromMap(metadata)); // Add more fields for medium size diff --git a/src/main/java/com/imprint/Constants.java b/src/main/java/com/imprint/Constants.java index 3c84a28..78b91a0 100644 --- a/src/main/java/com/imprint/Constants.java +++ b/src/main/java/com/imprint/Constants.java @@ -2,7 +2,9 @@ public final class Constants { public static final byte MAGIC = 0x49; - public static final byte VERSION = 0x01; + public static final byte VERSION = 0x02; public static final int HEADER_BYTES = 15; public static final int DIR_ENTRY_BYTES = 7; + + private Constants() {} } diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e720df5..b7ed224 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -1,3 +1,4 @@ + package com.imprint.core; import com.imprint.Constants; @@ -11,247 +12,234 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import java.util.*; /** - * An Imprint record containing a header and buffer management. 
- * Delegates all buffer operations to ImprintBuffers for cleaner separation. + * An Imprint record containing a header, optional field directory, and payload. + * Uses ByteBuffer for zero-copy operations to achieve low latency. */ @Getter public final class ImprintRecord { private final Header header; - private final ImprintBuffers buffers; - - /** - * Creates a record from deserialized components. - */ - private ImprintRecord(Header header, ImprintBuffers buffers) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = Objects.requireNonNull(buffers, "Buffers cannot be null"); - } - - /** - * Creates a record from pre-parsed directory (used by ImprintWriter). - */ - ImprintRecord(Header header, List directory, ByteBuffer payload) { + private final List directory; + private final ByteBuffer payload; // Read-only view for zero-copy + + public ImprintRecord(Header header, List directory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(directory, payload); + this.directory = List.copyOf(Objects.requireNonNull(directory, "Directory cannot be null")); + this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view } - // ========== FIELD ACCESS METHODS ========== - /** * Get a value by field ID, deserializing it on demand. - * Returns null if the field is not found. */ - public Value getValue(int fieldId) throws ImprintException { - var entry = buffers.findDirectoryEntry(fieldId); - if (entry == null) - return null; - - var fieldBuffer = buffers.getFieldBuffer(fieldId); - if (fieldBuffer == null) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get buffer for field " + fieldId); - - return deserializeValue(entry.getTypeCode(), fieldBuffer); - } - - /** - * Get raw bytes for a field without deserializing. 
- */ - public ByteBuffer getRawBytes(int fieldId) { - try { - return buffers.getFieldBuffer(fieldId); - } catch (ImprintException e) { - return null; - } + public Optional getValue(int fieldId) throws ImprintException { + // Binary search for the field ID without allocation + int index = findDirectoryIndex(fieldId); + if (index < 0) return Optional.empty(); + + var entry = directory.get(index); + int startOffset = entry.getOffset(); + int endOffset = (index + 1 < directory.size()) ? + directory.get(index + 1).getOffset() : payload.remaining(); + + var valueBytes = payload.duplicate(); + valueBytes.position(startOffset).limit(endOffset); + var value = deserializeValue(entry.getTypeCode(), valueBytes.slice()); + return Optional.of(value); } - + /** - * Project a subset of fields from this record. - * - * @param fieldIds Array of field IDs to include in the projection - * @return New ImprintRecord containing only the requested fields + * Get the raw bytes for a field without deserializing. + * Returns a zero-copy ByteBuffer view. */ - public ImprintRecord project(int... fieldIds) { - return ImprintOperations.project(this, fieldIds); - } - - /** - * Merge another record into this one. - * For duplicate fields, this record's values take precedence. - * - * @param other The record to merge with this one - * @return New ImprintRecord containing merged fields - * @throws ImprintException if merge fails - */ - public ImprintRecord merge(ImprintRecord other) throws ImprintException { - return ImprintOperations.merge(this, other); - } - - /** - * Get the directory (parsing it if necessary). 
- */ - public List getDirectory() { - return buffers.getDirectory(); - } - - // ========== TYPED GETTERS ========== - - public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); - } - - public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); - } - - public long getInt64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); - } - - public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); - } - - public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); - } - - public String getString(int fieldId) throws ImprintException { - var value = getValidatedValue(fieldId, "STRING"); - if (value instanceof Value.StringValue) - return ((Value.StringValue) value).getValue(); - if (value instanceof Value.StringBufferValue) - return ((Value.StringBufferValue) value).getValue(); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); + public Optional getRawBytes(int fieldId) { + int index = findDirectoryIndex(fieldId); + if (index < 0) return Optional.empty(); + + var entry = directory.get(index); + int startOffset = entry.getOffset(); + int endOffset = (index + 1 < directory.size()) ? 
+ directory.get(index + 1).getOffset() : payload.remaining(); + + var fieldBuffer = payload.duplicate(); + fieldBuffer.position(startOffset).limit(endOffset); + return Optional.of(fieldBuffer.slice().asReadOnlyBuffer()); } - - public byte[] getBytes(int fieldId) throws ImprintException { - var value = getValidatedValue(fieldId, "BYTES"); - if (value instanceof Value.BytesValue) - return ((Value.BytesValue) value).getValue(); - if (value instanceof Value.BytesBufferValue) - return ((Value.BytesBufferValue) value).getValue(); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); - } - - public List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); - } - - public Map getMap(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); - } - - public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); - } - - // ========== SERIALIZATION ========== - + /** - * Serialize this record to a ByteBuffer. + * Serialize this record to a ByteBuffer (zero-copy when possible). 
*/ public ByteBuffer serializeToBuffer() { var buffer = ByteBuffer.allocate(estimateSerializedSize()); buffer.order(ByteOrder.LITTLE_ENDIAN); - + // Write header serializeHeader(buffer); - - // Write directory - var directoryBuffer = buffers.serializeDirectory(); - buffer.put(directoryBuffer); - - // Write payload - var payload = buffers.getPayload(); + + // Write directory (always present) + VarInt.encode(directory.size(), buffer); + for (var entry : directory) { + serializeDirectoryEntry(entry, buffer); + } + + // Write payload (shallow copy only) var payloadCopy = payload.duplicate(); buffer.put(payloadCopy); - + + // Return read-only view of used portion buffer.flip(); - return buffer; + return buffer.asReadOnlyBuffer(); } - - public int estimateSerializedSize() { - int size = Constants.HEADER_BYTES; // header - size += buffers.serializeDirectory().remaining(); // directory - size += buffers.getPayload().remaining(); // payload - return size; - } - - // ========== STATIC FACTORY METHODS ========== - + + /** + * Create a fluent builder for constructing ImprintRecord instances. + * + * @param schemaId the schema identifier for this record + * @return a new builder instance + */ public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); } - + + /** + * Create a fluent builder for constructing ImprintRecord instances. + * + * @param fieldspaceId the fieldspace identifier + * @param schemaHash the schema hash + * @return a new builder instance + */ public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } - + + /** + * Deserialize a record from bytes. + */ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { return deserialize(ByteBuffer.wrap(bytes)); } - + + /** + * Deserialize a record from a ByteBuffer (zero-copy when possible). 
+ */ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); - + // Read header var header = deserializeHeader(buffer); - - // Calculate directory size - int directoryStartPos = buffer.position(); - var countResult = VarInt.decode(buffer); + + // Read directory (always present) + var directory = new ArrayList(); + VarInt.DecodeResult countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); - int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - - // Create directory buffer - buffer.position(directoryStartPos); - var directoryBuffer = buffer.slice(); - directoryBuffer.limit(directorySize); - - // Advance past directory - buffer.position(buffer.position() + directorySize); - - // Create payload buffer + + for (int i = 0; i < directoryCount; i++) { + directory.add(deserializeDirectoryEntry(buffer)); + } + + // Read payload as ByteBuffer slice for zero-copy var payload = buffer.slice(); payload.limit(header.getPayloadSize()); - - // Create buffers wrapper - var buffers = new ImprintBuffers(directoryBuffer, payload); - - return new ImprintRecord(header, buffers); + buffer.position(buffer.position() + header.getPayloadSize()); + + return new ImprintRecord(header, directory, payload); } - - // ========== PRIVATE HELPER METHODS ========== - + /** - * Get and validate a value exists and is not null. + * Binary search for field ID in directory without object allocation. + * Returns the index of the field if found, or a negative value if not found. 
+ * + * @param fieldId the field ID to search for + * @return index if found, or negative insertion point - 1 if not found */ - private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { - var value = getValue(fieldId); - if (value == null) - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); - if (value.getTypeCode() == TypeCode.NULL) - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); - return value; + private int findDirectoryIndex(int fieldId) { + int low = 0; + int high = directory.size() - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow + int midFieldId = directory.get(mid).getId(); + + if (midFieldId < fieldId) { + low = mid + 1; + } else if (midFieldId > fieldId) { + high = mid - 1; + } else { + return mid; // field found + } + } + return -(low + 1); // field not found, return insertion point } - - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) - throws ImprintException { - var value = getValidatedValue(fieldId, expectedTypeName); - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) - return expectedValueClass.cast(value); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + + private int estimateSerializedSize() { + int size = Constants.HEADER_BYTES; // header + size += VarInt.encodedLength(directory.size()); // directory count + size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries + size += payload.remaining(); // payload + return size; } - + + private void serializeHeader(ByteBuffer buffer) { + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put(header.getFlags().getValue()); + 
buffer.putInt(header.getSchemaId().getFieldspaceId()); + buffer.putInt(header.getSchemaId().getSchemaHash()); + buffer.putInt(header.getPayloadSize()); + } + + private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.HEADER_BYTES) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for header"); + } + + byte magic = buffer.get(); + if (magic != Constants.MAGIC) { + throw new ImprintException(ErrorType.INVALID_MAGIC, + "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + + ", got 0x" + Integer.toHexString(magic & 0xFF)); + } + + byte version = buffer.get(); + if (version != Constants.VERSION) { + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, + "Unsupported version: " + version); + } + + var flags = new Flags(buffer.get()); + int fieldspaceId = buffer.getInt(); + int schemaHash = buffer.getInt(); + int payloadSize = buffer.getInt(); + + return new Header(flags, new SchemaId(fieldspaceId, schemaHash), payloadSize); + } + + private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { + buffer.putShort(entry.getId()); + buffer.put(entry.getTypeCode().getCode()); + buffer.putInt(entry.getOffset()); + } + + private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for directory entry"); + } + + short id = buffer.getShort(); + var typeCode = TypeCode.fromByte(buffer.get()); + int offset = buffer.getInt(); + + return new DirectoryEntry(id, typeCode, offset); + } + private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - var valueBuffer = buffer.duplicate(); - valueBuffer.order(ByteOrder.LITTLE_ENDIAN); - + // Buffer is already positioned and limited correctly + buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); 
+ + // Use TypeHandler for simple types switch (typeCode) { case NULL: case BOOL: @@ -261,53 +249,117 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr case FLOAT64: case BYTES: case STRING: + return typeCode.getHandler().deserialize(buffer); + case ARRAY: + return deserializeArray(buffer); + case MAP: - return typeCode.getHandler().deserialize(valueBuffer); + return deserializeMap(buffer); + case ROW: - var nestedRecord = deserialize(valueBuffer); + var remainingBuffer = buffer.slice(); + var nestedRecord = deserialize(remainingBuffer); return Value.fromRow(nestedRecord); + default: throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - - private void serializeHeader(ByteBuffer buffer) { - buffer.put(Constants.MAGIC); - buffer.put(Constants.VERSION); - buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldSpaceId()); - buffer.putInt(header.getSchemaId().getSchemaHash()); - buffer.putInt(header.getPayloadSize()); + + private Value deserializeArray(ByteBuffer buffer) throws ImprintException { + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + + if (length == 0) { + return Value.fromArray(Collections.emptyList()); + } + + var elementType = TypeCode.fromByte(buffer.get()); + var elements = new ArrayList(length); + + for (int i = 0; i < length; i++) { + var elementBytes = readValueBytes(elementType, buffer); + var element = deserializeValue(elementType, elementBytes); + elements.add(element); + } + + return Value.fromArray(elements); } - - private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.HEADER_BYTES) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - - - byte magic = buffer.get(); - if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" 
+ Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); + + private Value deserializeMap(ByteBuffer buffer) throws ImprintException { + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + + if (length == 0) { + return Value.fromMap(Collections.emptyMap()); } - - byte version = buffer.get(); - if (version != Constants.VERSION) { - throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); + + var keyType = TypeCode.fromByte(buffer.get()); + var valueType = TypeCode.fromByte(buffer.get()); + var map = new HashMap(length); + + for (int i = 0; i < length; i++) { + // Read key + var keyBytes = readValueBytes(keyType, buffer); + var keyValue = deserializeValue(keyType, keyBytes); + var key = MapKey.fromValue(keyValue); + + // Read value + var valueBytes = readValueBytes(valueType, buffer); + var value = deserializeValue(valueType, valueBytes); + + map.put(key, value); } + + return Value.fromMap(map); + } + + private ByteBuffer readValueBytes(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { + // Use TypeHandler for simple types + switch (typeCode) { + case NULL: + case BOOL: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + case BYTES: + case STRING: + return typeCode.getHandler().readValueBytes(buffer); - var flags = new Flags(buffer.get()); - int fieldSpaceId = buffer.getInt(); - int schemaHash = buffer.getInt(); - int payloadSize = buffer.getInt(); + case ARRAY: + case MAP: + case ROW: + // For complex types, return the entire remaining buffer for now + // The specific deserializer will handle parsing in the future + var remainingBuffer = buffer.slice(); + buffer.position(buffer.limit()); + return remainingBuffer.asReadOnlyBuffer(); - return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); + default: + throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); + } } - + + 
@Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + var that = (ImprintRecord) obj; + return header.equals(that.header) && + directory.equals(that.directory) && + payload.equals(that.payload); + } + + @Override + public int hashCode() { + return Objects.hash(header, directory, payload); + } + @Override public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, buffers.getDirectoryCount(), buffers.getPayload().remaining()); + return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", + header, directory.size(), payload.remaining()); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 51a3525..48b0998 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -10,61 +10,53 @@ * A fluent builder for creating ImprintRecord instances with type-safe, * developer-friendly API that eliminates boilerplate Value.fromX() calls. *

- * Field IDs can be overwritten - calling field() with the same ID multiple times - * will replace the previous value. This allows for flexible builder patterns and - * conditional field updates. - *

* Usage: - *

  *   var record = ImprintRecord.builder(schemaId)
- *       .field(1, 42)              // int to Int32Value  
- *       .field(2, "hello")         // String to StringValue
- *       .field(1, 100)             // overwrites field 1 with new value
- *       .field(3, 3.14)            // double to Float64Value
- *       .field(4, bytes)           // byte[] to BytesValue
- *       .field(5, true)            // boolean to BoolValue
- *       .nullField(6)              // to NullValue
+ *       .field(1, 42)              // int -> Int32Value  
+ *       .field(2, "hello")         // String -> StringValue
+ *       .field(3, 3.14)            // double -> Float64Value
+ *       .field(4, bytes)           // byte[] -> BytesValue
+ *       .field(5, true)            // boolean -> BoolValue
+ *       .nullField(6)              // -> NullValue
  *       .build();
- * 
*/ -@SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; private final Map fields = new TreeMap<>(); - + ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); } - + // Primitive types with automatic Value wrapping public ImprintRecordBuilder field(int id, boolean value) { return addField(id, Value.fromBoolean(value)); } - + public ImprintRecordBuilder field(int id, int value) { return addField(id, Value.fromInt32(value)); } - + public ImprintRecordBuilder field(int id, long value) { return addField(id, Value.fromInt64(value)); } - + public ImprintRecordBuilder field(int id, float value) { return addField(id, Value.fromFloat32(value)); } - + public ImprintRecordBuilder field(int id, double value) { return addField(id, Value.fromFloat64(value)); } - + public ImprintRecordBuilder field(int id, String value) { return addField(id, Value.fromString(value)); } - + public ImprintRecordBuilder field(int id, byte[] value) { return addField(id, Value.fromBytes(value)); } - + // Collections with automatic conversion public ImprintRecordBuilder field(int id, List values) { var convertedValues = new ArrayList(values.size()); @@ -73,7 +65,7 @@ public ImprintRecordBuilder field(int id, List values) { } return addField(id, Value.fromArray(convertedValues)); } - + public ImprintRecordBuilder field(int id, Map map) { var convertedMap = new HashMap(map.size()); for (var entry : map.entrySet()) { @@ -83,22 +75,22 @@ public ImprintRecordBuilder field(int id, Map fieldsMap) { for (var entry : fieldsMap.entrySet()) { @@ -118,53 +110,53 @@ public ImprintRecordBuilder fields(Map fieldsMap) { } return this; } - + // Builder utilities public boolean hasField(int id) { return fields.containsKey(id); } - + public int fieldCount() { return fields.size(); } - + public Set fieldIds() { return new TreeSet<>(fields.keySet()); } - + // Build the final record public ImprintRecord build() 
throws ImprintException { + if (fields.isEmpty()) { + throw new ImprintException(com.imprint.error.ErrorType.SCHEMA_ERROR, + "Cannot build empty record - add at least one field"); + } + var writer = new ImprintWriter(schemaId); for (var entry : fields.entrySet()) { writer.addField(entry.getKey(), entry.getValue()); } return writer.build(); } - + // Internal helper methods - /** - * Adds or overwrites a field in the record being built. - * If a field with the given ID already exists, it will be replaced. - * - * @param id the field ID - * @param value the field value (cannot be null - use nullField() for explicit nulls) - * @return this builder for method chaining - */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); + if (fields.containsKey(id)) { + throw new IllegalArgumentException("Field ID " + id + " already exists - field IDs must be unique"); + } fields.put(id, value); return this; } - + private Value convertToValue(Object obj) { if (obj == null) { return Value.nullValue(); } - + if (obj instanceof Value) { return (Value) obj; } - + // Auto-boxing conversion if (obj instanceof Boolean) { return Value.fromBoolean((Boolean) obj); @@ -211,11 +203,11 @@ private Value convertToValue(Object obj) { if (obj instanceof ImprintRecord) { return Value.fromRow((ImprintRecord) obj); } - - throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + - " to Imprint Value. Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); + + throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + + " to Imprint Value. 
Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); } - + private MapKey convertToMapKey(Object obj) { if (obj instanceof Integer) { return MapKey.fromInt32((Integer) obj); @@ -229,11 +221,11 @@ private MapKey convertToMapKey(Object obj) { if (obj instanceof byte[]) { return MapKey.fromBytes((byte[]) obj); } - - throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + - ". Map keys must be int, long, String, or byte[]"); + + throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + + ". Map keys must be int, long, String, or byte[]"); } - + @Override public String toString() { return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); diff --git a/src/main/java/com/imprint/core/ImprintWriter.java b/src/main/java/com/imprint/core/ImprintWriter.java index b1d5f53..39ad9ea 100644 --- a/src/main/java/com/imprint/core/ImprintWriter.java +++ b/src/main/java/com/imprint/core/ImprintWriter.java @@ -2,13 +2,14 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; +import com.imprint.types.MapKey; import com.imprint.types.Value; +import com.imprint.util.VarInt; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Objects; -import java.util.TreeMap; +import java.nio.charset.StandardCharsets; +import java.util.*; /** * A writer for constructing ImprintRecords by adding fields sequentially. 
@@ -55,7 +56,7 @@ public ImprintRecord build() throws ImprintException { return new ImprintRecord(header, directory, payloadView); } - private int estimatePayloadSize() throws ImprintException { + private int estimatePayloadSize() { // More accurate estimation to reduce allocations int estimatedSize = 0; for (var value : fields.values()) { @@ -73,7 +74,7 @@ private int estimatePayloadSize() throws ImprintException { * @param value the value to estimate size for * @return estimated size in bytes including type-specific overhead */ - private int estimateValueSize(Value value) throws ImprintException { + private int estimateValueSize(Value value) { // Use TypeHandler for simple types switch (value.getTypeCode()) { case NULL: @@ -84,20 +85,52 @@ private int estimateValueSize(Value value) throws ImprintException { case FLOAT64: case BYTES: case STRING: + return value.getTypeCode().getHandler().estimateSize(value); + case ARRAY: + List array = ((Value.ArrayValue) value).getValue(); + int arraySize = VarInt.encodedLength(array.size()) + 1; // length + type code + for (Value element : array) { + arraySize += estimateValueSize(element); + } + return arraySize; + case MAP: - return value.getTypeCode().getHandler().estimateSize(value); + Map map = ((Value.MapValue) value).getValue(); + int mapSize = VarInt.encodedLength(map.size()) + 2; // length + 2 type codes + for (Map.Entry entry : map.entrySet()) { + mapSize += estimateMapKeySize(entry.getKey()); + mapSize += estimateValueSize(entry.getValue()); + } + return mapSize; case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - return rowValue.getValue().estimateSerializedSize(); + // Estimate nested record size (rough approximation) + return 100; // Conservative estimate default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + return 32; // Default fallback } } + + private int estimateMapKeySize(MapKey key) { + switch (key.getTypeCode()) { + case INT32: 
return 4; + case INT64: return 8; + case BYTES: + byte[] bytes = ((MapKey.BytesKey) key).getValue(); + return VarInt.encodedLength(bytes.length) + bytes.length; + case STRING: + var str = ((MapKey.StringKey) key).getValue(); + int utf8Length = str.getBytes(StandardCharsets.UTF_8).length; + return VarInt.encodedLength(utf8Length) + utf8Length; + default: + return 16; // Default fallback + } + } + private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { switch (value.getTypeCode()) { case NULL: @@ -108,11 +141,17 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept case FLOAT64: case BYTES: case STRING: + value.getTypeCode().getHandler().serialize(value, buffer); + break; + case ARRAY: + serializeArray((Value.ArrayValue) value, buffer); + break; + case MAP: - value.getTypeCode().getHandler().serialize(value, buffer); + serializeMap((Value.MapValue) value, buffer); break; - //TODO eliminate this switch entirely by implementing a ROW TypeHandler + case ROW: Value.RowValue rowValue = (Value.RowValue) value; var serializedRow = rowValue.getValue().serializeToBuffer(); @@ -120,7 +159,99 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept break; default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Unknown type code: " + value.getTypeCode()); + } + } + + private void serializeArray(Value.ArrayValue arrayValue, ByteBuffer buffer) throws ImprintException { + var elements = arrayValue.getValue(); + VarInt.encode(elements.size(), buffer); + + if (elements.isEmpty()) return; + + // All elements must have the same type + var elementType = elements.get(0).getTypeCode(); + buffer.put(elementType.getCode()); + for (var element : elements) { + if (element.getTypeCode() != elementType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Array elements must have same 
type code: " + + element.getTypeCode() + " != " + elementType); + } + serializeValue(element, buffer); + } + } + + private void serializeMap(Value.MapValue mapValue, ByteBuffer buffer) throws ImprintException { + var map = mapValue.getValue(); + VarInt.encode(map.size(), buffer); + + if (map.isEmpty()) { + return; + } + + // All keys and values must have consistent types + var iterator = map.entrySet().iterator(); + var first = iterator.next(); + var keyType = first.getKey().getTypeCode(); + var valueType = first.getValue().getTypeCode(); + + buffer.put(keyType.getCode()); + buffer.put(valueType.getCode()); + + // Serialize the first entry + serializeMapKey(first.getKey(), buffer); + serializeValue(first.getValue(), buffer); + + // Serialize remaining entries + while (iterator.hasNext()) { + var entry = iterator.next(); + if (entry.getKey().getTypeCode() != keyType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map keys must have same type code: " + + entry.getKey().getTypeCode() + " != " + keyType); + } + if (entry.getValue().getTypeCode() != valueType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map values must have same type code: " + + entry.getValue().getTypeCode() + " != " + valueType); + } + + serializeMapKey(entry.getKey(), buffer); + serializeValue(entry.getValue(), buffer); + } + } + + private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { + switch (key.getTypeCode()) { + case INT32: + MapKey.Int32Key int32Key = (MapKey.Int32Key) key; + buffer.putInt(int32Key.getValue()); + break; + + case INT64: + MapKey.Int64Key int64Key = (MapKey.Int64Key) key; + buffer.putLong(int64Key.getValue()); + break; + + case BYTES: + MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; + byte[] bytes = bytesKey.getValue(); + VarInt.encode(bytes.length, buffer); + buffer.put(bytes); + break; + + case STRING: + MapKey.StringKey stringKey = (MapKey.StringKey) key; + byte[] stringBytes = 
stringKey.getValue().getBytes(StandardCharsets.UTF_8); + VarInt.encode(stringBytes.length, buffer); + buffer.put(stringBytes); + break; + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Invalid map key type: " + key.getTypeCode()); } } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/SchemaId.java b/src/main/java/com/imprint/core/SchemaId.java index b6dae3b..cb03c1c 100644 --- a/src/main/java/com/imprint/core/SchemaId.java +++ b/src/main/java/com/imprint/core/SchemaId.java @@ -7,6 +7,6 @@ */ @Value public class SchemaId { - int fieldSpaceId; + int fieldspaceId; int schemaHash; } \ No newline at end of file diff --git a/src/main/java/com/imprint/error/ErrorType.java b/src/main/java/com/imprint/error/ErrorType.java index 49784ef..97b9772 100644 --- a/src/main/java/com/imprint/error/ErrorType.java +++ b/src/main/java/com/imprint/error/ErrorType.java @@ -14,6 +14,5 @@ public enum ErrorType { TYPE_MISMATCH, INVALID_TYPE_CODE, SERIALIZATION_ERROR, - DESERIALIZATION_ERROR, - INTERNAL_ERROR + DESERIALIZATION_ERROR } diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index a81b199..6bf450d 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -16,8 +16,8 @@ public enum TypeCode { FLOAT64(0x5, TypeHandler.FLOAT64), BYTES(0x6, TypeHandler.BYTES), STRING(0x7, TypeHandler.STRING), - ARRAY(0x8, TypeHandler.ARRAY), - MAP(0x9, TypeHandler.MAP), + ARRAY(0x8, null), // TODO: implement + MAP(0x9, null), // TODO: implement ROW(0xA, null); // TODO: implement (basically a placeholder for user-defined type) @Getter diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 634867b..4b5830a 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -1,23 +1,23 @@ package com.imprint.types; -import 
com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.util.VarInt; import java.nio.ByteBuffer; -import java.util.*; +import java.nio.charset.StandardCharsets; /** * Interface for handling type-specific serialization, deserialization, and size estimation. - * Note that primitives are basically boxed here which could impact performance slightly + * Note that primitives are potentially auto/un-boxed here which could impact performance slightly * but having all the types in their own implementation helps keep things organized for now, especially * for dealing with and testing more complex types in the future. */ public interface TypeHandler { Value deserialize(ByteBuffer buffer) throws ImprintException; void serialize(Value value, ByteBuffer buffer) throws ImprintException; - int estimateSize(Value value) throws ImprintException; - + int estimateSize(Value value); + ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException; + // Static implementations for each type TypeHandler NULL = new TypeHandler() { @Override @@ -34,6 +34,11 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 0; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + return ByteBuffer.allocate(0).asReadOnlyBuffer(); + } }; TypeHandler BOOL = new TypeHandler() { @@ -50,7 +55,7 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { @Override public void serialize(Value value, ByteBuffer buffer) { - var boolValue = (Value.BoolValue) value; + Value.BoolValue boolValue = (Value.BoolValue) value; buffer.put((byte) (boolValue.getValue() ? 
1 : 0)); } @@ -58,20 +63,28 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 1; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + var boolBuffer = buffer.slice(); + boolBuffer.limit(1); + buffer.position(buffer.position() + 1); + return boolBuffer.asReadOnlyBuffer(); + } }; TypeHandler INT32 = new TypeHandler() { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 4) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); } return Value.fromInt32(buffer.getInt()); } @Override public void serialize(Value value, ByteBuffer buffer) { - var int32Value = (Value.Int32Value) value; + Value.Int32Value int32Value = (Value.Int32Value) value; buffer.putInt(int32Value.getValue()); } @@ -79,13 +92,21 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 4; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + var int32Buffer = buffer.slice(); + int32Buffer.limit(4); + buffer.position(buffer.position() + 4); + return int32Buffer.asReadOnlyBuffer(); + } }; TypeHandler INT64 = new TypeHandler() { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 8) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); } return Value.fromInt64(buffer.getLong()); } @@ -100,20 +121,28 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 8; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + var int64Buffer = buffer.slice(); + int64Buffer.limit(8); + buffer.position(buffer.position() + 8); + return 
int64Buffer.asReadOnlyBuffer(); + } }; TypeHandler FLOAT32 = new TypeHandler() { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 4) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); } return Value.fromFloat32(buffer.getFloat()); } @Override public void serialize(Value value, ByteBuffer buffer) { - var float32Value = (Value.Float32Value) value; + Value.Float32Value float32Value = (Value.Float32Value) value; buffer.putFloat(float32Value.getValue()); } @@ -121,20 +150,28 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 4; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + var float32Buffer = buffer.slice(); + float32Buffer.limit(4); + buffer.position(buffer.position() + 4); + return float32Buffer.asReadOnlyBuffer(); + } }; TypeHandler FLOAT64 = new TypeHandler() { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 8) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); } return Value.fromFloat64(buffer.getDouble()); } @Override public void serialize(Value value, ByteBuffer buffer) { - var float64Value = (Value.Float64Value) value; + Value.Float64Value float64Value = (Value.Float64Value) value; buffer.putDouble(float64Value.getValue()); } @@ -142,6 +179,14 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 8; } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) { + var float64Buffer = buffer.slice(); + float64Buffer.limit(8); + buffer.position(buffer.position() + 8); + return float64Buffer.asReadOnlyBuffer(); + } }; 
TypeHandler BYTES = new TypeHandler() { @@ -150,7 +195,7 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult lengthResult = VarInt.decode(buffer); int length = lengthResult.getValue(); if (buffer.remaining() < length) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bytes value data after VarInt. Slice from readValueBytes is too short. Needed: " + length + ", available: " + buffer.remaining()); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bytes value"); } var bytesView = buffer.slice(); bytesView.limit(length); @@ -184,6 +229,18 @@ public int estimateSize(Value value) { return VarInt.encodedLength(bytes.length) + bytes.length; } } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { + int originalPosition = buffer.position(); + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); + buffer.position(originalPosition); + var valueBuffer = buffer.slice(); + valueBuffer.limit(totalLength); + buffer.position(buffer.position() + totalLength); + return valueBuffer.asReadOnlyBuffer(); + } }; TypeHandler STRING = new TypeHandler() { @@ -192,28 +249,28 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult strLengthResult = VarInt.decode(buffer); int strLength = strLengthResult.getValue(); if (buffer.remaining() < strLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for string value data after VarInt. Slice from readValueBytes is too short. 
Needed: " + strLength + ", available: " + buffer.remaining()); + throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for string value"); } var stringBytesView = buffer.slice(); stringBytesView.limit(strLength); buffer.position(buffer.position() + strLength); try { - return Value.fromStringBuffer(stringBytesView); + return Value.fromStringBuffer(stringBytesView.asReadOnlyBuffer()); } catch (Exception e) { - throw new ImprintException(ErrorType.INVALID_UTF8_STRING, "Invalid UTF-8 string or buffer issue: " + e.getMessage()); + throw new ImprintException(com.imprint.error.ErrorType.INVALID_UTF8_STRING, "Invalid UTF-8 string"); } } @Override public void serialize(Value value, ByteBuffer buffer) { if (value instanceof Value.StringBufferValue) { - var bufferValue = (Value.StringBufferValue) value; + Value.StringBufferValue bufferValue = (Value.StringBufferValue) value; var stringBuffer = bufferValue.getBuffer(); VarInt.encode(stringBuffer.remaining(), buffer); buffer.put(stringBuffer); } else { - var stringValue = (Value.StringValue) value; - byte[] stringBytes = stringValue.getUtf8Bytes(); + Value.StringValue stringValue = (Value.StringValue) value; + byte[] stringBytes = stringValue.getValue().getBytes(StandardCharsets.UTF_8); VarInt.encode(stringBytes.length, buffer); buffer.put(stringBytes); } @@ -226,217 +283,22 @@ public int estimateSize(Value value) { int length = bufferValue.getBuffer().remaining(); return VarInt.encodedLength(length) + length; } else { - Value.StringValue stringValue = (Value.StringValue) value; - byte[] utf8Bytes = stringValue.getUtf8Bytes(); - return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; + String str = ((Value.StringValue) value).getValue(); + int utf8Length = str.getBytes(StandardCharsets.UTF_8).length; + return VarInt.encodedLength(utf8Length) + utf8Length; } } - }; - - TypeHandler ARRAY = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws 
ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromArray(Collections.emptyList()); - } - - if (buffer.remaining() < 1) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); - } - var elementType = TypeCode.fromByte(buffer.get()); - var elements = new ArrayList(length); - var elementHandler = elementType.getHandler(); - - //Let each element handler consume what it needs from the buffer - for (int i = 0; i < length; i++) { - var element = elementHandler.deserialize(buffer); //Handler advances buffer position - elements.add(element); - } - - return Value.fromArray(elements); - } @Override - public void serialize(Value value, ByteBuffer buffer) throws ImprintException { - var arrayValue = (Value.ArrayValue) value; - var elements = arrayValue.getValue(); - VarInt.encode(elements.size(), buffer); - - if (elements.isEmpty()) return; - - var elementType = elements.get(0).getTypeCode(); - buffer.put(elementType.getCode()); - var elementHandler = elementType.getHandler(); - for (var element : elements) { - if (element.getTypeCode() != elementType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Array elements must have same type code: " + - element.getTypeCode() + " != " + elementType); - } - elementHandler.serialize(element, buffer); - } - } - - @Override - public int estimateSize(Value value) throws ImprintException { - var arrayValue = (Value.ArrayValue) value; - var elements = arrayValue.getValue(); - int sizeOfLength = VarInt.encodedLength(elements.size()); - if (elements.isEmpty()) { - return sizeOfLength; - } - int sizeOfElementTypeCode = 1; - int arraySize = sizeOfLength + sizeOfElementTypeCode; - var elementHandler = elements.get(0).getTypeCode().getHandler(); - for (var element : elements) { - arraySize += elementHandler.estimateSize(element); - } - return arraySize; - } - }; - - TypeHandler 
MAP = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { + public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { + int originalPosition = buffer.position(); VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromMap(Collections.emptyMap()); - } - - if (buffer.remaining() < 2) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); - } - var keyType = TypeCode.fromByte(buffer.get()); - var valueType = TypeCode.fromByte(buffer.get()); - var map = new HashMap(length); - - var keyHandler = keyType.getHandler(); - var valueHandler = valueType.getHandler(); - - //Let handlers consume directly from buffer - for (int i = 0; i < length; i++) { - var keyValue = keyHandler.deserialize(buffer);// Advances buffer - var key = MapKey.fromValue(keyValue); - - var mapInternalValue = valueHandler.deserialize(buffer);//Advances buffer - - map.put(key, mapInternalValue); - } - - return Value.fromMap(map); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) throws ImprintException { - var mapValue = (Value.MapValue) value; - var map = mapValue.getValue(); - VarInt.encode(map.size(), buffer); - - if (map.isEmpty()) { - return; - } - - var iterator = map.entrySet().iterator(); - var first = iterator.next(); - var keyType = first.getKey().getTypeCode(); - var valueType = first.getValue().getTypeCode(); - - buffer.put(keyType.getCode()); - buffer.put(valueType.getCode()); - - serializeMapKey(first.getKey(), buffer); - first.getValue().getTypeCode().getHandler().serialize(first.getValue(), buffer); - - while (iterator.hasNext()) { - var entry = iterator.next(); - if (entry.getKey().getTypeCode() != keyType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map keys must have same type code: " + - entry.getKey().getTypeCode() + " != " + keyType); - } 
- if (entry.getValue().getTypeCode() != valueType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map values must have same type code: " + - entry.getValue().getTypeCode() + " != " + valueType); - } - - serializeMapKey(entry.getKey(), buffer); - entry.getValue().getTypeCode().getHandler().serialize(entry.getValue(), buffer); - } - } - - @Override - public int estimateSize(Value value) throws ImprintException { - var mapValue = (Value.MapValue) value; - var map = mapValue.getValue(); - int sizeOfLength = VarInt.encodedLength(map.size()); - if (map.isEmpty()) { - return sizeOfLength; - } - int sizeOfTypeCodes = 2; - int mapSize = sizeOfLength + sizeOfTypeCodes; - - for (var entry : map.entrySet()) { - mapSize += estimateMapKeySize(entry.getKey()); - mapSize += entry.getValue().getTypeCode().getHandler().estimateSize(entry.getValue()); - } - return mapSize; - } - - private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: - MapKey.Int32Key int32Key = (MapKey.Int32Key) key; - buffer.putInt(int32Key.getValue()); - break; - - case INT64: - MapKey.Int64Key int64Key = (MapKey.Int64Key) key; - buffer.putLong(int64Key.getValue()); - break; - - case BYTES: - MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; - byte[] bytes = bytesKey.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - break; - - case STRING: - MapKey.StringKey stringKey = (MapKey.StringKey) key; - byte[] stringBytes = stringKey.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8); - VarInt.encode(stringBytes.length, buffer); - buffer.put(stringBytes); - break; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); - } - } - - private int estimateMapKeySize(MapKey key) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: return 4; - case INT64: return 8; - case BYTES: - byte[] bytes = ((MapKey.BytesKey) 
key).getValue(); - return VarInt.encodedLength(bytes.length) + bytes.length; - - case STRING: - var str = ((MapKey.StringKey) key).getValue(); - int utf8Length = str.getBytes(java.nio.charset.StandardCharsets.UTF_8).length; - return VarInt.encodedLength(utf8Length) + utf8Length; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); - } + int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); + buffer.position(originalPosition); + var valueBuffer = buffer.slice(); + valueBuffer.limit(totalLength); + buffer.position(buffer.position() + totalLength); + return valueBuffer.asReadOnlyBuffer(); } }; } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index bfa9958..4710ec5 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -169,7 +169,6 @@ public String toString() { } // Float64 Value - @Getter @EqualsAndHashCode(callSuper = false) public static class Float64Value extends Value { @@ -181,7 +180,7 @@ public Float64Value(double value) { @Override public TypeCode getTypeCode() { return TypeCode.FLOAT64; } - + @Override public String toString() { return String.valueOf(value); @@ -189,36 +188,26 @@ public String toString() { } // Bytes Value (array-based) - @Getter public static class BytesValue extends Value { - /** - * Returns internal array. MUST NOT be modified by caller. - */ private final byte[] value; - - /** - * Takes ownership of the byte array. Caller must not modify after construction. 
- */ + public BytesValue(byte[] value) { - this.value = Objects.requireNonNull(value); + this.value = value.clone(); // defensive copy } - + + public byte[] getValue() { + return value.clone(); // defensive copy + } + @Override public TypeCode getTypeCode() { return TypeCode.BYTES; } @Override public boolean equals(Object obj) { if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof BytesValue) { - BytesValue that = (BytesValue) obj; - return Arrays.equals(value, that.value); - } - if (obj instanceof BytesBufferValue) { - BytesBufferValue that = (BytesBufferValue) obj; - return Arrays.equals(value, that.getValue()); - } - return false; + if (obj == null || getClass() != obj.getClass()) return false; + BytesValue that = (BytesValue) obj; + return Arrays.equals(value, that.value); } @Override @@ -237,7 +226,7 @@ public static class BytesBufferValue extends Value { private final ByteBuffer value; public BytesBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); + this.value = value.asReadOnlyBuffer(); // zero-copy read-only view } public byte[] getValue() { @@ -248,7 +237,7 @@ public byte[] getValue() { } public ByteBuffer getBuffer() { - return value.duplicate(); + return value.duplicate(); // zero-copy view } @Override @@ -281,105 +270,53 @@ public String toString() { } // String Value (String-based) + @Getter + @EqualsAndHashCode(callSuper = false) public static class StringValue extends Value { - @Getter private final String value; - private volatile byte[] cachedUtf8Bytes; // Cache UTF-8 encoding public StringValue(String value) { this.value = Objects.requireNonNull(value, "String cannot be null"); } - public byte[] getUtf8Bytes() { - var cached = cachedUtf8Bytes; - if (cached == null) { - // UTF8 is idempotent so no need to synchronize - cached = value.getBytes(StandardCharsets.UTF_8); - cachedUtf8Bytes = cached; - } - return cached; // Return computed value - } - @Override public TypeCode getTypeCode() { return 
TypeCode.STRING; } - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof StringValue) { - StringValue that = (StringValue) obj; - return value.equals(that.value); - } - if (obj instanceof StringBufferValue) { - StringBufferValue that = (StringBufferValue) obj; - return value.equals(that.getValue()); - } - return false; - } - - @Override - public int hashCode() { - return value.hashCode(); - } - @Override public String toString() { return "\"" + value + "\""; } } - // String Value (ByteBuffer-based) + // String Value (ByteBuffer-based, zero-copy) public static class StringBufferValue extends Value { private final ByteBuffer value; - private volatile String cachedString; - - private static final int THREAD_LOCAL_BUFFER_SIZE = 1024; - private static final ThreadLocal DECODE_BUFFER_CACHE = - ThreadLocal.withInitial(() -> new byte[THREAD_LOCAL_BUFFER_SIZE]); - + private volatile String cachedString; // lazy decode + public StringBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); + this.value = value.asReadOnlyBuffer(); // zero-copy read-only view } - + public String getValue() { - String result = cachedString; - if (result == null) { - result = decodeUtf8(); - cachedString = result; - } - return result; - } - - private String decodeUtf8() { - final byte[] array; - final int offset; - final int length = value.remaining(); - - if (value.hasArray()) { - array = value.array(); - offset = value.arrayOffset() + value.position(); - } else { - byte[] threadLocalBuffer = DECODE_BUFFER_CACHE.get(); - if (length <= threadLocalBuffer.length) { - array = threadLocalBuffer; - } else { - // Fallback: copy bytes from the ByteBuffer to a new heap array (if too large for cache) - array = new byte[length]; + if (cachedString == null) { + synchronized (this) { + if (cachedString == null) { + var array = new byte[value.remaining()]; + value.duplicate().get(array); + cachedString = new 
String(array, StandardCharsets.UTF_8); + } } - value.duplicate().get(array, 0, length); - offset = 0; } - return new String(array, offset, length, StandardCharsets.UTF_8); + return cachedString; } - + public ByteBuffer getBuffer() { - return value.duplicate(); + return value.duplicate(); // zero-copy view } - + @Override public TypeCode getTypeCode() { return TypeCode.STRING; } - + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -394,12 +331,12 @@ public boolean equals(Object obj) { } return false; } - + @Override public int hashCode() { return getValue().hashCode(); // Use string hash for consistency } - + @Override public String toString() { return "\"" + getValue() + "\""; diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index f43683b..5c9a7e5 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -6,38 +6,21 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; -import lombok.experimental.UtilityClass; - import java.nio.ByteBuffer; /** * Utility class for encoding and decoding variable-length integers (VarInt). * Supports encoding/decoding of 32-bit unsigned integers. 
*/ -@UtilityClass public final class VarInt { - + private static final byte CONTINUATION_BIT = (byte) 0x80; private static final byte SEGMENT_BITS = 0x7f; private static final int MAX_VARINT_LEN = 5; // Enough for u32 - - // Simple cache for values 0-1023 - private static final int CACHE_SIZE = 1024; - private static final int[] ENCODED_LENGTHS = new int[CACHE_SIZE]; - - static { - // Pre-compute encoded lengths for cached values - for (int i = 0; i < CACHE_SIZE; i++) { - long val = Integer.toUnsignedLong(i); - int length = 1; - while (val >= 0x80) { - val >>>= 7; - length++; - } - ENCODED_LENGTHS[i] = length; - } - } - + + private VarInt() {} // utility class + + /** * Encode a 32-bit unsigned integer as a VarInt into the given ByteBuffer. * @param value the value to encode (treated as unsigned) @@ -46,7 +29,7 @@ public final class VarInt { public static void encode(int value, ByteBuffer buffer) { // Convert to unsigned long for proper bit manipulation long val = Integer.toUnsignedLong(value); - + // Encode at least one byte, then continue while value has more bits do { byte b = (byte) (val & SEGMENT_BITS); @@ -57,7 +40,8 @@ public static void encode(int value, ByteBuffer buffer) { buffer.put(b); } while (val != 0); } - + + /** * Decode a VarInt from a ByteBuffer. 
* @param buffer the buffer to decode from @@ -68,58 +52,58 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { long result = 0; int shift = 0; int bytesRead = 0; - + while (true) { if (bytesRead >= MAX_VARINT_LEN) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt too long"); } if (!buffer.hasRemaining()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Unexpected end of data while reading VarInt"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Unexpected end of data while reading VarInt"); } - + byte b = buffer.get(); bytesRead++; - + // Check if adding these 7 bits would overflow long segment = b & SEGMENT_BITS; if (shift >= 32 || (shift == 28 && segment > 0xF)) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt overflow"); } - + // Add the bottom 7 bits to the result result |= segment << shift; - + // If the high bit is not set, this is the last byte if ((b & CONTINUATION_BIT) == 0) { break; } - + shift += 7; } - + return new DecodeResult((int) result, bytesRead); } - + /** * Calculate the number of bytes needed to encode the given value as a VarInt. * @param value the value to encode (treated as unsigned) * @return the number of bytes needed */ public static int encodedLength(int value) { - if (value >= 0 && value < CACHE_SIZE) { - return ENCODED_LENGTHS[value]; - } - + // Convert to unsigned long for proper bit manipulation long val = Integer.toUnsignedLong(value); int length = 1; + + // Count additional bytes needed for values >= 128 while (val >= 0x80) { val >>>= 7; length++; } + return length; } - + /** * Result of a VarInt decode operation. 
*/ diff --git a/src/test/java/com/imprint/ByteBufferIntegrationTest.java b/src/test/java/com/imprint/ByteBufferIntegrationTest.java new file mode 100644 index 0000000..9460cbf --- /dev/null +++ b/src/test/java/com/imprint/ByteBufferIntegrationTest.java @@ -0,0 +1,87 @@ +package com.imprint; + +import com.imprint.core.*; +import com.imprint.types.*; +import com.imprint.error.ImprintException; +import java.nio.ByteBuffer; +import java.util.*; + +/** + * Integration test to verify ByteBuffer functionality and zero-copy benefits. + */ +public class ByteBufferIntegrationTest { + + public static void main(String[] args) { + try { + testByteBufferFunctionality(); + testZeroCopy(); + System.out.println("All ByteBuffer integration tests passed!"); + } catch (Exception e) { + System.err.println("ByteBuffer integration test failed: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + static void testByteBufferFunctionality() throws ImprintException { + System.out.println("Testing ByteBuffer functionality..."); + + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); + ImprintWriter writer = new ImprintWriter(schemaId); + + writer.addField(1, Value.fromInt32(42)) + .addField(2, Value.fromString("zero-copy test")) + .addField(3, Value.fromBytes(new byte[]{1, 2, 3, 4, 5})); + + ImprintRecord record = writer.build(); + + // Test ByteBuffer serialization + ByteBuffer serializedBuffer = record.serializeToBuffer(); + assert serializedBuffer.isReadOnly() : "Serialized buffer should be read-only"; + + // Test deserialization from ByteBuffer + ImprintRecord deserialized = ImprintRecord.deserialize(serializedBuffer); + + assert deserialized.getValue(1).get().equals(Value.fromInt32(42)); + assert deserialized.getValue(2).get().equals(Value.fromString("zero-copy test")); + + // Test raw bytes access returns ByteBuffer + Optional rawBytes = deserialized.getRawBytes(1); + assert rawBytes.isPresent() : "Raw bytes should be present for field 1"; + assert 
rawBytes.get().isReadOnly() : "Raw bytes buffer should be read-only"; + + System.out.println("ByteBuffer functionality test passed"); + } + + static void testZeroCopy() { + System.out.println("Testing zero-copy"); + + // Create a large payload to demonstrate zero-copy benefits + byte[] largePayload = new byte[1024 * 1024]; // 1MB + Arrays.fill(largePayload, (byte) 0xAB); + + SchemaId schemaId = new SchemaId(2, 0xcafebabe); + ImprintWriter writer = new ImprintWriter(schemaId); + + try { + writer.addField(1, Value.fromBytes(largePayload)); + ImprintRecord record = writer.build(); + + // Test that getRawBytes returns a view, not a copy + Optional rawBytes = record.getRawBytes(1); + assert rawBytes.isPresent() : "Raw bytes should be present"; + + ByteBuffer rawBuffer = rawBytes.get(); + assert rawBuffer.isReadOnly() : "Raw buffer should be read-only"; + + // The buffer should be positioned at the start of the actual data + // (after the VarInt length prefix) + assert rawBuffer.remaining() > largePayload.length : "Buffer should include length prefix"; + + System.out.println("Zero-copy benefits test passed"); + + } catch (ImprintException e) { + throw new RuntimeException("Failed zero-copy test", e); + } + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/ComprehensiveTest.java b/src/test/java/com/imprint/ComprehensiveTest.java new file mode 100644 index 0000000..af7f0b0 --- /dev/null +++ b/src/test/java/com/imprint/ComprehensiveTest.java @@ -0,0 +1,208 @@ +package com.imprint; + +import com.imprint.core.*; +import com.imprint.types.*; +import com.imprint.error.ImprintException; +import com.imprint.util.VarInt; +import java.nio.ByteBuffer; +import java.util.*; + +/** + * Comprehensive test to verify all functionality works correctly. 
+ */ +public class ComprehensiveTest { + + public static void main(String[] args) { + try { + testVarIntFunctionality(); + testValueTypes(); + testMapKeys(); + testComplexSerialization(); + testErrorHandling(); + testByteBufferPerformance(); + System.out.println("All comprehensive tests passed!"); + } catch (Exception e) { + System.err.println("Comprehensive test failed: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + static void testVarIntFunctionality() throws ImprintException { + System.out.println("Testing VarInt functionality..."); + + // Test encoding/decoding of various values + int[] testValues = {0, 1, 127, 128, 16383, 16384, Integer.MAX_VALUE}; + + for (int value : testValues) { + ByteBuffer buffer = ByteBuffer.allocate(10); + VarInt.encode(value, buffer); + int encodedLength = buffer.position(); + + buffer.flip(); + VarInt.DecodeResult result = VarInt.decode(buffer); + + assert result.getValue() == value : "VarInt roundtrip failed for " + value; + assert result.getBytesRead() == encodedLength : "Bytes read mismatch for " + value; + } + + System.out.println("✓ VarInt functionality test passed"); + } + + static void testValueTypes() { + System.out.println("Testing Value types"); + + // Test all value types + Value nullVal = Value.nullValue(); + Value boolVal = Value.fromBoolean(true); + Value int32Val = Value.fromInt32(42); + Value int64Val = Value.fromInt64(123456789L); + Value float32Val = Value.fromFloat32(3.14f); + Value float64Val = Value.fromFloat64(2.718281828); + Value bytesVal = Value.fromBytes(new byte[]{1, 2, 3, 4}); + Value stringVal = Value.fromString("test"); + + // Test type codes + assert nullVal.getTypeCode() == TypeCode.NULL; + assert boolVal.getTypeCode() == TypeCode.BOOL; + assert int32Val.getTypeCode() == TypeCode.INT32; + assert int64Val.getTypeCode() == TypeCode.INT64; + assert float32Val.getTypeCode() == TypeCode.FLOAT32; + assert float64Val.getTypeCode() == TypeCode.FLOAT64; + assert bytesVal.getTypeCode() 
== TypeCode.BYTES; + assert stringVal.getTypeCode() == TypeCode.STRING; + + // Test value extraction + assert ((Value.BoolValue) boolVal).getValue(); + assert ((Value.Int32Value) int32Val).getValue() == 42; + assert ((Value.Int64Value) int64Val).getValue() == 123456789L; + assert ((Value.Float32Value) float32Val).getValue() == 3.14f; + assert ((Value.Float64Value) float64Val).getValue() == 2.718281828; + assert Arrays.equals(((Value.BytesValue) bytesVal).getValue(), new byte[]{1, 2, 3, 4}); + assert ((Value.StringValue) stringVal).getValue().equals("test"); + + System.out.println("✓ Value types test passed"); + } + + static void testMapKeys() throws ImprintException { + System.out.println("Testing MapKey functionality..."); + + MapKey int32Key = MapKey.fromInt32(42); + MapKey int64Key = MapKey.fromInt64(123L); + MapKey bytesKey = MapKey.fromBytes(new byte[]{1, 2, 3}); + MapKey stringKey = MapKey.fromString("test"); + + // Test conversion to/from Values + Value int32Value = int32Key.toValue(); + Value int64Value = int64Key.toValue(); + Value bytesValue = bytesKey.toValue(); + Value stringValue = stringKey.toValue(); + + assert MapKey.fromValue(int32Value).equals(int32Key); + assert MapKey.fromValue(int64Value).equals(int64Key); + assert MapKey.fromValue(bytesValue).equals(bytesKey); + assert MapKey.fromValue(stringValue).equals(stringKey); + + System.out.println("✓ MapKey functionality test passed"); + } + + static void testComplexSerialization() throws ImprintException { + System.out.println("Testing complex serialization..."); + + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); + ImprintWriter writer = new ImprintWriter(schemaId); + + // Create complex nested structure + List array = Arrays.asList( + Value.fromInt32(1), + Value.fromInt32(2), + Value.fromInt32(3) + ); + + Map map = new HashMap<>(); + map.put(MapKey.fromString("key1"), Value.fromString("value1")); + map.put(MapKey.fromString("key2"), Value.fromString("value2")); + + writer.addField(1, 
Value.fromArray(array)) + .addField(2, Value.fromMap(map)) + .addField(3, Value.fromString("complex test")); + + ImprintRecord record = writer.build(); + + // Test ByteBuffer serialization + ByteBuffer serialized = record.serializeToBuffer(); + ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + + // Verify complex structures + Value deserializedArray = deserialized.getValue(1).get(); + assert deserializedArray instanceof Value.ArrayValue; + List deserializedList = ((Value.ArrayValue) deserializedArray).getValue(); + assert deserializedList.size() == 3; + assert deserializedList.get(0).equals(Value.fromInt32(1)); + + Value deserializedMap = deserialized.getValue(2).get(); + assert deserializedMap instanceof Value.MapValue; + Map deserializedMapValue = ((Value.MapValue) deserializedMap).getValue(); + assert deserializedMapValue.size() == 2; + assert deserializedMapValue.get(MapKey.fromString("key1")).equals(Value.fromString("value1")); + + System.out.println("✓ Complex serialization test passed"); + } + + static void testErrorHandling() { + System.out.println("Testing error handling..."); + + try { + // Test invalid type code + TypeCode.fromByte((byte) 0xFF); + assert false : "Should have thrown exception for invalid type code"; + } catch (ImprintException e) { + assert e.getErrorType() == com.imprint.error.ErrorType.INVALID_TYPE_CODE; + } + + try { + // Test invalid magic byte + byte[] invalidData = new byte[15]; + invalidData[0] = 0x00; // wrong magic + ImprintRecord.deserialize(invalidData); + assert false : "Should have thrown exception for invalid magic"; + } catch (ImprintException e) { + assert e.getErrorType() == com.imprint.error.ErrorType.INVALID_MAGIC; + } + + System.out.println("✓ Error handling test passed"); + } + + static void testByteBufferPerformance() throws ImprintException { + System.out.println("Testing ByteBuffer performance benefits..."); + + // Create a record with moderate-sized data + byte[] testData = new byte[1024]; + 
for (int i = 0; i < testData.length; i++) { + testData[i] = (byte) (i % 256); + } + + SchemaId schemaId = new SchemaId(1, 0x12345678); + ImprintWriter writer = new ImprintWriter(schemaId); + writer.addField(1, Value.fromBytes(testData)) + .addField(2, Value.fromString("performance test")); + + ImprintRecord record = writer.build(); + + // Test that raw bytes access is zero-copy + Optional rawBytes = record.getRawBytes(1); + assert rawBytes.isPresent(); + assert rawBytes.get().isReadOnly(); + + // Test ByteBuffer serialization + ByteBuffer serialized = record.serializeToBuffer(); + assert serialized.isReadOnly(); + + // Verify deserialization works + ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + Value retrievedBytes = deserialized.getValue(1).get(); + assert Arrays.equals(((Value.BytesValue) retrievedBytes).getValue(), testData); + + System.out.println("✓ ByteBuffer performance test passed"); + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index ee1d426..49cfce7 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -2,625 +2,143 @@ import com.imprint.core.*; import com.imprint.types.*; -import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.DisplayName; - import java.util.*; -import static org.junit.jupiter.api.Assertions.*; - /** - * Integration tests for Imprint core functionality. + * Integration test to verify the complete Java implementation works. + * This can be run as a simple main method without JUnit. 
*/ public class IntegrationTest { - - @Test - @DisplayName("Basic functionality: create, serialize, deserialize primitive types") - void testBasicFunctionality() throws ImprintException { + + public static void main(String[] args) { + try { + testBasicFunctionality(); + testArraysAndMaps(); + testNestedRecords(); + System.out.println("All integration tests passed!"); + } catch (Exception e) { + System.err.println("Integration test failed: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } + + static void testBasicFunctionality() throws ImprintException { + System.out.println("Testing basic functionality..."); + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - var record = ImprintRecord.builder(schemaId) - .field(1, 42) - .field(2, "testing java imprint spec") - .field(3, true) - .field(4, 3.14159) // double - .field(5, new byte[]{1, 2, 3, 4}) - .build(); - - // Verify we can read values back using type getters - assertEquals(42, record.getInt32(1)); - assertEquals("testing java imprint spec", record.getString(2)); - assertTrue(record.getBoolean(3)); - assertEquals(3.14159, record.getFloat64(4)); - assertArrayEquals(new byte[]{1,2,3,4}, record.getBytes(5)); - - assertNull(record.getValue(999), "Non-existent field should return null from getValue()"); - assertThrows(ImprintException.class, () -> record.getInt32(999), "Accessing non-existent field with getInt32 should throw"); - + ImprintWriter writer = new ImprintWriter(schemaId); + + writer.addField(1, Value.fromInt32(42)) + .addField(2, Value.fromString("testing java imprint spec")) + .addField(3, Value.fromBoolean(true)) + .addField(4, Value.fromFloat64(3.14159)) + .addField(5, Value.fromBytes(new byte[]{1, 2, 3, 4})); + + ImprintRecord record = writer.build(); + + // Verify we can read values back + assert record.getValue(1).get().equals(Value.fromInt32(42)); + assert record.getValue(2).get().equals(Value.fromString("testing java imprint spec")); + assert 
record.getValue(3).get().equals(Value.fromBoolean(true)); + assert record.getValue(999).isEmpty(); // non-existent field + // Test serialization round-trip var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - assertEquals(42, deserialized.getInt32(1)); - assertEquals("testing java imprint spec", deserialized.getString(2)); - assertTrue(deserialized.getBoolean(3)); - assertEquals(3.14159, deserialized.getFloat64(4)); - assertArrayEquals(new byte[]{1,2,3,4}, deserialized.getBytes(5)); - } - - @Test - @DisplayName("Collections: create, serialize, deserialize arrays and maps") - void testArraysAndMaps() throws ImprintException { + ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + + assert deserialized.getValue(1).get().equals(Value.fromInt32(42)); + assert deserialized.getValue(2).get().equals(Value.fromString("testing java imprint spec")); + assert deserialized.getValue(3).get().equals(Value.fromBoolean(true)); + + System.out.println("✓ Basic functionality test passed"); + } + + static void testArraysAndMaps() throws ImprintException { + System.out.println("Testing arrays and maps..."); + SchemaId schemaId = new SchemaId(2, 0xcafebabe); - - // Create an array using builder for convenience - List sourceIntList = Arrays.asList(1, 2, 3); - + ImprintWriter writer = new ImprintWriter(schemaId); + + // Create an array + List intArray = Arrays.asList( + Value.fromInt32(1), + Value.fromInt32(2), + Value.fromInt32(3) + ); + // Create a map - Map sourceStringToIntMap = new HashMap<>(); - sourceStringToIntMap.put("one", 1); - sourceStringToIntMap.put("two", 2); - var record = ImprintRecord.builder(schemaId) - .field(1, sourceIntList) // Builder converts List to List - .field(2, sourceStringToIntMap) // Builder converts Map - .build(); - + Map stringToIntMap = new HashMap<>(); + stringToIntMap.put(MapKey.fromString("one"), 
Value.fromInt32(1)); + stringToIntMap.put(MapKey.fromString("two"), Value.fromInt32(2)); + + writer.addField(1, Value.fromArray(intArray)) + .addField(2, Value.fromMap(stringToIntMap)); + + ImprintRecord record = writer.build(); + // Test serialization round-trip var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); ImprintRecord deserialized = ImprintRecord.deserialize(serialized); - + // Verify array - List deserializedArray = deserialized.getArray(1); - assertNotNull(deserializedArray); - assertEquals(3, deserializedArray.size()); - assertEquals(Value.fromInt32(1), deserializedArray.get(0)); - assertEquals(Value.fromInt32(2), deserializedArray.get(1)); - assertEquals(Value.fromInt32(3), deserializedArray.get(2)); - + Value arrayValue = deserialized.getValue(1).get(); + assert arrayValue instanceof Value.ArrayValue; + List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); + assert deserializedArray.size() == 3; + assert deserializedArray.get(0).equals(Value.fromInt32(1)); + // Verify map - Map deserializedMap = deserialized.getMap(2); - assertNotNull(deserializedMap); - assertEquals(2, deserializedMap.size()); - assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one"))); - assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two"))); - } - - @Test - @DisplayName("Nested Records: create, serialize, deserialize records within records") - void testNestedRecords() throws ImprintException { - var innerSchemaId = new SchemaId(3, 0x12345678); - var innerRecord = ImprintRecord.builder(innerSchemaId) - .field(1, "nested data") - .field(2, 9876543210L) - .build(); - - var outerSchemaId = new SchemaId(4, 0x87654321); - var outerRecord = ImprintRecord.builder(outerSchemaId) - .field(1, innerRecord) // Builder handles ImprintRecord directly - .field(2, "outer data") - .build(); - + Value mapValue = deserialized.getValue(2).get(); + assert mapValue instanceof 
Value.MapValue; + Map deserializedMap = ((Value.MapValue) mapValue).getValue(); + assert deserializedMap.size() == 2; + assert deserializedMap.get(MapKey.fromString("one")).equals(Value.fromInt32(1)); + + System.out.println("✓ Arrays and maps test passed"); + } + + static void testNestedRecords() throws ImprintException { + System.out.println("Testing nested records..."); + + // Create inner record + SchemaId innerSchemaId = new SchemaId(3, 0x12345678); + ImprintWriter innerWriter = new ImprintWriter(innerSchemaId); + innerWriter.addField(1, Value.fromString("nested data")) + .addField(2, Value.fromInt64(9876543210L)); + ImprintRecord innerRecord = innerWriter.build(); + + // Create outer record + SchemaId outerSchemaId = new SchemaId(4, 0x87654321); + ImprintWriter outerWriter = new ImprintWriter(outerSchemaId); + outerWriter.addField(1, Value.fromRow(innerRecord)) + .addField(2, Value.fromString("outer data")); + ImprintRecord outerRecord = outerWriter.build(); + + // Test serialization round-trip var buffer = outerRecord.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - assertEquals(4, deserialized.getHeader().getSchemaId().getFieldSpaceId()); - assertEquals("outer data", deserialized.getString(2)); - - var nestedDeserialized = deserialized.getRow(1); - assertNotNull(nestedDeserialized); - assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); - assertEquals("nested data", nestedDeserialized.getString(1)); - assertEquals(9876543210L, nestedDeserialized.getInt64(2)); - } - - @Test - @DisplayName("Project: subset of fields with serialization round-trip") - void testProjectSubsetWithSerialization() throws ImprintException { - var schemaId = new SchemaId(10, 0xabcd1234); - var originalRecord = ImprintRecord.builder(schemaId) - .field(1, 100) - .field(2, "keep this field") - .field(3, false) - .field(4, "remove this field") - 
.field(5, 42.5) - .field(6, new byte[]{9, 8, 7}) - .build(); - - // Project fields 1, 2, 5 (skip 3, 4, 6) - var projected = originalRecord.project(1, 2, 5); - - assertEquals(3, projected.getDirectory().size()); - assertEquals(100, projected.getInt32(1)); - assertEquals("keep this field", projected.getString(2)); - assertEquals(42.5, projected.getFloat64(5)); - - // Verify missing fields - assertNull(projected.getValue(3)); - assertNull(projected.getValue(4)); - assertNull(projected.getValue(6)); - - // Test serialization round-trip of projected record - var buffer = projected.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - assertEquals(3, deserialized.getDirectory().size()); - assertEquals(100, deserialized.getInt32(1)); - assertEquals("keep this field", deserialized.getString(2)); - assertEquals(42.5, deserialized.getFloat64(5)); - } - - @Test - @DisplayName("Project: complex data types (arrays, maps, nested records)") - void testProjectComplexTypes() throws ImprintException { - var schemaId = new SchemaId(11, 0xbeef4567); - - // Create nested record - var nestedRecord = ImprintRecord.builder(new SchemaId(12, 0x11111111)) - .field(100, "nested value") - .build(); - - // Create homogeneous array (all strings) - var testArray = Arrays.asList(Value.fromString("item1"), Value.fromString("item2"), Value.fromString("item3")); - - // Create homogeneous map (string keys -> string values) - var testMap = new HashMap(); - testMap.put(MapKey.fromString("key1"), Value.fromString("value1")); - testMap.put(MapKey.fromString("key2"), Value.fromString("value2")); - - var originalRecord = ImprintRecord.builder(schemaId) - .field(1, "simple string") - .field(2, Value.fromArray(testArray)) - .field(3, Value.fromMap(testMap)) - .field(4, nestedRecord) - .field(5, 999L) - .build(); - - // Project only complex types - var projected = originalRecord.project(2, 3, 4); - - 
assertEquals(3, projected.getDirectory().size()); - - // Verify array projection (homogeneous strings) - var projectedArray = projected.getArray(2); - assertEquals(3, projectedArray.size()); - assertEquals(Value.fromString("item1"), projectedArray.get(0)); - assertEquals(Value.fromString("item2"), projectedArray.get(1)); - assertEquals(Value.fromString("item3"), projectedArray.get(2)); - - // Verify map projection (string -> string) - var projectedMap = projected.getMap(3); - assertEquals(2, projectedMap.size()); - assertEquals(Value.fromString("value1"), projectedMap.get(MapKey.fromString("key1"))); - assertEquals(Value.fromString("value2"), projectedMap.get(MapKey.fromString("key2"))); - - // Verify nested record projection - var projectedNested = projected.getRow(4); - assertEquals("nested value", projectedNested.getString(100)); - - // Verify excluded fields - assertNull(projected.getValue(1)); - assertNull(projected.getValue(5)); - } - - @Test - @DisplayName("Merge: distinct fields with serialization round-trip") - void testMergeDistinctFieldsWithSerialization() throws ImprintException { - var schemaId = new SchemaId(20, 0xcafe5678); - - var record1 = ImprintRecord.builder(schemaId) - .field(1, 100) - .field(3, "from record1") - .field(5, true) - .build(); - - var record2 = ImprintRecord.builder(schemaId) - .field(2, 200L) - .field(4, "from record2") - .field(6, 3.14f) - .build(); - - var merged = record1.merge(record2); - - assertEquals(6, merged.getDirectory().size()); - assertEquals(100, merged.getInt32(1)); - assertEquals(200L, merged.getInt64(2)); - assertEquals("from record1", merged.getString(3)); - assertEquals("from record2", merged.getString(4)); - assertTrue(merged.getBoolean(5)); - assertEquals(3.14f, merged.getFloat32(6)); - - // Test serialization round-trip of merged record - var buffer = merged.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = 
ImprintRecord.deserialize(serialized); - - assertEquals(6, deserialized.getDirectory().size()); - assertEquals(100, deserialized.getInt32(1)); - assertEquals(200L, deserialized.getInt64(2)); - assertEquals("from record1", deserialized.getString(3)); - assertEquals("from record2", deserialized.getString(4)); - assertTrue(deserialized.getBoolean(5)); - assertEquals(3.14f, deserialized.getFloat32(6)); - } - - @Test - @DisplayName("Merge: overlapping fields - first record wins") - void testMergeOverlappingFields() throws ImprintException { - var schemaId = new SchemaId(21, 0xdead9876); - - var record1 = ImprintRecord.builder(schemaId) - .field(1, "first wins") - .field(2, 100) - .field(4, true) - .build(); - - var record2 = ImprintRecord.builder(schemaId) - .field(1, "second loses") // Overlapping field - .field(2, 999) // Overlapping field - .field(3, "unique to second") - .field(4, false) // Overlapping field - .build(); - - var merged = record1.merge(record2); - - assertEquals(4, merged.getDirectory().size()); - assertEquals("first wins", merged.getString(1)); // First record wins - assertEquals(100, merged.getInt32(2)); // First record wins - assertEquals("unique to second", merged.getString(3)); // Only in second - assertTrue(merged.getBoolean(4)); // First record wins - } - - @Test - @DisplayName("Merge: complex data types and nested records") - void testMergeComplexTypes() throws ImprintException { - var schemaId = new SchemaId(22, 0xbeef1111); - - // Create nested records for both - var nested1 = ImprintRecord.builder(new SchemaId(23, 0x22222222)) - .field(100, "nested in record1") - .build(); - - var nested2 = ImprintRecord.builder(new SchemaId(24, 0x33333333)) - .field(200, "nested in record2") - .build(); - - // Create arrays - var array1 = Arrays.asList(Value.fromString("array1_item1"), Value.fromString("array1_item2")); - var array2 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); - - // Create maps - var map1 = new HashMap(); - 
map1.put(MapKey.fromString("map1_key"), Value.fromString("map1_value")); - - var map2 = new HashMap(); - map2.put(MapKey.fromInt32(42), Value.fromBoolean(true)); - - var record1 = ImprintRecord.builder(schemaId) - .field(1, nested1) - .field(3, Value.fromArray(array1)) - .field(5, Value.fromMap(map1)) - .build(); - - var record2 = ImprintRecord.builder(schemaId) - .field(2, nested2) - .field(4, Value.fromArray(array2)) - .field(6, Value.fromMap(map2)) - .build(); - - var merged = record1.merge(record2); - - assertEquals(6, merged.getDirectory().size()); - - // Verify nested records - var mergedNested1 = merged.getRow(1); - assertEquals("nested in record1", mergedNested1.getString(100)); - - var mergedNested2 = merged.getRow(2); - assertEquals("nested in record2", mergedNested2.getString(200)); - - // Verify arrays - var mergedArray1 = merged.getArray(3); - assertEquals(2, mergedArray1.size()); - assertEquals(Value.fromString("array1_item1"), mergedArray1.get(0)); - - var mergedArray2 = merged.getArray(4); - assertEquals(2, mergedArray2.size()); - assertEquals(Value.fromInt32(10), mergedArray2.get(0)); - - // Verify maps - var mergedMap1 = merged.getMap(5); - assertEquals(Value.fromString("map1_value"), mergedMap1.get(MapKey.fromString("map1_key"))); - - var mergedMap2 = merged.getMap(6); - assertEquals(Value.fromBoolean(true), mergedMap2.get(MapKey.fromInt32(42))); - } - - @Test - @DisplayName("Project and Merge: chained operations") - void testProjectAndMergeChained() throws ImprintException { - var schemaId = new SchemaId(30, 0xabcdabcd); - - // Create a large record - var fullRecord = ImprintRecord.builder(schemaId) - .field(1, "field1") - .field(2, "field2") - .field(3, "field3") - .field(4, "field4") - .field(5, "field5") - .field(6, "field6") - .build(); - - // Project different subsets - var projection1 = fullRecord.project(1, 3, 5); - var projection2 = fullRecord.project(2, 4, 6); - - assertEquals(3, projection1.getDirectory().size()); - assertEquals(3, 
projection2.getDirectory().size()); - - // Merge the projections back together - var recomposed = projection1.merge(projection2); - - assertEquals(6, recomposed.getDirectory().size()); - assertEquals("field1", recomposed.getString(1)); - assertEquals("field2", recomposed.getString(2)); - assertEquals("field3", recomposed.getString(3)); - assertEquals("field4", recomposed.getString(4)); - assertEquals("field5", recomposed.getString(5)); - assertEquals("field6", recomposed.getString(6)); - - // Test another chain: project the merged result - var finalProjection = recomposed.project(2, 4, 6); - assertEquals(3, finalProjection.getDirectory().size()); - assertEquals("field2", finalProjection.getString(2)); - assertEquals("field4", finalProjection.getString(4)); - assertEquals("field6", finalProjection.getString(6)); - } - - @Test - @DisplayName("Merge and Project: empty record handling") - void testMergeAndProjectEmptyRecords() throws ImprintException { - var schemaId = new SchemaId(40, 0xeeeeeeee); - - var emptyRecord = ImprintRecord.builder(schemaId).build(); - var nonEmptyRecord = ImprintRecord.builder(schemaId) - .field(1, "not empty") - .field(2, 42) - .build(); - - // Test merging with empty - var merged1 = emptyRecord.merge(nonEmptyRecord); - var merged2 = nonEmptyRecord.merge(emptyRecord); - - assertEquals(2, merged1.getDirectory().size()); - assertEquals(2, merged2.getDirectory().size()); - assertEquals("not empty", merged1.getString(1)); - assertEquals("not empty", merged2.getString(1)); - - // Test projecting empty record - var projectedEmpty = emptyRecord.project(1, 2, 3); - assertEquals(0, projectedEmpty.getDirectory().size()); - - // Test projecting non-existent fields - var projectedNonExistent = nonEmptyRecord.project(99, 100); - assertEquals(0, projectedNonExistent.getDirectory().size()); - } - - @Test - @DisplayName("Project and Merge: Large record operations") - void testLargeRecordOperations() throws ImprintException { - var schemaId = new 
SchemaId(50, 0xffffffff); - - // Create a record with many fields - var builder = ImprintRecord.builder(schemaId); - for (int i = 1; i <= 100; i++) { - builder.field(i, "field_" + i + "_data"); - } - var largeRecord = builder.build(); - - assertEquals(100, largeRecord.getDirectory().size()); - - // Project a subset (every 10th field) - int[] projectionFields = new int[10]; - for (int i = 0; i < 10; i++) { - projectionFields[i] = (i + 1) * 10; // 10, 20, 30, ..., 100 - } - - var projected = largeRecord.project(projectionFields); - assertEquals(10, projected.getDirectory().size()); - - for (int i = 0; i < 10; i++) { - int fieldId = (i + 1) * 10; - assertEquals("field_" + fieldId + "_data", projected.getString(fieldId)); - } - - // Create another large record for merging - var builder2 = ImprintRecord.builder(schemaId); - for (int i = 101; i <= 150; i++) { - builder2.field(i, "additional_field_" + i); - } - var additionalRecord = builder2.build(); - - // Merge the large records - var merged = largeRecord.merge(additionalRecord); - assertEquals(150, merged.getDirectory().size()); - - // Verify some values from both records - assertEquals("field_1_data", merged.getString(1)); - assertEquals("field_50_data", merged.getString(50)); - assertEquals("field_100_data", merged.getString(100)); - assertEquals("additional_field_101", merged.getString(101)); - assertEquals("additional_field_150", merged.getString(150)); - } - - private ImprintRecord createTestRecordForGetters() throws ImprintException { - SchemaId schemaId = new SchemaId(5, 0xabcdef01); - - List innerList1 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); - List innerList2 = Arrays.asList(Value.fromInt32(30), Value.fromInt32(40)); - List listOfLists = Arrays.asList(Value.fromArray(innerList1), Value.fromArray(innerList2)); - - Map mapWithArrayValue = new HashMap<>(); - mapWithArrayValue.put(MapKey.fromString("list1"), Value.fromArray(innerList1)); - - return ImprintRecord.builder(schemaId) - .field(1, 
true) - .field(2, 12345) - .field(3, 9876543210L) - .field(4, 3.14f) - .field(5, 2.718281828) - .field(6, "hello type world") - .field(7, new byte[]{10, 20, 30}) - .nullField(8) - .field(9, Value.fromArray(listOfLists)) // Array of Arrays (using Value directly for test setup) - .field(10, Value.fromMap(mapWithArrayValue)) // Map with Array value - .field(11, Collections.emptyList()) // Empty Array via builder - .field(12, Collections.emptyMap()) // Empty Map via builder - .build(); - } - - private ImprintRecord serializeAndDeserialize(ImprintRecord record) throws ImprintException { - var buffer = record.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - return ImprintRecord.deserialize(serialized); - } - - @Test - @DisplayName("Type Getters: Basic primitive and String types") - void testBasicTypeGetters() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - assertTrue(record.getBoolean(1)); - assertEquals(12345, record.getInt32(2)); - assertEquals(9876543210L, record.getInt64(3)); - assertEquals(3.14f, record.getFloat32(4)); - assertEquals(2.718281828, record.getFloat64(5)); - assertEquals("hello type world", record.getString(6)); - assertArrayEquals(new byte[]{10, 20, 30}, record.getBytes(7)); - } - - @Test - @DisplayName("Type Getters: Array of Arrays") - void testTypeGetterArrayOfArrays() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - List arrOfArr = record.getArray(9); - assertNotNull(arrOfArr); - assertEquals(2, arrOfArr.size()); - assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(0)); - Value.ArrayValue firstInnerArray = (Value.ArrayValue) arrOfArr.get(0); - assertEquals(2, firstInnerArray.getValue().size()); - assertEquals(Value.fromInt32(10), firstInnerArray.getValue().get(0)); - assertEquals(Value.fromInt32(20), 
firstInnerArray.getValue().get(1)); - - assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(1)); - Value.ArrayValue secondInnerArray = (Value.ArrayValue) arrOfArr.get(1); - assertEquals(2, secondInnerArray.getValue().size()); - assertEquals(Value.fromInt32(30), secondInnerArray.getValue().get(0)); - assertEquals(Value.fromInt32(40), secondInnerArray.getValue().get(1)); - } - - @Test - @DisplayName("Type Getters: Map with Array Value") - void testTypeGetterMapWithArrayValue() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - Map mapWithArr = record.getMap(10); - assertNotNull(mapWithArr); - assertEquals(1, mapWithArr.size()); - assertInstanceOf(Value.ArrayValue.class, mapWithArr.get(MapKey.fromString("list1"))); - Value.ArrayValue innerArray = (Value.ArrayValue) mapWithArr.get(MapKey.fromString("list1")); - assertNotNull(innerArray); - assertEquals(2, innerArray.getValue().size()); - assertEquals(Value.fromInt32(10), innerArray.getValue().get(0)); - } - - @Test - @DisplayName("Type Getters: Empty Collections (Array and Map)") - void testTypeGettersEmptyCollections() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - List emptyArr = record.getArray(11); - assertNotNull(emptyArr); - assertTrue(emptyArr.isEmpty()); - - Map emptyMap = record.getMap(12); - assertNotNull(emptyMap); - assertTrue(emptyMap.isEmpty()); - } - - @Test - @DisplayName("Type Getters: Exception for Field Not Found") - void testTypeGetterExceptionFieldNotFound() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(99)); - assertEquals(ErrorType.FIELD_NOT_FOUND, ex.getErrorType()); - } - - @Test - @DisplayName("Type Getters: Exception for Null Field accessed as 
primitive") - void testTypeGetterExceptionNullField() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - ImprintException ex = assertThrows(ImprintException.class, () -> record.getString(8)); - assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); // getString throws TYPE_MISMATCH for null - assertTrue(ex.getMessage().contains("Field 8 is NULL")); - - - // Also test getValue for a null field returns Value.NullValue - Value nullValueField = record.getValue(8); - assertNotNull(nullValueField); - assertInstanceOf(Value.NullValue.class, nullValueField, "Field 8 should be Value.NullValue"); - } - - @Test - @DisplayName("Type Getters: Exception for Type Mismatch") - void testTypeGetterExceptionTypeMismatch() throws ImprintException { - var originalRecord = createTestRecordForGetters(); - var record = serializeAndDeserialize(originalRecord); - - ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(6)); // Field 6 is a String - assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); - } - - @Test - @DisplayName("Type Getters: Row (Nested Record)") - void testTypeGetterRow() throws ImprintException { - var innerSchemaId = new SchemaId(6, 0x12345678); - var innerRecord = ImprintRecord.builder(innerSchemaId) - .field(101, "nested string") - .field(102, 999L) - .build(); - - var recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) - .field(201, innerRecord) // Using builder to add row - .field(202, "outer field") - .build(); - - var deserializedWithRow = serializeAndDeserialize(recordWithRow); - - var retrievedRow = deserializedWithRow.getRow(201); - assertNotNull(retrievedRow); - assertEquals(innerSchemaId, retrievedRow.getHeader().getSchemaId()); - assertEquals("nested string", retrievedRow.getString(101)); - assertEquals(999L, retrievedRow.getInt64(102)); - assertEquals("outer field", deserializedWithRow.getString(202)); + ImprintRecord 
deserialized = ImprintRecord.deserialize(serialized); + + // Verify outer record + assert deserialized.getHeader().getSchemaId().getFieldspaceId() == 4; + assert deserialized.getValue(2).get().equals(Value.fromString("outer data")); + + // Verify nested record + Value rowValue = deserialized.getValue(1).get(); + assert rowValue instanceof Value.RowValue; + ImprintRecord nestedRecord = ((Value.RowValue) rowValue).getValue(); + + assert nestedRecord.getHeader().getSchemaId().getFieldspaceId() == 3; + assert nestedRecord.getValue(1).get().equals(Value.fromString("nested data")); + assert nestedRecord.getValue(2).get().equals(Value.fromInt64(9876543210L)); + + System.out.println("✓ Nested records test passed"); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/benchmark/ProfilerTest.java b/src/test/java/com/imprint/benchmark/ProfilerTest.java new file mode 100644 index 0000000..233d993 --- /dev/null +++ b/src/test/java/com/imprint/benchmark/ProfilerTest.java @@ -0,0 +1,226 @@ +package com.imprint.benchmark; + +import com.imprint.core.*; +import com.imprint.types.Value; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Disabled; + +import java.util.Random; + +/** + * A test designed for profiling hotspots during development. + *

+ * To use with a profiler: + * 1. Remove @Disabled annotation + * 2. Run with JProfiler, VisualVM, or async-profiler: + * - JProfiler: Attach to test JVM + * - VisualVM: jvisualvm, attach to process + * - async-profiler: java -jar async-profiler.jar -d 30 -f profile.html + * 3. Look for hotspots in CPU sampling + *

+ * Key areas to examine: + * - Object allocation (memory profiling) + * - Method call frequency (CPU sampling) + * - GC pressure (memory profiling) + * - String operations and UTF-8 encoding + * - ByteBuffer operations + */ +@Disabled("Enable manually for profiling") +public class ProfilerTest { + + private static final int ITERATIONS = 1_000_000; + private static final int RECORD_SIZE = 20; + + @Test + void profileFieldAccess() throws Exception { + System.out.println("Starting profiler test - attach profiler now..."); + Thread.sleep(5000); // Give time to attach profiler + + // Create a representative record + var record = createTestRecord(); + + System.out.println("Beginning field access profiling..."); + long start = System.nanoTime(); + + // Simulate real-world access patterns + Random random = new Random(42); + int hits = 0; + + for (int i = 0; i < ITERATIONS; i++) { + // Random field access (hotspot) + int fieldId = random.nextInt(RECORD_SIZE) + 1; + var value = record.getValue(fieldId); + if (value.isPresent()) { + hits++; + + // Trigger string decoding (potential hotspot) + if (value.get().getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value.get() instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value.get()).getValue(); + } else { + ((Value.StringValue) value.get()).getValue(); + } + } + } + + // Some raw access (zero-copy path) + if (i % 10 == 0) { + record.getRawBytes(fieldId); + } + } + + long duration = System.nanoTime() - start; + System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", + ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); + } + + @Test + void profileSerialization() throws Exception { + System.out.println("Starting serialization profiler test..."); + Thread.sleep(3000); + + var schemaId = new SchemaId(1, 0x12345678); + + System.out.println("Beginning serialization profiling..."); + long start = System.nanoTime(); + + // Create and serialize many 
records (allocation hotspot) + for (int i = 0; i < 100_000; i++) { + var writer = new ImprintWriter(schemaId); + + // Add various field types + writer.addField(1, Value.fromInt32(i)) + .addField(2, Value.fromString("test-string-" + i)) + .addField(3, Value.fromFloat64(i * 3.14159)) + .addField(4, Value.fromBytes(("bytes-" + i).getBytes())); + + var record = writer.build(); + var serialized = record.serializeToBuffer(); // Potential hotspot + + // Trigger some deserialization + if (i % 1000 == 0) { + var deserialized = ImprintRecord.deserialize(serialized); + deserialized.getValue(2); // String decoding hotspot + } + } + + long duration = System.nanoTime() - start; + System.out.printf("Completed serialization test in %.2f ms%n", duration / 1_000_000.0); + } + + @Test + void profileProjection() throws Exception { + System.out.println("Starting projection profiler test..."); + Thread.sleep(3000); + + var record = createLargeRecord(); + + System.out.println("Beginning projection profiling..."); + long start = System.nanoTime(); + + // Simulate analytical workload - project subset of fields repeatedly + for (int i = 0; i < 50_000; i++) { + // Project 10 fields out of 100 (common analytical pattern) + for (int fieldId = 1; fieldId <= 10; fieldId++) { + var value = record.getValue(fieldId); + if (value.isPresent()) { + // Force materialization of string values + if (value.get().getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value.get() instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value.get()).getValue(); + } + } + } + } + } + + long duration = System.nanoTime() - start; + System.out.printf("Completed projection test in %.2f ms%n", duration / 1_000_000.0); + } + + @Test + void profileMemoryAllocation() throws Exception { + System.out.println("Starting allocation profiler test..."); + Thread.sleep(3000); + + System.out.println("Beginning allocation profiling - watch for GC events..."); + + // Force allocation pressure to reveal GC hotspots 
+ for (int batch = 0; batch < 1000; batch++) { + for (int i = 0; i < 1000; i++) { + var schemaId = new SchemaId(batch, i); + var writer = new ImprintWriter(schemaId); + + // Create strings of varying sizes (allocation pressure) + writer.addField(1, Value.fromString("small")) + .addField(2, Value.fromString("medium-length-string-" + i)) + .addField(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) + .addField(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays + + var record = writer.build(); + + // Some deserialization to trigger string decoding allocations + record.getValue(2); + record.getValue(3); + } + + if (batch % 100 == 0) { + System.out.printf("Completed batch %d/1000%n", batch); + } + } + + System.out.println("Allocation test complete - check GC logs and memory profiler"); + } + + private ImprintRecord createTestRecord() throws Exception { + var schemaId = new SchemaId(1, 0xdeadbeef); + var writer = new ImprintWriter(schemaId); + + for (int i = 1; i <= RECORD_SIZE; i++) { + switch (i % 4) { + case 0: + writer.addField(i, Value.fromInt32(i * 100)); + break; + case 1: + writer.addField(i, Value.fromString("field-value-" + i)); + break; + case 2: + writer.addField(i, Value.fromFloat64(i * 3.14159)); + break; + case 3: + writer.addField(i, Value.fromBytes(("bytes-" + i).getBytes())); + break; + } + } + + return writer.build(); + } + + private ImprintRecord createLargeRecord() throws Exception { + var schemaId = new SchemaId(2, 0xcafebabe); + var writer = new ImprintWriter(schemaId); + + // Create 100 fields with realistic data + for (int i = 1; i <= 100; i++) { + switch (i % 5) { + case 0: + writer.addField(i, Value.fromInt32(i)); + break; + case 1: + writer.addField(i, Value.fromString("user-name-" + i + "@example.com")); + break; + case 2: + writer.addField(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); + break; + case 
3: + writer.addField(i, Value.fromFloat64(i * 2.718281828)); + break; + case 4: + writer.addField(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); + break; + } + } + + return writer.build(); + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java b/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java new file mode 100644 index 0000000..54dcfae --- /dev/null +++ b/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java @@ -0,0 +1,234 @@ +package com.imprint.core; + +import com.imprint.error.ImprintException; +import com.imprint.types.Value; +import org.junit.jupiter.api.Test; + +import java.util.*; + +import static org.assertj.core.api.Assertions.*; + +class ImprintRecordBuilderTest { + + private static final SchemaId TEST_SCHEMA = new SchemaId(1, 0x12345678); + + @Test + void shouldCreateRecordWithPrimitiveTypes() throws ImprintException { + var record = ImprintRecord.builder(TEST_SCHEMA) + .field(1, true) + .field(2, 42) + .field(3, 123L) + .field(4, 3.14f) + .field(5, 2.718) + .field(6, "hello world") + .field(7, new byte[]{1, 2, 3}) + .nullField(8) + .build(); + + assertThat(record.getHeader().getSchemaId()).isEqualTo(TEST_SCHEMA); + assertThat(record.getDirectory()).hasSize(8); + + // Verify field values + assertThat(getFieldValue(record, 1, Value.BoolValue.class).getValue()).isTrue(); + assertThat(getFieldValue(record, 2, Value.Int32Value.class).getValue()).isEqualTo(42); + assertThat(getFieldValue(record, 3, Value.Int64Value.class).getValue()).isEqualTo(123L); + assertThat(getFieldValue(record, 4, Value.Float32Value.class).getValue()).isEqualTo(3.14f); + assertThat(getFieldValue(record, 5, Value.Float64Value.class).getValue()).isEqualTo(2.718); + assertThat(getStringValue(record, 6)).isEqualTo("hello world"); + assertThat(getBytesValue(record, 7)).isEqualTo(new byte[]{1, 2, 3}); + assertThat(record.getValue(8).get()).isInstanceOf(Value.NullValue.class); + } + + @Test + void 
shouldCreateRecordWithCollections() throws ImprintException { + var list = List.of(1, 2, 3); + var map = Map.of("key1", 100, "key2", 200); + + var record = ImprintRecord.builder(TEST_SCHEMA) + .field(1, list) + .field(2, map) + .build(); + + // Verify array + var arrayValue = getFieldValue(record, 1, Value.ArrayValue.class); + assertThat(arrayValue.getValue()).hasSize(3); + assertThat(((Value.Int32Value) arrayValue.getValue().get(0)).getValue()).isEqualTo(1); + assertThat(((Value.Int32Value) arrayValue.getValue().get(1)).getValue()).isEqualTo(2); + assertThat(((Value.Int32Value) arrayValue.getValue().get(2)).getValue()).isEqualTo(3); + + // Verify map + var mapValue = getFieldValue(record, 2, Value.MapValue.class); + assertThat(mapValue.getValue()).hasSize(2); + } + + @Test + void shouldCreateRecordWithNestedRecord() throws ImprintException { + var nestedRecord = ImprintRecord.builder(new SchemaId(2, 0x87654321)) + .field(1, "nested") + .field(2, 999) + .build(); + + var record = ImprintRecord.builder(TEST_SCHEMA) + .field(1, "parent") + .field(2, nestedRecord) + .build(); + + var rowValue = getFieldValue(record, 2, Value.RowValue.class); + var nested = rowValue.getValue(); + assertThat(getStringValue(nested, 1)).isEqualTo("nested"); + assertThat(getFieldValue(nested, 2, Value.Int32Value.class).getValue()).isEqualTo(999); + } + + @Test + void shouldSupportConditionalFields() throws ImprintException { + boolean includeOptional = true; + String optionalValue = "optional"; + + var record = ImprintRecord.builder(TEST_SCHEMA) + .field(1, "required") + .fieldIf(includeOptional, 2, optionalValue) + .fieldIfNotNull(3, null) // Should not add field + .fieldIfNotNull(4, "not null") // Should add field + .build(); + + assertThat(record.getDirectory()).hasSize(3); // Only fields 1, 2, 4 + assertThat(getStringValue(record, 1)).isEqualTo("required"); + assertThat(getStringValue(record, 2)).isEqualTo("optional"); + assertThat(record.getValue(3)).isEmpty(); // Not added + 
assertThat(getStringValue(record, 4)).isEqualTo("not null"); + } + + @Test + void shouldSupportBulkOperations() throws ImprintException { + var fieldsMap = Map.of( + 1, "bulk1", + 2, 42, + 3, true + ); + + var record = ImprintRecord.builder(TEST_SCHEMA) + .fields(fieldsMap) + .field(4, "additional") + .build(); + + assertThat(record.getDirectory()).hasSize(4); + assertThat(getStringValue(record, 1)).isEqualTo("bulk1"); + assertThat(getFieldValue(record, 2, Value.Int32Value.class).getValue()).isEqualTo(42); + assertThat(getFieldValue(record, 3, Value.BoolValue.class).getValue()).isTrue(); + assertThat(getStringValue(record, 4)).isEqualTo("additional"); + } + + @Test + void shouldProvideBuilderUtilities() { + var builder = ImprintRecord.builder(TEST_SCHEMA) + .field(1, "test") + .field(2, 42); + + assertThat(builder.hasField(1)).isTrue(); + assertThat(builder.hasField(3)).isFalse(); + assertThat(builder.fieldCount()).isEqualTo(2); + assertThat(builder.fieldIds()).containsExactly(1, 2); + } + + @Test + void shouldSupportAlternativeSchemaConstructor() throws ImprintException { + var record = ImprintRecord.builder(1, 0x12345678) + .field(1, "test") + .build(); + + assertThat(record.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); + assertThat(record.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0x12345678); + } + + @Test + void shouldRoundTripThroughSerialization() throws ImprintException { + var original = ImprintRecord.builder(TEST_SCHEMA) + .field(1, "test string") + .field(2, 42) + .field(3, 3.14159) + .field(4, true) + .field(5, new byte[]{0x01, 0x02, 0x03}) + .build(); + + var serialized = original.serializeToBuffer(); + var deserialized = ImprintRecord.deserialize(serialized); + + assertThat(deserialized.getHeader().getSchemaId()).isEqualTo(TEST_SCHEMA); + assertThat(getStringValue(deserialized, 1)).isEqualTo("test string"); + assertThat(getFieldValue(deserialized, 2, Value.Int32Value.class).getValue()).isEqualTo(42); + 
assertThat(getFieldValue(deserialized, 3, Value.Float64Value.class).getValue()).isEqualTo(3.14159); + assertThat(getFieldValue(deserialized, 4, Value.BoolValue.class).getValue()).isTrue(); + assertThat(getBytesValue(deserialized, 5)).isEqualTo(new byte[]{0x01, 0x02, 0x03}); + } + + // Error cases + + @Test + void shouldRejectDuplicateFieldIds() { + assertThatThrownBy(() -> + ImprintRecord.builder(TEST_SCHEMA) + .field(1, "first") + .field(1, "duplicate") // Same field ID + ).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Field ID 1 already exists"); + } + + @Test + void shouldRejectEmptyRecord() { + assertThatThrownBy(() -> + ImprintRecord.builder(TEST_SCHEMA).build() + ).isInstanceOf(ImprintException.class) + .hasMessageContaining("Cannot build empty record"); + } + + @Test + void shouldRejectInvalidMapKeys() { + var mapWithInvalidKey = Map.of(3.14, "value"); // Double key not supported + + assertThatThrownBy(() -> + ImprintRecord.builder(TEST_SCHEMA) + .field(1, mapWithInvalidKey) + ).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid map key type: Double"); + } + + @Test + void shouldRejectNullValueWithoutExplicitNullField() { + assertThatThrownBy(() -> + ImprintRecord.builder(TEST_SCHEMA) + .field(1, (Value) null) + ).isInstanceOf(NullPointerException.class) + .hasMessageContaining("Value cannot be null - use nullField()"); + } + + // Helper methods for cleaner test assertions + + private T getFieldValue(ImprintRecord record, int fieldId, Class valueType) throws ImprintException { + var value = record.getValue(fieldId); + assertThat(value).isPresent(); + assertThat(value.get()).isInstanceOf(valueType); + return valueType.cast(value.get()); + } + + private String getStringValue(ImprintRecord record, int fieldId) throws ImprintException { + var value = record.getValue(fieldId).get(); + if (value instanceof Value.StringValue) { + return ((Value.StringValue) value).getValue(); + } else if (value instanceof 
Value.StringBufferValue) { + return ((Value.StringBufferValue) value).getValue(); + } else { + throw new AssertionError("Expected string value, got: " + value.getClass()); + } + } + + private byte[] getBytesValue(ImprintRecord record, int fieldId) throws ImprintException { + var value = record.getValue(fieldId).get(); + if (value instanceof Value.BytesValue) { + return ((Value.BytesValue) value).getValue(); + } else if (value instanceof Value.BytesBufferValue) { + return ((Value.BytesBufferValue) value).getValue(); + } else { + throw new AssertionError("Expected bytes value, got: " + value.getClass()); + } + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordTest.java b/src/test/java/com/imprint/core/ImprintRecordTest.java index 3e37473..0772580 100644 --- a/src/test/java/com/imprint/core/ImprintRecordTest.java +++ b/src/test/java/com/imprint/core/ImprintRecordTest.java @@ -34,20 +34,20 @@ var record = writer.build(); assertThat(record.getHeader().getSchemaId()).isEqualTo(schemaId); assertThat(record.getDirectory()).hasSize(2); - Value field1 = record.getValue(1); - Value field2 = record.getValue(2); + Optional field1 = record.getValue(1); + Optional field2 = record.getValue(2); - assertThat(field1).isNotNull(); - assertThat(field1).isInstanceOf(Value.Int32Value.class); - assertThat(((Value.Int32Value) field1).getValue()).isEqualTo(42); + assertThat(field1).isPresent(); + assertThat(field1.get()).isInstanceOf(Value.Int32Value.class); + assertThat(((Value.Int32Value) field1.get()).getValue()).isEqualTo(42); - assertThat(field2).isNotNull(); - assertThat(field2.getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); - String stringValue = getStringValue(field2); + assertThat(field2).isPresent(); + assertThat(field2.get().getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); + String stringValue = getStringValue(field2.get()); assertThat(stringValue).isEqualTo("hello"); - // Non-existent field should return null - 
assertThat(record.getValue(999)).isNull(); + // Non-existent field should return empty + assertThat(record.getValue(999)).isEmpty(); } @Test @@ -73,22 +73,22 @@ void shouldRoundtripThroughSerialization() throws ImprintException { var deserialized = ImprintRecord.deserialize(serialized); // Verify metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); + assertThat(deserialized.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); assertThat(deserialized.getDirectory()).hasSize(8); // Verify all values - assertThat(deserialized.getValue(1)).isEqualTo(Value.nullValue()); - assertThat(deserialized.getValue(2)).isEqualTo(Value.fromBoolean(true)); - assertThat(deserialized.getValue(3)).isEqualTo(Value.fromInt32(42)); - assertThat(deserialized.getValue(4)).isEqualTo(Value.fromInt64(123456789L)); - assertThat(deserialized.getValue(5)).isEqualTo(Value.fromFloat32(3.14f)); - assertThat(deserialized.getValue(6)).isEqualTo(Value.fromFloat64(2.718281828)); - assertThat(deserialized.getValue(7)).isEqualTo(Value.fromBytes(new byte[]{1, 2, 3, 4})); - assertThat(deserialized.getValue(8)).isEqualTo(Value.fromString("test string")); + assertThat(deserialized.getValue(1)).contains(Value.nullValue()); + assertThat(deserialized.getValue(2)).contains(Value.fromBoolean(true)); + assertThat(deserialized.getValue(3)).contains(Value.fromInt32(42)); + assertThat(deserialized.getValue(4)).contains(Value.fromInt64(123456789L)); + assertThat(deserialized.getValue(5)).contains(Value.fromFloat32(3.14f)); + assertThat(deserialized.getValue(6)).contains(Value.fromFloat64(2.718281828)); + assertThat(deserialized.getValue(7)).contains(Value.fromBytes(new byte[]{1, 2, 3, 4})); + assertThat(deserialized.getValue(8)).contains(Value.fromString("test string")); // Non-existent field - assertThat(deserialized.getValue(999)).isNull(); + 
assertThat(deserialized.getValue(999)).isEmpty(); } @Test @@ -111,11 +111,11 @@ void shouldHandleArrays() throws ImprintException { buffer.get(serialized); var deserialized = ImprintRecord.deserialize(serialized); - Value arrayValue = deserialized.getValue(1); - assertThat(arrayValue).isNotNull(); - assertThat(arrayValue).isInstanceOf(Value.ArrayValue.class); + Optional arrayValue = deserialized.getValue(1); + assertThat(arrayValue).isPresent(); + assertThat(arrayValue.get()).isInstanceOf(Value.ArrayValue.class); - List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); + List deserializedArray = ((Value.ArrayValue) arrayValue.get()).getValue(); assertThat(deserializedArray).hasSize(3); assertThat(deserializedArray.get(0)).isEqualTo(Value.fromInt32(1)); assertThat(deserializedArray.get(1)).isEqualTo(Value.fromInt32(2)); @@ -140,11 +140,11 @@ var record = writer.build(); buffer.get(serialized); var deserialized = ImprintRecord.deserialize(serialized); - Value mapValue = deserialized.getValue(1); - assertThat(mapValue).isNotNull(); - assertThat(mapValue).isInstanceOf(Value.MapValue.class); + Optional mapValue = deserialized.getValue(1); + assertThat(mapValue).isPresent(); + assertThat(mapValue.get()).isInstanceOf(Value.MapValue.class); - Map deserializedMap = ((Value.MapValue) mapValue).getValue(); + Map deserializedMap = ((Value.MapValue) mapValue.get()).getValue(); assertThat(deserializedMap).hasSize(2); assertThat(deserializedMap.get(MapKey.fromString("key1"))).isEqualTo(Value.fromInt32(1)); assertThat(deserializedMap.get(MapKey.fromString("key2"))).isEqualTo(Value.fromInt32(2)); @@ -173,23 +173,23 @@ void shouldHandleNestedRecords() throws ImprintException { var deserialized = ImprintRecord.deserialize(serialized); // Verify outer record metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); + assertThat(deserialized.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); 
assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); // Verify nested record - Value rowValue = deserialized.getValue(1); - assertThat(rowValue).isNotNull(); - assertThat(rowValue).isInstanceOf(Value.RowValue.class); + Optional rowValue = deserialized.getValue(1); + assertThat(rowValue).isPresent(); + assertThat(rowValue.get()).isInstanceOf(Value.RowValue.class); - var nestedRecord = ((Value.RowValue) rowValue).getValue(); - assertThat(nestedRecord.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(2); + var nestedRecord = ((Value.RowValue) rowValue.get()).getValue(); + assertThat(nestedRecord.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(2); assertThat(nestedRecord.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xcafebabe); - assertThat(nestedRecord.getValue(1)).isEqualTo(Value.fromInt32(42)); - assertThat(nestedRecord.getValue(2)).isEqualTo(Value.fromString("nested")); + assertThat(nestedRecord.getValue(1)).contains(Value.fromInt32(42)); + assertThat(nestedRecord.getValue(2)).contains(Value.fromString("nested")); // Verify outer record field - assertThat(deserialized.getValue(2)).isEqualTo(Value.fromInt64(123L)); + assertThat(deserialized.getValue(2)).contains(Value.fromInt64(123L)); } @Test @@ -227,6 +227,6 @@ void shouldHandleDuplicateFieldIds() throws ImprintException { var record = writer.build(); assertThat(record.getDirectory()).hasSize(1); - assertThat(record.getValue(1)).isEqualTo(Value.fromInt32(43)); + assertThat(record.getValue(1)).contains(Value.fromInt32(43)); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/TypeHandlerTest.java b/src/test/java/com/imprint/types/TypeHandlerTest.java index 75d118f..9a4ae85 100644 --- a/src/test/java/com/imprint/types/TypeHandlerTest.java +++ b/src/test/java/com/imprint/types/TypeHandlerTest.java @@ -20,99 +20,104 @@ class TypeHandlerTest { void testNullHandler() throws ImprintException { var handler = TypeHandler.NULL; var value = 
Value.nullValue(); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(0); - + // Serialization var buffer = ByteBuffer.allocate(10); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(0); // NULL writes nothing - + // Deserialization buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); + + // readValueBytes + buffer.clear(); + var valueBytes = handler.readValueBytes(buffer); + assertThat(valueBytes.remaining()).isEqualTo(0); } - + @ParameterizedTest @ValueSource(booleans = {true, false}) void testBoolHandler(boolean testValue) throws ImprintException { var handler = TypeHandler.BOOL; var value = Value.fromBoolean(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(1); - + // Round-trip test var buffer = ByteBuffer.allocate(10); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(1); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.BoolValue) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(ints = {0, 1, -1, Integer.MAX_VALUE, Integer.MIN_VALUE, 42, -42}) void testInt32Handler(int testValue) throws ImprintException { var handler = TypeHandler.INT32; var value = Value.fromInt32(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(4); - + // Round-trip test var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(4); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.Int32Value) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(longs = {0L, 1L, -1L, Long.MAX_VALUE, Long.MIN_VALUE, 123456789L}) void testInt64Handler(long testValue) throws ImprintException { var handler = 
TypeHandler.INT64; var value = Value.fromInt64(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(8); - + // Round-trip test var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(8); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.Int64Value) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(floats = {0.0f, 1.0f, -1.0f, Float.MAX_VALUE, Float.MIN_VALUE, 3.14159f, Float.NaN, Float.POSITIVE_INFINITY}) void testFloat32Handler(float testValue) throws ImprintException { var handler = TypeHandler.FLOAT32; var value = Value.fromFloat32(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(4); - + // Round-trip test var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(4); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - + float deserializedValue = ((Value.Float32Value) deserialized).getValue(); if (Float.isNaN(testValue)) { assertThat(deserializedValue).isNaN(); @@ -120,25 +125,25 @@ void testFloat32Handler(float testValue) throws ImprintException { assertThat(deserializedValue).isEqualTo(testValue); } } - + @ParameterizedTest @ValueSource(doubles = {0.0, 1.0, -1.0, Double.MAX_VALUE, Double.MIN_VALUE, Math.PI, Double.NaN, Double.POSITIVE_INFINITY}) void testFloat64Handler(double testValue) throws ImprintException { var handler = TypeHandler.FLOAT64; var value = Value.fromFloat64(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(8); - + // Round-trip test var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(8); - + buffer.flip(); var 
deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - + double deserializedValue = ((Value.Float64Value) deserialized).getValue(); if (Double.isNaN(testValue)) { assertThat(deserializedValue).isNaN(); @@ -146,129 +151,129 @@ void testFloat64Handler(double testValue) throws ImprintException { assertThat(deserializedValue).isEqualTo(testValue); } } - + @ParameterizedTest @ValueSource(strings = {"", "hello", "世界", "a very long string that exceeds typical buffer sizes and contains unicode: 🚀🎉", "null\0bytes"}) void testStringHandler(String testValue) throws ImprintException { var handler = TypeHandler.STRING; var value = Value.fromString(testValue); - + byte[] utf8Bytes = testValue.getBytes(java.nio.charset.StandardCharsets.UTF_8); int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + // Should return StringBufferValue (zero-copy implementation) assertThat(deserialized).isInstanceOf(Value.StringBufferValue.class); - + String deserializedString; if (deserialized instanceof Value.StringBufferValue) { deserializedString = ((Value.StringBufferValue) deserialized).getValue(); } else { deserializedString = ((Value.StringValue) deserialized).getValue(); } - + assertThat(deserializedString).isEqualTo(testValue); } - + @Test void testBytesHandlerWithArrayValue() throws ImprintException { var handler = TypeHandler.BYTES; byte[] testBytes = {0, 1, 2, (byte) 0xFF, 42, 127, -128}; var value = Value.fromBytes(testBytes); - + int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - + // 
Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + // Should return BytesBufferValue (zero-copy implementation) assertThat(deserialized).isInstanceOf(Value.BytesBufferValue.class); - + byte[] deserializedBytes = ((Value.BytesBufferValue) deserialized).getValue(); assertThat(deserializedBytes).isEqualTo(testBytes); } - + @Test void testBytesHandlerWithBufferValue() throws ImprintException { var handler = TypeHandler.BYTES; byte[] testBytes = {10, 20, 30, 40}; var bufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes).asReadOnlyBuffer()); - + int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - + // Size estimation assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(bufferValue, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + byte[] deserializedBytes = ((Value.BytesBufferValue) deserialized).getValue(); assertThat(deserializedBytes).isEqualTo(testBytes); } - + @Test void testStringHandlerWithBufferValue() throws ImprintException { var handler = TypeHandler.STRING; String testString = "zero-copy string test"; byte[] utf8Bytes = testString.getBytes(java.nio.charset.StandardCharsets.UTF_8); var bufferValue = Value.fromStringBuffer(ByteBuffer.wrap(utf8Bytes).asReadOnlyBuffer()); - + int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - + // Size estimation assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(bufferValue, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + String 
deserializedString = ((Value.StringBufferValue) deserialized).getValue(); assertThat(deserializedString).isEqualTo(testString); } - + @Test void testBoolHandlerInvalidValue() { var handler = TypeHandler.BOOL; var buffer = ByteBuffer.allocate(10); buffer.put((byte) 2); // Invalid boolean value buffer.flip(); - + assertThatThrownBy(() -> handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Invalid boolean value: 2"); + .isInstanceOf(ImprintException.class) + .hasMessageContaining("Invalid boolean value: 2"); } - + @Test void testHandlerBufferUnderflow() { // Test that handlers properly detect buffer underflow var int32Handler = TypeHandler.INT32; var buffer = ByteBuffer.allocate(2); // Too small for int32 - + assertThatThrownBy(() -> int32Handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Not enough bytes for int32"); + .isInstanceOf(ImprintException.class) + .hasMessageContaining("Not enough bytes for int32"); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/ValueTest.java b/src/test/java/com/imprint/types/ValueTest.java index b092bb7..9dd99c9 100644 --- a/src/test/java/com/imprint/types/ValueTest.java +++ b/src/test/java/com/imprint/types/ValueTest.java @@ -2,8 +2,6 @@ import org.junit.jupiter.api.Test; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -12,207 +10,114 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; class ValueTest { - + @Test void shouldCreateNullValue() { Value value = Value.nullValue(); - + assertThat(value).isInstanceOf(Value.NullValue.class); assertThat(value.getTypeCode()).isEqualTo(TypeCode.NULL); assertThat(value.toString()).isEqualTo("null"); } - + @Test void shouldCreateBooleanValues() { Value trueValue = Value.fromBoolean(true); Value falseValue = Value.fromBoolean(false); - + 
assertThat(trueValue).isInstanceOf(Value.BoolValue.class); assertThat(((Value.BoolValue) trueValue).getValue()).isTrue(); assertThat(trueValue.getTypeCode()).isEqualTo(TypeCode.BOOL); - + assertThat(falseValue).isInstanceOf(Value.BoolValue.class); assertThat(((Value.BoolValue) falseValue).getValue()).isFalse(); assertThat(falseValue.getTypeCode()).isEqualTo(TypeCode.BOOL); } - + @Test void shouldCreateNumericValues() { var int32 = Value.fromInt32(42); var int64 = Value.fromInt64(123456789L); var float32 = Value.fromFloat32(3.14f); var float64 = Value.fromFloat64(2.718281828); - + assertThat(int32.getTypeCode()).isEqualTo(TypeCode.INT32); assertThat(((Value.Int32Value) int32).getValue()).isEqualTo(42); - + assertThat(int64.getTypeCode()).isEqualTo(TypeCode.INT64); assertThat(((Value.Int64Value) int64).getValue()).isEqualTo(123456789L); - + assertThat(float32.getTypeCode()).isEqualTo(TypeCode.FLOAT32); assertThat(((Value.Float32Value) float32).getValue()).isEqualTo(3.14f); - + assertThat(float64.getTypeCode()).isEqualTo(TypeCode.FLOAT64); assertThat(((Value.Float64Value) float64).getValue()).isEqualTo(2.718281828); } - + @Test void shouldCreateBytesAndStringValues() { byte[] bytes = {1, 2, 3, 4}; var bytesValue = Value.fromBytes(bytes); var stringValue = Value.fromString("hello"); - + assertThat(bytesValue.getTypeCode()).isEqualTo(TypeCode.BYTES); assertThat(((Value.BytesValue) bytesValue).getValue()).isEqualTo(bytes); - + assertThat(stringValue.getTypeCode()).isEqualTo(TypeCode.STRING); assertThat(((Value.StringValue) stringValue).getValue()).isEqualTo("hello"); } - + @Test void shouldCreateArrayValues() { List elements = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) + Value.fromInt32(1), + Value.fromInt32(2), + Value.fromInt32(3) ); Value arrayValue = Value.fromArray(elements); - + assertThat(arrayValue.getTypeCode()).isEqualTo(TypeCode.ARRAY); assertThat(((Value.ArrayValue) arrayValue).getValue()).isEqualTo(elements); } - + @Test 
void shouldCreateMapValues() { var map = new HashMap(); map.put(MapKey.fromString("key1"), Value.fromInt32(1)); map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - + Value mapValue = Value.fromMap(map); - + assertThat(mapValue.getTypeCode()).isEqualTo(TypeCode.MAP); assertThat(((Value.MapValue) mapValue).getValue()).isEqualTo(map); } - + @Test void shouldHandleEqualityCorrectly() { var int1 = Value.fromInt32(42); var int2 = Value.fromInt32(42); var int3 = Value.fromInt32(43); - + assertThat(int1).isEqualTo(int2); assertThat(int1).isNotEqualTo(int3); assertThat(int1.hashCode()).isEqualTo(int2.hashCode()); } - - @Test - void shouldRejectNullString() { - assertThatThrownBy(() -> Value.fromString(null)) - .isInstanceOf(NullPointerException.class); - } - - @Test - void shouldCreateStringBufferValue() { - String testString = "hello world"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - assertThat(stringBufferValue).isInstanceOf(Value.StringBufferValue.class); - assertThat(stringBufferValue.getTypeCode()).isEqualTo(TypeCode.STRING); - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldCreateBytesBufferValue() { - byte[] testBytes = {1, 2, 3, 4, 5}; - ByteBuffer buffer = ByteBuffer.wrap(testBytes); - - Value bytesBufferValue = Value.fromBytesBuffer(buffer); - - assertThat(bytesBufferValue).isInstanceOf(Value.BytesBufferValue.class); - assertThat(bytesBufferValue.getTypeCode()).isEqualTo(TypeCode.BYTES); - assertThat(((Value.BytesBufferValue) bytesBufferValue).getValue()).isEqualTo(testBytes); - } - - @Test - void shouldHandleStringBufferValueFastPath() { - // Array-backed buffer with arrayOffset() == 0 should use fast path - String testString = "fast path test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = 
ByteBuffer.wrap(utf8Bytes); - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - // Should work correctly regardless of path taken - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldHandleStringBufferValueFallbackPath() { - // Sliced buffer will have non-zero arrayOffset, forcing fallback path - String testString = "fallback path test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - ByteBuffer sliced = buffer.slice(); // This may break arrayOffset() == 0 - - Value stringBufferValue = Value.fromStringBuffer(sliced); - - // Should work correctly regardless of path taken - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldHandleLargeStringWithoutCaching() { - // Create string > 1KB to test the no-cache path - String largeString = "x".repeat(2000); - byte[] utf8Bytes = largeString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes).slice(); // Force fallback path - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(largeString); - } - - @Test - void shouldCacheStringDecoding() { - String testString = "cache test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - - Value.StringBufferValue stringBufferValue = (Value.StringBufferValue) Value.fromStringBuffer(buffer); - - // First call should decode and cache - String result1 = stringBufferValue.getValue(); - // Second call should return cached value - String result2 = stringBufferValue.getValue(); - - assertThat(result1).isEqualTo(testString); - assertThat(result2).isEqualTo(testString); - assertThat(result1).isSameAs(result2); // Should be same object reference due to caching - } - + @Test - void 
shouldHandleStringValueEquality() { - String testString = "equality test"; - - Value stringValue = Value.fromString(testString); - Value stringBufferValue = Value.fromStringBuffer(ByteBuffer.wrap(testString.getBytes(StandardCharsets.UTF_8))); - - assertThat(stringValue).isEqualTo(stringBufferValue); - assertThat(stringBufferValue).isEqualTo(stringValue); - assertThat(stringValue.hashCode()).isEqualTo(stringBufferValue.hashCode()); + void shouldDefensiveCopyArrays() { + byte[] original = {1, 2, 3}; + var bytesValue = Value.fromBytes(original); + + // Modify original array + original[0] = 99; + + // Value should be unchanged + assertThat(((Value.BytesValue) bytesValue).getValue()).containsExactly(1, 2, 3); } - + @Test - void shouldHandleBytesValueEquality() { - byte[] testBytes = {1, 2, 3, 4, 5}; - - Value bytesValue = Value.fromBytes(testBytes); - Value bytesBufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes)); - - assertThat(bytesValue).isEqualTo(bytesBufferValue); - assertThat(bytesBufferValue).isEqualTo(bytesValue); + void shouldRejectNullString() { + assertThatThrownBy(() -> Value.fromString(null)) + .isInstanceOf(NullPointerException.class); } } \ No newline at end of file From aeefa9ee4ea24e036c51c2f082aecbac4f62c82f Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:33:38 -0400 Subject: [PATCH 03/53] Add GitHub Actions CI workflow for automated testing --- .github/workflows/ci.yml | 107 +++++++++++++++++++++-------- build.gradle | 141 +++------------------------------------ 2 files changed, 88 insertions(+), 160 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 378ebb7..0650d7c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ main ] + branches: [ main, dev ] pull_request: - branches: [ main ] + branches: [ main, dev ] jobs: test: @@ -15,30 +15,81 @@ jobs: java-version: [11, 17, 21] steps: - - name: Checkout code - uses: 
actions/checkout@v4 + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK ${{ matrix.java-version }} + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + + - name: Cache Gradle dependencies + uses: actions/cache@v3 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Run tests + run: ./gradlew test + + - name: Run build + run: ./gradlew build + + - name: Upload test results + uses: actions/upload-artifact@v3 + if: always() + with: + name: test-results-java-${{ matrix.java-version }} + path: build/test-results/test/ + + - name: Upload build reports + uses: actions/upload-artifact@v3 + if: always() + with: + name: build-reports-java-${{ matrix.java-version }} + path: build/reports/ - - name: Set up JDK ${{ matrix.java-version }} - uses: actions/setup-java@v4 - with: - java-version: ${{ matrix.java-version }} - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Run tests - run: ./gradlew test - - - name: Run build - run: ./gradlew build \ No newline at end of file + benchmark: + runs-on: ubuntu-latest + needs: test + if: github.event_name == 'pull_request' + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: 17 + distribution: 'temurin' + + - name: Cache Gradle dependencies + uses: actions/cache@v3 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os 
}}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Run quick benchmark + run: ./gradlew jmh -Pjmh.fork=1 -Pjmh.warmupIterations=1 -Pjmh.iterations=1 + + - name: Upload benchmark results + uses: actions/upload-artifact@v3 + with: + name: benchmark-results + path: build/results/jmh/ \ No newline at end of file diff --git a/build.gradle b/build.gradle index 33b1645..9262297 100644 --- a/build.gradle +++ b/build.gradle @@ -2,8 +2,6 @@ plugins { id 'java-library' id 'maven-publish' id 'me.champeau.jmh' version '0.7.2' - id 'com.google.protobuf' version '0.9.4' - id 'io.netifi.flatbuffers' version '1.0.7' } group = 'com.imprint' @@ -25,141 +23,31 @@ dependencies { // Lombok for reducing boilerplate compileOnly 'org.projectlombok:lombok:1.18.30' annotationProcessor 'org.projectlombok:lombok:1.18.30' - + // Test dependencies testImplementation 'org.junit.jupiter:junit-jupiter:5.10.0' testImplementation 'org.assertj:assertj-core:3.24.2' testImplementation 'org.mockito:mockito-core:5.5.0' - + // Lombok for tests testCompileOnly 'org.projectlombok:lombok:1.18.30' testAnnotationProcessor 'org.projectlombok:lombok:1.18.30' - + // Performance testing with JMH jmhImplementation 'org.openjdk.jmh:jmh-core:1.37' jmhAnnotationProcessor 'org.openjdk.jmh:jmh-generator-annprocess:1.37' - - // Suppress SLF4J warnings - jmhImplementation 'org.slf4j:slf4j-nop:1.7.36' - - // Competitor libraries for benchmarking (JMH only) + + // Competitor libraries for benchmarking jmhImplementation 'com.google.protobuf:protobuf-java:3.25.1' jmhImplementation 'org.apache.avro:avro:1.11.3' jmhImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.0' jmhImplementation 'com.google.flatbuffers:flatbuffers-java:23.5.26' jmhImplementation 'com.esotericsoftware:kryo:5.4.0' - jmhImplementation 'org.msgpack:msgpack-core:0.9.8' - jmhImplementation 
'org.msgpack:jackson-dataformat-msgpack:0.9.8' -} - -protobuf { - protoc { - artifact = "com.google.protobuf:protoc:3.25.1" - } - generateProtoTasks { - // Only generate for JMH, not main - all().each { task -> - task.enabled = false - } - ofSourceSet('jmh').each { task -> - task.enabled = true - task.builtins { - java { - outputSubDir = 'java' - } - } - } - } -} - -// Download and setup FlatBuffers compiler for Linux (CI environment) -tasks.register('downloadFlatc') { - description = 'Download FlatBuffers compiler' - group = 'build setup' - - def flatcDir = file("${buildDir}/flatc") - def flatcExe = file("${flatcDir}/flatc") - def isWindows = System.getProperty('os.name').toLowerCase().contains('windows') - def flatcUrl = isWindows ? - "https://github.com/google/flatbuffers/releases/download/v23.5.26/Windows.flatc.binary.zip" : - "https://github.com/google/flatbuffers/releases/download/v23.5.26/Linux.flatc.binary.clang++-12.zip" - def flatcZip = file("${buildDir}/flatc.zip") - - outputs.file(flatcExe) - - doLast { - if (!flatcExe.exists()) { - println "Downloading FlatBuffers compiler for ${isWindows ? 'Windows' : 'Linux'}..." - flatcDir.mkdirs() - - // Download - new URL(flatcUrl).withInputStream { i -> - flatcZip.withOutputStream { it << i } - } - - // Extract - copy { - from zipTree(flatcZip) - into flatcDir - } - - // Make executable on Unix systems - if (!isWindows) { - exec { - commandLine 'chmod', '+x', flatcExe.absolutePath - } - } - - flatcZip.delete() - println "FlatBuffers compiler downloaded to: ${flatcExe}" - } - } -} - -// Generate FlatBuffers sources -tasks.register('generateFlatBuffers', Exec) { - dependsOn downloadFlatc - description = 'Generate Java classes from FlatBuffers schema' - group = 'build' - - def isWindows = System.getProperty('os.name').toLowerCase().contains('windows') - def flatcExe = file("${buildDir}/flatc/${isWindows ? 
'flatc.exe' : 'flatc'}") - def schemaFile = file('src/jmh/flatbuffers/test_record.fbs') - def outputDir = file('build/generated/source/flatbuffers/jmh/java') - - commandLine flatcExe.absolutePath, '--java', '-o', outputDir.absolutePath, schemaFile.absolutePath - - inputs.file(schemaFile) - outputs.dir(outputDir) - - doFirst { - outputDir.mkdirs() - } -} - -// Add generated FlatBuffers sources to JMH source set -sourceSets { - jmh { - java { - srcDir 'build/generated/source/flatbuffers/jmh/java' - } - proto { - srcDir 'src/jmh/proto' - } - } -} - -// Make JMH compilation depend on FlatBuffers generation -compileJmhJava.dependsOn generateFlatBuffers - -// Handle duplicate proto files -tasks.named('processJmhResources') { - duplicatesStrategy = DuplicatesStrategy.EXCLUDE } test { useJUnitPlatform() - + // Enable detailed test output testLogging { events "passed", "skipped", "failed" @@ -168,20 +56,11 @@ test { // JMH configuration jmh { - fork = 2 + fork = 1 warmupIterations = 3 - iterations = 5 + iterations = 3 resultFormat = 'JSON' includeTests = false - resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") - - // Java 11 specific JVM args - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions', - '-XX:+UseJVMCICompiler' - ] } compileJava { @@ -193,6 +72,4 @@ javadoc { if(JavaVersion.current().isJava9Compatible()) { options.addBooleanOption('html5', true) } - // Don't fail build on missing javadoc - options.addStringOption('Xdoclint:none', '-quiet') -} \ No newline at end of file +} From 606cd74a24fe43454640dc1002daf172de026d2d Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:36:51 -0400 Subject: [PATCH 04/53] Update GitHub Actions workflow to use upload-artifact@v4 --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0650d7c..6e5a2a0 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,14 +44,14 @@ jobs: run: ./gradlew build - name: Upload test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: always() with: name: test-results-java-${{ matrix.java-version }} path: build/test-results/test/ - name: Upload build reports - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: always() with: name: build-reports-java-${{ matrix.java-version }} @@ -89,7 +89,7 @@ jobs: run: ./gradlew jmh -Pjmh.fork=1 -Pjmh.warmupIterations=1 -Pjmh.iterations=1 - name: Upload benchmark results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: benchmark-results path: build/results/jmh/ \ No newline at end of file From 83151881f8dea71b03fe1ccef52d0ce237f4d80e Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:42:37 -0400 Subject: [PATCH 05/53] Add Gradle wrapper validation to CI workflow --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e5a2a0..18842f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,9 @@ jobs: - name: Make gradlew executable run: chmod +x ./gradlew + - name: Validate Gradle wrapper + uses: gradle/wrapper-validation-action@v1 + - name: Run tests run: ./gradlew test @@ -85,6 +88,9 @@ jobs: - name: Make gradlew executable run: chmod +x ./gradlew + - name: Validate Gradle wrapper + uses: gradle/wrapper-validation-action@v1 + - name: Run quick benchmark run: ./gradlew jmh -Pjmh.fork=1 -Pjmh.warmupIterations=1 -Pjmh.iterations=1 From cab4575614984bb05185de4ecb931d70a0872d54 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:45:02 -0400 Subject: [PATCH 06/53] Fix gitignore to include gradle-wrapper.jar for CI --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 09ea567..54c84dc 100644 --- a/.gitignore +++ b/.gitignore 
@@ -61,4 +61,5 @@ buildNumber.properties # JAR files (unless they're dependencies) *.jar +!gradle/wrapper/gradle-wrapper.jar !lombok.jar From 7e0f8ef85c3cb0d031fe8caaa783870ea5933b06 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:48:43 -0400 Subject: [PATCH 07/53] Update wrapper validation action to v3 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18842f6..2e906c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: run: chmod +x ./gradlew - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v1 + uses: gradle/wrapper-validation-action@v3 - name: Run tests run: ./gradlew test @@ -89,7 +89,7 @@ jobs: run: chmod +x ./gradlew - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v1 + uses: gradle/wrapper-validation-action@v3 - name: Run quick benchmark run: ./gradlew jmh -Pjmh.fork=1 -Pjmh.warmupIterations=1 -Pjmh.iterations=1 From 6b5bd6bdf4f07c88a8d9c00da70bbb2caab0081b Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 20:53:03 -0400 Subject: [PATCH 08/53] Fix Javadoc syntax errors and disable strict Javadoc checking --- build.gradle | 2 ++ .../com/imprint/core/ImprintRecordBuilder.java | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 9262297..7e85806 100644 --- a/build.gradle +++ b/build.gradle @@ -72,4 +72,6 @@ javadoc { if(JavaVersion.current().isJava9Compatible()) { options.addBooleanOption('html5', true) } + // Don't fail build on missing javadoc + options.addStringOption('Xdoclint:none', '-quiet') } diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 48b0998..202bd2a 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ 
b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -11,14 +11,16 @@ * developer-friendly API that eliminates boilerplate Value.fromX() calls. *

* Usage: + *

  *   var record = ImprintRecord.builder(schemaId)
- *       .field(1, 42)              // int -> Int32Value  
- *       .field(2, "hello")         // String -> StringValue
- *       .field(3, 3.14)            // double -> Float64Value
- *       .field(4, bytes)           // byte[] -> BytesValue
- *       .field(5, true)            // boolean -> BoolValue
- *       .nullField(6)              // -> NullValue
+ *       .field(1, 42)              // int to Int32Value  
+ *       .field(2, "hello")         // String to StringValue
+ *       .field(3, 3.14)            // double to Float64Value
+ *       .field(4, bytes)           // byte[] to BytesValue
+ *       .field(5, true)            // boolean to BoolValue
+ *       .nullField(6)              // to NullValue
  *       .build();
+ * 
*/ public final class ImprintRecordBuilder { private final SchemaId schemaId; From 8e3e1ba635916ac5c1a9e9299b5dbceb935adcbc Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 23:33:47 -0400 Subject: [PATCH 09/53] Add JMH benchmark .bat and .sh for full suite benchmarking and performance tracking; add comprehensive String benchmark --- .gitignore | 3 + build.gradle | 1 + .../imprint/benchmark/StringBenchmark.java | 90 +++---------------- 3 files changed, 15 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index 54c84dc..3f1edb4 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,6 @@ buildNumber.properties *.jar !gradle/wrapper/gradle-wrapper.jar !lombok.jar + +# Benchmark Results (keep timestamped results in repo for tracking) +# benchmark-results/ - Commented out to keep results in repo diff --git a/build.gradle b/build.gradle index 7e85806..2606710 100644 --- a/build.gradle +++ b/build.gradle @@ -61,6 +61,7 @@ jmh { iterations = 3 resultFormat = 'JSON' includeTests = false + resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") } compileJava { diff --git a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java index 045940e..1891251 100644 --- a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java @@ -5,6 +5,7 @@ import com.imprint.types.Value; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; import java.nio.ByteBuffer; @@ -14,9 +15,8 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Fork(1) -@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) -@SuppressWarnings("unused") +@Warmup(iterations = 3, time = 1, timeUnit = 
TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) public class StringBenchmark { private static final SchemaId SCHEMA_ID = new SchemaId(1, 42); @@ -44,10 +44,6 @@ public class StringBenchmark { private byte[] serializedLarge10K; private byte[] serializedLarge100K; private byte[] serializedLarge1M; - - private ImprintRecord preDeserializedSmall5; - private ImprintRecord preDeserializedMedium500; - private ImprintRecord preDeserializedLarge100K; @Setup public void setup() throws Exception { @@ -70,10 +66,6 @@ public void setup() throws Exception { serializedLarge10K = bufferToArray(createStringRecord(largeString10K).serializeToBuffer()); serializedLarge100K = bufferToArray(createStringRecord(largeString100K).serializeToBuffer()); serializedLarge1M = bufferToArray(createStringRecord(largeString1M).serializeToBuffer()); - - preDeserializedSmall5 = ImprintRecord.deserialize(serializedSmall5); - preDeserializedMedium500 = ImprintRecord.deserialize(serializedMedium500); - preDeserializedLarge100K = ImprintRecord.deserialize(serializedLarge100K); } private String generateString(int length) { @@ -195,22 +187,19 @@ public ImprintRecord deserializeLargeString1M() throws Exception { @Benchmark public String accessSmallString5() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - Value value = record.getValue(1); - return value != null ? extractString(value) : null; + return record.getValue(1).map(this::extractString).orElse(null); } @Benchmark public String accessMediumString500() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - Value value = record.getValue(1); - return value != null ? 
extractString(value) : null; + return record.getValue(1).map(this::extractString).orElse(null); } @Benchmark public String accessLargeString100K() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - Value value = record.getValue(1); - return value != null ? extractString(value) : null; + return record.getValue(1).map(this::extractString).orElse(null); } // Raw bytes access benchmarks (zero-copy) @@ -218,19 +207,19 @@ public String accessLargeString100K() throws Exception { @Benchmark public ByteBuffer getRawBytesSmallString5() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - return record.getRawBytes(1); + return record.getRawBytes(1).orElse(null); } @Benchmark public ByteBuffer getRawBytesMediumString500() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - return record.getRawBytes(1); + return record.getRawBytes(1).orElse(null); } @Benchmark public ByteBuffer getRawBytesLargeString100K() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - return record.getRawBytes(1); + return record.getRawBytes(1).orElse(null); } // Size measurement benchmarks @@ -249,67 +238,10 @@ public int measureMediumString500Size() throws Exception { public int measureLargeString100KSize() throws Exception { return createStringRecord(largeString100K).serializeToBuffer().remaining(); } - - // Pure string access benchmarks (no record deserialization overhead) - @Benchmark - public String pureStringAccessSmall5() throws Exception { - Value value = preDeserializedSmall5.getValue(1); - return value != null ? extractString(value) : null; - } - - @Benchmark - public String pureStringAccessMedium500() throws Exception { - Value value = preDeserializedMedium500.getValue(1); - return value != null ? 
extractString(value) : null; - } - - @Benchmark - public String pureStringAccessLarge100K() throws Exception { - Value value = preDeserializedLarge100K.getValue(1); - return value != null ? extractString(value) : null; - } - - // Test cached vs uncached access - @Benchmark - public String cachedStringAccessSmall5() throws Exception { - // Second access should hit cache - Value value1 = preDeserializedSmall5.getValue(1); - String result1 = value1 != null ? extractString(value1) : null; - Value value2 = preDeserializedSmall5.getValue(1); - return value2 != null ? extractString(value2) : null; - } public static void main(String[] args) throws Exception { - runDeserializationOnly(); - } - - public static void runAll() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName()) - .build(); - new Runner(opt).run(); - } - - /** - * Run only string deserialization benchmarks to measure the impact of - * ThreadLocal buffer pool optimization and fast/fallback path performance. - */ - public static void runDeserializationOnly() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName() + ".*deserialize.*") // Only deserialize methods - .forks(0) // Run in same JVM to avoid serialization issues - .build(); - new Runner(opt).run(); - } - - /** - * Run only pure string access benchmarks (no record deserialization overhead) - * to isolate string decode performance with ThreadLocal buffer optimization. 
- */ - public static void runStringAccessOnly() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName() + ".*(pureStringAccess|cachedStringAccess).*") // Only pure string access methods - .forks(0) // Run in same JVM to avoid serialization issues + Options opt = new OptionsBuilder() + .include(StringBenchmark.class.getSimpleName()) .build(); new Runner(opt).run(); } From ddf2b64f6c1796bfa67390e51609736c44f8cc56 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 23:44:13 -0400 Subject: [PATCH 10/53] fix map serialization error in benchmark test and streamline ci file to remove a bunch of stuff --- .github/workflows/ci.yml | 64 +------------------ .../benchmark/SerializationBenchmark.java | 4 +- 2 files changed, 4 insertions(+), 64 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e906c6..0ad6f6b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: distribution: 'temurin' - name: Cache Gradle dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: | ~/.gradle/caches @@ -34,68 +34,8 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v3 - - name: Run tests run: ./gradlew test - name: Run build - run: ./gradlew build - - - name: Upload test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: test-results-java-${{ matrix.java-version }} - path: build/test-results/test/ - - - name: Upload build reports - uses: actions/upload-artifact@v4 - if: always() - with: - name: build-reports-java-${{ matrix.java-version }} - path: build/reports/ - - benchmark: - runs-on: ubuntu-latest - needs: test - if: github.event_name == 'pull_request' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 17 - uses: actions/setup-java@v4 - with: - 
java-version: 17 - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v3 - - - name: Run quick benchmark - run: ./gradlew jmh -Pjmh.fork=1 -Pjmh.warmupIterations=1 -Pjmh.iterations=1 - - - name: Upload benchmark results - uses: actions/upload-artifact@v4 - with: - name: benchmark-results - path: build/results/jmh/ \ No newline at end of file + run: ./gradlew build \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java index 2544b88..3275843 100644 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java @@ -119,11 +119,11 @@ private ImprintRecord createMediumRecord() throws Exception { ); writer.addField(6, Value.fromArray(tags)); - // Add map field + // Add map field (all string values for consistency) var metadata = new HashMap(); metadata.put(MapKey.fromString("manufacturer"), Value.fromString("TechCorp")); metadata.put(MapKey.fromString("model"), Value.fromString("TC-2024")); - metadata.put(MapKey.fromString("year"), Value.fromInt32(2024)); + metadata.put(MapKey.fromString("year"), Value.fromString("2024")); writer.addField(7, Value.fromMap(metadata)); // Add more fields for medium size From 8a210f319222025bfc68208af395e675b426779e Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 1 Jun 2025 23:45:45 -0400 Subject: [PATCH 11/53] Add execute permissions back for gradlew in CI --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index 0ad6f6b..d4c8bde 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,9 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- + - name: Make gradlew executable + run: chmod +x ./gradlew + - name: Run tests run: ./gradlew test From c85027d3eb3fc5d1f53001856e92aacdb66eab88 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Mon, 2 Jun 2025 01:18:43 -0400 Subject: [PATCH 12/53] Add some more string based performance benchmarks and try to make string deserialization a bit faster --- .../imprint/benchmark/StringBenchmark.java | 32 ++++++++++ .../java/com/imprint/core/ImprintRecord.java | 4 +- .../java/com/imprint/types/TypeHandler.java | 9 ++- src/main/java/com/imprint/types/Value.java | 58 +++++++++++++------ 4 files changed, 78 insertions(+), 25 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java index 1891251..dda7f1c 100644 --- a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java @@ -44,6 +44,10 @@ public class StringBenchmark { private byte[] serializedLarge10K; private byte[] serializedLarge100K; private byte[] serializedLarge1M; + + private ImprintRecord preDeserializedSmall5; + private ImprintRecord preDeserializedMedium500; + private ImprintRecord preDeserializedLarge100K; @Setup public void setup() throws Exception { @@ -66,6 +70,10 @@ public void setup() throws Exception { serializedLarge10K = bufferToArray(createStringRecord(largeString10K).serializeToBuffer()); serializedLarge100K = bufferToArray(createStringRecord(largeString100K).serializeToBuffer()); serializedLarge1M = bufferToArray(createStringRecord(largeString1M).serializeToBuffer()); + + preDeserializedSmall5 = ImprintRecord.deserialize(serializedSmall5); + preDeserializedMedium500 = ImprintRecord.deserialize(serializedMedium500); + preDeserializedLarge100K = 
ImprintRecord.deserialize(serializedLarge100K); } private String generateString(int length) { @@ -238,6 +246,30 @@ public int measureMediumString500Size() throws Exception { public int measureLargeString100KSize() throws Exception { return createStringRecord(largeString100K).serializeToBuffer().remaining(); } + + // Pure string access benchmarks (no record deserialization overhead) + @Benchmark + public String pureStringAccessSmall5() throws Exception { + return preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); + } + + @Benchmark + public String pureStringAccessMedium500() throws Exception { + return preDeserializedMedium500.getValue(1).map(this::extractString).orElse(null); + } + + @Benchmark + public String pureStringAccessLarge100K() throws Exception { + return preDeserializedLarge100K.getValue(1).map(this::extractString).orElse(null); + } + + // Test cached vs uncached access + @Benchmark + public String cachedStringAccessSmall5() throws Exception { + // Second access should hit cache + preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); + return preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); + } public static void main(String[] args) throws Exception { Options opt = new OptionsBuilder() diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index b7ed224..d9d5659 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -114,14 +114,14 @@ public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { } /** - * Deserialize a record from bytes. + * Deserialize a record from bytes through an array backed ByteBuffer. */ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { return deserialize(ByteBuffer.wrap(bytes)); } /** - * Deserialize a record from a ByteBuffer (zero-copy when possible). 
+ * Deserialize a record from a ByteBuffer. */ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 4b5830a..07ef7a1 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -4,7 +4,6 @@ import com.imprint.util.VarInt; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; /** * Interface for handling type-specific serialization, deserialization, and size estimation. @@ -270,7 +269,7 @@ public void serialize(Value value, ByteBuffer buffer) { buffer.put(stringBuffer); } else { Value.StringValue stringValue = (Value.StringValue) value; - byte[] stringBytes = stringValue.getValue().getBytes(StandardCharsets.UTF_8); + byte[] stringBytes = stringValue.getUtf8Bytes(); // Use cached UTF-8 bytes VarInt.encode(stringBytes.length, buffer); buffer.put(stringBytes); } @@ -283,9 +282,9 @@ public int estimateSize(Value value) { int length = bufferValue.getBuffer().remaining(); return VarInt.encodedLength(length) + length; } else { - String str = ((Value.StringValue) value).getValue(); - int utf8Length = str.getBytes(StandardCharsets.UTF_8).length; - return VarInt.encodedLength(utf8Length) + utf8Length; + Value.StringValue stringValue = (Value.StringValue) value; + byte[] utf8Bytes = stringValue.getUtf8Bytes(); // Use cached UTF-8 bytes + return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; } } diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index 4710ec5..64eae91 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -270,15 +270,26 @@ public String toString() { } // String Value (String-based) - @Getter @EqualsAndHashCode(callSuper = false) public static class StringValue 
extends Value { + @Getter private final String value; + private volatile byte[] cachedUtf8Bytes; // Cache UTF-8 encoding public StringValue(String value) { this.value = Objects.requireNonNull(value, "String cannot be null"); } + public byte[] getUtf8Bytes() { + byte[] cached = cachedUtf8Bytes; + if (cached == null) { + // Multiple threads may compute this - that's OK since it's idempotent + cached = value.getBytes(StandardCharsets.UTF_8); + cachedUtf8Bytes = cached; // Benign race - last writer wins + } + return cached; // Return our computed value, not re-read from volatile field + } + @Override public TypeCode getTypeCode() { return TypeCode.STRING; } @@ -288,35 +299,46 @@ public String toString() { } } - // String Value (ByteBuffer-based, zero-copy) + // String Value (ByteBuffer-based) public static class StringBufferValue extends Value { private final ByteBuffer value; private volatile String cachedString; // lazy decode - + public StringBufferValue(ByteBuffer value) { this.value = value.asReadOnlyBuffer(); // zero-copy read-only view } - + public String getValue() { - if (cachedString == null) { - synchronized (this) { - if (cachedString == null) { - var array = new byte[value.remaining()]; - value.duplicate().get(array); - cachedString = new String(array, StandardCharsets.UTF_8); - } - } + String result = cachedString; + if (result == null) { + // Simple, fast decoding - no thread-local overhead + result = decodeUtf8(); + cachedString = result; } - return cachedString; + return result; } - + + private String decodeUtf8() { + // Fast path: zero-copy for array-backed ByteBuffers + if (value.hasArray() && value.arrayOffset() == 0) { + return new String(value.array(), value.position(), + value.remaining(), StandardCharsets.UTF_8); + } + + // Fallback path - should be impossible since deserialize uses wrap() to create an array-backed ByteBuffer. 
+ // Allocation required for direct ByteBuffers since Java's String API doesn't provide ByteBuffer constructors + var array = new byte[value.remaining()]; + value.duplicate().get(array); + return new String(array, StandardCharsets.UTF_8); + } + public ByteBuffer getBuffer() { return value.duplicate(); // zero-copy view } - + @Override public TypeCode getTypeCode() { return TypeCode.STRING; } - + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -331,12 +353,12 @@ public boolean equals(Object obj) { } return false; } - + @Override public int hashCode() { return getValue().hashCode(); // Use string hash for consistency } - + @Override public String toString() { return "\"" + getValue() + "\""; From 853486c79a5207a84bb1377f5bc512aad6c53da8 Mon Sep 17 00:00:00 2001 From: Brent Johnson Date: Tue, 3 Jun 2025 18:28:49 -0400 Subject: [PATCH 13/53] second main commit to address initial commits A full list of enhancements can be found here - https://github.com/imprint-serde/imprint-java/issues/3 --- .../benchmark/ComparisonBenchmark.java | 69 ++-- .../benchmark/FieldAccessBenchmark.java | 4 +- .../com/imprint/benchmark/MergeBenchmark.java | 4 +- .../imprint/benchmark/StringBenchmark.java | 67 ++- src/main/java/com/imprint/Constants.java | 4 +- .../java/com/imprint/core/ImprintRecord.java | 168 ++------ .../imprint/core/ImprintRecordBuilder.java | 19 +- .../java/com/imprint/core/ImprintWriter.java | 155 +------ src/main/java/com/imprint/core/SchemaId.java | 2 +- src/main/java/com/imprint/types/TypeCode.java | 4 +- .../java/com/imprint/types/TypeHandler.java | 386 ++++++++++++++++-- src/main/java/com/imprint/types/Value.java | 47 ++- src/main/java/com/imprint/util/VarInt.java | 20 + .../imprint/ByteBufferIntegrationTest.java | 157 +++++-- .../java/com/imprint/ComprehensiveTest.java | 208 ---------- .../java/com/imprint/IntegrationTest.java | 32 +- .../com/imprint/benchmark/ProfilerTest.java | 18 +- .../core/ImprintRecordBuilderTest.java | 234 
----------- .../com/imprint/core/ImprintRecordTest.java | 76 ++-- .../java/com/imprint/types/ValueTest.java | 115 +++++- 20 files changed, 871 insertions(+), 918 deletions(-) delete mode 100644 src/test/java/com/imprint/ComprehensiveTest.java delete mode 100644 src/test/java/com/imprint/core/ImprintRecordBuilderTest.java diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 1293478..152bb6d 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -74,7 +74,7 @@ public void serializeJackson(Blackhole bh) throws Exception { } @Benchmark - public void serializeKryo(Blackhole bh) throws Exception { + public void serializeKryo(Blackhole bh) { byte[] result = serializeWithKryo(testData); bh.consume(result); } @@ -102,49 +102,36 @@ public void deserializeKryo(Blackhole bh) { } // ===== FIELD ACCESS BENCHMARKS ===== + // Tests accessing a single field near the end of a large record + // This showcases Imprint's O(1) directory lookup vs sequential deserialization @Benchmark - public void fieldAccessImprint(Blackhole bh) throws Exception { + public void singleFieldAccessImprint(Blackhole bh) throws Exception { ImprintRecord record = ImprintRecord.deserialize(imprintBytes.duplicate()); - // Access multiple fields without full deserialization - var id = record.getValue(1); - var name = record.getValue(2); - var price = record.getValue(3); - var active = record.getValue(4); - var category = record.getValue(5); - - bh.consume(id); - bh.consume(name); - bh.consume(price); - bh.consume(active); - bh.consume(category); + // Access field 15 directly via directory lookup - O(1) + var field15 = record.getValue(15); + bh.consume(field15); } @Benchmark - public void fieldAccessJackson(Blackhole bh) throws Exception { - // Jackson requires full deserialization to access fields + public void 
singleFieldAccessJackson(Blackhole bh) throws Exception { + // Jackson must deserialize entire object to access any field TestRecord record = jackson.readValue(jacksonBytes, TestRecord.class); - bh.consume(record.id); - bh.consume(record.name); - bh.consume(record.price); - bh.consume(record.active); - bh.consume(record.category); + // Access field15 equivalent (extraData[4]) after full deserialization + bh.consume(record.extraData.get(4)); } @Benchmark - public void fieldAccessKryo(Blackhole bh) { - // Kryo requires full deserialization to access fields + public void singleFieldAccessKryo(Blackhole bh) { + // Kryo must deserialize entire object to access any field Input input = new Input(new ByteArrayInputStream(kryoBytes)); TestRecord record = kryo.readObject(input, TestRecord.class); input.close(); - bh.consume(record.id); - bh.consume(record.name); - bh.consume(record.price); - bh.consume(record.active); - bh.consume(record.category); + // Access field15 equivalent (extraData[4]) after full deserialization + bh.consume(record.extraData.get(4)); } // ===== SIZE COMPARISON ===== @@ -162,7 +149,7 @@ public void measureJacksonSize(Blackhole bh) throws Exception { } @Benchmark - public void measureKryoSize(Blackhole bh) throws Exception { + public void measureKryoSize(Blackhole bh) { byte[] serialized = serializeWithKryo(testData); bh.consume(serialized.length); } @@ -196,7 +183,7 @@ public void mergeJackson(Blackhole bh) throws Exception { } @Benchmark - public void mergeKryo(Blackhole bh) throws Exception { + public void mergeKryo(Blackhole bh) { // Kryo merge requires full deserialization + merge + serialization Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); var record1 = kryo.readObject(input1, TestRecord.class); @@ -237,6 +224,11 @@ private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { } writer.addField(7, Value.fromMap(metadataMap)); + // Add extra fields (8-20) to create a larger record + for (int i = 0; i < 
data.extraData.size(); i++) { + writer.addField(8 + i, Value.fromString(data.extraData.get(i))); + } + return writer.build().serializeToBuffer(); } @@ -268,8 +260,8 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); - if (value.isPresent()) { - writer.addField(fieldId, value.get()); + if (value != null) { + writer.addField(fieldId, value); usedFieldIds.add(fieldId); } } @@ -309,6 +301,12 @@ var record = new TestRecord(); record.metadata.put("model", "TC-2024"); record.metadata.put("warranty", "2 years"); + // Add extra data to create a larger record (fields 8-20) + record.extraData = new ArrayList<>(); + for (int i = 0; i < 13; i++) { + record.extraData.add("extraField" + i + "_value_" + (1000 + i)); + } + return record; } @@ -326,6 +324,12 @@ var record = new TestRecord(); record.metadata.put("vendor", "SoftCorp"); record.metadata.put("version", "2.1"); + // Add extra data to match the structure + record.extraData = new ArrayList<>(); + for (int i = 0; i < 13; i++) { + record.extraData.add("extraField" + i + "_value2_" + (2000 + i)); + } + return record; } @@ -338,6 +342,7 @@ public static class TestRecord { public String category; public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); + public List extraData = new ArrayList<>(); // Fields 8-20 for large record test public TestRecord() {} // Required for deserialization } diff --git a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java index f3abb7e..1ead21f 100644 --- a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java @@ -200,7 +200,9 @@ private ImprintRecord simulateProject(ImprintRecord source, int[] fieldIds) thro for (int fieldId : fieldIds) { var value = source.getValue(fieldId); - 
value.ifPresent(value1 -> writer.addField(fieldId, value1)); + if (value != null) { + writer.addField(fieldId, value); + } } return writer.build(); diff --git a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java index 5c52908..f93092a 100644 --- a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java @@ -100,8 +100,8 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); - if (value.isPresent()) { - writer.addField(fieldId, value.get()); + if (value != null) { + writer.addField(fieldId, value); usedFieldIds.add(fieldId); } } diff --git a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java index dda7f1c..e156c8c 100644 --- a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java @@ -5,7 +5,6 @@ import com.imprint.types.Value; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; import java.nio.ByteBuffer; @@ -15,8 +14,8 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Fork(1) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) public class StringBenchmark { private static final SchemaId SCHEMA_ID = new SchemaId(1, 42); @@ -195,19 +194,22 @@ public ImprintRecord deserializeLargeString1M() throws Exception { @Benchmark public String accessSmallString5() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - 
return record.getValue(1).map(this::extractString).orElse(null); + Value value = record.getValue(1); + return value != null ? extractString(value) : null; } @Benchmark public String accessMediumString500() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - return record.getValue(1).map(this::extractString).orElse(null); + Value value = record.getValue(1); + return value != null ? extractString(value) : null; } @Benchmark public String accessLargeString100K() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - return record.getValue(1).map(this::extractString).orElse(null); + Value value = record.getValue(1); + return value != null ? extractString(value) : null; } // Raw bytes access benchmarks (zero-copy) @@ -215,19 +217,19 @@ public String accessLargeString100K() throws Exception { @Benchmark public ByteBuffer getRawBytesSmallString5() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - return record.getRawBytes(1).orElse(null); + return record.getRawBytes(1); } @Benchmark public ByteBuffer getRawBytesMediumString500() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - return record.getRawBytes(1).orElse(null); + return record.getRawBytes(1); } @Benchmark public ByteBuffer getRawBytesLargeString100K() throws Exception { ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - return record.getRawBytes(1).orElse(null); + return record.getRawBytes(1); } // Size measurement benchmarks @@ -250,30 +252,63 @@ public int measureLargeString100KSize() throws Exception { // Pure string access benchmarks (no record deserialization overhead) @Benchmark public String pureStringAccessSmall5() throws Exception { - return preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); + Value value = preDeserializedSmall5.getValue(1); + return value != null ? 
extractString(value) : null; } @Benchmark public String pureStringAccessMedium500() throws Exception { - return preDeserializedMedium500.getValue(1).map(this::extractString).orElse(null); + Value value = preDeserializedMedium500.getValue(1); + return value != null ? extractString(value) : null; } @Benchmark public String pureStringAccessLarge100K() throws Exception { - return preDeserializedLarge100K.getValue(1).map(this::extractString).orElse(null); + Value value = preDeserializedLarge100K.getValue(1); + return value != null ? extractString(value) : null; } // Test cached vs uncached access @Benchmark public String cachedStringAccessSmall5() throws Exception { // Second access should hit cache - preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); - return preDeserializedSmall5.getValue(1).map(this::extractString).orElse(null); + Value value1 = preDeserializedSmall5.getValue(1); + String result1 = value1 != null ? extractString(value1) : null; + Value value2 = preDeserializedSmall5.getValue(1); + return value2 != null ? extractString(value2) : null; } public static void main(String[] args) throws Exception { - Options opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName()) + runDeserializationOnly(); + } + + public static void runAll() throws Exception { + var opt = new OptionsBuilder() + .include(StringBenchmark.class.getSimpleName()) + .build(); + new Runner(opt).run(); + } + + /** + * Run only string deserialization benchmarks to measure the impact of + * ThreadLocal buffer pool optimization and fast/fallback path performance. 
+ */ + public static void runDeserializationOnly() throws Exception { + var opt = new OptionsBuilder() + .include(StringBenchmark.class.getSimpleName() + ".*deserialize.*") // Only deserialize methods + .forks(0) // Run in same JVM to avoid serialization issues + .build(); + new Runner(opt).run(); + } + + /** + * Run only pure string access benchmarks (no record deserialization overhead) + * to isolate string decode performance with ThreadLocal buffer optimization. + */ + public static void runStringAccessOnly() throws Exception { + var opt = new OptionsBuilder() + .include(StringBenchmark.class.getSimpleName() + ".*(pureStringAccess|cachedStringAccess).*") // Only pure string access methods + .forks(0) // Run in same JVM to avoid serialization issues .build(); new Runner(opt).run(); } diff --git a/src/main/java/com/imprint/Constants.java b/src/main/java/com/imprint/Constants.java index 78b91a0..3c84a28 100644 --- a/src/main/java/com/imprint/Constants.java +++ b/src/main/java/com/imprint/Constants.java @@ -2,9 +2,7 @@ public final class Constants { public static final byte MAGIC = 0x49; - public static final byte VERSION = 0x02; + public static final byte VERSION = 0x01; public static final int HEADER_BYTES = 15; public static final int DIR_ENTRY_BYTES = 7; - - private Constants() {} } diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index d9d5659..1915707 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -1,10 +1,8 @@ - package com.imprint.core; import com.imprint.Constants; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import com.imprint.types.MapKey; import com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; @@ -15,8 +13,12 @@ import java.util.*; /** - * An Imprint record containing a header, optional field directory, and payload. 
+ * An Imprint record containing a header, field directory, and payload. * Uses ByteBuffer for zero-copy operations to achieve low latency. + * + *

Performance Note: All ByteBuffers should be array-backed + * (hasArray() == true) for optimal zero-copy performance. Direct buffers + * may cause performance degradation.

*/ @Getter public final class ImprintRecord { @@ -24,6 +26,11 @@ public final class ImprintRecord { private final List directory; private final ByteBuffer payload; // Read-only view for zero-copy + /** + * Creates a new ImprintRecord. + * + * @param payload the payload buffer. Should be array-backed for optimal performance. + */ public ImprintRecord(Header header, List directory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); this.directory = List.copyOf(Objects.requireNonNull(directory, "Directory cannot be null")); @@ -32,39 +39,44 @@ public ImprintRecord(Header header, List directory, ByteBuffer p /** * Get a value by field ID, deserializing it on demand. + * Returns null if the field is not found. */ - public Optional getValue(int fieldId) throws ImprintException { - // Binary search for the field ID without allocation - int index = findDirectoryIndex(fieldId); - if (index < 0) return Optional.empty(); + public Value getValue(int fieldId) throws ImprintException { + var fieldBuffer = getFieldBuffer(fieldId); + if (fieldBuffer == null) return null; - var entry = directory.get(index); - int startOffset = entry.getOffset(); - int endOffset = (index + 1 < directory.size()) ? - directory.get(index + 1).getOffset() : payload.remaining(); - - var valueBytes = payload.duplicate(); - valueBytes.position(startOffset).limit(endOffset); - var value = deserializeValue(entry.getTypeCode(), valueBytes.slice()); - return Optional.of(value); + var entry = directory.get(findDirectoryIndex(fieldId)); + return deserializeValue(entry.getTypeCode(), fieldBuffer); } /** * Get the raw bytes for a field without deserializing. - * Returns a zero-copy ByteBuffer view. + * Returns a zero-copy ByteBuffer view, or null if field not found. + */ + public ByteBuffer getRawBytes(int fieldId) { + var fieldBuffer = getFieldBuffer(fieldId); + return fieldBuffer != null ? 
fieldBuffer.asReadOnlyBuffer() : null; + } + + /** + * Get a ByteBuffer view of a field's data. + * Returns null if the field is not found. */ - public Optional getRawBytes(int fieldId) { + private ByteBuffer getFieldBuffer(int fieldId) { int index = findDirectoryIndex(fieldId); - if (index < 0) return Optional.empty(); - + if (index < 0) return null; + var entry = directory.get(index); int startOffset = entry.getOffset(); int endOffset = (index + 1 < directory.size()) ? - directory.get(index + 1).getOffset() : payload.remaining(); + directory.get(index + 1).getOffset() : payload.limit(); var fieldBuffer = payload.duplicate(); + if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { + return null; + } fieldBuffer.position(startOffset).limit(endOffset); - return Optional.of(fieldBuffer.slice().asReadOnlyBuffer()); + return fieldBuffer.slice(); } /** @@ -122,6 +134,9 @@ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { /** * Deserialize a record from a ByteBuffer. + * + * @param buffer the buffer to deserialize from. Must be array-backed + * (buffer.hasArray() == true) for optimal zero-copy performance. 
*/ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); @@ -172,7 +187,7 @@ private int findDirectoryIndex(int fieldId) { return -(low + 1); // field not found, return insertion point } - private int estimateSerializedSize() { + public int estimateSerializedSize() { int size = Constants.HEADER_BYTES; // header size += VarInt.encodedLength(directory.size()); // directory count size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries @@ -184,7 +199,7 @@ private void serializeHeader(ByteBuffer buffer) { buffer.put(Constants.MAGIC); buffer.put(Constants.VERSION); buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldspaceId()); + buffer.putInt(header.getSchemaId().getFieldSpaceId()); buffer.putInt(header.getSchemaId().getSchemaHash()); buffer.putInt(header.getPayloadSize()); } @@ -236,10 +251,9 @@ private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throw } private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - // Buffer is already positioned and limited correctly - buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer valueSpecificBuffer = buffer.duplicate(); + valueSpecificBuffer.order(ByteOrder.LITTLE_ENDIAN); - // Use TypeHandler for simple types switch (typeCode) { case NULL: case BOOL: @@ -249,17 +263,11 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr case FLOAT64: case BYTES: case STRING: - return typeCode.getHandler().deserialize(buffer); - case ARRAY: - return deserializeArray(buffer); - case MAP: - return deserializeMap(buffer); - + return typeCode.getHandler().deserialize(valueSpecificBuffer); case ROW: - var remainingBuffer = buffer.slice(); - var nestedRecord = deserialize(remainingBuffer); + var nestedRecord = deserialize(valueSpecificBuffer); return Value.fromRow(nestedRecord); default: @@ -267,96 
+275,6 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } } - private Value deserializeArray(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromArray(Collections.emptyList()); - } - - var elementType = TypeCode.fromByte(buffer.get()); - var elements = new ArrayList(length); - - for (int i = 0; i < length; i++) { - var elementBytes = readValueBytes(elementType, buffer); - var element = deserializeValue(elementType, elementBytes); - elements.add(element); - } - - return Value.fromArray(elements); - } - - private Value deserializeMap(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromMap(Collections.emptyMap()); - } - - var keyType = TypeCode.fromByte(buffer.get()); - var valueType = TypeCode.fromByte(buffer.get()); - var map = new HashMap(length); - - for (int i = 0; i < length; i++) { - // Read key - var keyBytes = readValueBytes(keyType, buffer); - var keyValue = deserializeValue(keyType, keyBytes); - var key = MapKey.fromValue(keyValue); - - // Read value - var valueBytes = readValueBytes(valueType, buffer); - var value = deserializeValue(valueType, valueBytes); - - map.put(key, value); - } - - return Value.fromMap(map); - } - - private ByteBuffer readValueBytes(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - // Use TypeHandler for simple types - switch (typeCode) { - case NULL: - case BOOL: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - case BYTES: - case STRING: - return typeCode.getHandler().readValueBytes(buffer); - - case ARRAY: - case MAP: - case ROW: - // For complex types, return the entire remaining buffer for now - // The specific deserializer will handle parsing in the future - var remainingBuffer = buffer.slice(); - 
buffer.position(buffer.limit()); - return remainingBuffer.asReadOnlyBuffer(); - - default: - throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); - } - } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null || getClass() != obj.getClass()) return false; - var that = (ImprintRecord) obj; - return header.equals(that.header) && - directory.equals(that.directory) && - payload.equals(that.payload); - } - - @Override - public int hashCode() { - return Objects.hash(header, directory, payload); - } - @Override public String toString() { return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 202bd2a..8c04bf5 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -10,11 +10,16 @@ * A fluent builder for creating ImprintRecord instances with type-safe, * developer-friendly API that eliminates boilerplate Value.fromX() calls. *

+ * Field IDs can be overwritten - calling field() with the same ID multiple times + * will replace the previous value. This allows for flexible builder patterns and + * conditional field updates. + *

* Usage: *

  *   var record = ImprintRecord.builder(schemaId)
  *       .field(1, 42)              // int to Int32Value  
  *       .field(2, "hello")         // String to StringValue
+ *       .field(1, 100)             // overwrites field 1 with new value
  *       .field(3, 3.14)            // double to Float64Value
  *       .field(4, bytes)           // byte[] to BytesValue
  *       .field(5, true)            // boolean to BoolValue
@@ -22,6 +27,7 @@
  *       .build();
  * 
*/ +@SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; private final Map fields = new TreeMap<>(); @@ -141,12 +147,17 @@ public ImprintRecord build() throws ImprintException { } // Internal helper methods + /** + * Adds or overwrites a field in the record being built. + * If a field with the given ID already exists, it will be replaced. + * + * @param id the field ID + * @param value the field value (cannot be null - use nullField() for explicit nulls) + * @return this builder for method chaining + */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - if (fields.containsKey(id)) { - throw new IllegalArgumentException("Field ID " + id + " already exists - field IDs must be unique"); - } - fields.put(id, value); + fields.put(id, value); // TreeMap.put() overwrites existing values return this; } diff --git a/src/main/java/com/imprint/core/ImprintWriter.java b/src/main/java/com/imprint/core/ImprintWriter.java index 39ad9ea..b1d5f53 100644 --- a/src/main/java/com/imprint/core/ImprintWriter.java +++ b/src/main/java/com/imprint/core/ImprintWriter.java @@ -2,14 +2,13 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import com.imprint.types.MapKey; import com.imprint.types.Value; -import com.imprint.util.VarInt; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.Objects; +import java.util.TreeMap; /** * A writer for constructing ImprintRecords by adding fields sequentially. 
@@ -56,7 +55,7 @@ public ImprintRecord build() throws ImprintException { return new ImprintRecord(header, directory, payloadView); } - private int estimatePayloadSize() { + private int estimatePayloadSize() throws ImprintException { // More accurate estimation to reduce allocations int estimatedSize = 0; for (var value : fields.values()) { @@ -74,7 +73,7 @@ private int estimatePayloadSize() { * @param value the value to estimate size for * @return estimated size in bytes including type-specific overhead */ - private int estimateValueSize(Value value) { + private int estimateValueSize(Value value) throws ImprintException { // Use TypeHandler for simple types switch (value.getTypeCode()) { case NULL: @@ -85,52 +84,20 @@ private int estimateValueSize(Value value) { case FLOAT64: case BYTES: case STRING: - return value.getTypeCode().getHandler().estimateSize(value); - case ARRAY: - List array = ((Value.ArrayValue) value).getValue(); - int arraySize = VarInt.encodedLength(array.size()) + 1; // length + type code - for (Value element : array) { - arraySize += estimateValueSize(element); - } - return arraySize; - case MAP: - Map map = ((Value.MapValue) value).getValue(); - int mapSize = VarInt.encodedLength(map.size()) + 2; // length + 2 type codes - for (Map.Entry entry : map.entrySet()) { - mapSize += estimateMapKeySize(entry.getKey()); - mapSize += estimateValueSize(entry.getValue()); - } - return mapSize; + return value.getTypeCode().getHandler().estimateSize(value); case ROW: - // Estimate nested record size (rough approximation) - return 100; // Conservative estimate + Value.RowValue rowValue = (Value.RowValue) value; + return rowValue.getValue().estimateSerializedSize(); default: - return 32; // Default fallback + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } - - private int estimateMapKeySize(MapKey key) { - switch (key.getTypeCode()) { - case INT32: return 4; - case INT64: return 8; - case BYTES: - 
byte[] bytes = ((MapKey.BytesKey) key).getValue(); - return VarInt.encodedLength(bytes.length) + bytes.length; - case STRING: - var str = ((MapKey.StringKey) key).getValue(); - int utf8Length = str.getBytes(StandardCharsets.UTF_8).length; - return VarInt.encodedLength(utf8Length) + utf8Length; - default: - return 16; // Default fallback - } - } - private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { switch (value.getTypeCode()) { case NULL: @@ -141,17 +108,11 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept case FLOAT64: case BYTES: case STRING: - value.getTypeCode().getHandler().serialize(value, buffer); - break; - case ARRAY: - serializeArray((Value.ArrayValue) value, buffer); - break; - case MAP: - serializeMap((Value.MapValue) value, buffer); + value.getTypeCode().getHandler().serialize(value, buffer); break; - + //TODO eliminate this switch entirely by implementing a ROW TypeHandler case ROW: Value.RowValue rowValue = (Value.RowValue) value; var serializedRow = rowValue.getValue().serializeToBuffer(); @@ -159,99 +120,7 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept break; default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Unknown type code: " + value.getTypeCode()); - } - } - - private void serializeArray(Value.ArrayValue arrayValue, ByteBuffer buffer) throws ImprintException { - var elements = arrayValue.getValue(); - VarInt.encode(elements.size(), buffer); - - if (elements.isEmpty()) return; - - // All elements must have the same type - var elementType = elements.get(0).getTypeCode(); - buffer.put(elementType.getCode()); - for (var element : elements) { - if (element.getTypeCode() != elementType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Array elements must have same type code: " + - element.getTypeCode() + " != " + elementType); - } - serializeValue(element, buffer); - } - } - - private void serializeMap(Value.MapValue 
mapValue, ByteBuffer buffer) throws ImprintException { - var map = mapValue.getValue(); - VarInt.encode(map.size(), buffer); - - if (map.isEmpty()) { - return; - } - - // All keys and values must have consistent types - var iterator = map.entrySet().iterator(); - var first = iterator.next(); - var keyType = first.getKey().getTypeCode(); - var valueType = first.getValue().getTypeCode(); - - buffer.put(keyType.getCode()); - buffer.put(valueType.getCode()); - - // Serialize the first entry - serializeMapKey(first.getKey(), buffer); - serializeValue(first.getValue(), buffer); - - // Serialize remaining entries - while (iterator.hasNext()) { - var entry = iterator.next(); - if (entry.getKey().getTypeCode() != keyType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map keys must have same type code: " + - entry.getKey().getTypeCode() + " != " + keyType); - } - if (entry.getValue().getTypeCode() != valueType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map values must have same type code: " + - entry.getValue().getTypeCode() + " != " + valueType); - } - - serializeMapKey(entry.getKey(), buffer); - serializeValue(entry.getValue(), buffer); - } - } - - private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: - MapKey.Int32Key int32Key = (MapKey.Int32Key) key; - buffer.putInt(int32Key.getValue()); - break; - - case INT64: - MapKey.Int64Key int64Key = (MapKey.Int64Key) key; - buffer.putLong(int64Key.getValue()); - break; - - case BYTES: - MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; - byte[] bytes = bytesKey.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - break; - - case STRING: - MapKey.StringKey stringKey = (MapKey.StringKey) key; - byte[] stringBytes = stringKey.getValue().getBytes(StandardCharsets.UTF_8); - VarInt.encode(stringBytes.length, buffer); - buffer.put(stringBytes); - break; - - default: - throw new 
ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/SchemaId.java b/src/main/java/com/imprint/core/SchemaId.java index cb03c1c..b6dae3b 100644 --- a/src/main/java/com/imprint/core/SchemaId.java +++ b/src/main/java/com/imprint/core/SchemaId.java @@ -7,6 +7,6 @@ */ @Value public class SchemaId { - int fieldspaceId; + int fieldSpaceId; int schemaHash; } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index 6bf450d..a81b199 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -16,8 +16,8 @@ public enum TypeCode { FLOAT64(0x5, TypeHandler.FLOAT64), BYTES(0x6, TypeHandler.BYTES), STRING(0x7, TypeHandler.STRING), - ARRAY(0x8, null), // TODO: implement - MAP(0x9, null), // TODO: implement + ARRAY(0x8, TypeHandler.ARRAY), + MAP(0x9, TypeHandler.MAP), ROW(0xA, null); // TODO: implement (basically a placeholder for user-defined type) @Getter diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 07ef7a1..573aed3 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -1,9 +1,11 @@ package com.imprint.types; +import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.util.VarInt; import java.nio.ByteBuffer; +import java.util.*; /** * Interface for handling type-specific serialization, deserialization, and size estimation. 
@@ -14,9 +16,52 @@ public interface TypeHandler { Value deserialize(ByteBuffer buffer) throws ImprintException; void serialize(Value value, ByteBuffer buffer) throws ImprintException; - int estimateSize(Value value); + int estimateSize(Value value) throws ImprintException; ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException; + // Helper method to eliminate duplication in ARRAY/MAP readValueBytes + static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, + ComplexValueMeasurer measurer) throws ImprintException { + int initialPosition = buffer.position(); + ByteBuffer tempBuffer = buffer.duplicate(); + tempBuffer.order(buffer.order()); + + VarInt.DecodeResult lengthResult = VarInt.decode(tempBuffer); + int numElements = lengthResult.getValue(); + int varIntLength = tempBuffer.position() - initialPosition; + + if (numElements == 0) { + if (buffer.remaining() < varIntLength) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for empty " + typeName + " VarInt. Needed: " + + varIntLength + ", available: " + buffer.remaining()); + } + ByteBuffer valueSlice = buffer.slice(); + valueSlice.limit(varIntLength); + buffer.position(initialPosition + varIntLength); + return valueSlice.asReadOnlyBuffer(); + } + + int dataLength = measurer.measureDataLength(tempBuffer, numElements); + int totalLength = varIntLength + dataLength; + + if (buffer.remaining() < totalLength) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for " + typeName + " value. 
Needed: " + totalLength + + ", available: " + buffer.remaining() + " at position " + initialPosition); + } + + ByteBuffer valueSlice = buffer.slice(); + valueSlice.limit(totalLength); + buffer.position(initialPosition + totalLength); + return valueSlice.asReadOnlyBuffer(); + } + + @FunctionalInterface + interface ComplexValueMeasurer { + int measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; + } + // Static implementations for each type TypeHandler NULL = new TypeHandler() { @Override @@ -54,7 +99,7 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { @Override public void serialize(Value value, ByteBuffer buffer) { - Value.BoolValue boolValue = (Value.BoolValue) value; + var boolValue = (Value.BoolValue) value; buffer.put((byte) (boolValue.getValue() ? 1 : 0)); } @@ -76,14 +121,14 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 4) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); } return Value.fromInt32(buffer.getInt()); } @Override public void serialize(Value value, ByteBuffer buffer) { - Value.Int32Value int32Value = (Value.Int32Value) value; + var int32Value = (Value.Int32Value) value; buffer.putInt(int32Value.getValue()); } @@ -105,7 +150,7 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 8) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); } return Value.fromInt64(buffer.getLong()); } @@ -134,14 +179,14 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) { @Override public Value 
deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 4) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); } return Value.fromFloat32(buffer.getFloat()); } @Override public void serialize(Value value, ByteBuffer buffer) { - Value.Float32Value float32Value = (Value.Float32Value) value; + var float32Value = (Value.Float32Value) value; buffer.putFloat(float32Value.getValue()); } @@ -163,14 +208,14 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) { @Override public Value deserialize(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < 8) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); } return Value.fromFloat64(buffer.getDouble()); } @Override public void serialize(Value value, ByteBuffer buffer) { - Value.Float64Value float64Value = (Value.Float64Value) value; + var float64Value = (Value.Float64Value) value; buffer.putDouble(float64Value.getValue()); } @@ -194,7 +239,7 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult lengthResult = VarInt.decode(buffer); int length = lengthResult.getValue(); if (buffer.remaining() < length) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bytes value"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bytes value data after VarInt. Slice from readValueBytes is too short. 
Needed: " + length + ", available: " + buffer.remaining()); } var bytesView = buffer.slice(); bytesView.limit(length); @@ -231,14 +276,25 @@ public int estimateSize(Value value) { @Override public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - int originalPosition = buffer.position(); - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); - buffer.position(originalPosition); - var valueBuffer = buffer.slice(); - valueBuffer.limit(totalLength); - buffer.position(buffer.position() + totalLength); - return valueBuffer.asReadOnlyBuffer(); + int initialPos = buffer.position(); + ByteBuffer tempMeasureBuffer = buffer.duplicate(); + VarInt.DecodeResult dr = VarInt.decode(tempMeasureBuffer); + + int varIntByteLength = tempMeasureBuffer.position() - initialPos; + int payloadByteLength = dr.getValue(); + int totalValueLength = varIntByteLength + payloadByteLength; + + if (buffer.remaining() < totalValueLength) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for VarInt-prefixed data. Needed: " + totalValueLength + + ", available: " + buffer.remaining() + " at position " + initialPos); + } + + ByteBuffer resultSlice = buffer.slice(); + resultSlice.limit(totalValueLength); + + buffer.position(initialPos + totalValueLength); + return resultSlice.asReadOnlyBuffer(); } }; @@ -248,28 +304,28 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult strLengthResult = VarInt.decode(buffer); int strLength = strLengthResult.getValue(); if (buffer.remaining() < strLength) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for string value"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for string value data after VarInt. Slice from readValueBytes is too short. 
Needed: " + strLength + ", available: " + buffer.remaining()); } var stringBytesView = buffer.slice(); stringBytesView.limit(strLength); buffer.position(buffer.position() + strLength); try { - return Value.fromStringBuffer(stringBytesView.asReadOnlyBuffer()); + return Value.fromStringBuffer(stringBytesView); } catch (Exception e) { - throw new ImprintException(com.imprint.error.ErrorType.INVALID_UTF8_STRING, "Invalid UTF-8 string"); + throw new ImprintException(ErrorType.INVALID_UTF8_STRING, "Invalid UTF-8 string or buffer issue: " + e.getMessage()); } } @Override public void serialize(Value value, ByteBuffer buffer) { if (value instanceof Value.StringBufferValue) { - Value.StringBufferValue bufferValue = (Value.StringBufferValue) value; + var bufferValue = (Value.StringBufferValue) value; var stringBuffer = bufferValue.getBuffer(); VarInt.encode(stringBuffer.remaining(), buffer); buffer.put(stringBuffer); } else { - Value.StringValue stringValue = (Value.StringValue) value; - byte[] stringBytes = stringValue.getUtf8Bytes(); // Use cached UTF-8 bytes + var stringValue = (Value.StringValue) value; + byte[] stringBytes = stringValue.getUtf8Bytes(); VarInt.encode(stringBytes.length, buffer); buffer.put(stringBytes); } @@ -283,21 +339,287 @@ public int estimateSize(Value value) { return VarInt.encodedLength(length) + length; } else { Value.StringValue stringValue = (Value.StringValue) value; - byte[] utf8Bytes = stringValue.getUtf8Bytes(); // Use cached UTF-8 bytes + byte[] utf8Bytes = stringValue.getUtf8Bytes(); return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; } } @Override public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - int originalPosition = buffer.position(); + int initialPos = buffer.position(); + ByteBuffer tempMeasureBuffer = buffer.duplicate(); + VarInt.DecodeResult dr = VarInt.decode(tempMeasureBuffer); + + int varIntByteLength = tempMeasureBuffer.position() - initialPos; + int payloadByteLength = dr.getValue(); 
+ int totalValueLength = varIntByteLength + payloadByteLength; + + if (buffer.remaining() < totalValueLength) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for VarInt-prefixed string. Needed: " + totalValueLength + + ", available: " + buffer.remaining() + " at position " + initialPos); + } + + ByteBuffer resultSlice = buffer.slice(); + resultSlice.limit(totalValueLength); + + buffer.position(initialPos + totalValueLength); + return resultSlice.asReadOnlyBuffer(); + } + }; + + TypeHandler ARRAY = new TypeHandler() { + @Override + public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); - buffer.position(originalPosition); - var valueBuffer = buffer.slice(); - valueBuffer.limit(totalLength); - buffer.position(buffer.position() + totalLength); - return valueBuffer.asReadOnlyBuffer(); + int length = lengthResult.getValue(); + + if (length == 0) { + return Value.fromArray(Collections.emptyList()); + } + + if (buffer.remaining() < 1) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); + } + var elementType = TypeCode.fromByte(buffer.get()); + var elements = new ArrayList(length); + var elementHandler = elementType.getHandler(); + + for (int i = 0; i < length; i++) { + var elementValueBytes = elementHandler.readValueBytes(buffer); + elementValueBytes.order(buffer.order()); + var element = elementHandler.deserialize(elementValueBytes); + elements.add(element); + } + + return Value.fromArray(elements); + } + + @Override + public void serialize(Value value, ByteBuffer buffer) throws ImprintException { + var arrayValue = (Value.ArrayValue) value; + var elements = arrayValue.getValue(); + VarInt.encode(elements.size(), buffer); + + if (elements.isEmpty()) return; + + var elementType = elements.get(0).getTypeCode(); + 
buffer.put(elementType.getCode()); + var elementHandler = elementType.getHandler(); + for (var element : elements) { + if (element.getTypeCode() != elementType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Array elements must have same type code: " + + element.getTypeCode() + " != " + elementType); + } + elementHandler.serialize(element, buffer); + } + } + + @Override + public int estimateSize(Value value) throws ImprintException { + var arrayValue = (Value.ArrayValue) value; + var elements = arrayValue.getValue(); + int sizeOfLength = VarInt.encodedLength(elements.size()); + if (elements.isEmpty()) { + return sizeOfLength; + } + int sizeOfElementTypeCode = 1; + int arraySize = sizeOfLength + sizeOfElementTypeCode; + var elementHandler = elements.get(0).getTypeCode().getHandler(); + for (var element : elements) { + arraySize += elementHandler.estimateSize(element); + } + return arraySize; + } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { + return readComplexValueBytes(buffer, "ARRAY", (tempBuffer, numElements) -> { + if (tempBuffer.remaining() < 1) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for ARRAY element type code in temp buffer during measurement."); + } + byte elementTypeCodeByte = tempBuffer.get(); + int typeCodeLength = 1; + + TypeHandler elementHandler = TypeCode.fromByte(elementTypeCodeByte).getHandler(); + int elementsDataLength = 0; + for (int i = 0; i < numElements; i++) { + int elementStartPos = tempBuffer.position(); + elementHandler.readValueBytes(tempBuffer); + elementsDataLength += (tempBuffer.position() - elementStartPos); + } + + return typeCodeLength + elementsDataLength; + }); + } + }; + + TypeHandler MAP = new TypeHandler() { + @Override + public Value deserialize(ByteBuffer buffer) throws ImprintException { + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + + if (length == 0) { + return 
Value.fromMap(Collections.emptyMap()); + } + + if (buffer.remaining() < 2) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); + } + var keyType = TypeCode.fromByte(buffer.get()); + var valueType = TypeCode.fromByte(buffer.get()); + var map = new HashMap(length); + + var keyHandler = keyType.getHandler(); + var valueHandler = valueType.getHandler(); + + for (int i = 0; i < length; i++) { + var keyBytes = keyHandler.readValueBytes(buffer); + keyBytes.order(buffer.order()); + var keyValue = keyHandler.deserialize(keyBytes); + var key = MapKey.fromValue(keyValue); + + var valueBytes = valueHandler.readValueBytes(buffer); + valueBytes.order(buffer.order()); + var mapInternalValue = valueHandler.deserialize(valueBytes); + + map.put(key, mapInternalValue); + } + + return Value.fromMap(map); + } + + @Override + public void serialize(Value value, ByteBuffer buffer) throws ImprintException { + var mapValue = (Value.MapValue) value; + var map = mapValue.getValue(); + VarInt.encode(map.size(), buffer); + + if (map.isEmpty()) { + return; + } + + var iterator = map.entrySet().iterator(); + var first = iterator.next(); + var keyType = first.getKey().getTypeCode(); + var valueType = first.getValue().getTypeCode(); + + buffer.put(keyType.getCode()); + buffer.put(valueType.getCode()); + + serializeMapKey(first.getKey(), buffer); + first.getValue().getTypeCode().getHandler().serialize(first.getValue(), buffer); + + while (iterator.hasNext()) { + var entry = iterator.next(); + if (entry.getKey().getTypeCode() != keyType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map keys must have same type code: " + + entry.getKey().getTypeCode() + " != " + keyType); + } + if (entry.getValue().getTypeCode() != valueType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map values must have same type code: " + + entry.getValue().getTypeCode() + " != " + valueType); + } + + serializeMapKey(entry.getKey(), buffer); + 
entry.getValue().getTypeCode().getHandler().serialize(entry.getValue(), buffer); + } + } + + @Override + public int estimateSize(Value value) throws ImprintException { + var mapValue = (Value.MapValue) value; + var map = mapValue.getValue(); + int sizeOfLength = VarInt.encodedLength(map.size()); + if (map.isEmpty()) { + return sizeOfLength; + } + int sizeOfTypeCodes = 2; + int mapSize = sizeOfLength + sizeOfTypeCodes; + + for (var entry : map.entrySet()) { + mapSize += estimateMapKeySize(entry.getKey()); + mapSize += entry.getValue().getTypeCode().getHandler().estimateSize(entry.getValue()); + } + return mapSize; + } + + @Override + public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { + return readComplexValueBytes(buffer, "MAP", (tempBuffer, numEntries) -> { + if (tempBuffer.remaining() < 2) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for MAP key/value type codes in temp buffer during measurement."); + } + byte keyTypeCodeByte = tempBuffer.get(); + byte valueTypeCodeByte = tempBuffer.get(); + int typeCodesLength = 2; + int entriesDataLength = 0; + for (int i = 0; i < numEntries; i++) { + int entryStartPos = tempBuffer.position(); + TypeCode.fromByte(keyTypeCodeByte).getHandler().readValueBytes(tempBuffer); + TypeCode.fromByte(valueTypeCodeByte).getHandler().readValueBytes(tempBuffer); + entriesDataLength += (tempBuffer.position() - entryStartPos); + } + + return typeCodesLength + entriesDataLength; + }); + } + + private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { + switch (key.getTypeCode()) { + case INT32: + MapKey.Int32Key int32Key = (MapKey.Int32Key) key; + buffer.putInt(int32Key.getValue()); + break; + + case INT64: + MapKey.Int64Key int64Key = (MapKey.Int64Key) key; + buffer.putLong(int64Key.getValue()); + break; + + case BYTES: + MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; + byte[] bytes = bytesKey.getValue(); + VarInt.encode(bytes.length, buffer); + 
buffer.put(bytes); + break; + + case STRING: + MapKey.StringKey stringKey = (MapKey.StringKey) key; + byte[] stringBytes = stringKey.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8); + VarInt.encode(stringBytes.length, buffer); + buffer.put(stringBytes); + break; + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Invalid map key type: " + key.getTypeCode()); + } + } + + private int estimateMapKeySize(MapKey key) throws ImprintException { + switch (key.getTypeCode()) { + case INT32: return 4; + case INT64: return 8; + case BYTES: + byte[] bytes = ((MapKey.BytesKey) key).getValue(); + return VarInt.encodedLength(bytes.length) + bytes.length; + + case STRING: + var str = ((MapKey.StringKey) key).getValue(); + int utf8Length = str.getBytes(java.nio.charset.StandardCharsets.UTF_8).length; + return VarInt.encodedLength(utf8Length) + utf8Length; + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Invalid map key type: " + key.getTypeCode()); + } } }; } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index 64eae91..7f3bbb9 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -205,9 +205,16 @@ public byte[] getValue() { @Override public boolean equals(Object obj) { if (this == obj) return true; - if (obj == null || getClass() != obj.getClass()) return false; - BytesValue that = (BytesValue) obj; - return Arrays.equals(value, that.value); + if (obj == null) return false; + if (obj instanceof BytesValue) { + BytesValue that = (BytesValue) obj; + return Arrays.equals(value, that.value); + } + if (obj instanceof BytesBufferValue) { + BytesBufferValue that = (BytesBufferValue) obj; + return Arrays.equals(value, that.getValue()); + } + return false; } @Override @@ -270,7 +277,6 @@ public String toString() { } // String Value (String-based) - @EqualsAndHashCode(callSuper = false) public 
static class StringValue extends Value { @Getter private final String value; @@ -281,11 +287,11 @@ public StringValue(String value) { } public byte[] getUtf8Bytes() { - byte[] cached = cachedUtf8Bytes; + var cached = cachedUtf8Bytes; if (cached == null) { // Multiple threads may compute this - that's OK since it's idempotent cached = value.getBytes(StandardCharsets.UTF_8); - cachedUtf8Bytes = cached; // Benign race - last writer wins + cachedUtf8Bytes = cached; } return cached; // Return our computed value, not re-read from volatile field } @@ -293,6 +299,26 @@ public byte[] getUtf8Bytes() { @Override public TypeCode getTypeCode() { return TypeCode.STRING; } + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (obj instanceof StringValue) { + StringValue that = (StringValue) obj; + return value.equals(that.value); + } + if (obj instanceof StringBufferValue) { + StringBufferValue that = (StringBufferValue) obj; + return value.equals(that.getValue()); + } + return false; + } + + @Override + public int hashCode() { + return value.hashCode(); + } + @Override public String toString() { return "\"" + value + "\""; @@ -320,13 +346,14 @@ public String getValue() { private String decodeUtf8() { // Fast path: zero-copy for array-backed ByteBuffers - if (value.hasArray() && value.arrayOffset() == 0) { - return new String(value.array(), value.position(), + if (value.hasArray()) { + return new String(value.array(), value.arrayOffset() + value.position(), value.remaining(), StandardCharsets.UTF_8); } - // Fallback path - should be impossible since deserialize uses wrap() to create an array-backed ByteBuffer. - // Allocation required for direct ByteBuffers since Java's String API doesn't provide ByteBuffer constructors + // Fallback path for non-array-backed ByteBuffers (e.g., direct buffers). + // Allocation is required here as Java's String(byte[],...) constructor needs a heap array. 
+ // Data is copied from the ByteBuffer to a new byte array. var array = new byte[value.remaining()]; value.duplicate().get(array); return new String(array, StandardCharsets.UTF_8); diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index 5c9a7e5..f4c22f2 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -104,6 +104,26 @@ public static int encodedLength(int value) { return length; } + /** + * Read VarInt-prefixed data from a ByteBuffer. + * The data format is: VarInt(length) + data(length bytes). + * Returns a read-only ByteBuffer containing the entire VarInt + data. + * + * @param buffer the buffer to read from + * @return a read-only ByteBuffer view of the VarInt + data + * @throws ImprintException if the VarInt is malformed or buffer underflow + */ + public static ByteBuffer readVarIntPrefixedBytes(ByteBuffer buffer) throws ImprintException { + int originalPosition = buffer.position(); + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); + buffer.position(originalPosition); + var valueBuffer = buffer.slice(); + valueBuffer.limit(totalLength); + buffer.position(buffer.position() + totalLength); + return valueBuffer.asReadOnlyBuffer(); + } + /** * Result of a VarInt decode operation. 
*/ diff --git a/src/test/java/com/imprint/ByteBufferIntegrationTest.java b/src/test/java/com/imprint/ByteBufferIntegrationTest.java index 9460cbf..56ec3b0 100644 --- a/src/test/java/com/imprint/ByteBufferIntegrationTest.java +++ b/src/test/java/com/imprint/ByteBufferIntegrationTest.java @@ -1,20 +1,25 @@ package com.imprint; -import com.imprint.core.*; -import com.imprint.types.*; +import com.imprint.core.ImprintRecord; +import com.imprint.core.ImprintWriter; +import com.imprint.core.SchemaId; import com.imprint.error.ImprintException; +import com.imprint.types.MapKey; +import com.imprint.types.Value; + import java.nio.ByteBuffer; import java.util.*; /** * Integration test to verify ByteBuffer functionality and zero-copy benefits. */ -public class ByteBufferIntegrationTest { - +class ByteBufferIntegrationTest { + public static void main(String[] args) { try { testByteBufferFunctionality(); testZeroCopy(); + testArrayBackedBuffers(); System.out.println("All ByteBuffer integration tests passed!"); } catch (Exception e) { System.err.println("ByteBuffer integration test failed: " + e.getMessage()); @@ -22,66 +27,154 @@ public static void main(String[] args) { System.exit(1); } } - + static void testByteBufferFunctionality() throws ImprintException { System.out.println("Testing ByteBuffer functionality..."); - + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); ImprintWriter writer = new ImprintWriter(schemaId); - + writer.addField(1, Value.fromInt32(42)) .addField(2, Value.fromString("zero-copy test")) .addField(3, Value.fromBytes(new byte[]{1, 2, 3, 4, 5})); - + ImprintRecord record = writer.build(); - + // Test ByteBuffer serialization ByteBuffer serializedBuffer = record.serializeToBuffer(); assert serializedBuffer.isReadOnly() : "Serialized buffer should be read-only"; - + // Test deserialization from ByteBuffer ImprintRecord deserialized = ImprintRecord.deserialize(serializedBuffer); - - assert deserialized.getValue(1).get().equals(Value.fromInt32(42)); - assert 
deserialized.getValue(2).get().equals(Value.fromString("zero-copy test")); - + + assert Objects.equals(deserialized.getValue(1), Value.fromInt32(42)); + assert Objects.equals(deserialized.getValue(2), Value.fromString("zero-copy test")); + // Test raw bytes access returns ByteBuffer - Optional rawBytes = deserialized.getRawBytes(1); - assert rawBytes.isPresent() : "Raw bytes should be present for field 1"; - assert rawBytes.get().isReadOnly() : "Raw bytes buffer should be read-only"; - + var rawBytes = deserialized.getRawBytes(1); + assert rawBytes != null : "Raw bytes should be present for field 1"; + assert rawBytes.isReadOnly() : "Raw bytes buffer should be read-only"; + System.out.println("ByteBuffer functionality test passed"); } - + static void testZeroCopy() { System.out.println("Testing zero-copy"); - + // Create a large payload to demonstrate zero-copy benefits byte[] largePayload = new byte[1024 * 1024]; // 1MB Arrays.fill(largePayload, (byte) 0xAB); - + SchemaId schemaId = new SchemaId(2, 0xcafebabe); ImprintWriter writer = new ImprintWriter(schemaId); - + try { writer.addField(1, Value.fromBytes(largePayload)); ImprintRecord record = writer.build(); - + // Test that getRawBytes returns a view, not a copy - Optional rawBytes = record.getRawBytes(1); - assert rawBytes.isPresent() : "Raw bytes should be present"; - - ByteBuffer rawBuffer = rawBytes.get(); - assert rawBuffer.isReadOnly() : "Raw buffer should be read-only"; - + var rawBytes = record.getRawBytes(1); + assert rawBytes !=null : "Raw bytes should be present"; + assert rawBytes.isReadOnly() : "Raw buffer should be read-only"; + // The buffer should be positioned at the start of the actual data // (after the VarInt length prefix) - assert rawBuffer.remaining() > largePayload.length : "Buffer should include length prefix"; - + assert rawBytes.remaining() > largePayload.length : "Buffer should include length prefix"; + System.out.println("Zero-copy benefits test passed"); - + } catch 
(ImprintException e) { throw new RuntimeException("Failed zero-copy test", e); } } + + static void testArrayBackedBuffers() throws ImprintException { + System.out.println("Testing array-backed buffers for zero-copy performance..."); + + // Test serialized buffers are array-backed + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); + ImprintWriter writer = new ImprintWriter(schemaId); + + writer.addField(1, Value.fromInt32(42)) + .addField(2, Value.fromString("test string")) + .addField(3, Value.fromBytes(new byte[]{1, 2, 3, 4})) + .addField(4, Value.fromBoolean(true)); + + ImprintRecord record = writer.build(); + + // Test that serializeToBuffer() returns array-backed buffer + ByteBuffer serializedBuffer = record.serializeToBuffer(); + assert serializedBuffer.hasArray() : "Serialized buffer should be array-backed for zero-copy performance"; + + // Test that the internal payload is array-backed + assert record.getPayload().hasArray() : "Record payload should be array-backed for zero-copy performance"; + + // Test deserialized buffers are array-backed + byte[] bytes = new byte[serializedBuffer.remaining()]; + serializedBuffer.get(bytes); + ImprintRecord deserialized = ImprintRecord.deserialize(bytes); + + assert deserialized.getPayload().hasArray() : "Deserialized record payload should be array-backed"; + + // Test that getRawBytes() returns array-backed buffers + ByteBuffer rawBytes1 = deserialized.getRawBytes(1); + ByteBuffer rawBytes2 = deserialized.getRawBytes(2); + + assert rawBytes1 != null && rawBytes1.hasArray() : "Raw bytes buffer for int field should be array-backed"; + assert rawBytes2 != null && rawBytes2.hasArray() : "Raw bytes buffer for string field should be array-backed"; + + // Test complex types use array-backed buffers + ImprintWriter complexWriter = new ImprintWriter(new SchemaId(2, 0xcafebabe)); + + // Create array value + List arrayValues = Arrays.asList( + Value.fromInt32(1), + Value.fromInt32(2), + Value.fromInt32(3) + ); + + // Create map 
value + Map mapValues = new HashMap<>(); + mapValues.put(MapKey.fromString("key1"), Value.fromString("value1")); + mapValues.put(MapKey.fromString("key2"), Value.fromString("value2")); + + complexWriter.addField(1, Value.fromArray(arrayValues)) + .addField(2, Value.fromMap(mapValues)); + + ImprintRecord complexRecord = complexWriter.build(); + + assert complexRecord.getPayload().hasArray() : "Record with complex types should use array-backed payload"; + + ByteBuffer complexSerialized = complexRecord.serializeToBuffer(); + assert complexSerialized.hasArray() : "Serialized buffer with complex types should be array-backed"; + + // Test nested records use array-backed buffers + ImprintWriter innerWriter = new ImprintWriter(new SchemaId(3, 0x12345678)); + innerWriter.addField(1, Value.fromString("nested data")); + ImprintRecord innerRecord = innerWriter.build(); + + ImprintWriter outerWriter = new ImprintWriter(new SchemaId(4, 0x87654321)); + outerWriter.addField(1, Value.fromRow(innerRecord)); + ImprintRecord outerRecord = outerWriter.build(); + + assert innerRecord.getPayload().hasArray() : "Inner record payload should be array-backed"; + assert outerRecord.getPayload().hasArray() : "Outer record payload should be array-backed"; + + ByteBuffer nestedSerialized = outerRecord.serializeToBuffer(); + assert nestedSerialized.hasArray() : "Serialized nested record should be array-backed"; + + // Test builder pattern uses array-backed buffers + ImprintRecord builderRecord = ImprintRecord.builder(1, 0xabcdef00) + .field(1, "test string") + .field(2, 42) + .field(3, new byte[]{1, 2, 3}) + .build(); + + assert builderRecord.getPayload().hasArray() : "Builder-created record should use array-backed payload"; + + ByteBuffer builderSerialized = builderRecord.serializeToBuffer(); + assert builderSerialized.hasArray() : "Builder-created serialized buffer should be array-backed"; + + System.out.println("✓ Array-backed buffers test passed"); + } } \ No newline at end of file diff --git 
a/src/test/java/com/imprint/ComprehensiveTest.java b/src/test/java/com/imprint/ComprehensiveTest.java deleted file mode 100644 index af7f0b0..0000000 --- a/src/test/java/com/imprint/ComprehensiveTest.java +++ /dev/null @@ -1,208 +0,0 @@ -package com.imprint; - -import com.imprint.core.*; -import com.imprint.types.*; -import com.imprint.error.ImprintException; -import com.imprint.util.VarInt; -import java.nio.ByteBuffer; -import java.util.*; - -/** - * Comprehensive test to verify all functionality works correctly. - */ -public class ComprehensiveTest { - - public static void main(String[] args) { - try { - testVarIntFunctionality(); - testValueTypes(); - testMapKeys(); - testComplexSerialization(); - testErrorHandling(); - testByteBufferPerformance(); - System.out.println("All comprehensive tests passed!"); - } catch (Exception e) { - System.err.println("Comprehensive test failed: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - - static void testVarIntFunctionality() throws ImprintException { - System.out.println("Testing VarInt functionality..."); - - // Test encoding/decoding of various values - int[] testValues = {0, 1, 127, 128, 16383, 16384, Integer.MAX_VALUE}; - - for (int value : testValues) { - ByteBuffer buffer = ByteBuffer.allocate(10); - VarInt.encode(value, buffer); - int encodedLength = buffer.position(); - - buffer.flip(); - VarInt.DecodeResult result = VarInt.decode(buffer); - - assert result.getValue() == value : "VarInt roundtrip failed for " + value; - assert result.getBytesRead() == encodedLength : "Bytes read mismatch for " + value; - } - - System.out.println("✓ VarInt functionality test passed"); - } - - static void testValueTypes() { - System.out.println("Testing Value types"); - - // Test all value types - Value nullVal = Value.nullValue(); - Value boolVal = Value.fromBoolean(true); - Value int32Val = Value.fromInt32(42); - Value int64Val = Value.fromInt64(123456789L); - Value float32Val = Value.fromFloat32(3.14f); - 
Value float64Val = Value.fromFloat64(2.718281828); - Value bytesVal = Value.fromBytes(new byte[]{1, 2, 3, 4}); - Value stringVal = Value.fromString("test"); - - // Test type codes - assert nullVal.getTypeCode() == TypeCode.NULL; - assert boolVal.getTypeCode() == TypeCode.BOOL; - assert int32Val.getTypeCode() == TypeCode.INT32; - assert int64Val.getTypeCode() == TypeCode.INT64; - assert float32Val.getTypeCode() == TypeCode.FLOAT32; - assert float64Val.getTypeCode() == TypeCode.FLOAT64; - assert bytesVal.getTypeCode() == TypeCode.BYTES; - assert stringVal.getTypeCode() == TypeCode.STRING; - - // Test value extraction - assert ((Value.BoolValue) boolVal).getValue(); - assert ((Value.Int32Value) int32Val).getValue() == 42; - assert ((Value.Int64Value) int64Val).getValue() == 123456789L; - assert ((Value.Float32Value) float32Val).getValue() == 3.14f; - assert ((Value.Float64Value) float64Val).getValue() == 2.718281828; - assert Arrays.equals(((Value.BytesValue) bytesVal).getValue(), new byte[]{1, 2, 3, 4}); - assert ((Value.StringValue) stringVal).getValue().equals("test"); - - System.out.println("✓ Value types test passed"); - } - - static void testMapKeys() throws ImprintException { - System.out.println("Testing MapKey functionality..."); - - MapKey int32Key = MapKey.fromInt32(42); - MapKey int64Key = MapKey.fromInt64(123L); - MapKey bytesKey = MapKey.fromBytes(new byte[]{1, 2, 3}); - MapKey stringKey = MapKey.fromString("test"); - - // Test conversion to/from Values - Value int32Value = int32Key.toValue(); - Value int64Value = int64Key.toValue(); - Value bytesValue = bytesKey.toValue(); - Value stringValue = stringKey.toValue(); - - assert MapKey.fromValue(int32Value).equals(int32Key); - assert MapKey.fromValue(int64Value).equals(int64Key); - assert MapKey.fromValue(bytesValue).equals(bytesKey); - assert MapKey.fromValue(stringValue).equals(stringKey); - - System.out.println("✓ MapKey functionality test passed"); - } - - static void testComplexSerialization() throws 
ImprintException { - System.out.println("Testing complex serialization..."); - - SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - ImprintWriter writer = new ImprintWriter(schemaId); - - // Create complex nested structure - List array = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - - Map map = new HashMap<>(); - map.put(MapKey.fromString("key1"), Value.fromString("value1")); - map.put(MapKey.fromString("key2"), Value.fromString("value2")); - - writer.addField(1, Value.fromArray(array)) - .addField(2, Value.fromMap(map)) - .addField(3, Value.fromString("complex test")); - - ImprintRecord record = writer.build(); - - // Test ByteBuffer serialization - ByteBuffer serialized = record.serializeToBuffer(); - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); - - // Verify complex structures - Value deserializedArray = deserialized.getValue(1).get(); - assert deserializedArray instanceof Value.ArrayValue; - List deserializedList = ((Value.ArrayValue) deserializedArray).getValue(); - assert deserializedList.size() == 3; - assert deserializedList.get(0).equals(Value.fromInt32(1)); - - Value deserializedMap = deserialized.getValue(2).get(); - assert deserializedMap instanceof Value.MapValue; - Map deserializedMapValue = ((Value.MapValue) deserializedMap).getValue(); - assert deserializedMapValue.size() == 2; - assert deserializedMapValue.get(MapKey.fromString("key1")).equals(Value.fromString("value1")); - - System.out.println("✓ Complex serialization test passed"); - } - - static void testErrorHandling() { - System.out.println("Testing error handling..."); - - try { - // Test invalid type code - TypeCode.fromByte((byte) 0xFF); - assert false : "Should have thrown exception for invalid type code"; - } catch (ImprintException e) { - assert e.getErrorType() == com.imprint.error.ErrorType.INVALID_TYPE_CODE; - } - - try { - // Test invalid magic byte - byte[] invalidData = new byte[15]; - invalidData[0] = 0x00; // wrong 
magic - ImprintRecord.deserialize(invalidData); - assert false : "Should have thrown exception for invalid magic"; - } catch (ImprintException e) { - assert e.getErrorType() == com.imprint.error.ErrorType.INVALID_MAGIC; - } - - System.out.println("✓ Error handling test passed"); - } - - static void testByteBufferPerformance() throws ImprintException { - System.out.println("Testing ByteBuffer performance benefits..."); - - // Create a record with moderate-sized data - byte[] testData = new byte[1024]; - for (int i = 0; i < testData.length; i++) { - testData[i] = (byte) (i % 256); - } - - SchemaId schemaId = new SchemaId(1, 0x12345678); - ImprintWriter writer = new ImprintWriter(schemaId); - writer.addField(1, Value.fromBytes(testData)) - .addField(2, Value.fromString("performance test")); - - ImprintRecord record = writer.build(); - - // Test that raw bytes access is zero-copy - Optional rawBytes = record.getRawBytes(1); - assert rawBytes.isPresent(); - assert rawBytes.get().isReadOnly(); - - // Test ByteBuffer serialization - ByteBuffer serialized = record.serializeToBuffer(); - assert serialized.isReadOnly(); - - // Verify deserialization works - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); - Value retrievedBytes = deserialized.getValue(1).get(); - assert Arrays.equals(((Value.BytesValue) retrievedBytes).getValue(), testData); - - System.out.println("✓ ByteBuffer performance test passed"); - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 49cfce7..565b7cd 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -39,10 +39,10 @@ static void testBasicFunctionality() throws ImprintException { ImprintRecord record = writer.build(); // Verify we can read values back - assert record.getValue(1).get().equals(Value.fromInt32(42)); - assert 
record.getValue(2).get().equals(Value.fromString("testing java imprint spec")); - assert record.getValue(3).get().equals(Value.fromBoolean(true)); - assert record.getValue(999).isEmpty(); // non-existent field + assert Objects.equals(record.getValue(1), Value.fromInt32(42)); + assert Objects.equals(record.getValue(2), Value.fromString("testing java imprint spec")); + assert Objects.equals(record.getValue(3), Value.fromBoolean(true)); + assert record.getValue(999) == null; // non-existent field // Test serialization round-trip var buffer = record.serializeToBuffer(); @@ -50,11 +50,11 @@ static void testBasicFunctionality() throws ImprintException { buffer.get(serialized); ImprintRecord deserialized = ImprintRecord.deserialize(serialized); - assert deserialized.getValue(1).get().equals(Value.fromInt32(42)); - assert deserialized.getValue(2).get().equals(Value.fromString("testing java imprint spec")); - assert deserialized.getValue(3).get().equals(Value.fromBoolean(true)); + assert Objects.equals(deserialized.getValue(1), Value.fromInt32(42)); + assert Objects.equals(deserialized.getValue(2), Value.fromString("testing java imprint spec")); + assert Objects.equals(deserialized.getValue(3), Value.fromBoolean(true)); - System.out.println("✓ Basic functionality test passed"); + System.out.println("Basic functionality test passed"); } static void testArraysAndMaps() throws ImprintException { @@ -87,14 +87,14 @@ static void testArraysAndMaps() throws ImprintException { ImprintRecord deserialized = ImprintRecord.deserialize(serialized); // Verify array - Value arrayValue = deserialized.getValue(1).get(); + Value arrayValue = deserialized.getValue(1); assert arrayValue instanceof Value.ArrayValue; List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); assert deserializedArray.size() == 3; assert deserializedArray.get(0).equals(Value.fromInt32(1)); // Verify map - Value mapValue = deserialized.getValue(2).get(); + Value mapValue = deserialized.getValue(2); assert 
mapValue instanceof Value.MapValue; Map deserializedMap = ((Value.MapValue) mapValue).getValue(); assert deserializedMap.size() == 2; @@ -127,17 +127,17 @@ static void testNestedRecords() throws ImprintException { ImprintRecord deserialized = ImprintRecord.deserialize(serialized); // Verify outer record - assert deserialized.getHeader().getSchemaId().getFieldspaceId() == 4; - assert deserialized.getValue(2).get().equals(Value.fromString("outer data")); + assert deserialized.getHeader().getSchemaId().getFieldSpaceId() == 4; + assert Objects.equals(deserialized.getValue(2), Value.fromString("outer data")); // Verify nested record - Value rowValue = deserialized.getValue(1).get(); + Value rowValue = deserialized.getValue(1); assert rowValue instanceof Value.RowValue; ImprintRecord nestedRecord = ((Value.RowValue) rowValue).getValue(); - assert nestedRecord.getHeader().getSchemaId().getFieldspaceId() == 3; - assert nestedRecord.getValue(1).get().equals(Value.fromString("nested data")); - assert nestedRecord.getValue(2).get().equals(Value.fromInt64(9876543210L)); + assert nestedRecord.getHeader().getSchemaId().getFieldSpaceId() == 3; + assert Objects.equals(nestedRecord.getValue(1), Value.fromString("nested data")); + assert Objects.equals(nestedRecord.getValue(2), Value.fromInt64(9876543210L)); System.out.println("✓ Nested records test passed"); } diff --git a/src/test/java/com/imprint/benchmark/ProfilerTest.java b/src/test/java/com/imprint/benchmark/ProfilerTest.java index 233d993..5b531a9 100644 --- a/src/test/java/com/imprint/benchmark/ProfilerTest.java +++ b/src/test/java/com/imprint/benchmark/ProfilerTest.java @@ -50,15 +50,15 @@ var record = createTestRecord(); // Random field access (hotspot) int fieldId = random.nextInt(RECORD_SIZE) + 1; var value = record.getValue(fieldId); - if (value.isPresent()) { + if (value != null) { hits++; // Trigger string decoding (potential hotspot) - if (value.get().getTypeCode() == com.imprint.types.TypeCode.STRING) { - if 
(value.get() instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value.get()).getValue(); + if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value).getValue(); } else { - ((Value.StringValue) value.get()).getValue(); + ((Value.StringValue) value).getValue(); } } } @@ -123,11 +123,11 @@ var record = createLargeRecord(); // Project 10 fields out of 100 (common analytical pattern) for (int fieldId = 1; fieldId <= 10; fieldId++) { var value = record.getValue(fieldId); - if (value.isPresent()) { + if (value != null) { // Force materialization of string values - if (value.get().getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value.get() instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value.get()).getValue(); + if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value).getValue(); } } } diff --git a/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java b/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java deleted file mode 100644 index 54dcfae..0000000 --- a/src/test/java/com/imprint/core/ImprintRecordBuilderTest.java +++ /dev/null @@ -1,234 +0,0 @@ -package com.imprint.core; - -import com.imprint.error.ImprintException; -import com.imprint.types.Value; -import org.junit.jupiter.api.Test; - -import java.util.*; - -import static org.assertj.core.api.Assertions.*; - -class ImprintRecordBuilderTest { - - private static final SchemaId TEST_SCHEMA = new SchemaId(1, 0x12345678); - - @Test - void shouldCreateRecordWithPrimitiveTypes() throws ImprintException { - var record = ImprintRecord.builder(TEST_SCHEMA) - .field(1, true) - .field(2, 42) - .field(3, 123L) - .field(4, 3.14f) - .field(5, 2.718) - .field(6, "hello world") - .field(7, new byte[]{1, 2, 3}) - .nullField(8) - .build(); - - 
assertThat(record.getHeader().getSchemaId()).isEqualTo(TEST_SCHEMA); - assertThat(record.getDirectory()).hasSize(8); - - // Verify field values - assertThat(getFieldValue(record, 1, Value.BoolValue.class).getValue()).isTrue(); - assertThat(getFieldValue(record, 2, Value.Int32Value.class).getValue()).isEqualTo(42); - assertThat(getFieldValue(record, 3, Value.Int64Value.class).getValue()).isEqualTo(123L); - assertThat(getFieldValue(record, 4, Value.Float32Value.class).getValue()).isEqualTo(3.14f); - assertThat(getFieldValue(record, 5, Value.Float64Value.class).getValue()).isEqualTo(2.718); - assertThat(getStringValue(record, 6)).isEqualTo("hello world"); - assertThat(getBytesValue(record, 7)).isEqualTo(new byte[]{1, 2, 3}); - assertThat(record.getValue(8).get()).isInstanceOf(Value.NullValue.class); - } - - @Test - void shouldCreateRecordWithCollections() throws ImprintException { - var list = List.of(1, 2, 3); - var map = Map.of("key1", 100, "key2", 200); - - var record = ImprintRecord.builder(TEST_SCHEMA) - .field(1, list) - .field(2, map) - .build(); - - // Verify array - var arrayValue = getFieldValue(record, 1, Value.ArrayValue.class); - assertThat(arrayValue.getValue()).hasSize(3); - assertThat(((Value.Int32Value) arrayValue.getValue().get(0)).getValue()).isEqualTo(1); - assertThat(((Value.Int32Value) arrayValue.getValue().get(1)).getValue()).isEqualTo(2); - assertThat(((Value.Int32Value) arrayValue.getValue().get(2)).getValue()).isEqualTo(3); - - // Verify map - var mapValue = getFieldValue(record, 2, Value.MapValue.class); - assertThat(mapValue.getValue()).hasSize(2); - } - - @Test - void shouldCreateRecordWithNestedRecord() throws ImprintException { - var nestedRecord = ImprintRecord.builder(new SchemaId(2, 0x87654321)) - .field(1, "nested") - .field(2, 999) - .build(); - - var record = ImprintRecord.builder(TEST_SCHEMA) - .field(1, "parent") - .field(2, nestedRecord) - .build(); - - var rowValue = getFieldValue(record, 2, Value.RowValue.class); - var nested 
= rowValue.getValue(); - assertThat(getStringValue(nested, 1)).isEqualTo("nested"); - assertThat(getFieldValue(nested, 2, Value.Int32Value.class).getValue()).isEqualTo(999); - } - - @Test - void shouldSupportConditionalFields() throws ImprintException { - boolean includeOptional = true; - String optionalValue = "optional"; - - var record = ImprintRecord.builder(TEST_SCHEMA) - .field(1, "required") - .fieldIf(includeOptional, 2, optionalValue) - .fieldIfNotNull(3, null) // Should not add field - .fieldIfNotNull(4, "not null") // Should add field - .build(); - - assertThat(record.getDirectory()).hasSize(3); // Only fields 1, 2, 4 - assertThat(getStringValue(record, 1)).isEqualTo("required"); - assertThat(getStringValue(record, 2)).isEqualTo("optional"); - assertThat(record.getValue(3)).isEmpty(); // Not added - assertThat(getStringValue(record, 4)).isEqualTo("not null"); - } - - @Test - void shouldSupportBulkOperations() throws ImprintException { - var fieldsMap = Map.of( - 1, "bulk1", - 2, 42, - 3, true - ); - - var record = ImprintRecord.builder(TEST_SCHEMA) - .fields(fieldsMap) - .field(4, "additional") - .build(); - - assertThat(record.getDirectory()).hasSize(4); - assertThat(getStringValue(record, 1)).isEqualTo("bulk1"); - assertThat(getFieldValue(record, 2, Value.Int32Value.class).getValue()).isEqualTo(42); - assertThat(getFieldValue(record, 3, Value.BoolValue.class).getValue()).isTrue(); - assertThat(getStringValue(record, 4)).isEqualTo("additional"); - } - - @Test - void shouldProvideBuilderUtilities() { - var builder = ImprintRecord.builder(TEST_SCHEMA) - .field(1, "test") - .field(2, 42); - - assertThat(builder.hasField(1)).isTrue(); - assertThat(builder.hasField(3)).isFalse(); - assertThat(builder.fieldCount()).isEqualTo(2); - assertThat(builder.fieldIds()).containsExactly(1, 2); - } - - @Test - void shouldSupportAlternativeSchemaConstructor() throws ImprintException { - var record = ImprintRecord.builder(1, 0x12345678) - .field(1, "test") - .build(); - - 
assertThat(record.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); - assertThat(record.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0x12345678); - } - - @Test - void shouldRoundTripThroughSerialization() throws ImprintException { - var original = ImprintRecord.builder(TEST_SCHEMA) - .field(1, "test string") - .field(2, 42) - .field(3, 3.14159) - .field(4, true) - .field(5, new byte[]{0x01, 0x02, 0x03}) - .build(); - - var serialized = original.serializeToBuffer(); - var deserialized = ImprintRecord.deserialize(serialized); - - assertThat(deserialized.getHeader().getSchemaId()).isEqualTo(TEST_SCHEMA); - assertThat(getStringValue(deserialized, 1)).isEqualTo("test string"); - assertThat(getFieldValue(deserialized, 2, Value.Int32Value.class).getValue()).isEqualTo(42); - assertThat(getFieldValue(deserialized, 3, Value.Float64Value.class).getValue()).isEqualTo(3.14159); - assertThat(getFieldValue(deserialized, 4, Value.BoolValue.class).getValue()).isTrue(); - assertThat(getBytesValue(deserialized, 5)).isEqualTo(new byte[]{0x01, 0x02, 0x03}); - } - - // Error cases - - @Test - void shouldRejectDuplicateFieldIds() { - assertThatThrownBy(() -> - ImprintRecord.builder(TEST_SCHEMA) - .field(1, "first") - .field(1, "duplicate") // Same field ID - ).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Field ID 1 already exists"); - } - - @Test - void shouldRejectEmptyRecord() { - assertThatThrownBy(() -> - ImprintRecord.builder(TEST_SCHEMA).build() - ).isInstanceOf(ImprintException.class) - .hasMessageContaining("Cannot build empty record"); - } - - @Test - void shouldRejectInvalidMapKeys() { - var mapWithInvalidKey = Map.of(3.14, "value"); // Double key not supported - - assertThatThrownBy(() -> - ImprintRecord.builder(TEST_SCHEMA) - .field(1, mapWithInvalidKey) - ).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Invalid map key type: Double"); - } - - @Test - void shouldRejectNullValueWithoutExplicitNullField() { - 
assertThatThrownBy(() -> - ImprintRecord.builder(TEST_SCHEMA) - .field(1, (Value) null) - ).isInstanceOf(NullPointerException.class) - .hasMessageContaining("Value cannot be null - use nullField()"); - } - - // Helper methods for cleaner test assertions - - private T getFieldValue(ImprintRecord record, int fieldId, Class valueType) throws ImprintException { - var value = record.getValue(fieldId); - assertThat(value).isPresent(); - assertThat(value.get()).isInstanceOf(valueType); - return valueType.cast(value.get()); - } - - private String getStringValue(ImprintRecord record, int fieldId) throws ImprintException { - var value = record.getValue(fieldId).get(); - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } else if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } else { - throw new AssertionError("Expected string value, got: " + value.getClass()); - } - } - - private byte[] getBytesValue(ImprintRecord record, int fieldId) throws ImprintException { - var value = record.getValue(fieldId).get(); - if (value instanceof Value.BytesValue) { - return ((Value.BytesValue) value).getValue(); - } else if (value instanceof Value.BytesBufferValue) { - return ((Value.BytesBufferValue) value).getValue(); - } else { - throw new AssertionError("Expected bytes value, got: " + value.getClass()); - } - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordTest.java b/src/test/java/com/imprint/core/ImprintRecordTest.java index 0772580..3e37473 100644 --- a/src/test/java/com/imprint/core/ImprintRecordTest.java +++ b/src/test/java/com/imprint/core/ImprintRecordTest.java @@ -34,20 +34,20 @@ var record = writer.build(); assertThat(record.getHeader().getSchemaId()).isEqualTo(schemaId); assertThat(record.getDirectory()).hasSize(2); - Optional field1 = record.getValue(1); - Optional field2 = record.getValue(2); + Value field1 = record.getValue(1); + Value 
field2 = record.getValue(2); - assertThat(field1).isPresent(); - assertThat(field1.get()).isInstanceOf(Value.Int32Value.class); - assertThat(((Value.Int32Value) field1.get()).getValue()).isEqualTo(42); + assertThat(field1).isNotNull(); + assertThat(field1).isInstanceOf(Value.Int32Value.class); + assertThat(((Value.Int32Value) field1).getValue()).isEqualTo(42); - assertThat(field2).isPresent(); - assertThat(field2.get().getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); - String stringValue = getStringValue(field2.get()); + assertThat(field2).isNotNull(); + assertThat(field2.getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); + String stringValue = getStringValue(field2); assertThat(stringValue).isEqualTo("hello"); - // Non-existent field should return empty - assertThat(record.getValue(999)).isEmpty(); + // Non-existent field should return null + assertThat(record.getValue(999)).isNull(); } @Test @@ -73,22 +73,22 @@ void shouldRoundtripThroughSerialization() throws ImprintException { var deserialized = ImprintRecord.deserialize(serialized); // Verify metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); + assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); assertThat(deserialized.getDirectory()).hasSize(8); // Verify all values - assertThat(deserialized.getValue(1)).contains(Value.nullValue()); - assertThat(deserialized.getValue(2)).contains(Value.fromBoolean(true)); - assertThat(deserialized.getValue(3)).contains(Value.fromInt32(42)); - assertThat(deserialized.getValue(4)).contains(Value.fromInt64(123456789L)); - assertThat(deserialized.getValue(5)).contains(Value.fromFloat32(3.14f)); - assertThat(deserialized.getValue(6)).contains(Value.fromFloat64(2.718281828)); - assertThat(deserialized.getValue(7)).contains(Value.fromBytes(new byte[]{1, 2, 3, 4})); - 
assertThat(deserialized.getValue(8)).contains(Value.fromString("test string")); + assertThat(deserialized.getValue(1)).isEqualTo(Value.nullValue()); + assertThat(deserialized.getValue(2)).isEqualTo(Value.fromBoolean(true)); + assertThat(deserialized.getValue(3)).isEqualTo(Value.fromInt32(42)); + assertThat(deserialized.getValue(4)).isEqualTo(Value.fromInt64(123456789L)); + assertThat(deserialized.getValue(5)).isEqualTo(Value.fromFloat32(3.14f)); + assertThat(deserialized.getValue(6)).isEqualTo(Value.fromFloat64(2.718281828)); + assertThat(deserialized.getValue(7)).isEqualTo(Value.fromBytes(new byte[]{1, 2, 3, 4})); + assertThat(deserialized.getValue(8)).isEqualTo(Value.fromString("test string")); // Non-existent field - assertThat(deserialized.getValue(999)).isEmpty(); + assertThat(deserialized.getValue(999)).isNull(); } @Test @@ -111,11 +111,11 @@ void shouldHandleArrays() throws ImprintException { buffer.get(serialized); var deserialized = ImprintRecord.deserialize(serialized); - Optional arrayValue = deserialized.getValue(1); - assertThat(arrayValue).isPresent(); - assertThat(arrayValue.get()).isInstanceOf(Value.ArrayValue.class); + Value arrayValue = deserialized.getValue(1); + assertThat(arrayValue).isNotNull(); + assertThat(arrayValue).isInstanceOf(Value.ArrayValue.class); - List deserializedArray = ((Value.ArrayValue) arrayValue.get()).getValue(); + List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); assertThat(deserializedArray).hasSize(3); assertThat(deserializedArray.get(0)).isEqualTo(Value.fromInt32(1)); assertThat(deserializedArray.get(1)).isEqualTo(Value.fromInt32(2)); @@ -140,11 +140,11 @@ var record = writer.build(); buffer.get(serialized); var deserialized = ImprintRecord.deserialize(serialized); - Optional mapValue = deserialized.getValue(1); - assertThat(mapValue).isPresent(); - assertThat(mapValue.get()).isInstanceOf(Value.MapValue.class); + Value mapValue = deserialized.getValue(1); + assertThat(mapValue).isNotNull(); + 
assertThat(mapValue).isInstanceOf(Value.MapValue.class); - Map deserializedMap = ((Value.MapValue) mapValue.get()).getValue(); + Map deserializedMap = ((Value.MapValue) mapValue).getValue(); assertThat(deserializedMap).hasSize(2); assertThat(deserializedMap.get(MapKey.fromString("key1"))).isEqualTo(Value.fromInt32(1)); assertThat(deserializedMap.get(MapKey.fromString("key2"))).isEqualTo(Value.fromInt32(2)); @@ -173,23 +173,23 @@ void shouldHandleNestedRecords() throws ImprintException { var deserialized = ImprintRecord.deserialize(serialized); // Verify outer record metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(1); + assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); // Verify nested record - Optional rowValue = deserialized.getValue(1); - assertThat(rowValue).isPresent(); - assertThat(rowValue.get()).isInstanceOf(Value.RowValue.class); + Value rowValue = deserialized.getValue(1); + assertThat(rowValue).isNotNull(); + assertThat(rowValue).isInstanceOf(Value.RowValue.class); - var nestedRecord = ((Value.RowValue) rowValue.get()).getValue(); - assertThat(nestedRecord.getHeader().getSchemaId().getFieldspaceId()).isEqualTo(2); + var nestedRecord = ((Value.RowValue) rowValue).getValue(); + assertThat(nestedRecord.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(2); assertThat(nestedRecord.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xcafebabe); - assertThat(nestedRecord.getValue(1)).contains(Value.fromInt32(42)); - assertThat(nestedRecord.getValue(2)).contains(Value.fromString("nested")); + assertThat(nestedRecord.getValue(1)).isEqualTo(Value.fromInt32(42)); + assertThat(nestedRecord.getValue(2)).isEqualTo(Value.fromString("nested")); // Verify outer record field - assertThat(deserialized.getValue(2)).contains(Value.fromInt64(123L)); + 
assertThat(deserialized.getValue(2)).isEqualTo(Value.fromInt64(123L)); } @Test @@ -227,6 +227,6 @@ void shouldHandleDuplicateFieldIds() throws ImprintException { var record = writer.build(); assertThat(record.getDirectory()).hasSize(1); - assertThat(record.getValue(1)).contains(Value.fromInt32(43)); + assertThat(record.getValue(1)).isEqualTo(Value.fromInt32(43)); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/ValueTest.java b/src/test/java/com/imprint/types/ValueTest.java index 9dd99c9..c400a44 100644 --- a/src/test/java/com/imprint/types/ValueTest.java +++ b/src/test/java/com/imprint/types/ValueTest.java @@ -2,6 +2,8 @@ import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -104,20 +106,113 @@ void shouldHandleEqualityCorrectly() { } @Test - void shouldDefensiveCopyArrays() { - byte[] original = {1, 2, 3}; - var bytesValue = Value.fromBytes(original); + void shouldRejectNullString() { + assertThatThrownBy(() -> Value.fromString(null)) + .isInstanceOf(NullPointerException.class); + } + + @Test + void shouldCreateStringBufferValue() { + String testString = "hello world"; + byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - // Modify original array - original[0] = 99; + Value stringBufferValue = Value.fromStringBuffer(buffer); - // Value should be unchanged - assertThat(((Value.BytesValue) bytesValue).getValue()).containsExactly(1, 2, 3); + assertThat(stringBufferValue).isInstanceOf(Value.StringBufferValue.class); + assertThat(stringBufferValue.getTypeCode()).isEqualTo(TypeCode.STRING); + assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); } @Test - void shouldRejectNullString() { - assertThatThrownBy(() -> Value.fromString(null)) - .isInstanceOf(NullPointerException.class); + void 
shouldCreateBytesBufferValue() { + byte[] testBytes = {1, 2, 3, 4, 5}; + ByteBuffer buffer = ByteBuffer.wrap(testBytes); + + Value bytesBufferValue = Value.fromBytesBuffer(buffer); + + assertThat(bytesBufferValue).isInstanceOf(Value.BytesBufferValue.class); + assertThat(bytesBufferValue.getTypeCode()).isEqualTo(TypeCode.BYTES); + assertThat(((Value.BytesBufferValue) bytesBufferValue).getValue()).isEqualTo(testBytes); + } + + @Test + void shouldHandleStringBufferValueFastPath() { + // Array-backed buffer with arrayOffset() == 0 should use fast path + String testString = "fast path test"; + byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); + + Value stringBufferValue = Value.fromStringBuffer(buffer); + + // Should work correctly regardless of path taken + assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); + } + + @Test + void shouldHandleStringBufferValueFallbackPath() { + // Sliced buffer will have non-zero arrayOffset, forcing fallback path + String testString = "fallback path test"; + byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); + ByteBuffer sliced = buffer.slice(); // This may break arrayOffset() == 0 + + Value stringBufferValue = Value.fromStringBuffer(sliced); + + // Should work correctly regardless of path taken + assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); + } + + @Test + void shouldHandleLargeStringWithoutCaching() { + // Create string > 1KB to test the no-cache path + String largeString = "x".repeat(2000); + byte[] utf8Bytes = largeString.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes).slice(); // Force fallback path + + Value stringBufferValue = Value.fromStringBuffer(buffer); + + assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(largeString); + } + + @Test + void 
shouldCacheStringDecoding() { + String testString = "cache test"; + byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); + + Value.StringBufferValue stringBufferValue = (Value.StringBufferValue) Value.fromStringBuffer(buffer); + + // First call should decode and cache + String result1 = stringBufferValue.getValue(); + // Second call should return cached value + String result2 = stringBufferValue.getValue(); + + assertThat(result1).isEqualTo(testString); + assertThat(result2).isEqualTo(testString); + assertThat(result1).isSameAs(result2); // Should be same object reference due to caching + } + + @Test + void shouldHandleStringValueEquality() { + String testString = "equality test"; + + Value stringValue = Value.fromString(testString); + Value stringBufferValue = Value.fromStringBuffer(ByteBuffer.wrap(testString.getBytes(StandardCharsets.UTF_8))); + + assertThat(stringValue).isEqualTo(stringBufferValue); + assertThat(stringBufferValue).isEqualTo(stringValue); + assertThat(stringValue.hashCode()).isEqualTo(stringBufferValue.hashCode()); + } + + @Test + void shouldHandleBytesValueEquality() { + byte[] testBytes = {1, 2, 3, 4, 5}; + + Value bytesValue = Value.fromBytes(testBytes); + Value bytesBufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes)); + + assertThat(bytesValue).isEqualTo(bytesBufferValue); + assertThat(bytesBufferValue).isEqualTo(bytesValue); } } \ No newline at end of file From f9755d2e7dbb76550fbd9fe83af59b8c1e4c46f5 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 3 Jun 2025 18:44:27 -0400 Subject: [PATCH 14/53] additional cleanup to address concerns in https://github.com/imprint-serde/imprint-java/issues/3 --- .../java/com/imprint/core/ImprintRecord.java | 283 +++++++++++--- .../imprint/core/ImprintRecordBuilder.java | 73 ++-- .../java/com/imprint/error/ErrorType.java | 3 +- .../java/com/imprint/types/TypeHandler.java | 5 +- src/main/java/com/imprint/util/VarInt.java | 27 +- 
.../imprint/ByteBufferIntegrationTest.java | 180 --------- .../java/com/imprint/IntegrationTest.java | 368 +++++++++++++----- 7 files changed, 526 insertions(+), 413 deletions(-) delete mode 100644 src/test/java/com/imprint/ByteBufferIntegrationTest.java diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 1915707..d667039 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -3,6 +3,7 @@ import com.imprint.Constants; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; +import com.imprint.types.MapKey; import com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; @@ -15,7 +16,7 @@ /** * An Imprint record containing a header, field directory, and payload. * Uses ByteBuffer for zero-copy operations to achieve low latency. - * + * *

Performance Note: All ByteBuffers should be array-backed * (hasArray() == true) for optimal zero-copy performance. Direct buffers * may cause performance degradation.

@@ -25,10 +26,10 @@ public final class ImprintRecord { private final Header header; private final List directory; private final ByteBuffer payload; // Read-only view for zero-copy - + /** * Creates a new ImprintRecord. - * + * * @param payload the payload buffer. Should be array-backed for optimal performance. */ public ImprintRecord(Header header, List directory, ByteBuffer payload) { @@ -40,15 +41,29 @@ public ImprintRecord(Header header, List directory, ByteBuffer p /** * Get a value by field ID, deserializing it on demand. * Returns null if the field is not found. + * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE. */ public Value getValue(int fieldId) throws ImprintException { var fieldBuffer = getFieldBuffer(fieldId); - if (fieldBuffer == null) return null; - - var entry = directory.get(findDirectoryIndex(fieldId)); + if (fieldBuffer == null) { + // If getFieldBuffer returns null, it means the fieldId was not in the directory, + // or an issue occurred slicing the payload (e.g., bad offsets). + return null; + } + + // findDirectoryIndex should not be negative here if fieldBuffer is not null, + // but a defensive check or ensuring findDirectoryIndex is called once is good. + // For simplicity, assume getFieldBuffer implies a valid index. + int directoryIndex = findDirectoryIndex(fieldId); + if (directoryIndex < 0) { + // This case should ideally be caught by getFieldBuffer returning null. + // If it happens, indicates an inconsistency. + throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); + } + var entry = directory.get(directoryIndex); return deserializeValue(entry.getTypeCode(), fieldBuffer); } - + /** * Get the raw bytes for a field without deserializing. * Returns a zero-copy ByteBuffer view, or null if field not found. 
@@ -57,7 +72,7 @@ public ByteBuffer getRawBytes(int fieldId) { var fieldBuffer = getFieldBuffer(fieldId); return fieldBuffer != null ? fieldBuffer.asReadOnlyBuffer() : null; } - + /** * Get a ByteBuffer view of a field's data. * Returns null if the field is not found. @@ -65,12 +80,12 @@ public ByteBuffer getRawBytes(int fieldId) { private ByteBuffer getFieldBuffer(int fieldId) { int index = findDirectoryIndex(fieldId); if (index < 0) return null; - + var entry = directory.get(index); int startOffset = entry.getOffset(); - int endOffset = (index + 1 < directory.size()) ? - directory.get(index + 1).getOffset() : payload.limit(); - + int endOffset = (index + 1 < directory.size()) ? + directory.get(index + 1).getOffset() : payload.limit(); + var fieldBuffer = payload.duplicate(); if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { return null; @@ -78,104 +93,106 @@ private ByteBuffer getFieldBuffer(int fieldId) { fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer.slice(); } - + /** - * Serialize this record to a ByteBuffer (zero-copy when possible). + * Serialize this record to a ByteBuffer. + * The returned buffer will be array-backed. */ public ByteBuffer serializeToBuffer() { var buffer = ByteBuffer.allocate(estimateSerializedSize()); buffer.order(ByteOrder.LITTLE_ENDIAN); - + // Write header serializeHeader(buffer); - + // Write directory (always present) VarInt.encode(directory.size(), buffer); for (var entry : directory) { serializeDirectoryEntry(entry, buffer); } - + // Write payload (shallow copy only) var payloadCopy = payload.duplicate(); buffer.put(payloadCopy); - - // Return read-only view of used portion + + // Prepare buffer for reading buffer.flip(); - return buffer.asReadOnlyBuffer(); + return buffer; } - + /** * Create a fluent builder for constructing ImprintRecord instances. 
- * + * * @param schemaId the schema identifier for this record * @return a new builder instance */ public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); } - + /** * Create a fluent builder for constructing ImprintRecord instances. - * + * * @param fieldspaceId the fieldspace identifier * @param schemaHash the schema hash * @return a new builder instance */ + @SuppressWarnings("unused") public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } - + /** * Deserialize a record from bytes through an array backed ByteBuffer. */ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { return deserialize(ByteBuffer.wrap(bytes)); } - + /** * Deserialize a record from a ByteBuffer. - * - * @param buffer the buffer to deserialize from. Must be array-backed + * + * @param buffer the buffer to deserialize from. Must be array-backed * (buffer.hasArray() == true) for optimal zero-copy performance. */ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); - + // Read header var header = deserializeHeader(buffer); - + // Read directory (always present) var directory = new ArrayList(); VarInt.DecodeResult countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); - + for (int i = 0; i < directoryCount; i++) { directory.add(deserializeDirectoryEntry(buffer)); } - + // Read payload as ByteBuffer slice for zero-copy var payload = buffer.slice(); payload.limit(header.getPayloadSize()); buffer.position(buffer.position() + header.getPayloadSize()); - + return new ImprintRecord(header, directory, payload); } - + /** * Binary search for field ID in directory without object allocation. * Returns the index of the field if found, or a negative value if not found. 
- * + * * @param fieldId the field ID to search for * @return index if found, or negative insertion point - 1 if not found */ private int findDirectoryIndex(int fieldId) { int low = 0; int high = directory.size() - 1; - + while (low <= high) { int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow int midFieldId = directory.get(mid).getId(); - + if (midFieldId < fieldId) { low = mid + 1; } else if (midFieldId > fieldId) { @@ -186,7 +203,7 @@ private int findDirectoryIndex(int fieldId) { } return -(low + 1); // field not found, return insertion point } - + public int estimateSerializedSize() { int size = Constants.HEADER_BYTES; // header size += VarInt.encodedLength(directory.size()); // directory count @@ -194,7 +211,7 @@ public int estimateSerializedSize() { size += payload.remaining(); // payload return size; } - + private void serializeHeader(ByteBuffer buffer) { buffer.put(Constants.MAGIC); buffer.put(Constants.VERSION); @@ -203,57 +220,57 @@ private void serializeHeader(ByteBuffer buffer) { buffer.putInt(header.getSchemaId().getSchemaHash()); buffer.putInt(header.getPayloadSize()); } - + private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.HEADER_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for header"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for header"); } - + byte magic = buffer.get(); if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, - "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); + throw new ImprintException(ErrorType.INVALID_MAGIC, + "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + + ", got 0x" + Integer.toHexString(magic & 0xFF)); } - + byte version = buffer.get(); if (version != Constants.VERSION) { - throw new 
ImprintException(ErrorType.UNSUPPORTED_VERSION, - "Unsupported version: " + version); + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, + "Unsupported version: " + version); } - + var flags = new Flags(buffer.get()); int fieldspaceId = buffer.getInt(); int schemaHash = buffer.getInt(); int payloadSize = buffer.getInt(); - + return new Header(flags, new SchemaId(fieldspaceId, schemaHash), payloadSize); } - + private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { buffer.putShort(entry.getId()); buffer.put(entry.getTypeCode().getCode()); buffer.putInt(entry.getOffset()); } - + private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for directory entry"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for directory entry"); } - + short id = buffer.getShort(); var typeCode = TypeCode.fromByte(buffer.get()); int offset = buffer.getInt(); - + return new DirectoryEntry(id, typeCode, offset); } - + private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { ByteBuffer valueSpecificBuffer = buffer.duplicate(); valueSpecificBuffer.order(ByteOrder.LITTLE_ENDIAN); - + switch (typeCode) { case NULL: case BOOL: @@ -274,10 +291,152 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - + @Override public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, directory.size(), payload.remaining()); + return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", + header, directory.size(), payload.remaining()); + } + + private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class 
expectedValueClass, String expectedTypeName) throws ImprintException { + Value value = getValue(fieldId); + + if (value == null) { + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, + "Field " + fieldId + " not found, cannot retrieve as " + expectedTypeName + "."); + } + + if (value.getTypeCode() == TypeCode.NULL) { + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is NULL, cannot retrieve as " + expectedTypeName + "."); + } + + if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) { + return expectedValueClass.cast(value); + } + + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName + "."); + } + + /** + * Retrieves the boolean value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type BOOL. + */ + public boolean getBoolean(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + } + + /** + * Retrieves the int (int32) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type INT32. + */ + public int getInt32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + } + + /** + * Retrieves the long (int64) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type INT64. + */ + public long getInt64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + } + + /** + * Retrieves the float (float32) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type FLOAT32. 
+ */ + public float getFloat32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + } + + /** + * Retrieves the double (float64) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type FLOAT64. + */ + public double getFloat64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + } + + /** + * Retrieves the String value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type STRING. + */ + public String getString(int fieldId) throws ImprintException { + Value value = getValue(fieldId); + + if (value == null) { + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, + "Field " + fieldId + " not found, cannot retrieve as String."); + } + if (value.getTypeCode() == TypeCode.NULL) { + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is NULL, cannot retrieve as String."); + } + + if (value instanceof Value.StringValue) { + return ((Value.StringValue) value).getValue(); + } + if (value instanceof Value.StringBufferValue) { + return ((Value.StringBufferValue) value).getValue(); + } + + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected STRING."); + } + + /** + * Retrieves the byte array (byte[]) value for the given field ID. + * Note: This may involve a defensive copy depending on the underlying Value type. + * @throws ImprintException if the field is not found, is null, or is not of type BYTES. 
+ */ + public byte[] getBytes(int fieldId) throws ImprintException { + Value value = getValue(fieldId); + + if (value == null) { + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, + "Field " + fieldId + " not found, cannot retrieve as byte[]."); + } + if (value.getTypeCode() == TypeCode.NULL) { + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is NULL, cannot retrieve as byte[]."); + } + + if (value instanceof Value.BytesValue) { + return ((Value.BytesValue) value).getValue(); // getValue() in BytesValue returns a clone + } + if (value instanceof Value.BytesBufferValue) { + return ((Value.BytesBufferValue) value).getValue(); // getValue() in BytesBufferValue creates a new array + } + + throw new ImprintException(ErrorType.TYPE_MISMATCH, + "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected BYTES."); + } + + /** + * Retrieves the List for the given field ID. + * The list itself is a copy; modifications to it will not affect the record. + * @throws ImprintException if the field is not found, is null, or is not of type ARRAY. + */ + public List getArray(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + } + + /** + * Retrieves the Map for the given field ID. + * The map itself is a copy; modifications to it will not affect the record. + * @throws ImprintException if the field is not found, is null, or is not of type MAP. + */ + public Map getMap(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + } + + /** + * Retrieves the nested ImprintRecord for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type ROW. 
+ */ + public ImprintRecord getRow(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 8c04bf5..51a3525 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -31,40 +31,40 @@ public final class ImprintRecordBuilder { private final SchemaId schemaId; private final Map fields = new TreeMap<>(); - + ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); } - + // Primitive types with automatic Value wrapping public ImprintRecordBuilder field(int id, boolean value) { return addField(id, Value.fromBoolean(value)); } - + public ImprintRecordBuilder field(int id, int value) { return addField(id, Value.fromInt32(value)); } - + public ImprintRecordBuilder field(int id, long value) { return addField(id, Value.fromInt64(value)); } - + public ImprintRecordBuilder field(int id, float value) { return addField(id, Value.fromFloat32(value)); } - + public ImprintRecordBuilder field(int id, double value) { return addField(id, Value.fromFloat64(value)); } - + public ImprintRecordBuilder field(int id, String value) { return addField(id, Value.fromString(value)); } - + public ImprintRecordBuilder field(int id, byte[] value) { return addField(id, Value.fromBytes(value)); } - + // Collections with automatic conversion public ImprintRecordBuilder field(int id, List values) { var convertedValues = new ArrayList(values.size()); @@ -73,7 +73,7 @@ public ImprintRecordBuilder field(int id, List values) { } return addField(id, Value.fromArray(convertedValues)); } - + public ImprintRecordBuilder field(int id, Map map) { var convertedMap = new HashMap(map.size()); for (var entry : map.entrySet()) { @@ -83,22 
+83,22 @@ public ImprintRecordBuilder field(int id, Map fieldsMap) { for (var entry : fieldsMap.entrySet()) { @@ -118,58 +118,53 @@ public ImprintRecordBuilder fields(Map fieldsMap) { } return this; } - + // Builder utilities public boolean hasField(int id) { return fields.containsKey(id); } - + public int fieldCount() { return fields.size(); } - + public Set fieldIds() { return new TreeSet<>(fields.keySet()); } - + // Build the final record public ImprintRecord build() throws ImprintException { - if (fields.isEmpty()) { - throw new ImprintException(com.imprint.error.ErrorType.SCHEMA_ERROR, - "Cannot build empty record - add at least one field"); - } - var writer = new ImprintWriter(schemaId); for (var entry : fields.entrySet()) { writer.addField(entry.getKey(), entry.getValue()); } return writer.build(); } - + // Internal helper methods /** * Adds or overwrites a field in the record being built. * If a field with the given ID already exists, it will be replaced. - * + * * @param id the field ID * @param value the field value (cannot be null - use nullField() for explicit nulls) * @return this builder for method chaining */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - fields.put(id, value); // TreeMap.put() overwrites existing values + fields.put(id, value); return this; } - + private Value convertToValue(Object obj) { if (obj == null) { return Value.nullValue(); } - + if (obj instanceof Value) { return (Value) obj; } - + // Auto-boxing conversion if (obj instanceof Boolean) { return Value.fromBoolean((Boolean) obj); @@ -216,11 +211,11 @@ private Value convertToValue(Object obj) { if (obj instanceof ImprintRecord) { return Value.fromRow((ImprintRecord) obj); } - - throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + - " to Imprint Value. 
Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); + + throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + + " to Imprint Value. Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); } - + private MapKey convertToMapKey(Object obj) { if (obj instanceof Integer) { return MapKey.fromInt32((Integer) obj); @@ -234,11 +229,11 @@ private MapKey convertToMapKey(Object obj) { if (obj instanceof byte[]) { return MapKey.fromBytes((byte[]) obj); } - - throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + - ". Map keys must be int, long, String, or byte[]"); + + throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + + ". Map keys must be int, long, String, or byte[]"); } - + @Override public String toString() { return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); diff --git a/src/main/java/com/imprint/error/ErrorType.java b/src/main/java/com/imprint/error/ErrorType.java index 97b9772..49784ef 100644 --- a/src/main/java/com/imprint/error/ErrorType.java +++ b/src/main/java/com/imprint/error/ErrorType.java @@ -14,5 +14,6 @@ public enum ErrorType { TYPE_MISMATCH, INVALID_TYPE_CODE, SERIALIZATION_ERROR, - DESERIALIZATION_ERROR + DESERIALIZATION_ERROR, + INTERNAL_ERROR } diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 573aed3..d21403b 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -20,8 +20,7 @@ public interface TypeHandler { ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException; // Helper method to eliminate duplication in ARRAY/MAP readValueBytes - static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, - ComplexValueMeasurer measurer) throws ImprintException { + 
static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, BufferViewer measurer) throws ImprintException { int initialPosition = buffer.position(); ByteBuffer tempBuffer = buffer.duplicate(); tempBuffer.order(buffer.order()); @@ -58,7 +57,7 @@ static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, } @FunctionalInterface - interface ComplexValueMeasurer { + interface BufferViewer { int measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; } diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index f4c22f2..75bd132 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -6,20 +6,21 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import lombok.experimental.UtilityClass; + import java.nio.ByteBuffer; /** * Utility class for encoding and decoding variable-length integers (VarInt). * Supports encoding/decoding of 32-bit unsigned integers. */ +@UtilityClass public final class VarInt { private static final byte CONTINUATION_BIT = (byte) 0x80; private static final byte SEGMENT_BITS = 0x7f; private static final int MAX_VARINT_LEN = 5; // Enough for u32 - private VarInt() {} // utility class - /** * Encode a 32-bit unsigned integer as a VarInt into the given ByteBuffer. @@ -103,27 +104,7 @@ public static int encodedLength(int value) { return length; } - - /** - * Read VarInt-prefixed data from a ByteBuffer. - * The data format is: VarInt(length) + data(length bytes). - * Returns a read-only ByteBuffer containing the entire VarInt + data. 
- * - * @param buffer the buffer to read from - * @return a read-only ByteBuffer view of the VarInt + data - * @throws ImprintException if the VarInt is malformed or buffer underflow - */ - public static ByteBuffer readVarIntPrefixedBytes(ByteBuffer buffer) throws ImprintException { - int originalPosition = buffer.position(); - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int totalLength = lengthResult.getBytesRead() + lengthResult.getValue(); - buffer.position(originalPosition); - var valueBuffer = buffer.slice(); - valueBuffer.limit(totalLength); - buffer.position(buffer.position() + totalLength); - return valueBuffer.asReadOnlyBuffer(); - } - + /** * Result of a VarInt decode operation. */ diff --git a/src/test/java/com/imprint/ByteBufferIntegrationTest.java b/src/test/java/com/imprint/ByteBufferIntegrationTest.java deleted file mode 100644 index 56ec3b0..0000000 --- a/src/test/java/com/imprint/ByteBufferIntegrationTest.java +++ /dev/null @@ -1,180 +0,0 @@ -package com.imprint; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; -import com.imprint.core.SchemaId; -import com.imprint.error.ImprintException; -import com.imprint.types.MapKey; -import com.imprint.types.Value; - -import java.nio.ByteBuffer; -import java.util.*; - -/** - * Integration test to verify ByteBuffer functionality and zero-copy benefits. 
- */ -class ByteBufferIntegrationTest { - - public static void main(String[] args) { - try { - testByteBufferFunctionality(); - testZeroCopy(); - testArrayBackedBuffers(); - System.out.println("All ByteBuffer integration tests passed!"); - } catch (Exception e) { - System.err.println("ByteBuffer integration test failed: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - - static void testByteBufferFunctionality() throws ImprintException { - System.out.println("Testing ByteBuffer functionality..."); - - SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - ImprintWriter writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("zero-copy test")) - .addField(3, Value.fromBytes(new byte[]{1, 2, 3, 4, 5})); - - ImprintRecord record = writer.build(); - - // Test ByteBuffer serialization - ByteBuffer serializedBuffer = record.serializeToBuffer(); - assert serializedBuffer.isReadOnly() : "Serialized buffer should be read-only"; - - // Test deserialization from ByteBuffer - ImprintRecord deserialized = ImprintRecord.deserialize(serializedBuffer); - - assert Objects.equals(deserialized.getValue(1), Value.fromInt32(42)); - assert Objects.equals(deserialized.getValue(2), Value.fromString("zero-copy test")); - - // Test raw bytes access returns ByteBuffer - var rawBytes = deserialized.getRawBytes(1); - assert rawBytes != null : "Raw bytes should be present for field 1"; - assert rawBytes.isReadOnly() : "Raw bytes buffer should be read-only"; - - System.out.println("ByteBuffer functionality test passed"); - } - - static void testZeroCopy() { - System.out.println("Testing zero-copy"); - - // Create a large payload to demonstrate zero-copy benefits - byte[] largePayload = new byte[1024 * 1024]; // 1MB - Arrays.fill(largePayload, (byte) 0xAB); - - SchemaId schemaId = new SchemaId(2, 0xcafebabe); - ImprintWriter writer = new ImprintWriter(schemaId); - - try { - writer.addField(1, 
Value.fromBytes(largePayload)); - ImprintRecord record = writer.build(); - - // Test that getRawBytes returns a view, not a copy - var rawBytes = record.getRawBytes(1); - assert rawBytes !=null : "Raw bytes should be present"; - assert rawBytes.isReadOnly() : "Raw buffer should be read-only"; - - // The buffer should be positioned at the start of the actual data - // (after the VarInt length prefix) - assert rawBytes.remaining() > largePayload.length : "Buffer should include length prefix"; - - System.out.println("Zero-copy benefits test passed"); - - } catch (ImprintException e) { - throw new RuntimeException("Failed zero-copy test", e); - } - } - - static void testArrayBackedBuffers() throws ImprintException { - System.out.println("Testing array-backed buffers for zero-copy performance..."); - - // Test serialized buffers are array-backed - SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - ImprintWriter writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("test string")) - .addField(3, Value.fromBytes(new byte[]{1, 2, 3, 4})) - .addField(4, Value.fromBoolean(true)); - - ImprintRecord record = writer.build(); - - // Test that serializeToBuffer() returns array-backed buffer - ByteBuffer serializedBuffer = record.serializeToBuffer(); - assert serializedBuffer.hasArray() : "Serialized buffer should be array-backed for zero-copy performance"; - - // Test that the internal payload is array-backed - assert record.getPayload().hasArray() : "Record payload should be array-backed for zero-copy performance"; - - // Test deserialized buffers are array-backed - byte[] bytes = new byte[serializedBuffer.remaining()]; - serializedBuffer.get(bytes); - ImprintRecord deserialized = ImprintRecord.deserialize(bytes); - - assert deserialized.getPayload().hasArray() : "Deserialized record payload should be array-backed"; - - // Test that getRawBytes() returns array-backed buffers - ByteBuffer rawBytes1 = 
deserialized.getRawBytes(1); - ByteBuffer rawBytes2 = deserialized.getRawBytes(2); - - assert rawBytes1 != null && rawBytes1.hasArray() : "Raw bytes buffer for int field should be array-backed"; - assert rawBytes2 != null && rawBytes2.hasArray() : "Raw bytes buffer for string field should be array-backed"; - - // Test complex types use array-backed buffers - ImprintWriter complexWriter = new ImprintWriter(new SchemaId(2, 0xcafebabe)); - - // Create array value - List arrayValues = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - - // Create map value - Map mapValues = new HashMap<>(); - mapValues.put(MapKey.fromString("key1"), Value.fromString("value1")); - mapValues.put(MapKey.fromString("key2"), Value.fromString("value2")); - - complexWriter.addField(1, Value.fromArray(arrayValues)) - .addField(2, Value.fromMap(mapValues)); - - ImprintRecord complexRecord = complexWriter.build(); - - assert complexRecord.getPayload().hasArray() : "Record with complex types should use array-backed payload"; - - ByteBuffer complexSerialized = complexRecord.serializeToBuffer(); - assert complexSerialized.hasArray() : "Serialized buffer with complex types should be array-backed"; - - // Test nested records use array-backed buffers - ImprintWriter innerWriter = new ImprintWriter(new SchemaId(3, 0x12345678)); - innerWriter.addField(1, Value.fromString("nested data")); - ImprintRecord innerRecord = innerWriter.build(); - - ImprintWriter outerWriter = new ImprintWriter(new SchemaId(4, 0x87654321)); - outerWriter.addField(1, Value.fromRow(innerRecord)); - ImprintRecord outerRecord = outerWriter.build(); - - assert innerRecord.getPayload().hasArray() : "Inner record payload should be array-backed"; - assert outerRecord.getPayload().hasArray() : "Outer record payload should be array-backed"; - - ByteBuffer nestedSerialized = outerRecord.serializeToBuffer(); - assert nestedSerialized.hasArray() : "Serialized nested record should be array-backed"; - - // 
Test builder pattern uses array-backed buffers - ImprintRecord builderRecord = ImprintRecord.builder(1, 0xabcdef00) - .field(1, "test string") - .field(2, 42) - .field(3, new byte[]{1, 2, 3}) - .build(); - - assert builderRecord.getPayload().hasArray() : "Builder-created record should use array-backed payload"; - - ByteBuffer builderSerialized = builderRecord.serializeToBuffer(); - assert builderSerialized.hasArray() : "Builder-created serialized buffer should be array-backed"; - - System.out.println("✓ Array-backed buffers test passed"); - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 565b7cd..76efcc5 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -2,143 +2,301 @@ import com.imprint.core.*; import com.imprint.types.*; +import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; + import java.util.*; +import static org.junit.jupiter.api.Assertions.*; + /** - * Integration test to verify the complete Java implementation works. - * This can be run as a simple main method without JUnit. + * Integration tests for Imprint core functionality. */ public class IntegrationTest { - - public static void main(String[] args) { - try { - testBasicFunctionality(); - testArraysAndMaps(); - testNestedRecords(); - System.out.println("All integration tests passed!"); - } catch (Exception e) { - System.err.println("Integration test failed: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - - static void testBasicFunctionality() throws ImprintException { - System.out.println("Testing basic functionality..."); - + + // Removed main method, individual methods are now JUnit tests. 
+ + @Test + @DisplayName("Basic functionality: create, serialize, deserialize primitive types") + void testBasicFunctionality() throws ImprintException { + System.out.println("Testing basic functionality..."); // Keep for now if desired, or remove + SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - ImprintWriter writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("testing java imprint spec")) - .addField(3, Value.fromBoolean(true)) - .addField(4, Value.fromFloat64(3.14159)) - .addField(5, Value.fromBytes(new byte[]{1, 2, 3, 4})); - - ImprintRecord record = writer.build(); - - // Verify we can read values back - assert Objects.equals(record.getValue(1), Value.fromInt32(42)); - assert Objects.equals(record.getValue(2), Value.fromString("testing java imprint spec")); - assert Objects.equals(record.getValue(3), Value.fromBoolean(true)); - assert record.getValue(999) == null; // non-existent field - + // Using ImprintRecordBuilder for consistency with other tests + ImprintRecord record = ImprintRecord.builder(schemaId) + .field(1, 42) + .field(2, "testing java imprint spec") + .field(3, true) + .field(4, 3.14159) // double + .field(5, new byte[]{1, 2, 3, 4}) + .build(); + + // Verify we can read values back using ergonomic getters + assertEquals(42, record.getInt32(1)); + assertEquals("testing java imprint spec", record.getString(2)); + assertTrue(record.getBoolean(3)); + assertEquals(3.14159, record.getFloat64(4)); + assertArrayEquals(new byte[]{1,2,3,4}, record.getBytes(5)); + + assertNull(record.getValue(999), "Non-existent field should return null from getValue()"); + assertThrows(ImprintException.class, () -> record.getInt32(999), "Accessing non-existent field with getInt32 should throw"); + // Test serialization round-trip var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); ImprintRecord deserialized = ImprintRecord.deserialize(serialized); 
- - assert Objects.equals(deserialized.getValue(1), Value.fromInt32(42)); - assert Objects.equals(deserialized.getValue(2), Value.fromString("testing java imprint spec")); - assert Objects.equals(deserialized.getValue(3), Value.fromBoolean(true)); - + + assertEquals(42, deserialized.getInt32(1)); + assertEquals("testing java imprint spec", deserialized.getString(2)); + assertTrue(deserialized.getBoolean(3)); + assertEquals(3.14159, deserialized.getFloat64(4)); + assertArrayEquals(new byte[]{1,2,3,4}, deserialized.getBytes(5)); + System.out.println("Basic functionality test passed"); } - - static void testArraysAndMaps() throws ImprintException { + + @Test + @DisplayName("Collections: create, serialize, deserialize arrays and maps") + void testArraysAndMaps() throws ImprintException { System.out.println("Testing arrays and maps..."); - + SchemaId schemaId = new SchemaId(2, 0xcafebabe); - ImprintWriter writer = new ImprintWriter(schemaId); - - // Create an array - List intArray = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - + + // Create an array using builder for convenience + List sourceIntList = Arrays.asList(1, 2, 3); + // Create a map - Map stringToIntMap = new HashMap<>(); - stringToIntMap.put(MapKey.fromString("one"), Value.fromInt32(1)); - stringToIntMap.put(MapKey.fromString("two"), Value.fromInt32(2)); - - writer.addField(1, Value.fromArray(intArray)) - .addField(2, Value.fromMap(stringToIntMap)); - - ImprintRecord record = writer.build(); - + Map sourceStringToIntMap = new HashMap<>(); + sourceStringToIntMap.put("one", 1); + sourceStringToIntMap.put("two", 2); + + ImprintRecord record = ImprintRecord.builder(schemaId) + .field(1, sourceIntList) // Builder converts List to List + .field(2, sourceStringToIntMap) // Builder converts Map + .build(); + // Test serialization round-trip var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); ImprintRecord 
deserialized = ImprintRecord.deserialize(serialized); - + // Verify array - Value arrayValue = deserialized.getValue(1); - assert arrayValue instanceof Value.ArrayValue; - List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); - assert deserializedArray.size() == 3; - assert deserializedArray.get(0).equals(Value.fromInt32(1)); - + List deserializedArray = deserialized.getArray(1); + assertNotNull(deserializedArray); + assertEquals(3, deserializedArray.size()); + assertEquals(Value.fromInt32(1), deserializedArray.get(0)); + assertEquals(Value.fromInt32(2), deserializedArray.get(1)); + assertEquals(Value.fromInt32(3), deserializedArray.get(2)); + // Verify map - Value mapValue = deserialized.getValue(2); - assert mapValue instanceof Value.MapValue; - Map deserializedMap = ((Value.MapValue) mapValue).getValue(); - assert deserializedMap.size() == 2; - assert deserializedMap.get(MapKey.fromString("one")).equals(Value.fromInt32(1)); - - System.out.println("✓ Arrays and maps test passed"); + Map deserializedMap = deserialized.getMap(2); + assertNotNull(deserializedMap); + assertEquals(2, deserializedMap.size()); + assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one"))); + assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two"))); + + System.out.println("Arrays and maps test passed"); } - - static void testNestedRecords() throws ImprintException { + + @Test + @DisplayName("Nested Records: create, serialize, deserialize records within records") + void testNestedRecords() throws ImprintException { System.out.println("Testing nested records..."); - - // Create inner record + SchemaId innerSchemaId = new SchemaId(3, 0x12345678); - ImprintWriter innerWriter = new ImprintWriter(innerSchemaId); - innerWriter.addField(1, Value.fromString("nested data")) - .addField(2, Value.fromInt64(9876543210L)); - ImprintRecord innerRecord = innerWriter.build(); - - // Create outer record + ImprintRecord innerRecord = 
ImprintRecord.builder(innerSchemaId) + .field(1, "nested data") + .field(2, 9876543210L) + .build(); + SchemaId outerSchemaId = new SchemaId(4, 0x87654321); - ImprintWriter outerWriter = new ImprintWriter(outerSchemaId); - outerWriter.addField(1, Value.fromRow(innerRecord)) - .addField(2, Value.fromString("outer data")); - ImprintRecord outerRecord = outerWriter.build(); - - // Test serialization round-trip + ImprintRecord outerRecord = ImprintRecord.builder(outerSchemaId) + .field(1, innerRecord) // Builder handles ImprintRecord directly + .field(2, "outer data") + .build(); + var buffer = outerRecord.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); ImprintRecord deserialized = ImprintRecord.deserialize(serialized); - - // Verify outer record - assert deserialized.getHeader().getSchemaId().getFieldSpaceId() == 4; - assert Objects.equals(deserialized.getValue(2), Value.fromString("outer data")); - - // Verify nested record - Value rowValue = deserialized.getValue(1); - assert rowValue instanceof Value.RowValue; - ImprintRecord nestedRecord = ((Value.RowValue) rowValue).getValue(); - - assert nestedRecord.getHeader().getSchemaId().getFieldSpaceId() == 3; - assert Objects.equals(nestedRecord.getValue(1), Value.fromString("nested data")); - assert Objects.equals(nestedRecord.getValue(2), Value.fromInt64(9876543210L)); - + + assertEquals(4, deserialized.getHeader().getSchemaId().getFieldSpaceId()); + assertEquals("outer data", deserialized.getString(2)); + + ImprintRecord nestedDeserialized = deserialized.getRow(1); + assertNotNull(nestedDeserialized); + assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); + assertEquals("nested data", nestedDeserialized.getString(1)); + assertEquals(9876543210L, nestedDeserialized.getInt64(2)); + System.out.println("✓ Nested records test passed"); } + + // --- Start of broken down tests for ErgonomicGettersAndNestedTypes --- + + private ImprintRecord 
createTestRecordForGetters() throws ImprintException { + SchemaId schemaId = new SchemaId(5, 0xabcdef01); + + List innerList1 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); + List innerList2 = Arrays.asList(Value.fromInt32(30), Value.fromInt32(40)); + List listOfLists = Arrays.asList(Value.fromArray(innerList1), Value.fromArray(innerList2)); + + Map mapWithArrayValue = new HashMap<>(); + mapWithArrayValue.put(MapKey.fromString("list1"), Value.fromArray(innerList1)); + + return ImprintRecord.builder(schemaId) + .field(1, true) + .field(2, 12345) + .field(3, 9876543210L) + .field(4, 3.14f) + .field(5, 2.718281828) + .field(6, "hello type world") + .field(7, new byte[]{10, 20, 30}) + .nullField(8) + .field(9, Value.fromArray(listOfLists)) // Array of Arrays (using Value directly for test setup) + .field(10, Value.fromMap(mapWithArrayValue)) // Map with Array value + .field(11, Collections.emptyList()) // Empty Array via builder + .field(12, Collections.emptyMap()) // Empty Map via builder + .build(); + } + + private ImprintRecord serializeAndDeserialize(ImprintRecord record) throws ImprintException { + var buffer = record.serializeToBuffer(); + byte[] serialized = new byte[buffer.remaining()]; + buffer.get(serialized); + return ImprintRecord.deserialize(serialized); + } + + @Test + @DisplayName("Type Getters: Basic primitive and String types") + void testBasicTypeGetters() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + assertTrue(record.getBoolean(1)); + assertEquals(12345, record.getInt32(2)); + assertEquals(9876543210L, record.getInt64(3)); + assertEquals(3.14f, record.getFloat32(4)); + assertEquals(2.718281828, record.getFloat64(5)); + assertEquals("hello type world", record.getString(6)); + assertArrayEquals(new byte[]{10, 20, 30}, record.getBytes(7)); + } + + @Test + @DisplayName("Type Getters: Array of Arrays") + void 
testTypeGetterArrayOfArrays() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + List arrOfArr = record.getArray(9); + assertNotNull(arrOfArr); + assertEquals(2, arrOfArr.size()); + assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(0)); + Value.ArrayValue firstInnerArray = (Value.ArrayValue) arrOfArr.get(0); + assertEquals(2, firstInnerArray.getValue().size()); + assertEquals(Value.fromInt32(10), firstInnerArray.getValue().get(0)); + assertEquals(Value.fromInt32(20), firstInnerArray.getValue().get(1)); + + assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(1)); + Value.ArrayValue secondInnerArray = (Value.ArrayValue) arrOfArr.get(1); + assertEquals(2, secondInnerArray.getValue().size()); + assertEquals(Value.fromInt32(30), secondInnerArray.getValue().get(0)); + assertEquals(Value.fromInt32(40), secondInnerArray.getValue().get(1)); + } + + @Test + @DisplayName("Type Getters: Map with Array Value") + void testTypeGetterMapWithArrayValue() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + Map mapWithArr = record.getMap(10); + assertNotNull(mapWithArr); + assertEquals(1, mapWithArr.size()); + assertInstanceOf(Value.ArrayValue.class, mapWithArr.get(MapKey.fromString("list1"))); + Value.ArrayValue innerArray = (Value.ArrayValue) mapWithArr.get(MapKey.fromString("list1")); + assertNotNull(innerArray); + assertEquals(2, innerArray.getValue().size()); + assertEquals(Value.fromInt32(10), innerArray.getValue().get(0)); + } + + @Test + @DisplayName("Type Getters: Empty Collections (Array and Map)") + void testErgonomicGettersEmptyCollections() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + List emptyArr = record.getArray(11); + 
assertNotNull(emptyArr); + assertTrue(emptyArr.isEmpty()); + + Map emptyMap = record.getMap(12); + assertNotNull(emptyMap); + assertTrue(emptyMap.isEmpty()); + } + + @Test + @DisplayName("Type Getters: Exception for Field Not Found") + void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(99)); + assertEquals(ErrorType.FIELD_NOT_FOUND, ex.getErrorType()); + } + + @Test + @DisplayName("Type Getters: Exception for Null Field accessed as primitive") + void testErgonomicGetterExceptionNullField() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + ImprintException ex = assertThrows(ImprintException.class, () -> record.getString(8)); + assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); // getString throws TYPE_MISMATCH for null + assertTrue(ex.getMessage().contains("Field 8 is NULL")); + + + // Also test getValue for a null field returns Value.NullValue + Value nullValueField = record.getValue(8); + assertNotNull(nullValueField); + assertInstanceOf(Value.NullValue.class, nullValueField, "Field 8 should be Value.NullValue"); + } + + @Test + @DisplayName("Type Getters: Exception for Type Mismatch") + void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { + ImprintRecord originalRecord = createTestRecordForGetters(); + ImprintRecord record = serializeAndDeserialize(originalRecord); + + ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(6)); // Field 6 is a String + assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); + } + + @Test + @DisplayName("Type Getters: Row (Nested Record)") + void testErgonomicGetterRow() throws ImprintException { + SchemaId innerSchemaId = new 
SchemaId(6, 0x12345678); + ImprintRecord innerRecord = ImprintRecord.builder(innerSchemaId) + .field(101, "nested string") + .field(102, 999L) + .build(); + + ImprintRecord recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) + .field(201, innerRecord) // Using builder to add row + .field(202, "outer field") + .build(); + + ImprintRecord deserializedWithRow = serializeAndDeserialize(recordWithRow); + + ImprintRecord retrievedRow = deserializedWithRow.getRow(201); + assertNotNull(retrievedRow); + assertEquals(innerSchemaId, retrievedRow.getHeader().getSchemaId()); + assertEquals("nested string", retrievedRow.getString(101)); + assertEquals(999L, retrievedRow.getInt64(102)); + assertEquals("outer field", deserializedWithRow.getString(202)); + } } \ No newline at end of file From 1350cb6d294805ccb5e79aff293b3e308ecb0075 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 3 Jun 2025 18:53:18 -0400 Subject: [PATCH 15/53] minor style fixes --- .../java/com/imprint/core/ImprintRecord.java | 13 +- .../com/imprint/types/TypeHandlerTest.java | 122 +++++++++--------- .../java/com/imprint/types/ValueTest.java | 100 +++++++------- 3 files changed, 118 insertions(+), 117 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index d667039..dd66389 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -292,12 +292,6 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } } - @Override - public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, directory.size(), payload.remaining()); - } - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) throws ImprintException { Value value = getValue(fieldId); @@ -439,4 +433,11 @@ public Map getMap(int fieldId) throws 
ImprintException { public ImprintRecord getRow(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } + + @Override + public String toString() { + return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", + header, directory.size(), payload.remaining()); + } + } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/TypeHandlerTest.java b/src/test/java/com/imprint/types/TypeHandlerTest.java index 9a4ae85..f131a0f 100644 --- a/src/test/java/com/imprint/types/TypeHandlerTest.java +++ b/src/test/java/com/imprint/types/TypeHandlerTest.java @@ -20,104 +20,104 @@ class TypeHandlerTest { void testNullHandler() throws ImprintException { var handler = TypeHandler.NULL; var value = Value.nullValue(); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(0); - + // Serialization var buffer = ByteBuffer.allocate(10); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(0); // NULL writes nothing - + // Deserialization buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - + // readValueBytes buffer.clear(); var valueBytes = handler.readValueBytes(buffer); assertThat(valueBytes.remaining()).isEqualTo(0); } - + @ParameterizedTest @ValueSource(booleans = {true, false}) void testBoolHandler(boolean testValue) throws ImprintException { var handler = TypeHandler.BOOL; var value = Value.fromBoolean(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(1); - + // Round-trip test var buffer = ByteBuffer.allocate(10); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(1); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.BoolValue) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(ints = {0, 1, -1, 
Integer.MAX_VALUE, Integer.MIN_VALUE, 42, -42}) void testInt32Handler(int testValue) throws ImprintException { var handler = TypeHandler.INT32; var value = Value.fromInt32(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(4); - + // Round-trip test var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(4); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.Int32Value) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(longs = {0L, 1L, -1L, Long.MAX_VALUE, Long.MIN_VALUE, 123456789L}) void testInt64Handler(long testValue) throws ImprintException { var handler = TypeHandler.INT64; var value = Value.fromInt64(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(8); - + // Round-trip test var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(8); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); assertThat(((Value.Int64Value) deserialized).getValue()).isEqualTo(testValue); } - + @ParameterizedTest @ValueSource(floats = {0.0f, 1.0f, -1.0f, Float.MAX_VALUE, Float.MIN_VALUE, 3.14159f, Float.NaN, Float.POSITIVE_INFINITY}) void testFloat32Handler(float testValue) throws ImprintException { var handler = TypeHandler.FLOAT32; var value = Value.fromFloat32(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(4); - + // Round-trip test var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(4); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - + float deserializedValue = ((Value.Float32Value) 
deserialized).getValue(); if (Float.isNaN(testValue)) { assertThat(deserializedValue).isNaN(); @@ -125,25 +125,25 @@ void testFloat32Handler(float testValue) throws ImprintException { assertThat(deserializedValue).isEqualTo(testValue); } } - + @ParameterizedTest @ValueSource(doubles = {0.0, 1.0, -1.0, Double.MAX_VALUE, Double.MIN_VALUE, Math.PI, Double.NaN, Double.POSITIVE_INFINITY}) void testFloat64Handler(double testValue) throws ImprintException { var handler = TypeHandler.FLOAT64; var value = Value.fromFloat64(testValue); - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(8); - + // Round-trip test var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); assertThat(buffer.position()).isEqualTo(8); - + buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - + double deserializedValue = ((Value.Float64Value) deserialized).getValue(); if (Double.isNaN(testValue)) { assertThat(deserializedValue).isNaN(); @@ -151,129 +151,129 @@ void testFloat64Handler(double testValue) throws ImprintException { assertThat(deserializedValue).isEqualTo(testValue); } } - + @ParameterizedTest @ValueSource(strings = {"", "hello", "世界", "a very long string that exceeds typical buffer sizes and contains unicode: 🚀🎉", "null\0bytes"}) void testStringHandler(String testValue) throws ImprintException { var handler = TypeHandler.STRING; var value = Value.fromString(testValue); - + byte[] utf8Bytes = testValue.getBytes(java.nio.charset.StandardCharsets.UTF_8); int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + // Should return 
StringBufferValue (zero-copy implementation) assertThat(deserialized).isInstanceOf(Value.StringBufferValue.class); - + String deserializedString; if (deserialized instanceof Value.StringBufferValue) { deserializedString = ((Value.StringBufferValue) deserialized).getValue(); } else { deserializedString = ((Value.StringValue) deserialized).getValue(); } - + assertThat(deserializedString).isEqualTo(testValue); } - + @Test void testBytesHandlerWithArrayValue() throws ImprintException { var handler = TypeHandler.BYTES; byte[] testBytes = {0, 1, 2, (byte) 0xFF, 42, 127, -128}; var value = Value.fromBytes(testBytes); - + int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - + // Size estimation assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(value, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + // Should return BytesBufferValue (zero-copy implementation) assertThat(deserialized).isInstanceOf(Value.BytesBufferValue.class); - + byte[] deserializedBytes = ((Value.BytesBufferValue) deserialized).getValue(); assertThat(deserializedBytes).isEqualTo(testBytes); } - + @Test void testBytesHandlerWithBufferValue() throws ImprintException { var handler = TypeHandler.BYTES; byte[] testBytes = {10, 20, 30, 40}; var bufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes).asReadOnlyBuffer()); - + int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - + // Size estimation assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(bufferValue, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + byte[] deserializedBytes = ((Value.BytesBufferValue) 
deserialized).getValue(); assertThat(deserializedBytes).isEqualTo(testBytes); } - + @Test void testStringHandlerWithBufferValue() throws ImprintException { var handler = TypeHandler.STRING; String testString = "zero-copy string test"; byte[] utf8Bytes = testString.getBytes(java.nio.charset.StandardCharsets.UTF_8); var bufferValue = Value.fromStringBuffer(ByteBuffer.wrap(utf8Bytes).asReadOnlyBuffer()); - + int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - + // Size estimation assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - + // Round-trip test var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); handler.serialize(bufferValue, buffer); - + buffer.flip(); var deserialized = handler.deserialize(buffer); - + String deserializedString = ((Value.StringBufferValue) deserialized).getValue(); assertThat(deserializedString).isEqualTo(testString); } - + @Test void testBoolHandlerInvalidValue() { var handler = TypeHandler.BOOL; var buffer = ByteBuffer.allocate(10); buffer.put((byte) 2); // Invalid boolean value buffer.flip(); - + assertThatThrownBy(() -> handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Invalid boolean value: 2"); + .isInstanceOf(ImprintException.class) + .hasMessageContaining("Invalid boolean value: 2"); } - + @Test void testHandlerBufferUnderflow() { // Test that handlers properly detect buffer underflow var int32Handler = TypeHandler.INT32; var buffer = ByteBuffer.allocate(2); // Too small for int32 - + assertThatThrownBy(() -> int32Handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Not enough bytes for int32"); + .isInstanceOf(ImprintException.class) + .hasMessageContaining("Not enough bytes for int32"); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/ValueTest.java b/src/test/java/com/imprint/types/ValueTest.java index c400a44..b092bb7 
100644 --- a/src/test/java/com/imprint/types/ValueTest.java +++ b/src/test/java/com/imprint/types/ValueTest.java @@ -12,143 +12,143 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; class ValueTest { - + @Test void shouldCreateNullValue() { Value value = Value.nullValue(); - + assertThat(value).isInstanceOf(Value.NullValue.class); assertThat(value.getTypeCode()).isEqualTo(TypeCode.NULL); assertThat(value.toString()).isEqualTo("null"); } - + @Test void shouldCreateBooleanValues() { Value trueValue = Value.fromBoolean(true); Value falseValue = Value.fromBoolean(false); - + assertThat(trueValue).isInstanceOf(Value.BoolValue.class); assertThat(((Value.BoolValue) trueValue).getValue()).isTrue(); assertThat(trueValue.getTypeCode()).isEqualTo(TypeCode.BOOL); - + assertThat(falseValue).isInstanceOf(Value.BoolValue.class); assertThat(((Value.BoolValue) falseValue).getValue()).isFalse(); assertThat(falseValue.getTypeCode()).isEqualTo(TypeCode.BOOL); } - + @Test void shouldCreateNumericValues() { var int32 = Value.fromInt32(42); var int64 = Value.fromInt64(123456789L); var float32 = Value.fromFloat32(3.14f); var float64 = Value.fromFloat64(2.718281828); - + assertThat(int32.getTypeCode()).isEqualTo(TypeCode.INT32); assertThat(((Value.Int32Value) int32).getValue()).isEqualTo(42); - + assertThat(int64.getTypeCode()).isEqualTo(TypeCode.INT64); assertThat(((Value.Int64Value) int64).getValue()).isEqualTo(123456789L); - + assertThat(float32.getTypeCode()).isEqualTo(TypeCode.FLOAT32); assertThat(((Value.Float32Value) float32).getValue()).isEqualTo(3.14f); - + assertThat(float64.getTypeCode()).isEqualTo(TypeCode.FLOAT64); assertThat(((Value.Float64Value) float64).getValue()).isEqualTo(2.718281828); } - + @Test void shouldCreateBytesAndStringValues() { byte[] bytes = {1, 2, 3, 4}; var bytesValue = Value.fromBytes(bytes); var stringValue = Value.fromString("hello"); - + assertThat(bytesValue.getTypeCode()).isEqualTo(TypeCode.BYTES); assertThat(((Value.BytesValue) 
bytesValue).getValue()).isEqualTo(bytes); - + assertThat(stringValue.getTypeCode()).isEqualTo(TypeCode.STRING); assertThat(((Value.StringValue) stringValue).getValue()).isEqualTo("hello"); } - + @Test void shouldCreateArrayValues() { List elements = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) + Value.fromInt32(1), + Value.fromInt32(2), + Value.fromInt32(3) ); Value arrayValue = Value.fromArray(elements); - + assertThat(arrayValue.getTypeCode()).isEqualTo(TypeCode.ARRAY); assertThat(((Value.ArrayValue) arrayValue).getValue()).isEqualTo(elements); } - + @Test void shouldCreateMapValues() { var map = new HashMap(); map.put(MapKey.fromString("key1"), Value.fromInt32(1)); map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - + Value mapValue = Value.fromMap(map); - + assertThat(mapValue.getTypeCode()).isEqualTo(TypeCode.MAP); assertThat(((Value.MapValue) mapValue).getValue()).isEqualTo(map); } - + @Test void shouldHandleEqualityCorrectly() { var int1 = Value.fromInt32(42); var int2 = Value.fromInt32(42); var int3 = Value.fromInt32(43); - + assertThat(int1).isEqualTo(int2); assertThat(int1).isNotEqualTo(int3); assertThat(int1.hashCode()).isEqualTo(int2.hashCode()); } - + @Test void shouldRejectNullString() { assertThatThrownBy(() -> Value.fromString(null)) - .isInstanceOf(NullPointerException.class); + .isInstanceOf(NullPointerException.class); } - + @Test void shouldCreateStringBufferValue() { String testString = "hello world"; byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - + Value stringBufferValue = Value.fromStringBuffer(buffer); - + assertThat(stringBufferValue).isInstanceOf(Value.StringBufferValue.class); assertThat(stringBufferValue.getTypeCode()).isEqualTo(TypeCode.STRING); assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); } - + @Test void shouldCreateBytesBufferValue() { byte[] testBytes = {1, 2, 3, 4, 5}; 
ByteBuffer buffer = ByteBuffer.wrap(testBytes); - + Value bytesBufferValue = Value.fromBytesBuffer(buffer); - + assertThat(bytesBufferValue).isInstanceOf(Value.BytesBufferValue.class); assertThat(bytesBufferValue.getTypeCode()).isEqualTo(TypeCode.BYTES); assertThat(((Value.BytesBufferValue) bytesBufferValue).getValue()).isEqualTo(testBytes); } - + @Test void shouldHandleStringBufferValueFastPath() { // Array-backed buffer with arrayOffset() == 0 should use fast path String testString = "fast path test"; byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - + Value stringBufferValue = Value.fromStringBuffer(buffer); - + // Should work correctly regardless of path taken assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); } - + @Test void shouldHandleStringBufferValueFallbackPath() { // Sliced buffer will have non-zero arrayOffset, forcing fallback path @@ -156,62 +156,62 @@ void shouldHandleStringBufferValueFallbackPath() { byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); ByteBuffer sliced = buffer.slice(); // This may break arrayOffset() == 0 - + Value stringBufferValue = Value.fromStringBuffer(sliced); - + // Should work correctly regardless of path taken assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); } - + @Test void shouldHandleLargeStringWithoutCaching() { // Create string > 1KB to test the no-cache path String largeString = "x".repeat(2000); byte[] utf8Bytes = largeString.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes).slice(); // Force fallback path - + Value stringBufferValue = Value.fromStringBuffer(buffer); - + assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(largeString); } - + @Test void shouldCacheStringDecoding() { String testString = "cache test"; byte[] utf8Bytes = 
testString.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - + Value.StringBufferValue stringBufferValue = (Value.StringBufferValue) Value.fromStringBuffer(buffer); - + // First call should decode and cache String result1 = stringBufferValue.getValue(); // Second call should return cached value String result2 = stringBufferValue.getValue(); - + assertThat(result1).isEqualTo(testString); assertThat(result2).isEqualTo(testString); assertThat(result1).isSameAs(result2); // Should be same object reference due to caching } - + @Test void shouldHandleStringValueEquality() { String testString = "equality test"; - + Value stringValue = Value.fromString(testString); Value stringBufferValue = Value.fromStringBuffer(ByteBuffer.wrap(testString.getBytes(StandardCharsets.UTF_8))); - + assertThat(stringValue).isEqualTo(stringBufferValue); assertThat(stringBufferValue).isEqualTo(stringValue); assertThat(stringValue.hashCode()).isEqualTo(stringBufferValue.hashCode()); } - + @Test void shouldHandleBytesValueEquality() { byte[] testBytes = {1, 2, 3, 4, 5}; - + Value bytesValue = Value.fromBytes(testBytes); Value bytesBufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes)); - + assertThat(bytesValue).isEqualTo(bytesBufferValue); assertThat(bytesBufferValue).isEqualTo(bytesValue); } From 996780e0ef79e3c15079bada6d8cf1477925ce37 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 3 Jun 2025 18:57:15 -0400 Subject: [PATCH 16/53] minor style fixes again --- src/main/java/com/imprint/core/ImprintRecord.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index dd66389..5d4719f 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -41,23 +41,16 @@ public ImprintRecord(Header header, List directory, ByteBuffer p /** * Get a value by field ID, 
deserializing it on demand. * Returns null if the field is not found. - * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE. + * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE */ public Value getValue(int fieldId) throws ImprintException { var fieldBuffer = getFieldBuffer(fieldId); if (fieldBuffer == null) { - // If getFieldBuffer returns null, it means the fieldId was not in the directory, - // or an issue occurred slicing the payload (e.g., bad offsets). return null; } - // findDirectoryIndex should not be negative here if fieldBuffer is not null, - // but a defensive check or ensuring findDirectoryIndex is called once is good. - // For simplicity, assume getFieldBuffer implies a valid index. int directoryIndex = findDirectoryIndex(fieldId); if (directoryIndex < 0) { - // This case should ideally be caught by getFieldBuffer returning null. - // If it happens, indicates an inconsistency. throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); } var entry = directory.get(directoryIndex); @@ -293,7 +286,7 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) throws ImprintException { - Value value = getValue(fieldId); + var value = getValue(fieldId); if (value == null) { throw new ImprintException(ErrorType.FIELD_NOT_FOUND, From 4f6f17854f000ca9d41181cb7b67b3cfd54d5c46 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 3 Jun 2025 19:00:32 -0400 Subject: [PATCH 17/53] minor style fixes on benchmark tests and supress unused --- src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java | 6 +----- src/jmh/java/com/imprint/benchmark/StringBenchmark.java | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git 
a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 152bb6d..8163522 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -103,8 +103,6 @@ public void deserializeKryo(Blackhole bh) { // ===== FIELD ACCESS BENCHMARKS ===== // Tests accessing a single field near the end of a large record - // This showcases Imprint's O(1) directory lookup vs sequential deserialization - @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { ImprintRecord record = ImprintRecord.deserialize(imprintBytes.duplicate()); @@ -158,11 +156,9 @@ public void measureKryoSize(Blackhole bh) { @Benchmark public void mergeImprint(Blackhole bh) throws Exception { - // Simulate merge with Imprint (O(1) with proper API) var record1 = serializeWithImprint(testData); var record2 = serializeWithImprint(createTestRecord2()); - - // Current simulation - will be O(1) with actual merge API + var deserialized1 = ImprintRecord.deserialize(record1); var deserialized2 = ImprintRecord.deserialize(record2); var merged = simulateMerge(deserialized1, deserialized2); diff --git a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java index e156c8c..045940e 100644 --- a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java @@ -16,6 +16,7 @@ @Fork(1) @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) +@SuppressWarnings("unused") public class StringBenchmark { private static final SchemaId SCHEMA_ID = new SchemaId(1, 42); From 4827a4ee0fc1aa421174b1c552acd1885720888d Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Wed, 4 Jun 2025 01:09:26 -0400 Subject: [PATCH 18/53] minor reordering --- .../java/com/imprint/types/TypeHandler.java 
| 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index d21403b..be4fc7b 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -19,6 +19,13 @@ public interface TypeHandler { int estimateSize(Value value) throws ImprintException; ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException; + + + @FunctionalInterface + interface BufferViewer { + int measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; + } + // Helper method to eliminate duplication in ARRAY/MAP readValueBytes static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, BufferViewer measurer) throws ImprintException { int initialPosition = buffer.position(); @@ -31,9 +38,9 @@ static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, Buff if (numElements == 0) { if (buffer.remaining() < varIntLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for empty " + typeName + " VarInt. Needed: " + - varIntLength + ", available: " + buffer.remaining()); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for empty " + typeName + " VarInt. Needed: " + + varIntLength + ", available: " + buffer.remaining()); } ByteBuffer valueSlice = buffer.slice(); valueSlice.limit(varIntLength); @@ -43,24 +50,19 @@ static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, Buff int dataLength = measurer.measureDataLength(tempBuffer, numElements); int totalLength = varIntLength + dataLength; - + if (buffer.remaining() < totalLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for " + typeName + " value. 
Needed: " + totalLength + - ", available: " + buffer.remaining() + " at position " + initialPosition); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for " + typeName + " value. Needed: " + totalLength + + ", available: " + buffer.remaining() + " at position " + initialPosition); } - + ByteBuffer valueSlice = buffer.slice(); valueSlice.limit(totalLength); buffer.position(initialPosition + totalLength); return valueSlice.asReadOnlyBuffer(); } - @FunctionalInterface - interface BufferViewer { - int measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; - } - // Static implementations for each type TypeHandler NULL = new TypeHandler() { @Override From 876be166b0a7db1e2e6f9b46dbc8efbf867873c1 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 10:19:37 -0400 Subject: [PATCH 19/53] Full comprehensive comparison tests with a lot of other libraries + some micro-optimizations added that were found along the way --- .github/workflows/ci.yml | 244 ++++++- build.gradle | 265 ++++++- .../benchmark/ComparisonBenchmark.java | 661 +++++++++++++++--- .../java/com/imprint/core/ImprintRecord.java | 12 +- .../java/com/imprint/types/TypeHandler.java | 84 ++- src/main/java/com/imprint/types/Value.java | 36 +- .../com/imprint/benchmark/ProfilerTest.java | 226 ------ .../com/imprint/profile/ProfilerTest.java | 5 +- 8 files changed, 1135 insertions(+), 398 deletions(-) delete mode 100644 src/test/java/com/imprint/benchmark/ProfilerTest.java diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d4c8bde..62ac6f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,30 +15,220 @@ jobs: java-version: [11, 17, 21] steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK ${{ matrix.java-version }} - uses: actions/setup-java@v4 - with: - java-version: ${{ matrix.java-version }} - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: 
actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Run tests - run: ./gradlew test - - - name: Run build - run: ./gradlew build \ No newline at end of file + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK ${{ matrix.java-version }} + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + + - name: Cache Gradle dependencies + uses: actions/cache@v4 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Run tests + run: ./gradlew test + + - name: Run build + run: ./gradlew build + + benchmark: + runs-on: ubuntu-latest + needs: test + # Only run benchmarks on main branch pushes and PRs to main to avoid excessive CI time + if: github.ref == 'refs/heads/main' || github.base_ref == 'main' + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + + - name: Cache Gradle dependencies + uses: actions/cache@v4 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Create benchmark results directory + run: mkdir -p benchmark-results + + - name: Run serialization benchmarks + run: | + ./gradlew jmhRunSerializationBenchmarks + continue-on-error: true + + - name: Run deserialization benchmarks + run: | + ./gradlew 
jmhRunDeserializationBenchmarks + continue-on-error: true + + - name: Run field access benchmarks + run: | + ./gradlew jmhRunFieldAccessBenchmarks + continue-on-error: true + + - name: Run size comparison benchmarks + run: | + ./gradlew jmhRunSizeComparisonBenchmarks + continue-on-error: true + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + if: always() + with: + name: benchmark-results-${{ github.sha }} + path: benchmark-results/ + retention-days: 30 + + - name: Comment benchmark results on PR + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + // Find the latest benchmark results file + const resultsDir = 'benchmark-results'; + let latestFile = null; + let latestTime = 0; + + if (fs.existsSync(resultsDir)) { + const files = fs.readdirSync(resultsDir); + for (const file of files) { + if (file.endsWith('.json')) { + const filePath = path.join(resultsDir, file); + const stats = fs.statSync(filePath); + if (stats.mtime.getTime() > latestTime) { + latestTime = stats.mtime.getTime(); + latestFile = filePath; + } + } + } + } + + if (latestFile) { + const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); + + // Group results by benchmark type + const serialization = results.filter(r => r.benchmark.includes('serialize')); + const deserialization = results.filter(r => r.benchmark.includes('deserialize')); + const fieldAccess = results.filter(r => r.benchmark.includes('singleFieldAccess')); + const sizes = results.filter(r => r.benchmark.includes('measure')); + + // Format results into a table + const formatResults = (benchmarks, title) => { + if (benchmarks.length === 0) return ''; + + let table = `\n### ${title}\n\n| Library | Score (ns/op) | Error | Unit |\n|---------|---------------|-------|------|\n`; + + benchmarks + .sort((a, b) => a.primaryMetric.score - b.primaryMetric.score) + .forEach(benchmark => { + const name = 
benchmark.benchmark.split('.').pop().replace(/serialize|deserialize|singleFieldAccess|measure/, '').replace(/Imprint|JacksonJson|Kryo|MessagePack|Avro|Protobuf|FlatBuffers/, (match) => match); + const score = benchmark.primaryMetric.score.toFixed(2); + const error = benchmark.primaryMetric.scoreError.toFixed(2); + const unit = benchmark.primaryMetric.scoreUnit; + table += `| ${name} | ${score} | ±${error} | ${unit} |\n`; + }); + + return table; + }; + + const comment = `##Benchmark Results + + Benchmark comparison between Imprint and other serialization libraries: + ${formatResults(serialization, 'Serialization Performance')} + ${formatResults(deserialization, 'Deserialization Performance')} + ${formatResults(fieldAccess, 'Single Field Access Performance')} + ${formatResults(sizes, 'Serialized Size Comparison')} + +
+ View detailed results + + Results generated from commit: \`${context.sha.substring(0, 7)}\` + + Lower scores are better for performance benchmarks. + +
`; + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: comment + }); + } else { + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: '## Benchmark Results\n\nBenchmark execution completed but no results file was found. Check the [workflow logs](' + + `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' + }); + } + + # Optional: Run full benchmark suite on releases + benchmark-full: + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + + - name: Cache Gradle dependencies + uses: actions/cache@v4 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Create benchmark results directory + run: mkdir -p benchmark-results + + - name: Run full benchmark suite + run: | + ./gradlew jmh + + - name: Upload full benchmark results + uses: actions/upload-artifact@v4 + with: + name: full-benchmark-results-${{ github.ref_name }} + path: benchmark-results/ + retention-days: 90 \ No newline at end of file diff --git a/build.gradle b/build.gradle index 2606710..8406676 100644 --- a/build.gradle +++ b/build.gradle @@ -2,6 +2,8 @@ plugins { id 'java-library' id 'maven-publish' id 'me.champeau.jmh' version '0.7.2' + id 'com.google.protobuf' version '0.9.4' + id 'io.netifi.flatbuffers' version '1.0.7' } group = 'com.imprint' @@ -23,31 +25,141 @@ dependencies { // Lombok for reducing boilerplate compileOnly 'org.projectlombok:lombok:1.18.30' 
annotationProcessor 'org.projectlombok:lombok:1.18.30' - + // Test dependencies testImplementation 'org.junit.jupiter:junit-jupiter:5.10.0' testImplementation 'org.assertj:assertj-core:3.24.2' testImplementation 'org.mockito:mockito-core:5.5.0' - + // Lombok for tests testCompileOnly 'org.projectlombok:lombok:1.18.30' testAnnotationProcessor 'org.projectlombok:lombok:1.18.30' - + // Performance testing with JMH jmhImplementation 'org.openjdk.jmh:jmh-core:1.37' jmhAnnotationProcessor 'org.openjdk.jmh:jmh-generator-annprocess:1.37' - - // Competitor libraries for benchmarking + + // Suppress SLF4J warnings + jmhImplementation 'org.slf4j:slf4j-nop:1.7.36' + + // Competitor libraries for benchmarking (JMH only) jmhImplementation 'com.google.protobuf:protobuf-java:3.25.1' jmhImplementation 'org.apache.avro:avro:1.11.3' jmhImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.0' jmhImplementation 'com.google.flatbuffers:flatbuffers-java:23.5.26' jmhImplementation 'com.esotericsoftware:kryo:5.4.0' + jmhImplementation 'org.msgpack:msgpack-core:0.9.8' + jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' +} + +protobuf { + protoc { + artifact = "com.google.protobuf:protoc:3.25.1" + } + generateProtoTasks { + // Only generate for JMH, not main + all().each { task -> + task.enabled = false + } + ofSourceSet('jmh').each { task -> + task.enabled = true + task.builtins { + java { + outputSubDir = 'java' + } + } + } + } +} + +// Download and setup FlatBuffers compiler for Linux (CI environment) +task downloadFlatc { + description = 'Download FlatBuffers compiler' + group = 'build setup' + + def flatcDir = file("${buildDir}/flatc") + def flatcExe = file("${flatcDir}/flatc") + def isWindows = System.getProperty('os.name').toLowerCase().contains('windows') + def flatcUrl = isWindows ? 
+ "https://github.com/google/flatbuffers/releases/download/v23.5.26/Windows.flatc.binary.zip" : + "https://github.com/google/flatbuffers/releases/download/v23.5.26/Linux.flatc.binary.clang++-12.zip" + def flatcZip = file("${buildDir}/flatc.zip") + + outputs.file(flatcExe) + + doLast { + if (!flatcExe.exists()) { + println "Downloading FlatBuffers compiler for ${isWindows ? 'Windows' : 'Linux'}..." + flatcDir.mkdirs() + + // Download + new URL(flatcUrl).withInputStream { i -> + flatcZip.withOutputStream { it << i } + } + + // Extract + copy { + from zipTree(flatcZip) + into flatcDir + } + + // Make executable on Unix systems + if (!isWindows) { + exec { + commandLine 'chmod', '+x', flatcExe.absolutePath + } + } + + flatcZip.delete() + println "FlatBuffers compiler downloaded to: ${flatcExe}" + } + } +} + +// Generate FlatBuffers sources +task generateFlatBuffers(type: Exec) { + dependsOn downloadFlatc + description = 'Generate Java classes from FlatBuffers schema' + group = 'build' + + def isWindows = System.getProperty('os.name').toLowerCase().contains('windows') + def flatcExe = file("${buildDir}/flatc/${isWindows ? 
'flatc.exe' : 'flatc'}") + def schemaFile = file('src/jmh/flatbuffers/test_record.fbs') + def outputDir = file('build/generated/source/flatbuffers/jmh/java') + + commandLine flatcExe.absolutePath, '--java', '-o', outputDir.absolutePath, schemaFile.absolutePath + + inputs.file(schemaFile) + outputs.dir(outputDir) + + doFirst { + outputDir.mkdirs() + } +} + +// Add generated FlatBuffers sources to JMH source set +sourceSets { + jmh { + java { + srcDir 'build/generated/source/flatbuffers/jmh/java' + } + proto { + srcDir 'src/jmh/proto' + } + } +} + +// Make JMH compilation depend on FlatBuffers generation +compileJmhJava.dependsOn generateFlatBuffers + +// Handle duplicate proto files +tasks.named('processJmhResources') { + duplicatesStrategy = DuplicatesStrategy.EXCLUDE } test { useJUnitPlatform() - + // Enable detailed test output testLogging { events "passed", "skipped", "failed" @@ -57,11 +169,146 @@ test { // JMH configuration jmh { fork = 1 - warmupIterations = 3 - iterations = 3 + warmupIterations = 2 // Reduced for faster CI + iterations = 3 // Reduced for faster CI resultFormat = 'JSON' includeTests = false resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") + + // Java 11 specific JVM args + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions', + '-XX:+UseJVMCICompiler' + ] +} + +// Create individual benchmark tasks for CI pipeline +task jmhRunSerializationBenchmarks(type: JavaExec) { + dependsOn compileJmhJava + description = 'Run serialization benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runSerializationBenchmarks'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } +} + +task jmhRunDeserializationBenchmarks(type: JavaExec) { 
+ dependsOn compileJmhJava + description = 'Run deserialization benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runDeserializationBenchmarks'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } +} + +task jmhRunFieldAccessBenchmarks(type: JavaExec) { + dependsOn compileJmhJava + description = 'Run field access benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runFieldAccessBenchmarks'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } +} + +task jmhRunSizeComparisonBenchmarks(type: JavaExec) { + dependsOn compileJmhJava + description = 'Run size comparison benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runSizeComparisonBenchmarks'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } +} + +task jmhRunMergeBenchmarks(type: JavaExec) { + dependsOn compileJmhJava + description = 'Run merge operation benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runMergeBenchmarks'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } +} + +task jmhRunAllBenchmarks(type: JavaExec) { + dependsOn compileJmhJava + 
description = 'Run all comparison benchmarks' + group = 'benchmarking' + + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'com.imprint.benchmark.ComparisonBenchmark' + args = ['runAll'] + + // Java 11 optimized JVM settings + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g', + '-XX:+UnlockExperimentalVMOptions' + ] + + doFirst { + file("${projectDir}/benchmark-results").mkdirs() + } } compileJava { @@ -75,4 +322,4 @@ javadoc { } // Don't fail build on missing javadoc options.addStringOption('Xdoclint:none', '-quiet') -} +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 8163522..49260b1 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -4,13 +4,25 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.flatbuffers.FlatBufferBuilder; import com.imprint.core.ImprintRecord; import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.*; +import org.msgpack.jackson.dataformat.MessagePackFactory; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -25,38 +37,56 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) 
@State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) +@SuppressWarnings("unused") public class ComparisonBenchmark { // Test data private TestRecord testData; - + // Serialized formats - private ByteBuffer imprintBytes; - private byte[] jacksonBytes; + private ByteBuffer imprintBytesBuffer; + private byte[] jacksonJsonBytes; private byte[] kryoBytes; - + private byte[] messagePackBytes; + private byte[] avroBytes; + private byte[] protobufBytes; + private ByteBuffer flatbuffersBytes; + // Library instances - private ObjectMapper jackson; + private Schema avroSchema; + private DatumWriter avroWriter; + private DatumReader avroReader; + private ObjectMapper jacksonJsonMapper; private Kryo kryo; + private ObjectMapper messagePackMapper; @Setup public void setup() throws Exception { testData = createTestRecord(); - + // Initialize libraries - jackson = new ObjectMapper(); + jacksonJsonMapper = new ObjectMapper(); kryo = new Kryo(); kryo.register(TestRecord.class); kryo.register(ArrayList.class); kryo.register(HashMap.class); - + kryo.register(Arrays.asList().getClass()); + + // Initialize MessagePack ObjectMapper + messagePackMapper = new ObjectMapper(new MessagePackFactory()); + setupAvro(); + // Pre-serialize for deserialization benchmarks - imprintBytes = serializeWithImprint(testData); - jacksonBytes = serializeWithJackson(testData); + imprintBytesBuffer = serializeWithImprint(testData); + jacksonJsonBytes = serializeWithJacksonJson(testData); kryoBytes = serializeWithKryo(testData); + messagePackBytes = serializeWithMessagePack(testData); + avroBytes = serializeWithAvro(testData); + protobufBytes = serializeWithProtobuf(testData); + flatbuffersBytes = serializeWithFlatBuffers(testData); } // ===== SERIALIZATION BENCHMARKS 
===== @@ -68,8 +98,8 @@ public void serializeImprint(Blackhole bh) throws Exception { } @Benchmark - public void serializeJackson(Blackhole bh) throws Exception { - byte[] result = serializeWithJackson(testData); + public void serializeJacksonJson(Blackhole bh) throws Exception { + byte[] result = serializeWithJacksonJson(testData); bh.consume(result); } @@ -79,17 +109,41 @@ public void serializeKryo(Blackhole bh) { bh.consume(result); } + @Benchmark + public void serializeMessagePack(Blackhole bh) throws Exception { + byte[] result = serializeWithMessagePack(testData); + bh.consume(result); + } + + @Benchmark + public void serializeAvro(Blackhole bh) throws Exception { + byte[] result = serializeWithAvro(testData); + bh.consume(result); + } + + @Benchmark + public void serializeProtobuf(Blackhole bh) { + byte[] result = serializeWithProtobuf(testData); + bh.consume(result); + } + + @Benchmark + public void serializeFlatBuffers(Blackhole bh) { + ByteBuffer result = serializeWithFlatBuffers(testData); + bh.consume(result); + } + // ===== DESERIALIZATION BENCHMARKS ===== @Benchmark public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytes.duplicate()); + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); bh.consume(result); } @Benchmark - public void deserializeJackson(Blackhole bh) throws Exception { - TestRecord result = jackson.readValue(jacksonBytes, TestRecord.class); + public void deserializeJacksonJson(Blackhole bh) throws Exception { + TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); bh.consume(result); } @@ -101,135 +155,361 @@ public void deserializeKryo(Blackhole bh) { bh.consume(result); } + @Benchmark + public void deserializeMessagePack(Blackhole bh) throws Exception { + TestRecord result = messagePackMapper.readValue(messagePackBytes, TestRecord.class); + bh.consume(result); + } + + @Benchmark + public void 
deserializeAvro(Blackhole bh) throws Exception { + GenericRecord result = deserializeWithAvro(avroBytes); + bh.consume(result); + } + + @Benchmark + public void deserializeProtobuf(Blackhole bh) throws Exception { + TestRecordProto.TestRecord result = TestRecordProto.TestRecord.parseFrom(protobufBytes); + bh.consume(result); + } + + @Benchmark + public void deserializeFlatBuffers(Blackhole bh) { + TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + bh.consume(result); + } + // ===== FIELD ACCESS BENCHMARKS ===== // Tests accessing a single field near the end of a large record + // This showcases Imprint's O(1) directory lookup vs sequential deserialization + @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { - ImprintRecord record = ImprintRecord.deserialize(imprintBytes.duplicate()); - - // Access field 15 directly via directory lookup - O(1) - var field15 = record.getValue(15); + ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + var field15 = record.getString(15); bh.consume(field15); } @Benchmark - public void singleFieldAccessJackson(Blackhole bh) throws Exception { - // Jackson must deserialize entire object to access any field - TestRecord record = jackson.readValue(jacksonBytes, TestRecord.class); - - // Access field15 equivalent (extraData[4]) after full deserialization + public void singleFieldAccessJacksonJson(Blackhole bh) throws Exception { + TestRecord record = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); bh.consume(record.extraData.get(4)); } @Benchmark public void singleFieldAccessKryo(Blackhole bh) { - // Kryo must deserialize entire object to access any field Input input = new Input(new ByteArrayInputStream(kryoBytes)); TestRecord record = kryo.readObject(input, TestRecord.class); input.close(); - - // Access field15 equivalent (extraData[4]) after full deserialization bh.consume(record.extraData.get(4)); } + @Benchmark + public 
void singleFieldAccessMessagePack(Blackhole bh) throws Exception { + TestRecord record = messagePackMapper.readValue(messagePackBytes, TestRecord.class); + bh.consume(record.extraData.get(4)); + } + + @Benchmark + public void singleFieldAccessAvro(Blackhole bh) throws Exception { + GenericRecord record = deserializeWithAvro(avroBytes); + bh.consume(record.get("extraData4")); // Accessing field near end + } + + @Benchmark + public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { + TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); + bh.consume(record.getExtraData(4)); // Accessing field near end + } + + @Benchmark + public void singleFieldAccessFlatBuffers(Blackhole bh) { + TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + bh.consume(record.extraData(4)); // Accessing field near end - zero copy! + } + // ===== SIZE COMPARISON ===== @Benchmark - public void measureImprintSize(Blackhole bh) throws Exception { - ByteBuffer serialized = serializeWithImprint(testData); - bh.consume(serialized.remaining()); + public void measureImprintSize(Blackhole bh) { + bh.consume(imprintBytesBuffer.remaining()); } @Benchmark - public void measureJacksonSize(Blackhole bh) throws Exception { - byte[] serialized = serializeWithJackson(testData); - bh.consume(serialized.length); + public void measureJacksonJsonSize(Blackhole bh) { + bh.consume(jacksonJsonBytes.length); } @Benchmark public void measureKryoSize(Blackhole bh) { - byte[] serialized = serializeWithKryo(testData); - bh.consume(serialized.length); + bh.consume(kryoBytes.length); + } + + @Benchmark + public void measureMessagePackSize(Blackhole bh) { + bh.consume(messagePackBytes.length); + } + + @Benchmark + public void measureAvroSize(Blackhole bh) { + bh.consume(avroBytes.length); + } + + @Benchmark + public void measureProtobufSize(Blackhole bh) { + bh.consume(protobufBytes.length); + } + + @Benchmark + public void 
measureFlatBuffersSize(Blackhole bh) { + bh.consume(flatbuffersBytes.remaining()); } // ===== MERGE SIMULATION BENCHMARKS ===== @Benchmark public void mergeImprint(Blackhole bh) throws Exception { - var record1 = serializeWithImprint(testData); - var record2 = serializeWithImprint(createTestRecord2()); + var record1Buffer = imprintBytesBuffer.duplicate(); + var record2Data = createTestRecord2(); + var record2Buffer = serializeWithImprint(record2Data); - var deserialized1 = ImprintRecord.deserialize(record1); - var deserialized2 = ImprintRecord.deserialize(record2); + var deserialized1 = ImprintRecord.deserialize(record1Buffer); + var deserialized2 = ImprintRecord.deserialize(record2Buffer); var merged = simulateMerge(deserialized1, deserialized2); - + bh.consume(merged); } @Benchmark - public void mergeJackson(Blackhole bh) throws Exception { - // Jackson merge requires full deserialization + merge + serialization - var record1 = jackson.readValue(jacksonBytes, TestRecord.class); - var record2 = jackson.readValue(serializeWithJackson(createTestRecord2()), TestRecord.class); - - var merged = mergeTestRecords(record1, record2); - byte[] result = jackson.writeValueAsBytes(merged); - + public void mergeJacksonJson(Blackhole bh) throws Exception { + var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); + var record2Data = createTestRecord2(); + var record2Bytes = serializeWithJacksonJson(record2Data); + var record2 = jacksonJsonMapper.readValue(record2Bytes, TestRecord.class); + + var mergedPojo = mergeTestRecords(record1, record2); + byte[] result = jacksonJsonMapper.writeValueAsBytes(mergedPojo); bh.consume(result); } @Benchmark public void mergeKryo(Blackhole bh) { - // Kryo merge requires full deserialization + merge + serialization Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); var record1 = kryo.readObject(input1, TestRecord.class); input1.close(); - - Input input2 = new Input(new 
ByteArrayInputStream(serializeWithKryo(createTestRecord2()))); + + var record2Data = createTestRecord2(); + var record2Bytes = serializeWithKryo(record2Data); + Input input2 = new Input(new ByteArrayInputStream(record2Bytes)); var record2 = kryo.readObject(input2, TestRecord.class); input2.close(); - - var merged = mergeTestRecords(record1, record2); - byte[] result = serializeWithKryo(merged); - + + var mergedPojo = mergeTestRecords(record1, record2); + byte[] result = serializeWithKryo(mergedPojo); + bh.consume(result); + } + + @Benchmark + public void mergeMessagePack(Blackhole bh) throws Exception { + var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); + var record2Data = createTestRecord2(); + var record2Bytes = serializeWithMessagePack(record2Data); + var record2 = messagePackMapper.readValue(record2Bytes, TestRecord.class); + + var mergedPojo = mergeTestRecords(record1, record2); + byte[] result = messagePackMapper.writeValueAsBytes(mergedPojo); bh.consume(result); } + @Benchmark + public void mergeAvro(Blackhole bh) throws Exception { + var record1 = deserializeWithAvro(avroBytes); + var record2Data = createTestRecord2(); + var record2Bytes = serializeWithAvro(record2Data); + var record2 = deserializeWithAvro(record2Bytes); + + var merged = mergeAvroRecords(record1, record2); + byte[] result = serializeAvroRecord(merged); + bh.consume(result); + } + + @Benchmark + public void mergeProtobuf(Blackhole bh) throws Exception { + var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); + var record2Data = createTestRecord2(); + var record2Bytes = serializeWithProtobuf(record2Data); + var record2 = TestRecordProto.TestRecord.parseFrom(record2Bytes); + + var merged = mergeProtobufRecords(record1, record2); + byte[] result = merged.toByteArray(); + bh.consume(result); + } + + @Benchmark + public void mergeFlatBuffers(Blackhole bh) { + var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + var 
record2Data = createTestRecord2(); + var record2Buffer = serializeWithFlatBuffers(record2Data); + var record2 = TestRecordFB.getRootAsTestRecordFB(record2Buffer); + + var merged = mergeFlatBuffersRecords(record1, record2); + bh.consume(merged); + } + + // ===== MAIN METHOD TO RUN BENCHMARKS ===== + + public static void main(String[] args) throws RunnerException { + runAll(); + // Or, uncomment specific runner methods to execute subsets: + // runSerializationBenchmarks(); + // runDeserializationBenchmarks(); + // runFieldAccessBenchmarks(); + // runSizeComparisonBenchmarks(); + // runMergeBenchmarks(); + // runMessagePackBenchmarks(); + } + + public static void runAll() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName()) + .build(); + new Runner(opt).run(); + } + + public static void runSerializationBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".serialize.*") + .build(); + new Runner(opt).run(); + } + + public static void runDeserializationBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".deserialize.*") + .build(); + new Runner(opt).run(); + } + + public static void runFieldAccessBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".singleFieldAccess.*") + .build(); + new Runner(opt).run(); + } + + public static void runSizeComparisonBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".measure.*") + .build(); + new Runner(opt).run(); + } + + public static void runMergeBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".merge.*") + .build(); + new Runner(opt).run(); + } + + public static void 
runMessagePackBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".*MessagePack.*") + .build(); + new Runner(opt).run(); + } + + public static void runAvroBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".*Avro.*") + .build(); + new Runner(opt).run(); + } + + public static void runProtobufBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".*Protobuf.*") + .build(); + new Runner(opt).run(); + } + + public static void runFlatBuffersBenchmarks() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ComparisonBenchmark.class.getSimpleName() + ".*FlatBuffers.*") + .build(); + new Runner(opt).run(); + } + // ===== HELPER METHODS ===== + private void setupAvro() { + String schemaJson = "{\n" + + " \"type\": \"record\",\n" + + " \"name\": \"TestRecord\",\n" + + " \"fields\": [\n" + + " {\"name\": \"id\", \"type\": \"int\"},\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"price\", \"type\": \"double\"},\n" + + " {\"name\": \"active\", \"type\": \"boolean\"},\n" + + " {\"name\": \"category\", \"type\": \"string\"},\n" + + " {\"name\": \"tags\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" + + " {\"name\": \"metadata\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},\n" + + " {\"name\": \"extraData0\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData1\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData2\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData3\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData4\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData5\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData6\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData7\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData8\", 
\"type\": \"string\"},\n" + + " {\"name\": \"extraData9\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData10\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData11\", \"type\": \"string\"},\n" + + " {\"name\": \"extraData12\", \"type\": \"string\"}\n" + + " ]\n" + + "}"; + + avroSchema = new Schema.Parser().parse(schemaJson); + avroWriter = new GenericDatumWriter<>(avroSchema); + avroReader = new GenericDatumReader<>(avroSchema); + } + private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - + writer.addField(1, Value.fromInt32(data.id)); writer.addField(2, Value.fromString(data.name)); writer.addField(3, Value.fromFloat64(data.price)); writer.addField(4, Value.fromBoolean(data.active)); writer.addField(5, Value.fromString(data.category)); - - // Convert tags list + var tagValues = new ArrayList(); - for (String tag : data.tags) { - tagValues.add(Value.fromString(tag)); + if (data.tags != null) { + for (String tag : data.tags) { + tagValues.add(Value.fromString(tag)); + } } writer.addField(6, Value.fromArray(tagValues)); - - // Convert metadata map + var metadataMap = new HashMap(); - for (var entry : data.metadata.entrySet()) { - metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); + if (data.metadata != null) { + for (var entry : data.metadata.entrySet()) { + metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); + } } writer.addField(7, Value.fromMap(metadataMap)); - - // Add extra fields (8-20) to create a larger record - for (int i = 0; i < data.extraData.size(); i++) { - writer.addField(8 + i, Value.fromString(data.extraData.get(i))); + + if (data.extraData != null) { + for (int i = 0; i < data.extraData.size(); i++) { + writer.addField(8 + i, Value.fromString(data.extraData.get(i))); + } } - + return writer.build().serializeToBuffer(); } - private byte[] serializeWithJackson(TestRecord data) throws 
Exception { - return jackson.writeValueAsBytes(data); + private byte[] serializeWithJacksonJson(TestRecord data) throws Exception { + return jacksonJsonMapper.writeValueAsBytes(data); } private byte[] serializeWithKryo(TestRecord data) { @@ -240,14 +520,117 @@ private byte[] serializeWithKryo(TestRecord data) { return baos.toByteArray(); } + private byte[] serializeWithMessagePack(TestRecord data) throws Exception { + return messagePackMapper.writeValueAsBytes(data); + } + + private byte[] serializeWithAvro(TestRecord data) throws Exception { + GenericRecord record = new GenericData.Record(avroSchema); + record.put("id", data.id); + record.put("name", data.name); + record.put("price", data.price); + record.put("active", data.active); + record.put("category", data.category); + record.put("tags", data.tags); + record.put("metadata", data.metadata); + + for (int i = 0; i < data.extraData.size(); i++) { + record.put("extraData" + i, data.extraData.get(i)); + } + + return serializeAvroRecord(record); + } + + private byte[] serializeAvroRecord(GenericRecord record) throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + avroWriter.write(record, encoder); + encoder.flush(); + return baos.toByteArray(); + } + + private GenericRecord deserializeWithAvro(byte[] data) throws Exception { + Decoder decoder = DecoderFactory.get().binaryDecoder(data, null); + return avroReader.read(null, decoder); + } + + private byte[] serializeWithProtobuf(TestRecord data) { + var builder = TestRecordProto.TestRecord.newBuilder() + .setId(data.id) + .setName(data.name) + .setPrice(data.price) + .setActive(data.active) + .setCategory(data.category) + .addAllTags(data.tags) + .putAllMetadata(data.metadata); + + for (String extraData : data.extraData) { + builder.addExtraData(extraData); + } + + return builder.build().toByteArray(); + } + + private ByteBuffer serializeWithFlatBuffers(TestRecord data) { + 
FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + // Create strings (must be created before the object that uses them) + int nameOffset = builder.createString(data.name); + int categoryOffset = builder.createString(data.category); + + // Create tags array + int[] tagOffsets = new int[data.tags.size()]; + for (int i = 0; i < data.tags.size(); i++) { + tagOffsets[i] = builder.createString(data.tags.get(i)); + } + int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); + + // Create metadata (as parallel arrays for keys and values) + String[] metadataKeys = data.metadata.keySet().toArray(new String[0]); + String[] metadataValues = new String[metadataKeys.length]; + int[] keyOffsets = new int[metadataKeys.length]; + int[] valueOffsets = new int[metadataKeys.length]; + + for (int i = 0; i < metadataKeys.length; i++) { + metadataValues[i] = data.metadata.get(metadataKeys[i]); + keyOffsets[i] = builder.createString(metadataKeys[i]); + valueOffsets[i] = builder.createString(metadataValues[i]); + } + int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); + int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); + + // Create extra data array + int[] extraDataOffsets = new int[data.extraData.size()]; + for (int i = 0; i < data.extraData.size(); i++) { + extraDataOffsets[i] = builder.createString(data.extraData.get(i)); + } + int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); + + // Create the main object + TestRecordFB.startTestRecordFB(builder); + TestRecordFB.addId(builder, data.id); + TestRecordFB.addName(builder, nameOffset); + TestRecordFB.addPrice(builder, data.price); + TestRecordFB.addActive(builder, data.active); + TestRecordFB.addCategory(builder, categoryOffset); + TestRecordFB.addTags(builder, tagsOffset); + TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); + TestRecordFB.addMetadataValues(builder, metadataValuesOffset); + 
TestRecordFB.addExtraData(builder, extraDataOffset); + int recordOffset = TestRecordFB.endTestRecordFB(builder); + + // Finish and return + builder.finish(recordOffset); + return builder.dataBuffer().slice(); + } + private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { var writer = new ImprintWriter(first.getHeader().getSchemaId()); var usedFieldIds = new HashSet(); - - // Copy fields from first record (takes precedence) + copyFieldsToWriter(first, writer, usedFieldIds); copyFieldsToWriter(second, writer, usedFieldIds); - + return writer.build(); } @@ -265,23 +648,121 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< } private TestRecord mergeTestRecords(TestRecord first, TestRecord second) { - // Simple merge logic - first record takes precedence var merged = new TestRecord(); merged.id = first.id; merged.name = first.name != null ? first.name : second.name; merged.price = first.price != 0.0 ? first.price : second.price; merged.active = first.active; merged.category = first.category != null ? 
first.category : second.category; - + merged.tags = new ArrayList<>(first.tags); merged.tags.addAll(second.tags); - + merged.metadata = new HashMap<>(first.metadata); merged.metadata.putAll(second.metadata); - + return merged; } + private GenericRecord mergeAvroRecords(GenericRecord first, GenericRecord second) { + GenericRecord merged = new GenericData.Record(avroSchema); + + // Copy all fields from first record + for (Schema.Field field : avroSchema.getFields()) { + merged.put(field.name(), first.get(field.name())); + } + + // Override with non-null values from second record + for (Schema.Field field : avroSchema.getFields()) { + Object secondValue = second.get(field.name()); + if (secondValue != null && !secondValue.toString().isEmpty()) { + merged.put(field.name(), secondValue); + } + } + + return merged; + } + + private TestRecordProto.TestRecord mergeProtobufRecords(TestRecordProto.TestRecord first, TestRecordProto.TestRecord second) { + return TestRecordProto.TestRecord.newBuilder() + .mergeFrom(first) + .mergeFrom(second) + .build(); + } + + private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB second) { + FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + // Use second record's values if they exist, otherwise first record's values + String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); + String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); + double price = second.price() != 0.0 ? 
second.price() : first.price(); + boolean active = second.active(); // Use second's boolean value + int id = first.id(); // Keep first record's ID + + // Create merged strings + int nameOffset = builder.createString(name); + int categoryOffset = builder.createString(category); + + // Merge tags (combine both arrays) + List mergedTags = new ArrayList<>(); + for (int i = 0; i < first.tagsLength(); i++) { + mergedTags.add(first.tags(i)); + } + for (int i = 0; i < second.tagsLength(); i++) { + mergedTags.add(second.tags(i)); + } + + int[] tagOffsets = new int[mergedTags.size()]; + for (int i = 0; i < mergedTags.size(); i++) { + tagOffsets[i] = builder.createString(mergedTags.get(i)); + } + int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); + + // Merge metadata (second overwrites first) + Map mergedMetadata = new HashMap<>(); + for (int i = 0; i < first.metadataKeysLength(); i++) { + mergedMetadata.put(first.metadataKeys(i), first.metadataValues(i)); + } + for (int i = 0; i < second.metadataKeysLength(); i++) { + mergedMetadata.put(second.metadataKeys(i), second.metadataValues(i)); + } + + String[] metadataKeys = mergedMetadata.keySet().toArray(new String[0]); + int[] keyOffsets = new int[metadataKeys.length]; + int[] valueOffsets = new int[metadataKeys.length]; + + for (int i = 0; i < metadataKeys.length; i++) { + keyOffsets[i] = builder.createString(metadataKeys[i]); + valueOffsets[i] = builder.createString(mergedMetadata.get(metadataKeys[i])); + } + int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); + int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); + + // Use first record's extra data (or could merge both) + int[] extraDataOffsets = new int[first.extraDataLength()]; + for (int i = 0; i < first.extraDataLength(); i++) { + extraDataOffsets[i] = builder.createString(first.extraData(i)); + } + int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); 
+ + // Create the merged object + TestRecordFB.startTestRecordFB(builder); + TestRecordFB.addId(builder, id); + TestRecordFB.addName(builder, nameOffset); + TestRecordFB.addPrice(builder, price); + TestRecordFB.addActive(builder, active); + TestRecordFB.addCategory(builder, categoryOffset); + TestRecordFB.addTags(builder, tagsOffset); + TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); + TestRecordFB.addMetadataValues(builder, metadataValuesOffset); + TestRecordFB.addExtraData(builder, extraDataOffset); + int recordOffset = TestRecordFB.endTestRecordFB(builder); + + builder.finish(recordOffset); + return builder.dataBuffer().slice(); + } + private TestRecord createTestRecord() { var record = new TestRecord(); record.id = 12345; @@ -289,20 +770,19 @@ var record = new TestRecord(); record.price = 99.99; record.active = true; record.category = "Electronics"; - + record.tags = Arrays.asList("popular", "trending", "bestseller"); - + record.metadata = new HashMap<>(); record.metadata.put("manufacturer", "TechCorp"); record.metadata.put("model", "TC-2024"); record.metadata.put("warranty", "2 years"); - - // Add extra data to create a larger record (fields 8-20) + record.extraData = new ArrayList<>(); for (int i = 0; i < 13; i++) { record.extraData.add("extraField" + i + "_value_" + (1000 + i)); } - + return record; } @@ -313,19 +793,18 @@ var record = new TestRecord(); record.price = 149.99; record.active = false; record.category = "Software"; - + record.tags = Arrays.asList("new", "premium"); - + record.metadata = new HashMap<>(); record.metadata.put("vendor", "SoftCorp"); record.metadata.put("version", "2.1"); - - // Add extra data to match the structure + record.extraData = new ArrayList<>(); for (int i = 0; i < 13; i++) { record.extraData.add("extraField" + i + "_value2_" + (2000 + i)); } - + return record; } @@ -339,7 +818,7 @@ public static class TestRecord { public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); public List extraData 
= new ArrayList<>(); // Fields 8-20 for large record test - + public TestRecord() {} // Required for deserialization } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 5d4719f..2291550 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -34,7 +34,7 @@ public final class ImprintRecord { */ public ImprintRecord(Header header, List directory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.directory = List.copyOf(Objects.requireNonNull(directory, "Directory cannot be null")); + this.directory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view } @@ -79,12 +79,14 @@ private ByteBuffer getFieldBuffer(int fieldId) { int endOffset = (index + 1 < directory.size()) ? directory.get(index + 1).getOffset() : payload.limit(); - var fieldBuffer = payload.duplicate(); if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { return null; } + + // OPTIMIZATION: Single allocation instead of duplicate + slice + var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer.slice(); + return fieldBuffer; } /** @@ -261,7 +263,7 @@ private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throw } private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - ByteBuffer valueSpecificBuffer = buffer.duplicate(); + var valueSpecificBuffer = buffer.duplicate(); valueSpecificBuffer.order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { @@ -351,7 +353,7 @@ public double getFloat64(int fieldId) throws ImprintException { * @throws ImprintException if the field is not found, is null, or is not of type STRING. 
*/ public String getString(int fieldId) throws ImprintException { - Value value = getValue(fieldId); + var value = getValue(fieldId); if (value == null) { throw new ImprintException(ErrorType.FIELD_NOT_FOUND, diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index be4fc7b..e58c355 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -433,26 +433,38 @@ public int estimateSize(Value value) throws ImprintException { } return arraySize; } - + @Override public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { return readComplexValueBytes(buffer, "ARRAY", (tempBuffer, numElements) -> { if (tempBuffer.remaining() < 1) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for ARRAY element type code in temp buffer during measurement."); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for ARRAY element type code"); } byte elementTypeCodeByte = tempBuffer.get(); - int typeCodeLength = 1; + var elementType = TypeCode.fromByte(elementTypeCodeByte); - TypeHandler elementHandler = TypeCode.fromByte(elementTypeCodeByte).getHandler(); - int elementsDataLength = 0; - for (int i = 0; i < numElements; i++) { - int elementStartPos = tempBuffer.position(); - elementHandler.readValueBytes(tempBuffer); - elementsDataLength += (tempBuffer.position() - elementStartPos); + switch (elementType) { + case NULL: + return 1; + case BOOL: + return 1 + numElements; + case INT32: + case FLOAT32: + return 1 + (numElements * 4); + case INT64: + case FLOAT64: + return 1 + (numElements * 8); + default: + var elementHandler = elementType.getHandler(); + int elementsDataLength = 0; + for (int i = 0; i < numElements; i++) { + int elementStartPos = tempBuffer.position(); + elementHandler.readValueBytes(tempBuffer); + elementsDataLength += (tempBuffer.position() - elementStartPos); + } + return 1 + 
elementsDataLength; } - - return typeCodeLength + elementsDataLength; }); } }; @@ -549,28 +561,52 @@ public int estimateSize(Value value) throws ImprintException { } return mapSize; } - + @Override public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { return readComplexValueBytes(buffer, "MAP", (tempBuffer, numEntries) -> { if (tempBuffer.remaining() < 2) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for MAP key/value type codes in temp buffer during measurement."); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for MAP key/value type codes"); } byte keyTypeCodeByte = tempBuffer.get(); byte valueTypeCodeByte = tempBuffer.get(); - int typeCodesLength = 2; - int entriesDataLength = 0; - for (int i = 0; i < numEntries; i++) { - int entryStartPos = tempBuffer.position(); - TypeCode.fromByte(keyTypeCodeByte).getHandler().readValueBytes(tempBuffer); - TypeCode.fromByte(valueTypeCodeByte).getHandler().readValueBytes(tempBuffer); - entriesDataLength += (tempBuffer.position() - entryStartPos); - } + TypeCode keyType = TypeCode.fromByte(keyTypeCodeByte); + TypeCode valueType = TypeCode.fromByte(valueTypeCodeByte); + + // OPTIMIZATION: Calculate sizes directly for fixed-size types + int keySize = getFixedTypeSize(keyType); + int valueSize = getFixedTypeSize(valueType); - return typeCodesLength + entriesDataLength; + if (keySize > 0 && valueSize > 0) { + // Both are fixed-size: O(1) calculation + return 2 + (numEntries * (keySize + valueSize)); + } else { + // At least one is variable-size: fall back to traversal + int entriesDataLength = 0; + for (int i = 0; i < numEntries; i++) { + int entryStartPos = tempBuffer.position(); + keyType.getHandler().readValueBytes(tempBuffer); + valueType.getHandler().readValueBytes(tempBuffer); + entriesDataLength += (tempBuffer.position() - entryStartPos); + } + return 2 + entriesDataLength; + } }); } + + private int getFixedTypeSize(TypeCode type) { + 
switch (type) { + case NULL: return 0; + case BOOL: return 1; + case INT32: + case FLOAT32: return 4; + case INT64: + case FLOAT64: return 8; + default: return -1; // Variable size + } + } + private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { switch (key.getTypeCode()) { diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index 7f3bbb9..ba747de 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -328,16 +328,19 @@ public String toString() { // String Value (ByteBuffer-based) public static class StringBufferValue extends Value { private final ByteBuffer value; - private volatile String cachedString; // lazy decode + private volatile String cachedString; + + private static final int THREAD_LOCAL_BUFFER_SIZE = 1024; + private static final ThreadLocal DECODE_BUFFER_CACHE = + ThreadLocal.withInitial(() -> new byte[THREAD_LOCAL_BUFFER_SIZE]); public StringBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); // zero-copy read-only view + this.value = value.asReadOnlyBuffer(); } public String getValue() { String result = cachedString; if (result == null) { - // Simple, fast decoding - no thread-local overhead result = decodeUtf8(); cachedString = result; } @@ -345,18 +348,25 @@ public String getValue() { } private String decodeUtf8() { - // Fast path: zero-copy for array-backed ByteBuffers + final byte[] array; + final int offset; + final int length = value.remaining(); + if (value.hasArray()) { - return new String(value.array(), value.arrayOffset() + value.position(), - value.remaining(), StandardCharsets.UTF_8); + array = value.array(); + offset = value.arrayOffset() + value.position(); + } else { + byte[] threadLocalBuffer = DECODE_BUFFER_CACHE.get(); + if (length <= threadLocalBuffer.length) { + array = threadLocalBuffer; + } else { + // Fallback: copy bytes from the ByteBuffer to a new heap array (if too large 
for cache) + array = new byte[length]; + } + value.duplicate().get(array, 0, length); // Get bytes from current position into chosen array + offset = 0; } - - // Fallback path for non-array-backed ByteBuffers (e.g., direct buffers). - // Allocation is required here as Java's String(byte[],...) constructor needs a heap array. - // Data is copied from the ByteBuffer to a new byte array. - var array = new byte[value.remaining()]; - value.duplicate().get(array); - return new String(array, StandardCharsets.UTF_8); + return new String(array, offset, length, StandardCharsets.UTF_8); } public ByteBuffer getBuffer() { diff --git a/src/test/java/com/imprint/benchmark/ProfilerTest.java b/src/test/java/com/imprint/benchmark/ProfilerTest.java deleted file mode 100644 index 5b531a9..0000000 --- a/src/test/java/com/imprint/benchmark/ProfilerTest.java +++ /dev/null @@ -1,226 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.*; -import com.imprint.types.Value; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Disabled; - -import java.util.Random; - -/** - * A test designed for profiling hotspots during development. - *

- * To use with a profiler: - * 1. Remove @Disabled annotation - * 2. Run with JProfiler, VisualVM, or async-profiler: - * - JProfiler: Attach to test JVM - * - VisualVM: jvisualvm, attach to process - * - async-profiler: java -jar async-profiler.jar -d 30 -f profile.html - * 3. Look for hotspots in CPU sampling - *

- * Key areas to examine: - * - Object allocation (memory profiling) - * - Method call frequency (CPU sampling) - * - GC pressure (memory profiling) - * - String operations and UTF-8 encoding - * - ByteBuffer operations - */ -@Disabled("Enable manually for profiling") -public class ProfilerTest { - - private static final int ITERATIONS = 1_000_000; - private static final int RECORD_SIZE = 20; - - @Test - void profileFieldAccess() throws Exception { - System.out.println("Starting profiler test - attach profiler now..."); - Thread.sleep(5000); // Give time to attach profiler - - // Create a representative record - var record = createTestRecord(); - - System.out.println("Beginning field access profiling..."); - long start = System.nanoTime(); - - // Simulate real-world access patterns - Random random = new Random(42); - int hits = 0; - - for (int i = 0; i < ITERATIONS; i++) { - // Random field access (hotspot) - int fieldId = random.nextInt(RECORD_SIZE) + 1; - var value = record.getValue(fieldId); - if (value != null) { - hits++; - - // Trigger string decoding (potential hotspot) - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } else { - ((Value.StringValue) value).getValue(); - } - } - } - - // Some raw access (zero-copy path) - if (i % 10 == 0) { - record.getRawBytes(fieldId); - } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", - ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); - } - - @Test - void profileSerialization() throws Exception { - System.out.println("Starting serialization profiler test..."); - Thread.sleep(3000); - - var schemaId = new SchemaId(1, 0x12345678); - - System.out.println("Beginning serialization profiling..."); - long start = System.nanoTime(); - - // Create and serialize many records (allocation hotspot) - 
for (int i = 0; i < 100_000; i++) { - var writer = new ImprintWriter(schemaId); - - // Add various field types - writer.addField(1, Value.fromInt32(i)) - .addField(2, Value.fromString("test-string-" + i)) - .addField(3, Value.fromFloat64(i * 3.14159)) - .addField(4, Value.fromBytes(("bytes-" + i).getBytes())); - - var record = writer.build(); - var serialized = record.serializeToBuffer(); // Potential hotspot - - // Trigger some deserialization - if (i % 1000 == 0) { - var deserialized = ImprintRecord.deserialize(serialized); - deserialized.getValue(2); // String decoding hotspot - } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed serialization test in %.2f ms%n", duration / 1_000_000.0); - } - - @Test - void profileProjection() throws Exception { - System.out.println("Starting projection profiler test..."); - Thread.sleep(3000); - - var record = createLargeRecord(); - - System.out.println("Beginning projection profiling..."); - long start = System.nanoTime(); - - // Simulate analytical workload - project subset of fields repeatedly - for (int i = 0; i < 50_000; i++) { - // Project 10 fields out of 100 (common analytical pattern) - for (int fieldId = 1; fieldId <= 10; fieldId++) { - var value = record.getValue(fieldId); - if (value != null) { - // Force materialization of string values - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } - } - } - } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed projection test in %.2f ms%n", duration / 1_000_000.0); - } - - @Test - void profileMemoryAllocation() throws Exception { - System.out.println("Starting allocation profiler test..."); - Thread.sleep(3000); - - System.out.println("Beginning allocation profiling - watch for GC events..."); - - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for 
(int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var writer = new ImprintWriter(schemaId); - - // Create strings of varying sizes (allocation pressure) - writer.addField(1, Value.fromString("small")) - .addField(2, Value.fromString("medium-length-string-" + i)) - .addField(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .addField(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = writer.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); - } - - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); - } - } - - System.out.println("Allocation test complete - check GC logs and memory profiler"); - } - - private ImprintRecord createTestRecord() throws Exception { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - for (int i = 1; i <= RECORD_SIZE; i++) { - switch (i % 4) { - case 0: - writer.addField(i, Value.fromInt32(i * 100)); - break; - case 1: - writer.addField(i, Value.fromString("field-value-" + i)); - break; - case 2: - writer.addField(i, Value.fromFloat64(i * 3.14159)); - break; - case 3: - writer.addField(i, Value.fromBytes(("bytes-" + i).getBytes())); - break; - } - } - - return writer.build(); - } - - private ImprintRecord createLargeRecord() throws Exception { - var schemaId = new SchemaId(2, 0xcafebabe); - var writer = new ImprintWriter(schemaId); - - // Create 100 fields with realistic data - for (int i = 1; i <= 100; i++) { - switch (i % 5) { - case 0: - writer.addField(i, Value.fromInt32(i)); - break; - case 1: - writer.addField(i, Value.fromString("user-name-" + i + "@example.com")); - break; - case 2: - writer.addField(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); - break; - case 3: - writer.addField(i, Value.fromFloat64(i * 
2.718281828)); - break; - case 4: - writer.addField(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); - break; - } - } - - return writer.build(); - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 64be931..d48c1aa 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -4,7 +4,6 @@ import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.Value; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.Random; @@ -82,12 +81,12 @@ void profileSerialization() throws Exception { Thread.sleep(3000); var schemaId = new SchemaId(1, 0x12345678); - + System.out.println("Beginning serialization profiling..."); long start = System.nanoTime(); // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 500_000; i++) { + for (int i = 0; i < 100_000; i++) { var writer = new ImprintWriter(schemaId); // Add various field types From 0341d5d95092cafbedff4ea20a35e1d8b269760b Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 10:22:08 -0400 Subject: [PATCH 20/53] replace deprecated gradle methods with latest --- build.gradle | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/build.gradle b/build.gradle index 8406676..66f2e3e 100644 --- a/build.gradle +++ b/build.gradle @@ -73,7 +73,7 @@ protobuf { } // Download and setup FlatBuffers compiler for Linux (CI environment) -task downloadFlatc { +tasks.register('downloadFlatc') { description = 'Download FlatBuffers compiler' group = 'build setup' @@ -117,7 +117,7 @@ task downloadFlatc { } // Generate FlatBuffers sources -task generateFlatBuffers(type: Exec) { +tasks.register('generateFlatBuffers', Exec) { dependsOn downloadFlatc description = 'Generate Java classes from FlatBuffers schema' group = 
'build' @@ -166,11 +166,11 @@ test { } } -// JMH configuration +// JMH configuration - optimized for Java 11 jmh { fork = 1 warmupIterations = 2 // Reduced for faster CI - iterations = 3 // Reduced for faster CI + iterations = 3 resultFormat = 'JSON' includeTests = false resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") @@ -180,12 +180,12 @@ jmh { '-XX:+UseG1GC', '-Xmx2g', '-XX:+UnlockExperimentalVMOptions', - '-XX:+UseJVMCICompiler' + '-XX:+UseJVMCICompiler' // Use Graal if available for better performance ] } // Create individual benchmark tasks for CI pipeline -task jmhRunSerializationBenchmarks(type: JavaExec) { +tasks.register('jmhRunSerializationBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run serialization benchmarks' group = 'benchmarking' @@ -206,7 +206,7 @@ task jmhRunSerializationBenchmarks(type: JavaExec) { } } -task jmhRunDeserializationBenchmarks(type: JavaExec) { +tasks.register('jmhRunDeserializationBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run deserialization benchmarks' group = 'benchmarking' @@ -227,7 +227,7 @@ task jmhRunDeserializationBenchmarks(type: JavaExec) { } } -task jmhRunFieldAccessBenchmarks(type: JavaExec) { +tasks.register('jmhRunFieldAccessBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run field access benchmarks' group = 'benchmarking' @@ -248,7 +248,7 @@ task jmhRunFieldAccessBenchmarks(type: JavaExec) { } } -task jmhRunSizeComparisonBenchmarks(type: JavaExec) { +tasks.register('jmhRunSizeComparisonBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run size comparison benchmarks' group = 'benchmarking' @@ -269,7 +269,7 @@ task jmhRunSizeComparisonBenchmarks(type: JavaExec) { } } -task jmhRunMergeBenchmarks(type: JavaExec) { +tasks.register('jmhRunMergeBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run merge operation benchmarks' group = 'benchmarking' @@ -290,7 +290,7 @@ task 
jmhRunMergeBenchmarks(type: JavaExec) { } } -task jmhRunAllBenchmarks(type: JavaExec) { +tasks.register('jmhRunAllBenchmarks', JavaExec) { dependsOn compileJmhJava description = 'Run all comparison benchmarks' group = 'benchmarking' From 8e18516cc28648098038de08af42aefb8f40eb46 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 14:59:54 -0400 Subject: [PATCH 21/53] Lazy load of directory and header data --- .../benchmark/ComparisonBenchmark.java | 68 ++- .../java/com/imprint/core/ImprintRecord.java | 405 +++++++++++++----- .../java/com/imprint/IntegrationTest.java | 68 ++- .../com/imprint/profile/ProfilerTest.java | 3 +- 4 files changed, 399 insertions(+), 145 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 49260b1..ce2fbcb 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -133,14 +133,34 @@ public void serializeFlatBuffers(Blackhole bh) { bh.consume(result); } - // ===== DESERIALIZATION BENCHMARKS ===== + // ===== PARTIAL DESERIALIZATION (SETUP ONLY) ===== +// These benchmarks measure the cost of preparing a record for field access, +// not the cost of accessing the actual data. This is important because +// +// 1. Imprint: Only parses header + stores raw directory bytes +// 2. FlatBuffers: Only wraps the buffer with minimal validation +// 3. Others (eager): Parse and construct all field objects upfront +// +// This comparison shows the advantage of lazy loading approaches when you +// only need to access a subset of fields. In real streaming workloads, +// records are often filtered/routed based on just a few key fields. +// +// For a fair "full deserialization" comparison, see FULL DESERIALIZATION BENCHMARKS. 
+ + @Benchmark + public void deserializeSetupImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + bh.consume(result); + } @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + public void deserializeSetupFlatBuffers(Blackhole bh) { + TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); bh.consume(result); } + // ===== FULL DESERIALIZATION BENCHMARKS ===== + @Benchmark public void deserializeJacksonJson(Blackhole bh) throws Exception { TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); @@ -173,15 +193,53 @@ public void deserializeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } + @Benchmark + public void deserializeImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + // Access all fields to force full deserialization + result.getInt32(1); // id + result.getString(2); // name + result.getFloat64(3); // price + result.getBoolean(4); // active + result.getString(5); // category + result.getArray(6); // tags + result.getMap(7); // metadata + for (int i = 8; i < 21; i++) { + result.getString(i); // extraData fields + } + + bh.consume(result); + } + @Benchmark public void deserializeFlatBuffers(Blackhole bh) { TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + + // Access all fields + result.id(); + result.name(); + result.price(); + result.active(); + result.category(); + // Access all tags + for (int i = 0; i < result.tagsLength(); i++) { + result.tags(i); + } + // Access all metadata + for (int i = 0; i < result.metadataKeysLength(); i++) { + result.metadataKeys(i); + result.metadataValues(i); + } + // Access all extra data + for (int i = 0; i < result.extraDataLength(); i++) { + 
result.extraData(i); + } + bh.consume(result); } // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a large record - // This showcases Imprint's O(1) directory lookup vs sequential deserialization + // Tests accessing a single field near the end of a record @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 2291550..da6b6e0 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -17,6 +17,10 @@ * An Imprint record containing a header, field directory, and payload. * Uses ByteBuffer for zero-copy operations to achieve low latency. * + *

This implementation uses lazy directory parsing for optimal single field access performance. + * The directory is only parsed when needed, and binary search is performed directly on raw bytes + * when possible.

+ * *

Performance Note: All ByteBuffers should be array-backed * (hasArray() == true) for optimal zero-copy performance. Direct buffers * may cause performance degradation.

@@ -24,37 +28,64 @@ @Getter public final class ImprintRecord { private final Header header; - private final List directory; + private final ByteBuffer directoryBuffer; // Raw directory bytes private final ByteBuffer payload; // Read-only view for zero-copy + // Lazy-loaded directory state + private List parsedDirectory; + private boolean directoryParsed = false; + + // Cache for parsed directory count to avoid repeated VarInt decoding + private int directoryCount = -1; + /** - * Creates a new ImprintRecord. + * Creates a new ImprintRecord with lazy directory parsing. * + * @param header the record header + * @param directoryBuffer raw directory bytes (including count) * @param payload the payload buffer. Should be array-backed for optimal performance. */ - public ImprintRecord(Header header, List directory, ByteBuffer payload) { + private ImprintRecord(Header header, ByteBuffer directoryBuffer, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.directory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); + this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view } + /** + * Creates a new ImprintRecord with pre-parsed directory (used by ImprintWriter). + * This constructor is used when the directory is already known and parsed. + * + * @param header the record header + * @param directory the parsed directory entries + * @param payload the payload buffer. Should be array-backed for optimal performance. 
+ */ + ImprintRecord(Header header, List directory, ByteBuffer payload) { + this.header = Objects.requireNonNull(header, "Header cannot be null"); + this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); + this.directoryParsed = true; + this.directoryCount = directory.size(); + this.payload = payload.asReadOnlyBuffer(); + + // Create directory buffer for serialization compatibility + this.directoryBuffer = createDirectoryBuffer(directory); + } + /** * Get a value by field ID, deserializing it on demand. * Returns null if the field is not found. * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE + * + *

Performance Note: Accessing fields one-by-one is optimized for single field access. + * If you need to access many fields from the same record, consider calling getDirectory() first + * to parse the full directory once, then access fields normally.

*/ public Value getValue(int fieldId) throws ImprintException { - var fieldBuffer = getFieldBuffer(fieldId); - if (fieldBuffer == null) { + DirectoryEntry entry = findDirectoryEntry(fieldId); + if (entry == null) { return null; } - int directoryIndex = findDirectoryIndex(fieldId); - if (directoryIndex < 0) { - throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); - } - var entry = directory.get(directoryIndex); - return deserializeValue(entry.getTypeCode(), fieldBuffer); + return deserializeValue(entry.getTypeCode(), getFieldBufferFromEntry(entry)); } /** @@ -62,38 +93,237 @@ public Value getValue(int fieldId) throws ImprintException { * Returns a zero-copy ByteBuffer view, or null if field not found. */ public ByteBuffer getRawBytes(int fieldId) { - var fieldBuffer = getFieldBuffer(fieldId); - return fieldBuffer != null ? fieldBuffer.asReadOnlyBuffer() : null; + try { + DirectoryEntry entry = findDirectoryEntry(fieldId); + if (entry == null) { + return null; + } + + return getFieldBufferFromEntry(entry).asReadOnlyBuffer(); + } catch (ImprintException e) { + return null; + } } /** - * Get a ByteBuffer view of a field's data. - * Returns null if the field is not found. + * Find a directory entry for the given field ID. + * Uses the most efficient method based on current state. */ - private ByteBuffer getFieldBuffer(int fieldId) { - int index = findDirectoryIndex(fieldId); - if (index < 0) return null; + private DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { + if (directoryParsed) { + // Use parsed directory + int index = findDirectoryIndexInParsed(fieldId); + return index >= 0 ? parsedDirectory.get(index) : null; + } else { + // Use fast binary search on raw bytes + return findFieldEntryFast(fieldId); + } + } - var entry = directory.get(index); - int startOffset = entry.getOffset(); - int endOffset = (index + 1 < directory.size()) ? 
- directory.get(index + 1).getOffset() : payload.limit(); + /** + * Fast binary search directly on raw directory bytes. + * This avoids parsing the entire directory for single field access. + */ + private DirectoryEntry findFieldEntryFast(int fieldId) throws ImprintException { + ByteBuffer searchBuffer = directoryBuffer.duplicate(); + searchBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Decode directory count (cache it to avoid repeated decoding) + if (directoryCount < 0) { + directoryCount = VarInt.decode(searchBuffer).getValue(); + } else { + // Skip past the VarInt count + VarInt.decode(searchBuffer); + } - if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { + if (directoryCount == 0) { return null; } - // OPTIMIZATION: Single allocation instead of duplicate + slice + // Now searchBuffer.position() points to the first directory entry + int directoryStartPos = searchBuffer.position(); + + int low = 0; + int high = directoryCount - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + + // Calculate position of mid entry + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + // Bounds check + if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Directory entry at position " + entryPos + " exceeds buffer limit " + searchBuffer.limit()); + } + + searchBuffer.position(entryPos); + short midFieldId = searchBuffer.getShort(); + + if (midFieldId < fieldId) { + low = mid + 1; + } else if (midFieldId > fieldId) { + high = mid - 1; + } else { + // Found it - read the complete entry + searchBuffer.position(entryPos); + return deserializeDirectoryEntry(searchBuffer); + } + } + + return null; // Not found + } + + /** + * Get the directory (parsing it if necessary). + * This maintains backward compatibility with existing code. + * + *

Performance Tip: If you plan to access many fields from this record, + * call this method first to parse the directory once, then use the field accessor methods. + * This is more efficient than accessing fields one-by-one when you need multiple fields.

+ */ + public List getDirectory() { + ensureDirectoryParsed(); + return parsedDirectory; + } + + /** + * Get a ByteBuffer view of a field's data from a DirectoryEntry. + */ + private ByteBuffer getFieldBufferFromEntry(DirectoryEntry entry) throws ImprintException { + int startOffset = entry.getOffset(); + + // Find end offset + int endOffset; + if (directoryParsed) { + // Use parsed directory to find next entry + int entryIndex = findDirectoryIndexInParsed(entry.getId()); + endOffset = (entryIndex + 1 < parsedDirectory.size()) ? + parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); + } else { + // Calculate end offset by finding the next field in the directory + endOffset = findNextOffsetInRawDirectory(entry.getId()); + } + + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || + endOffset > payload.limit() || startOffset > endOffset) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + + ", payloadLimit=" + payload.limit()); + } + var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } + /** + * Find the next field's offset by scanning the raw directory. + * This is used when the directory isn't fully parsed yet. + */ + private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { + ByteBuffer scanBuffer = directoryBuffer.duplicate(); + scanBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Get directory count + int count = (directoryCount >= 0) ? 
directoryCount : VarInt.decode(scanBuffer).getValue(); + if (count == 0) { + return payload.limit(); + } + + // Skip past count if we just decoded it + if (directoryCount < 0) { + // VarInt.decode already advanced the position + } else { + VarInt.decode(scanBuffer); // Skip past the count + } + + int directoryStartPos = scanBuffer.position(); + + for (int i = 0; i < count; i++) { + int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); + + // Bounds check + if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) { + return payload.limit(); + } + + scanBuffer.position(entryPos); + short fieldId = scanBuffer.getShort(); + scanBuffer.get(); // skip type + int offset = scanBuffer.getInt(); + + if (fieldId > currentFieldId) { + return offset; // Found next field's offset + } + } + + return payload.limit(); // No next field, use payload end + } + + /** + * Ensure the directory is fully parsed (thread-safe). + */ + private synchronized void ensureDirectoryParsed() { + if (directoryParsed) { + return; + } + + try { + ByteBuffer parseBuffer = directoryBuffer.duplicate(); + parseBuffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.DecodeResult countResult = VarInt.decode(parseBuffer); + int count = countResult.getValue(); + this.directoryCount = count; // Cache the count + + List directory = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + directory.add(deserializeDirectoryEntry(parseBuffer)); + } + + this.parsedDirectory = Collections.unmodifiableList(directory); + this.directoryParsed = true; + } catch (ImprintException e) { + throw new RuntimeException("Failed to parse directory", e); + } + } + + /** + * Creates a directory buffer from parsed directory entries. + * This is used when creating records with pre-parsed directories (e.g., from ImprintWriter). 
+ */ + private ByteBuffer createDirectoryBuffer(List directory) { + try { + int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write directory count + VarInt.encode(directory.size(), buffer); + + // Write directory entries + for (DirectoryEntry entry : directory) { + serializeDirectoryEntry(entry, buffer); + } + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } catch (Exception e) { + // Fallback to empty buffer if creation fails + return ByteBuffer.allocate(0).asReadOnlyBuffer(); + } + } + /** * Serialize this record to a ByteBuffer. * The returned buffer will be array-backed. */ public ByteBuffer serializeToBuffer() { + // Ensure directory is parsed for serialization + ensureDirectoryParsed(); + var buffer = ByteBuffer.allocate(estimateSerializedSize()); buffer.order(ByteOrder.LITTLE_ENDIAN); @@ -101,8 +331,8 @@ public ByteBuffer serializeToBuffer() { serializeHeader(buffer); // Write directory (always present) - VarInt.encode(directory.size(), buffer); - for (var entry : directory) { + VarInt.encode(parsedDirectory.size(), buffer); + for (var entry : parsedDirectory) { serializeDirectoryEntry(entry, buffer); } @@ -117,9 +347,6 @@ public ByteBuffer serializeToBuffer() { /** * Create a fluent builder for constructing ImprintRecord instances. - * - * @param schemaId the schema identifier for this record - * @return a new builder instance */ public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); @@ -127,10 +354,6 @@ public static ImprintRecordBuilder builder(SchemaId schemaId) { /** * Create a fluent builder for constructing ImprintRecord instances. 
- * - * @param fieldspaceId the fieldspace identifier - * @param schemaHash the schema hash - * @return a new builder instance */ @SuppressWarnings("unused") public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { @@ -145,7 +368,7 @@ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { } /** - * Deserialize a record from a ByteBuffer. + * Deserialize a record from a ByteBuffer with lazy directory parsing. * * @param buffer the buffer to deserialize from. Must be array-backed * (buffer.hasArray() == true) for optimal zero-copy performance. @@ -156,37 +379,43 @@ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintExcepti // Read header var header = deserializeHeader(buffer); - // Read directory (always present) - var directory = new ArrayList(); + // Read directory count but don't parse entries yet + int directoryStartPos = buffer.position(); VarInt.DecodeResult countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); - for (int i = 0; i < directoryCount; i++) { - directory.add(deserializeDirectoryEntry(buffer)); - } + // Calculate directory buffer (includes count + all entries) + int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); + buffer.position(directoryStartPos); // Reset to include count in directory buffer + + var directoryBuffer = buffer.slice(); + directoryBuffer.limit(directorySize); + + // Advance buffer past directory + buffer.position(buffer.position() + directorySize); // Read payload as ByteBuffer slice for zero-copy var payload = buffer.slice(); payload.limit(header.getPayloadSize()); - buffer.position(buffer.position() + header.getPayloadSize()); - return new ImprintRecord(header, directory, payload); + return new ImprintRecord(header, directoryBuffer, payload); } /** - * Binary search for field ID in directory without object allocation. + * Binary search for field ID in parsed directory. 
* Returns the index of the field if found, or a negative value if not found. - * - * @param fieldId the field ID to search for - * @return index if found, or negative insertion point - 1 if not found */ - private int findDirectoryIndex(int fieldId) { + private int findDirectoryIndexInParsed(int fieldId) { + if (!directoryParsed) { + return -1; + } + int low = 0; - int high = directory.size() - 1; + int high = parsedDirectory.size() - 1; while (low <= high) { - int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow - int midFieldId = directory.get(mid).getId(); + int mid = (low + high) >>> 1; + int midFieldId = parsedDirectory.get(mid).getId(); if (midFieldId < fieldId) { low = mid + 1; @@ -201,12 +430,30 @@ private int findDirectoryIndex(int fieldId) { public int estimateSerializedSize() { int size = Constants.HEADER_BYTES; // header - size += VarInt.encodedLength(directory.size()); // directory count - size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries + size += VarInt.encodedLength(getDirectoryCount()); // directory count + size += getDirectoryCount() * Constants.DIR_ENTRY_BYTES; // directory entries size += payload.remaining(); // payload return size; } + private int getDirectoryCount() { + if (directoryCount >= 0) { + return directoryCount; + } + if (directoryParsed) { + return parsedDirectory.size(); + } + // Last resort: decode from buffer + try { + ByteBuffer countBuffer = directoryBuffer.duplicate(); + return VarInt.decode(countBuffer).getValue(); + } catch (Exception e) { + return 0; + } + } + + // ===== EXISTING HELPER METHODS (unchanged) ===== + private void serializeHeader(ByteBuffer buffer) { buffer.put(Constants.MAGIC); buffer.put(Constants.VERSION); @@ -287,6 +534,8 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } } + // ===== TYPE-SPECIFIC GETTERS (unchanged API, improved performance) ===== + private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class 
expectedValueClass, String expectedTypeName) throws ImprintException { var value = getValue(fieldId); @@ -308,50 +557,26 @@ private T getTypedValueOrThrow(int fieldId, TypeCode expectedT "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName + "."); } - /** - * Retrieves the boolean value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type BOOL. - */ public boolean getBoolean(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); } - /** - * Retrieves the int (int32) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type INT32. - */ public int getInt32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); } - /** - * Retrieves the long (int64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type INT64. - */ public long getInt64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); } - /** - * Retrieves the float (float32) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT32. - */ public float getFloat32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); } - /** - * Retrieves the double (float64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT64. - */ public double getFloat64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); } - /** - * Retrieves the String value for the given field ID. 
- * @throws ImprintException if the field is not found, is null, or is not of type STRING. - */ public String getString(int fieldId) throws ImprintException { var value = getValue(fieldId); @@ -375,11 +600,6 @@ public String getString(int fieldId) throws ImprintException { "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected STRING."); } - /** - * Retrieves the byte array (byte[]) value for the given field ID. - * Note: This may involve a defensive copy depending on the underlying Value type. - * @throws ImprintException if the field is not found, is null, or is not of type BYTES. - */ public byte[] getBytes(int fieldId) throws ImprintException { Value value = getValue(fieldId); @@ -393,46 +613,31 @@ public byte[] getBytes(int fieldId) throws ImprintException { } if (value instanceof Value.BytesValue) { - return ((Value.BytesValue) value).getValue(); // getValue() in BytesValue returns a clone + return ((Value.BytesValue) value).getValue(); } if (value instanceof Value.BytesBufferValue) { - return ((Value.BytesBufferValue) value).getValue(); // getValue() in BytesBufferValue creates a new array + return ((Value.BytesBufferValue) value).getValue(); } throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected BYTES."); } - /** - * Retrieves the List for the given field ID. - * The list itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type ARRAY. - */ public List getArray(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); } - /** - * Retrieves the Map for the given field ID. - * The map itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type MAP. 
- */ public Map getMap(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); } - /** - * Retrieves the nested ImprintRecord for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type ROW. - */ public ImprintRecord getRow(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } @Override public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, directory.size(), payload.remaining()); + return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d, directoryParsed=%s}", + header, getDirectoryCount(), payload.remaining(), directoryParsed); } - } \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 76efcc5..898adfb 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -16,16 +16,11 @@ */ public class IntegrationTest { - // Removed main method, individual methods are now JUnit tests. 
- @Test @DisplayName("Basic functionality: create, serialize, deserialize primitive types") void testBasicFunctionality() throws ImprintException { - System.out.println("Testing basic functionality..."); // Keep for now if desired, or remove - SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - // Using ImprintRecordBuilder for consistency with other tests - ImprintRecord record = ImprintRecord.builder(schemaId) + var record = ImprintRecord.builder(schemaId) .field(1, 42) .field(2, "testing java imprint spec") .field(3, true) @@ -33,7 +28,7 @@ void testBasicFunctionality() throws ImprintException { .field(5, new byte[]{1, 2, 3, 4}) .build(); - // Verify we can read values back using ergonomic getters + // Verify we can read values back using type getters assertEquals(42, record.getInt32(1)); assertEquals("testing java imprint spec", record.getString(2)); assertTrue(record.getBoolean(3)); @@ -47,7 +42,7 @@ void testBasicFunctionality() throws ImprintException { var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + var deserialized = ImprintRecord.deserialize(serialized); assertEquals(42, deserialized.getInt32(1)); assertEquals("testing java imprint spec", deserialized.getString(2)); @@ -61,8 +56,6 @@ void testBasicFunctionality() throws ImprintException { @Test @DisplayName("Collections: create, serialize, deserialize arrays and maps") void testArraysAndMaps() throws ImprintException { - System.out.println("Testing arrays and maps..."); - SchemaId schemaId = new SchemaId(2, 0xcafebabe); // Create an array using builder for convenience @@ -72,8 +65,7 @@ void testArraysAndMaps() throws ImprintException { Map sourceStringToIntMap = new HashMap<>(); sourceStringToIntMap.put("one", 1); sourceStringToIntMap.put("two", 2); - - ImprintRecord record = ImprintRecord.builder(schemaId) + var record = ImprintRecord.builder(schemaId) .field(1, 
sourceIntList) // Builder converts List to List .field(2, sourceStringToIntMap) // Builder converts Map .build(); @@ -107,14 +99,14 @@ void testArraysAndMaps() throws ImprintException { void testNestedRecords() throws ImprintException { System.out.println("Testing nested records..."); - SchemaId innerSchemaId = new SchemaId(3, 0x12345678); - ImprintRecord innerRecord = ImprintRecord.builder(innerSchemaId) + var innerSchemaId = new SchemaId(3, 0x12345678); + var innerRecord = ImprintRecord.builder(innerSchemaId) .field(1, "nested data") .field(2, 9876543210L) .build(); - SchemaId outerSchemaId = new SchemaId(4, 0x87654321); - ImprintRecord outerRecord = ImprintRecord.builder(outerSchemaId) + var outerSchemaId = new SchemaId(4, 0x87654321); + var outerRecord = ImprintRecord.builder(outerSchemaId) .field(1, innerRecord) // Builder handles ImprintRecord directly .field(2, "outer data") .build(); @@ -122,12 +114,12 @@ void testNestedRecords() throws ImprintException { var buffer = outerRecord.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + var deserialized = ImprintRecord.deserialize(serialized); assertEquals(4, deserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("outer data", deserialized.getString(2)); - ImprintRecord nestedDeserialized = deserialized.getRow(1); + var nestedDeserialized = deserialized.getRow(1); assertNotNull(nestedDeserialized); assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("nested data", nestedDeserialized.getString(1)); @@ -136,8 +128,6 @@ void testNestedRecords() throws ImprintException { System.out.println("✓ Nested records test passed"); } - // --- Start of broken down tests for ErgonomicGettersAndNestedTypes --- - private ImprintRecord createTestRecordForGetters() throws ImprintException { SchemaId schemaId = new SchemaId(5, 0xabcdef01); @@ -174,8 +164,8 @@ 
private ImprintRecord serializeAndDeserialize(ImprintRecord record) throws Impri @Test @DisplayName("Type Getters: Basic primitive and String types") void testBasicTypeGetters() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); assertTrue(record.getBoolean(1)); assertEquals(12345, record.getInt32(2)); @@ -189,8 +179,8 @@ void testBasicTypeGetters() throws ImprintException { @Test @DisplayName("Type Getters: Array of Arrays") void testTypeGetterArrayOfArrays() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); List arrOfArr = record.getArray(9); assertNotNull(arrOfArr); @@ -211,8 +201,8 @@ void testTypeGetterArrayOfArrays() throws ImprintException { @Test @DisplayName("Type Getters: Map with Array Value") void testTypeGetterMapWithArrayValue() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); Map mapWithArr = record.getMap(10); assertNotNull(mapWithArr); @@ -227,8 +217,8 @@ void testTypeGetterMapWithArrayValue() throws ImprintException { @Test @DisplayName("Type Getters: Empty Collections (Array and Map)") void testErgonomicGettersEmptyCollections() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); List emptyArr = 
record.getArray(11); assertNotNull(emptyArr); @@ -242,8 +232,8 @@ void testErgonomicGettersEmptyCollections() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Field Not Found") void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(99)); assertEquals(ErrorType.FIELD_NOT_FOUND, ex.getErrorType()); @@ -252,8 +242,8 @@ void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Null Field accessed as primitive") void testErgonomicGetterExceptionNullField() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getString(8)); assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); // getString throws TYPE_MISMATCH for null @@ -269,8 +259,8 @@ void testErgonomicGetterExceptionNullField() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Type Mismatch") void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(6)); // Field 6 is a String assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); @@ -279,20 +269,20 @@ void 
testErgonomicGetterExceptionTypeMismatch() throws ImprintException { @Test @DisplayName("Type Getters: Row (Nested Record)") void testErgonomicGetterRow() throws ImprintException { - SchemaId innerSchemaId = new SchemaId(6, 0x12345678); - ImprintRecord innerRecord = ImprintRecord.builder(innerSchemaId) + var innerSchemaId = new SchemaId(6, 0x12345678); + var innerRecord = ImprintRecord.builder(innerSchemaId) .field(101, "nested string") .field(102, 999L) .build(); - ImprintRecord recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) + var recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) .field(201, innerRecord) // Using builder to add row .field(202, "outer field") .build(); - ImprintRecord deserializedWithRow = serializeAndDeserialize(recordWithRow); + var deserializedWithRow = serializeAndDeserialize(recordWithRow); - ImprintRecord retrievedRow = deserializedWithRow.getRow(201); + var retrievedRow = deserializedWithRow.getRow(201); assertNotNull(retrievedRow); assertEquals(innerSchemaId, retrievedRow.getHeader().getSchemaId()); assertEquals("nested string", retrievedRow.getString(101)); diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index d48c1aa..3b9f371 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -4,6 +4,7 @@ import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.Value; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.Random; @@ -26,7 +27,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -//@Disabled("Enable manually for profiling") +@Disabled("Enable manually for profiling") public class ProfilerTest { private static final int ITERATIONS = 1_000_000; From c3cf9a2a77c153b09b93a8244d4800f6ee2ebb9c Mon Sep 17 00:00:00 2001 From: Brent Johnson Date: Thu, 5 Jun 2025 14:57:56 
-0400 Subject: [PATCH 22/53] Merge Comparisons into dev branch (#8) * Full comprehensive comparison tests with a lot of other libraries + some micro-optimizations added that were found along the way * replace deprecated gradle methods with latest --------- Co-authored-by: expand3d <> --- .../benchmark/ComparisonBenchmark.java | 68 +-- .../java/com/imprint/core/ImprintRecord.java | 405 +++++------------- .../com/imprint/profile/ProfilerTest.java | 3 +- 3 files changed, 106 insertions(+), 370 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index ce2fbcb..49260b1 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -133,34 +133,14 @@ public void serializeFlatBuffers(Blackhole bh) { bh.consume(result); } - // ===== PARTIAL DESERIALIZATION (SETUP ONLY) ===== -// These benchmarks measure the cost of preparing a record for field access, -// not the cost of accessing the actual data. This is important because -// -// 1. Imprint: Only parses header + stores raw directory bytes -// 2. FlatBuffers: Only wraps the buffer with minimal validation -// 3. Others (eager): Parse and construct all field objects upfront -// -// This comparison shows the advantage of lazy loading approaches when you -// only need to access a subset of fields. In real streaming workloads, -// records are often filtered/routed based on just a few key fields. -// -// For a fair "full deserialization" comparison, see FULL DESERIALIZATION BENCHMARKS. 
- - @Benchmark - public void deserializeSetupImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - bh.consume(result); - } + // ===== DESERIALIZATION BENCHMARKS ===== @Benchmark - public void deserializeSetupFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + public void deserializeImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); bh.consume(result); } - // ===== FULL DESERIALIZATION BENCHMARKS ===== - @Benchmark public void deserializeJacksonJson(Blackhole bh) throws Exception { TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); @@ -193,53 +173,15 @@ public void deserializeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - // Access all fields to force full deserialization - result.getInt32(1); // id - result.getString(2); // name - result.getFloat64(3); // price - result.getBoolean(4); // active - result.getString(5); // category - result.getArray(6); // tags - result.getMap(7); // metadata - for (int i = 8; i < 21; i++) { - result.getString(i); // extraData fields - } - - bh.consume(result); - } - @Benchmark public void deserializeFlatBuffers(Blackhole bh) { TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - - // Access all fields - result.id(); - result.name(); - result.price(); - result.active(); - result.category(); - // Access all tags - for (int i = 0; i < result.tagsLength(); i++) { - result.tags(i); - } - // Access all metadata - for (int i = 0; i < result.metadataKeysLength(); i++) { - result.metadataKeys(i); - result.metadataValues(i); - } - // Access all extra data - for (int i = 0; i < 
result.extraDataLength(); i++) { - result.extraData(i); - } - bh.consume(result); } // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a record + // Tests accessing a single field near the end of a large record + // This showcases Imprint's O(1) directory lookup vs sequential deserialization @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index da6b6e0..2291550 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -17,10 +17,6 @@ * An Imprint record containing a header, field directory, and payload. * Uses ByteBuffer for zero-copy operations to achieve low latency. * - *

This implementation uses lazy directory parsing for optimal single field access performance. - * The directory is only parsed when needed, and binary search is performed directly on raw bytes - * when possible.

- * *

Performance Note: All ByteBuffers should be array-backed * (hasArray() == true) for optimal zero-copy performance. Direct buffers * may cause performance degradation.

@@ -28,64 +24,37 @@ @Getter public final class ImprintRecord { private final Header header; - private final ByteBuffer directoryBuffer; // Raw directory bytes + private final List directory; private final ByteBuffer payload; // Read-only view for zero-copy - // Lazy-loaded directory state - private List parsedDirectory; - private boolean directoryParsed = false; - - // Cache for parsed directory count to avoid repeated VarInt decoding - private int directoryCount = -1; - /** - * Creates a new ImprintRecord with lazy directory parsing. + * Creates a new ImprintRecord. * - * @param header the record header - * @param directoryBuffer raw directory bytes (including count) * @param payload the payload buffer. Should be array-backed for optimal performance. */ - private ImprintRecord(Header header, ByteBuffer directoryBuffer, ByteBuffer payload) { + public ImprintRecord(Header header, List directory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); + this.directory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view } - /** - * Creates a new ImprintRecord with pre-parsed directory (used by ImprintWriter). - * This constructor is used when the directory is already known and parsed. - * - * @param header the record header - * @param directory the parsed directory entries - * @param payload the payload buffer. Should be array-backed for optimal performance. 
- */ - ImprintRecord(Header header, List directory, ByteBuffer payload) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); - this.directoryParsed = true; - this.directoryCount = directory.size(); - this.payload = payload.asReadOnlyBuffer(); - - // Create directory buffer for serialization compatibility - this.directoryBuffer = createDirectoryBuffer(directory); - } - /** * Get a value by field ID, deserializing it on demand. * Returns null if the field is not found. * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE - * - *

Performance Note: Accessing fields one-by-one is optimized for single field access. - * If you need to access many fields from the same record, consider calling getDirectory() first - * to parse the full directory once, then access fields normally.

*/ public Value getValue(int fieldId) throws ImprintException { - DirectoryEntry entry = findDirectoryEntry(fieldId); - if (entry == null) { + var fieldBuffer = getFieldBuffer(fieldId); + if (fieldBuffer == null) { return null; } - return deserializeValue(entry.getTypeCode(), getFieldBufferFromEntry(entry)); + int directoryIndex = findDirectoryIndex(fieldId); + if (directoryIndex < 0) { + throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); + } + var entry = directory.get(directoryIndex); + return deserializeValue(entry.getTypeCode(), fieldBuffer); } /** @@ -93,237 +62,38 @@ public Value getValue(int fieldId) throws ImprintException { * Returns a zero-copy ByteBuffer view, or null if field not found. */ public ByteBuffer getRawBytes(int fieldId) { - try { - DirectoryEntry entry = findDirectoryEntry(fieldId); - if (entry == null) { - return null; - } - - return getFieldBufferFromEntry(entry).asReadOnlyBuffer(); - } catch (ImprintException e) { - return null; - } - } - - /** - * Find a directory entry for the given field ID. - * Uses the most efficient method based on current state. - */ - private DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) { - // Use parsed directory - int index = findDirectoryIndexInParsed(fieldId); - return index >= 0 ? parsedDirectory.get(index) : null; - } else { - // Use fast binary search on raw bytes - return findFieldEntryFast(fieldId); - } - } - - /** - * Fast binary search directly on raw directory bytes. - * This avoids parsing the entire directory for single field access. 
- */ - private DirectoryEntry findFieldEntryFast(int fieldId) throws ImprintException { - ByteBuffer searchBuffer = directoryBuffer.duplicate(); - searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Decode directory count (cache it to avoid repeated decoding) - if (directoryCount < 0) { - directoryCount = VarInt.decode(searchBuffer).getValue(); - } else { - // Skip past the VarInt count - VarInt.decode(searchBuffer); - } - - if (directoryCount == 0) { - return null; - } - - // Now searchBuffer.position() points to the first directory entry - int directoryStartPos = searchBuffer.position(); - - int low = 0; - int high = directoryCount - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - - // Calculate position of mid entry - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - - // Bounds check - if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Directory entry at position " + entryPos + " exceeds buffer limit " + searchBuffer.limit()); - } - - searchBuffer.position(entryPos); - short midFieldId = searchBuffer.getShort(); - - if (midFieldId < fieldId) { - low = mid + 1; - } else if (midFieldId > fieldId) { - high = mid - 1; - } else { - // Found it - read the complete entry - searchBuffer.position(entryPos); - return deserializeDirectoryEntry(searchBuffer); - } - } - - return null; // Not found + var fieldBuffer = getFieldBuffer(fieldId); + return fieldBuffer != null ? fieldBuffer.asReadOnlyBuffer() : null; } /** - * Get the directory (parsing it if necessary). - * This maintains backward compatibility with existing code. - * - *

Performance Tip: If you plan to access many fields from this record, - * call this method first to parse the directory once, then use the field accessor methods. - * This is more efficient than accessing fields one-by-one when you need multiple fields.

+ * Get a ByteBuffer view of a field's data. + * Returns null if the field is not found. */ - public List getDirectory() { - ensureDirectoryParsed(); - return parsedDirectory; - } + private ByteBuffer getFieldBuffer(int fieldId) { + int index = findDirectoryIndex(fieldId); + if (index < 0) return null; - /** - * Get a ByteBuffer view of a field's data from a DirectoryEntry. - */ - private ByteBuffer getFieldBufferFromEntry(DirectoryEntry entry) throws ImprintException { + var entry = directory.get(index); int startOffset = entry.getOffset(); + int endOffset = (index + 1 < directory.size()) ? + directory.get(index + 1).getOffset() : payload.limit(); - // Find end offset - int endOffset; - if (directoryParsed) { - // Use parsed directory to find next entry - int entryIndex = findDirectoryIndexInParsed(entry.getId()); - endOffset = (entryIndex + 1 < parsedDirectory.size()) ? - parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); - } else { - // Calculate end offset by finding the next field in the directory - endOffset = findNextOffsetInRawDirectory(entry.getId()); - } - - if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || - endOffset > payload.limit() || startOffset > endOffset) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + - ", payloadLimit=" + payload.limit()); + if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { + return null; } + // OPTIMIZATION: Single allocation instead of duplicate + slice var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } - /** - * Find the next field's offset by scanning the raw directory. - * This is used when the directory isn't fully parsed yet. 
- */ - private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { - ByteBuffer scanBuffer = directoryBuffer.duplicate(); - scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Get directory count - int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue(); - if (count == 0) { - return payload.limit(); - } - - // Skip past count if we just decoded it - if (directoryCount < 0) { - // VarInt.decode already advanced the position - } else { - VarInt.decode(scanBuffer); // Skip past the count - } - - int directoryStartPos = scanBuffer.position(); - - for (int i = 0; i < count; i++) { - int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); - - // Bounds check - if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) { - return payload.limit(); - } - - scanBuffer.position(entryPos); - short fieldId = scanBuffer.getShort(); - scanBuffer.get(); // skip type - int offset = scanBuffer.getInt(); - - if (fieldId > currentFieldId) { - return offset; // Found next field's offset - } - } - - return payload.limit(); // No next field, use payload end - } - - /** - * Ensure the directory is fully parsed (thread-safe). - */ - private synchronized void ensureDirectoryParsed() { - if (directoryParsed) { - return; - } - - try { - ByteBuffer parseBuffer = directoryBuffer.duplicate(); - parseBuffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.DecodeResult countResult = VarInt.decode(parseBuffer); - int count = countResult.getValue(); - this.directoryCount = count; // Cache the count - - List directory = new ArrayList<>(count); - for (int i = 0; i < count; i++) { - directory.add(deserializeDirectoryEntry(parseBuffer)); - } - - this.parsedDirectory = Collections.unmodifiableList(directory); - this.directoryParsed = true; - } catch (ImprintException e) { - throw new RuntimeException("Failed to parse directory", e); - } - } - - /** - * Creates a directory buffer from parsed directory entries. 
- * This is used when creating records with pre-parsed directories (e.g., from ImprintWriter). - */ - private ByteBuffer createDirectoryBuffer(List directory) { - try { - int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); - ByteBuffer buffer = ByteBuffer.allocate(bufferSize); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - // Write directory count - VarInt.encode(directory.size(), buffer); - - // Write directory entries - for (DirectoryEntry entry : directory) { - serializeDirectoryEntry(entry, buffer); - } - - buffer.flip(); - return buffer.asReadOnlyBuffer(); - } catch (Exception e) { - // Fallback to empty buffer if creation fails - return ByteBuffer.allocate(0).asReadOnlyBuffer(); - } - } - /** * Serialize this record to a ByteBuffer. * The returned buffer will be array-backed. */ public ByteBuffer serializeToBuffer() { - // Ensure directory is parsed for serialization - ensureDirectoryParsed(); - var buffer = ByteBuffer.allocate(estimateSerializedSize()); buffer.order(ByteOrder.LITTLE_ENDIAN); @@ -331,8 +101,8 @@ public ByteBuffer serializeToBuffer() { serializeHeader(buffer); // Write directory (always present) - VarInt.encode(parsedDirectory.size(), buffer); - for (var entry : parsedDirectory) { + VarInt.encode(directory.size(), buffer); + for (var entry : directory) { serializeDirectoryEntry(entry, buffer); } @@ -347,6 +117,9 @@ public ByteBuffer serializeToBuffer() { /** * Create a fluent builder for constructing ImprintRecord instances. + * + * @param schemaId the schema identifier for this record + * @return a new builder instance */ public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); @@ -354,6 +127,10 @@ public static ImprintRecordBuilder builder(SchemaId schemaId) { /** * Create a fluent builder for constructing ImprintRecord instances. 
+ * + * @param fieldspaceId the fieldspace identifier + * @param schemaHash the schema hash + * @return a new builder instance */ @SuppressWarnings("unused") public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { @@ -368,7 +145,7 @@ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { } /** - * Deserialize a record from a ByteBuffer with lazy directory parsing. + * Deserialize a record from a ByteBuffer. * * @param buffer the buffer to deserialize from. Must be array-backed * (buffer.hasArray() == true) for optimal zero-copy performance. @@ -379,43 +156,37 @@ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintExcepti // Read header var header = deserializeHeader(buffer); - // Read directory count but don't parse entries yet - int directoryStartPos = buffer.position(); + // Read directory (always present) + var directory = new ArrayList(); VarInt.DecodeResult countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); - // Calculate directory buffer (includes count + all entries) - int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - buffer.position(directoryStartPos); // Reset to include count in directory buffer - - var directoryBuffer = buffer.slice(); - directoryBuffer.limit(directorySize); - - // Advance buffer past directory - buffer.position(buffer.position() + directorySize); + for (int i = 0; i < directoryCount; i++) { + directory.add(deserializeDirectoryEntry(buffer)); + } // Read payload as ByteBuffer slice for zero-copy var payload = buffer.slice(); payload.limit(header.getPayloadSize()); + buffer.position(buffer.position() + header.getPayloadSize()); - return new ImprintRecord(header, directoryBuffer, payload); + return new ImprintRecord(header, directory, payload); } /** - * Binary search for field ID in parsed directory. + * Binary search for field ID in directory without object allocation. 
* Returns the index of the field if found, or a negative value if not found. + * + * @param fieldId the field ID to search for + * @return index if found, or negative insertion point - 1 if not found */ - private int findDirectoryIndexInParsed(int fieldId) { - if (!directoryParsed) { - return -1; - } - + private int findDirectoryIndex(int fieldId) { int low = 0; - int high = parsedDirectory.size() - 1; + int high = directory.size() - 1; while (low <= high) { - int mid = (low + high) >>> 1; - int midFieldId = parsedDirectory.get(mid).getId(); + int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow + int midFieldId = directory.get(mid).getId(); if (midFieldId < fieldId) { low = mid + 1; @@ -430,30 +201,12 @@ private int findDirectoryIndexInParsed(int fieldId) { public int estimateSerializedSize() { int size = Constants.HEADER_BYTES; // header - size += VarInt.encodedLength(getDirectoryCount()); // directory count - size += getDirectoryCount() * Constants.DIR_ENTRY_BYTES; // directory entries + size += VarInt.encodedLength(directory.size()); // directory count + size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries size += payload.remaining(); // payload return size; } - private int getDirectoryCount() { - if (directoryCount >= 0) { - return directoryCount; - } - if (directoryParsed) { - return parsedDirectory.size(); - } - // Last resort: decode from buffer - try { - ByteBuffer countBuffer = directoryBuffer.duplicate(); - return VarInt.decode(countBuffer).getValue(); - } catch (Exception e) { - return 0; - } - } - - // ===== EXISTING HELPER METHODS (unchanged) ===== - private void serializeHeader(ByteBuffer buffer) { buffer.put(Constants.MAGIC); buffer.put(Constants.VERSION); @@ -534,8 +287,6 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } } - // ===== TYPE-SPECIFIC GETTERS (unchanged API, improved performance) ===== - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, 
Class expectedValueClass, String expectedTypeName) throws ImprintException { var value = getValue(fieldId); @@ -557,26 +308,50 @@ private T getTypedValueOrThrow(int fieldId, TypeCode expectedT "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName + "."); } + /** + * Retrieves the boolean value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type BOOL. + */ public boolean getBoolean(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); } + /** + * Retrieves the int (int32) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type INT32. + */ public int getInt32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); } + /** + * Retrieves the long (int64) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type INT64. + */ public long getInt64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); } + /** + * Retrieves the float (float32) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type FLOAT32. + */ public float getFloat32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); } + /** + * Retrieves the double (float64) value for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type FLOAT64. + */ public double getFloat64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); } + /** + * Retrieves the String value for the given field ID. 
+ * @throws ImprintException if the field is not found, is null, or is not of type STRING. + */ public String getString(int fieldId) throws ImprintException { var value = getValue(fieldId); @@ -600,6 +375,11 @@ public String getString(int fieldId) throws ImprintException { "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected STRING."); } + /** + * Retrieves the byte array (byte[]) value for the given field ID. + * Note: This may involve a defensive copy depending on the underlying Value type. + * @throws ImprintException if the field is not found, is null, or is not of type BYTES. + */ public byte[] getBytes(int fieldId) throws ImprintException { Value value = getValue(fieldId); @@ -613,31 +393,46 @@ public byte[] getBytes(int fieldId) throws ImprintException { } if (value instanceof Value.BytesValue) { - return ((Value.BytesValue) value).getValue(); + return ((Value.BytesValue) value).getValue(); // getValue() in BytesValue returns a clone } if (value instanceof Value.BytesBufferValue) { - return ((Value.BytesBufferValue) value).getValue(); + return ((Value.BytesBufferValue) value).getValue(); // getValue() in BytesBufferValue creates a new array } throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected BYTES."); } + /** + * Retrieves the List for the given field ID. + * The list itself is a copy; modifications to it will not affect the record. + * @throws ImprintException if the field is not found, is null, or is not of type ARRAY. + */ public List getArray(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); } + /** + * Retrieves the Map for the given field ID. + * The map itself is a copy; modifications to it will not affect the record. + * @throws ImprintException if the field is not found, is null, or is not of type MAP. 
+ */ public Map getMap(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); } + /** + * Retrieves the nested ImprintRecord for the given field ID. + * @throws ImprintException if the field is not found, is null, or is not of type ROW. + */ public ImprintRecord getRow(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } @Override public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d, directoryParsed=%s}", - header, getDirectoryCount(), payload.remaining(), directoryParsed); + return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", + header, directory.size(), payload.remaining()); } + } \ No newline at end of file diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3b9f371..d48c1aa 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -4,7 +4,6 @@ import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.Value; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.Random; @@ -27,7 +26,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -@Disabled("Enable manually for profiling") +//@Disabled("Enable manually for profiling") public class ProfilerTest { private static final int ITERATIONS = 1_000_000; From b53479ae392717c67d60eec1361935dfbb40db3f Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 15:13:59 -0400 Subject: [PATCH 23/53] minor cleanup --- .../benchmark/ComparisonBenchmark.java | 21 ++++++++----------- .../java/com/imprint/types/TypeHandler.java | 10 ++++----- src/main/java/com/imprint/types/Value.java | 16 +++++++------- 3 files changed, 21 insertions(+), 26 
deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 49260b1..a7ffd3c 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -10,6 +10,7 @@ import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; +import lombok.NoArgsConstructor; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; @@ -90,7 +91,6 @@ public void setup() throws Exception { } // ===== SERIALIZATION BENCHMARKS ===== - @Benchmark public void serializeImprint(Blackhole bh) throws Exception { ByteBuffer result = serializeWithImprint(testData); @@ -134,7 +134,6 @@ public void serializeFlatBuffers(Blackhole bh) { } // ===== DESERIALIZATION BENCHMARKS ===== - @Benchmark public void deserializeImprint(Blackhole bh) throws Exception { ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); @@ -181,8 +180,6 @@ public void deserializeFlatBuffers(Blackhole bh) { // ===== FIELD ACCESS BENCHMARKS ===== // Tests accessing a single field near the end of a large record - // This showcases Imprint's O(1) directory lookup vs sequential deserialization - @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); @@ -213,19 +210,19 @@ public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { @Benchmark public void singleFieldAccessAvro(Blackhole bh) throws Exception { GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); // Accessing field near end + bh.consume(record.get("extraData4")); } @Benchmark public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { TestRecordProto.TestRecord record = 
TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); // Accessing field near end + bh.consume(record.getExtraData(4)); } @Benchmark public void singleFieldAccessFlatBuffers(Blackhole bh) { TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); // Accessing field near end - zero copy! + bh.consume(record.extraData(4)); } // ===== SIZE COMPARISON ===== @@ -694,8 +691,8 @@ private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB seco FlatBufferBuilder builder = new FlatBufferBuilder(1024); // Use second record's values if they exist, otherwise first record's values - String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); - String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); + String name = second.name() != null && !Objects.requireNonNull(second.name()).isEmpty() ? second.name() : first.name(); + String category = second.category() != null && !Objects.requireNonNull(second.category()).isEmpty() ? second.category() : first.category(); double price = second.price() != 0.0 ? 
second.price() : first.price(); boolean active = second.active(); // Use second's boolean value int id = first.id(); // Keep first record's ID @@ -809,6 +806,7 @@ var record = new TestRecord(); } // Test data class for other serialization libraries + @NoArgsConstructor public static class TestRecord { public int id; public String name; @@ -817,8 +815,7 @@ public static class TestRecord { public String category; public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); - public List extraData = new ArrayList<>(); // Fields 8-20 for large record test - - public TestRecord() {} // Required for deserialization + // Fields 8-20 for large record test + public List extraData = new ArrayList<>(); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index e58c355..dce6973 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -9,7 +9,7 @@ /** * Interface for handling type-specific serialization, deserialization, and size estimation. - * Note that primitives are potentially auto/un-boxed here which could impact performance slightly + * Note that primitives are basically boxed here which could impact performance slightly * but having all the types in their own implementation helps keep things organized for now, especially * for dealing with and testing more complex types in the future. 
*/ @@ -26,7 +26,7 @@ interface BufferViewer { int measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; } - // Helper method to eliminate duplication in ARRAY/MAP readValueBytes + // Helper method for complex buffer positioning in MAP and ARRAY static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, BufferViewer measurer) throws ImprintException { int initialPosition = buffer.position(); ByteBuffer tempBuffer = buffer.duplicate(); @@ -571,15 +571,13 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { } byte keyTypeCodeByte = tempBuffer.get(); byte valueTypeCodeByte = tempBuffer.get(); - TypeCode keyType = TypeCode.fromByte(keyTypeCodeByte); - TypeCode valueType = TypeCode.fromByte(valueTypeCodeByte); + var keyType = TypeCode.fromByte(keyTypeCodeByte); + var valueType = TypeCode.fromByte(valueTypeCodeByte); - // OPTIMIZATION: Calculate sizes directly for fixed-size types int keySize = getFixedTypeSize(keyType); int valueSize = getFixedTypeSize(valueType); if (keySize > 0 && valueSize > 0) { - // Both are fixed-size: O(1) calculation return 2 + (numEntries * (keySize + valueSize)); } else { // At least one is variable-size: fall back to traversal diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index ba747de..fbb988c 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -192,11 +192,11 @@ public static class BytesValue extends Value { private final byte[] value; public BytesValue(byte[] value) { - this.value = value.clone(); // defensive copy + this.value = value.clone(); } public byte[] getValue() { - return value.clone(); // defensive copy + return value.clone(); } @Override @@ -233,7 +233,7 @@ public static class BytesBufferValue extends Value { private final ByteBuffer value; public BytesBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); // zero-copy read-only 
view + this.value = value.asReadOnlyBuffer(); } public byte[] getValue() { @@ -244,7 +244,7 @@ public byte[] getValue() { } public ByteBuffer getBuffer() { - return value.duplicate(); // zero-copy view + return value.duplicate(); } @Override @@ -289,11 +289,11 @@ public StringValue(String value) { public byte[] getUtf8Bytes() { var cached = cachedUtf8Bytes; if (cached == null) { - // Multiple threads may compute this - that's OK since it's idempotent + // UTF8 is idempotent so no need to synchronize cached = value.getBytes(StandardCharsets.UTF_8); cachedUtf8Bytes = cached; } - return cached; // Return our computed value, not re-read from volatile field + return cached; // Return computed value } @Override @@ -363,14 +363,14 @@ private String decodeUtf8() { // Fallback: copy bytes from the ByteBuffer to a new heap array (if too large for cache) array = new byte[length]; } - value.duplicate().get(array, 0, length); // Get bytes from current position into chosen array + value.duplicate().get(array, 0, length); offset = 0; } return new String(array, offset, length, StandardCharsets.UTF_8); } public ByteBuffer getBuffer() { - return value.duplicate(); // zero-copy view + return value.duplicate(); } @Override From 1900cd3e4e107668e2cd917af57120444d96e199 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 15:22:06 -0400 Subject: [PATCH 24/53] minor cleanup --- build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 66f2e3e..d9093f9 100644 --- a/build.gradle +++ b/build.gradle @@ -170,7 +170,7 @@ test { jmh { fork = 1 warmupIterations = 2 // Reduced for faster CI - iterations = 3 + iterations = 3 // Reduced for faster CI resultFormat = 'JSON' includeTests = false resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") @@ -180,7 +180,7 @@ jmh { '-XX:+UseG1GC', '-Xmx2g', '-XX:+UnlockExperimentalVMOptions', - '-XX:+UseJVMCICompiler' // Use Graal 
if available for better performance + '-XX:+UseJVMCICompiler' ] } From 08b2ad2cc1195a496b791e14c441fb18384d4f8c Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 15:23:46 -0400 Subject: [PATCH 25/53] minor cleanup --- src/main/java/com/imprint/core/ImprintRecord.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 2291550..e7dab70 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -83,7 +83,7 @@ private ByteBuffer getFieldBuffer(int fieldId) { return null; } - // OPTIMIZATION: Single allocation instead of duplicate + slice + //Single allocation instead of duplicate + slice var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; From 864eaf0d10558558df0cffb7d49db8d9018e30fe Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 15:41:43 -0400 Subject: [PATCH 26/53] Actually fixes offsets and read Byte Values for Maps and Arrays even with nested objects --- .../java/com/imprint/types/TypeHandler.java | 252 ++---------------- .../com/imprint/profile/ProfilerTest.java | 3 +- .../com/imprint/types/TypeHandlerTest.java | 5 - 3 files changed, 19 insertions(+), 241 deletions(-) diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index dce6973..634867b 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -17,52 +17,7 @@ public interface TypeHandler { Value deserialize(ByteBuffer buffer) throws ImprintException; void serialize(Value value, ByteBuffer buffer) throws ImprintException; int estimateSize(Value value) throws ImprintException; - ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException; - - - - @FunctionalInterface - interface BufferViewer { - int 
measureDataLength(ByteBuffer tempBuffer, int numElements) throws ImprintException; - } - - // Helper method for complex buffer positioning in MAP and ARRAY - static ByteBuffer readComplexValueBytes(ByteBuffer buffer, String typeName, BufferViewer measurer) throws ImprintException { - int initialPosition = buffer.position(); - ByteBuffer tempBuffer = buffer.duplicate(); - tempBuffer.order(buffer.order()); - - VarInt.DecodeResult lengthResult = VarInt.decode(tempBuffer); - int numElements = lengthResult.getValue(); - int varIntLength = tempBuffer.position() - initialPosition; - - if (numElements == 0) { - if (buffer.remaining() < varIntLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for empty " + typeName + " VarInt. Needed: " + - varIntLength + ", available: " + buffer.remaining()); - } - ByteBuffer valueSlice = buffer.slice(); - valueSlice.limit(varIntLength); - buffer.position(initialPosition + varIntLength); - return valueSlice.asReadOnlyBuffer(); - } - - int dataLength = measurer.measureDataLength(tempBuffer, numElements); - int totalLength = varIntLength + dataLength; - - if (buffer.remaining() < totalLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for " + typeName + " value. 
Needed: " + totalLength + - ", available: " + buffer.remaining() + " at position " + initialPosition); - } - ByteBuffer valueSlice = buffer.slice(); - valueSlice.limit(totalLength); - buffer.position(initialPosition + totalLength); - return valueSlice.asReadOnlyBuffer(); - } - // Static implementations for each type TypeHandler NULL = new TypeHandler() { @Override @@ -79,11 +34,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 0; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) { - return ByteBuffer.allocate(0).asReadOnlyBuffer(); - } }; TypeHandler BOOL = new TypeHandler() { @@ -108,14 +58,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 1; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) { - var boolBuffer = buffer.slice(); - boolBuffer.limit(1); - buffer.position(buffer.position() + 1); - return boolBuffer.asReadOnlyBuffer(); - } }; TypeHandler INT32 = new TypeHandler() { @@ -137,14 +79,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 4; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) { - var int32Buffer = buffer.slice(); - int32Buffer.limit(4); - buffer.position(buffer.position() + 4); - return int32Buffer.asReadOnlyBuffer(); - } }; TypeHandler INT64 = new TypeHandler() { @@ -166,14 +100,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 8; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) { - var int64Buffer = buffer.slice(); - int64Buffer.limit(8); - buffer.position(buffer.position() + 8); - return int64Buffer.asReadOnlyBuffer(); - } }; TypeHandler FLOAT32 = new TypeHandler() { @@ -195,14 +121,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 4; } - - @Override - public ByteBuffer 
readValueBytes(ByteBuffer buffer) { - var float32Buffer = buffer.slice(); - float32Buffer.limit(4); - buffer.position(buffer.position() + 4); - return float32Buffer.asReadOnlyBuffer(); - } }; TypeHandler FLOAT64 = new TypeHandler() { @@ -224,14 +142,6 @@ public void serialize(Value value, ByteBuffer buffer) { public int estimateSize(Value value) { return 8; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) { - var float64Buffer = buffer.slice(); - float64Buffer.limit(8); - buffer.position(buffer.position() + 8); - return float64Buffer.asReadOnlyBuffer(); - } }; TypeHandler BYTES = new TypeHandler() { @@ -274,29 +184,6 @@ public int estimateSize(Value value) { return VarInt.encodedLength(bytes.length) + bytes.length; } } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - int initialPos = buffer.position(); - ByteBuffer tempMeasureBuffer = buffer.duplicate(); - VarInt.DecodeResult dr = VarInt.decode(tempMeasureBuffer); - - int varIntByteLength = tempMeasureBuffer.position() - initialPos; - int payloadByteLength = dr.getValue(); - int totalValueLength = varIntByteLength + payloadByteLength; - - if (buffer.remaining() < totalValueLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for VarInt-prefixed data. 
Needed: " + totalValueLength + - ", available: " + buffer.remaining() + " at position " + initialPos); - } - - ByteBuffer resultSlice = buffer.slice(); - resultSlice.limit(totalValueLength); - - buffer.position(initialPos + totalValueLength); - return resultSlice.asReadOnlyBuffer(); - } }; TypeHandler STRING = new TypeHandler() { @@ -344,29 +231,6 @@ public int estimateSize(Value value) { return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; } } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - int initialPos = buffer.position(); - ByteBuffer tempMeasureBuffer = buffer.duplicate(); - VarInt.DecodeResult dr = VarInt.decode(tempMeasureBuffer); - - int varIntByteLength = tempMeasureBuffer.position() - initialPos; - int payloadByteLength = dr.getValue(); - int totalValueLength = varIntByteLength + payloadByteLength; - - if (buffer.remaining() < totalValueLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for VarInt-prefixed string. 
Needed: " + totalValueLength + - ", available: " + buffer.remaining() + " at position " + initialPos); - } - - ByteBuffer resultSlice = buffer.slice(); - resultSlice.limit(totalValueLength); - - buffer.position(initialPos + totalValueLength); - return resultSlice.asReadOnlyBuffer(); - } }; TypeHandler ARRAY = new TypeHandler() { @@ -374,25 +238,24 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult lengthResult = VarInt.decode(buffer); int length = lengthResult.getValue(); - + if (length == 0) { return Value.fromArray(Collections.emptyList()); } - + if (buffer.remaining() < 1) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); } var elementType = TypeCode.fromByte(buffer.get()); var elements = new ArrayList(length); var elementHandler = elementType.getHandler(); - + + //Let each element handler consume what it needs from the buffer for (int i = 0; i < length; i++) { - var elementValueBytes = elementHandler.readValueBytes(buffer); - elementValueBytes.order(buffer.order()); - var element = elementHandler.deserialize(elementValueBytes); + var element = elementHandler.deserialize(buffer); //Handler advances buffer position elements.add(element); } - + return Value.fromArray(elements); } @@ -433,40 +296,6 @@ public int estimateSize(Value value) throws ImprintException { } return arraySize; } - - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - return readComplexValueBytes(buffer, "ARRAY", (tempBuffer, numElements) -> { - if (tempBuffer.remaining() < 1) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for ARRAY element type code"); - } - byte elementTypeCodeByte = tempBuffer.get(); - var elementType = 
TypeCode.fromByte(elementTypeCodeByte); - - switch (elementType) { - case NULL: - return 1; - case BOOL: - return 1 + numElements; - case INT32: - case FLOAT32: - return 1 + (numElements * 4); - case INT64: - case FLOAT64: - return 1 + (numElements * 8); - default: - var elementHandler = elementType.getHandler(); - int elementsDataLength = 0; - for (int i = 0; i < numElements; i++) { - int elementStartPos = tempBuffer.position(); - elementHandler.readValueBytes(tempBuffer); - elementsDataLength += (tempBuffer.position() - elementStartPos); - } - return 1 + elementsDataLength; - } - }); - } }; TypeHandler MAP = new TypeHandler() { @@ -474,13 +303,13 @@ public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { public Value deserialize(ByteBuffer buffer) throws ImprintException { VarInt.DecodeResult lengthResult = VarInt.decode(buffer); int length = lengthResult.getValue(); - + if (length == 0) { return Value.fromMap(Collections.emptyMap()); } - + if (buffer.remaining() < 2) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); } var keyType = TypeCode.fromByte(buffer.get()); var valueType = TypeCode.fromByte(buffer.get()); @@ -488,20 +317,17 @@ public Value deserialize(ByteBuffer buffer) throws ImprintException { var keyHandler = keyType.getHandler(); var valueHandler = valueType.getHandler(); - + + //Let handlers consume directly from buffer for (int i = 0; i < length; i++) { - var keyBytes = keyHandler.readValueBytes(buffer); - keyBytes.order(buffer.order()); - var keyValue = keyHandler.deserialize(keyBytes); + var keyValue = keyHandler.deserialize(buffer);// Advances buffer var key = MapKey.fromValue(keyValue); - - var valueBytes = valueHandler.readValueBytes(buffer); - valueBytes.order(buffer.order()); - var mapInternalValue = valueHandler.deserialize(valueBytes); - + + var 
mapInternalValue = valueHandler.deserialize(buffer);//Advances buffer + map.put(key, mapInternalValue); } - + return Value.fromMap(map); } @@ -562,50 +388,6 @@ public int estimateSize(Value value) throws ImprintException { return mapSize; } - @Override - public ByteBuffer readValueBytes(ByteBuffer buffer) throws ImprintException { - return readComplexValueBytes(buffer, "MAP", (tempBuffer, numEntries) -> { - if (tempBuffer.remaining() < 2) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for MAP key/value type codes"); - } - byte keyTypeCodeByte = tempBuffer.get(); - byte valueTypeCodeByte = tempBuffer.get(); - var keyType = TypeCode.fromByte(keyTypeCodeByte); - var valueType = TypeCode.fromByte(valueTypeCodeByte); - - int keySize = getFixedTypeSize(keyType); - int valueSize = getFixedTypeSize(valueType); - - if (keySize > 0 && valueSize > 0) { - return 2 + (numEntries * (keySize + valueSize)); - } else { - // At least one is variable-size: fall back to traversal - int entriesDataLength = 0; - for (int i = 0; i < numEntries; i++) { - int entryStartPos = tempBuffer.position(); - keyType.getHandler().readValueBytes(tempBuffer); - valueType.getHandler().readValueBytes(tempBuffer); - entriesDataLength += (tempBuffer.position() - entryStartPos); - } - return 2 + entriesDataLength; - } - }); - } - - private int getFixedTypeSize(TypeCode type) { - switch (type) { - case NULL: return 0; - case BOOL: return 1; - case INT32: - case FLOAT32: return 4; - case INT64: - case FLOAT64: return 8; - default: return -1; // Variable size - } - } - - private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { switch (key.getTypeCode()) { case INT32: diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index d48c1aa..3b9f371 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -4,6 +4,7 @@ import 
com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.Value; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.Random; @@ -26,7 +27,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -//@Disabled("Enable manually for profiling") +@Disabled("Enable manually for profiling") public class ProfilerTest { private static final int ITERATIONS = 1_000_000; diff --git a/src/test/java/com/imprint/types/TypeHandlerTest.java b/src/test/java/com/imprint/types/TypeHandlerTest.java index f131a0f..75d118f 100644 --- a/src/test/java/com/imprint/types/TypeHandlerTest.java +++ b/src/test/java/com/imprint/types/TypeHandlerTest.java @@ -33,11 +33,6 @@ void testNullHandler() throws ImprintException { buffer.flip(); var deserialized = handler.deserialize(buffer); assertThat(deserialized).isEqualTo(value); - - // readValueBytes - buffer.clear(); - var valueBytes = handler.readValueBytes(buffer); - assertThat(valueBytes.remaining()).isEqualTo(0); } @ParameterizedTest From 36b34226407742195b3d4ea930c4ddfdff534a00 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 16:04:42 -0400 Subject: [PATCH 27/53] change CI file to use JMH plugin to respect iteration and warmup values in gradle file. 
Also fix permission issue --- .github/workflows/ci.yml | 148 +++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 59 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 62ac6f5..ec052ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,11 @@ jobs: benchmark: runs-on: ubuntu-latest needs: test + # Add explicit permissions for commenting on PRs + permissions: + contents: read + pull-requests: write + issues: write # Only run benchmarks on main branch pushes and PRs to main to avoid excessive CI time if: github.ref == 'refs/heads/main' || github.base_ref == 'main' @@ -77,22 +82,22 @@ jobs: - name: Run serialization benchmarks run: | - ./gradlew jmhRunSerializationBenchmarks + ./gradlew jmh --include=".*serialize.*" --exclude=".*deserialize.*" continue-on-error: true - name: Run deserialization benchmarks run: | - ./gradlew jmhRunDeserializationBenchmarks + ./gradlew jmh --include=".*deserialize.*" continue-on-error: true - name: Run field access benchmarks run: | - ./gradlew jmhRunFieldAccessBenchmarks + ./gradlew jmh --include=".*singleFieldAccess.*" continue-on-error: true - name: Run size comparison benchmarks run: | - ./gradlew jmhRunSizeComparisonBenchmarks + ./gradlew jmh --include=".*measure.*" continue-on-error: true - name: Upload benchmark results @@ -106,59 +111,63 @@ jobs: - name: Comment benchmark results on PR if: github.event_name == 'pull_request' uses: actions/github-script@v7 + continue-on-error: true with: + github-token: ${{ secrets.GITHUB_TOKEN }} script: | - const fs = require('fs'); - const path = require('path'); - - // Find the latest benchmark results file - const resultsDir = 'benchmark-results'; - let latestFile = null; - let latestTime = 0; - - if (fs.existsSync(resultsDir)) { - const files = fs.readdirSync(resultsDir); - for (const file of files) { - if (file.endsWith('.json')) { - const filePath = path.join(resultsDir, file); - const stats = 
fs.statSync(filePath); - if (stats.mtime.getTime() > latestTime) { - latestTime = stats.mtime.getTime(); - latestFile = filePath; + try { + const fs = require('fs'); + const path = require('path'); + + // Find the latest benchmark results file + const resultsDir = 'benchmark-results'; + let latestFile = null; + let latestTime = 0; + + if (fs.existsSync(resultsDir)) { + const files = fs.readdirSync(resultsDir); + for (const file of files) { + if (file.endsWith('.json')) { + const filePath = path.join(resultsDir, file); + const stats = fs.statSync(filePath); + if (stats.mtime.getTime() > latestTime) { + latestTime = stats.mtime.getTime(); + latestFile = filePath; + } } } } - } - if (latestFile) { - const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); + if (latestFile) { + console.log(`Found benchmark results: ${latestFile}`); + const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); - // Group results by benchmark type - const serialization = results.filter(r => r.benchmark.includes('serialize')); - const deserialization = results.filter(r => r.benchmark.includes('deserialize')); - const fieldAccess = results.filter(r => r.benchmark.includes('singleFieldAccess')); - const sizes = results.filter(r => r.benchmark.includes('measure')); + // Group results by benchmark type + const serialization = results.filter(r => r.benchmark.includes('serialize')); + const deserialization = results.filter(r => r.benchmark.includes('deserialize')); + const fieldAccess = results.filter(r => r.benchmark.includes('singleFieldAccess')); + const sizes = results.filter(r => r.benchmark.includes('measure')); - // Format results into a table - const formatResults = (benchmarks, title) => { - if (benchmarks.length === 0) return ''; + // Format results into a table + const formatResults = (benchmarks, title) => { + if (benchmarks.length === 0) return ''; - let table = `\n### ${title}\n\n| Library | Score (ns/op) | Error | Unit 
|\n|---------|---------------|-------|------|\n`; + let table = `\n### ${title}\n\n| Library | Score (ns/op) | Error | Unit |\n|---------|---------------|-------|------|\n`; - benchmarks - .sort((a, b) => a.primaryMetric.score - b.primaryMetric.score) - .forEach(benchmark => { - const name = benchmark.benchmark.split('.').pop().replace(/serialize|deserialize|singleFieldAccess|measure/, '').replace(/Imprint|JacksonJson|Kryo|MessagePack|Avro|Protobuf|FlatBuffers/, (match) => match); - const score = benchmark.primaryMetric.score.toFixed(2); - const error = benchmark.primaryMetric.scoreError.toFixed(2); - const unit = benchmark.primaryMetric.scoreUnit; - table += `| ${name} | ${score} | ±${error} | ${unit} |\n`; - }); + benchmarks + .sort((a, b) => a.primaryMetric.score - b.primaryMetric.score) + .forEach(benchmark => { + const name = benchmark.benchmark.split('.').pop().replace(/serialize|deserialize|singleFieldAccess|measure/, '').replace(/Imprint|JacksonJson|Kryo|MessagePack|Avro|Protobuf|FlatBuffers/, (match) => match); + const score = benchmark.primaryMetric.score.toFixed(2); + const error = benchmark.primaryMetric.scoreError.toFixed(2); + const unit = benchmark.primaryMetric.scoreUnit; + table += `| ${name} | ${score} | ±${error} | ${unit} |\n`; + }); - return table; - }; + return table; + }; - const comment = `##Benchmark Results + const comment = `## Benchmark Results Benchmark comparison between Imprint and other serialization libraries: ${formatResults(serialization, 'Serialization Performance')} @@ -175,26 +184,47 @@ jobs: `; - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: comment - }); - } else { - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: '## Benchmark Results\n\nBenchmark execution completed but no results file was found. 
Check the [workflow logs](' + - `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' - }); + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: comment + }); + + console.log('Successfully posted benchmark results'); + } else { + console.log('No benchmark results found'); + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: '## Benchmark Results\n\nBenchmark execution completed but no results file was found. Check the [workflow logs](' + + `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' + }); + } + } catch (error) { + console.log('Failed to post benchmark comment:', error.message); + console.log('Benchmark results are still available in workflow artifacts'); + + // Try to post a simple error message + try { + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `## Benchmark Results\n\n Failed to process benchmark results automatically.\n\nResults are available in the [workflow artifacts](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).` + }); + } catch (commentError) { + console.log('Also failed to post error comment:', commentError.message); + } } # Optional: Run full benchmark suite on releases benchmark-full: runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/') + permissions: + contents: read steps: - name: Checkout code From b24555a5067b7828e5714326c41c7649c47f1ec9 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 16:12:02 -0400 Subject: [PATCH 28/53] ok plugin didn't work apparently so reverting that and just reducing Comparison tests iterations manually --- .github/workflows/ci.yml | 28 
+++++++++---------- .../benchmark/ComparisonBenchmark.java | 4 +-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec052ca..d0e43cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -82,22 +82,22 @@ jobs: - name: Run serialization benchmarks run: | - ./gradlew jmh --include=".*serialize.*" --exclude=".*deserialize.*" + ./gradlew jmhRunSerializationBenchmarks continue-on-error: true - name: Run deserialization benchmarks run: | - ./gradlew jmh --include=".*deserialize.*" + ./gradlew jmhRunDeserializationBenchmarks continue-on-error: true - name: Run field access benchmarks run: | - ./gradlew jmh --include=".*singleFieldAccess.*" + ./gradlew jmhRunFieldAccessBenchmarks continue-on-error: true - name: Run size comparison benchmarks run: | - ./gradlew jmh --include=".*measure.*" + ./gradlew jmhRunSizeComparisonBenchmarks continue-on-error: true - name: Upload benchmark results @@ -139,7 +139,7 @@ jobs: } if (latestFile) { - console.log(`Found benchmark results: ${latestFile}`); + console.log(`📊 Found benchmark results: ${latestFile}`); const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); // Group results by benchmark type @@ -167,7 +167,7 @@ jobs: return table; }; - const comment = `## Benchmark Results + const comment = `## 📊 Benchmark Results Benchmark comparison between Imprint and other serialization libraries: ${formatResults(serialization, 'Serialization Performance')} @@ -191,20 +191,20 @@ jobs: body: comment }); - console.log('Successfully posted benchmark results'); + console.log('✅ Successfully posted benchmark results to PR'); } else { - console.log('No benchmark results found'); + console.log('⚠️ No benchmark results found'); await github.rest.issues.createComment({ issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: '## Benchmark Results\n\nBenchmark execution completed but no results file was found. 
Check the [workflow logs](' + + body: '## 📊 Benchmark Results\n\nBenchmark execution completed but no results file was found. Check the [workflow logs](' + `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' }); } } catch (error) { - console.log('Failed to post benchmark comment:', error.message); - console.log('Benchmark results are still available in workflow artifacts'); + console.log('❌ Failed to post benchmark comment:', error.message); + console.log('📁 Benchmark results are still available in workflow artifacts'); // Try to post a simple error message try { @@ -212,10 +212,10 @@ jobs: issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: `## Benchmark Results\n\n Failed to process benchmark results automatically.\n\nResults are available in the [workflow artifacts](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).` + body: `## 📊 Benchmark Results\n\n⚠️ Failed to process benchmark results automatically.\n\nResults are available in the [workflow artifacts](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).` }); } catch (commentError) { - console.log('Also failed to post error comment:', commentError.message); + console.log('❌ Also failed to post error comment:', commentError.message); } } @@ -254,7 +254,7 @@ jobs: - name: Run full benchmark suite run: | - ./gradlew jmh + ./gradlew jmhRunAllBenchmarks - name: Upload full benchmark results uses: actions/upload-artifact@v4 diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index a7ffd3c..ee32ff0 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -38,8 +38,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) 
-@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) @SuppressWarnings("unused") public class ComparisonBenchmark { From 9656f0e86b84ac150f6812f1e85e32e5b6fca0a6 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 16:43:08 -0400 Subject: [PATCH 29/53] trying to update github ci to make jmh actually work correctly --- .github/workflows/ci.yml | 140 +++------------------------- build.gradle | 192 ++++++++++++++++++++++++--------------- 2 files changed, 132 insertions(+), 200 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0e43cb..a80f529 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,11 +46,6 @@ jobs: benchmark: runs-on: ubuntu-latest needs: test - # Add explicit permissions for commenting on PRs - permissions: - contents: read - pull-requests: write - issues: write # Only run benchmarks on main branch pushes and PRs to main to avoid excessive CI time if: github.ref == 'refs/heads/main' || github.base_ref == 'main' @@ -81,25 +76,28 @@ jobs: run: mkdir -p benchmark-results - name: Run serialization benchmarks - run: | - ./gradlew jmhRunSerializationBenchmarks + run: ./gradlew jmhRunSerializationBenchmarks continue-on-error: true - name: Run deserialization benchmarks - run: | - ./gradlew jmhRunDeserializationBenchmarks + run: ./gradlew jmhRunDeserializationBenchmarks continue-on-error: true - name: Run field access benchmarks - run: | - ./gradlew jmhRunFieldAccessBenchmarks + run: ./gradlew jmhRunFieldAccessBenchmarks continue-on-error: true - name: Run size comparison benchmarks - run: | - ./gradlew jmhRunSizeComparisonBenchmarks + run: ./gradlew jmhRunSizeComparisonBenchmarks continue-on-error: true + - name: List benchmark results + run: | + echo "Contents of 
benchmark-results directory:" + ls -la benchmark-results/ || echo "benchmark-results directory not found" + echo "Working directory contents:" + ls -la + - name: Upload benchmark results uses: actions/upload-artifact@v4 if: always() @@ -108,123 +106,10 @@ jobs: path: benchmark-results/ retention-days: 30 - - name: Comment benchmark results on PR - if: github.event_name == 'pull_request' - uses: actions/github-script@v7 - continue-on-error: true - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - try { - const fs = require('fs'); - const path = require('path'); - - // Find the latest benchmark results file - const resultsDir = 'benchmark-results'; - let latestFile = null; - let latestTime = 0; - - if (fs.existsSync(resultsDir)) { - const files = fs.readdirSync(resultsDir); - for (const file of files) { - if (file.endsWith('.json')) { - const filePath = path.join(resultsDir, file); - const stats = fs.statSync(filePath); - if (stats.mtime.getTime() > latestTime) { - latestTime = stats.mtime.getTime(); - latestFile = filePath; - } - } - } - } - - if (latestFile) { - console.log(`📊 Found benchmark results: ${latestFile}`); - const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); - - // Group results by benchmark type - const serialization = results.filter(r => r.benchmark.includes('serialize')); - const deserialization = results.filter(r => r.benchmark.includes('deserialize')); - const fieldAccess = results.filter(r => r.benchmark.includes('singleFieldAccess')); - const sizes = results.filter(r => r.benchmark.includes('measure')); - - // Format results into a table - const formatResults = (benchmarks, title) => { - if (benchmarks.length === 0) return ''; - - let table = `\n### ${title}\n\n| Library | Score (ns/op) | Error | Unit |\n|---------|---------------|-------|------|\n`; - - benchmarks - .sort((a, b) => a.primaryMetric.score - b.primaryMetric.score) - .forEach(benchmark => { - const name = 
benchmark.benchmark.split('.').pop().replace(/serialize|deserialize|singleFieldAccess|measure/, '').replace(/Imprint|JacksonJson|Kryo|MessagePack|Avro|Protobuf|FlatBuffers/, (match) => match); - const score = benchmark.primaryMetric.score.toFixed(2); - const error = benchmark.primaryMetric.scoreError.toFixed(2); - const unit = benchmark.primaryMetric.scoreUnit; - table += `| ${name} | ${score} | ±${error} | ${unit} |\n`; - }); - - return table; - }; - - const comment = `## 📊 Benchmark Results - - Benchmark comparison between Imprint and other serialization libraries: - ${formatResults(serialization, 'Serialization Performance')} - ${formatResults(deserialization, 'Deserialization Performance')} - ${formatResults(fieldAccess, 'Single Field Access Performance')} - ${formatResults(sizes, 'Serialized Size Comparison')} - -
- View detailed results - - Results generated from commit: \`${context.sha.substring(0, 7)}\` - - Lower scores are better for performance benchmarks. - -
`; - - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: comment - }); - - console.log('✅ Successfully posted benchmark results to PR'); - } else { - console.log('⚠️ No benchmark results found'); - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: '## 📊 Benchmark Results\n\nBenchmark execution completed but no results file was found. Check the [workflow logs](' + - `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' - }); - } - } catch (error) { - console.log('❌ Failed to post benchmark comment:', error.message); - console.log('📁 Benchmark results are still available in workflow artifacts'); - - // Try to post a simple error message - try { - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: `## 📊 Benchmark Results\n\n⚠️ Failed to process benchmark results automatically.\n\nResults are available in the [workflow artifacts](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).` - }); - } catch (commentError) { - console.log('❌ Also failed to post error comment:', commentError.message); - } - } - # Optional: Run full benchmark suite on releases benchmark-full: runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/') - permissions: - contents: read steps: - name: Checkout code @@ -253,8 +138,7 @@ jobs: run: mkdir -p benchmark-results - name: Run full benchmark suite - run: | - ./gradlew jmhRunAllBenchmarks + run: ./gradlew jmhRunAllBenchmarks - name: Upload full benchmark results uses: actions/upload-artifact@v4 diff --git a/build.gradle b/build.gradle index d9093f9..6c97a24 100644 --- a/build.gradle +++ b/build.gradle @@ -185,130 +185,178 @@ jmh { } // Create individual benchmark tasks 
for CI pipeline -tasks.register('jmhRunSerializationBenchmarks', JavaExec) { +tasks.register('jmhRunSerializationBenchmarks') { dependsOn compileJmhJava description = 'Run serialization benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runSerializationBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { + javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + '.*serialize.*', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/serialization-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } -tasks.register('jmhRunDeserializationBenchmarks', JavaExec) { +tasks.register('jmhRunDeserializationBenchmarks') { dependsOn compileJmhJava description = 'Run deserialization benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runDeserializationBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { + javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + '.*deserialize.*', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/deserialization-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } -tasks.register('jmhRunFieldAccessBenchmarks', JavaExec) { +tasks.register('jmhRunFieldAccessBenchmarks') { dependsOn compileJmhJava description = 'Run field access benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath 
- mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runFieldAccessBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { + javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + '.*singleFieldAccess.*', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/fieldaccess-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } -tasks.register('jmhRunSizeComparisonBenchmarks', JavaExec) { +tasks.register('jmhRunSizeComparisonBenchmarks') { dependsOn compileJmhJava description = 'Run size comparison benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runSizeComparisonBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { + javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + '.*measure.*', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/size-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } -tasks.register('jmhRunMergeBenchmarks', JavaExec) { +tasks.register('jmhRunMergeBenchmarks') { dependsOn compileJmhJava description = 'Run merge operation benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runMergeBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { 
+ javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + '.*merge.*', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/merge-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } -tasks.register('jmhRunAllBenchmarks', JavaExec) { +tasks.register('jmhRunAllBenchmarks') { dependsOn compileJmhJava description = 'Run all comparison benchmarks' group = 'benchmarking' - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runAll'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - doFirst { file("${projectDir}/benchmark-results").mkdirs() } + + doLast { + javaexec { + classpath = sourceSets.jmh.runtimeClasspath + mainClass = 'org.openjdk.jmh.Main' + args = [ + 'ComparisonBenchmark', + '-f', '1', + '-wi', '2', + '-i', '3', + '-rf', 'json', + '-rff', "${projectDir}/benchmark-results/all-results.json" + ] + jvmArgs = [ + '-XX:+UseG1GC', + '-Xmx2g' + ] + } + } } compileJava { From f86eb880cb4eb49600a26fbf9a0bdf547e8184ba Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 21:56:09 -0400 Subject: [PATCH 30/53] lazy directory deserialization --- .github/workflows/ci.yml | 110 +---- build.gradle | 183 +------ .../benchmark/ComparisonBenchmark.java | 103 +++- .../java/com/imprint/core/ImprintBuffers.java | 270 +++++++--- .../java/com/imprint/core/ImprintRecord.java | 463 ++++++------------ src/main/java/com/imprint/util/VarInt.java | 55 ++- 6 files changed, 490 insertions(+), 694 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a80f529..378ebb7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ main, dev ] + branches: [ main ] pull_request: - branches: [ main, dev ] + branches: [ main ] jobs: test: @@ -41,108 
+41,4 @@ jobs: run: ./gradlew test - name: Run build - run: ./gradlew build - - benchmark: - runs-on: ubuntu-latest - needs: test - # Only run benchmarks on main branch pushes and PRs to main to avoid excessive CI time - if: github.ref == 'refs/heads/main' || github.base_ref == 'main' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 11 - uses: actions/setup-java@v4 - with: - java-version: '11' - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Create benchmark results directory - run: mkdir -p benchmark-results - - - name: Run serialization benchmarks - run: ./gradlew jmhRunSerializationBenchmarks - continue-on-error: true - - - name: Run deserialization benchmarks - run: ./gradlew jmhRunDeserializationBenchmarks - continue-on-error: true - - - name: Run field access benchmarks - run: ./gradlew jmhRunFieldAccessBenchmarks - continue-on-error: true - - - name: Run size comparison benchmarks - run: ./gradlew jmhRunSizeComparisonBenchmarks - continue-on-error: true - - - name: List benchmark results - run: | - echo "Contents of benchmark-results directory:" - ls -la benchmark-results/ || echo "benchmark-results directory not found" - echo "Working directory contents:" - ls -la - - - name: Upload benchmark results - uses: actions/upload-artifact@v4 - if: always() - with: - name: benchmark-results-${{ github.sha }} - path: benchmark-results/ - retention-days: 30 - - # Optional: Run full benchmark suite on releases - benchmark-full: - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 11 - uses: actions/setup-java@v4 - with: - 
java-version: '11' - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Create benchmark results directory - run: mkdir -p benchmark-results - - - name: Run full benchmark suite - run: ./gradlew jmhRunAllBenchmarks - - - name: Upload full benchmark results - uses: actions/upload-artifact@v4 - with: - name: full-benchmark-results-${{ github.ref_name }} - path: benchmark-results/ - retention-days: 90 \ No newline at end of file + run: ./gradlew build \ No newline at end of file diff --git a/build.gradle b/build.gradle index 6c97a24..33b1645 100644 --- a/build.gradle +++ b/build.gradle @@ -166,11 +166,11 @@ test { } } -// JMH configuration - optimized for Java 11 +// JMH configuration jmh { - fork = 1 - warmupIterations = 2 // Reduced for faster CI - iterations = 3 // Reduced for faster CI + fork = 2 + warmupIterations = 3 + iterations = 5 resultFormat = 'JSON' includeTests = false resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") @@ -184,181 +184,6 @@ jmh { ] } -// Create individual benchmark tasks for CI pipeline -tasks.register('jmhRunSerializationBenchmarks') { - dependsOn compileJmhJava - description = 'Run serialization benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - '.*serialize.*', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 'json', - '-rff', "${projectDir}/benchmark-results/serialization-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - -tasks.register('jmhRunDeserializationBenchmarks') 
{ - dependsOn compileJmhJava - description = 'Run deserialization benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - '.*deserialize.*', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 'json', - '-rff', "${projectDir}/benchmark-results/deserialization-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - -tasks.register('jmhRunFieldAccessBenchmarks') { - dependsOn compileJmhJava - description = 'Run field access benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - '.*singleFieldAccess.*', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 'json', - '-rff', "${projectDir}/benchmark-results/fieldaccess-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - -tasks.register('jmhRunSizeComparisonBenchmarks') { - dependsOn compileJmhJava - description = 'Run size comparison benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - '.*measure.*', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 'json', - '-rff', "${projectDir}/benchmark-results/size-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - -tasks.register('jmhRunMergeBenchmarks') { - dependsOn compileJmhJava - description = 'Run merge operation benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - '.*merge.*', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 
'json', - '-rff', "${projectDir}/benchmark-results/merge-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - -tasks.register('jmhRunAllBenchmarks') { - dependsOn compileJmhJava - description = 'Run all comparison benchmarks' - group = 'benchmarking' - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } - - doLast { - javaexec { - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'org.openjdk.jmh.Main' - args = [ - 'ComparisonBenchmark', - '-f', '1', - '-wi', '2', - '-i', '3', - '-rf', 'json', - '-rff', "${projectDir}/benchmark-results/all-results.json" - ] - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g' - ] - } - } -} - compileJava { options.compilerArgs << '-Xlint:unchecked' options.deprecation = true diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index ee32ff0..dd62457 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -10,7 +10,6 @@ import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; -import lombok.NoArgsConstructor; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; @@ -38,8 +37,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) @SuppressWarnings("unused") public class ComparisonBenchmark { @@ -91,6 +90,7 @@ public void setup() throws Exception { } // ===== SERIALIZATION BENCHMARKS ===== + @Benchmark public void serializeImprint(Blackhole bh) throws Exception { ByteBuffer result = 
serializeWithImprint(testData); @@ -133,13 +133,34 @@ public void serializeFlatBuffers(Blackhole bh) { bh.consume(result); } - // ===== DESERIALIZATION BENCHMARKS ===== + // ===== PARTIAL DESERIALIZATION (SETUP ONLY) ===== +// These benchmarks measure the cost of preparing a record for field access, +// not the cost of accessing the actual data. This is important because +// +// 1. Imprint: Only parses header + stores raw directory bytes +// 2. FlatBuffers: Only wraps the buffer with minimal validation +// 3. Others (eager): Parse and construct all field objects upfront +// +// This comparison shows the advantage of lazy loading approaches when you +// only need to access a subset of fields. In real streaming workloads, +// records are often filtered/routed based on just a few key fields. +// +// For a fair "full deserialization" comparison, see FULL DESERIALIZATION BENCHMARKS. + @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { + public void deserializeSetupImprint(Blackhole bh) throws Exception { ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); bh.consume(result); } + @Benchmark + public void deserializeSetupFlatBuffers(Blackhole bh) { + TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + bh.consume(result); + } + + // ===== FULL DESERIALIZATION BENCHMARKS ===== + @Benchmark public void deserializeJacksonJson(Blackhole bh) throws Exception { TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); @@ -172,14 +193,54 @@ public void deserializeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } + @Benchmark + public void deserializeImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + // Access all fields to force full deserialization + result.getInt32(1); // id + result.getString(2); // name + result.getFloat64(3); // price + result.getBoolean(4); // active 
+ result.getString(5); // category + result.getArray(6); // tags + result.getMap(7); // metadata + for (int i = 8; i < 21; i++) { + result.getString(i); // extraData fields + } + + bh.consume(result); + } + @Benchmark public void deserializeFlatBuffers(Blackhole bh) { TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + + // Access all fields + result.id(); + result.name(); + result.price(); + result.active(); + result.category(); + // Access all tags + for (int i = 0; i < result.tagsLength(); i++) { + result.tags(i); + } + // Access all metadata + for (int i = 0; i < result.metadataKeysLength(); i++) { + result.metadataKeys(i); + result.metadataValues(i); + } + // Access all extra data + for (int i = 0; i < result.extraDataLength(); i++) { + result.extraData(i); + } + bh.consume(result); } // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a large record + // Tests accessing a single field near the end of a record + @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); @@ -210,19 +271,19 @@ public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { @Benchmark public void singleFieldAccessAvro(Blackhole bh) throws Exception { GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); + bh.consume(record.get("extraData4")); // Accessing field near end } @Benchmark public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); + bh.consume(record.getExtraData(4)); // Accessing field near end } @Benchmark public void singleFieldAccessFlatBuffers(Blackhole bh) { TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); + 
bh.consume(record.extraData(4)); // Accessing field near end - zero copy! } // ===== SIZE COMPARISON ===== @@ -264,7 +325,7 @@ public void measureFlatBuffersSize(Blackhole bh) { // ===== MERGE SIMULATION BENCHMARKS ===== - @Benchmark + //@Benchmark public void mergeImprint(Blackhole bh) throws Exception { var record1Buffer = imprintBytesBuffer.duplicate(); var record2Data = createTestRecord2(); @@ -277,7 +338,7 @@ public void mergeImprint(Blackhole bh) throws Exception { bh.consume(merged); } - @Benchmark + //@Benchmark public void mergeJacksonJson(Blackhole bh) throws Exception { var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); var record2Data = createTestRecord2(); @@ -289,7 +350,7 @@ public void mergeJacksonJson(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeKryo(Blackhole bh) { Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); var record1 = kryo.readObject(input1, TestRecord.class); @@ -306,7 +367,7 @@ public void mergeKryo(Blackhole bh) { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeMessagePack(Blackhole bh) throws Exception { var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); var record2Data = createTestRecord2(); @@ -318,7 +379,7 @@ public void mergeMessagePack(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeAvro(Blackhole bh) throws Exception { var record1 = deserializeWithAvro(avroBytes); var record2Data = createTestRecord2(); @@ -330,7 +391,7 @@ public void mergeAvro(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeProtobuf(Blackhole bh) throws Exception { var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); var record2Data = createTestRecord2(); @@ -342,7 +403,7 @@ public void mergeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void 
mergeFlatBuffers(Blackhole bh) { var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); var record2Data = createTestRecord2(); @@ -691,8 +752,8 @@ private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB seco FlatBufferBuilder builder = new FlatBufferBuilder(1024); // Use second record's values if they exist, otherwise first record's values - String name = second.name() != null && !Objects.requireNonNull(second.name()).isEmpty() ? second.name() : first.name(); - String category = second.category() != null && !Objects.requireNonNull(second.category()).isEmpty() ? second.category() : first.category(); + String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); + String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); double price = second.price() != 0.0 ? second.price() : first.price(); boolean active = second.active(); // Use second's boolean value int id = first.id(); // Keep first record's ID @@ -806,7 +867,6 @@ var record = new TestRecord(); } // Test data class for other serialization libraries - @NoArgsConstructor public static class TestRecord { public int id; public String name; @@ -815,7 +875,8 @@ public static class TestRecord { public String category; public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); - // Fields 8-20 for large record test - public List extraData = new ArrayList<>(); + public List extraData = new ArrayList<>(); // Fields 8-20 for large record test + + public TestRecord() {} // Required for deserialization } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index c14d6df..f6a341b 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -10,9 +10,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import 
java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; -import java.util.TreeMap; /** * Manages the raw buffers for an Imprint record with lazy directory parsing. @@ -31,12 +31,17 @@ public final class ImprintBuffers { private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) private final ByteBuffer payload; // Read-only payload view - // Lazy-loaded directory state. Needs to maintain ordering so that we can binary search the endOffset - private TreeMap parsedDirectory; + // Lazy-loaded directory state + private List parsedDirectory; private boolean directoryParsed = false; + private int directoryCount = -1; // Cached count to avoid repeated VarInt decoding /** * Creates buffers from raw data (used during deserialization). + * + * @param directoryBuffer Raw directory bytes including VarInt count and all entries. + * Format: [VarInt count][Entry1][Entry2]...[EntryN] + * @param payload Raw payload data containing all field values sequentially */ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); @@ -45,17 +50,33 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { /** * Creates buffers from pre-parsed directory (used during construction). + * This is more efficient when the directory is already known. + * + * @param directory Parsed directory entries, must be sorted by fieldId + * @param payload Raw payload data containing all field values */ public ImprintBuffers(List directory, ByteBuffer payload) { - this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); + this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory)); this.directoryParsed = true; + this.directoryCount = directory.size(); this.payload = payload.asReadOnlyBuffer(); this.directoryBuffer = createDirectoryBuffer(directory); } /** * Get a zero-copy ByteBuffer view of a field's data. 
- * Optimized for the most common use case - single field access. + * + *

Buffer Positioning Logic:

+ *
    + *
  1. Find the directory entry for the requested fieldId
  2. + *
  3. Use entry.offset as start position in payload
  4. + *
  5. Find end position by looking at next field's offset (or payload end)
  6. + *
  7. Create a slice view: payload[startOffset:endOffset]
  8. + *
+ * + * @param fieldId The field identifier to retrieve + * @return Zero-copy ByteBuffer positioned at field data, or null if field not found + * @throws ImprintException if buffer bounds are invalid or directory is corrupted */ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { var entry = findDirectoryEntry(fieldId); @@ -71,43 +92,78 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); } - var fieldBuffer = payload.duplicate(); + ByteBuffer fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } /** * Find a directory entry for the given field ID using the most efficient method. - *

- * Strategy: - * - If parsed: TreeMap lookup - * - If raw: Binary search on raw bytes to avoid full unwinding of the directory + * + *

Search Strategy:

+ *
    + *
  • If directory is parsed: binary search on in-memory List<DirectoryEntry>
  • + *
  • If directory is raw: binary search directly on raw bytes (faster for single lookups)
  • + *
+ * @param fieldId The field identifier to find + * @return DirectoryEntry if found, null otherwise + * @throws ImprintException if directory buffer is corrupted or truncated */ public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) - return parsedDirectory.get(fieldId); - else + if (directoryParsed) { + int index = findDirectoryIndexInParsed(fieldId); + return index >= 0 ? parsedDirectory.get(index) : null; + } else { return findFieldEntryInRawDirectory(fieldId); + } } /** * Get the full directory, parsing it if necessary. - * Returns the values in fieldId order thanks to TreeMap. + * + *

Lazy Parsing Behavior:

+ *
    + *
  • First call: Parses entire directory from raw bytes into List<DirectoryEntry>
  • + *
  • Subsequent calls: Returns cached parsed directory
  • + *
  • Note - the method is not synchronized and assumes single-threaded usage.
  • + *
+ * + *

When to use: Call this if you need to access multiple fields + * from the same record. For single field access, direct field getters are more efficient.

+ * + * @return Immutable list of directory entries, sorted by fieldId */ public List getDirectory() { ensureDirectoryParsed(); - return new ArrayList<>(parsedDirectory.values()); + return parsedDirectory; } /** - * Get directory count without parsing. + * Get the directory count without fully parsing the directory. + *

+ * This method avoids parsing the entire directory when only the count is needed. + *

    + *
  1. Return cached count if available (directoryCount >= 0)
  2. + *
  3. Return parsed directory size if directory is already parsed
  4. + *
  5. Decode VarInt from raw buffer and cache the result
  6. + *
+ * + *

VarInt Decoding: The count is stored as a VarInt at the beginning + * of the directoryBuffer. This method reads just enough bytes to decode the count.

+ * + * @return Number of fields in the directory, or 0 if decoding fails */ public int getDirectoryCount() { + if (directoryCount >= 0) + return directoryCount; if (directoryParsed) return parsedDirectory.size(); + + // Decode from buffer and cache try { var countBuffer = directoryBuffer.duplicate(); - return VarInt.decode(countBuffer).getValue(); + directoryCount = VarInt.decode(countBuffer).getValue(); + return directoryCount; } catch (Exception e) { return 0; } @@ -115,23 +171,52 @@ public int getDirectoryCount() { /** * Create a new buffer containing the serialized directory. + * + *

Output Format:

+ *
+     * [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
+     * 
+ * + *

Each DirectoryEntry is serialized as: [fieldId:2bytes][typeCode:1byte][offset:4bytes]

+ * + * + * @return New ByteBuffer containing the complete serialized directory */ public ByteBuffer serializeDirectory() { ensureDirectoryParsed(); - return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values())); + return createDirectoryBuffer(parsedDirectory); } // ========== PRIVATE METHODS ========== /** * Binary search on raw directory bytes to find a specific field. - * This avoids parsing the entire directory for single field lookups. + * + *
    + *
  1. Position buffer at start and decode VarInt count (cache for future use)
  2. + *
  3. Calculate directory start position after VarInt
  4. + *
  5. For binary search mid-point: entryPos = startPos + (mid * DIR_ENTRY_BYTES)
  6. + *
  7. Read fieldId from calculated position (first 2 bytes of entry)
  8. + *
  9. Compare fieldId and adjust search bounds
  10. + *
  11. When found: reposition buffer and deserialize complete entry
  12. + *
+ * + *

All buffer positions are bounds-checked before access.

+ * + * @param fieldId Field identifier to search for + * @return Complete DirectoryEntry if found, null if not found + * @throws ImprintException if buffer is truncated or corrupted */ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { var searchBuffer = directoryBuffer.duplicate(); searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - int directoryCount = VarInt.decode(searchBuffer).getValue(); + // Decode directory count (cache it) + if (directoryCount < 0) + directoryCount = VarInt.decode(searchBuffer).getValue(); + else + VarInt.decode(searchBuffer); // Skip past the count + if (directoryCount == 0) return null; @@ -165,65 +250,118 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE return null; } + /** + * + * @param fieldId Field identifier to find + * @return Index of the field if found, or negative insertion point if not found + */ + private int findDirectoryIndexInParsed(int fieldId) { + if (!directoryParsed) + return -1; + int low = 0; + int high = parsedDirectory.size() - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int midFieldId = parsedDirectory.get(mid).getId(); + if (midFieldId < fieldId) + low = mid + 1; + else if (midFieldId > fieldId) + high = mid - 1; + else + return mid; + } + return -(low + 1); + } + /** * Find the end offset for a field by looking at the next field's offset. + * + *
    + *
  • Field data spans from: entry.offset to nextField.offset (exclusive)
  • + *
  • Last field spans from: entry.offset to payload.limit()
  • + *
  • This works because directory entries are sorted by fieldId
  • + *
+ * + *

Search Strategy:

+ *
    + *
  • If directory parsed: Use binary search result + 1 to get next entry
  • + *
  • If directory raw: Scan raw entries until fieldId > currentFieldId
  • + *
+ * + * @param entry The directory entry whose end offset we need to find + * @return End offset (exclusive) for the field data + * @throws ImprintException if directory scanning fails */ private int findEndOffset(DirectoryEntry entry) throws ImprintException { if (directoryParsed) { - return findNextOffsetInParsedDirectory(entry.getId()); - } else { + int entryIndex = findDirectoryIndexInParsed(entry.getId()); + return (entryIndex + 1 < parsedDirectory.size()) ? + parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); + } else return findNextOffsetInRawDirectory(entry.getId()); - } } /** - * Find the end offset using TreeMap's efficient navigation methods. + * Scan raw directory to find the next field's offset after currentFieldId. + * + *
    + *
  1. Position buffer after VarInt count
  2. + *
  3. For each directory entry at position: startPos + (i * DIR_ENTRY_BYTES)
  4. + *
  5. Read fieldId (first 2 bytes) and offset (bytes 3-6)
  6. + *
  7. Return offset of first field where fieldId > currentFieldId
  8. + *
  9. If no next field found, return payload.limit()
  10. + *
+ * + * @param currentFieldId Find the next field after this fieldId + * @return Offset where the next field starts, or payload.limit() if this is the last field + * @throws ImprintException if directory buffer is corrupted */ - private int findNextOffsetInParsedDirectory(int currentFieldId) { - var nextEntry = parsedDirectory.higherEntry(currentFieldId); - return nextEntry != null ? nextEntry.getValue().getOffset() : payload.limit(); - } - private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { var scanBuffer = directoryBuffer.duplicate(); scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - int count = VarInt.decode(scanBuffer).getValue(); + int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue(); if (count == 0) return payload.limit(); + if (directoryCount >= 0) + VarInt.decode(scanBuffer); // Skip count if cached int directoryStartPos = scanBuffer.position(); - int low = 0; - int high = count - 1; - int nextOffset = payload.limit(); - // Binary search for the first field with fieldId > currentFieldId - while (low <= high) { - int mid = (low + high) >>> 1; - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + for (int i = 0; i < count; i++) { + int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) - break; + return payload.limit(); scanBuffer.position(entryPos); short fieldId = scanBuffer.getShort(); scanBuffer.get(); // skip type int offset = scanBuffer.getInt(); - if (fieldId > currentFieldId) { - nextOffset = offset; - high = mid - 1; - } else { - low = mid + 1; - } + if (fieldId > currentFieldId) + return offset; } - return nextOffset; + return payload.limit(); } /** * Parse the full directory if not already parsed. - * Creates TreeMap for both fast lookup and ordering needed for binary search on offSets. + * + *
    + *
  1. Duplicate directoryBuffer to avoid affecting original position
  2. + *
  3. Set byte order to LITTLE_ENDIAN for consistent reading
  4. + *
  5. Decode VarInt count and cache it
  6. + *
  7. Read 'count' directory entries sequentially
  8. + *
  9. Each entry: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
  10. + *
  11. Store as immutable list and mark as parsed
  12. + *
+ * + *

Error Handling: If parsing fails, throws RuntimeException + * since this indicates corrupted data that should never happen in normal operation.

+ * + *

Will return immediately if directory has already been parsed.

*/ private void ensureDirectoryParsed() { if (directoryParsed) @@ -234,42 +372,46 @@ private void ensureDirectoryParsed() { var countResult = VarInt.decode(parseBuffer); int count = countResult.getValue(); + this.directoryCount = count; - this.parsedDirectory = new TreeMap<>(); + var directory = new ArrayList(count); for (int i = 0; i < count; i++) { - var entry = deserializeDirectoryEntry(parseBuffer); - parsedDirectory.put((int)entry.getId(), entry); + directory.add(deserializeDirectoryEntry(parseBuffer)); } + this.parsedDirectory = Collections.unmodifiableList(directory); this.directoryParsed = true; } catch (ImprintException e) { throw new RuntimeException("Failed to parse directory", e); } } - /** - * Create a TreeMap from directory list field lookup with ordering. - */ - private TreeMap createDirectoryMap(List directory) { - var map = new TreeMap(); - for (var entry : directory) { - map.put((int)entry.getId(), entry); - } - return map; - } - /** * Create directory buffer from parsed entries. + * + *

Serialization Format:

+ *
    + *
  1. Calculate buffer size: VarInt.encodedLength(count) + (count * DIR_ENTRY_BYTES)
  2. + *
  3. Allocate ByteBuffer with LITTLE_ENDIAN byte order
  4. + *
  5. Write VarInt count
  6. + *
  7. Write each directory entry: [fieldId:2][typeCode:1][offset:4]
  8. + *
  9. Flip buffer and return read-only view
  10. + *
+ * + * @param directory List of directory entries to serialize + * @return Read-only ByteBuffer containing serialized directory, or empty buffer on error */ private ByteBuffer createDirectoryBuffer(List directory) { try { - int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); + int bufferSize = VarInt.encodedLength(directory.size()) + + (directory.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); buffer.order(ByteOrder.LITTLE_ENDIAN); VarInt.encode(directory.size(), buffer); - for (var entry : directory) + for (var entry : directory) { serializeDirectoryEntry(entry, buffer); + } buffer.flip(); return buffer.asReadOnlyBuffer(); @@ -291,6 +433,10 @@ private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { /** * Deserialize a single directory entry from the buffer. * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] + * + * @param buffer Buffer positioned at the start of a directory entry + * @return Parsed DirectoryEntry + * @throws ImprintException if buffer doesn't contain enough bytes */ private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e7dab70..6abc9cf 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -11,87 +11,127 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.*; +import java.util.List; +import java.util.Map; +import java.util.Objects; /** - * An Imprint record containing a header, field directory, and payload. - * Uses ByteBuffer for zero-copy operations to achieve low latency. - * - *

Performance Note: All ByteBuffers should be array-backed - * (hasArray() == true) for optimal zero-copy performance. Direct buffers - * may cause performance degradation.

+ * An Imprint record containing a header and buffer management. + * Delegates all buffer operations to ImprintBuffers for cleaner separation. */ @Getter public final class ImprintRecord { private final Header header; - private final List directory; - private final ByteBuffer payload; // Read-only view for zero-copy + private final ImprintBuffers buffers; /** - * Creates a new ImprintRecord. - * - * @param payload the payload buffer. Should be array-backed for optimal performance. + * Creates a record from deserialized components. */ - public ImprintRecord(Header header, List directory, ByteBuffer payload) { + private ImprintRecord(Header header, ImprintBuffers buffers) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.directory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); - this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view + this.buffers = Objects.requireNonNull(buffers, "Buffers cannot be null"); } + /** + * Creates a record from pre-parsed directory (used by ImprintWriter). + */ + ImprintRecord(Header header, List directory, ByteBuffer payload) { + this.header = Objects.requireNonNull(header, "Header cannot be null"); + this.buffers = new ImprintBuffers(directory, payload); + } + + // ========== FIELD ACCESS METHODS ========== + /** * Get a value by field ID, deserializing it on demand. * Returns null if the field is not found. 
- * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE */ public Value getValue(int fieldId) throws ImprintException { - var fieldBuffer = getFieldBuffer(fieldId); - if (fieldBuffer == null) { + var entry = buffers.findDirectoryEntry(fieldId); + if (entry == null) return null; - } - int directoryIndex = findDirectoryIndex(fieldId); - if (directoryIndex < 0) { - throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); - } - var entry = directory.get(directoryIndex); + var fieldBuffer = buffers.getFieldBuffer(fieldId); + if (fieldBuffer == null) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get buffer for field " + fieldId); + return deserializeValue(entry.getTypeCode(), fieldBuffer); } /** - * Get the raw bytes for a field without deserializing. - * Returns a zero-copy ByteBuffer view, or null if field not found. + * Get raw bytes for a field without deserializing. */ public ByteBuffer getRawBytes(int fieldId) { - var fieldBuffer = getFieldBuffer(fieldId); - return fieldBuffer != null ? fieldBuffer.asReadOnlyBuffer() : null; + try { + return buffers.getFieldBuffer(fieldId); + } catch (ImprintException e) { + return null; + } } /** - * Get a ByteBuffer view of a field's data. - * Returns null if the field is not found. + * Get the directory (parsing it if necessary). */ - private ByteBuffer getFieldBuffer(int fieldId) { - int index = findDirectoryIndex(fieldId); - if (index < 0) return null; + public List getDirectory() { + return buffers.getDirectory(); + } - var entry = directory.get(index); - int startOffset = entry.getOffset(); - int endOffset = (index + 1 < directory.size()) ? 
- directory.get(index + 1).getOffset() : payload.limit(); + // ========== TYPED GETTERS ========== - if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { - return null; - } + public boolean getBoolean(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + } + + public int getInt32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + } + + public long getInt64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + } - //Single allocation instead of duplicate + slice - var fieldBuffer = payload.duplicate(); - fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer; + public float getFloat32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + } + + public double getFloat64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + } + + public String getString(int fieldId) throws ImprintException { + var value = getValidatedValue(fieldId, "STRING"); + if (value instanceof Value.StringValue) + return ((Value.StringValue) value).getValue(); + if (value instanceof Value.StringBufferValue) + return ((Value.StringBufferValue) value).getValue(); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); + } + + public byte[] getBytes(int fieldId) throws ImprintException { + var value = getValidatedValue(fieldId, "BYTES"); + if (value instanceof Value.BytesValue) + return ((Value.BytesValue) value).getValue(); + if (value instanceof Value.BytesBufferValue) + return ((Value.BytesBufferValue) value).getValue(); + throw new 
ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); + } + + public List getArray(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + } + + public Map getMap(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + } + + public ImprintRecord getRow(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } + // ========== SERIALIZATION ========== + /** * Serialize this record to a ByteBuffer. - * The returned buffer will be array-backed. */ public ByteBuffer serializeToBuffer() { var buffer = ByteBuffer.allocate(estimateSerializedSize()); @@ -100,171 +140,95 @@ public ByteBuffer serializeToBuffer() { // Write header serializeHeader(buffer); - // Write directory (always present) - VarInt.encode(directory.size(), buffer); - for (var entry : directory) { - serializeDirectoryEntry(entry, buffer); - } + // Write directory + var directoryBuffer = buffers.serializeDirectory(); + buffer.put(directoryBuffer); - // Write payload (shallow copy only) + // Write payload + var payload = buffers.getPayload(); var payloadCopy = payload.duplicate(); buffer.put(payloadCopy); - // Prepare buffer for reading buffer.flip(); return buffer; } - /** - * Create a fluent builder for constructing ImprintRecord instances. 
- * - * @param schemaId the schema identifier for this record - * @return a new builder instance - */ + public int estimateSerializedSize() { + int size = Constants.HEADER_BYTES; // header + size += buffers.serializeDirectory().remaining(); // directory + size += buffers.getPayload().remaining(); // payload + return size; + } + + // ========== STATIC FACTORY METHODS ========== + public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); } - /** - * Create a fluent builder for constructing ImprintRecord instances. - * - * @param fieldspaceId the fieldspace identifier - * @param schemaHash the schema hash - * @return a new builder instance - */ - @SuppressWarnings("unused") public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } - /** - * Deserialize a record from bytes through an array backed ByteBuffer. - */ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { return deserialize(ByteBuffer.wrap(bytes)); } - /** - * Deserialize a record from a ByteBuffer. - * - * @param buffer the buffer to deserialize from. Must be array-backed - * (buffer.hasArray() == true) for optimal zero-copy performance. 
- */ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); // Read header var header = deserializeHeader(buffer); - // Read directory (always present) - var directory = new ArrayList(); - VarInt.DecodeResult countResult = VarInt.decode(buffer); + // Calculate directory size + int directoryStartPos = buffer.position(); + var countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); + int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - for (int i = 0; i < directoryCount; i++) { - directory.add(deserializeDirectoryEntry(buffer)); - } + // Create directory buffer + buffer.position(directoryStartPos); + var directoryBuffer = buffer.slice(); + directoryBuffer.limit(directorySize); - // Read payload as ByteBuffer slice for zero-copy + // Advance past directory + buffer.position(buffer.position() + directorySize); + + // Create payload buffer var payload = buffer.slice(); payload.limit(header.getPayloadSize()); - buffer.position(buffer.position() + header.getPayloadSize()); - return new ImprintRecord(header, directory, payload); - } + // Create buffers wrapper + var buffers = new ImprintBuffers(directoryBuffer, payload); - /** - * Binary search for field ID in directory without object allocation. - * Returns the index of the field if found, or a negative value if not found. 
- * - * @param fieldId the field ID to search for - * @return index if found, or negative insertion point - 1 if not found - */ - private int findDirectoryIndex(int fieldId) { - int low = 0; - int high = directory.size() - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow - int midFieldId = directory.get(mid).getId(); - - if (midFieldId < fieldId) { - low = mid + 1; - } else if (midFieldId > fieldId) { - high = mid - 1; - } else { - return mid; // field found - } - } - return -(low + 1); // field not found, return insertion point + return new ImprintRecord(header, buffers); } - public int estimateSerializedSize() { - int size = Constants.HEADER_BYTES; // header - size += VarInt.encodedLength(directory.size()); // directory count - size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries - size += payload.remaining(); // payload - return size; - } - - private void serializeHeader(ByteBuffer buffer) { - buffer.put(Constants.MAGIC); - buffer.put(Constants.VERSION); - buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldSpaceId()); - buffer.putInt(header.getSchemaId().getSchemaHash()); - buffer.putInt(header.getPayloadSize()); - } - - private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.HEADER_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for header"); - } - - byte magic = buffer.get(); - if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, - "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); - } - - byte version = buffer.get(); - if (version != Constants.VERSION) { - throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, - "Unsupported version: " + version); - } - - var flags = new Flags(buffer.get()); - int fieldspaceId = buffer.getInt(); - int 
schemaHash = buffer.getInt(); - int payloadSize = buffer.getInt(); + // ========== PRIVATE HELPER METHODS ========== - return new Header(flags, new SchemaId(fieldspaceId, schemaHash), payloadSize); - } - - private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { - buffer.putShort(entry.getId()); - buffer.put(entry.getTypeCode().getCode()); - buffer.putInt(entry.getOffset()); + /** + * Get and validate a value exists and is not null. + */ + private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { + var value = getValue(fieldId); + if (value == null) + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); + if (value.getTypeCode() == TypeCode.NULL) + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); + return value; } - private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for directory entry"); - } - - short id = buffer.getShort(); - var typeCode = TypeCode.fromByte(buffer.get()); - int offset = buffer.getInt(); - - return new DirectoryEntry(id, typeCode, offset); + private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) + throws ImprintException { + var value = getValidatedValue(fieldId, expectedTypeName); + if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) + return expectedValueClass.cast(value); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); } private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - var valueSpecificBuffer = buffer.duplicate(); - 
valueSpecificBuffer.order(ByteOrder.LITTLE_ENDIAN); + var valueBuffer = buffer.duplicate(); + valueBuffer.order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { case NULL: @@ -277,162 +241,51 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr case STRING: case ARRAY: case MAP: - return typeCode.getHandler().deserialize(valueSpecificBuffer); + return typeCode.getHandler().deserialize(valueBuffer); case ROW: - var nestedRecord = deserialize(valueSpecificBuffer); + var nestedRecord = deserialize(valueBuffer); return Value.fromRow(nestedRecord); - default: throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) throws ImprintException { - var value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as " + expectedTypeName + "."); - } - - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as " + expectedTypeName + "."); - } - - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) { - return expectedValueClass.cast(value); - } - - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName + "."); - } - - /** - * Retrieves the boolean value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type BOOL. - */ - public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); - } - - /** - * Retrieves the int (int32) value for the given field ID. 
- * @throws ImprintException if the field is not found, is null, or is not of type INT32. - */ - public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); - } - - /** - * Retrieves the long (int64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type INT64. - */ - public long getInt64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); - } - - /** - * Retrieves the float (float32) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT32. - */ - public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); - } - - /** - * Retrieves the double (float64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT64. - */ - public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + private void serializeHeader(ByteBuffer buffer) { + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put(header.getFlags().getValue()); + buffer.putInt(header.getSchemaId().getFieldSpaceId()); + buffer.putInt(header.getSchemaId().getSchemaHash()); + buffer.putInt(header.getPayloadSize()); } - /** - * Retrieves the String value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type STRING. 
- */ - public String getString(int fieldId) throws ImprintException { - var value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as String."); - } - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as String."); - } - - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } - if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } + private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.HEADER_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected STRING."); - } - /** - * Retrieves the byte array (byte[]) value for the given field ID. - * Note: This may involve a defensive copy depending on the underlying Value type. - * @throws ImprintException if the field is not found, is null, or is not of type BYTES. 
- */ - public byte[] getBytes(int fieldId) throws ImprintException { - Value value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as byte[]."); - } - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as byte[]."); + byte magic = buffer.get(); + if (magic != Constants.MAGIC) { + throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + + ", got 0x" + Integer.toHexString(magic & 0xFF)); } - if (value instanceof Value.BytesValue) { - return ((Value.BytesValue) value).getValue(); // getValue() in BytesValue returns a clone - } - if (value instanceof Value.BytesBufferValue) { - return ((Value.BytesBufferValue) value).getValue(); // getValue() in BytesBufferValue creates a new array + byte version = buffer.get(); + if (version != Constants.VERSION) { + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); } - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected BYTES."); - } - - /** - * Retrieves the List for the given field ID. - * The list itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type ARRAY. - */ - public List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); - } - - /** - * Retrieves the Map for the given field ID. - * The map itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type MAP. 
- */ - public Map getMap(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); - } + var flags = new Flags(buffer.get()); + int fieldSpaceId = buffer.getInt(); + int schemaHash = buffer.getInt(); + int payloadSize = buffer.getInt(); - /** - * Retrieves the nested ImprintRecord for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type ROW. - */ - public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); + return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } @Override public String toString() { return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, directory.size(), payload.remaining()); + header, buffers.getDirectoryCount(), buffers.getPayload().remaining()); } - } \ No newline at end of file diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index 75bd132..f43683b 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -16,12 +16,28 @@ */ @UtilityClass public final class VarInt { - + private static final byte CONTINUATION_BIT = (byte) 0x80; private static final byte SEGMENT_BITS = 0x7f; private static final int MAX_VARINT_LEN = 5; // Enough for u32 - - + + // Simple cache for values 0-1023 + private static final int CACHE_SIZE = 1024; + private static final int[] ENCODED_LENGTHS = new int[CACHE_SIZE]; + + static { + // Pre-compute encoded lengths for cached values + for (int i = 0; i < CACHE_SIZE; i++) { + long val = Integer.toUnsignedLong(i); + int length = 1; + while (val >= 0x80) { + val >>>= 7; + length++; + } + ENCODED_LENGTHS[i] = length; + } + } + /** * Encode a 32-bit unsigned integer as a VarInt into the given ByteBuffer. 
* @param value the value to encode (treated as unsigned) @@ -30,7 +46,7 @@ public final class VarInt { public static void encode(int value, ByteBuffer buffer) { // Convert to unsigned long for proper bit manipulation long val = Integer.toUnsignedLong(value); - + // Encode at least one byte, then continue while value has more bits do { byte b = (byte) (val & SEGMENT_BITS); @@ -41,8 +57,7 @@ public static void encode(int value, ByteBuffer buffer) { buffer.put(b); } while (val != 0); } - - + /** * Decode a VarInt from a ByteBuffer. * @param buffer the buffer to decode from @@ -53,55 +68,55 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { long result = 0; int shift = 0; int bytesRead = 0; - + while (true) { if (bytesRead >= MAX_VARINT_LEN) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt too long"); } if (!buffer.hasRemaining()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Unexpected end of data while reading VarInt"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Unexpected end of data while reading VarInt"); } - + byte b = buffer.get(); bytesRead++; - + // Check if adding these 7 bits would overflow long segment = b & SEGMENT_BITS; if (shift >= 32 || (shift == 28 && segment > 0xF)) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt overflow"); } - + // Add the bottom 7 bits to the result result |= segment << shift; - + // If the high bit is not set, this is the last byte if ((b & CONTINUATION_BIT) == 0) { break; } - + shift += 7; } - + return new DecodeResult((int) result, bytesRead); } - + /** * Calculate the number of bytes needed to encode the given value as a VarInt. 
* @param value the value to encode (treated as unsigned) * @return the number of bytes needed */ public static int encodedLength(int value) { - // Convert to unsigned long for proper bit manipulation + if (value >= 0 && value < CACHE_SIZE) { + return ENCODED_LENGTHS[value]; + } + long val = Integer.toUnsignedLong(value); int length = 1; - - // Count additional bytes needed for values >= 128 while (val >= 0x80) { val >>>= 7; length++; } - return length; } From be75ecb1638858e607003da442b71d8c24bc6550 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 22:12:44 -0400 Subject: [PATCH 31/53] remove extra comments --- .../com/imprint/benchmark/ComparisonBenchmark.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index dd62457..a666ffe 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -133,19 +133,7 @@ public void serializeFlatBuffers(Blackhole bh) { bh.consume(result); } - // ===== PARTIAL DESERIALIZATION (SETUP ONLY) ===== -// These benchmarks measure the cost of preparing a record for field access, -// not the cost of accessing the actual data. This is important because -// -// 1. Imprint: Only parses header + stores raw directory bytes -// 2. FlatBuffers: Only wraps the buffer with minimal validation -// 3. Others (eager): Parse and construct all field objects upfront -// -// This comparison shows the advantage of lazy loading approaches when you -// only need to access a subset of fields. In real streaming workloads, -// records are often filtered/routed based on just a few key fields. -// -// For a fair "full deserialization" comparison, see FULL DESERIALIZATION BENCHMARKS. 
+ // ===== SETUP ONLY ===== @Benchmark public void deserializeSetupImprint(Blackhole bh) throws Exception { From b147d4e7766b6a6fe1d89a1944b33a8cbbdcbce7 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 5 Jun 2025 22:17:01 -0400 Subject: [PATCH 32/53] remove extra comments --- src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index a666ffe..4d9c01c 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -259,19 +259,19 @@ public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { @Benchmark public void singleFieldAccessAvro(Blackhole bh) throws Exception { GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); // Accessing field near end + bh.consume(record.get("extraData4")); } @Benchmark public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); // Accessing field near end + bh.consume(record.getExtraData(4)); } @Benchmark public void singleFieldAccessFlatBuffers(Blackhole bh) { TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); // Accessing field near end - zero copy! 
+ bh.consume(record.extraData(4)); } // ===== SIZE COMPARISON ===== From 96893e6b72789014010f6d84eec9f3f48622e8a7 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 7 Jun 2025 11:17:43 -0400 Subject: [PATCH 33/53] Add merge and project APIs; optimize/simplify ImprintBuffers with TreeMap --- .../benchmark/ComparisonBenchmark.java | 2 +- .../java/com/imprint/core/ImprintBuffers.java | 270 +++---------- .../java/com/imprint/core/ImprintRecord.java | 22 ++ src/main/java/com/imprint/types/Value.java | 20 +- .../java/com/imprint/IntegrationTest.java | 358 +++++++++++++++++- .../com/imprint/profile/ProfilerTest.java | 6 +- 6 files changed, 446 insertions(+), 232 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 4d9c01c..6a6a958 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -405,7 +405,7 @@ public void mergeFlatBuffers(Blackhole bh) { // ===== MAIN METHOD TO RUN BENCHMARKS ===== public static void main(String[] args) throws RunnerException { - runAll(); + runFieldAccessBenchmarks(); // Or, uncomment specific runner methods to execute subsets: // runSerializationBenchmarks(); // runDeserializationBenchmarks(); diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index f6a341b..c14d6df 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -10,9 +10,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.TreeMap; /** * Manages the raw buffers for an Imprint record with lazy directory parsing. 
@@ -31,17 +31,12 @@ public final class ImprintBuffers { private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) private final ByteBuffer payload; // Read-only payload view - // Lazy-loaded directory state - private List parsedDirectory; + // Lazy-loaded directory state. Needs to maintain ordering so that we can binary search the endOffset + private TreeMap parsedDirectory; private boolean directoryParsed = false; - private int directoryCount = -1; // Cached count to avoid repeated VarInt decoding /** * Creates buffers from raw data (used during deserialization). - * - * @param directoryBuffer Raw directory bytes including VarInt count and all entries. - * Format: [VarInt count][Entry1][Entry2]...[EntryN] - * @param payload Raw payload data containing all field values sequentially */ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); @@ -50,33 +45,17 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { /** * Creates buffers from pre-parsed directory (used during construction). - * This is more efficient when the directory is already known. - * - * @param directory Parsed directory entries, must be sorted by fieldId - * @param payload Raw payload data containing all field values */ public ImprintBuffers(List directory, ByteBuffer payload) { - this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory)); + this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); this.directoryParsed = true; - this.directoryCount = directory.size(); this.payload = payload.asReadOnlyBuffer(); this.directoryBuffer = createDirectoryBuffer(directory); } /** * Get a zero-copy ByteBuffer view of a field's data. - * - *

Buffer Positioning Logic:

- *
    - *
  1. Find the directory entry for the requested fieldId
  2. - *
  3. Use entry.offset as start position in payload
  4. - *
  5. Find end position by looking at next field's offset (or payload end)
  6. - *
  7. Create a slice view: payload[startOffset:endOffset]
  8. - *
- * - * @param fieldId The field identifier to retrieve - * @return Zero-copy ByteBuffer positioned at field data, or null if field not found - * @throws ImprintException if buffer bounds are invalid or directory is corrupted + * Optimized for the most common use case - single field access. */ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { var entry = findDirectoryEntry(fieldId); @@ -92,78 +71,43 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); } - ByteBuffer fieldBuffer = payload.duplicate(); + var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } /** * Find a directory entry for the given field ID using the most efficient method. - * - *

Search Strategy:

- *
    - *
  • If directory is parsed: binary search on in-memory List<DirectoryEntry>
  • - *
  • If directory is raw: binary search directly on raw bytes (faster for single lookups)
  • - *
- * @param fieldId The field identifier to find - * @return DirectoryEntry if found, null otherwise - * @throws ImprintException if directory buffer is corrupted or truncated + *

+ * Strategy: + * - If parsed: TreeMap lookup + * - If raw: Binary search on raw bytes to avoid full unwinding of the directory */ public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) { - int index = findDirectoryIndexInParsed(fieldId); - return index >= 0 ? parsedDirectory.get(index) : null; - } else { + if (directoryParsed) + return parsedDirectory.get(fieldId); + else return findFieldEntryInRawDirectory(fieldId); - } } /** * Get the full directory, parsing it if necessary. - * - *

Lazy Parsing Behavior:

- *
    - *
  • First call: Parses entire directory from raw bytes into List<DirectoryEntry>
  • - *
  • Subsequent calls: Returns cached parsed directory
  • - *
  • Note - the method is not synchronized and assumes single-threaded usage.
  • - *
- * - *

When to use: Call this if you need to access multiple fields - * from the same record. For single field access, direct field getters are more efficient.

- * - * @return Immutable list of directory entries, sorted by fieldId + * Returns the values in fieldId order thanks to TreeMap. */ public List getDirectory() { ensureDirectoryParsed(); - return parsedDirectory; + return new ArrayList<>(parsedDirectory.values()); } /** - * Get the directory count without fully parsing the directory. - *

- * This method avoids parsing the entire directory when only the count is needed. - *

    - *
  1. Return cached count if available (directoryCount >= 0)
  2. - *
  3. Return parsed directory size if directory is already parsed
  4. - *
  5. Decode VarInt from raw buffer and cache the result
  6. - *
- * - *

VarInt Decoding: The count is stored as a VarInt at the beginning - * of the directoryBuffer. This method reads just enough bytes to decode the count.

- * - * @return Number of fields in the directory, or 0 if decoding fails + * Get directory count without parsing. */ public int getDirectoryCount() { - if (directoryCount >= 0) - return directoryCount; if (directoryParsed) return parsedDirectory.size(); - - // Decode from buffer and cache try { var countBuffer = directoryBuffer.duplicate(); - directoryCount = VarInt.decode(countBuffer).getValue(); - return directoryCount; + return VarInt.decode(countBuffer).getValue(); } catch (Exception e) { return 0; } @@ -171,52 +115,23 @@ public int getDirectoryCount() { /** * Create a new buffer containing the serialized directory. - * - *

Output Format:

- *
-     * [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
-     * 
- * - *

Each DirectoryEntry is serialized as: [fieldId:2bytes][typeCode:1byte][offset:4bytes]

- * - * - * @return New ByteBuffer containing the complete serialized directory */ public ByteBuffer serializeDirectory() { ensureDirectoryParsed(); - return createDirectoryBuffer(parsedDirectory); + return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values())); } // ========== PRIVATE METHODS ========== /** * Binary search on raw directory bytes to find a specific field. - * - *
    - *
  1. Position buffer at start and decode VarInt count (cache for future use)
  2. - *
  3. Calculate directory start position after VarInt
  4. - *
  5. For binary search mid-point: entryPos = startPos + (mid * DIR_ENTRY_BYTES)
  6. - *
  7. Read fieldId from calculated position (first 2 bytes of entry)
  8. - *
  9. Compare fieldId and adjust search bounds
  10. - *
  11. When found: reposition buffer and deserialize complete entry
  12. - *
- * - *

All buffer positions are bounds-checked before access.

- * - * @param fieldId Field identifier to search for - * @return Complete DirectoryEntry if found, null if not found - * @throws ImprintException if buffer is truncated or corrupted + * This avoids parsing the entire directory for single field lookups. */ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { var searchBuffer = directoryBuffer.duplicate(); searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - // Decode directory count (cache it) - if (directoryCount < 0) - directoryCount = VarInt.decode(searchBuffer).getValue(); - else - VarInt.decode(searchBuffer); // Skip past the count - + int directoryCount = VarInt.decode(searchBuffer).getValue(); if (directoryCount == 0) return null; @@ -250,118 +165,65 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE return null; } - /** - * - * @param fieldId Field identifier to find - * @return Index of the field if found, or negative insertion point if not found - */ - private int findDirectoryIndexInParsed(int fieldId) { - if (!directoryParsed) - return -1; - int low = 0; - int high = parsedDirectory.size() - 1; - while (low <= high) { - int mid = (low + high) >>> 1; - int midFieldId = parsedDirectory.get(mid).getId(); - if (midFieldId < fieldId) - low = mid + 1; - else if (midFieldId > fieldId) - high = mid - 1; - else - return mid; - } - return -(low + 1); - } - /** * Find the end offset for a field by looking at the next field's offset. - * - *
    - *
  • Field data spans from: entry.offset to nextField.offset (exclusive)
  • - *
  • Last field spans from: entry.offset to payload.limit()
  • - *
  • This works because directory entries are sorted by fieldId
  • - *
- * - *

Search Strategy:

- *
    - *
  • If directory parsed: Use binary search result + 1 to get next entry
  • - *
  • If directory raw: Scan raw entries until fieldId > currentFieldId
  • - *
- * - * @param entry The directory entry whose end offset we need to find - * @return End offset (exclusive) for the field data - * @throws ImprintException if directory scanning fails */ private int findEndOffset(DirectoryEntry entry) throws ImprintException { if (directoryParsed) { - int entryIndex = findDirectoryIndexInParsed(entry.getId()); - return (entryIndex + 1 < parsedDirectory.size()) ? - parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); - } else + return findNextOffsetInParsedDirectory(entry.getId()); + } else { return findNextOffsetInRawDirectory(entry.getId()); + } } /** - * Scan raw directory to find the next field's offset after currentFieldId. - * - *
    - *
  1. Position buffer after VarInt count
  2. - *
  3. For each directory entry at position: startPos + (i * DIR_ENTRY_BYTES)
  4. - *
  5. Read fieldId (first 2 bytes) and offset (bytes 3-6)
  6. - *
  7. Return offset of first field where fieldId > currentFieldId
  8. - *
  9. If no next field found, return payload.limit()
  10. - *
- * - * @param currentFieldId Find the next field after this fieldId - * @return Offset where the next field starts, or payload.limit() if this is the last field - * @throws ImprintException if directory buffer is corrupted + * Find the end offset using TreeMap's efficient navigation methods. */ + private int findNextOffsetInParsedDirectory(int currentFieldId) { + var nextEntry = parsedDirectory.higherEntry(currentFieldId); + return nextEntry != null ? nextEntry.getValue().getOffset() : payload.limit(); + } + private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { var scanBuffer = directoryBuffer.duplicate(); scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue(); + int count = VarInt.decode(scanBuffer).getValue(); if (count == 0) return payload.limit(); - if (directoryCount >= 0) - VarInt.decode(scanBuffer); // Skip count if cached int directoryStartPos = scanBuffer.position(); + int low = 0; + int high = count - 1; + int nextOffset = payload.limit(); - for (int i = 0; i < count; i++) { - int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); + // Binary search for the first field with fieldId > currentFieldId + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) - return payload.limit(); + break; scanBuffer.position(entryPos); short fieldId = scanBuffer.getShort(); scanBuffer.get(); // skip type int offset = scanBuffer.getInt(); - if (fieldId > currentFieldId) - return offset; + if (fieldId > currentFieldId) { + nextOffset = offset; + high = mid - 1; + } else { + low = mid + 1; + } } - return payload.limit(); + return nextOffset; } /** * Parse the full directory if not already parsed. - * - *
    - *
  1. Duplicate directoryBuffer to avoid affecting original position
  2. - *
  3. Set byte order to LITTLE_ENDIAN for consistent reading
  4. - *
  5. Decode VarInt count and cache it
  6. - *
  7. Read 'count' directory entries sequentially
  8. - *
  9. Each entry: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
  10. - *
  11. Store as immutable list and mark as parsed
  12. - *
- * - *

Error Handling: If parsing fails, throws RuntimeException - * since this indicates corrupted data that should never happen in normal operation.

- * - *

Will return immediately if directory has already been parsed.

+ * Creates TreeMap for both fast lookup and ordering needed for binary search on offSets. */ private void ensureDirectoryParsed() { if (directoryParsed) @@ -372,46 +234,42 @@ private void ensureDirectoryParsed() { var countResult = VarInt.decode(parseBuffer); int count = countResult.getValue(); - this.directoryCount = count; - var directory = new ArrayList(count); + this.parsedDirectory = new TreeMap<>(); for (int i = 0; i < count; i++) { - directory.add(deserializeDirectoryEntry(parseBuffer)); + var entry = deserializeDirectoryEntry(parseBuffer); + parsedDirectory.put((int)entry.getId(), entry); } - this.parsedDirectory = Collections.unmodifiableList(directory); this.directoryParsed = true; } catch (ImprintException e) { throw new RuntimeException("Failed to parse directory", e); } } + /** + * Create a TreeMap from directory list field lookup with ordering. + */ + private TreeMap createDirectoryMap(List directory) { + var map = new TreeMap(); + for (var entry : directory) { + map.put((int)entry.getId(), entry); + } + return map; + } + /** * Create directory buffer from parsed entries. - * - *

Serialization Format:

- *
    - *
  1. Calculate buffer size: VarInt.encodedLength(count) + (count * DIR_ENTRY_BYTES)
  2. - *
  3. Allocate ByteBuffer with LITTLE_ENDIAN byte order
  4. - *
  5. Write VarInt count
  6. - *
  7. Write each directory entry: [fieldId:2][typeCode:1][offset:4]
  8. - *
  9. Flip buffer and return read-only view
  10. - *
- * - * @param directory List of directory entries to serialize - * @return Read-only ByteBuffer containing serialized directory, or empty buffer on error */ private ByteBuffer createDirectoryBuffer(List directory) { try { - int bufferSize = VarInt.encodedLength(directory.size()) + - (directory.size() * Constants.DIR_ENTRY_BYTES); + int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); buffer.order(ByteOrder.LITTLE_ENDIAN); VarInt.encode(directory.size(), buffer); - for (var entry : directory) { + for (var entry : directory) serializeDirectoryEntry(entry, buffer); - } buffer.flip(); return buffer.asReadOnlyBuffer(); @@ -433,10 +291,6 @@ private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { /** * Deserialize a single directory entry from the buffer. * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - * - * @param buffer Buffer positioned at the start of a directory entry - * @return Parsed DirectoryEntry - * @throws ImprintException if buffer doesn't contain enough bytes */ private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 6abc9cf..e720df5 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -69,6 +69,28 @@ public ByteBuffer getRawBytes(int fieldId) { } } + /** + * Project a subset of fields from this record. + * + * @param fieldIds Array of field IDs to include in the projection + * @return New ImprintRecord containing only the requested fields + */ + public ImprintRecord project(int... fieldIds) { + return ImprintOperations.project(this, fieldIds); + } + + /** + * Merge another record into this one. 
+ * For duplicate fields, this record's values take precedence. + * + * @param other The record to merge with this one + * @return New ImprintRecord containing merged fields + * @throws ImprintException if merge fails + */ + public ImprintRecord merge(ImprintRecord other) throws ImprintException { + return ImprintOperations.merge(this, other); + } + /** * Get the directory (parsing it if necessary). */ diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index fbb988c..bfa9958 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -169,6 +169,7 @@ public String toString() { } // Float64 Value + @Getter @EqualsAndHashCode(callSuper = false) public static class Float64Value extends Value { @@ -180,7 +181,7 @@ public Float64Value(double value) { @Override public TypeCode getTypeCode() { return TypeCode.FLOAT64; } - + @Override public String toString() { return String.valueOf(value); @@ -188,17 +189,20 @@ public String toString() { } // Bytes Value (array-based) + @Getter public static class BytesValue extends Value { + /** + * Returns internal array. MUST NOT be modified by caller. + */ private final byte[] value; - + + /** + * Takes ownership of the byte array. Caller must not modify after construction. 
+ */ public BytesValue(byte[] value) { - this.value = value.clone(); + this.value = Objects.requireNonNull(value); } - - public byte[] getValue() { - return value.clone(); - } - + @Override public TypeCode getTypeCode() { return TypeCode.BYTES; } diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 898adfb..ee1d426 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -49,8 +49,6 @@ var record = ImprintRecord.builder(schemaId) assertTrue(deserialized.getBoolean(3)); assertEquals(3.14159, deserialized.getFloat64(4)); assertArrayEquals(new byte[]{1,2,3,4}, deserialized.getBytes(5)); - - System.out.println("Basic functionality test passed"); } @Test @@ -90,15 +88,11 @@ var record = ImprintRecord.builder(schemaId) assertEquals(2, deserializedMap.size()); assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one"))); assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two"))); - - System.out.println("Arrays and maps test passed"); } @Test @DisplayName("Nested Records: create, serialize, deserialize records within records") void testNestedRecords() throws ImprintException { - System.out.println("Testing nested records..."); - var innerSchemaId = new SchemaId(3, 0x12345678); var innerRecord = ImprintRecord.builder(innerSchemaId) .field(1, "nested data") @@ -124,8 +118,348 @@ void testNestedRecords() throws ImprintException { assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("nested data", nestedDeserialized.getString(1)); assertEquals(9876543210L, nestedDeserialized.getInt64(2)); + } + + @Test + @DisplayName("Project: subset of fields with serialization round-trip") + void testProjectSubsetWithSerialization() throws ImprintException { + var schemaId = new SchemaId(10, 0xabcd1234); + var originalRecord = ImprintRecord.builder(schemaId) + .field(1, 100) + .field(2, "keep 
this field") + .field(3, false) + .field(4, "remove this field") + .field(5, 42.5) + .field(6, new byte[]{9, 8, 7}) + .build(); + + // Project fields 1, 2, 5 (skip 3, 4, 6) + var projected = originalRecord.project(1, 2, 5); + + assertEquals(3, projected.getDirectory().size()); + assertEquals(100, projected.getInt32(1)); + assertEquals("keep this field", projected.getString(2)); + assertEquals(42.5, projected.getFloat64(5)); + + // Verify missing fields + assertNull(projected.getValue(3)); + assertNull(projected.getValue(4)); + assertNull(projected.getValue(6)); + + // Test serialization round-trip of projected record + var buffer = projected.serializeToBuffer(); + byte[] serialized = new byte[buffer.remaining()]; + buffer.get(serialized); + var deserialized = ImprintRecord.deserialize(serialized); + + assertEquals(3, deserialized.getDirectory().size()); + assertEquals(100, deserialized.getInt32(1)); + assertEquals("keep this field", deserialized.getString(2)); + assertEquals(42.5, deserialized.getFloat64(5)); + } + + @Test + @DisplayName("Project: complex data types (arrays, maps, nested records)") + void testProjectComplexTypes() throws ImprintException { + var schemaId = new SchemaId(11, 0xbeef4567); + + // Create nested record + var nestedRecord = ImprintRecord.builder(new SchemaId(12, 0x11111111)) + .field(100, "nested value") + .build(); + + // Create homogeneous array (all strings) + var testArray = Arrays.asList(Value.fromString("item1"), Value.fromString("item2"), Value.fromString("item3")); + + // Create homogeneous map (string keys -> string values) + var testMap = new HashMap(); + testMap.put(MapKey.fromString("key1"), Value.fromString("value1")); + testMap.put(MapKey.fromString("key2"), Value.fromString("value2")); + + var originalRecord = ImprintRecord.builder(schemaId) + .field(1, "simple string") + .field(2, Value.fromArray(testArray)) + .field(3, Value.fromMap(testMap)) + .field(4, nestedRecord) + .field(5, 999L) + .build(); + + // Project only 
complex types + var projected = originalRecord.project(2, 3, 4); + + assertEquals(3, projected.getDirectory().size()); + + // Verify array projection (homogeneous strings) + var projectedArray = projected.getArray(2); + assertEquals(3, projectedArray.size()); + assertEquals(Value.fromString("item1"), projectedArray.get(0)); + assertEquals(Value.fromString("item2"), projectedArray.get(1)); + assertEquals(Value.fromString("item3"), projectedArray.get(2)); + + // Verify map projection (string -> string) + var projectedMap = projected.getMap(3); + assertEquals(2, projectedMap.size()); + assertEquals(Value.fromString("value1"), projectedMap.get(MapKey.fromString("key1"))); + assertEquals(Value.fromString("value2"), projectedMap.get(MapKey.fromString("key2"))); + + // Verify nested record projection + var projectedNested = projected.getRow(4); + assertEquals("nested value", projectedNested.getString(100)); + + // Verify excluded fields + assertNull(projected.getValue(1)); + assertNull(projected.getValue(5)); + } + + @Test + @DisplayName("Merge: distinct fields with serialization round-trip") + void testMergeDistinctFieldsWithSerialization() throws ImprintException { + var schemaId = new SchemaId(20, 0xcafe5678); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, 100) + .field(3, "from record1") + .field(5, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, 200L) + .field(4, "from record2") + .field(6, 3.14f) + .build(); + + var merged = record1.merge(record2); + + assertEquals(6, merged.getDirectory().size()); + assertEquals(100, merged.getInt32(1)); + assertEquals(200L, merged.getInt64(2)); + assertEquals("from record1", merged.getString(3)); + assertEquals("from record2", merged.getString(4)); + assertTrue(merged.getBoolean(5)); + assertEquals(3.14f, merged.getFloat32(6)); + + // Test serialization round-trip of merged record + var buffer = merged.serializeToBuffer(); + byte[] serialized = new byte[buffer.remaining()]; + 
buffer.get(serialized); + var deserialized = ImprintRecord.deserialize(serialized); + + assertEquals(6, deserialized.getDirectory().size()); + assertEquals(100, deserialized.getInt32(1)); + assertEquals(200L, deserialized.getInt64(2)); + assertEquals("from record1", deserialized.getString(3)); + assertEquals("from record2", deserialized.getString(4)); + assertTrue(deserialized.getBoolean(5)); + assertEquals(3.14f, deserialized.getFloat32(6)); + } - System.out.println("✓ Nested records test passed"); + @Test + @DisplayName("Merge: overlapping fields - first record wins") + void testMergeOverlappingFields() throws ImprintException { + var schemaId = new SchemaId(21, 0xdead9876); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, "first wins") + .field(2, 100) + .field(4, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(1, "second loses") // Overlapping field + .field(2, 999) // Overlapping field + .field(3, "unique to second") + .field(4, false) // Overlapping field + .build(); + + var merged = record1.merge(record2); + + assertEquals(4, merged.getDirectory().size()); + assertEquals("first wins", merged.getString(1)); // First record wins + assertEquals(100, merged.getInt32(2)); // First record wins + assertEquals("unique to second", merged.getString(3)); // Only in second + assertTrue(merged.getBoolean(4)); // First record wins + } + + @Test + @DisplayName("Merge: complex data types and nested records") + void testMergeComplexTypes() throws ImprintException { + var schemaId = new SchemaId(22, 0xbeef1111); + + // Create nested records for both + var nested1 = ImprintRecord.builder(new SchemaId(23, 0x22222222)) + .field(100, "nested in record1") + .build(); + + var nested2 = ImprintRecord.builder(new SchemaId(24, 0x33333333)) + .field(200, "nested in record2") + .build(); + + // Create arrays + var array1 = Arrays.asList(Value.fromString("array1_item1"), Value.fromString("array1_item2")); + var array2 = 
Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); + + // Create maps + var map1 = new HashMap(); + map1.put(MapKey.fromString("map1_key"), Value.fromString("map1_value")); + + var map2 = new HashMap(); + map2.put(MapKey.fromInt32(42), Value.fromBoolean(true)); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, nested1) + .field(3, Value.fromArray(array1)) + .field(5, Value.fromMap(map1)) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, nested2) + .field(4, Value.fromArray(array2)) + .field(6, Value.fromMap(map2)) + .build(); + + var merged = record1.merge(record2); + + assertEquals(6, merged.getDirectory().size()); + + // Verify nested records + var mergedNested1 = merged.getRow(1); + assertEquals("nested in record1", mergedNested1.getString(100)); + + var mergedNested2 = merged.getRow(2); + assertEquals("nested in record2", mergedNested2.getString(200)); + + // Verify arrays + var mergedArray1 = merged.getArray(3); + assertEquals(2, mergedArray1.size()); + assertEquals(Value.fromString("array1_item1"), mergedArray1.get(0)); + + var mergedArray2 = merged.getArray(4); + assertEquals(2, mergedArray2.size()); + assertEquals(Value.fromInt32(10), mergedArray2.get(0)); + + // Verify maps + var mergedMap1 = merged.getMap(5); + assertEquals(Value.fromString("map1_value"), mergedMap1.get(MapKey.fromString("map1_key"))); + + var mergedMap2 = merged.getMap(6); + assertEquals(Value.fromBoolean(true), mergedMap2.get(MapKey.fromInt32(42))); + } + + @Test + @DisplayName("Project and Merge: chained operations") + void testProjectAndMergeChained() throws ImprintException { + var schemaId = new SchemaId(30, 0xabcdabcd); + + // Create a large record + var fullRecord = ImprintRecord.builder(schemaId) + .field(1, "field1") + .field(2, "field2") + .field(3, "field3") + .field(4, "field4") + .field(5, "field5") + .field(6, "field6") + .build(); + + // Project different subsets + var projection1 = fullRecord.project(1, 3, 5); + var projection2 = 
fullRecord.project(2, 4, 6); + + assertEquals(3, projection1.getDirectory().size()); + assertEquals(3, projection2.getDirectory().size()); + + // Merge the projections back together + var recomposed = projection1.merge(projection2); + + assertEquals(6, recomposed.getDirectory().size()); + assertEquals("field1", recomposed.getString(1)); + assertEquals("field2", recomposed.getString(2)); + assertEquals("field3", recomposed.getString(3)); + assertEquals("field4", recomposed.getString(4)); + assertEquals("field5", recomposed.getString(5)); + assertEquals("field6", recomposed.getString(6)); + + // Test another chain: project the merged result + var finalProjection = recomposed.project(2, 4, 6); + assertEquals(3, finalProjection.getDirectory().size()); + assertEquals("field2", finalProjection.getString(2)); + assertEquals("field4", finalProjection.getString(4)); + assertEquals("field6", finalProjection.getString(6)); + } + + @Test + @DisplayName("Merge and Project: empty record handling") + void testMergeAndProjectEmptyRecords() throws ImprintException { + var schemaId = new SchemaId(40, 0xeeeeeeee); + + var emptyRecord = ImprintRecord.builder(schemaId).build(); + var nonEmptyRecord = ImprintRecord.builder(schemaId) + .field(1, "not empty") + .field(2, 42) + .build(); + + // Test merging with empty + var merged1 = emptyRecord.merge(nonEmptyRecord); + var merged2 = nonEmptyRecord.merge(emptyRecord); + + assertEquals(2, merged1.getDirectory().size()); + assertEquals(2, merged2.getDirectory().size()); + assertEquals("not empty", merged1.getString(1)); + assertEquals("not empty", merged2.getString(1)); + + // Test projecting empty record + var projectedEmpty = emptyRecord.project(1, 2, 3); + assertEquals(0, projectedEmpty.getDirectory().size()); + + // Test projecting non-existent fields + var projectedNonExistent = nonEmptyRecord.project(99, 100); + assertEquals(0, projectedNonExistent.getDirectory().size()); + } + + @Test + @DisplayName("Project and Merge: Large record 
operations") + void testLargeRecordOperations() throws ImprintException { + var schemaId = new SchemaId(50, 0xffffffff); + + // Create a record with many fields + var builder = ImprintRecord.builder(schemaId); + for (int i = 1; i <= 100; i++) { + builder.field(i, "field_" + i + "_data"); + } + var largeRecord = builder.build(); + + assertEquals(100, largeRecord.getDirectory().size()); + + // Project a subset (every 10th field) + int[] projectionFields = new int[10]; + for (int i = 0; i < 10; i++) { + projectionFields[i] = (i + 1) * 10; // 10, 20, 30, ..., 100 + } + + var projected = largeRecord.project(projectionFields); + assertEquals(10, projected.getDirectory().size()); + + for (int i = 0; i < 10; i++) { + int fieldId = (i + 1) * 10; + assertEquals("field_" + fieldId + "_data", projected.getString(fieldId)); + } + + // Create another large record for merging + var builder2 = ImprintRecord.builder(schemaId); + for (int i = 101; i <= 150; i++) { + builder2.field(i, "additional_field_" + i); + } + var additionalRecord = builder2.build(); + + // Merge the large records + var merged = largeRecord.merge(additionalRecord); + assertEquals(150, merged.getDirectory().size()); + + // Verify some values from both records + assertEquals("field_1_data", merged.getString(1)); + assertEquals("field_50_data", merged.getString(50)); + assertEquals("field_100_data", merged.getString(100)); + assertEquals("additional_field_101", merged.getString(101)); + assertEquals("additional_field_150", merged.getString(150)); } private ImprintRecord createTestRecordForGetters() throws ImprintException { @@ -216,7 +550,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Empty Collections (Array and Map)") - void testErgonomicGettersEmptyCollections() throws ImprintException { + void testTypeGettersEmptyCollections() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -231,7 
+565,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Field Not Found") - void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { + void testTypeGetterExceptionFieldNotFound() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -241,7 +575,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Null Field accessed as primitive") - void testErgonomicGetterExceptionNullField() throws ImprintException { + void testTypeGetterExceptionNullField() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -258,7 +592,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Type Mismatch") - void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { + void testTypeGetterExceptionTypeMismatch() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -268,7 +602,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Row (Nested Record)") - void testErgonomicGetterRow() throws ImprintException { + void testTypeGetterRow() throws ImprintException { var innerSchemaId = new SchemaId(6, 0x12345678); var innerRecord = ImprintRecord.builder(innerSchemaId) .field(101, "nested string") diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3b9f371..64be931 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -27,7 +27,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -@Disabled("Enable manually for profiling") +//@Disabled("Enable manually for profiling") 
public class ProfilerTest { private static final int ITERATIONS = 1_000_000; @@ -82,12 +82,12 @@ void profileSerialization() throws Exception { Thread.sleep(3000); var schemaId = new SchemaId(1, 0x12345678); - + System.out.println("Beginning serialization profiling..."); long start = System.nanoTime(); // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 100_000; i++) { + for (int i = 0; i < 500_000; i++) { var writer = new ImprintWriter(schemaId); // Add various field types From 90f8c85cc1bfa8a4174edc84578c34189a53094d Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 7 Jun 2025 14:04:53 -0400 Subject: [PATCH 34/53] Optimize serialization path and remove ImprintWriter code in favor of Builder --- .../benchmark/ComparisonBenchmark.java | 185 ++++++++---------- .../benchmark/FieldAccessBenchmark.java | 73 ++++--- .../com/imprint/benchmark/MergeBenchmark.java | 82 ++++---- .../benchmark/SerializationBenchmark.java | 53 +++-- .../java/com/imprint/core/ImprintBuffers.java | 6 +- .../java/com/imprint/core/ImprintRecord.java | 77 ++++++-- .../imprint/core/ImprintRecordBuilder.java | 127 +++++++++++- .../java/com/imprint/core/ImprintWriter.java | 126 ------------ .../com/imprint/core/ImprintRecordTest.java | 69 +++---- .../com/imprint/profile/ProfilerTest.java | 53 +++-- 10 files changed, 423 insertions(+), 428 deletions(-) delete mode 100644 src/main/java/com/imprint/core/ImprintWriter.java diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 6a6a958..e52388c 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -6,7 +6,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.flatbuffers.FlatBufferBuilder; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; +import com.imprint.core.ImprintRecordBuilder; import 
com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; @@ -143,7 +143,7 @@ public void deserializeSetupImprint(Blackhole bh) throws Exception { @Benchmark public void deserializeSetupFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + com.imprint.benchmark.TestRecordFB result = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); bh.consume(result); } @@ -177,7 +177,7 @@ public void deserializeAvro(Blackhole bh) throws Exception { @Benchmark public void deserializeProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord result = TestRecordProto.TestRecord.parseFrom(protobufBytes); + com.imprint.benchmark.TestRecordProto.TestRecord result = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); bh.consume(result); } @@ -201,7 +201,7 @@ public void deserializeImprint(Blackhole bh) throws Exception { @Benchmark public void deserializeFlatBuffers(Blackhole bh) { - TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + com.imprint.benchmark.TestRecordFB result = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); // Access all fields result.id(); @@ -259,18 +259,18 @@ public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { @Benchmark public void singleFieldAccessAvro(Blackhole bh) throws Exception { GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extraData4")); + bh.consume(record.get("extra_data")); } @Benchmark public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { - TestRecordProto.TestRecord record = TestRecordProto.TestRecord.parseFrom(protobufBytes); + com.imprint.benchmark.TestRecordProto.TestRecord record = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); bh.consume(record.getExtraData(4)); } @Benchmark public void 
singleFieldAccessFlatBuffers(Blackhole bh) { - TestRecordFB record = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + com.imprint.benchmark.TestRecordFB record = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); bh.consume(record.extraData(4)); } @@ -381,10 +381,10 @@ public void mergeAvro(Blackhole bh) throws Exception { //@Benchmark public void mergeProtobuf(Blackhole bh) throws Exception { - var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); + var record1 = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); var record2Data = createTestRecord2(); var record2Bytes = serializeWithProtobuf(record2Data); - var record2 = TestRecordProto.TestRecord.parseFrom(record2Bytes); + var record2 = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(record2Bytes); var merged = mergeProtobufRecords(record1, record2); byte[] result = merged.toByteArray(); @@ -393,10 +393,10 @@ public void mergeProtobuf(Blackhole bh) throws Exception { //@Benchmark public void mergeFlatBuffers(Blackhole bh) { - var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + var record1 = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); var record2Data = createTestRecord2(); var record2Buffer = serializeWithFlatBuffers(record2Data); - var record2 = TestRecordFB.getRootAsTestRecordFB(record2Buffer); + var record2 = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(record2Buffer); var merged = mergeFlatBuffersRecords(record1, record2); bh.consume(merged); @@ -521,37 +521,21 @@ private void setupAvro() { } private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - writer.addField(1, Value.fromInt32(data.id)); - writer.addField(2, Value.fromString(data.name)); - writer.addField(3, Value.fromFloat64(data.price)); - writer.addField(4, 
Value.fromBoolean(data.active)); - writer.addField(5, Value.fromString(data.category)); - - var tagValues = new ArrayList(); - if (data.tags != null) { - for (String tag : data.tags) { - tagValues.add(Value.fromString(tag)); - } - } - writer.addField(6, Value.fromArray(tagValues)); - - var metadataMap = new HashMap(); - if (data.metadata != null) { - for (var entry : data.metadata.entrySet()) { - metadataMap.put(MapKey.fromString(entry.getKey()), Value.fromString(entry.getValue())); - } - } - writer.addField(7, Value.fromMap(metadataMap)); + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); + + builder.field(1, data.id); + builder.field(2, data.name); + builder.field(3, data.price); + builder.field(4, data.active); + builder.field(5, data.category); + builder.field(6, data.tags); + builder.field(7, data.metadata); - if (data.extraData != null) { - for (int i = 0; i < data.extraData.size(); i++) { - writer.addField(8 + i, Value.fromString(data.extraData.get(i))); - } + for (int i = 0; i < data.extraData.size(); i++) { + builder.field(8 + i, data.extraData.get(i)); } - return writer.build().serializeToBuffer(); + return builder.build().serializeToBuffer(); } private byte[] serializeWithJacksonJson(TestRecord data) throws Exception { @@ -601,7 +585,7 @@ private GenericRecord deserializeWithAvro(byte[] data) throws Exception { } private byte[] serializeWithProtobuf(TestRecord data) { - var builder = TestRecordProto.TestRecord.newBuilder() + var builder = com.imprint.benchmark.TestRecordProto.TestRecord.newBuilder() .setId(data.id) .setName(data.name) .setPrice(data.price) @@ -618,20 +602,17 @@ private byte[] serializeWithProtobuf(TestRecord data) { } private ByteBuffer serializeWithFlatBuffers(TestRecord data) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); + var builder = new FlatBufferBuilder(1024); - // Create strings (must be created before the object that uses them) int nameOffset = builder.createString(data.name); int categoryOffset = 
builder.createString(data.category); - // Create tags array int[] tagOffsets = new int[data.tags.size()]; for (int i = 0; i < data.tags.size(); i++) { tagOffsets[i] = builder.createString(data.tags.get(i)); } - int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); + int tagsOffset = com.imprint.benchmark.TestRecordFB.createTagsVector(builder, tagOffsets); - // Create metadata (as parallel arrays for keys and values) String[] metadataKeys = data.metadata.keySet().toArray(new String[0]); String[] metadataValues = new String[metadataKeys.length]; int[] keyOffsets = new int[metadataKeys.length]; @@ -642,51 +623,51 @@ private ByteBuffer serializeWithFlatBuffers(TestRecord data) { keyOffsets[i] = builder.createString(metadataKeys[i]); valueOffsets[i] = builder.createString(metadataValues[i]); } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); + int metadataKeysOffset = com.imprint.benchmark.TestRecordFB.createMetadataKeysVector(builder, keyOffsets); + int metadataValuesOffset = com.imprint.benchmark.TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - // Create extra data array int[] extraDataOffsets = new int[data.extraData.size()]; for (int i = 0; i < data.extraData.size(); i++) { extraDataOffsets[i] = builder.createString(data.extraData.get(i)); } - int extraDataOffset = TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the main object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, data.id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, data.price); - TestRecordFB.addActive(builder, data.active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - 
TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); - - // Finish and return + int extraDataOffset = com.imprint.benchmark.TestRecordFB.createExtraDataVector(builder, extraDataOffsets); + + com.imprint.benchmark.TestRecordFB.startTestRecordFB(builder); + com.imprint.benchmark.TestRecordFB.addId(builder, data.id); + com.imprint.benchmark.TestRecordFB.addName(builder, nameOffset); + com.imprint.benchmark.TestRecordFB.addPrice(builder, data.price); + com.imprint.benchmark.TestRecordFB.addActive(builder, data.active); + com.imprint.benchmark.TestRecordFB.addCategory(builder, categoryOffset); + com.imprint.benchmark.TestRecordFB.addTags(builder, tagsOffset); + com.imprint.benchmark.TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); + com.imprint.benchmark.TestRecordFB.addMetadataValues(builder, metadataValuesOffset); + com.imprint.benchmark.TestRecordFB.addExtraData(builder, extraDataOffset); + int recordOffset = com.imprint.benchmark.TestRecordFB.endTestRecordFB(builder); + builder.finish(recordOffset); - return builder.dataBuffer().slice(); + return builder.dataBuffer(); } private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var writer = new ImprintWriter(first.getHeader().getSchemaId()); + var builder = ImprintRecord.builder(first.getHeader().getSchemaId()); var usedFieldIds = new HashSet(); - - copyFieldsToWriter(first, writer, usedFieldIds); - copyFieldsToWriter(second, writer, usedFieldIds); - - return writer.build(); - } - - private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set usedFieldIds) throws Exception { + + // Copy fields from first record (takes precedence) + copyFieldsToBuilder(first, builder, usedFieldIds); + + // Copy non-conflicting fields from second record + copyFieldsToBuilder(second, builder, usedFieldIds); + + return builder.build(); + } + + private void copyFieldsToBuilder(ImprintRecord record, 
ImprintRecordBuilder builder, Set usedFieldIds) throws Exception { for (var entry : record.getDirectory()) { int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); if (value != null) { - writer.addField(fieldId, value); + builder.field(fieldId, value); usedFieldIds.add(fieldId); } } @@ -694,7 +675,7 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< } private TestRecord mergeTestRecords(TestRecord first, TestRecord second) { - var merged = new TestRecord(); + TestRecord merged = new TestRecord(); merged.id = first.id; merged.name = first.name != null ? first.name : second.name; merged.price = first.price != 0.0 ? first.price : second.price; @@ -729,28 +710,25 @@ private GenericRecord mergeAvroRecords(GenericRecord first, GenericRecord second return merged; } - private TestRecordProto.TestRecord mergeProtobufRecords(TestRecordProto.TestRecord first, TestRecordProto.TestRecord second) { - return TestRecordProto.TestRecord.newBuilder() + private com.imprint.benchmark.TestRecordProto.TestRecord mergeProtobufRecords(com.imprint.benchmark.TestRecordProto.TestRecord first, com.imprint.benchmark.TestRecordProto.TestRecord second) { + return com.imprint.benchmark.TestRecordProto.TestRecord.newBuilder() .mergeFrom(first) .mergeFrom(second) .build(); } - private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB second) { - FlatBufferBuilder builder = new FlatBufferBuilder(1024); + private ByteBuffer mergeFlatBuffersRecords(com.imprint.benchmark.TestRecordFB first, com.imprint.benchmark.TestRecordFB second) { + var builder = new FlatBufferBuilder(1024); - // Use second record's values if they exist, otherwise first record's values String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); double price = second.price() != 0.0 ? 
second.price() : first.price(); - boolean active = second.active(); // Use second's boolean value - int id = first.id(); // Keep first record's ID + boolean active = second.active(); + int id = first.id(); - // Create merged strings int nameOffset = builder.createString(name); int categoryOffset = builder.createString(category); - // Merge tags (combine both arrays) List mergedTags = new ArrayList<>(); for (int i = 0; i < first.tagsLength(); i++) { mergedTags.add(first.tags(i)); @@ -763,9 +741,8 @@ private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB seco for (int i = 0; i < mergedTags.size(); i++) { tagOffsets[i] = builder.createString(mergedTags.get(i)); } - int tagsOffset = TestRecordFB.createTagsVector(builder, tagOffsets); + int tagsOffset = com.imprint.benchmark.TestRecordFB.createTagsVector(builder, tagOffsets); - // Merge metadata (second overwrites first) Map mergedMetadata = new HashMap<>(); for (int i = 0; i < first.metadataKeysLength(); i++) { mergedMetadata.put(first.metadataKeys(i), first.metadataValues(i)); @@ -782,31 +759,29 @@ private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB seco keyOffsets[i] = builder.createString(metadataKeys[i]); valueOffsets[i] = builder.createString(mergedMetadata.get(metadataKeys[i])); } - int metadataKeysOffset = TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = TestRecordFB.createMetadataValuesVector(builder, valueOffsets); + int metadataKeysOffset = com.imprint.benchmark.TestRecordFB.createMetadataKeysVector(builder, keyOffsets); + int metadataValuesOffset = com.imprint.benchmark.TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - // Use first record's extra data (or could merge both) int[] extraDataOffsets = new int[first.extraDataLength()]; for (int i = 0; i < first.extraDataLength(); i++) { extraDataOffsets[i] = builder.createString(first.extraData(i)); } - int extraDataOffset = 
TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - // Create the merged object - TestRecordFB.startTestRecordFB(builder); - TestRecordFB.addId(builder, id); - TestRecordFB.addName(builder, nameOffset); - TestRecordFB.addPrice(builder, price); - TestRecordFB.addActive(builder, active); - TestRecordFB.addCategory(builder, categoryOffset); - TestRecordFB.addTags(builder, tagsOffset); - TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = TestRecordFB.endTestRecordFB(builder); + int extraDataOffset = com.imprint.benchmark.TestRecordFB.createExtraDataVector(builder, extraDataOffsets); + + com.imprint.benchmark.TestRecordFB.startTestRecordFB(builder); + com.imprint.benchmark.TestRecordFB.addId(builder, id); + com.imprint.benchmark.TestRecordFB.addName(builder, nameOffset); + com.imprint.benchmark.TestRecordFB.addPrice(builder, price); + com.imprint.benchmark.TestRecordFB.addActive(builder, active); + com.imprint.benchmark.TestRecordFB.addCategory(builder, categoryOffset); + com.imprint.benchmark.TestRecordFB.addTags(builder, tagsOffset); + com.imprint.benchmark.TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); + com.imprint.benchmark.TestRecordFB.addMetadataValues(builder, metadataValuesOffset); + com.imprint.benchmark.TestRecordFB.addExtraData(builder, extraDataOffset); + int recordOffset = com.imprint.benchmark.TestRecordFB.endTestRecordFB(builder); builder.finish(recordOffset); - return builder.dataBuffer().slice(); + return builder.dataBuffer(); } private TestRecord createTestRecord() { diff --git a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java index 1ead21f..06a7717 100644 --- a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java @@ -1,7 +1,7 @@ 
package com.imprint.benchmark; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; +import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; @@ -20,8 +20,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) public class FieldAccessBenchmark { @@ -196,83 +196,80 @@ public void accessDenseRecord(Blackhole bh) throws Exception { * This should be replaced with actual project API when available. */ private ImprintRecord simulateProject(ImprintRecord source, int[] fieldIds) throws Exception { - var writer = new ImprintWriter(source.getHeader().getSchemaId()); + var builder = ImprintRecord.builder(source.getHeader().getSchemaId()); for (int fieldId : fieldIds) { var value = source.getValue(fieldId); if (value != null) { - writer.addField(fieldId, value); + builder.field(fieldId, value); } } - return writer.build(); + return builder.build(); } private ImprintRecord createSparseRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - // Sparse record with large field IDs and few fields - writer.addField(1000, Value.fromString("sparse_field_1")); - writer.addField(5000, Value.fromInt32(42)); - writer.addField(10000, Value.fromFloat64(3.14159)); - writer.addField(15000, Value.fromBoolean(true)); - writer.addField(20000, Value.fromString("sparse_field_5")); - - return writer.build(); + return ImprintRecord.builder(new SchemaId(1, 0x12345678)) + .field(1000, Value.fromString("sparse_field_1")) + .field(5000, Value.fromInt32(42)) + .field(10000, Value.fromFloat64(3.14159)) + .field(15000, 
Value.fromBoolean(true)) + .field(20000, Value.fromString("sparse_field_5")) + .build(); } private ImprintRecord createDenseRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(2, 0x87654321)); + var builder = ImprintRecord.builder(new SchemaId(2, 0x87654321)); // Dense record with 100 sequential fields for (int i = 1; i <= 100; i++) { switch (i % 5) { case 0: - writer.addField(i, Value.fromString("string_field_" + i)); + builder.field(i, Value.fromString("string_field_" + i)); break; case 1: - writer.addField(i, Value.fromInt32(i * 10)); + builder.field(i, Value.fromInt32(i * 10)); break; case 2: - writer.addField(i, Value.fromFloat64(i * 1.5)); + builder.field(i, Value.fromFloat64(i * 1.5)); break; case 3: - writer.addField(i, Value.fromBoolean(i % 2 == 0)); + builder.field(i, Value.fromBoolean(i % 2 == 0)); break; case 4: - writer.addField(i, Value.fromInt64(i * 1000L)); + builder.field(i, Value.fromInt64(i * 1000L)); break; } } - return writer.build(); + return builder.build(); } private ImprintRecord createLargeRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(3, 0x11223344)); + var builder = ImprintRecord.builder(new SchemaId(3, 0xABCDEF12)); - // Large record with complex data types - writer.addField(1, Value.fromString("LargeRecord")); + // Large record with complex fields (arrays, maps) + builder.field(1, Value.fromString("Large record with complex data")); - // Large array field - var largeArray = new ArrayList(); - for (int i = 0; i < 1000; i++) { - largeArray.add(Value.fromString("array_item_" + i)); + // Add a large array + var list = new ArrayList(); + for (int i = 0; i < 200; i++) { + list.add(Value.fromInt32(i)); } - writer.addField(2, Value.fromArray(largeArray)); + builder.field(2, Value.fromArray(list)); - // Large map field - var largeMap = new HashMap(); + // Add a large map + var map = new HashMap(); for (int i = 0; i < 100; i++) { - largeMap.put(MapKey.fromString("key_" + i), 
Value.fromString("map_value_" + i)); + map.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); } - writer.addField(3, Value.fromMap(largeMap)); + builder.field(3, Value.fromMap(map)); - // Many regular fields + // Add more fields for (int i = 4; i <= 50; i++) { - writer.addField(i, Value.fromString("large_record_field_" + i + "_with_substantial_content")); + builder.field(i, Value.fromBytes(new byte[1024])); // 1KB byte arrays } - return writer.build(); + return builder.build(); } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java index f93092a..63e43e6 100644 --- a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java @@ -1,7 +1,7 @@ package com.imprint.benchmark; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; +import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.types.Value; import org.openjdk.jmh.annotations.*; @@ -83,25 +83,25 @@ public void mergeWithConflicts(Blackhole bh) throws Exception { * This should be replaced with actual merge API when available. 
*/ private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var writer = new ImprintWriter(first.getHeader().getSchemaId()); + var builder = ImprintRecord.builder(first.getHeader().getSchemaId()); var usedFieldIds = new HashSet(); // Copy fields from first record (takes precedence) - copyFieldsToWriter(first, writer, usedFieldIds); + copyFieldsToBuilder(first, builder, usedFieldIds); // Copy non-conflicting fields from second record - copyFieldsToWriter(second, writer, usedFieldIds); + copyFieldsToBuilder(second, builder, usedFieldIds); - return writer.build(); + return builder.build(); } - private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set usedFieldIds) throws Exception { + private void copyFieldsToBuilder(ImprintRecord record, ImprintRecordBuilder builder, Set usedFieldIds) throws Exception { for (var entry : record.getDirectory()) { int fieldId = entry.getId(); if (!usedFieldIds.contains(fieldId)) { var value = record.getValue(fieldId); if (value != null) { - writer.addField(fieldId, value); + builder.field(fieldId, value); usedFieldIds.add(fieldId); } } @@ -109,55 +109,49 @@ private void copyFieldsToWriter(ImprintRecord record, ImprintWriter writer, Set< } private ImprintRecord createProductRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromString("Laptop")); - writer.addField(4, Value.fromFloat64(999.99)); - writer.addField(5, Value.fromString("Electronics")); - writer.addField(6, Value.fromInt32(50)); // stock - writer.addField(7, Value.fromString("TechCorp")); - writer.addField(8, Value.fromBoolean(true)); // available - - return writer.build(); + return ImprintRecord.builder(new SchemaId(1, 0x12345678)) + .field(1, Value.fromString("Product")) + .field(2, Value.fromInt32(12345)) + .field(3, Value.fromString("Laptop")) + 
.field(4, Value.fromFloat64(999.99)) + .field(5, Value.fromString("Electronics")) + .field(6, Value.fromInt32(50)) // stock + .field(7, Value.fromString("TechCorp")) + .field(8, Value.fromBoolean(true)) // available + .build(); } private ImprintRecord createOrderRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(2, 0x87654321)); - - writer.addField(10, Value.fromString("Order")); - writer.addField(11, Value.fromInt32(67890)); - writer.addField(12, Value.fromInt32(12345)); // product_id (overlaps with product) - writer.addField(13, Value.fromInt32(2)); // quantity - writer.addField(14, Value.fromFloat64(1999.98)); // total - writer.addField(15, Value.fromString("2024-01-15")); // order_date - writer.addField(16, Value.fromString("shipped")); // status - - return writer.build(); + return ImprintRecord.builder(new SchemaId(2, 0x87654321)) + .field(10, Value.fromString("Order")) + .field(11, Value.fromInt32(67890)) + .field(12, Value.fromInt32(12345)) // product_id (overlaps with product) + .field(13, Value.fromInt32(2)) // quantity + .field(14, Value.fromFloat64(1999.98)) // total + .field(15, Value.fromString("2024-01-15")) // order_date + .field(16, Value.fromString("shipped")) // status + .build(); } private ImprintRecord createCustomerRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(3, 0x11223344)); - - writer.addField(20, Value.fromString("Customer")); - writer.addField(21, Value.fromInt32(555)); - writer.addField(22, Value.fromString("John Doe")); - writer.addField(23, Value.fromString("john.doe@email.com")); - writer.addField(24, Value.fromString("123 Main St")); - writer.addField(25, Value.fromString("premium")); // tier - writer.addField(26, Value.fromBoolean(true)); // active - - return writer.build(); + return ImprintRecord.builder(new SchemaId(3, 0x11223344)) + .field(20, Value.fromString("Customer")) + .field(21, Value.fromInt32(555)) + .field(22, Value.fromString("John Doe")) + .field(23, 
Value.fromString("john.doe@email.com")) + .field(24, Value.fromString("123 Main St")) + .field(25, Value.fromString("premium")) // tier + .field(26, Value.fromBoolean(true)) // active + .build(); } private ImprintRecord createRecordWithFields(int startId, int endId, String prefix) throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); for (int i = startId; i <= endId; i++) { - writer.addField(i, Value.fromString(prefix + "field_" + i)); + builder.field(i, Value.fromString(prefix + "field_" + i)); } - return writer.build(); + return builder.build(); } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java index 3275843..11e2b29 100644 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java @@ -1,7 +1,6 @@ package com.imprint.benchmark; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; @@ -89,27 +88,25 @@ public void deserializeLargeRecord(Blackhole bh) throws Exception { // ===== HELPER METHODS ===== private ImprintRecord createSmallRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); - // Small record: ~10 fields, simple types - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); - writer.addField(4, Value.fromBoolean(true)); - writer.addField(5, Value.fromString("Electronics")); - - return writer.build(); + return ImprintRecord.builder(new SchemaId(1, 0x12345678)) + .field(1, Value.fromString("Product")) + .field(2, Value.fromInt32(12345)) + .field(3, Value.fromFloat64(99.99)) + .field(4, Value.fromBoolean(true)) + .field(5, 
Value.fromString("Electronics")) + .build(); } private ImprintRecord createMediumRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Medium record: ~50 fields, mixed types including arrays - writer.addField(1, Value.fromString("Product")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); - writer.addField(4, Value.fromBoolean(true)); - writer.addField(5, Value.fromString("Electronics")); + builder.field(1, Value.fromString("Product")); + builder.field(2, Value.fromInt32(12345)); + builder.field(3, Value.fromFloat64(99.99)); + builder.field(4, Value.fromBoolean(true)); + builder.field(5, Value.fromString("Electronics")); // Add array field var tags = Arrays.asList( @@ -117,50 +114,50 @@ private ImprintRecord createMediumRecord() throws Exception { Value.fromString("trending"), Value.fromString("bestseller") ); - writer.addField(6, Value.fromArray(tags)); + builder.field(6, Value.fromArray(tags)); // Add map field (all string values for consistency) var metadata = new HashMap(); metadata.put(MapKey.fromString("manufacturer"), Value.fromString("TechCorp")); metadata.put(MapKey.fromString("model"), Value.fromString("TC-2024")); metadata.put(MapKey.fromString("year"), Value.fromString("2024")); - writer.addField(7, Value.fromMap(metadata)); + builder.field(7, Value.fromMap(metadata)); // Add more fields for medium size for (int i = 8; i <= 50; i++) { - writer.addField(i, Value.fromString("field_" + i + "_value")); + builder.field(i, Value.fromString("field_" + i + "_value")); } - return writer.build(); + return builder.build(); } private ImprintRecord createLargeRecord() throws Exception { - var writer = new ImprintWriter(new SchemaId(1, 0x12345678)); + var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Large record: ~200 fields, complex nested structures - writer.addField(1, 
Value.fromString("LargeProduct")); - writer.addField(2, Value.fromInt32(12345)); - writer.addField(3, Value.fromFloat64(99.99)); + builder.field(1, Value.fromString("LargeProduct")); + builder.field(2, Value.fromInt32(12345)); + builder.field(3, Value.fromFloat64(99.99)); // Large array var largeArray = new ArrayList(); for (int i = 0; i < 100; i++) { largeArray.add(Value.fromString("item_" + i)); } - writer.addField(4, Value.fromArray(largeArray)); + builder.field(4, Value.fromArray(largeArray)); // Large map var largeMap = new HashMap(); for (int i = 0; i < 50; i++) { largeMap.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); } - writer.addField(5, Value.fromMap(largeMap)); + builder.field(5, Value.fromMap(largeMap)); // Many string fields for (int i = 6; i <= 200; i++) { - writer.addField(i, Value.fromString("this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size")); + builder.field(i, Value.fromString("this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size")); } - return writer.build(); + return builder.build(); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index c14d6df..ac3b4d3 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -117,8 +117,10 @@ public int getDirectoryCount() { * Create a new buffer containing the serialized directory. */ public ByteBuffer serializeDirectory() { - ensureDirectoryParsed(); - return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values())); + // The directoryBuffer is created on construction and is read-only. + // If constructed from raw bytes, it's a view of the original. + // If constructed from a list, it's a fresh buffer. In both cases, it's ready. 
+ return directoryBuffer.duplicate(); } // ========== PRIVATE METHODS ========== diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e720df5..83ddb03 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -156,23 +156,20 @@ public ImprintRecord getRow(int fieldId) throws ImprintException { * Serialize this record to a ByteBuffer. */ public ByteBuffer serializeToBuffer() { - var buffer = ByteBuffer.allocate(estimateSerializedSize()); - buffer.order(ByteOrder.LITTLE_ENDIAN); + var directoryBuffer = buffers.serializeDirectory(); // This is now optimized to return a duplicate + var payloadBuffer = buffers.getPayload(); - // Write header - serializeHeader(buffer); + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payloadBuffer.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); - // Write directory - var directoryBuffer = buffers.serializeDirectory(); - buffer.put(directoryBuffer); + // Assemble the final record from existing components + serializeHeader(this.header, finalBuffer); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payloadBuffer.duplicate()); // Use duplicate to preserve original buffer state - // Write payload - var payload = buffers.getPayload(); - var payloadCopy = payload.duplicate(); - buffer.put(payloadCopy); - - buffer.flip(); - return buffer; + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); } public int estimateSerializedSize() { @@ -182,6 +179,32 @@ public int estimateSerializedSize() { return size; } + /** + * Serializes the components of a record into a single ByteBuffer. + * This provides a direct serialization path without needing a live ImprintRecord instance. + * + * @param schemaId The schema identifier for the record. + * @param directory The list of directory entries, which must be sorted by field ID. 
+ * @param payload The ByteBuffer containing all field data concatenated. + * @return A read-only ByteBuffer with the complete serialized record. + */ + public static ByteBuffer serialize(SchemaId schemaId, List directory, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + var directoryBuffer = createDirectoryBuffer(directory); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Assemble the final record + serializeHeader(header, finalBuffer); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + // ========== STATIC FACTORY METHODS ========== public static ImprintRecordBuilder builder(SchemaId schemaId) { @@ -272,7 +295,7 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr } } - private void serializeHeader(ByteBuffer buffer) { + private static void serializeHeader(Header header, ByteBuffer buffer) { buffer.put(Constants.MAGIC); buffer.put(Constants.VERSION); buffer.put(header.getFlags().getValue()); @@ -305,6 +328,30 @@ private static Header deserializeHeader(ByteBuffer buffer) throws ImprintExcepti return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } + /** + * Creates a serialized representation of the directory. 
+ */ + private static ByteBuffer createDirectoryBuffer(List directory) { + try { + int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); + var buffer = ByteBuffer.allocate(bufferSize); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.encode(directory.size(), buffer); + for (var entry : directory) { + buffer.putShort(entry.getId()); + buffer.put(entry.getTypeCode().getCode()); + buffer.putInt(entry.getOffset()); + } + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } catch (Exception e) { + // Should not happen with valid inputs + return ByteBuffer.allocate(0).asReadOnlyBuffer(); + } + } + @Override public String toString() { return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 51a3525..39238a7 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -1,10 +1,19 @@ package com.imprint.core; +import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; import com.imprint.types.Value; -import java.util.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; /** * A fluent builder for creating ImprintRecord instances with type-safe, @@ -129,16 +138,55 @@ public int fieldCount() { } public Set fieldIds() { - return new TreeSet<>(fields.keySet()); + return fields.keySet(); } // Build the final record public ImprintRecord build() throws ImprintException { - var writer = new ImprintWriter(schemaId); + var directory = new ArrayList(fields.size()); + var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); + 
payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); + + for (var entry : fields.entrySet()) { + int fieldId = entry.getKey(); + var value = entry.getValue(); + + directory.add(new com.imprint.core.DirectoryEntry((short)fieldId, value.getTypeCode(), payloadBuffer.position())); + serializeValue(value, payloadBuffer); + } + + // Create read-only view of the payload without copying + payloadBuffer.flip(); // limit = position, position = 0 + var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); + + var header = new com.imprint.core.Header(new com.imprint.core.Flags((byte) 0), schemaId, payloadView.remaining()); + return new ImprintRecord(header, directory, payloadView); + } + + /** + * Builds the record and serializes it directly to a ByteBuffer without creating an intermediate ImprintRecord object. + * This is the most efficient path for "write-only" scenarios. + * + * @return A read-only ByteBuffer containing the fully serialized record. + * @throws ImprintException if serialization fails. + */ + public ByteBuffer buildToBuffer() throws ImprintException { + // 1. Prepare payload and directory list + var directory = new ArrayList(fields.size()); + var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); + payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); + for (var entry : fields.entrySet()) { - writer.addField(entry.getKey(), entry.getValue()); + int fieldId = entry.getKey(); + var value = entry.getValue(); + directory.add(new com.imprint.core.DirectoryEntry((short) fieldId, value.getTypeCode(), payloadBuffer.position())); + serializeValue(value, payloadBuffer); } - return writer.build(); + payloadBuffer.flip(); + var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); + + // 2. 
Serialize directly to the final buffer format + return ImprintRecord.serialize(schemaId, directory, payloadView); } // Internal helper methods @@ -238,4 +286,73 @@ private MapKey convertToMapKey(Object obj) { public String toString() { return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); } + + private int estimatePayloadSize() throws ImprintException { + // More accurate estimation to reduce allocations + int estimatedSize = 0; + for (var value : fields.values()) { + estimatedSize += estimateValueSize(value); + } + // Add 25% buffer to reduce reallocations + return Math.max(estimatedSize + (estimatedSize / 4), fields.size() * 16); + } + + /** + * Estimates the serialized size in bytes for a given value. + * This method provides size estimates for payload buffer allocation, + * supporting both array-based and ByteBuffer-based value types. + * + * @param value the value to estimate size for + * @return estimated size in bytes including type-specific overhead + */ + private int estimateValueSize(Value value) throws ImprintException { + // Use TypeHandler for simple types + switch (value.getTypeCode()) { + case NULL: + case BOOL: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + case BYTES: + case STRING: + case ARRAY: + case MAP: + return value.getTypeCode().getHandler().estimateSize(value); + + case ROW: + Value.RowValue rowValue = (Value.RowValue) value; + return rowValue.getValue().estimateSerializedSize(); + + default: + throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + } + } + + + private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { + switch (value.getTypeCode()) { + case NULL: + case BOOL: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + case BYTES: + case STRING: + case ARRAY: + case MAP: + value.getTypeCode().getHandler().serialize(value, buffer); + break; + //TODO eliminate this switch 
entirely by implementing a ROW TypeHandler + case ROW: + Value.RowValue rowValue = (Value.RowValue) value; + var serializedRow = rowValue.getValue().serializeToBuffer(); + buffer.put(serializedRow); + break; + + default: + throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + } + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintWriter.java b/src/main/java/com/imprint/core/ImprintWriter.java deleted file mode 100644 index b1d5f53..0000000 --- a/src/main/java/com/imprint/core/ImprintWriter.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.imprint.core; - -import com.imprint.error.ErrorType; -import com.imprint.error.ImprintException; -import com.imprint.types.Value; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Objects; -import java.util.TreeMap; - -/** - * A writer for constructing ImprintRecords by adding fields sequentially. - */ -public final class ImprintWriter { - private final SchemaId schemaId; - private final TreeMap fields; // keep fields in sorted order - - public ImprintWriter(SchemaId schemaId) { - this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); - this.fields = new TreeMap<>(); - } - - /** - * Adds a field to the record being built. - */ - public ImprintWriter addField(int id, Value value) { - Objects.requireNonNull(value, "Value cannot be null"); - this.fields.put(id, value); - return this; - } - - /** - * Consumes the writer and builds an ImprintRecord. 
- */ - public ImprintRecord build() throws ImprintException { - var directory = new ArrayList(fields.size()); - var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); - payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - - for (var entry : fields.entrySet()) { - int fieldId = entry.getKey(); - var value = entry.getValue(); - - directory.add(new DirectoryEntry(fieldId, value.getTypeCode(), payloadBuffer.position())); - serializeValue(value, payloadBuffer); - } - - // Create read-only view of the payload without copying - payloadBuffer.flip(); // limit = position, position = 0 - var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - - var header = new Header(new Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, directory, payloadView); - } - - private int estimatePayloadSize() throws ImprintException { - // More accurate estimation to reduce allocations - int estimatedSize = 0; - for (var value : fields.values()) { - estimatedSize += estimateValueSize(value); - } - // Add 25% buffer to reduce reallocations - return Math.max(estimatedSize + (estimatedSize / 4), fields.size() * 16); - } - - /** - * Estimates the serialized size in bytes for a given value. - * This method provides size estimates for payload buffer allocation, - * supporting both array-based and ByteBuffer-based value types. 
- * - * @param value the value to estimate size for - * @return estimated size in bytes including type-specific overhead - */ - private int estimateValueSize(Value value) throws ImprintException { - // Use TypeHandler for simple types - switch (value.getTypeCode()) { - case NULL: - case BOOL: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - case BYTES: - case STRING: - case ARRAY: - case MAP: - return value.getTypeCode().getHandler().estimateSize(value); - - case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - return rowValue.getValue().estimateSerializedSize(); - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); - } - } - - - private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { - switch (value.getTypeCode()) { - case NULL: - case BOOL: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - case BYTES: - case STRING: - case ARRAY: - case MAP: - value.getTypeCode().getHandler().serialize(value, buffer); - break; - //TODO eliminate this switch entirely by implementing a ROW TypeHandler - case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - var serializedRow = rowValue.getValue().serializeToBuffer(); - buffer.put(serializedRow); - break; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); - } - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordTest.java b/src/test/java/com/imprint/core/ImprintRecordTest.java index 3e37473..6d85ccb 100644 --- a/src/test/java/com/imprint/core/ImprintRecordTest.java +++ b/src/test/java/com/imprint/core/ImprintRecordTest.java @@ -24,12 +24,10 @@ private String getStringValue(Value value) { @Test void shouldCreateSimpleRecord() throws ImprintException { var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.fromInt32(42)) - 
.addField(2, Value.fromString("hello")); - - var record = writer.build(); + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromInt32(42)) + .field(2, Value.fromString("hello")) + .build(); assertThat(record.getHeader().getSchemaId()).isEqualTo(schemaId); assertThat(record.getDirectory()).hasSize(2); @@ -53,18 +51,16 @@ var record = writer.build(); @Test void shouldRoundtripThroughSerialization() throws ImprintException { var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); - - writer.addField(1, Value.nullValue()) - .addField(2, Value.fromBoolean(true)) - .addField(3, Value.fromInt32(42)) - .addField(4, Value.fromInt64(123456789L)) - .addField(5, Value.fromFloat32(3.14f)) - .addField(6, Value.fromFloat64(2.718281828)) - .addField(7, Value.fromBytes(new byte[]{1, 2, 3, 4})) - .addField(8, Value.fromString("test string")); - - var original = writer.build(); + var original = ImprintRecord.builder(schemaId) + .field(1, Value.nullValue()) + .field(2, Value.fromBoolean(true)) + .field(3, Value.fromInt32(42)) + .field(4, Value.fromInt64(123456789L)) + .field(5, Value.fromFloat32(3.14f)) + .field(6, Value.fromFloat64(2.718281828)) + .field(7, Value.fromBytes(new byte[]{1, 2, 3, 4})) + .field(8, Value.fromString("test string")) + .build(); // Serialize and deserialize var buffer = original.serializeToBuffer(); @@ -94,7 +90,6 @@ void shouldRoundtripThroughSerialization() throws ImprintException { @Test void shouldHandleArrays() throws ImprintException { var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); List intArray = Arrays.asList( Value.fromInt32(1), @@ -102,8 +97,9 @@ void shouldHandleArrays() throws ImprintException { Value.fromInt32(3) ); - writer.addField(1, Value.fromArray(intArray)); - ImprintRecord record = writer.build(); + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromArray(intArray)) + .build(); // Serialize and deserialize var buffer = 
record.serializeToBuffer(); @@ -125,14 +121,14 @@ void shouldHandleArrays() throws ImprintException { @Test void shouldHandleMaps() throws ImprintException { var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); var map = new HashMap(); map.put(MapKey.fromString("key1"), Value.fromInt32(1)); map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - writer.addField(1, Value.fromMap(map)); - var record = writer.build(); + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromMap(map)) + .build(); // Serialize and deserialize var buffer = record.serializeToBuffer(); @@ -154,17 +150,17 @@ var record = writer.build(); void shouldHandleNestedRecords() throws ImprintException { // Create inner record var innerSchemaId = new SchemaId(2, 0xcafebabe); - var innerWriter = new ImprintWriter(innerSchemaId); - innerWriter.addField(1, Value.fromInt32(42)) - .addField(2, Value.fromString("nested")); - var innerRecord = innerWriter.build(); + var innerRecord = ImprintRecord.builder(innerSchemaId) + .field(1, Value.fromInt32(42)) + .field(2, Value.fromString("nested")) + .build(); // Create outer record containing inner record var outerSchemaId = new SchemaId(1, 0xdeadbeef); - var outerWriter = new ImprintWriter(outerSchemaId); - outerWriter.addField(1, Value.fromRow(innerRecord)) - .addField(2, Value.fromInt64(123L)); - var outerRecord = outerWriter.build(); + var outerRecord = ImprintRecord.builder(outerSchemaId) + .field(1, Value.fromRow(innerRecord)) + .field(2, Value.fromInt64(123L)) + .build(); // Serialize and deserialize var buffer = outerRecord.serializeToBuffer(); @@ -218,13 +214,12 @@ void shouldRejectUnsupportedVersion() { @Test void shouldHandleDuplicateFieldIds() throws ImprintException { var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); // Add duplicate field IDs - last one should win - writer.addField(1, Value.fromInt32(42)) - .addField(1, Value.fromInt32(43)); - - var record = 
writer.build(); + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromInt32(42)) + .field(1, Value.fromInt32(43)) + .build(); assertThat(record.getDirectory()).hasSize(1); assertThat(record.getValue(1)).isEqualTo(Value.fromInt32(43)); diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 64be931..1ea752d 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -1,7 +1,6 @@ package com.imprint.profile; import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintWriter; import com.imprint.core.SchemaId; import com.imprint.types.Value; import org.junit.jupiter.api.Disabled; @@ -88,15 +87,15 @@ void profileSerialization() throws Exception { // Create and serialize many records (allocation hotspot) for (int i = 0; i < 500_000; i++) { - var writer = new ImprintWriter(schemaId); + var builder = ImprintRecord.builder(schemaId); // Add various field types - writer.addField(1, Value.fromInt32(i)) - .addField(2, Value.fromString("test-string-" + i)) - .addField(3, Value.fromFloat64(i * 3.14159)) - .addField(4, Value.fromBytes(("bytes-" + i).getBytes())); + builder.field(1, Value.fromInt32(i)) + .field(2, Value.fromString("test-string-" + i)) + .field(3, Value.fromFloat64(i * 3.14159)) + .field(4, Value.fromBytes(("bytes-" + i).getBytes())); - var record = writer.build(); + var record = builder.build(); var serialized = record.serializeToBuffer(); // Potential hotspot // Trigger some deserialization @@ -151,15 +150,15 @@ void profileMemoryAllocation() throws Exception { for (int batch = 0; batch < 1000; batch++) { for (int i = 0; i < 1000; i++) { var schemaId = new SchemaId(batch, i); - var writer = new ImprintWriter(schemaId); + var builder = ImprintRecord.builder(schemaId); // Create strings of varying sizes (allocation pressure) - writer.addField(1, Value.fromString("small")) - .addField(2, 
Value.fromString("medium-length-string-" + i)) - .addField(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .addField(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays + builder.field(1, Value.fromString("small")) + .field(2, Value.fromString("medium-length-string-" + i)) + .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) + .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - var record = writer.build(); + var record = builder.build(); // Some deserialization to trigger string decoding allocations record.getValue(2); @@ -175,54 +174,52 @@ var record = writer.build(); } private ImprintRecord createTestRecord() throws Exception { - var schemaId = new SchemaId(1, 0xdeadbeef); - var writer = new ImprintWriter(schemaId); + var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); for (int i = 1; i <= RECORD_SIZE; i++) { switch (i % 4) { case 0: - writer.addField(i, Value.fromInt32(i * 100)); + builder.field(i, Value.fromInt32(i * 100)); break; case 1: - writer.addField(i, Value.fromString("field-value-" + i)); + builder.field(i, Value.fromString("field-value-" + i)); break; case 2: - writer.addField(i, Value.fromFloat64(i * 3.14159)); + builder.field(i, Value.fromFloat64(i * 3.14159)); break; case 3: - writer.addField(i, Value.fromBytes(("bytes-" + i).getBytes())); + builder.field(i, Value.fromBytes(("bytes-" + i).getBytes())); break; } } - return writer.build(); + return builder.build(); } private ImprintRecord createLargeRecord() throws Exception { - var schemaId = new SchemaId(2, 0xcafebabe); - var writer = new ImprintWriter(schemaId); + var builder = ImprintRecord.builder(new SchemaId(2, 0xcafebabe)); // Create 100 fields with realistic data for (int i = 1; i <= 100; i++) { switch (i % 5) { case 0: - writer.addField(i, Value.fromInt32(i)); + builder.field(i, Value.fromInt32(i)); break; case 1: - 
writer.addField(i, Value.fromString("user-name-" + i + "@example.com")); + builder.field(i, Value.fromString("user-name-" + i + "@example.com")); break; case 2: - writer.addField(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); + builder.field(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); break; case 3: - writer.addField(i, Value.fromFloat64(i * 2.718281828)); + builder.field(i, Value.fromFloat64(i * 2.718281828)); break; case 4: - writer.addField(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); + builder.field(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); break; } } - return writer.build(); + return builder.build(); } } \ No newline at end of file From e6e1ecdf8693ead3c15f4b64a012b2f2a2ed62a4 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 7 Jun 2025 14:37:28 -0400 Subject: [PATCH 35/53] Allow for record creation path from builder to bypass extra TreeMapping --- .../java/com/imprint/core/ImprintBuffers.java | 38 +++++++++++- .../java/com/imprint/core/ImprintRecord.java | 61 +++++++++++-------- .../imprint/core/ImprintRecordBuilder.java | 16 ++--- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index ac3b4d3..6a294c3 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -50,7 +50,18 @@ public ImprintBuffers(List directory, ByteBuffer payload) { this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); this.directoryParsed = true; this.payload = payload.asReadOnlyBuffer(); - this.directoryBuffer = createDirectoryBuffer(directory); + this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(directory); + } + + /** + * Creates buffers from a pre-parsed and sorted 
directory map (used by ImprintRecordBuilder). + * This is an optimized path that avoids creating an intermediate List-to-Map conversion. + */ + public ImprintBuffers(TreeMap directoryMap, ByteBuffer payload) { + this.parsedDirectory = Objects.requireNonNull(directoryMap); + this.directoryParsed = true; + this.payload = payload.asReadOnlyBuffer(); + this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(directoryMap); } /** @@ -263,7 +274,7 @@ private TreeMap createDirectoryMap(List /** * Create directory buffer from parsed entries. */ - private ByteBuffer createDirectoryBuffer(List directory) { + static ByteBuffer createDirectoryBuffer(List directory) { try { int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); @@ -280,11 +291,32 @@ private ByteBuffer createDirectoryBuffer(List directory) { } } + /** + * Create directory buffer from a pre-sorted map of entries. + */ + static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { + try { + int bufferSize = VarInt.encodedLength(directoryMap.size()) + (directoryMap.size() * Constants.DIR_ENTRY_BYTES); + var buffer = ByteBuffer.allocate(bufferSize); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.encode(directoryMap.size(), buffer); + // TreeMap.values() returns a collection view, iteration is ordered and efficient. + for (var entry : directoryMap.values()) + serializeDirectoryEntry(entry, buffer); + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } catch (Exception e) { + return ByteBuffer.allocate(0).asReadOnlyBuffer(); + } + } + /** * Serialize a single directory entry to the buffer. 
* Format: [fieldId:2bytes][typeCode:1byte][offset:4bytes] */ - private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { + private static void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { buffer.putShort(entry.getId()); buffer.put(entry.getTypeCode().getCode()); buffer.putInt(entry.getOffset()); diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 83ddb03..804642b 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -14,6 +14,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.TreeMap; /** * An Imprint record containing a header and buffer management. @@ -40,6 +41,14 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { this.buffers = new ImprintBuffers(directory, payload); } + /** + * Creates a record from a pre-parsed and sorted directory map (used by ImprintRecordBuilder). 
+ */ + ImprintRecord(Header header, TreeMap directoryMap, ByteBuffer payload) { + this.header = Objects.requireNonNull(header, "Header cannot be null"); + this.buffers = new ImprintBuffers(directoryMap, payload); + } + // ========== FIELD ACCESS METHODS ========== /** @@ -190,7 +199,33 @@ public int estimateSerializedSize() { */ public static ByteBuffer serialize(SchemaId schemaId, List directory, ByteBuffer payload) { var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = createDirectoryBuffer(directory); + var directoryBuffer = ImprintBuffers.createDirectoryBuffer(directory); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Assemble the final record + serializeHeader(header, finalBuffer); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + + /** + * Serializes the components of a record into a single ByteBuffer using a pre-built directory map. + * This provides a direct serialization path without needing a live ImprintRecord instance. + * + * @param schemaId The schema identifier for the record. + * @param directoryMap The map of directory entries, which must be sorted by field ID (e.g., a TreeMap). + * @param payload The ByteBuffer containing all field data concatenated. + * @return A read-only ByteBuffer with the complete serialized record. 
+ */ + public static ByteBuffer serialize(SchemaId schemaId, TreeMap directoryMap, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + var directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(directoryMap); int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); var finalBuffer = ByteBuffer.allocate(finalSize); @@ -328,30 +363,6 @@ private static Header deserializeHeader(ByteBuffer buffer) throws ImprintExcepti return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } - /** - * Creates a serialized representation of the directory. - */ - private static ByteBuffer createDirectoryBuffer(List directory) { - try { - int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(bufferSize); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.encode(directory.size(), buffer); - for (var entry : directory) { - buffer.putShort(entry.getId()); - buffer.put(entry.getTypeCode().getCode()); - buffer.putInt(entry.getOffset()); - } - - buffer.flip(); - return buffer.asReadOnlyBuffer(); - } catch (Exception e) { - // Should not happen with valid inputs - return ByteBuffer.allocate(0).asReadOnlyBuffer(); - } - } - @Override public String toString() { return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 39238a7..4a95898 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -143,7 +143,7 @@ public Set fieldIds() { // Build the final record public ImprintRecord build() throws ImprintException { - var directory = new ArrayList(fields.size()); + var directoryMap = new TreeMap(); var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); 
payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -151,7 +151,7 @@ public ImprintRecord build() throws ImprintException { int fieldId = entry.getKey(); var value = entry.getValue(); - directory.add(new com.imprint.core.DirectoryEntry((short)fieldId, value.getTypeCode(), payloadBuffer.position())); + directoryMap.put(fieldId, new com.imprint.core.DirectoryEntry((short)fieldId, value.getTypeCode(), payloadBuffer.position())); serializeValue(value, payloadBuffer); } @@ -160,7 +160,7 @@ public ImprintRecord build() throws ImprintException { var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); var header = new com.imprint.core.Header(new com.imprint.core.Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, directory, payloadView); + return new ImprintRecord(header, directoryMap, payloadView); } /** @@ -171,22 +171,22 @@ public ImprintRecord build() throws ImprintException { * @throws ImprintException if serialization fails. */ public ByteBuffer buildToBuffer() throws ImprintException { - // 1. Prepare payload and directory list - var directory = new ArrayList(fields.size()); + // 1. Prepare payload and directory map + var directoryMap = new TreeMap(); var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); for (var entry : fields.entrySet()) { int fieldId = entry.getKey(); var value = entry.getValue(); - directory.add(new com.imprint.core.DirectoryEntry((short) fieldId, value.getTypeCode(), payloadBuffer.position())); + directoryMap.put(fieldId, new com.imprint.core.DirectoryEntry((short) fieldId, value.getTypeCode(), payloadBuffer.position())); serializeValue(value, payloadBuffer); } payloadBuffer.flip(); var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - // 2. Serialize directly to the final buffer format - return ImprintRecord.serialize(schemaId, directory, payloadView); + // 2. 
Serialize directly to the final buffer format using the map-based method + return ImprintRecord.serialize(schemaId, directoryMap, payloadView); } // Internal helper methods From 6c162710002df5de3426d1f9fad72e8811fc0540 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 7 Jun 2025 14:53:46 -0400 Subject: [PATCH 36/53] Calculate estimated size as fields are added instead of deferring it --- .../imprint/core/ImprintRecordBuilder.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 4a95898..93abc58 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -4,6 +4,7 @@ import com.imprint.error.ImprintException; import com.imprint.types.MapKey; import com.imprint.types.Value; +import lombok.SneakyThrows; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -40,6 +41,7 @@ public final class ImprintRecordBuilder { private final SchemaId schemaId; private final Map fields = new TreeMap<>(); + private int estimatedPayloadSize = 0; ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); @@ -200,7 +202,14 @@ public ByteBuffer buildToBuffer() throws ImprintException { */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); + + // Subtract the size of the old value if it's being replaced. 
+ var oldValue = fields.get(id); + if (oldValue != null) + estimatedPayloadSize -= estimateValueSize(oldValue); + fields.put(id, value); + estimatedPayloadSize += estimateValueSize(value); return this; } @@ -287,14 +296,9 @@ public String toString() { return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); } - private int estimatePayloadSize() throws ImprintException { - // More accurate estimation to reduce allocations - int estimatedSize = 0; - for (var value : fields.values()) { - estimatedSize += estimateValueSize(value); - } - // Add 25% buffer to reduce reallocations - return Math.max(estimatedSize + (estimatedSize / 4), fields.size() * 16); + private int estimatePayloadSize() { + // Add 25% buffer to reduce reallocations and handle VarInt encoding fluctuations. + return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), fields.size() * 16); } /** @@ -305,7 +309,8 @@ private int estimatePayloadSize() throws ImprintException { * @param value the value to estimate size for * @return estimated size in bytes including type-specific overhead */ - private int estimateValueSize(Value value) throws ImprintException { + @SneakyThrows + private int estimateValueSize(Value value) { // Use TypeHandler for simple types switch (value.getTypeCode()) { case NULL: From 338c079d70633f4a7cdff58f20bf9b2fd799992c Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 8 Jun 2025 19:12:53 -0400 Subject: [PATCH 37/53] Use idiomatic Directory interface and optimize builder --- .../benchmark/SerializationBenchmark.java | 111 ++++---- .../java/com/imprint/core/DirectoryEntry.java | 33 +-- .../java/com/imprint/core/ImprintBuffers.java | 37 +-- .../com/imprint/core/ImprintOperations.java | 4 +- .../java/com/imprint/core/ImprintRecord.java | 9 +- .../imprint/core/ImprintRecordBuilder.java | 97 +++++-- .../java/com/imprint/core/ImprintStream.java | 257 ++++++++++++++++++ .../imprint/core/SimpleDirectoryEntry.java | 22 ++ 8 files changed, 449 
insertions(+), 121 deletions(-) create mode 100644 src/main/java/com/imprint/core/ImprintStream.java create mode 100644 src/main/java/com/imprint/core/SimpleDirectoryEntry.java diff --git a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java index 11e2b29..51c9f48 100644 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java @@ -1,11 +1,16 @@ package com.imprint.benchmark; import com.imprint.core.ImprintRecord; +import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -20,7 +25,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 7, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) public class SerializationBenchmark { @@ -34,10 +39,10 @@ public class SerializationBenchmark { @Setup public void setup() throws Exception { - // Create test records of varying sizes - smallRecord = createSmallRecord(); - mediumRecord = createMediumRecord(); - largeRecord = createLargeRecord(); + // Create test records of varying sizes for deserialization benchmarks + smallRecord = createSmallRecord().build(); + mediumRecord = createMediumRecord().build(); + largeRecord = createLargeRecord().build(); // Pre-serialize for deserialization benchmarks smallRecordBytes = smallRecord.serializeToBuffer(); @@ -48,20 +53,20 @@ public void setup() throws Exception { 
// ===== SERIALIZATION BENCHMARKS ===== @Benchmark - public void serializeSmallRecord(Blackhole bh) { - ByteBuffer result = smallRecord.serializeToBuffer(); + public void buildAndSerializeSmallRecord(Blackhole bh) throws Exception { + ByteBuffer result = createSmallRecord().buildToBuffer(); bh.consume(result); } @Benchmark - public void serializeMediumRecord(Blackhole bh) { - ByteBuffer result = mediumRecord.serializeToBuffer(); + public void buildAndSerializeMediumRecord(Blackhole bh) throws Exception { + ByteBuffer result = createMediumRecord().buildToBuffer(); bh.consume(result); } @Benchmark - public void serializeLargeRecord(Blackhole bh) { - ByteBuffer result = largeRecord.serializeToBuffer(); + public void buildAndSerializeLargeRecord(Blackhole bh) throws Exception { + ByteBuffer result = createLargeRecord().buildToBuffer(); bh.consume(result); } @@ -87,77 +92,89 @@ public void deserializeLargeRecord(Blackhole bh) throws Exception { // ===== HELPER METHODS ===== - private ImprintRecord createSmallRecord() throws Exception { + private ImprintRecordBuilder createSmallRecord() throws Exception { // Small record: ~10 fields, simple types return ImprintRecord.builder(new SchemaId(1, 0x12345678)) - .field(1, Value.fromString("Product")) - .field(2, Value.fromInt32(12345)) - .field(3, Value.fromFloat64(99.99)) - .field(4, Value.fromBoolean(true)) - .field(5, Value.fromString("Electronics")) - .build(); + .field(1, "Product") + .field(2, 12345) + .field(3, 99.99) + .field(4, true) + .field(5, "Electronics"); } - private ImprintRecord createMediumRecord() throws Exception { + private ImprintRecordBuilder createMediumRecord() throws Exception { var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Medium record: ~50 fields, mixed types including arrays - builder.field(1, Value.fromString("Product")); - builder.field(2, Value.fromInt32(12345)); - builder.field(3, Value.fromFloat64(99.99)); - builder.field(4, Value.fromBoolean(true)); - builder.field(5, 
Value.fromString("Electronics")); + builder.field(1, "Product"); + builder.field(2, 12345); + builder.field(3, 99.99); + builder.field(4, true); + builder.field(5, "Electronics"); // Add array field var tags = Arrays.asList( - Value.fromString("popular"), - Value.fromString("trending"), - Value.fromString("bestseller") + "popular", + "trending", + "bestseller" ); - builder.field(6, Value.fromArray(tags)); + builder.field(6, tags); // Add map field (all string values for consistency) - var metadata = new HashMap(); - metadata.put(MapKey.fromString("manufacturer"), Value.fromString("TechCorp")); - metadata.put(MapKey.fromString("model"), Value.fromString("TC-2024")); - metadata.put(MapKey.fromString("year"), Value.fromString("2024")); - builder.field(7, Value.fromMap(metadata)); + var metadata = new HashMap(); + metadata.put("manufacturer", "TechCorp"); + metadata.put("model", "TC-2024"); + metadata.put("year", "2024"); + builder.field(7, metadata); // Add more fields for medium size for (int i = 8; i <= 50; i++) { - builder.field(i, Value.fromString("field_" + i + "_value")); + builder.field(i, "field_" + i + "_value"); } - return builder.build(); + return builder; } - private ImprintRecord createLargeRecord() throws Exception { + private ImprintRecordBuilder createLargeRecord() throws Exception { var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); // Large record: ~200 fields, complex nested structures - builder.field(1, Value.fromString("LargeProduct")); - builder.field(2, Value.fromInt32(12345)); - builder.field(3, Value.fromFloat64(99.99)); + builder.field(1, "LargeProduct"); + builder.field(2, 12345); + builder.field(3, 99.99); // Large array - var largeArray = new ArrayList(); + var largeArray = new ArrayList(); for (int i = 0; i < 100; i++) { - largeArray.add(Value.fromString("item_" + i)); + largeArray.add("item_" + i); } - builder.field(4, Value.fromArray(largeArray)); + builder.field(4, largeArray); // Large map - var largeMap = new 
HashMap(); + var largeMap = new HashMap(); for (int i = 0; i < 50; i++) { - largeMap.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); + largeMap.put("key_" + i, "value_" + i); } - builder.field(5, Value.fromMap(largeMap)); + builder.field(5, largeMap); // Many string fields for (int i = 6; i <= 200; i++) { - builder.field(i, Value.fromString("this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size")); + builder.field(i, "this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size"); } - return builder.build(); + return builder; + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(SerializationBenchmark.class.getSimpleName()) + .forks(1) + .warmupIterations(5) + .measurementIterations(5) + .mode(Mode.AverageTime) + .timeUnit(TimeUnit.NANOSECONDS) + .build(); + + new Runner(opt).run(); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/DirectoryEntry.java b/src/main/java/com/imprint/core/DirectoryEntry.java index 9556256..0b98433 100644 --- a/src/main/java/com/imprint/core/DirectoryEntry.java +++ b/src/main/java/com/imprint/core/DirectoryEntry.java @@ -1,23 +1,24 @@ package com.imprint.core; import com.imprint.types.TypeCode; -import lombok.Value; - -import java.util.Objects; /** - * A directory entry describing a single field in an Imprint record. - * Each entry has a fixed size of 7 bytes. + * Represents the common interface for a directory entry in an Imprint record. + * A directory entry provides metadata about a field, such as its ID, type, and location in the payload. 
*/ -@Value -public class DirectoryEntry { - short id; - TypeCode typeCode; - int offset; - - public DirectoryEntry(int id, TypeCode typeCode, int offset) { - this.id = (short) id; - this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); - this.offset = offset; - } +public interface DirectoryEntry { + /** + * @return The field's unique identifier. + */ + short getId(); + + /** + * @return The {@link TypeCode} of the field's value. + */ + TypeCode getTypeCode(); + + /** + * @return The starting position (offset) of the field's data within the payload buffer. + */ + int getOffset(); } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index 6a294c3..24ec41d 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -10,6 +10,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.TreeMap; @@ -44,24 +45,24 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { } /** - * Creates buffers from pre-parsed directory (used during construction). + * Creates buffers from a pre-parsed directory (used during construction). + * This constructor is used by the ImprintRecordBuilder path. It creates + * a serialized directory buffer but defers parsing it into a map until it's actually needed. 
*/ - public ImprintBuffers(List directory, ByteBuffer payload) { - this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); - this.directoryParsed = true; + public ImprintBuffers(Collection directory, ByteBuffer payload) { + this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(Objects.requireNonNull(directory)); this.payload = payload.asReadOnlyBuffer(); - this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(directory); } /** * Creates buffers from a pre-parsed and sorted directory map (used by ImprintRecordBuilder). * This is an optimized path that avoids creating an intermediate List-to-Map conversion. + * This constructor is used by the ImprintRecordBuilder path. It creates + * a serialized directory buffer but defers parsing it into a map until it's actually needed. */ - public ImprintBuffers(TreeMap directoryMap, ByteBuffer payload) { - this.parsedDirectory = Objects.requireNonNull(directoryMap); - this.directoryParsed = true; + public ImprintBuffers(TreeMap directoryMap, ByteBuffer payload) { + this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(Objects.requireNonNull(directoryMap)); this.payload = payload.asReadOnlyBuffer(); - this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(directoryMap); } /** @@ -260,21 +261,10 @@ private void ensureDirectoryParsed() { } } - /** - * Create a TreeMap from directory list field lookup with ordering. - */ - private TreeMap createDirectoryMap(List directory) { - var map = new TreeMap(); - for (var entry : directory) { - map.put((int)entry.getId(), entry); - } - return map; - } - /** * Create directory buffer from parsed entries. 
*/ - static ByteBuffer createDirectoryBuffer(List directory) { + static ByteBuffer createDirectoryBuffer(Collection directory) { try { int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); @@ -294,14 +284,13 @@ static ByteBuffer createDirectoryBuffer(List directory) { /** * Create directory buffer from a pre-sorted map of entries. */ - static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { + static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { try { int bufferSize = VarInt.encodedLength(directoryMap.size()) + (directoryMap.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); buffer.order(ByteOrder.LITTLE_ENDIAN); VarInt.encode(directoryMap.size(), buffer); - // TreeMap.values() returns a collection view, iteration is ordered and efficient. for (var entry : directoryMap.values()) serializeDirectoryEntry(entry, buffer); @@ -334,6 +323,6 @@ private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws Impri var typeCode = TypeCode.fromByte(buffer.get()); int offset = buffer.getInt(); - return new DirectoryEntry(id, typeCode, offset); + return new SimpleDirectoryEntry(id, typeCode, offset); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintOperations.java b/src/main/java/com/imprint/core/ImprintOperations.java index 4e60ebf..c4e8c66 100644 --- a/src/main/java/com/imprint/core/ImprintOperations.java +++ b/src/main/java/com/imprint/core/ImprintOperations.java @@ -53,7 +53,7 @@ public static ImprintRecord project(ImprintRecord record, int... 
fieldIds) { record.getBuffers().getPayload().limit(); int fieldLength = nextOffset - field.getOffset(); - newDirectory.add(new DirectoryEntry(field.getId(), field.getTypeCode(), currentOffset)); + newDirectory.add(new SimpleDirectoryEntry(field.getId(), field.getTypeCode(), currentOffset)); ranges.add(new FieldRange(field.getOffset(), nextOffset)); currentOffset += fieldLength; @@ -133,7 +133,7 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get raw bytes for field " + currentEntry.getId()); // Add adjusted directory entry - var newEntry = new DirectoryEntry(currentEntry.getId(), currentEntry.getTypeCode(), currentOffset); + var newEntry = new SimpleDirectoryEntry(currentEntry.getId(), currentEntry.getTypeCode(), currentOffset); newDirectory.add(newEntry); // Collect payload chunk diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 804642b..385e569 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -11,6 +11,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -36,7 +37,7 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { /** * Creates a record from pre-parsed directory (used by ImprintWriter). */ - ImprintRecord(Header header, List directory, ByteBuffer payload) { + ImprintRecord(Header header, Collection directory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); this.buffers = new ImprintBuffers(directory, payload); } @@ -44,7 +45,7 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { /** * Creates a record from a pre-parsed and sorted directory map (used by ImprintRecordBuilder). 
*/ - ImprintRecord(Header header, TreeMap directoryMap, ByteBuffer payload) { + ImprintRecord(Header header, TreeMap directoryMap, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); this.buffers = new ImprintBuffers(directoryMap, payload); } @@ -197,7 +198,7 @@ public int estimateSerializedSize() { * @param payload The ByteBuffer containing all field data concatenated. * @return A read-only ByteBuffer with the complete serialized record. */ - public static ByteBuffer serialize(SchemaId schemaId, List directory, ByteBuffer payload) { + public static ByteBuffer serialize(SchemaId schemaId, Collection directory, ByteBuffer payload) { var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); var directoryBuffer = ImprintBuffers.createDirectoryBuffer(directory); @@ -223,7 +224,7 @@ public static ByteBuffer serialize(SchemaId schemaId, List direc * @param payload The ByteBuffer containing all field data concatenated. * @return A read-only ByteBuffer with the complete serialized record. 
*/ - public static ByteBuffer serialize(SchemaId schemaId, TreeMap directoryMap, ByteBuffer payload) { + public static ByteBuffer serialize(SchemaId schemaId, TreeMap directoryMap, ByteBuffer payload) { var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); var directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(directoryMap); diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 93abc58..58fbc63 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -3,6 +3,7 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; +import com.imprint.types.TypeCode; import com.imprint.types.Value; import lombok.SneakyThrows; @@ -40,7 +41,7 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final Map fields = new TreeMap<>(); + private final Map fields = new TreeMap<>(); private int estimatedPayloadSize = 0; ImprintRecordBuilder(SchemaId schemaId) { @@ -145,16 +146,12 @@ public Set fieldIds() { // Build the final record public ImprintRecord build() throws ImprintException { - var directoryMap = new TreeMap(); var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - for (var entry : fields.entrySet()) { - int fieldId = entry.getKey(); - var value = entry.getValue(); - - directoryMap.put(fieldId, new com.imprint.core.DirectoryEntry((short)fieldId, value.getTypeCode(), payloadBuffer.position())); - serializeValue(value, payloadBuffer); + for (var entry : fields.values()) { + entry.setOffset(payloadBuffer.position()); + serializeValue(entry.getValue(), payloadBuffer); } // Create read-only view of the payload without copying @@ -162,7 +159,7 @@ public ImprintRecord build() throws ImprintException { var payloadView = 
payloadBuffer.slice().asReadOnlyBuffer(); var header = new com.imprint.core.Header(new com.imprint.core.Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, directoryMap, payloadView); + return new ImprintRecord(header, new ArrayList<>(fields.values()), payloadView); } /** @@ -173,22 +170,19 @@ public ImprintRecord build() throws ImprintException { * @throws ImprintException if serialization fails. */ public ByteBuffer buildToBuffer() throws ImprintException { - // 1. Prepare payload and directory map - var directoryMap = new TreeMap(); + // 1. Prepare payload and directory var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - for (var entry : fields.entrySet()) { - int fieldId = entry.getKey(); - var value = entry.getValue(); - directoryMap.put(fieldId, new com.imprint.core.DirectoryEntry((short) fieldId, value.getTypeCode(), payloadBuffer.position())); - serializeValue(value, payloadBuffer); + for (var entry : fields.values()) { + entry.setOffset(payloadBuffer.position()); + serializeValue(entry.getValue(), payloadBuffer); } payloadBuffer.flip(); var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); // 2. Serialize directly to the final buffer format using the map-based method - return ImprintRecord.serialize(schemaId, directoryMap, payloadView); + return ImprintRecord.serialize(schemaId, new ArrayList<>(fields.values()), payloadView); } // Internal helper methods @@ -202,14 +196,15 @@ public ByteBuffer buildToBuffer() throws ImprintException { */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); + var newEntry = new BuilderEntry((short) id, value); // Subtract the size of the old value if it's being replaced. 
- var oldValue = fields.get(id); - if (oldValue != null) - estimatedPayloadSize -= estimateValueSize(oldValue); + var oldEntry = fields.get(id); + if (oldEntry != null) + estimatedPayloadSize -= estimateValueSize(oldEntry.getValue()); - fields.put(id, value); - estimatedPayloadSize += estimateValueSize(value); + fields.put(id, newEntry); + estimatedPayloadSize += estimateValueSize(newEntry.getValue()); return this; } @@ -269,8 +264,7 @@ private Value convertToValue(Object obj) { return Value.fromRow((ImprintRecord) obj); } - throw new IllegalArgumentException("Cannot convert " + obj.getClass().getSimpleName() + - " to Imprint Value. Supported types: boolean, int, long, float, double, String, byte[], List, Map, ImprintRecord"); + throw new IllegalArgumentException("Unsupported type for auto-conversion: " + obj.getClass().getName()); } private MapKey convertToMapKey(Object obj) { @@ -287,13 +281,15 @@ private MapKey convertToMapKey(Object obj) { return MapKey.fromBytes((byte[]) obj); } - throw new IllegalArgumentException("Invalid map key type: " + obj.getClass().getSimpleName() + - ". 
Map keys must be int, long, String, or byte[]"); + throw new IllegalArgumentException("Unsupported map key type: " + obj.getClass().getName()); } @Override public String toString() { - return String.format("ImprintRecordBuilder{schemaId=%s, fields=%d}", schemaId, fields.size()); + return "ImprintRecordBuilder{" + + "schemaId=" + schemaId + + ", fields=" + fields + + '}'; } private int estimatePayloadSize() { @@ -334,8 +330,8 @@ private int estimateValueSize(Value value) { } } - private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { + // Use TypeHandler for simple types switch (value.getTypeCode()) { case NULL: case BOOL: @@ -360,4 +356,49 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } + + // Private inner class to hold field data during building + private static class BuilderEntry implements DirectoryEntry { + private final short id; + private final Value value; + private int offset; + + BuilderEntry(short id, Value value) { + this.id = id; + this.value = value; + this.offset = -1; // Initially unknown + } + + @Override + public short getId() { + return id; + } + + @Override + public TypeCode getTypeCode() { + return value.getTypeCode(); + } + + @Override + public int getOffset() { + return offset; + } + + public void setOffset(int offset) { + this.offset = offset; + } + + public Value getValue() { + return value; + } + + @Override + public String toString() { + return "BuilderEntry{" + + "id=" + id + + ", value=" + value + + ", offset=" + offset + + '}'; + } + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintStream.java b/src/main/java/com/imprint/core/ImprintStream.java new file mode 100644 index 0000000..c218318 --- /dev/null +++ b/src/main/java/com/imprint/core/ImprintStream.java @@ -0,0 +1,257 @@ +package com.imprint.core; + +import 
com.imprint.error.ImprintException; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; + +/** + * Provides a framework for lazy, zero-copy transformations of Imprint records. + *

+ * Operations like {@link #project(int...)} and {@link #mergeWith(ImprintRecord)} are + * intermediate and do not create new records. They build up a plan of operations + * that is executed only when a terminal operation like {@link #toRecord()} is called. + */ +public final class ImprintStream { + + private final Plan plan; + + private ImprintStream(Plan plan) { + this.plan = Objects.requireNonNull(plan); + } + + // ========== PLAN DATA STRUCTURES ========== + + /** + * The internal representation of the transformation plan. + * This is a linked-list style structure where each step points to the previous one. + */ + private interface Plan { + // Marker interface for the plan steps + } + + /** + * The starting point of a plan, containing the initial source record. + */ + private static final class SourcePlan implements Plan { + final ImprintRecord source; + + private SourcePlan(ImprintRecord source) { + this.source = Objects.requireNonNull(source, "Source record cannot be null."); + } + } + + /** + * A plan step representing a 'project' operation. + */ + private static final class ProjectPlan implements Plan { + final Plan previous; + final Set fieldIds; + + private ProjectPlan(Plan previous, int... fieldIds) { + this.previous = Objects.requireNonNull(previous); + this.fieldIds = new HashSet<>(); + for (int id : fieldIds) { + this.fieldIds.add(id); + } + } + } + + /** + * A plan step representing a 'merge' operation. + */ + private static final class MergePlan implements Plan { + final Plan previous; + final List others; + + private MergePlan(Plan previous, List others) { + this.previous = Objects.requireNonNull(previous); + this.others = Objects.requireNonNull(others); + } + } + + // ========== PUBLIC API ========== + + /** + * Creates a new transformation stream starting with a source record. + * + * @param source The initial record for the transformation. + * @return A new ImprintStream. 
+ */ + public static ImprintStream of(ImprintRecord source) { + return new ImprintStream(new SourcePlan(source)); + } + + /** + * An intermediate operation that defines a projection on the stream. + * This is a lazy operation; the projection is only performed when a terminal + * operation is called. + * + * @param fieldIds The field IDs to keep in the final record. + * @return A new ImprintStream with the projection step added to its plan. + */ + public ImprintStream project(int... fieldIds) { + return new ImprintStream(new ProjectPlan(this.plan, fieldIds)); + } + + /** + * An intermediate operation that defines a merge on the stream. + * The record from this stream (the "left" side) takes precedence in case + * of overlapping field IDs. + *

+ * This is a lazy operation; the merge is only performed when a terminal + * operation is called. + * + * @param other The record to merge with this stream's record. + * @return A new ImprintStream with the merge step added to its plan. + */ + public ImprintStream mergeWith(ImprintRecord other) { + return new ImprintStream(new MergePlan(this.plan, Collections.singletonList(other))); + } + + /** + * A terminal operation that executes the defined transformation plan and + * constructs a new, consolidated ImprintRecord. + * + * @return a new ImprintRecord representing the result of the stream operations. + */ + public ImprintRecord toRecord() { + return new Evaluator(this.plan).execute(); + } + + // ========== EVALUATOR ========== + + /** + * The engine that walks the plan and executes the transformation. + */ + private static final class Evaluator { + private final Plan plan; + + private Evaluator(Plan plan) { + this.plan = plan; + } + + public ImprintRecord execute() { + // Unwind the plan's linked-list structure into a forward-order list of operations. + var planList = new ArrayList(); + var current = plan; + while (current != null) { + planList.add(current); + if (current instanceof ProjectPlan) { + current = ((ProjectPlan) current).previous; + } else if (current instanceof MergePlan) { + current = ((MergePlan) current).previous; + } else if (current instanceof SourcePlan) { + current = null; // End of the chain + } + } + Collections.reverse(planList); + + // This map holds the set of fields being built, sorted by ID. + var resolvedFields = new TreeMap(); + + // Iteratively evaluate the plan step-by-step. 
+ for (var planStep : planList) { + if (planStep instanceof SourcePlan) { + var sourcePlan = (SourcePlan) planStep; + for (var entry : sourcePlan.source.getDirectory()) { + resolvedFields.put((int) entry.getId(), new FieldSource(sourcePlan.source, entry)); + } + } else if (planStep instanceof ProjectPlan) { + var projectPlan = (ProjectPlan) planStep; + // Apply projection to the current state of resolved fields. + resolvedFields.keySet().retainAll(projectPlan.fieldIds); + } else if (planStep instanceof MergePlan) { + var mergePlan = (MergePlan) planStep; + // Add fields from other records if they aren't already in the map. + for (var otherRecord : mergePlan.others) { + for (var entry : otherRecord.getDirectory()) { + resolvedFields.putIfAbsent((int) entry.getId(), new FieldSource(otherRecord, entry)); + } + } + } + } + + // Once the final field set is determined, build the record. + return build(resolvedFields); + } + + private ImprintRecord build(TreeMap finalFields) { + if (finalFields.isEmpty()) { + // To-Do: Need a way to get the schemaId for an empty record. + // For now, returning null or using a default. + try { + return ImprintRecord.builder(new SchemaId(0, 0)).build(); + } catch (ImprintException e) { + // This should not happen when building an empty record. + throw new IllegalStateException("Failed to build empty record.", e); + } + } + + // Determine the schema from the first field's source record. + SchemaId schemaId = finalFields.firstEntry().getValue().record.getHeader().getSchemaId(); + + // 1. Calculate final payload size and prepare directory. + int payloadSize = 0; + var newDirectoryMap = new TreeMap(); + for (var entry : finalFields.entrySet()) { + var fieldSource = entry.getValue(); + int fieldLength = fieldSource.getLength(); + + newDirectoryMap.put(entry.getKey(), new SimpleDirectoryEntry(fieldSource.entry.getId(), fieldSource.entry.getTypeCode(), payloadSize)); + payloadSize += fieldLength; + } + + // 2. Allocate buffer and copy data. 
+ var payload = ByteBuffer.allocate(payloadSize).order(ByteOrder.LITTLE_ENDIAN); + for (var fieldSource : finalFields.values()) { + try { + ByteBuffer sourceData = fieldSource.record.getRawBytes(fieldSource.entry.getId()); + if (sourceData != null) { + payload.put(sourceData.duplicate()); + } + } catch (Exception e) { + // This indicates a data corruption or bug, shouldn't happen in normal operation. + throw new IllegalStateException("Failed to copy data for field " + fieldSource.entry.getId(), e); + } + } + payload.flip(); + + // 3. Construct the final record. + var newHeader = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + return new ImprintRecord(newHeader, newDirectoryMap, payload.asReadOnlyBuffer()); + } + + /** + * A helper class to track the source record and directory entry for a field. + */ + private static final class FieldSource { + final ImprintRecord record; + final DirectoryEntry entry; + + FieldSource(ImprintRecord record, DirectoryEntry entry) { + this.record = record; + this.entry = entry; + } + + int getLength() { + try { + ByteBuffer buf = record.getRawBytes(entry.getId()); + return buf != null ? buf.remaining() : 0; + } catch (Exception e) { + return 0; + } + } + } + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/SimpleDirectoryEntry.java b/src/main/java/com/imprint/core/SimpleDirectoryEntry.java new file mode 100644 index 0000000..843aad4 --- /dev/null +++ b/src/main/java/com/imprint/core/SimpleDirectoryEntry.java @@ -0,0 +1,22 @@ +package com.imprint.core; + +import com.imprint.types.TypeCode; +import lombok.Value; + +import java.util.Objects; + +/** + * A concrete, immutable directory entry. 
+ */ +@Value +public class SimpleDirectoryEntry implements DirectoryEntry { + short id; + TypeCode typeCode; + int offset; + + public SimpleDirectoryEntry(short id, TypeCode typeCode, int offset) { + this.id = id; + this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); + this.offset = offset; + } +} \ No newline at end of file From c81646a892b08f1417c1784cef370f9faf814cf8 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sun, 8 Jun 2025 19:24:45 -0400 Subject: [PATCH 38/53] add large object profiling and refactor tests --- .../com/imprint/profile/ProfilerTest.java | 293 +++++++++++------- 1 file changed, 179 insertions(+), 114 deletions(-) diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 1ea752d..3804722 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -7,6 +7,7 @@ import org.junit.jupiter.api.Test; import java.util.Random; +import java.util.UUID; /** * A test designed for profiling hotspots during development. 
@@ -26,7 +27,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -//@Disabled("Enable manually for profiling") +@Disabled("Enable manually for profiling") public class ProfilerTest { private static final int ITERATIONS = 1_000_000; @@ -34,145 +35,171 @@ public class ProfilerTest { @Test void profileFieldAccess() throws Exception { - System.out.println("Starting profiler test - attach profiler now..."); - Thread.sleep(5000); // Give time to attach profiler - - // Create a representative record var record = createTestRecord(); - - System.out.println("Beginning field access profiling..."); - long start = System.nanoTime(); - - // Simulate real-world access patterns - Random random = new Random(42); - int hits = 0; - - for (int i = 0; i < ITERATIONS; i++) { - // Random field access (hotspot) - int fieldId = random.nextInt(RECORD_SIZE) + 1; - var value = record.getValue(fieldId); - if (value != null) { - hits++; - - // Trigger string decoding (potential hotspot) - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } else { - ((Value.StringValue) value).getValue(); + + runProfileTest("Field Access", () -> { + // Simulate real-world access patterns + Random random = new Random(42); + int hits = 0; + + for (int i = 0; i < ITERATIONS; i++) { + // Random field access (hotspot) + int fieldId = random.nextInt(RECORD_SIZE) + 1; + var value = record.getValue(fieldId); + if (value != null) { + hits++; + + // Trigger string decoding (potential hotspot) + if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value).getValue(); + } else { + ((Value.StringValue) value).getValue(); + } } } + + // Some raw access (zero-copy path) + if (i % 10 == 0) { + record.getRawBytes(fieldId); + } } - - // Some raw access (zero-copy path) - if (i % 10 == 0) { - 
record.getRawBytes(fieldId); - } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", - ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); + }); } @Test void profileSerialization() throws Exception { - System.out.println("Starting serialization profiler test..."); - Thread.sleep(3000); - var schemaId = new SchemaId(1, 0x12345678); - System.out.println("Beginning serialization profiling..."); - long start = System.nanoTime(); - - // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 500_000; i++) { - var builder = ImprintRecord.builder(schemaId); - - // Add various field types - builder.field(1, Value.fromInt32(i)) - .field(2, Value.fromString("test-string-" + i)) - .field(3, Value.fromFloat64(i * 3.14159)) - .field(4, Value.fromBytes(("bytes-" + i).getBytes())); - - var record = builder.build(); - var serialized = record.serializeToBuffer(); // Potential hotspot - - // Trigger some deserialization - if (i % 1000 == 0) { - var deserialized = ImprintRecord.deserialize(serialized); - deserialized.getValue(2); // String decoding hotspot + runProfileTest("Serialization (Standard)", () -> { + // Create and serialize many records (allocation hotspot) + for (int i = 0; i < 500_000; i++) { + var builder = ImprintRecord.builder(schemaId); + + // Add various field types + builder.field(1, Value.fromInt32(i)) + .field(2, Value.fromString("test-string-" + i)) + .field(3, Value.fromFloat64(i * 3.14159)) + .field(4, Value.fromBytes(("bytes-" + i).getBytes())); + + var record = builder.build(); + var serialized = record.serializeToBuffer(); // Potential hotspot + + // Trigger some deserialization + if (i % 1000 == 0) { + var deserialized = ImprintRecord.deserialize(serialized); + deserialized.getValue(2); // String decoding hotspot + } } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed serialization test in 
%.2f ms%n", duration / 1_000_000.0); + }); + } + + @Test + void profileLargeObjectSerialization() throws Exception { + var schemaId = new SchemaId(3, 0xabcdef12); + var largeRecord = createVeryLargeRecord(); // A single large record to be re-serialized + + runProfileTest("Serialization (Large Object)", () -> { + // Re-serialize the same large object to focus on serialization logic + // rather than object creation. + for (int i = 0; i < 100_000; i++) { + var serialized = largeRecord.serializeToBuffer(); // Hotspot + + if (i % 1000 == 0) { + var deserialized = ImprintRecord.deserialize(serialized); + deserialized.getValue(10); // Access a field to ensure it works + } + } + }); } - @Test + @Test void profileProjection() throws Exception { - System.out.println("Starting projection profiler test..."); - Thread.sleep(3000); - var record = createLargeRecord(); - - System.out.println("Beginning projection profiling..."); - long start = System.nanoTime(); - - // Simulate analytical workload - project subset of fields repeatedly - for (int i = 0; i < 50_000; i++) { - // Project 10 fields out of 100 (common analytical pattern) - for (int fieldId = 1; fieldId <= 10; fieldId++) { - var value = record.getValue(fieldId); - if (value != null) { - // Force materialization of string values - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); + + runProfileTest("Projection", () -> { + // Simulate analytical workload - project subset of fields repeatedly + for (int i = 0; i < 50_000; i++) { + // Project 10 fields out of 100 (common analytical pattern) + for (int fieldId = 1; fieldId <= 10; fieldId++) { + var value = record.getValue(fieldId); + if (value != null) { + // Force materialization of string values + if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value).getValue(); + } } } 
} } - } - - long duration = System.nanoTime() - start; - System.out.printf("Completed projection test in %.2f ms%n", duration / 1_000_000.0); + }); } @Test void profileMemoryAllocation() throws Exception { - System.out.println("Starting allocation profiler test..."); - Thread.sleep(3000); - - System.out.println("Beginning allocation profiling - watch for GC events..."); - - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var builder = ImprintRecord.builder(schemaId); - - // Create strings of varying sizes (allocation pressure) - builder.field(1, Value.fromString("small")) - .field(2, Value.fromString("medium-length-string-" + i)) - .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = builder.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); - } - - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); + runProfileTest("Memory Allocation", () -> { + // Force allocation pressure to reveal GC hotspots + for (int batch = 0; batch < 1000; batch++) { + for (int i = 0; i < 1000; i++) { + var schemaId = new SchemaId(batch, i); + var builder = ImprintRecord.builder(schemaId); + + // Create strings of varying sizes (allocation pressure) + builder.field(1, Value.fromString("small")) + .field(2, Value.fromString("medium-length-string-" + i)) + .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) + .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays + + var record = builder.build(); + + // Some deserialization to trigger string decoding allocations + record.getValue(2); + record.getValue(3); + } + + if (batch % 100 == 0) { + 
System.out.printf("Completed batch %d/1000%n", batch); + } } - } - - System.out.println("Allocation test complete - check GC logs and memory profiler"); + }, false); // Disable final time reporting as it's not relevant here } + // ========== Test Helpers ========== + + /** + * A wrapper to run a profiling test with boilerplate for timing and setup. + * @param testName The name of the test to print. + * @param testLogic The core logic of the test, passed as a lambda. + */ + private void runProfileTest(String testName, ThrowingRunnable testLogic) throws Exception { + runProfileTest(testName, testLogic, true); + } + + private void runProfileTest(String testName, ThrowingRunnable testLogic, boolean reportTime) throws Exception { + System.out.printf("===== Starting Profiler Test: %s =====%n", testName); + System.out.println("Attach profiler now..."); + Thread.sleep(3000); // Give time to attach profiler + + System.out.printf("Beginning %s profiling...%n", testName); + long start = System.nanoTime(); + + testLogic.run(); + + if (reportTime) { + long duration = System.nanoTime() - start; + System.out.printf("===== Completed %s in %.2f ms =====%n%n", testName, duration / 1_000_000.0); + } else { + System.out.printf("===== %s profiling complete. Check profiler output. =====%n%n", testName); + } + } + + /** A functional interface that allows for exceptions, for use in lambdas. 
*/ + @FunctionalInterface + private interface ThrowingRunnable { + void run() throws Exception; + } + private ImprintRecord createTestRecord() throws Exception { var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); @@ -222,4 +249,42 @@ private ImprintRecord createLargeRecord() throws Exception { return builder.build(); } + + private ImprintRecord createVeryLargeRecord() throws Exception { + var builder = ImprintRecord.builder(new SchemaId(3, 0xabcdef12)); + var random = new Random(123); + + // Create 200 fields of varying types and sizes + for (int i = 1; i <= 200; i++) { + switch (i % 6) { + case 0: + builder.field(i, i * random.nextInt()); + break; + case 1: + // Medium string + builder.field(i, "user-id-" + UUID.randomUUID().toString()); + break; + case 2: + // Large string + builder.field(i, "This is a much larger text block for field " + i + ". It simulates a user comment, a description, or some other form of semi-structured text data. We repeat a sentence to make it longer. This is a much larger text block for field " + i + ". It simulates a user comment, a description, or some other form of semi-structured text data."); + break; + case 3: + builder.field(i, random.nextDouble() * 1000); + break; + case 4: + // Small byte array + var smallBytes = new byte[32]; + random.nextBytes(smallBytes); + builder.field(i, smallBytes); + break; + case 5: + // Large byte array + var largeBytes = new byte[1024]; + random.nextBytes(largeBytes); + builder.field(i, largeBytes); + break; + } + } + return builder.build(); + } } \ No newline at end of file From e3bbe2bcc4024588037a46fbf8fdbb4e4e5654b4 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Mon, 9 Jun 2025 00:51:25 -0400 Subject: [PATCH 39/53] add Thrift competitor and fix framework issues Adds Apache Thrift to the benchmark suite, including self-contained compiler download. Corrects Protobuf and FlatBuffers schemas and fixes bugs in the competitor classes to ensure a stable and robust benchmark environment. 
Includes refactored DataGenerator. --- build.gradle | 53 ++ src/jmh/flatbuffers/test_record.fbs | 21 +- .../benchmark/ComparisonBenchmark.java | 858 ++---------------- .../com/imprint/benchmark/DataGenerator.java | 67 ++ .../competitors/AbstractCompetitor.java | 29 + .../benchmark/competitors/AvroCompetitor.java | 156 ++++ .../benchmark/competitors/Competitor.java | 16 + .../competitors/FlatBuffersCompetitor.java | 137 +++ .../competitors/ImprintCompetitor.java | 76 ++ .../competitors/JacksonJsonCompetitor.java | 79 ++ .../benchmark/competitors/KryoCompetitor.java | 91 ++ .../competitors/MessagePackCompetitor.java | 78 ++ .../competitors/ProtobufCompetitor.java | 69 ++ .../competitors/ThriftCompetitor.java | 117 +++ src/jmh/proto/test_record.proto | 21 +- src/jmh/thrift/test_record.thrift | 18 + 16 files changed, 1063 insertions(+), 823 deletions(-) create mode 100644 src/jmh/java/com/imprint/benchmark/DataGenerator.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/Competitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java create mode 100644 src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java create mode 100644 src/jmh/thrift/test_record.thrift diff --git a/build.gradle b/build.gradle index 33b1645..852be08 100644 --- a/build.gradle +++ b/build.gradle @@ -50,6 
+50,8 @@ dependencies { jmhImplementation 'com.esotericsoftware:kryo:5.4.0' jmhImplementation 'org.msgpack:msgpack-core:0.9.8' jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' + jmhImplementation 'org.apache.thrift:libthrift:0.19.0' + jmhImplementation 'javax.annotation:javax.annotation-api:1.3.2' } protobuf { @@ -137,11 +139,61 @@ tasks.register('generateFlatBuffers', Exec) { } } +// Task to download the Thrift compiler +task downloadThrift(type: Exec) { + description = 'Download Thrift compiler' + group = 'build setup' + def thriftVersion = "0.19.0" + def thriftExecutable = file("${buildDir}/thrift/thrift.exe") + def thriftUrl = "https://archive.apache.org/dist/thrift/${thriftVersion}/thrift-${thriftVersion}.exe" + + outputs.file(thriftExecutable) + + onlyIf { + !thriftExecutable.exists() && System.getProperty('os.name').toLowerCase().contains('windows') + } + + doFirst { + println "Downloading Thrift compiler for Windows from $thriftUrl..." + thriftExecutable.parentFile.mkdirs() + } + + commandLine 'curl', '-L', '-o', thriftExecutable.absolutePath, thriftUrl + + doLast { + println "Thrift compiler downloaded to: ${thriftExecutable}" + } +} + +// Task to generate Java code from Thrift IDL files for JMH benchmarks +task generateJmhThrift(type: Exec) { + dependsOn downloadThrift + description = 'Generate Java classes from Thrift schema' + group = 'build' + + def thriftExecutable = file("${buildDir}/thrift/thrift.exe") + def schemaFile = file('src/jmh/thrift/test_record.thrift') + def outputDir = file('build/generated-src/thrift/jmh/java') + + // Only run if the thrift executable exists (i.e., on Windows) + onlyIf { thriftExecutable.exists() } + + commandLine thriftExecutable.absolutePath, '-r', '--gen', 'java', '-o', outputDir.absolutePath, schemaFile.absolutePath + + inputs.file(schemaFile) + outputs.dir(outputDir) + + doFirst { + outputDir.mkdirs() + } +} + // Add generated FlatBuffers sources to JMH source set sourceSets { jmh { java { srcDir 
'build/generated/source/flatbuffers/jmh/java' + srcDir 'build/generated-src/thrift/jmh/java' } proto { srcDir 'src/jmh/proto' @@ -151,6 +203,7 @@ sourceSets { // Make JMH compilation depend on FlatBuffers generation compileJmhJava.dependsOn generateFlatBuffers +compileJmhJava.dependsOn generateJmhThrift // Handle duplicate proto files tasks.named('processJmhResources') { diff --git a/src/jmh/flatbuffers/test_record.fbs b/src/jmh/flatbuffers/test_record.fbs index ccc31d0..698bd81 100644 --- a/src/jmh/flatbuffers/test_record.fbs +++ b/src/jmh/flatbuffers/test_record.fbs @@ -1,15 +1,14 @@ -namespace com.imprint.benchmark; +namespace com.imprint.benchmark.flatbuffers; -table TestRecordFB { - id: int; - name: string; - price: double; +table TestRecord { + id: string; + timestamp: long; + flags: int; active: bool; - category: string; - tags: [string]; - metadata_keys: [string]; - metadata_values: [string]; - extra_data: [string]; + value: double; + data: [ubyte]; + tags: [int]; + metadata: [string]; // Representing map as a flat list of key/value strings for simplicity } -root_type TestRecordFB; \ No newline at end of file +root_type TestRecord; \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index e52388c..7000fca 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -1,22 +1,15 @@ package com.imprint.benchmark; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.flatbuffers.FlatBufferBuilder; -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintRecordBuilder; -import com.imprint.core.SchemaId; -import com.imprint.types.MapKey; -import com.imprint.types.Value; -import org.apache.avro.Schema; -import 
org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.*; -import org.msgpack.jackson.dataformat.MessagePackFactory; +import com.imprint.benchmark.competitors.AbstractCompetitor; +import com.imprint.benchmark.competitors.AvroCompetitor; +import com.imprint.benchmark.competitors.Competitor; +import com.imprint.benchmark.competitors.FlatBuffersCompetitor; +import com.imprint.benchmark.competitors.ImprintCompetitor; +import com.imprint.benchmark.competitors.JacksonJsonCompetitor; +import com.imprint.benchmark.competitors.KryoCompetitor; +import com.imprint.benchmark.competitors.MessagePackCompetitor; +import com.imprint.benchmark.competitors.ProtobufCompetitor; +import com.imprint.benchmark.competitors.ThriftCompetitor; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -24,822 +17,81 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.nio.ByteBuffer; -import java.util.*; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; -/** - * Head-to-head benchmarks comparing Imprint against other serialization libraries. - * Tests the performance claims made in the documentation. 
- */ -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -@SuppressWarnings("unused") +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Warmup(iterations = 3, time = 5) +@Measurement(iterations = 5, time = 10) +@Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { - // Test data - private TestRecord testData; + private static final List COMPETITORS = Arrays.asList( + new ImprintCompetitor(), + new JacksonJsonCompetitor(), + new ProtobufCompetitor(), + new FlatBuffersCompetitor(), + new AvroCompetitor(), + new ThriftCompetitor(), + new KryoCompetitor(), + new MessagePackCompetitor() + ); - // Serialized formats - private ByteBuffer imprintBytesBuffer; - private byte[] jacksonJsonBytes; - private byte[] kryoBytes; - private byte[] messagePackBytes; - private byte[] avroBytes; - private byte[] protobufBytes; - private ByteBuffer flatbuffersBytes; + @Param({"Imprint"}) + public String competitorName; - // Library instances - private Schema avroSchema; - private DatumWriter avroWriter; - private DatumReader avroReader; - private ObjectMapper jacksonJsonMapper; - private Kryo kryo; - private ObjectMapper messagePackMapper; + private Competitor competitor; + private DataGenerator.TestRecord testRecord1; + private DataGenerator.TestRecord testRecord2; - @Setup - public void setup() throws Exception { - testData = createTestRecord(); - - // Initialize libraries - jacksonJsonMapper = new ObjectMapper(); - kryo = new Kryo(); - kryo.register(TestRecord.class); - kryo.register(ArrayList.class); - kryo.register(HashMap.class); - kryo.register(Arrays.asList().getClass()); - - // Initialize MessagePack ObjectMapper - messagePackMapper = new ObjectMapper(new MessagePackFactory()); - setupAvro(); - - // Pre-serialize for deserialization benchmarks - 
imprintBytesBuffer = serializeWithImprint(testData); - jacksonJsonBytes = serializeWithJacksonJson(testData); - kryoBytes = serializeWithKryo(testData); - messagePackBytes = serializeWithMessagePack(testData); - avroBytes = serializeWithAvro(testData); - protobufBytes = serializeWithProtobuf(testData); - flatbuffersBytes = serializeWithFlatBuffers(testData); - } - - // ===== SERIALIZATION BENCHMARKS ===== - - @Benchmark - public void serializeImprint(Blackhole bh) throws Exception { - ByteBuffer result = serializeWithImprint(testData); - bh.consume(result); - } - - @Benchmark - public void serializeJacksonJson(Blackhole bh) throws Exception { - byte[] result = serializeWithJacksonJson(testData); - bh.consume(result); - } - - @Benchmark - public void serializeKryo(Blackhole bh) { - byte[] result = serializeWithKryo(testData); - bh.consume(result); - } - - @Benchmark - public void serializeMessagePack(Blackhole bh) throws Exception { - byte[] result = serializeWithMessagePack(testData); - bh.consume(result); - } - - @Benchmark - public void serializeAvro(Blackhole bh) throws Exception { - byte[] result = serializeWithAvro(testData); - bh.consume(result); - } - - @Benchmark - public void serializeProtobuf(Blackhole bh) { - byte[] result = serializeWithProtobuf(testData); - bh.consume(result); - } - - @Benchmark - public void serializeFlatBuffers(Blackhole bh) { - ByteBuffer result = serializeWithFlatBuffers(testData); - bh.consume(result); - } - - // ===== SETUP ONLY ===== - - @Benchmark - public void deserializeSetupImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - bh.consume(result); - } - - @Benchmark - public void deserializeSetupFlatBuffers(Blackhole bh) { - com.imprint.benchmark.TestRecordFB result = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(result); - } - - // ===== FULL DESERIALIZATION BENCHMARKS ===== - - @Benchmark - public 
void deserializeJacksonJson(Blackhole bh) throws Exception { - TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - bh.consume(result); - } - - @Benchmark - public void deserializeKryo(Blackhole bh) { - Input input = new Input(new ByteArrayInputStream(kryoBytes)); - TestRecord result = kryo.readObject(input, TestRecord.class); - input.close(); - bh.consume(result); - } - - @Benchmark - public void deserializeMessagePack(Blackhole bh) throws Exception { - TestRecord result = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(result); - } - - @Benchmark - public void deserializeAvro(Blackhole bh) throws Exception { - GenericRecord result = deserializeWithAvro(avroBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeProtobuf(Blackhole bh) throws Exception { - com.imprint.benchmark.TestRecordProto.TestRecord result = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(result); - } - - @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - // Access all fields to force full deserialization - result.getInt32(1); // id - result.getString(2); // name - result.getFloat64(3); // price - result.getBoolean(4); // active - result.getString(5); // category - result.getArray(6); // tags - result.getMap(7); // metadata - for (int i = 8; i < 21; i++) { - result.getString(i); // extraData fields - } - - bh.consume(result); - } - - @Benchmark - public void deserializeFlatBuffers(Blackhole bh) { - com.imprint.benchmark.TestRecordFB result = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - - // Access all fields - result.id(); - result.name(); - result.price(); - result.active(); - result.category(); - // Access all tags - for (int i = 0; i < result.tagsLength(); i++) { - result.tags(i); - } - // Access all metadata - for 
(int i = 0; i < result.metadataKeysLength(); i++) { - result.metadataKeys(i); - result.metadataValues(i); - } - // Access all extra data - for (int i = 0; i < result.extraDataLength(); i++) { - result.extraData(i); - } - - bh.consume(result); - } - - // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a record - - @Benchmark - public void singleFieldAccessImprint(Blackhole bh) throws Exception { - ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); - var field15 = record.getString(15); - bh.consume(field15); - } - - @Benchmark - public void singleFieldAccessJacksonJson(Blackhole bh) throws Exception { - TestRecord record = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessKryo(Blackhole bh) { - Input input = new Input(new ByteArrayInputStream(kryoBytes)); - TestRecord record = kryo.readObject(input, TestRecord.class); - input.close(); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessMessagePack(Blackhole bh) throws Exception { - TestRecord record = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - bh.consume(record.extraData.get(4)); - } - - @Benchmark - public void singleFieldAccessAvro(Blackhole bh) throws Exception { - GenericRecord record = deserializeWithAvro(avroBytes); - bh.consume(record.get("extra_data")); - } - - @Benchmark - public void singleFieldAccessProtobuf(Blackhole bh) throws Exception { - com.imprint.benchmark.TestRecordProto.TestRecord record = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); - bh.consume(record.getExtraData(4)); - } - - @Benchmark - public void singleFieldAccessFlatBuffers(Blackhole bh) { - com.imprint.benchmark.TestRecordFB record = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - bh.consume(record.extraData(4)); + public static List 
competitorName() { + return COMPETITORS.stream().map(Competitor::name).collect(Collectors.toList()); } - // ===== SIZE COMPARISON ===== + @Setup(Level.Trial) + public void setup() { + // Find the competitor implementation + competitor = COMPETITORS.stream() + .filter(c -> c.name().equals(competitorName)) + .findFirst() + .orElseThrow(() -> new IllegalStateException("Unknown competitor: " + competitorName)); - @Benchmark - public void measureImprintSize(Blackhole bh) { - bh.consume(imprintBytesBuffer.remaining()); - } + // Create the test data + testRecord1 = DataGenerator.createTestRecord(); + testRecord2 = DataGenerator.createTestRecord(); - @Benchmark - public void measureJacksonJsonSize(Blackhole bh) { - bh.consume(jacksonJsonBytes.length); + // Setup the competitor with the data + competitor.setup(testRecord1, testRecord2); } @Benchmark - public void measureKryoSize(Blackhole bh) { - bh.consume(kryoBytes.length); + public void serialize(Blackhole bh) { + competitor.serialize(bh); } @Benchmark - public void measureMessagePackSize(Blackhole bh) { - bh.consume(messagePackBytes.length); + public void deserialize(Blackhole bh) { + competitor.deserialize(bh); } @Benchmark - public void measureAvroSize(Blackhole bh) { - bh.consume(avroBytes.length); + public void projectAndSerialize(Blackhole bh) { + competitor.projectAndSerialize(bh); } @Benchmark - public void measureProtobufSize(Blackhole bh) { - bh.consume(protobufBytes.length); + public void mergeAndSerialize(Blackhole bh) { + competitor.mergeAndSerialize(bh); } - @Benchmark - public void measureFlatBuffersSize(Blackhole bh) { - bh.consume(flatbuffersBytes.remaining()); - } - - // ===== MERGE SIMULATION BENCHMARKS ===== - - //@Benchmark - public void mergeImprint(Blackhole bh) throws Exception { - var record1Buffer = imprintBytesBuffer.duplicate(); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithImprint(record2Data); - - var deserialized1 = ImprintRecord.deserialize(record1Buffer); - 
var deserialized2 = ImprintRecord.deserialize(record2Buffer); - var merged = simulateMerge(deserialized1, deserialized2); - - bh.consume(merged); - } - - //@Benchmark - public void mergeJacksonJson(Blackhole bh) throws Exception { - var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithJacksonJson(record2Data); - var record2 = jacksonJsonMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = jacksonJsonMapper.writeValueAsBytes(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeKryo(Blackhole bh) { - Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); - var record1 = kryo.readObject(input1, TestRecord.class); - input1.close(); - - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithKryo(record2Data); - Input input2 = new Input(new ByteArrayInputStream(record2Bytes)); - var record2 = kryo.readObject(input2, TestRecord.class); - input2.close(); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = serializeWithKryo(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeMessagePack(Blackhole bh) throws Exception { - var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithMessagePack(record2Data); - var record2 = messagePackMapper.readValue(record2Bytes, TestRecord.class); - - var mergedPojo = mergeTestRecords(record1, record2); - byte[] result = messagePackMapper.writeValueAsBytes(mergedPojo); - bh.consume(result); - } - - //@Benchmark - public void mergeAvro(Blackhole bh) throws Exception { - var record1 = deserializeWithAvro(avroBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithAvro(record2Data); - var record2 = deserializeWithAvro(record2Bytes); - - var merged = 
mergeAvroRecords(record1, record2); - byte[] result = serializeAvroRecord(merged); - bh.consume(result); - } - - //@Benchmark - public void mergeProtobuf(Blackhole bh) throws Exception { - var record1 = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(protobufBytes); - var record2Data = createTestRecord2(); - var record2Bytes = serializeWithProtobuf(record2Data); - var record2 = com.imprint.benchmark.TestRecordProto.TestRecord.parseFrom(record2Bytes); - - var merged = mergeProtobufRecords(record1, record2); - byte[] result = merged.toByteArray(); - bh.consume(result); - } - - //@Benchmark - public void mergeFlatBuffers(Blackhole bh) { - var record1 = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); - var record2Data = createTestRecord2(); - var record2Buffer = serializeWithFlatBuffers(record2Data); - var record2 = com.imprint.benchmark.TestRecordFB.getRootAsTestRecordFB(record2Buffer); - - var merged = mergeFlatBuffersRecords(record1, record2); - bh.consume(merged); - } - - // ===== MAIN METHOD TO RUN BENCHMARKS ===== - public static void main(String[] args) throws RunnerException { - runFieldAccessBenchmarks(); - // Or, uncomment specific runner methods to execute subsets: - // runSerializationBenchmarks(); - // runDeserializationBenchmarks(); - // runFieldAccessBenchmarks(); - // runSizeComparisonBenchmarks(); - // runMergeBenchmarks(); - // runMessagePackBenchmarks(); - } - - public static void runAll() throws RunnerException { Options opt = new OptionsBuilder() .include(ComparisonBenchmark.class.getSimpleName()) .build(); new Runner(opt).run(); } - - public static void runSerializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".serialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runDeserializationBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - 
.include(ComparisonBenchmark.class.getSimpleName() + ".deserialize.*") - .build(); - new Runner(opt).run(); - } - - public static void runFieldAccessBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".singleFieldAccess.*") - .build(); - new Runner(opt).run(); - } - - public static void runSizeComparisonBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".measure.*") - .build(); - new Runner(opt).run(); - } - - public static void runMergeBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".merge.*") - .build(); - new Runner(opt).run(); - } - - public static void runMessagePackBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*MessagePack.*") - .build(); - new Runner(opt).run(); - } - - public static void runAvroBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*Avro.*") - .build(); - new Runner(opt).run(); - } - - public static void runProtobufBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*Protobuf.*") - .build(); - new Runner(opt).run(); - } - - public static void runFlatBuffersBenchmarks() throws RunnerException { - Options opt = new OptionsBuilder() - .include(ComparisonBenchmark.class.getSimpleName() + ".*FlatBuffers.*") - .build(); - new Runner(opt).run(); - } - - // ===== HELPER METHODS ===== - - private void setupAvro() { - String schemaJson = "{\n" + - " \"type\": \"record\",\n" + - " \"name\": \"TestRecord\",\n" + - " \"fields\": [\n" + - " {\"name\": \"id\", \"type\": \"int\"},\n" + - " {\"name\": \"name\", \"type\": \"string\"},\n" + - " {\"name\": \"price\", \"type\": 
\"double\"},\n" + - " {\"name\": \"active\", \"type\": \"boolean\"},\n" + - " {\"name\": \"category\", \"type\": \"string\"},\n" + - " {\"name\": \"tags\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" + - " {\"name\": \"metadata\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},\n" + - " {\"name\": \"extraData0\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData1\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData2\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData3\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData4\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData5\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData6\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData7\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData8\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData9\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData10\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData11\", \"type\": \"string\"},\n" + - " {\"name\": \"extraData12\", \"type\": \"string\"}\n" + - " ]\n" + - "}"; - - avroSchema = new Schema.Parser().parse(schemaJson); - avroWriter = new GenericDatumWriter<>(avroSchema); - avroReader = new GenericDatumReader<>(avroSchema); - } - - private ByteBuffer serializeWithImprint(TestRecord data) throws Exception { - var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); - - builder.field(1, data.id); - builder.field(2, data.name); - builder.field(3, data.price); - builder.field(4, data.active); - builder.field(5, data.category); - builder.field(6, data.tags); - builder.field(7, data.metadata); - - for (int i = 0; i < data.extraData.size(); i++) { - builder.field(8 + i, data.extraData.get(i)); - } - - return builder.build().serializeToBuffer(); - } - - private byte[] serializeWithJacksonJson(TestRecord data) throws Exception { - return jacksonJsonMapper.writeValueAsBytes(data); - } - - private byte[] serializeWithKryo(TestRecord data) { - 
ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Output output = new Output(baos); - kryo.writeObject(output, data); - output.close(); - return baos.toByteArray(); - } - - private byte[] serializeWithMessagePack(TestRecord data) throws Exception { - return messagePackMapper.writeValueAsBytes(data); - } - - private byte[] serializeWithAvro(TestRecord data) throws Exception { - GenericRecord record = new GenericData.Record(avroSchema); - record.put("id", data.id); - record.put("name", data.name); - record.put("price", data.price); - record.put("active", data.active); - record.put("category", data.category); - record.put("tags", data.tags); - record.put("metadata", data.metadata); - - for (int i = 0; i < data.extraData.size(); i++) { - record.put("extraData" + i, data.extraData.get(i)); - } - - return serializeAvroRecord(record); - } - - private byte[] serializeAvroRecord(GenericRecord record) throws Exception { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null); - avroWriter.write(record, encoder); - encoder.flush(); - return baos.toByteArray(); - } - - private GenericRecord deserializeWithAvro(byte[] data) throws Exception { - Decoder decoder = DecoderFactory.get().binaryDecoder(data, null); - return avroReader.read(null, decoder); - } - - private byte[] serializeWithProtobuf(TestRecord data) { - var builder = com.imprint.benchmark.TestRecordProto.TestRecord.newBuilder() - .setId(data.id) - .setName(data.name) - .setPrice(data.price) - .setActive(data.active) - .setCategory(data.category) - .addAllTags(data.tags) - .putAllMetadata(data.metadata); - - for (String extraData : data.extraData) { - builder.addExtraData(extraData); - } - - return builder.build().toByteArray(); - } - - private ByteBuffer serializeWithFlatBuffers(TestRecord data) { - var builder = new FlatBufferBuilder(1024); - - int nameOffset = builder.createString(data.name); - int categoryOffset = 
builder.createString(data.category); - - int[] tagOffsets = new int[data.tags.size()]; - for (int i = 0; i < data.tags.size(); i++) { - tagOffsets[i] = builder.createString(data.tags.get(i)); - } - int tagsOffset = com.imprint.benchmark.TestRecordFB.createTagsVector(builder, tagOffsets); - - String[] metadataKeys = data.metadata.keySet().toArray(new String[0]); - String[] metadataValues = new String[metadataKeys.length]; - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - metadataValues[i] = data.metadata.get(metadataKeys[i]); - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = builder.createString(metadataValues[i]); - } - int metadataKeysOffset = com.imprint.benchmark.TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = com.imprint.benchmark.TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - int[] extraDataOffsets = new int[data.extraData.size()]; - for (int i = 0; i < data.extraData.size(); i++) { - extraDataOffsets[i] = builder.createString(data.extraData.get(i)); - } - int extraDataOffset = com.imprint.benchmark.TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - com.imprint.benchmark.TestRecordFB.startTestRecordFB(builder); - com.imprint.benchmark.TestRecordFB.addId(builder, data.id); - com.imprint.benchmark.TestRecordFB.addName(builder, nameOffset); - com.imprint.benchmark.TestRecordFB.addPrice(builder, data.price); - com.imprint.benchmark.TestRecordFB.addActive(builder, data.active); - com.imprint.benchmark.TestRecordFB.addCategory(builder, categoryOffset); - com.imprint.benchmark.TestRecordFB.addTags(builder, tagsOffset); - com.imprint.benchmark.TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - com.imprint.benchmark.TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - com.imprint.benchmark.TestRecordFB.addExtraData(builder, 
extraDataOffset); - int recordOffset = com.imprint.benchmark.TestRecordFB.endTestRecordFB(builder); - - builder.finish(recordOffset); - return builder.dataBuffer(); - } - - private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var builder = ImprintRecord.builder(first.getHeader().getSchemaId()); - var usedFieldIds = new HashSet(); - - // Copy fields from first record (takes precedence) - copyFieldsToBuilder(first, builder, usedFieldIds); - - // Copy non-conflicting fields from second record - copyFieldsToBuilder(second, builder, usedFieldIds); - - return builder.build(); - } - - private void copyFieldsToBuilder(ImprintRecord record, ImprintRecordBuilder builder, Set usedFieldIds) throws Exception { - for (var entry : record.getDirectory()) { - int fieldId = entry.getId(); - if (!usedFieldIds.contains(fieldId)) { - var value = record.getValue(fieldId); - if (value != null) { - builder.field(fieldId, value); - usedFieldIds.add(fieldId); - } - } - } - } - - private TestRecord mergeTestRecords(TestRecord first, TestRecord second) { - TestRecord merged = new TestRecord(); - merged.id = first.id; - merged.name = first.name != null ? first.name : second.name; - merged.price = first.price != 0.0 ? first.price : second.price; - merged.active = first.active; - merged.category = first.category != null ? 
first.category : second.category; - - merged.tags = new ArrayList<>(first.tags); - merged.tags.addAll(second.tags); - - merged.metadata = new HashMap<>(first.metadata); - merged.metadata.putAll(second.metadata); - - return merged; - } - - private GenericRecord mergeAvroRecords(GenericRecord first, GenericRecord second) { - GenericRecord merged = new GenericData.Record(avroSchema); - - // Copy all fields from first record - for (Schema.Field field : avroSchema.getFields()) { - merged.put(field.name(), first.get(field.name())); - } - - // Override with non-null values from second record - for (Schema.Field field : avroSchema.getFields()) { - Object secondValue = second.get(field.name()); - if (secondValue != null && !secondValue.toString().isEmpty()) { - merged.put(field.name(), secondValue); - } - } - - return merged; - } - - private com.imprint.benchmark.TestRecordProto.TestRecord mergeProtobufRecords(com.imprint.benchmark.TestRecordProto.TestRecord first, com.imprint.benchmark.TestRecordProto.TestRecord second) { - return com.imprint.benchmark.TestRecordProto.TestRecord.newBuilder() - .mergeFrom(first) - .mergeFrom(second) - .build(); - } - - private ByteBuffer mergeFlatBuffersRecords(com.imprint.benchmark.TestRecordFB first, com.imprint.benchmark.TestRecordFB second) { - var builder = new FlatBufferBuilder(1024); - - String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); - String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); - double price = second.price() != 0.0 ? 
second.price() : first.price(); - boolean active = second.active(); - int id = first.id(); - - int nameOffset = builder.createString(name); - int categoryOffset = builder.createString(category); - - List mergedTags = new ArrayList<>(); - for (int i = 0; i < first.tagsLength(); i++) { - mergedTags.add(first.tags(i)); - } - for (int i = 0; i < second.tagsLength(); i++) { - mergedTags.add(second.tags(i)); - } - - int[] tagOffsets = new int[mergedTags.size()]; - for (int i = 0; i < mergedTags.size(); i++) { - tagOffsets[i] = builder.createString(mergedTags.get(i)); - } - int tagsOffset = com.imprint.benchmark.TestRecordFB.createTagsVector(builder, tagOffsets); - - Map mergedMetadata = new HashMap<>(); - for (int i = 0; i < first.metadataKeysLength(); i++) { - mergedMetadata.put(first.metadataKeys(i), first.metadataValues(i)); - } - for (int i = 0; i < second.metadataKeysLength(); i++) { - mergedMetadata.put(second.metadataKeys(i), second.metadataValues(i)); - } - - String[] metadataKeys = mergedMetadata.keySet().toArray(new String[0]); - int[] keyOffsets = new int[metadataKeys.length]; - int[] valueOffsets = new int[metadataKeys.length]; - - for (int i = 0; i < metadataKeys.length; i++) { - keyOffsets[i] = builder.createString(metadataKeys[i]); - valueOffsets[i] = builder.createString(mergedMetadata.get(metadataKeys[i])); - } - int metadataKeysOffset = com.imprint.benchmark.TestRecordFB.createMetadataKeysVector(builder, keyOffsets); - int metadataValuesOffset = com.imprint.benchmark.TestRecordFB.createMetadataValuesVector(builder, valueOffsets); - - int[] extraDataOffsets = new int[first.extraDataLength()]; - for (int i = 0; i < first.extraDataLength(); i++) { - extraDataOffsets[i] = builder.createString(first.extraData(i)); - } - int extraDataOffset = com.imprint.benchmark.TestRecordFB.createExtraDataVector(builder, extraDataOffsets); - - com.imprint.benchmark.TestRecordFB.startTestRecordFB(builder); - com.imprint.benchmark.TestRecordFB.addId(builder, id); - 
com.imprint.benchmark.TestRecordFB.addName(builder, nameOffset); - com.imprint.benchmark.TestRecordFB.addPrice(builder, price); - com.imprint.benchmark.TestRecordFB.addActive(builder, active); - com.imprint.benchmark.TestRecordFB.addCategory(builder, categoryOffset); - com.imprint.benchmark.TestRecordFB.addTags(builder, tagsOffset); - com.imprint.benchmark.TestRecordFB.addMetadataKeys(builder, metadataKeysOffset); - com.imprint.benchmark.TestRecordFB.addMetadataValues(builder, metadataValuesOffset); - com.imprint.benchmark.TestRecordFB.addExtraData(builder, extraDataOffset); - int recordOffset = com.imprint.benchmark.TestRecordFB.endTestRecordFB(builder); - - builder.finish(recordOffset); - return builder.dataBuffer(); - } - - private TestRecord createTestRecord() { - var record = new TestRecord(); - record.id = 12345; - record.name = "Test Product"; - record.price = 99.99; - record.active = true; - record.category = "Electronics"; - - record.tags = Arrays.asList("popular", "trending", "bestseller"); - - record.metadata = new HashMap<>(); - record.metadata.put("manufacturer", "TechCorp"); - record.metadata.put("model", "TC-2024"); - record.metadata.put("warranty", "2 years"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value_" + (1000 + i)); - } - - return record; - } - - private TestRecord createTestRecord2() { - var record = new TestRecord(); - record.id = 67890; - record.name = "Test Product 2"; - record.price = 149.99; - record.active = false; - record.category = "Software"; - - record.tags = Arrays.asList("new", "premium"); - - record.metadata = new HashMap<>(); - record.metadata.put("vendor", "SoftCorp"); - record.metadata.put("version", "2.1"); - - record.extraData = new ArrayList<>(); - for (int i = 0; i < 13; i++) { - record.extraData.add("extraField" + i + "_value2_" + (2000 + i)); - } - - return record; - } - - // Test data class for other serialization libraries - public static 
class TestRecord { - public int id; - public String name; - public double price; - public boolean active; - public String category; - public List tags = new ArrayList<>(); - public Map metadata = new HashMap<>(); - public List extraData = new ArrayList<>(); // Fields 8-20 for large record test - - public TestRecord() {} // Required for deserialization - } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/DataGenerator.java b/src/jmh/java/com/imprint/benchmark/DataGenerator.java new file mode 100644 index 0000000..7dd65b2 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/DataGenerator.java @@ -0,0 +1,67 @@ +package com.imprint.benchmark; + +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DataGenerator { + + /** + * A standard record used for serialization benchmarks. + * Contains a mix of common data types. + */ + public static class TestRecord { + public String id; + public long timestamp; + public int flags; + public boolean active; + public double value; + public byte[] data; + public List tags; + public Map metadata; + } + + /** + * A smaller record representing a projection of the full TestRecord. 
+ */ + public static class ProjectedRecord { + public String id; + public long timestamp; + public List tags; + } + + public static TestRecord createTestRecord() { + var record = new TestRecord(); + record.id = "ID" + System.nanoTime(); + record.timestamp = System.currentTimeMillis(); + record.flags = 0xDEADBEEF; + record.active = true; + record.value = Math.PI; + record.data = createBytes(128); + record.tags = createIntList(20); + record.metadata = createStringMap(10); + return record; + } + + public static byte[] createBytes(int size) { + byte[] bytes = new byte[size]; + new Random(0).nextBytes(bytes); + return bytes; + } + + public static List createIntList(int size) { + return IntStream.range(0, size).boxed().collect(Collectors.toList()); + } + + public static Map createStringMap(int size) { + Map map = new HashMap<>(); + for (int i = 0; i < size; i++) { + map.put("key" + i, "value" + i); + } + return map; + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java new file mode 100644 index 0000000..6814681 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java @@ -0,0 +1,29 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; + +/** + * A minimal base class for competitors, holding the test data. 
+ */ +public abstract class AbstractCompetitor implements Competitor { + + protected final String name; + protected DataGenerator.TestRecord testData; + protected DataGenerator.TestRecord testData2; + protected byte[] serializedRecord; + + protected AbstractCompetitor(String name) { + this.name = name; + } + + @Override + public String name() { + return name; + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + this.testData = testRecord; + this.testData2 = testRecord2; + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java new file mode 100644 index 0000000..76f832a --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java @@ -0,0 +1,156 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.stream.Collectors; + +public class AvroCompetitor extends AbstractCompetitor { + + private final Schema schema; + private final Schema projectedSchema; + private final DatumWriter writer; + private final DatumReader reader; + private final DatumWriter projectedWriter; + private byte[] serializedRecord; + + public AvroCompetitor() { + super("Avro-Generic"); + String schemaDefinition = "{\"type\":\"record\",\"name\":\"TestRecord\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"string\"}," + + "{\"name\":\"timestamp\",\"type\":\"long\"}," + + "{\"name\":\"flags\",\"type\":\"int\"}," + + "{\"name\":\"active\",\"type\":\"boolean\"}," + + 
"{\"name\":\"value\",\"type\":\"double\"}," + + "{\"name\":\"data\",\"type\":\"bytes\"}," + + "{\"name\":\"tags\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}," + + "{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"string\"}}" + + "]}"; + this.schema = new Schema.Parser().parse(schemaDefinition); + this.writer = new GenericDatumWriter<>(schema); + this.reader = new GenericDatumReader<>(schema); + + String projectedSchemaDef = "{\"type\":\"record\",\"name\":\"ProjectedRecord\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"string\"}," + + "{\"name\":\"timestamp\",\"type\":\"long\"}," + + "{\"name\":\"tags\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}" + + "]}"; + this.projectedSchema = new Schema.Parser().parse(projectedSchemaDef); + this.projectedWriter = new GenericDatumWriter<>(projectedSchema); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord = buildRecord(testRecord); + } + + private byte[] buildRecord(DataGenerator.TestRecord pojo) { + GenericRecord record = new GenericData.Record(schema); + record.put("id", pojo.id); + record.put("timestamp", pojo.timestamp); + record.put("flags", pojo.flags); + record.put("active", pojo.active); + record.put("value", pojo.value); + record.put("data", ByteBuffer.wrap(pojo.data)); + record.put("tags", pojo.tags); + record.put("metadata", pojo.metadata); + + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + return out.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData)); + } + + @Override + public void deserialize(Blackhole bh) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord, 
null); + bh.consume(reader.read(null, decoder)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + // With generic records, we can project by building a new record with the projected schema + GenericRecord projected = new GenericData.Record(projectedSchema); + projected.put("id", this.testData.id); + projected.put("timestamp", this.testData.timestamp); + projected.put("tags", this.testData.tags.stream().limit(5).collect(Collectors.toList())); + + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + projectedWriter.write(projected, encoder); + encoder.flush(); + bh.consume(out.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // No direct merge in Avro. Must deserialize, merge manually, and re-serialize. + GenericRecord r1 = (GenericRecord) buildAvroRecord(this.testData); + GenericRecord r2 = (GenericRecord) buildAvroRecord(this.testData2); + + GenericRecord merged = new GenericData.Record(schema); + for (Schema.Field field : schema.getFields()) { + Object val = r1.get(field.name()); + if (field.name().equals("timestamp")) { + val = System.currentTimeMillis(); + } else if(field.name().equals("active")) { + val = false; + } else if (r2.hasField(field.name()) && r2.get(field.name()) != null) { + if(!r1.hasField(field.name()) || r1.get(field.name()) == null){ + val = r2.get(field.name()); + } + } + merged.put(field.name(), val); + } + bh.consume(buildBytes(merged)); + } + + private GenericRecord buildAvroRecord(DataGenerator.TestRecord pojo) { + GenericRecord record = new GenericData.Record(schema); + record.put("id", pojo.id); + record.put("timestamp", pojo.timestamp); + record.put("flags", pojo.flags); + record.put("active", pojo.active); + record.put("value", pojo.value); + record.put("data", 
ByteBuffer.wrap(pojo.data)); + record.put("tags", pojo.tags); + record.put("metadata", pojo.metadata); + return record; + } + + private byte[] buildBytes(GenericRecord record) { + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + return out.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java b/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java new file mode 100644 index 0000000..55a5b50 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java @@ -0,0 +1,16 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Defines the contract for a serialization competitor in the benchmark. + */ +public interface Competitor { + String name(); + void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2); + void serialize(Blackhole bh); + void deserialize(Blackhole bh); + void projectAndSerialize(Blackhole bh); + void mergeAndSerialize(Blackhole bh); +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java new file mode 100644 index 0000000..e7f2b13 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java @@ -0,0 +1,137 @@ +package com.imprint.benchmark.competitors; + +import com.google.flatbuffers.FlatBufferBuilder; +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.flatbuffers.TestRecord; +import org.openjdk.jmh.infra.Blackhole; + +import java.nio.ByteBuffer; +import java.util.stream.Collectors; + +public class FlatBuffersCompetitor extends 
AbstractCompetitor { + + private ByteBuffer serializedRecord; + + public FlatBuffersCompetitor() { + super("FlatBuffers"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord = buildRecord(testRecord); + } + + private ByteBuffer buildRecord(DataGenerator.TestRecord pojo) { + FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + int idOffset = builder.createString(pojo.id); + int[] tagsOffsets = pojo.tags.stream().mapToInt(i -> i).toArray(); + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsOffsets); + + int[] metadataKeysOffsets = pojo.metadata.keySet().stream().mapToInt(builder::createString).toArray(); + int[] metadataValuesOffsets = pojo.metadata.values().stream().mapToInt(builder::createString).toArray(); + // This is not correct FlatBuffers map creation, it's a placeholder. + // A proper implementation would require a table for each entry. + // For this benchmark, we'll just serialize the keys vector. 
+ int metadataVectorOffset = TestRecord.createMetadataVector(builder, metadataKeysOffsets); + + int dataOffset = TestRecord.createDataVector(builder, pojo.data); + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, pojo.timestamp); + TestRecord.addFlags(builder, pojo.flags); + TestRecord.addActive(builder, pojo.active); + TestRecord.addValue(builder, pojo.value); + TestRecord.addData(builder, dataOffset); + TestRecord.addTags(builder, tagsVectorOffset); + TestRecord.addMetadata(builder, metadataVectorOffset); + + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + + return builder.dataBuffer(); + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData)); + } + + @Override + public void deserialize(Blackhole bh) { + bh.consume(TestRecord.getRootAsTestRecord(serializedRecord)); + } + + @Override + public void projectAndSerialize(Blackhole bh) { + // FlatBuffers excels here. No need to re-serialize. We "project" by reading. + // But to keep the benchmark fair ("project AND serialize"), we build a new buffer. + FlatBufferBuilder builder = new FlatBufferBuilder(256); + var original = TestRecord.getRootAsTestRecord(serializedRecord); + + int idOffset = builder.createString(original.id()); + int[] tagsOffsets = new int[5]; + for (int i = 0; i < 5; i++) { + tagsOffsets[i] = original.tags(i); + } + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsOffsets); + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, original.timestamp()); + TestRecord.addTags(builder, tagsVectorOffset); + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + + bh.consume(builder.dataBuffer()); + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // No direct merge operation. Must read both, build a new one. 
+ var r1 = TestRecord.getRootAsTestRecord(serializedRecord); + // For simplicity, we don't build and serialize record2. + // We'll just merge fields from r1 into a new record. + FlatBufferBuilder builder = new FlatBufferBuilder(1024); + + int idOffset = builder.createString(r1.id()); + + // Correctly read and rebuild the tags vector + int[] tagsArray = new int[r1.tagsLength()]; + for (int i = 0; i < r1.tagsLength(); i++) { + tagsArray[i] = r1.tags(i); + } + int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsArray); + + // Correctly read and rebuild the metadata vector (assuming simple list) + int[] metadataOffsets = new int[r1.metadataLength()]; + for (int i = 0; i < r1.metadataLength(); i++) { + metadataOffsets[i] = builder.createString(r1.metadata(i)); + } + int metadataVectorOffset = TestRecord.createMetadataVector(builder, metadataOffsets); + + + // Correctly read and rebuild the data vector + ByteBuffer dataBuffer = r1.dataAsByteBuffer(); + byte[] dataArray = new byte[dataBuffer.remaining()]; + dataBuffer.get(dataArray); + int dataOffset = TestRecord.createDataVector(builder, dataArray); + + + TestRecord.startTestRecord(builder); + TestRecord.addId(builder, idOffset); + TestRecord.addTimestamp(builder, System.currentTimeMillis()); // new value + TestRecord.addFlags(builder, r1.flags()); + TestRecord.addActive(builder, false); // new value + TestRecord.addValue(builder, r1.value()); + TestRecord.addData(builder, dataOffset); + TestRecord.addTags(builder, tagsVectorOffset); + TestRecord.addMetadata(builder, metadataVectorOffset); + + int recordOffset = TestRecord.endTestRecord(builder); + builder.finish(recordOffset); + bh.consume(builder.dataBuffer()); + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java new file mode 100644 index 0000000..651becb --- /dev/null +++ 
b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java @@ -0,0 +1,76 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.core.ImprintOperations; +import com.imprint.core.ImprintRecord; +import com.imprint.core.SchemaId; +import org.openjdk.jmh.infra.Blackhole; + +import java.nio.ByteBuffer; + +public class ImprintCompetitor extends AbstractCompetitor { + + private ImprintRecord record; + private ImprintRecord record2; + private ByteBuffer serializedRecord; + private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); + + public ImprintCompetitor() { + super("Imprint"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.record = buildRecord(testRecord); + this.record2 = buildRecord(testRecord2); + this.serializedRecord = record.serializeToBuffer(); + } + + private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) { + var builder = ImprintRecord.builder(SCHEMA_ID); + builder.field(1, pojo.id); + builder.field(2, pojo.timestamp); + builder.field(3, pojo.flags); + builder.field(4, pojo.active); + builder.field(5, pojo.value); + builder.field(6, pojo.data); + builder.field(7, pojo.tags); + builder.field(8, pojo.metadata); + try { + return builder.build(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData).serializeToBuffer()); + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(ImprintRecord.deserialize(serializedRecord)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + var projected = ImprintOperations.project(record, 1, 2, 7); + bh.consume(projected.serializeToBuffer()); + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + var 
merged = ImprintOperations.merge(record, record2); + bh.consume(merged.serializeToBuffer()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java new file mode 100644 index 0000000..813f2fc --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java @@ -0,0 +1,79 @@ +package com.imprint.benchmark.competitors; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +public class JacksonJsonCompetitor extends AbstractCompetitor { + + private final ObjectMapper mapper; + private byte[] serializedRecord; + private byte[] serializedRecord2; + + public JacksonJsonCompetitor() { + super("Jackson-JSON"); + this.mapper = new ObjectMapper(); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + this.serializedRecord = mapper.writeValueAsBytes(testRecord); + this.serializedRecord2 = mapper.writeValueAsBytes(testRecord2); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(mapper.writeValueAsBytes(this.testData)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(mapper.readValue(serializedRecord, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + // Simulate by creating the projected object and serializing it + var projected = new DataGenerator.ProjectedRecord(); + projected.id = this.testData.id; + projected.timestamp = this.testData.timestamp; + 
projected.tags = this.testData.tags.subList(0, 5); + try { + bh.consume(mapper.writeValueAsBytes(projected)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // Simulate by creating a new merged object and serializing it + var merged = new DataGenerator.TestRecord(); + merged.id = this.testData.id; + merged.timestamp = System.currentTimeMillis(); // new value + merged.flags = this.testData.flags; + merged.active = false; // new value + merged.value = this.testData.value; + merged.data = this.testData.data; + merged.tags = this.testData2.tags; + merged.metadata = this.testData2.metadata; + try { + bh.consume(mapper.writeValueAsBytes(merged)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java new file mode 100644 index 0000000..ade6f46 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java @@ -0,0 +1,91 @@ +package com.imprint.benchmark.competitors; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayOutputStream; + +public class KryoCompetitor extends AbstractCompetitor { + + private final Kryo kryo; + private byte[] serializedRecord; + + public KryoCompetitor() { + super("Kryo"); + this.kryo = new Kryo(); + this.kryo.register(DataGenerator.TestRecord.class); + this.kryo.register(DataGenerator.ProjectedRecord.class); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + 
kryo.writeObject(output, testRecord); + this.serializedRecord = baos.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, this.testData); + bh.consume(baos.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try (Input input = new Input(serializedRecord)) { + bh.consume(kryo.readObject(input, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + var projected = new DataGenerator.ProjectedRecord(); + projected.id = this.testData.id; + projected.timestamp = this.testData.timestamp; + projected.tags = this.testData.tags.subList(0, 5); + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, projected); + bh.consume(baos.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + var merged = new DataGenerator.TestRecord(); + merged.id = this.testData.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = this.testData.flags; + merged.active = false; + merged.value = this.testData.value; + merged.data = this.testData.data; + merged.tags = this.testData2.tags; + merged.metadata = this.testData2.metadata; + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, merged); + bh.consume(baos.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java 
b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java new file mode 100644 index 0000000..a8d6744 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java @@ -0,0 +1,78 @@ +package com.imprint.benchmark.competitors; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.imprint.benchmark.DataGenerator; +import org.msgpack.jackson.dataformat.MessagePackFactory; +import org.openjdk.jmh.infra.Blackhole; + +public class MessagePackCompetitor extends AbstractCompetitor { + + private final ObjectMapper mapper; + private byte[] serializedRecord; + private byte[] serializedRecord2; + + public MessagePackCompetitor() { + super("MessagePack"); + this.mapper = new ObjectMapper(new MessagePackFactory()); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + this.serializedRecord = mapper.writeValueAsBytes(testRecord); + this.serializedRecord2 = mapper.writeValueAsBytes(testRecord2); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(mapper.writeValueAsBytes(this.testData)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + bh.consume(mapper.readValue(serializedRecord, DataGenerator.TestRecord.class)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + var projected = new DataGenerator.ProjectedRecord(); + projected.id = this.testData.id; + projected.timestamp = this.testData.timestamp; + projected.tags = this.testData.tags.subList(0, 5); + try { + bh.consume(mapper.writeValueAsBytes(projected)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + var merged = new 
DataGenerator.TestRecord(); + merged.id = this.testData.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = this.testData.flags; + merged.active = false; + merged.value = this.testData.value; + merged.data = this.testData.data; + merged.tags = this.testData2.tags; + merged.metadata = this.testData2.metadata; + try { + bh.consume(mapper.writeValueAsBytes(merged)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java new file mode 100644 index 0000000..61c1909 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java @@ -0,0 +1,69 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.proto.TestRecordOuterClass; +import org.openjdk.jmh.infra.Blackhole; + +public class ProtobufCompetitor extends AbstractCompetitor { + + private byte[] serializedRecord; + + public ProtobufCompetitor() { + super("Protobuf"); + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + this.serializedRecord = buildRecord(testRecord).toByteArray(); + } + + private TestRecordOuterClass.TestRecord buildRecord(DataGenerator.TestRecord pojo) { + return TestRecordOuterClass.TestRecord.newBuilder() + .setId(pojo.id) + .setTimestamp(pojo.timestamp) + .setFlags(pojo.flags) + .setActive(pojo.active) + .setValue(pojo.value) + .setData(com.google.protobuf.ByteString.copyFrom(pojo.data)) + .addAllTags(pojo.tags) + .putAllMetadata(pojo.metadata) + .build(); + } + + @Override + public void serialize(Blackhole bh) { + bh.consume(buildRecord(this.testData).toByteArray()); + } + + @Override + public void deserialize(Blackhole bh) { + try { + 
bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + // Projection with Protobuf can be done by building a new message with a subset of fields. + // There isn't a direct "project" operation on a parsed message. + TestRecordOuterClass.TestRecord projected = TestRecordOuterClass.TestRecord.newBuilder() + .setId(this.testData.id) + .setTimestamp(this.testData.timestamp) + .addAllTags(this.testData.tags.subList(0, 5)) + .build(); + bh.consume(projected.toByteArray()); + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + // Protobuf's `mergeFrom` is a natural fit here. + var record1 = buildRecord(this.testData); + var record2 = buildRecord(this.testData2); + + var merged = record1.toBuilder().mergeFrom(record2).build(); + bh.consume(merged.toByteArray()); + } +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java new file mode 100644 index 0000000..a2dfb93 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java @@ -0,0 +1,117 @@ +package com.imprint.benchmark.competitors; + +import com.imprint.benchmark.DataGenerator; +import com.imprint.benchmark.thrift.ProjectedRecord; +import com.imprint.benchmark.thrift.TestRecord; +import org.apache.thrift.TDeserializer; +import org.apache.thrift.TException; +import org.apache.thrift.TSerializer; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.stream.Collectors; + +public class ThriftCompetitor extends AbstractCompetitor { + + private final TSerializer serializer; + private final TDeserializer deserializer; + private final TestRecord thriftRecord; + + public ThriftCompetitor() 
{ + super("Thrift"); + try { + this.serializer = new TSerializer(new TBinaryProtocol.Factory()); + this.deserializer = new TDeserializer(new TBinaryProtocol.Factory()); + this.thriftRecord = new TestRecord(); + } catch (Exception e) { + throw new RuntimeException("Failed to initialize Thrift competitor", e); + } + } + + @Override + public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { + super.setup(testRecord, testRecord2); + try { + var record = buildThriftRecord(testRecord); + this.serializedRecord = serializer.serialize(record); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + private TestRecord buildThriftRecord(DataGenerator.TestRecord pojo) { + var record = new TestRecord(); + record.setId(pojo.id); + record.setTimestamp(pojo.timestamp); + record.setFlags(pojo.flags); + record.setActive(pojo.active); + record.setValue(pojo.value); + record.setData(ByteBuffer.wrap(pojo.data)); + record.setTags(pojo.tags); + record.setMetadata(pojo.metadata); + return record; + } + + @Override + public void serialize(Blackhole bh) { + try { + bh.consume(serializer.serialize(buildThriftRecord(this.testData))); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void deserialize(Blackhole bh) { + try { + var record = new TestRecord(); + deserializer.deserialize(record, this.serializedRecord); + bh.consume(record); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void projectAndSerialize(Blackhole bh) { + try { + var projected = new ProjectedRecord(); + projected.setId(this.testData.id); + projected.setTimestamp(this.testData.timestamp); + projected.setTags(this.testData.tags.stream().limit(5).collect(Collectors.toList())); + bh.consume(serializer.serialize(projected)); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + @Override + public void mergeAndSerialize(Blackhole bh) { + try { + var r1 = 
buildThriftRecord(this.testData); + var r2 = buildThriftRecord(this.testData2); + + var merged = new TestRecord(); + merged.setId(r1.id); + merged.setTimestamp(System.currentTimeMillis()); + merged.setFlags(r1.flags | r2.flags); + merged.setActive(false); + merged.setValue((r1.value + r2.value) / 2); + merged.setData(r1.data); // Keep r1's data + merged.setTags(r1.tags); + r2.tags.forEach(t -> { + if (!merged.tags.contains(t)) { + merged.tags.add(t); + } + }); + merged.setMetadata(r1.metadata); + r2.metadata.forEach(merged.metadata::putIfAbsent); + + bh.consume(serializer.serialize(merged)); + } catch (TException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/src/jmh/proto/test_record.proto b/src/jmh/proto/test_record.proto index 7a76f71..1187260 100644 --- a/src/jmh/proto/test_record.proto +++ b/src/jmh/proto/test_record.proto @@ -1,15 +1,18 @@ syntax = "proto3"; -option java_package = "com.imprint.benchmark"; -option java_outer_classname = "TestRecordProto"; +package com.imprint.benchmark.proto; + +option java_package = "com.imprint.benchmark.proto"; +option java_outer_classname = "TestRecordOuterClass"; +option java_multiple_files = false; message TestRecord { - int32 id = 1; - string name = 2; - double price = 3; + string id = 1; + int64 timestamp = 2; + int32 flags = 3; bool active = 4; - string category = 5; - repeated string tags = 6; - map metadata = 7; - repeated string extra_data = 8; + double value = 5; + bytes data = 6; + repeated int32 tags = 7; + map metadata = 8; } \ No newline at end of file diff --git a/src/jmh/thrift/test_record.thrift b/src/jmh/thrift/test_record.thrift new file mode 100644 index 0000000..8af2939 --- /dev/null +++ b/src/jmh/thrift/test_record.thrift @@ -0,0 +1,18 @@ +namespace java com.imprint.benchmark.thrift + +struct TestRecord { + 1: required string id; + 2: required i64 timestamp; + 3: required i32 flags; + 4: required bool active; + 5: required double value; + 6: required binary 
data; + 7: required list tags; + 8: required map metadata; +} + +struct ProjectedRecord { + 1: required string id; + 2: required i64 timestamp; + 3: required list tags; +} \ No newline at end of file From 8a5de0c0c22eca4be6493739d34da6550abf7f19 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Mon, 9 Jun 2025 01:08:59 -0400 Subject: [PATCH 40/53] Add single-field access test --- .../benchmark/ComparisonBenchmark.java | 11 ++-- .../com/imprint/benchmark/Competitor.java | 12 ++++ .../competitors/AbstractCompetitor.java | 6 ++ .../benchmark/competitors/AvroCompetitor.java | 11 ++++ .../benchmark/competitors/Competitor.java | 1 + .../competitors/FlatBuffersCompetitor.java | 8 ++- .../competitors/ImprintCompetitor.java | 66 +++++++++++-------- .../competitors/JacksonJsonCompetitor.java | 10 +++ .../benchmark/competitors/KryoCompetitor.java | 10 +++ .../competitors/MessagePackCompetitor.java | 10 +++ .../competitors/ProtobufCompetitor.java | 9 +++ .../competitors/ThriftCompetitor.java | 11 ++++ 12 files changed, 131 insertions(+), 34 deletions(-) create mode 100644 src/jmh/java/com/imprint/benchmark/Competitor.java diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 7000fca..0f99a05 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -41,17 +41,13 @@ public class ComparisonBenchmark { new MessagePackCompetitor() ); - @Param({"Imprint"}) + @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack"}) public String competitorName; private Competitor competitor; private DataGenerator.TestRecord testRecord1; private DataGenerator.TestRecord testRecord2; - public static List competitorName() { - return COMPETITORS.stream().map(Competitor::name).collect(Collectors.toList()); - } - @Setup(Level.Trial) public void setup() { // Find the competitor 
implementation @@ -88,6 +84,11 @@ public void mergeAndSerialize(Blackhole bh) { competitor.mergeAndSerialize(bh); } + @Benchmark + public void accessField(Blackhole bh) { + competitor.accessField(bh); + } + public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(ComparisonBenchmark.class.getSimpleName()) diff --git a/src/jmh/java/com/imprint/benchmark/Competitor.java b/src/jmh/java/com/imprint/benchmark/Competitor.java new file mode 100644 index 0000000..5f92929 --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/Competitor.java @@ -0,0 +1,12 @@ +package com.imprint.benchmark; + +import org.openjdk.jmh.infra.Blackhole; + +public interface Competitor { + String name(); + void setup(); + void serialize(Blackhole bh); + void deserialize(Blackhole bh); + void projectAndSerialize(Blackhole bh); + void mergeAndSerialize(Blackhole bh); +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java index 6814681..d92d3af 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java @@ -1,6 +1,7 @@ package com.imprint.benchmark.competitors; import com.imprint.benchmark.DataGenerator; +import org.openjdk.jmh.infra.Blackhole; /** * A minimal base class for competitors, holding the test data. 
@@ -26,4 +27,9 @@ public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord this.testData = testRecord; this.testData2 = testRecord2; } + + @Override + public void accessField(Blackhole bh) { + // Default implementation is a no-op + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java index 76f832a..00d2969 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java @@ -153,4 +153,15 @@ private byte[] buildBytes(GenericRecord record) { throw new RuntimeException(e); } } + + @Override + public void accessField(Blackhole bh) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord, null); + GenericRecord record = reader.read(null, decoder); + bh.consume(record.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java b/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java index 55a5b50..717bbfc 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java @@ -13,4 +13,5 @@ public interface Competitor { void deserialize(Blackhole bh); void projectAndSerialize(Blackhole bh); void mergeAndSerialize(Blackhole bh); + void accessField(Blackhole bh); } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java index e7f2b13..3af4a4b 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java @@ -66,8 +66,7 @@ public void deserialize(Blackhole bh) { 
@Override public void projectAndSerialize(Blackhole bh) { - // FlatBuffers excels here. No need to re-serialize. We "project" by reading. - // But to keep the benchmark fair ("project AND serialize"), we build a new buffer. + FlatBufferBuilder builder = new FlatBufferBuilder(256); var original = TestRecord.getRootAsTestRecord(serializedRecord); @@ -134,4 +133,9 @@ public void mergeAndSerialize(Blackhole bh) { builder.finish(recordOffset); bh.consume(builder.dataBuffer()); } + + @Override + public void accessField(Blackhole bh) { + bh.consume(TestRecord.getRootAsTestRecord(serializedRecord).timestamp()); + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java index 651becb..2bd4c49 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java @@ -4,15 +4,15 @@ import com.imprint.core.ImprintOperations; import com.imprint.core.ImprintRecord; import com.imprint.core.SchemaId; +import com.imprint.error.ImprintException; import org.openjdk.jmh.infra.Blackhole; import java.nio.ByteBuffer; public class ImprintCompetitor extends AbstractCompetitor { - private ImprintRecord record; - private ImprintRecord record2; - private ByteBuffer serializedRecord; + private ImprintRecord imprintRecord1; + private ImprintRecord imprintRecord2; private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); public ImprintCompetitor() { @@ -22,54 +22,66 @@ public ImprintCompetitor() { @Override public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); - this.record = buildRecord(testRecord); - this.record2 = buildRecord(testRecord2); - this.serializedRecord = record.serializeToBuffer(); - } - - private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) { - var builder = 
ImprintRecord.builder(SCHEMA_ID); - builder.field(1, pojo.id); - builder.field(2, pojo.timestamp); - builder.field(3, pojo.flags); - builder.field(4, pojo.active); - builder.field(5, pojo.value); - builder.field(6, pojo.data); - builder.field(7, pojo.tags); - builder.field(8, pojo.metadata); try { - return builder.build(); - } catch (Exception e) { + this.imprintRecord1 = buildRecord(testRecord); + this.imprintRecord2 = buildRecord(testRecord2); + this.serializedRecord = imprintRecord1.serializeToBuffer().array(); + } catch (ImprintException e) { throw new RuntimeException(e); } } + private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintException { + var builder = ImprintRecord.builder(SCHEMA_ID); + builder.field(0, pojo.id); + builder.field(1, pojo.timestamp); + builder.field(2, pojo.flags); + builder.field(3, pojo.active); + builder.field(4, pojo.value); + builder.field(5, pojo.data); + builder.field(6, pojo.tags); + builder.field(7, pojo.metadata); + return builder.build(); + } + @Override public void serialize(Blackhole bh) { - bh.consume(buildRecord(this.testData).serializeToBuffer()); + try { + bh.consume(buildRecord(this.testData).serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } } @Override public void deserialize(Blackhole bh) { try { - bh.consume(ImprintRecord.deserialize(serializedRecord)); - } catch (Exception e) { + bh.consume(ImprintRecord.deserialize(this.serializedRecord)); + } catch (ImprintException e) { throw new RuntimeException(e); } } @Override public void projectAndSerialize(Blackhole bh) { - var projected = ImprintOperations.project(record, 1, 2, 7); - bh.consume(projected.serializeToBuffer()); + bh.consume(imprintRecord1.project(0, 1, 6).serializeToBuffer()); } @Override public void mergeAndSerialize(Blackhole bh) { try { - var merged = ImprintOperations.merge(record, record2); + var merged = ImprintOperations.merge(this.imprintRecord1, this.imprintRecord2); 
bh.consume(merged.serializeToBuffer()); - } catch (Exception e) { + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Override + public void accessField(Blackhole bh) { + try { + bh.consume(imprintRecord1.getInt64(1)); // Access timestamp by field ID + } catch (ImprintException e) { throw new RuntimeException(e); } } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java index 813f2fc..7fd9cef 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java @@ -76,4 +76,14 @@ public void mergeAndSerialize(Blackhole bh) { throw new RuntimeException(e); } } + + @Override + public void accessField(Blackhole bh) { + try { + var map = mapper.readValue(serializedRecord, java.util.Map.class); + bh.consume(map.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java index ade6f46..77190d7 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java @@ -88,4 +88,14 @@ public void mergeAndSerialize(Blackhole bh) { throw new RuntimeException(e); } } + + @Override + public void accessField(Blackhole bh) { + try (Input input = new Input(serializedRecord)) { + DataGenerator.TestRecord record = kryo.readObject(input, DataGenerator.TestRecord.class); + bh.consume(record.timestamp); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java index 
a8d6744..53955a1 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java @@ -75,4 +75,14 @@ public void mergeAndSerialize(Blackhole bh) { throw new RuntimeException(e); } } + + @Override + public void accessField(Blackhole bh) { + try { + var map = mapper.readValue(serializedRecord, java.util.Map.class); + bh.consume(map.get("timestamp")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java index 61c1909..f45aa64 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java @@ -66,4 +66,13 @@ public void mergeAndSerialize(Blackhole bh) { var merged = record1.toBuilder().mergeFrom(record2).build(); bh.consume(merged.toByteArray()); } + + @Override + public void accessField(Blackhole bh) { + try { + bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord).getTimestamp()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java index a2dfb93..459de42 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java @@ -114,4 +114,15 @@ public void mergeAndSerialize(Blackhole bh) { throw new RuntimeException(e); } } + + @Override + public void accessField(Blackhole bh) { + try { + var record = new TestRecord(); + deserializer.deserialize(record, this.serializedRecord); + bh.consume(record.getTimestamp()); + } catch (TException e) { + throw new RuntimeException(e); + } + } 
} \ No newline at end of file From 0765599947daa18d2e6244b9d43ba2c03c156652 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Mon, 9 Jun 2025 03:18:50 -0400 Subject: [PATCH 41/53] correct benchmark methodology for fairness --- .../benchmark/competitors/AvroCompetitor.java | 62 +++++++++------- .../competitors/FlatBuffersCompetitor.java | 38 +++++----- .../competitors/ImprintCompetitor.java | 31 +++++--- .../competitors/JacksonJsonCompetitor.java | 37 ++++++---- .../benchmark/competitors/KryoCompetitor.java | 71 ++++++++++++------- .../competitors/MessagePackCompetitor.java | 34 +++++---- .../competitors/ProtobufCompetitor.java | 45 +++++++----- .../competitors/ThriftCompetitor.java | 28 +++++--- 8 files changed, 217 insertions(+), 129 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java index 00d2969..f7322ea 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java @@ -20,7 +20,8 @@ public class AvroCompetitor extends AbstractCompetitor { private final DatumWriter writer; private final DatumReader reader; private final DatumWriter projectedWriter; - private byte[] serializedRecord; + private byte[] serializedRecord1; + private byte[] serializedRecord2; public AvroCompetitor() { super("Avro-Generic"); @@ -50,7 +51,8 @@ public AvroCompetitor() { @Override public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); - this.serializedRecord = buildRecord(testRecord); + this.serializedRecord1 = buildRecord(testRecord); + this.serializedRecord2 = buildRecord(testRecord2); } private byte[] buildRecord(DataGenerator.TestRecord pojo) { @@ -82,7 +84,7 @@ public void serialize(Blackhole bh) { @Override public void deserialize(Blackhole bh) { try { - BinaryDecoder decoder = 
DecoderFactory.get().binaryDecoder(serializedRecord, null); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord1, null); bh.consume(reader.read(null, decoder)); } catch (Exception e) { throw new RuntimeException(e); @@ -91,13 +93,17 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - // With generic records, we can project by building a new record with the projected schema - GenericRecord projected = new GenericData.Record(projectedSchema); - projected.put("id", this.testData.id); - projected.put("timestamp", this.testData.timestamp); - projected.put("tags", this.testData.tags.stream().limit(5).collect(Collectors.toList())); - try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + // Full round trip: deserialize, project to a new object, re-serialize + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord1, null); + GenericRecord original = reader.read(null, decoder); + + // With generic records, we can project by building a new record with the projected schema + GenericRecord projected = new GenericData.Record(projectedSchema); + projected.put("id", original.get("id")); + projected.put("timestamp", original.get("timestamp")); + projected.put("tags", ((java.util.List)original.get("tags")).subList(0, 5)); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); projectedWriter.write(projected, encoder); encoder.flush(); @@ -110,23 +116,20 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { // No direct merge in Avro. Must deserialize, merge manually, and re-serialize. 
- GenericRecord r1 = (GenericRecord) buildAvroRecord(this.testData); - GenericRecord r2 = (GenericRecord) buildAvroRecord(this.testData2); + GenericRecord r1 = buildAvroRecordFromBytes(this.serializedRecord1); + GenericRecord r2 = buildAvroRecordFromBytes(this.serializedRecord2); GenericRecord merged = new GenericData.Record(schema); - for (Schema.Field field : schema.getFields()) { - Object val = r1.get(field.name()); - if (field.name().equals("timestamp")) { - val = System.currentTimeMillis(); - } else if(field.name().equals("active")) { - val = false; - } else if (r2.hasField(field.name()) && r2.get(field.name()) != null) { - if(!r1.hasField(field.name()) || r1.get(field.name()) == null){ - val = r2.get(field.name()); - } - } - merged.put(field.name(), val); - } + // Simplified merge logic: take most fields from r1, some from r2 + merged.put("id", r1.get("id")); + merged.put("timestamp", System.currentTimeMillis()); + merged.put("flags", r1.get("flags")); + merged.put("active", false); + merged.put("value", r1.get("value")); + merged.put("data", r1.get("data")); + merged.put("tags", r2.get("tags")); + merged.put("metadata", r2.get("metadata")); + bh.consume(buildBytes(merged)); } @@ -143,6 +146,15 @@ private GenericRecord buildAvroRecord(DataGenerator.TestRecord pojo) { return record; } + private GenericRecord buildAvroRecordFromBytes(byte[] bytes) { + try { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); + return reader.read(null, decoder); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private byte[] buildBytes(GenericRecord record) { try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); @@ -157,7 +169,7 @@ private byte[] buildBytes(GenericRecord record) { @Override public void accessField(Blackhole bh) { try { - BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedRecord, null); + BinaryDecoder decoder = 
DecoderFactory.get().binaryDecoder(serializedRecord1, null); GenericRecord record = reader.read(null, decoder); bh.consume(record.get("timestamp")); } catch (Exception e) { diff --git a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java index 3af4a4b..bd51eb9 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java @@ -10,7 +10,8 @@ public class FlatBuffersCompetitor extends AbstractCompetitor { - private ByteBuffer serializedRecord; + private ByteBuffer serializedRecord1; + private ByteBuffer serializedRecord2; public FlatBuffersCompetitor() { super("FlatBuffers"); @@ -19,7 +20,8 @@ public FlatBuffersCompetitor() { @Override public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); - this.serializedRecord = buildRecord(testRecord); + this.serializedRecord1 = buildRecord(testRecord); + this.serializedRecord2 = buildRecord(testRecord2); } private ByteBuffer buildRecord(DataGenerator.TestRecord pojo) { @@ -61,16 +63,18 @@ public void serialize(Blackhole bh) { @Override public void deserialize(Blackhole bh) { - bh.consume(TestRecord.getRootAsTestRecord(serializedRecord)); + bh.consume(TestRecord.getRootAsTestRecord(serializedRecord1)); } @Override public void projectAndSerialize(Blackhole bh) { FlatBufferBuilder builder = new FlatBufferBuilder(256); - var original = TestRecord.getRootAsTestRecord(serializedRecord); + var original = TestRecord.getRootAsTestRecord(serializedRecord1); int idOffset = builder.createString(original.id()); + + // Manual sublist int[] tagsOffsets = new int[5]; for (int i = 0; i < 5; i++) { tagsOffsets[i] = original.tags(i); @@ -90,29 +94,31 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { // No direct merge 
operation. Must read both, build a new one. - var r1 = TestRecord.getRootAsTestRecord(serializedRecord); - // For simplicity, we don't build and serialize record2. - // We'll just merge fields from r1 into a new record. + var r1 = TestRecord.getRootAsTestRecord(serializedRecord1); + var r2 = TestRecord.getRootAsTestRecord(serializedRecord2); + FlatBufferBuilder builder = new FlatBufferBuilder(1024); int idOffset = builder.createString(r1.id()); // Correctly read and rebuild the tags vector - int[] tagsArray = new int[r1.tagsLength()]; - for (int i = 0; i < r1.tagsLength(); i++) { - tagsArray[i] = r1.tags(i); + // For this benchmark, we'll just take tags from the second record + int[] tagsArray = new int[r2.tagsLength()]; + for (int i = 0; i < r2.tagsLength(); i++) { + tagsArray[i] = r2.tags(i); } int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsArray); - // Correctly read and rebuild the metadata vector (assuming simple list) - int[] metadataOffsets = new int[r1.metadataLength()]; - for (int i = 0; i < r1.metadataLength(); i++) { - metadataOffsets[i] = builder.createString(r1.metadata(i)); + // Correctly read and rebuild the metadata vector + // For this benchmark, we'll just take metadata from the second record + int[] metadataOffsets = new int[r2.metadataLength()]; + for (int i = 0; i < r2.metadataLength(); i++) { + metadataOffsets[i] = builder.createString(r2.metadata(i)); } int metadataVectorOffset = TestRecord.createMetadataVector(builder, metadataOffsets); - // Correctly read and rebuild the data vector + // Correctly read and rebuild the data vector from r1 ByteBuffer dataBuffer = r1.dataAsByteBuffer(); byte[] dataArray = new byte[dataBuffer.remaining()]; dataBuffer.get(dataArray); @@ -136,6 +142,6 @@ public void mergeAndSerialize(Blackhole bh) { @Override public void accessField(Blackhole bh) { - bh.consume(TestRecord.getRootAsTestRecord(serializedRecord).timestamp()); + 
bh.consume(TestRecord.getRootAsTestRecord(serializedRecord1).timestamp()); } } \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java index 2bd4c49..5f2781d 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java @@ -13,6 +13,8 @@ public class ImprintCompetitor extends AbstractCompetitor { private ImprintRecord imprintRecord1; private ImprintRecord imprintRecord2; + private byte[] serializedRecord1; + private byte[] serializedRecord2; private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); public ImprintCompetitor() { @@ -25,7 +27,14 @@ public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord try { this.imprintRecord1 = buildRecord(testRecord); this.imprintRecord2 = buildRecord(testRecord2); - this.serializedRecord = imprintRecord1.serializeToBuffer().array(); + + ByteBuffer buf1 = this.imprintRecord1.serializeToBuffer(); + this.serializedRecord1 = new byte[buf1.remaining()]; + buf1.get(this.serializedRecord1); + + ByteBuffer buf2 = this.imprintRecord2.serializeToBuffer(); + this.serializedRecord2 = new byte[buf2.remaining()]; + buf2.get(this.serializedRecord2); } catch (ImprintException e) { throw new RuntimeException(e); } @@ -46,17 +55,13 @@ private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintE @Override public void serialize(Blackhole bh) { - try { - bh.consume(buildRecord(this.testData).serializeToBuffer()); - } catch (ImprintException e) { - throw new RuntimeException(e); - } + bh.consume(this.imprintRecord1.serializeToBuffer()); } @Override public void deserialize(Blackhole bh) { try { - bh.consume(ImprintRecord.deserialize(this.serializedRecord)); + bh.consume(ImprintRecord.deserialize(this.serializedRecord1)); } catch (ImprintException e) { throw new RuntimeException(e); } 
@@ -64,13 +69,21 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - bh.consume(imprintRecord1.project(0, 1, 6).serializeToBuffer()); + try { + ImprintRecord record = ImprintRecord.deserialize(this.serializedRecord1); + ImprintRecord projected = record.project(0, 1, 6); + bh.consume(projected.serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } } @Override public void mergeAndSerialize(Blackhole bh) { try { - var merged = ImprintOperations.merge(this.imprintRecord1, this.imprintRecord2); + var r1 = ImprintRecord.deserialize(this.serializedRecord1); + var r2 = ImprintRecord.deserialize(this.serializedRecord2); + var merged = ImprintOperations.merge(r1, r2); bh.consume(merged.serializeToBuffer()); } catch (ImprintException e) { throw new RuntimeException(e); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java index 7fd9cef..a32e9a8 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java @@ -46,12 +46,16 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - // Simulate by creating the projected object and serializing it - var projected = new DataGenerator.ProjectedRecord(); - projected.id = this.testData.id; - projected.timestamp = this.testData.timestamp; - projected.tags = this.testData.tags.subList(0, 5); try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + + // Simulate by creating the projected object and serializing it + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = original.tags.subList(0, 5); + 
bh.consume(mapper.writeValueAsBytes(projected)); } catch (Exception e) { throw new RuntimeException(e); @@ -60,17 +64,20 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { - // Simulate by creating a new merged object and serializing it - var merged = new DataGenerator.TestRecord(); - merged.id = this.testData.id; - merged.timestamp = System.currentTimeMillis(); // new value - merged.flags = this.testData.flags; - merged.active = false; // new value - merged.value = this.testData.value; - merged.data = this.testData.data; - merged.tags = this.testData2.tags; - merged.metadata = this.testData2.metadata; try { + var r1 = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + var r2 = mapper.readValue(serializedRecord2, DataGenerator.TestRecord.class); + // Simulate by creating a new merged object and serializing it + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); // new value + merged.flags = r1.flags; + merged.active = false; // new value + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + bh.consume(mapper.writeValueAsBytes(merged)); } catch (Exception e) { throw new RuntimeException(e); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java index 77190d7..15ccc24 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java @@ -7,11 +7,13 @@ import org.openjdk.jmh.infra.Blackhole; import java.io.ByteArrayOutputStream; +import java.util.ArrayList; public class KryoCompetitor extends AbstractCompetitor { private final Kryo kryo; - private byte[] serializedRecord; + private byte[] serializedRecord1; + private byte[] serializedRecord2; public KryoCompetitor() { super("Kryo"); @@ -26,7 +28,10 
@@ public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); Output output = new Output(baos)) { kryo.writeObject(output, testRecord); - this.serializedRecord = baos.toByteArray(); + this.serializedRecord1 = baos.toByteArray(); + baos.reset(); + kryo.writeObject(output, testRecord2); + this.serializedRecord2 = baos.toByteArray(); } catch (Exception e) { throw new RuntimeException(e); } @@ -45,7 +50,7 @@ public void serialize(Blackhole bh) { @Override public void deserialize(Blackhole bh) { - try (Input input = new Input(serializedRecord)) { + try (Input input = new Input(serializedRecord1)) { bh.consume(kryo.readObject(input, DataGenerator.TestRecord.class)); } catch (Exception e) { throw new RuntimeException(e); @@ -54,15 +59,21 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - var projected = new DataGenerator.ProjectedRecord(); - projected.id = this.testData.id; - projected.timestamp = this.testData.timestamp; - projected.tags = this.testData.tags.subList(0, 5); + // Full round trip: deserialize, project to a new object, re-serialize + try (Input input = new Input(serializedRecord1)) { + DataGenerator.TestRecord original = kryo.readObject(input, DataGenerator.TestRecord.class); + + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = new ArrayList<>(original.tags.subList(0, 5)); + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, projected); + bh.consume(baos.toByteArray()); + } - try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Output output = new Output(baos)) { - kryo.writeObject(output, projected); - bh.consume(baos.toByteArray()); } catch (Exception e) { throw new RuntimeException(e); } @@ -70,20 +81,30 @@ public void 
projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { - var merged = new DataGenerator.TestRecord(); - merged.id = this.testData.id; - merged.timestamp = System.currentTimeMillis(); - merged.flags = this.testData.flags; - merged.active = false; - merged.value = this.testData.value; - merged.data = this.testData.data; - merged.tags = this.testData2.tags; - merged.metadata = this.testData2.metadata; + try { + DataGenerator.TestRecord r1, r2; + try (Input input = new Input(serializedRecord1)) { + r1 = kryo.readObject(input, DataGenerator.TestRecord.class); + } + try (Input input = new Input(serializedRecord2)) { + r2 = kryo.readObject(input, DataGenerator.TestRecord.class); + } - try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Output output = new Output(baos)) { - kryo.writeObject(output, merged); - bh.consume(baos.toByteArray()); + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = r1.flags; + merged.active = false; + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos)) { + kryo.writeObject(output, merged); + bh.consume(baos.toByteArray()); + } } catch (Exception e) { throw new RuntimeException(e); } @@ -91,7 +112,7 @@ public void mergeAndSerialize(Blackhole bh) { @Override public void accessField(Blackhole bh) { - try (Input input = new Input(serializedRecord)) { + try (Input input = new Input(serializedRecord1)) { DataGenerator.TestRecord record = kryo.readObject(input, DataGenerator.TestRecord.class); bh.consume(record.timestamp); } catch (Exception e) { diff --git a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java index 53955a1..65269e5 100644 --- 
a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java @@ -47,11 +47,15 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - var projected = new DataGenerator.ProjectedRecord(); - projected.id = this.testData.id; - projected.timestamp = this.testData.timestamp; - projected.tags = this.testData.tags.subList(0, 5); try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + + var projected = new DataGenerator.ProjectedRecord(); + projected.id = original.id; + projected.timestamp = original.timestamp; + projected.tags = original.tags.subList(0, 5); + bh.consume(mapper.writeValueAsBytes(projected)); } catch (Exception e) { throw new RuntimeException(e); @@ -60,16 +64,20 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { - var merged = new DataGenerator.TestRecord(); - merged.id = this.testData.id; - merged.timestamp = System.currentTimeMillis(); - merged.flags = this.testData.flags; - merged.active = false; - merged.value = this.testData.value; - merged.data = this.testData.data; - merged.tags = this.testData2.tags; - merged.metadata = this.testData2.metadata; try { + var r1 = mapper.readValue(serializedRecord, DataGenerator.TestRecord.class); + var r2 = mapper.readValue(serializedRecord2, DataGenerator.TestRecord.class); + + var merged = new DataGenerator.TestRecord(); + merged.id = r1.id; + merged.timestamp = System.currentTimeMillis(); + merged.flags = r1.flags; + merged.active = false; + merged.value = r1.value; + merged.data = r1.data; + merged.tags = r2.tags; + merged.metadata = r2.metadata; + bh.consume(mapper.writeValueAsBytes(merged)); } catch (Exception e) { throw new RuntimeException(e); diff --git 
a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java index f45aa64..547abfe 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java @@ -6,7 +6,8 @@ public class ProtobufCompetitor extends AbstractCompetitor { - private byte[] serializedRecord; + private byte[] serializedRecord1; + private byte[] serializedRecord2; public ProtobufCompetitor() { super("Protobuf"); @@ -15,7 +16,8 @@ public ProtobufCompetitor() { @Override public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); - this.serializedRecord = buildRecord(testRecord).toByteArray(); + this.serializedRecord1 = buildRecord(testRecord).toByteArray(); + this.serializedRecord2 = buildRecord(testRecord2).toByteArray(); } private TestRecordOuterClass.TestRecord buildRecord(DataGenerator.TestRecord pojo) { @@ -39,7 +41,7 @@ public void serialize(Blackhole bh) { @Override public void deserialize(Blackhole bh) { try { - bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord)); + bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1)); } catch (Exception e) { throw new RuntimeException(e); } @@ -47,30 +49,39 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { - // Projection with Protobuf can be done by building a new message with a subset of fields. - // There isn't a direct "project" operation on a parsed message. 
- TestRecordOuterClass.TestRecord projected = TestRecordOuterClass.TestRecord.newBuilder() - .setId(this.testData.id) - .setTimestamp(this.testData.timestamp) - .addAllTags(this.testData.tags.subList(0, 5)) - .build(); - bh.consume(projected.toByteArray()); + try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1); + + TestRecordOuterClass.TestRecord projected = TestRecordOuterClass.TestRecord.newBuilder() + .setId(original.getId()) + .setTimestamp(original.getTimestamp()) + .addAllTags(original.getTagsList().subList(0, 5)) + .build(); + bh.consume(projected.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } } @Override public void mergeAndSerialize(Blackhole bh) { - // Protobuf's `mergeFrom` is a natural fit here. - var record1 = buildRecord(this.testData); - var record2 = buildRecord(this.testData2); + try { + // Protobuf's `mergeFrom` is a natural fit here. 
+ var record1 = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1); + var record2 = TestRecordOuterClass.TestRecord.parseFrom(serializedRecord2); - var merged = record1.toBuilder().mergeFrom(record2).build(); - bh.consume(merged.toByteArray()); + var merged = record1.toBuilder().mergeFrom(record2).build(); + bh.consume(merged.toByteArray()); + } catch (Exception e) { + throw new RuntimeException(e); + } } @Override public void accessField(Blackhole bh) { try { - bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord).getTimestamp()); + bh.consume(TestRecordOuterClass.TestRecord.parseFrom(serializedRecord1).getTimestamp()); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java index 459de42..537eefa 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java @@ -18,6 +18,8 @@ public class ThriftCompetitor extends AbstractCompetitor { private final TSerializer serializer; private final TDeserializer deserializer; private final TestRecord thriftRecord; + private byte[] serializedRecord1; + private byte[] serializedRecord2; public ThriftCompetitor() { super("Thrift"); @@ -34,8 +36,10 @@ public ThriftCompetitor() { public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); try { - var record = buildThriftRecord(testRecord); - this.serializedRecord = serializer.serialize(record); + var record1 = buildThriftRecord(testRecord); + this.serializedRecord1 = serializer.serialize(record1); + var record2 = buildThriftRecord(testRecord2); + this.serializedRecord2 = serializer.serialize(record2); } catch (TException e) { throw new RuntimeException(e); } @@ -67,7 +71,7 @@ public void serialize(Blackhole bh) { public void 
deserialize(Blackhole bh) { try { var record = new TestRecord(); - deserializer.deserialize(record, this.serializedRecord); + deserializer.deserialize(record, this.serializedRecord1); bh.consume(record); } catch (TException e) { throw new RuntimeException(e); @@ -77,10 +81,14 @@ var record = new TestRecord(); @Override public void projectAndSerialize(Blackhole bh) { try { + // Full round trip: deserialize, project to a new object, re-serialize + var original = new TestRecord(); + deserializer.deserialize(original, this.serializedRecord1); + var projected = new ProjectedRecord(); - projected.setId(this.testData.id); - projected.setTimestamp(this.testData.timestamp); - projected.setTags(this.testData.tags.stream().limit(5).collect(Collectors.toList())); + projected.setId(original.getId()); + projected.setTimestamp(original.getTimestamp()); + projected.setTags(original.getTags().stream().limit(5).collect(Collectors.toList())); bh.consume(serializer.serialize(projected)); } catch (TException e) { throw new RuntimeException(e); @@ -90,8 +98,10 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { try { - var r1 = buildThriftRecord(this.testData); - var r2 = buildThriftRecord(this.testData2); + var r1 = new TestRecord(); + deserializer.deserialize(r1, this.serializedRecord1); + var r2 = new TestRecord(); + deserializer.deserialize(r2, this.serializedRecord2); var merged = new TestRecord(); merged.setId(r1.id); @@ -119,7 +129,7 @@ public void mergeAndSerialize(Blackhole bh) { public void accessField(Blackhole bh) { try { var record = new TestRecord(); - deserializer.deserialize(record, this.serializedRecord); + deserializer.deserialize(record, this.serializedRecord1); bh.consume(record.getTimestamp()); } catch (TException e) { throw new RuntimeException(e); From adcd629c1504ed67ad7ee70001cf763405c50d2d Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Mon, 9 Jun 2025 13:12:01 -0400 Subject: [PATCH 42/53] micro-optiomize 
and attempt to make ComparisonBenchmark tests a little more fair --- build.gradle | 41 +- .../benchmark/ComparisonBenchmark.java | 25 +- .../competitors/AbstractCompetitor.java | 1 - .../benchmark/competitors/AvroCompetitor.java | 1 - .../competitors/ImprintCompetitor.java | 26 +- .../benchmark/competitors/KryoCompetitor.java | 66 +- .../competitors/ThriftCompetitor.java | 2 - src/jmh/sbe/test_record.xml | 61 ++ .../java/com/imprint/core/ImprintBuffers.java | 212 +++++-- .../com/imprint/core/ImprintOperations.java | 86 +-- .../java/com/imprint/core/ImprintRecord.java | 114 +++- .../imprint/core/ImprintRecordBuilder.java | 60 +- .../java/com/imprint/core/ImprintStream.java | 90 +-- src/main/java/com/imprint/types/TypeCode.java | 25 +- .../java/com/imprint/types/TypeHandler.java | 6 +- src/main/java/com/imprint/types/Value.java | 20 +- src/main/java/com/imprint/util/VarInt.java | 9 +- .../com/imprint/core/ImprintStreamTest.java | 78 +++ .../com/imprint/profile/ProfilerTest.java | 567 ++++++++++++------ 19 files changed, 1014 insertions(+), 476 deletions(-) create mode 100644 src/jmh/sbe/test_record.xml create mode 100644 src/test/java/com/imprint/core/ImprintStreamTest.java diff --git a/build.gradle b/build.gradle index 852be08..d3480e6 100644 --- a/build.gradle +++ b/build.gradle @@ -51,7 +51,15 @@ dependencies { jmhImplementation 'org.msgpack:msgpack-core:0.9.8' jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' jmhImplementation 'org.apache.thrift:libthrift:0.19.0' - jmhImplementation 'javax.annotation:javax.annotation-api:1.3.2' + + // SBE for benchmarking + jmhImplementation 'uk.co.real-logic:sbe-all:1.35.3' + jmhImplementation 'io.aeron:aeron-client:1.41.2' // SBE has a dependency on Agrona, included in aeron-client + + // FastUtil for high-performance primitive collections + implementation 'it.unimi.dsi:fastutil:8.5.12' + // Required for generated Thrift code on JDK 11+ + implementation 'javax.annotation:javax.annotation-api:1.3.2' } protobuf { 
@@ -188,12 +196,41 @@ task generateJmhThrift(type: Exec) { } } +// Task for SBE code generation +task generateSbe(type: JavaExec) { + description = 'Generate Java classes from SBE schema' + group = 'build' + + def outputDir = file("${buildDir}/generated/sbe/java") + def schemaFile = file('src/jmh/sbe/schema.xml') + def sbeXsd = file('src/jmh/sbe/sbe.xsd') + + // Ensure the sbe-tool is on the classpath for this task + classpath = sourceSets.jmh.runtimeClasspath + + main = 'uk.co.real_logic.sbe.SbeTool' + systemProperties = [ + "sbe.output.dir": outputDir.absolutePath, + "sbe.validation.xsd": sbeXsd.absolutePath + ] + args = [ schemaFile.absolutePath ] + + inputs.file(schemaFile) + inputs.file(sbeXsd) + outputs.dir(outputDir) + + doFirst { + outputDir.mkdirs() + } +} + // Add generated FlatBuffers sources to JMH source set sourceSets { jmh { java { srcDir 'build/generated/source/flatbuffers/jmh/java' srcDir 'build/generated-src/thrift/jmh/java' + srcDir 'build/generated/sbe/java' } proto { srcDir 'src/jmh/proto' @@ -201,7 +238,7 @@ sourceSets { } } -// Make JMH compilation depend on FlatBuffers generation +// Make JMH compilation depend on generation tasks compileJmhJava.dependsOn generateFlatBuffers compileJmhJava.dependsOn generateJmhThrift diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 0f99a05..f2c7398 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -1,15 +1,7 @@ package com.imprint.benchmark; -import com.imprint.benchmark.competitors.AbstractCompetitor; -import com.imprint.benchmark.competitors.AvroCompetitor; +import com.imprint.benchmark.competitors.*; import com.imprint.benchmark.competitors.Competitor; -import com.imprint.benchmark.competitors.FlatBuffersCompetitor; -import com.imprint.benchmark.competitors.ImprintCompetitor; -import 
com.imprint.benchmark.competitors.JacksonJsonCompetitor; -import com.imprint.benchmark.competitors.KryoCompetitor; -import com.imprint.benchmark.competitors.MessagePackCompetitor; -import com.imprint.benchmark.competitors.ProtobufCompetitor; -import com.imprint.benchmark.competitors.ThriftCompetitor; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -20,13 +12,12 @@ import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@BenchmarkMode(Mode.Throughput) -@OutputTimeUnit(TimeUnit.SECONDS) -@Warmup(iterations = 3, time = 5) -@Measurement(iterations = 5, time = 10) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 10, time = 1) @Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { @@ -45,8 +36,6 @@ public class ComparisonBenchmark { public String competitorName; private Competitor competitor; - private DataGenerator.TestRecord testRecord1; - private DataGenerator.TestRecord testRecord2; @Setup(Level.Trial) public void setup() { @@ -57,8 +46,8 @@ public void setup() { .orElseThrow(() -> new IllegalStateException("Unknown competitor: " + competitorName)); // Create the test data - testRecord1 = DataGenerator.createTestRecord(); - testRecord2 = DataGenerator.createTestRecord(); + DataGenerator.TestRecord testRecord1 = DataGenerator.createTestRecord(); + DataGenerator.TestRecord testRecord2 = DataGenerator.createTestRecord(); // Setup the competitor with the data competitor.setup(testRecord1, testRecord2); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java index d92d3af..bfdea2a 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java +++ 
b/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java @@ -11,7 +11,6 @@ public abstract class AbstractCompetitor implements Competitor { protected final String name; protected DataGenerator.TestRecord testData; protected DataGenerator.TestRecord testData2; - protected byte[] serializedRecord; protected AbstractCompetitor(String name) { this.name = name; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java index f7322ea..71c8306 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java @@ -11,7 +11,6 @@ import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; -import java.util.stream.Collectors; public class AvroCompetitor extends AbstractCompetitor { diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java index 5f2781d..3e05cd7 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java @@ -3,8 +3,10 @@ import com.imprint.benchmark.DataGenerator; import com.imprint.core.ImprintOperations; import com.imprint.core.ImprintRecord; +import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.error.ImprintException; +import lombok.SneakyThrows; import org.openjdk.jmh.infra.Blackhole; import java.nio.ByteBuffer; @@ -12,7 +14,6 @@ public class ImprintCompetitor extends AbstractCompetitor { private ImprintRecord imprintRecord1; - private ImprintRecord imprintRecord2; private byte[] serializedRecord1; private byte[] serializedRecord2; private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); @@ -26,13 +27,13 @@ public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord super.setup(testRecord, 
testRecord2); try { this.imprintRecord1 = buildRecord(testRecord); - this.imprintRecord2 = buildRecord(testRecord2); + ImprintRecord imprintRecord2 = buildRecord(testRecord2); ByteBuffer buf1 = this.imprintRecord1.serializeToBuffer(); this.serializedRecord1 = new byte[buf1.remaining()]; buf1.get(this.serializedRecord1); - ByteBuffer buf2 = this.imprintRecord2.serializeToBuffer(); + ByteBuffer buf2 = imprintRecord2.serializeToBuffer(); this.serializedRecord2 = new byte[buf2.remaining()]; buf2.get(this.serializedRecord2); } catch (ImprintException e) { @@ -53,9 +54,26 @@ private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintE return builder.build(); } + private ImprintRecordBuilder preBuildRecord(DataGenerator.TestRecord pojo) throws ImprintException { + var builder = ImprintRecord.builder(SCHEMA_ID); + builder.field(0, pojo.id); + builder.field(1, pojo.timestamp); + builder.field(2, pojo.flags); + builder.field(3, pojo.active); + builder.field(4, pojo.value); + builder.field(5, pojo.data); + builder.field(6, pojo.tags); + builder.field(7, pojo.metadata); + return builder; + } + @Override public void serialize(Blackhole bh) { - bh.consume(this.imprintRecord1.serializeToBuffer()); + try { + bh.consume(buildRecord(DataGenerator.createTestRecord()).serializeToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } } @Override diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java index 15ccc24..d76a937 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java @@ -8,6 +8,8 @@ import java.io.ByteArrayOutputStream; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; public class KryoCompetitor extends AbstractCompetitor { @@ -20,76 +22,97 @@ public KryoCompetitor() { this.kryo = new Kryo(); 
this.kryo.register(DataGenerator.TestRecord.class); this.kryo.register(DataGenerator.ProjectedRecord.class); + this.kryo.register(byte[].class); + kryo.register(ArrayList.class); + kryo.register(HashMap.class); + kryo.register(Arrays.asList().getClass()); } @Override public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); + + // Fix 1: Create fresh streams for each record + this.serializedRecord1 = serializeRecord(testRecord); + this.serializedRecord2 = serializeRecord(testRecord2); + } + + // Helper method to properly serialize a record + private byte[] serializeRecord(DataGenerator.TestRecord record) { try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); Output output = new Output(baos)) { - kryo.writeObject(output, testRecord); - this.serializedRecord1 = baos.toByteArray(); - baos.reset(); - kryo.writeObject(output, testRecord2); - this.serializedRecord2 = baos.toByteArray(); + kryo.writeObject(output, record); + output.flush(); // Important: flush before getting bytes + return baos.toByteArray(); } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("Failed to serialize record", e); } } @Override public void serialize(Blackhole bh) { + // Fix 2: Create fresh output stream each time try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); Output output = new Output(baos)) { kryo.writeObject(output, this.testData); + output.flush(); // Ensure data is written bh.consume(baos.toByteArray()); } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("Serialize failed", e); } } @Override public void deserialize(Blackhole bh) { + // Fix 3: Create fresh input each time try (Input input = new Input(serializedRecord1)) { bh.consume(kryo.readObject(input, DataGenerator.TestRecord.class)); } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("Deserialize failed", e); } } @Override public 
void projectAndSerialize(Blackhole bh) { - // Full round trip: deserialize, project to a new object, re-serialize - try (Input input = new Input(serializedRecord1)) { - DataGenerator.TestRecord original = kryo.readObject(input, DataGenerator.TestRecord.class); + try { + // Step 1: Deserialize with fresh input + DataGenerator.TestRecord original; + try (Input input = new Input(serializedRecord1)) { + original = kryo.readObject(input, DataGenerator.TestRecord.class); + } + // Step 2: Create projected record var projected = new DataGenerator.ProjectedRecord(); projected.id = original.id; projected.timestamp = original.timestamp; - projected.tags = new ArrayList<>(original.tags.subList(0, 5)); + projected.tags = new ArrayList<>(original.tags.subList(0, Math.min(5, original.tags.size()))); + // Step 3: Serialize with fresh output try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); Output output = new Output(baos)) { kryo.writeObject(output, projected); + output.flush(); bh.consume(baos.toByteArray()); } } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("ProjectAndSerialize failed", e); } } @Override public void mergeAndSerialize(Blackhole bh) { try { + // Step 1: Deserialize both records with fresh inputs DataGenerator.TestRecord r1, r2; - try (Input input = new Input(serializedRecord1)) { - r1 = kryo.readObject(input, DataGenerator.TestRecord.class); + try (Input input1 = new Input(serializedRecord1)) { + r1 = kryo.readObject(input1, DataGenerator.TestRecord.class); } - try (Input input = new Input(serializedRecord2)) { - r2 = kryo.readObject(input, DataGenerator.TestRecord.class); + try (Input input2 = new Input(serializedRecord2)) { + r2 = kryo.readObject(input2, DataGenerator.TestRecord.class); } + // Step 2: Create merged record var merged = new DataGenerator.TestRecord(); merged.id = r1.id; merged.timestamp = System.currentTimeMillis(); @@ -100,23 +123,26 @@ public void mergeAndSerialize(Blackhole bh) { merged.tags = 
r2.tags; merged.metadata = r2.metadata; + // Step 3: Serialize with fresh output try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); Output output = new Output(baos)) { kryo.writeObject(output, merged); + output.flush(); bh.consume(baos.toByteArray()); } } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("MergeAndSerialize failed", e); } } @Override public void accessField(Blackhole bh) { + // Fix 4: Create fresh input for each access try (Input input = new Input(serializedRecord1)) { DataGenerator.TestRecord record = kryo.readObject(input, DataGenerator.TestRecord.class); bh.consume(record.timestamp); } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("AccessField failed", e); } } -} \ No newline at end of file +} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java index 537eefa..18530b5 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java @@ -17,7 +17,6 @@ public class ThriftCompetitor extends AbstractCompetitor { private final TSerializer serializer; private final TDeserializer deserializer; - private final TestRecord thriftRecord; private byte[] serializedRecord1; private byte[] serializedRecord2; @@ -26,7 +25,6 @@ public ThriftCompetitor() { try { this.serializer = new TSerializer(new TBinaryProtocol.Factory()); this.deserializer = new TDeserializer(new TBinaryProtocol.Factory()); - this.thriftRecord = new TestRecord(); } catch (Exception e) { throw new RuntimeException("Failed to initialize Thrift competitor", e); } diff --git a/src/jmh/sbe/test_record.xml b/src/jmh/sbe/test_record.xml new file mode 100644 index 0000000..9feaee8 --- /dev/null +++ b/src/jmh/sbe/test_record.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index 24ec41d..4afa1fa 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -5,6 +5,8 @@ import com.imprint.error.ImprintException; import com.imprint.types.TypeCode; import com.imprint.util.VarInt; +import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; import lombok.Getter; import java.nio.ByteBuffer; @@ -32,9 +34,10 @@ public final class ImprintBuffers { private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) private final ByteBuffer payload; // Read-only payload view - // Lazy-loaded directory state. Needs to maintain ordering so that we can binary search the endOffset - private TreeMap parsedDirectory; + // Lazy-loaded directory state. + private Int2ObjectSortedMap parsedDirectory; private boolean directoryParsed = false; + private int directoryCount = -1; /** * Creates buffers from raw data (used during deserialization). @@ -45,24 +48,26 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { } /** - * Creates buffers from a pre-parsed directory (used during construction). - * This constructor is used by the ImprintRecordBuilder path. It creates - * a serialized directory buffer but defers parsing it into a map until it's actually needed. + * Creates buffers from a pre-sorted list of entries (most efficient builder path). + * Immediately creates the parsed index and the serialized buffer. 
*/ - public ImprintBuffers(Collection directory, ByteBuffer payload) { - this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(Objects.requireNonNull(directory)); + public ImprintBuffers(List sortedDirectory, ByteBuffer payload) { + this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); this.payload = payload.asReadOnlyBuffer(); } /** - * Creates buffers from a pre-parsed and sorted directory map (used by ImprintRecordBuilder). - * This is an optimized path that avoids creating an intermediate List-to-Map conversion. - * This constructor is used by the ImprintRecordBuilder path. It creates - * a serialized directory buffer but defers parsing it into a map until it's actually needed. + * Creates buffers from a pre-parsed and sorted directory map containing final, simple entries. + * This is the most efficient path, as it avoids any further parsing or sorting. The provided + * map becomes the definitive parsed directory. */ - public ImprintBuffers(TreeMap directoryMap, ByteBuffer payload) { - this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(Objects.requireNonNull(directoryMap)); + @SuppressWarnings("unchecked") + public ImprintBuffers(Int2ObjectSortedMap parsedDirectory, ByteBuffer payload) { + this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromSortedMap(Objects.requireNonNull(parsedDirectory)); this.payload = payload.asReadOnlyBuffer(); + this.parsedDirectory = (Int2ObjectSortedMap) parsedDirectory; + this.directoryParsed = true; + this.directoryCount = parsedDirectory.size(); } /** @@ -88,6 +93,28 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { return fieldBuffer; } + /** + * Get a zero-copy ByteBuffer view of a field's data using a pre-fetched DirectoryEntry. + * This avoids the cost of re-finding the entry. 
+ */ + public ByteBuffer getFieldBuffer(DirectoryEntry entry) throws ImprintException { + if (entry == null) + return null; + + int startOffset = entry.getOffset(); + int endOffset = findEndOffset(entry); + + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || + endOffset > payload.limit() || startOffset > endOffset) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); + } + + var fieldBuffer = payload.duplicate(); + fieldBuffer.position(startOffset).limit(endOffset); + return fieldBuffer; + } + /** * Find a directory entry for the given field ID using the most efficient method. *

@@ -98,7 +125,7 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { if (directoryParsed) return parsedDirectory.get(fieldId); - else + else return findFieldEntryInRawDirectory(fieldId); } @@ -117,10 +144,10 @@ public List getDirectory() { public int getDirectoryCount() { if (directoryParsed) return parsedDirectory.size(); + try { - var countBuffer = directoryBuffer.duplicate(); - return VarInt.decode(countBuffer).getValue(); - } catch (Exception e) { + return getOrParseDirectoryCount(); + } catch (ImprintException e) { return 0; } } @@ -145,13 +172,16 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE var searchBuffer = directoryBuffer.duplicate(); searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - int directoryCount = VarInt.decode(searchBuffer).getValue(); - if (directoryCount == 0) + int count = getOrParseDirectoryCount(); + if (count == 0) return null; + // Advance buffer past the varint to get to the start of the entries. + VarInt.decode(searchBuffer); int directoryStartPos = searchBuffer.position(); + int low = 0; - int high = directoryCount - 1; + int high = count - 1; while (low <= high) { int mid = (low + high) >>> 1; @@ -194,19 +224,25 @@ private int findEndOffset(DirectoryEntry entry) throws ImprintException { * Find the end offset using TreeMap's efficient navigation methods. */ private int findNextOffsetInParsedDirectory(int currentFieldId) { - var nextEntry = parsedDirectory.higherEntry(currentFieldId); - return nextEntry != null ? 
nextEntry.getValue().getOffset() : payload.limit(); + var tailMap = parsedDirectory.tailMap(currentFieldId + 1); + if (tailMap.isEmpty()) { + return payload.limit(); + } + return tailMap.get(tailMap.firstIntKey()).getOffset(); } private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { var scanBuffer = directoryBuffer.duplicate(); scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - int count = VarInt.decode(scanBuffer).getValue(); + int count = getOrParseDirectoryCount(); if (count == 0) return payload.limit(); + // Advance buffer past the varint to get to the start of the entries. + VarInt.decode(scanBuffer); int directoryStartPos = scanBuffer.position(); + int low = 0; int high = count - 1; int nextOffset = payload.limit(); @@ -242,63 +278,133 @@ private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintExcep private void ensureDirectoryParsed() { if (directoryParsed) return; + try { var parseBuffer = directoryBuffer.duplicate(); parseBuffer.order(ByteOrder.LITTLE_ENDIAN); - var countResult = VarInt.decode(parseBuffer); - int count = countResult.getValue(); + int count = getOrParseDirectoryCount(parseBuffer); + this.parsedDirectory = new Int2ObjectAVLTreeMap<>(); - this.parsedDirectory = new TreeMap<>(); for (int i = 0; i < count; i++) { var entry = deserializeDirectoryEntry(parseBuffer); - parsedDirectory.put((int)entry.getId(), entry); + this.parsedDirectory.put(entry.getId() , entry); } this.directoryParsed = true; } catch (ImprintException e) { - throw new RuntimeException("Failed to parse directory", e); + // This can happen with a corrupted directory. + // In this case, we'll just have an empty (but valid) parsed directory. 
+ this.parsedDirectory = new Int2ObjectAVLTreeMap<>(); + this.directoryParsed = true; // Mark as parsed to avoid repeated errors } } + private int getOrParseDirectoryCount() throws ImprintException { + if (directoryCount != -1) { + return directoryCount; + } + try { + this.directoryCount = VarInt.decode(directoryBuffer.duplicate()).getValue(); + } catch (ImprintException e) { + this.directoryCount = 0; // Cache as 0 on error + throw e; // rethrow + } + return this.directoryCount; + } + + private int getOrParseDirectoryCount(ByteBuffer buffer) throws ImprintException { + // This method does not cache the count because it's used during parsing + // where the buffer is transient. Caching is only for the instance's primary buffer. + return VarInt.decode(buffer).getValue(); + } + /** - * Create directory buffer from parsed entries. + * Creates a read-only buffer containing the serialized directory. + * The input collection does not need to be sorted. */ static ByteBuffer createDirectoryBuffer(Collection directory) { - try { - int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(bufferSize); - buffer.order(ByteOrder.LITTLE_ENDIAN); + if (directory == null || directory.isEmpty()) { + ByteBuffer buffer = ByteBuffer.allocate(1); + VarInt.encode(0, buffer); + buffer.flip(); + return buffer; + } - VarInt.encode(directory.size(), buffer); - for (var entry : directory) - serializeDirectoryEntry(entry, buffer); + // Ensure sorted order for binary search compatibility. 
+ ArrayList sortedDirectory; + if (directory instanceof ArrayList && isSorted((ArrayList)directory)) { + sortedDirectory = (ArrayList) directory; + } else { + sortedDirectory = new ArrayList<>(directory); + sortedDirectory.sort(null); + } - buffer.flip(); - return buffer.asReadOnlyBuffer(); - } catch (Exception e) { - return ByteBuffer.allocate(0).asReadOnlyBuffer(); + int count = sortedDirectory.size(); + int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); + ByteBuffer buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.encode(count, buffer); + for (DirectoryEntry entry : sortedDirectory) { + serializeDirectoryEntry(entry, buffer); } + + buffer.flip(); + return buffer; } - /** - * Create directory buffer from a pre-sorted map of entries. - */ static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { - try { - int bufferSize = VarInt.encodedLength(directoryMap.size()) + (directoryMap.size() * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(bufferSize); - buffer.order(ByteOrder.LITTLE_ENDIAN); + if (directoryMap == null || directoryMap.isEmpty()) { + ByteBuffer buffer = ByteBuffer.allocate(1); + VarInt.encode(0, buffer); + buffer.flip(); + return buffer; + } + + int count = directoryMap.size(); + int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); + var buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); - VarInt.encode(directoryMap.size(), buffer); - for (var entry : directoryMap.values()) - serializeDirectoryEntry(entry, buffer); + VarInt.encode(count, buffer); + for (var entry : directoryMap.values()) { + serializeDirectoryEntry(entry, buffer); + } + + buffer.flip(); + return buffer; + } + static ByteBuffer createDirectoryBufferFromSortedMap(Int2ObjectSortedMap directoryMap) { + if (directoryMap == null || directoryMap.isEmpty()) { + ByteBuffer buffer = ByteBuffer.allocate(1); + VarInt.encode(0, buffer); buffer.flip(); - 
return buffer.asReadOnlyBuffer(); - } catch (Exception e) { - return ByteBuffer.allocate(0).asReadOnlyBuffer(); + return buffer; + } + + int count = directoryMap.size(); + int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); + var buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.encode(count, buffer); + for (var entry : directoryMap.int2ObjectEntrySet()) { + serializeDirectoryEntry(entry.getValue(), buffer); + } + + buffer.flip(); + return buffer; + } + + private static boolean isSorted(ArrayList list) { + for (int i = 0; i < list.size() - 1; i++) { + if (list.get(i).getId() > list.get(i + 1).getId()) { + return false; + } } + return true; } /** diff --git a/src/main/java/com/imprint/core/ImprintOperations.java b/src/main/java/com/imprint/core/ImprintOperations.java index c4e8c66..0c51e43 100644 --- a/src/main/java/com/imprint/core/ImprintOperations.java +++ b/src/main/java/com/imprint/core/ImprintOperations.java @@ -2,12 +2,14 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import lombok.Value; import lombok.experimental.UtilityClass; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; @UtilityClass public class ImprintOperations { @@ -29,41 +31,34 @@ public class ImprintOperations { * @return New ImprintRecord containing only the requested fields */ public static ImprintRecord project(ImprintRecord record, int... 
fieldIds) { - // Sort and deduplicate field IDs for efficient matching with sorted directory + // Sort and deduplicate field IDs for efficient matching int[] sortedFieldIds = Arrays.stream(fieldIds).distinct().sorted().toArray(); - if (sortedFieldIds.length == 0) + if (sortedFieldIds.length == 0) { return createEmptyRecord(record.getHeader().getSchemaId()); + } - //eager fetch the entire directory (can this be lazy and just done per field?) - var sourceDirectory = record.getDirectory(); var newDirectory = new ArrayList(sortedFieldIds.length); - var ranges = new ArrayList(); - - // Iterate through directory and compute ranges to copy - int fieldIdsIdx = 0; - int directoryIdx = 0; + var payloadChunks = new ArrayList(sortedFieldIds.length); int currentOffset = 0; - while (directoryIdx < sourceDirectory.size() && fieldIdsIdx < sortedFieldIds.length) { - var field = sourceDirectory.get(directoryIdx); - if (field.getId() == sortedFieldIds[fieldIdsIdx]) { - // Calculate field length using next field's offset - int nextOffset = (directoryIdx + 1 < sourceDirectory.size()) ? - sourceDirectory.get(directoryIdx + 1).getOffset() : - record.getBuffers().getPayload().limit(); - int fieldLength = nextOffset - field.getOffset(); - - newDirectory.add(new SimpleDirectoryEntry(field.getId(), field.getTypeCode(), currentOffset)); - ranges.add(new FieldRange(field.getOffset(), nextOffset)); - - currentOffset += fieldLength; - fieldIdsIdx++; + for (int fieldId : sortedFieldIds) { + // Use efficient lookup for each field's metadata. Returns null on failure. + DirectoryEntry sourceEntry = record.getDirectoryEntry(fieldId); + + // If field exists, get its payload and add to the new record components + if (sourceEntry != null) { + ByteBuffer fieldPayload = record.getRawBytes(sourceEntry); + // This check is for internal consistency. If an entry exists, payload should too. 
+ if (fieldPayload != null) { + newDirectory.add(new SimpleDirectoryEntry((short)fieldId, sourceEntry.getTypeCode(), currentOffset)); + payloadChunks.add(fieldPayload); + currentOffset += fieldPayload.remaining(); + } } - directoryIdx++; } - // Build new payload from ranges - var newPayload = buildPayloadFromRanges(record.getBuffers().getPayload(), ranges); + // Build new payload from collected chunks + ByteBuffer newPayload = buildPayloadFromChunks(payloadChunks); // Create new header with updated payload size // TODO: compute correct schema hash @@ -120,12 +115,12 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr secondIdx++; } - currentPayload = first.getRawBytes(currentEntry.getId()); + currentPayload = first.getRawBytes(currentEntry); firstIdx++; } else { // Take from second record currentEntry = secondDir.get(secondIdx); - currentPayload = second.getRawBytes(currentEntry.getId()); + currentPayload = second.getRawBytes(currentEntry); secondIdx++; } @@ -150,37 +145,6 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr return new ImprintRecord(newHeader, newDirectory, mergedPayload); } - /** - * Represents a range of bytes to copy from source payload. - */ - @Value - private static class FieldRange { - int start; - int end; - - int length() { - return end - start; - } - } - - /** - * Build a new payload buffer from field ranges in the source payload. - */ - private static ByteBuffer buildPayloadFromRanges(ByteBuffer sourcePayload, List ranges) { - int totalSize = ranges.stream().mapToInt(FieldRange::length).sum(); - var newPayload = ByteBuffer.allocate(totalSize); - newPayload.order(ByteOrder.LITTLE_ENDIAN); - - for (var range : ranges) { - var sourceSlice = sourcePayload.duplicate(); - sourceSlice.position(range.start).limit(range.end); - newPayload.put(sourceSlice); - } - - newPayload.flip(); - return newPayload; - } - /** * Build a new payload buffer by concatenating chunks. 
*/ diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 385e569..e4c4a42 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -7,6 +7,7 @@ import com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; +import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; import lombok.Getter; import java.nio.ByteBuffer; @@ -35,19 +36,19 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { } /** - * Creates a record from pre-parsed directory (used by ImprintWriter). + * Creates a record from a pre-sorted list of entries (most efficient builder path). */ - ImprintRecord(Header header, Collection directory, ByteBuffer payload) { + ImprintRecord(Header header, List sortedDirectory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(directory, payload); + this.buffers = new ImprintBuffers(sortedDirectory, payload); } /** - * Creates a record from a pre-parsed and sorted directory map (used by ImprintRecordBuilder). + * Creates a record from a pre-built and sorted FastUtil map (most efficient builder path). */ - ImprintRecord(Header header, TreeMap directoryMap, ByteBuffer payload) { + ImprintRecord(Header header, Int2ObjectSortedMap parsedDirectory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(directoryMap, payload); + this.buffers = new ImprintBuffers(parsedDirectory, payload); } // ========== FIELD ACCESS METHODS ========== @@ -79,6 +80,18 @@ public ByteBuffer getRawBytes(int fieldId) { } } + /** + * Get raw bytes for a field using a pre-fetched DirectoryEntry. + * This avoids the cost of re-finding the entry metadata. 
+ */ + public ByteBuffer getRawBytes(DirectoryEntry entry) { + try { + return buffers.getFieldBuffer(entry); + } catch (ImprintException e) { + return null; + } + } + /** * Project a subset of fields from this record. * @@ -108,6 +121,37 @@ public List getDirectory() { return buffers.getDirectory(); } + /** + * Finds a directory entry by its field ID. + * This is an efficient lookup that avoids full directory deserialization if possible. + * + * @param fieldId The ID of the field to find. + * @return The DirectoryEntry if found, otherwise null. + */ + public DirectoryEntry getDirectoryEntry(int fieldId) { + try { + return buffers.findDirectoryEntry(fieldId); + } catch (ImprintException e) { + // This can happen with a corrupted directory, in which case we assume it doesn't exist. + return null; + } + } + + /** + * Checks if a field with the given ID exists in the record. + * + * @param fieldId The ID of the field to check. + * @return true if the field exists, false otherwise. + */ + public boolean hasField(int fieldId) { + try { + return buffers.findDirectoryEntry(fieldId) != null; + } catch (ImprintException e) { + // This can happen with a corrupted directory, in which case we assume it doesn't exist. + return false; + } + } + // ========== TYPED GETTERS ========== public boolean getBoolean(int fieldId) throws ImprintException { @@ -194,7 +238,7 @@ public int estimateSerializedSize() { * This provides a direct serialization path without needing a live ImprintRecord instance. * * @param schemaId The schema identifier for the record. - * @param directory The list of directory entries, which must be sorted by field ID. + * @param directory The list of directory entries, which will be sorted if not already. * @param payload The ByteBuffer containing all field data concatenated. * @return A read-only ByteBuffer with the complete serialized record. 
*/ @@ -215,6 +259,34 @@ public static ByteBuffer serialize(SchemaId schemaId, Collection sortedDirectory, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + // This createDirectoryBuffer is optimized for a pre-sorted list. + var directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Assemble the final record + serializeHeader(header, finalBuffer); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + /** * Serializes the components of a record into a single ByteBuffer using a pre-built directory map. * This provides a direct serialization path without needing a live ImprintRecord instance. @@ -241,6 +313,32 @@ public static ByteBuffer serialize(SchemaId schemaId, TreeMap directoryMap, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + var directoryBuffer = ImprintBuffers.createDirectoryBufferFromSortedMap(directoryMap); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Assemble the final record + serializeHeader(header, finalBuffer); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + // ========== STATIC FACTORY METHODS ========== public static ImprintRecordBuilder builder(SchemaId schemaId) { @@ -348,7 +446,7 @@ private static Header deserializeHeader(ByteBuffer buffer) throws ImprintExcepti byte magic = buffer.get(); if (magic != Constants.MAGIC) { throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" + 
Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); + ", got 0x" + Integer.toHexString(magic & 0xFF)); } byte version = buffer.get(); diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 58fbc63..52bc760 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -1,21 +1,18 @@ package com.imprint.core; -import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; import com.imprint.types.TypeCode; import com.imprint.types.Value; +import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; +import lombok.Getter; +import lombok.Setter; import lombok.SneakyThrows; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; /** * A fluent builder for creating ImprintRecord instances with type-safe, @@ -41,7 +38,7 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final Map fields = new TreeMap<>(); + private final Int2ObjectSortedMap fields = new Int2ObjectAVLTreeMap<>(); private int estimatedPayloadSize = 0; ImprintRecordBuilder(SchemaId schemaId) { @@ -158,8 +155,8 @@ public ImprintRecord build() throws ImprintException { payloadBuffer.flip(); // limit = position, position = 0 var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - var header = new com.imprint.core.Header(new com.imprint.core.Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, new ArrayList<>(fields.values()), payloadView); + var header = new Header(new Flags((byte) 0), schemaId, payloadView.remaining()); + 
return new ImprintRecord(header, fields, payloadView); } /** @@ -284,14 +281,6 @@ private MapKey convertToMapKey(Object obj) { throw new IllegalArgumentException("Unsupported map key type: " + obj.getClass().getName()); } - @Override - public String toString() { - return "ImprintRecordBuilder{" + - "schemaId=" + schemaId + - ", fields=" + fields + - '}'; - } - private int estimatePayloadSize() { // Add 25% buffer to reduce reallocations and handle VarInt encoding fluctuations. return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), fields.size() * 16); @@ -357,48 +346,23 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept } } - // Private inner class to hold field data during building + + @Getter private static class BuilderEntry implements DirectoryEntry { private final short id; private final Value value; + @Setter private int offset; BuilderEntry(short id, Value value) { this.id = id; this.value = value; - this.offset = -1; // Initially unknown - } - - @Override - public short getId() { - return id; + this.offset = -1; } @Override public TypeCode getTypeCode() { return value.getTypeCode(); } - - @Override - public int getOffset() { - return offset; - } - - public void setOffset(int offset) { - this.offset = offset; - } - - public Value getValue() { - return value; - } - - @Override - public String toString() { - return "BuilderEntry{" + - "id=" + id + - ", value=" + value + - ", offset=" + offset + - '}'; - } } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintStream.java b/src/main/java/com/imprint/core/ImprintStream.java index c218318..b6afe7a 100644 --- a/src/main/java/com/imprint/core/ImprintStream.java +++ b/src/main/java/com/imprint/core/ImprintStream.java @@ -1,19 +1,14 @@ package com.imprint.core; import com.imprint.error.ImprintException; +import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; +import 
it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; /** * Provides a framework for lazy, zero-copy transformations of Imprint records. @@ -30,8 +25,6 @@ private ImprintStream(Plan plan) { this.plan = Objects.requireNonNull(plan); } - // ========== PLAN DATA STRUCTURES ========== - /** * The internal representation of the transformation plan. * This is a linked-list style structure where each step points to the previous one. @@ -56,11 +49,11 @@ private SourcePlan(ImprintRecord source) { */ private static final class ProjectPlan implements Plan { final Plan previous; - final Set fieldIds; + final IntSet fieldIds; private ProjectPlan(Plan previous, int... fieldIds) { this.previous = Objects.requireNonNull(previous); - this.fieldIds = new HashSet<>(); + this.fieldIds = new IntOpenHashSet(); for (int id : fieldIds) { this.fieldIds.add(id); } @@ -143,40 +136,41 @@ private Evaluator(Plan plan) { public ImprintRecord execute() { // Unwind the plan's linked-list structure into a forward-order list of operations. - var planList = new ArrayList(); - var current = plan; - while (current != null) { - planList.add(current); - if (current instanceof ProjectPlan) { - current = ((ProjectPlan) current).previous; - } else if (current instanceof MergePlan) { - current = ((MergePlan) current).previous; - } else if (current instanceof SourcePlan) { - current = null; // End of the chain - } - } + var planList = getPlans(); Collections.reverse(planList); - // This map holds the set of fields being built, sorted by ID. 
- var resolvedFields = new TreeMap(); + // This map holds the set of fields being built, sorted by field ID. + var resolvedFields = new Int2ObjectAVLTreeMap(); // Iteratively evaluate the plan step-by-step. for (var planStep : planList) { if (planStep instanceof SourcePlan) { var sourcePlan = (SourcePlan) planStep; for (var entry : sourcePlan.source.getDirectory()) { - resolvedFields.put((int) entry.getId(), new FieldSource(sourcePlan.source, entry)); + resolvedFields.put(entry.getId(), new FieldSource(sourcePlan.source, entry)); } } else if (planStep instanceof ProjectPlan) { var projectPlan = (ProjectPlan) planStep; // Apply projection to the current state of resolved fields. - resolvedFields.keySet().retainAll(projectPlan.fieldIds); + // Keep only fields that are in the projection set + var keysToRemove = new IntOpenHashSet(); + for (int fieldId : resolvedFields.keySet()) { + if (!projectPlan.fieldIds.contains(fieldId)) { + keysToRemove.add(fieldId); + } + } + for (int keyToRemove : keysToRemove) { + resolvedFields.remove(keyToRemove); + } } else if (planStep instanceof MergePlan) { var mergePlan = (MergePlan) planStep; // Add fields from other records if they aren't already in the map. 
for (var otherRecord : mergePlan.others) { for (var entry : otherRecord.getDirectory()) { - resolvedFields.putIfAbsent((int) entry.getId(), new FieldSource(otherRecord, entry)); + int fieldId = entry.getId(); + if (!resolvedFields.containsKey(fieldId)) { + resolvedFields.put(fieldId, new FieldSource(otherRecord, entry)); + } } } } @@ -186,7 +180,23 @@ public ImprintRecord execute() { return build(resolvedFields); } - private ImprintRecord build(TreeMap finalFields) { + private ArrayList getPlans() { + var planList = new ArrayList(); + var current = plan; + while (current != null) { + planList.add(current); + if (current instanceof ProjectPlan) { + current = ((ProjectPlan) current).previous; + } else if (current instanceof MergePlan) { + current = ((MergePlan) current).previous; + } else if (current instanceof SourcePlan) { + current = null; // End of the chain + } + } + return planList; + } + + private ImprintRecord build(Int2ObjectSortedMap finalFields) { if (finalFields.isEmpty()) { // To-Do: Need a way to get the schemaId for an empty record. // For now, returning null or using a default. @@ -199,16 +209,22 @@ private ImprintRecord build(TreeMap finalFields) { } // Determine the schema from the first field's source record. - SchemaId schemaId = finalFields.firstEntry().getValue().record.getHeader().getSchemaId(); + SchemaId schemaId = finalFields.get(finalFields.firstIntKey()).record.getHeader().getSchemaId(); // 1. Calculate final payload size and prepare directory. 
int payloadSize = 0; - var newDirectoryMap = new TreeMap(); - for (var entry : finalFields.entrySet()) { + var newDirectoryMap = new Int2ObjectAVLTreeMap(); + + // Iterate over fields in sorted order + for (var entry : finalFields.int2ObjectEntrySet()) { + int fieldId = entry.getIntKey(); var fieldSource = entry.getValue(); int fieldLength = fieldSource.getLength(); - newDirectoryMap.put(entry.getKey(), new SimpleDirectoryEntry(fieldSource.entry.getId(), fieldSource.entry.getTypeCode(), payloadSize)); + newDirectoryMap.put(fieldId, new SimpleDirectoryEntry( + fieldSource.entry.getId(), + fieldSource.entry.getTypeCode(), + payloadSize)); payloadSize += fieldLength; } @@ -254,4 +270,4 @@ int getLength() { } } } -} \ No newline at end of file +} \ No newline at end of file diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index a81b199..3447f8b 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -19,11 +19,19 @@ public enum TypeCode { ARRAY(0x8, TypeHandler.ARRAY), MAP(0x9, TypeHandler.MAP), ROW(0xA, null); // TODO: implement (basically a placeholder for user-defined type) - + @Getter private final byte code; private final TypeHandler handler; - + + private static final TypeCode[] LOOKUP = new TypeCode[11]; + + static { + for (var type : values()) { + LOOKUP[type.code] = type; + } + } + TypeCode(int code, TypeHandler handler) { this.code = (byte) code; this.handler = handler; @@ -35,14 +43,13 @@ public TypeHandler getHandler() { } return handler; } - + public static TypeCode fromByte(byte code) throws ImprintException { - for (TypeCode type : values()) { - if (type.code == code) { - return type; - } + if (code >= 0 && code < LOOKUP.length) { + var type = LOOKUP[code]; + if (type != null) return type; } - throw new ImprintException(ErrorType.INVALID_TYPE_CODE, - "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); + throw new 
ImprintException(ErrorType.INVALID_TYPE_CODE, + "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java index 634867b..dbc875f 100644 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ b/src/main/java/com/imprint/types/TypeHandler.java @@ -218,7 +218,7 @@ public void serialize(Value value, ByteBuffer buffer) { buffer.put(stringBytes); } } - + @Override public int estimateSize(Value value) { if (value instanceof Value.StringBufferValue) { @@ -227,8 +227,8 @@ public int estimateSize(Value value) { return VarInt.encodedLength(length) + length; } else { Value.StringValue stringValue = (Value.StringValue) value; - byte[] utf8Bytes = stringValue.getUtf8Bytes(); - return VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; + int utf8Length = stringValue.getUtf8Length(); // Uses cached bytes + return VarInt.encodedLength(utf8Length) + utf8Length; } } }; diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index bfa9958..681eda1 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -57,10 +57,11 @@ public static Value fromBytesBuffer(ByteBuffer value) { public static Value fromString(String value) { return new StringValue(value); } - + public static Value fromStringBuffer(ByteBuffer value) { return new StringBufferValue(value); } + public static Value fromArray(List value) { return new ArrayValue(value); @@ -284,20 +285,21 @@ public String toString() { public static class StringValue extends Value { @Getter private final String value; - private volatile byte[] cachedUtf8Bytes; // Cache UTF-8 encoding + private byte[] utf8BytesCache; // Cache UTF-8 encoding public StringValue(String value) { this.value = Objects.requireNonNull(value, "String cannot be null"); } public byte[] getUtf8Bytes() { - var cached = 
cachedUtf8Bytes; - if (cached == null) { - // UTF8 is idempotent so no need to synchronize - cached = value.getBytes(StandardCharsets.UTF_8); - cachedUtf8Bytes = cached; + if (utf8BytesCache == null) { + utf8BytesCache = value.getBytes(StandardCharsets.UTF_8); } - return cached; // Return computed value + return utf8BytesCache; + } + + public int getUtf8Length() { + return getUtf8Bytes().length; } @Override @@ -332,7 +334,7 @@ public String toString() { // String Value (ByteBuffer-based) public static class StringBufferValue extends Value { private final ByteBuffer value; - private volatile String cachedString; + private String cachedString; private static final int THREAD_LOCAL_BUFFER_SIZE = 1024; private static final ThreadLocal DECODE_BUFFER_CACHE = diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index f43683b..70c9095 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -70,13 +70,10 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { int bytesRead = 0; while (true) { - if (bytesRead >= MAX_VARINT_LEN) { + if (bytesRead >= MAX_VARINT_LEN) throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt too long"); - } - if (!buffer.hasRemaining()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Unexpected end of data while reading VarInt"); - } + if (!buffer.hasRemaining()) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Unexpected end of data while reading VarInt"); byte b = buffer.get(); bytesRead++; diff --git a/src/test/java/com/imprint/core/ImprintStreamTest.java b/src/test/java/com/imprint/core/ImprintStreamTest.java new file mode 100644 index 0000000..8d5b843 --- /dev/null +++ b/src/test/java/com/imprint/core/ImprintStreamTest.java @@ -0,0 +1,78 @@ +package com.imprint.core; + +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +class ImprintStreamTest { + + 
@Test + void shouldProjectAndMergeCorrectly() throws Exception { + // --- Setup --- + var schemaId1 = new SchemaId(1, 1); + var schemaId2 = new SchemaId(2, 2); + var schemaId3 = new SchemaId(3, 3); + + ImprintRecord recordA = ImprintRecord.builder(schemaId1) + .field(1, "A1") + .field(2, 100) + .field(3, true) + .build(); + + ImprintRecord recordB = ImprintRecord.builder(schemaId2) + .field(2, 200) // Overlaps with A, should be ignored + .field(4, "B4") + .build(); + + ImprintRecord recordC = ImprintRecord.builder(schemaId3) + .field(5, 3.14) + .field(1, "C1") // Overlaps with A, should be ignored + .build(); + + // --- Execution --- + // Define a chain of operations + ImprintRecord finalRecord = ImprintStream.of(recordA) + .project(1, 3) // Keep {1, 3} from A. Current state: {1:A, 3:A} + .mergeWith(recordB) // Merge B. {2:B, 4:B} are added. Current state: {1:A, 3:A, 2:B, 4:B} + .mergeWith(recordC) // Merge C. {5:C} is added. {1:C} is ignored. Final state: {1:A, 3:A, 2:B, 4:B, 5:C} + .project(1, 4, 5) // Final projection. Final result: {1:A, 4:B, 5:C} + .toRecord(); + + // --- Assertions --- + assertNotNull(finalRecord); + + // Check final field count. 
+ assertEquals(3, finalRecord.getDirectory().size()); + + // Check that the correct fields are present and have the right values + assertTrue(finalRecord.hasField(1)); + assertEquals("A1", finalRecord.getString(1)); // From recordA + + assertTrue(finalRecord.hasField(4)); + assertEquals("B4", finalRecord.getString(4)); // From recordB + + assertTrue(finalRecord.hasField(5)); + assertEquals(3.14, finalRecord.getFloat64(5), 0.001); // From recordC + + // Check that dropped/ignored fields are not present + assertFalse(finalRecord.hasField(2)); + assertFalse(finalRecord.hasField(3)); + } + + @Test + void shouldProjectAfterMerge() throws Exception { + var recordA = ImprintRecord.builder(new SchemaId(1, 1)).field(1, "A").field(2, 100).build(); + var recordB = ImprintRecord.builder(new SchemaId(1, 1)).field(2, 200).field(3, "B").build(); + + ImprintRecord finalRecord = ImprintStream.of(recordA) + .mergeWith(recordB) // virtual record is {1:A, 2:A, 3:B} + .project(1, 3) // final record is {1:A, 3:B} + .toRecord(); + + assertEquals(2, finalRecord.getDirectory().size()); + assertTrue(finalRecord.hasField(1)); + assertEquals("A", finalRecord.getString(1)); + assertTrue(finalRecord.hasField(3)); + assertEquals("B", finalRecord.getString(3)); + assertFalse(finalRecord.hasField(2)); + } +} \ No newline at end of file diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3804722..3cfa61f 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -1,13 +1,15 @@ package com.imprint.profile; +import com.imprint.core.ImprintOperations; import com.imprint.core.ImprintRecord; import com.imprint.core.SchemaId; import com.imprint.types.Value; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import java.util.Arrays; import java.util.Random; -import java.util.UUID; +import java.util.stream.IntStream; /** * A test designed for 
profiling hotspots during development. @@ -27,183 +29,401 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -@Disabled("Enable manually for profiling") +//@Disabled("Enable manually for profiling") public class ProfilerTest { - + private static final int ITERATIONS = 1_000_000; private static final int RECORD_SIZE = 50; - + private static final int LARGE_RECORD_SIZE = 200; + @Test void profileFieldAccess() throws Exception { + System.out.println("Starting profiler test - attach profiler now..."); + Thread.sleep(5000); // Give time to attach profiler + + // Create a representative record var record = createTestRecord(); - runProfileTest("Field Access", () -> { - // Simulate real-world access patterns - Random random = new Random(42); - int hits = 0; - - for (int i = 0; i < ITERATIONS; i++) { - // Random field access (hotspot) - int fieldId = random.nextInt(RECORD_SIZE) + 1; - var value = record.getValue(fieldId); - if (value != null) { - hits++; - - // Trigger string decoding (potential hotspot) - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } else { - ((Value.StringValue) value).getValue(); - } + System.out.println("Beginning field access profiling..."); + long start = System.nanoTime(); + + // Simulate real-world access patterns + Random random = new Random(42); + int hits = 0; + + for (int i = 0; i < ITERATIONS; i++) { + // Random field access (hotspot) + int fieldId = random.nextInt(RECORD_SIZE) + 1; + var value = record.getValue(fieldId); + if (value != null) { + hits++; + + // Trigger string decoding (potential hotspot) + if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { + if (value instanceof Value.StringBufferValue) { + ((Value.StringBufferValue) value).getValue(); + } else { + ((Value.StringValue) value).getValue(); } } + } - // Some raw access (zero-copy path) - if (i % 10 == 0) { - 
record.getRawBytes(fieldId); - } + // Some raw access (zero-copy path) + if (i % 10 == 0) { + record.getRawBytes(fieldId); } - }); + } + + long duration = System.nanoTime() - start; + System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", + ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); } - + @Test - void profileSerialization() throws Exception { - var schemaId = new SchemaId(1, 0x12345678); + void profileSmallRecordSerialization() throws Exception { + profileSerialization("small records", RECORD_SIZE, 100_000); + } - runProfileTest("Serialization (Standard)", () -> { - // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 500_000; i++) { - var builder = ImprintRecord.builder(schemaId); + @Test + void profileLargeRecordSerialization() throws Exception { + profileSerialization("large records", LARGE_RECORD_SIZE, 500_000); + } + + @Test + void profileProjectionOperations() throws Exception { + System.out.println("Starting projection profiler test - attach profiler now..."); + Thread.sleep(3000); - // Add various field types - builder.field(1, Value.fromInt32(i)) - .field(2, Value.fromString("test-string-" + i)) - .field(3, Value.fromFloat64(i * 3.14159)) - .field(4, Value.fromBytes(("bytes-" + i).getBytes())); + profileSmallProjections(); + profileLargeProjections(); + profileSelectiveProjections(); + profileProjectionMemoryAllocation(); + } - var record = builder.build(); - var serialized = record.serializeToBuffer(); // Potential hotspot + /** + * Profile small projections (select 2-5 fields from 20-field records) + */ + private void profileSmallProjections() throws Exception { + System.out.println("\\n--- Small Projections (2-5 fields from 20-field records) ---"); - // Trigger some deserialization - if (i % 1000 == 0) { - var deserialized = ImprintRecord.deserialize(serialized); - deserialized.getValue(2); // String decoding hotspot - } + var sourceRecord = 
createTestRecord(20); + int[] projectFields = {1, 5, 10, 15}; // 4 fields + int iterations = 500_000; + + System.out.printf("Beginning small projection profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + // This is the hotspot we want to profile + var projected = ImprintOperations.project(sourceRecord, projectFields); + + // Simulate some usage to prevent dead code elimination + if (i % 10_000 == 0) { + projected.getValue(1); // Trigger value decoding + projected.getRawBytes(5); // Trigger raw access } - }); + projected.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Small projections: %.2f ms (avg: %.1f μs/projection)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - @Test - void profileLargeObjectSerialization() throws Exception { - var schemaId = new SchemaId(3, 0xabcdef12); - var largeRecord = createVeryLargeRecord(); // A single large record to be re-serialized - - runProfileTest("Serialization (Large Object)", () -> { - // Re-serialize the same large object to focus on serialization logic - // rather than object creation. 
- for (int i = 0; i < 100_000; i++) { - var serialized = largeRecord.serializeToBuffer(); // Hotspot - - if (i % 1000 == 0) { - var deserialized = ImprintRecord.deserialize(serialized); - deserialized.getValue(10); // Access a field to ensure it works - } + /** + * Profile large projections (select 50-100 fields from 200-field records) + */ + private void profileLargeProjections() throws Exception { + System.out.println("\\n--- Large Projections (50 fields from 200-field records) ---"); + + var sourceRecord = createTestRecord(200); + // Select every 4th field for projection + int[] projectFields = IntStream.range(0, 50) + .map(i -> (i * 4) + 1) + .toArray(); + int iterations = 50_000; + + System.out.printf("Beginning large projection profiling (%,d iterations, %d->%d fields)...%n", + iterations, 200, projectFields.length); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var projected = ImprintOperations.project(sourceRecord, projectFields); + + // Periodically access some fields to simulate real usage + if (i % 1_000 == 0) { + projected.getValue(1); + projected.getValue(25); + projected.getValue(49); } - }); + projected.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Large projections: %.2f ms (avg: %.1f μs/projection)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - - @Test - void profileProjection() throws Exception { - var record = createLargeRecord(); - - runProfileTest("Projection", () -> { - // Simulate analytical workload - project subset of fields repeatedly - for (int i = 0; i < 50_000; i++) { - // Project 10 fields out of 100 (common analytical pattern) - for (int fieldId = 1; fieldId <= 10; fieldId++) { - var value = record.getValue(fieldId); - if (value != null) { - // Force materialization of string values - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) 
value).getValue(); - } - } - } + + /** + * Profile selective projections with different selectivity patterns + */ + private void profileSelectiveProjections() throws Exception { + System.out.println("\\n--- Selective Projections (various patterns) ---"); + + var sourceRecord = createTestRecord(100); + Random random = new Random(42); + int iterations = 100_000; + + // Test different projection patterns + var patterns = new ProjectionPattern[]{ + new ProjectionPattern("First few fields", new int[]{1, 2, 3, 4, 5}), + new ProjectionPattern("Last few fields", new int[]{96, 97, 98, 99, 100}), + new ProjectionPattern("Scattered fields", new int[]{1, 15, 33, 67, 89, 100}), + new ProjectionPattern("Random fields", generateRandomFields(random, 100, 10)) + }; + + for (var pattern : patterns) { + System.out.printf("Testing pattern: %s (%d fields)%n", + pattern.name, pattern.fields.length); + + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var projected = ImprintOperations.project(sourceRecord, pattern.fields); + + // Simulate field access + if (i % 5_000 == 0) { + projected.getValue(pattern.fields[0]); } + projected.serializeToBuffer(); } - }); + + long duration = System.nanoTime() - start; + System.out.printf(" %s: %.2f ms (avg: %.1f μs/projection)%n", + pattern.name, duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } } - + + /** + * Profile memory allocation patterns during projection + */ + private void profileProjectionMemoryAllocation() throws Exception { + System.out.println("\\n--- Projection Memory Allocation Profiling ---"); + System.out.println("Watch for allocation hotspots and GC pressure..."); + + var sourceRecord = createTestRecord(50); + int[] projectFields = {1, 5, 10, 15, 20, 25}; // 6 fields + + System.out.println("Beginning projection allocation test..."); + + // Create allocation pressure to identify hotspots + for (int batch = 0; batch < 1000; batch++) { + for (int i = 0; i < 1000; i++) { + // This should 
reveal allocation hotspots in: + // 1. ArrayList creation + // 2. ByteBuffer allocation for new payload + // 3. FieldRange objects + // 4. SimpleDirectoryEntry creation + var projected = ImprintOperations.project(sourceRecord, projectFields); + + // Force some field access to trigger additional allocations + projected.getValue(1); // String decoding allocation + projected.getValue(5); // Value wrapper allocation + projected.getRawBytes(10); // ByteBuffer slicing + } + + if (batch % 100 == 0) { + System.out.printf("Allocation batch %d/1000 complete%n", batch); + } + } + + System.out.println("Projection allocation test complete"); + } + + /** + * Profile the component operations within projection to identify bottlenecks + */ @Test - void profileMemoryAllocation() throws Exception { - runProfileTest("Memory Allocation", () -> { - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var builder = ImprintRecord.builder(schemaId); - - // Create strings of varying sizes (allocation pressure) - builder.field(1, Value.fromString("small")) - .field(2, Value.fromString("medium-length-string-" + i)) - .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = builder.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); - } + void profileProjectionComponents() throws Exception { + System.out.println("\\n=== Projection Component Profiling ==="); + Thread.sleep(2000); + + var sourceRecord = createTestRecord(100); + int[] projectFields = {1, 10, 20, 30, 40, 50}; + int iterations = 100_000; - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); + // Profile individual components that might be hotspots: + + // 1. 
Field ID sorting and deduplication + System.out.println("Profiling field ID sorting..."); + long start = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + // This mimics the sorting done in project() + int[] sorted = Arrays.stream(projectFields).distinct().sorted().toArray(); + blackhole(sorted); // Prevent optimization + } + long sortTime = System.nanoTime() - start; + System.out.printf("Field sorting: %.2f ms (%.1f ns/op)%n", + sortTime / 1_000_000.0, (double) sortTime / iterations); + + // 2. Directory scanning and range calculation + System.out.println("Profiling directory scanning..."); + var directory = sourceRecord.getDirectory(); + start = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + // Simulate the directory scanning logic + int foundFields = 0; + for (var entry : directory) { + for (int fieldId : projectFields) { + if (entry.getId() == fieldId) { + foundFields++; + break; + } } } - }, false); // Disable final time reporting as it's not relevant here + blackhole(foundFields); + } + long scanTime = System.nanoTime() - start; + System.out.printf("Directory scanning: %.2f ms (%.1f ns/op)%n", + scanTime / 1_000_000.0, (double) scanTime / iterations); + + // 3. 
ByteBuffer operations (payload copying) + System.out.println("Profiling ByteBuffer operations..."); + var payload = sourceRecord.getBuffers().getPayload(); + start = System.nanoTime(); + for (int i = 0; i < iterations / 10; i++) { // Fewer iterations for heavy operation + // Simulate payload copying + var newPayload = java.nio.ByteBuffer.allocate(100); + newPayload.order(java.nio.ByteOrder.LITTLE_ENDIAN); + + // Copy some ranges (like buildPayloadFromRanges does) + for (int j = 0; j < 6; j++) { + var slice = payload.duplicate(); + slice.position(j * 10).limit((j + 1) * 10); + newPayload.put(slice); + } + newPayload.flip(); + blackhole(newPayload); + } + long bufferTime = System.nanoTime() - start; + System.out.printf("ByteBuffer operations: %.2f ms (%.1f μs/op)%n", + bufferTime / 1_000_000.0, (double) bufferTime / (iterations / 10) / 1000.0); } - - // ========== Test Helpers ========== /** - * A wrapper to run a profiling test with boilerplate for timing and setup. - * @param testName The name of the test to print. - * @param testLogic The core logic of the test, passed as a lambda. + * Profile serialization performance with records of a given size. + * This method abstracts the core serialization profiling logic to work + * with records of different sizes and complexities. 
*/ - private void runProfileTest(String testName, ThrowingRunnable testLogic) throws Exception { - runProfileTest(testName, testLogic, true); - } - - private void runProfileTest(String testName, ThrowingRunnable testLogic, boolean reportTime) throws Exception { - System.out.printf("===== Starting Profiler Test: %s =====%n", testName); - System.out.println("Attach profiler now..."); - Thread.sleep(3000); // Give time to attach profiler + private void profileSerialization(String testName, int recordSize, int iterations) throws Exception { + System.out.printf("Starting %s serialization profiler test...%n", testName); + Thread.sleep(3000); - System.out.printf("Beginning %s profiling...%n", testName); + var schemaId = new SchemaId(1, 0x12345678); + + System.out.printf("Beginning %s serialization profiling (%,d iterations, %d fields)...%n", + testName, iterations, recordSize); long start = System.nanoTime(); - testLogic.run(); + // Create and serialize many records (allocation hotspot) + for (int i = 0; i < iterations; i++) { + var builder = ImprintRecord.builder(schemaId); - if (reportTime) { - long duration = System.nanoTime() - start; - System.out.printf("===== Completed %s in %.2f ms =====%n%n", testName, duration / 1_000_000.0); - } else { - System.out.printf("===== %s profiling complete. Check profiler output. 
=====%n%n", testName); + // Add various field types based on recordSize + for (int fieldId = 1; fieldId <= recordSize; fieldId++) { + switch (fieldId % 7) { + case 0: + builder.field(fieldId, Value.fromInt32(i + fieldId)); + break; + case 1: + builder.field(fieldId, Value.fromInt64(i * 1000L + fieldId)); + break; + case 2: + builder.field(fieldId, Value.fromString("test-string-" + i + "-" + fieldId)); + break; + case 3: + builder.field(fieldId, Value.fromString("longer-descriptive-text-for-field-" + fieldId + "-iteration-" + i)); + break; + case 4: + builder.field(fieldId, Value.fromFloat64(i * 3.14159 + fieldId)); + break; + case 5: + builder.field(fieldId, Value.fromBytes(("bytes-" + i + "-" + fieldId).getBytes())); + break; + case 6: + builder.field(fieldId, Value.fromBoolean((i + fieldId) % 2 == 0)); + break; + } + } + + var record = builder.build(); + var serialized = record.serializeToBuffer(); + + // Trigger some deserialization periodically + if (i % Math.max(1, iterations / 100) == 0) { + var deserialized = ImprintRecord.deserialize(serialized); + // Access a few random fields to trigger value decoding + for (int fieldId = 1; fieldId <= Math.min(5, recordSize); fieldId++) { + deserialized.getValue(fieldId); // String decoding hotspot + } + } + + // Progress indicator for long-running tests + if (i > 0 && i % Math.max(1, iterations / 10) == 0) { + System.out.printf("Completed %,d/%,d iterations (%.1f%%)%n", + i, iterations, (double) i / iterations * 100); + } } + + long duration = System.nanoTime() - start; + System.out.printf("Completed %s serialization test in %.2f ms (avg: %.1f μs/record)%n", + testName, duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - /** A functional interface that allows for exceptions, for use in lambdas. 
*/ - @FunctionalInterface - private interface ThrowingRunnable { - void run() throws Exception; + @Test + void profileMemoryAllocation() throws Exception { + System.out.println("Starting allocation profiler test..."); + Thread.sleep(3000); + + System.out.println("Beginning allocation profiling - watch for GC events..."); + + // Force allocation pressure to reveal GC hotspots + for (int batch = 0; batch < 1000; batch++) { + for (int i = 0; i < 1000; i++) { + var schemaId = new SchemaId(batch, i); + var builder = ImprintRecord.builder(schemaId); + + // Create strings of varying sizes (allocation pressure) + builder.field(1, Value.fromString("small")) + .field(2, Value.fromString("medium-length-string-" + i)) + .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) + .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays + + var record = builder.build(); + + // Some deserialization to trigger string decoding allocations + record.getValue(2); + record.getValue(3); + } + + if (batch % 100 == 0) { + System.out.printf("Completed batch %d/1000%n", batch); + } + } + + System.out.println("Allocation test complete - check GC logs and memory profiler"); } + // Helper methods and classes + private ImprintRecord createTestRecord() throws Exception { + return createTestRecord(RECORD_SIZE); + } + + private ImprintRecord createTestRecord(int recordSize) throws Exception { var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); - - for (int i = 1; i <= RECORD_SIZE; i++) { + + for (int i = 1; i <= recordSize; i++) { switch (i % 4) { case 0: builder.field(i, Value.fromInt32(i * 100)); @@ -219,72 +439,31 @@ private ImprintRecord createTestRecord() throws Exception { break; } } - + return builder.build(); } - - private ImprintRecord createLargeRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(2, 0xcafebabe)); - - // Create 100 fields with realistic data - for (int i = 1; i 
<= 100; i++) { - switch (i % 5) { - case 0: - builder.field(i, Value.fromInt32(i)); - break; - case 1: - builder.field(i, Value.fromString("user-name-" + i + "@example.com")); - break; - case 2: - builder.field(i, Value.fromString("Some longer descriptive text for field " + i + " that might represent a comment or description")); - break; - case 3: - builder.field(i, Value.fromFloat64(i * 2.718281828)); - break; - case 4: - builder.field(i, Value.fromBytes(String.format("binary-data-%04d", i).getBytes())); - break; - } + + private static class ProjectionPattern { + final String name; + final int[] fields; + + ProjectionPattern(String name, int[] fields) { + this.name = name; + this.fields = fields; } - - return builder.build(); } - private ImprintRecord createVeryLargeRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(3, 0xabcdef12)); - var random = new Random(123); + private int[] generateRandomFields(Random random, int maxField, int count) { + return random.ints(count, 1, maxField + 1) + .distinct() + .sorted() + .toArray(); + } - // Create 200 fields of varying types and sizes - for (int i = 1; i <= 200; i++) { - switch (i % 6) { - case 0: - builder.field(i, i * random.nextInt()); - break; - case 1: - // Medium string - builder.field(i, "user-id-" + UUID.randomUUID().toString()); - break; - case 2: - // Large string - builder.field(i, "This is a much larger text block for field " + i + ". It simulates a user comment, a description, or some other form of semi-structured text data. We repeat a sentence to make it longer. This is a much larger text block for field " + i + ". 
It simulates a user comment, a description, or some other form of semi-structured text data."); - break; - case 3: - builder.field(i, random.nextDouble() * 1000); - break; - case 4: - // Small byte array - var smallBytes = new byte[32]; - random.nextBytes(smallBytes); - builder.field(i, smallBytes); - break; - case 5: - // Large byte array - var largeBytes = new byte[1024]; - random.nextBytes(largeBytes); - builder.field(i, largeBytes); - break; - } + private void blackhole(Object obj) { + // Prevent dead code elimination + if (obj.hashCode() == System.nanoTime()) { + System.out.println("Never happens"); } - return builder.build(); } } \ No newline at end of file From 6d36b661baf9ffc9c7b3bbf72db7de34c5fd8ec5 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 10 Jun 2025 20:38:39 -0400 Subject: [PATCH 43/53] final optimization and reorganization into better project structure --- build.gradle | 45 +- .../benchmark/ComparisonBenchmark.java | 44 +- .../com/imprint/benchmark/Competitor.java | 12 - .../com/imprint/benchmark/MergeBenchmark.java | 157 ------- ...java => AbstractSerializingBenchmark.java} | 4 +- ...tor.java => AvroSerializingBenchmark.java} | 4 +- ...a => FlatBuffersSerializingBenchmark.java} | 5 +- ....java => ImprintSerializingBenchmark.java} | 7 +- ....java => JacksonSerializingBenchmark.java} | 4 +- ...tor.java => KryoSerializingBenchmark.java} | 4 +- ...a => MessagePackSerializingBenchmark.java} | 4 +- ...java => ProtobufSerializingBenchmark.java} | 4 +- ...petitor.java => SerializingBenchmark.java} | 2 +- ...r.java => ThriftSerializingBenchmark.java} | 5 +- src/jmh/sbe/test_record.xml | 61 --- src/main/java/com/imprint/core/Directory.java | 69 +++ .../java/com/imprint/core/DirectoryEntry.java | 24 -- .../java/com/imprint/core/ImprintBuffers.java | 83 ++-- .../java/com/imprint/core/ImprintRecord.java | 99 +---- .../imprint/core/ImprintRecordBuilder.java | 31 +- .../imprint/core/SimpleDirectoryEntry.java | 22 - .../{core => 
ops}/ImprintOperations.java | 51 ++- .../{core => stream}/ImprintStream.java | 98 ++--- src/main/java/com/imprint/types/Value.java | 2 +- .../{core => ops}/ImprintOperationsTest.java | 22 +- .../com/imprint/profile/ProfilerTest.java | 394 ++++++++---------- .../{core => stream}/ImprintStreamTest.java | 17 +- 27 files changed, 414 insertions(+), 860 deletions(-) delete mode 100644 src/jmh/java/com/imprint/benchmark/Competitor.java delete mode 100644 src/jmh/java/com/imprint/benchmark/MergeBenchmark.java rename src/jmh/java/com/imprint/benchmark/competitors/{AbstractCompetitor.java => AbstractSerializingBenchmark.java} (83%) rename src/jmh/java/com/imprint/benchmark/competitors/{AvroCompetitor.java => AvroSerializingBenchmark.java} (98%) rename src/jmh/java/com/imprint/benchmark/competitors/{FlatBuffersCompetitor.java => FlatBuffersSerializingBenchmark.java} (97%) rename src/jmh/java/com/imprint/benchmark/competitors/{ImprintCompetitor.java => ImprintSerializingBenchmark.java} (95%) rename src/jmh/java/com/imprint/benchmark/competitors/{JacksonJsonCompetitor.java => JacksonSerializingBenchmark.java} (96%) rename src/jmh/java/com/imprint/benchmark/competitors/{KryoCompetitor.java => KryoSerializingBenchmark.java} (97%) rename src/jmh/java/com/imprint/benchmark/competitors/{MessagePackCompetitor.java => MessagePackSerializingBenchmark.java} (95%) rename src/jmh/java/com/imprint/benchmark/competitors/{ProtobufCompetitor.java => ProtobufSerializingBenchmark.java} (96%) rename src/jmh/java/com/imprint/benchmark/competitors/{Competitor.java => SerializingBenchmark.java} (92%) rename src/jmh/java/com/imprint/benchmark/competitors/{ThriftCompetitor.java => ThriftSerializingBenchmark.java} (97%) delete mode 100644 src/jmh/sbe/test_record.xml create mode 100644 src/main/java/com/imprint/core/Directory.java delete mode 100644 src/main/java/com/imprint/core/DirectoryEntry.java delete mode 100644 src/main/java/com/imprint/core/SimpleDirectoryEntry.java rename 
src/main/java/com/imprint/{core => ops}/ImprintOperations.java (82%) rename src/main/java/com/imprint/{core => stream}/ImprintStream.java (69%) rename src/test/java/com/imprint/{core => ops}/ImprintOperationsTest.java (96%) rename src/test/java/com/imprint/{core => stream}/ImprintStreamTest.java (85%) diff --git a/build.gradle b/build.gradle index d3480e6..26b2be5 100644 --- a/build.gradle +++ b/build.gradle @@ -42,7 +42,7 @@ dependencies { // Suppress SLF4J warnings jmhImplementation 'org.slf4j:slf4j-nop:1.7.36' - // Competitor libraries for benchmarking (JMH only) + // Other serialization libraries for benchmarking (JMH only) jmhImplementation 'com.google.protobuf:protobuf-java:3.25.1' jmhImplementation 'org.apache.avro:avro:1.11.3' jmhImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.0' @@ -51,15 +51,8 @@ dependencies { jmhImplementation 'org.msgpack:msgpack-core:0.9.8' jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' jmhImplementation 'org.apache.thrift:libthrift:0.19.0' - - // SBE for benchmarking - jmhImplementation 'uk.co.real-logic:sbe-all:1.35.3' - jmhImplementation 'io.aeron:aeron-client:1.41.2' // SBE has a dependency on Agrona, included in aeron-client - - // FastUtil for high-performance primitive collections - implementation 'it.unimi.dsi:fastutil:8.5.12' // Required for generated Thrift code on JDK 11+ - implementation 'javax.annotation:javax.annotation-api:1.3.2' + jmhImplementation 'javax.annotation:javax.annotation-api:1.3.2' } protobuf { @@ -148,9 +141,10 @@ tasks.register('generateFlatBuffers', Exec) { } // Task to download the Thrift compiler -task downloadThrift(type: Exec) { +tasks.register('downloadThrift', Exec) { description = 'Download Thrift compiler' group = 'build setup' + def thriftVersion = "0.19.0" def thriftExecutable = file("${buildDir}/thrift/thrift.exe") def thriftUrl = "https://archive.apache.org/dist/thrift/${thriftVersion}/thrift-${thriftVersion}.exe" @@ -174,8 +168,8 @@ task 
downloadThrift(type: Exec) { } // Task to generate Java code from Thrift IDL files for JMH benchmarks -task generateJmhThrift(type: Exec) { - dependsOn downloadThrift +tasks.register('generateJmhThrift', Exec) { + dependsOn tasks.downloadThrift description = 'Generate Java classes from Thrift schema' group = 'build' @@ -196,33 +190,6 @@ task generateJmhThrift(type: Exec) { } } -// Task for SBE code generation -task generateSbe(type: JavaExec) { - description = 'Generate Java classes from SBE schema' - group = 'build' - - def outputDir = file("${buildDir}/generated/sbe/java") - def schemaFile = file('src/jmh/sbe/schema.xml') - def sbeXsd = file('src/jmh/sbe/sbe.xsd') - - // Ensure the sbe-tool is on the classpath for this task - classpath = sourceSets.jmh.runtimeClasspath - - main = 'uk.co.real_logic.sbe.SbeTool' - systemProperties = [ - "sbe.output.dir": outputDir.absolutePath, - "sbe.validation.xsd": sbeXsd.absolutePath - ] - args = [ schemaFile.absolutePath ] - - inputs.file(schemaFile) - inputs.file(sbeXsd) - outputs.dir(outputDir) - - doFirst { - outputDir.mkdirs() - } -} // Add generated FlatBuffers sources to JMH source set sourceSets { diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index f2c7398..92b3ceb 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -1,7 +1,7 @@ package com.imprint.benchmark; import com.imprint.benchmark.competitors.*; -import com.imprint.benchmark.competitors.Competitor; +import com.imprint.benchmark.competitors.SerializingBenchmark; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -9,7 +9,6 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; @@ -21,61 
+20,60 @@ @Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { - private static final List COMPETITORS = Arrays.asList( - new ImprintCompetitor(), - new JacksonJsonCompetitor(), - new ProtobufCompetitor(), - new FlatBuffersCompetitor(), - new AvroCompetitor(), - new ThriftCompetitor(), - new KryoCompetitor(), - new MessagePackCompetitor() + private static final List FRAMEWORKS = List.of( + new ImprintSerializingBenchmark(), + new JacksonSerializingBenchmark(), + new ProtobufSerializingBenchmark(), + new FlatBuffersSerializingBenchmark(), + new AvroSerializingBenchmark(), + new ThriftSerializingBenchmark(), + new KryoSerializingBenchmark(), + new MessagePackSerializingBenchmark() ); @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack"}) - public String competitorName; + public String framework; - private Competitor competitor; + private SerializingBenchmark serializingBenchmark; @Setup(Level.Trial) public void setup() { - // Find the competitor implementation - competitor = COMPETITORS.stream() - .filter(c -> c.name().equals(competitorName)) + serializingBenchmark = FRAMEWORKS.stream() + .filter(c -> c.name().equals(framework)) .findFirst() - .orElseThrow(() -> new IllegalStateException("Unknown competitor: " + competitorName)); + .orElseThrow(() -> new IllegalStateException("Unknown framework: " + framework)); // Create the test data DataGenerator.TestRecord testRecord1 = DataGenerator.createTestRecord(); DataGenerator.TestRecord testRecord2 = DataGenerator.createTestRecord(); // Setup the competitor with the data - competitor.setup(testRecord1, testRecord2); + serializingBenchmark.setup(testRecord1, testRecord2); } @Benchmark public void serialize(Blackhole bh) { - competitor.serialize(bh); + serializingBenchmark.serialize(bh); } @Benchmark public void deserialize(Blackhole bh) { - competitor.deserialize(bh); + serializingBenchmark.deserialize(bh); } @Benchmark public void 
projectAndSerialize(Blackhole bh) { - competitor.projectAndSerialize(bh); + serializingBenchmark.projectAndSerialize(bh); } @Benchmark public void mergeAndSerialize(Blackhole bh) { - competitor.mergeAndSerialize(bh); + serializingBenchmark.mergeAndSerialize(bh); } @Benchmark public void accessField(Blackhole bh) { - competitor.accessField(bh); + serializingBenchmark.accessField(bh); } public static void main(String[] args) throws RunnerException { diff --git a/src/jmh/java/com/imprint/benchmark/Competitor.java b/src/jmh/java/com/imprint/benchmark/Competitor.java deleted file mode 100644 index 5f92929..0000000 --- a/src/jmh/java/com/imprint/benchmark/Competitor.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.imprint.benchmark; - -import org.openjdk.jmh.infra.Blackhole; - -public interface Competitor { - String name(); - void setup(); - void serialize(Blackhole bh); - void deserialize(Blackhole bh); - void projectAndSerialize(Blackhole bh); - void mergeAndSerialize(Blackhole bh); -} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java b/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java deleted file mode 100644 index 63e43e6..0000000 --- a/src/jmh/java/com/imprint/benchmark/MergeBenchmark.java +++ /dev/null @@ -1,157 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintRecordBuilder; -import com.imprint.core.SchemaId; -import com.imprint.types.Value; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; - -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -/** - * Benchmarks for ImprintRecord merge operations. - * NOTE: These benchmarks simulate merge operations until the actual merge API is implemented. 
- */ -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -public class MergeBenchmark { - - private ImprintRecord productRecord; - private ImprintRecord orderRecord; - private ImprintRecord customerRecord; - - @Setup - public void setup() throws Exception { - productRecord = createProductRecord(); - orderRecord = createOrderRecord(); - customerRecord = createCustomerRecord(); - } - - // ===== SIMULATED MERGE BENCHMARKS ===== - // These will be replaced with actual merge API when implemented - - @Benchmark - public void mergeProductAndOrder(Blackhole bh) throws Exception { - // Simulate merge by creating a new record with fields from both - ImprintRecord result = simulateMerge(productRecord, orderRecord); - bh.consume(result); - } - - @Benchmark - public void mergeProductAndCustomer(Blackhole bh) throws Exception { - ImprintRecord result = simulateMerge(productRecord, customerRecord); - bh.consume(result); - } - - @Benchmark - public void mergeOrderAndCustomer(Blackhole bh) throws Exception { - ImprintRecord result = simulateMerge(orderRecord, customerRecord); - bh.consume(result); - } - - @Benchmark - public void mergeThreeRecords(Blackhole bh) throws Exception { - // Test merging multiple records - var temp = simulateMerge(productRecord, orderRecord); - ImprintRecord result = simulateMerge(temp, customerRecord); - bh.consume(result); - } - - // ===== MERGE CONFLICT HANDLING ===== - - @Benchmark - public void mergeWithConflicts(Blackhole bh) throws Exception { - // Create records with overlapping field IDs to test conflict resolution - var record1 = createRecordWithFields(1, 50, "record1_"); - var record2 = createRecordWithFields(25, 75, "record2_"); - - ImprintRecord result = simulateMerge(record1, record2); - bh.consume(result); - } - - // ===== HELPER METHODS ===== - - /** - * 
Simulates merge operation by manually copying fields. - * This should be replaced with actual merge API when available. - */ - private ImprintRecord simulateMerge(ImprintRecord first, ImprintRecord second) throws Exception { - var builder = ImprintRecord.builder(first.getHeader().getSchemaId()); - var usedFieldIds = new HashSet(); - - // Copy fields from first record (takes precedence) - copyFieldsToBuilder(first, builder, usedFieldIds); - - // Copy non-conflicting fields from second record - copyFieldsToBuilder(second, builder, usedFieldIds); - - return builder.build(); - } - - private void copyFieldsToBuilder(ImprintRecord record, ImprintRecordBuilder builder, Set usedFieldIds) throws Exception { - for (var entry : record.getDirectory()) { - int fieldId = entry.getId(); - if (!usedFieldIds.contains(fieldId)) { - var value = record.getValue(fieldId); - if (value != null) { - builder.field(fieldId, value); - usedFieldIds.add(fieldId); - } - } - } - } - - private ImprintRecord createProductRecord() throws Exception { - return ImprintRecord.builder(new SchemaId(1, 0x12345678)) - .field(1, Value.fromString("Product")) - .field(2, Value.fromInt32(12345)) - .field(3, Value.fromString("Laptop")) - .field(4, Value.fromFloat64(999.99)) - .field(5, Value.fromString("Electronics")) - .field(6, Value.fromInt32(50)) // stock - .field(7, Value.fromString("TechCorp")) - .field(8, Value.fromBoolean(true)) // available - .build(); - } - - private ImprintRecord createOrderRecord() throws Exception { - return ImprintRecord.builder(new SchemaId(2, 0x87654321)) - .field(10, Value.fromString("Order")) - .field(11, Value.fromInt32(67890)) - .field(12, Value.fromInt32(12345)) // product_id (overlaps with product) - .field(13, Value.fromInt32(2)) // quantity - .field(14, Value.fromFloat64(1999.98)) // total - .field(15, Value.fromString("2024-01-15")) // order_date - .field(16, Value.fromString("shipped")) // status - .build(); - } - - private ImprintRecord createCustomerRecord() throws 
Exception { - return ImprintRecord.builder(new SchemaId(3, 0x11223344)) - .field(20, Value.fromString("Customer")) - .field(21, Value.fromInt32(555)) - .field(22, Value.fromString("John Doe")) - .field(23, Value.fromString("john.doe@email.com")) - .field(24, Value.fromString("123 Main St")) - .field(25, Value.fromString("premium")) // tier - .field(26, Value.fromBoolean(true)) // active - .build(); - } - - private ImprintRecord createRecordWithFields(int startId, int endId, String prefix) throws Exception { - var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); - - for (int i = startId; i <= endId; i++) { - builder.field(i, Value.fromString(prefix + "field_" + i)); - } - - return builder.build(); - } -} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java similarity index 83% rename from src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java index bfdea2a..2f5476c 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AbstractCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java @@ -6,13 +6,13 @@ /** * A minimal base class for competitors, holding the test data. 
*/ -public abstract class AbstractCompetitor implements Competitor { +public abstract class AbstractSerializingBenchmark implements SerializingBenchmark { protected final String name; protected DataGenerator.TestRecord testData; protected DataGenerator.TestRecord testData2; - protected AbstractCompetitor(String name) { + protected AbstractSerializingBenchmark(String name) { this.name = name; } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java similarity index 98% rename from src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java index 71c8306..dc7278c 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AvroCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java @@ -12,7 +12,7 @@ import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; -public class AvroCompetitor extends AbstractCompetitor { +public class AvroSerializingBenchmark extends AbstractSerializingBenchmark { private final Schema schema; private final Schema projectedSchema; @@ -22,7 +22,7 @@ public class AvroCompetitor extends AbstractCompetitor { private byte[] serializedRecord1; private byte[] serializedRecord2; - public AvroCompetitor() { + public AvroSerializingBenchmark() { super("Avro-Generic"); String schemaDefinition = "{\"type\":\"record\",\"name\":\"TestRecord\",\"fields\":[" + "{\"name\":\"id\",\"type\":\"string\"}," diff --git a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java similarity index 97% rename from src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java index bd51eb9..a9fe5c8 100644 --- 
a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java @@ -6,14 +6,13 @@ import org.openjdk.jmh.infra.Blackhole; import java.nio.ByteBuffer; -import java.util.stream.Collectors; -public class FlatBuffersCompetitor extends AbstractCompetitor { +public class FlatBuffersSerializingBenchmark extends AbstractSerializingBenchmark { private ByteBuffer serializedRecord1; private ByteBuffer serializedRecord2; - public FlatBuffersCompetitor() { + public FlatBuffersSerializingBenchmark() { super("FlatBuffers"); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java similarity index 95% rename from src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java index 3e05cd7..26bb495 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ImprintCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java @@ -1,24 +1,23 @@ package com.imprint.benchmark.competitors; import com.imprint.benchmark.DataGenerator; -import com.imprint.core.ImprintOperations; +import com.imprint.ops.ImprintOperations; import com.imprint.core.ImprintRecord; import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; import com.imprint.error.ImprintException; -import lombok.SneakyThrows; import org.openjdk.jmh.infra.Blackhole; import java.nio.ByteBuffer; -public class ImprintCompetitor extends AbstractCompetitor { +public class ImprintSerializingBenchmark extends AbstractSerializingBenchmark { private ImprintRecord imprintRecord1; private byte[] serializedRecord1; private byte[] serializedRecord2; private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); - public ImprintCompetitor() { + public ImprintSerializingBenchmark() 
{ super("Imprint"); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java similarity index 96% rename from src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java index a32e9a8..829b073 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/JacksonJsonCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java @@ -4,13 +4,13 @@ import com.imprint.benchmark.DataGenerator; import org.openjdk.jmh.infra.Blackhole; -public class JacksonJsonCompetitor extends AbstractCompetitor { +public class JacksonSerializingBenchmark extends AbstractSerializingBenchmark { private final ObjectMapper mapper; private byte[] serializedRecord; private byte[] serializedRecord2; - public JacksonJsonCompetitor() { + public JacksonSerializingBenchmark() { super("Jackson-JSON"); this.mapper = new ObjectMapper(); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java similarity index 97% rename from src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java index d76a937..1223e06 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/KryoCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java @@ -11,13 +11,13 @@ import java.util.Arrays; import java.util.HashMap; -public class KryoCompetitor extends AbstractCompetitor { +public class KryoSerializingBenchmark extends AbstractSerializingBenchmark { private final Kryo kryo; private byte[] serializedRecord1; private byte[] serializedRecord2; - public KryoCompetitor() { + public KryoSerializingBenchmark() { super("Kryo"); this.kryo = new 
Kryo(); this.kryo.register(DataGenerator.TestRecord.class); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java similarity index 95% rename from src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java index 65269e5..b596e6d 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java @@ -5,13 +5,13 @@ import org.msgpack.jackson.dataformat.MessagePackFactory; import org.openjdk.jmh.infra.Blackhole; -public class MessagePackCompetitor extends AbstractCompetitor { +public class MessagePackSerializingBenchmark extends AbstractSerializingBenchmark { private final ObjectMapper mapper; private byte[] serializedRecord; private byte[] serializedRecord2; - public MessagePackCompetitor() { + public MessagePackSerializingBenchmark() { super("MessagePack"); this.mapper = new ObjectMapper(new MessagePackFactory()); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java similarity index 96% rename from src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java index 547abfe..72ad38f 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java @@ -4,12 +4,12 @@ import com.imprint.benchmark.proto.TestRecordOuterClass; import org.openjdk.jmh.infra.Blackhole; -public class ProtobufCompetitor extends AbstractCompetitor { +public class ProtobufSerializingBenchmark extends AbstractSerializingBenchmark { private byte[] 
serializedRecord1; private byte[] serializedRecord2; - public ProtobufCompetitor() { + public ProtobufSerializingBenchmark() { super("Protobuf"); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java b/src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java similarity index 92% rename from src/jmh/java/com/imprint/benchmark/competitors/Competitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java index 717bbfc..595caa6 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/Competitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java @@ -6,7 +6,7 @@ /** * Defines the contract for a serialization competitor in the benchmark. */ -public interface Competitor { +public interface SerializingBenchmark { String name(); void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2); void serialize(Blackhole bh); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java b/src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java similarity index 97% rename from src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java rename to src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java index 18530b5..83c0812 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ThriftCompetitor.java +++ b/src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java @@ -9,18 +9,17 @@ import org.apache.thrift.protocol.TBinaryProtocol; import org.openjdk.jmh.infra.Blackhole; -import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; import java.util.stream.Collectors; -public class ThriftCompetitor extends AbstractCompetitor { +public class ThriftSerializingBenchmark extends AbstractSerializingBenchmark { private final TSerializer serializer; private final TDeserializer deserializer; private byte[] serializedRecord1; private byte[] serializedRecord2; 
- public ThriftCompetitor() { + public ThriftSerializingBenchmark() { super("Thrift"); try { this.serializer = new TSerializer(new TBinaryProtocol.Factory()); diff --git a/src/jmh/sbe/test_record.xml b/src/jmh/sbe/test_record.xml deleted file mode 100644 index 9feaee8..0000000 --- a/src/jmh/sbe/test_record.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/main/java/com/imprint/core/Directory.java b/src/main/java/com/imprint/core/Directory.java new file mode 100644 index 0000000..cb449b3 --- /dev/null +++ b/src/main/java/com/imprint/core/Directory.java @@ -0,0 +1,69 @@ +package com.imprint.core; + +import com.imprint.types.TypeCode; +import lombok.Getter; +import lombok.Setter; +import lombok.Value; + +import java.util.Objects; + +/** + * Represents the common interface for a directory entry in an Imprint record. + * A directory entry provides metadata about a field, such as its ID, type, and location in the payload. + */ +public interface Directory { + /** + * @return The field's unique identifier. + */ + short getId(); + + /** + * @return The {@link TypeCode} of the field's value. + */ + TypeCode getTypeCode(); + + /** + * @return The starting position (offset) of the field's data within the payload buffer. + */ + int getOffset(); + + /** + * Immutable representation of the Imprint Directory used for deserialization, + * merging, and field projections + */ + @Value + class Entry implements Directory { + short id; + TypeCode typeCode; + int offset; + + public Entry(short id, TypeCode typeCode, int offset) { + this.id = id; + this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); + this.offset = offset; + } + } + + /** + * Mutable representation of the Imprint Directory bound with corresponding type value + * used for record building through {@link ImprintRecordBuilder} and subsequent serialization. 
+ */ + @Getter + class Builder implements Directory { + private final short id; + private final com.imprint.types.Value value; + @Setter + private int offset; + + Builder(short id, com.imprint.types.Value value) { + this.id = id; + this.value = value; + this.offset = -1; + } + + @Override + public TypeCode getTypeCode() { + return value.getTypeCode(); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/DirectoryEntry.java b/src/main/java/com/imprint/core/DirectoryEntry.java deleted file mode 100644 index 0b98433..0000000 --- a/src/main/java/com/imprint/core/DirectoryEntry.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.imprint.core; - -import com.imprint.types.TypeCode; - -/** - * Represents the common interface for a directory entry in an Imprint record. - * A directory entry provides metadata about a field, such as its ID, type, and location in the payload. - */ -public interface DirectoryEntry { - /** - * @return The field's unique identifier. - */ - short getId(); - - /** - * @return The {@link TypeCode} of the field's value. - */ - TypeCode getTypeCode(); - - /** - * @return The starting position (offset) of the field's data within the payload buffer. 
- */ - int getOffset(); -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index 4afa1fa..845892a 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -5,17 +5,11 @@ import com.imprint.error.ImprintException; import com.imprint.types.TypeCode; import com.imprint.util.VarInt; -import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; import lombok.Getter; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Objects; -import java.util.TreeMap; +import java.util.*; /** * Manages the raw buffers for an Imprint record with lazy directory parsing. @@ -35,7 +29,7 @@ public final class ImprintBuffers { private final ByteBuffer payload; // Read-only payload view // Lazy-loaded directory state. - private Int2ObjectSortedMap parsedDirectory; + private TreeMap parsedDirectory; private boolean directoryParsed = false; private int directoryCount = -1; @@ -51,7 +45,7 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { * Creates buffers from a pre-sorted list of entries (most efficient builder path). * Immediately creates the parsed index and the serialized buffer. */ - public ImprintBuffers(List sortedDirectory, ByteBuffer payload) { + public ImprintBuffers(List sortedDirectory, ByteBuffer payload) { this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); this.payload = payload.asReadOnlyBuffer(); } @@ -62,10 +56,10 @@ public ImprintBuffers(List sortedDirectory, ByteBuffer * map becomes the definitive parsed directory. 
*/ @SuppressWarnings("unchecked") - public ImprintBuffers(Int2ObjectSortedMap parsedDirectory, ByteBuffer payload) { + public ImprintBuffers(Map parsedDirectory, ByteBuffer payload) { this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromSortedMap(Objects.requireNonNull(parsedDirectory)); this.payload = payload.asReadOnlyBuffer(); - this.parsedDirectory = (Int2ObjectSortedMap) parsedDirectory; + this.parsedDirectory = (TreeMap) parsedDirectory; this.directoryParsed = true; this.directoryCount = parsedDirectory.size(); } @@ -97,7 +91,7 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { * Get a zero-copy ByteBuffer view of a field's data using a pre-fetched DirectoryEntry. * This avoids the cost of re-finding the entry. */ - public ByteBuffer getFieldBuffer(DirectoryEntry entry) throws ImprintException { + public ByteBuffer getFieldBuffer(Directory entry) throws ImprintException { if (entry == null) return null; @@ -122,7 +116,7 @@ public ByteBuffer getFieldBuffer(DirectoryEntry entry) throws ImprintException { * - If parsed: TreeMap lookup * - If raw: Binary search on raw bytes to avoid full unwinding of the directory */ - public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { + public Directory findDirectoryEntry(int fieldId) throws ImprintException { if (directoryParsed) return parsedDirectory.get(fieldId); else @@ -133,7 +127,7 @@ public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { * Get the full directory, parsing it if necessary. * Returns the values in fieldId order thanks to TreeMap. */ - public List getDirectory() { + public List getDirectory() { ensureDirectoryParsed(); return new ArrayList<>(parsedDirectory.values()); } @@ -168,7 +162,7 @@ public ByteBuffer serializeDirectory() { * Binary search on raw directory bytes to find a specific field. * This avoids parsing the entire directory for single field lookups. 
*/ - private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { + private Directory findFieldEntryInRawDirectory(int fieldId) throws ImprintException { var searchBuffer = directoryBuffer.duplicate(); searchBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -212,7 +206,7 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE /** * Find the end offset for a field by looking at the next field's offset. */ - private int findEndOffset(DirectoryEntry entry) throws ImprintException { + private int findEndOffset(Directory entry) throws ImprintException { if (directoryParsed) { return findNextOffsetInParsedDirectory(entry.getId()); } else { @@ -224,11 +218,12 @@ private int findEndOffset(DirectoryEntry entry) throws ImprintException { * Find the end offset using TreeMap's efficient navigation methods. */ private int findNextOffsetInParsedDirectory(int currentFieldId) { - var tailMap = parsedDirectory.tailMap(currentFieldId + 1); - if (tailMap.isEmpty()) { + var nextEntry = parsedDirectory.higherEntry(currentFieldId); + if (nextEntry != null) + return nextEntry.getValue().getOffset(); + else return payload.limit(); - } - return tailMap.get(tailMap.firstIntKey()).getOffset(); + } private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { @@ -284,18 +279,18 @@ private void ensureDirectoryParsed() { parseBuffer.order(ByteOrder.LITTLE_ENDIAN); int count = getOrParseDirectoryCount(parseBuffer); - this.parsedDirectory = new Int2ObjectAVLTreeMap<>(); + this.parsedDirectory = new TreeMap<>(); for (int i = 0; i < count; i++) { var entry = deserializeDirectoryEntry(parseBuffer); - this.parsedDirectory.put(entry.getId() , entry); + this.parsedDirectory.put((int) entry.getId() , entry); } this.directoryParsed = true; } catch (ImprintException e) { // This can happen with a corrupted directory. // In this case, we'll just have an empty (but valid) parsed directory. 
- this.parsedDirectory = new Int2ObjectAVLTreeMap<>(); + this.parsedDirectory = new TreeMap<>(); this.directoryParsed = true; // Mark as parsed to avoid repeated errors } } @@ -323,7 +318,7 @@ private int getOrParseDirectoryCount(ByteBuffer buffer) throws ImprintException * Creates a read-only buffer containing the serialized directory. * The input collection does not need to be sorted. */ - static ByteBuffer createDirectoryBuffer(Collection directory) { + static ByteBuffer createDirectoryBuffer(Collection directory) { if (directory == null || directory.isEmpty()) { ByteBuffer buffer = ByteBuffer.allocate(1); VarInt.encode(0, buffer); @@ -332,9 +327,9 @@ static ByteBuffer createDirectoryBuffer(Collection dir } // Ensure sorted order for binary search compatibility. - ArrayList sortedDirectory; - if (directory instanceof ArrayList && isSorted((ArrayList)directory)) { - sortedDirectory = (ArrayList) directory; + ArrayList sortedDirectory; + if (directory instanceof ArrayList && isSorted((ArrayList)directory)) { + sortedDirectory = (ArrayList) directory; } else { sortedDirectory = new ArrayList<>(directory); sortedDirectory.sort(null); @@ -346,7 +341,7 @@ static ByteBuffer createDirectoryBuffer(Collection dir buffer.order(ByteOrder.LITTLE_ENDIAN); VarInt.encode(count, buffer); - for (DirectoryEntry entry : sortedDirectory) { + for (var entry : sortedDirectory) { serializeDirectoryEntry(entry, buffer); } @@ -354,7 +349,7 @@ static ByteBuffer createDirectoryBuffer(Collection dir return buffer; } - static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { + static ByteBuffer createDirectoryBufferFromSortedMap(Map directoryMap) { if (directoryMap == null || directoryMap.isEmpty()) { ByteBuffer buffer = ByteBuffer.allocate(1); VarInt.encode(0, buffer); @@ -376,29 +371,7 @@ static ByteBuffer createDirectoryBufferFromMap(TreeMap directoryMap) { - if (directoryMap == null || directoryMap.isEmpty()) { - ByteBuffer buffer = ByteBuffer.allocate(1); - 
VarInt.encode(0, buffer); - buffer.flip(); - return buffer; - } - - int count = directoryMap.size(); - int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(size); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.encode(count, buffer); - for (var entry : directoryMap.int2ObjectEntrySet()) { - serializeDirectoryEntry(entry.getValue(), buffer); - } - - buffer.flip(); - return buffer; - } - - private static boolean isSorted(ArrayList list) { + private static boolean isSorted(ArrayList list) { for (int i = 0; i < list.size() - 1; i++) { if (list.get(i).getId() > list.get(i + 1).getId()) { return false; @@ -411,7 +384,7 @@ private static boolean isSorted(ArrayList list) { * Serialize a single directory entry to the buffer. * Format: [fieldId:2bytes][typeCode:1byte][offset:4bytes] */ - private static void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { + private static void serializeDirectoryEntry(Directory entry, ByteBuffer buffer) { buffer.putShort(entry.getId()); buffer.put(entry.getTypeCode().getCode()); buffer.putInt(entry.getOffset()); @@ -421,7 +394,7 @@ private static void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buf * Deserialize a single directory entry from the buffer. 
* Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] */ - private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { + private Directory deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); @@ -429,6 +402,6 @@ private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws Impri var typeCode = TypeCode.fromByte(buffer.get()); int offset = buffer.getInt(); - return new SimpleDirectoryEntry(id, typeCode, offset); + return new Directory.Entry(id, typeCode, offset); } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e4c4a42..a34c7df 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -1,22 +1,20 @@ package com.imprint.core; import com.imprint.Constants; +import com.imprint.ops.ImprintOperations; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; import com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; -import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; import lombok.Getter; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.TreeMap; /** * An Imprint record containing a header and buffer management. @@ -38,7 +36,7 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { /** * Creates a record from a pre-sorted list of entries (most efficient builder path). 
*/ - ImprintRecord(Header header, List sortedDirectory, ByteBuffer payload) { + public ImprintRecord(Header header, List sortedDirectory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); this.buffers = new ImprintBuffers(sortedDirectory, payload); } @@ -46,7 +44,7 @@ private ImprintRecord(Header header, ImprintBuffers buffers) { /** * Creates a record from a pre-built and sorted FastUtil map (most efficient builder path). */ - ImprintRecord(Header header, Int2ObjectSortedMap parsedDirectory, ByteBuffer payload) { + public ImprintRecord(Header header, Map parsedDirectory, ByteBuffer payload) { this.header = Objects.requireNonNull(header, "Header cannot be null"); this.buffers = new ImprintBuffers(parsedDirectory, payload); } @@ -84,7 +82,7 @@ public ByteBuffer getRawBytes(int fieldId) { * Get raw bytes for a field using a pre-fetched DirectoryEntry. * This avoids the cost of re-finding the entry metadata. */ - public ByteBuffer getRawBytes(DirectoryEntry entry) { + public ByteBuffer getRawBytes(Directory entry) { try { return buffers.getFieldBuffer(entry); } catch (ImprintException e) { @@ -117,7 +115,7 @@ public ImprintRecord merge(ImprintRecord other) throws ImprintException { /** * Get the directory (parsing it if necessary). */ - public List getDirectory() { + public List getDirectory() { return buffers.getDirectory(); } @@ -128,7 +126,7 @@ public List getDirectory() { * @param fieldId The ID of the field to find. * @return The DirectoryEntry if found, otherwise null. 
*/ - public DirectoryEntry getDirectoryEntry(int fieldId) { + public Directory getDirectoryEntry(int fieldId) { try { return buffers.findDirectoryEntry(fieldId); } catch (ImprintException e) { @@ -220,7 +218,7 @@ public ByteBuffer serializeToBuffer() { // Assemble the final record from existing components serializeHeader(this.header, finalBuffer); finalBuffer.put(directoryBuffer); - finalBuffer.put(payloadBuffer.duplicate()); // Use duplicate to preserve original buffer state + finalBuffer.put(payloadBuffer.duplicate()); finalBuffer.flip(); return finalBuffer.asReadOnlyBuffer(); @@ -236,42 +234,15 @@ public int estimateSerializedSize() { /** * Serializes the components of a record into a single ByteBuffer. * This provides a direct serialization path without needing a live ImprintRecord instance. - * - * @param schemaId The schema identifier for the record. - * @param directory The list of directory entries, which will be sorted if not already. - * @param payload The ByteBuffer containing all field data concatenated. - * @return A read-only ByteBuffer with the complete serialized record. - */ - public static ByteBuffer serialize(SchemaId schemaId, Collection directory, ByteBuffer payload) { - var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = ImprintBuffers.createDirectoryBuffer(directory); - - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); - var finalBuffer = ByteBuffer.allocate(finalSize); - finalBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Assemble the final record - serializeHeader(header, finalBuffer); - finalBuffer.put(directoryBuffer); - finalBuffer.put(payload); - - finalBuffer.flip(); - return finalBuffer.asReadOnlyBuffer(); - } - - /** - * Serializes the components of a record into a single ByteBuffer. - * This provides a direct serialization path without needing a live ImprintRecord instance. 
- * This is an optimized version that assumes the list is pre-sorted by field ID. + * This assumes the list is pre-sorted by field ID. * * @param schemaId The schema identifier for the record. * @param sortedDirectory The list of directory entries, which MUST be sorted by field ID. * @param payload The ByteBuffer containing all field data concatenated. * @return A read-only ByteBuffer with the complete serialized record. */ - public static ByteBuffer serialize(SchemaId schemaId, List sortedDirectory, ByteBuffer payload) { + public static ByteBuffer serialize(SchemaId schemaId, List sortedDirectory, ByteBuffer payload) { var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - // This createDirectoryBuffer is optimized for a pre-sorted list. var directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); @@ -287,58 +258,6 @@ public static ByteBuffer serialize(SchemaId schemaId, List directoryMap, ByteBuffer payload) { - var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = ImprintBuffers.createDirectoryBufferFromMap(directoryMap); - - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); - var finalBuffer = ByteBuffer.allocate(finalSize); - finalBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Assemble the final record - serializeHeader(header, finalBuffer); - finalBuffer.put(directoryBuffer); - finalBuffer.put(payload); - - finalBuffer.flip(); - return finalBuffer.asReadOnlyBuffer(); - } - - /** - * Serializes the components of a record into a single ByteBuffer using a pre-built sorted map. - * This is the most efficient path for "write-only" scenarios, used by the builder. - * - * @param schemaId The schema identifier for the record. - * @param directoryMap The sorted map of directory entries. 
- * @param payload The ByteBuffer containing all field data concatenated. - * @return A read-only ByteBuffer with the complete serialized record. - */ - public static ByteBuffer serialize(SchemaId schemaId, Int2ObjectSortedMap directoryMap, ByteBuffer payload) { - var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = ImprintBuffers.createDirectoryBufferFromSortedMap(directoryMap); - - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); - var finalBuffer = ByteBuffer.allocate(finalSize); - finalBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Assemble the final record - serializeHeader(header, finalBuffer); - finalBuffer.put(directoryBuffer); - finalBuffer.put(payload); - - finalBuffer.flip(); - return finalBuffer.asReadOnlyBuffer(); - } - // ========== STATIC FACTORY METHODS ========== public static ImprintRecordBuilder builder(SchemaId schemaId) { diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 52bc760..93e83ba 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -2,12 +2,7 @@ import com.imprint.error.ImprintException; import com.imprint.types.MapKey; -import com.imprint.types.TypeCode; import com.imprint.types.Value; -import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; -import lombok.Getter; -import lombok.Setter; import lombok.SneakyThrows; import java.nio.ByteBuffer; @@ -38,7 +33,7 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final Int2ObjectSortedMap fields = new Int2ObjectAVLTreeMap<>(); + private final Map fields = new TreeMap<>(); private int estimatedPayloadSize = 0; ImprintRecordBuilder(SchemaId schemaId) { @@ -161,7 +156,6 @@ public ImprintRecord build() throws 
ImprintException { /** * Builds the record and serializes it directly to a ByteBuffer without creating an intermediate ImprintRecord object. - * This is the most efficient path for "write-only" scenarios. * * @return A read-only ByteBuffer containing the fully serialized record. * @throws ImprintException if serialization fails. @@ -182,7 +176,6 @@ public ByteBuffer buildToBuffer() throws ImprintException { return ImprintRecord.serialize(schemaId, new ArrayList<>(fields.values()), payloadView); } - // Internal helper methods /** * Adds or overwrites a field in the record being built. * If a field with the given ID already exists, it will be replaced. @@ -193,7 +186,7 @@ public ByteBuffer buildToBuffer() throws ImprintException { */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - var newEntry = new BuilderEntry((short) id, value); + var newEntry = new Directory.Builder((short) id, value); // Subtract the size of the old value if it's being replaced. 
var oldEntry = fields.get(id); @@ -345,24 +338,4 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } - - - @Getter - private static class BuilderEntry implements DirectoryEntry { - private final short id; - private final Value value; - @Setter - private int offset; - - BuilderEntry(short id, Value value) { - this.id = id; - this.value = value; - this.offset = -1; - } - - @Override - public TypeCode getTypeCode() { - return value.getTypeCode(); - } - } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/SimpleDirectoryEntry.java b/src/main/java/com/imprint/core/SimpleDirectoryEntry.java deleted file mode 100644 index 843aad4..0000000 --- a/src/main/java/com/imprint/core/SimpleDirectoryEntry.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.imprint.core; - -import com.imprint.types.TypeCode; -import lombok.Value; - -import java.util.Objects; - -/** - * A concrete, immutable directory entry. 
- */ -@Value -public class SimpleDirectoryEntry implements DirectoryEntry { - short id; - TypeCode typeCode; - int offset; - - public SimpleDirectoryEntry(short id, TypeCode typeCode, int offset) { - this.id = id; - this.typeCode = Objects.requireNonNull(typeCode, "TypeCode cannot be null"); - this.offset = offset; - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintOperations.java b/src/main/java/com/imprint/ops/ImprintOperations.java similarity index 82% rename from src/main/java/com/imprint/core/ImprintOperations.java rename to src/main/java/com/imprint/ops/ImprintOperations.java index 0c51e43..49f60b4 100644 --- a/src/main/java/com/imprint/core/ImprintOperations.java +++ b/src/main/java/com/imprint/ops/ImprintOperations.java @@ -1,15 +1,14 @@ -package com.imprint.core; +package com.imprint.ops; +import com.imprint.core.*; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import lombok.experimental.UtilityClass; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; @UtilityClass public class ImprintOperations { @@ -32,25 +31,27 @@ public class ImprintOperations { */ public static ImprintRecord project(ImprintRecord record, int... 
fieldIds) { // Sort and deduplicate field IDs for efficient matching - int[] sortedFieldIds = Arrays.stream(fieldIds).distinct().sorted().toArray(); - if (sortedFieldIds.length == 0) { + final var fieldIdSet = Arrays.stream(fieldIds) + .boxed() + .collect(Collectors.toCollection(TreeSet::new)); + if (fieldIdSet.isEmpty()) { return createEmptyRecord(record.getHeader().getSchemaId()); } - var newDirectory = new ArrayList(sortedFieldIds.length); - var payloadChunks = new ArrayList(sortedFieldIds.length); + var newDirectory = new ArrayList(fieldIdSet.size()); + var payloadChunks = new ArrayList(fieldIdSet.size()); int currentOffset = 0; - for (int fieldId : sortedFieldIds) { + for (int fieldId : fieldIdSet) { // Use efficient lookup for each field's metadata. Returns null on failure. - DirectoryEntry sourceEntry = record.getDirectoryEntry(fieldId); + var sourceEntry = record.getDirectoryEntry(fieldId); // If field exists, get its payload and add to the new record components if (sourceEntry != null) { - ByteBuffer fieldPayload = record.getRawBytes(sourceEntry); + var fieldPayload = record.getRawBytes(sourceEntry); // This check is for internal consistency. If an entry exists, payload should too. if (fieldPayload != null) { - newDirectory.add(new SimpleDirectoryEntry((short)fieldId, sourceEntry.getTypeCode(), currentOffset)); + newDirectory.add(new Directory.Entry((short)fieldId, sourceEntry.getTypeCode(), currentOffset)); payloadChunks.add(fieldPayload); currentOffset += fieldPayload.remaining(); } @@ -58,7 +59,7 @@ public static ImprintRecord project(ImprintRecord record, int... 
fieldIds) { } // Build new payload from collected chunks - ByteBuffer newPayload = buildPayloadFromChunks(payloadChunks); + ByteBuffer newPayload = buildPayloadFromChunks(payloadChunks, currentOffset); // Create new header with updated payload size // TODO: compute correct schema hash @@ -92,7 +93,7 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr var secondDir = second.getDirectory(); // Pre-allocate for worst case (no overlapping fields) - var newDirectory = new ArrayList(firstDir.size() + secondDir.size()); + var newDirectory = new ArrayList(firstDir.size() + secondDir.size()); var payloadChunks = new ArrayList(); int firstIdx = 0; @@ -100,7 +101,7 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr int currentOffset = 0; while (firstIdx < firstDir.size() || secondIdx < secondDir.size()) { - DirectoryEntry currentEntry; + Directory currentEntry; ByteBuffer currentPayload; if (firstIdx < firstDir.size() && @@ -114,7 +115,6 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr firstDir.get(firstIdx).getId() == secondDir.get(secondIdx).getId()) { secondIdx++; } - currentPayload = first.getRawBytes(currentEntry); firstIdx++; } else { @@ -128,7 +128,8 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get raw bytes for field " + currentEntry.getId()); // Add adjusted directory entry - var newEntry = new SimpleDirectoryEntry(currentEntry.getId(), currentEntry.getTypeCode(), currentOffset); + var newEntry = new Directory.Entry(currentEntry.getId(), + currentEntry.getTypeCode(), currentOffset); newDirectory.add(newEntry); // Collect payload chunk @@ -137,26 +138,22 @@ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) thr } // Build merged payload - var mergedPayload = buildPayloadFromChunks(payloadChunks); + var mergedPayload = 
buildPayloadFromChunks(payloadChunks, currentOffset); // Create header preserving first record's schema ID - var newHeader = new Header(first.getHeader().getFlags(), first.getHeader().getSchemaId(), mergedPayload.remaining()); - + var newHeader = new Header(first.getHeader().getFlags(), + first.getHeader().getSchemaId(), mergedPayload.remaining()); return new ImprintRecord(newHeader, newDirectory, mergedPayload); } /** * Build a new payload buffer by concatenating chunks. */ - private static ByteBuffer buildPayloadFromChunks(List chunks) { - int totalSize = chunks.stream().mapToInt(ByteBuffer::remaining).sum(); + private static ByteBuffer buildPayloadFromChunks(List chunks, int totalSize) { var mergedPayload = ByteBuffer.allocate(totalSize); mergedPayload.order(ByteOrder.LITTLE_ENDIAN); - - for (var chunk : chunks) { + for (var chunk : chunks) mergedPayload.put(chunk); - } - mergedPayload.flip(); return mergedPayload; } diff --git a/src/main/java/com/imprint/core/ImprintStream.java b/src/main/java/com/imprint/stream/ImprintStream.java similarity index 69% rename from src/main/java/com/imprint/core/ImprintStream.java rename to src/main/java/com/imprint/stream/ImprintStream.java index b6afe7a..35a69ed 100644 --- a/src/main/java/com/imprint/core/ImprintStream.java +++ b/src/main/java/com/imprint/stream/ImprintStream.java @@ -1,17 +1,22 @@ -package com.imprint.core; +package com.imprint.stream; +import com.imprint.core.*; import com.imprint.error.ImprintException; -import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectSortedMap; -import it.unimi.dsi.fastutil.ints.IntSet; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.*; +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.List; +import java.util.NavigableMap; +import java.util.Objects; +import java.util.Set; +import 
java.util.TreeMap; /** - * Provides a framework for lazy, zero-copy transformations of Imprint records. + * Provides a framework for lazy, (eventual) zero-copy transformations of Imprint records. *

* Operations like {@link #project(int...)} and {@link #mergeWith(ImprintRecord)} are * intermediate and do not create new records. They build up a plan of operations @@ -49,11 +54,11 @@ private SourcePlan(ImprintRecord source) { */ private static final class ProjectPlan implements Plan { final Plan previous; - final IntSet fieldIds; + final Set fieldIds; private ProjectPlan(Plan previous, int... fieldIds) { this.previous = Objects.requireNonNull(previous); - this.fieldIds = new IntOpenHashSet(); + this.fieldIds = new HashSet<>(); for (int id : fieldIds) { this.fieldIds.add(id); } @@ -135,56 +140,42 @@ private Evaluator(Plan plan) { } public ImprintRecord execute() { - // Unwind the plan's linked-list structure into a forward-order list of operations. - var planList = getPlans(); - Collections.reverse(planList); + // Unwind the plan from a deque + var planQueue = getPlans(); - // This map holds the set of fields being built, sorted by field ID. - var resolvedFields = new Int2ObjectAVLTreeMap(); + // Set of fields being built + var resolvedFields = new TreeMap(); - // Iteratively evaluate the plan step-by-step. - for (var planStep : planList) { + for (var planStep : planQueue) { if (planStep instanceof SourcePlan) { var sourcePlan = (SourcePlan) planStep; for (var entry : sourcePlan.source.getDirectory()) { - resolvedFields.put(entry.getId(), new FieldSource(sourcePlan.source, entry)); + resolvedFields.put((int) entry.getId(), new FieldSource(sourcePlan.source, entry)); } } else if (planStep instanceof ProjectPlan) { var projectPlan = (ProjectPlan) planStep; // Apply projection to the current state of resolved fields. 
// Keep only fields that are in the projection set - var keysToRemove = new IntOpenHashSet(); - for (int fieldId : resolvedFields.keySet()) { - if (!projectPlan.fieldIds.contains(fieldId)) { - keysToRemove.add(fieldId); - } - } - for (int keyToRemove : keysToRemove) { - resolvedFields.remove(keyToRemove); - } + resolvedFields.keySet().removeIf(fieldId -> !projectPlan.fieldIds.contains(fieldId)); } else if (planStep instanceof MergePlan) { var mergePlan = (MergePlan) planStep; // Add fields from other records if they aren't already in the map. for (var otherRecord : mergePlan.others) { for (var entry : otherRecord.getDirectory()) { int fieldId = entry.getId(); - if (!resolvedFields.containsKey(fieldId)) { - resolvedFields.put(fieldId, new FieldSource(otherRecord, entry)); - } + resolvedFields.putIfAbsent(fieldId, new FieldSource(otherRecord, entry)); } } } } - - // Once the final field set is determined, build the record. return build(resolvedFields); } - private ArrayList getPlans() { - var planList = new ArrayList(); + private Deque getPlans() { + var planQueue = new ArrayDeque(); var current = plan; while (current != null) { - planList.add(current); + planQueue.addFirst(current); if (current instanceof ProjectPlan) { current = ((ProjectPlan) current).previous; } else if (current instanceof MergePlan) { @@ -193,38 +184,34 @@ private ArrayList getPlans() { current = null; // End of the chain } } - return planList; + return planQueue; } - private ImprintRecord build(Int2ObjectSortedMap finalFields) { + private ImprintRecord build(NavigableMap finalFields) { if (finalFields.isEmpty()) { - // To-Do: Need a way to get the schemaId for an empty record. + // TODO: Need a way to get the schemaId for an empty record. // For now, returning null or using a default. try { return ImprintRecord.builder(new SchemaId(0, 0)).build(); } catch (ImprintException e) { - // This should not happen when building an empty record. 
+ // TODO This shouldn't really ever happen, we probably need a better way of consolidating error handling throw new IllegalStateException("Failed to build empty record.", e); } } - // Determine the schema from the first field's source record. - SchemaId schemaId = finalFields.get(finalFields.firstIntKey()).record.getHeader().getSchemaId(); + // Use schema from the first field's source record. + var schemaId = finalFields.firstEntry().getValue().record.getHeader().getSchemaId(); // 1. Calculate final payload size and prepare directory. int payloadSize = 0; - var newDirectoryMap = new Int2ObjectAVLTreeMap(); + var newDirectoryMap = new TreeMap(); - // Iterate over fields in sorted order - for (var entry : finalFields.int2ObjectEntrySet()) { - int fieldId = entry.getIntKey(); + for (var entry : finalFields.entrySet()) { + int fieldId = entry.getKey(); var fieldSource = entry.getValue(); int fieldLength = fieldSource.getLength(); - newDirectoryMap.put(fieldId, new SimpleDirectoryEntry( - fieldSource.entry.getId(), - fieldSource.entry.getTypeCode(), - payloadSize)); + newDirectoryMap.put(fieldId, new Directory.Entry(fieldSource.entry.getId(), fieldSource.entry.getTypeCode(), payloadSize)); payloadSize += fieldLength; } @@ -232,12 +219,11 @@ private ImprintRecord build(Int2ObjectSortedMap finalFields) { var payload = ByteBuffer.allocate(payloadSize).order(ByteOrder.LITTLE_ENDIAN); for (var fieldSource : finalFields.values()) { try { - ByteBuffer sourceData = fieldSource.record.getRawBytes(fieldSource.entry.getId()); - if (sourceData != null) { + var sourceData = fieldSource.record.getRawBytes(fieldSource.entry.getId()); + if (sourceData != null) payload.put(sourceData.duplicate()); - } } catch (Exception e) { - // This indicates a data corruption or bug, shouldn't happen in normal operation. 
+ // Shouldn't happen in normal operation - maybe some sort of data corruption or race issue throw new IllegalStateException("Failed to copy data for field " + fieldSource.entry.getId(), e); } } @@ -249,20 +235,20 @@ private ImprintRecord build(Int2ObjectSortedMap finalFields) { } /** - * A helper class to track the source record and directory entry for a field. + * A lightweight struct to track the source of a field during evaluation. */ private static final class FieldSource { final ImprintRecord record; - final DirectoryEntry entry; + final Directory entry; - FieldSource(ImprintRecord record, DirectoryEntry entry) { + FieldSource(ImprintRecord record, Directory entry) { this.record = record; this.entry = entry; } int getLength() { try { - ByteBuffer buf = record.getRawBytes(entry.getId()); + var buf = record.getRawBytes(entry.getId()); return buf != null ? buf.remaining() : 0; } catch (Exception e) { return 0; diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index 681eda1..070c497 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -285,7 +285,7 @@ public String toString() { public static class StringValue extends Value { @Getter private final String value; - private byte[] utf8BytesCache; // Cache UTF-8 encoding + private byte[] utf8BytesCache; public StringValue(String value) { this.value = Objects.requireNonNull(value, "String cannot be null"); diff --git a/src/test/java/com/imprint/core/ImprintOperationsTest.java b/src/test/java/com/imprint/ops/ImprintOperationsTest.java similarity index 96% rename from src/test/java/com/imprint/core/ImprintOperationsTest.java rename to src/test/java/com/imprint/ops/ImprintOperationsTest.java index 1dc67fb..7b54800 100644 --- a/src/test/java/com/imprint/core/ImprintOperationsTest.java +++ b/src/test/java/com/imprint/ops/ImprintOperationsTest.java @@ -1,5 +1,8 @@ -package com.imprint.core; +package com.imprint.ops; 
+import com.imprint.core.Directory; +import com.imprint.core.ImprintRecord; +import com.imprint.core.SchemaId; import com.imprint.error.ImprintException; import com.imprint.types.Value; import org.junit.jupiter.api.BeforeEach; @@ -72,7 +75,7 @@ void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException { assertArrayEquals(new byte[]{1, 2, 3}, projected.getBytes(7)); // And directory should maintain sorted order - List directory = projected.getDirectory(); + List directory = projected.getDirectory(); for (int i = 1; i < directory.size(); i++) { assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), "Directory entries should be sorted by field id"); @@ -95,7 +98,7 @@ void shouldHandleSingleFieldProjection() throws ImprintException { void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { // Given all field IDs from the original record int[] allFields = multiFieldRecord.getDirectory().stream() - .mapToInt(DirectoryEntry::getId) + .mapToInt(Directory::getId) .toArray(); // When projecting all fields @@ -104,7 +107,7 @@ void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { // Then all fields should be present with matching values assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size()); - for (DirectoryEntry entry : multiFieldRecord.getDirectory()) { + for (Directory entry : multiFieldRecord.getDirectory()) { Value originalValue = multiFieldRecord.getValue(entry.getId()); Value projectedValue = projected.getValue(entry.getId()); assertEquals(originalValue, projectedValue, @@ -228,7 +231,7 @@ void shouldMergeRecordsWithDistinctFields() throws ImprintException { assertEquals(123L, merged.getInt64(4)); // And directory should be sorted - List directory = merged.getDirectory(); + List directory = merged.getDirectory(); for (int i = 1; i < directory.size(); i++) { assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), "Directory entries should be sorted by field 
id"); @@ -293,7 +296,7 @@ void shouldHandleMergeWithEmptyRecord() throws ImprintException { assertEquals(multiFieldRecord.getDirectory().size(), merged2.getDirectory().size()); // And values should be preserved - for (DirectoryEntry entry : multiFieldRecord.getDirectory()) { + for (Directory entry : multiFieldRecord.getDirectory()) { Value originalValue = multiFieldRecord.getValue(entry.getId()); assertEquals(originalValue, merged1.getValue(entry.getId())); assertEquals(originalValue, merged2.getValue(entry.getId())); @@ -335,9 +338,9 @@ void shouldMaintainCorrectPayloadOffsetsAfterMerge() throws ImprintException { assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, merged.getBytes(4)); // And directory offsets should be sequential - List directory = merged.getDirectory(); + List directory = merged.getDirectory(); int expectedOffset = 0; - for (DirectoryEntry entry : directory) { + for (Directory entry : directory) { assertEquals(expectedOffset, entry.getOffset(), "Field " + entry.getId() + " should have correct offset"); @@ -359,6 +362,7 @@ void shouldHandleLargeRecordsEfficiently() throws ImprintException { for (int i = 1; i <= 100; i++) { builder1.field(i, i * 10); } + for (int i = 101; i <= 200; i++) { builder2.field(i, i * 10); } @@ -372,7 +376,7 @@ void shouldHandleLargeRecordsEfficiently() throws ImprintException { // Then all 200 fields should be present and accessible assertEquals(200, merged.getDirectory().size()); - // Spot check some values + // Spot check a bunch of random values just to make sure I guess assertEquals(10, merged.getInt32(1)); assertEquals(500, merged.getInt32(50)); assertEquals(1000, merged.getInt32(100)); diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3cfa61f..7b8a027 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -1,108 +1,196 @@ package com.imprint.profile; -import 
com.imprint.core.ImprintOperations; import com.imprint.core.ImprintRecord; import com.imprint.core.SchemaId; +import com.imprint.ops.ImprintOperations; import com.imprint.types.Value; import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import java.util.Arrays; import java.util.Random; import java.util.stream.IntStream; -/** - * A test designed for profiling hotspots during development. - *

- * To use with a profiler: - * 1. Remove @Disabled annotation - * 2. Run with JProfiler, VisualVM, or async-profiler: - * - JProfiler: Attach to test JVM - * - VisualVM: jvisualvm, attach to process - * - async-profiler: java -jar async-profiler.jar -d 30 -f profile.html - * 3. Look for hotspots in CPU sampling - *

- * Key areas to examine: - * - Object allocation (memory profiling) - * - Method call frequency (CPU sampling) - * - GC pressure (memory profiling) - * - String operations and UTF-8 encoding - * - ByteBuffer operations - */ -//@Disabled("Enable manually for profiling") + +@Disabled public class ProfilerTest { - private static final int ITERATIONS = 1_000_000; private static final int RECORD_SIZE = 50; private static final int LARGE_RECORD_SIZE = 200; @Test - void profileFieldAccess() throws Exception { - System.out.println("Starting profiler test - attach profiler now..."); - Thread.sleep(5000); // Give time to attach profiler + @Tag("merge") + void profileMergeOperations() throws Exception { + System.out.println("Starting merge profiler test - attach profiler now..."); + Thread.sleep(3000); + + profileSmallMerges(); + profileLargeMerges(); + profileOverlappingMerges(); + profileDisjointMerges(); + } + + /** + * Profile small merges (20-field records) + */ + private void profileSmallMerges() throws Exception { + System.out.println("\\n--- Small Merges (20-field records) ---"); - // Create a representative record - var record = createTestRecord(); + var record1 = createTestRecord(20); + var record2 = createTestRecord(20); + int iterations = 200_000; - System.out.println("Beginning field access profiling..."); + System.out.printf("Beginning small merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); - // Simulate real-world access patterns - Random random = new Random(42); - int hits = 0; - - for (int i = 0; i < ITERATIONS; i++) { - // Random field access (hotspot) - int fieldId = random.nextInt(RECORD_SIZE) + 1; - var value = record.getValue(fieldId); - if (value != null) { - hits++; - - // Trigger string decoding (potential hotspot) - if (value.getTypeCode() == com.imprint.types.TypeCode.STRING) { - if (value instanceof Value.StringBufferValue) { - ((Value.StringBufferValue) value).getValue(); - } else { - ((Value.StringValue) 
value).getValue(); - } - } - } + for (int i = 0; i < iterations; i++) { + // This is the hotspot we want to profile + var merged = ImprintOperations.merge(record1, record2); - // Some raw access (zero-copy path) - if (i % 10 == 0) { - record.getRawBytes(fieldId); + // Simulate some usage to prevent dead code elimination + if (i % 10_000 == 0) { + merged.getValue(1); // Trigger value decoding + merged.getRawBytes(5); // Trigger raw access } + merged.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Small merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + + /** + * Profile large merges (100-field records) + */ + private void profileLargeMerges() throws Exception { + System.out.println("\\n--- Large Merges (100-field records) ---"); + + var record1 = createTestRecord(100); + var record2 = createTestRecord(100); + int iterations = 50_000; + + System.out.printf("Beginning large merge profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = ImprintOperations.merge(record1, record2); + merged.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Large merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + + /** + * Profile overlapping merges (records with many duplicate field IDs) + */ + private void profileOverlappingMerges() throws Exception { + System.out.println("\\n--- Overlapping Merges (50%% field overlap) ---"); + + var record1 = createTestRecordWithFieldIds(new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + var record2 = createTestRecordWithFieldIds(new int[]{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + int iterations = 100_000; + + System.out.printf("Beginning overlapping merge profiling (%,d iterations)...%n", iterations); + long start = 
System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = ImprintOperations.merge(record1, record2); + merged.serializeToBuffer(); } long duration = System.nanoTime() - start; - System.out.printf("Completed %,d field accesses in %.2f ms (avg: %.1f ns/op, hits: %d)%n", - ITERATIONS, duration / 1_000_000.0, (double) duration / ITERATIONS, hits); + System.out.printf("Overlapping merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); + } + + /** + * Profile disjoint merges (no overlapping field IDs) + */ + private void profileDisjointMerges() throws Exception { + System.out.println("\\n--- Disjoint Merges (no field overlap) ---"); + + // Create records with completely separate field IDs + var record1 = createTestRecordWithFieldIds(new int[]{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}); + var record2 = createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}); + int iterations = 100_000; + + System.out.printf("Beginning disjoint merge profiling (%,d iterations)...%n", iterations); + long start = System.nanoTime(); + + for (int i = 0; i < iterations; i++) { + var merged = ImprintOperations.merge(record1, record2); + merged.serializeToBuffer(); + } + + long duration = System.nanoTime() - start; + System.out.printf("Disjoint merges: %.2f ms (avg: %.1f μs/merge)%n", + duration / 1_000_000.0, (double) duration / iterations / 1000.0); } @Test + @Tag("serialization") + @Tag("small-records") void profileSmallRecordSerialization() throws Exception { profileSerialization("small records", RECORD_SIZE, 100_000); } @Test + @Tag("serialization") + @Tag("large-records") void profileLargeRecordSerialization() throws Exception { profileSerialization("large records", LARGE_RECORD_SIZE, 500_000); } @Test + @Tag("projection") void profileProjectionOperations() throws Exception { System.out.println("Starting projection profiler test - attach profiler now..."); Thread.sleep(3000); - profileSmallProjections(); 
profileLargeProjections(); profileSelectiveProjections(); - profileProjectionMemoryAllocation(); } - /** - * Profile small projections (select 2-5 fields from 20-field records) - */ + @Test + @Tag("memory") + @Tag("allocation") + void profileMemoryAllocation() throws Exception { + System.out.println("Starting allocation profiler test..."); + Thread.sleep(3000); + + System.out.println("Beginning allocation profiling - watch for GC events..."); + + // Force allocation pressure to reveal GC hotspots + for (int batch = 0; batch < 1000; batch++) { + for (int i = 0; i < 1000; i++) { + var schemaId = new SchemaId(batch, i); + var builder = ImprintRecord.builder(schemaId); + + // Create strings of varying sizes (allocation pressure) + builder.field(1, Value.fromString("small")) + .field(2, Value.fromString("medium-length-string-" + i)) + .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) + .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays + + var record = builder.build(); + + // Some deserialization to trigger string decoding allocations + record.getValue(2); + record.getValue(3); + } + + if (batch % 100 == 0) { + System.out.printf("Completed batch %d/1000%n", batch); + } + } + + System.out.println("Allocation test complete - check GC logs and memory profiler"); + } + + // Rest of the methods remain the same... 
private void profileSmallProjections() throws Exception { System.out.println("\\n--- Small Projections (2-5 fields from 20-field records) ---"); @@ -130,9 +218,6 @@ private void profileSmallProjections() throws Exception { duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - /** - * Profile large projections (select 50-100 fields from 200-field records) - */ private void profileLargeProjections() throws Exception { System.out.println("\\n--- Large Projections (50 fields from 200-field records) ---"); @@ -164,9 +249,6 @@ private void profileLargeProjections() throws Exception { duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - /** - * Profile selective projections with different selectivity patterns - */ private void profileSelectiveProjections() throws Exception { System.out.println("\\n--- Selective Projections (various patterns) ---"); @@ -204,128 +286,15 @@ private void profileSelectiveProjections() throws Exception { } } - /** - * Profile memory allocation patterns during projection - */ - private void profileProjectionMemoryAllocation() throws Exception { - System.out.println("\\n--- Projection Memory Allocation Profiling ---"); - System.out.println("Watch for allocation hotspots and GC pressure..."); - - var sourceRecord = createTestRecord(50); - int[] projectFields = {1, 5, 10, 15, 20, 25}; // 6 fields - - System.out.println("Beginning projection allocation test..."); - - // Create allocation pressure to identify hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - // This should reveal allocation hotspots in: - // 1. ArrayList creation - // 2. ByteBuffer allocation for new payload - // 3. FieldRange objects - // 4. 
SimpleDirectoryEntry creation - var projected = ImprintOperations.project(sourceRecord, projectFields); - - // Force some field access to trigger additional allocations - projected.getValue(1); // String decoding allocation - projected.getValue(5); // Value wrapper allocation - projected.getRawBytes(10); // ByteBuffer slicing - } - - if (batch % 100 == 0) { - System.out.printf("Allocation batch %d/1000 complete%n", batch); - } - } - - System.out.println("Projection allocation test complete"); - } - - /** - * Profile the component operations within projection to identify bottlenecks - */ - @Test - void profileProjectionComponents() throws Exception { - System.out.println("\\n=== Projection Component Profiling ==="); - Thread.sleep(2000); - - var sourceRecord = createTestRecord(100); - int[] projectFields = {1, 10, 20, 30, 40, 50}; - int iterations = 100_000; - - // Profile individual components that might be hotspots: - - // 1. Field ID sorting and deduplication - System.out.println("Profiling field ID sorting..."); - long start = System.nanoTime(); - for (int i = 0; i < iterations; i++) { - // This mimics the sorting done in project() - int[] sorted = Arrays.stream(projectFields).distinct().sorted().toArray(); - blackhole(sorted); // Prevent optimization - } - long sortTime = System.nanoTime() - start; - System.out.printf("Field sorting: %.2f ms (%.1f ns/op)%n", - sortTime / 1_000_000.0, (double) sortTime / iterations); - - // 2. 
Directory scanning and range calculation - System.out.println("Profiling directory scanning..."); - var directory = sourceRecord.getDirectory(); - start = System.nanoTime(); - for (int i = 0; i < iterations; i++) { - // Simulate the directory scanning logic - int foundFields = 0; - for (var entry : directory) { - for (int fieldId : projectFields) { - if (entry.getId() == fieldId) { - foundFields++; - break; - } - } - } - blackhole(foundFields); - } - long scanTime = System.nanoTime() - start; - System.out.printf("Directory scanning: %.2f ms (%.1f ns/op)%n", - scanTime / 1_000_000.0, (double) scanTime / iterations); - - // 3. ByteBuffer operations (payload copying) - System.out.println("Profiling ByteBuffer operations..."); - var payload = sourceRecord.getBuffers().getPayload(); - start = System.nanoTime(); - for (int i = 0; i < iterations / 10; i++) { // Fewer iterations for heavy operation - // Simulate payload copying - var newPayload = java.nio.ByteBuffer.allocate(100); - newPayload.order(java.nio.ByteOrder.LITTLE_ENDIAN); - - // Copy some ranges (like buildPayloadFromRanges does) - for (int j = 0; j < 6; j++) { - var slice = payload.duplicate(); - slice.position(j * 10).limit((j + 1) * 10); - newPayload.put(slice); - } - newPayload.flip(); - blackhole(newPayload); - } - long bufferTime = System.nanoTime() - start; - System.out.printf("ByteBuffer operations: %.2f ms (%.1f μs/op)%n", - bufferTime / 1_000_000.0, (double) bufferTime / (iterations / 10) / 1000.0); - } - - /** - * Profile serialization performance with records of a given size. - * This method abstracts the core serialization profiling logic to work - * with records of different sizes and complexities. 
- */ private void profileSerialization(String testName, int recordSize, int iterations) throws Exception { System.out.printf("Starting %s serialization profiler test...%n", testName); Thread.sleep(3000); var schemaId = new SchemaId(1, 0x12345678); - System.out.printf("Beginning %s serialization profiling (%,d iterations, %d fields)...%n", - testName, iterations, recordSize); + System.out.printf("Beginning %s serialization profiling (%,d iterations, %d fields)...%n", testName, iterations, recordSize); long start = System.nanoTime(); - // Create and serialize many records (allocation hotspot) for (int i = 0; i < iterations; i++) { var builder = ImprintRecord.builder(schemaId); @@ -380,46 +349,6 @@ var record = builder.build(); testName, duration / 1_000_000.0, (double) duration / iterations / 1000.0); } - @Test - void profileMemoryAllocation() throws Exception { - System.out.println("Starting allocation profiler test..."); - Thread.sleep(3000); - - System.out.println("Beginning allocation profiling - watch for GC events..."); - - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var builder = ImprintRecord.builder(schemaId); - - // Create strings of varying sizes (allocation pressure) - builder.field(1, Value.fromString("small")) - .field(2, Value.fromString("medium-length-string-" + i)) - .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + "-" + i)) - .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = builder.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); - } - - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); - } - } - - System.out.println("Allocation test complete - check GC logs and memory profiler"); - } - - // Helper methods and classes - - 
private ImprintRecord createTestRecord() throws Exception { - return createTestRecord(RECORD_SIZE); - } - private ImprintRecord createTestRecord(int recordSize) throws Exception { var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); @@ -443,6 +372,28 @@ private ImprintRecord createTestRecord(int recordSize) throws Exception { return builder.build(); } + private ImprintRecord createTestRecordWithFieldIds(int[] fieldIds) throws Exception { + var builder = ImprintRecord.builder(new SchemaId(1, 0xdeadbeef)); + for (int fieldId : fieldIds) { + switch (fieldId % 4) { + case 0: + builder.field(fieldId, Value.fromInt32(fieldId * 100)); + break; + case 1: + builder.field(fieldId, Value.fromString("field-value-" + fieldId)); + break; + case 2: + builder.field(fieldId, Value.fromFloat64(fieldId * 3.14159)); + break; + case 3: + builder.field(fieldId, Value.fromBytes(("bytes-" + fieldId).getBytes())); + break; + } + } + + return builder.build(); + } + private static class ProjectionPattern { final String name; final int[] fields; @@ -459,11 +410,4 @@ private int[] generateRandomFields(Random random, int maxField, int count) { .sorted() .toArray(); } - - private void blackhole(Object obj) { - // Prevent dead code elimination - if (obj.hashCode() == System.nanoTime()) { - System.out.println("Never happens"); - } - } } \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintStreamTest.java b/src/test/java/com/imprint/stream/ImprintStreamTest.java similarity index 85% rename from src/test/java/com/imprint/core/ImprintStreamTest.java rename to src/test/java/com/imprint/stream/ImprintStreamTest.java index 8d5b843..d2c2b69 100644 --- a/src/test/java/com/imprint/core/ImprintStreamTest.java +++ b/src/test/java/com/imprint/stream/ImprintStreamTest.java @@ -1,5 +1,8 @@ -package com.imprint.core; +package com.imprint.stream; +import com.imprint.core.ImprintRecord; +import com.imprint.core.SchemaId; +import com.imprint.stream.ImprintStream; import 
org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; @@ -12,25 +15,25 @@ void shouldProjectAndMergeCorrectly() throws Exception { var schemaId2 = new SchemaId(2, 2); var schemaId3 = new SchemaId(3, 3); - ImprintRecord recordA = ImprintRecord.builder(schemaId1) + var recordA = ImprintRecord.builder(schemaId1) .field(1, "A1") .field(2, 100) .field(3, true) .build(); - ImprintRecord recordB = ImprintRecord.builder(schemaId2) + var recordB = ImprintRecord.builder(schemaId2) .field(2, 200) // Overlaps with A, should be ignored .field(4, "B4") .build(); - ImprintRecord recordC = ImprintRecord.builder(schemaId3) + var recordC = ImprintRecord.builder(schemaId3) .field(5, 3.14) .field(1, "C1") // Overlaps with A, should be ignored .build(); // --- Execution --- - // Define a chain of operations - ImprintRecord finalRecord = ImprintStream.of(recordA) + // Chain of operations + var finalRecord = ImprintStream.of(recordA) .project(1, 3) // Keep {1, 3} from A. Current state: {1:A, 3:A} .mergeWith(recordB) // Merge B. {2:B, 4:B} are added. Current state: {1:A, 3:A, 2:B, 4:B} .mergeWith(recordC) // Merge C. {5:C} is added. {1:C} is ignored. 
Final state: {1:A, 3:A, 2:B, 4:B, 5:C} @@ -63,7 +66,7 @@ void shouldProjectAfterMerge() throws Exception { var recordA = ImprintRecord.builder(new SchemaId(1, 1)).field(1, "A").field(2, 100).build(); var recordB = ImprintRecord.builder(new SchemaId(1, 1)).field(2, 200).field(3, "B").build(); - ImprintRecord finalRecord = ImprintStream.of(recordA) + var finalRecord = ImprintStream.of(recordA) .mergeWith(recordB) // virtual record is {1:A, 2:A, 3:B} .project(1, 3) // final record is {1:A, 3:B} .toRecord(); From 99893ea9a82ae90d2fbf3f6d2975fa96e27e2485 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 12 Jun 2025 16:15:17 -0400 Subject: [PATCH 44/53] final optimization and reorganization into better project structure --- build.gradle | 1 - .../benchmark/ComparisonBenchmark.java | 17 +- .../AbstractSerializingBenchmark.java | 4 +- .../AvroSerializingBenchmark.java | 2 +- .../FlatBuffersSerializingBenchmark.java | 3 +- .../ImprintSerializingBenchmark.java | 24 +- .../JacksonSerializingBenchmark.java | 2 +- .../KryoSerializingBenchmark.java | 2 +- .../MessagePackSerializingBenchmark.java | 2 +- .../ProtobufSerializingBenchmark.java | 2 +- .../SerializingBenchmark.java | 4 +- .../ThriftSerializingBenchmark.java | 2 +- src/main/java/com/imprint/core/Directory.java | 56 +- src/main/java/com/imprint/core/Header.java | 23 + .../java/com/imprint/core/ImprintBuffers.java | 407 --------- .../java/com/imprint/core/ImprintRecord.java | 793 ++++++++++++------ .../imprint/core/ImprintRecordBuilder.java | 119 ++- .../java/com/imprint/error/ErrorType.java | 1 + .../com/imprint/ops/ImprintOperations.java | 456 +++++++--- .../com/imprint/stream/ImprintStream.java | 259 ------ .../java/com/imprint/IntegrationTest.java | 242 ++++++ .../com/imprint/core/ImprintRecordTest.java | 486 ++++++----- .../imprint/ops/ImprintOperationsTest.java | 340 +++++++- .../com/imprint/profile/ProfilerTest.java | 124 +-- .../com/imprint/stream/ImprintStreamTest.java | 81 -- 25 files changed, 1928 
insertions(+), 1524 deletions(-) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/AbstractSerializingBenchmark.java (84%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/AvroSerializingBenchmark.java (99%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/FlatBuffersSerializingBenchmark.java (97%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/ImprintSerializingBenchmark.java (78%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/JacksonSerializingBenchmark.java (98%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/KryoSerializingBenchmark.java (99%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/MessagePackSerializingBenchmark.java (98%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/ProtobufSerializingBenchmark.java (98%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/SerializingBenchmark.java (79%) rename src/jmh/java/com/imprint/benchmark/{competitors => serializers}/ThriftSerializingBenchmark.java (99%) delete mode 100644 src/main/java/com/imprint/core/ImprintBuffers.java delete mode 100644 src/main/java/com/imprint/stream/ImprintStream.java delete mode 100644 src/test/java/com/imprint/stream/ImprintStreamTest.java diff --git a/build.gradle b/build.gradle index 26b2be5..b5f9126 100644 --- a/build.gradle +++ b/build.gradle @@ -51,7 +51,6 @@ dependencies { jmhImplementation 'org.msgpack:msgpack-core:0.9.8' jmhImplementation 'org.msgpack:jackson-dataformat-msgpack:0.9.8' jmhImplementation 'org.apache.thrift:libthrift:0.19.0' - // Required for generated Thrift code on JDK 11+ jmhImplementation 'javax.annotation:javax.annotation-api:1.3.2' } diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 92b3ceb..f47da20 100644 --- 
a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -1,7 +1,7 @@ package com.imprint.benchmark; -import com.imprint.benchmark.competitors.*; -import com.imprint.benchmark.competitors.SerializingBenchmark; +import com.imprint.benchmark.serializers.*; +import com.imprint.benchmark.serializers.SerializingBenchmark; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -16,7 +16,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1) -@Measurement(iterations = 10, time = 1) +@Measurement(iterations = 7, time = 1) @Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { @@ -28,10 +28,9 @@ public class ComparisonBenchmark { new AvroSerializingBenchmark(), new ThriftSerializingBenchmark(), new KryoSerializingBenchmark(), - new MessagePackSerializingBenchmark() - ); + new MessagePackSerializingBenchmark()); - @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack"}) + @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack", "CapnProto"}) public String framework; private SerializingBenchmark serializingBenchmark; @@ -47,7 +46,7 @@ public void setup() { DataGenerator.TestRecord testRecord1 = DataGenerator.createTestRecord(); DataGenerator.TestRecord testRecord2 = DataGenerator.createTestRecord(); - // Setup the competitor with the data + // Setup the framework with the data serializingBenchmark.setup(testRecord1, testRecord2); } @@ -56,7 +55,7 @@ public void serialize(Blackhole bh) { serializingBenchmark.serialize(bh); } - @Benchmark + //@Benchmark public void deserialize(Blackhole bh) { serializingBenchmark.deserialize(bh); } @@ -71,7 +70,7 @@ public void mergeAndSerialize(Blackhole bh) { serializingBenchmark.mergeAndSerialize(bh); } - @Benchmark + 
//@Benchmark public void accessField(Blackhole bh) { serializingBenchmark.accessField(bh); } diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java similarity index 84% rename from src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java index 2f5476c..4f53203 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AbstractSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/AbstractSerializingBenchmark.java @@ -1,10 +1,10 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; import org.openjdk.jmh.infra.Blackhole; /** - * A minimal base class for competitors, holding the test data. + * A minimal base class for serialization frameworks to compare against, holding the test data. 
*/ public abstract class AbstractSerializingBenchmark implements SerializingBenchmark { diff --git a/src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java similarity index 99% rename from src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java index dc7278c..f3e5b8a 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/AvroSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/AvroSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; import org.apache.avro.Schema; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java similarity index 97% rename from src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java index a9fe5c8..846b15c 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/FlatBuffersSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/FlatBuffersSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.google.flatbuffers.FlatBufferBuilder; import com.imprint.benchmark.DataGenerator; @@ -31,7 +31,6 @@ private ByteBuffer buildRecord(DataGenerator.TestRecord pojo) { int tagsVectorOffset = TestRecord.createTagsVector(builder, tagsOffsets); int[] metadataKeysOffsets = pojo.metadata.keySet().stream().mapToInt(builder::createString).toArray(); - int[] metadataValuesOffsets = pojo.metadata.values().stream().mapToInt(builder::createString).toArray(); // 
This is not correct FlatBuffers map creation, it's a placeholder. // A proper implementation would require a table for each entry. // For this benchmark, we'll just serialize the keys vector. diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java similarity index 78% rename from src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java index 26bb495..e71a5c0 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ImprintSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java @@ -1,7 +1,6 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; -import com.imprint.ops.ImprintOperations; import com.imprint.core.ImprintRecord; import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; @@ -53,19 +52,6 @@ private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintE return builder.build(); } - private ImprintRecordBuilder preBuildRecord(DataGenerator.TestRecord pojo) throws ImprintException { - var builder = ImprintRecord.builder(SCHEMA_ID); - builder.field(0, pojo.id); - builder.field(1, pojo.timestamp); - builder.field(2, pojo.flags); - builder.field(3, pojo.active); - builder.field(4, pojo.value); - builder.field(5, pojo.data); - builder.field(6, pojo.tags); - builder.field(7, pojo.metadata); - return builder; - } - @Override public void serialize(Blackhole bh) { try { @@ -87,8 +73,8 @@ public void deserialize(Blackhole bh) { @Override public void projectAndSerialize(Blackhole bh) { try { - ImprintRecord record = ImprintRecord.deserialize(this.serializedRecord1); - ImprintRecord projected = record.project(0, 1, 6); + // Should use zero-copy projection directly from 
existing record + ImprintRecord projected = this.imprintRecord1.project(0, 1, 6); bh.consume(projected.serializeToBuffer()); } catch (ImprintException e) { throw new RuntimeException(e); @@ -98,9 +84,9 @@ public void projectAndSerialize(Blackhole bh) { @Override public void mergeAndSerialize(Blackhole bh) { try { - var r1 = ImprintRecord.deserialize(this.serializedRecord1); + // Use zero-copy merge - keep one record, deserialize the other var r2 = ImprintRecord.deserialize(this.serializedRecord2); - var merged = ImprintOperations.merge(r1, r2); + var merged = this.imprintRecord1.merge(r2); bh.consume(merged.serializeToBuffer()); } catch (ImprintException e) { throw new RuntimeException(e); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java similarity index 98% rename from src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java index 829b073..d58bc19 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/JacksonSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/JacksonSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.fasterxml.jackson.databind.ObjectMapper; import com.imprint.benchmark.DataGenerator; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java similarity index 99% rename from src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java index 1223e06..6780513 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/KryoSerializingBenchmark.java +++ 
b/src/jmh/java/com/imprint/benchmark/serializers/KryoSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java similarity index 98% rename from src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java index b596e6d..9dd275f 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/MessagePackSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/MessagePackSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.fasterxml.jackson.databind.ObjectMapper; import com.imprint.benchmark.DataGenerator; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java similarity index 98% rename from src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java index 72ad38f..1f6239e 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ProtobufSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/ProtobufSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; import com.imprint.benchmark.proto.TestRecordOuterClass; diff --git a/src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java 
b/src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java similarity index 79% rename from src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java index 595caa6..a6358b8 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/SerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/SerializingBenchmark.java @@ -1,10 +1,10 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; import org.openjdk.jmh.infra.Blackhole; /** - * Defines the contract for a serialization competitor in the benchmark. + * Defines the contract for a serialization framework in the benchmark. */ public interface SerializingBenchmark { String name(); diff --git a/src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java similarity index 99% rename from src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java rename to src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java index 83c0812..18cf9bb 100644 --- a/src/jmh/java/com/imprint/benchmark/competitors/ThriftSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/ThriftSerializingBenchmark.java @@ -1,4 +1,4 @@ -package com.imprint.benchmark.competitors; +package com.imprint.benchmark.serializers; import com.imprint.benchmark.DataGenerator; import com.imprint.benchmark.thrift.ProjectedRecord; diff --git a/src/main/java/com/imprint/core/Directory.java b/src/main/java/com/imprint/core/Directory.java index cb449b3..ddaf208 100644 --- a/src/main/java/com/imprint/core/Directory.java +++ b/src/main/java/com/imprint/core/Directory.java @@ -5,6 +5,7 @@ import lombok.Setter; import lombok.Value; +import java.util.List; import java.util.Objects; /** @@ -27,6 +28,38 @@ 
public interface Directory { */ int getOffset(); + /** + * A view interface for accessing directory entries efficiently. + * Provides both access to individual entries and full directory materialization. + */ + interface DirectoryView { + /** + * Find a directory entry by field ID. + * @param fieldId The field ID to search for + * @return The directory entry if found, null otherwise + */ + Directory findEntry(int fieldId); + + /** + * Get all directory entries as a list, with full eager deserialization if necessary. + * @return List of all directory entries in field ID order + */ + List toList(); + + /** + * Get the count of directory entries without parsing all entries. + * @return Number of entries in the directory + */ + int size(); + + /** + * Create an iterator for lazy directory traversal. + * For buffer-backed views, this avoids parsing the entire directory upfront. + * @return Iterator over directory entries in field ID order + */ + java.util.Iterator iterator(); + } + /** * Immutable representation of the Imprint Directory used for deserialization, * merging, and field projections @@ -43,27 +76,4 @@ public Entry(short id, TypeCode typeCode, int offset) { this.offset = offset; } } - - /** - * Mutable representation of the Imprint Directory bound with corresponding type value - * used for record building through {@link ImprintRecordBuilder} and subsequent serialization. 
- */ - @Getter - class Builder implements Directory { - private final short id; - private final com.imprint.types.Value value; - @Setter - private int offset; - - Builder(short id, com.imprint.types.Value value) { - this.id = id; - this.value = value; - this.offset = -1; - } - - @Override - public TypeCode getTypeCode() { - return value.getTypeCode(); - } - } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/Header.java b/src/main/java/com/imprint/core/Header.java index 388d491..aec0e9b 100644 --- a/src/main/java/com/imprint/core/Header.java +++ b/src/main/java/com/imprint/core/Header.java @@ -1,7 +1,10 @@ package com.imprint.core; +import com.imprint.Constants; import lombok.Value; +import java.nio.ByteBuffer; + /** * The header of an Imprint record. */ @@ -10,4 +13,24 @@ public class Header { Flags flags; SchemaId schemaId; int payloadSize; + + /** + * Serialize this header to a ByteBuffer. + * Follows the Imprint header format: magic(1) + version(1) + flags(1) + fieldSpaceId(4) + schemaHash(4) + payloadSize(4). + */ + public void serialize(ByteBuffer buffer) { + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put(flags.getValue()); + buffer.putInt(schemaId.getFieldSpaceId()); + buffer.putInt(schemaId.getSchemaHash()); + buffer.putInt(payloadSize); + } + + /** + * Static helper for serializing any header to a ByteBuffer. 
+ */ + public static void serialize(Header header, ByteBuffer buffer) { + header.serialize(buffer); + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java deleted file mode 100644 index 845892a..0000000 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ /dev/null @@ -1,407 +0,0 @@ -package com.imprint.core; - -import com.imprint.Constants; -import com.imprint.error.ErrorType; -import com.imprint.error.ImprintException; -import com.imprint.types.TypeCode; -import com.imprint.util.VarInt; -import lombok.Getter; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.*; - -/** - * Manages the raw buffers for an Imprint record with lazy directory parsing. - * Encapsulates all buffer operations and provides zero-copy field access. - * - *

Buffer Layout Overview:

- *
- * directoryBuffer: [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
- * payload:         [Field 1 data][Field 2 data]...[Field N data]
- * 
- * - *

Each DirectoryEntry contains: [fieldId:2bytes][typeCode:1byte][offset:4bytes]

- */ -@Getter -public final class ImprintBuffers { - private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) - private final ByteBuffer payload; // Read-only payload view - - // Lazy-loaded directory state. - private TreeMap parsedDirectory; - private boolean directoryParsed = false; - private int directoryCount = -1; - - /** - * Creates buffers from raw data (used during deserialization). - */ - public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { - this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); - this.payload = payload.asReadOnlyBuffer(); - } - - /** - * Creates buffers from a pre-sorted list of entries (most efficient builder path). - * Immediately creates the parsed index and the serialized buffer. - */ - public ImprintBuffers(List sortedDirectory, ByteBuffer payload) { - this.directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); - this.payload = payload.asReadOnlyBuffer(); - } - - /** - * Creates buffers from a pre-parsed and sorted directory map containing final, simple entries. - * This is the most efficient path, as it avoids any further parsing or sorting. The provided - * map becomes the definitive parsed directory. - */ - @SuppressWarnings("unchecked") - public ImprintBuffers(Map parsedDirectory, ByteBuffer payload) { - this.directoryBuffer = ImprintBuffers.createDirectoryBufferFromSortedMap(Objects.requireNonNull(parsedDirectory)); - this.payload = payload.asReadOnlyBuffer(); - this.parsedDirectory = (TreeMap) parsedDirectory; - this.directoryParsed = true; - this.directoryCount = parsedDirectory.size(); - } - - /** - * Get a zero-copy ByteBuffer view of a field's data. - * Optimized for the most common use case - single field access. 
- */ - public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { - var entry = findDirectoryEntry(fieldId); - if (entry == null) - return null; - - int startOffset = entry.getOffset(); - int endOffset = findEndOffset(entry); - - if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || - endOffset > payload.limit() || startOffset > endOffset) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); - } - - var fieldBuffer = payload.duplicate(); - fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer; - } - - /** - * Get a zero-copy ByteBuffer view of a field's data using a pre-fetched DirectoryEntry. - * This avoids the cost of re-finding the entry. - */ - public ByteBuffer getFieldBuffer(Directory entry) throws ImprintException { - if (entry == null) - return null; - - int startOffset = entry.getOffset(); - int endOffset = findEndOffset(entry); - - if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || - endOffset > payload.limit() || startOffset > endOffset) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); - } - - var fieldBuffer = payload.duplicate(); - fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer; - } - - /** - * Find a directory entry for the given field ID using the most efficient method. - *

- * Strategy: - * - If parsed: TreeMap lookup - * - If raw: Binary search on raw bytes to avoid full unwinding of the directory - */ - public Directory findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) - return parsedDirectory.get(fieldId); - else - return findFieldEntryInRawDirectory(fieldId); - } - - /** - * Get the full directory, parsing it if necessary. - * Returns the values in fieldId order thanks to TreeMap. - */ - public List getDirectory() { - ensureDirectoryParsed(); - return new ArrayList<>(parsedDirectory.values()); - } - - /** - * Get directory count without parsing. - */ - public int getDirectoryCount() { - if (directoryParsed) - return parsedDirectory.size(); - - try { - return getOrParseDirectoryCount(); - } catch (ImprintException e) { - return 0; - } - } - - /** - * Create a new buffer containing the serialized directory. - */ - public ByteBuffer serializeDirectory() { - // The directoryBuffer is created on construction and is read-only. - // If constructed from raw bytes, it's a view of the original. - // If constructed from a list, it's a fresh buffer. In both cases, it's ready. - return directoryBuffer.duplicate(); - } - - // ========== PRIVATE METHODS ========== - - /** - * Binary search on raw directory bytes to find a specific field. - * This avoids parsing the entire directory for single field lookups. - */ - private Directory findFieldEntryInRawDirectory(int fieldId) throws ImprintException { - var searchBuffer = directoryBuffer.duplicate(); - searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int count = getOrParseDirectoryCount(); - if (count == 0) - return null; - - // Advance buffer past the varint to get to the start of the entries. 
- VarInt.decode(searchBuffer); - int directoryStartPos = searchBuffer.position(); - - int low = 0; - int high = count - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - - if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Directory entry at position " + entryPos + " exceeds buffer limit"); - } - - searchBuffer.position(entryPos); - short midFieldId = searchBuffer.getShort(); - - if (midFieldId < fieldId) { - low = mid + 1; - } else if (midFieldId > fieldId) { - high = mid - 1; - } else { - // Found it - read the complete entry - searchBuffer.position(entryPos); - return deserializeDirectoryEntry(searchBuffer); - } - } - - return null; - } - - /** - * Find the end offset for a field by looking at the next field's offset. - */ - private int findEndOffset(Directory entry) throws ImprintException { - if (directoryParsed) { - return findNextOffsetInParsedDirectory(entry.getId()); - } else { - return findNextOffsetInRawDirectory(entry.getId()); - } - } - - /** - * Find the end offset using TreeMap's efficient navigation methods. - */ - private int findNextOffsetInParsedDirectory(int currentFieldId) { - var nextEntry = parsedDirectory.higherEntry(currentFieldId); - if (nextEntry != null) - return nextEntry.getValue().getOffset(); - else - return payload.limit(); - - } - - private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { - var scanBuffer = directoryBuffer.duplicate(); - scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int count = getOrParseDirectoryCount(); - if (count == 0) - return payload.limit(); - - // Advance buffer past the varint to get to the start of the entries. 
- VarInt.decode(scanBuffer); - int directoryStartPos = scanBuffer.position(); - - int low = 0; - int high = count - 1; - int nextOffset = payload.limit(); - - // Binary search for the first field with fieldId > currentFieldId - while (low <= high) { - int mid = (low + high) >>> 1; - int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - - if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) - break; - - scanBuffer.position(entryPos); - short fieldId = scanBuffer.getShort(); - scanBuffer.get(); // skip type - int offset = scanBuffer.getInt(); - - if (fieldId > currentFieldId) { - nextOffset = offset; - high = mid - 1; - } else { - low = mid + 1; - } - } - - return nextOffset; - } - - /** - * Parse the full directory if not already parsed. - * Creates TreeMap for both fast lookup and ordering needed for binary search on offSets. - */ - private void ensureDirectoryParsed() { - if (directoryParsed) - return; - - try { - var parseBuffer = directoryBuffer.duplicate(); - parseBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int count = getOrParseDirectoryCount(parseBuffer); - this.parsedDirectory = new TreeMap<>(); - - for (int i = 0; i < count; i++) { - var entry = deserializeDirectoryEntry(parseBuffer); - this.parsedDirectory.put((int) entry.getId() , entry); - } - - this.directoryParsed = true; - } catch (ImprintException e) { - // This can happen with a corrupted directory. - // In this case, we'll just have an empty (but valid) parsed directory. 
- this.parsedDirectory = new TreeMap<>(); - this.directoryParsed = true; // Mark as parsed to avoid repeated errors - } - } - - private int getOrParseDirectoryCount() throws ImprintException { - if (directoryCount != -1) { - return directoryCount; - } - try { - this.directoryCount = VarInt.decode(directoryBuffer.duplicate()).getValue(); - } catch (ImprintException e) { - this.directoryCount = 0; // Cache as 0 on error - throw e; // rethrow - } - return this.directoryCount; - } - - private int getOrParseDirectoryCount(ByteBuffer buffer) throws ImprintException { - // This method does not cache the count because it's used during parsing - // where the buffer is transient. Caching is only for the instance's primary buffer. - return VarInt.decode(buffer).getValue(); - } - - /** - * Creates a read-only buffer containing the serialized directory. - * The input collection does not need to be sorted. - */ - static ByteBuffer createDirectoryBuffer(Collection directory) { - if (directory == null || directory.isEmpty()) { - ByteBuffer buffer = ByteBuffer.allocate(1); - VarInt.encode(0, buffer); - buffer.flip(); - return buffer; - } - - // Ensure sorted order for binary search compatibility. 
- ArrayList sortedDirectory; - if (directory instanceof ArrayList && isSorted((ArrayList)directory)) { - sortedDirectory = (ArrayList) directory; - } else { - sortedDirectory = new ArrayList<>(directory); - sortedDirectory.sort(null); - } - - int count = sortedDirectory.size(); - int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); - ByteBuffer buffer = ByteBuffer.allocate(size); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.encode(count, buffer); - for (var entry : sortedDirectory) { - serializeDirectoryEntry(entry, buffer); - } - - buffer.flip(); - return buffer; - } - - static ByteBuffer createDirectoryBufferFromSortedMap(Map directoryMap) { - if (directoryMap == null || directoryMap.isEmpty()) { - ByteBuffer buffer = ByteBuffer.allocate(1); - VarInt.encode(0, buffer); - buffer.flip(); - return buffer; - } - - int count = directoryMap.size(); - int size = VarInt.encodedLength(count) + (count * Constants.DIR_ENTRY_BYTES); - var buffer = ByteBuffer.allocate(size); - buffer.order(ByteOrder.LITTLE_ENDIAN); - - VarInt.encode(count, buffer); - for (var entry : directoryMap.values()) { - serializeDirectoryEntry(entry, buffer); - } - - buffer.flip(); - return buffer; - } - - private static boolean isSorted(ArrayList list) { - for (int i = 0; i < list.size() - 1; i++) { - if (list.get(i).getId() > list.get(i + 1).getId()) { - return false; - } - } - return true; - } - - /** - * Serialize a single directory entry to the buffer. - * Format: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - */ - private static void serializeDirectoryEntry(Directory entry, ByteBuffer buffer) { - buffer.putShort(entry.getId()); - buffer.put(entry.getTypeCode().getCode()); - buffer.putInt(entry.getOffset()); - } - - /** - * Deserialize a single directory entry from the buffer. 
- * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - */ - private Directory deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); - - short id = buffer.getShort(); - var typeCode = TypeCode.fromByte(buffer.get()); - int offset = buffer.getInt(); - - return new Directory.Entry(id, typeCode, offset); - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index a34c7df..e6f9de6 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -1,177 +1,218 @@ package com.imprint.core; import com.imprint.Constants; -import com.imprint.ops.ImprintOperations; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; -import com.imprint.types.MapKey; +import com.imprint.ops.ImprintOperations; import com.imprint.types.TypeCode; import com.imprint.types.Value; import com.imprint.util.VarInt; + +import lombok.AccessLevel; +import lombok.EqualsAndHashCode; import lombok.Getter; +import lombok.ToString; +import lombok.experimental.NonFinal; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Iterator; import java.util.List; -import java.util.Map; +import java.util.NoSuchElementException; import java.util.Objects; /** - * An Imprint record containing a header and buffer management. - * Delegates all buffer operations to ImprintBuffers for cleaner separation. + * Imprint Record + *

+ * This is the primary way to work with Imprint records, providing: + * - Zero-copy field access via binary search + * - Direct bytes-to-bytes operations (merge, project) + * - Lazy deserializing operations */ -@Getter -public final class ImprintRecord { - private final Header header; - private final ImprintBuffers buffers; - +@lombok.Value +@EqualsAndHashCode(of = "serializedBytes") +@ToString(of = {"header"}) +public class ImprintRecord { + ByteBuffer serializedBytes; + + @Getter(AccessLevel.PUBLIC) + Header header; + + @Getter(AccessLevel.PACKAGE) + // Raw directory bytes (read-only) + ByteBuffer directoryBuffer; + + @Getter(AccessLevel.PACKAGE) + // Raw payload bytes + ByteBuffer payload; + + @NonFinal + @Getter(AccessLevel.NONE) + //Directory View cache to allow for easier mutable operations needed for lazy initialization + Directory.DirectoryView directoryView; + /** - * Creates a record from deserialized components. + * Package-private constructor for @Value that creates immutable ByteBuffer views. */ - private ImprintRecord(Header header, ImprintBuffers buffers) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = Objects.requireNonNull(buffers, "Buffers cannot be null"); - } - + ImprintRecord(ByteBuffer serializedBytes, Header header, ByteBuffer directoryBuffer, ByteBuffer payload) { + this.serializedBytes = serializedBytes.asReadOnlyBuffer(); + this.header = Objects.requireNonNull(header); + this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); + this.payload = payload.asReadOnlyBuffer(); + this.directoryView = null; + } + + // ========== STATIC FACTORY METHODS ========== + /** - * Creates a record from a pre-sorted list of entries (most efficient builder path). + * Create a builder for constructing new ImprintRecord instances. 
*/ - public ImprintRecord(Header header, List sortedDirectory, ByteBuffer payload) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(sortedDirectory, payload); + public static ImprintRecordBuilder builder(SchemaId schemaId) { + return new ImprintRecordBuilder(schemaId); + } + + public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { + return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } /** - * Creates a record from a pre-built and sorted FastUtil map (most efficient builder path). + * Deserialize an ImprintRecord from bytes. */ - public ImprintRecord(Header header, Map parsedDirectory, ByteBuffer payload) { - this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.buffers = new ImprintBuffers(parsedDirectory, payload); + public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { + return fromBytes(ByteBuffer.wrap(bytes)); } - // ========== FIELD ACCESS METHODS ========== - + public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { + return fromBytes(buffer); + } + /** - * Get a value by field ID, deserializing it on demand. - * Returns null if the field is not found. + * Create a ImprintRecord from complete serialized bytes. 
*/ - public Value getValue(int fieldId) throws ImprintException { - var entry = buffers.findDirectoryEntry(fieldId); - if (entry == null) - return null; - - var fieldBuffer = buffers.getFieldBuffer(fieldId); - if (fieldBuffer == null) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get buffer for field " + fieldId); - - return deserializeValue(entry.getTypeCode(), fieldBuffer); + public static ImprintRecord fromBytes(ByteBuffer serializedBytes) throws ImprintException { + Objects.requireNonNull(serializedBytes, "Serialized bytes cannot be null"); + + var buffer = serializedBytes.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header + var header = parseHeader(buffer); + + // Extract directory and payload sections + var parsedBuffers = parseBuffersFromSerialized(serializedBytes); + + return new ImprintRecord(serializedBytes, header, parsedBuffers.directoryBuffer, parsedBuffers.payload); + } + + + // ========== ZERO-COPY OPERATIONS ========== + + /** + * Merge with another ImprintRecord using pure byte operations. + * Results in a new ImprintRecord without any object creation. + */ + public ImprintRecord merge(ImprintRecord other) throws ImprintException { + var mergedBytes = ImprintOperations.mergeBytes(this.serializedBytes, other.serializedBytes); + return fromBytes(mergedBytes); } - + /** - * Get raw bytes for a field without deserializing. + * Project fields using pure byte operations. + * Results in a new ImprintRecord without any object creation. */ - public ByteBuffer getRawBytes(int fieldId) { - try { - return buffers.getFieldBuffer(fieldId); - } catch (ImprintException e) { - return null; - } + public ImprintRecord project(int... fieldIds) throws ImprintException { + var projectedBytes = ImprintOperations.projectBytes(this.serializedBytes, fieldIds); + return fromBytes(projectedBytes); } - + /** - * Get raw bytes for a field using a pre-fetched DirectoryEntry. - * This avoids the cost of re-finding the entry metadata. 
+ * Chain multiple operations efficiently. + * Each operation works on bytes without creating intermediate objects. */ - public ByteBuffer getRawBytes(Directory entry) { - try { - return buffers.getFieldBuffer(entry); - } catch (ImprintException e) { - return null; - } + public ImprintRecord projectAndMerge(ImprintRecord other, int... projectFields) throws ImprintException { + return this.project(projectFields).merge(other); } - + /** - * Project a subset of fields from this record. - * - * @param fieldIds Array of field IDs to include in the projection - * @return New ImprintRecord containing only the requested fields + * Get the raw serialized bytes. + * This is the most efficient way to pass the record around. */ - public ImprintRecord project(int... fieldIds) { - return ImprintOperations.project(this, fieldIds); + public ByteBuffer getSerializedBytes() { + return serializedBytes.duplicate(); } - + /** - * Merge another record into this one. - * For duplicate fields, this record's values take precedence. - * - * @param other The record to merge with this one - * @return New ImprintRecord containing merged fields - * @throws ImprintException if merge fails + * Get a DirectoryView for straight through directory access. */ - public ImprintRecord merge(ImprintRecord other) throws ImprintException { - return ImprintOperations.merge(this, other); + public Directory.DirectoryView getDirectoryView() { + if (directoryView == null) { + directoryView = new ImprintDirectoryView(); + } + return directoryView; } - + /** - * Get the directory (parsing it if necessary). + * Get the directory list. */ public List getDirectory() { - return buffers.getDirectory(); + return getDirectoryView().toList(); } - + /** - * Finds a directory entry by its field ID. - * This is an efficient lookup that avoids full directory deserialization if possible. - * - * @param fieldId The ID of the field to find. - * @return The DirectoryEntry if found, otherwise null. 
+ * Get raw bytes for a field without deserializing. */ - public Directory getDirectoryEntry(int fieldId) { + public ByteBuffer getRawBytes(int fieldId) { try { - return buffers.findDirectoryEntry(fieldId); + return getFieldBuffer(fieldId); } catch (ImprintException e) { - // This can happen with a corrupted directory, in which case we assume it doesn't exist. return null; } } - + /** - * Checks if a field with the given ID exists in the record. - * - * @param fieldId The ID of the field to check. - * @return true if the field exists, false otherwise. + * Get raw bytes for a field by short ID. */ - public boolean hasField(int fieldId) { - try { - return buffers.findDirectoryEntry(fieldId) != null; - } catch (ImprintException e) { - // This can happen with a corrupted directory, in which case we assume it doesn't exist. - return false; - } - } - - // ========== TYPED GETTERS ========== - - public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + public ByteBuffer getRawBytes(short fieldId) { + return getRawBytes((int) fieldId); } - - public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + + /** + * Estimate the serialized size of this record. + */ + public int estimateSerializedSize() { + return serializedBytes.remaining(); } - - public long getInt64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + + /** + * Get a field value by ID. + * Uses zero-copy binary search to locate the field. 
+ */ + public Value getValue(int fieldId) throws ImprintException { + var entry = getDirectoryView().findEntry(fieldId); + if (entry == null) return null; + + var fieldBuffer = getFieldBuffer(fieldId); + if (fieldBuffer == null) return null; + + return deserializeValue(entry.getTypeCode(), fieldBuffer); } - - public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + + /** + * Check if a field exists without deserializing it. + */ + public boolean hasField(int fieldId) { + return getDirectoryView().findEntry(fieldId) != null; } - - public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + + /** + * Get the number of fields without parsing the directory. + */ + public int getFieldCount() { + return getDirectoryCount(); } - + + // ========== TYPED GETTERS ========== + public String getString(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "STRING"); if (value instanceof Value.StringValue) @@ -180,7 +221,27 @@ public String getString(int fieldId) throws ImprintException { return ((Value.StringBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); } - + + public int getInt32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + } + + public long getInt64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + } + + public boolean getBoolean(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + } + + public float getFloat32(int fieldId) 
throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + } + + public double getFloat64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + } + public byte[] getBytes(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "BYTES"); if (value instanceof Value.BytesValue) @@ -189,145 +250,402 @@ public byte[] getBytes(int fieldId) throws ImprintException { return ((Value.BytesBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); } - - public List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + + public java.util.List getArray(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); } - - public Map getMap(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + + public java.util.Map getMap(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); } - + public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); + return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } - // ========== SERIALIZATION ========== - /** - * Serialize this record to a ByteBuffer. + * Returns a copy of the bytes. 
*/ public ByteBuffer serializeToBuffer() { - var directoryBuffer = buffers.serializeDirectory(); // This is now optimized to return a duplicate - var payloadBuffer = buffers.getPayload(); - - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payloadBuffer.remaining(); - var finalBuffer = ByteBuffer.allocate(finalSize); - finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + return serializedBytes.duplicate(); + } - // Assemble the final record from existing components - serializeHeader(this.header, finalBuffer); - finalBuffer.put(directoryBuffer); - finalBuffer.put(payloadBuffer.duplicate()); + /** + * Get the schema ID from the header. + */ + public SchemaId getSchemaId() { + return header.getSchemaId(); + } + + /** + * Estimate the memory footprint of this record. + */ + public int getSerializedSize() { + return serializedBytes.remaining(); + } - finalBuffer.flip(); - return finalBuffer.asReadOnlyBuffer(); + + /** + * Get and validate a value exists and is not null. + */ + private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { + var value = getValue(fieldId); + if (value == null) + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); + if (value.getTypeCode() == com.imprint.types.TypeCode.NULL) + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); + return value; } - public int estimateSerializedSize() { - int size = Constants.HEADER_BYTES; // header - size += buffers.serializeDirectory().remaining(); // directory - size += buffers.getPayload().remaining(); // payload - return size; + private T getTypedValueOrThrow(int fieldId, com.imprint.types.TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) + throws ImprintException { + var value = getValidatedValue(fieldId, expectedTypeName); + if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) + return 
expectedValueClass.cast(value); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + } + + /** + * Parse buffers from serialized record bytes. + */ + private static ParsedBuffers parseBuffersFromSerialized(ByteBuffer serializedRecord) throws ImprintException { + var buffer = serializedRecord.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header and extract sections using shared utility + var header = parseHeaderFromBuffer(buffer); + var sections = extractBufferSections(buffer, header); + + return new ParsedBuffers(sections.directoryBuffer, sections.payloadBuffer); + } + + private static class ParsedBuffers { + final ByteBuffer directoryBuffer; + final ByteBuffer payload; + + ParsedBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { + this.directoryBuffer = directoryBuffer; + this.payload = payload; + } } + + private int getDirectoryCount() { + try { + return VarInt.decode(directoryBuffer.duplicate()).getValue(); + } catch (ImprintException e) { + return 0; // Cache as 0 on error + } + } + + /** + * Gets ByteBuffer view of a field's data. 
+ */ + private ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { + var entry = findDirectoryEntry(fieldId); + if (entry == null) + return null; + + int startOffset = entry.getOffset(); + int endOffset = findEndOffset(entry.getId()); + + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || + endOffset > payload.limit() || startOffset > endOffset) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset); + } + + var fieldBuffer = payload.duplicate(); + fieldBuffer.position(startOffset).limit(endOffset); + return fieldBuffer; + } + + private Directory findDirectoryEntry(int fieldId) throws ImprintException { + var searchBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + int count = getDirectoryCount(); + if (count == 0) return null; + + // Advance past varint to entries + VarInt.decode(searchBuffer); + int directoryStartPos = searchBuffer.position(); + + int low = 0; + int high = count - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Directory entry exceeds buffer"); + + searchBuffer.position(entryPos); + short midFieldId = searchBuffer.getShort(); + + if (midFieldId < fieldId) { + low = mid + 1; + } else if (midFieldId > fieldId) { + high = mid - 1; + } else { + // Found it - read complete entry + searchBuffer.position(entryPos); + return deserializeDirectoryEntry(searchBuffer); + } + } + + return null; + } + + private int findEndOffset(int currentFieldId) throws ImprintException { + var scanBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + int count = getDirectoryCount(); + if (count == 0) return payload.limit(); + + // Advance past varint + VarInt.decode(scanBuffer); + int directoryStartPos = 
scanBuffer.position(); + + int low = 0; + int high = count - 1; + int nextOffset = payload.limit(); + + // Binary search for first field with fieldId > currentFieldId + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) break; + + scanBuffer.position(entryPos); + short fieldId = scanBuffer.getShort(); + scanBuffer.get(); // skip type + int offset = scanBuffer.getInt(); + + if (fieldId > currentFieldId) { + nextOffset = offset; + high = mid - 1; + } else { + low = mid + 1; + } + } + + return nextOffset; + } + + private Directory deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); + + short id = buffer.getShort(); + var typeCode = TypeCode.fromByte(buffer.get()); + int offset = buffer.getInt(); + + return new Directory.Entry(id, typeCode, offset); + } + + /** + * DirectoryView + */ + private class ImprintDirectoryView implements Directory.DirectoryView { + + @Override + public Directory findEntry(int fieldId) { + try { + return findDirectoryEntry(fieldId); + } catch (ImprintException e) { + return null; + } + } + /** + * List out all directories in the buffer. This operation unpacks any directories not already deserialized + * so proceed only if eager evaluation is intended. + */ + @Override + public List toList() { + var list = new ArrayList(getDirectoryCount()); + var iterator = iterator(); + while (iterator.hasNext()) { + list.add(iterator.next()); + } + return list; + } + + @Override + public int size() { + return getDirectoryCount(); + } + + @Override + public Iterator iterator() { + return new ImprintDirectoryIterator(); + } + } + + /** + * Iterator that parses directory entries lazily from raw bytes. 
+ */ + private class ImprintDirectoryIterator implements Iterator { + private final ByteBuffer iterBuffer; + private final int totalCount; + private int currentIndex; + + ImprintDirectoryIterator() { + this.iterBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + this.totalCount = getDirectoryCount(); + + try { + // Skip past varint to first entry + VarInt.decode(iterBuffer); + } catch (ImprintException e) { + throw new RuntimeException("Failed to initialize directory iterator", e); + } + this.currentIndex = 0; + } + + @Override + public boolean hasNext() { + return currentIndex < totalCount; + } + + @Override + public Directory next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + try { + var entry = deserializeDirectoryEntry(iterBuffer); + currentIndex++; + return entry; + } catch (ImprintException e) { + throw new RuntimeException("Failed to parse directory entry at index " + currentIndex, e); + } + } + } + /** - * Serializes the components of a record into a single ByteBuffer. - * This provides a direct serialization path without needing a live ImprintRecord instance. - * This assumes the list is pre-sorted by field ID. + * Used by {@link ImprintRecordBuilder} with sorted field data. + * Creates directory buffer from field data and calculated offsets. * - * @param schemaId The schema identifier for the record. - * @param sortedDirectory The list of directory entries, which MUST be sorted by field ID. - * @param payload The ByteBuffer containing all field data concatenated. - * @return A read-only ByteBuffer with the complete serialized record. 
+ * @param sortedFields Array of FieldData objects sorted by ID + * @param offsets Array of payload offsets corresponding to each field + * @param fieldCount Number of valid fields to process */ - public static ByteBuffer serialize(SchemaId schemaId, List sortedDirectory, ByteBuffer payload) { - var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = ImprintBuffers.createDirectoryBuffer(sortedDirectory); - - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); - var finalBuffer = ByteBuffer.allocate(finalSize); - finalBuffer.order(ByteOrder.LITTLE_ENDIAN); - - // Assemble the final record - serializeHeader(header, finalBuffer); - finalBuffer.put(directoryBuffer); - finalBuffer.put(payload); + static ByteBuffer createDirectoryBufferFromSorted(Object[] sortedFields, int[] offsets, int fieldCount) { + if (fieldCount == 0) + return createEmptyDirectoryBuffer(); + + int size = calculateDirectorySize(fieldCount); + var buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); + VarInt.encode(fieldCount, buffer); + + //this ends up being kind of a hotspot for some reason, probably boundary checking. 
+ //Direct writes might help a bit it could get difficult since pretty much all the other + //frameworks just go straight for Unsafe + for (int i = 0; i < fieldCount; i++) { + var fieldData = (ImprintRecordBuilder.FieldData) sortedFields[i]; + buffer.putShort(fieldData.id); + buffer.put(fieldData.value.getTypeCode().getCode()); + buffer.putInt(offsets[i]); + } - finalBuffer.flip(); - return finalBuffer.asReadOnlyBuffer(); + buffer.flip(); + return buffer; } - // ========== STATIC FACTORY METHODS ========== - - public static ImprintRecordBuilder builder(SchemaId schemaId) { - return new ImprintRecordBuilder(schemaId); + private static ByteBuffer createEmptyDirectoryBuffer() { + ByteBuffer buffer = ByteBuffer.allocate(1); + VarInt.encode(0, buffer); + buffer.flip(); + return buffer; } - - public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { - return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); + + /** + * Parse a header from a ByteBuffer without advancing the buffer position. + * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. + */ + public static Header parseHeaderFromBuffer(ByteBuffer buffer) throws ImprintException { + int startPos = buffer.position(); + try { + return parseHeader(buffer); + } finally { + buffer.position(startPos); + } } - - public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { - return deserialize(ByteBuffer.wrap(bytes)); + + /** + * Calculate the size needed to store a directory with the given entry count. 
+ */ + public static int calculateDirectorySize(int entryCount) { + return VarInt.encodedLength(entryCount) + (entryCount * Constants.DIR_ENTRY_BYTES); } - - public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { - buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); - - // Read header - var header = deserializeHeader(buffer); - - // Calculate directory size + + /** + * Container for separated directory and payload buffer sections. + * Utility class shared between {@link ImprintRecord} and {@link ImprintOperations}. + */ + public static class BufferSections { + public final ByteBuffer directoryBuffer; + public final ByteBuffer payloadBuffer; + public final int directoryCount; + + public BufferSections(ByteBuffer directoryBuffer, ByteBuffer payloadBuffer, int directoryCount) { + this.directoryBuffer = directoryBuffer; + this.payloadBuffer = payloadBuffer; + this.directoryCount = directoryCount; + } + } + + /** + * Extract directory and payload sections from a serialized buffer. + * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. 
+ */ + public static BufferSections extractBufferSections(ByteBuffer buffer, Header header) throws ImprintException { + // Skip header + buffer.position(buffer.position() + Constants.HEADER_BYTES); + + // Parse directory section int directoryStartPos = buffer.position(); var countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - + // Create directory buffer buffer.position(directoryStartPos); var directoryBuffer = buffer.slice(); directoryBuffer.limit(directorySize); - - // Advance past directory + + // Advance to payload buffer.position(buffer.position() + directorySize); - - // Create payload buffer - var payload = buffer.slice(); - payload.limit(header.getPayloadSize()); - - // Create buffers wrapper - var buffers = new ImprintBuffers(directoryBuffer, payload); - - return new ImprintRecord(header, buffers); + var payloadBuffer = buffer.slice(); + payloadBuffer.limit(header.getPayloadSize()); + + return new BufferSections(directoryBuffer, payloadBuffer, directoryCount); } + + private static Header parseHeader(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.HEADER_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - // ========== PRIVATE HELPER METHODS ========== - - /** - * Get and validate a value exists and is not null. 
- */ - private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { - var value = getValue(fieldId); - if (value == null) - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); - if (value.getTypeCode() == TypeCode.NULL) - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); - return value; - } + byte magic = buffer.get(); + byte version = buffer.get(); - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) - throws ImprintException { - var value = getValidatedValue(fieldId, expectedTypeName); - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) - return expectedValueClass.cast(value); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + if (magic != Constants.MAGIC) + throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte"); + if (version != Constants.VERSION) + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); + + var flags = new Flags(buffer.get()); + int fieldSpaceId = buffer.getInt(); + int schemaHash = buffer.getInt(); + int payloadSize = buffer.getInt(); + + return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } - - private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - var valueBuffer = buffer.duplicate(); - valueBuffer.order(ByteOrder.LITTLE_ENDIAN); - + + private Value deserializeValue(com.imprint.types.TypeCode typeCode, ByteBuffer buffer) throws ImprintException { + var valueBuffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { case NULL: case BOOL: @@ -347,43 +665,4 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr throw new 
ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - - private static void serializeHeader(Header header, ByteBuffer buffer) { - buffer.put(Constants.MAGIC); - buffer.put(Constants.VERSION); - buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldSpaceId()); - buffer.putInt(header.getSchemaId().getSchemaHash()); - buffer.putInt(header.getPayloadSize()); - } - - private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.HEADER_BYTES) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - - - byte magic = buffer.get(); - if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); - } - - byte version = buffer.get(); - if (version != Constants.VERSION) { - throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); - } - - var flags = new Flags(buffer.get()); - int fieldSpaceId = buffer.getInt(); - int schemaHash = buffer.getInt(); - int payloadSize = buffer.getInt(); - - return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); - } - - @Override - public String toString() { - return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, buffers.getDirectoryCount(), buffers.getPayload().remaining()); - } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 93e83ba..8e1dfa0 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -1,5 +1,7 @@ package com.imprint.core; +import com.imprint.Constants; +import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import 
com.imprint.types.MapKey; import com.imprint.types.Value; @@ -33,9 +35,21 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final Map fields = new TreeMap<>(); + // Custom int→object map optimized for primitive keys + private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); private int estimatedPayloadSize = 0; + static final class FieldData { + final short id; + final Value value; + + FieldData(short id, Value value) { + this.id = id; + this.value = value; + } + } + + ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); } @@ -70,7 +84,7 @@ public ImprintRecordBuilder field(int id, byte[] value) { } // Collections with automatic conversion - public ImprintRecordBuilder field(int id, List values) { + public ImprintRecordBuilder field(int id, List values) { var convertedValues = new ArrayList(values.size()); for (var item : values) { convertedValues.add(convertToValue(item)); @@ -78,7 +92,7 @@ public ImprintRecordBuilder field(int id, List values) { return addField(id, Value.fromArray(convertedValues)); } - public ImprintRecordBuilder field(int id, Map map) { + public ImprintRecordBuilder field(int id, Map map) { var convertedMap = new HashMap(map.size()); for (var entry : map.entrySet()) { var key = convertToMapKey(entry.getKey()); @@ -133,47 +147,48 @@ public int fieldCount() { } public Set fieldIds() { - return fields.keySet(); + var ids = new HashSet(fields.size()); + var keys = fields.getKeys(); + for (var key : keys) { + ids.add(key); + } + return ids; } // Build the final record public ImprintRecord build() throws ImprintException { - var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); - payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - - for (var entry : fields.values()) { - entry.setOffset(payloadBuffer.position()); - serializeValue(entry.getValue(), payloadBuffer); - } - - // Create read-only view 
of the payload without copying - payloadBuffer.flip(); // limit = position, position = 0 - var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - - var header = new Header(new Flags((byte) 0), schemaId, payloadView.remaining()); - return new ImprintRecord(header, fields, payloadView); + // Build to bytes and then create ImprintRecord from bytes for consistency + var serializedBytes = buildToBuffer(); + return ImprintRecord.fromBytes(serializedBytes); } /** - * Builds the record and serializes it directly to a ByteBuffer without creating an intermediate ImprintRecord object. + * Builds the record and serializes it directly to a ByteBuffer. * * @return A read-only ByteBuffer containing the fully serialized record. * @throws ImprintException if serialization fails. */ public ByteBuffer buildToBuffer() throws ImprintException { - // 1. Prepare payload and directory + // 1. Sort fields by ID for directory ordering (zero allocation) + var sortedFieldsResult = getSortedFieldsResult(); + var sortedFields = sortedFieldsResult.values; + var fieldCount = sortedFieldsResult.count; + + // 2. Serialize payload and calculate offsets var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - for (var entry : fields.values()) { - entry.setOffset(payloadBuffer.position()); - serializeValue(entry.getValue(), payloadBuffer); + int[] offsets = new int[fieldCount]; + for (int i = 0; i < fieldCount; i++) { + var fieldData = (FieldData) sortedFields[i]; + offsets[i] = payloadBuffer.position(); + serializeValue(fieldData.value, payloadBuffer); } payloadBuffer.flip(); var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - // 2. Serialize directly to the final buffer format using the map-based method - return ImprintRecord.serialize(schemaId, new ArrayList<>(fields.values()), payloadView); + // 3. 
Create directory buffer and serialize to final buffer + return serializeToBuffer(schemaId, sortedFields, offsets, fieldCount, payloadView); } /** @@ -186,15 +201,17 @@ public ByteBuffer buildToBuffer() throws ImprintException { */ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - var newEntry = new Directory.Builder((short) id, value); + var newEntry = new FieldData((short) id, value); - // Subtract the size of the old value if it's being replaced. + // Check if replacing an existing field - O(1) lookup without boxing! var oldEntry = fields.get(id); - if (oldEntry != null) - estimatedPayloadSize -= estimateValueSize(oldEntry.getValue()); + if (oldEntry != null) { + estimatedPayloadSize -= estimateValueSize(oldEntry.value); + } + // Add or replace field - O(1) operation without boxing! fields.put(id, newEntry); - estimatedPayloadSize += estimateValueSize(newEntry.getValue()); + estimatedPayloadSize += estimateValueSize(newEntry.value); return this; } @@ -230,7 +247,6 @@ private Value convertToValue(Object obj) { return Value.fromBytes((byte[]) obj); } if (obj instanceof List) { - //test @SuppressWarnings("unchecked") List list = (List) obj; var convertedValues = new ArrayList(list.size()); @@ -278,11 +294,9 @@ private int estimatePayloadSize() { // Add 25% buffer to reduce reallocations and handle VarInt encoding fluctuations. return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), fields.size() * 16); } - + /** * Estimates the serialized size in bytes for a given value. - * This method provides size estimates for payload buffer allocation, - * supporting both array-based and ByteBuffer-based value types. 
* * @param value the value to estimate size for * @return estimated size in bytes including type-specific overhead @@ -308,7 +322,7 @@ private int estimateValueSize(Value value) { return rowValue.getValue().estimateSerializedSize(); default: - throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } @@ -335,7 +349,40 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept break; default: - throw new ImprintException(com.imprint.error.ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); } } + + /** + * Get fields sorted by ID from the map. + * Returns internal map array reference + count to avoid any copying but sacrifices the map structure in the process. + */ + private ImprintFieldObjectMap.SortedValuesResult getSortedFieldsResult() { + return fields.getSortedValues(); + } + + /** + * Serialize components into a single ByteBuffer. 
+ */ + private static ByteBuffer serializeToBuffer(SchemaId schemaId, Object[] sortedFields, int[] offsets, int fieldCount, ByteBuffer payload) { + var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); + var directoryBuffer = ImprintRecord.createDirectoryBufferFromSorted(sortedFields, offsets, fieldCount); + + int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + var finalBuffer = ByteBuffer.allocate(finalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header + finalBuffer.put(Constants.MAGIC); + finalBuffer.put(Constants.VERSION); + finalBuffer.put(header.getFlags().getValue()); + finalBuffer.putInt(header.getSchemaId().getFieldSpaceId()); + finalBuffer.putInt(header.getSchemaId().getSchemaHash()); + finalBuffer.putInt(header.getPayloadSize()); + finalBuffer.put(directoryBuffer); + finalBuffer.put(payload); + + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/error/ErrorType.java b/src/main/java/com/imprint/error/ErrorType.java index 49784ef..63a8c60 100644 --- a/src/main/java/com/imprint/error/ErrorType.java +++ b/src/main/java/com/imprint/error/ErrorType.java @@ -13,6 +13,7 @@ public enum ErrorType { MALFORMED_VARINT, TYPE_MISMATCH, INVALID_TYPE_CODE, + INVALID_BUFFER, SERIALIZATION_ERROR, DESERIALIZATION_ERROR, INTERNAL_ERROR diff --git a/src/main/java/com/imprint/ops/ImprintOperations.java b/src/main/java/com/imprint/ops/ImprintOperations.java index 49f60b4..f15e6a1 100644 --- a/src/main/java/com/imprint/ops/ImprintOperations.java +++ b/src/main/java/com/imprint/ops/ImprintOperations.java @@ -1,168 +1,378 @@ package com.imprint.ops; +import com.imprint.Constants; import com.imprint.core.*; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; +import com.imprint.util.VarInt; +import lombok.Value; import lombok.experimental.UtilityClass; import java.nio.ByteBuffer; import 
java.nio.ByteOrder; import java.util.*; -import java.util.stream.Collectors; @UtilityClass public class ImprintOperations { /** - * Project a subset of fields from an Imprint record. Payload copying is proportional to projected data size. - * - *
- * <p>Algorithm:</p>
- * <ol>
- *   <li>Sort and deduplicate requested field IDs for efficient matching</li>
- *   <li>Scan directory to find matching fields and calculate ranges</li>
- *   <li>Allocate new payload buffer with exact size needed</li>
- *   <li>Copy field data ranges directly (zero-copy where possible)</li>
- *   <li>Build new directory with adjusted offsets</li>
- * </ol>
- * - * @param record The source record to project from - * @param fieldIds Array of field IDs to include in projection - * @return New ImprintRecord containing only the requested fields + * Pure bytes-to-bytes merge operation that avoids all object creation. + * Performs merge directly on serialized Imprint record buffers. + * + * @param firstBuffer Complete serialized Imprint record + * @param secondBuffer Complete serialized Imprint record + * @return Merged record as serialized bytes + * @throws ImprintException if merge fails */ - public static ImprintRecord project(ImprintRecord record, int... fieldIds) { - // Sort and deduplicate field IDs for efficient matching - final var fieldIdSet = Arrays.stream(fieldIds) - .boxed() - .collect(Collectors.toCollection(TreeSet::new)); - if (fieldIdSet.isEmpty()) { - return createEmptyRecord(record.getHeader().getSchemaId()); - } + public static ByteBuffer mergeBytes(ByteBuffer firstBuffer, ByteBuffer secondBuffer) throws ImprintException { + validateImprintBuffer(firstBuffer, "firstBuffer"); + validateImprintBuffer(secondBuffer, "secondBuffer"); + + // Work on duplicates to avoid affecting original positions + var first = firstBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + var second = secondBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse headers + var firstHeader = parseHeaderOnly(first); + var secondHeader = parseHeaderOnly(second); + + // Extract directory and payload sections + var firstSections = extractSections(first, firstHeader); + var secondSections = extractSections(second, secondHeader); + + // Perform raw merge + return mergeRawSections(firstHeader, firstSections, secondSections); + } - var newDirectory = new ArrayList(fieldIdSet.size()); - var payloadChunks = new ArrayList(fieldIdSet.size()); - int currentOffset = 0; + /** + * Parse just the header without advancing buffer past it + */ + private static Header parseHeaderOnly(ByteBuffer buffer) throws ImprintException { + return 
ImprintRecord.parseHeaderFromBuffer(buffer); + } + + /** + * Extract directory and payload sections from a buffer + */ + private static ImprintRecord.BufferSections extractSections(ByteBuffer buffer, Header header) throws ImprintException { + return ImprintRecord.extractBufferSections(buffer, header); + } + + /** + * Merge raw directory and payload sections without object creation + */ + private static ByteBuffer mergeRawSections(Header firstHeader, ImprintRecord.BufferSections firstSections, ImprintRecord.BufferSections secondSections) throws ImprintException { + // Prepare directory iterators + var firstDirIter = new RawDirectoryIterator(firstSections.directoryBuffer); + var secondDirIter = new RawDirectoryIterator(secondSections.directoryBuffer); + + // Pre-allocate - worst case is sum of both directory counts + int maxEntries = firstSections.directoryCount + secondSections.directoryCount; + var mergedDirectoryEntries = new ArrayList(maxEntries); + var mergedChunks = new ArrayList(maxEntries); - for (int fieldId : fieldIdSet) { - // Use efficient lookup for each field's metadata. Returns null on failure. - var sourceEntry = record.getDirectoryEntry(fieldId); - - // If field exists, get its payload and add to the new record components - if (sourceEntry != null) { - var fieldPayload = record.getRawBytes(sourceEntry); - // This check is for internal consistency. If an entry exists, payload should too. - if (fieldPayload != null) { - newDirectory.add(new Directory.Entry((short)fieldId, sourceEntry.getTypeCode(), currentOffset)); - payloadChunks.add(fieldPayload); - currentOffset += fieldPayload.remaining(); + int totalMergedPayloadSize = 0; + int currentMergedOffset = 0; + + RawDirectoryEntry firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; + RawDirectoryEntry secondEntry = secondDirIter.hasNext() ? 
secondDirIter.next() : null; + + // Merge directories and collect payload chunks + while (firstEntry != null || secondEntry != null) { + RawDirectoryEntry currentEntry; + ByteBuffer sourcePayload; + + if (firstEntry != null && (secondEntry == null || firstEntry.fieldId <= secondEntry.fieldId)) { + // Take from first + currentEntry = firstEntry; + sourcePayload = getFieldPayload(firstSections.payloadBuffer, firstEntry, firstDirIter); + + // Skip duplicate in second if present + if (secondEntry != null && firstEntry.fieldId == secondEntry.fieldId) { + secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; } + firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; + } else { + // Take from second + currentEntry = secondEntry; + sourcePayload = getFieldPayload(secondSections.payloadBuffer, secondEntry, secondDirIter); + secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; } + + // Add to merged directory with adjusted offset + var adjustedEntry = new RawDirectoryEntry(currentEntry.fieldId, currentEntry.typeCode, currentMergedOffset); + mergedDirectoryEntries.add(adjustedEntry); + + // Collect payload chunk + mergedChunks.add(sourcePayload.duplicate()); + currentMergedOffset += sourcePayload.remaining(); + totalMergedPayloadSize += sourcePayload.remaining(); } + + // Build final merged buffer + return buildSerializedBuffer(firstHeader, mergedDirectoryEntries, mergedChunks, totalMergedPayloadSize); + } + + /** + * Get payload bytes for a specific field using iterator state + */ + private static ByteBuffer getFieldPayload(ByteBuffer payload, RawDirectoryEntry entry, RawDirectoryIterator iterator) { + int startOffset = entry.offset; + int endOffset = iterator.getNextEntryOffset(payload.limit()); - // Build new payload from collected chunks - ByteBuffer newPayload = buildPayloadFromChunks(payloadChunks, currentOffset); - - // Create new header with updated payload size - // TODO: compute correct schema hash - var newHeader = new 
Header(record.getHeader().getFlags(), - new SchemaId(record.getHeader().getSchemaId().getFieldSpaceId(), 0xdeadbeef), - newPayload.remaining() - ); - - return new ImprintRecord(newHeader, newDirectory, newPayload); + var fieldPayload = payload.duplicate(); + fieldPayload.position(startOffset); + fieldPayload.limit(endOffset); + return fieldPayload.slice(); } + /** - * Merge two Imprint records, combining their fields. Payload copying is proportional to total data size. - * - *
- * <p>Merge Strategy:</p>
- * <ul>
- *   <li>Fields are merged using sort-merge algorithm on directory entries</li>
- *   <li>For duplicate field IDs: first record's field takes precedence</li>
- *   <li>Payloads are concatenated with directory offsets adjusted</li>
- *   <li>Schema ID from first record is preserved</li>
- * </ul>
- * - * @param first The first record (takes precedence for duplicate fields) - * @param second The second record to merge - * @return New ImprintRecord containing merged fields - * @throws ImprintException if merge fails due to incompatible records + * Pure bytes-to-bytes projection operation that avoids all object creation. + * Projects a subset of fields directly from a serialized Imprint record. + * + * @param sourceBuffer Complete serialized Imprint record + * @param fieldIds Array of field IDs to include in projection + * @return Projected record as serialized bytes + * @throws ImprintException if projection fails */ - public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) throws ImprintException { - var firstDir = first.getDirectory(); - var secondDir = second.getDirectory(); - - // Pre-allocate for worst case (no overlapping fields) - var newDirectory = new ArrayList(firstDir.size() + secondDir.size()); - var payloadChunks = new ArrayList(); + public static ByteBuffer projectBytes(ByteBuffer sourceBuffer, int... fieldIds) throws ImprintException { + validateImprintBuffer(sourceBuffer, "sourceBuffer"); + + if (fieldIds == null || fieldIds.length == 0) { + return createEmptyRecordBytes(); + } + + // Sort field IDs for efficient merge algorithm (duplicates handled naturally) + var sortedFieldIds = fieldIds.clone(); + Arrays.sort(sortedFieldIds); + + // Work on duplicate to avoid affecting original position + var source = sourceBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Parse header + var header = parseHeaderOnly(source); + + // Extract sections + var sections = extractSections(source, header); + + // Perform raw projection + return projectRawSections(header, sections, sortedFieldIds); + } - int firstIdx = 0; - int secondIdx = 0; + /** + * Project raw sections without object creation using optimized merge algorithm. + * Uses direct array operations and optimized memory access for maximum performance. 
+ */ + private static ByteBuffer projectRawSections(Header originalHeader, ImprintRecord.BufferSections sections, int[] sortedRequestedFields) throws ImprintException { + + if (sortedRequestedFields.length == 0) { + return buildSerializedBuffer(originalHeader, new RawDirectoryEntry[0], new ByteBuffer[0]); + } + + // Use pre-sized ArrayLists to avoid System.arraycopy but still be efficient + var projectedEntries = new ArrayList(sortedRequestedFields.length); + var payloadChunks = new ArrayList(sortedRequestedFields.length); + int totalProjectedPayloadSize = 0; int currentOffset = 0; - - while (firstIdx < firstDir.size() || secondIdx < secondDir.size()) { - Directory currentEntry; - ByteBuffer currentPayload; - - if (firstIdx < firstDir.size() && - (secondIdx >= secondDir.size() || firstDir.get(firstIdx).getId() <= secondDir.get(secondIdx).getId())) { - - // Take from first record - currentEntry = firstDir.get(firstIdx); - - // Skip duplicate field in second record if present - if (secondIdx < secondDir.size() && - firstDir.get(firstIdx).getId() == secondDir.get(secondIdx).getId()) { - secondIdx++; - } - currentPayload = first.getRawBytes(currentEntry); - firstIdx++; + int requestedIndex = 0; + + // Optimize: Cache payload buffer reference to avoid getter calls + var payloadBuffer = sections.payloadBuffer; + + // Merge algorithm: two-pointer approach through sorted sequences + var dirIterator = new RawDirectoryIterator(sections.directoryBuffer); + RawDirectoryEntry currentEntry = dirIterator.hasNext() ? 
dirIterator.next() : null; + + while (currentEntry != null && requestedIndex < sortedRequestedFields.length) { + int fieldId = currentEntry.fieldId; + int targetFieldId = sortedRequestedFields[requestedIndex]; + + if (fieldId == targetFieldId) { + var fieldPayload = getFieldPayload(payloadBuffer, currentEntry, dirIterator); + + // Add to projection with adjusted offset + projectedEntries.add(new RawDirectoryEntry(currentEntry.fieldId, currentEntry.typeCode, currentOffset)); + + // Collect payload chunk here (fieldPayload is already sliced) + payloadChunks.add(fieldPayload); + + int payloadSize = fieldPayload.remaining(); + currentOffset += payloadSize; + totalProjectedPayloadSize += payloadSize; + + // Advance both pointers (handle dupes by advancing to next unique field) + do { + requestedIndex++; + } while (requestedIndex < sortedRequestedFields.length && sortedRequestedFields[requestedIndex] == targetFieldId); + + currentEntry = dirIterator.hasNext() ? dirIterator.next() : null; + } else if (fieldId < targetFieldId) { + // Directory field is smaller, advance directory pointer + currentEntry = dirIterator.hasNext() ? dirIterator.next() : null; } else { - // Take from second record - currentEntry = secondDir.get(secondIdx); - currentPayload = second.getRawBytes(currentEntry); - secondIdx++; + // fieldId > targetFieldId - implies requested field isn't in the directory so advance requested pointer + requestedIndex++; } + } + + return buildSerializedBuffer(originalHeader, projectedEntries, payloadChunks, totalProjectedPayloadSize); + } - if (currentPayload == null) - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get raw bytes for field " + currentEntry.getId()); + /** + * Build a serialized Imprint record buffer from header, directory entries, and payload chunks. 
+ */ + private static ByteBuffer buildSerializedBuffer(Header originalHeader, RawDirectoryEntry[] directoryEntries, ByteBuffer[] payloadChunks) { + return buildSerializedBuffer(originalHeader, Arrays.asList(directoryEntries), Arrays.asList(payloadChunks), 0); + } + + private static ByteBuffer buildSerializedBuffer(Header originalHeader, List directoryEntries, List payloadChunks, int totalPayloadSize) { + int directorySize = ImprintRecord.calculateDirectorySize(directoryEntries.size()); + int totalSize = Constants.HEADER_BYTES + directorySize + totalPayloadSize; + var finalBuffer = ByteBuffer.allocate(totalSize); + finalBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header (preserve original schema) + finalBuffer.put(Constants.MAGIC); + finalBuffer.put(Constants.VERSION); + finalBuffer.put(originalHeader.getFlags().getValue()); + finalBuffer.putInt(originalHeader.getSchemaId().getFieldSpaceId()); + finalBuffer.putInt(originalHeader.getSchemaId().getSchemaHash()); + finalBuffer.putInt(totalPayloadSize); + + // Write directory + VarInt.encode(directoryEntries.size(), finalBuffer); + for (var entry : directoryEntries) { + finalBuffer.putShort(entry.fieldId); + finalBuffer.put(entry.typeCode); + finalBuffer.putInt(entry.offset); + } + + // Write payload + for (var chunk : payloadChunks) + finalBuffer.put(chunk); - // Add adjusted directory entry - var newEntry = new Directory.Entry(currentEntry.getId(), - currentEntry.getTypeCode(), currentOffset); - newDirectory.add(newEntry); + finalBuffer.flip(); + return finalBuffer.asReadOnlyBuffer(); + } + + + /** + * Create an empty record as serialized bytes + */ + private static ByteBuffer createEmptyRecordBytes() { + // Minimal header + empty directory + empty payload + var buffer = ByteBuffer.allocate(Constants.HEADER_BYTES + 1); // +1 for varint 0 + buffer.order(ByteOrder.LITTLE_ENDIAN); + + // Write header for empty record + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put((byte) 0x01); + 
buffer.putInt(0); + buffer.putInt(0); + buffer.putInt(0); + + // Write empty directory + VarInt.encode(0, buffer); + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } - // Collect payload chunk - payloadChunks.add(currentPayload.duplicate()); - currentOffset += currentPayload.remaining(); + /** + * Validates that a ByteBuffer contains valid Imprint data by checking magic bytes and basic structure. + * + * @param buffer Buffer to validate + * @param paramName Parameter name for error messages + * @throws ImprintException if buffer is invalid + */ + private static void validateImprintBuffer(ByteBuffer buffer, String paramName) throws ImprintException { + if (buffer == null) { + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " cannot be null"); + } + + if (buffer.remaining() < Constants.HEADER_BYTES) { + throw new ImprintException(ErrorType.INVALID_BUFFER, + paramName + " too small to contain valid Imprint header (minimum " + Constants.HEADER_BYTES + " bytes)"); } - // Build merged payload - var mergedPayload = buildPayloadFromChunks(payloadChunks, currentOffset); - - // Create header preserving first record's schema ID - var newHeader = new Header(first.getHeader().getFlags(), - first.getHeader().getSchemaId(), mergedPayload.remaining()); - return new ImprintRecord(newHeader, newDirectory, mergedPayload); + // Check invariants without advancing buffer position + var duplicate = buffer.duplicate(); + byte magic = duplicate.get(); + byte version = duplicate.get(); + if (magic != Constants.MAGIC) + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " does not contain valid Imprint magic byte"); + if (version != Constants.VERSION) + throw new ImprintException(ErrorType.INVALID_BUFFER, paramName + " contains unsupported Imprint version: " + version); } /** - * Build a new payload buffer by concatenating chunks. 
+ * Directory entry container used for raw byte operations */ - private static ByteBuffer buildPayloadFromChunks(List chunks, int totalSize) { - var mergedPayload = ByteBuffer.allocate(totalSize); - mergedPayload.order(ByteOrder.LITTLE_ENDIAN); - for (var chunk : chunks) - mergedPayload.put(chunk); - mergedPayload.flip(); - return mergedPayload; + @Value + private static class RawDirectoryEntry { + short fieldId; + byte typeCode; + int offset; } /** - * Create an empty record with the given schema ID. + * Iterator that parses directory entries directly from raw bytes */ - private static ImprintRecord createEmptyRecord(SchemaId schemaId) { - var header = new Header(new Flags((byte) 0x01), schemaId, 0); - return new ImprintRecord(header, Collections.emptyList(), ByteBuffer.allocate(0)); + private static class RawDirectoryIterator { + private final ByteBuffer buffer; + private final int totalCount; + private final int directoryStartPos; + private int currentIndex; + + RawDirectoryIterator(ByteBuffer directoryBuffer) throws ImprintException { + this.buffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + + // Read count and advance to first entry + var countResult = VarInt.decode(buffer); + this.totalCount = countResult.getValue(); + this.directoryStartPos = buffer.position(); + this.currentIndex = 0; + } + + boolean hasNext() { + return currentIndex < totalCount; + } + + RawDirectoryEntry next() throws ImprintException { + if (!hasNext()) + throw new RuntimeException("No more directory entries"); + + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); + + short fieldId = buffer.getShort(); + byte typeCode = buffer.get(); + int offset = buffer.getInt(); + + currentIndex++; + return new RawDirectoryEntry(fieldId, typeCode, offset); + } + + /** + * Get the offset of the next entry without state overhead. 
+ * Returns the provided fallback if this is the last entry. + */ + int getNextEntryOffset(int fallbackOffset) { + if (currentIndex >= totalCount) + return fallbackOffset; + + // Calculate position of next entry directly + int nextEntryPos = directoryStartPos + (currentIndex * Constants.DIR_ENTRY_BYTES); + + // Bounds check - optimized to single comparison + if (nextEntryPos + 7 > buffer.limit()) { // DIR_ENTRY_BYTES = 7 + return fallbackOffset; + } + + // Read just the offset field (skip fieldId and typeCode) + return buffer.getInt(nextEntryPos + 3); // 2 bytes fieldId + 1 byte typeCode = 3 offset + } } } diff --git a/src/main/java/com/imprint/stream/ImprintStream.java b/src/main/java/com/imprint/stream/ImprintStream.java deleted file mode 100644 index 35a69ed..0000000 --- a/src/main/java/com/imprint/stream/ImprintStream.java +++ /dev/null @@ -1,259 +0,0 @@ -package com.imprint.stream; - -import com.imprint.core.*; -import com.imprint.error.ImprintException; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.ArrayDeque; -import java.util.Collections; -import java.util.Deque; -import java.util.HashSet; -import java.util.List; -import java.util.NavigableMap; -import java.util.Objects; -import java.util.Set; -import java.util.TreeMap; - -/** - * Provides a framework for lazy, (eventual) zero-copy transformations of Imprint records. - *

- * Operations like {@link #project(int...)} and {@link #mergeWith(ImprintRecord)} are - * intermediate and do not create new records. They build up a plan of operations - * that is executed only when a terminal operation like {@link #toRecord()} is called. - */ -public final class ImprintStream { - - private final Plan plan; - - private ImprintStream(Plan plan) { - this.plan = Objects.requireNonNull(plan); - } - - /** - * The internal representation of the transformation plan. - * This is a linked-list style structure where each step points to the previous one. - */ - private interface Plan { - // Marker interface for the plan steps - } - - /** - * The starting point of a plan, containing the initial source record. - */ - private static final class SourcePlan implements Plan { - final ImprintRecord source; - - private SourcePlan(ImprintRecord source) { - this.source = Objects.requireNonNull(source, "Source record cannot be null."); - } - } - - /** - * A plan step representing a 'project' operation. - */ - private static final class ProjectPlan implements Plan { - final Plan previous; - final Set fieldIds; - - private ProjectPlan(Plan previous, int... fieldIds) { - this.previous = Objects.requireNonNull(previous); - this.fieldIds = new HashSet<>(); - for (int id : fieldIds) { - this.fieldIds.add(id); - } - } - } - - /** - * A plan step representing a 'merge' operation. - */ - private static final class MergePlan implements Plan { - final Plan previous; - final List others; - - private MergePlan(Plan previous, List others) { - this.previous = Objects.requireNonNull(previous); - this.others = Objects.requireNonNull(others); - } - } - - // ========== PUBLIC API ========== - - /** - * Creates a new transformation stream starting with a source record. - * - * @param source The initial record for the transformation. - * @return A new ImprintStream. 
- */ - public static ImprintStream of(ImprintRecord source) { - return new ImprintStream(new SourcePlan(source)); - } - - /** - * An intermediate operation that defines a projection on the stream. - * This is a lazy operation; the projection is only performed when a terminal - * operation is called. - * - * @param fieldIds The field IDs to keep in the final record. - * @return A new ImprintStream with the projection step added to its plan. - */ - public ImprintStream project(int... fieldIds) { - return new ImprintStream(new ProjectPlan(this.plan, fieldIds)); - } - - /** - * An intermediate operation that defines a merge on the stream. - * The record from this stream (the "left" side) takes precedence in case - * of overlapping field IDs. - *

- * This is a lazy operation; the merge is only performed when a terminal - * operation is called. - * - * @param other The record to merge with this stream's record. - * @return A new ImprintStream with the merge step added to its plan. - */ - public ImprintStream mergeWith(ImprintRecord other) { - return new ImprintStream(new MergePlan(this.plan, Collections.singletonList(other))); - } - - /** - * A terminal operation that executes the defined transformation plan and - * constructs a new, consolidated ImprintRecord. - * - * @return a new ImprintRecord representing the result of the stream operations. - */ - public ImprintRecord toRecord() { - return new Evaluator(this.plan).execute(); - } - - // ========== EVALUATOR ========== - - /** - * The engine that walks the plan and executes the transformation. - */ - private static final class Evaluator { - private final Plan plan; - - private Evaluator(Plan plan) { - this.plan = plan; - } - - public ImprintRecord execute() { - // Unwind the plan from a deque - var planQueue = getPlans(); - - // Set of fields being built - var resolvedFields = new TreeMap(); - - for (var planStep : planQueue) { - if (planStep instanceof SourcePlan) { - var sourcePlan = (SourcePlan) planStep; - for (var entry : sourcePlan.source.getDirectory()) { - resolvedFields.put((int) entry.getId(), new FieldSource(sourcePlan.source, entry)); - } - } else if (planStep instanceof ProjectPlan) { - var projectPlan = (ProjectPlan) planStep; - // Apply projection to the current state of resolved fields. - // Keep only fields that are in the projection set - resolvedFields.keySet().removeIf(fieldId -> !projectPlan.fieldIds.contains(fieldId)); - } else if (planStep instanceof MergePlan) { - var mergePlan = (MergePlan) planStep; - // Add fields from other records if they aren't already in the map. 
- for (var otherRecord : mergePlan.others) { - for (var entry : otherRecord.getDirectory()) { - int fieldId = entry.getId(); - resolvedFields.putIfAbsent(fieldId, new FieldSource(otherRecord, entry)); - } - } - } - } - return build(resolvedFields); - } - - private Deque getPlans() { - var planQueue = new ArrayDeque(); - var current = plan; - while (current != null) { - planQueue.addFirst(current); - if (current instanceof ProjectPlan) { - current = ((ProjectPlan) current).previous; - } else if (current instanceof MergePlan) { - current = ((MergePlan) current).previous; - } else if (current instanceof SourcePlan) { - current = null; // End of the chain - } - } - return planQueue; - } - - private ImprintRecord build(NavigableMap finalFields) { - if (finalFields.isEmpty()) { - // TODO: Need a way to get the schemaId for an empty record. - // For now, returning null or using a default. - try { - return ImprintRecord.builder(new SchemaId(0, 0)).build(); - } catch (ImprintException e) { - // TODO This shouldn't really ever happen, we probably need a better way of consolidating error handling - throw new IllegalStateException("Failed to build empty record.", e); - } - } - - // Use schema from the first field's source record. - var schemaId = finalFields.firstEntry().getValue().record.getHeader().getSchemaId(); - - // 1. Calculate final payload size and prepare directory. - int payloadSize = 0; - var newDirectoryMap = new TreeMap(); - - for (var entry : finalFields.entrySet()) { - int fieldId = entry.getKey(); - var fieldSource = entry.getValue(); - int fieldLength = fieldSource.getLength(); - - newDirectoryMap.put(fieldId, new Directory.Entry(fieldSource.entry.getId(), fieldSource.entry.getTypeCode(), payloadSize)); - payloadSize += fieldLength; - } - - // 2. Allocate buffer and copy data. 
- var payload = ByteBuffer.allocate(payloadSize).order(ByteOrder.LITTLE_ENDIAN); - for (var fieldSource : finalFields.values()) { - try { - var sourceData = fieldSource.record.getRawBytes(fieldSource.entry.getId()); - if (sourceData != null) - payload.put(sourceData.duplicate()); - } catch (Exception e) { - // Shouldn't happen in normal operation - maybe some sort of data corruption or race issue - throw new IllegalStateException("Failed to copy data for field " + fieldSource.entry.getId(), e); - } - } - payload.flip(); - - // 3. Construct the final record. - var newHeader = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - return new ImprintRecord(newHeader, newDirectoryMap, payload.asReadOnlyBuffer()); - } - - /** - * A lightweight struct to track the source of a field during evaluation. - */ - private static final class FieldSource { - final ImprintRecord record; - final Directory entry; - - FieldSource(ImprintRecord record, Directory entry) { - this.record = record; - this.entry = entry; - } - - int getLength() { - try { - var buf = record.getRawBytes(entry.getId()); - return buf != null ? 
buf.remaining() : 0; - } catch (Exception e) { - return 0; - } - } - } - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index ee1d426..e066f01 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -623,4 +623,246 @@ void testTypeGetterRow() throws ImprintException { assertEquals(999L, retrievedRow.getInt64(102)); assertEquals("outer field", deserializedWithRow.getString(202)); } + + @Test + @DisplayName("Boundary Values: Numeric limits and special floating point values") + void testNumericBoundaryValues() throws ImprintException { + var schemaId = new SchemaId(60, 0xB0DA12); + var record = ImprintRecord.builder(schemaId) + .field(1, Integer.MAX_VALUE) + .field(2, Integer.MIN_VALUE) + .field(3, Long.MAX_VALUE) + .field(4, Long.MIN_VALUE) + .field(5, Float.MAX_VALUE) + .field(6, Float.MIN_VALUE) + .field(7, Float.NaN) + .field(8, Float.POSITIVE_INFINITY) + .field(9, Float.NEGATIVE_INFINITY) + .field(10, Double.MAX_VALUE) + .field(11, Double.MIN_VALUE) + .field(12, Double.NaN) + .field(13, Double.POSITIVE_INFINITY) + .field(14, Double.NEGATIVE_INFINITY) + .field(15, -0.0f) + .field(16, -0.0) + .build(); + + var deserialized = serializeAndDeserialize(record); + + assertEquals(Integer.MAX_VALUE, deserialized.getInt32(1)); + assertEquals(Integer.MIN_VALUE, deserialized.getInt32(2)); + assertEquals(Long.MAX_VALUE, deserialized.getInt64(3)); + assertEquals(Long.MIN_VALUE, deserialized.getInt64(4)); + assertEquals(Float.MAX_VALUE, deserialized.getFloat32(5)); + assertEquals(Float.MIN_VALUE, deserialized.getFloat32(6)); + assertTrue(Float.isNaN(deserialized.getFloat32(7))); + assertTrue(Float.isInfinite(deserialized.getFloat32(8)) && deserialized.getFloat32(8) > 0); + assertTrue(Float.isInfinite(deserialized.getFloat32(9)) && deserialized.getFloat32(9) < 0); + assertEquals(Double.MAX_VALUE, deserialized.getFloat64(10)); 
+ assertEquals(Double.MIN_VALUE, deserialized.getFloat64(11)); + assertTrue(Double.isNaN(deserialized.getFloat64(12))); + assertTrue(Double.isInfinite(deserialized.getFloat64(13)) && deserialized.getFloat64(13) > 0); + assertTrue(Double.isInfinite(deserialized.getFloat64(14)) && deserialized.getFloat64(14) < 0); + assertEquals(-0.0f, deserialized.getFloat32(15)); + assertEquals(-0.0, deserialized.getFloat64(16)); + } + + @Test + @DisplayName("Unicode and Special Strings: International character support") + void testUnicodeAndSpecialStrings() throws ImprintException { + var schemaId = new SchemaId(61, 0x04100DE); + var record = ImprintRecord.builder(schemaId) + .field(1, "") // Empty string + .field(2, " ") // Single space + .field(3, "\n\t\r") // Whitespace characters + .field(4, "Hello, 世界! 🌍🚀") // Unicode: CJK + Emoji + .field(5, "مرحبا بالعالم") // Arabic (RTL) + .field(6, "Здравствуй мир") // Cyrillic + .field(7, "こんにちは世界") // Japanese + .field(8, "\u0000\u0001\u001F") // Control characters + .field(9, "A".repeat(10000)) // Large string + .build(); + + var deserialized = serializeAndDeserialize(record); + + assertEquals("", deserialized.getString(1)); + assertEquals(" ", deserialized.getString(2)); + assertEquals("\n\t\r", deserialized.getString(3)); + assertEquals("Hello, 世界! 
🌍🚀", deserialized.getString(4)); + assertEquals("مرحبا بالعالم", deserialized.getString(5)); + assertEquals("Здравствуй мир", deserialized.getString(6)); + assertEquals("こんにちは世界", deserialized.getString(7)); + assertEquals("\u0000\u0001\u001F", deserialized.getString(8)); + assertEquals("A".repeat(10000), deserialized.getString(9)); + } + + @Test + @DisplayName("Deep Nesting: Multiple levels of nested records") + void testDeepNesting() throws ImprintException { + // Create 5 levels of nesting + var level5 = ImprintRecord.builder(new SchemaId(65, 5)) + .field(1, "deepest level") + .build(); + + var level4 = ImprintRecord.builder(new SchemaId(64, 4)) + .field(1, level5) + .field(2, "level 4") + .build(); + + var level3 = ImprintRecord.builder(new SchemaId(63, 3)) + .field(1, level4) + .field(2, "level 3") + .build(); + + var level2 = ImprintRecord.builder(new SchemaId(62, 2)) + .field(1, level3) + .field(2, "level 2") + .build(); + + var level1 = ImprintRecord.builder(new SchemaId(61, 1)) + .field(1, level2) + .field(2, "level 1") + .build(); + + var deserialized = serializeAndDeserialize(level1); + + // Navigate through all levels + assertEquals("level 1", deserialized.getString(2)); + var l2 = deserialized.getRow(1); + assertEquals("level 2", l2.getString(2)); + var l3 = l2.getRow(1); + assertEquals("level 3", l3.getString(2)); + var l4 = l3.getRow(1); + assertEquals("level 4", l4.getString(2)); + var l5 = l4.getRow(1); + assertEquals("deepest level", l5.getString(1)); + } + + @Test + @DisplayName("Map Key Types: All supported map key types") + void testMapKeyTypeVariations() throws ImprintException { + var schemaId = new SchemaId(70, 0xAAB5E75); + + // Create maps with different key types + var stringKeyMap = new HashMap(); + stringKeyMap.put(MapKey.fromString("string_key"), Value.fromString("string_value")); + + var intKeyMap = new HashMap(); + intKeyMap.put(MapKey.fromInt32(42), Value.fromString("int_value")); + + var longKeyMap = new HashMap(); + 
longKeyMap.put(MapKey.fromInt64(9876543210L), Value.fromString("long_value")); + + var bytesKeyMap = new HashMap(); + bytesKeyMap.put(MapKey.fromBytes(new byte[]{1, 2, 3}), Value.fromString("bytes_value")); + + var record = ImprintRecord.builder(schemaId) + .field(1, Value.fromMap(stringKeyMap)) + .field(2, Value.fromMap(intKeyMap)) + .field(3, Value.fromMap(longKeyMap)) + .field(4, Value.fromMap(bytesKeyMap)) + .build(); + + var deserialized = serializeAndDeserialize(record); + + // Verify all map key types work correctly + assertEquals(Value.fromString("string_value"), + deserialized.getMap(1).get(MapKey.fromString("string_key"))); + assertEquals(Value.fromString("int_value"), + deserialized.getMap(2).get(MapKey.fromInt32(42))); + assertEquals(Value.fromString("long_value"), + deserialized.getMap(3).get(MapKey.fromInt64(9876543210L))); + assertEquals(Value.fromString("bytes_value"), + deserialized.getMap(4).get(MapKey.fromBytes(new byte[]{1, 2, 3}))); + } + + @Test + @DisplayName("Large Data: Memory efficiency with large payloads") + void testLargeDataHandling() throws ImprintException { + var schemaId = new SchemaId(80, 0xB16DA7A); + + // Create large byte arrays + byte[] largeBytes1 = new byte[100_000]; // 100KB + byte[] largeBytes2 = new byte[500_000]; // 500KB + Arrays.fill(largeBytes1, (byte) 0xAA); + Arrays.fill(largeBytes2, (byte) 0xBB); + + // Create large string + String largeString = "Large data test: " + "X".repeat(50_000); + + var record = ImprintRecord.builder(schemaId) + .field(1, largeBytes1) + .field(2, largeBytes2) + .field(3, largeString) + .field(4, "small field") + .build(); + + // Verify large record can be serialized and deserialized + var deserialized = serializeAndDeserialize(record); + + assertArrayEquals(largeBytes1, deserialized.getBytes(1)); + assertArrayEquals(largeBytes2, deserialized.getBytes(2)); + assertEquals(largeString, deserialized.getString(3)); + assertEquals("small field", deserialized.getString(4)); + + // Test projection 
still works with large data + var projected = record.project(4); + assertEquals(1, projected.getDirectory().size()); + assertEquals("small field", projected.getString(4)); + + // Verify original large data is excluded from projection + assertTrue(projected.getSerializedSize() < record.getSerializedSize() / 10); + } + + @Test + @DisplayName("Error Handling: Empty data detection") + void testEmptyDataHandling() { + // Empty data should throw exception + assertThrows(Exception.class, () -> ImprintRecord.deserialize(new byte[0])); + + // Null data should throw exception + assertThrows(Exception.class, () -> ImprintRecord.deserialize((byte[]) null)); + } + + @Test + @DisplayName("Complex Operations: Bytes-to-bytes vs object operations equivalence") + void testBytesToBytesEquivalence() throws ImprintException { + var schemaId = new SchemaId(100, 0xB17E5); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, "record1 field1") + .field(3, 100) + .field(5, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, "record2 field2") + .field(4, 200L) + .field(6, 3.14) + .build(); + + // Test merge equivalence + var objectMerged = record1.merge(record2); + var bytesMerged = com.imprint.ops.ImprintOperations.mergeBytes( + record1.serializeToBuffer(), + record2.serializeToBuffer() + ); + var bytesMergedRecord = ImprintRecord.deserialize(bytesMerged); + + assertEquals(objectMerged.getDirectory().size(), bytesMergedRecord.getDirectory().size()); + assertEquals(objectMerged.getString(1), bytesMergedRecord.getString(1)); + assertEquals(objectMerged.getString(2), bytesMergedRecord.getString(2)); + assertEquals(objectMerged.getInt32(3), bytesMergedRecord.getInt32(3)); + + // Test project equivalence + var objectProjected = record1.project(1, 3); + var bytesProjected = com.imprint.ops.ImprintOperations.projectBytes( + record1.serializeToBuffer(), 1, 3 + ); + var bytesProjectedRecord = ImprintRecord.deserialize(bytesProjected); + + 
assertEquals(objectProjected.getDirectory().size(), bytesProjectedRecord.getDirectory().size()); + assertEquals(objectProjected.getString(1), bytesProjectedRecord.getString(1)); + assertEquals(objectProjected.getInt32(3), bytesProjectedRecord.getInt32(3)); + } } \ No newline at end of file diff --git a/src/test/java/com/imprint/core/ImprintRecordTest.java b/src/test/java/com/imprint/core/ImprintRecordTest.java index 6d85ccb..562f5fd 100644 --- a/src/test/java/com/imprint/core/ImprintRecordTest.java +++ b/src/test/java/com/imprint/core/ImprintRecordTest.java @@ -1,227 +1,289 @@ package com.imprint.core; import com.imprint.error.ImprintException; -import com.imprint.error.ErrorType; -import com.imprint.types.Value; -import com.imprint.types.MapKey; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import java.util.*; -import static org.assertj.core.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.*; + +@DisplayName("ImprintRecord") class ImprintRecordTest { - - // Helper method to extract string value from either StringValue or StringBufferValue - private String getStringValue(Value value) { - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } else if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } else { - throw new IllegalArgumentException("Expected string value, got: " + value.getClass()); - } - } - - @Test - void shouldCreateSimpleRecord() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var record = ImprintRecord.builder(schemaId) - .field(1, Value.fromInt32(42)) - .field(2, Value.fromString("hello")) - .build(); - - assertThat(record.getHeader().getSchemaId()).isEqualTo(schemaId); - assertThat(record.getDirectory()).hasSize(2); - - Value field1 = record.getValue(1); - Value field2 = record.getValue(2); - - 
assertThat(field1).isNotNull(); - assertThat(field1).isInstanceOf(Value.Int32Value.class); - assertThat(((Value.Int32Value) field1).getValue()).isEqualTo(42); - - assertThat(field2).isNotNull(); - assertThat(field2.getTypeCode()).isEqualTo(com.imprint.types.TypeCode.STRING); - String stringValue = getStringValue(field2); - assertThat(stringValue).isEqualTo("hello"); - - // Non-existent field should return null - assertThat(record.getValue(999)).isNull(); + + private SchemaId testSchema; + private ImprintRecord testRecord; + private ImprintRecord serializedRecord; + + @BeforeEach + void setUp() throws ImprintException { + testSchema = new SchemaId(1, 0x12345678); + testRecord = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(2, "hello") + .field(3, true) + .field(4, 3.14159) + .field(5, new byte[]{1, 2, 3, 4, 5}) + .build(); + serializedRecord = testRecord; } - - @Test - void shouldRoundtripThroughSerialization() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - var original = ImprintRecord.builder(schemaId) - .field(1, Value.nullValue()) - .field(2, Value.fromBoolean(true)) - .field(3, Value.fromInt32(42)) - .field(4, Value.fromInt64(123456789L)) - .field(5, Value.fromFloat32(3.14f)) - .field(6, Value.fromFloat64(2.718281828)) - .field(7, Value.fromBytes(new byte[]{1, 2, 3, 4})) - .field(8, Value.fromString("test string")) - .build(); - - // Serialize and deserialize - var buffer = original.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - // Verify metadata - assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); - assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); - assertThat(deserialized.getDirectory()).hasSize(8); - - // Verify all values - assertThat(deserialized.getValue(1)).isEqualTo(Value.nullValue()); - 
assertThat(deserialized.getValue(2)).isEqualTo(Value.fromBoolean(true)); - assertThat(deserialized.getValue(3)).isEqualTo(Value.fromInt32(42)); - assertThat(deserialized.getValue(4)).isEqualTo(Value.fromInt64(123456789L)); - assertThat(deserialized.getValue(5)).isEqualTo(Value.fromFloat32(3.14f)); - assertThat(deserialized.getValue(6)).isEqualTo(Value.fromFloat64(2.718281828)); - assertThat(deserialized.getValue(7)).isEqualTo(Value.fromBytes(new byte[]{1, 2, 3, 4})); - assertThat(deserialized.getValue(8)).isEqualTo(Value.fromString("test string")); - - // Non-existent field - assertThat(deserialized.getValue(999)).isNull(); + + @Nested + @DisplayName("Creation") + class Creation { + + @Test + @DisplayName("should create from ImprintRecord") + void shouldCreateFromImprintRecord() { + var serialized = testRecord; + + assertNotNull(serialized); + assertEquals(testRecord.getDirectory().size(), serialized.getFieldCount()); + assertEquals(testSchema, serialized.getSchemaId()); + } + + @Test + @DisplayName("should create from serialized bytes") + void shouldCreateFromSerializedBytes() throws ImprintException { + var bytes = testRecord.serializeToBuffer(); + var serialized = ImprintRecord.fromBytes(bytes); + + assertNotNull(serialized); + assertEquals(testRecord.getDirectory().size(), serialized.getFieldCount()); + assertEquals(testSchema, serialized.getSchemaId()); + } + + @Test + @DisplayName("should reject null bytes") + void shouldRejectNullBytes() { + assertThrows(NullPointerException.class, () -> ImprintRecord.fromBytes(null)); + } } - - @Test - void shouldHandleArrays() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - - List intArray = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - - var record = ImprintRecord.builder(schemaId) - .field(1, Value.fromArray(intArray)) - .build(); - - // Serialize and deserialize - var buffer = record.serializeToBuffer(); - byte[] serialized = new 
byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - Value arrayValue = deserialized.getValue(1); - assertThat(arrayValue).isNotNull(); - assertThat(arrayValue).isInstanceOf(Value.ArrayValue.class); - - List deserializedArray = ((Value.ArrayValue) arrayValue).getValue(); - assertThat(deserializedArray).hasSize(3); - assertThat(deserializedArray.get(0)).isEqualTo(Value.fromInt32(1)); - assertThat(deserializedArray.get(1)).isEqualTo(Value.fromInt32(2)); - assertThat(deserializedArray.get(2)).isEqualTo(Value.fromInt32(3)); + + @Nested + @DisplayName("Field Access") + class FieldAccess { + + @Test + @DisplayName("should access fields with correct types") + void shouldAccessFieldsWithCorrectTypes() throws ImprintException { + assertEquals(Integer.valueOf(42), serializedRecord.getInt32(1)); + assertEquals("hello", serializedRecord.getString(2)); + assertEquals(Boolean.TRUE, serializedRecord.getBoolean(3)); + assertEquals(Double.valueOf(3.14159), serializedRecord.getFloat64(4)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, serializedRecord.getBytes(5)); + } + + @Test + @DisplayName("should handle non-existent fields correctly") + void shouldHandleNonExistentFields() throws ImprintException { + // getValue should return null for non-existent fields + assertNull(serializedRecord.getValue(99)); + + // Typed getters should throw exceptions for non-existent fields + assertThrows(ImprintException.class, () -> serializedRecord.getString(99)); + assertThrows(ImprintException.class, () -> serializedRecord.getInt32(100)); + + // hasField should return false + assertFalse(serializedRecord.hasField(99)); + } + + @Test + @DisplayName("should check field existence efficiently") + void shouldCheckFieldExistenceEfficiently() { + assertTrue(serializedRecord.hasField(1)); + assertTrue(serializedRecord.hasField(2)); + assertTrue(serializedRecord.hasField(3)); + assertFalse(serializedRecord.hasField(99)); + } + + @Test + 
@DisplayName("should return correct field count") + void shouldReturnCorrectFieldCount() { + assertEquals(5, serializedRecord.getFieldCount()); + } } - - @Test - void shouldHandleMaps() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - - var map = new HashMap(); - map.put(MapKey.fromString("key1"), Value.fromInt32(1)); - map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - - var record = ImprintRecord.builder(schemaId) - .field(1, Value.fromMap(map)) - .build(); - - // Serialize and deserialize - var buffer = record.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - Value mapValue = deserialized.getValue(1); - assertThat(mapValue).isNotNull(); - assertThat(mapValue).isInstanceOf(Value.MapValue.class); - - Map deserializedMap = ((Value.MapValue) mapValue).getValue(); - assertThat(deserializedMap).hasSize(2); - assertThat(deserializedMap.get(MapKey.fromString("key1"))).isEqualTo(Value.fromInt32(1)); - assertThat(deserializedMap.get(MapKey.fromString("key2"))).isEqualTo(Value.fromInt32(2)); + + @Nested + @DisplayName("Zero-Copy Operations") + class ZeroCopyOperations { + + @Test + @DisplayName("should merge with another ImprintRecord") + void shouldMergeWithAnotherImprintRecord() throws ImprintException { + // Create another record + var otherRecord = ImprintRecord.builder(testSchema) + .field(6, "additional") + .field(7, 999L) + .build(); + + // Merge + var merged = serializedRecord.merge(otherRecord); + + // Verify merged result + assertEquals(7, merged.getFieldCount()); + assertEquals(Integer.valueOf(42), merged.getInt32(1)); + assertEquals("hello", merged.getString(2)); + assertEquals("additional", merged.getString(6)); + assertEquals(Long.valueOf(999L), merged.getInt64(7)); + } + + @Test + @DisplayName("should project subset of fields") + void shouldProjectSubsetOfFields() throws ImprintException { + var projected = 
serializedRecord.project(1, 3, 5); + + assertEquals(3, projected.getFieldCount()); + assertEquals(Integer.valueOf(42), projected.getInt32(1)); + assertEquals(Boolean.TRUE, projected.getBoolean(3)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, projected.getBytes(5)); + + // Should not have other fields + assertFalse(projected.hasField(2)); + assertFalse(projected.hasField(4)); + } + + @Test + @DisplayName("should chain project and merge operations") + void shouldChainProjectAndMergeOperations() throws ImprintException { + // Create another record + var otherSerialized = ImprintRecord.builder(testSchema) + .field(10, "chained") + .build(); + + // Chain operations: project this record, then merge with other + var result = serializedRecord.projectAndMerge(otherSerialized, 1, 2); + + // Should have projected fields plus other record + assertEquals(3, result.getFieldCount()); + assertEquals(Integer.valueOf(42), result.getInt32(1)); + assertEquals("hello", result.getString(2)); + assertEquals("chained", result.getString(10)); + + // Should not have non-projected fields + assertFalse(result.hasField(3)); + assertFalse(result.hasField(4)); + assertFalse(result.hasField(5)); + } } - - @Test - void shouldHandleNestedRecords() throws ImprintException { - // Create inner record - var innerSchemaId = new SchemaId(2, 0xcafebabe); - var innerRecord = ImprintRecord.builder(innerSchemaId) - .field(1, Value.fromInt32(42)) - .field(2, Value.fromString("nested")) - .build(); - - // Create outer record containing inner record - var outerSchemaId = new SchemaId(1, 0xdeadbeef); - var outerRecord = ImprintRecord.builder(outerSchemaId) - .field(1, Value.fromRow(innerRecord)) - .field(2, Value.fromInt64(123L)) - .build(); - - // Serialize and deserialize - var buffer = outerRecord.serializeToBuffer(); - byte[] serialized = new byte[buffer.remaining()]; - buffer.get(serialized); - var deserialized = ImprintRecord.deserialize(serialized); - - // Verify outer record metadata - 
assertThat(deserialized.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(1); - assertThat(deserialized.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xdeadbeef); - - // Verify nested record - Value rowValue = deserialized.getValue(1); - assertThat(rowValue).isNotNull(); - assertThat(rowValue).isInstanceOf(Value.RowValue.class); - - var nestedRecord = ((Value.RowValue) rowValue).getValue(); - assertThat(nestedRecord.getHeader().getSchemaId().getFieldSpaceId()).isEqualTo(2); - assertThat(nestedRecord.getHeader().getSchemaId().getSchemaHash()).isEqualTo(0xcafebabe); - - assertThat(nestedRecord.getValue(1)).isEqualTo(Value.fromInt32(42)); - assertThat(nestedRecord.getValue(2)).isEqualTo(Value.fromString("nested")); - - // Verify outer record field - assertThat(deserialized.getValue(2)).isEqualTo(Value.fromInt64(123L)); + + @Nested + @DisplayName("Conversion") + class Conversion { + + @Test + @DisplayName("should serialize and deserialize consistently") + void shouldSerializeAndDeserializeConsistently() throws ImprintException { + var serializedBytes = serializedRecord.serializeToBuffer(); + var deserialized = ImprintRecord.fromBytes(serializedBytes); + + assertEquals(testRecord.getDirectory().size(), deserialized.getDirectory().size()); + assertEquals(testRecord.getInt32(1), deserialized.getInt32(1)); + assertEquals(testRecord.getString(2), deserialized.getString(2)); + assertEquals(testRecord.getBoolean(3), deserialized.getBoolean(3)); + } + + @Test + @DisplayName("should preserve serialized bytes") + void shouldPreserveSerializedBytes() { + var originalBytes = testRecord.serializeToBuffer(); + var preservedBytes = serializedRecord.getSerializedBytes(); + + assertEquals(originalBytes.remaining(), preservedBytes.remaining()); + + // Compare byte content + var original = originalBytes.duplicate(); + var preserved = preservedBytes.duplicate(); + + while (original.hasRemaining() && preserved.hasRemaining()) { + assertEquals(original.get(), preserved.get()); + 
} + } } - - @Test - void shouldRejectInvalidMagic() { - byte[] invalidData = new byte[15]; - invalidData[0] = 0x00; // wrong magic - - assertThatThrownBy(() -> ImprintRecord.deserialize(invalidData)) - .isInstanceOf(ImprintException.class) - .extracting("errorType") - .isEqualTo(ErrorType.INVALID_MAGIC); + + @Nested + @DisplayName("Performance Characteristics") + class PerformanceCharacteristics { + + @Test + @DisplayName("should have minimal memory footprint") + void shouldHaveMinimalMemoryFootprint() { + var originalSize = testRecord.serializeToBuffer().remaining(); + var serializedSize = serializedRecord.getSerializedSize(); + + assertEquals(originalSize, serializedSize); + + // ImprintRecord should not significantly increase memory usage + // (just the wrapper object itself) + assertTrue(serializedSize > 0); + } + + @Test + @DisplayName("should support repeated operations efficiently") + void shouldSupportRepeatedOperationsEfficiently() throws ImprintException { + // Multiple field access should not cause performance degradation + for (int i = 0; i < 100; i++) { + assertEquals(Integer.valueOf(42), serializedRecord.getInt32(1)); + assertEquals("hello", serializedRecord.getString(2)); + assertTrue(serializedRecord.hasField(3)); + } + } } - - @Test - void shouldRejectUnsupportedVersion() { - byte[] invalidData = new byte[15]; - invalidData[0] = (byte) 0x49; // correct magic - invalidData[1] = (byte) 0xFF; // wrong version - - assertThatThrownBy(() -> ImprintRecord.deserialize(invalidData)) - .isInstanceOf(ImprintException.class) - .extracting("errorType") - .isEqualTo(ErrorType.UNSUPPORTED_VERSION); + + @Nested + @DisplayName("Edge Cases") + class EdgeCases { + + @Test + @DisplayName("should handle empty projection") + void shouldHandleEmptyProjection() throws ImprintException { + var projected = serializedRecord.project(); + assertEquals(0, projected.getFieldCount()); + } + + @Test + @DisplayName("should handle projection with non-existent fields") + void 
shouldHandleProjectionWithNonExistentFields() throws ImprintException { + var projected = serializedRecord.project(1, 99, 100); + assertEquals(1, projected.getFieldCount()); + assertEquals(Integer.valueOf(42), projected.getInt32(1)); + assertFalse(projected.hasField(99)); + assertFalse(projected.hasField(100)); + } + + @Test + @DisplayName("should handle merge with empty record") + void shouldHandleMergeWithEmptyRecord() throws ImprintException { + var emptySerialized = ImprintRecord.builder(testSchema).build(); + + var merged = serializedRecord.merge(emptySerialized); + assertEquals(serializedRecord.getFieldCount(), merged.getFieldCount()); + assertEquals(Integer.valueOf(42), merged.getInt32(1)); + } } - - @Test - void shouldHandleDuplicateFieldIds() throws ImprintException { - var schemaId = new SchemaId(1, 0xdeadbeef); - - // Add duplicate field IDs - last one should win - var record = ImprintRecord.builder(schemaId) - .field(1, Value.fromInt32(42)) - .field(1, Value.fromInt32(43)) - .build(); - - assertThat(record.getDirectory()).hasSize(1); - assertThat(record.getValue(1)).isEqualTo(Value.fromInt32(43)); + + @Nested + @DisplayName("Equality and Hashing") + class EqualityAndHashing { + + @Test + @DisplayName("should be equal for same serialized data") + void shouldBeEqualForSameSerializedData() { + var other = testRecord; + + assertEquals(serializedRecord, other); + assertEquals(serializedRecord.hashCode(), other.hashCode()); + } + + @Test + @DisplayName("should not be equal for different data") + void shouldNotBeEqualForDifferentData() throws ImprintException { + // Different value + var differentSerialized = ImprintRecord.builder(testSchema) + .field(1, 999) // Different value + .build(); + + assertNotEquals(serializedRecord, differentSerialized); + } } } \ No newline at end of file diff --git a/src/test/java/com/imprint/ops/ImprintOperationsTest.java b/src/test/java/com/imprint/ops/ImprintOperationsTest.java index 7b54800..292f8f3 100644 --- 
a/src/test/java/com/imprint/ops/ImprintOperationsTest.java +++ b/src/test/java/com/imprint/ops/ImprintOperationsTest.java @@ -10,6 +10,7 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -49,7 +50,7 @@ class ProjectOperations { @DisplayName("should project subset of fields") void shouldProjectSubsetOfFields() throws ImprintException { // When projecting a subset of fields - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 5); + ImprintRecord projected = multiFieldRecord.project(1, 5); // Then only the requested fields should be present assertEquals(2, projected.getDirectory().size()); @@ -65,7 +66,7 @@ void shouldProjectSubsetOfFields() throws ImprintException { @DisplayName("should maintain field order regardless of input order") void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException { // When projecting fields in arbitrary order - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7, 1, 5, 3); + ImprintRecord projected = multiFieldRecord.project(7, 1, 5, 3); // Then all requested fields should be present assertEquals(4, projected.getDirectory().size()); @@ -86,7 +87,7 @@ void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException { @DisplayName("should handle single field projection") void shouldHandleSingleFieldProjection() throws ImprintException { // When projecting a single field - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 3); + ImprintRecord projected = multiFieldRecord.project(3); // Then only that field should be present assertEquals(1, projected.getDirectory().size()); @@ -102,7 +103,7 @@ void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { .toArray(); // When projecting all fields - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, allFields); + ImprintRecord projected = 
multiFieldRecord.project(allFields); // Then all fields should be present with matching values assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size()); @@ -117,20 +118,20 @@ void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { @Test @DisplayName("should handle empty projection") - void shouldHandleEmptyProjection() { + void shouldHandleEmptyProjection() throws ImprintException { // When projecting no fields - ImprintRecord projected = ImprintOperations.project(multiFieldRecord); + ImprintRecord projected = multiFieldRecord.project(); // Then result should be empty but valid assertEquals(0, projected.getDirectory().size()); - assertEquals(0, projected.getBuffers().getPayload().remaining()); + assertEquals(0, projected.getFieldCount()); } @Test @DisplayName("should ignore nonexistent fields") void shouldIgnoreNonexistentFields() throws ImprintException { // When projecting mix of existing and non-existing fields - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 99, 100); + ImprintRecord projected = multiFieldRecord.project(1, 99, 100); // Then only existing fields should be included assertEquals(1, projected.getDirectory().size()); @@ -143,7 +144,7 @@ void shouldIgnoreNonexistentFields() throws ImprintException { @DisplayName("should deduplicate requested fields") void shouldDeduplicateRequestedFields() throws ImprintException { // When projecting the same field multiple times - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 1, 1); + ImprintRecord projected = multiFieldRecord.project(1, 1, 1); // Then field should only appear once assertEquals(1, projected.getDirectory().size()); @@ -152,13 +153,13 @@ void shouldDeduplicateRequestedFields() throws ImprintException { @Test @DisplayName("should handle projection from empty record") - void shouldHandleProjectionFromEmptyRecord() { + void shouldHandleProjectionFromEmptyRecord() throws ImprintException { // When 
projecting any fields from empty record - ImprintRecord projected = ImprintOperations.project(emptyRecord, 1, 2, 3); + ImprintRecord projected = emptyRecord.project(1, 2, 3); // Then result should be empty but valid assertEquals(0, projected.getDirectory().size()); - assertEquals(0, projected.getBuffers().getPayload().remaining()); + assertEquals(0, projected.getFieldCount()); } @Test @@ -168,7 +169,7 @@ void shouldPreserveExactByteRepresentation() throws ImprintException { byte[] originalBytes = multiFieldRecord.getBytes(7); // When projecting that field - ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7); + ImprintRecord projected = multiFieldRecord.project(7); // Then the byte representation should be exactly preserved byte[] projectedBytes = projected.getBytes(7); @@ -187,13 +188,13 @@ void shouldReducePayloadSizeWhenProjectingSubset() throws ImprintException { .field(4, new byte[500]) // 500+ bytes .build(); - int originalPayloadSize = largeRecord.getBuffers().getPayload().remaining(); + int originalPayloadSize = largeRecord.getSerializedSize(); // When projecting only the small fields - ImprintRecord projected = ImprintOperations.project(largeRecord, 1, 3); + ImprintRecord projected = largeRecord.project(1, 3); // Then the payload size should be significantly smaller - assertTrue(projected.getBuffers().getPayload().remaining() < originalPayloadSize, + assertTrue(projected.getSerializedSize() < originalPayloadSize, "Projected payload should be smaller than original"); // And the values should still be correct @@ -221,7 +222,7 @@ void shouldMergeRecordsWithDistinctFields() throws ImprintException { .build(); // When merging the records - ImprintRecord merged = ImprintOperations.merge(record1, record2); + ImprintRecord merged = record1.merge(record2); // Then all fields should be present assertEquals(4, merged.getDirectory().size()); @@ -253,7 +254,7 @@ void shouldMergeRecordsWithOverlappingFields() throws ImprintException { .build(); // 
When merging the records - ImprintRecord merged = ImprintOperations.merge(record1, record2); + ImprintRecord merged = record1.merge(record2); // Then first record's values should take precedence for duplicates assertEquals(3, merged.getDirectory().size()); @@ -278,7 +279,7 @@ void shouldPreserveSchemaIdFromFirstRecord() throws ImprintException { .build(); // When merging the records - ImprintRecord merged = ImprintOperations.merge(record1, record2); + ImprintRecord merged = record1.merge(record2); // Then schema ID from first record should be preserved assertEquals(schema1, merged.getHeader().getSchemaId()); @@ -288,8 +289,8 @@ void shouldPreserveSchemaIdFromFirstRecord() throws ImprintException { @DisplayName("should handle merge with empty record") void shouldHandleMergeWithEmptyRecord() throws ImprintException { // When merging with empty record - ImprintRecord merged1 = ImprintOperations.merge(multiFieldRecord, emptyRecord); - ImprintRecord merged2 = ImprintOperations.merge(emptyRecord, multiFieldRecord); + ImprintRecord merged1 = multiFieldRecord.merge(emptyRecord); + ImprintRecord merged2 = emptyRecord.merge(multiFieldRecord); // Then results should contain all original fields assertEquals(multiFieldRecord.getDirectory().size(), merged1.getDirectory().size()); @@ -307,11 +308,11 @@ void shouldHandleMergeWithEmptyRecord() throws ImprintException { @DisplayName("should handle merge of two empty records") void shouldHandleMergeOfTwoEmptyRecords() throws ImprintException { // When merging two empty records - ImprintRecord merged = ImprintOperations.merge(emptyRecord, emptyRecord); + ImprintRecord merged = emptyRecord.merge(emptyRecord); // Then result should be empty but valid assertEquals(0, merged.getDirectory().size()); - assertEquals(0, merged.getBuffers().getPayload().remaining()); + assertEquals(0, merged.getFieldCount()); } @Test @@ -329,7 +330,7 @@ void shouldMaintainCorrectPayloadOffsetsAfterMerge() throws ImprintException { .build(); // When merging - 
ImprintRecord merged = ImprintOperations.merge(record1, record2); + ImprintRecord merged = record1.merge(record2); // Then all fields should be accessible with correct values assertEquals(42, merged.getInt32(1)); @@ -371,7 +372,7 @@ void shouldHandleLargeRecordsEfficiently() throws ImprintException { ImprintRecord record2 = builder2.build(); // When merging large records - ImprintRecord merged = ImprintOperations.merge(record1, record2); + ImprintRecord merged = record1.merge(record2); // Then all 200 fields should be present and accessible assertEquals(200, merged.getDirectory().size()); @@ -387,23 +388,292 @@ void shouldHandleLargeRecordsEfficiently() throws ImprintException { } @Nested - @DisplayName("Error Handling") - class ErrorHandling { + @DisplayName("Bytes-to-Bytes Operations") + class BytesToBytesOperations { @Test - @DisplayName("should handle null record gracefully") - void shouldHandleNullRecordGracefully() { - assertThrows(NullPointerException.class, () -> ImprintOperations.project(null, 1, 2, 3)); + @DisplayName("should merge bytes with same result as object merge") + void shouldMergeBytesWithSameResultAsObjectMerge() throws ImprintException { + // Given two records with distinct fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) + .field(4, 123L) + .build(); + + // When merging using both approaches + var objectMerged = record1.merge(record2); + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + var bytesMerged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + + // Then results should be functionally equivalent + var deserializedBytes = ImprintRecord.deserialize(bytesMerged); + + assertEquals(objectMerged.getDirectory().size(), deserializedBytes.getDirectory().size()); + assertEquals(42, deserializedBytes.getInt32(1)); + 
assertTrue(deserializedBytes.getBoolean(2)); + assertEquals("hello", deserializedBytes.getString(3)); + assertEquals(123L, deserializedBytes.getInt64(4)); + } + + @Test + @DisplayName("should handle overlapping fields in byte merge") + void shouldHandleOverlappingFieldsInByteMerge() throws ImprintException { + // Given two records with overlapping fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, "first") + .field(2, 42) + .build(); - assertThrows(NullPointerException.class, () -> ImprintOperations.merge(null, multiFieldRecord)); + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(1, "second") // Overlapping field + .field(3, true) + .build(); - assertThrows(NullPointerException.class, () -> ImprintOperations.merge(multiFieldRecord, null)); + // When merging using bytes + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + var merged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + + // Then first record's values should take precedence + var result = ImprintRecord.deserialize(merged); + assertEquals(3, result.getDirectory().size()); + assertEquals("first", result.getString(1)); // First record wins + assertEquals(42, result.getInt32(2)); + assertTrue(result.getBoolean(3)); } @Test - @DisplayName("should handle null field ids gracefully") - void shouldHandleNullFieldIdsGracefully() { - assertThrows(NullPointerException.class, () -> ImprintOperations.project(multiFieldRecord, (int[]) null)); + @DisplayName("should merge empty records correctly") + void shouldMergeEmptyRecordsCorrectly() throws ImprintException { + // Given an empty record and a non-empty record + var emptyRecord = ImprintRecord.builder(testSchema).build(); + var nonEmptyRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + + // When merging using bytes + var emptyBytes = emptyRecord.serializeToBuffer(); + var nonEmptyBytes = nonEmptyRecord.serializeToBuffer(); + + var 
merged1 = ImprintOperations.mergeBytes(emptyBytes, nonEmptyBytes); + var merged2 = ImprintOperations.mergeBytes(nonEmptyBytes, emptyBytes); + + // Then both should contain the non-empty record's data + var result1 = ImprintRecord.deserialize(merged1); + var result2 = ImprintRecord.deserialize(merged2); + + assertEquals(1, result1.getDirectory().size()); + assertEquals(1, result2.getDirectory().size()); + assertEquals("test", result1.getString(1)); + assertEquals("test", result2.getString(1)); + } + + @Test + @DisplayName("should project bytes with same result as object project") + void shouldProjectBytesWithSameResultAsObjectProject() throws ImprintException { + // Given a record with multiple fields + ImprintRecord record = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(2, "hello") + .field(3, true) + .field(4, 123L) + .field(5, new byte[]{1, 2, 3}) + .build(); + + // When projecting using both approaches + var objectProjected = record.project(2, 4); + + var recordBytes = record.serializeToBuffer(); + var bytesProjected = ImprintOperations.projectBytes(recordBytes, 2, 4); + + // Then results should be functionally equivalent + var deserializedBytes = ImprintRecord.deserialize(bytesProjected); + + assertEquals(objectProjected.getDirectory().size(), deserializedBytes.getDirectory().size()); + assertEquals("hello", deserializedBytes.getString(2)); + assertEquals(123L, deserializedBytes.getInt64(4)); + + // Should not have the other fields + assertNull(deserializedBytes.getValue(1)); + assertNull(deserializedBytes.getValue(3)); + assertNull(deserializedBytes.getValue(5)); + } + + @Test + @DisplayName("should handle empty projection in bytes") + void shouldHandleEmptyProjectionInBytes() throws ImprintException { + // Given a record with fields + var record = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + + // When projecting no fields + var recordBytes = record.serializeToBuffer(); + var projected = 
ImprintOperations.projectBytes(recordBytes); + + // Then result should be empty but valid + var result = ImprintRecord.deserialize(projected); + assertEquals(0, result.getDirectory().size()); + } + + @Test + @DisplayName("should handle nonexistent fields in byte projection") + void shouldHandleNonexistentFieldsInByteProjection() throws ImprintException { + // Given a record with some fields + var record = ImprintRecord.builder(testSchema) + .field(1, "exists") + .field(3, 42) + .build(); + + // When projecting mix of existing and non-existing fields + var recordBytes = record.serializeToBuffer(); + var projected = ImprintOperations.projectBytes(recordBytes, 1, 99, 100); + + // Then only existing fields should be included + var result = ImprintRecord.deserialize(projected); + assertEquals(1, result.getDirectory().size()); + assertEquals("exists", result.getString(1)); + assertNull(result.getValue(99)); + assertNull(result.getValue(100)); + } + + @Test + @DisplayName("should handle null buffers gracefully") + void shouldHandleNullBuffersGracefully() throws ImprintException { + var validRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + var validBuffer = validRecord.serializeToBuffer(); + + // Test null buffer scenarios + assertThrows(Exception.class, () -> + ImprintOperations.mergeBytes(null, validBuffer)); + assertThrows(Exception.class, () -> + ImprintOperations.mergeBytes(validBuffer, null)); + assertThrows(Exception.class, () -> + ImprintOperations.projectBytes(null, 1, 2, 3)); + } + + @Test + @DisplayName("should validate buffer format and reject invalid data") + void shouldValidateBufferFormatAndRejectInvalidData() throws ImprintException { + var validRecord = ImprintRecord.builder(testSchema) + .field(1, "test") + .build(); + var validBuffer = validRecord.serializeToBuffer(); + + // Test invalid magic byte + var invalidMagic = ByteBuffer.allocate(20); + invalidMagic.put((byte) 0x99); // Invalid magic + invalidMagic.put((byte) 0x01); 
// Valid version + invalidMagic.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(invalidMagic, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(invalidMagic, 1)); + + // Test buffer too small + var tooSmall = ByteBuffer.allocate(5); + tooSmall.put(new byte[]{1, 2, 3, 4, 5}); + tooSmall.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(tooSmall, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(tooSmall, 1)); + + // Test invalid version + var invalidVersion = ByteBuffer.allocate(20); + invalidVersion.put((byte) 0x49); // Valid magic + invalidVersion.put((byte) 0x99); // Invalid version + invalidVersion.flip(); + + assertThrows(ImprintException.class, () -> + ImprintOperations.mergeBytes(invalidVersion, validBuffer)); + assertThrows(ImprintException.class, () -> + ImprintOperations.projectBytes(invalidVersion, 1)); + } + + @Test + @DisplayName("should handle large records efficiently in bytes operations") + void shouldHandleLargeRecordsEfficientlyInBytesOperations() throws ImprintException { + // Create records with many fields + var builder1 = ImprintRecord.builder(testSchema); + var builder2 = ImprintRecord.builder(testSchema); + + // Add many fields + for (int i = 1; i <= 50; i++) { + builder1.field(i, "field_" + i); + } + for (int i = 51; i <= 100; i++) { + builder2.field(i, "field_" + i); + } + + var record1 = builder1.build(); + var record2 = builder2.build(); + + // Test bytes-to-bytes merge with many fields + var merged = ImprintOperations.mergeBytes( + record1.serializeToBuffer(), + record2.serializeToBuffer() + ); + var mergedRecord = ImprintRecord.deserialize(merged); + + assertEquals(100, mergedRecord.getDirectory().size()); + assertEquals("field_1", mergedRecord.getString(1)); + assertEquals("field_100", mergedRecord.getString(100)); + + // Test bytes-to-bytes projection with many fields + int[] 
projectFields = {1, 25, 50, 75, 100}; + var projected = ImprintOperations.projectBytes(merged, projectFields); + var projectedRecord = ImprintRecord.deserialize(projected); + + assertEquals(5, projectedRecord.getDirectory().size()); + assertEquals("field_1", projectedRecord.getString(1)); + assertEquals("field_25", projectedRecord.getString(25)); + assertEquals("field_100", projectedRecord.getString(100)); + } + + @Test + @DisplayName("should preserve field order in bytes operations") + void shouldPreserveFieldOrderInBytesOperations() throws ImprintException { + var record = ImprintRecord.builder(testSchema) + .field(5, "field5") + .field(1, "field1") + .field(3, "field3") + .field(2, "field2") + .field(4, "field4") + .build(); + + // Project in random order + var projected = ImprintOperations.projectBytes( + record.serializeToBuffer(), 4, 1, 3, 5, 2 + ); + var projectedRecord = ImprintRecord.deserialize(projected); + + // Verify fields are still accessible and directory is sorted + var directory = projectedRecord.getDirectory(); + assertEquals(5, directory.size()); + + // Directory should be sorted by field ID + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId()); + } + + // All fields should be accessible + assertEquals("field1", projectedRecord.getString(1)); + assertEquals("field2", projectedRecord.getString(2)); + assertEquals("field3", projectedRecord.getString(3)); + assertEquals("field4", projectedRecord.getString(4)); + assertEquals("field5", projectedRecord.getString(5)); } } + } diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 7b8a027..79882d9 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -8,6 +8,8 @@ import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertTrue; + import 
java.util.Random; import java.util.stream.IntStream; @@ -38,14 +40,14 @@ private void profileSmallMerges() throws Exception { var record1 = createTestRecord(20); var record2 = createTestRecord(20); - int iterations = 200_000; + int iterations = 500_000; System.out.printf("Beginning small merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); for (int i = 0; i < iterations; i++) { // This is the hotspot we want to profile - var merged = ImprintOperations.merge(record1, record2); + var merged = record1.merge(record2); // Simulate some usage to prevent dead code elimination if (i % 10_000 == 0) { @@ -68,13 +70,13 @@ private void profileLargeMerges() throws Exception { var record1 = createTestRecord(100); var record2 = createTestRecord(100); - int iterations = 50_000; + int iterations = 100_000; System.out.printf("Beginning large merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); for (int i = 0; i < iterations; i++) { - var merged = ImprintOperations.merge(record1, record2); + var merged = record1.merge(record2); merged.serializeToBuffer(); } @@ -91,13 +93,13 @@ private void profileOverlappingMerges() throws Exception { var record1 = createTestRecordWithFieldIds(new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); var record2 = createTestRecordWithFieldIds(new int[]{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); - int iterations = 100_000; + int iterations = 200_000; System.out.printf("Beginning overlapping merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); for (int i = 0; i < iterations; i++) { - var merged = ImprintOperations.merge(record1, record2); + var merged = record1.merge(record2); merged.serializeToBuffer(); } @@ -115,13 +117,13 @@ private void profileDisjointMerges() throws Exception { // Create records with completely separate field IDs var record1 = createTestRecordWithFieldIds(new int[]{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}); var record2 
= createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}); - int iterations = 100_000; + int iterations = 200_000; System.out.printf("Beginning disjoint merge profiling (%,d iterations)...%n", iterations); long start = System.nanoTime(); for (int i = 0; i < iterations; i++) { - var merged = ImprintOperations.merge(record1, record2); + var merged = record1.merge(record2); merged.serializeToBuffer(); } @@ -140,6 +142,10 @@ void profileSmallRecordSerialization() throws Exception { @Test @Tag("serialization") @Tag("large-records") + /* + It's usually better to change DEFAULT_CAPACITY in ImprintFieldObjectMap to ensure resizing doesn't happen + unless you specifically want to profile resizing costs (should happen rarely in reality). + */ void profileLargeRecordSerialization() throws Exception { profileSerialization("large records", LARGE_RECORD_SIZE, 500_000); } @@ -147,49 +153,12 @@ void profileLargeRecordSerialization() throws Exception { @Test @Tag("projection") void profileProjectionOperations() throws Exception { - System.out.println("Starting projection profiler test - attach profiler now..."); Thread.sleep(3000); profileSmallProjections(); profileLargeProjections(); profileSelectiveProjections(); } - @Test - @Tag("memory") - @Tag("allocation") - void profileMemoryAllocation() throws Exception { - System.out.println("Starting allocation profiler test..."); - Thread.sleep(3000); - - System.out.println("Beginning allocation profiling - watch for GC events..."); - - // Force allocation pressure to reveal GC hotspots - for (int batch = 0; batch < 1000; batch++) { - for (int i = 0; i < 1000; i++) { - var schemaId = new SchemaId(batch, i); - var builder = ImprintRecord.builder(schemaId); - - // Create strings of varying sizes (allocation pressure) - builder.field(1, Value.fromString("small")) - .field(2, Value.fromString("medium-length-string-" + i)) - .field(3, Value.fromString("very-long-string-that-will-cause-more-allocation-pressure-" + batch + 
"-" + i)) - .field(4, Value.fromBytes(new byte[100 + i % 1000])); // Varying byte arrays - - var record = builder.build(); - - // Some deserialization to trigger string decoding allocations - record.getValue(2); - record.getValue(3); - } - - if (batch % 100 == 0) { - System.out.printf("Completed batch %d/1000%n", batch); - } - } - - System.out.println("Allocation test complete - check GC logs and memory profiler"); - } - // Rest of the methods remain the same... private void profileSmallProjections() throws Exception { System.out.println("\\n--- Small Projections (2-5 fields from 20-field records) ---"); @@ -203,7 +172,7 @@ private void profileSmallProjections() throws Exception { for (int i = 0; i < iterations; i++) { // This is the hotspot we want to profile - var projected = ImprintOperations.project(sourceRecord, projectFields); + var projected = sourceRecord.project(projectFields); // Simulate some usage to prevent dead code elimination if (i % 10_000 == 0) { @@ -226,14 +195,14 @@ private void profileLargeProjections() throws Exception { int[] projectFields = IntStream.range(0, 50) .map(i -> (i * 4) + 1) .toArray(); - int iterations = 50_000; + int iterations = 200_000; System.out.printf("Beginning large projection profiling (%,d iterations, %d->%d fields)...%n", iterations, 200, projectFields.length); long start = System.nanoTime(); for (int i = 0; i < iterations; i++) { - var projected = ImprintOperations.project(sourceRecord, projectFields); + var projected = sourceRecord.project(projectFields); // Periodically access some fields to simulate real usage if (i % 1_000 == 0) { @@ -254,7 +223,7 @@ private void profileSelectiveProjections() throws Exception { var sourceRecord = createTestRecord(100); Random random = new Random(42); - int iterations = 100_000; + int iterations = 200_000; // Test different projection patterns var patterns = new ProjectionPattern[]{ @@ -271,7 +240,7 @@ private void profileSelectiveProjections() throws Exception { long start = 
System.nanoTime(); for (int i = 0; i < iterations; i++) { - var projected = ImprintOperations.project(sourceRecord, pattern.fields); + var projected = sourceRecord.project(pattern.fields); // Simulate field access if (i % 5_000 == 0) { @@ -410,4 +379,59 @@ private int[] generateRandomFields(Random random, int maxField, int count) { .sorted() .toArray(); } + + @Test + @Tag("profiling") + void profileBytesToBytesVsObjectMerge() throws Exception { + System.out.println("=== Bytes-to-Bytes vs Object Merge Comparison ==="); + + // Create test records + var record1 = createTestRecordWithFieldIds(new int[]{1, 3, 5, 7, 9, 11, 13, 15}); + var record2 = createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16}); + + var record1Bytes = record1.serializeToBuffer(); + var record2Bytes = record2.serializeToBuffer(); + + int iterations = 50_000; + + // Warm up + for (int i = 0; i < 1000; i++) { + record1.merge(record2).serializeToBuffer(); + ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + } + + System.out.printf("Profiling %,d merge operations...%n", iterations); + + // Test object merge + serialize + long startObjectMerge = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + var merged = record1.merge(record2); + var serialized = merged.serializeToBuffer(); + // Consume result to prevent optimization + if (serialized.remaining() == 0) throw new RuntimeException("Empty result"); + } + long objectMergeTime = System.nanoTime() - startObjectMerge; + + // Test bytes merge + long startBytesMerge = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + var merged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); + // Consume result to prevent optimization + if (merged.remaining() == 0) throw new RuntimeException("Empty result"); + } + long bytesMergeTime = System.nanoTime() - startBytesMerge; + + double objectAvg = (double) objectMergeTime / iterations / 1000.0; // microseconds + double bytesAvg = (double) bytesMergeTime / iterations / 
1000.0; // microseconds + double speedup = objectAvg / bytesAvg; + + System.out.printf("Object merge + serialize: %.2f ms (avg: %.1f μs/op)%n", + objectMergeTime / 1_000_000.0, objectAvg); + System.out.printf("Bytes-to-bytes merge: %.2f ms (avg: %.1f μs/op)%n", + bytesMergeTime / 1_000_000.0, bytesAvg); + System.out.printf("Speedup: %.1fx faster%n", speedup); + + // Assert that bytes approach is faster (should be at least 1.5x) + assertTrue(speedup > 1.0, String.format("Bytes merge should be faster. Got %.1fx speedup", speedup)); + } } \ No newline at end of file diff --git a/src/test/java/com/imprint/stream/ImprintStreamTest.java b/src/test/java/com/imprint/stream/ImprintStreamTest.java deleted file mode 100644 index d2c2b69..0000000 --- a/src/test/java/com/imprint/stream/ImprintStreamTest.java +++ /dev/null @@ -1,81 +0,0 @@ -package com.imprint.stream; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.SchemaId; -import com.imprint.stream.ImprintStream; -import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.*; - -class ImprintStreamTest { - - @Test - void shouldProjectAndMergeCorrectly() throws Exception { - // --- Setup --- - var schemaId1 = new SchemaId(1, 1); - var schemaId2 = new SchemaId(2, 2); - var schemaId3 = new SchemaId(3, 3); - - var recordA = ImprintRecord.builder(schemaId1) - .field(1, "A1") - .field(2, 100) - .field(3, true) - .build(); - - var recordB = ImprintRecord.builder(schemaId2) - .field(2, 200) // Overlaps with A, should be ignored - .field(4, "B4") - .build(); - - var recordC = ImprintRecord.builder(schemaId3) - .field(5, 3.14) - .field(1, "C1") // Overlaps with A, should be ignored - .build(); - - // --- Execution --- - // Chain of operations - var finalRecord = ImprintStream.of(recordA) - .project(1, 3) // Keep {1, 3} from A. Current state: {1:A, 3:A} - .mergeWith(recordB) // Merge B. {2:B, 4:B} are added. Current state: {1:A, 3:A, 2:B, 4:B} - .mergeWith(recordC) // Merge C. {5:C} is added. 
{1:C} is ignored. Final state: {1:A, 3:A, 2:B, 4:B, 5:C} - .project(1, 4, 5) // Final projection. Final result: {1:A, 4:B, 5:C} - .toRecord(); - - // --- Assertions --- - assertNotNull(finalRecord); - - // Check final field count. - assertEquals(3, finalRecord.getDirectory().size()); - - // Check that the correct fields are present and have the right values - assertTrue(finalRecord.hasField(1)); - assertEquals("A1", finalRecord.getString(1)); // From recordA - - assertTrue(finalRecord.hasField(4)); - assertEquals("B4", finalRecord.getString(4)); // From recordB - - assertTrue(finalRecord.hasField(5)); - assertEquals(3.14, finalRecord.getFloat64(5), 0.001); // From recordC - - // Check that dropped/ignored fields are not present - assertFalse(finalRecord.hasField(2)); - assertFalse(finalRecord.hasField(3)); - } - - @Test - void shouldProjectAfterMerge() throws Exception { - var recordA = ImprintRecord.builder(new SchemaId(1, 1)).field(1, "A").field(2, 100).build(); - var recordB = ImprintRecord.builder(new SchemaId(1, 1)).field(2, 200).field(3, "B").build(); - - var finalRecord = ImprintStream.of(recordA) - .mergeWith(recordB) // virtual record is {1:A, 2:A, 3:B} - .project(1, 3) // final record is {1:A, 3:B} - .toRecord(); - - assertEquals(2, finalRecord.getDirectory().size()); - assertTrue(finalRecord.hasField(1)); - assertEquals("A", finalRecord.getString(1)); - assertTrue(finalRecord.hasField(3)); - assertEquals("B", finalRecord.getString(3)); - assertFalse(finalRecord.hasField(2)); - } -} \ No newline at end of file From 37ca24df15d6b11b297d21eb6c94c0186f5cca8c Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 12 Jun 2025 16:16:14 -0400 Subject: [PATCH 45/53] track custom map --- .../imprint/core/ImprintFieldObjectMap.java | 309 +++++++++++++++++ .../core/ImprintFieldObjectMapTest.java | 318 ++++++++++++++++++ 2 files changed, 627 insertions(+) create mode 100644 src/main/java/com/imprint/core/ImprintFieldObjectMap.java create mode 100644 
/**
 * Specialized short→object open-addressing hash map optimized for
 * ImprintRecordBuilder field IDs (valid keys: 0..Short.MAX_VALUE).
 * Modeled on Eclipse Collections' primitive maps:
 * - No key boxing (primitive short keys)
 * - Open addressing with linear probing, power-of-two capacity
 * - Destructive in-place key-sorted extraction via {@link #getSortedValues()},
 *   after which the map is "poisoned" and rejects further operations.
 *
 * NOT thread-safe. Intended for single-threaded builder lifecycles.
 */
final class ImprintFieldObjectMap<T> {
    private static final int DEFAULT_CAPACITY = 512;
    private static final float LOAD_FACTOR = 0.75f;
    // Field IDs are >= 0, so -1 is a safe "empty slot" sentinel.
    private static final short EMPTY_KEY = -1;

    private short[] keys;
    private Object[] values;
    private int size;
    private int threshold;
    // Set by getSortedValues(): the backing arrays have been compacted/sorted
    // in place, so hash lookups are no longer valid.
    private boolean poisoned = false;

    public ImprintFieldObjectMap() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * @param initialCapacity hint; rounded up to a power of two (minimum 4)
     */
    public ImprintFieldObjectMap(int initialCapacity) {
        int capacity = nextPowerOfTwo(Math.max(4, initialCapacity));
        this.keys = new short[capacity];
        this.values = new Object[capacity];
        this.threshold = (int) (capacity * LOAD_FACTOR);
        Arrays.fill(keys, EMPTY_KEY);
    }

    /**
     * Associates {@code value} with {@code key}, overwriting any previous mapping.
     *
     * @throws IllegalArgumentException if key is negative
     * @throws IllegalStateException if the map has been poisoned
     */
    public void put(short key, T value) {
        checkNotPoisoned();
        putValue(key, value);
    }

    /**
     * Convenience overload taking an int key.
     *
     * @throws IllegalArgumentException if key is outside 0..Short.MAX_VALUE
     * @throws IllegalStateException if the map has been poisoned
     */
    public void put(int key, T value) {
        checkNotPoisoned();
        if (key > Short.MAX_VALUE)
            throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key);
        putValue((short) key, value);
    }

    private void putValue(short key, T value) {
        if (key < 0)
            throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key);

        int index = findSlot(key);
        if (keys[index] == EMPTY_KEY) {
            // New entry: grow first if needed, then re-probe since slots moved.
            if (size >= threshold) {
                resize();
                index = findSlot(key);
            }
            size++;
        }
        keys[index] = key;
        values[index] = value;
    }

    /**
     * @return the value mapped to {@code key}, or null if absent or key invalid
     * @throws IllegalStateException if the map has been poisoned
     */
    @SuppressWarnings("unchecked")
    public T get(int key) {
        checkNotPoisoned();
        if (key < 0 || key > Short.MAX_VALUE)
            return null;
        short shortKey = (short) key;
        int index = findSlot(shortKey);
        return keys[index] == shortKey ? (T) values[index] : null;
    }

    /**
     * @return true if {@code key} is present (invalid keys return false)
     * @throws IllegalStateException if the map has been poisoned
     */
    public boolean containsKey(int key) {
        checkNotPoisoned();
        if (key < 0 || key > Short.MAX_VALUE)
            return false;
        short shortKey = (short) key;
        int index = findSlot(shortKey);
        return keys[index] == shortKey;
    }

    /** Number of mappings. Safe to call even after poisoning. */
    public int size() {
        return size;
    }

    /** True if the map holds no mappings. Safe to call even after poisoning. */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Get all keys in unspecified order (non-destructive, allocates a new array).
     *
     * @throws IllegalStateException if the map has been poisoned
     */
    public int[] getKeys() {
        // Guard added for consistency: every other read path rejects a
        // poisoned map; after compaction a slot scan would return stale data.
        checkNotPoisoned();
        return IntStream.range(0, keys.length)
                .filter(i -> keys[i] != EMPTY_KEY)
                .map(i -> keys[i])
                .toArray();
    }

    /**
     * Stream all keys without intermediate array allocation.
     * Non-destructive; may be called repeatedly.
     *
     * @return IntStream of all keys in the map (unspecified order)
     * @throws IllegalStateException if the map has been poisoned
     */
    public IntStream streamKeys() {
        checkNotPoisoned();
        return IntStream.range(0, keys.length)
                .filter(i -> keys[i] != EMPTY_KEY)
                .map(i -> keys[i]);
    }

    /**
     * Result holder for in-place sorted values - avoids Array.copy allocations
     * by returning the internal array reference plus the valid element count.
     */
    public static final class SortedValuesResult {
        public final Object[] values;
        public final int count;

        SortedValuesResult(Object[] values, int count) {
            this.values = values;
            this.count = count;
        }
    }

    /**
     * Get values sorted by key order with zero allocation by left-compacting
     * the internal arrays and sorting them in place.
     * WARNING: destroys the hash-table invariants and poisons the map; only
     * invoke this when the map will be discarded afterward (e.g. at the end
     * of a builder lifecycle, just before build()).
     *
     * @return SortedValuesResult exposing the internal values array; callers
     *         must only read indices 0..count-1.
     */
    public SortedValuesResult getSortedValues() {
        if (size == 0) {
            // Poison even when empty, for consistent lifecycle semantics.
            poisoned = true;
            return new SortedValuesResult(values, 0);
        }

        compactEntries();
        sortEntriesByKey(size);
        poisoned = true;
        return new SortedValuesResult(values, size);
    }

    /**
     * Get values sorted by key order without modifying internal state; may be
     * invoked repeatedly.
     *
     * @param resultArray reused when its length equals size(), otherwise a
     *                    correctly-sized copy is allocated
     * @return array of values sorted by ascending key
     * @throws IllegalStateException if the map has been poisoned
     */
    @SuppressWarnings("unchecked")
    public T[] getSortedValuesCopy(T[] resultArray) {
        checkNotPoisoned();
        if (size == 0)
            return resultArray.length == 0 ? resultArray : Arrays.copyOf(resultArray, 0);

        // Snapshot live entries into scratch arrays so the map stays intact.
        var tempKeys = new short[size];
        var tempValues = new Object[size];
        int writeIndex = 0;
        for (int readIndex = 0; readIndex < keys.length; readIndex++) {
            if (keys[readIndex] != EMPTY_KEY) {
                tempKeys[writeIndex] = keys[readIndex];
                tempValues[writeIndex] = values[readIndex];
                writeIndex++;
            }
        }

        // Insertion sort by key - fast for the small sizes typical here.
        for (int i = 1; i < size; i++) {
            short key = tempKeys[i];
            Object value = tempValues[i];
            int j = i - 1;
            while (j >= 0 && tempKeys[j] > key) {
                tempKeys[j + 1] = tempKeys[j];
                tempValues[j + 1] = tempValues[j];
                j--;
            }
            tempKeys[j + 1] = key;
            tempValues[j + 1] = value;
        }

        if (resultArray.length != size)
            resultArray = Arrays.copyOf(resultArray, size);
        for (int i = 0; i < size; i++)
            resultArray[i] = (T) tempValues[i];
        return resultArray;
    }

    /** Left-side compaction of all non-empty entries to the front of keys/values. */
    private void compactEntries() {
        int writeIndex = 0;
        for (int readIndex = 0; readIndex < keys.length; readIndex++) {
            if (keys[readIndex] != EMPTY_KEY) {
                if (writeIndex != readIndex) {
                    keys[writeIndex] = keys[readIndex];
                    values[writeIndex] = values[readIndex];
                    keys[readIndex] = EMPTY_KEY;
                    values[readIndex] = null;
                }
                writeIndex++;
            }
        }
    }

    /**
     * Sort the first 'count' compacted entries by key using insertion sort
     * (fast enough for the small arrays expected here).
     */
    private void sortEntriesByKey(int count) {
        for (int i = 1; i < count; i++) {
            short key = keys[i];
            Object value = values[i];
            int j = i - 1;
            while (j >= 0 && keys[j] > key) {
                keys[j + 1] = keys[j];
                values[j + 1] = values[j];
                j--;
            }
            keys[j + 1] = key;
            values[j + 1] = value;
        }
    }

    /** Linear probe: returns the slot holding {@code key} or the first empty slot. */
    private int findSlot(short key) {
        int mask = keys.length - 1;
        int index = hash(key) & mask;
        while (keys[index] != EMPTY_KEY && keys[index] != key) {
            index = (index + 1) & mask;
        }
        return index;
    }

    /**
     * Double capacity and rehash. Inserts directly into slots rather than going
     * through put(): every key was already validated, and size is unchanged by
     * a rehash (this also removes the leftover size-tracking assertion).
     */
    private void resize() {
        short[] oldKeys = keys;
        Object[] oldValues = values;

        int newCapacity = oldKeys.length * 2;
        keys = new short[newCapacity];
        values = new Object[newCapacity];
        threshold = (int) (newCapacity * LOAD_FACTOR);
        Arrays.fill(keys, EMPTY_KEY);

        for (int i = 0; i < oldKeys.length; i++) {
            if (oldKeys[i] != EMPTY_KEY) {
                int slot = findSlot(oldKeys[i]);
                keys[slot] = oldKeys[i];
                values[slot] = oldValues[i];
            }
        }
    }

    private void checkNotPoisoned() {
        if (poisoned)
            throw new IllegalStateException("Map is invalid after compaction - cannot perform operations");
    }

    /** Spread the low 16 bits; simple but effective for short keys. */
    private static int hash(short key) {
        int intKey = key & 0xFFFF;
        intKey ^= intKey >>> 8;
        return intKey;
    }

    private static int nextPowerOfTwo(int n) {
        if (n <= 1) return 1;
        return Integer.highestOneBit(n - 1) << 1;
    }
}
optimized for field IDs. + */ +class ImprintFieldObjectMapTest { + + private ImprintFieldObjectMap map; + + @BeforeEach + void setUp() { + map = new ImprintFieldObjectMap<>(); + } + + @Test + void shouldPutAndGetBasicOperations() { + map.put(1, "one"); + map.put(5, "five"); + map.put(10, "ten"); + + assertEquals("one", map.get(1)); + assertEquals("five", map.get(5)); + assertEquals("ten", map.get(10)); + assertNull(map.get(99)); + assertEquals(3, map.size()); + } + + @Test + void shouldHandleKeyValidation() { + // Valid keys (0 to Short.MAX_VALUE) + map.put(0, "zero"); + map.put(Short.MAX_VALUE, "max"); + + // Invalid keys + assertThrows(IllegalArgumentException.class, () -> map.put(-1, "negative")); + assertThrows(IllegalArgumentException.class, () -> map.put(Short.MAX_VALUE + 1, "too_large")); + } + + @Test + void shouldHandleContainsKey() { + map.put(1, "one"); + map.put(5, "five"); + + assertTrue(map.containsKey(1)); + assertTrue(map.containsKey(5)); + assertFalse(map.containsKey(99)); + assertFalse(map.containsKey(-1)); + assertFalse(map.containsKey(Short.MAX_VALUE + 1)); + } + + @Test + void shouldOverwriteExistingKeys() { + map.put(1, "original"); + assertEquals("original", map.get(1)); + assertEquals(1, map.size()); + + map.put(1, "updated"); + assertEquals("updated", map.get(1)); + assertEquals(1, map.size()); // Size should not increase + } + + @Test + void shouldGetKeysArray() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + + int[] keys = map.getKeys(); + assertEquals(3, keys.length); + + // Convert to set for order-independent comparison + var keySet = java.util.Arrays.stream(keys).boxed() + .collect(java.util.stream.Collectors.toSet()); + + assertTrue(keySet.contains(1)); + assertTrue(keySet.contains(3)); + assertTrue(keySet.contains(7)); + } + + @Test + void shouldSortValuesNonDestructively() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + map.put(2, "two"); + + // Test non-destructive sort + String[] 
sorted = map.getSortedValuesCopy(new String[0]); + + assertEquals(4, sorted.length); + assertEquals("one", sorted[0]); // key 1 + assertEquals("two", sorted[1]); // key 2 + assertEquals("three", sorted[2]); // key 3 + assertEquals("seven", sorted[3]); // key 7 + + // Verify map is still functional after non-destructive sort + assertEquals("three", map.get(3)); + assertEquals("one", map.get(1)); + assertEquals(4, map.size()); + + // Should be able to call multiple times + String[] sorted2 = map.getSortedValuesCopy(new String[0]); + assertArrayEquals(sorted, sorted2); + } + + @Test + void shouldSortValuesDestructively() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + map.put(2, "two"); + + // Test destructive sort + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + + assertEquals(4, result.count); + assertEquals("one", result.values[0]); // key 1 + assertEquals("two", result.values[1]); // key 2 + assertEquals("three", result.values[2]); // key 3 + assertEquals("seven", result.values[3]); // key 7 + } + + @Test + void shouldPoisonMapAfterDestructiveSort() { + map.put(1, "one"); + map.put(2, "two"); + + // Perform destructive sort + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + assertNotNull(result); + + // All operations should throw IllegalStateException after poisoning + assertThrows(IllegalStateException.class, () -> map.put(3, "three")); + assertThrows(IllegalStateException.class, () -> map.get(1)); + assertThrows(IllegalStateException.class, () -> map.containsKey(1)); + assertThrows(IllegalStateException.class, () -> map.getSortedValuesCopy(new String[0])); + + // Size and isEmpty should still work (they don't check poisoned state) + assertEquals(2, map.size()); + assertFalse(map.isEmpty()); + } + + @Test + void shouldHandleEmptyMapSorting() { + // Test non-destructive sort on empty map + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(0, sorted.length); + + // 
Test destructive sort on empty map + ImprintFieldObjectMap.SortedValuesResult result = map.getSortedValues(); + assertEquals(0, result.count); + + // Map should be poisoned even after empty destructive sort + assertThrows(IllegalStateException.class, () -> map.put(1, "one")); + } + + @Test + void shouldHandleSingleElementSorting() { + map.put(42, "answer"); + + // Test non-destructive sort + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(1, sorted.length); + assertEquals("answer", sorted[0]); + + // Test destructive sort on fresh map + ImprintFieldObjectMap map2 = new ImprintFieldObjectMap<>(); + map2.put(42, "answer"); + + ImprintFieldObjectMap.SortedValuesResult result = map2.getSortedValues(); + assertEquals(1, result.count); + assertEquals("answer", result.values[0]); + } + + @Test + void shouldHandleHashCollisions() { + // Add many entries to trigger collisions and resizing + for (int i = 0; i < 1000; i++) { + map.put(i, "value_" + i); + } + + // Verify all entries are accessible + for (int i = 0; i < 1000; i++) { + assertEquals("value_" + i, map.get(i)); + assertTrue(map.containsKey(i)); + } + + assertEquals(1000, map.size()); + + // Test sorting with many entries + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals(1000, sorted.length); + + // Verify sorting is correct + for (int i = 0; i < 1000; i++) { + assertEquals("value_" + i, sorted[i]); + } + } + + @Test + void shouldReuseResultArrayForNonDestructiveSort() { + map.put(1, "one"); + map.put(2, "two"); + + String[] reusableArray = new String[2]; + String[] result = map.getSortedValuesCopy(reusableArray); + + assertSame(reusableArray, result); // Should reuse the same array + assertEquals("one", result[0]); + assertEquals("two", result[1]); + + // Test with wrong size array - should create new array + String[] wrongSizeArray = new String[5]; + String[] result2 = map.getSortedValuesCopy(wrongSizeArray); + + assertNotSame(wrongSizeArray, result2); // Should 
create new array + assertEquals(2, result2.length); + assertEquals("one", result2[0]); + assertEquals("two", result2[1]); + } + + @Test + void shouldHandleMaxShortValue() { + int maxKey = Short.MAX_VALUE; + map.put(maxKey, "max_value"); + map.put(0, "zero"); + map.put(maxKey - 1, "almost_max"); + + assertEquals("max_value", map.get(maxKey)); + assertEquals("zero", map.get(0)); + assertEquals("almost_max", map.get(maxKey - 1)); + + String[] sorted = map.getSortedValuesCopy(new String[0]); + assertEquals("zero", sorted[0]); + assertEquals("almost_max", sorted[1]); + assertEquals("max_value", sorted[2]); + } + + @Test + void shouldMaintainSizeCorrectlyWithOverwrites() { + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + + map.put(1, "first"); + assertEquals(1, map.size()); + assertFalse(map.isEmpty()); + + map.put(1, "overwrite"); + assertEquals(1, map.size()); // Size should not change + + map.put(2, "second"); + assertEquals(2, map.size()); + + map.put(1, "overwrite_again"); + assertEquals(2, map.size()); // Size should not change + } + + @Test + void shouldStreamKeysWithoutAllocation() { + map.put(3, "three"); + map.put(1, "one"); + map.put(7, "seven"); + + // Stream keys without allocation + java.util.Set streamedKeys = map.streamKeys() + .boxed() + .collect(java.util.stream.Collectors.toSet()); + + assertEquals(3, streamedKeys.size()); + assertTrue(streamedKeys.contains(1)); + assertTrue(streamedKeys.contains(3)); + assertTrue(streamedKeys.contains(7)); + + // Should be able to stream multiple times + long count = map.streamKeys().count(); + assertEquals(3, count); + + // Test operations on stream + int sum = map.streamKeys().sum(); + assertEquals(11, sum); // 1 + 3 + 7 + + // Test filtering + long evenKeys = map.streamKeys().filter(k -> k % 2 == 0).count(); + assertEquals(0, evenKeys); + + long oddKeys = map.streamKeys().filter(k -> k % 2 == 1).count(); + assertEquals(3, oddKeys); + } + + @Test + void shouldThrowOnStreamKeysAfterPoisoning() { + 
map.put(1, "one"); + map.put(2, "two"); + + // Stream should work before poisoning + assertEquals(2, map.streamKeys().count()); + + // Poison the map + map.getSortedValues(); + + // Stream should throw after poisoning + assertThrows(IllegalStateException.class, () -> map.streamKeys()); + } + + @Test + void shouldStreamEmptyMapKeys() { + // Empty map should produce empty stream + assertEquals(0, map.streamKeys().count()); + + // Operations on empty stream should work + assertEquals(0, map.streamKeys().sum()); + assertEquals(java.util.OptionalInt.empty(), map.streamKeys().findFirst()); + } +} \ No newline at end of file From 429f852d95e2d2b2638ea5de69b5aad8b22ef22f Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Thu, 12 Jun 2025 17:10:42 -0400 Subject: [PATCH 46/53] adding comments and TODOs --- .../imprint/core/ImprintFieldObjectMap.java | 15 ++++------ .../imprint/core/ImprintRecordBuilder.java | 6 ++-- .../com/imprint/ops/ImprintOperations.java | 28 +++++++++---------- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java index d104317..e0a63f0 100644 --- a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java +++ b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java @@ -119,7 +119,7 @@ public IntStream streamKeys() { } /** - * Result holder for in-place sorted values - avoids allocation by returning + * Result holder for in-place sorted values - avoids Array.copy allocations by returning * array reference and valid count. */ public static final class SortedValuesResult { @@ -216,7 +216,7 @@ public T[] getSortedValuesCopy(T[] resultArray) { } /** - * Compact all non-empty entries to the front of keys/values arrays. + * Left side compact for all non-empty entries to the front of keys/values arrays. 
*/ private void compactEntries() { int writeIndex = 0; @@ -237,7 +237,7 @@ private void compactEntries() { } /** - * Sort the first 'count' entries by key using insertion sort (should be fast for small arrays). + * Sort the first 'count' entries by key using insertion sort (should be fast enough for small arrays). */ private void sortEntriesByKey(int count) { for (int i = 1; i < count; i++) { @@ -281,8 +281,7 @@ private void resize() { int oldSize = size; size = 0; - - // Rehash all entries + for (int i = 0; i < oldKeys.length; i++) { if (oldKeys[i] != EMPTY_KEY) { @SuppressWarnings("unchecked") @@ -290,14 +289,12 @@ private void resize() { put(oldKeys[i], value); } } - - // Verify size didn't change during rehash + //TODO remove this assertion (carried from from EclipseCollection) assert size == oldSize; } private static int hash(short key) { - // Simple but effective hash for short keys - int intKey = key & 0xFFFF; // Convert to unsigned int + int intKey = key & 0xFFFF; intKey ^= intKey >>> 8; return intKey; } diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 8e1dfa0..5b7f009 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -35,7 +35,6 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - // Custom int→object map optimized for primitive keys private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); private int estimatedPayloadSize = 0; @@ -203,13 +202,13 @@ private ImprintRecordBuilder addField(int id, Value value) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); var newEntry = new FieldData((short) id, value); - // Check if replacing an existing field - O(1) lookup without boxing! 
+ // Check if replacing an existing field var oldEntry = fields.get(id); if (oldEntry != null) { estimatedPayloadSize -= estimateValueSize(oldEntry.value); } - // Add or replace field - O(1) operation without boxing! + // Add or replace field fields.put(id, newEntry); estimatedPayloadSize += estimateValueSize(newEntry.value); return this; @@ -327,7 +326,6 @@ private int estimateValueSize(Value value) { } private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { - // Use TypeHandler for simple types switch (value.getTypeCode()) { case NULL: case BOOL: diff --git a/src/main/java/com/imprint/ops/ImprintOperations.java b/src/main/java/com/imprint/ops/ImprintOperations.java index f15e6a1..52ec5a0 100644 --- a/src/main/java/com/imprint/ops/ImprintOperations.java +++ b/src/main/java/com/imprint/ops/ImprintOperations.java @@ -27,8 +27,9 @@ public class ImprintOperations { public static ByteBuffer mergeBytes(ByteBuffer firstBuffer, ByteBuffer secondBuffer) throws ImprintException { validateImprintBuffer(firstBuffer, "firstBuffer"); validateImprintBuffer(secondBuffer, "secondBuffer"); - - // Work on duplicates to avoid affecting original positions + + // TODO possible could work directly on the originals but duplicate makes the mark values and offsets easy to reason about + // duplicates to avoid affecting original positions, we'll need to preserve at least one side var first = firstBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); var second = secondBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); @@ -60,6 +61,7 @@ private static ImprintRecord.BufferSections extractSections(ByteBuffer buffer, H /** * Merge raw directory and payload sections without object creation + * Assumes incoming streams are already both sorted from the serialization process */ private static ByteBuffer mergeRawSections(Header firstHeader, ImprintRecord.BufferSections firstSections, ImprintRecord.BufferSections secondSections) throws ImprintException { // Prepare 
directory iterators @@ -142,12 +144,11 @@ public static ByteBuffer projectBytes(ByteBuffer sourceBuffer, int... fieldIds) if (fieldIds == null || fieldIds.length == 0) { return createEmptyRecordBytes(); } - - // Sort field IDs for efficient merge algorithm (duplicates handled naturally) + var sortedFieldIds = fieldIds.clone(); Arrays.sort(sortedFieldIds); - // Work on duplicate to avoid affecting original position + // Duplicate avoids affecting original position which we'll need later var source = sourceBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); // Parse header @@ -162,7 +163,6 @@ public static ByteBuffer projectBytes(ByteBuffer sourceBuffer, int... fieldIds) /** * Project raw sections without object creation using optimized merge algorithm. - * Uses direct array operations and optimized memory access for maximum performance. */ private static ByteBuffer projectRawSections(Header originalHeader, ImprintRecord.BufferSections sections, int[] sortedRequestedFields) throws ImprintException { @@ -194,14 +194,14 @@ private static ByteBuffer projectRawSections(Header originalHeader, ImprintRecor // Add to projection with adjusted offset projectedEntries.add(new RawDirectoryEntry(currentEntry.fieldId, currentEntry.typeCode, currentOffset)); - // Collect payload chunk here (fieldPayload is already sliced) + // Collect payload chunk here - fieldPayload should already sliced payloadChunks.add(fieldPayload); int payloadSize = fieldPayload.remaining(); currentOffset += payloadSize; totalProjectedPayloadSize += payloadSize; - // Advance both pointers (handle dupes by advancing to next unique field) + // Advance both pointers - handle dupes by advancing to next unique field hopefully do { requestedIndex++; } while (requestedIndex < sortedRequestedFields.length && sortedRequestedFields[requestedIndex] == targetFieldId); @@ -232,7 +232,7 @@ private static ByteBuffer buildSerializedBuffer(Header originalHeader, List Date: Thu, 12 Jun 2025 22:27:38 -0400 Subject: [PATCH 
47/53] various micro-optimizations --- .../benchmark/ComparisonBenchmark.java | 2 +- .../imprint/core/ImprintRecordBuilder.java | 173 ++++++++++++------ .../com/imprint/profile/ProfilerTest.java | 2 +- 3 files changed, 119 insertions(+), 58 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index f47da20..6ecf661 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -30,7 +30,7 @@ public class ComparisonBenchmark { new KryoSerializingBenchmark(), new MessagePackSerializingBenchmark()); - @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack", "CapnProto"}) + @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack", ""}) public String framework; private SerializingBenchmark serializingBenchmark; diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 5b7f009..3ccdfcd 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -4,9 +4,10 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.types.MapKey; +import com.imprint.types.TypeCode; import com.imprint.types.Value; -import lombok.SneakyThrows; +import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.*; @@ -55,31 +56,31 @@ static final class FieldData { // Primitive types with automatic Value wrapping public ImprintRecordBuilder field(int id, boolean value) { - return addField(id, Value.fromBoolean(value)); + return addField(id, Value.fromBoolean(value), TypeCode.BOOL); } public ImprintRecordBuilder field(int id, int value) { - return addField(id, 
Value.fromInt32(value)); + return addField(id, Value.fromInt32(value), TypeCode.INT32); } public ImprintRecordBuilder field(int id, long value) { - return addField(id, Value.fromInt64(value)); + return addField(id, Value.fromInt64(value), TypeCode.INT64); } public ImprintRecordBuilder field(int id, float value) { - return addField(id, Value.fromFloat32(value)); + return addField(id, Value.fromFloat32(value), TypeCode.FLOAT32); } public ImprintRecordBuilder field(int id, double value) { - return addField(id, Value.fromFloat64(value)); + return addField(id, Value.fromFloat64(value), TypeCode.FLOAT64); } public ImprintRecordBuilder field(int id, String value) { - return addField(id, Value.fromString(value)); + return addField(id, Value.fromString(value), TypeCode.STRING); } public ImprintRecordBuilder field(int id, byte[] value) { - return addField(id, Value.fromBytes(value)); + return addField(id, Value.fromBytes(value), TypeCode.BYTES); } // Collections with automatic conversion @@ -88,7 +89,7 @@ public ImprintRecordBuilder field(int id, List values) { for (var item : values) { convertedValues.add(convertToValue(item)); } - return addField(id, Value.fromArray(convertedValues)); + return addField(id, Value.fromArray(convertedValues), TypeCode.ARRAY); } public ImprintRecordBuilder field(int id, Map map) { @@ -98,28 +99,29 @@ public ImprintRecordBuilder field(int id, Map map) { var value = convertToValue(entry.getValue()); convertedMap.put(key, value); } - return addField(id, Value.fromMap(convertedMap)); + return addField(id, Value.fromMap(convertedMap), TypeCode.MAP); } // Nested records public ImprintRecordBuilder field(int id, ImprintRecord nestedRecord) { - return addField(id, Value.fromRow(nestedRecord)); + return addField(id, Value.fromRow(nestedRecord), TypeCode.ROW); } // Explicit null field public ImprintRecordBuilder nullField(int id) { - return addField(id, Value.nullValue()); + return addField(id, Value.nullValue(), TypeCode.NULL); } // Direct Value API 
(escape hatch for advanced usage) public ImprintRecordBuilder field(int id, Value value) { - return addField(id, value); + return addField(id, value, value.getTypeCode()); // Only virtual call when type is unknown } // Conditional field addition public ImprintRecordBuilder fieldIf(boolean condition, int id, Object value) { if (condition) { - return field(id, convertToValue(value)); + var convertedValue = convertToValue(value); + return addField(id, convertedValue, convertedValue.getTypeCode()); } return this; } @@ -131,7 +133,8 @@ public ImprintRecordBuilder fieldIfNotNull(int id, Object value) { // Bulk operations public ImprintRecordBuilder fields(Map fieldsMap) { for (var entry : fieldsMap.entrySet()) { - field(entry.getKey(), convertToValue(entry.getValue())); + var convertedValue = convertToValue(entry.getValue()); + addField(entry.getKey(), convertedValue, convertedValue.getTypeCode()); } return this; } @@ -168,26 +171,32 @@ public ImprintRecord build() throws ImprintException { * @throws ImprintException if serialization fails. */ public ByteBuffer buildToBuffer() throws ImprintException { - // 1. Sort fields by ID for directory ordering (zero allocation) + // 1. Calculate conservative size BEFORE sorting (which invalidates the map) + int conservativeSize = calculateConservativePayloadSize(); + + // 2. Sort fields by ID for directory ordering (zero allocation) var sortedFieldsResult = getSortedFieldsResult(); var sortedFields = sortedFieldsResult.values; var fieldCount = sortedFieldsResult.count; - // 2. Serialize payload and calculate offsets - var payloadBuffer = ByteBuffer.allocate(estimatePayloadSize()); - payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); - - int[] offsets = new int[fieldCount]; - for (int i = 0; i < fieldCount; i++) { - var fieldData = (FieldData) sortedFields[i]; - offsets[i] = payloadBuffer.position(); - serializeValue(fieldData.value, payloadBuffer); + // 3. 
Serialize payload and calculate offsets with overflow handling + PayloadSerializationResult result = null; + int bufferSizeMultiplier = 1; + + while (result == null && bufferSizeMultiplier <= 64) { + try { + result = serializePayload(sortedFields, fieldCount, conservativeSize, bufferSizeMultiplier); + } catch (BufferOverflowException e) { + bufferSizeMultiplier *= 2; // Try 2x, 4x, 8x, 16x, 32x, 64x + } } - payloadBuffer.flip(); - var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); - - // 3. Create directory buffer and serialize to final buffer - return serializeToBuffer(schemaId, sortedFields, offsets, fieldCount, payloadView); + + if (result == null) { + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Failed to serialize payload even with 64x buffer size"); + } + + return serializeToBuffer(schemaId, sortedFields, result.offsets, fieldCount, result.payload); } /** @@ -196,21 +205,22 @@ public ByteBuffer buildToBuffer() throws ImprintException { * * @param id the field ID * @param value the field value (cannot be null - use nullField() for explicit nulls) + * @param typeCode the known type code (avoids virtual call) * @return this builder for method chaining */ - private ImprintRecordBuilder addField(int id, Value value) { + private ImprintRecordBuilder addField(int id, Value value, TypeCode typeCode) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); var newEntry = new FieldData((short) id, value); // Check if replacing an existing field var oldEntry = fields.get(id); if (oldEntry != null) { - estimatedPayloadSize -= estimateValueSize(oldEntry.value); + estimatedPayloadSize -= fastEstimateFieldSize(oldEntry.value, oldEntry.value.getTypeCode()); } // Add or replace field fields.put(id, newEntry); - estimatedPayloadSize += estimateValueSize(newEntry.value); + estimatedPayloadSize += fastEstimateFieldSize(newEntry.value, typeCode); return this; } @@ -289,42 +299,93 @@ private MapKey 
convertToMapKey(Object obj) { throw new IllegalArgumentException("Unsupported map key type: " + obj.getClass().getName()); } - private int estimatePayloadSize() { - // Add 25% buffer to reduce reallocations and handle VarInt encoding fluctuations. - return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), fields.size() * 16); - } - /** - * Estimates the serialized size in bytes for a given value. - * - * @param value the value to estimate size for - * @return estimated size in bytes including type-specific overhead + * Fast heuristic-based field size estimation (no expensive operations). + * TypeCode passed directly to avoid virtual call. */ - @SneakyThrows - private int estimateValueSize(Value value) { - // Use TypeHandler for simple types - switch (value.getTypeCode()) { - case NULL: - case BOOL: + private int fastEstimateFieldSize(Value value, TypeCode typeCode) { + switch (typeCode) { + case NULL: return 0; + case BOOL: return 1; case INT32: - case INT64: case FLOAT32: + return 4; + case INT64: case FLOAT64: - case BYTES: + return 8; case STRING: + if (value instanceof Value.StringValue) { + return 5 + (((Value.StringValue) value).getValue().length() * 3 / 2); + } else { + return 5 + ((Value.StringBufferValue) value).getBuffer().remaining(); + } + case BYTES: + if (value instanceof Value.BytesValue) { + return 5 + ((Value.BytesValue) value).getValue().length; + } else { + return 5 + ((Value.BytesBufferValue) value).getBuffer().remaining(); + } case ARRAY: + var elements = ((Value.ArrayValue) value).getValue(); + return 5 + 1 + (elements.size() * 16); case MAP: - return value.getTypeCode().getHandler().estimateSize(value); - + var map = ((Value.MapValue) value).getValue(); + return 5 + 2 + (map.size() * 32); //32 bytes per entry case ROW: - Value.RowValue rowValue = (Value.RowValue) value; - return rowValue.getValue().estimateSerializedSize(); - + // Use actual size if available + return ((Value.RowValue) value).getValue().getSerializedSize(); + 
default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + return 64; // Fallback } } + /** + * Get current estimated payload size with 25% buffer. + */ + private int calculateConservativePayloadSize() { + // Add 25% buffer for safety margin + return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), 4096); + } + + + /** + * Result of payload serialization containing offsets and final payload buffer. + */ + private static class PayloadSerializationResult { + final int[] offsets; + final ByteBuffer payload; + + PayloadSerializationResult(int[] offsets, ByteBuffer payload) { + this.offsets = offsets; + this.payload = payload; + } + } + + /** + * Serialize payload with conservative buffer size multiplier. + */ + private PayloadSerializationResult serializePayload(Object[] sortedFields, int fieldCount, int conservativeSize, int sizeMultiplier) throws ImprintException { + var payloadBuffer = ByteBuffer.allocate(conservativeSize * sizeMultiplier); + payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); + return doSerializePayload(sortedFields, fieldCount, payloadBuffer); + } + + /** + * Core payload serialization logic. 
+ */ + private PayloadSerializationResult doSerializePayload(Object[] sortedFields, int fieldCount, ByteBuffer payloadBuffer) throws ImprintException { + int[] offsets = new int[fieldCount]; + for (int i = 0; i < fieldCount; i++) { + var fieldData = (FieldData) sortedFields[i]; + offsets[i] = payloadBuffer.position(); + serializeValue(fieldData.value, payloadBuffer); + } + payloadBuffer.flip(); + var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); + return new PayloadSerializationResult(offsets, payloadView); + } + private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { switch (value.getTypeCode()) { case NULL: diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 79882d9..8380681 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -14,7 +14,7 @@ import java.util.stream.IntStream; -@Disabled +//@Disabled public class ProfilerTest { private static final int RECORD_SIZE = 50; From 8fef426daf6bb2a3c3070ab16f9d3abadf4e3c5d Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Fri, 13 Jun 2025 12:35:26 -0400 Subject: [PATCH 48/53] make serilaizers static/final; begin to refactor to avoid virtual dispatch overhead --- .../benchmark/ComparisonBenchmark.java | 8 +- .../ImprintSerializingBenchmark.java | 23 +- .../imprint/core/ImprintFieldObjectMap.java | 68 +++- .../java/com/imprint/core/ImprintRecord.java | 253 +++++++------- .../imprint/core/ImprintRecordBuilder.java | 322 +++++++++++++++--- .../com/imprint/profile/ProfilerTest.java | 4 +- 6 files changed, 476 insertions(+), 202 deletions(-) diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 6ecf661..d8fbcde 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -16,7 
+16,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1) -@Measurement(iterations = 7, time = 1) +@Measurement(iterations = 25, time = 1) @Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ComparisonBenchmark { @@ -30,7 +30,7 @@ public class ComparisonBenchmark { new KryoSerializingBenchmark(), new MessagePackSerializingBenchmark()); - @Param({"Imprint", "Jackson-JSON", "Protobuf", "FlatBuffers", "Avro-Generic", "Thrift", "Kryo", "MessagePack", ""}) + @Param({"Imprint"}) public String framework; private SerializingBenchmark serializingBenchmark; @@ -60,12 +60,12 @@ public void deserialize(Blackhole bh) { serializingBenchmark.deserialize(bh); } - @Benchmark + //@Benchmark public void projectAndSerialize(Blackhole bh) { serializingBenchmark.projectAndSerialize(bh); } - @Benchmark + //@Benchmark public void mergeAndSerialize(Blackhole bh) { serializingBenchmark.mergeAndSerialize(bh); } diff --git a/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java index e71a5c0..490b9d2 100644 --- a/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/serializers/ImprintSerializingBenchmark.java @@ -12,6 +12,7 @@ public class ImprintSerializingBenchmark extends AbstractSerializingBenchmark { private ImprintRecord imprintRecord1; + private ImprintRecordBuilder preBuiltRecord; // Pre-built record for testing private byte[] serializedRecord1; private byte[] serializedRecord2; private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); @@ -24,8 +25,9 @@ public ImprintSerializingBenchmark() { public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord testRecord2) { super.setup(testRecord, testRecord2); try { - this.imprintRecord1 = buildRecord(testRecord); - ImprintRecord imprintRecord2 = buildRecord(testRecord2); + 
this.imprintRecord1 = buildRecord(testRecord).build(); + this.preBuiltRecord = buildRecord(testRecord); // Pre-built for testing + ImprintRecord imprintRecord2 = buildRecord(testRecord2).build(); ByteBuffer buf1 = this.imprintRecord1.serializeToBuffer(); this.serializedRecord1 = new byte[buf1.remaining()]; @@ -39,7 +41,7 @@ public void setup(DataGenerator.TestRecord testRecord, DataGenerator.TestRecord } } - private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintException { + private ImprintRecordBuilder buildRecord(DataGenerator.TestRecord pojo) throws ImprintException { var builder = ImprintRecord.builder(SCHEMA_ID); builder.field(0, pojo.id); builder.field(1, pojo.timestamp); @@ -49,16 +51,23 @@ private ImprintRecord buildRecord(DataGenerator.TestRecord pojo) throws ImprintE builder.field(5, pojo.data); builder.field(6, pojo.tags); builder.field(7, pojo.metadata); - return builder.build(); + return builder; } @Override public void serialize(Blackhole bh) { + // Test 3: Just field addition (POJO → Builder) try { - bh.consume(buildRecord(DataGenerator.createTestRecord()).serializeToBuffer()); - } catch (ImprintException e) { - throw new RuntimeException(e); + var builder = buildRecord(this.testData); + bh.consume(builder); // Consume builder to prevent dead code elimination + } catch (ImprintException ignored) { } + + // Test 2: Just serialization (Builder → Bytes) + // try{ + // bh.consume(preBuiltRecord.buildToBuffer()); + // } catch (ImprintException ignored) { + // } } @Override diff --git a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java index e0a63f0..7587633 100644 --- a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java +++ b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java @@ -48,6 +48,18 @@ public void put(int key, T value) { putValue((short) key, value); } + /** + * Put a value and return the previous value if any. 
+ * @return the previous value, or null if no previous value existed + */ + public T putAndReturnOld(int key, T value) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key > Short.MAX_VALUE) + throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key); + return putValueAndReturnOld((short) key, value); + } + private void putValue(short key, T value) { if (poisoned) throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); @@ -63,6 +75,28 @@ private void putValue(short key, T value) { keys[index] = key; values[index] = value; } + + @SuppressWarnings("unchecked") + private T putValueAndReturnOld(short key, T value) { + if (poisoned) + throw new IllegalStateException("Map is invalid after compaction - cannot perform operations"); + if (key < 0) + throw new IllegalArgumentException("Field ID must be 0-" + Short.MAX_VALUE + ", got: " + key); + + if (size >= threshold) + resize(); + int index = findSlot(key); + T oldValue = null; + if (keys[index] == EMPTY_KEY) { + size++; + } else { + // Existing key - capture old value + oldValue = (T) values[index]; + } + keys[index] = key; + values[index] = value; + return oldValue; + } @SuppressWarnings("unchecked") public T get(int key) { @@ -303,4 +337,36 @@ private static int nextPowerOfTwo(int n) { if (n <= 1) return 1; return Integer.highestOneBit(n - 1) << 1; } -} \ No newline at end of file + + /** + * Result holder for in-place sorted fields - returns both keys and values. + */ + public static final class SortedFieldsResult { + public final short[] keys; + public final Object[] values; + public final int count; + + SortedFieldsResult(short[] keys, Object[] values, int count) { + this.keys = keys; + this.values = values; + this.count = count; + } + } + + /** + * Get both keys and values sorted by key order with zero allocation. 
+ * WARNING: Modifies internal state, and renders map operations unstable and in an illegal state. + */ + public SortedFieldsResult getSortedFields() { + if (size == 0) { + poisoned = true; + return new SortedFieldsResult(keys, values, 0); + } + + compactEntries(); + sortEntriesByKey(size); + poisoned = true; + + return new SortedFieldsResult(keys, values, size); + } +} diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e6f9de6..bec6614 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -38,11 +38,11 @@ public class ImprintRecord { @Getter(AccessLevel.PUBLIC) Header header; - + @Getter(AccessLevel.PACKAGE) // Raw directory bytes (read-only) ByteBuffer directoryBuffer; - + @Getter(AccessLevel.PACKAGE) // Raw payload bytes ByteBuffer payload; @@ -51,7 +51,7 @@ public class ImprintRecord { @Getter(AccessLevel.NONE) //Directory View cache to allow for easier mutable operations needed for lazy initialization Directory.DirectoryView directoryView; - + /** * Package-private constructor for @Value that creates immutable ByteBuffer views. */ @@ -62,9 +62,9 @@ public class ImprintRecord { this.payload = payload.asReadOnlyBuffer(); this.directoryView = null; } - + // ========== STATIC FACTORY METHODS ========== - + /** * Create a builder for constructing new ImprintRecord instances. */ @@ -86,27 +86,27 @@ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { return fromBytes(buffer); } - + /** * Create a ImprintRecord from complete serialized bytes. 
*/ public static ImprintRecord fromBytes(ByteBuffer serializedBytes) throws ImprintException { Objects.requireNonNull(serializedBytes, "Serialized bytes cannot be null"); - + var buffer = serializedBytes.duplicate().order(ByteOrder.LITTLE_ENDIAN); - + // Parse header var header = parseHeader(buffer); - + // Extract directory and payload sections var parsedBuffers = parseBuffersFromSerialized(serializedBytes); - + return new ImprintRecord(serializedBytes, header, parsedBuffers.directoryBuffer, parsedBuffers.payload); } - - + + // ========== ZERO-COPY OPERATIONS ========== - + /** * Merge with another ImprintRecord using pure byte operations. * Results in a new ImprintRecord without any object creation. @@ -115,7 +115,7 @@ public ImprintRecord merge(ImprintRecord other) throws ImprintException { var mergedBytes = ImprintOperations.mergeBytes(this.serializedBytes, other.serializedBytes); return fromBytes(mergedBytes); } - + /** * Project fields using pure byte operations. * Results in a new ImprintRecord without any object creation. @@ -124,7 +124,7 @@ public ImprintRecord project(int... fieldIds) throws ImprintException { var projectedBytes = ImprintOperations.projectBytes(this.serializedBytes, fieldIds); return fromBytes(projectedBytes); } - + /** * Chain multiple operations efficiently. * Each operation works on bytes without creating intermediate objects. @@ -132,7 +132,7 @@ public ImprintRecord project(int... fieldIds) throws ImprintException { public ImprintRecord projectAndMerge(ImprintRecord other, int... projectFields) throws ImprintException { return this.project(projectFields).merge(other); } - + /** * Get the raw serialized bytes. * This is the most efficient way to pass the record around. @@ -140,7 +140,7 @@ public ImprintRecord projectAndMerge(ImprintRecord other, int... projectFields) public ByteBuffer getSerializedBytes() { return serializedBytes.duplicate(); } - + /** * Get a DirectoryView for straight through directory access. 
*/ @@ -150,14 +150,14 @@ public Directory.DirectoryView getDirectoryView() { } return directoryView; } - + /** * Get the directory list. */ public List getDirectory() { return getDirectoryView().toList(); } - + /** * Get raw bytes for a field without deserializing. */ @@ -168,21 +168,14 @@ public ByteBuffer getRawBytes(int fieldId) { return null; } } - + /** * Get raw bytes for a field by short ID. */ public ByteBuffer getRawBytes(short fieldId) { return getRawBytes((int) fieldId); } - - /** - * Estimate the serialized size of this record. - */ - public int estimateSerializedSize() { - return serializedBytes.remaining(); - } - + /** * Get a field value by ID. * Uses zero-copy binary search to locate the field. @@ -190,29 +183,29 @@ public int estimateSerializedSize() { public Value getValue(int fieldId) throws ImprintException { var entry = getDirectoryView().findEntry(fieldId); if (entry == null) return null; - + var fieldBuffer = getFieldBuffer(fieldId); if (fieldBuffer == null) return null; - + return deserializeValue(entry.getTypeCode(), fieldBuffer); } - + /** * Check if a field exists without deserializing it. */ public boolean hasField(int fieldId) { return getDirectoryView().findEntry(fieldId) != null; } - + /** * Get the number of fields without parsing the directory. 
*/ public int getFieldCount() { return getDirectoryCount(); } - + // ========== TYPED GETTERS ========== - + public String getString(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "STRING"); if (value instanceof Value.StringValue) @@ -221,27 +214,27 @@ public String getString(int fieldId) throws ImprintException { return ((Value.StringBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); } - + public int getInt32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); } - + public long getInt64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); } - + public boolean getBoolean(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); } - + public float getFloat32(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); } - + public double getFloat64(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); } - + public byte[] getBytes(int fieldId) throws ImprintException { var value = getValidatedValue(fieldId, "BYTES"); if (value instanceof Value.BytesValue) @@ -250,15 +243,15 @@ public byte[] getBytes(int fieldId) throws ImprintException { return ((Value.BytesBufferValue) value).getValue(); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); } - + public java.util.List getArray(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ARRAY, Value.ArrayValue.class, 
"ARRAY").getValue(); } - + public java.util.Map getMap(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); } - + public ImprintRecord getRow(int fieldId) throws ImprintException { return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } @@ -276,7 +269,7 @@ public ByteBuffer serializeToBuffer() { public SchemaId getSchemaId() { return header.getSchemaId(); } - + /** * Estimate the memory footprint of this record. */ @@ -284,7 +277,7 @@ public int getSerializedSize() { return serializedBytes.remaining(); } - + /** * Get and validate a value exists and is not null. */ @@ -304,30 +297,30 @@ private T getTypedValueOrThrow(int fieldId, com.imprint.types. return expectedValueClass.cast(value); throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); } - + /** * Parse buffers from serialized record bytes. 
*/ private static ParsedBuffers parseBuffersFromSerialized(ByteBuffer serializedRecord) throws ImprintException { var buffer = serializedRecord.duplicate().order(ByteOrder.LITTLE_ENDIAN); - - // Parse header and extract sections using shared utility + + // Parse header and extract sections using shared utility var header = parseHeaderFromBuffer(buffer); var sections = extractBufferSections(buffer, header); - + return new ParsedBuffers(sections.directoryBuffer, sections.payloadBuffer); } - + private static class ParsedBuffers { final ByteBuffer directoryBuffer; final ByteBuffer payload; - + ParsedBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { this.directoryBuffer = directoryBuffer; this.payload = payload; } } - + private int getDirectoryCount() { try { return VarInt.decode(directoryBuffer.duplicate()).getValue(); @@ -335,7 +328,7 @@ private int getDirectoryCount() { return 0; // Cache as 0 on error } } - + /** * Gets ByteBuffer view of a field's data. */ @@ -343,43 +336,43 @@ private ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { var entry = findDirectoryEntry(fieldId); if (entry == null) return null; - + int startOffset = entry.getOffset(); int endOffset = findEndOffset(entry.getId()); - + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset); } - + var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } - + private Directory findDirectoryEntry(int fieldId) throws ImprintException { var searchBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); - + int count = getDirectoryCount(); if (count == 0) return null; - + // Advance past varint to entries VarInt.decode(searchBuffer); int directoryStartPos = searchBuffer.position(); - + int low = 0; int high = count - 1; 
- + while (low <= high) { int mid = (low + high) >>> 1; int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - + if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Directory entry exceeds buffer"); - + searchBuffer.position(entryPos); short midFieldId = searchBuffer.getShort(); - + if (midFieldId < fieldId) { low = mid + 1; } else if (midFieldId > fieldId) { @@ -390,36 +383,36 @@ private Directory findDirectoryEntry(int fieldId) throws ImprintException { return deserializeDirectoryEntry(searchBuffer); } } - + return null; } - + private int findEndOffset(int currentFieldId) throws ImprintException { var scanBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); - + int count = getDirectoryCount(); if (count == 0) return payload.limit(); - + // Advance past varint VarInt.decode(scanBuffer); int directoryStartPos = scanBuffer.position(); - + int low = 0; int high = count - 1; int nextOffset = payload.limit(); - + // Binary search for first field with fieldId > currentFieldId while (low <= high) { int mid = (low + high) >>> 1; int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); - + if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) break; - + scanBuffer.position(entryPos); short fieldId = scanBuffer.getShort(); scanBuffer.get(); // skip type int offset = scanBuffer.getInt(); - + if (fieldId > currentFieldId) { nextOffset = offset; high = mid - 1; @@ -427,26 +420,26 @@ private int findEndOffset(int currentFieldId) throws ImprintException { low = mid + 1; } } - + return nextOffset; } - + private Directory deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); - + short id = buffer.getShort(); var typeCode = TypeCode.fromByte(buffer.get()); int offset = buffer.getInt(); - + return 
new Directory.Entry(id, typeCode, offset); } - + /** * DirectoryView */ private class ImprintDirectoryView implements Directory.DirectoryView { - + @Override public Directory findEntry(int fieldId) { try { @@ -469,18 +462,18 @@ public List toList() { } return list; } - + @Override public int size() { return getDirectoryCount(); } - + @Override public Iterator iterator() { return new ImprintDirectoryIterator(); } } - + /** * Iterator that parses directory entries lazily from raw bytes. */ @@ -488,11 +481,11 @@ private class ImprintDirectoryIterator implements Iterator { private final ByteBuffer iterBuffer; private final int totalCount; private int currentIndex; - + ImprintDirectoryIterator() { this.iterBuffer = directoryBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); this.totalCount = getDirectoryCount(); - + try { // Skip past varint to first entry VarInt.decode(iterBuffer); @@ -501,18 +494,18 @@ private class ImprintDirectoryIterator implements Iterator { } this.currentIndex = 0; } - + @Override public boolean hasNext() { return currentIndex < totalCount; } - + @Override public Directory next() { if (!hasNext()) { throw new NoSuchElementException(); } - + try { var entry = deserializeDirectoryEntry(iterBuffer); currentIndex++; @@ -522,45 +515,37 @@ public Directory next() { } } } - - /** - * Used by {@link ImprintRecordBuilder} with sorted field data. - * Creates directory buffer from field data and calculated offsets. 
- * - * @param sortedFields Array of FieldData objects sorted by ID - * @param offsets Array of payload offsets corresponding to each field - * @param fieldCount Number of valid fields to process - */ - static ByteBuffer createDirectoryBufferFromSorted(Object[] sortedFields, int[] offsets, int fieldCount) { - if (fieldCount == 0) - return createEmptyDirectoryBuffer(); - - int size = calculateDirectorySize(fieldCount); - var buffer = ByteBuffer.allocate(size); - buffer.order(ByteOrder.LITTLE_ENDIAN); - VarInt.encode(fieldCount, buffer); - - //this ends up being kind of a hotspot for some reason, probably boundary checking. - //Direct writes might help a bit it could get difficult since pretty much all the other - //frameworks just go straight for Unsafe + + static void writeDirectoryToBuffer(short[] sortedKeys, Object[] sortedValues, int[] offsets, int fieldCount, ByteBuffer buffer) { + // Optimize VarInt encoding for common case (< 128 fields = single byte) + if (fieldCount < 128) { + buffer.put((byte) fieldCount); + } else { + VarInt.encode(fieldCount, buffer); + } + + // Early return for empty directory + if (fieldCount == 0) { + return; + } + + // Tight loop optimization: minimize method calls and casts for (int i = 0; i < fieldCount; i++) { - var fieldData = (ImprintRecordBuilder.FieldData) sortedFields[i]; - buffer.putShort(fieldData.id); - buffer.put(fieldData.value.getTypeCode().getCode()); - buffer.putInt(offsets[i]); + var entry = (ImprintRecordBuilder.ValueWithType) sortedValues[i]; + + // Get current position once, then batch write + int pos = buffer.position(); + + // Write all 7 bytes for this entry in sequence + buffer.putShort(pos, sortedKeys[i]); // bytes 0-1: field ID + buffer.put(pos + 2, entry.typeCode); // byte 2: type code + buffer.putInt(pos + 3, offsets[i]); // bytes 3-6: offset + + // Advance buffer position by 7 bytes + buffer.position(pos + 7); } - - buffer.flip(); - return buffer; } - private static ByteBuffer createEmptyDirectoryBuffer() 
{ - ByteBuffer buffer = ByteBuffer.allocate(1); - VarInt.encode(0, buffer); - buffer.flip(); - return buffer; - } - /** * Parse a header from a ByteBuffer without advancing the buffer position. * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. @@ -573,14 +558,14 @@ public static Header parseHeaderFromBuffer(ByteBuffer buffer) throws ImprintExce buffer.position(startPos); } } - + /** * Calculate the size needed to store a directory with the given entry count. */ public static int calculateDirectorySize(int entryCount) { return VarInt.encodedLength(entryCount) + (entryCount * Constants.DIR_ENTRY_BYTES); } - + /** * Container for separated directory and payload buffer sections. * Utility class shared between {@link ImprintRecord} and {@link ImprintOperations}. @@ -589,14 +574,14 @@ public static class BufferSections { public final ByteBuffer directoryBuffer; public final ByteBuffer payloadBuffer; public final int directoryCount; - + public BufferSections(ByteBuffer directoryBuffer, ByteBuffer payloadBuffer, int directoryCount) { this.directoryBuffer = directoryBuffer; this.payloadBuffer = payloadBuffer; this.directoryCount = directoryCount; } } - + /** * Extract directory and payload sections from a serialized buffer. * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. 
@@ -604,26 +589,26 @@ public BufferSections(ByteBuffer directoryBuffer, ByteBuffer payloadBuffer, int public static BufferSections extractBufferSections(ByteBuffer buffer, Header header) throws ImprintException { // Skip header buffer.position(buffer.position() + Constants.HEADER_BYTES); - + // Parse directory section int directoryStartPos = buffer.position(); var countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - + // Create directory buffer buffer.position(directoryStartPos); var directoryBuffer = buffer.slice(); directoryBuffer.limit(directorySize); - + // Advance to payload buffer.position(buffer.position() + directorySize); var payloadBuffer = buffer.slice(); payloadBuffer.limit(header.getPayloadSize()); - + return new BufferSections(directoryBuffer, payloadBuffer, directoryCount); } - + private static Header parseHeader(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.HEADER_BYTES) throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); @@ -635,15 +620,15 @@ private static Header parseHeader(ByteBuffer buffer) throws ImprintException { throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte"); if (version != Constants.VERSION) throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); - + var flags = new Flags(buffer.get()); int fieldSpaceId = buffer.getInt(); int schemaHash = buffer.getInt(); int payloadSize = buffer.getInt(); - + return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } - + private Value deserializeValue(com.imprint.types.TypeCode typeCode, ByteBuffer buffer) throws ImprintException { var valueBuffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java 
b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 3ccdfcd..dc05984 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -6,6 +6,8 @@ import com.imprint.types.MapKey; import com.imprint.types.TypeCode; import com.imprint.types.Value; +import com.imprint.util.VarInt; +import lombok.SneakyThrows; import java.nio.BufferOverflowException; import java.nio.ByteBuffer; @@ -36,19 +38,19 @@ @SuppressWarnings("unused") public final class ImprintRecordBuilder { private final SchemaId schemaId; - private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); + private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); private int estimatedPayloadSize = 0; - static final class FieldData { - final short id; + // Minimal wrapper to avoid getTypeCode() virtual dispatch + static final class ValueWithType { final Value value; - - FieldData(short id, Value value) { - this.id = id; + final byte typeCode; + + ValueWithType(Value value, byte typeCode) { this.value = value; + this.typeCode = typeCode; } } - ImprintRecordBuilder(SchemaId schemaId) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); @@ -173,30 +175,31 @@ public ImprintRecord build() throws ImprintException { public ByteBuffer buildToBuffer() throws ImprintException { // 1. Calculate conservative size BEFORE sorting (which invalidates the map) int conservativeSize = calculateConservativePayloadSize(); - + // 2. Sort fields by ID for directory ordering (zero allocation) var sortedFieldsResult = getSortedFieldsResult(); - var sortedFields = sortedFieldsResult.values; + var sortedValues = sortedFieldsResult.values; + var sortedKeys = sortedFieldsResult.keys; var fieldCount = sortedFieldsResult.count; - + // 3. 
Serialize payload and calculate offsets with overflow handling PayloadSerializationResult result = null; int bufferSizeMultiplier = 1; - + while (result == null && bufferSizeMultiplier <= 64) { try { - result = serializePayload(sortedFields, fieldCount, conservativeSize, bufferSizeMultiplier); + result = serializePayload(sortedValues, fieldCount, conservativeSize, bufferSizeMultiplier); } catch (BufferOverflowException e) { bufferSizeMultiplier *= 2; // Try 2x, 4x, 8x, 16x, 32x, 64x } } - + if (result == null) { - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Failed to serialize payload even with 64x buffer size"); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Failed to serialize payload even with 64x buffer size"); } - - return serializeToBuffer(schemaId, sortedFields, result.offsets, fieldCount, result.payload); + + return serializeToBuffer(schemaId, sortedKeys, sortedValues, result.offsets, fieldCount, result.payload); } /** @@ -208,19 +211,28 @@ public ByteBuffer buildToBuffer() throws ImprintException { * @param typeCode the known type code (avoids virtual call) * @return this builder for method chaining */ + @SneakyThrows private ImprintRecordBuilder addField(int id, Value value, TypeCode typeCode) { Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); - var newEntry = new FieldData((short) id, value); - // Check if replacing an existing field - var oldEntry = fields.get(id); + // Calculate size for tracking (but don't store it) + int newSize = fastEstimateFieldSize(value, typeCode); + + // Create wrapper to avoid virtual dispatch later + var newEntry = new ValueWithType(value, typeCode.getCode()); + + // Efficient put with old value return - single hash operation + var oldEntry = fields.putAndReturnOld(id, newEntry); + if (oldEntry != null) { - estimatedPayloadSize -= fastEstimateFieldSize(oldEntry.value, oldEntry.value.getTypeCode()); + // Field replacement - subtract old size, add 
new size + int oldSize = fastEstimateFieldSize(oldEntry.value, TypeCode.fromByte(oldEntry.typeCode)); + estimatedPayloadSize += newSize - oldSize; + } else { + // New field - just add new size + estimatedPayloadSize += newSize; } - // Add or replace field - fields.put(id, newEntry); - estimatedPayloadSize += fastEstimateFieldSize(newEntry.value, typeCode); return this; } @@ -300,8 +312,8 @@ private MapKey convertToMapKey(Object obj) { } /** - * Fast heuristic-based field size estimation (no expensive operations). - * TypeCode passed directly to avoid virtual call. + * Fast conservative field size estimation optimized for performance. + * Uses minimal operations while handling both normal and large data correctly. */ private int fastEstimateFieldSize(Value value, TypeCode typeCode) { switch (typeCode) { @@ -314,27 +326,30 @@ private int fastEstimateFieldSize(Value value, TypeCode typeCode) { case FLOAT64: return 8; case STRING: + // Smart estimation: check if it's a large string with minimal overhead if (value instanceof Value.StringValue) { - return 5 + (((Value.StringValue) value).getValue().length() * 3 / 2); + int len = ((Value.StringValue) value).getValue().length(); + return len > 1000 ? 5 + len * 3 : 256; // UTF-8 worst case for large strings } else { - return 5 + ((Value.StringBufferValue) value).getBuffer().remaining(); + int remaining = ((Value.StringBufferValue) value).getBuffer().remaining(); + return remaining > 1000 ? 5 + remaining : 256; } case BYTES: + // Smart estimation: check if it's large bytes with minimal overhead if (value instanceof Value.BytesValue) { - return 5 + ((Value.BytesValue) value).getValue().length; + int len = ((Value.BytesValue) value).getValue().length; + return len > 1000 ? 5 + len : 256; } else { - return 5 + ((Value.BytesBufferValue) value).getBuffer().remaining(); + int remaining = ((Value.BytesBufferValue) value).getBuffer().remaining(); + return remaining > 1000 ? 
5 + remaining : 256; } case ARRAY: - var elements = ((Value.ArrayValue) value).getValue(); - return 5 + 1 + (elements.size() * 16); + return 512; // Conservative: most arrays are < 512 bytes case MAP: - var map = ((Value.MapValue) value).getValue(); - return 5 + 2 + (map.size() * 32); //32 bytes per entry + return 512; // Conservative: most maps are < 512 bytes case ROW: - // Use actual size if available - return ((Value.RowValue) value).getValue().getSerializedSize(); - + return 1024; // Conservative: most nested records are < 1KB + default: return 64; // Fallback } @@ -347,7 +362,7 @@ private int calculateConservativePayloadSize() { // Add 25% buffer for safety margin return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), 4096); } - + /** * Result of payload serialization containing offsets and final payload buffer. @@ -355,7 +370,7 @@ private int calculateConservativePayloadSize() { private static class PayloadSerializationResult { final int[] offsets; final ByteBuffer payload; - + PayloadSerializationResult(int[] offsets, ByteBuffer payload) { this.offsets = offsets; this.payload = payload; @@ -377,9 +392,9 @@ private PayloadSerializationResult serializePayload(Object[] sortedFields, int f private PayloadSerializationResult doSerializePayload(Object[] sortedFields, int fieldCount, ByteBuffer payloadBuffer) throws ImprintException { int[] offsets = new int[fieldCount]; for (int i = 0; i < fieldCount; i++) { - var fieldData = (FieldData) sortedFields[i]; + var entry = (ValueWithType) sortedFields[i]; offsets[i] = payloadBuffer.position(); - serializeValue(fieldData.value, payloadBuffer); + serializeValue(entry.value, payloadBuffer); } payloadBuffer.flip(); var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); @@ -387,28 +402,46 @@ private PayloadSerializationResult doSerializePayload(Object[] sortedFields, int } private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { - switch (value.getTypeCode()) { + var typeCode = 
value.getTypeCode(); + switch (typeCode) { case NULL: + // NULL values have no payload + break; case BOOL: + Serializers.serializeBool((Value.BoolValue) value, buffer); + break; case INT32: + Serializers.serializeInt32((Value.Int32Value) value, buffer); + break; case INT64: + Serializers.serializeInt64((Value.Int64Value) value, buffer); + break; case FLOAT32: + Serializers.serializeFloat32((Value.Float32Value) value, buffer); + break; case FLOAT64: - case BYTES: + Serializers.serializeFloat64((Value.Float64Value) value, buffer); + break; case STRING: + Serializers.serializeString(value, buffer); + break; + case BYTES: + Serializers.serializeBytes(value, buffer); + break; case ARRAY: + Serializers.serializeArray((Value.ArrayValue) value, buffer); + break; case MAP: - value.getTypeCode().getHandler().serialize(value, buffer); + Serializers.serializeMap((Value.MapValue) value, buffer); break; - //TODO eliminate this switch entirely by implementing a ROW TypeHandler case ROW: + // Keep existing nested record serialization Value.RowValue rowValue = (Value.RowValue) value; var serializedRow = rowValue.getValue().serializeToBuffer(); buffer.put(serializedRow); break; - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + value.getTypeCode()); + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + typeCode); } } @@ -416,18 +449,18 @@ private void serializeValue(Value value, ByteBuffer buffer) throws ImprintExcept * Get fields sorted by ID from the map. * Returns internal map array reference + count to avoid any copying but sacrifices the map structure in the process. */ - private ImprintFieldObjectMap.SortedValuesResult getSortedFieldsResult() { - return fields.getSortedValues(); + private ImprintFieldObjectMap.SortedFieldsResult getSortedFieldsResult() { + return fields.getSortedFields(); } /** * Serialize components into a single ByteBuffer. 
*/ - private static ByteBuffer serializeToBuffer(SchemaId schemaId, Object[] sortedFields, int[] offsets, int fieldCount, ByteBuffer payload) { + private static ByteBuffer serializeToBuffer(SchemaId schemaId, short[] sortedKeys, Object[] sortedValues, int[] offsets, int fieldCount, ByteBuffer payload) { var header = new Header(new Flags((byte) 0), schemaId, payload.remaining()); - var directoryBuffer = ImprintRecord.createDirectoryBufferFromSorted(sortedFields, offsets, fieldCount); + int directorySize = ImprintRecord.calculateDirectorySize(fieldCount); - int finalSize = Constants.HEADER_BYTES + directoryBuffer.remaining() + payload.remaining(); + int finalSize = Constants.HEADER_BYTES + directorySize + payload.remaining(); var finalBuffer = ByteBuffer.allocate(finalSize); finalBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -438,10 +471,191 @@ private static ByteBuffer serializeToBuffer(SchemaId schemaId, Object[] sortedFi finalBuffer.putInt(header.getSchemaId().getFieldSpaceId()); finalBuffer.putInt(header.getSchemaId().getSchemaHash()); finalBuffer.putInt(header.getPayloadSize()); - finalBuffer.put(directoryBuffer); + + // Write directory directly to final buffer + ImprintRecord.writeDirectoryToBuffer(sortedKeys, sortedValues, offsets, fieldCount, finalBuffer); + + // Write payload finalBuffer.put(payload); finalBuffer.flip(); return finalBuffer.asReadOnlyBuffer(); } -} \ No newline at end of file + + /** + * Direct serializers that avoid virtual dispatch overhead. + */ + static class Serializers { + static void serializeBool(Value.BoolValue value, ByteBuffer buffer) { + buffer.put((byte) (value.getValue() ? 
1 : 0)); + } + + static void serializeInt32(Value.Int32Value value, ByteBuffer buffer) { + buffer.putInt(value.getValue()); + } + + static void serializeInt64(Value.Int64Value value, ByteBuffer buffer) { + buffer.putLong(value.getValue()); + } + + static void serializeFloat32(Value.Float32Value value, ByteBuffer buffer) { + buffer.putFloat(value.getValue()); + } + + static void serializeFloat64(Value.Float64Value value, ByteBuffer buffer) { + buffer.putDouble(value.getValue()); + } + + static void serializeString(Value value, ByteBuffer buffer) { + if (value instanceof Value.StringValue) { + var stringValue = (Value.StringValue) value; + var utf8Bytes = stringValue.getUtf8Bytes(); // Already cached! + VarInt.encode(utf8Bytes.length, buffer); + buffer.put(utf8Bytes); + } else { + var bufferValue = (Value.StringBufferValue) value; + var stringBuffer = bufferValue.getBuffer(); + VarInt.encode(stringBuffer.remaining(), buffer); + buffer.put(stringBuffer); + } + } + + static void serializeBytes(Value value, ByteBuffer buffer) { + if (value instanceof Value.BytesBufferValue) { + var bufferValue = (Value.BytesBufferValue) value; + var bytesBuffer = bufferValue.getBuffer(); + VarInt.encode(bytesBuffer.remaining(), buffer); + buffer.put(bytesBuffer); + } else { + var bytesValue = (Value.BytesValue) value; + byte[] bytes = bytesValue.getValue(); + VarInt.encode(bytes.length, buffer); + buffer.put(bytes); + } + } + + static void serializeArray(Value.ArrayValue value, ByteBuffer buffer) throws ImprintException { + var elements = value.getValue(); + VarInt.encode(elements.size(), buffer); + + if (elements.isEmpty()) return; + + var elementType = elements.get(0).getTypeCode(); + buffer.put(elementType.getCode()); + + for (var element : elements) { + if (element.getTypeCode() != elementType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Array elements must have same type code: " + + element.getTypeCode() + " != " + elementType); + } + // Recursive call to serialize 
each element + serializeValueByType(element, elementType, buffer); + } + } + + static void serializeMap(Value.MapValue value, ByteBuffer buffer) throws ImprintException { + var map = value.getValue(); + VarInt.encode(map.size(), buffer); + + if (map.isEmpty()) return; + + var iterator = map.entrySet().iterator(); + var first = iterator.next(); + var keyType = first.getKey().getTypeCode(); + var valueType = first.getValue().getTypeCode(); + + buffer.put(keyType.getCode()); + buffer.put(valueType.getCode()); + + serializeMapKey(first.getKey(), buffer); + serializeValueByType(first.getValue(), valueType, buffer); + + while (iterator.hasNext()) { + var entry = iterator.next(); + if (entry.getKey().getTypeCode() != keyType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map keys must have same type code: " + + entry.getKey().getTypeCode() + " != " + keyType); + } + if (entry.getValue().getTypeCode() != valueType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map values must have same type code: " + + entry.getValue().getTypeCode() + " != " + valueType); + } + + serializeMapKey(entry.getKey(), buffer); + serializeValueByType(entry.getValue(), valueType, buffer); + } + } + + // Helper method to avoid infinite recursion + private static void serializeValueByType(Value value, TypeCode typeCode, ByteBuffer buffer) throws ImprintException { + switch (typeCode) { + case NULL: + break; + case BOOL: + serializeBool((Value.BoolValue) value, buffer); + break; + case INT32: + serializeInt32((Value.Int32Value) value, buffer); + break; + case INT64: + serializeInt64((Value.Int64Value) value, buffer); + break; + case FLOAT32: + serializeFloat32((Value.Float32Value) value, buffer); + break; + case FLOAT64: + serializeFloat64((Value.Float64Value) value, buffer); + break; + case STRING: + serializeString(value, buffer); + break; + case BYTES: + serializeBytes(value, buffer); + break; + case ARRAY: + serializeArray((Value.ArrayValue) value, buffer); + break; + case 
MAP: + serializeMap((Value.MapValue) value, buffer); + break; + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + typeCode); + } + } + + private static void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { + switch (key.getTypeCode()) { + case INT32: + MapKey.Int32Key int32Key = (MapKey.Int32Key) key; + buffer.putInt(int32Key.getValue()); + break; + + case INT64: + MapKey.Int64Key int64Key = (MapKey.Int64Key) key; + buffer.putLong(int64Key.getValue()); + break; + + case BYTES: + MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; + byte[] bytes = bytesKey.getValue(); + VarInt.encode(bytes.length, buffer); + buffer.put(bytes); + break; + + case STRING: + MapKey.StringKey stringKey = (MapKey.StringKey) key; + byte[] stringBytes = stringKey.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8); + VarInt.encode(stringBytes.length, buffer); + buffer.put(stringBytes); + break; + + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Invalid map key type: " + key.getTypeCode()); + } + } + } +} diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 8380681..1cb7128 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -14,7 +14,7 @@ import java.util.stream.IntStream; -//@Disabled +@Disabled public class ProfilerTest { private static final int RECORD_SIZE = 50; @@ -136,7 +136,7 @@ private void profileDisjointMerges() throws Exception { @Tag("serialization") @Tag("small-records") void profileSmallRecordSerialization() throws Exception { - profileSerialization("small records", RECORD_SIZE, 100_000); + profileSerialization("small records", RECORD_SIZE, 600_000); } @Test From 3919e6312635aec291be4e16ceeff7177e20df08 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Fri, 13 Jun 2025 12:37:03 -0400 Subject: [PATCH 49/53] add new Imprint specific 
benchmark --- .../benchmark/ImprintDetailedBenchmark.java | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java diff --git a/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java b/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java new file mode 100644 index 0000000..d3edc1a --- /dev/null +++ b/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java @@ -0,0 +1,104 @@ +package com.imprint.benchmark; + +import com.imprint.benchmark.serializers.ImprintSerializingBenchmark; +import com.imprint.core.ImprintRecord; +import com.imprint.core.ImprintRecordBuilder; +import com.imprint.core.SchemaId; +import com.imprint.error.ImprintException; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +/** + * Detailed breakdown of Imprint serialization performance to identify bottlenecks. 
+ */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 15, time = 1) +@Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) +public class ImprintDetailedBenchmark { + + private DataGenerator.TestRecord testData; + private ImprintRecordBuilder preBuiltBuilder; + private ImprintRecord preBuiltRecord; + private static final SchemaId SCHEMA_ID = new SchemaId(1, 1); + + @Setup(Level.Trial) + public void setup() { + testData = DataGenerator.createTestRecord(); + try { + preBuiltBuilder = buildRecord(testData); + preBuiltRecord = preBuiltBuilder.build(); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + private ImprintRecordBuilder buildRecord(DataGenerator.TestRecord pojo) { + var builder = ImprintRecord.builder(SCHEMA_ID); + builder.field(0, pojo.id); + builder.field(1, pojo.timestamp); + builder.field(2, pojo.flags); + builder.field(3, pojo.active); + builder.field(4, pojo.value); + builder.field(5, pojo.data); + builder.field(6, pojo.tags); + builder.field(7, pojo.metadata); + return builder; + } + + @Benchmark + public void fieldAddition(Blackhole bh) { + // Benchmark: POJO → Builder (field addition only) + try { + var builder = buildRecord(testData); + bh.consume(builder); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Benchmark + public void buildToBuffer(Blackhole bh) { + // Benchmark: Builder → Bytes (serialization only) + try { + bh.consume(preBuiltBuilder.buildToBuffer()); + } catch (ImprintException e) { + throw new RuntimeException(e); + } + } + + @Benchmark + public void serializeToBuffer(Blackhole bh) { + // Benchmark: Record → Bytes (just buffer copy) + bh.consume(preBuiltRecord.serializeToBuffer()); + } + + @Benchmark + public void fullPipeline(Blackhole bh) { + // Benchmark: POJO → Builder → Bytes (complete pipeline) + try { + bh.consume(buildRecord(testData).buildToBuffer()); + } catch 
(ImprintException e) { + throw new RuntimeException(e); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(ImprintDetailedBenchmark.class.getSimpleName()) + .forks(1) + .mode(Mode.AverageTime) + .timeUnit(TimeUnit.NANOSECONDS) + .build(); + + new Runner(opt).run(); + } +} \ No newline at end of file From 5525293e6dadc723be2c3a8f5e3f951296c0a5e9 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 14 Jun 2025 00:31:51 -0400 Subject: [PATCH 50/53] Remove Value and TypeHandler to significantly reduce dynamic dispatch --- .../benchmark/FieldAccessBenchmark.java | 275 ---------- .../benchmark/ImprintDetailedBenchmark.java | 5 +- .../benchmark/SerializationBenchmark.java | 180 ------- .../imprint/benchmark/StringBenchmark.java | 316 ------------ .../imprint/core/ImprintFieldObjectMap.java | 2 +- .../java/com/imprint/core/ImprintRecord.java | 186 ++++--- .../imprint/core/ImprintRecordBuilder.java | 458 ++++++----------- .../com/imprint/ops/ImprintOperations.java | 8 +- src/main/java/com/imprint/types/MapKey.java | 40 +- src/main/java/com/imprint/types/TypeCode.java | 33 +- .../java/com/imprint/types/TypeHandler.java | 442 ----------------- src/main/java/com/imprint/types/Value.java | 468 ------------------ src/main/java/com/imprint/util/VarInt.java | 30 +- .../java/com/imprint/IntegrationTest.java | 188 +++---- .../imprint/ops/ImprintOperationsTest.java | 35 +- .../com/imprint/profile/ProfilerTest.java | 91 +--- .../java/com/imprint/types/MapKeyTest.java | 33 +- .../com/imprint/types/TypeHandlerTest.java | 274 ---------- .../java/com/imprint/types/ValueTest.java | 218 -------- 19 files changed, 463 insertions(+), 2819 deletions(-) delete mode 100644 src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java delete mode 100644 src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java delete mode 100644 src/jmh/java/com/imprint/benchmark/StringBenchmark.java delete mode 100644 
src/main/java/com/imprint/types/TypeHandler.java delete mode 100644 src/main/java/com/imprint/types/Value.java delete mode 100644 src/test/java/com/imprint/types/TypeHandlerTest.java delete mode 100644 src/test/java/com/imprint/types/ValueTest.java diff --git a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java b/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java deleted file mode 100644 index 06a7717..0000000 --- a/src/jmh/java/com/imprint/benchmark/FieldAccessBenchmark.java +++ /dev/null @@ -1,275 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintRecordBuilder; -import com.imprint.core.SchemaId; -import com.imprint.types.MapKey; -import com.imprint.types.Value; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.concurrent.TimeUnit; - -/** - * Benchmarks for ImprintRecord field access and projection operations. - * Tests the zero-copy field access performance claims. 
- */ -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -public class FieldAccessBenchmark { - - private ImprintRecord sparseRecord; - private ImprintRecord denseRecord; - private ImprintRecord largeRecord; - - // Field IDs for testing different access patterns - private int[] firstFields; - private int[] middleFields; - private int[] lastFields; - private int[] randomFields; - private int[] allFields; - - @Setup - public void setup() throws Exception { - sparseRecord = createSparseRecord(); // Few fields, large field IDs - denseRecord = createDenseRecord(); // Many sequential fields - largeRecord = createLargeRecord(); // Large record with complex data - - // Setup field access patterns - firstFields = new int[]{1, 2, 3, 4, 5}; - middleFields = new int[]{45, 46, 47, 48, 49}; - lastFields = new int[]{95, 96, 97, 98, 99}; - randomFields = new int[]{7, 23, 41, 67, 89}; - allFields = new int[100]; - for (int i = 0; i < 100; i++) { - allFields[i] = i + 1; - } - } - - // ===== SINGLE FIELD ACCESS BENCHMARKS ===== - - @Benchmark - public void accessFirstField(Blackhole bh) throws Exception { - var value = denseRecord.getValue(1); - bh.consume(value); - } - - @Benchmark - public void accessMiddleField(Blackhole bh) throws Exception { - var value = denseRecord.getValue(50); - bh.consume(value); - } - - @Benchmark - public void accessLastField(Blackhole bh) throws Exception { - var value = denseRecord.getValue(100); - bh.consume(value); - } - - @Benchmark - public void accessNonExistentField(Blackhole bh) throws Exception { - var value = denseRecord.getValue(999); - bh.consume(value); - } - - // ===== MULTIPLE FIELD ACCESS PATTERNS ===== - - @Benchmark - public void accessFirstFields(Blackhole bh) throws Exception { - for (int fieldId : firstFields) { - var value = 
denseRecord.getValue(fieldId); - bh.consume(value); - } - } - - @Benchmark - public void accessMiddleFields(Blackhole bh) throws Exception { - for (int fieldId : middleFields) { - var value = denseRecord.getValue(fieldId); - bh.consume(value); - } - } - - @Benchmark - public void accessLastFields(Blackhole bh) throws Exception { - for (int fieldId : lastFields) { - var value = denseRecord.getValue(fieldId); - bh.consume(value); - } - } - - @Benchmark - public void accessRandomFields(Blackhole bh) throws Exception { - for (int fieldId : randomFields) { - var value = denseRecord.getValue(fieldId); - bh.consume(value); - } - } - - // ===== FIELD PROJECTION BENCHMARKS ===== - - @Benchmark - public void projectSmallSubset(Blackhole bh) throws Exception { - // Project 5 fields from a 100-field record - var projection = simulateProject(denseRecord, firstFields); - bh.consume(projection); - } - - @Benchmark - public void projectMediumSubset(Blackhole bh) throws Exception { - // Project 25 fields from a 100-field record - int[] fields = Arrays.copyOf(allFields, 25); - var projection = simulateProject(denseRecord, fields); - bh.consume(projection); - } - - @Benchmark - public void projectLargeSubset(Blackhole bh) throws Exception { - // Project 75 fields from a 100-field record - int[] fields = Arrays.copyOf(allFields, 75); - var projection = simulateProject(denseRecord, fields); - bh.consume(projection); - } - - @Benchmark - public void projectAllFields(Blackhole bh) throws Exception { - // Project all fields (should be nearly equivalent to full record) - var projection = simulateProject(denseRecord, allFields); - bh.consume(projection); - } - - // ===== RAW BYTES ACCESS BENCHMARKS ===== - - @Benchmark - public void getRawBytesFirstField(Blackhole bh) { - var rawBytes = denseRecord.getRawBytes(1); - bh.consume(rawBytes); - } - - @Benchmark - public void getRawBytesMiddleField(Blackhole bh) { - var rawBytes = denseRecord.getRawBytes(50); - bh.consume(rawBytes); - } - - 
@Benchmark - public void getRawBytesLastField(Blackhole bh) { - var rawBytes = denseRecord.getRawBytes(100); - bh.consume(rawBytes); - } - - // ===== SPARSE VS DENSE ACCESS PATTERNS ===== - - @Benchmark - public void accessSparseRecord(Blackhole bh) throws Exception { - // Access fields in sparse record (large field IDs, few fields) - var value1 = sparseRecord.getValue(1000); - var value2 = sparseRecord.getValue(5000); - var value3 = sparseRecord.getValue(10000); - bh.consume(value1); - bh.consume(value2); - bh.consume(value3); - } - - @Benchmark - public void accessDenseRecord(Blackhole bh) throws Exception { - // Access fields in dense record (sequential field IDs) - var value1 = denseRecord.getValue(1); - var value2 = denseRecord.getValue(2); - var value3 = denseRecord.getValue(3); - bh.consume(value1); - bh.consume(value2); - bh.consume(value3); - } - - // ===== HELPER METHODS ===== - - /** - * Simulates field projection by creating a new record with only specified fields. - * This should be replaced with actual project API when available. 
- */ - private ImprintRecord simulateProject(ImprintRecord source, int[] fieldIds) throws Exception { - var builder = ImprintRecord.builder(source.getHeader().getSchemaId()); - - for (int fieldId : fieldIds) { - var value = source.getValue(fieldId); - if (value != null) { - builder.field(fieldId, value); - } - } - - return builder.build(); - } - - private ImprintRecord createSparseRecord() throws Exception { - return ImprintRecord.builder(new SchemaId(1, 0x12345678)) - .field(1000, Value.fromString("sparse_field_1")) - .field(5000, Value.fromInt32(42)) - .field(10000, Value.fromFloat64(3.14159)) - .field(15000, Value.fromBoolean(true)) - .field(20000, Value.fromString("sparse_field_5")) - .build(); - } - - private ImprintRecord createDenseRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(2, 0x87654321)); - - // Dense record with 100 sequential fields - for (int i = 1; i <= 100; i++) { - switch (i % 5) { - case 0: - builder.field(i, Value.fromString("string_field_" + i)); - break; - case 1: - builder.field(i, Value.fromInt32(i * 10)); - break; - case 2: - builder.field(i, Value.fromFloat64(i * 1.5)); - break; - case 3: - builder.field(i, Value.fromBoolean(i % 2 == 0)); - break; - case 4: - builder.field(i, Value.fromInt64(i * 1000L)); - break; - } - } - - return builder.build(); - } - - private ImprintRecord createLargeRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(3, 0xABCDEF12)); - - // Large record with complex fields (arrays, maps) - builder.field(1, Value.fromString("Large record with complex data")); - - // Add a large array - var list = new ArrayList(); - for (int i = 0; i < 200; i++) { - list.add(Value.fromInt32(i)); - } - builder.field(2, Value.fromArray(list)); - - // Add a large map - var map = new HashMap(); - for (int i = 0; i < 100; i++) { - map.put(MapKey.fromString("key_" + i), Value.fromString("value_" + i)); - } - builder.field(3, Value.fromMap(map)); - - // Add more fields - for (int i = 
4; i <= 50; i++) { - builder.field(i, Value.fromBytes(new byte[1024])); // 1KB byte arrays - } - - return builder.build(); - } -} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java b/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java index d3edc1a..c9d4514 100644 --- a/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ImprintDetailedBenchmark.java @@ -1,6 +1,5 @@ package com.imprint.benchmark; -import com.imprint.benchmark.serializers.ImprintSerializingBenchmark; import com.imprint.core.ImprintRecord; import com.imprint.core.ImprintRecordBuilder; import com.imprint.core.SchemaId; @@ -21,7 +20,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 1) -@Measurement(iterations = 15, time = 1) +@Measurement(iterations = 20, time = 1) @Fork(value = 1, jvmArgs = {"-Xms4g", "-Xmx4g"}) public class ImprintDetailedBenchmark { @@ -42,7 +41,7 @@ public void setup() { } private ImprintRecordBuilder buildRecord(DataGenerator.TestRecord pojo) { - var builder = ImprintRecord.builder(SCHEMA_ID); + var builder = ImprintRecord.builder(SCHEMA_ID, 8); // Pre-size for 8 fields builder.field(0, pojo.id); builder.field(1, pojo.timestamp); builder.field(2, pojo.flags); diff --git a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java b/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java deleted file mode 100644 index 51c9f48..0000000 --- a/src/jmh/java/com/imprint/benchmark/SerializationBenchmark.java +++ /dev/null @@ -1,180 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.ImprintRecordBuilder; -import com.imprint.core.SchemaId; -import com.imprint.types.MapKey; -import com.imprint.types.Value; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; -import org.openjdk.jmh.runner.Runner; -import 
org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.concurrent.TimeUnit; - -/** - * Benchmarks for ImprintRecord serialization and deserialization operations. - */ -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 7, time = 1, timeUnit = TimeUnit.SECONDS) -@Fork(1) -public class SerializationBenchmark { - - private ImprintRecord smallRecord; - private ImprintRecord mediumRecord; - private ImprintRecord largeRecord; - - private ByteBuffer smallRecordBytes; - private ByteBuffer mediumRecordBytes; - private ByteBuffer largeRecordBytes; - - @Setup - public void setup() throws Exception { - // Create test records of varying sizes for deserialization benchmarks - smallRecord = createSmallRecord().build(); - mediumRecord = createMediumRecord().build(); - largeRecord = createLargeRecord().build(); - - // Pre-serialize for deserialization benchmarks - smallRecordBytes = smallRecord.serializeToBuffer(); - mediumRecordBytes = mediumRecord.serializeToBuffer(); - largeRecordBytes = largeRecord.serializeToBuffer(); - } - - // ===== SERIALIZATION BENCHMARKS ===== - - @Benchmark - public void buildAndSerializeSmallRecord(Blackhole bh) throws Exception { - ByteBuffer result = createSmallRecord().buildToBuffer(); - bh.consume(result); - } - - @Benchmark - public void buildAndSerializeMediumRecord(Blackhole bh) throws Exception { - ByteBuffer result = createMediumRecord().buildToBuffer(); - bh.consume(result); - } - - @Benchmark - public void buildAndSerializeLargeRecord(Blackhole bh) throws Exception { - ByteBuffer result = createLargeRecord().buildToBuffer(); - bh.consume(result); - } - - // ===== DESERIALIZATION BENCHMARKS 
===== - - @Benchmark - public void deserializeSmallRecord(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(smallRecordBytes.duplicate()); - bh.consume(result); - } - - @Benchmark - public void deserializeMediumRecord(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(mediumRecordBytes.duplicate()); - bh.consume(result); - } - - @Benchmark - public void deserializeLargeRecord(Blackhole bh) throws Exception { - ImprintRecord result = ImprintRecord.deserialize(largeRecordBytes.duplicate()); - bh.consume(result); - } - - // ===== HELPER METHODS ===== - - private ImprintRecordBuilder createSmallRecord() throws Exception { - // Small record: ~10 fields, simple types - return ImprintRecord.builder(new SchemaId(1, 0x12345678)) - .field(1, "Product") - .field(2, 12345) - .field(3, 99.99) - .field(4, true) - .field(5, "Electronics"); - } - - private ImprintRecordBuilder createMediumRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); - - // Medium record: ~50 fields, mixed types including arrays - builder.field(1, "Product"); - builder.field(2, 12345); - builder.field(3, 99.99); - builder.field(4, true); - builder.field(5, "Electronics"); - - // Add array field - var tags = Arrays.asList( - "popular", - "trending", - "bestseller" - ); - builder.field(6, tags); - - // Add map field (all string values for consistency) - var metadata = new HashMap(); - metadata.put("manufacturer", "TechCorp"); - metadata.put("model", "TC-2024"); - metadata.put("year", "2024"); - builder.field(7, metadata); - - // Add more fields for medium size - for (int i = 8; i <= 50; i++) { - builder.field(i, "field_" + i + "_value"); - } - - return builder; - } - - private ImprintRecordBuilder createLargeRecord() throws Exception { - var builder = ImprintRecord.builder(new SchemaId(1, 0x12345678)); - - // Large record: ~200 fields, complex nested structures - builder.field(1, "LargeProduct"); - 
builder.field(2, 12345); - builder.field(3, 99.99); - - // Large array - var largeArray = new ArrayList(); - for (int i = 0; i < 100; i++) { - largeArray.add("item_" + i); - } - builder.field(4, largeArray); - - // Large map - var largeMap = new HashMap(); - for (int i = 0; i < 50; i++) { - largeMap.put("key_" + i, "value_" + i); - } - builder.field(5, largeMap); - - // Many string fields - for (int i = 6; i <= 200; i++) { - builder.field(i, "this_is_a_longer_field_value_for_field_" + i + "_to_increase_record_size"); - } - - return builder; - } - - public static void main(String[] args) throws RunnerException { - Options opt = new OptionsBuilder() - .include(SerializationBenchmark.class.getSimpleName()) - .forks(1) - .warmupIterations(5) - .measurementIterations(5) - .mode(Mode.AverageTime) - .timeUnit(TimeUnit.NANOSECONDS) - .build(); - - new Runner(opt).run(); - } -} \ No newline at end of file diff --git a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java b/src/jmh/java/com/imprint/benchmark/StringBenchmark.java deleted file mode 100644 index 045940e..0000000 --- a/src/jmh/java/com/imprint/benchmark/StringBenchmark.java +++ /dev/null @@ -1,316 +0,0 @@ -package com.imprint.benchmark; - -import com.imprint.core.ImprintRecord; -import com.imprint.core.SchemaId; -import com.imprint.types.Value; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -import java.nio.ByteBuffer; -import java.util.concurrent.TimeUnit; - -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@Fork(1) -@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) -@SuppressWarnings("unused") -public class StringBenchmark { - - private static final SchemaId SCHEMA_ID = new SchemaId(1, 42); - - // Small strings (typical field names, short values) - private String smallString5; - private 
String smallString20; - private String smallString50; - - // Medium strings (typical text content) - private String mediumString500; - private String mediumString2K; - - // Large strings (document content, JSON payloads) - private String largeString10K; - private String largeString100K; - private String largeString1M; - - // Pre-serialized records for deserialization benchmarks - private byte[] serializedSmall5; - private byte[] serializedSmall20; - private byte[] serializedSmall50; - private byte[] serializedMedium500; - private byte[] serializedMedium2K; - private byte[] serializedLarge10K; - private byte[] serializedLarge100K; - private byte[] serializedLarge1M; - - private ImprintRecord preDeserializedSmall5; - private ImprintRecord preDeserializedMedium500; - private ImprintRecord preDeserializedLarge100K; - - @Setup - public void setup() throws Exception { - // Generate strings of different sizes - smallString5 = generateString(5); - smallString20 = generateString(20); - smallString50 = generateString(50); - mediumString500 = generateString(500); - mediumString2K = generateString(2 * 1024); - largeString10K = generateString(10 * 1024); - largeString100K = generateString(100 * 1024); - largeString1M = generateString(1024 * 1024); - - // Pre-serialize records for deserialization benchmarks - serializedSmall5 = bufferToArray(createStringRecord(smallString5).serializeToBuffer()); - serializedSmall20 = bufferToArray(createStringRecord(smallString20).serializeToBuffer()); - serializedSmall50 = bufferToArray(createStringRecord(smallString50).serializeToBuffer()); - serializedMedium500 = bufferToArray(createStringRecord(mediumString500).serializeToBuffer()); - serializedMedium2K = bufferToArray(createStringRecord(mediumString2K).serializeToBuffer()); - serializedLarge10K = bufferToArray(createStringRecord(largeString10K).serializeToBuffer()); - serializedLarge100K = bufferToArray(createStringRecord(largeString100K).serializeToBuffer()); - serializedLarge1M = 
bufferToArray(createStringRecord(largeString1M).serializeToBuffer()); - - preDeserializedSmall5 = ImprintRecord.deserialize(serializedSmall5); - preDeserializedMedium500 = ImprintRecord.deserialize(serializedMedium500); - preDeserializedLarge100K = ImprintRecord.deserialize(serializedLarge100K); - } - - private String generateString(int length) { - StringBuilder sb = new StringBuilder(length); - String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 "; - for (int i = 0; i < length; i++) { - sb.append(chars.charAt(i % chars.length())); - } - return sb.toString(); - } - - private ImprintRecord createStringRecord(String value) throws Exception { - return ImprintRecord.builder(SCHEMA_ID) - .field(1, value) - .build(); - } - - private String extractString(Value value) { - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } else if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } - return null; - } - - private byte[] bufferToArray(ByteBuffer buffer) { - byte[] array = new byte[buffer.remaining()]; - buffer.duplicate().get(array); - return array; - } - - // Serialization benchmarks - - @Benchmark - public ByteBuffer serializeSmallString5() throws Exception { - return createStringRecord(smallString5).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeSmallString20() throws Exception { - return createStringRecord(smallString20).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeSmallString50() throws Exception { - return createStringRecord(smallString50).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeMediumString500() throws Exception { - return createStringRecord(mediumString500).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeMediumString2K() throws Exception { - return createStringRecord(mediumString2K).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer 
serializeLargeString10K() throws Exception { - return createStringRecord(largeString10K).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeLargeString100K() throws Exception { - return createStringRecord(largeString100K).serializeToBuffer(); - } - - @Benchmark - public ByteBuffer serializeLargeString1M() throws Exception { - return createStringRecord(largeString1M).serializeToBuffer(); - } - - // Deserialization benchmarks - - @Benchmark - public ImprintRecord deserializeSmallString5() throws Exception { - return ImprintRecord.deserialize(serializedSmall5); - } - - @Benchmark - public ImprintRecord deserializeSmallString20() throws Exception { - return ImprintRecord.deserialize(serializedSmall20); - } - - @Benchmark - public ImprintRecord deserializeSmallString50() throws Exception { - return ImprintRecord.deserialize(serializedSmall50); - } - - @Benchmark - public ImprintRecord deserializeMediumString500() throws Exception { - return ImprintRecord.deserialize(serializedMedium500); - } - - @Benchmark - public ImprintRecord deserializeMediumString2K() throws Exception { - return ImprintRecord.deserialize(serializedMedium2K); - } - - @Benchmark - public ImprintRecord deserializeLargeString10K() throws Exception { - return ImprintRecord.deserialize(serializedLarge10K); - } - - @Benchmark - public ImprintRecord deserializeLargeString100K() throws Exception { - return ImprintRecord.deserialize(serializedLarge100K); - } - - @Benchmark - public ImprintRecord deserializeLargeString1M() throws Exception { - return ImprintRecord.deserialize(serializedLarge1M); - } - - // String access benchmarks - - @Benchmark - public String accessSmallString5() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - Value value = record.getValue(1); - return value != null ? 
extractString(value) : null; - } - - @Benchmark - public String accessMediumString500() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - Value value = record.getValue(1); - return value != null ? extractString(value) : null; - } - - @Benchmark - public String accessLargeString100K() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - Value value = record.getValue(1); - return value != null ? extractString(value) : null; - } - - // Raw bytes access benchmarks (zero-copy) - - @Benchmark - public ByteBuffer getRawBytesSmallString5() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedSmall5); - return record.getRawBytes(1); - } - - @Benchmark - public ByteBuffer getRawBytesMediumString500() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedMedium500); - return record.getRawBytes(1); - } - - @Benchmark - public ByteBuffer getRawBytesLargeString100K() throws Exception { - ImprintRecord record = ImprintRecord.deserialize(serializedLarge100K); - return record.getRawBytes(1); - } - - // Size measurement benchmarks - - @Benchmark - public int measureSmallString5Size() throws Exception { - return createStringRecord(smallString5).serializeToBuffer().remaining(); - } - - @Benchmark - public int measureMediumString500Size() throws Exception { - return createStringRecord(mediumString500).serializeToBuffer().remaining(); - } - - @Benchmark - public int measureLargeString100KSize() throws Exception { - return createStringRecord(largeString100K).serializeToBuffer().remaining(); - } - - // Pure string access benchmarks (no record deserialization overhead) - @Benchmark - public String pureStringAccessSmall5() throws Exception { - Value value = preDeserializedSmall5.getValue(1); - return value != null ? 
extractString(value) : null; - } - - @Benchmark - public String pureStringAccessMedium500() throws Exception { - Value value = preDeserializedMedium500.getValue(1); - return value != null ? extractString(value) : null; - } - - @Benchmark - public String pureStringAccessLarge100K() throws Exception { - Value value = preDeserializedLarge100K.getValue(1); - return value != null ? extractString(value) : null; - } - - // Test cached vs uncached access - @Benchmark - public String cachedStringAccessSmall5() throws Exception { - // Second access should hit cache - Value value1 = preDeserializedSmall5.getValue(1); - String result1 = value1 != null ? extractString(value1) : null; - Value value2 = preDeserializedSmall5.getValue(1); - return value2 != null ? extractString(value2) : null; - } - - public static void main(String[] args) throws Exception { - runDeserializationOnly(); - } - - public static void runAll() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName()) - .build(); - new Runner(opt).run(); - } - - /** - * Run only string deserialization benchmarks to measure the impact of - * ThreadLocal buffer pool optimization and fast/fallback path performance. - */ - public static void runDeserializationOnly() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName() + ".*deserialize.*") // Only deserialize methods - .forks(0) // Run in same JVM to avoid serialization issues - .build(); - new Runner(opt).run(); - } - - /** - * Run only pure string access benchmarks (no record deserialization overhead) - * to isolate string decode performance with ThreadLocal buffer optimization. 
- */ - public static void runStringAccessOnly() throws Exception { - var opt = new OptionsBuilder() - .include(StringBenchmark.class.getSimpleName() + ".*(pureStringAccess|cachedStringAccess).*") // Only pure string access methods - .forks(0) // Run in same JVM to avoid serialization issues - .build(); - new Runner(opt).run(); - } -} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java index 7587633..a6e63de 100644 --- a/src/main/java/com/imprint/core/ImprintFieldObjectMap.java +++ b/src/main/java/com/imprint/core/ImprintFieldObjectMap.java @@ -12,7 +12,7 @@ * - Sort values in place and return without allocation (subsequently poisons the map) */ final class ImprintFieldObjectMap { - private static final int DEFAULT_CAPACITY = 512; + private static final int DEFAULT_CAPACITY = 64; private static final float LOAD_FACTOR = 0.75f; private static final short EMPTY_KEY = -1; // Reserved empty marker (field IDs are >= 0) diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index bec6614..a8a745d 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -4,8 +4,7 @@ import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; import com.imprint.ops.ImprintOperations; -import com.imprint.types.TypeCode; -import com.imprint.types.Value; +import com.imprint.types.*; import com.imprint.util.VarInt; import lombok.AccessLevel; @@ -72,6 +71,15 @@ public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); } + /** + * Create a pre-sized builder for constructing new ImprintRecord instances. 
+ * @param schemaId Schema identifier + * @param expectedFieldCount Expected number of fields to optimize memory allocation + */ + public static ImprintRecordBuilder builder(SchemaId schemaId, int expectedFieldCount) { + return new ImprintRecordBuilder(schemaId, expectedFieldCount); + } + public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } @@ -177,17 +185,17 @@ public ByteBuffer getRawBytes(short fieldId) { } /** - * Get a field value by ID. + * Get a field value by ID as Object. * Uses zero-copy binary search to locate the field. */ - public Value getValue(int fieldId) throws ImprintException { + public Object getValue(int fieldId) throws ImprintException { var entry = getDirectoryView().findEntry(fieldId); if (entry == null) return null; var fieldBuffer = getFieldBuffer(fieldId); if (fieldBuffer == null) return null; - return deserializeValue(entry.getTypeCode(), fieldBuffer); + return deserializePrimitive(entry.getTypeCode(), fieldBuffer); } /** @@ -207,53 +215,45 @@ public int getFieldCount() { // ========== TYPED GETTERS ========== public String getString(int fieldId) throws ImprintException { - var value = getValidatedValue(fieldId, "STRING"); - if (value instanceof Value.StringValue) - return ((Value.StringValue) value).getValue(); - if (value instanceof Value.StringBufferValue) - return ((Value.StringBufferValue) value).getValue(); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); + return (String) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.STRING, "STRING"); } public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + return (Integer) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.INT32, "INT32"); } public long getInt64(int fieldId) throws ImprintException { - return 
getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + return (Long) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.INT64, "INT64"); } public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + return (Boolean) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.BOOL, "BOOL"); } public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + return (Float) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.FLOAT32, "FLOAT32"); } public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + return (Double) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.FLOAT64, "FLOAT64"); } public byte[] getBytes(int fieldId) throws ImprintException { - var value = getValidatedValue(fieldId, "BYTES"); - if (value instanceof Value.BytesValue) - return ((Value.BytesValue) value).getValue(); - if (value instanceof Value.BytesBufferValue) - return ((Value.BytesBufferValue) value).getValue(); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); + return (byte[]) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.BYTES, "BYTES"); } - public java.util.List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + @SuppressWarnings("unchecked") + public java.util.List getArray(int fieldId) throws ImprintException { + return (java.util.List) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.ARRAY, "ARRAY"); } - public java.util.Map getMap(int fieldId) throws ImprintException { - return 
getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + @SuppressWarnings("unchecked") + public java.util.Map getMap(int fieldId) throws ImprintException { + return (java.util.Map) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.MAP, "MAP"); } public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, com.imprint.types.TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); + return (ImprintRecord) getTypedPrimitive(fieldId, com.imprint.types.TypeCode.ROW, "ROW"); } /** @@ -279,23 +279,24 @@ public int getSerializedSize() { /** - * Get and validate a value exists and is not null. + * Get and validate a field exists, is not null, and has the expected type. */ - private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { - var value = getValue(fieldId); - if (value == null) + private Object getTypedPrimitive(int fieldId, com.imprint.types.TypeCode expectedTypeCode, String typeName) throws ImprintException { + var entry = getDirectoryView().findEntry(fieldId); + if (entry == null) throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); - if (value.getTypeCode() == com.imprint.types.TypeCode.NULL) + + if (entry.getTypeCode() == com.imprint.types.TypeCode.NULL) throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); - return value; - } + + if (entry.getTypeCode() != expectedTypeCode) + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + entry.getTypeCode() + ", expected " + typeName); + + var fieldBuffer = getFieldBuffer(fieldId); + if (fieldBuffer == null) + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " buffer not found"); - private T getTypedValueOrThrow(int fieldId, com.imprint.types.TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) - throws 
ImprintException { - var value = getValidatedValue(fieldId, expectedTypeName); - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) - return expectedValueClass.cast(value); - throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); + return deserializePrimitive(entry.getTypeCode(), fieldBuffer); } /** @@ -516,36 +517,6 @@ public Directory next() { } } - static void writeDirectoryToBuffer(short[] sortedKeys, Object[] sortedValues, int[] offsets, int fieldCount, ByteBuffer buffer) { - // Optimize VarInt encoding for common case (< 128 fields = single byte) - if (fieldCount < 128) { - buffer.put((byte) fieldCount); - } else { - VarInt.encode(fieldCount, buffer); - } - - // Early return for empty directory - if (fieldCount == 0) { - return; - } - - // Tight loop optimization: minimize method calls and casts - for (int i = 0; i < fieldCount; i++) { - var entry = (ImprintRecordBuilder.ValueWithType) sortedValues[i]; - - // Get current position once, then batch write - int pos = buffer.position(); - - // Write all 7 bytes for this entry in sequence - buffer.putShort(pos, sortedKeys[i]); // bytes 0-1: field ID - buffer.put(pos + 2, entry.typeCode); // byte 2: type code - buffer.putInt(pos + 3, offsets[i]); // bytes 3-6: offset - - // Advance buffer position by 7 bytes - buffer.position(pos + 7); - } - } - /** * Parse a header from a ByteBuffer without advancing the buffer position. * Utility method shared between {@link ImprintRecord} and {@link ImprintOperations}. 
@@ -629,7 +600,7 @@ private static Header parseHeader(ByteBuffer buffer) throws ImprintException { return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } - private Value deserializeValue(com.imprint.types.TypeCode typeCode, ByteBuffer buffer) throws ImprintException { + private Object deserializePrimitive(com.imprint.types.TypeCode typeCode, ByteBuffer buffer) throws ImprintException { var valueBuffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { case NULL: @@ -640,14 +611,81 @@ private Value deserializeValue(com.imprint.types.TypeCode typeCode, ByteBuffer b case FLOAT64: case BYTES: case STRING: + return ImprintDeserializers.deserializePrimitive(valueBuffer, typeCode); case ARRAY: + return deserializePrimitiveArray(valueBuffer); case MAP: - return typeCode.getHandler().deserialize(valueBuffer); + return deserializePrimitiveMap(valueBuffer); case ROW: - var nestedRecord = deserialize(valueBuffer); - return Value.fromRow(nestedRecord); + return deserialize(valueBuffer); default: throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } + + private java.util.List deserializePrimitiveArray(ByteBuffer buffer) throws ImprintException { + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + + if (length == 0) { + return java.util.Collections.emptyList(); + } + + if (buffer.remaining() < 1) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); + } + var elementType = TypeCode.fromByte(buffer.get()); + var elements = new ArrayList<>(length); + + for (int i = 0; i < length; i++) { + Object element; + if (elementType == TypeCode.ARRAY) { + element = deserializePrimitiveArray(buffer); + } else if (elementType == TypeCode.MAP) { + element = deserializePrimitiveMap(buffer); + } else if (elementType == TypeCode.ROW) { + element = deserialize(buffer); + } else { + element = 
ImprintDeserializers.deserializePrimitive(buffer, elementType); + } + elements.add(element); + } + + return elements; + } + + private java.util.Map deserializePrimitiveMap(ByteBuffer buffer) throws ImprintException { + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + + if (length == 0) { + return java.util.Collections.emptyMap(); + } + + if (buffer.remaining() < 2) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); + } + var keyType = TypeCode.fromByte(buffer.get()); + var valueType = TypeCode.fromByte(buffer.get()); + var map = new java.util.HashMap<>(length); + + for (int i = 0; i < length; i++) { + var keyPrimitive = ImprintDeserializers.deserializePrimitive(buffer, keyType); + + Object valuePrimitive; + if (valueType == TypeCode.ARRAY) { + valuePrimitive = deserializePrimitiveArray(buffer); + } else if (valueType == TypeCode.MAP) { + valuePrimitive = deserializePrimitiveMap(buffer); + } else if (valueType == TypeCode.ROW) { + valuePrimitive = deserialize(buffer); + } else { + valuePrimitive = ImprintDeserializers.deserializePrimitive(buffer, valueType); + } + + map.put(keyPrimitive, valuePrimitive); + } + + return map; + } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index dc05984..17c337d 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -3,10 +3,9 @@ import com.imprint.Constants; import com.imprint.error.ErrorType; import com.imprint.error.ImprintException; +import com.imprint.types.ImprintSerializers; import com.imprint.types.MapKey; import com.imprint.types.TypeCode; -import com.imprint.types.Value; -import com.imprint.util.VarInt; import lombok.SneakyThrows; import java.nio.BufferOverflowException; @@ -38,92 +37,89 @@ @SuppressWarnings("unused") public 
final class ImprintRecordBuilder { private final SchemaId schemaId; - private final ImprintFieldObjectMap fields = new ImprintFieldObjectMap<>(); + private final ImprintFieldObjectMap fields; private int estimatedPayloadSize = 0; - // Minimal wrapper to avoid getTypeCode() virtual dispatch - static final class ValueWithType { - final Value value; - final byte typeCode; - - ValueWithType(Value value, byte typeCode) { - this.value = value; - this.typeCode = typeCode; - } + // Direct primitive storage to avoid Value object creation + @lombok.Value + static class FieldValue { + byte typeCode; + Object value; + + // Fast factory methods for primitives + static FieldValue ofInt32(int value) { return new FieldValue(TypeCode.INT32.getCode(), value); } + static FieldValue ofInt64(long value) { return new FieldValue(TypeCode.INT64.getCode(), value); } + static FieldValue ofFloat32(float value) { return new FieldValue(TypeCode.FLOAT32.getCode(), value); } + static FieldValue ofFloat64(double value) { return new FieldValue(TypeCode.FLOAT64.getCode(), value); } + static FieldValue ofBool(boolean value) { return new FieldValue(TypeCode.BOOL.getCode(), value); } + static FieldValue ofString(String value) { return new FieldValue(TypeCode.STRING.getCode(), value); } + static FieldValue ofBytes(byte[] value) { return new FieldValue(TypeCode.BYTES.getCode(), value); } + static FieldValue ofArray(List value) { return new FieldValue(TypeCode.ARRAY.getCode(), value); } + static FieldValue ofMap(Map value) { return new FieldValue(TypeCode.MAP.getCode(), value); } + static FieldValue ofNull() { return new FieldValue(TypeCode.NULL.getCode(), null); } } ImprintRecordBuilder(SchemaId schemaId) { + this(schemaId, 16); // Default capacity for typical usage (8-16 fields) + } + + ImprintRecordBuilder(SchemaId schemaId, int expectedFieldCount) { this.schemaId = Objects.requireNonNull(schemaId, "SchemaId cannot be null"); + this.fields = new ImprintFieldObjectMap<>(expectedFieldCount); } - // 
Primitive types with automatic Value wrapping + public ImprintRecordBuilder field(int id, boolean value) { - return addField(id, Value.fromBoolean(value), TypeCode.BOOL); + return addField(id, FieldValue.ofBool(value)); } public ImprintRecordBuilder field(int id, int value) { - return addField(id, Value.fromInt32(value), TypeCode.INT32); + return addField(id, FieldValue.ofInt32(value)); } public ImprintRecordBuilder field(int id, long value) { - return addField(id, Value.fromInt64(value), TypeCode.INT64); + return addField(id, FieldValue.ofInt64(value)); } public ImprintRecordBuilder field(int id, float value) { - return addField(id, Value.fromFloat32(value), TypeCode.FLOAT32); + return addField(id, FieldValue.ofFloat32(value)); } public ImprintRecordBuilder field(int id, double value) { - return addField(id, Value.fromFloat64(value), TypeCode.FLOAT64); + return addField(id, FieldValue.ofFloat64(value)); } public ImprintRecordBuilder field(int id, String value) { - return addField(id, Value.fromString(value), TypeCode.STRING); + return addField(id, FieldValue.ofString(value)); } public ImprintRecordBuilder field(int id, byte[] value) { - return addField(id, Value.fromBytes(value), TypeCode.BYTES); + return addField(id, FieldValue.ofBytes(value)); } - // Collections with automatic conversion + // Collections - store as raw collections, convert during serialization public ImprintRecordBuilder field(int id, List values) { - var convertedValues = new ArrayList(values.size()); - for (var item : values) { - convertedValues.add(convertToValue(item)); - } - return addField(id, Value.fromArray(convertedValues), TypeCode.ARRAY); + return addField(id, FieldValue.ofArray(values)); } public ImprintRecordBuilder field(int id, Map map) { - var convertedMap = new HashMap(map.size()); - for (var entry : map.entrySet()) { - var key = convertToMapKey(entry.getKey()); - var value = convertToValue(entry.getValue()); - convertedMap.put(key, value); - } - return addField(id, 
Value.fromMap(convertedMap), TypeCode.MAP); + return addField(id, FieldValue.ofMap(map)); } // Nested records public ImprintRecordBuilder field(int id, ImprintRecord nestedRecord) { - return addField(id, Value.fromRow(nestedRecord), TypeCode.ROW); + return addField(id, new FieldValue(TypeCode.ROW.getCode(), nestedRecord)); } // Explicit null field public ImprintRecordBuilder nullField(int id) { - return addField(id, Value.nullValue(), TypeCode.NULL); - } - - // Direct Value API (escape hatch for advanced usage) - public ImprintRecordBuilder field(int id, Value value) { - return addField(id, value, value.getTypeCode()); // Only virtual call when type is unknown + return addField(id, FieldValue.ofNull()); } // Conditional field addition public ImprintRecordBuilder fieldIf(boolean condition, int id, Object value) { if (condition) { - var convertedValue = convertToValue(value); - return addField(id, convertedValue, convertedValue.getTypeCode()); + return addField(id, convertToFieldValue(value)); } return this; } @@ -135,8 +131,7 @@ public ImprintRecordBuilder fieldIfNotNull(int id, Object value) { // Bulk operations public ImprintRecordBuilder fields(Map fieldsMap) { for (var entry : fieldsMap.entrySet()) { - var convertedValue = convertToValue(entry.getValue()); - addField(entry.getKey(), convertedValue, convertedValue.getTypeCode()); + addField(entry.getKey(), convertToFieldValue(entry.getValue())); } return this; } @@ -207,26 +202,22 @@ public ByteBuffer buildToBuffer() throws ImprintException { * If a field with the given ID already exists, it will be replaced. 
* * @param id the field ID - * @param value the field value (cannot be null - use nullField() for explicit nulls) - * @param typeCode the known type code (avoids virtual call) + * @param fieldValue the field value with type code * @return this builder for method chaining */ @SneakyThrows - private ImprintRecordBuilder addField(int id, Value value, TypeCode typeCode) { - Objects.requireNonNull(value, "Value cannot be null - use nullField() for explicit null values"); + private ImprintRecordBuilder addField(int id, FieldValue fieldValue) { + Objects.requireNonNull(fieldValue, "FieldValue cannot be null"); - // Calculate size for tracking (but don't store it) - int newSize = fastEstimateFieldSize(value, typeCode); - - // Create wrapper to avoid virtual dispatch later - var newEntry = new ValueWithType(value, typeCode.getCode()); + // Calculate size for tracking using fast heuristics + int newSize = estimateFieldSize(fieldValue); // Efficient put with old value return - single hash operation - var oldEntry = fields.putAndReturnOld(id, newEntry); + var oldEntry = fields.putAndReturnOld(id, fieldValue); if (oldEntry != null) { // Field replacement - subtract old size, add new size - int oldSize = fastEstimateFieldSize(oldEntry.value, TypeCode.fromByte(oldEntry.typeCode)); + int oldSize = estimateFieldSize(oldEntry); estimatedPayloadSize += newSize - oldSize; } else { // New field - just add new size @@ -236,59 +227,41 @@ private ImprintRecordBuilder addField(int id, Value value, TypeCode typeCode) { return this; } - private Value convertToValue(Object obj) { + private FieldValue convertToFieldValue(Object obj) { if (obj == null) { - return Value.nullValue(); - } - - if (obj instanceof Value) { - return (Value) obj; + return FieldValue.ofNull(); } - // Auto-boxing conversion + // Direct primitive conversion - no Value object creation if (obj instanceof Boolean) { - return Value.fromBoolean((Boolean) obj); + return FieldValue.ofBool((Boolean) obj); } if (obj instanceof 
Integer) { - return Value.fromInt32((Integer) obj); + return FieldValue.ofInt32((Integer) obj); } if (obj instanceof Long) { - return Value.fromInt64((Long) obj); + return FieldValue.ofInt64((Long) obj); } if (obj instanceof Float) { - return Value.fromFloat32((Float) obj); + return FieldValue.ofFloat32((Float) obj); } if (obj instanceof Double) { - return Value.fromFloat64((Double) obj); + return FieldValue.ofFloat64((Double) obj); } if (obj instanceof String) { - return Value.fromString((String) obj); + return FieldValue.ofString((String) obj); } if (obj instanceof byte[]) { - return Value.fromBytes((byte[]) obj); + return FieldValue.ofBytes((byte[]) obj); } if (obj instanceof List) { - @SuppressWarnings("unchecked") - List list = (List) obj; - var convertedValues = new ArrayList(list.size()); - for (var item : list) { - convertedValues.add(convertToValue(item)); - } - return Value.fromArray(convertedValues); + return FieldValue.ofArray((List) obj); } if (obj instanceof Map) { - @SuppressWarnings("unchecked") - Map map = (Map) obj; - var convertedMap = new HashMap(map.size()); - for (var entry : map.entrySet()) { - var key = convertToMapKey(entry.getKey()); - var value = convertToValue(entry.getValue()); - convertedMap.put(key, value); - } - return Value.fromMap(convertedMap); + return FieldValue.ofMap((Map) obj); } if (obj instanceof ImprintRecord) { - return Value.fromRow((ImprintRecord) obj); + return new FieldValue(TypeCode.ROW.getCode(), obj); } throw new IllegalArgumentException("Unsupported type for auto-conversion: " + obj.getClass().getName()); @@ -312,47 +285,16 @@ private MapKey convertToMapKey(Object obj) { } /** - * Fast conservative field size estimation optimized for performance. - * Uses minimal operations while handling both normal and large data correctly. + * Fast field size estimation using heuristics for performance. 
*/ - private int fastEstimateFieldSize(Value value, TypeCode typeCode) { - switch (typeCode) { - case NULL: return 0; - case BOOL: return 1; - case INT32: - case FLOAT32: - return 4; - case INT64: - case FLOAT64: - return 8; - case STRING: - // Smart estimation: check if it's a large string with minimal overhead - if (value instanceof Value.StringValue) { - int len = ((Value.StringValue) value).getValue().length(); - return len > 1000 ? 5 + len * 3 : 256; // UTF-8 worst case for large strings - } else { - int remaining = ((Value.StringBufferValue) value).getBuffer().remaining(); - return remaining > 1000 ? 5 + remaining : 256; - } - case BYTES: - // Smart estimation: check if it's large bytes with minimal overhead - if (value instanceof Value.BytesValue) { - int len = ((Value.BytesValue) value).getValue().length; - return len > 1000 ? 5 + len : 256; - } else { - int remaining = ((Value.BytesBufferValue) value).getBuffer().remaining(); - return remaining > 1000 ? 5 + remaining : 256; - } - case ARRAY: - return 512; // Conservative: most arrays are < 512 bytes - case MAP: - return 512; // Conservative: most maps are < 512 bytes - case ROW: - return 1024; // Conservative: most nested records are < 1KB - - default: - return 64; // Fallback + private int estimateFieldSize(FieldValue fieldValue) { + TypeCode typeCode; + try { + typeCode = TypeCode.fromByte(fieldValue.typeCode); + } catch (ImprintException e) { + throw new RuntimeException("Invalid type code in FieldValue: " + fieldValue.typeCode, e); } + return ImprintSerializers.estimateSize(typeCode, fieldValue.value); } /** @@ -392,58 +334,91 @@ private PayloadSerializationResult serializePayload(Object[] sortedFields, int f private PayloadSerializationResult doSerializePayload(Object[] sortedFields, int fieldCount, ByteBuffer payloadBuffer) throws ImprintException { int[] offsets = new int[fieldCount]; for (int i = 0; i < fieldCount; i++) { - var entry = (ValueWithType) sortedFields[i]; + var fieldValue = 
(FieldValue) sortedFields[i]; offsets[i] = payloadBuffer.position(); - serializeValue(entry.value, payloadBuffer); + serializeFieldValue(fieldValue, payloadBuffer); } payloadBuffer.flip(); var payloadView = payloadBuffer.slice().asReadOnlyBuffer(); return new PayloadSerializationResult(offsets, payloadView); } - private void serializeValue(Value value, ByteBuffer buffer) throws ImprintException { - var typeCode = value.getTypeCode(); + private void serializeFieldValue(FieldValue fieldValue, ByteBuffer buffer) throws ImprintException { + var typeCode = TypeCode.fromByte(fieldValue.typeCode); + var value = fieldValue.value; switch (typeCode) { case NULL: - // NULL values have no payload + ImprintSerializers.serializeNull(buffer); break; case BOOL: - Serializers.serializeBool((Value.BoolValue) value, buffer); + ImprintSerializers.serializeBool((Boolean) value, buffer); break; case INT32: - Serializers.serializeInt32((Value.Int32Value) value, buffer); + ImprintSerializers.serializeInt32((Integer) value, buffer); break; case INT64: - Serializers.serializeInt64((Value.Int64Value) value, buffer); + ImprintSerializers.serializeInt64((Long) value, buffer); break; case FLOAT32: - Serializers.serializeFloat32((Value.Float32Value) value, buffer); + ImprintSerializers.serializeFloat32((Float) value, buffer); break; case FLOAT64: - Serializers.serializeFloat64((Value.Float64Value) value, buffer); + ImprintSerializers.serializeFloat64((Double) value, buffer); break; case STRING: - Serializers.serializeString(value, buffer); + ImprintSerializers.serializeString((String) value, buffer); break; case BYTES: - Serializers.serializeBytes(value, buffer); + ImprintSerializers.serializeBytes((byte[]) value, buffer); break; case ARRAY: - Serializers.serializeArray((Value.ArrayValue) value, buffer); + serializeArray((List) value, buffer); break; case MAP: - Serializers.serializeMap((Value.MapValue) value, buffer); + serializeMap((Map) value, buffer); break; case ROW: - // Keep existing 
nested record serialization - Value.RowValue rowValue = (Value.RowValue) value; - var serializedRow = rowValue.getValue().serializeToBuffer(); + // Nested record serialization + var nestedRecord = (ImprintRecord) value; + var serializedRow = nestedRecord.serializeToBuffer(); buffer.put(serializedRow); break; default: throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + typeCode); } } + + private void serializeArray(List list, ByteBuffer buffer) throws ImprintException { + ImprintSerializers.serializeArray(list, buffer, + this::getTypeCodeForObject, + this::serializeObjectDirect); + } + + private void serializeMap(Map map, ByteBuffer buffer) throws ImprintException { + ImprintSerializers.serializeMap(map, buffer, + this::convertToMapKey, + this::getTypeCodeForObject, + this::serializeObjectDirect); + } + + // Helper methods for static serializers + private TypeCode getTypeCodeForObject(Object obj) { + var fieldValue = convertToFieldValue(obj); + try { + return TypeCode.fromByte(fieldValue.typeCode); + } catch (ImprintException e) { + throw new RuntimeException("Invalid type code", e); + } + } + + private void serializeObjectDirect(Object obj, ByteBuffer buffer) { + try { + var fieldValue = convertToFieldValue(obj); + serializeFieldValue(fieldValue, buffer); + } catch (ImprintException e) { + throw new RuntimeException("Serialization failed", e); + } + } /** * Get fields sorted by ID from the map. 
@@ -472,8 +447,8 @@ private static ByteBuffer serializeToBuffer(SchemaId schemaId, short[] sortedKey finalBuffer.putInt(header.getSchemaId().getSchemaHash()); finalBuffer.putInt(header.getPayloadSize()); - // Write directory directly to final buffer - ImprintRecord.writeDirectoryToBuffer(sortedKeys, sortedValues, offsets, fieldCount, finalBuffer); + // Write directory with FieldValue type codes + writeDirectoryToBuffer(sortedKeys, sortedValues, offsets, fieldCount, finalBuffer); // Write payload finalBuffer.put(payload); @@ -481,181 +456,38 @@ private static ByteBuffer serializeToBuffer(SchemaId schemaId, short[] sortedKey finalBuffer.flip(); return finalBuffer.asReadOnlyBuffer(); } - + /** - * Direct serializers that avoid virtual dispatch overhead. + * Write directory entries directly to buffer for FieldValue objects. */ - static class Serializers { - static void serializeBool(Value.BoolValue value, ByteBuffer buffer) { - buffer.put((byte) (value.getValue() ? 1 : 0)); - } - - static void serializeInt32(Value.Int32Value value, ByteBuffer buffer) { - buffer.putInt(value.getValue()); - } - - static void serializeInt64(Value.Int64Value value, ByteBuffer buffer) { - buffer.putLong(value.getValue()); - } - - static void serializeFloat32(Value.Float32Value value, ByteBuffer buffer) { - buffer.putFloat(value.getValue()); - } - - static void serializeFloat64(Value.Float64Value value, ByteBuffer buffer) { - buffer.putDouble(value.getValue()); - } - - static void serializeString(Value value, ByteBuffer buffer) { - if (value instanceof Value.StringValue) { - var stringValue = (Value.StringValue) value; - var utf8Bytes = stringValue.getUtf8Bytes(); // Already cached! 
- VarInt.encode(utf8Bytes.length, buffer); - buffer.put(utf8Bytes); - } else { - var bufferValue = (Value.StringBufferValue) value; - var stringBuffer = bufferValue.getBuffer(); - VarInt.encode(stringBuffer.remaining(), buffer); - buffer.put(stringBuffer); - } - } - - static void serializeBytes(Value value, ByteBuffer buffer) { - if (value instanceof Value.BytesBufferValue) { - var bufferValue = (Value.BytesBufferValue) value; - var bytesBuffer = bufferValue.getBuffer(); - VarInt.encode(bytesBuffer.remaining(), buffer); - buffer.put(bytesBuffer); - } else { - var bytesValue = (Value.BytesValue) value; - byte[] bytes = bytesValue.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - } - } - - static void serializeArray(Value.ArrayValue value, ByteBuffer buffer) throws ImprintException { - var elements = value.getValue(); - VarInt.encode(elements.size(), buffer); - - if (elements.isEmpty()) return; - - var elementType = elements.get(0).getTypeCode(); - buffer.put(elementType.getCode()); - - for (var element : elements) { - if (element.getTypeCode() != elementType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Array elements must have same type code: " + - element.getTypeCode() + " != " + elementType); - } - // Recursive call to serialize each element - serializeValueByType(element, elementType, buffer); - } - } - - static void serializeMap(Value.MapValue value, ByteBuffer buffer) throws ImprintException { - var map = value.getValue(); - VarInt.encode(map.size(), buffer); - - if (map.isEmpty()) return; - - var iterator = map.entrySet().iterator(); - var first = iterator.next(); - var keyType = first.getKey().getTypeCode(); - var valueType = first.getValue().getTypeCode(); - - buffer.put(keyType.getCode()); - buffer.put(valueType.getCode()); - - serializeMapKey(first.getKey(), buffer); - serializeValueByType(first.getValue(), valueType, buffer); - - while (iterator.hasNext()) { - var entry = iterator.next(); - if 
(entry.getKey().getTypeCode() != keyType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map keys must have same type code: " + - entry.getKey().getTypeCode() + " != " + keyType); - } - if (entry.getValue().getTypeCode() != valueType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map values must have same type code: " + - entry.getValue().getTypeCode() + " != " + valueType); - } - - serializeMapKey(entry.getKey(), buffer); - serializeValueByType(entry.getValue(), valueType, buffer); - } + private static void writeDirectoryToBuffer(short[] sortedKeys, Object[] sortedValues, int[] offsets, int fieldCount, ByteBuffer buffer) { + // Write field count at the beginning of directory + // Optimize VarInt encoding for common case (< 128 fields = single byte) + if (fieldCount < 128) { + buffer.put((byte) fieldCount); + } else { + com.imprint.util.VarInt.encode(fieldCount, buffer); } - // Helper method to avoid infinite recursion - private static void serializeValueByType(Value value, TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - switch (typeCode) { - case NULL: - break; - case BOOL: - serializeBool((Value.BoolValue) value, buffer); - break; - case INT32: - serializeInt32((Value.Int32Value) value, buffer); - break; - case INT64: - serializeInt64((Value.Int64Value) value, buffer); - break; - case FLOAT32: - serializeFloat32((Value.Float32Value) value, buffer); - break; - case FLOAT64: - serializeFloat64((Value.Float64Value) value, buffer); - break; - case STRING: - serializeString(value, buffer); - break; - case BYTES: - serializeBytes(value, buffer); - break; - case ARRAY: - serializeArray((Value.ArrayValue) value, buffer); - break; - case MAP: - serializeMap((Value.MapValue) value, buffer); - break; - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + typeCode); - } + // Early return for empty directory + if (fieldCount == 0) { + return; } - private static void serializeMapKey(MapKey key, 
ByteBuffer buffer) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: - MapKey.Int32Key int32Key = (MapKey.Int32Key) key; - buffer.putInt(int32Key.getValue()); - break; - - case INT64: - MapKey.Int64Key int64Key = (MapKey.Int64Key) key; - buffer.putLong(int64Key.getValue()); - break; - - case BYTES: - MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; - byte[] bytes = bytesKey.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - break; - - case STRING: - MapKey.StringKey stringKey = (MapKey.StringKey) key; - byte[] stringBytes = stringKey.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8); - VarInt.encode(stringBytes.length, buffer); - buffer.put(stringBytes); - break; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); - } + for (int i = 0; i < fieldCount; i++) { + var fieldValue = (FieldValue) sortedValues[i]; + + // Get current position once, then batch write + int pos = buffer.position(); + + // Write all 7 bytes for this entry in sequence + buffer.putShort(pos, sortedKeys[i]); // bytes 0-1: field ID + buffer.put(pos + 2, fieldValue.typeCode); // byte 2: type code + buffer.putInt(pos + 3, offsets[i]); // bytes 3-6: offset + + // Advance buffer position by 7 bytes + buffer.position(pos + 7); } } + } diff --git a/src/main/java/com/imprint/ops/ImprintOperations.java b/src/main/java/com/imprint/ops/ImprintOperations.java index 52ec5a0..54e594a 100644 --- a/src/main/java/com/imprint/ops/ImprintOperations.java +++ b/src/main/java/com/imprint/ops/ImprintOperations.java @@ -76,8 +76,8 @@ private static ByteBuffer mergeRawSections(Header firstHeader, ImprintRecord.Buf int totalMergedPayloadSize = 0; int currentMergedOffset = 0; - RawDirectoryEntry firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; - RawDirectoryEntry secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; + var firstEntry = firstDirIter.hasNext() ? 
firstDirIter.next() : null; + var secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; // Merge directories and collect payload chunks while (firstEntry != null || secondEntry != null) { @@ -90,9 +90,9 @@ private static ByteBuffer mergeRawSections(Header firstHeader, ImprintRecord.Buf sourcePayload = getFieldPayload(firstSections.payloadBuffer, firstEntry, firstDirIter); // Skip duplicate in second if present - if (secondEntry != null && firstEntry.fieldId == secondEntry.fieldId) { + if (secondEntry != null && firstEntry.fieldId == secondEntry.fieldId) secondEntry = secondDirIter.hasNext() ? secondDirIter.next() : null; - } + firstEntry = firstDirIter.hasNext() ? firstDirIter.next() : null; } else { // Take from second diff --git a/src/main/java/com/imprint/types/MapKey.java b/src/main/java/com/imprint/types/MapKey.java index c0e0747..640d26b 100644 --- a/src/main/java/com/imprint/types/MapKey.java +++ b/src/main/java/com/imprint/types/MapKey.java @@ -35,45 +35,45 @@ public static MapKey fromString(String value) { return new StringKey(value); } - public static MapKey fromValue(Value value) throws ImprintException { - switch (value.getTypeCode()) { + /** + * Create MapKey from primitive object and type code (optimized, no Value objects). 
+ */ + public static MapKey fromPrimitive(TypeCode typeCode, Object primitiveValue) throws ImprintException { + switch (typeCode) { case INT32: - return fromInt32(((Value.Int32Value) value).getValue()); + return fromInt32((Integer) primitiveValue); case INT64: - return fromInt64(((Value.Int64Value) value).getValue()); + return fromInt64((Long) primitiveValue); case BYTES: - if (value instanceof Value.BytesBufferValue) { - return fromBytes(((Value.BytesBufferValue) value).getValue()); - } else { - return fromBytes(((Value.BytesValue) value).getValue()); - } + return fromBytes((byte[]) primitiveValue); case STRING: - if (value instanceof Value.StringBufferValue) { - return fromString(((Value.StringBufferValue) value).getValue()); - } else { - return fromString(((Value.StringValue) value).getValue()); - } + return fromString((String) primitiveValue); default: throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Cannot convert " + value.getTypeCode() + " to MapKey"); + "Cannot convert " + typeCode + " to MapKey"); } } - public Value toValue() { + + /** + * Get the primitive value as Object (optimized, no Value objects). 
+ */ + public Object getPrimitiveValue() { switch (getTypeCode()) { case INT32: - return Value.fromInt32(((Int32Key) this).getValue()); + return ((Int32Key) this).getValue(); case INT64: - return Value.fromInt64(((Int64Key) this).getValue()); + return ((Int64Key) this).getValue(); case BYTES: - return Value.fromBytes(((BytesKey) this).getValue()); + return ((BytesKey) this).getValue(); case STRING: - return Value.fromString(((StringKey) this).getValue()); + return ((StringKey) this).getValue(); default: throw new IllegalStateException("Unknown MapKey type: " + getTypeCode()); } } + @Getter @EqualsAndHashCode(callSuper = false) public static class Int32Key extends MapKey { diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index 3447f8b..a2e63e2 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -8,21 +8,20 @@ * Type codes for Imprint values. */ public enum TypeCode { - NULL(0x0, TypeHandler.NULL), - BOOL(0x1, TypeHandler.BOOL), - INT32(0x2, TypeHandler.INT32), - INT64(0x3, TypeHandler.INT64), - FLOAT32(0x4, TypeHandler.FLOAT32), - FLOAT64(0x5, TypeHandler.FLOAT64), - BYTES(0x6, TypeHandler.BYTES), - STRING(0x7, TypeHandler.STRING), - ARRAY(0x8, TypeHandler.ARRAY), - MAP(0x9, TypeHandler.MAP), - ROW(0xA, null); // TODO: implement (basically a placeholder for user-defined type) + NULL(0x0), + BOOL(0x1), + INT32(0x2), + INT64(0x3), + FLOAT32(0x4), + FLOAT64(0x5), + BYTES(0x6), + STRING(0x7), + ARRAY(0x8), + MAP(0x9), + ROW(0xA); // TODO: implement (basically a placeholder for user-defined type) @Getter private final byte code; - private final TypeHandler handler; private static final TypeCode[] LOOKUP = new TypeCode[11]; @@ -32,16 +31,8 @@ public enum TypeCode { } } - TypeCode(int code, TypeHandler handler) { + TypeCode(int code) { this.code = (byte) code; - this.handler = handler; - } - - public TypeHandler getHandler() { - if (handler == null) { - 
throw new UnsupportedOperationException("Handler not implemented for " + this); - } - return handler; } public static TypeCode fromByte(byte code) throws ImprintException { diff --git a/src/main/java/com/imprint/types/TypeHandler.java b/src/main/java/com/imprint/types/TypeHandler.java deleted file mode 100644 index dbc875f..0000000 --- a/src/main/java/com/imprint/types/TypeHandler.java +++ /dev/null @@ -1,442 +0,0 @@ -package com.imprint.types; - -import com.imprint.error.ErrorType; -import com.imprint.error.ImprintException; -import com.imprint.util.VarInt; - -import java.nio.ByteBuffer; -import java.util.*; - -/** - * Interface for handling type-specific serialization, deserialization, and size estimation. - * Note that primitives are basically boxed here which could impact performance slightly - * but having all the types in their own implementation helps keep things organized for now, especially - * for dealing with and testing more complex types in the future. - */ -public interface TypeHandler { - Value deserialize(ByteBuffer buffer) throws ImprintException; - void serialize(Value value, ByteBuffer buffer) throws ImprintException; - int estimateSize(Value value) throws ImprintException; - - // Static implementations for each type - TypeHandler NULL = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) { - return Value.nullValue(); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - // NULL values have no payload - } - - @Override - public int estimateSize(Value value) { - return 0; - } - }; - - TypeHandler BOOL = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < 1) { - throw new ImprintException(com.imprint.error.ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bool"); - } - byte boolByte = buffer.get(); - if (boolByte == 0) return Value.fromBoolean(false); - if (boolByte == 1) return Value.fromBoolean(true); - throw new 
ImprintException(com.imprint.error.ErrorType.SCHEMA_ERROR, "Invalid boolean value: " + boolByte); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - var boolValue = (Value.BoolValue) value; - buffer.put((byte) (boolValue.getValue() ? 1 : 0)); - } - - @Override - public int estimateSize(Value value) { - return 1; - } - }; - - TypeHandler INT32 = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < 4) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); - } - return Value.fromInt32(buffer.getInt()); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - var int32Value = (Value.Int32Value) value; - buffer.putInt(int32Value.getValue()); - } - - @Override - public int estimateSize(Value value) { - return 4; - } - }; - - TypeHandler INT64 = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < 8) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); - } - return Value.fromInt64(buffer.getLong()); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - Value.Int64Value int64Value = (Value.Int64Value) value; - buffer.putLong(int64Value.getValue()); - } - - @Override - public int estimateSize(Value value) { - return 8; - } - }; - - TypeHandler FLOAT32 = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < 4) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); - } - return Value.fromFloat32(buffer.getFloat()); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - var float32Value = (Value.Float32Value) value; - buffer.putFloat(float32Value.getValue()); - } - - @Override - public int estimateSize(Value value) { - return 4; - } - }; - - 
TypeHandler FLOAT64 = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < 8) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); - } - return Value.fromFloat64(buffer.getDouble()); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - var float64Value = (Value.Float64Value) value; - buffer.putDouble(float64Value.getValue()); - } - - @Override - public int estimateSize(Value value) { - return 8; - } - }; - - TypeHandler BYTES = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - if (buffer.remaining() < length) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bytes value data after VarInt. Slice from readValueBytes is too short. Needed: " + length + ", available: " + buffer.remaining()); - } - var bytesView = buffer.slice(); - bytesView.limit(length); - buffer.position(buffer.position() + length); - return Value.fromBytesBuffer(bytesView.asReadOnlyBuffer()); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - if (value instanceof Value.BytesBufferValue) { - Value.BytesBufferValue bufferValue = (Value.BytesBufferValue) value; - var bytesBuffer = bufferValue.getBuffer(); - VarInt.encode(bytesBuffer.remaining(), buffer); - buffer.put(bytesBuffer); - } else { - Value.BytesValue bytesValue = (Value.BytesValue) value; - byte[] bytes = bytesValue.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - } - } - - @Override - public int estimateSize(Value value) { - if (value instanceof Value.BytesBufferValue) { - Value.BytesBufferValue bufferValue = (Value.BytesBufferValue) value; - int length = bufferValue.getBuffer().remaining(); - return VarInt.encodedLength(length) + length; - } else { - byte[] bytes = 
((Value.BytesValue) value).getValue(); - return VarInt.encodedLength(bytes.length) + bytes.length; - } - } - }; - - TypeHandler STRING = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult strLengthResult = VarInt.decode(buffer); - int strLength = strLengthResult.getValue(); - if (buffer.remaining() < strLength) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for string value data after VarInt. Slice from readValueBytes is too short. Needed: " + strLength + ", available: " + buffer.remaining()); - } - var stringBytesView = buffer.slice(); - stringBytesView.limit(strLength); - buffer.position(buffer.position() + strLength); - try { - return Value.fromStringBuffer(stringBytesView); - } catch (Exception e) { - throw new ImprintException(ErrorType.INVALID_UTF8_STRING, "Invalid UTF-8 string or buffer issue: " + e.getMessage()); - } - } - - @Override - public void serialize(Value value, ByteBuffer buffer) { - if (value instanceof Value.StringBufferValue) { - var bufferValue = (Value.StringBufferValue) value; - var stringBuffer = bufferValue.getBuffer(); - VarInt.encode(stringBuffer.remaining(), buffer); - buffer.put(stringBuffer); - } else { - var stringValue = (Value.StringValue) value; - byte[] stringBytes = stringValue.getUtf8Bytes(); - VarInt.encode(stringBytes.length, buffer); - buffer.put(stringBytes); - } - } - - @Override - public int estimateSize(Value value) { - if (value instanceof Value.StringBufferValue) { - Value.StringBufferValue bufferValue = (Value.StringBufferValue) value; - int length = bufferValue.getBuffer().remaining(); - return VarInt.encodedLength(length) + length; - } else { - Value.StringValue stringValue = (Value.StringValue) value; - int utf8Length = stringValue.getUtf8Length(); // Uses cached bytes - return VarInt.encodedLength(utf8Length) + utf8Length; - } - } - }; - - TypeHandler ARRAY = new TypeHandler() { - @Override - public Value 
deserialize(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromArray(Collections.emptyList()); - } - - if (buffer.remaining() < 1) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for ARRAY element type code."); - } - var elementType = TypeCode.fromByte(buffer.get()); - var elements = new ArrayList(length); - var elementHandler = elementType.getHandler(); - - //Let each element handler consume what it needs from the buffer - for (int i = 0; i < length; i++) { - var element = elementHandler.deserialize(buffer); //Handler advances buffer position - elements.add(element); - } - - return Value.fromArray(elements); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) throws ImprintException { - var arrayValue = (Value.ArrayValue) value; - var elements = arrayValue.getValue(); - VarInt.encode(elements.size(), buffer); - - if (elements.isEmpty()) return; - - var elementType = elements.get(0).getTypeCode(); - buffer.put(elementType.getCode()); - var elementHandler = elementType.getHandler(); - for (var element : elements) { - if (element.getTypeCode() != elementType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Array elements must have same type code: " + - element.getTypeCode() + " != " + elementType); - } - elementHandler.serialize(element, buffer); - } - } - - @Override - public int estimateSize(Value value) throws ImprintException { - var arrayValue = (Value.ArrayValue) value; - var elements = arrayValue.getValue(); - int sizeOfLength = VarInt.encodedLength(elements.size()); - if (elements.isEmpty()) { - return sizeOfLength; - } - int sizeOfElementTypeCode = 1; - int arraySize = sizeOfLength + sizeOfElementTypeCode; - var elementHandler = elements.get(0).getTypeCode().getHandler(); - for (var element : elements) { - arraySize += elementHandler.estimateSize(element); - } - 
return arraySize; - } - }; - - TypeHandler MAP = new TypeHandler() { - @Override - public Value deserialize(ByteBuffer buffer) throws ImprintException { - VarInt.DecodeResult lengthResult = VarInt.decode(buffer); - int length = lengthResult.getValue(); - - if (length == 0) { - return Value.fromMap(Collections.emptyMap()); - } - - if (buffer.remaining() < 2) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for MAP key/value type codes."); - } - var keyType = TypeCode.fromByte(buffer.get()); - var valueType = TypeCode.fromByte(buffer.get()); - var map = new HashMap(length); - - var keyHandler = keyType.getHandler(); - var valueHandler = valueType.getHandler(); - - //Let handlers consume directly from buffer - for (int i = 0; i < length; i++) { - var keyValue = keyHandler.deserialize(buffer);// Advances buffer - var key = MapKey.fromValue(keyValue); - - var mapInternalValue = valueHandler.deserialize(buffer);//Advances buffer - - map.put(key, mapInternalValue); - } - - return Value.fromMap(map); - } - - @Override - public void serialize(Value value, ByteBuffer buffer) throws ImprintException { - var mapValue = (Value.MapValue) value; - var map = mapValue.getValue(); - VarInt.encode(map.size(), buffer); - - if (map.isEmpty()) { - return; - } - - var iterator = map.entrySet().iterator(); - var first = iterator.next(); - var keyType = first.getKey().getTypeCode(); - var valueType = first.getValue().getTypeCode(); - - buffer.put(keyType.getCode()); - buffer.put(valueType.getCode()); - - serializeMapKey(first.getKey(), buffer); - first.getValue().getTypeCode().getHandler().serialize(first.getValue(), buffer); - - while (iterator.hasNext()) { - var entry = iterator.next(); - if (entry.getKey().getTypeCode() != keyType) { - throw new ImprintException(ErrorType.SCHEMA_ERROR, - "Map keys must have same type code: " + - entry.getKey().getTypeCode() + " != " + keyType); - } - if (entry.getValue().getTypeCode() != valueType) { - throw new 
ImprintException(ErrorType.SCHEMA_ERROR, - "Map values must have same type code: " + - entry.getValue().getTypeCode() + " != " + valueType); - } - - serializeMapKey(entry.getKey(), buffer); - entry.getValue().getTypeCode().getHandler().serialize(entry.getValue(), buffer); - } - } - - @Override - public int estimateSize(Value value) throws ImprintException { - var mapValue = (Value.MapValue) value; - var map = mapValue.getValue(); - int sizeOfLength = VarInt.encodedLength(map.size()); - if (map.isEmpty()) { - return sizeOfLength; - } - int sizeOfTypeCodes = 2; - int mapSize = sizeOfLength + sizeOfTypeCodes; - - for (var entry : map.entrySet()) { - mapSize += estimateMapKeySize(entry.getKey()); - mapSize += entry.getValue().getTypeCode().getHandler().estimateSize(entry.getValue()); - } - return mapSize; - } - - private void serializeMapKey(MapKey key, ByteBuffer buffer) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: - MapKey.Int32Key int32Key = (MapKey.Int32Key) key; - buffer.putInt(int32Key.getValue()); - break; - - case INT64: - MapKey.Int64Key int64Key = (MapKey.Int64Key) key; - buffer.putLong(int64Key.getValue()); - break; - - case BYTES: - MapKey.BytesKey bytesKey = (MapKey.BytesKey) key; - byte[] bytes = bytesKey.getValue(); - VarInt.encode(bytes.length, buffer); - buffer.put(bytes); - break; - - case STRING: - MapKey.StringKey stringKey = (MapKey.StringKey) key; - byte[] stringBytes = stringKey.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8); - VarInt.encode(stringBytes.length, buffer); - buffer.put(stringBytes); - break; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); - } - } - - private int estimateMapKeySize(MapKey key) throws ImprintException { - switch (key.getTypeCode()) { - case INT32: return 4; - case INT64: return 8; - case BYTES: - byte[] bytes = ((MapKey.BytesKey) key).getValue(); - return VarInt.encodedLength(bytes.length) + bytes.length; 
- - case STRING: - var str = ((MapKey.StringKey) key).getValue(); - int utf8Length = str.getBytes(java.nio.charset.StandardCharsets.UTF_8).length; - return VarInt.encodedLength(utf8Length) + utf8Length; - - default: - throw new ImprintException(ErrorType.SERIALIZATION_ERROR, - "Invalid map key type: " + key.getTypeCode()); - } - } - }; -} \ No newline at end of file diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java deleted file mode 100644 index 070c497..0000000 --- a/src/main/java/com/imprint/types/Value.java +++ /dev/null @@ -1,468 +0,0 @@ -package com.imprint.types; - -import com.imprint.core.ImprintRecord; -import lombok.EqualsAndHashCode; -import lombok.Getter; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -/** - * A value that can be stored in an Imprint record. - */ -public abstract class Value { - - public abstract TypeCode getTypeCode(); - public abstract boolean equals(Object obj); - public abstract int hashCode(); - public abstract String toString(); - - // Factory methods - public static Value nullValue() { - return NullValue.INSTANCE; - } - - public static Value fromBoolean(boolean value) { - return new BoolValue(value); - } - - public static Value fromInt32(int value) { - return new Int32Value(value); - } - - public static Value fromInt64(long value) { - return new Int64Value(value); - } - - public static Value fromFloat32(float value) { - return new Float32Value(value); - } - - public static Value fromFloat64(double value) { - return new Float64Value(value); - } - - public static Value fromBytes(byte[] value) { - return new BytesValue(value); - } - - public static Value fromBytesBuffer(ByteBuffer value) { - return new BytesBufferValue(value); - } - - public static Value fromString(String value) { - return new StringValue(value); - } - - public static Value 
fromStringBuffer(ByteBuffer value) { - return new StringBufferValue(value); - } - - - public static Value fromArray(List value) { - return new ArrayValue(value); - } - - public static Value fromMap(Map value) { - return new MapValue(value); - } - - public static Value fromRow(ImprintRecord value) { - return new RowValue(value); - } - - // Null Value - @EqualsAndHashCode(callSuper = false) - public static class NullValue extends Value { - public static final NullValue INSTANCE = new NullValue(); - - private NullValue() {} - - @Override - public TypeCode getTypeCode() { return TypeCode.NULL; } - - @Override - public String toString() { - return "null"; - } - } - - // Boolean Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class BoolValue extends Value { - private final boolean value; - - public BoolValue(boolean value) { - this.value = value; - } - - public boolean getValue() { return value; } - - @Override - public TypeCode getTypeCode() { return TypeCode.BOOL; } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - // Int32 Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class Int32Value extends Value { - private final int value; - - public Int32Value(int value) { - this.value = value; - } - - @Override - public TypeCode getTypeCode() { return TypeCode.INT32; } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - // Int64 Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class Int64Value extends Value { - private final long value; - - public Int64Value(long value) { - this.value = value; - } - - @Override - public TypeCode getTypeCode() { return TypeCode.INT64; } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - // Float32 Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class Float32Value extends Value { - private final float value; - - public Float32Value(float value) { 
- this.value = value; - } - - @Override - public TypeCode getTypeCode() { return TypeCode.FLOAT32; } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - // Float64 Value - - @Getter - @EqualsAndHashCode(callSuper = false) - public static class Float64Value extends Value { - private final double value; - - public Float64Value(double value) { - this.value = value; - } - - @Override - public TypeCode getTypeCode() { return TypeCode.FLOAT64; } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - // Bytes Value (array-based) - @Getter - public static class BytesValue extends Value { - /** - * Returns internal array. MUST NOT be modified by caller. - */ - private final byte[] value; - - /** - * Takes ownership of the byte array. Caller must not modify after construction. - */ - public BytesValue(byte[] value) { - this.value = Objects.requireNonNull(value); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.BYTES; } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof BytesValue) { - BytesValue that = (BytesValue) obj; - return Arrays.equals(value, that.value); - } - if (obj instanceof BytesBufferValue) { - BytesBufferValue that = (BytesBufferValue) obj; - return Arrays.equals(value, that.getValue()); - } - return false; - } - - @Override - public int hashCode() { - return Arrays.hashCode(value); - } - - @Override - public String toString() { - return "bytes[" + value.length + "]"; - } - } - - // Bytes Value (ByteBuffer-based, zero-copy) - public static class BytesBufferValue extends Value { - private final ByteBuffer value; - - public BytesBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); - } - - public byte[] getValue() { - // Fallback to array when needed - byte[] array = new byte[value.remaining()]; - value.duplicate().get(array); - return array; - } - - public ByteBuffer 
getBuffer() { - return value.duplicate(); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.BYTES; } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof BytesBufferValue) { - BytesBufferValue that = (BytesBufferValue) obj; - return value.equals(that.value); - } - if (obj instanceof BytesValue) { - BytesValue that = (BytesValue) obj; - return Arrays.equals(getValue(), that.getValue()); - } - return false; - } - - @Override - public int hashCode() { - return value.hashCode(); - } - - @Override - public String toString() { - return "bytes[" + value.remaining() + "]"; - } - } - - // String Value (String-based) - public static class StringValue extends Value { - @Getter - private final String value; - private byte[] utf8BytesCache; - - public StringValue(String value) { - this.value = Objects.requireNonNull(value, "String cannot be null"); - } - - public byte[] getUtf8Bytes() { - if (utf8BytesCache == null) { - utf8BytesCache = value.getBytes(StandardCharsets.UTF_8); - } - return utf8BytesCache; - } - - public int getUtf8Length() { - return getUtf8Bytes().length; - } - - @Override - public TypeCode getTypeCode() { return TypeCode.STRING; } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof StringValue) { - StringValue that = (StringValue) obj; - return value.equals(that.value); - } - if (obj instanceof StringBufferValue) { - StringBufferValue that = (StringBufferValue) obj; - return value.equals(that.getValue()); - } - return false; - } - - @Override - public int hashCode() { - return value.hashCode(); - } - - @Override - public String toString() { - return "\"" + value + "\""; - } - } - - // String Value (ByteBuffer-based) - public static class StringBufferValue extends Value { - private final ByteBuffer value; - private String cachedString; - - private static final int 
THREAD_LOCAL_BUFFER_SIZE = 1024; - private static final ThreadLocal DECODE_BUFFER_CACHE = - ThreadLocal.withInitial(() -> new byte[THREAD_LOCAL_BUFFER_SIZE]); - - public StringBufferValue(ByteBuffer value) { - this.value = value.asReadOnlyBuffer(); - } - - public String getValue() { - String result = cachedString; - if (result == null) { - result = decodeUtf8(); - cachedString = result; - } - return result; - } - - private String decodeUtf8() { - final byte[] array; - final int offset; - final int length = value.remaining(); - - if (value.hasArray()) { - array = value.array(); - offset = value.arrayOffset() + value.position(); - } else { - byte[] threadLocalBuffer = DECODE_BUFFER_CACHE.get(); - if (length <= threadLocalBuffer.length) { - array = threadLocalBuffer; - } else { - // Fallback: copy bytes from the ByteBuffer to a new heap array (if too large for cache) - array = new byte[length]; - } - value.duplicate().get(array, 0, length); - offset = 0; - } - return new String(array, offset, length, StandardCharsets.UTF_8); - } - - public ByteBuffer getBuffer() { - return value.duplicate(); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.STRING; } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (obj instanceof StringBufferValue) { - StringBufferValue that = (StringBufferValue) obj; - return value.equals(that.value); - } - if (obj instanceof StringValue) { - StringValue that = (StringValue) obj; - return getValue().equals(that.getValue()); - } - return false; - } - - @Override - public int hashCode() { - return getValue().hashCode(); // Use string hash for consistency - } - - @Override - public String toString() { - return "\"" + getValue() + "\""; - } - } - - // Array Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class ArrayValue extends Value { - private final List value; - - public ArrayValue(List value) { - this.value = 
List.copyOf(Objects.requireNonNull(value, "Array cannot be null")); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.ARRAY; } - - @Override - public String toString() { - return value.toString(); - } - } - - // Map Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class MapValue extends Value { - private final Map value; - - public MapValue(Map value) { - this.value = Map.copyOf(Objects.requireNonNull(value, "Map cannot be null")); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.MAP; } - - @Override - public String toString() { - return value.toString(); - } - } - - // Row Value - @Getter - @EqualsAndHashCode(callSuper = false) - public static class RowValue extends Value { - private final ImprintRecord value; - - public RowValue(ImprintRecord value) { - this.value = Objects.requireNonNull(value, "Record cannot be null"); - } - - @Override - public TypeCode getTypeCode() { return TypeCode.ROW; } - - @Override - public String toString() { - return "Row{" + value + "}"; - } - } - -} \ No newline at end of file diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index 70c9095..29dd4d3 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -2,10 +2,7 @@ import com.imprint.error.ImprintException; import com.imprint.error.ErrorType; -import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.ToString; +import lombok.*; import lombok.experimental.UtilityClass; import java.nio.ByteBuffer; @@ -46,14 +43,12 @@ public final class VarInt { public static void encode(int value, ByteBuffer buffer) { // Convert to unsigned long for proper bit manipulation long val = Integer.toUnsignedLong(value); - // Encode at least one byte, then continue while value has more bits do { byte b = (byte) (val & SEGMENT_BITS); val >>>= 7; - if (val != 0) { + if (val != 0) b |= 
CONTINUATION_BIT; - } buffer.put(b); } while (val != 0); } @@ -80,18 +75,14 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { // Check if adding these 7 bits would overflow long segment = b & SEGMENT_BITS; - if (shift >= 32 || (shift == 28 && segment > 0xF)) { + if (shift >= 32 || (shift == 28 && segment > 0xF)) throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt overflow"); - } - // Add the bottom 7 bits to the result result |= segment << shift; // If the high bit is not set, this is the last byte - if ((b & CONTINUATION_BIT) == 0) { + if ((b & CONTINUATION_BIT) == 0) break; - } - shift += 7; } @@ -104,10 +95,8 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { * @return the number of bytes needed */ public static int encodedLength(int value) { - if (value >= 0 && value < CACHE_SIZE) { + if (value >= 0 && value < CACHE_SIZE) return ENCODED_LENGTHS[value]; - } - long val = Integer.toUnsignedLong(value); int length = 1; while (val >= 0x80) { @@ -120,12 +109,9 @@ public static int encodedLength(int value) { /** * Result of a VarInt decode operation. 
*/ - @Getter - @AllArgsConstructor - @EqualsAndHashCode - @ToString + @Value public static class DecodeResult { - private final int value; - private final int bytesRead; + int value; + int bytesRead; } } \ No newline at end of file diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index e066f01..cc70873 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -75,19 +75,19 @@ var record = ImprintRecord.builder(schemaId) ImprintRecord deserialized = ImprintRecord.deserialize(serialized); // Verify array - List deserializedArray = deserialized.getArray(1); + List deserializedArray = deserialized.getArray(1); assertNotNull(deserializedArray); assertEquals(3, deserializedArray.size()); - assertEquals(Value.fromInt32(1), deserializedArray.get(0)); - assertEquals(Value.fromInt32(2), deserializedArray.get(1)); - assertEquals(Value.fromInt32(3), deserializedArray.get(2)); + assertEquals(Integer.valueOf(1), deserializedArray.get(0)); + assertEquals(Integer.valueOf(2), deserializedArray.get(1)); + assertEquals(Integer.valueOf(3), deserializedArray.get(2)); // Verify map - Map deserializedMap = deserialized.getMap(2); + Map deserializedMap = deserialized.getMap(2); assertNotNull(deserializedMap); assertEquals(2, deserializedMap.size()); - assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one"))); - assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two"))); + assertEquals(Integer.valueOf(1), deserializedMap.get("one")); + assertEquals(Integer.valueOf(2), deserializedMap.get("two")); } @Test @@ -168,18 +168,18 @@ void testProjectComplexTypes() throws ImprintException { .field(100, "nested value") .build(); - // Create homogeneous array (all strings) - var testArray = Arrays.asList(Value.fromString("item1"), Value.fromString("item2"), Value.fromString("item3")); + // Create homogeneous array (all strings) - builder will 
handle conversion + var testArray = Arrays.asList("item1", "item2", "item3"); - // Create homogeneous map (string keys -> string values) - var testMap = new HashMap(); - testMap.put(MapKey.fromString("key1"), Value.fromString("value1")); - testMap.put(MapKey.fromString("key2"), Value.fromString("value2")); + // Create homogeneous map (string keys -> string values) - builder will handle conversion + var testMap = new HashMap(); + testMap.put("key1", "value1"); + testMap.put("key2", "value2"); var originalRecord = ImprintRecord.builder(schemaId) .field(1, "simple string") - .field(2, Value.fromArray(testArray)) - .field(3, Value.fromMap(testMap)) + .field(2, testArray) + .field(3, testMap) .field(4, nestedRecord) .field(5, 999L) .build(); @@ -192,15 +192,15 @@ void testProjectComplexTypes() throws ImprintException { // Verify array projection (homogeneous strings) var projectedArray = projected.getArray(2); assertEquals(3, projectedArray.size()); - assertEquals(Value.fromString("item1"), projectedArray.get(0)); - assertEquals(Value.fromString("item2"), projectedArray.get(1)); - assertEquals(Value.fromString("item3"), projectedArray.get(2)); + assertEquals("item1", projectedArray.get(0)); + assertEquals("item2", projectedArray.get(1)); + assertEquals("item3", projectedArray.get(2)); // Verify map projection (string -> string) var projectedMap = projected.getMap(3); assertEquals(2, projectedMap.size()); - assertEquals(Value.fromString("value1"), projectedMap.get(MapKey.fromString("key1"))); - assertEquals(Value.fromString("value2"), projectedMap.get(MapKey.fromString("key2"))); + assertEquals("value1", projectedMap.get("key1")); + assertEquals("value2", projectedMap.get("key2")); // Verify nested record projection var projectedNested = projected.getRow(4); @@ -294,27 +294,27 @@ void testMergeComplexTypes() throws ImprintException { .field(200, "nested in record2") .build(); - // Create arrays - var array1 = Arrays.asList(Value.fromString("array1_item1"), 
Value.fromString("array1_item2")); - var array2 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); + // Create arrays - builder will handle conversion + var array1 = Arrays.asList("array1_item1", "array1_item2"); + var array2 = Arrays.asList(10, 20); - // Create maps - var map1 = new HashMap(); - map1.put(MapKey.fromString("map1_key"), Value.fromString("map1_value")); + // Create maps - builder will handle conversion + var map1 = new HashMap(); + map1.put("map1_key", "map1_value"); - var map2 = new HashMap(); - map2.put(MapKey.fromInt32(42), Value.fromBoolean(true)); + var map2 = new HashMap(); + map2.put(42, true); var record1 = ImprintRecord.builder(schemaId) .field(1, nested1) - .field(3, Value.fromArray(array1)) - .field(5, Value.fromMap(map1)) + .field(3, array1) + .field(5, map1) .build(); var record2 = ImprintRecord.builder(schemaId) .field(2, nested2) - .field(4, Value.fromArray(array2)) - .field(6, Value.fromMap(map2)) + .field(4, array2) + .field(6, map2) .build(); var merged = record1.merge(record2); @@ -331,18 +331,18 @@ void testMergeComplexTypes() throws ImprintException { // Verify arrays var mergedArray1 = merged.getArray(3); assertEquals(2, mergedArray1.size()); - assertEquals(Value.fromString("array1_item1"), mergedArray1.get(0)); + assertEquals("array1_item1", mergedArray1.get(0)); var mergedArray2 = merged.getArray(4); assertEquals(2, mergedArray2.size()); - assertEquals(Value.fromInt32(10), mergedArray2.get(0)); + assertEquals(10, mergedArray2.get(0)); // Verify maps var mergedMap1 = merged.getMap(5); - assertEquals(Value.fromString("map1_value"), mergedMap1.get(MapKey.fromString("map1_key"))); + assertEquals("map1_value", mergedMap1.get("map1_key")); var mergedMap2 = merged.getMap(6); - assertEquals(Value.fromBoolean(true), mergedMap2.get(MapKey.fromInt32(42))); + assertEquals(true, mergedMap2.get(42)); } @Test @@ -465,12 +465,14 @@ void testLargeRecordOperations() throws ImprintException { private ImprintRecord 
createTestRecordForGetters() throws ImprintException { SchemaId schemaId = new SchemaId(5, 0xabcdef01); - List innerList1 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); - List innerList2 = Arrays.asList(Value.fromInt32(30), Value.fromInt32(40)); - List listOfLists = Arrays.asList(Value.fromArray(innerList1), Value.fromArray(innerList2)); + // Create nested arrays - builder will handle conversion + List innerList1 = Arrays.asList(10, 20); + List innerList2 = Arrays.asList(30, 40); + List> listOfLists = Arrays.asList(innerList1, innerList2); - Map mapWithArrayValue = new HashMap<>(); - mapWithArrayValue.put(MapKey.fromString("list1"), Value.fromArray(innerList1)); + // Create map with array value - builder will handle conversion + Map> mapWithArrayValue = new HashMap<>(); + mapWithArrayValue.put("list1", innerList1); return ImprintRecord.builder(schemaId) .field(1, true) @@ -481,8 +483,8 @@ private ImprintRecord createTestRecordForGetters() throws ImprintException { .field(6, "hello type world") .field(7, new byte[]{10, 20, 30}) .nullField(8) - .field(9, Value.fromArray(listOfLists)) // Array of Arrays (using Value directly for test setup) - .field(10, Value.fromMap(mapWithArrayValue)) // Map with Array value + .field(9, listOfLists) // Array of Arrays - builder handles conversion + .field(10, mapWithArrayValue) // Map with Array value - builder handles conversion .field(11, Collections.emptyList()) // Empty Array via builder .field(12, Collections.emptyMap()) // Empty Map via builder .build(); @@ -516,20 +518,21 @@ void testTypeGetterArrayOfArrays() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); - List arrOfArr = record.getArray(9); + List> arrOfArr = record.getArray(9); assertNotNull(arrOfArr); assertEquals(2, arrOfArr.size()); - assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(0)); - Value.ArrayValue firstInnerArray = (Value.ArrayValue) arrOfArr.get(0); - 
assertEquals(2, firstInnerArray.getValue().size()); - assertEquals(Value.fromInt32(10), firstInnerArray.getValue().get(0)); - assertEquals(Value.fromInt32(20), firstInnerArray.getValue().get(1)); - - assertInstanceOf(Value.ArrayValue.class, arrOfArr.get(1)); - Value.ArrayValue secondInnerArray = (Value.ArrayValue) arrOfArr.get(1); - assertEquals(2, secondInnerArray.getValue().size()); - assertEquals(Value.fromInt32(30), secondInnerArray.getValue().get(0)); - assertEquals(Value.fromInt32(40), secondInnerArray.getValue().get(1)); + + List firstInnerArray = arrOfArr.get(0); + assertNotNull(firstInnerArray); + assertEquals(2, firstInnerArray.size()); + assertEquals(Integer.valueOf(10), firstInnerArray.get(0)); + assertEquals(Integer.valueOf(20), firstInnerArray.get(1)); + + List secondInnerArray = arrOfArr.get(1); + assertNotNull(secondInnerArray); + assertEquals(2, secondInnerArray.size()); + assertEquals(Integer.valueOf(30), secondInnerArray.get(0)); + assertEquals(Integer.valueOf(40), secondInnerArray.get(1)); } @Test @@ -538,14 +541,14 @@ void testTypeGetterMapWithArrayValue() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); - Map mapWithArr = record.getMap(10); + Map> mapWithArr = record.getMap(10); assertNotNull(mapWithArr); assertEquals(1, mapWithArr.size()); - assertInstanceOf(Value.ArrayValue.class, mapWithArr.get(MapKey.fromString("list1"))); - Value.ArrayValue innerArray = (Value.ArrayValue) mapWithArr.get(MapKey.fromString("list1")); + + List innerArray = mapWithArr.get("list1"); assertNotNull(innerArray); - assertEquals(2, innerArray.getValue().size()); - assertEquals(Value.fromInt32(10), innerArray.getValue().get(0)); + assertEquals(2, innerArray.size()); + assertEquals(Integer.valueOf(10), innerArray.get(0)); } @Test @@ -554,11 +557,11 @@ void testTypeGettersEmptyCollections() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = 
serializeAndDeserialize(originalRecord); - List emptyArr = record.getArray(11); + List emptyArr = record.getArray(11); assertNotNull(emptyArr); assertTrue(emptyArr.isEmpty()); - Map emptyMap = record.getMap(12); + Map emptyMap = record.getMap(12); assertNotNull(emptyMap); assertTrue(emptyMap.isEmpty()); } @@ -584,10 +587,9 @@ var record = serializeAndDeserialize(originalRecord); assertTrue(ex.getMessage().contains("Field 8 is NULL")); - // Also test getValue for a null field returns Value.NullValue - Value nullValueField = record.getValue(8); - assertNotNull(nullValueField); - assertInstanceOf(Value.NullValue.class, nullValueField, "Field 8 should be Value.NullValue"); + // Also test getValue for a null field returns null + Object nullValueField = record.getValue(8); + assertNull(nullValueField, "Field 8 should be null"); } @Test @@ -743,37 +745,49 @@ void testDeepNesting() throws ImprintException { void testMapKeyTypeVariations() throws ImprintException { var schemaId = new SchemaId(70, 0xAAB5E75); - // Create maps with different key types - var stringKeyMap = new HashMap(); - stringKeyMap.put(MapKey.fromString("string_key"), Value.fromString("string_value")); + // Create maps with different key types - use simple types for builder + var stringKeyMap = new HashMap(); + stringKeyMap.put("string_key", "string_value"); - var intKeyMap = new HashMap(); - intKeyMap.put(MapKey.fromInt32(42), Value.fromString("int_value")); + var intKeyMap = new HashMap(); + intKeyMap.put(42, "int_value"); - var longKeyMap = new HashMap(); - longKeyMap.put(MapKey.fromInt64(9876543210L), Value.fromString("long_value")); + var longKeyMap = new HashMap(); + longKeyMap.put(9876543210L, "long_value"); - var bytesKeyMap = new HashMap(); - bytesKeyMap.put(MapKey.fromBytes(new byte[]{1, 2, 3}), Value.fromString("bytes_value")); + var bytesKeyMap = new HashMap(); + bytesKeyMap.put(new byte[]{1, 2, 3}, "bytes_value"); var record = ImprintRecord.builder(schemaId) - .field(1, 
Value.fromMap(stringKeyMap)) - .field(2, Value.fromMap(intKeyMap)) - .field(3, Value.fromMap(longKeyMap)) - .field(4, Value.fromMap(bytesKeyMap)) + .field(1, stringKeyMap) + .field(2, intKeyMap) + .field(3, longKeyMap) + .field(4, bytesKeyMap) .build(); var deserialized = serializeAndDeserialize(record); // Verify all map key types work correctly - assertEquals(Value.fromString("string_value"), - deserialized.getMap(1).get(MapKey.fromString("string_key"))); - assertEquals(Value.fromString("int_value"), - deserialized.getMap(2).get(MapKey.fromInt32(42))); - assertEquals(Value.fromString("long_value"), - deserialized.getMap(3).get(MapKey.fromInt64(9876543210L))); - assertEquals(Value.fromString("bytes_value"), - deserialized.getMap(4).get(MapKey.fromBytes(new byte[]{1, 2, 3}))); + assertEquals("string_value", + deserialized.getMap(1).get("string_key")); + assertEquals("int_value", + deserialized.getMap(2).get(42)); + assertEquals("long_value", + deserialized.getMap(3).get(9876543210L)); + // For byte array keys, we need to find the entry since arrays use reference equality + Map bytesKeyedMap = deserialized.getMap(4); + assertEquals(1, bytesKeyedMap.size()); + // The key should be a byte array {1, 2, 3} and the value should be "bytes_value" + byte[] expectedBytes = {1, 2, 3}; + Object actualValue = null; + for (Map.Entry entry : bytesKeyedMap.entrySet()) { + byte[] keyBytes = (byte[]) entry.getKey(); + if (java.util.Arrays.equals(keyBytes, expectedBytes)) { + actualValue = entry.getValue(); + break; + } + } + assertEquals("bytes_value", actualValue); } @Test diff --git a/src/test/java/com/imprint/ops/ImprintOperationsTest.java b/src/test/java/com/imprint/ops/ImprintOperationsTest.java index 292f8f3..4821125 100644 --- a/src/test/java/com/imprint/ops/ImprintOperationsTest.java +++ b/src/test/java/com/imprint/ops/ImprintOperationsTest.java @@ -4,7 +4,6 @@ import com.imprint.core.ImprintRecord; import com.imprint.core.SchemaId; import com.imprint.error.ImprintException; 
-import com.imprint.types.Value; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; @@ -109,10 +108,17 @@ void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size()); for (Directory entry : multiFieldRecord.getDirectory()) { - Value originalValue = multiFieldRecord.getValue(entry.getId()); - Value projectedValue = projected.getValue(entry.getId()); - assertEquals(originalValue, projectedValue, - "Field " + entry.getId() + " should have matching value"); + Object originalValue = multiFieldRecord.getValue(entry.getId()); + Object projectedValue = projected.getValue(entry.getId()); + + // Handle byte arrays specially since they don't use content equality + if (originalValue instanceof byte[] && projectedValue instanceof byte[]) { + assertArrayEquals((byte[]) originalValue, (byte[]) projectedValue, + "Field " + entry.getId() + " byte array should have matching content"); + } else { + assertEquals(originalValue, projectedValue, + "Field " + entry.getId() + " should have matching value"); + } } } @@ -298,9 +304,22 @@ void shouldHandleMergeWithEmptyRecord() throws ImprintException { // And values should be preserved for (Directory entry : multiFieldRecord.getDirectory()) { - Value originalValue = multiFieldRecord.getValue(entry.getId()); - assertEquals(originalValue, merged1.getValue(entry.getId())); - assertEquals(originalValue, merged2.getValue(entry.getId())); + Object originalValue = multiFieldRecord.getValue(entry.getId()); + Object merged1Value = merged1.getValue(entry.getId()); + Object merged2Value = merged2.getValue(entry.getId()); + + // Handle byte arrays specially since they don't use content equality + if (originalValue instanceof byte[]) { + assertArrayEquals((byte[]) originalValue, (byte[]) merged1Value, + "Field " + entry.getId() + " should be preserved in merged1"); + 
assertArrayEquals((byte[]) originalValue, (byte[]) merged2Value, + "Field " + entry.getId() + " should be preserved in merged2"); + } else { + assertEquals(originalValue, merged1Value, + "Field " + entry.getId() + " should be preserved in merged1"); + assertEquals(originalValue, merged2Value, + "Field " + entry.getId() + " should be preserved in merged2"); + } } } diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 1cb7128..3b50f35 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -2,19 +2,15 @@ import com.imprint.core.ImprintRecord; import com.imprint.core.SchemaId; -import com.imprint.ops.ImprintOperations; -import com.imprint.types.Value; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertTrue; - import java.util.Random; import java.util.stream.IntStream; -@Disabled +//@Disabled public class ProfilerTest { private static final int RECORD_SIZE = 50; @@ -271,25 +267,25 @@ private void profileSerialization(String testName, int recordSize, int iteration for (int fieldId = 1; fieldId <= recordSize; fieldId++) { switch (fieldId % 7) { case 0: - builder.field(fieldId, Value.fromInt32(i + fieldId)); + builder.field(fieldId, i + fieldId); break; case 1: - builder.field(fieldId, Value.fromInt64(i * 1000L + fieldId)); + builder.field(fieldId, i * 1000L + fieldId); break; case 2: - builder.field(fieldId, Value.fromString("test-string-" + i + "-" + fieldId)); + builder.field(fieldId, "test-string-" + i + "-" + fieldId); break; case 3: - builder.field(fieldId, Value.fromString("longer-descriptive-text-for-field-" + fieldId + "-iteration-" + i)); + builder.field(fieldId, "longer-descriptive-text-for-field-" + fieldId + "-iteration-" + i); break; case 4: - builder.field(fieldId, Value.fromFloat64(i * 3.14159 + fieldId)); + 
builder.field(fieldId, i * 3.14159 + fieldId); break; case 5: - builder.field(fieldId, Value.fromBytes(("bytes-" + i + "-" + fieldId).getBytes())); + builder.field(fieldId, ("bytes-" + i + "-" + fieldId).getBytes()); break; case 6: - builder.field(fieldId, Value.fromBoolean((i + fieldId) % 2 == 0)); + builder.field(fieldId, (i + fieldId) % 2 == 0); break; } } @@ -324,16 +320,16 @@ private ImprintRecord createTestRecord(int recordSize) throws Exception { for (int i = 1; i <= recordSize; i++) { switch (i % 4) { case 0: - builder.field(i, Value.fromInt32(i * 100)); + builder.field(i, i * 100); break; case 1: - builder.field(i, Value.fromString("field-value-" + i)); + builder.field(i, "field-value-" + i); break; case 2: - builder.field(i, Value.fromFloat64(i * 3.14159)); + builder.field(i, i * 3.14159); break; case 3: - builder.field(i, Value.fromBytes(("bytes-" + i).getBytes())); + builder.field(i, ("bytes-" + i).getBytes()); break; } } @@ -346,16 +342,16 @@ private ImprintRecord createTestRecordWithFieldIds(int[] fieldIds) throws Except for (int fieldId : fieldIds) { switch (fieldId % 4) { case 0: - builder.field(fieldId, Value.fromInt32(fieldId * 100)); + builder.field(fieldId, fieldId * 100); break; case 1: - builder.field(fieldId, Value.fromString("field-value-" + fieldId)); + builder.field(fieldId, "field-value-" + fieldId); break; case 2: - builder.field(fieldId, Value.fromFloat64(fieldId * 3.14159)); + builder.field(fieldId, fieldId * 3.14159); break; case 3: - builder.field(fieldId, Value.fromBytes(("bytes-" + fieldId).getBytes())); + builder.field(fieldId, ("bytes-" + fieldId).getBytes()); break; } } @@ -379,59 +375,4 @@ private int[] generateRandomFields(Random random, int maxField, int count) { .sorted() .toArray(); } - - @Test - @Tag("profiling") - void profileBytesToBytesVsObjectMerge() throws Exception { - System.out.println("=== Bytes-to-Bytes vs Object Merge Comparison ==="); - - // Create test records - var record1 = createTestRecordWithFieldIds(new 
int[]{1, 3, 5, 7, 9, 11, 13, 15}); - var record2 = createTestRecordWithFieldIds(new int[]{2, 4, 6, 8, 10, 12, 14, 16}); - - var record1Bytes = record1.serializeToBuffer(); - var record2Bytes = record2.serializeToBuffer(); - - int iterations = 50_000; - - // Warm up - for (int i = 0; i < 1000; i++) { - record1.merge(record2).serializeToBuffer(); - ImprintOperations.mergeBytes(record1Bytes, record2Bytes); - } - - System.out.printf("Profiling %,d merge operations...%n", iterations); - - // Test object merge + serialize - long startObjectMerge = System.nanoTime(); - for (int i = 0; i < iterations; i++) { - var merged = record1.merge(record2); - var serialized = merged.serializeToBuffer(); - // Consume result to prevent optimization - if (serialized.remaining() == 0) throw new RuntimeException("Empty result"); - } - long objectMergeTime = System.nanoTime() - startObjectMerge; - - // Test bytes merge - long startBytesMerge = System.nanoTime(); - for (int i = 0; i < iterations; i++) { - var merged = ImprintOperations.mergeBytes(record1Bytes, record2Bytes); - // Consume result to prevent optimization - if (merged.remaining() == 0) throw new RuntimeException("Empty result"); - } - long bytesMergeTime = System.nanoTime() - startBytesMerge; - - double objectAvg = (double) objectMergeTime / iterations / 1000.0; // microseconds - double bytesAvg = (double) bytesMergeTime / iterations / 1000.0; // microseconds - double speedup = objectAvg / bytesAvg; - - System.out.printf("Object merge + serialize: %.2f ms (avg: %.1f μs/op)%n", - objectMergeTime / 1_000_000.0, objectAvg); - System.out.printf("Bytes-to-bytes merge: %.2f ms (avg: %.1f μs/op)%n", - bytesMergeTime / 1_000_000.0, bytesAvg); - System.out.printf("Speedup: %.1fx faster%n", speedup); - - // Assert that bytes approach is faster (should be at least 1.5x) - assertTrue(speedup > 1.0, String.format("Bytes merge should be faster. 
Got %.1fx speedup", speedup)); - } } \ No newline at end of file diff --git a/src/test/java/com/imprint/types/MapKeyTest.java b/src/test/java/com/imprint/types/MapKeyTest.java index 08f4180..f9707e4 100644 --- a/src/test/java/com/imprint/types/MapKeyTest.java +++ b/src/test/java/com/imprint/types/MapKeyTest.java @@ -8,11 +8,11 @@ class MapKeyTest { @Test - void shouldCreateMapKeysFromValues() throws ImprintException { - var int32Key = MapKey.fromValue(Value.fromInt32(42)); - var int64Key = MapKey.fromValue(Value.fromInt64(123L)); - var bytesKey = MapKey.fromValue(Value.fromBytes(new byte[]{1, 2, 3})); - var stringKey = MapKey.fromValue(Value.fromString("test")); + void shouldCreateMapKeysFromPrimitives() throws ImprintException { + var int32Key = MapKey.fromPrimitive(TypeCode.INT32, 42); + var int64Key = MapKey.fromPrimitive(TypeCode.INT64, 123L); + var bytesKey = MapKey.fromPrimitive(TypeCode.BYTES, new byte[]{1, 2, 3}); + var stringKey = MapKey.fromPrimitive(TypeCode.STRING, "test"); assertThat(int32Key).isInstanceOf(MapKey.Int32Key.class); assertThat(((MapKey.Int32Key) int32Key).getValue()).isEqualTo(42); @@ -28,31 +28,28 @@ void shouldCreateMapKeysFromValues() throws ImprintException { } @Test - void shouldConvertBackToValues() { + void shouldConvertToPrimitives() { var int32Key = MapKey.fromInt32(42); var stringKey = MapKey.fromString("test"); - var int32Value = int32Key.toValue(); - var stringValue = stringKey.toValue(); + Object int32Value = int32Key.getPrimitiveValue(); + Object stringValue = stringKey.getPrimitiveValue(); - assertThat(int32Value).isInstanceOf(Value.Int32Value.class); - assertThat(((Value.Int32Value) int32Value).getValue()).isEqualTo(42); + assertThat(int32Value).isInstanceOf(Integer.class); + assertThat(int32Value).isEqualTo(42); - assertThat(stringValue).isInstanceOf(Value.StringValue.class); - assertThat(((Value.StringValue) stringValue).getValue()).isEqualTo("test"); + assertThat(stringValue).isInstanceOf(String.class); + 
assertThat(stringValue).isEqualTo("test"); } @Test - void shouldRejectInvalidValueTypes() { - var boolValue = Value.fromBoolean(true); - var arrayValue = Value.fromArray(java.util.Collections.emptyList()); - - assertThatThrownBy(() -> MapKey.fromValue(boolValue)) + void shouldRejectInvalidPrimitiveTypes() { + assertThatThrownBy(() -> MapKey.fromPrimitive(TypeCode.BOOL, true)) .isInstanceOf(ImprintException.class) .extracting("errorType") .isEqualTo(ErrorType.TYPE_MISMATCH); - assertThatThrownBy(() -> MapKey.fromValue(arrayValue)) + assertThatThrownBy(() -> MapKey.fromPrimitive(TypeCode.ARRAY, java.util.Collections.emptyList())) .isInstanceOf(ImprintException.class) .extracting("errorType") .isEqualTo(ErrorType.TYPE_MISMATCH); diff --git a/src/test/java/com/imprint/types/TypeHandlerTest.java b/src/test/java/com/imprint/types/TypeHandlerTest.java deleted file mode 100644 index 75d118f..0000000 --- a/src/test/java/com/imprint/types/TypeHandlerTest.java +++ /dev/null @@ -1,274 +0,0 @@ -package com.imprint.types; - -import com.imprint.error.ImprintException; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import static org.assertj.core.api.Assertions.*; - -/** - * Tests for individual TypeHandler implementations. - * Validates serialization, deserialization, and size estimation for each type. 
- */ -class TypeHandlerTest { - - @Test - void testNullHandler() throws ImprintException { - var handler = TypeHandler.NULL; - var value = Value.nullValue(); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(0); - - // Serialization - var buffer = ByteBuffer.allocate(10); - handler.serialize(value, buffer); - assertThat(buffer.position()).isEqualTo(0); // NULL writes nothing - - // Deserialization - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - } - - @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testBoolHandler(boolean testValue) throws ImprintException { - var handler = TypeHandler.BOOL; - var value = Value.fromBoolean(testValue); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(1); - - // Round-trip test - var buffer = ByteBuffer.allocate(10); - handler.serialize(value, buffer); - assertThat(buffer.position()).isEqualTo(1); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - assertThat(((Value.BoolValue) deserialized).getValue()).isEqualTo(testValue); - } - - @ParameterizedTest - @ValueSource(ints = {0, 1, -1, Integer.MAX_VALUE, Integer.MIN_VALUE, 42, -42}) - void testInt32Handler(int testValue) throws ImprintException { - var handler = TypeHandler.INT32; - var value = Value.fromInt32(testValue); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(4); - - // Round-trip test - var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - assertThat(buffer.position()).isEqualTo(4); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - assertThat(((Value.Int32Value) deserialized).getValue()).isEqualTo(testValue); - } - - @ParameterizedTest - @ValueSource(longs = {0L, 1L, -1L, Long.MAX_VALUE, Long.MIN_VALUE, 123456789L}) - void 
testInt64Handler(long testValue) throws ImprintException { - var handler = TypeHandler.INT64; - var value = Value.fromInt64(testValue); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(8); - - // Round-trip test - var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - assertThat(buffer.position()).isEqualTo(8); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - assertThat(((Value.Int64Value) deserialized).getValue()).isEqualTo(testValue); - } - - @ParameterizedTest - @ValueSource(floats = {0.0f, 1.0f, -1.0f, Float.MAX_VALUE, Float.MIN_VALUE, 3.14159f, Float.NaN, Float.POSITIVE_INFINITY}) - void testFloat32Handler(float testValue) throws ImprintException { - var handler = TypeHandler.FLOAT32; - var value = Value.fromFloat32(testValue); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(4); - - // Round-trip test - var buffer = ByteBuffer.allocate(10).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - assertThat(buffer.position()).isEqualTo(4); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - - float deserializedValue = ((Value.Float32Value) deserialized).getValue(); - if (Float.isNaN(testValue)) { - assertThat(deserializedValue).isNaN(); - } else { - assertThat(deserializedValue).isEqualTo(testValue); - } - } - - @ParameterizedTest - @ValueSource(doubles = {0.0, 1.0, -1.0, Double.MAX_VALUE, Double.MIN_VALUE, Math.PI, Double.NaN, Double.POSITIVE_INFINITY}) - void testFloat64Handler(double testValue) throws ImprintException { - var handler = TypeHandler.FLOAT64; - var value = Value.fromFloat64(testValue); - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(8); - - // Round-trip test - var buffer = ByteBuffer.allocate(20).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - 
assertThat(buffer.position()).isEqualTo(8); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - assertThat(deserialized).isEqualTo(value); - - double deserializedValue = ((Value.Float64Value) deserialized).getValue(); - if (Double.isNaN(testValue)) { - assertThat(deserializedValue).isNaN(); - } else { - assertThat(deserializedValue).isEqualTo(testValue); - } - } - - @ParameterizedTest - @ValueSource(strings = {"", "hello", "世界", "a very long string that exceeds typical buffer sizes and contains unicode: 🚀🎉", "null\0bytes"}) - void testStringHandler(String testValue) throws ImprintException { - var handler = TypeHandler.STRING; - var value = Value.fromString(testValue); - - byte[] utf8Bytes = testValue.getBytes(java.nio.charset.StandardCharsets.UTF_8); - int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - - // Size estimation - assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - - // Round-trip test - var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - - // Should return StringBufferValue (zero-copy implementation) - assertThat(deserialized).isInstanceOf(Value.StringBufferValue.class); - - String deserializedString; - if (deserialized instanceof Value.StringBufferValue) { - deserializedString = ((Value.StringBufferValue) deserialized).getValue(); - } else { - deserializedString = ((Value.StringValue) deserialized).getValue(); - } - - assertThat(deserializedString).isEqualTo(testValue); - } - - @Test - void testBytesHandlerWithArrayValue() throws ImprintException { - var handler = TypeHandler.BYTES; - byte[] testBytes = {0, 1, 2, (byte) 0xFF, 42, 127, -128}; - var value = Value.fromBytes(testBytes); - - int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - - // Size estimation - 
assertThat(handler.estimateSize(value)).isEqualTo(expectedSize); - - // Round-trip test - var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(value, buffer); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - - // Should return BytesBufferValue (zero-copy implementation) - assertThat(deserialized).isInstanceOf(Value.BytesBufferValue.class); - - byte[] deserializedBytes = ((Value.BytesBufferValue) deserialized).getValue(); - assertThat(deserializedBytes).isEqualTo(testBytes); - } - - @Test - void testBytesHandlerWithBufferValue() throws ImprintException { - var handler = TypeHandler.BYTES; - byte[] testBytes = {10, 20, 30, 40}; - var bufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes).asReadOnlyBuffer()); - - int expectedSize = com.imprint.util.VarInt.encodedLength(testBytes.length) + testBytes.length; - - // Size estimation - assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - - // Round-trip test - var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); - handler.serialize(bufferValue, buffer); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - - byte[] deserializedBytes = ((Value.BytesBufferValue) deserialized).getValue(); - assertThat(deserializedBytes).isEqualTo(testBytes); - } - - @Test - void testStringHandlerWithBufferValue() throws ImprintException { - var handler = TypeHandler.STRING; - String testString = "zero-copy string test"; - byte[] utf8Bytes = testString.getBytes(java.nio.charset.StandardCharsets.UTF_8); - var bufferValue = Value.fromStringBuffer(ByteBuffer.wrap(utf8Bytes).asReadOnlyBuffer()); - - int expectedSize = com.imprint.util.VarInt.encodedLength(utf8Bytes.length) + utf8Bytes.length; - - // Size estimation - assertThat(handler.estimateSize(bufferValue)).isEqualTo(expectedSize); - - // Round-trip test - var buffer = ByteBuffer.allocate(expectedSize + 20).order(ByteOrder.LITTLE_ENDIAN); - 
handler.serialize(bufferValue, buffer); - - buffer.flip(); - var deserialized = handler.deserialize(buffer); - - String deserializedString = ((Value.StringBufferValue) deserialized).getValue(); - assertThat(deserializedString).isEqualTo(testString); - } - - @Test - void testBoolHandlerInvalidValue() { - var handler = TypeHandler.BOOL; - var buffer = ByteBuffer.allocate(10); - buffer.put((byte) 2); // Invalid boolean value - buffer.flip(); - - assertThatThrownBy(() -> handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Invalid boolean value: 2"); - } - - @Test - void testHandlerBufferUnderflow() { - // Test that handlers properly detect buffer underflow - var int32Handler = TypeHandler.INT32; - var buffer = ByteBuffer.allocate(2); // Too small for int32 - - assertThatThrownBy(() -> int32Handler.deserialize(buffer)) - .isInstanceOf(ImprintException.class) - .hasMessageContaining("Not enough bytes for int32"); - } -} \ No newline at end of file diff --git a/src/test/java/com/imprint/types/ValueTest.java b/src/test/java/com/imprint/types/ValueTest.java deleted file mode 100644 index b092bb7..0000000 --- a/src/test/java/com/imprint/types/ValueTest.java +++ /dev/null @@ -1,218 +0,0 @@ -package com.imprint.types; - -import org.junit.jupiter.api.Test; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -class ValueTest { - - @Test - void shouldCreateNullValue() { - Value value = Value.nullValue(); - - assertThat(value).isInstanceOf(Value.NullValue.class); - assertThat(value.getTypeCode()).isEqualTo(TypeCode.NULL); - assertThat(value.toString()).isEqualTo("null"); - } - - @Test - void shouldCreateBooleanValues() { - Value trueValue = Value.fromBoolean(true); - Value falseValue = Value.fromBoolean(false); - - 
assertThat(trueValue).isInstanceOf(Value.BoolValue.class); - assertThat(((Value.BoolValue) trueValue).getValue()).isTrue(); - assertThat(trueValue.getTypeCode()).isEqualTo(TypeCode.BOOL); - - assertThat(falseValue).isInstanceOf(Value.BoolValue.class); - assertThat(((Value.BoolValue) falseValue).getValue()).isFalse(); - assertThat(falseValue.getTypeCode()).isEqualTo(TypeCode.BOOL); - } - - @Test - void shouldCreateNumericValues() { - var int32 = Value.fromInt32(42); - var int64 = Value.fromInt64(123456789L); - var float32 = Value.fromFloat32(3.14f); - var float64 = Value.fromFloat64(2.718281828); - - assertThat(int32.getTypeCode()).isEqualTo(TypeCode.INT32); - assertThat(((Value.Int32Value) int32).getValue()).isEqualTo(42); - - assertThat(int64.getTypeCode()).isEqualTo(TypeCode.INT64); - assertThat(((Value.Int64Value) int64).getValue()).isEqualTo(123456789L); - - assertThat(float32.getTypeCode()).isEqualTo(TypeCode.FLOAT32); - assertThat(((Value.Float32Value) float32).getValue()).isEqualTo(3.14f); - - assertThat(float64.getTypeCode()).isEqualTo(TypeCode.FLOAT64); - assertThat(((Value.Float64Value) float64).getValue()).isEqualTo(2.718281828); - } - - @Test - void shouldCreateBytesAndStringValues() { - byte[] bytes = {1, 2, 3, 4}; - var bytesValue = Value.fromBytes(bytes); - var stringValue = Value.fromString("hello"); - - assertThat(bytesValue.getTypeCode()).isEqualTo(TypeCode.BYTES); - assertThat(((Value.BytesValue) bytesValue).getValue()).isEqualTo(bytes); - - assertThat(stringValue.getTypeCode()).isEqualTo(TypeCode.STRING); - assertThat(((Value.StringValue) stringValue).getValue()).isEqualTo("hello"); - } - - @Test - void shouldCreateArrayValues() { - List elements = Arrays.asList( - Value.fromInt32(1), - Value.fromInt32(2), - Value.fromInt32(3) - ); - Value arrayValue = Value.fromArray(elements); - - assertThat(arrayValue.getTypeCode()).isEqualTo(TypeCode.ARRAY); - assertThat(((Value.ArrayValue) arrayValue).getValue()).isEqualTo(elements); - } - - @Test - void 
shouldCreateMapValues() { - var map = new HashMap(); - map.put(MapKey.fromString("key1"), Value.fromInt32(1)); - map.put(MapKey.fromString("key2"), Value.fromInt32(2)); - - Value mapValue = Value.fromMap(map); - - assertThat(mapValue.getTypeCode()).isEqualTo(TypeCode.MAP); - assertThat(((Value.MapValue) mapValue).getValue()).isEqualTo(map); - } - - @Test - void shouldHandleEqualityCorrectly() { - var int1 = Value.fromInt32(42); - var int2 = Value.fromInt32(42); - var int3 = Value.fromInt32(43); - - assertThat(int1).isEqualTo(int2); - assertThat(int1).isNotEqualTo(int3); - assertThat(int1.hashCode()).isEqualTo(int2.hashCode()); - } - - @Test - void shouldRejectNullString() { - assertThatThrownBy(() -> Value.fromString(null)) - .isInstanceOf(NullPointerException.class); - } - - @Test - void shouldCreateStringBufferValue() { - String testString = "hello world"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - assertThat(stringBufferValue).isInstanceOf(Value.StringBufferValue.class); - assertThat(stringBufferValue.getTypeCode()).isEqualTo(TypeCode.STRING); - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldCreateBytesBufferValue() { - byte[] testBytes = {1, 2, 3, 4, 5}; - ByteBuffer buffer = ByteBuffer.wrap(testBytes); - - Value bytesBufferValue = Value.fromBytesBuffer(buffer); - - assertThat(bytesBufferValue).isInstanceOf(Value.BytesBufferValue.class); - assertThat(bytesBufferValue.getTypeCode()).isEqualTo(TypeCode.BYTES); - assertThat(((Value.BytesBufferValue) bytesBufferValue).getValue()).isEqualTo(testBytes); - } - - @Test - void shouldHandleStringBufferValueFastPath() { - // Array-backed buffer with arrayOffset() == 0 should use fast path - String testString = "fast path test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = 
ByteBuffer.wrap(utf8Bytes); - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - // Should work correctly regardless of path taken - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldHandleStringBufferValueFallbackPath() { - // Sliced buffer will have non-zero arrayOffset, forcing fallback path - String testString = "fallback path test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - ByteBuffer sliced = buffer.slice(); // This may break arrayOffset() == 0 - - Value stringBufferValue = Value.fromStringBuffer(sliced); - - // Should work correctly regardless of path taken - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(testString); - } - - @Test - void shouldHandleLargeStringWithoutCaching() { - // Create string > 1KB to test the no-cache path - String largeString = "x".repeat(2000); - byte[] utf8Bytes = largeString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes).slice(); // Force fallback path - - Value stringBufferValue = Value.fromStringBuffer(buffer); - - assertThat(((Value.StringBufferValue) stringBufferValue).getValue()).isEqualTo(largeString); - } - - @Test - void shouldCacheStringDecoding() { - String testString = "cache test"; - byte[] utf8Bytes = testString.getBytes(StandardCharsets.UTF_8); - ByteBuffer buffer = ByteBuffer.wrap(utf8Bytes); - - Value.StringBufferValue stringBufferValue = (Value.StringBufferValue) Value.fromStringBuffer(buffer); - - // First call should decode and cache - String result1 = stringBufferValue.getValue(); - // Second call should return cached value - String result2 = stringBufferValue.getValue(); - - assertThat(result1).isEqualTo(testString); - assertThat(result2).isEqualTo(testString); - assertThat(result1).isSameAs(result2); // Should be same object reference due to caching - } - - @Test - void 
shouldHandleStringValueEquality() { - String testString = "equality test"; - - Value stringValue = Value.fromString(testString); - Value stringBufferValue = Value.fromStringBuffer(ByteBuffer.wrap(testString.getBytes(StandardCharsets.UTF_8))); - - assertThat(stringValue).isEqualTo(stringBufferValue); - assertThat(stringBufferValue).isEqualTo(stringValue); - assertThat(stringValue.hashCode()).isEqualTo(stringBufferValue.hashCode()); - } - - @Test - void shouldHandleBytesValueEquality() { - byte[] testBytes = {1, 2, 3, 4, 5}; - - Value bytesValue = Value.fromBytes(testBytes); - Value bytesBufferValue = Value.fromBytesBuffer(ByteBuffer.wrap(testBytes)); - - assertThat(bytesValue).isEqualTo(bytesBufferValue); - assertThat(bytesBufferValue).isEqualTo(bytesValue); - } -} \ No newline at end of file From 40e78abef926535ae131fcd2b82305916dd9975b Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Sat, 14 Jun 2025 00:43:54 -0400 Subject: [PATCH 51/53] add static serializers --- .../imprint/types/ImprintDeserializers.java | 76 ++++++++ .../com/imprint/types/ImprintSerializers.java | 173 ++++++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 src/main/java/com/imprint/types/ImprintDeserializers.java create mode 100644 src/main/java/com/imprint/types/ImprintSerializers.java diff --git a/src/main/java/com/imprint/types/ImprintDeserializers.java b/src/main/java/com/imprint/types/ImprintDeserializers.java new file mode 100644 index 0000000..d579390 --- /dev/null +++ b/src/main/java/com/imprint/types/ImprintDeserializers.java @@ -0,0 +1,76 @@ +package com.imprint.types; + +import com.imprint.error.ErrorType; +import com.imprint.error.ImprintException; +import com.imprint.util.VarInt; +import lombok.experimental.UtilityClass; + +import java.nio.ByteBuffer; + +/** + * Static primitive deserialization methods for all Imprint types. + * Returns native Java objects instead of Value wrappers for better performance. 
+ */ +@UtilityClass +public final class ImprintDeserializers { + + // Primitive deserializers (optimized, no Value objects) + public static Object deserializePrimitive(ByteBuffer buffer, TypeCode typeCode) throws ImprintException { + switch (typeCode) { + case NULL: + return null; + case BOOL: + if (buffer.remaining() < 1) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for bool"); + } + byte boolByte = buffer.get(); + if (boolByte == 0) return false; + if (boolByte == 1) return true; + throw new ImprintException(ErrorType.SCHEMA_ERROR, "Invalid boolean value: " + boolByte); + case INT32: + if (buffer.remaining() < 4) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int32"); + } + return buffer.getInt(); + case INT64: + if (buffer.remaining() < 8) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for int64"); + } + return buffer.getLong(); + case FLOAT32: + if (buffer.remaining() < 4) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float32"); + } + return buffer.getFloat(); + case FLOAT64: + if (buffer.remaining() < 8) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for float64"); + } + return buffer.getDouble(); + case BYTES: + VarInt.DecodeResult lengthResult = VarInt.decode(buffer); + int length = lengthResult.getValue(); + if (buffer.remaining() < length) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for bytes value data after VarInt. Needed: " + + length + ", available: " + buffer.remaining()); + } + byte[] bytes = new byte[length]; + buffer.get(bytes); + return bytes; + case STRING: + VarInt.DecodeResult strLengthResult = VarInt.decode(buffer); + int strLength = strLengthResult.getValue(); + if (buffer.remaining() < strLength) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Not enough bytes for string value data after VarInt. 
Needed: " + + strLength + ", available: " + buffer.remaining()); + } + byte[] stringBytes = new byte[strLength]; + buffer.get(stringBytes); + return new String(stringBytes, java.nio.charset.StandardCharsets.UTF_8); + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Cannot deserialize " + typeCode + " as primitive"); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/types/ImprintSerializers.java b/src/main/java/com/imprint/types/ImprintSerializers.java new file mode 100644 index 0000000..3c4a332 --- /dev/null +++ b/src/main/java/com/imprint/types/ImprintSerializers.java @@ -0,0 +1,173 @@ +package com.imprint.types; + +import com.imprint.error.ErrorType; +import com.imprint.error.ImprintException; +import com.imprint.util.VarInt; +import lombok.experimental.UtilityClass; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +/** + * Static serialization methods for all Imprint types. + * Eliminates virtual dispatch overhead from TypeHandler interface. + */ +@UtilityClass +public final class ImprintSerializers { + + + // Primitive serializers + public static void serializeBool(boolean value, ByteBuffer buffer) { + buffer.put((byte) (value ? 
1 : 0)); + } + + public static void serializeInt32(int value, ByteBuffer buffer) { + buffer.putInt(value); + } + + public static void serializeInt64(long value, ByteBuffer buffer) { + buffer.putLong(value); + } + + public static void serializeFloat32(float value, ByteBuffer buffer) { + buffer.putFloat(value); + } + + public static void serializeFloat64(double value, ByteBuffer buffer) { + buffer.putDouble(value); + } + + public static void serializeString(String value, ByteBuffer buffer) { + byte[] utf8Bytes = value.getBytes(StandardCharsets.UTF_8); + VarInt.encode(utf8Bytes.length, buffer); + buffer.put(utf8Bytes); + } + + public static void serializeBytes(byte[] value, ByteBuffer buffer) { + VarInt.encode(value.length, buffer); + buffer.put(value); + } + + public static void serializeArray(java.util.List list, ByteBuffer buffer, + java.util.function.Function typeConverter, + java.util.function.BiConsumer elementSerializer) throws ImprintException { + VarInt.encode(list.size(), buffer); + + if (list.isEmpty()) return; // Empty arrays don't need type code + + // Convert first element to determine element type + Object firstElement = list.get(0); + TypeCode firstTypeCode = typeConverter.apply(firstElement); + buffer.put(firstTypeCode.getCode()); + + // Serialize all elements - they must be same type + for (Object element : list) { + TypeCode elementTypeCode = typeConverter.apply(element); + if (elementTypeCode != firstTypeCode) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Array elements must have same type"); + } + elementSerializer.accept(element, buffer); + } + } + + public static void serializeMap(java.util.Map map, ByteBuffer buffer, + java.util.function.Function keyConverter, + java.util.function.Function typeConverter, + java.util.function.BiConsumer valueSerializer) throws ImprintException { + VarInt.encode(map.size(), buffer); + + if (map.isEmpty()) return; + + var iterator = map.entrySet().iterator(); + var first = iterator.next(); + + // 
Convert key and value to determine types + MapKey firstKey = keyConverter.apply(first.getKey()); + TypeCode firstValueType = typeConverter.apply(first.getValue()); + + buffer.put(firstKey.getTypeCode().getCode()); + buffer.put(firstValueType.getCode()); + + // Serialize first pair + serializeMapKeyDirect(firstKey, buffer); + valueSerializer.accept(first.getValue(), buffer); + + // Serialize remaining pairs + while (iterator.hasNext()) { + var entry = iterator.next(); + MapKey key = keyConverter.apply(entry.getKey()); + TypeCode valueType = typeConverter.apply(entry.getValue()); + + if (key.getTypeCode() != firstKey.getTypeCode()) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map keys must have same type"); + } + if (valueType != firstValueType) { + throw new ImprintException(ErrorType.SCHEMA_ERROR, + "Map values must have same type"); + } + + serializeMapKeyDirect(key, buffer); + valueSerializer.accept(entry.getValue(), buffer); + } + } + + private static void serializeMapKeyDirect(MapKey key, ByteBuffer buffer) throws ImprintException { + switch (key.getTypeCode()) { + case INT32: + buffer.putInt(((MapKey.Int32Key) key).getValue()); + break; + case INT64: + buffer.putLong(((MapKey.Int64Key) key).getValue()); + break; + case BYTES: + byte[] bytes = ((MapKey.BytesKey) key).getValue(); + VarInt.encode(bytes.length, buffer); + buffer.put(bytes); + break; + case STRING: + String str = ((MapKey.StringKey) key).getValue(); + byte[] stringBytes = str.getBytes(StandardCharsets.UTF_8); + VarInt.encode(stringBytes.length, buffer); + buffer.put(stringBytes); + break; + default: + throw new ImprintException(ErrorType.SERIALIZATION_ERROR, + "Invalid map key type: " + key.getTypeCode()); + } + } + + @SuppressWarnings("unused") + public static void serializeNull(ByteBuffer buffer) { + // NULL values have no payload data + } + + // Fast size estimation using heuristics + public static int estimateSize(TypeCode typeCode, Object value) { + switch (typeCode) { + case 
NULL: return 0; + case BOOL: return 1; + case INT32: + case FLOAT32: + return 4; + case INT64: + case FLOAT64: + return 8; + case STRING: + String str = (String) value; + return str.length() > 1000 ? 5 + str.length() * 3 : 256; + case BYTES: + byte[] bytes = (byte[]) value; + return bytes.length > 1000 ? 5 + bytes.length : 256; + case ARRAY: + return 512; // Conservative: most arrays are < 512 bytes + case MAP: + return 512; // Conservative: most maps are < 512 bytes + case ROW: + return 1024; // Conservative: most nested records are < 1KB + default: + return 64; // Fallback + } + } +} \ No newline at end of file From abfd8d8b1f5923d3f8a28656eebaa3a5d8e67ab2 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 17 Jun 2025 08:00:14 -0400 Subject: [PATCH 52/53] upate formatting and comments --- .../imprint/core/ImprintRecordBuilder.java | 23 ++++++++----------- .../imprint/types/ImprintDeserializers.java | 2 +- .../com/imprint/types/ImprintSerializers.java | 11 ++++----- src/main/java/com/imprint/types/MapKey.java | 3 +-- src/main/java/com/imprint/types/TypeCode.java | 5 ++-- .../com/imprint/profile/ProfilerTest.java | 2 +- 6 files changed, 20 insertions(+), 26 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 17c337d..166f1f8 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -6,6 +6,7 @@ import com.imprint.types.ImprintSerializers; import com.imprint.types.MapKey; import com.imprint.types.TypeCode; +import com.imprint.util.VarInt; import lombok.SneakyThrows; import java.nio.BufferOverflowException; @@ -378,7 +379,7 @@ private void serializeFieldValue(FieldValue fieldValue, ByteBuffer buffer) throw serializeMap((Map) value, buffer); break; case ROW: - // Nested record serialization + // Nested records var nestedRecord = (ImprintRecord) value; var serializedRow = 
nestedRecord.serializeToBuffer(); buffer.put(serializedRow); @@ -387,7 +388,8 @@ private void serializeFieldValue(FieldValue fieldValue, ByteBuffer buffer) throw throw new ImprintException(ErrorType.SERIALIZATION_ERROR, "Unknown type code: " + typeCode); } } - + + //TODO arrays and maps need to be handled better private void serializeArray(List list, ByteBuffer buffer) throws ImprintException { ImprintSerializers.serializeArray(list, buffer, this::getTypeCodeForObject, @@ -400,8 +402,7 @@ private void serializeMap(Map map, ByteBuffer buffer) throws ImprintExcept this::getTypeCodeForObject, this::serializeObjectDirect); } - - // Helper methods for static serializers + private TypeCode getTypeCodeForObject(Object obj) { var fieldValue = convertToFieldValue(obj); try { @@ -466,25 +467,21 @@ private static void writeDirectoryToBuffer(short[] sortedKeys, Object[] sortedVa if (fieldCount < 128) { buffer.put((byte) fieldCount); } else { - com.imprint.util.VarInt.encode(fieldCount, buffer); + VarInt.encode(fieldCount, buffer); } // Early return for empty directory - if (fieldCount == 0) { + if (fieldCount == 0) return; - } + + //hopefully JIT vectorizes this for (int i = 0; i < fieldCount; i++) { var fieldValue = (FieldValue) sortedValues[i]; - - // Get current position once, then batch write int pos = buffer.position(); - - // Write all 7 bytes for this entry in sequence - buffer.putShort(pos, sortedKeys[i]); // bytes 0-1: field ID + buffer.putShort(pos, sortedKeys[i]); // bytes 0-1: field ID buffer.put(pos + 2, fieldValue.typeCode); // byte 2: type code buffer.putInt(pos + 3, offsets[i]); // bytes 3-6: offset - // Advance buffer position by 7 bytes buffer.position(pos + 7); } diff --git a/src/main/java/com/imprint/types/ImprintDeserializers.java b/src/main/java/com/imprint/types/ImprintDeserializers.java index d579390..18f561d 100644 --- a/src/main/java/com/imprint/types/ImprintDeserializers.java +++ b/src/main/java/com/imprint/types/ImprintDeserializers.java @@ -14,7 
+14,7 @@ @UtilityClass public final class ImprintDeserializers { - // Primitive deserializers (optimized, no Value objects) + // Primitive boxed deserializers public static Object deserializePrimitive(ByteBuffer buffer, TypeCode typeCode) throws ImprintException { switch (typeCode) { case NULL: diff --git a/src/main/java/com/imprint/types/ImprintSerializers.java b/src/main/java/com/imprint/types/ImprintSerializers.java index 3c4a332..f76f444 100644 --- a/src/main/java/com/imprint/types/ImprintSerializers.java +++ b/src/main/java/com/imprint/types/ImprintSerializers.java @@ -140,10 +140,10 @@ private static void serializeMapKeyDirect(MapKey key, ByteBuffer buffer) throws @SuppressWarnings("unused") public static void serializeNull(ByteBuffer buffer) { - // NULL values have no payload data + // NULL values have no payload data but the method helps intent } - // Fast size estimation using heuristics + // Rough size estimate since actual takes time; might be able to accomodate this better with a growable buffer though public static int estimateSize(TypeCode typeCode, Object value) { switch (typeCode) { case NULL: return 0; @@ -161,13 +161,12 @@ public static int estimateSize(TypeCode typeCode, Object value) { byte[] bytes = (byte[]) value; return bytes.length > 1000 ? 
5 + bytes.length : 256; case ARRAY: - return 512; // Conservative: most arrays are < 512 bytes case MAP: - return 512; // Conservative: most maps are < 512 bytes + return 512; //just rough estimate/guess for now; case ROW: - return 1024; // Conservative: most nested records are < 1KB + return 1024; default: - return 64; // Fallback + return 64; } } } \ No newline at end of file diff --git a/src/main/java/com/imprint/types/MapKey.java b/src/main/java/com/imprint/types/MapKey.java index 640d26b..5961f4b 100644 --- a/src/main/java/com/imprint/types/MapKey.java +++ b/src/main/java/com/imprint/types/MapKey.java @@ -49,8 +49,7 @@ public static MapKey fromPrimitive(TypeCode typeCode, Object primitiveValue) thr case STRING: return fromString((String) primitiveValue); default: - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Cannot convert " + typeCode + " to MapKey"); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Cannot convert " + typeCode + " to MapKey"); } } diff --git a/src/main/java/com/imprint/types/TypeCode.java b/src/main/java/com/imprint/types/TypeCode.java index a2e63e2..7c80d87 100644 --- a/src/main/java/com/imprint/types/TypeCode.java +++ b/src/main/java/com/imprint/types/TypeCode.java @@ -7,6 +7,7 @@ /** * Type codes for Imprint values. 
*/ +@Getter public enum TypeCode { NULL(0x0), BOOL(0x1), @@ -20,7 +21,6 @@ public enum TypeCode { MAP(0x9), ROW(0xA); // TODO: implement (basically a placeholder for user-defined type) - @Getter private final byte code; private static final TypeCode[] LOOKUP = new TypeCode[11]; @@ -40,7 +40,6 @@ public static TypeCode fromByte(byte code) throws ImprintException { var type = LOOKUP[code]; if (type != null) return type; } - throw new ImprintException(ErrorType.INVALID_TYPE_CODE, - "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); + throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: 0x" + Integer.toHexString(code & 0xFF)); } } \ No newline at end of file diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3b50f35..5c38457 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -10,7 +10,7 @@ import java.util.stream.IntStream; -//@Disabled +@Disabled public class ProfilerTest { private static final int RECORD_SIZE = 50; From 3142025c57c192aaa8354a7365b97a6060a960b9 Mon Sep 17 00:00:00 2001 From: expand3d <> Date: Tue, 17 Jun 2025 08:28:25 -0400 Subject: [PATCH 53/53] update comments --- .../imprint/core/ImprintRecordBuilder.java | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/src/main/java/com/imprint/core/ImprintRecordBuilder.java b/src/main/java/com/imprint/core/ImprintRecordBuilder.java index 166f1f8..967aac7 100644 --- a/src/main/java/com/imprint/core/ImprintRecordBuilder.java +++ b/src/main/java/com/imprint/core/ImprintRecordBuilder.java @@ -209,19 +209,13 @@ public ByteBuffer buildToBuffer() throws ImprintException { @SneakyThrows private ImprintRecordBuilder addField(int id, FieldValue fieldValue) { Objects.requireNonNull(fieldValue, "FieldValue cannot be null"); - - // Calculate size for tracking using fast heuristics int newSize = estimateFieldSize(fieldValue); - 
- // Efficient put with old value return - single hash operation var oldEntry = fields.putAndReturnOld(id, fieldValue); if (oldEntry != null) { - // Field replacement - subtract old size, add new size int oldSize = estimateFieldSize(oldEntry); estimatedPayloadSize += newSize - oldSize; } else { - // New field - just add new size estimatedPayloadSize += newSize; } @@ -232,8 +226,6 @@ private FieldValue convertToFieldValue(Object obj) { if (obj == null) { return FieldValue.ofNull(); } - - // Direct primitive conversion - no Value object creation if (obj instanceof Boolean) { return FieldValue.ofBool((Boolean) obj); } @@ -285,9 +277,6 @@ private MapKey convertToMapKey(Object obj) { throw new IllegalArgumentException("Unsupported map key type: " + obj.getClass().getName()); } - /** - * Fast field size estimation using heuristics for performance. - */ private int estimateFieldSize(FieldValue fieldValue) { TypeCode typeCode; try { @@ -298,18 +287,11 @@ private int estimateFieldSize(FieldValue fieldValue) { return ImprintSerializers.estimateSize(typeCode, fieldValue.value); } - /** - * Get current estimated payload size with 25% buffer. - */ private int calculateConservativePayloadSize() { // Add 25% buffer for safety margin return Math.max(estimatedPayloadSize + (estimatedPayloadSize / 4), 4096); } - - /** - * Result of payload serialization containing offsets and final payload buffer. - */ private static class PayloadSerializationResult { final int[] offsets; final ByteBuffer payload; @@ -320,18 +302,12 @@ private static class PayloadSerializationResult { } } - /** - * Serialize payload with conservative buffer size multiplier. 
- */ private PayloadSerializationResult serializePayload(Object[] sortedFields, int fieldCount, int conservativeSize, int sizeMultiplier) throws ImprintException { var payloadBuffer = ByteBuffer.allocate(conservativeSize * sizeMultiplier); payloadBuffer.order(ByteOrder.LITTLE_ENDIAN); return doSerializePayload(sortedFields, fieldCount, payloadBuffer); } - /** - * Core payload serialization logic. - */ private PayloadSerializationResult doSerializePayload(Object[] sortedFields, int fieldCount, ByteBuffer payloadBuffer) throws ImprintException { int[] offsets = new int[fieldCount]; for (int i = 0; i < fieldCount; i++) {