diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0e43cb..378ebb7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ main, dev ] + branches: [ main ] pull_request: - branches: [ main, dev ] + branches: [ main ] jobs: test: @@ -41,224 +41,4 @@ jobs: run: ./gradlew test - name: Run build - run: ./gradlew build - - benchmark: - runs-on: ubuntu-latest - needs: test - # Add explicit permissions for commenting on PRs - permissions: - contents: read - pull-requests: write - issues: write - # Only run benchmarks on main branch pushes and PRs to main to avoid excessive CI time - if: github.ref == 'refs/heads/main' || github.base_ref == 'main' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 11 - uses: actions/setup-java@v4 - with: - java-version: '11' - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Create benchmark results directory - run: mkdir -p benchmark-results - - - name: Run serialization benchmarks - run: | - ./gradlew jmhRunSerializationBenchmarks - continue-on-error: true - - - name: Run deserialization benchmarks - run: | - ./gradlew jmhRunDeserializationBenchmarks - continue-on-error: true - - - name: Run field access benchmarks - run: | - ./gradlew jmhRunFieldAccessBenchmarks - continue-on-error: true - - - name: Run size comparison benchmarks - run: | - ./gradlew jmhRunSizeComparisonBenchmarks - continue-on-error: true - - - name: Upload benchmark results - uses: actions/upload-artifact@v4 - if: always() - with: - name: benchmark-results-${{ github.sha }} - path: benchmark-results/ - retention-days: 30 - - - name: Comment benchmark results on PR - if: github.event_name == 'pull_request' - uses: actions/github-script@v7 - continue-on-error: true - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - try { - const fs = require('fs'); - const path = require('path'); - - // Find the latest benchmark results file - const resultsDir = 'benchmark-results'; - let latestFile = null; - let latestTime = 0; - - if (fs.existsSync(resultsDir)) { - const files = fs.readdirSync(resultsDir); - for (const file of files) { - if (file.endsWith('.json')) { - const filePath = path.join(resultsDir, file); - const stats = fs.statSync(filePath); - if (stats.mtime.getTime() > latestTime) { - latestTime = stats.mtime.getTime(); - latestFile = filePath; - } - } - } - } - - if (latestFile) { - console.log(`šŸ“Š Found benchmark results: ${latestFile}`); - const results = JSON.parse(fs.readFileSync(latestFile, 'utf8')); - - // Group results by benchmark type - const serialization = results.filter(r => r.benchmark.includes('serialize')); - const deserialization = results.filter(r => r.benchmark.includes('deserialize')); - const fieldAccess = results.filter(r => r.benchmark.includes('singleFieldAccess')); - const sizes = results.filter(r => r.benchmark.includes('measure')); - - // Format results into a table - const formatResults = (benchmarks, title) => { - if (benchmarks.length === 0) return ''; - - let table = `\n### ${title}\n\n| Library | Score (ns/op) | Error | Unit |\n|---------|---------------|-------|------|\n`; - - benchmarks - .sort((a, b) => a.primaryMetric.score - b.primaryMetric.score) - .forEach(benchmark => { - const name = benchmark.benchmark.split('.').pop().replace(/serialize|deserialize|singleFieldAccess|measure/, '').replace(/Imprint|JacksonJson|Kryo|MessagePack|Avro|Protobuf|FlatBuffers/, (match) => match); - const score = benchmark.primaryMetric.score.toFixed(2); - const error = benchmark.primaryMetric.scoreError.toFixed(2); - const unit = benchmark.primaryMetric.scoreUnit; - table += `| ${name} | ${score} | ±${error} | ${unit} |\n`; - }); - - return table; - }; - - const comment = `## šŸ“Š Benchmark Results - - Benchmark comparison between Imprint and other serialization libraries: - ${formatResults(serialization, 'Serialization Performance')} - ${formatResults(deserialization, 'Deserialization Performance')} - ${formatResults(fieldAccess, 'Single Field Access Performance')} - ${formatResults(sizes, 'Serialized Size Comparison')} - -
- View detailed results - - Results generated from commit: \`${context.sha.substring(0, 7)}\` - - Lower scores are better for performance benchmarks. - -
`; - - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: comment - }); - - console.log('āœ… Successfully posted benchmark results to PR'); - } else { - console.log('āš ļø No benchmark results found'); - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: '## šŸ“Š Benchmark Results\n\nBenchmark execution completed but no results file was found. Check the [workflow logs](' + - `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + ') for details.' - }); - } - } catch (error) { - console.log('āŒ Failed to post benchmark comment:', error.message); - console.log('šŸ“ Benchmark results are still available in workflow artifacts'); - - // Try to post a simple error message - try { - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: `## šŸ“Š Benchmark Results\n\nāš ļø Failed to process benchmark results automatically.\n\nResults are available in the [workflow artifacts](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).` - }); - } catch (commentError) { - console.log('āŒ Also failed to post error comment:', commentError.message); - } - } - - # Optional: Run full benchmark suite on releases - benchmark-full: - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - permissions: - contents: read - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 11 - uses: actions/setup-java@v4 - with: - java-version: '11' - distribution: 'temurin' - - - name: Cache Gradle dependencies - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: | - ${{ runner.os }}-gradle- - - - name: Make gradlew executable - run: chmod +x ./gradlew - - - name: Create benchmark results directory - run: mkdir -p benchmark-results - - - name: Run full benchmark suite - run: | - ./gradlew jmhRunAllBenchmarks - - - name: Upload full benchmark results - uses: actions/upload-artifact@v4 - with: - name: full-benchmark-results-${{ github.ref_name }} - path: benchmark-results/ - retention-days: 90 \ No newline at end of file + run: ./gradlew build \ No newline at end of file diff --git a/build.gradle b/build.gradle index d9093f9..33b1645 100644 --- a/build.gradle +++ b/build.gradle @@ -166,11 +166,11 @@ test { } } -// JMH configuration - optimized for Java 11 +// JMH configuration jmh { - fork = 1 - warmupIterations = 2 // Reduced for faster CI - iterations = 3 // Reduced for faster CI + fork = 2 + warmupIterations = 3 + iterations = 5 resultFormat = 'JSON' includeTests = false resultsFile = file("${projectDir}/benchmark-results/jmh-results-${new Date().format('yyyy-MM-dd-HHmmss')}.json") @@ -184,133 +184,6 @@ jmh { ] } -// Create individual benchmark tasks for CI pipeline -tasks.register('jmhRunSerializationBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run serialization benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runSerializationBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - -tasks.register('jmhRunDeserializationBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run deserialization benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runDeserializationBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - -tasks.register('jmhRunFieldAccessBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run field access benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runFieldAccessBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - -tasks.register('jmhRunSizeComparisonBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run size comparison benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runSizeComparisonBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - -tasks.register('jmhRunMergeBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run merge operation benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runMergeBenchmarks'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - -tasks.register('jmhRunAllBenchmarks', JavaExec) { - dependsOn compileJmhJava - description = 'Run all comparison benchmarks' - group = 'benchmarking' - - classpath = sourceSets.jmh.runtimeClasspath - mainClass = 'com.imprint.benchmark.ComparisonBenchmark' - args = ['runAll'] - - // Java 11 optimized JVM settings - jvmArgs = [ - '-XX:+UseG1GC', - '-Xmx2g', - '-XX:+UnlockExperimentalVMOptions' - ] - - doFirst { - file("${projectDir}/benchmark-results").mkdirs() - } -} - compileJava { options.compilerArgs << '-Xlint:unchecked' options.deprecation = true diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index ee32ff0..4d9c01c 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -10,7 +10,6 @@ import com.imprint.core.SchemaId; import com.imprint.types.MapKey; import com.imprint.types.Value; -import lombok.NoArgsConstructor; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; @@ -38,8 +37,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) -@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) @SuppressWarnings("unused") public class ComparisonBenchmark { @@ -91,6 +90,7 @@ public void setup() throws Exception { } // ===== SERIALIZATION BENCHMARKS ===== + @Benchmark public void serializeImprint(Blackhole bh) throws Exception { ByteBuffer result = serializeWithImprint(testData); @@ -133,13 +133,22 @@ public void serializeFlatBuffers(Blackhole bh) { bh.consume(result); } - // ===== DESERIALIZATION BENCHMARKS ===== + // ===== SETUP ONLY ===== + @Benchmark - public void deserializeImprint(Blackhole bh) throws Exception { + public void deserializeSetupImprint(Blackhole bh) throws Exception { ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); bh.consume(result); } + @Benchmark + public void deserializeSetupFlatBuffers(Blackhole bh) { + TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + bh.consume(result); + } + + // ===== FULL DESERIALIZATION BENCHMARKS ===== + @Benchmark public void deserializeJacksonJson(Blackhole bh) throws Exception { TestRecord result = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); @@ -172,14 +181,54 @@ public void deserializeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } + @Benchmark + public void deserializeImprint(Blackhole bh) throws Exception { + ImprintRecord result = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); + // Access all fields to force full deserialization + result.getInt32(1); // id + result.getString(2); // name + result.getFloat64(3); // price + result.getBoolean(4); // active + result.getString(5); // category + result.getArray(6); // tags + result.getMap(7); // metadata + for (int i = 8; i < 21; i++) { + result.getString(i); // extraData fields + } + + bh.consume(result); + } + @Benchmark public void deserializeFlatBuffers(Blackhole bh) { TestRecordFB result = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); + + // Access all fields + result.id(); + result.name(); + result.price(); + result.active(); + result.category(); + // Access all tags + for (int i = 0; i < result.tagsLength(); i++) { + result.tags(i); + } + // Access all metadata + for (int i = 0; i < result.metadataKeysLength(); i++) { + result.metadataKeys(i); + result.metadataValues(i); + } + // Access all extra data + for (int i = 0; i < result.extraDataLength(); i++) { + result.extraData(i); + } + bh.consume(result); } // ===== FIELD ACCESS BENCHMARKS ===== - // Tests accessing a single field near the end of a large record + // Tests accessing a single field near the end of a record + @Benchmark public void singleFieldAccessImprint(Blackhole bh) throws Exception { ImprintRecord record = ImprintRecord.deserialize(imprintBytesBuffer.duplicate()); @@ -264,7 +313,7 @@ public void measureFlatBuffersSize(Blackhole bh) { // ===== MERGE SIMULATION BENCHMARKS ===== - @Benchmark + //@Benchmark public void mergeImprint(Blackhole bh) throws Exception { var record1Buffer = imprintBytesBuffer.duplicate(); var record2Data = createTestRecord2(); @@ -277,7 +326,7 @@ public void mergeImprint(Blackhole bh) throws Exception { bh.consume(merged); } - @Benchmark + //@Benchmark public void mergeJacksonJson(Blackhole bh) throws Exception { var record1 = jacksonJsonMapper.readValue(jacksonJsonBytes, TestRecord.class); var record2Data = createTestRecord2(); @@ -289,7 +338,7 @@ public void mergeJacksonJson(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeKryo(Blackhole bh) { Input input1 = new Input(new ByteArrayInputStream(kryoBytes)); var record1 = kryo.readObject(input1, TestRecord.class); @@ -306,7 +355,7 @@ public void mergeKryo(Blackhole bh) { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeMessagePack(Blackhole bh) throws Exception { var record1 = messagePackMapper.readValue(messagePackBytes, TestRecord.class); var record2Data = createTestRecord2(); @@ -318,7 +367,7 @@ public void mergeMessagePack(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeAvro(Blackhole bh) throws Exception { var record1 = deserializeWithAvro(avroBytes); var record2Data = createTestRecord2(); @@ -330,7 +379,7 @@ public void mergeAvro(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeProtobuf(Blackhole bh) throws Exception { var record1 = TestRecordProto.TestRecord.parseFrom(protobufBytes); var record2Data = createTestRecord2(); @@ -342,7 +391,7 @@ public void mergeProtobuf(Blackhole bh) throws Exception { bh.consume(result); } - @Benchmark + //@Benchmark public void mergeFlatBuffers(Blackhole bh) { var record1 = TestRecordFB.getRootAsTestRecordFB(flatbuffersBytes.duplicate()); var record2Data = createTestRecord2(); @@ -691,8 +740,8 @@ private ByteBuffer mergeFlatBuffersRecords(TestRecordFB first, TestRecordFB seco FlatBufferBuilder builder = new FlatBufferBuilder(1024); // Use second record's values if they exist, otherwise first record's values - String name = second.name() != null && !Objects.requireNonNull(second.name()).isEmpty() ? second.name() : first.name(); - String category = second.category() != null && !Objects.requireNonNull(second.category()).isEmpty() ? second.category() : first.category(); + String name = second.name() != null && !second.name().isEmpty() ? second.name() : first.name(); + String category = second.category() != null && !second.category().isEmpty() ? second.category() : first.category(); double price = second.price() != 0.0 ? second.price() : first.price(); boolean active = second.active(); // Use second's boolean value int id = first.id(); // Keep first record's ID @@ -806,7 +855,6 @@ var record = new TestRecord(); } // Test data class for other serialization libraries - @NoArgsConstructor public static class TestRecord { public int id; public String name; @@ -815,7 +863,8 @@ public static class TestRecord { public String category; public List tags = new ArrayList<>(); public Map metadata = new HashMap<>(); - // Fields 8-20 for large record test - public List extraData = new ArrayList<>(); + public List extraData = new ArrayList<>(); // Fields 8-20 for large record test + + public TestRecord() {} // Required for deserialization } } \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java new file mode 100644 index 0000000..f6a341b --- /dev/null +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -0,0 +1,451 @@ +package com.imprint.core; + +import com.imprint.Constants; +import com.imprint.error.ErrorType; +import com.imprint.error.ImprintException; +import com.imprint.types.TypeCode; +import com.imprint.util.VarInt; +import lombok.Getter; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * Manages the raw buffers for an Imprint record with lazy directory parsing. + * Encapsulates all buffer operations and provides zero-copy field access. + * + *

Buffer Layout Overview:

+ *
+ * directoryBuffer: [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
+ * payload:         [Field 1 data][Field 2 data]...[Field N data]
+ * 
+ * + *

Each DirectoryEntry contains: [fieldId:2bytes][typeCode:1byte][offset:4bytes]

+ */ +@Getter +public final class ImprintBuffers { + private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) + private final ByteBuffer payload; // Read-only payload view + + // Lazy-loaded directory state + private List parsedDirectory; + private boolean directoryParsed = false; + private int directoryCount = -1; // Cached count to avoid repeated VarInt decoding + + /** + * Creates buffers from raw data (used during deserialization). + * + * @param directoryBuffer Raw directory bytes including VarInt count and all entries. + * Format: [VarInt count][Entry1][Entry2]...[EntryN] + * @param payload Raw payload data containing all field values sequentially + */ + public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { + this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); + this.payload = payload.asReadOnlyBuffer(); + } + + /** + * Creates buffers from pre-parsed directory (used during construction). + * This is more efficient when the directory is already known. + * + * @param directory Parsed directory entries, must be sorted by fieldId + * @param payload Raw payload data containing all field values + */ + public ImprintBuffers(List directory, ByteBuffer payload) { + this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory)); + this.directoryParsed = true; + this.directoryCount = directory.size(); + this.payload = payload.asReadOnlyBuffer(); + this.directoryBuffer = createDirectoryBuffer(directory); + } + + /** + * Get a zero-copy ByteBuffer view of a field's data. + * + *

Buffer Positioning Logic:

+ *
    + *
  1. Find the directory entry for the requested fieldId
  2. + *
  3. Use entry.offset as start position in payload
  4. + *
  5. Find end position by looking at next field's offset (or payload end)
  6. + *
  7. Create a slice view: payload[startOffset:endOffset]
  8. + *
+ * + * @param fieldId The field identifier to retrieve + * @return Zero-copy ByteBuffer positioned at field data, or null if field not found + * @throws ImprintException if buffer bounds are invalid or directory is corrupted + */ + public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { + var entry = findDirectoryEntry(fieldId); + if (entry == null) + return null; + + int startOffset = entry.getOffset(); + int endOffset = findEndOffset(entry); + + if (startOffset < 0 || endOffset < 0 || startOffset > payload.limit() || + endOffset > payload.limit() || startOffset > endOffset) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); + } + + ByteBuffer fieldBuffer = payload.duplicate(); + fieldBuffer.position(startOffset).limit(endOffset); + return fieldBuffer; + } + + /** + * Find a directory entry for the given field ID using the most efficient method. + * + *

Search Strategy:

+ *
    + *
  • If directory is parsed: binary search on in-memory List<DirectoryEntry>
  • + *
  • If directory is raw: binary search directly on raw bytes (faster for single lookups)
  • + *
+ * @param fieldId The field identifier to find + * @return DirectoryEntry if found, null otherwise + * @throws ImprintException if directory buffer is corrupted or truncated + */ + public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { + if (directoryParsed) { + int index = findDirectoryIndexInParsed(fieldId); + return index >= 0 ? parsedDirectory.get(index) : null; + } else { + return findFieldEntryInRawDirectory(fieldId); + } + } + + /** + * Get the full directory, parsing it if necessary. + * + *

Lazy Parsing Behavior:

+ *
    + *
  • First call: Parses entire directory from raw bytes into List<DirectoryEntry>
  • + *
  • Subsequent calls: Returns cached parsed directory
  • + *
  • Note - the method is not synchronized and assumes single-threaded usage.
  • + *
+ * + *

When to use: Call this if you need to access multiple fields + * from the same record. For single field access, direct field getters are more efficient.

+ * + * @return Immutable list of directory entries, sorted by fieldId + */ + public List getDirectory() { + ensureDirectoryParsed(); + return parsedDirectory; + } + + /** + * Get the directory count without fully parsing the directory. + *

+ * This method avoids parsing the entire directory when only the count is needed. + *

    + *
  1. Return cached count if available (directoryCount >= 0)
  2. + *
  3. Return parsed directory size if directory is already parsed
  4. + *
  5. Decode VarInt from raw buffer and cache the result
  6. + *
+ * + *

VarInt Decoding: The count is stored as a VarInt at the beginning + * of the directoryBuffer. This method reads just enough bytes to decode the count.

+ * + * @return Number of fields in the directory, or 0 if decoding fails + */ + public int getDirectoryCount() { + if (directoryCount >= 0) + return directoryCount; + if (directoryParsed) + return parsedDirectory.size(); + + // Decode from buffer and cache + try { + var countBuffer = directoryBuffer.duplicate(); + directoryCount = VarInt.decode(countBuffer).getValue(); + return directoryCount; + } catch (Exception e) { + return 0; + } + } + + /** + * Create a new buffer containing the serialized directory. + * + *

Output Format:

+ *
+     * [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
+     * 
+ * + *

Each DirectoryEntry is serialized as: [fieldId:2bytes][typeCode:1byte][offset:4bytes]

+ * + * + * @return New ByteBuffer containing the complete serialized directory + */ + public ByteBuffer serializeDirectory() { + ensureDirectoryParsed(); + return createDirectoryBuffer(parsedDirectory); + } + + // ========== PRIVATE METHODS ========== + + /** + * Binary search on raw directory bytes to find a specific field. + * + *
    + *
  1. Position buffer at start and decode VarInt count (cache for future use)
  2. + *
  3. Calculate directory start position after VarInt
  4. + *
  5. For binary search mid-point: entryPos = startPos + (mid * DIR_ENTRY_BYTES)
  6. + *
  7. Read fieldId from calculated position (first 2 bytes of entry)
  8. + *
  9. Compare fieldId and adjust search bounds
  10. + *
  11. When found: reposition buffer and deserialize complete entry
  12. + *
+ * + *

All buffer positions are bounds-checked before access.

+ * + * @param fieldId Field identifier to search for + * @return Complete DirectoryEntry if found, null if not found + * @throws ImprintException if buffer is truncated or corrupted + */ + private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { + var searchBuffer = directoryBuffer.duplicate(); + searchBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Decode directory count (cache it) + if (directoryCount < 0) + directoryCount = VarInt.decode(searchBuffer).getValue(); + else + VarInt.decode(searchBuffer); // Skip past the count + + if (directoryCount == 0) + return null; + + int directoryStartPos = searchBuffer.position(); + int low = 0; + int high = directoryCount - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > searchBuffer.limit()) { + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Directory entry at position " + entryPos + " exceeds buffer limit"); + } + + searchBuffer.position(entryPos); + short midFieldId = searchBuffer.getShort(); + + if (midFieldId < fieldId) { + low = mid + 1; + } else if (midFieldId > fieldId) { + high = mid - 1; + } else { + // Found it - read the complete entry + searchBuffer.position(entryPos); + return deserializeDirectoryEntry(searchBuffer); + } + } + + return null; + } + + /** + * + * @param fieldId Field identifier to find + * @return Index of the field if found, or negative insertion point if not found + */ + private int findDirectoryIndexInParsed(int fieldId) { + if (!directoryParsed) + return -1; + int low = 0; + int high = parsedDirectory.size() - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int midFieldId = parsedDirectory.get(mid).getId(); + if (midFieldId < fieldId) + low = mid + 1; + else if (midFieldId > fieldId) + high = mid - 1; + else + return mid; + } + return -(low + 1); + } + + /** + * Find the end offset for a field by looking at the next field's offset. + * + *
    + *
  • Field data spans from: entry.offset to nextField.offset (exclusive)
  • + *
  • Last field spans from: entry.offset to payload.limit()
  • + *
  • This works because directory entries are sorted by fieldId
  • + *
+ * + *

Search Strategy:

+ *
    + *
  • If directory parsed: Use binary search result + 1 to get next entry
  • + *
  • If directory raw: Scan raw entries until fieldId > currentFieldId
  • + *
+ * + * @param entry The directory entry whose end offset we need to find + * @return End offset (exclusive) for the field data + * @throws ImprintException if directory scanning fails + */ + private int findEndOffset(DirectoryEntry entry) throws ImprintException { + if (directoryParsed) { + int entryIndex = findDirectoryIndexInParsed(entry.getId()); + return (entryIndex + 1 < parsedDirectory.size()) ? + parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); + } else + return findNextOffsetInRawDirectory(entry.getId()); + } + + /** + * Scan raw directory to find the next field's offset after currentFieldId. + * + *
    + *
  1. Position buffer after VarInt count
  2. + *
  3. For each directory entry at position: startPos + (i * DIR_ENTRY_BYTES)
  4. + *
  5. Read fieldId (first 2 bytes) and offset (bytes 3-6)
  6. + *
  7. Return offset of first field where fieldId > currentFieldId
  8. + *
  9. If no next field found, return payload.limit()
  10. + *
+ * + * @param currentFieldId Find the next field after this fieldId + * @return Offset where the next field starts, or payload.limit() if this is the last field + * @throws ImprintException if directory buffer is corrupted + */ + private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { + var scanBuffer = directoryBuffer.duplicate(); + scanBuffer.order(ByteOrder.LITTLE_ENDIAN); + + int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue(); + if (count == 0) + return payload.limit(); + if (directoryCount >= 0) + VarInt.decode(scanBuffer); // Skip count if cached + + int directoryStartPos = scanBuffer.position(); + + for (int i = 0; i < count; i++) { + int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); + + if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) + return payload.limit(); + + scanBuffer.position(entryPos); + short fieldId = scanBuffer.getShort(); + scanBuffer.get(); // skip type + int offset = scanBuffer.getInt(); + + if (fieldId > currentFieldId) + return offset; + } + + return payload.limit(); + } + + /** + * Parse the full directory if not already parsed. + * + *
    + *
  1. Duplicate directoryBuffer to avoid affecting original position
  2. + *
  3. Set byte order to LITTLE_ENDIAN for consistent reading
  4. + *
  5. Decode VarInt count and cache it
  6. + *
  7. Read 'count' directory entries sequentially
  8. + *
  9. Each entry: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
  10. + *
  11. Store as immutable list and mark as parsed
  12. + *
+ * + *

Error Handling: If parsing fails, throws RuntimeException + * since this indicates corrupted data that should never happen in normal operation.

+ * + *

Will return immediately if directory has already been parsed.

+ */ + private void ensureDirectoryParsed() { + if (directoryParsed) + return; + try { + var parseBuffer = directoryBuffer.duplicate(); + parseBuffer.order(ByteOrder.LITTLE_ENDIAN); + + var countResult = VarInt.decode(parseBuffer); + int count = countResult.getValue(); + this.directoryCount = count; + + var directory = new ArrayList(count); + for (int i = 0; i < count; i++) { + directory.add(deserializeDirectoryEntry(parseBuffer)); + } + + this.parsedDirectory = Collections.unmodifiableList(directory); + this.directoryParsed = true; + } catch (ImprintException e) { + throw new RuntimeException("Failed to parse directory", e); + } + } + + /** + * Create directory buffer from parsed entries. + * + *

Serialization Format:

+ *
    + *
  1. Calculate buffer size: VarInt.encodedLength(count) + (count * DIR_ENTRY_BYTES)
  2. + *
  3. Allocate ByteBuffer with LITTLE_ENDIAN byte order
  4. + *
  5. Write VarInt count
  6. + *
  7. Write each directory entry: [fieldId:2][typeCode:1][offset:4]
  8. + *
  9. Flip buffer and return read-only view
  10. + *
+ * + * @param directory List of directory entries to serialize + * @return Read-only ByteBuffer containing serialized directory, or empty buffer on error + */ + private ByteBuffer createDirectoryBuffer(List directory) { + try { + int bufferSize = VarInt.encodedLength(directory.size()) + + (directory.size() * Constants.DIR_ENTRY_BYTES); + var buffer = ByteBuffer.allocate(bufferSize); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + VarInt.encode(directory.size(), buffer); + for (var entry : directory) { + serializeDirectoryEntry(entry, buffer); + } + + buffer.flip(); + return buffer.asReadOnlyBuffer(); + } catch (Exception e) { + return ByteBuffer.allocate(0).asReadOnlyBuffer(); + } + } + + /** + * Serialize a single directory entry to the buffer. + * Format: [fieldId:2bytes][typeCode:1byte][offset:4bytes] + */ + private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { + buffer.putShort(entry.getId()); + buffer.put(entry.getTypeCode().getCode()); + buffer.putInt(entry.getOffset()); + } + + /** + * Deserialize a single directory entry from the buffer. + * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] + * + * @param buffer Buffer positioned at the start of a directory entry + * @return Parsed DirectoryEntry + * @throws ImprintException if buffer doesn't contain enough bytes + */ + private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for directory entry"); + + short id = buffer.getShort(); + var typeCode = TypeCode.fromByte(buffer.get()); + int offset = buffer.getInt(); + + return new DirectoryEntry(id, typeCode, offset); + } +} \ No newline at end of file diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index e7dab70..6abc9cf 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -11,87 +11,127 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.*; +import java.util.List; +import java.util.Map; +import java.util.Objects; /** - * An Imprint record containing a header, field directory, and payload. - * Uses ByteBuffer for zero-copy operations to achieve low latency. - * - *

Performance Note: All ByteBuffers should be array-backed - * (hasArray() == true) for optimal zero-copy performance. Direct buffers - * may cause performance degradation.

+ * An Imprint record containing a header and buffer management. + * Delegates all buffer operations to ImprintBuffers for cleaner separation. */ @Getter public final class ImprintRecord { private final Header header; - private final List directory; - private final ByteBuffer payload; // Read-only view for zero-copy + private final ImprintBuffers buffers; /** - * Creates a new ImprintRecord. - * - * @param payload the payload buffer. Should be array-backed for optimal performance. + * Creates a record from deserialized components. */ - public ImprintRecord(Header header, List directory, ByteBuffer payload) { + private ImprintRecord(Header header, ImprintBuffers buffers) { this.header = Objects.requireNonNull(header, "Header cannot be null"); - this.directory = Collections.unmodifiableList(Objects.requireNonNull(directory, "Directory cannot be null")); - this.payload = payload.asReadOnlyBuffer(); // Zero-copy read-only view + this.buffers = Objects.requireNonNull(buffers, "Buffers cannot be null"); } + /** + * Creates a record from pre-parsed directory (used by ImprintWriter). + */ + ImprintRecord(Header header, List directory, ByteBuffer payload) { + this.header = Objects.requireNonNull(header, "Header cannot be null"); + this.buffers = new ImprintBuffers(directory, payload); + } + + // ========== FIELD ACCESS METHODS ========== + /** * Get a value by field ID, deserializing it on demand. * Returns null if the field is not found. - * Note: If the field exists and is an explicit NULL type, this will return Value.NullValue.INSTANCE */ public Value getValue(int fieldId) throws ImprintException { - var fieldBuffer = getFieldBuffer(fieldId); - if (fieldBuffer == null) { + var entry = buffers.findDirectoryEntry(fieldId); + if (entry == null) return null; - } - int directoryIndex = findDirectoryIndex(fieldId); - if (directoryIndex < 0) { - throw new ImprintException(ErrorType.INTERNAL_ERROR, "Field ID " + fieldId + " found buffer but not in directory."); - } - var entry = directory.get(directoryIndex); + var fieldBuffer = buffers.getFieldBuffer(fieldId); + if (fieldBuffer == null) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get buffer for field " + fieldId); + return deserializeValue(entry.getTypeCode(), fieldBuffer); } /** - * Get the raw bytes for a field without deserializing. - * Returns a zero-copy ByteBuffer view, or null if field not found. + * Get raw bytes for a field without deserializing. */ public ByteBuffer getRawBytes(int fieldId) { - var fieldBuffer = getFieldBuffer(fieldId); - return fieldBuffer != null ? fieldBuffer.asReadOnlyBuffer() : null; + try { + return buffers.getFieldBuffer(fieldId); + } catch (ImprintException e) { + return null; + } } /** - * Get a ByteBuffer view of a field's data. - * Returns null if the field is not found. + * Get the directory (parsing it if necessary). */ - private ByteBuffer getFieldBuffer(int fieldId) { - int index = findDirectoryIndex(fieldId); - if (index < 0) return null; + public List getDirectory() { + return buffers.getDirectory(); + } - var entry = directory.get(index); - int startOffset = entry.getOffset(); - int endOffset = (index + 1 < directory.size()) ? - directory.get(index + 1).getOffset() : payload.limit(); + // ========== TYPED GETTERS ========== - if (startOffset > payload.limit() || endOffset > payload.limit() || startOffset > endOffset) { - return null; - } + public boolean getBoolean(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); + } + + public int getInt32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); + } + + public long getInt64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); + } - //Single allocation instead of duplicate + slice - var fieldBuffer = payload.duplicate(); - fieldBuffer.position(startOffset).limit(endOffset); - return fieldBuffer; + public float getFloat32(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); + } + + public double getFloat64(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + } + + public String getString(int fieldId) throws ImprintException { + var value = getValidatedValue(fieldId, "STRING"); + if (value instanceof Value.StringValue) + return ((Value.StringValue) value).getValue(); + if (value instanceof Value.StringBufferValue) + return ((Value.StringBufferValue) value).getValue(); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not a STRING"); + } + + public byte[] getBytes(int fieldId) throws ImprintException { + var value = getValidatedValue(fieldId, "BYTES"); + if (value instanceof Value.BytesValue) + return ((Value.BytesValue) value).getValue(); + if (value instanceof Value.BytesBufferValue) + return ((Value.BytesBufferValue) value).getValue(); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is not BYTES"); + } + + public List getArray(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); + } + + public Map getMap(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); + } + + public ImprintRecord getRow(int fieldId) throws ImprintException { + return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); } + // ========== SERIALIZATION ========== + /** * Serialize this record to a ByteBuffer. - * The returned buffer will be array-backed. */ public ByteBuffer serializeToBuffer() { var buffer = ByteBuffer.allocate(estimateSerializedSize()); @@ -100,171 +140,95 @@ public ByteBuffer serializeToBuffer() { // Write header serializeHeader(buffer); - // Write directory (always present) - VarInt.encode(directory.size(), buffer); - for (var entry : directory) { - serializeDirectoryEntry(entry, buffer); - } + // Write directory + var directoryBuffer = buffers.serializeDirectory(); + buffer.put(directoryBuffer); - // Write payload (shallow copy only) + // Write payload + var payload = buffers.getPayload(); var payloadCopy = payload.duplicate(); buffer.put(payloadCopy); - // Prepare buffer for reading buffer.flip(); return buffer; } - /** - * Create a fluent builder for constructing ImprintRecord instances. - * - * @param schemaId the schema identifier for this record - * @return a new builder instance - */ + public int estimateSerializedSize() { + int size = Constants.HEADER_BYTES; // header + size += buffers.serializeDirectory().remaining(); // directory + size += buffers.getPayload().remaining(); // payload + return size; + } + + // ========== STATIC FACTORY METHODS ========== + public static ImprintRecordBuilder builder(SchemaId schemaId) { return new ImprintRecordBuilder(schemaId); } - /** - * Create a fluent builder for constructing ImprintRecord instances. - * - * @param fieldspaceId the fieldspace identifier - * @param schemaHash the schema hash - * @return a new builder instance - */ - @SuppressWarnings("unused") public static ImprintRecordBuilder builder(int fieldspaceId, int schemaHash) { return new ImprintRecordBuilder(new SchemaId(fieldspaceId, schemaHash)); } - /** - * Deserialize a record from bytes through an array backed ByteBuffer. - */ public static ImprintRecord deserialize(byte[] bytes) throws ImprintException { return deserialize(ByteBuffer.wrap(bytes)); } - /** - * Deserialize a record from a ByteBuffer. - * - * @param buffer the buffer to deserialize from. Must be array-backed - * (buffer.hasArray() == true) for optimal zero-copy performance. - */ public static ImprintRecord deserialize(ByteBuffer buffer) throws ImprintException { buffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); // Read header var header = deserializeHeader(buffer); - // Read directory (always present) - var directory = new ArrayList(); - VarInt.DecodeResult countResult = VarInt.decode(buffer); + // Calculate directory size + int directoryStartPos = buffer.position(); + var countResult = VarInt.decode(buffer); int directoryCount = countResult.getValue(); + int directorySize = countResult.getBytesRead() + (directoryCount * Constants.DIR_ENTRY_BYTES); - for (int i = 0; i < directoryCount; i++) { - directory.add(deserializeDirectoryEntry(buffer)); - } + // Create directory buffer + buffer.position(directoryStartPos); + var directoryBuffer = buffer.slice(); + directoryBuffer.limit(directorySize); - // Read payload as ByteBuffer slice for zero-copy + // Advance past directory + buffer.position(buffer.position() + directorySize); + + // Create payload buffer var payload = buffer.slice(); payload.limit(header.getPayloadSize()); - buffer.position(buffer.position() + header.getPayloadSize()); - return new ImprintRecord(header, directory, payload); - } + // Create buffers wrapper + var buffers = new ImprintBuffers(directoryBuffer, payload); - /** - * Binary search for field ID in directory without object allocation. - * Returns the index of the field if found, or a negative value if not found. - * - * @param fieldId the field ID to search for - * @return index if found, or negative insertion point - 1 if not found - */ - private int findDirectoryIndex(int fieldId) { - int low = 0; - int high = directory.size() - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; // unsigned right shift to avoid overflow - int midFieldId = directory.get(mid).getId(); - - if (midFieldId < fieldId) { - low = mid + 1; - } else if (midFieldId > fieldId) { - high = mid - 1; - } else { - return mid; // field found - } - } - return -(low + 1); // field not found, return insertion point + return new ImprintRecord(header, buffers); } - public int estimateSerializedSize() { - int size = Constants.HEADER_BYTES; // header - size += VarInt.encodedLength(directory.size()); // directory count - size += directory.size() * Constants.DIR_ENTRY_BYTES; // directory entries - size += payload.remaining(); // payload - return size; - } - - private void serializeHeader(ByteBuffer buffer) { - buffer.put(Constants.MAGIC); - buffer.put(Constants.VERSION); - buffer.put(header.getFlags().getValue()); - buffer.putInt(header.getSchemaId().getFieldSpaceId()); - buffer.putInt(header.getSchemaId().getSchemaHash()); - buffer.putInt(header.getPayloadSize()); - } - - private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.HEADER_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for header"); - } - - byte magic = buffer.get(); - if (magic != Constants.MAGIC) { - throw new ImprintException(ErrorType.INVALID_MAGIC, - "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + - ", got 0x" + Integer.toHexString(magic & 0xFF)); - } - - byte version = buffer.get(); - if (version != Constants.VERSION) { - throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, - "Unsupported version: " + version); - } - - var flags = new Flags(buffer.get()); - int fieldspaceId = buffer.getInt(); - int schemaHash = buffer.getInt(); - int payloadSize = buffer.getInt(); + // ========== PRIVATE HELPER METHODS ========== - return new Header(flags, new SchemaId(fieldspaceId, schemaHash), payloadSize); - } - - private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { - buffer.putShort(entry.getId()); - buffer.put(entry.getTypeCode().getCode()); - buffer.putInt(entry.getOffset()); + /** + * Get and validate a value exists and is not null. + */ + private Value getValidatedValue(int fieldId, String typeName) throws ImprintException { + var value = getValue(fieldId); + if (value == null) + throw new ImprintException(ErrorType.FIELD_NOT_FOUND, "Field " + fieldId + " not found"); + if (value.getTypeCode() == TypeCode.NULL) + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is NULL, cannot retrieve as " + typeName); + return value; } - private static DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { - if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Not enough bytes for directory entry"); - } - - short id = buffer.getShort(); - var typeCode = TypeCode.fromByte(buffer.get()); - int offset = buffer.getInt(); - - return new DirectoryEntry(id, typeCode, offset); + private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) + throws ImprintException { + var value = getValidatedValue(fieldId, expectedTypeName); + if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) + return expectedValueClass.cast(value); + throw new ImprintException(ErrorType.TYPE_MISMATCH, "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName); } private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws ImprintException { - var valueSpecificBuffer = buffer.duplicate(); - valueSpecificBuffer.order(ByteOrder.LITTLE_ENDIAN); + var valueBuffer = buffer.duplicate(); + valueBuffer.order(ByteOrder.LITTLE_ENDIAN); switch (typeCode) { case NULL: @@ -277,162 +241,51 @@ private Value deserializeValue(TypeCode typeCode, ByteBuffer buffer) throws Impr case STRING: case ARRAY: case MAP: - return typeCode.getHandler().deserialize(valueSpecificBuffer); + return typeCode.getHandler().deserialize(valueBuffer); case ROW: - var nestedRecord = deserialize(valueSpecificBuffer); + var nestedRecord = deserialize(valueBuffer); return Value.fromRow(nestedRecord); - default: throw new ImprintException(ErrorType.INVALID_TYPE_CODE, "Unknown type code: " + typeCode); } } - private T getTypedValueOrThrow(int fieldId, TypeCode expectedTypeCode, Class expectedValueClass, String expectedTypeName) throws ImprintException { - var value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as " + expectedTypeName + "."); - } - - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as " + expectedTypeName + "."); - } - - if (value.getTypeCode() == expectedTypeCode && expectedValueClass.isInstance(value)) { - return expectedValueClass.cast(value); - } - - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected " + expectedTypeName + "."); - } - - /** - * Retrieves the boolean value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type BOOL. - */ - public boolean getBoolean(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.BOOL, Value.BoolValue.class, "boolean").getValue(); - } - - /** - * Retrieves the int (int32) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type INT32. - */ - public int getInt32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT32, Value.Int32Value.class, "int32").getValue(); - } - - /** - * Retrieves the long (int64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type INT64. - */ - public long getInt64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.INT64, Value.Int64Value.class, "int64").getValue(); - } - - /** - * Retrieves the float (float32) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT32. - */ - public float getFloat32(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT32, Value.Float32Value.class, "float32").getValue(); - } - - /** - * Retrieves the double (float64) value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type FLOAT64. - */ - public double getFloat64(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.FLOAT64, Value.Float64Value.class, "float64").getValue(); + private void serializeHeader(ByteBuffer buffer) { + buffer.put(Constants.MAGIC); + buffer.put(Constants.VERSION); + buffer.put(header.getFlags().getValue()); + buffer.putInt(header.getSchemaId().getFieldSpaceId()); + buffer.putInt(header.getSchemaId().getSchemaHash()); + buffer.putInt(header.getPayloadSize()); } - /** - * Retrieves the String value for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type STRING. - */ - public String getString(int fieldId) throws ImprintException { - var value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as String."); - } - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as String."); - } - - if (value instanceof Value.StringValue) { - return ((Value.StringValue) value).getValue(); - } - if (value instanceof Value.StringBufferValue) { - return ((Value.StringBufferValue) value).getValue(); - } + private static Header deserializeHeader(ByteBuffer buffer) throws ImprintException { + if (buffer.remaining() < Constants.HEADER_BYTES) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Not enough bytes for header"); - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected STRING."); - } - /** - * Retrieves the byte array (byte[]) value for the given field ID. - * Note: This may involve a defensive copy depending on the underlying Value type. - * @throws ImprintException if the field is not found, is null, or is not of type BYTES. - */ - public byte[] getBytes(int fieldId) throws ImprintException { - Value value = getValue(fieldId); - - if (value == null) { - throw new ImprintException(ErrorType.FIELD_NOT_FOUND, - "Field " + fieldId + " not found, cannot retrieve as byte[]."); - } - if (value.getTypeCode() == TypeCode.NULL) { - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is NULL, cannot retrieve as byte[]."); + byte magic = buffer.get(); + if (magic != Constants.MAGIC) { + throw new ImprintException(ErrorType.INVALID_MAGIC, "Invalid magic byte: expected 0x" + Integer.toHexString(Constants.MAGIC) + + ", got 0x" + Integer.toHexString(magic & 0xFF)); } - if (value instanceof Value.BytesValue) { - return ((Value.BytesValue) value).getValue(); // getValue() in BytesValue returns a clone - } - if (value instanceof Value.BytesBufferValue) { - return ((Value.BytesBufferValue) value).getValue(); // getValue() in BytesBufferValue creates a new array + byte version = buffer.get(); + if (version != Constants.VERSION) { + throw new ImprintException(ErrorType.UNSUPPORTED_VERSION, "Unsupported version: " + version); } - throw new ImprintException(ErrorType.TYPE_MISMATCH, - "Field " + fieldId + " is of type " + value.getTypeCode() + ", expected BYTES."); - } - - /** - * Retrieves the List for the given field ID. - * The list itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type ARRAY. - */ - public List getArray(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ARRAY, Value.ArrayValue.class, "ARRAY").getValue(); - } - - /** - * Retrieves the Map for the given field ID. - * The map itself is a copy; modifications to it will not affect the record. - * @throws ImprintException if the field is not found, is null, or is not of type MAP. - */ - public Map getMap(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.MAP, Value.MapValue.class, "MAP").getValue(); - } + var flags = new Flags(buffer.get()); + int fieldSpaceId = buffer.getInt(); + int schemaHash = buffer.getInt(); + int payloadSize = buffer.getInt(); - /** - * Retrieves the nested ImprintRecord for the given field ID. - * @throws ImprintException if the field is not found, is null, or is not of type ROW. - */ - public ImprintRecord getRow(int fieldId) throws ImprintException { - return getTypedValueOrThrow(fieldId, TypeCode.ROW, Value.RowValue.class, "ROW").getValue(); + return new Header(flags, new SchemaId(fieldSpaceId, schemaHash), payloadSize); } @Override public String toString() { return String.format("ImprintRecord{header=%s, directorySize=%d, payloadSize=%d}", - header, directory.size(), payload.remaining()); + header, buffers.getDirectoryCount(), buffers.getPayload().remaining()); } - } \ No newline at end of file diff --git a/src/main/java/com/imprint/util/VarInt.java b/src/main/java/com/imprint/util/VarInt.java index 75bd132..f43683b 100644 --- a/src/main/java/com/imprint/util/VarInt.java +++ b/src/main/java/com/imprint/util/VarInt.java @@ -16,12 +16,28 @@ */ @UtilityClass public final class VarInt { - + private static final byte CONTINUATION_BIT = (byte) 0x80; private static final byte SEGMENT_BITS = 0x7f; private static final int MAX_VARINT_LEN = 5; // Enough for u32 - - + + // Simple cache for values 0-1023 + private static final int CACHE_SIZE = 1024; + private static final int[] ENCODED_LENGTHS = new int[CACHE_SIZE]; + + static { + // Pre-compute encoded lengths for cached values + for (int i = 0; i < CACHE_SIZE; i++) { + long val = Integer.toUnsignedLong(i); + int length = 1; + while (val >= 0x80) { + val >>>= 7; + length++; + } + ENCODED_LENGTHS[i] = length; + } + } + /** * Encode a 32-bit unsigned integer as a VarInt into the given ByteBuffer. * @param value the value to encode (treated as unsigned) @@ -30,7 +46,7 @@ public final class VarInt { public static void encode(int value, ByteBuffer buffer) { // Convert to unsigned long for proper bit manipulation long val = Integer.toUnsignedLong(value); - + // Encode at least one byte, then continue while value has more bits do { byte b = (byte) (val & SEGMENT_BITS); @@ -41,8 +57,7 @@ public static void encode(int value, ByteBuffer buffer) { buffer.put(b); } while (val != 0); } - - + /** * Decode a VarInt from a ByteBuffer. * @param buffer the buffer to decode from @@ -53,55 +68,55 @@ public static DecodeResult decode(ByteBuffer buffer) throws ImprintException { long result = 0; int shift = 0; int bytesRead = 0; - + while (true) { if (bytesRead >= MAX_VARINT_LEN) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt too long"); } if (!buffer.hasRemaining()) { - throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, - "Unexpected end of data while reading VarInt"); + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, + "Unexpected end of data while reading VarInt"); } - + byte b = buffer.get(); bytesRead++; - + // Check if adding these 7 bits would overflow long segment = b & SEGMENT_BITS; if (shift >= 32 || (shift == 28 && segment > 0xF)) { throw new ImprintException(ErrorType.MALFORMED_VARINT, "VarInt overflow"); } - + // Add the bottom 7 bits to the result result |= segment << shift; - + // If the high bit is not set, this is the last byte if ((b & CONTINUATION_BIT) == 0) { break; } - + shift += 7; } - + return new DecodeResult((int) result, bytesRead); } - + /** * Calculate the number of bytes needed to encode the given value as a VarInt. * @param value the value to encode (treated as unsigned) * @return the number of bytes needed */ public static int encodedLength(int value) { - // Convert to unsigned long for proper bit manipulation + if (value >= 0 && value < CACHE_SIZE) { + return ENCODED_LENGTHS[value]; + } + long val = Integer.toUnsignedLong(value); int length = 1; - - // Count additional bytes needed for values >= 128 while (val >= 0x80) { val >>>= 7; length++; } - return length; } diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 76efcc5..898adfb 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -16,16 +16,11 @@ */ public class IntegrationTest { - // Removed main method, individual methods are now JUnit tests. - @Test @DisplayName("Basic functionality: create, serialize, deserialize primitive types") void testBasicFunctionality() throws ImprintException { - System.out.println("Testing basic functionality..."); // Keep for now if desired, or remove - SchemaId schemaId = new SchemaId(1, 0xdeadbeef); - // Using ImprintRecordBuilder for consistency with other tests - ImprintRecord record = ImprintRecord.builder(schemaId) + var record = ImprintRecord.builder(schemaId) .field(1, 42) .field(2, "testing java imprint spec") .field(3, true) @@ -33,7 +28,7 @@ void testBasicFunctionality() throws ImprintException { .field(5, new byte[]{1, 2, 3, 4}) .build(); - // Verify we can read values back using ergonomic getters + // Verify we can read values back using type getters assertEquals(42, record.getInt32(1)); assertEquals("testing java imprint spec", record.getString(2)); assertTrue(record.getBoolean(3)); @@ -47,7 +42,7 @@ void testBasicFunctionality() throws ImprintException { var buffer = record.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + var deserialized = ImprintRecord.deserialize(serialized); assertEquals(42, deserialized.getInt32(1)); assertEquals("testing java imprint spec", deserialized.getString(2)); @@ -61,8 +56,6 @@ void testBasicFunctionality() throws ImprintException { @Test @DisplayName("Collections: create, serialize, deserialize arrays and maps") void testArraysAndMaps() throws ImprintException { - System.out.println("Testing arrays and maps..."); - SchemaId schemaId = new SchemaId(2, 0xcafebabe); // Create an array using builder for convenience @@ -72,8 +65,7 @@ void testArraysAndMaps() throws ImprintException { Map sourceStringToIntMap = new HashMap<>(); sourceStringToIntMap.put("one", 1); sourceStringToIntMap.put("two", 2); - - ImprintRecord record = ImprintRecord.builder(schemaId) + var record = ImprintRecord.builder(schemaId) .field(1, sourceIntList) // Builder converts List to List .field(2, sourceStringToIntMap) // Builder converts Map .build(); @@ -107,14 +99,14 @@ void testArraysAndMaps() throws ImprintException { void testNestedRecords() throws ImprintException { System.out.println("Testing nested records..."); - SchemaId innerSchemaId = new SchemaId(3, 0x12345678); - ImprintRecord innerRecord = ImprintRecord.builder(innerSchemaId) + var innerSchemaId = new SchemaId(3, 0x12345678); + var innerRecord = ImprintRecord.builder(innerSchemaId) .field(1, "nested data") .field(2, 9876543210L) .build(); - SchemaId outerSchemaId = new SchemaId(4, 0x87654321); - ImprintRecord outerRecord = ImprintRecord.builder(outerSchemaId) + var outerSchemaId = new SchemaId(4, 0x87654321); + var outerRecord = ImprintRecord.builder(outerSchemaId) .field(1, innerRecord) // Builder handles ImprintRecord directly .field(2, "outer data") .build(); @@ -122,12 +114,12 @@ void testNestedRecords() throws ImprintException { var buffer = outerRecord.serializeToBuffer(); byte[] serialized = new byte[buffer.remaining()]; buffer.get(serialized); - ImprintRecord deserialized = ImprintRecord.deserialize(serialized); + var deserialized = ImprintRecord.deserialize(serialized); assertEquals(4, deserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("outer data", deserialized.getString(2)); - ImprintRecord nestedDeserialized = deserialized.getRow(1); + var nestedDeserialized = deserialized.getRow(1); assertNotNull(nestedDeserialized); assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("nested data", nestedDeserialized.getString(1)); @@ -136,8 +128,6 @@ void testNestedRecords() throws ImprintException { System.out.println("āœ“ Nested records test passed"); } - // --- Start of broken down tests for ErgonomicGettersAndNestedTypes --- - private ImprintRecord createTestRecordForGetters() throws ImprintException { SchemaId schemaId = new SchemaId(5, 0xabcdef01); @@ -174,8 +164,8 @@ private ImprintRecord serializeAndDeserialize(ImprintRecord record) throws Impri @Test @DisplayName("Type Getters: Basic primitive and String types") void testBasicTypeGetters() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); assertTrue(record.getBoolean(1)); assertEquals(12345, record.getInt32(2)); @@ -189,8 +179,8 @@ void testBasicTypeGetters() throws ImprintException { @Test @DisplayName("Type Getters: Array of Arrays") void testTypeGetterArrayOfArrays() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); List arrOfArr = record.getArray(9); assertNotNull(arrOfArr); @@ -211,8 +201,8 @@ void testTypeGetterArrayOfArrays() throws ImprintException { @Test @DisplayName("Type Getters: Map with Array Value") void testTypeGetterMapWithArrayValue() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); Map mapWithArr = record.getMap(10); assertNotNull(mapWithArr); @@ -227,8 +217,8 @@ void testTypeGetterMapWithArrayValue() throws ImprintException { @Test @DisplayName("Type Getters: Empty Collections (Array and Map)") void testErgonomicGettersEmptyCollections() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); List emptyArr = record.getArray(11); assertNotNull(emptyArr); @@ -242,8 +232,8 @@ void testErgonomicGettersEmptyCollections() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Field Not Found") void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(99)); assertEquals(ErrorType.FIELD_NOT_FOUND, ex.getErrorType()); @@ -252,8 +242,8 @@ void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Null Field accessed as primitive") void testErgonomicGetterExceptionNullField() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getString(8)); assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); // getString throws TYPE_MISMATCH for null @@ -269,8 +259,8 @@ void testErgonomicGetterExceptionNullField() throws ImprintException { @Test @DisplayName("Type Getters: Exception for Type Mismatch") void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { - ImprintRecord originalRecord = createTestRecordForGetters(); - ImprintRecord record = serializeAndDeserialize(originalRecord); + var originalRecord = createTestRecordForGetters(); + var record = serializeAndDeserialize(originalRecord); ImprintException ex = assertThrows(ImprintException.class, () -> record.getInt32(6)); // Field 6 is a String assertEquals(ErrorType.TYPE_MISMATCH, ex.getErrorType()); @@ -279,20 +269,20 @@ void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { @Test @DisplayName("Type Getters: Row (Nested Record)") void testErgonomicGetterRow() throws ImprintException { - SchemaId innerSchemaId = new SchemaId(6, 0x12345678); - ImprintRecord innerRecord = ImprintRecord.builder(innerSchemaId) + var innerSchemaId = new SchemaId(6, 0x12345678); + var innerRecord = ImprintRecord.builder(innerSchemaId) .field(101, "nested string") .field(102, 999L) .build(); - ImprintRecord recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) + var recordWithRow = ImprintRecord.builder(new SchemaId(7, 0x87654321)) .field(201, innerRecord) // Using builder to add row .field(202, "outer field") .build(); - ImprintRecord deserializedWithRow = serializeAndDeserialize(recordWithRow); + var deserializedWithRow = serializeAndDeserialize(recordWithRow); - ImprintRecord retrievedRow = deserializedWithRow.getRow(201); + var retrievedRow = deserializedWithRow.getRow(201); assertNotNull(retrievedRow); assertEquals(innerSchemaId, retrievedRow.getHeader().getSchemaId()); assertEquals("nested string", retrievedRow.getString(101));