diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java index 4d9c01c..6a6a958 100644 --- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java +++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java @@ -405,7 +405,7 @@ public void mergeFlatBuffers(Blackhole bh) { // ===== MAIN METHOD TO RUN BENCHMARKS ===== public static void main(String[] args) throws RunnerException { - runAll(); + runFieldAccessBenchmarks(); // Or, uncomment specific runner methods to execute subsets: // runSerializationBenchmarks(); // runDeserializationBenchmarks(); diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java index f6a341b..c14d6df 100644 --- a/src/main/java/com/imprint/core/ImprintBuffers.java +++ b/src/main/java/com/imprint/core/ImprintBuffers.java @@ -10,9 +10,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.TreeMap; /** * Manages the raw buffers for an Imprint record with lazy directory parsing. @@ -31,17 +31,12 @@ public final class ImprintBuffers { private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count) private final ByteBuffer payload; // Read-only payload view - // Lazy-loaded directory state - private List parsedDirectory; + // Lazy-loaded directory state. Needs to maintain ordering so that we can binary search the endOffset + private TreeMap parsedDirectory; private boolean directoryParsed = false; - private int directoryCount = -1; // Cached count to avoid repeated VarInt decoding /** * Creates buffers from raw data (used during deserialization). - * - * @param directoryBuffer Raw directory bytes including VarInt count and all entries. - * Format: [VarInt count][Entry1][Entry2]...[EntryN] - * @param payload Raw payload data containing all field values sequentially */ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { this.directoryBuffer = directoryBuffer.asReadOnlyBuffer(); @@ -50,33 +45,17 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) { /** * Creates buffers from pre-parsed directory (used during construction). - * This is more efficient when the directory is already known. - * - * @param directory Parsed directory entries, must be sorted by fieldId - * @param payload Raw payload data containing all field values */ public ImprintBuffers(List directory, ByteBuffer payload) { - this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory)); + this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory)); this.directoryParsed = true; - this.directoryCount = directory.size(); this.payload = payload.asReadOnlyBuffer(); this.directoryBuffer = createDirectoryBuffer(directory); } /** * Get a zero-copy ByteBuffer view of a field's data. - * - *
- * Buffer Positioning Logic: - *   1. Find the directory entry for the requested fieldId - *   2. Use entry.offset as start position in payload - *   3. Find end position by looking at next field's offset (or payload end) - *   4. Create a slice view: payload[startOffset:endOffset]
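// A minimal sketch of the slice-view logic described above, assuming startOffset and
// endOffset were already resolved from the directory (method name is illustrative):
static ByteBuffer sliceField(ByteBuffer payload, int startOffset, int endOffset) {
    ByteBuffer view = payload.duplicate();        // independent position/limit, shared bytes
    view.position(startOffset).limit(endOffset);  // zero-copy window over [start, end)
    return view;
}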
- * - * @param fieldId The field identifier to retrieve - * @return Zero-copy ByteBuffer positioned at field data, or null if field not found - * @throws ImprintException if buffer bounds are invalid or directory is corrupted + * Optimized for the most common use case - single field access. */ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { var entry = findDirectoryEntry(fieldId); @@ -92,78 +71,43 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException { "Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit()); } - ByteBuffer fieldBuffer = payload.duplicate(); + var fieldBuffer = payload.duplicate(); fieldBuffer.position(startOffset).limit(endOffset); return fieldBuffer; } /** * Find a directory entry for the given field ID using the most efficient method. - * - *
- * Search Strategy: - *   • If directory is parsed: binary search on the in-memory List<DirectoryEntry> - *   • If directory is raw: binary search directly on the raw bytes (faster for single lookups) - * - * @param fieldId The field identifier to find - * @return DirectoryEntry if found, null otherwise - * @throws ImprintException if directory buffer is corrupted or truncated + *
+ * Strategy: + * - If parsed: TreeMap lookup + * - If raw: Binary search on raw bytes to avoid full unwinding of the directory */ public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException { - if (directoryParsed) { - int index = findDirectoryIndexInParsed(fieldId); - return index >= 0 ? parsedDirectory.get(index) : null; - } else { + if (directoryParsed) + return parsedDirectory.get(fieldId); + else return findFieldEntryInRawDirectory(fieldId); - } } /** * Get the full directory, parsing it if necessary. - * - *
- * Lazy Parsing Behavior: - *   • First call: parses the entire directory from raw bytes into List<DirectoryEntry> - *   • Subsequent calls: return the cached parsed directory - *   • Note: the method is not synchronized and assumes single-threaded usage - * - * When to use: call this if you need to access multiple fields - * from the same record. For single field access, direct field getters are more efficient.
- * - * @return Immutable list of directory entries, sorted by fieldId + * Returns the values in fieldId order thanks to TreeMap. */ public List getDirectory() { ensureDirectoryParsed(); - return parsedDirectory; + return new ArrayList<>(parsedDirectory.values()); } /** - * Get the directory count without fully parsing the directory. - *
- * This method avoids parsing the entire directory when only the count is needed: - *   1. Return the cached count if available (directoryCount >= 0) - *   2. Return the parsed directory size if the directory is already parsed - *   3. Decode the VarInt from the raw buffer and cache the result - * - * VarInt Decoding: the count is stored as a VarInt at the beginning - * of the directoryBuffer. This method reads just enough bytes to decode the count.
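// A hedged sketch of the count decoding described above, assuming a standard
// LEB128-style VarInt; the project's actual VarInt.decode may differ in details
// (no malformed-input guard here, for brevity).
static int decodeVarInt(ByteBuffer buf) {
    int value = 0;
    int shift = 0;
    while (true) {
        byte b = buf.get();
        value |= (b & 0x7F) << shift;  // low 7 bits carry payload
        if ((b & 0x80) == 0)           // high bit clear marks the last byte
            return value;
        shift += 7;
    }
}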
- * - * @return Number of fields in the directory, or 0 if decoding fails + * Get directory count without parsing. */ public int getDirectoryCount() { - if (directoryCount >= 0) - return directoryCount; if (directoryParsed) return parsedDirectory.size(); - - // Decode from buffer and cache try { var countBuffer = directoryBuffer.duplicate(); - directoryCount = VarInt.decode(countBuffer).getValue(); - return directoryCount; + return VarInt.decode(countBuffer).getValue(); } catch (Exception e) { return 0; } @@ -171,52 +115,23 @@ public int getDirectoryCount() { /** * Create a new buffer containing the serialized directory. - * - *
- * Output Format: - *   [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N] - * - * Each DirectoryEntry is serialized as: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
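// A sketch of the 7-byte entry layout described above; assumes the buffer's order
// was already set to LITTLE_ENDIAN, matching the serialization code in this diff.
static void writeEntry(ByteBuffer buf, short fieldId, byte typeCode, int offset) {
    buf.putShort(fieldId);  // [fieldId:2bytes]
    buf.put(typeCode);      // [typeCode:1byte]
    buf.putInt(offset);     // [offset:4bytes]
}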
- * - * - * @return New ByteBuffer containing the complete serialized directory */ public ByteBuffer serializeDirectory() { ensureDirectoryParsed(); - return createDirectoryBuffer(parsedDirectory); + return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values())); } // ========== PRIVATE METHODS ========== /** * Binary search on raw directory bytes to find a specific field. - * - *
- *   1. Position buffer at start and decode VarInt count (cache for future use) - *   2. Calculate directory start position after the VarInt - *   3. For the binary search midpoint: entryPos = startPos + (mid * DIR_ENTRY_BYTES) - *   4. Read fieldId from the calculated position (first 2 bytes of entry) - *   5. Compare fieldId and adjust search bounds - *   6. When found: reposition buffer and deserialize the complete entry - * - * All buffer positions are bounds-checked before access.
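// A sketch of binary search over fixed-width entries in a raw buffer, as described
// above. ENTRY_BYTES stands in for Constants.DIR_ENTRY_BYTES; absolute reads avoid
// repositioning the buffer on each probe.
static int findEntryPosition(ByteBuffer dir, int startPos, int count, int targetFieldId) {
    final int ENTRY_BYTES = 7;  // [fieldId:2][typeCode:1][offset:4]
    int low = 0, high = count - 1;
    while (low <= high) {
        int mid = (low + high) >>> 1;
        int pos = startPos + mid * ENTRY_BYTES;
        short fieldId = dir.getShort(pos);  // absolute read of the entry's fieldId
        if (fieldId < targetFieldId) low = mid + 1;
        else if (fieldId > targetFieldId) high = mid - 1;
        else return pos;                    // found: entry starts here
    }
    return -1;                              // not present
}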
- * - * @param fieldId Field identifier to search for - * @return Complete DirectoryEntry if found, null if not found - * @throws ImprintException if buffer is truncated or corrupted + * This avoids parsing the entire directory for single field lookups. */ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException { var searchBuffer = directoryBuffer.duplicate(); searchBuffer.order(ByteOrder.LITTLE_ENDIAN); - // Decode directory count (cache it) - if (directoryCount < 0) - directoryCount = VarInt.decode(searchBuffer).getValue(); - else - VarInt.decode(searchBuffer); // Skip past the count - + int directoryCount = VarInt.decode(searchBuffer).getValue(); if (directoryCount == 0) return null; @@ -250,118 +165,65 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE return null; } - /** - * - * @param fieldId Field identifier to find - * @return Index of the field if found, or negative insertion point if not found - */ - private int findDirectoryIndexInParsed(int fieldId) { - if (!directoryParsed) - return -1; - int low = 0; - int high = parsedDirectory.size() - 1; - while (low <= high) { - int mid = (low + high) >>> 1; - int midFieldId = parsedDirectory.get(mid).getId(); - if (midFieldId < fieldId) - low = mid + 1; - else if (midFieldId > fieldId) - high = mid - 1; - else - return mid; - } - return -(low + 1); - } - /** * Find the end offset for a field by looking at the next field's offset. - * - *
- *   • Field data spans from entry.offset to nextField.offset (exclusive) - *   • The last field spans from entry.offset to payload.limit() - *   • This works because directory entries are sorted by fieldId - * - * Search Strategy: - *   • If directory parsed: use the binary search result + 1 to get the next entry - *   • If directory raw: scan raw entries until fieldId > currentFieldId
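// A sketch of the parsed-directory case described above, mirroring the TreeMap
// navigation introduced later in this diff: a field's data ends where the next
// field (by id) begins, or at the payload limit for the last field.
static int endOffsetOf(TreeMap<Integer, DirectoryEntry> dir, int fieldId, ByteBuffer payload) {
    var next = dir.higherEntry(fieldId);  // smallest key strictly greater, O(log n)
    return next != null ? next.getValue().getOffset() : payload.limit();
}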
- * - * @param entry The directory entry whose end offset we need to find - * @return End offset (exclusive) for the field data - * @throws ImprintException if directory scanning fails */ private int findEndOffset(DirectoryEntry entry) throws ImprintException { if (directoryParsed) { - int entryIndex = findDirectoryIndexInParsed(entry.getId()); - return (entryIndex + 1 < parsedDirectory.size()) ? - parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit(); - } else + return findNextOffsetInParsedDirectory(entry.getId()); + } else { return findNextOffsetInRawDirectory(entry.getId()); + } } /** - * Scan raw directory to find the next field's offset after currentFieldId. - * - *
- *   1. Position buffer after the VarInt count - *   2. For each directory entry at position startPos + (i * DIR_ENTRY_BYTES) - *   3. Read fieldId (first 2 bytes) and offset (bytes 3-6) - *   4. Return the offset of the first field where fieldId > currentFieldId - *   5. If no next field is found, return payload.limit()
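// A simplified sketch of the "first fieldId greater than current" search used by the
// new raw-directory code below, reduced to a plain sorted array: keep the best
// candidate while narrowing leftward (a lower-bound-style binary search).
static int firstGreater(int[] sortedIds, int current, int fallback) {
    int low = 0, high = sortedIds.length - 1, answer = fallback;
    while (low <= high) {
        int mid = (low + high) >>> 1;
        if (sortedIds[mid] > current) {
            answer = sortedIds[mid];  // candidate; keep looking for an earlier one
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return answer;
}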
- * - * @param currentFieldId Find the next field after this fieldId - * @return Offset where the next field starts, or payload.limit() if this is the last field - * @throws ImprintException if directory buffer is corrupted + * Find the end offset using TreeMap's efficient navigation methods. */ + private int findNextOffsetInParsedDirectory(int currentFieldId) { + var nextEntry = parsedDirectory.higherEntry(currentFieldId); + return nextEntry != null ? nextEntry.getValue().getOffset() : payload.limit(); + } + private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException { var scanBuffer = directoryBuffer.duplicate(); scanBuffer.order(ByteOrder.LITTLE_ENDIAN); - int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue(); + int count = VarInt.decode(scanBuffer).getValue(); if (count == 0) return payload.limit(); - if (directoryCount >= 0) - VarInt.decode(scanBuffer); // Skip count if cached int directoryStartPos = scanBuffer.position(); + int low = 0; + int high = count - 1; + int nextOffset = payload.limit(); - for (int i = 0; i < count; i++) { - int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES); + // Binary search for the first field with fieldId > currentFieldId + while (low <= high) { + int mid = (low + high) >>> 1; + int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES); if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit()) - return payload.limit(); + break; scanBuffer.position(entryPos); short fieldId = scanBuffer.getShort(); scanBuffer.get(); // skip type int offset = scanBuffer.getInt(); - if (fieldId > currentFieldId) - return offset; + if (fieldId > currentFieldId) { + nextOffset = offset; + high = mid - 1; + } else { + low = mid + 1; + } } - return payload.limit(); + return nextOffset; } /** * Parse the full directory if not already parsed. - * - *
- *   1. Duplicate directoryBuffer to avoid affecting the original position - *   2. Set byte order to LITTLE_ENDIAN for consistent reading - *   3. Decode the VarInt count and cache it - *   4. Read 'count' directory entries sequentially - *   5. Each entry: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - *   6. Store as an immutable list and mark as parsed - * - * Error Handling: if parsing fails, throws RuntimeException - * since this indicates corrupted data that should never happen in normal operation. - * - * Returns immediately if the directory has already been parsed.
+ * Creates a TreeMap for both fast lookup and the ordering needed for binary searches on offsets. */ private void ensureDirectoryParsed() { if (directoryParsed) @@ -372,46 +234,42 @@ private void ensureDirectoryParsed() { var countResult = VarInt.decode(parseBuffer); int count = countResult.getValue(); - this.directoryCount = count; - var directory = new ArrayList<>(count); + this.parsedDirectory = new TreeMap<>(); for (int i = 0; i < count; i++) { - directory.add(deserializeDirectoryEntry(parseBuffer)); + var entry = deserializeDirectoryEntry(parseBuffer); + parsedDirectory.put((int)entry.getId(), entry); } - this.parsedDirectory = Collections.unmodifiableList(directory); this.directoryParsed = true; } catch (ImprintException e) { throw new RuntimeException("Failed to parse directory", e); } } + /** + * Create a TreeMap from a directory list, providing field lookup with ordering. + */ + private TreeMap<Integer, DirectoryEntry> createDirectoryMap(List<DirectoryEntry> directory) { + var map = new TreeMap<Integer, DirectoryEntry>(); + for (var entry : directory) { + map.put((int)entry.getId(), entry); + } + return map; + } + /** * Create directory buffer from parsed entries. - *
- * Serialization Format: - *   1. Calculate buffer size: VarInt.encodedLength(count) + (count * DIR_ENTRY_BYTES) - *   2. Allocate a ByteBuffer with LITTLE_ENDIAN byte order - *   3. Write the VarInt count - *   4. Write each directory entry: [fieldId:2][typeCode:1][offset:4] - *   5. Flip the buffer and return a read-only view
- * - * @param directory List of directory entries to serialize - * @return Read-only ByteBuffer containing serialized directory, or empty buffer on error */ private ByteBuffer createDirectoryBuffer(List directory) { try { - int bufferSize = VarInt.encodedLength(directory.size()) + - (directory.size() * Constants.DIR_ENTRY_BYTES); + int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES); var buffer = ByteBuffer.allocate(bufferSize); buffer.order(ByteOrder.LITTLE_ENDIAN); VarInt.encode(directory.size(), buffer); - for (var entry : directory) { + for (var entry : directory) serializeDirectoryEntry(entry, buffer); - } buffer.flip(); return buffer.asReadOnlyBuffer(); @@ -433,10 +291,6 @@ private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) { /** * Deserialize a single directory entry from the buffer. * Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes] - * - * @param buffer Buffer positioned at the start of a directory entry - * @return Parsed DirectoryEntry - * @throws ImprintException if buffer doesn't contain enough bytes */ private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException { if (buffer.remaining() < Constants.DIR_ENTRY_BYTES) diff --git a/src/main/java/com/imprint/core/ImprintOperations.java b/src/main/java/com/imprint/core/ImprintOperations.java new file mode 100644 index 0000000..4e60ebf --- /dev/null +++ b/src/main/java/com/imprint/core/ImprintOperations.java @@ -0,0 +1,207 @@ +package com.imprint.core; + +import com.imprint.error.ErrorType; +import com.imprint.error.ImprintException; +import lombok.Value; +import lombok.experimental.UtilityClass; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.*; + +@UtilityClass +public class ImprintOperations { + + /** + * Project a subset of fields from an Imprint record. Payload copying is proportional to projected data size. + * + *
+ * Algorithm: + *   1. Sort and deduplicate requested field IDs for efficient matching + *   2. Scan the directory to find matching fields and calculate ranges + *   3. Allocate a new payload buffer with the exact size needed + *   4. Copy field data ranges directly (zero-copy where possible) + *   5. Build a new directory with adjusted offsets
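// A hypothetical usage sketch of the projection described above; the builder calls
// mirror the tests later in this diff and are not canonical API documentation.
var schemaId = new SchemaId(1, 0x12345678);
var record = ImprintRecord.builder(schemaId)
        .field(1, 42)
        .field(2, "keep")
        .field(3, true)
        .build();
var slim = record.project(1, 2);  // copies only fields 1 and 2 into a new payload
assert slim.getDirectory().size() == 2;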
+ * + * @param record The source record to project from + * @param fieldIds Array of field IDs to include in projection + * @return New ImprintRecord containing only the requested fields + */ + public static ImprintRecord project(ImprintRecord record, int... fieldIds) { + // Sort and deduplicate field IDs for efficient matching with sorted directory + int[] sortedFieldIds = Arrays.stream(fieldIds).distinct().sorted().toArray(); + if (sortedFieldIds.length == 0) + return createEmptyRecord(record.getHeader().getSchemaId()); + + //eager fetch the entire directory (can this be lazy and just done per field?) + var sourceDirectory = record.getDirectory(); + var newDirectory = new ArrayList(sortedFieldIds.length); + var ranges = new ArrayList(); + + // Iterate through directory and compute ranges to copy + int fieldIdsIdx = 0; + int directoryIdx = 0; + int currentOffset = 0; + + while (directoryIdx < sourceDirectory.size() && fieldIdsIdx < sortedFieldIds.length) { + var field = sourceDirectory.get(directoryIdx); + if (field.getId() == sortedFieldIds[fieldIdsIdx]) { + // Calculate field length using next field's offset + int nextOffset = (directoryIdx + 1 < sourceDirectory.size()) ? + sourceDirectory.get(directoryIdx + 1).getOffset() : + record.getBuffers().getPayload().limit(); + int fieldLength = nextOffset - field.getOffset(); + + newDirectory.add(new DirectoryEntry(field.getId(), field.getTypeCode(), currentOffset)); + ranges.add(new FieldRange(field.getOffset(), nextOffset)); + + currentOffset += fieldLength; + fieldIdsIdx++; + } + directoryIdx++; + } + + // Build new payload from ranges + var newPayload = buildPayloadFromRanges(record.getBuffers().getPayload(), ranges); + + // Create new header with updated payload size + // TODO: compute correct schema hash + var newHeader = new Header(record.getHeader().getFlags(), + new SchemaId(record.getHeader().getSchemaId().getFieldSpaceId(), 0xdeadbeef), + newPayload.remaining() + ); + + return new ImprintRecord(newHeader, newDirectory, newPayload); + } + + /** + * Merge two Imprint records, combining their fields. Payload copying is proportional to total data size. + * + *
+ * Merge Strategy: + *   • Fields are merged using a sort-merge algorithm on directory entries + *   • For duplicate field IDs, the first record's field takes precedence + *   • Payloads are concatenated with directory offsets adjusted + *   • The schema ID from the first record is preserved
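// A simplified sketch of the sort-merge described above, reduced to id lists
// (java.util.List/ArrayList assumed imported): advance two sorted cursors,
// letting the first list win whenever both contain the same id.
static List<Integer> mergeIds(List<Integer> a, List<Integer> b) {
    var out = new ArrayList<Integer>(a.size() + b.size());
    int i = 0, j = 0;
    while (i < a.size() || j < b.size()) {
        if (j >= b.size() || (i < a.size() && a.get(i) <= b.get(j))) {
            if (j < b.size() && a.get(i).equals(b.get(j)))
                j++;              // duplicate id: skip the second record's entry
            out.add(a.get(i++));  // first record wins
        } else {
            out.add(b.get(j++));
        }
    }
    return out;
}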
+ * + * @param first The first record (takes precedence for duplicate fields) + * @param second The second record to merge + * @return New ImprintRecord containing merged fields + * @throws ImprintException if merge fails due to incompatible records + */ + public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) throws ImprintException { + var firstDir = first.getDirectory(); + var secondDir = second.getDirectory(); + + // Pre-allocate for worst case (no overlapping fields) + var newDirectory = new ArrayList(firstDir.size() + secondDir.size()); + var payloadChunks = new ArrayList(); + + int firstIdx = 0; + int secondIdx = 0; + int currentOffset = 0; + + while (firstIdx < firstDir.size() || secondIdx < secondDir.size()) { + DirectoryEntry currentEntry; + ByteBuffer currentPayload; + + if (firstIdx < firstDir.size() && + (secondIdx >= secondDir.size() || firstDir.get(firstIdx).getId() <= secondDir.get(secondIdx).getId())) { + + // Take from first record + currentEntry = firstDir.get(firstIdx); + + // Skip duplicate field in second record if present + if (secondIdx < secondDir.size() && + firstDir.get(firstIdx).getId() == secondDir.get(secondIdx).getId()) { + secondIdx++; + } + + currentPayload = first.getRawBytes(currentEntry.getId()); + firstIdx++; + } else { + // Take from second record + currentEntry = secondDir.get(secondIdx); + currentPayload = second.getRawBytes(currentEntry.getId()); + secondIdx++; + } + + if (currentPayload == null) + throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get raw bytes for field " + currentEntry.getId()); + + // Add adjusted directory entry + var newEntry = new DirectoryEntry(currentEntry.getId(), currentEntry.getTypeCode(), currentOffset); + newDirectory.add(newEntry); + + // Collect payload chunk + payloadChunks.add(currentPayload.duplicate()); + currentOffset += currentPayload.remaining(); + } + + // Build merged payload + var mergedPayload = buildPayloadFromChunks(payloadChunks); + + // Create header preserving first record's schema ID + var newHeader = new Header(first.getHeader().getFlags(), first.getHeader().getSchemaId(), mergedPayload.remaining()); + + return new ImprintRecord(newHeader, newDirectory, mergedPayload); + } + + /** + * Represents a range of bytes to copy from source payload. + */ + @Value + private static class FieldRange { + int start; + int end; + + int length() { + return end - start; + } + } + + /** + * Build a new payload buffer from field ranges in the source payload. + */ + private static ByteBuffer buildPayloadFromRanges(ByteBuffer sourcePayload, List ranges) { + int totalSize = ranges.stream().mapToInt(FieldRange::length).sum(); + var newPayload = ByteBuffer.allocate(totalSize); + newPayload.order(ByteOrder.LITTLE_ENDIAN); + + for (var range : ranges) { + var sourceSlice = sourcePayload.duplicate(); + sourceSlice.position(range.start).limit(range.end); + newPayload.put(sourceSlice); + } + + newPayload.flip(); + return newPayload; + } + + /** + * Build a new payload buffer by concatenating chunks. + */ + private static ByteBuffer buildPayloadFromChunks(List chunks) { + int totalSize = chunks.stream().mapToInt(ByteBuffer::remaining).sum(); + var mergedPayload = ByteBuffer.allocate(totalSize); + mergedPayload.order(ByteOrder.LITTLE_ENDIAN); + + for (var chunk : chunks) { + mergedPayload.put(chunk); + } + + mergedPayload.flip(); + return mergedPayload; + } + + /** + * Create an empty record with the given schema ID. 
+ */ + private static ImprintRecord createEmptyRecord(SchemaId schemaId) { + var header = new Header(new Flags((byte) 0x01), schemaId, 0); + return new ImprintRecord(header, Collections.emptyList(), ByteBuffer.allocate(0)); + } +} diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java index 6abc9cf..e720df5 100644 --- a/src/main/java/com/imprint/core/ImprintRecord.java +++ b/src/main/java/com/imprint/core/ImprintRecord.java @@ -69,6 +69,28 @@ public ByteBuffer getRawBytes(int fieldId) { } } + /** + * Project a subset of fields from this record. + * + * @param fieldIds Array of field IDs to include in the projection + * @return New ImprintRecord containing only the requested fields + */ + public ImprintRecord project(int... fieldIds) { + return ImprintOperations.project(this, fieldIds); + } + + /** + * Merge another record into this one. + * For duplicate fields, this record's values take precedence. + * + * @param other The record to merge with this one + * @return New ImprintRecord containing merged fields + * @throws ImprintException if merge fails + */ + public ImprintRecord merge(ImprintRecord other) throws ImprintException { + return ImprintOperations.merge(this, other); + } + /** * Get the directory (parsing it if necessary). */ diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java index fbb988c..bfa9958 100644 --- a/src/main/java/com/imprint/types/Value.java +++ b/src/main/java/com/imprint/types/Value.java @@ -169,6 +169,7 @@ public String toString() { } // Float64 Value + @Getter @EqualsAndHashCode(callSuper = false) public static class Float64Value extends Value { @@ -180,7 +181,7 @@ public Float64Value(double value) { @Override public TypeCode getTypeCode() { return TypeCode.FLOAT64; } - + @Override public String toString() { return String.valueOf(value); @@ -188,17 +189,20 @@ public String toString() { } // Bytes Value (array-based) + @Getter public static class BytesValue extends Value { + /** + * Returns internal array. MUST NOT be modified by caller. + */ private final byte[] value; - + + /** + * Takes ownership of the byte array. Caller must not modify after construction. 
+ */ public BytesValue(byte[] value) { - this.value = value.clone(); + this.value = Objects.requireNonNull(value); } - - public byte[] getValue() { - return value.clone(); - } - + @Override public TypeCode getTypeCode() { return TypeCode.BYTES; } diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java index 898adfb..ee1d426 100644 --- a/src/test/java/com/imprint/IntegrationTest.java +++ b/src/test/java/com/imprint/IntegrationTest.java @@ -49,8 +49,6 @@ var record = ImprintRecord.builder(schemaId) assertTrue(deserialized.getBoolean(3)); assertEquals(3.14159, deserialized.getFloat64(4)); assertArrayEquals(new byte[]{1,2,3,4}, deserialized.getBytes(5)); - - System.out.println("Basic functionality test passed"); } @Test @@ -90,15 +88,11 @@ var record = ImprintRecord.builder(schemaId) assertEquals(2, deserializedMap.size()); assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one"))); assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two"))); - - System.out.println("Arrays and maps test passed"); } @Test @DisplayName("Nested Records: create, serialize, deserialize records within records") void testNestedRecords() throws ImprintException { - System.out.println("Testing nested records..."); - var innerSchemaId = new SchemaId(3, 0x12345678); var innerRecord = ImprintRecord.builder(innerSchemaId) .field(1, "nested data") @@ -124,8 +118,348 @@ void testNestedRecords() throws ImprintException { assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId()); assertEquals("nested data", nestedDeserialized.getString(1)); assertEquals(9876543210L, nestedDeserialized.getInt64(2)); + } + + @Test + @DisplayName("Project: subset of fields with serialization round-trip") + void testProjectSubsetWithSerialization() throws ImprintException { + var schemaId = new SchemaId(10, 0xabcd1234); + var originalRecord = ImprintRecord.builder(schemaId) + .field(1, 100) + .field(2, "keep this field") + .field(3, false) + .field(4, "remove this field") + .field(5, 42.5) + .field(6, new byte[]{9, 8, 7}) + .build(); + + // Project fields 1, 2, 5 (skip 3, 4, 6) + var projected = originalRecord.project(1, 2, 5); + + assertEquals(3, projected.getDirectory().size()); + assertEquals(100, projected.getInt32(1)); + assertEquals("keep this field", projected.getString(2)); + assertEquals(42.5, projected.getFloat64(5)); + + // Verify missing fields + assertNull(projected.getValue(3)); + assertNull(projected.getValue(4)); + assertNull(projected.getValue(6)); + + // Test serialization round-trip of projected record + var buffer = projected.serializeToBuffer(); + byte[] serialized = new byte[buffer.remaining()]; + buffer.get(serialized); + var deserialized = ImprintRecord.deserialize(serialized); + + assertEquals(3, deserialized.getDirectory().size()); + assertEquals(100, deserialized.getInt32(1)); + assertEquals("keep this field", deserialized.getString(2)); + assertEquals(42.5, deserialized.getFloat64(5)); + } + + @Test + @DisplayName("Project: complex data types (arrays, maps, nested records)") + void testProjectComplexTypes() throws ImprintException { + var schemaId = new SchemaId(11, 0xbeef4567); + + // Create nested record + var nestedRecord = ImprintRecord.builder(new SchemaId(12, 0x11111111)) + .field(100, "nested value") + .build(); + + // Create homogeneous array (all strings) + var testArray = Arrays.asList(Value.fromString("item1"), Value.fromString("item2"), Value.fromString("item3")); + + // Create homogeneous map 
(string keys -> string values) + var testMap = new HashMap(); + testMap.put(MapKey.fromString("key1"), Value.fromString("value1")); + testMap.put(MapKey.fromString("key2"), Value.fromString("value2")); + + var originalRecord = ImprintRecord.builder(schemaId) + .field(1, "simple string") + .field(2, Value.fromArray(testArray)) + .field(3, Value.fromMap(testMap)) + .field(4, nestedRecord) + .field(5, 999L) + .build(); + + // Project only complex types + var projected = originalRecord.project(2, 3, 4); + + assertEquals(3, projected.getDirectory().size()); + + // Verify array projection (homogeneous strings) + var projectedArray = projected.getArray(2); + assertEquals(3, projectedArray.size()); + assertEquals(Value.fromString("item1"), projectedArray.get(0)); + assertEquals(Value.fromString("item2"), projectedArray.get(1)); + assertEquals(Value.fromString("item3"), projectedArray.get(2)); + + // Verify map projection (string -> string) + var projectedMap = projected.getMap(3); + assertEquals(2, projectedMap.size()); + assertEquals(Value.fromString("value1"), projectedMap.get(MapKey.fromString("key1"))); + assertEquals(Value.fromString("value2"), projectedMap.get(MapKey.fromString("key2"))); + + // Verify nested record projection + var projectedNested = projected.getRow(4); + assertEquals("nested value", projectedNested.getString(100)); + + // Verify excluded fields + assertNull(projected.getValue(1)); + assertNull(projected.getValue(5)); + } + + @Test + @DisplayName("Merge: distinct fields with serialization round-trip") + void testMergeDistinctFieldsWithSerialization() throws ImprintException { + var schemaId = new SchemaId(20, 0xcafe5678); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, 100) + .field(3, "from record1") + .field(5, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, 200L) + .field(4, "from record2") + .field(6, 3.14f) + .build(); + + var merged = record1.merge(record2); + + assertEquals(6, merged.getDirectory().size()); + assertEquals(100, merged.getInt32(1)); + assertEquals(200L, merged.getInt64(2)); + assertEquals("from record1", merged.getString(3)); + assertEquals("from record2", merged.getString(4)); + assertTrue(merged.getBoolean(5)); + assertEquals(3.14f, merged.getFloat32(6)); + + // Test serialization round-trip of merged record + var buffer = merged.serializeToBuffer(); + byte[] serialized = new byte[buffer.remaining()]; + buffer.get(serialized); + var deserialized = ImprintRecord.deserialize(serialized); + + assertEquals(6, deserialized.getDirectory().size()); + assertEquals(100, deserialized.getInt32(1)); + assertEquals(200L, deserialized.getInt64(2)); + assertEquals("from record1", deserialized.getString(3)); + assertEquals("from record2", deserialized.getString(4)); + assertTrue(deserialized.getBoolean(5)); + assertEquals(3.14f, deserialized.getFloat32(6)); + } - System.out.println("✓ Nested records test passed"); + @Test + @DisplayName("Merge: overlapping fields - first record wins") + void testMergeOverlappingFields() throws ImprintException { + var schemaId = new SchemaId(21, 0xdead9876); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, "first wins") + .field(2, 100) + .field(4, true) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(1, "second loses") // Overlapping field + .field(2, 999) // Overlapping field + .field(3, "unique to second") + .field(4, false) // Overlapping field + .build(); + + var merged = record1.merge(record2); + + assertEquals(4, merged.getDirectory().size()); + 
assertEquals("first wins", merged.getString(1)); // First record wins + assertEquals(100, merged.getInt32(2)); // First record wins + assertEquals("unique to second", merged.getString(3)); // Only in second + assertTrue(merged.getBoolean(4)); // First record wins + } + + @Test + @DisplayName("Merge: complex data types and nested records") + void testMergeComplexTypes() throws ImprintException { + var schemaId = new SchemaId(22, 0xbeef1111); + + // Create nested records for both + var nested1 = ImprintRecord.builder(new SchemaId(23, 0x22222222)) + .field(100, "nested in record1") + .build(); + + var nested2 = ImprintRecord.builder(new SchemaId(24, 0x33333333)) + .field(200, "nested in record2") + .build(); + + // Create arrays + var array1 = Arrays.asList(Value.fromString("array1_item1"), Value.fromString("array1_item2")); + var array2 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20)); + + // Create maps + var map1 = new HashMap(); + map1.put(MapKey.fromString("map1_key"), Value.fromString("map1_value")); + + var map2 = new HashMap(); + map2.put(MapKey.fromInt32(42), Value.fromBoolean(true)); + + var record1 = ImprintRecord.builder(schemaId) + .field(1, nested1) + .field(3, Value.fromArray(array1)) + .field(5, Value.fromMap(map1)) + .build(); + + var record2 = ImprintRecord.builder(schemaId) + .field(2, nested2) + .field(4, Value.fromArray(array2)) + .field(6, Value.fromMap(map2)) + .build(); + + var merged = record1.merge(record2); + + assertEquals(6, merged.getDirectory().size()); + + // Verify nested records + var mergedNested1 = merged.getRow(1); + assertEquals("nested in record1", mergedNested1.getString(100)); + + var mergedNested2 = merged.getRow(2); + assertEquals("nested in record2", mergedNested2.getString(200)); + + // Verify arrays + var mergedArray1 = merged.getArray(3); + assertEquals(2, mergedArray1.size()); + assertEquals(Value.fromString("array1_item1"), mergedArray1.get(0)); + + var mergedArray2 = merged.getArray(4); + assertEquals(2, mergedArray2.size()); + assertEquals(Value.fromInt32(10), mergedArray2.get(0)); + + // Verify maps + var mergedMap1 = merged.getMap(5); + assertEquals(Value.fromString("map1_value"), mergedMap1.get(MapKey.fromString("map1_key"))); + + var mergedMap2 = merged.getMap(6); + assertEquals(Value.fromBoolean(true), mergedMap2.get(MapKey.fromInt32(42))); + } + + @Test + @DisplayName("Project and Merge: chained operations") + void testProjectAndMergeChained() throws ImprintException { + var schemaId = new SchemaId(30, 0xabcdabcd); + + // Create a large record + var fullRecord = ImprintRecord.builder(schemaId) + .field(1, "field1") + .field(2, "field2") + .field(3, "field3") + .field(4, "field4") + .field(5, "field5") + .field(6, "field6") + .build(); + + // Project different subsets + var projection1 = fullRecord.project(1, 3, 5); + var projection2 = fullRecord.project(2, 4, 6); + + assertEquals(3, projection1.getDirectory().size()); + assertEquals(3, projection2.getDirectory().size()); + + // Merge the projections back together + var recomposed = projection1.merge(projection2); + + assertEquals(6, recomposed.getDirectory().size()); + assertEquals("field1", recomposed.getString(1)); + assertEquals("field2", recomposed.getString(2)); + assertEquals("field3", recomposed.getString(3)); + assertEquals("field4", recomposed.getString(4)); + assertEquals("field5", recomposed.getString(5)); + assertEquals("field6", recomposed.getString(6)); + + // Test another chain: project the merged result + var finalProjection = recomposed.project(2, 4, 6); + 
assertEquals(3, finalProjection.getDirectory().size()); + assertEquals("field2", finalProjection.getString(2)); + assertEquals("field4", finalProjection.getString(4)); + assertEquals("field6", finalProjection.getString(6)); + } + + @Test + @DisplayName("Merge and Project: empty record handling") + void testMergeAndProjectEmptyRecords() throws ImprintException { + var schemaId = new SchemaId(40, 0xeeeeeeee); + + var emptyRecord = ImprintRecord.builder(schemaId).build(); + var nonEmptyRecord = ImprintRecord.builder(schemaId) + .field(1, "not empty") + .field(2, 42) + .build(); + + // Test merging with empty + var merged1 = emptyRecord.merge(nonEmptyRecord); + var merged2 = nonEmptyRecord.merge(emptyRecord); + + assertEquals(2, merged1.getDirectory().size()); + assertEquals(2, merged2.getDirectory().size()); + assertEquals("not empty", merged1.getString(1)); + assertEquals("not empty", merged2.getString(1)); + + // Test projecting empty record + var projectedEmpty = emptyRecord.project(1, 2, 3); + assertEquals(0, projectedEmpty.getDirectory().size()); + + // Test projecting non-existent fields + var projectedNonExistent = nonEmptyRecord.project(99, 100); + assertEquals(0, projectedNonExistent.getDirectory().size()); + } + + @Test + @DisplayName("Project and Merge: Large record operations") + void testLargeRecordOperations() throws ImprintException { + var schemaId = new SchemaId(50, 0xffffffff); + + // Create a record with many fields + var builder = ImprintRecord.builder(schemaId); + for (int i = 1; i <= 100; i++) { + builder.field(i, "field_" + i + "_data"); + } + var largeRecord = builder.build(); + + assertEquals(100, largeRecord.getDirectory().size()); + + // Project a subset (every 10th field) + int[] projectionFields = new int[10]; + for (int i = 0; i < 10; i++) { + projectionFields[i] = (i + 1) * 10; // 10, 20, 30, ..., 100 + } + + var projected = largeRecord.project(projectionFields); + assertEquals(10, projected.getDirectory().size()); + + for (int i = 0; i < 10; i++) { + int fieldId = (i + 1) * 10; + assertEquals("field_" + fieldId + "_data", projected.getString(fieldId)); + } + + // Create another large record for merging + var builder2 = ImprintRecord.builder(schemaId); + for (int i = 101; i <= 150; i++) { + builder2.field(i, "additional_field_" + i); + } + var additionalRecord = builder2.build(); + + // Merge the large records + var merged = largeRecord.merge(additionalRecord); + assertEquals(150, merged.getDirectory().size()); + + // Verify some values from both records + assertEquals("field_1_data", merged.getString(1)); + assertEquals("field_50_data", merged.getString(50)); + assertEquals("field_100_data", merged.getString(100)); + assertEquals("additional_field_101", merged.getString(101)); + assertEquals("additional_field_150", merged.getString(150)); } private ImprintRecord createTestRecordForGetters() throws ImprintException { @@ -216,7 +550,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Empty Collections (Array and Map)") - void testErgonomicGettersEmptyCollections() throws ImprintException { + void testTypeGettersEmptyCollections() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -231,7 +565,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Field Not Found") - void testErgonomicGetterExceptionFieldNotFound() throws ImprintException { + void testTypeGetterExceptionFieldNotFound() throws 
ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -241,7 +575,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Null Field accessed as primitive") - void testErgonomicGetterExceptionNullField() throws ImprintException { + void testTypeGetterExceptionNullField() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -258,7 +592,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Exception for Type Mismatch") - void testErgonomicGetterExceptionTypeMismatch() throws ImprintException { + void testTypeGetterExceptionTypeMismatch() throws ImprintException { var originalRecord = createTestRecordForGetters(); var record = serializeAndDeserialize(originalRecord); @@ -268,7 +602,7 @@ var record = serializeAndDeserialize(originalRecord); @Test @DisplayName("Type Getters: Row (Nested Record)") - void testErgonomicGetterRow() throws ImprintException { + void testTypeGetterRow() throws ImprintException { var innerSchemaId = new SchemaId(6, 0x12345678); var innerRecord = ImprintRecord.builder(innerSchemaId) .field(101, "nested string") diff --git a/src/test/java/com/imprint/core/ImprintOperationsTest.java b/src/test/java/com/imprint/core/ImprintOperationsTest.java new file mode 100644 index 0000000..1dc67fb --- /dev/null +++ b/src/test/java/com/imprint/core/ImprintOperationsTest.java @@ -0,0 +1,405 @@ +package com.imprint.core; + +import com.imprint.error.ImprintException; +import com.imprint.types.Value; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +@DisplayName("ImprintOperations") +class ImprintOperationsTest { + + private SchemaId testSchema; + private ImprintRecord multiFieldRecord; + private ImprintRecord emptyRecord; + + @BeforeEach + void setUp() throws ImprintException { + testSchema = new SchemaId(1, 0xdeadbeef); + multiFieldRecord = createTestRecord(); + emptyRecord = createEmptyTestRecord(); + } + + private ImprintRecord createTestRecord() throws ImprintException { + return ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .field(5, true) + .field(7, new byte[]{1, 2, 3}) + .build(); + } + + private ImprintRecord createEmptyTestRecord() throws ImprintException { + return ImprintRecord.builder(testSchema).build(); + } + + @Nested + @DisplayName("Project Operations") + class ProjectOperations { + + @Test + @DisplayName("should project subset of fields") + void shouldProjectSubsetOfFields() throws ImprintException { + // When projecting a subset of fields + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 5); + + // Then only the requested fields should be present + assertEquals(2, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertTrue(projected.getBoolean(5)); + + // And non-requested fields should be absent + assertNull(projected.getValue(3)); + assertNull(projected.getValue(7)); + } + + @Test + @DisplayName("should maintain field order regardless of input order") + void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException { + // When projecting fields in arbitrary order + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7, 1, 5, 3); 
+ + // Then all requested fields should be present + assertEquals(4, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertEquals("hello", projected.getString(3)); + assertTrue(projected.getBoolean(5)); + assertArrayEquals(new byte[]{1, 2, 3}, projected.getBytes(7)); + + // And directory should maintain sorted order + List directory = projected.getDirectory(); + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), + "Directory entries should be sorted by field id"); + } + } + + @Test + @DisplayName("should handle single field projection") + void shouldHandleSingleFieldProjection() throws ImprintException { + // When projecting a single field + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 3); + + // Then only that field should be present + assertEquals(1, projected.getDirectory().size()); + assertEquals("hello", projected.getString(3)); + } + + @Test + @DisplayName("should preserve all fields when projecting all") + void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException { + // Given all field IDs from the original record + int[] allFields = multiFieldRecord.getDirectory().stream() + .mapToInt(DirectoryEntry::getId) + .toArray(); + + // When projecting all fields + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, allFields); + + // Then all fields should be present with matching values + assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size()); + + for (DirectoryEntry entry : multiFieldRecord.getDirectory()) { + Value originalValue = multiFieldRecord.getValue(entry.getId()); + Value projectedValue = projected.getValue(entry.getId()); + assertEquals(originalValue, projectedValue, + "Field " + entry.getId() + " should have matching value"); + } + } + + @Test + @DisplayName("should handle empty projection") + void shouldHandleEmptyProjection() { + // When projecting no fields + ImprintRecord projected = ImprintOperations.project(multiFieldRecord); + + // Then result should be empty but valid + assertEquals(0, projected.getDirectory().size()); + assertEquals(0, projected.getBuffers().getPayload().remaining()); + } + + @Test + @DisplayName("should ignore nonexistent fields") + void shouldIgnoreNonexistentFields() throws ImprintException { + // When projecting mix of existing and non-existing fields + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 99, 100); + + // Then only existing fields should be included + assertEquals(1, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + assertNull(projected.getValue(99)); + assertNull(projected.getValue(100)); + } + + @Test + @DisplayName("should deduplicate requested fields") + void shouldDeduplicateRequestedFields() throws ImprintException { + // When projecting the same field multiple times + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 1, 1); + + // Then field should only appear once + assertEquals(1, projected.getDirectory().size()); + assertEquals(42, projected.getInt32(1)); + } + + @Test + @DisplayName("should handle projection from empty record") + void shouldHandleProjectionFromEmptyRecord() { + // When projecting any fields from empty record + ImprintRecord projected = ImprintOperations.project(emptyRecord, 1, 2, 3); + + // Then result should be empty but valid + assertEquals(0, projected.getDirectory().size()); + assertEquals(0, projected.getBuffers().getPayload().remaining()); + } + + 
@Test + @DisplayName("should preserve exact byte representation") + void shouldPreserveExactByteRepresentation() throws ImprintException { + // Given a field's original bytes + byte[] originalBytes = multiFieldRecord.getBytes(7); + + // When projecting that field + ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7); + + // Then the byte representation should be exactly preserved + byte[] projectedBytes = projected.getBytes(7); + assertArrayEquals(originalBytes, projectedBytes, + "Byte representation should be identical"); + } + + @Test + @DisplayName("should reduce payload size when projecting subset") + void shouldReducePayloadSizeWhenProjectingSubset() throws ImprintException { + // Given a record with large and small fields + ImprintRecord largeRecord = ImprintRecord.builder(testSchema) + .field(1, 42) // 4 bytes + .field(2, "x".repeat(1000)) // ~1000+ bytes + .field(3, 123L) // 8 bytes + .field(4, new byte[500]) // 500+ bytes + .build(); + + int originalPayloadSize = largeRecord.getBuffers().getPayload().remaining(); + + // When projecting only the small fields + ImprintRecord projected = ImprintOperations.project(largeRecord, 1, 3); + + // Then the payload size should be significantly smaller + assertTrue(projected.getBuffers().getPayload().remaining() < originalPayloadSize, + "Projected payload should be smaller than original"); + + // And the values should still be correct + assertEquals(42, projected.getInt32(1)); + assertEquals(123L, projected.getInt64(3)); + } + } + + @Nested + @DisplayName("Merge Operations") + class MergeOperations { + + @Test + @DisplayName("should merge records with distinct fields") + void shouldMergeRecordsWithDistinctFields() throws ImprintException { + // Given two records with different fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) + .field(3, "hello") + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) + .field(4, 123L) + .build(); + + // When merging the records + ImprintRecord merged = ImprintOperations.merge(record1, record2); + + // Then all fields should be present + assertEquals(4, merged.getDirectory().size()); + assertEquals(42, merged.getInt32(1)); + assertTrue(merged.getBoolean(2)); + assertEquals("hello", merged.getString(3)); + assertEquals(123L, merged.getInt64(4)); + + // And directory should be sorted + List directory = merged.getDirectory(); + for (int i = 1; i < directory.size(); i++) { + assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(), + "Directory entries should be sorted by field id"); + } + } + + @Test + @DisplayName("should merge records with overlapping fields") + void shouldMergeRecordsWithOverlappingFields() throws ImprintException { + // Given two records with overlapping fields + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(2, "first") + .field(3, 42) + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(1, true) + .field(2, "second") // Overlapping field + .build(); + + // When merging the records + ImprintRecord merged = ImprintOperations.merge(record1, record2); + + // Then first record's values should take precedence for duplicates + assertEquals(3, merged.getDirectory().size()); + assertTrue(merged.getBoolean(1)); + assertEquals("first", merged.getString(2)); // First record wins + assertEquals(42, merged.getInt32(3)); + } + + @Test + @DisplayName("should preserve schema id from first record") + void shouldPreserveSchemaIdFromFirstRecord() throws 
ImprintException { + // Given two records with different schema IDs + SchemaId schema1 = new SchemaId(1, 0xdeadbeef); + SchemaId schema2 = new SchemaId(1, 0xcafebabe); + + ImprintRecord record1 = ImprintRecord.builder(schema1) + .field(1, 42) + .build(); + + ImprintRecord record2 = ImprintRecord.builder(schema2) + .field(2, true) + .build(); + + // When merging the records + ImprintRecord merged = ImprintOperations.merge(record1, record2); + + // Then schema ID from first record should be preserved + assertEquals(schema1, merged.getHeader().getSchemaId()); + } + + @Test + @DisplayName("should handle merge with empty record") + void shouldHandleMergeWithEmptyRecord() throws ImprintException { + // When merging with empty record + ImprintRecord merged1 = ImprintOperations.merge(multiFieldRecord, emptyRecord); + ImprintRecord merged2 = ImprintOperations.merge(emptyRecord, multiFieldRecord); + + // Then results should contain all original fields + assertEquals(multiFieldRecord.getDirectory().size(), merged1.getDirectory().size()); + assertEquals(multiFieldRecord.getDirectory().size(), merged2.getDirectory().size()); + + // And values should be preserved + for (DirectoryEntry entry : multiFieldRecord.getDirectory()) { + Value originalValue = multiFieldRecord.getValue(entry.getId()); + assertEquals(originalValue, merged1.getValue(entry.getId())); + assertEquals(originalValue, merged2.getValue(entry.getId())); + } + } + + @Test + @DisplayName("should handle merge of two empty records") + void shouldHandleMergeOfTwoEmptyRecords() throws ImprintException { + // When merging two empty records + ImprintRecord merged = ImprintOperations.merge(emptyRecord, emptyRecord); + + // Then result should be empty but valid + assertEquals(0, merged.getDirectory().size()); + assertEquals(0, merged.getBuffers().getPayload().remaining()); + } + + @Test + @DisplayName("should maintain correct payload offsets after merge") + void shouldMaintainCorrectPayloadOffsetsAfterMerge() throws ImprintException { + // Given records with different field sizes + ImprintRecord record1 = ImprintRecord.builder(testSchema) + .field(1, 42) // 4 bytes + .field(3, "hello") // 5+ bytes + .build(); + + ImprintRecord record2 = ImprintRecord.builder(testSchema) + .field(2, true) // 1 byte + .field(4, new byte[]{1, 2, 3, 4, 5}) // 5+ bytes + .build(); + + // When merging + ImprintRecord merged = ImprintOperations.merge(record1, record2); + + // Then all fields should be accessible with correct values + assertEquals(42, merged.getInt32(1)); + assertTrue(merged.getBoolean(2)); + assertEquals("hello", merged.getString(3)); + assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, merged.getBytes(4)); + + // And directory offsets should be sequential + List directory = merged.getDirectory(); + int expectedOffset = 0; + for (DirectoryEntry entry : directory) { + assertEquals(expectedOffset, entry.getOffset(), + "Field " + entry.getId() + " should have correct offset"); + + // Calculate next offset + var fieldData = merged.getRawBytes(entry.getId()); + assertNotNull(fieldData); + expectedOffset += fieldData.remaining(); + } + } + + @Test + @DisplayName("should handle large records efficiently") + void shouldHandleLargeRecordsEfficiently() throws ImprintException { + // Given records with many fields + var builder1 = ImprintRecord.builder(testSchema); + var builder2 = ImprintRecord.builder(testSchema); + + // Add 100 fields to each record (no overlap) + for (int i = 1; i <= 100; i++) { + builder1.field(i, i * 10); + } + for (int i = 101; i <= 200; i++) { + 
builder2.field(i, i * 10); + } + + ImprintRecord record1 = builder1.build(); + ImprintRecord record2 = builder2.build(); + + // When merging large records + ImprintRecord merged = ImprintOperations.merge(record1, record2); + + // Then all 200 fields should be present and accessible + assertEquals(200, merged.getDirectory().size()); + + // Spot check some values + assertEquals(10, merged.getInt32(1)); + assertEquals(500, merged.getInt32(50)); + assertEquals(1000, merged.getInt32(100)); + assertEquals(1010, merged.getInt32(101)); + assertEquals(1500, merged.getInt32(150)); + assertEquals(2000, merged.getInt32(200)); + } + } + + @Nested + @DisplayName("Error Handling") + class ErrorHandling { + + @Test + @DisplayName("should handle null record gracefully") + void shouldHandleNullRecordGracefully() { + assertThrows(NullPointerException.class, () -> ImprintOperations.project(null, 1, 2, 3)); + + assertThrows(NullPointerException.class, () -> ImprintOperations.merge(null, multiFieldRecord)); + + assertThrows(NullPointerException.class, () -> ImprintOperations.merge(multiFieldRecord, null)); + } + + @Test + @DisplayName("should handle null field ids gracefully") + void shouldHandleNullFieldIdsGracefully() { + assertThrows(NullPointerException.class, () -> ImprintOperations.project(multiFieldRecord, (int[]) null)); + } + } +} diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java index 3b9f371..64be931 100644 --- a/src/test/java/com/imprint/profile/ProfilerTest.java +++ b/src/test/java/com/imprint/profile/ProfilerTest.java @@ -27,7 +27,7 @@ * - String operations and UTF-8 encoding * - ByteBuffer operations */ -@Disabled("Enable manually for profiling") +//@Disabled("Enable manually for profiling") public class ProfilerTest { private static final int ITERATIONS = 1_000_000; @@ -82,12 +82,12 @@ void profileSerialization() throws Exception { Thread.sleep(3000); var schemaId = new SchemaId(1, 0x12345678); - + System.out.println("Beginning serialization profiling..."); long start = System.nanoTime(); // Create and serialize many records (allocation hotspot) - for (int i = 0; i < 100_000; i++) { + for (int i = 0; i < 500_000; i++) { var writer = new ImprintWriter(schemaId); // Add various field types