diff --git a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java
index 4d9c01c..6a6a958 100644
--- a/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java
+++ b/src/jmh/java/com/imprint/benchmark/ComparisonBenchmark.java
@@ -405,7 +405,7 @@ public void mergeFlatBuffers(Blackhole bh) {
// ===== MAIN METHOD TO RUN BENCHMARKS =====
public static void main(String[] args) throws RunnerException {
- runAll();
+ runFieldAccessBenchmarks();
// Or, uncomment specific runner methods to execute subsets:
// runSerializationBenchmarks();
// runDeserializationBenchmarks();
diff --git a/src/main/java/com/imprint/core/ImprintBuffers.java b/src/main/java/com/imprint/core/ImprintBuffers.java
index f6a341b..c14d6df 100644
--- a/src/main/java/com/imprint/core/ImprintBuffers.java
+++ b/src/main/java/com/imprint/core/ImprintBuffers.java
@@ -10,9 +10,9 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import java.util.Objects;
+import java.util.TreeMap;
/**
* Manages the raw buffers for an Imprint record with lazy directory parsing.
@@ -31,17 +31,12 @@ public final class ImprintBuffers {
private final ByteBuffer directoryBuffer; // Raw directory bytes (includes count)
private final ByteBuffer payload; // Read-only payload view
- // Lazy-loaded directory state
- private List<DirectoryEntry> parsedDirectory;
+ // Lazy-loaded directory state. Must stay ordered by fieldId so the next field's
+ // offset (the endOffset) can be found via ordered navigation.
+ private TreeMap<Integer, DirectoryEntry> parsedDirectory;
private boolean directoryParsed = false;
- private int directoryCount = -1; // Cached count to avoid repeated VarInt decoding
/**
* Creates buffers from raw data (used during deserialization).
- *
- * @param directoryBuffer Raw directory bytes including VarInt count and all entries.
- * Format: [VarInt count][Entry1][Entry2]...[EntryN]
- * @param payload Raw payload data containing all field values sequentially
*/
public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) {
this.directoryBuffer = directoryBuffer.asReadOnlyBuffer();
@@ -50,33 +45,17 @@ public ImprintBuffers(ByteBuffer directoryBuffer, ByteBuffer payload) {
/**
* Creates buffers from pre-parsed directory (used during construction).
- * This is more efficient when the directory is already known.
- *
- * @param directory Parsed directory entries, must be sorted by fieldId
- * @param payload Raw payload data containing all field values
*/
public ImprintBuffers(List<DirectoryEntry> directory, ByteBuffer payload) {
- this.parsedDirectory = Collections.unmodifiableList(Objects.requireNonNull(directory));
+ this.parsedDirectory = createDirectoryMap(Objects.requireNonNull(directory));
this.directoryParsed = true;
- this.directoryCount = directory.size();
this.payload = payload.asReadOnlyBuffer();
this.directoryBuffer = createDirectoryBuffer(directory);
}
/**
* Get a zero-copy ByteBuffer view of a field's data.
- *
- * Buffer Positioning Logic:
- *
- * - Find the directory entry for the requested fieldId
- * - Use entry.offset as start position in payload
- * - Find end position by looking at next field's offset (or payload end)
- * - Create a slice view: payload[startOffset:endOffset]
- *
- *
- * @param fieldId The field identifier to retrieve
- * @return Zero-copy ByteBuffer positioned at field data, or null if field not found
- * @throws ImprintException if buffer bounds are invalid or directory is corrupted
+ * Optimized for the most common use case: single field access.
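+ *
+ * <p>Usage sketch (names and field id are hypothetical):
+ * <pre>{@code
+ * ByteBuffer raw = buffers.getFieldBuffer(3); // zero-copy slice of the payload
+ * }</pre>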
*/
public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException {
var entry = findDirectoryEntry(fieldId);
@@ -92,78 +71,43 @@ public ByteBuffer getFieldBuffer(int fieldId) throws ImprintException {
"Invalid field buffer range: start=" + startOffset + ", end=" + endOffset + ", payloadLimit=" + payload.limit());
}
- ByteBuffer fieldBuffer = payload.duplicate();
+ var fieldBuffer = payload.duplicate();
fieldBuffer.position(startOffset).limit(endOffset);
return fieldBuffer;
}
/**
* Find a directory entry for the given field ID using the most efficient method.
- *
- * Search Strategy:
- *
- * - If directory is parsed: binary search on in-memory List<DirectoryEntry>
- * - If directory is raw: binary search directly on raw bytes (faster for single lookups)
- *
- * @param fieldId The field identifier to find
- * @return DirectoryEntry if found, null otherwise
- * @throws ImprintException if directory buffer is corrupted or truncated
+ *
+ * Strategy:
+ * - If parsed: TreeMap lookup
+ * - If raw: binary search directly on the raw bytes, avoiding a full parse of the directory
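+ * Each raw entry is fixed-width ([fieldId:2bytes][typeCode:1byte][offset:4bytes]),
+ * which is what makes direct indexing into the raw bytes possible.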
*/
public DirectoryEntry findDirectoryEntry(int fieldId) throws ImprintException {
- if (directoryParsed) {
- int index = findDirectoryIndexInParsed(fieldId);
- return index >= 0 ? parsedDirectory.get(index) : null;
- } else {
+ if (directoryParsed)
+ return parsedDirectory.get(fieldId);
+ else
return findFieldEntryInRawDirectory(fieldId);
- }
}
/**
* Get the full directory, parsing it if necessary.
- *
- *
- * Lazy Parsing Behavior:
- *
- * - First call: Parses entire directory from raw bytes into List<DirectoryEntry>
- * - Subsequent calls: Returns cached parsed directory
- * - Note - the method is not synchronized and assumes single-threaded usage.
- *
- *
- * When to use: Call this if you need to access multiple fields
- * from the same record. For single field access, direct field getters are more efficient.
- *
- * @return Immutable list of directory entries, sorted by fieldId
+ * Returns entries in fieldId order (the TreeMap's iteration order).
*/
public List<DirectoryEntry> getDirectory() {
ensureDirectoryParsed();
- return parsedDirectory;
+ return new ArrayList<>(parsedDirectory.values());
}
/**
- * Get the directory count without fully parsing the directory.
- *
- * This method avoids parsing the entire directory when only the count is needed.
- *
- * - Return cached count if available (directoryCount >= 0)
- * - Return parsed directory size if directory is already parsed
- * - Decode VarInt from raw buffer and cache the result
- *
- *
- * VarInt Decoding: The count is stored as a VarInt at the beginning
- * of the directoryBuffer. This method reads just enough bytes to decode the count.
- *
- * @return Number of fields in the directory, or 0 if decoding fails
+ * Get directory count without parsing.
*/
public int getDirectoryCount() {
- if (directoryCount >= 0)
- return directoryCount;
if (directoryParsed)
return parsedDirectory.size();
-
- // Decode from buffer and cache
try {
var countBuffer = directoryBuffer.duplicate();
- directoryCount = VarInt.decode(countBuffer).getValue();
- return directoryCount;
+ return VarInt.decode(countBuffer).getValue();
} catch (Exception e) {
return 0;
}
@@ -171,52 +115,23 @@ public int getDirectoryCount() {
/**
* Create a new buffer containing the serialized directory.
- *
- * Output Format:
- *
- * [VarInt count][DirectoryEntry 1][DirectoryEntry 2]...[DirectoryEntry N]
- *
- *
- * Each DirectoryEntry is serialized as: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
- *
- *
- * @return New ByteBuffer containing the complete serialized directory
*/
public ByteBuffer serializeDirectory() {
ensureDirectoryParsed();
- return createDirectoryBuffer(parsedDirectory);
+ return createDirectoryBuffer(new ArrayList<>(parsedDirectory.values()));
}
// ========== PRIVATE METHODS ==========
/**
* Binary search on raw directory bytes to find a specific field.
- *
- *
- * - Position buffer at start and decode VarInt count (cache for future use)
- * - Calculate directory start position after VarInt
- * - For binary search mid-point: entryPos = startPos + (mid * DIR_ENTRY_BYTES)
- * - Read fieldId from calculated position (first 2 bytes of entry)
- * - Compare fieldId and adjust search bounds
- * - When found: reposition buffer and deserialize complete entry
- *
- *
- * All buffer positions are bounds-checked before access.
- *
- * @param fieldId Field identifier to search for
- * @return Complete DirectoryEntry if found, null if not found
- * @throws ImprintException if buffer is truncated or corrupted
+ * This avoids parsing the entire directory for single field lookups.
*/
private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintException {
var searchBuffer = directoryBuffer.duplicate();
searchBuffer.order(ByteOrder.LITTLE_ENDIAN);
- // Decode directory count (cache it)
- if (directoryCount < 0)
- directoryCount = VarInt.decode(searchBuffer).getValue();
- else
- VarInt.decode(searchBuffer); // Skip past the count
-
+ int directoryCount = VarInt.decode(searchBuffer).getValue();
if (directoryCount == 0)
return null;
@@ -250,118 +165,65 @@ private DirectoryEntry findFieldEntryInRawDirectory(int fieldId) throws ImprintE
return null;
}
- /**
- *
- * @param fieldId Field identifier to find
- * @return Index of the field if found, or negative insertion point if not found
- */
- private int findDirectoryIndexInParsed(int fieldId) {
- if (!directoryParsed)
- return -1;
- int low = 0;
- int high = parsedDirectory.size() - 1;
- while (low <= high) {
- int mid = (low + high) >>> 1;
- int midFieldId = parsedDirectory.get(mid).getId();
- if (midFieldId < fieldId)
- low = mid + 1;
- else if (midFieldId > fieldId)
- high = mid - 1;
- else
- return mid;
- }
- return -(low + 1);
- }
-
/**
* Find the end offset for a field by looking at the next field's offset.
- *
- *
- * - Field data spans from: entry.offset to nextField.offset (exclusive)
- * - Last field spans from: entry.offset to payload.limit()
- * - This works because directory entries are sorted by fieldId
- *
- *
- * Search Strategy:
- *
- * - If directory parsed: Use binary search result + 1 to get next entry
- * - If directory raw: Scan raw entries until fieldId > currentFieldId
- *
- *
- * @param entry The directory entry whose end offset we need to find
- * @return End offset (exclusive) for the field data
- * @throws ImprintException if directory scanning fails
*/
private int findEndOffset(DirectoryEntry entry) throws ImprintException {
if (directoryParsed) {
- int entryIndex = findDirectoryIndexInParsed(entry.getId());
- return (entryIndex + 1 < parsedDirectory.size()) ?
- parsedDirectory.get(entryIndex + 1).getOffset() : payload.limit();
- } else
+ return findNextOffsetInParsedDirectory(entry.getId());
+ } else {
return findNextOffsetInRawDirectory(entry.getId());
+ }
}
/**
- * Scan raw directory to find the next field's offset after currentFieldId.
- *
- *
- * - Position buffer after VarInt count
- * - For each directory entry at position: startPos + (i * DIR_ENTRY_BYTES)
- * - Read fieldId (first 2 bytes) and offset (bytes 3-6)
- * - Return offset of first field where fieldId > currentFieldId
- * - If no next field found, return payload.limit()
- *
- *
- * @param currentFieldId Find the next field after this fieldId
- * @return Offset where the next field starts, or payload.limit() if this is the last field
- * @throws ImprintException if directory buffer is corrupted
+ * Find the end offset using TreeMap's efficient navigation methods.
*/
+ private int findNextOffsetInParsedDirectory(int currentFieldId) {
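+ // higherEntry returns the entry with the smallest fieldId strictly greater
+ // than currentFieldId, or null when currentFieldId is the last field.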
+ var nextEntry = parsedDirectory.higherEntry(currentFieldId);
+ return nextEntry != null ? nextEntry.getValue().getOffset() : payload.limit();
+ }
+
private int findNextOffsetInRawDirectory(int currentFieldId) throws ImprintException {
var scanBuffer = directoryBuffer.duplicate();
scanBuffer.order(ByteOrder.LITTLE_ENDIAN);
- int count = (directoryCount >= 0) ? directoryCount : VarInt.decode(scanBuffer).getValue();
+ int count = VarInt.decode(scanBuffer).getValue();
if (count == 0)
return payload.limit();
- if (directoryCount >= 0)
- VarInt.decode(scanBuffer); // Skip count if cached
int directoryStartPos = scanBuffer.position();
+ int low = 0;
+ int high = count - 1;
+ int nextOffset = payload.limit();
- for (int i = 0; i < count; i++) {
- int entryPos = directoryStartPos + (i * Constants.DIR_ENTRY_BYTES);
+ // Binary search for the first field with fieldId > currentFieldId
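+ // nextOffset tightens toward the left-most qualifying entry; if none exists
+ // (currentFieldId is the last field), the payload limit is returned.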
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ int entryPos = directoryStartPos + (mid * Constants.DIR_ENTRY_BYTES);
if (entryPos + Constants.DIR_ENTRY_BYTES > scanBuffer.limit())
- return payload.limit();
+ break;
scanBuffer.position(entryPos);
short fieldId = scanBuffer.getShort();
scanBuffer.get(); // skip type
int offset = scanBuffer.getInt();
- if (fieldId > currentFieldId)
- return offset;
+ if (fieldId > currentFieldId) {
+ nextOffset = offset;
+ high = mid - 1;
+ } else {
+ low = mid + 1;
+ }
}
- return payload.limit();
+ return nextOffset;
}
/**
* Parse the full directory if not already parsed.
- *
- *
- * - Duplicate directoryBuffer to avoid affecting original position
- * - Set byte order to LITTLE_ENDIAN for consistent reading
- * - Decode VarInt count and cache it
- * - Read 'count' directory entries sequentially
- * - Each entry: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
- * - Store as immutable list and mark as parsed
- *
- *
- * Error Handling: If parsing fails, throws RuntimeException
- * since this indicates corrupted data that should never happen in normal operation.
- *
- * Will return immediately if directory has already been parsed.
+ * Builds a TreeMap for both fast fieldId lookup and the ordering needed to locate end offsets.
*/
private void ensureDirectoryParsed() {
if (directoryParsed)
@@ -372,46 +234,42 @@ private void ensureDirectoryParsed() {
var countResult = VarInt.decode(parseBuffer);
int count = countResult.getValue();
- this.directoryCount = count;
- var directory = new ArrayList<DirectoryEntry>(count);
+ this.parsedDirectory = new TreeMap<>();
for (int i = 0; i < count; i++) {
- directory.add(deserializeDirectoryEntry(parseBuffer));
+ var entry = deserializeDirectoryEntry(parseBuffer);
+ parsedDirectory.put((int)entry.getId(), entry);
}
- this.parsedDirectory = Collections.unmodifiableList(directory);
this.directoryParsed = true;
} catch (ImprintException e) {
throw new RuntimeException("Failed to parse directory", e);
}
}
+ /**
+ * Create a TreeMap from a directory list, providing keyed field lookup with fieldId ordering.
+ */
+ private TreeMap<Integer, DirectoryEntry> createDirectoryMap(List<DirectoryEntry> directory) {
+ var map = new TreeMap<Integer, DirectoryEntry>();
+ for (var entry : directory) {
+ map.put((int)entry.getId(), entry);
+ }
+ return map;
+ }
+
/**
* Create directory buffer from parsed entries.
- *
- * Serialization Format:
- *
- * - Calculate buffer size: VarInt.encodedLength(count) + (count * DIR_ENTRY_BYTES)
- * - Allocate ByteBuffer with LITTLE_ENDIAN byte order
- * - Write VarInt count
- * - Write each directory entry: [fieldId:2][typeCode:1][offset:4]
- * - Flip buffer and return read-only view
- *
- *
- * @param directory List of directory entries to serialize
- * @return Read-only ByteBuffer containing serialized directory, or empty buffer on error
*/
private ByteBuffer createDirectoryBuffer(List<DirectoryEntry> directory) {
try {
- int bufferSize = VarInt.encodedLength(directory.size()) +
- (directory.size() * Constants.DIR_ENTRY_BYTES);
+ int bufferSize = VarInt.encodedLength(directory.size()) + (directory.size() * Constants.DIR_ENTRY_BYTES);
var buffer = ByteBuffer.allocate(bufferSize);
buffer.order(ByteOrder.LITTLE_ENDIAN);
VarInt.encode(directory.size(), buffer);
- for (var entry : directory) {
+ for (var entry : directory)
serializeDirectoryEntry(entry, buffer);
- }
buffer.flip();
return buffer.asReadOnlyBuffer();
@@ -433,10 +291,6 @@ private void serializeDirectoryEntry(DirectoryEntry entry, ByteBuffer buffer) {
/**
* Deserialize a single directory entry from the buffer.
* Reads: [fieldId:2bytes][typeCode:1byte][offset:4bytes]
- *
- * @param buffer Buffer positioned at the start of a directory entry
- * @return Parsed DirectoryEntry
- * @throws ImprintException if buffer doesn't contain enough bytes
*/
private DirectoryEntry deserializeDirectoryEntry(ByteBuffer buffer) throws ImprintException {
if (buffer.remaining() < Constants.DIR_ENTRY_BYTES)
diff --git a/src/main/java/com/imprint/core/ImprintOperations.java b/src/main/java/com/imprint/core/ImprintOperations.java
new file mode 100644
index 0000000..4e60ebf
--- /dev/null
+++ b/src/main/java/com/imprint/core/ImprintOperations.java
@@ -0,0 +1,207 @@
+package com.imprint.core;
+
+import com.imprint.error.ErrorType;
+import com.imprint.error.ImprintException;
+import lombok.Value;
+import lombok.experimental.UtilityClass;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.*;
+
+@UtilityClass
+public class ImprintOperations {
+
+ /**
+ * Project a subset of fields from an Imprint record. Payload copying is proportional to projected data size.
+ *
+ * Algorithm:
+ *
+ * - Sort and deduplicate requested field IDs for efficient matching
+ * - Scan directory to find matching fields and calculate ranges
+ * - Allocate new payload buffer with exact size needed
+ * - Copy field data ranges directly (zero-copy where possible)
+ * - Build new directory with adjusted offsets
+ *
+ *
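+ * <p>Usage sketch (field ids are hypothetical):
+ * <pre>{@code
+ * ImprintRecord slim = ImprintOperations.project(record, 1, 2, 5);
+ * // slim holds at most fields 1, 2 and 5, with offsets rebased to zero
+ * }</pre>
+ *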
+ * @param record The source record to project from
+ * @param fieldIds Array of field IDs to include in projection
+ * @return New ImprintRecord containing only the requested fields
+ */
+ public static ImprintRecord project(ImprintRecord record, int... fieldIds) {
+ // Sort and deduplicate field IDs for efficient matching with sorted directory
+ int[] sortedFieldIds = Arrays.stream(fieldIds).distinct().sorted().toArray();
+ if (sortedFieldIds.length == 0)
+ return createEmptyRecord(record.getHeader().getSchemaId());
+
+ // Eagerly fetch the entire directory (could this be lazy, per requested field?)
+ var sourceDirectory = record.getDirectory();
+ var newDirectory = new ArrayList<DirectoryEntry>(sortedFieldIds.length);
+ var ranges = new ArrayList<FieldRange>();
+
+ // Iterate through directory and compute ranges to copy
+ int fieldIdsIdx = 0;
+ int directoryIdx = 0;
+ int currentOffset = 0;
+
+ while (directoryIdx < sourceDirectory.size() && fieldIdsIdx < sortedFieldIds.length) {
+ var field = sourceDirectory.get(directoryIdx);
+ if (field.getId() == sortedFieldIds[fieldIdsIdx]) {
+ // Calculate field length using next field's offset
+ int nextOffset = (directoryIdx + 1 < sourceDirectory.size()) ?
+ sourceDirectory.get(directoryIdx + 1).getOffset() :
+ record.getBuffers().getPayload().limit();
+ int fieldLength = nextOffset - field.getOffset();
+
+ newDirectory.add(new DirectoryEntry(field.getId(), field.getTypeCode(), currentOffset));
+ ranges.add(new FieldRange(field.getOffset(), nextOffset));
+
+ currentOffset += fieldLength;
+ fieldIdsIdx++;
+ }
+ directoryIdx++;
+ }
+
+ // Build new payload from ranges
+ var newPayload = buildPayloadFromRanges(record.getBuffers().getPayload(), ranges);
+
+ // Create new header with updated payload size
+ // TODO: compute correct schema hash
+ var newHeader = new Header(record.getHeader().getFlags(),
+ new SchemaId(record.getHeader().getSchemaId().getFieldSpaceId(), 0xdeadbeef),
+ newPayload.remaining()
+ );
+
+ return new ImprintRecord(newHeader, newDirectory, newPayload);
+ }
+
+ /**
+ * Merge two Imprint records, combining their fields. Payload copying is proportional to total data size.
+ *
+ * Merge Strategy:
+ *
+ * - Fields are merged using sort-merge algorithm on directory entries
+ * - For duplicate field IDs: first record's field takes precedence
+ * - Payloads are concatenated with directory offsets adjusted
+ * - Schema ID from first record is preserved
+ *
+ *
+ *
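+ * <p>Usage sketch:
+ * <pre>{@code
+ * ImprintRecord combined = ImprintOperations.merge(first, second);
+ * // any field id present in both records resolves to first's value
+ * }</pre>
+ *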
+ * @param first The first record (takes precedence for duplicate fields)
+ * @param second The second record to merge
+ * @return New ImprintRecord containing merged fields
+ * @throws ImprintException if merge fails due to incompatible records
+ */
+ public static ImprintRecord merge(ImprintRecord first, ImprintRecord second) throws ImprintException {
+ var firstDir = first.getDirectory();
+ var secondDir = second.getDirectory();
+
+ // Pre-allocate for worst case (no overlapping fields)
+ var newDirectory = new ArrayList<DirectoryEntry>(firstDir.size() + secondDir.size());
+ var payloadChunks = new ArrayList<ByteBuffer>();
+
+ int firstIdx = 0;
+ int secondIdx = 0;
+ int currentOffset = 0;
+
+ while (firstIdx < firstDir.size() || secondIdx < secondDir.size()) {
+ DirectoryEntry currentEntry;
+ ByteBuffer currentPayload;
+
+ if (firstIdx < firstDir.size() &&
+ (secondIdx >= secondDir.size() || firstDir.get(firstIdx).getId() <= secondDir.get(secondIdx).getId())) {
+
+ // Take from first record
+ currentEntry = firstDir.get(firstIdx);
+
+ // Skip duplicate field in second record if present
+ if (secondIdx < secondDir.size() &&
+ firstDir.get(firstIdx).getId() == secondDir.get(secondIdx).getId()) {
+ secondIdx++;
+ }
+
+ currentPayload = first.getRawBytes(currentEntry.getId());
+ firstIdx++;
+ } else {
+ // Take from second record
+ currentEntry = secondDir.get(secondIdx);
+ currentPayload = second.getRawBytes(currentEntry.getId());
+ secondIdx++;
+ }
+
+ if (currentPayload == null)
+ throw new ImprintException(ErrorType.BUFFER_UNDERFLOW, "Failed to get raw bytes for field " + currentEntry.getId());
+
+ // Add adjusted directory entry
+ var newEntry = new DirectoryEntry(currentEntry.getId(), currentEntry.getTypeCode(), currentOffset);
+ newDirectory.add(newEntry);
+
+ // Collect payload chunk
+ payloadChunks.add(currentPayload.duplicate());
+ currentOffset += currentPayload.remaining();
+ }
+
+ // Build merged payload
+ var mergedPayload = buildPayloadFromChunks(payloadChunks);
+
+ // Create header preserving first record's schema ID
+ var newHeader = new Header(first.getHeader().getFlags(), first.getHeader().getSchemaId(), mergedPayload.remaining());
+
+ return new ImprintRecord(newHeader, newDirectory, mergedPayload);
+ }
+
+ /**
+ * Represents a range of bytes to copy from source payload.
+ */
+ @Value
+ private static class FieldRange {
+ int start;
+ int end;
+
+ int length() {
+ return end - start;
+ }
+ }
+
+ /**
+ * Build a new payload buffer from field ranges in the source payload.
+ */
+ private static ByteBuffer buildPayloadFromRanges(ByteBuffer sourcePayload, List<FieldRange> ranges) {
+ int totalSize = ranges.stream().mapToInt(FieldRange::length).sum();
+ var newPayload = ByteBuffer.allocate(totalSize);
+ newPayload.order(ByteOrder.LITTLE_ENDIAN);
+
+ for (var range : ranges) {
+ var sourceSlice = sourcePayload.duplicate();
+ sourceSlice.position(range.start).limit(range.end);
+ newPayload.put(sourceSlice);
+ }
+
+ newPayload.flip();
+ return newPayload;
+ }
+
+ /**
+ * Build a new payload buffer by concatenating chunks.
+ */
+ private static ByteBuffer buildPayloadFromChunks(List<ByteBuffer> chunks) {
+ int totalSize = chunks.stream().mapToInt(ByteBuffer::remaining).sum();
+ var mergedPayload = ByteBuffer.allocate(totalSize);
+ mergedPayload.order(ByteOrder.LITTLE_ENDIAN);
+
+ for (var chunk : chunks) {
+ mergedPayload.put(chunk);
+ }
+
+ mergedPayload.flip();
+ return mergedPayload;
+ }
+
+ /**
+ * Create an empty record with the given schema ID.
+ */
+ private static ImprintRecord createEmptyRecord(SchemaId schemaId) {
+ var header = new Header(new Flags((byte) 0x01), schemaId, 0);
+ return new ImprintRecord(header, Collections.emptyList(), ByteBuffer.allocate(0));
+ }
+}
diff --git a/src/main/java/com/imprint/core/ImprintRecord.java b/src/main/java/com/imprint/core/ImprintRecord.java
index 6abc9cf..e720df5 100644
--- a/src/main/java/com/imprint/core/ImprintRecord.java
+++ b/src/main/java/com/imprint/core/ImprintRecord.java
@@ -69,6 +69,28 @@ public ByteBuffer getRawBytes(int fieldId) {
}
}
+ /**
+ * Project a subset of fields from this record.
+ *
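+ * <p>Sketch (hypothetical ids): {@code record.project(1, 3)} keeps only fields 1 and 3.
+ *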
+ * @param fieldIds Array of field IDs to include in the projection
+ * @return New ImprintRecord containing only the requested fields
+ */
+ public ImprintRecord project(int... fieldIds) {
+ return ImprintOperations.project(this, fieldIds);
+ }
+
+ /**
+ * Merge another record into this one.
+ * For duplicate fields, this record's values take precedence.
+ *
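+ * <p>Sketch: {@code a.merge(b)} keeps {@code a}'s value for any shared field id.
+ *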
+ * @param other The record to merge with this one
+ * @return New ImprintRecord containing merged fields
+ * @throws ImprintException if merge fails
+ */
+ public ImprintRecord merge(ImprintRecord other) throws ImprintException {
+ return ImprintOperations.merge(this, other);
+ }
+
/**
* Get the directory (parsing it if necessary).
*/
diff --git a/src/main/java/com/imprint/types/Value.java b/src/main/java/com/imprint/types/Value.java
index fbb988c..bfa9958 100644
--- a/src/main/java/com/imprint/types/Value.java
+++ b/src/main/java/com/imprint/types/Value.java
@@ -169,6 +169,7 @@ public String toString() {
}
// Float64 Value
+
@Getter
@EqualsAndHashCode(callSuper = false)
public static class Float64Value extends Value {
@@ -180,7 +181,7 @@ public Float64Value(double value) {
@Override
public TypeCode getTypeCode() { return TypeCode.FLOAT64; }
-
+
@Override
public String toString() {
return String.valueOf(value);
@@ -188,17 +189,20 @@ public String toString() {
}
// Bytes Value (array-based)
+ @Getter
public static class BytesValue extends Value {
+ /**
+ * Returns internal array. MUST NOT be modified by caller.
+ */
private final byte[] value;
-
+
+ /**
+ * Takes ownership of the byte array. Caller must not modify after construction.
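+ * For example, mutating the array passed to {@code new BytesValue(data)} afterwards
+ * would silently change this value's contents.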
+ */
public BytesValue(byte[] value) {
- this.value = value.clone();
+ this.value = Objects.requireNonNull(value);
}
-
- public byte[] getValue() {
- return value.clone();
- }
-
+
@Override
public TypeCode getTypeCode() { return TypeCode.BYTES; }
diff --git a/src/test/java/com/imprint/IntegrationTest.java b/src/test/java/com/imprint/IntegrationTest.java
index 898adfb..ee1d426 100644
--- a/src/test/java/com/imprint/IntegrationTest.java
+++ b/src/test/java/com/imprint/IntegrationTest.java
@@ -49,8 +49,6 @@ var record = ImprintRecord.builder(schemaId)
assertTrue(deserialized.getBoolean(3));
assertEquals(3.14159, deserialized.getFloat64(4));
assertArrayEquals(new byte[]{1,2,3,4}, deserialized.getBytes(5));
-
- System.out.println("Basic functionality test passed");
}
@Test
@@ -90,15 +88,11 @@ var record = ImprintRecord.builder(schemaId)
assertEquals(2, deserializedMap.size());
assertEquals(Value.fromInt32(1), deserializedMap.get(MapKey.fromString("one")));
assertEquals(Value.fromInt32(2), deserializedMap.get(MapKey.fromString("two")));
-
- System.out.println("Arrays and maps test passed");
}
@Test
@DisplayName("Nested Records: create, serialize, deserialize records within records")
void testNestedRecords() throws ImprintException {
- System.out.println("Testing nested records...");
-
var innerSchemaId = new SchemaId(3, 0x12345678);
var innerRecord = ImprintRecord.builder(innerSchemaId)
.field(1, "nested data")
@@ -124,8 +118,348 @@ void testNestedRecords() throws ImprintException {
assertEquals(3, nestedDeserialized.getHeader().getSchemaId().getFieldSpaceId());
assertEquals("nested data", nestedDeserialized.getString(1));
assertEquals(9876543210L, nestedDeserialized.getInt64(2));
+ }
+
+ @Test
+ @DisplayName("Project: subset of fields with serialization round-trip")
+ void testProjectSubsetWithSerialization() throws ImprintException {
+ var schemaId = new SchemaId(10, 0xabcd1234);
+ var originalRecord = ImprintRecord.builder(schemaId)
+ .field(1, 100)
+ .field(2, "keep this field")
+ .field(3, false)
+ .field(4, "remove this field")
+ .field(5, 42.5)
+ .field(6, new byte[]{9, 8, 7})
+ .build();
+
+ // Project fields 1, 2, 5 (skip 3, 4, 6)
+ var projected = originalRecord.project(1, 2, 5);
+
+ assertEquals(3, projected.getDirectory().size());
+ assertEquals(100, projected.getInt32(1));
+ assertEquals("keep this field", projected.getString(2));
+ assertEquals(42.5, projected.getFloat64(5));
+
+ // Verify missing fields
+ assertNull(projected.getValue(3));
+ assertNull(projected.getValue(4));
+ assertNull(projected.getValue(6));
+
+ // Test serialization round-trip of projected record
+ var buffer = projected.serializeToBuffer();
+ byte[] serialized = new byte[buffer.remaining()];
+ buffer.get(serialized);
+ var deserialized = ImprintRecord.deserialize(serialized);
+
+ assertEquals(3, deserialized.getDirectory().size());
+ assertEquals(100, deserialized.getInt32(1));
+ assertEquals("keep this field", deserialized.getString(2));
+ assertEquals(42.5, deserialized.getFloat64(5));
+ }
+
+ @Test
+ @DisplayName("Project: complex data types (arrays, maps, nested records)")
+ void testProjectComplexTypes() throws ImprintException {
+ var schemaId = new SchemaId(11, 0xbeef4567);
+
+ // Create nested record
+ var nestedRecord = ImprintRecord.builder(new SchemaId(12, 0x11111111))
+ .field(100, "nested value")
+ .build();
+
+ // Create homogeneous array (all strings)
+ var testArray = Arrays.asList(Value.fromString("item1"), Value.fromString("item2"), Value.fromString("item3"));
+
+ // Create homogeneous map (string keys -> string values)
+ var testMap = new HashMap<MapKey, Value>();
+ testMap.put(MapKey.fromString("key1"), Value.fromString("value1"));
+ testMap.put(MapKey.fromString("key2"), Value.fromString("value2"));
+
+ var originalRecord = ImprintRecord.builder(schemaId)
+ .field(1, "simple string")
+ .field(2, Value.fromArray(testArray))
+ .field(3, Value.fromMap(testMap))
+ .field(4, nestedRecord)
+ .field(5, 999L)
+ .build();
+
+ // Project only complex types
+ var projected = originalRecord.project(2, 3, 4);
+
+ assertEquals(3, projected.getDirectory().size());
+
+ // Verify array projection (homogeneous strings)
+ var projectedArray = projected.getArray(2);
+ assertEquals(3, projectedArray.size());
+ assertEquals(Value.fromString("item1"), projectedArray.get(0));
+ assertEquals(Value.fromString("item2"), projectedArray.get(1));
+ assertEquals(Value.fromString("item3"), projectedArray.get(2));
+
+ // Verify map projection (string -> string)
+ var projectedMap = projected.getMap(3);
+ assertEquals(2, projectedMap.size());
+ assertEquals(Value.fromString("value1"), projectedMap.get(MapKey.fromString("key1")));
+ assertEquals(Value.fromString("value2"), projectedMap.get(MapKey.fromString("key2")));
+
+ // Verify nested record projection
+ var projectedNested = projected.getRow(4);
+ assertEquals("nested value", projectedNested.getString(100));
+
+ // Verify excluded fields
+ assertNull(projected.getValue(1));
+ assertNull(projected.getValue(5));
+ }
+
+ @Test
+ @DisplayName("Merge: distinct fields with serialization round-trip")
+ void testMergeDistinctFieldsWithSerialization() throws ImprintException {
+ var schemaId = new SchemaId(20, 0xcafe5678);
+
+ var record1 = ImprintRecord.builder(schemaId)
+ .field(1, 100)
+ .field(3, "from record1")
+ .field(5, true)
+ .build();
+
+ var record2 = ImprintRecord.builder(schemaId)
+ .field(2, 200L)
+ .field(4, "from record2")
+ .field(6, 3.14f)
+ .build();
+
+ var merged = record1.merge(record2);
+
+ assertEquals(6, merged.getDirectory().size());
+ assertEquals(100, merged.getInt32(1));
+ assertEquals(200L, merged.getInt64(2));
+ assertEquals("from record1", merged.getString(3));
+ assertEquals("from record2", merged.getString(4));
+ assertTrue(merged.getBoolean(5));
+ assertEquals(3.14f, merged.getFloat32(6));
+
+ // Test serialization round-trip of merged record
+ var buffer = merged.serializeToBuffer();
+ byte[] serialized = new byte[buffer.remaining()];
+ buffer.get(serialized);
+ var deserialized = ImprintRecord.deserialize(serialized);
+
+ assertEquals(6, deserialized.getDirectory().size());
+ assertEquals(100, deserialized.getInt32(1));
+ assertEquals(200L, deserialized.getInt64(2));
+ assertEquals("from record1", deserialized.getString(3));
+ assertEquals("from record2", deserialized.getString(4));
+ assertTrue(deserialized.getBoolean(5));
+ assertEquals(3.14f, deserialized.getFloat32(6));
+ }
- System.out.println("✓ Nested records test passed");
+ @Test
+ @DisplayName("Merge: overlapping fields - first record wins")
+ void testMergeOverlappingFields() throws ImprintException {
+ var schemaId = new SchemaId(21, 0xdead9876);
+
+ var record1 = ImprintRecord.builder(schemaId)
+ .field(1, "first wins")
+ .field(2, 100)
+ .field(4, true)
+ .build();
+
+ var record2 = ImprintRecord.builder(schemaId)
+ .field(1, "second loses") // Overlapping field
+ .field(2, 999) // Overlapping field
+ .field(3, "unique to second")
+ .field(4, false) // Overlapping field
+ .build();
+
+ var merged = record1.merge(record2);
+
+ assertEquals(4, merged.getDirectory().size());
+ assertEquals("first wins", merged.getString(1)); // First record wins
+ assertEquals(100, merged.getInt32(2)); // First record wins
+ assertEquals("unique to second", merged.getString(3)); // Only in second
+ assertTrue(merged.getBoolean(4)); // First record wins
+ }
+
+ @Test
+ @DisplayName("Merge: complex data types and nested records")
+ void testMergeComplexTypes() throws ImprintException {
+ var schemaId = new SchemaId(22, 0xbeef1111);
+
+ // Create nested records for both
+ var nested1 = ImprintRecord.builder(new SchemaId(23, 0x22222222))
+ .field(100, "nested in record1")
+ .build();
+
+ var nested2 = ImprintRecord.builder(new SchemaId(24, 0x33333333))
+ .field(200, "nested in record2")
+ .build();
+
+ // Create arrays
+ var array1 = Arrays.asList(Value.fromString("array1_item1"), Value.fromString("array1_item2"));
+ var array2 = Arrays.asList(Value.fromInt32(10), Value.fromInt32(20));
+
+ // Create maps
+ var map1 = new HashMap<MapKey, Value>();
+ map1.put(MapKey.fromString("map1_key"), Value.fromString("map1_value"));
+
+ var map2 = new HashMap<MapKey, Value>();
+ map2.put(MapKey.fromInt32(42), Value.fromBoolean(true));
+
+ var record1 = ImprintRecord.builder(schemaId)
+ .field(1, nested1)
+ .field(3, Value.fromArray(array1))
+ .field(5, Value.fromMap(map1))
+ .build();
+
+ var record2 = ImprintRecord.builder(schemaId)
+ .field(2, nested2)
+ .field(4, Value.fromArray(array2))
+ .field(6, Value.fromMap(map2))
+ .build();
+
+ var merged = record1.merge(record2);
+
+ assertEquals(6, merged.getDirectory().size());
+
+ // Verify nested records
+ var mergedNested1 = merged.getRow(1);
+ assertEquals("nested in record1", mergedNested1.getString(100));
+
+ var mergedNested2 = merged.getRow(2);
+ assertEquals("nested in record2", mergedNested2.getString(200));
+
+ // Verify arrays
+ var mergedArray1 = merged.getArray(3);
+ assertEquals(2, mergedArray1.size());
+ assertEquals(Value.fromString("array1_item1"), mergedArray1.get(0));
+
+ var mergedArray2 = merged.getArray(4);
+ assertEquals(2, mergedArray2.size());
+ assertEquals(Value.fromInt32(10), mergedArray2.get(0));
+
+ // Verify maps
+ var mergedMap1 = merged.getMap(5);
+ assertEquals(Value.fromString("map1_value"), mergedMap1.get(MapKey.fromString("map1_key")));
+
+ var mergedMap2 = merged.getMap(6);
+ assertEquals(Value.fromBoolean(true), mergedMap2.get(MapKey.fromInt32(42)));
+ }
+
+ @Test
+ @DisplayName("Project and Merge: chained operations")
+ void testProjectAndMergeChained() throws ImprintException {
+ var schemaId = new SchemaId(30, 0xabcdabcd);
+
+ // Create a large record
+ var fullRecord = ImprintRecord.builder(schemaId)
+ .field(1, "field1")
+ .field(2, "field2")
+ .field(3, "field3")
+ .field(4, "field4")
+ .field(5, "field5")
+ .field(6, "field6")
+ .build();
+
+ // Project different subsets
+ var projection1 = fullRecord.project(1, 3, 5);
+ var projection2 = fullRecord.project(2, 4, 6);
+
+ assertEquals(3, projection1.getDirectory().size());
+ assertEquals(3, projection2.getDirectory().size());
+
+ // Merge the projections back together
+ var recomposed = projection1.merge(projection2);
+
+ assertEquals(6, recomposed.getDirectory().size());
+ assertEquals("field1", recomposed.getString(1));
+ assertEquals("field2", recomposed.getString(2));
+ assertEquals("field3", recomposed.getString(3));
+ assertEquals("field4", recomposed.getString(4));
+ assertEquals("field5", recomposed.getString(5));
+ assertEquals("field6", recomposed.getString(6));
+
+ // Test another chain: project the merged result
+ var finalProjection = recomposed.project(2, 4, 6);
+ assertEquals(3, finalProjection.getDirectory().size());
+ assertEquals("field2", finalProjection.getString(2));
+ assertEquals("field4", finalProjection.getString(4));
+ assertEquals("field6", finalProjection.getString(6));
+ }
+
+ @Test
+ @DisplayName("Merge and Project: empty record handling")
+ void testMergeAndProjectEmptyRecords() throws ImprintException {
+ var schemaId = new SchemaId(40, 0xeeeeeeee);
+
+ var emptyRecord = ImprintRecord.builder(schemaId).build();
+ var nonEmptyRecord = ImprintRecord.builder(schemaId)
+ .field(1, "not empty")
+ .field(2, 42)
+ .build();
+
+ // Test merging with empty
+ var merged1 = emptyRecord.merge(nonEmptyRecord);
+ var merged2 = nonEmptyRecord.merge(emptyRecord);
+
+ assertEquals(2, merged1.getDirectory().size());
+ assertEquals(2, merged2.getDirectory().size());
+ assertEquals("not empty", merged1.getString(1));
+ assertEquals("not empty", merged2.getString(1));
+
+ // Test projecting empty record
+ var projectedEmpty = emptyRecord.project(1, 2, 3);
+ assertEquals(0, projectedEmpty.getDirectory().size());
+
+ // Test projecting non-existent fields
+ var projectedNonExistent = nonEmptyRecord.project(99, 100);
+ assertEquals(0, projectedNonExistent.getDirectory().size());
+ }
+
+ @Test
+ @DisplayName("Project and Merge: Large record operations")
+ void testLargeRecordOperations() throws ImprintException {
+ var schemaId = new SchemaId(50, 0xffffffff);
+
+ // Create a record with many fields
+ var builder = ImprintRecord.builder(schemaId);
+ for (int i = 1; i <= 100; i++) {
+ builder.field(i, "field_" + i + "_data");
+ }
+ var largeRecord = builder.build();
+
+ assertEquals(100, largeRecord.getDirectory().size());
+
+ // Project a subset (every 10th field)
+ int[] projectionFields = new int[10];
+ for (int i = 0; i < 10; i++) {
+ projectionFields[i] = (i + 1) * 10; // 10, 20, 30, ..., 100
+ }
+
+ var projected = largeRecord.project(projectionFields);
+ assertEquals(10, projected.getDirectory().size());
+
+ for (int i = 0; i < 10; i++) {
+ int fieldId = (i + 1) * 10;
+ assertEquals("field_" + fieldId + "_data", projected.getString(fieldId));
+ }
+
+ // Create another large record for merging
+ var builder2 = ImprintRecord.builder(schemaId);
+ for (int i = 101; i <= 150; i++) {
+ builder2.field(i, "additional_field_" + i);
+ }
+ var additionalRecord = builder2.build();
+
+ // Merge the large records
+ var merged = largeRecord.merge(additionalRecord);
+ assertEquals(150, merged.getDirectory().size());
+
+ // Verify some values from both records
+ assertEquals("field_1_data", merged.getString(1));
+ assertEquals("field_50_data", merged.getString(50));
+ assertEquals("field_100_data", merged.getString(100));
+ assertEquals("additional_field_101", merged.getString(101));
+ assertEquals("additional_field_150", merged.getString(150));
}
private ImprintRecord createTestRecordForGetters() throws ImprintException {
@@ -216,7 +550,7 @@ var record = serializeAndDeserialize(originalRecord);
@Test
@DisplayName("Type Getters: Empty Collections (Array and Map)")
- void testErgonomicGettersEmptyCollections() throws ImprintException {
+ void testTypeGettersEmptyCollections() throws ImprintException {
var originalRecord = createTestRecordForGetters();
var record = serializeAndDeserialize(originalRecord);
@@ -231,7 +565,7 @@ var record = serializeAndDeserialize(originalRecord);
@Test
@DisplayName("Type Getters: Exception for Field Not Found")
- void testErgonomicGetterExceptionFieldNotFound() throws ImprintException {
+ void testTypeGetterExceptionFieldNotFound() throws ImprintException {
var originalRecord = createTestRecordForGetters();
var record = serializeAndDeserialize(originalRecord);
@@ -241,7 +575,7 @@ var record = serializeAndDeserialize(originalRecord);
@Test
@DisplayName("Type Getters: Exception for Null Field accessed as primitive")
- void testErgonomicGetterExceptionNullField() throws ImprintException {
+ void testTypeGetterExceptionNullField() throws ImprintException {
var originalRecord = createTestRecordForGetters();
var record = serializeAndDeserialize(originalRecord);
@@ -258,7 +592,7 @@ var record = serializeAndDeserialize(originalRecord);
@Test
@DisplayName("Type Getters: Exception for Type Mismatch")
- void testErgonomicGetterExceptionTypeMismatch() throws ImprintException {
+ void testTypeGetterExceptionTypeMismatch() throws ImprintException {
var originalRecord = createTestRecordForGetters();
var record = serializeAndDeserialize(originalRecord);
@@ -268,7 +602,7 @@ var record = serializeAndDeserialize(originalRecord);
@Test
@DisplayName("Type Getters: Row (Nested Record)")
- void testErgonomicGetterRow() throws ImprintException {
+ void testTypeGetterRow() throws ImprintException {
var innerSchemaId = new SchemaId(6, 0x12345678);
var innerRecord = ImprintRecord.builder(innerSchemaId)
.field(101, "nested string")
diff --git a/src/test/java/com/imprint/core/ImprintOperationsTest.java b/src/test/java/com/imprint/core/ImprintOperationsTest.java
new file mode 100644
index 0000000..1dc67fb
--- /dev/null
+++ b/src/test/java/com/imprint/core/ImprintOperationsTest.java
@@ -0,0 +1,405 @@
+package com.imprint.core;
+
+import com.imprint.error.ImprintException;
+import com.imprint.types.Value;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+@DisplayName("ImprintOperations")
+class ImprintOperationsTest {
+
+ private SchemaId testSchema;
+ private ImprintRecord multiFieldRecord;
+ private ImprintRecord emptyRecord;
+
+ @BeforeEach
+ void setUp() throws ImprintException {
+ testSchema = new SchemaId(1, 0xdeadbeef);
+ multiFieldRecord = createTestRecord();
+ emptyRecord = createEmptyTestRecord();
+ }
+
+ private ImprintRecord createTestRecord() throws ImprintException {
+ return ImprintRecord.builder(testSchema)
+ .field(1, 42)
+ .field(3, "hello")
+ .field(5, true)
+ .field(7, new byte[]{1, 2, 3})
+ .build();
+ }
+
+ private ImprintRecord createEmptyTestRecord() throws ImprintException {
+ return ImprintRecord.builder(testSchema).build();
+ }
+
+ @Nested
+ @DisplayName("Project Operations")
+ class ProjectOperations {
+
+ @Test
+ @DisplayName("should project subset of fields")
+ void shouldProjectSubsetOfFields() throws ImprintException {
+ // When projecting a subset of fields
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 5);
+
+ // Then only the requested fields should be present
+ assertEquals(2, projected.getDirectory().size());
+ assertEquals(42, projected.getInt32(1));
+ assertTrue(projected.getBoolean(5));
+
+ // And non-requested fields should be absent
+ assertNull(projected.getValue(3));
+ assertNull(projected.getValue(7));
+ }
+
+ @Test
+ @DisplayName("should maintain field order regardless of input order")
+ void shouldMaintainFieldOrderRegardlessOfInputOrder() throws ImprintException {
+ // When projecting fields in arbitrary order
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7, 1, 5, 3);
+
+ // Then all requested fields should be present
+ assertEquals(4, projected.getDirectory().size());
+ assertEquals(42, projected.getInt32(1));
+ assertEquals("hello", projected.getString(3));
+ assertTrue(projected.getBoolean(5));
+ assertArrayEquals(new byte[]{1, 2, 3}, projected.getBytes(7));
+
+ // And directory should maintain sorted order
+ List<DirectoryEntry> directory = projected.getDirectory();
+ for (int i = 1; i < directory.size(); i++) {
+ assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(),
+ "Directory entries should be sorted by field id");
+ }
+ }
+
+ @Test
+ @DisplayName("should handle single field projection")
+ void shouldHandleSingleFieldProjection() throws ImprintException {
+ // When projecting a single field
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 3);
+
+ // Then only that field should be present
+ assertEquals(1, projected.getDirectory().size());
+ assertEquals("hello", projected.getString(3));
+ }
+
+ @Test
+ @DisplayName("should preserve all fields when projecting all")
+ void shouldPreserveAllFieldsWhenProjectingAll() throws ImprintException {
+ // Given all field IDs from the original record
+ int[] allFields = multiFieldRecord.getDirectory().stream()
+ .mapToInt(DirectoryEntry::getId)
+ .toArray();
+
+ // When projecting all fields
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, allFields);
+
+ // Then all fields should be present with matching values
+ assertEquals(multiFieldRecord.getDirectory().size(), projected.getDirectory().size());
+
+ for (DirectoryEntry entry : multiFieldRecord.getDirectory()) {
+ Value originalValue = multiFieldRecord.getValue(entry.getId());
+ Value projectedValue = projected.getValue(entry.getId());
+ assertEquals(originalValue, projectedValue,
+ "Field " + entry.getId() + " should have matching value");
+ }
+ }
+
+ @Test
+ @DisplayName("should handle empty projection")
+ void shouldHandleEmptyProjection() {
+ // When projecting no fields
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord);
+
+ // Then result should be empty but valid
+ assertEquals(0, projected.getDirectory().size());
+ assertEquals(0, projected.getBuffers().getPayload().remaining());
+ }
+
+ @Test
+ @DisplayName("should ignore nonexistent fields")
+ void shouldIgnoreNonexistentFields() throws ImprintException {
+ // When projecting mix of existing and non-existing fields
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 99, 100);
+
+ // Then only existing fields should be included
+ assertEquals(1, projected.getDirectory().size());
+ assertEquals(42, projected.getInt32(1));
+ assertNull(projected.getValue(99));
+ assertNull(projected.getValue(100));
+ }
+
+ @Test
+ @DisplayName("should deduplicate requested fields")
+ void shouldDeduplicateRequestedFields() throws ImprintException {
+ // When projecting the same field multiple times
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 1, 1, 1);
+
+ // Then field should only appear once
+ assertEquals(1, projected.getDirectory().size());
+ assertEquals(42, projected.getInt32(1));
+ }
+
+ @Test
+ @DisplayName("should handle projection from empty record")
+ void shouldHandleProjectionFromEmptyRecord() {
+ // When projecting any fields from empty record
+ ImprintRecord projected = ImprintOperations.project(emptyRecord, 1, 2, 3);
+
+ // Then result should be empty but valid
+ assertEquals(0, projected.getDirectory().size());
+ assertEquals(0, projected.getBuffers().getPayload().remaining());
+ }
+
+ @Test
+ @DisplayName("should preserve exact byte representation")
+ void shouldPreserveExactByteRepresentation() throws ImprintException {
+ // Given a field's original bytes
+ byte[] originalBytes = multiFieldRecord.getBytes(7);
+
+ // When projecting that field
+ ImprintRecord projected = ImprintOperations.project(multiFieldRecord, 7);
+
+ // Then the byte representation should be exactly preserved
+ byte[] projectedBytes = projected.getBytes(7);
+ assertArrayEquals(originalBytes, projectedBytes,
+ "Byte representation should be identical");
+ }
+
+ @Test
+ @DisplayName("should reduce payload size when projecting subset")
+ void shouldReducePayloadSizeWhenProjectingSubset() throws ImprintException {
+ // Given a record with large and small fields
+ ImprintRecord largeRecord = ImprintRecord.builder(testSchema)
+ .field(1, 42) // 4 bytes
+ .field(2, "x".repeat(1000)) // ~1000+ bytes
+ .field(3, 123L) // 8 bytes
+ .field(4, new byte[500]) // 500+ bytes
+ .build();
+
+ int originalPayloadSize = largeRecord.getBuffers().getPayload().remaining();
+
+ // When projecting only the small fields
+ ImprintRecord projected = ImprintOperations.project(largeRecord, 1, 3);
+
+ // Then the payload size should be significantly smaller
+ assertTrue(projected.getBuffers().getPayload().remaining() < originalPayloadSize,
+ "Projected payload should be smaller than original");
+
+ // And the values should still be correct
+ assertEquals(42, projected.getInt32(1));
+ assertEquals(123L, projected.getInt64(3));
+ }
+ }
+
+ @Nested
+ @DisplayName("Merge Operations")
+ class MergeOperations {
+
+ @Test
+ @DisplayName("should merge records with distinct fields")
+ void shouldMergeRecordsWithDistinctFields() throws ImprintException {
+ // Given two records with different fields
+ ImprintRecord record1 = ImprintRecord.builder(testSchema)
+ .field(1, 42)
+ .field(3, "hello")
+ .build();
+
+ ImprintRecord record2 = ImprintRecord.builder(testSchema)
+ .field(2, true)
+ .field(4, 123L)
+ .build();
+
+ // When merging the records
+ ImprintRecord merged = ImprintOperations.merge(record1, record2);
+
+ // Then all fields should be present
+ assertEquals(4, merged.getDirectory().size());
+ assertEquals(42, merged.getInt32(1));
+ assertTrue(merged.getBoolean(2));
+ assertEquals("hello", merged.getString(3));
+ assertEquals(123L, merged.getInt64(4));
+
+ // And directory should be sorted
+ List<DirectoryEntry> directory = merged.getDirectory();
+ for (int i = 1; i < directory.size(); i++) {
+ assertTrue(directory.get(i - 1).getId() < directory.get(i).getId(),
+ "Directory entries should be sorted by field id");
+ }
+ }
+
+ @Test
+ @DisplayName("should merge records with overlapping fields")
+ void shouldMergeRecordsWithOverlappingFields() throws ImprintException {
+ // Given two records with overlapping fields
+ ImprintRecord record1 = ImprintRecord.builder(testSchema)
+ .field(2, "first")
+ .field(3, 42)
+ .build();
+
+ ImprintRecord record2 = ImprintRecord.builder(testSchema)
+ .field(1, true)
+ .field(2, "second") // Overlapping field
+ .build();
+
+ // When merging the records
+ ImprintRecord merged = ImprintOperations.merge(record1, record2);
+
+ // Then first record's values should take precedence for duplicates
+ assertEquals(3, merged.getDirectory().size());
+ assertTrue(merged.getBoolean(1));
+ assertEquals("first", merged.getString(2)); // First record wins
+ assertEquals(42, merged.getInt32(3));
+ }
+
+ @Test
+ @DisplayName("should preserve schema id from first record")
+ void shouldPreserveSchemaIdFromFirstRecord() throws ImprintException {
+ // Given two records with different schema IDs
+ SchemaId schema1 = new SchemaId(1, 0xdeadbeef);
+ SchemaId schema2 = new SchemaId(1, 0xcafebabe);
+
+ ImprintRecord record1 = ImprintRecord.builder(schema1)
+ .field(1, 42)
+ .build();
+
+ ImprintRecord record2 = ImprintRecord.builder(schema2)
+ .field(2, true)
+ .build();
+
+ // When merging the records
+ ImprintRecord merged = ImprintOperations.merge(record1, record2);
+
+ // Then schema ID from first record should be preserved
+ assertEquals(schema1, merged.getHeader().getSchemaId());
+ }
+
+ @Test
+ @DisplayName("should handle merge with empty record")
+ void shouldHandleMergeWithEmptyRecord() throws ImprintException {
+ // When merging with empty record
+ ImprintRecord merged1 = ImprintOperations.merge(multiFieldRecord, emptyRecord);
+ ImprintRecord merged2 = ImprintOperations.merge(emptyRecord, multiFieldRecord);
+
+ // Then results should contain all original fields
+ assertEquals(multiFieldRecord.getDirectory().size(), merged1.getDirectory().size());
+ assertEquals(multiFieldRecord.getDirectory().size(), merged2.getDirectory().size());
+
+ // And values should be preserved
+ for (DirectoryEntry entry : multiFieldRecord.getDirectory()) {
+ Value originalValue = multiFieldRecord.getValue(entry.getId());
+ assertEquals(originalValue, merged1.getValue(entry.getId()));
+ assertEquals(originalValue, merged2.getValue(entry.getId()));
+ }
+ }
+
+ @Test
+ @DisplayName("should handle merge of two empty records")
+ void shouldHandleMergeOfTwoEmptyRecords() throws ImprintException {
+ // When merging two empty records
+ ImprintRecord merged = ImprintOperations.merge(emptyRecord, emptyRecord);
+
+ // Then result should be empty but valid
+ assertEquals(0, merged.getDirectory().size());
+ assertEquals(0, merged.getBuffers().getPayload().remaining());
+ }
+
+ @Test
+ @DisplayName("should maintain correct payload offsets after merge")
+ void shouldMaintainCorrectPayloadOffsetsAfterMerge() throws ImprintException {
+ // Given records with different field sizes
+ ImprintRecord record1 = ImprintRecord.builder(testSchema)
+ .field(1, 42) // 4 bytes
+ .field(3, "hello") // 5+ bytes
+ .build();
+
+ ImprintRecord record2 = ImprintRecord.builder(testSchema)
+ .field(2, true) // 1 byte
+ .field(4, new byte[]{1, 2, 3, 4, 5}) // 5+ bytes
+ .build();
+
+ // When merging
+ ImprintRecord merged = ImprintOperations.merge(record1, record2);
+
+ // Then all fields should be accessible with correct values
+ assertEquals(42, merged.getInt32(1));
+ assertTrue(merged.getBoolean(2));
+ assertEquals("hello", merged.getString(3));
+ assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, merged.getBytes(4));
+
+ // And directory offsets should be sequential
+ List<DirectoryEntry> directory = merged.getDirectory();
+ int expectedOffset = 0;
+ for (DirectoryEntry entry : directory) {
+ assertEquals(expectedOffset, entry.getOffset(),
+ "Field " + entry.getId() + " should have correct offset");
+
+ // Calculate next offset
+ var fieldData = merged.getRawBytes(entry.getId());
+ assertNotNull(fieldData);
+ expectedOffset += fieldData.remaining();
+ }
+ }
+
+ @Test
+ @DisplayName("should handle large records efficiently")
+ void shouldHandleLargeRecordsEfficiently() throws ImprintException {
+ // Given records with many fields
+ var builder1 = ImprintRecord.builder(testSchema);
+ var builder2 = ImprintRecord.builder(testSchema);
+
+ // Add 100 fields to each record (no overlap)
+ for (int i = 1; i <= 100; i++) {
+ builder1.field(i, i * 10);
+ }
+ for (int i = 101; i <= 200; i++) {
+ builder2.field(i, i * 10);
+ }
+
+ ImprintRecord record1 = builder1.build();
+ ImprintRecord record2 = builder2.build();
+
+ // When merging large records
+ ImprintRecord merged = ImprintOperations.merge(record1, record2);
+
+ // Then all 200 fields should be present and accessible
+ assertEquals(200, merged.getDirectory().size());
+
+ // Spot check some values
+ assertEquals(10, merged.getInt32(1));
+ assertEquals(500, merged.getInt32(50));
+ assertEquals(1000, merged.getInt32(100));
+ assertEquals(1010, merged.getInt32(101));
+ assertEquals(1500, merged.getInt32(150));
+ assertEquals(2000, merged.getInt32(200));
+ }
+ }
+
+ @Nested
+ @DisplayName("Error Handling")
+ class ErrorHandling {
+
+ @Test
+ @DisplayName("should handle null record gracefully")
+ void shouldHandleNullRecordGracefully() {
+ assertThrows(NullPointerException.class, () -> ImprintOperations.project(null, 1, 2, 3));
+
+ assertThrows(NullPointerException.class, () -> ImprintOperations.merge(null, multiFieldRecord));
+
+ assertThrows(NullPointerException.class, () -> ImprintOperations.merge(multiFieldRecord, null));
+ }
+
+ @Test
+ @DisplayName("should handle null field ids gracefully")
+ void shouldHandleNullFieldIdsGracefully() {
+ assertThrows(NullPointerException.class, () -> ImprintOperations.project(multiFieldRecord, (int[]) null));
+ }
+ }
+}
diff --git a/src/test/java/com/imprint/profile/ProfilerTest.java b/src/test/java/com/imprint/profile/ProfilerTest.java
index 3b9f371..64be931 100644
--- a/src/test/java/com/imprint/profile/ProfilerTest.java
+++ b/src/test/java/com/imprint/profile/ProfilerTest.java
@@ -27,7 +27,7 @@
* - String operations and UTF-8 encoding
* - ByteBuffer operations
*/
-@Disabled("Enable manually for profiling")
+//@Disabled("Enable manually for profiling")
public class ProfilerTest {
private static final int ITERATIONS = 1_000_000;
@@ -82,12 +82,12 @@ void profileSerialization() throws Exception {
Thread.sleep(3000);
var schemaId = new SchemaId(1, 0x12345678);
-
+
System.out.println("Beginning serialization profiling...");
long start = System.nanoTime();
// Create and serialize many records (allocation hotspot)
- for (int i = 0; i < 100_000; i++) {
+ for (int i = 0; i < 500_000; i++) {
var writer = new ImprintWriter(schemaId);
// Add various field types