From e9bf08791da8238cdaf12e3fdb0ed93e7c69fefd Mon Sep 17 00:00:00 2001 From: hemant Date: Sat, 7 Jun 2025 12:51:27 +0530 Subject: [PATCH 1/3] feat: change Field IDs to 2-byte --- src/ops.rs | 8 ++++---- src/serde.rs | 12 ++++++------ src/types.rs | 10 +++++----- src/writer.rs | 6 +++--- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/ops.rs b/src/ops.rs index ce50174..3da9b3b 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -5,10 +5,10 @@ use crate::{ use bytes::BytesMut; pub trait Project { - fn project(&self, field_ids: &[u32]) -> Result<ImprintRecord, ImprintError>; + fn project(&self, field_ids: &[u16]) -> Result<ImprintRecord, ImprintError>; } impl Project for ImprintRecord { - fn project(&self, field_ids: &[u32]) -> Result<ImprintRecord, ImprintError> { + fn project(&self, field_ids: &[u16]) -> Result<ImprintRecord, ImprintError> { // Sort and deduplicate the field IDs for efficient matching with sorted directory let mut sorted_field_ids = field_ids.to_vec(); sorted_field_ids.sort_unstable(); @@ -198,7 +198,7 @@ mod tests { assert_eq!(projected.get_value(7).unwrap(), Some(vec![1, 2, 3].into())); // And directory should maintain sorted order - let dir_ids: Vec<u32> = projected.directory.iter().map(|e| e.id).collect(); + let dir_ids: Vec<u16> = projected.directory.iter().map(|e| e.id).collect(); assert!( dir_ids.windows(2).all(|w| w[0] < w[1]), "directory entries should be sorted by field id" @@ -222,7 +222,7 @@ mod tests { fn should_preserve_all_fields_when_projecting_all() { // Given a record with multiple fields let record = create_test_record(); - let all_fields: Vec<u32> = record.directory.iter().map(|e| e.id).collect(); + let all_fields: Vec<u16> = record.directory.iter().map(|e| e.id).collect(); // When projecting all fields let projected = record.project(&all_fields).unwrap(); diff --git a/src/serde.rs b/src/serde.rs index e54a976..fda732d 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -11,7 +11,7 @@ use crate::{ const HEADER_BYTES: usize = 15; const DIR_COUNT_BYTES: usize = 5; -const DIR_ENTRY_BYTES: usize = 9; +const DIR_ENTRY_BYTES: usize = 7; /// A trait for types
that can be written to a byte buffer pub trait Write { @@ -315,7 +315,7 @@ impl ValueRead for MapKey { impl Write for DirectoryEntry { fn write(&self, buf: &mut BytesMut) -> Result<(), ImprintError> { - buf.put_u32_le(self.id); + buf.put_u16_le(self.id); buf.put_u8(self.type_code as u8); buf.put_u32_le(self.offset); Ok(()) @@ -324,14 +324,14 @@ impl Write for DirectoryEntry { impl Read for DirectoryEntry { fn read(mut bytes: Bytes) -> Result<(Self, usize), ImprintError> { - if bytes.remaining() < 9 { + if bytes.remaining() < DIR_ENTRY_BYTES { return Err(ImprintError::BufferUnderflow { - needed: 9, + needed: DIR_ENTRY_BYTES, available: bytes.remaining(), }); } - let id = bytes.get_u32_le(); + let id = bytes.get_u16_le(); let type_code = TypeCode::try_from(bytes.get_u8())?; let offset = bytes.get_u32_le(); @@ -341,7 +341,7 @@ impl Read for DirectoryEntry { type_code, offset, }, - 9, + DIR_ENTRY_BYTES, )) } } diff --git a/src/types.rs b/src/types.rs index 7900a55..4e455fd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -283,11 +283,11 @@ impl PartialEq for MapKey { } /// A directory entry describing a single field in an Imprint record. -/// Each entry has a fixed size of 9 bytes. +/// Each entry has a fixed size of 7 bytes. 
#[derive(Debug, Clone, PartialEq)] pub struct DirectoryEntry { - /// Uniquely assigned identifier within a fieldspace (4 bytes) - pub id: u32, + /// Uniquely assigned identifier within a fieldspace (2 bytes) + pub id: u16, /// Field type identifier (1 byte) pub type_code: TypeCode, /// Byte position of the value relative to the payload (4 bytes) @@ -319,7 +319,7 @@ pub struct ImprintRecord { impl ImprintRecord { /// Get a value by field ID, deserializing it on demand - pub fn get_value(&self, field_id: u32) -> Result<Option<Value>, ImprintError> { + pub fn get_value(&self, field_id: u16) -> Result<Option<Value>, ImprintError> { match self.directory.binary_search_by_key(&field_id, |e| e.id) { Ok(idx) => { let entry = &self.directory[idx]; @@ -332,7 +332,7 @@ impl ImprintRecord { } /// Get the raw bytes for a field without deserializing - pub fn get_raw_bytes(&self, field_id: u32) -> Option<Bytes> { + pub fn get_raw_bytes(&self, field_id: u16) -> Option<Bytes> { let idx = self .directory .binary_search_by_key(&field_id, |e| e.id) diff --git a/src/writer.rs b/src/writer.rs index b959040..a01a50b 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -10,7 +10,7 @@ use crate::{ /// A writer for constructing ImprintRecords by adding fields sequentially. pub struct ImprintWriter { schema_id: SchemaId, - fields: BTreeMap<u32, Value>, // keep fields in sorted order + fields: BTreeMap<u16, Value>, // keep fields in sorted order } impl ImprintWriter { @@ -23,7 +23,7 @@ impl ImprintWriter { } /// Adds a field to the record being built.
- pub fn add_field(&mut self, id: u32, value: Value) -> Result<(), ImprintError> { + pub fn add_field(&mut self, id: u16, value: Value) -> Result<(), ImprintError> { self.fields.insert(id, value); Ok(()) } @@ -43,7 +43,7 @@ impl ImprintWriter { } let header = Header { - flags: Flags::new(0), + flags: Flags::new(0), // Set appropriate flags as needed schema_id: self.schema_id, payload_size: payload.len() as u32, }; From baf795392cbfc804954a5eb7dce2cbee3c4ae9bb Mon Sep 17 00:00:00 2001 From: hemant Date: Sat, 7 Jun 2025 12:54:52 +0530 Subject: [PATCH 2/3] feat: Update documentation for 7-byte Directory Entries (2-byte FieldIDs) --- FORMAT.md | 8 ++++---- README.md | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/FORMAT.md b/FORMAT.md index 742d004..837ea2f 100644 --- a/FORMAT.md +++ b/FORMAT.md @@ -28,18 +28,18 @@ Flags: ```text +---------------------+---------------------+---------------------+ | Count (varint) | Entry 1 | Entry 2 | ... - | (1-5 bytes) | (9 bytes) | (9 bytes) | + | (1-5 bytes) | (7 bytes) | (7 bytes) | +---------------------+---------------------+---------------------+ ``` -Each directory entry (9 bytes): +Each directory entry (7 bytes): ```text +----------------+-------+----------------+ | Field ID | Type | Field Offset | | (LE u32) | Code | (LE u32) | +----------------+-------+----------------+ - Bytes 0-3 Byte 4 Bytes 5-8 + Bytes 0-1 Byte 2 Bytes 3-6 ``` ## Type Codes @@ -210,7 +210,7 @@ Also see [LEB128 encoding](https://en.wikipedia.org/wiki/LEB128) for more detail | | | FIELD DIRECTORY (present because Flag 0x01 is set): | | +-----------------+----------------------------------------+ | -| | Count (varint) | Directory Entries (Count × 9 bytes) | | +| | Count (varint) | Directory Entries (Count × 7 bytes) | | | +-----------------+----------------------------------------+ | | | | PAYLOAD (contains encoded field values): | diff --git a/README.md b/README.md index 4417af1..40631e3 100644 --- a/README.md +++ b/README.md @@ 
-128,7 +128,7 @@ Each entry has the following format: | Field | Encoding | Description | |----------|----------|----------------------------------------------------| -| `id` | `u32` | Uniquely assigned identifier within a fieldspace | +| `id` | `u16` | Uniquely assigned identifier within a fieldspace | | `type` | `u8` | Field type identifier, see below | | `offset` | `u32` | Byte position of the value relative to the payload | From 005a7dc42a10123cffdddb66b18bb4278cc1317d Mon Sep 17 00:00:00 2001 From: hemant Date: Sat, 7 Jun 2025 13:24:52 +0530 Subject: [PATCH 3/3] feat: Update the Field ID to u16 --- FORMAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FORMAT.md b/FORMAT.md index 837ea2f..6ab2d17 100644 --- a/FORMAT.md +++ b/FORMAT.md @@ -37,7 +37,7 @@ Each directory entry (7 bytes): ```text +----------------+-------+----------------+ | Field ID | Type | Field Offset | - | (LE u32) | Code | (LE u32) | + | (LE u16) | Code | (LE u32) | +----------------+-------+----------------+ Bytes 0-1 Byte 2 Bytes 3-6 ```