Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/plotnik-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ default = ["plotnik-langs"]
[dev-dependencies]
insta = { version = "=1.45.1", features = ["yaml"] }
indoc = "=2.0.7"
serde_json = "=1.0.148"
serde_json = "=1.0.148"
19 changes: 19 additions & 0 deletions crates/plotnik-lib/src/bytecode/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//! Bytecode format constants.

// Re-export primitive type constants from the shared type system
pub use crate::type_system::{TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, TYPE_VOID};

/// Four-byte signature (`PTKQ`) identifying a Plotnik bytecode file.
pub const MAGIC: [u8; 4] = [b'P', b'T', b'K', b'Q'];

/// Version number of the current bytecode format.
pub const VERSION: u32 = 1;

/// Step id of the terminal accept state.
pub const STEP_ACCEPT: u16 = 0;

/// Alignment of sections, in bytes.
pub const SECTION_ALIGN: usize = 64;

/// Size of one step, in bytes (all instructions are 8-byte aligned).
pub const STEP_SIZE: usize = 8;
179 changes: 179 additions & 0 deletions crates/plotnik-lib/src/bytecode/header.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
//! Bytecode file header (64 bytes).

use super::{MAGIC, VERSION};

/// Fixed-size header stored in the first 64 bytes of a bytecode file.
///
/// The struct is `repr(C)` and 64-byte aligned; the field order below is the
/// order in which the fields are encoded, so it must not be rearranged.
///
/// Note: TypeMeta sub-section counts are stored in the TypeMetaHeader,
/// not in the main header. See type_meta.rs for details.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(64))]
pub struct Header {
    // --- Identification and integrity (16 bytes) ---
    /// Magic bytes: b"PTKQ"
    pub magic: [u8; 4],
    /// Format version (currently 1)
    pub version: u32,
    /// CRC32 checksum of everything after the header
    pub checksum: u32,
    /// Total file size in bytes
    pub total_size: u32,

    // --- Section offsets, as absolute byte offsets (32 bytes) ---
    pub str_blob_offset: u32,
    pub str_table_offset: u32,
    pub node_types_offset: u32,
    pub node_fields_offset: u32,
    pub trivia_offset: u32,
    pub type_meta_offset: u32,
    pub entrypoints_offset: u32,
    pub transitions_offset: u32,

    // --- Element counts (12 bytes) ---
    // Type counts are not here; they live in the TypeMetaHeader found at
    // `type_meta_offset`.
    pub str_table_count: u16,
    pub node_types_count: u16,
    pub node_fields_count: u16,
    pub trivia_count: u16,
    pub entrypoints_count: u16,
    pub transitions_count: u16,

    /// Padding that rounds the struct up to exactly 64 bytes.
    pub(crate) _pad: u32,
}

// Compile-time guard: the header must stay exactly 64 bytes.
const _: () = assert!(std::mem::size_of::<Header>() == 64);

impl Default for Header {
fn default() -> Self {
Self {
magic: MAGIC,
version: VERSION,
checksum: 0,
total_size: 0,
str_blob_offset: 0,
str_table_offset: 0,
node_types_offset: 0,
node_fields_offset: 0,
trivia_offset: 0,
type_meta_offset: 0,
entrypoints_offset: 0,
transitions_offset: 0,
str_table_count: 0,
node_types_count: 0,
node_fields_count: 0,
trivia_count: 0,
entrypoints_count: 0,
transitions_count: 0,
_pad: 0,
}
}
}

impl Header {
    /// Decodes a header from the first 64 bytes of `bytes`.
    ///
    /// Every multi-byte field is read as little-endian. Bytes beyond the
    /// first 64 are ignored. No validation is performed here; use
    /// [`Header::validate_magic`] and [`Header::validate_version`] on the
    /// result.
    ///
    /// # Panics
    ///
    /// Panics with "header too short" if `bytes` holds fewer than 64 bytes.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        assert!(bytes.len() >= 64, "header too short");

        // Little-endian readers addressed by absolute offset into the header.
        let u16_at = |at: usize| u16::from_le_bytes([bytes[at], bytes[at + 1]]);
        let u32_at = |at: usize| {
            u32::from_le_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]])
        };

        Self {
            magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
            version: u32_at(4),
            checksum: u32_at(8),
            total_size: u32_at(12),
            str_blob_offset: u32_at(16),
            str_table_offset: u32_at(20),
            node_types_offset: u32_at(24),
            node_fields_offset: u32_at(28),
            trivia_offset: u32_at(32),
            type_meta_offset: u32_at(36),
            entrypoints_offset: u32_at(40),
            transitions_offset: u32_at(44),
            str_table_count: u16_at(48),
            node_types_count: u16_at(50),
            node_fields_count: u16_at(52),
            trivia_count: u16_at(54),
            entrypoints_count: u16_at(56),
            transitions_count: u16_at(58),
            _pad: u32_at(60),
        }
    }

    /// Encodes the header into its 64-byte form.
    ///
    /// Fields are written back to back in declaration order, each multi-byte
    /// value as little-endian; this is the inverse of [`Header::from_bytes`].
    pub fn to_bytes(&self) -> [u8; 64] {
        // The 32-bit fields that follow the magic, in encoding order.
        let words = [
            self.version,
            self.checksum,
            self.total_size,
            self.str_blob_offset,
            self.str_table_offset,
            self.node_types_offset,
            self.node_fields_offset,
            self.trivia_offset,
            self.type_meta_offset,
            self.entrypoints_offset,
            self.transitions_offset,
        ];
        // The 16-bit counts that follow the offsets, in encoding order.
        let counts = [
            self.str_table_count,
            self.node_types_count,
            self.node_fields_count,
            self.trivia_count,
            self.entrypoints_count,
            self.transitions_count,
        ];

        let mut out = [0u8; 64];
        let mut cursor = 0usize;
        // Appends `chunk` at the write position and advances past it.
        let mut put = |chunk: &[u8]| {
            out[cursor..cursor + chunk.len()].copy_from_slice(chunk);
            cursor += chunk.len();
        };

        put(&self.magic);
        for word in &words {
            put(&word.to_le_bytes());
        }
        for count in &counts {
            put(&count.to_le_bytes());
        }
        put(&self._pad.to_le_bytes());

        out
    }

    /// Returns `true` when the magic bytes equal `MAGIC`.
    pub fn validate_magic(&self) -> bool {
        MAGIC == self.magic
    }

    /// Returns `true` when the version equals `VERSION`.
    pub fn validate_version(&self) -> bool {
        VERSION == self.version
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The in-memory struct must be exactly as large as the on-disk header.
    #[test]
    fn header_size() {
        let size = std::mem::size_of::<Header>();
        assert_eq!(size, 64);
    }

    /// A default header identifies itself correctly and is otherwise empty.
    #[test]
    fn header_default() {
        let header = Header::default();
        assert!(header.validate_magic());
        assert!(header.validate_version());
        assert_eq!(header.total_size, 0);
    }

    /// Encoding followed by decoding yields the original header.
    #[test]
    fn header_roundtrip() {
        let original = Header {
            magic: MAGIC,
            version: VERSION,
            checksum: 0x12345678,
            total_size: 1024,
            str_blob_offset: 64,
            str_table_offset: 128,
            node_types_offset: 192,
            node_fields_offset: 256,
            trivia_offset: 320,
            type_meta_offset: 384,
            entrypoints_offset: 448,
            transitions_offset: 512,
            str_table_count: 10,
            node_types_count: 20,
            node_fields_count: 5,
            trivia_count: 2,
            entrypoints_count: 1,
            transitions_count: 15,
            ..Header::default()
        };

        let encoded = original.to_bytes();
        assert_eq!(encoded.len(), 64);

        assert_eq!(Header::from_bytes(&encoded), original);
    }
}
84 changes: 84 additions & 0 deletions crates/plotnik-lib/src/bytecode/ids.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//! Bytecode index newtypes.

use super::constants::{STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_STRING};

/// Identifier of a step in the Transitions section (8-byte steps).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct StepId(pub u16);

impl StepId {
    /// The terminal accept step.
    pub const ACCEPT: Self = Self(STEP_ACCEPT);

    /// Returns `true` when this id is the accept step.
    #[inline]
    pub fn is_accept(self) -> bool {
        self == Self::ACCEPT
    }

    /// Returns the step index scaled by `STEP_SIZE`, i.e. as a byte offset.
    #[inline]
    pub fn byte_offset(self) -> usize {
        usize::from(self.0) * STEP_SIZE
    }
}

/// Zero-based position of an entry in the String Table.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct StringId(pub u16);

/// Index into the Type Definition table.
///
/// Values 0-2 are builtins; 3+ index into TypeDefs.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
#[repr(transparent)]
pub struct QTypeId(pub u16);

impl QTypeId {
    /// Builtin type id `TYPE_VOID`.
    pub const VOID: Self = Self(super::constants::TYPE_VOID);
    /// Builtin type id `TYPE_NODE`.
    pub const NODE: Self = Self(super::constants::TYPE_NODE);
    /// Builtin type id `TYPE_STRING`.
    pub const STRING: Self = Self(TYPE_STRING);

    /// Largest custom index that still maps to a representable `u16` id.
    const MAX_CUSTOM_INDEX: usize = (u16::MAX - TYPE_CUSTOM_START) as usize;

    /// Returns `true` for ids up to and including [`QTypeId::STRING`].
    #[inline]
    pub fn is_builtin(self) -> bool {
        self.0 <= TYPE_STRING
    }

    /// Index into TypeDefs array (only valid for non-builtins).
    ///
    /// Returns `None` when the id lies below `TYPE_CUSTOM_START`.
    #[inline]
    pub fn custom_index(self) -> Option<usize> {
        self.0.checked_sub(TYPE_CUSTOM_START).map(usize::from)
    }

    /// Builds the id for the `idx`-th entry of the TypeDefs array.
    ///
    /// This is the inverse of [`QTypeId::custom_index`].
    ///
    /// # Panics
    ///
    /// Panics if `TYPE_CUSTOM_START + idx` does not fit in a `u16`. Without
    /// this check the narrowing cast would silently truncate or wrap and
    /// produce an id that refers to a different type.
    #[inline]
    pub fn from_custom_index(idx: usize) -> Self {
        assert!(
            idx <= Self::MAX_CUSTOM_INDEX,
            "custom type index {} exceeds the maximum of {}",
            idx,
            Self::MAX_CUSTOM_INDEX
        );
        // Lossless: the assert above guarantees both the cast and the sum fit.
        Self(TYPE_CUSTOM_START + idx as u16)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Step ids scale to byte offsets in units of eight.
    #[test]
    fn step_id_byte_offset() {
        let cases: [(u16, usize); 3] = [(0, 0), (1, 8), (10, 80)];
        for &(step, expected) in &cases {
            assert_eq!(StepId(step).byte_offset(), expected);
        }
    }

    /// Builtin ids are recognised as such; custom ids map to and from
    /// TypeDefs indices.
    #[test]
    fn bc_type_id_builtins() {
        for &builtin in &[QTypeId::VOID, QTypeId::NODE, QTypeId::STRING] {
            assert!(builtin.is_builtin());
        }
        assert!(!QTypeId(3).is_builtin());

        assert_eq!(QTypeId::VOID.custom_index(), None);
        for &(raw, index) in &[(3u16, 0usize), (5, 2)] {
            assert_eq!(QTypeId(raw).custom_index(), Some(index));
        }
        assert_eq!(QTypeId::from_custom_index(0), QTypeId(3));
    }
}
16 changes: 16 additions & 0 deletions crates/plotnik-lib/src/bytecode/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//! Bytecode module for compiled Plotnik queries.
//!
//! Implements the binary format specified in `docs/binary-format/`.

mod constants;
mod header;
mod ids;

pub use constants::{
MAGIC, SECTION_ALIGN, STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING,
TYPE_VOID, VERSION,
};

pub use ids::{QTypeId, StepId, StringId};

pub use header::Header;
1 change: 1 addition & 0 deletions crates/plotnik-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#![cfg_attr(coverage_nightly, feature(coverage_attribute))]

pub mod bytecode;
pub mod diagnostics;
pub mod parser;
pub mod query;
Expand Down
17 changes: 13 additions & 4 deletions docs/binary-format/01-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@ Section offsets defined in Header for robust parsing.
| [NodeTypes] | NodeTypeId → StringId | 4 |
| [NodeFields] | NodeFieldId → StringId | 4 |
| [Trivia] | List of NodeTypeId | 2 |
| [TypeMeta] | Types | Var |
| [TypeMeta] | Types (3 sub-sections) | 4 |
| [Entrypoints] | Definitions | 8 |
| [Transitions] | Tree walking graph | 8 |

**TypeMeta sub-sections** (contiguous, offsets computed from counts):

- **TypeDefs**: Structural topology
- **TypeMembers**: Fields and variants
- **TypeNames**: Name → TypeId mapping

[StringBlob]: 02-strings.md
[StringTable]: 02-strings.md
[NodeTypes]: 03-symbols.md
Expand Down Expand Up @@ -62,7 +68,7 @@ struct Header {
node_types_offset: u32,
node_fields_offset: u32,
trivia_offset: u32,
type_meta_offset: u32,
type_meta_offset: u32, // Points to TypeMeta header (see 04-types.md)
entrypoints_offset: u32,
transitions_offset: u32,

Expand All @@ -71,9 +77,12 @@ struct Header {
node_types_count: u16,
node_fields_count: u16,
trivia_count: u16,
type_defs_count: u16,
type_members_count: u16, // Number of TypeMembers
entrypoints_count: u16,
transitions_count: u16,
_pad: u32,
}
// Size: 16 + 32 + 16 = 64 bytes
//
// Note: TypeMeta sub-section counts are stored in the TypeMeta header,
// not in the main header. See 04-types.md for details.
```
2 changes: 0 additions & 2 deletions docs/binary-format/02-strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ Strings are stored in a centralized pool to eliminate redundancy and alignment p

**StringId (u16)**: Zero-based index into the String Table.

- `0xFFFF` is reserved as a sentinel for "None" or "Anonymous".

## 1. String Blob

Contains the raw UTF-8 bytes for all strings concatenated together.
Expand Down
Loading