plotnik-lang · zharinov · Dec 29, 2025 · Dec 29, 2025
diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml
@@ -30,4 +30,4 @@ default = ["plotnik-langs"]
 [dev-dependencies]
 insta = { version = "=1.45.1", features = ["yaml"] }
 indoc = "=2.0.7"
-serde_json = "=1.0.148"
+serde_json = "=1.0.148"
diff --git a/crates/plotnik-lib/src/bytecode/constants.rs b/crates/plotnik-lib/src/bytecode/constants.rs
@@ -0,0 +1,19 @@
+//! Bytecode format constants.
+
+// Re-export primitive type constants from the shared type system
+pub use crate::type_system::{TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, TYPE_VOID};
+
+/// Magic bytes identifying a Plotnik bytecode file; stored in `Header::magic`.
+pub const MAGIC: [u8; 4] = *b"PTKQ";
+
+/// Current bytecode format version; stored in `Header::version`.
+pub const VERSION: u32 = 1;
+
+/// Step index of the terminal accept state (wrapped by `StepId::ACCEPT`).
+pub const STEP_ACCEPT: u16 = 0;
+
+/// Section alignment in bytes.
+pub const SECTION_ALIGN: usize = 64;
+
+/// Size of one step in bytes (all instructions are 8-byte aligned).
+pub const STEP_SIZE: usize = 8;
diff --git a/crates/plotnik-lib/src/bytecode/header.rs b/crates/plotnik-lib/src/bytecode/header.rs
@@ -0,0 +1,225 @@
+//! Bytecode file header (64 bytes).
+
+use super::{MAGIC, VERSION};
+
+/// Encoded size of [`Header`] in bytes.
+const HEADER_SIZE: usize = 64;
+
+/// File header - first 64 bytes of the bytecode file.
+///
+/// Multi-byte fields are encoded little-endian, in declaration order.
+///
+/// Note: TypeMeta sub-section counts are stored in the TypeMetaHeader,
+/// not in the main header. See type_meta.rs for details.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(C, align(64))]
+pub struct Header {
+    /// Magic bytes: b"PTKQ"
+    pub magic: [u8; 4],
+    /// Format version (currently 1)
+    pub version: u32,
+    /// CRC32 checksum of everything after the header
+    pub checksum: u32,
+    /// Total file size in bytes
+    pub total_size: u32,
+
+    // Section offsets (absolute byte offsets)
+    pub str_blob_offset: u32,
+    pub str_table_offset: u32,
+    pub node_types_offset: u32,
+    pub node_fields_offset: u32,
+    pub trivia_offset: u32,
+    pub type_meta_offset: u32,
+    pub entrypoints_offset: u32,
+    pub transitions_offset: u32,
+
+    // Element counts (type counts are in TypeMetaHeader at type_meta_offset)
+    pub str_table_count: u16,
+    pub node_types_count: u16,
+    pub node_fields_count: u16,
+    pub trivia_count: u16,
+    pub entrypoints_count: u16,
+    pub transitions_count: u16,
+    /// Padding to maintain 64-byte size.
+    pub(crate) _pad: u32,
+}
+
+const _: () = assert!(std::mem::size_of::<Header>() == HEADER_SIZE);
+
+impl Default for Header {
+    /// Empty header: current magic and version, every other field zero.
+    fn default() -> Self {
+        Self {
+            magic: MAGIC,
+            version: VERSION,
+            checksum: 0,
+            total_size: 0,
+            str_blob_offset: 0,
+            str_table_offset: 0,
+            node_types_offset: 0,
+            node_fields_offset: 0,
+            trivia_offset: 0,
+            type_meta_offset: 0,
+            entrypoints_offset: 0,
+            transitions_offset: 0,
+            str_table_count: 0,
+            node_types_count: 0,
+            node_fields_count: 0,
+            trivia_count: 0,
+            entrypoints_count: 0,
+            transitions_count: 0,
+            _pad: 0,
+        }
+    }
+}
+
+impl Header {
+    /// Decode a header from the first 64 bytes of `bytes`.
+    ///
+    /// Returns `None` if fewer than 64 bytes are available; trailing bytes
+    /// are ignored. Magic and version are decoded but not checked here; see
+    /// [`Header::validate_magic`] and [`Header::validate_version`].
+    pub fn try_from_bytes(bytes: &[u8]) -> Option<Self> {
+        let raw = bytes.get(..HEADER_SIZE)?;
+        let u32_at = |at: usize| {
+            u32::from_le_bytes([raw[at], raw[at + 1], raw[at + 2], raw[at + 3]])
+        };
+        let u16_at = |at: usize| u16::from_le_bytes([raw[at], raw[at + 1]]);
+
+        Some(Self {
+            magic: [raw[0], raw[1], raw[2], raw[3]],
+            version: u32_at(4),
+            checksum: u32_at(8),
+            total_size: u32_at(12),
+            str_blob_offset: u32_at(16),
+            str_table_offset: u32_at(20),
+            node_types_offset: u32_at(24),
+            node_fields_offset: u32_at(28),
+            trivia_offset: u32_at(32),
+            type_meta_offset: u32_at(36),
+            entrypoints_offset: u32_at(40),
+            transitions_offset: u32_at(44),
+            str_table_count: u16_at(48),
+            node_types_count: u16_at(50),
+            node_fields_count: u16_at(52),
+            trivia_count: u16_at(54),
+            entrypoints_count: u16_at(56),
+            transitions_count: u16_at(58),
+            _pad: u32_at(60),
+        })
+    }
+
+    /// Decode header from 64 bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `bytes` is shorter than 64 bytes. Prefer
+    /// [`Header::try_from_bytes`] when the input length is not guaranteed.
+    pub fn from_bytes(bytes: &[u8]) -> Self {
+        Self::try_from_bytes(bytes).expect("header too short")
+    }
+
+    /// Encode header to 64 bytes.
+    pub fn to_bytes(&self) -> [u8; 64] {
+        let mut bytes = [0u8; HEADER_SIZE];
+        bytes[0..4].copy_from_slice(&self.magic);
+        bytes[4..8].copy_from_slice(&self.version.to_le_bytes());
+        bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes());
+        bytes[12..16].copy_from_slice(&self.total_size.to_le_bytes());
+        bytes[16..20].copy_from_slice(&self.str_blob_offset.to_le_bytes());
+        bytes[20..24].copy_from_slice(&self.str_table_offset.to_le_bytes());
+        bytes[24..28].copy_from_slice(&self.node_types_offset.to_le_bytes());
+        bytes[28..32].copy_from_slice(&self.node_fields_offset.to_le_bytes());
+        bytes[32..36].copy_from_slice(&self.trivia_offset.to_le_bytes());
+        bytes[36..40].copy_from_slice(&self.type_meta_offset.to_le_bytes());
+        bytes[40..44].copy_from_slice(&self.entrypoints_offset.to_le_bytes());
+        bytes[44..48].copy_from_slice(&self.transitions_offset.to_le_bytes());
+        bytes[48..50].copy_from_slice(&self.str_table_count.to_le_bytes());
+        bytes[50..52].copy_from_slice(&self.node_types_count.to_le_bytes());
+        bytes[52..54].copy_from_slice(&self.node_fields_count.to_le_bytes());
+        bytes[54..56].copy_from_slice(&self.trivia_count.to_le_bytes());
+        bytes[56..58].copy_from_slice(&self.entrypoints_count.to_le_bytes());
+        bytes[58..60].copy_from_slice(&self.transitions_count.to_le_bytes());
+        bytes[60..64].copy_from_slice(&self._pad.to_le_bytes());
+        bytes
+    }
+
+    /// Returns `true` if the magic bytes equal [`MAGIC`].
+    pub fn validate_magic(&self) -> bool {
+        self.magic == MAGIC
+    }
+
+    /// Returns `true` if the version equals the current [`VERSION`].
+    pub fn validate_version(&self) -> bool {
+        self.version == VERSION
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn header_size() {
+        assert_eq!(std::mem::size_of::<Header>(), 64);
+    }
+
+    #[test]
+    fn header_default() {
+        let h = Header::default();
+        assert!(h.validate_magic());
+        assert!(h.validate_version());
+        assert_eq!(h.total_size, 0);
+    }
+
+    #[test]
+    fn header_roundtrip() {
+        let h = Header {
+            magic: MAGIC,
+            version: VERSION,
+            checksum: 0x12345678,
+            total_size: 1024,
+            str_blob_offset: 64,
+            str_table_offset: 128,
+            node_types_offset: 192,
+            node_fields_offset: 256,
+            trivia_offset: 320,
+            type_meta_offset: 384,
+            entrypoints_offset: 448,
+            transitions_offset: 512,
+            str_table_count: 10,
+            node_types_count: 20,
+            node_fields_count: 5,
+            trivia_count: 2,
+            entrypoints_count: 1,
+            transitions_count: 15,
+            ..Default::default()
+        };
+
+        let bytes = h.to_bytes();
+        assert_eq!(bytes.len(), 64);
+
+        let decoded = Header::from_bytes(&bytes);
+        assert_eq!(decoded, h);
+    }
+
+    #[test]
+    fn header_try_from_bytes_rejects_short_input() {
+        assert_eq!(Header::try_from_bytes(&[]), None);
+        assert_eq!(Header::try_from_bytes(&[0u8; 63]), None);
+    }
+
+    #[test]
+    fn header_from_bytes_ignores_trailing_bytes() {
+        let h = Header::default();
+        let mut buf = h.to_bytes().to_vec();
+        buf.extend_from_slice(&[0xAA; 8]);
+        assert_eq!(Header::from_bytes(&buf), h);
+    }
+
+    #[test]
+    #[should_panic(expected = "header too short")]
+    fn header_from_bytes_panics_on_short_input() {
+        let _ = Header::from_bytes(&[0u8; 10]);
+    }
+}
diff --git a/crates/plotnik-lib/src/bytecode/ids.rs b/crates/plotnik-lib/src/bytecode/ids.rs
@@ -0,0 +1,111 @@
+//! Bytecode index newtypes.
+
+use super::constants::{
+    STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, TYPE_VOID,
+};
+
+/// Index into the Transitions section (8-byte steps).
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
+#[repr(transparent)]
+pub struct StepId(pub u16);
+
+impl StepId {
+    /// The terminal accept step.
+    pub const ACCEPT: Self = Self(STEP_ACCEPT);
+
+    /// Returns `true` if this is the terminal accept step.
+    #[inline]
+    pub fn is_accept(self) -> bool {
+        self.0 == STEP_ACCEPT
+    }
+
+    /// Step index multiplied by [`STEP_SIZE`], computed in `usize` so the
+    /// product cannot overflow `u16`.
+    #[inline]
+    pub fn byte_offset(self) -> usize {
+        usize::from(self.0) * STEP_SIZE
+    }
+}
+
+/// Index into the String Table.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
+#[repr(transparent)]
+pub struct StringId(pub u16);
+
+/// Index into the Type Definition table.
+/// Values 0-2 are builtins; 3+ index into TypeDefs.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)]
+#[repr(transparent)]
+pub struct QTypeId(pub u16);
+
+impl QTypeId {
+    pub const VOID: Self = Self(TYPE_VOID);
+    pub const NODE: Self = Self(TYPE_NODE);
+    pub const STRING: Self = Self(TYPE_STRING);
+
+    /// Returns `true` for ids at or below [`TYPE_STRING`].
+    #[inline]
+    pub fn is_builtin(self) -> bool {
+        self.0 <= TYPE_STRING
+    }
+
+    /// Index into TypeDefs array (only valid for non-builtins).
+    ///
+    /// Returns `None` for ids below [`TYPE_CUSTOM_START`].
+    #[inline]
+    pub fn custom_index(self) -> Option<usize> {
+        self.0.checked_sub(TYPE_CUSTOM_START).map(usize::from)
+    }
+
+    /// Inverse of [`QTypeId::custom_index`]: the id of the `idx`-th TypeDef.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `TYPE_CUSTOM_START + idx` does not fit in `u16`; wrapping
+    /// would silently alias an unrelated (possibly builtin) id.
+    #[inline]
+    pub fn from_custom_index(idx: usize) -> Self {
+        u16::try_from(idx)
+            .ok()
+            .and_then(|offset| TYPE_CUSTOM_START.checked_add(offset))
+            .map(Self)
+            .expect("custom type index out of range for QTypeId")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn step_id_byte_offset() {
+        assert_eq!(StepId(0).byte_offset(), 0);
+        assert_eq!(StepId(1).byte_offset(), 8);
+        assert_eq!(StepId(10).byte_offset(), 80);
+    }
+
+    #[test]
+    fn q_type_id_builtins() {
+        assert!(QTypeId::VOID.is_builtin());
+        assert!(QTypeId::NODE.is_builtin());
+        assert!(QTypeId::STRING.is_builtin());
+        assert!(!QTypeId(3).is_builtin());
+
+        assert_eq!(QTypeId::VOID.custom_index(), None);
+        assert_eq!(QTypeId(3).custom_index(), Some(0));
+        assert_eq!(QTypeId(5).custom_index(), Some(2));
+        assert_eq!(QTypeId::from_custom_index(0), QTypeId(3));
+    }
+
+    #[test]
+    fn q_type_id_custom_index_roundtrip() {
+        let id = QTypeId::from_custom_index(41);
+        assert_eq!(id.custom_index(), Some(41));
+    }
+
+    #[test]
+    #[should_panic(expected = "custom type index out of range")]
+    fn q_type_id_from_custom_index_rejects_overflow() {
+        let _ = QTypeId::from_custom_index(usize::from(u16::MAX));
+    }
+}
diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs
@@ -0,0 +1,16 @@
+//! Bytecode module for compiled Plotnik queries.
+//!
+//! Implements the binary format specified in `docs/binary-format/`.
+
+mod constants;
+mod header;
+mod ids;
+
+pub use constants::{
+    MAGIC, SECTION_ALIGN, STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING,
+    TYPE_VOID, VERSION,
+};
+
+pub use ids::{QTypeId, StepId, StringId};
+
+pub use header::Header;
diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs
@@ -16,6 +16,7 @@
 
 #![cfg_attr(coverage_nightly, feature(coverage_attribute))]
 
+pub mod bytecode;
 pub mod diagnostics;
 pub mod parser;
 pub mod query;

diff --git a/docs/binary-format/01-overview.md b/docs/binary-format/01-overview.md
@@ -31,10 +31,16 @@ Section offsets defined in Header for robust parsing.
 | [NodeTypes]   | NodeTypeId → StringId    | 4           |
 | [NodeFields]  | NodeFieldId → StringId   | 4           |
 | [Trivia]      | List of NodeTypeId       | 2           |
-| [TypeMeta]    | Types                    | Var         |
+| [TypeMeta]    | Types (3 sub-sections)   | 4           |
 | [Entrypoints] | Definitions              | 8           |
 | [Transitions] | Tree walking graph       | 8           |
 
+**TypeMeta sub-sections** (contiguous, offsets computed from counts):
+
+- **TypeDefs**: Structural topology
+- **TypeMembers**: Fields and variants
+- **TypeNames**: Name → TypeId mapping
+
 [StringBlob]: 02-strings.md
 [StringTable]: 02-strings.md
 [NodeTypes]: 03-symbols.md
@@ -62,7 +68,7 @@ struct Header {
     node_types_offset: u32,
     node_fields_offset: u32,
     trivia_offset: u32,
-    type_meta_offset: u32,
+    type_meta_offset: u32,   // Points to TypeMeta header (see 04-types.md)
     entrypoints_offset: u32,
     transitions_offset: u32,
 
@@ -71,9 +77,12 @@ struct Header {
     node_types_count: u16,
     node_fields_count: u16,
     trivia_count: u16,
-    type_defs_count: u16,
-    type_members_count: u16, // Number of TypeMembers
     entrypoints_count: u16,
     transitions_count: u16,
+    _pad: u32,
 }
+// Size: 16 + 32 + 16 = 64 bytes
+//
+// Note: TypeMeta sub-section counts are stored in the TypeMeta header,
+// not in the main header. See 04-types.md for details.
 ```
diff --git a/docs/binary-format/02-strings.md b/docs/binary-format/02-strings.md
@@ -6,8 +6,6 @@ Strings are stored in a centralized pool to eliminate redundancy and alignment p
 
 **StringId (u16)**: Zero-based index into the String Table.
 
-- `0xFFFF` is reserved as a sentinel for "None" or "Anonymous".
-
 ## 1. String Blob
 
 Contains the raw UTF-8 bytes for all strings concatenated together.