From 3868ee5c96c32e0f09fb25b78dcd18bfc815519e Mon Sep 17 00:00:00 2001
From: Sergei Zharinov <zharinov@users.noreply.github.com>
Date: Mon, 29 Dec 2025 15:07:32 -0300
Subject: [PATCH 1/2] feat: add bytecode module loader

Add unified module storage with lazy decoding:
- Module: owns or mmaps bytecode, provides section views
- Instruction/InstructionView: decoded instruction wrappers
- StringsView, SymbolsView, TypesView, etc.
- Update binary format documentation
---
 Cargo.lock                                    |  11 +
 crates/plotnik-lib/Cargo.toml                 |   2 +
 crates/plotnik-lib/src/bytecode/entrypoint.rs |   2 +-
 crates/plotnik-lib/src/bytecode/mod.rs        |  16 +-
 crates/plotnik-lib/src/bytecode/module.rs     | 530 ++++++++++++++++++
 .../plotnik-lib/src/bytecode/module_tests.rs  | 338 +++++++++++
 6 files changed, 894 insertions(+), 5 deletions(-)
 create mode 100644 crates/plotnik-lib/src/bytecode/module.rs
 create mode 100644 crates/plotnik-lib/src/bytecode/module_tests.rs

diff --git a/Cargo.lock b/Cargo.lock
index c58567f0..4c7f1bf3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1474,6 +1474,15 @@ version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
+[[package]]
+name = "memmap2"
+version = "0.9.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -1629,11 +1638,13 @@ dependencies = [
  "indoc",
  "insta",
  "logos",
+ "memmap2",
  "plotnik-core",
  "plotnik-langs",
  "rowan",
  "serde",
  "serde_json",
+ "tempfile",
  "thiserror",
 ]
 
diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml
index 75cc0c69..7f698d86 100644
--- a/crates/plotnik-lib/Cargo.toml
+++ b/crates/plotnik-lib/Cargo.toml
@@ -21,6 +21,7 @@ rowan = "0.16.1"
 serde = { version = "1.0.228", features = ["derive"] }
 thiserror = "2.0.17"
 arborium-tree-sitter = "2.3.2"
+memmap2 = "0.9"
 plotnik-core = { version = "0.1", path = "../plotnik-core" }
 plotnik-langs = { version = "0.1", path = "../plotnik-langs", optional = true }
 
@@ -31,3 +32,4 @@ default = ["plotnik-langs"]
 insta = { version = "=1.45.1", features = ["yaml"] }
 indoc = "=2.0.7"
 serde_json = "=1.0.148"
+tempfile = "3"
diff --git a/crates/plotnik-lib/src/bytecode/entrypoint.rs b/crates/plotnik-lib/src/bytecode/entrypoint.rs
index 3550cf21..ccc848ff 100644
--- a/crates/plotnik-lib/src/bytecode/entrypoint.rs
+++ b/crates/plotnik-lib/src/bytecode/entrypoint.rs
@@ -3,7 +3,7 @@
 use super::{QTypeId, StepId, StringId};
 
 /// Named query definition entry point (8 bytes).
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
 #[repr(C)]
 pub struct Entrypoint {
     /// Definition name.
diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs
index 1b39e286..6f947aec 100644
--- a/crates/plotnik-lib/src/bytecode/mod.rs
+++ b/crates/plotnik-lib/src/bytecode/mod.rs
@@ -8,6 +8,7 @@ mod entrypoint;
 mod header;
 mod ids;
 mod instructions;
+mod module;
 mod nav;
 mod sections;
 mod type_meta;
@@ -21,19 +22,26 @@ pub use ids::{QTypeId, StepId, StringId};
 
 pub use header::Header;
 
-pub use nav::Nav;
-
 pub use sections::{FieldSymbol, NodeSymbol, Slice, TriviaEntry};
 
-pub use effects::{EffectOp, EffectOpcode};
-
 pub use entrypoint::Entrypoint;
 
 pub use type_meta::{TypeDef, TypeKind, TypeMember, TypeMetaHeader, TypeName};
 
+pub use nav::Nav;
+
+pub use effects::{EffectOp, EffectOpcode};
+
 pub use instructions::{
     Call, Match, MatchView, Opcode, Return, align_to_section, select_match_opcode,
 };
 
+pub use module::{
+    ByteStorage, EntrypointsView, Instruction, InstructionView, Module, ModuleError, StringsView,
+    SymbolsView, TriviaView, TypesView,
+};
+
 #[cfg(test)]
 mod instructions_tests;
+#[cfg(test)]
+mod module_tests;
diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs
new file mode 100644
index 00000000..5c75f23f
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/module.rs
@@ -0,0 +1,530 @@
+//! Bytecode module with unified storage.
+//!
+//! The [`Module`] struct holds compiled bytecode in either owned or memory-mapped
+//! form, decoding instructions lazily when the VM steps into them.
+
+use std::fs::File;
+use std::io;
+use std::ops::Deref;
+use std::path::Path;
+
+use memmap2::Mmap;
+
+use super::header::Header;
+use super::ids::{QTypeId, StepId, StringId};
+use super::instructions::{Call, Match, MatchView, Opcode, Return};
+use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry};
+use super::type_meta::{TypeDef, TypeMember, TypeMetaHeader, TypeName};
+use super::{Entrypoint, SECTION_ALIGN, VERSION};
+
+/// Read a little-endian u16 from bytes at the given offset.
+#[inline]
+fn read_u16_le(bytes: &[u8], offset: usize) -> u16 {
+    u16::from_le_bytes([bytes[offset], bytes[offset + 1]])
+}
+
+/// Read a little-endian u32 from bytes at the given offset.
+#[inline]
+fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
+    u32::from_le_bytes([
+        bytes[offset],
+        bytes[offset + 1],
+        bytes[offset + 2],
+        bytes[offset + 3],
+    ])
+}
+
+/// Storage for bytecode bytes—either owned or memory-mapped.
+#[derive(Debug)]
+pub enum ByteStorage {
+    /// Owned byte vector (from compilation or read into memory).
+    Owned(Vec<u8>),
+    /// Memory-mapped file.
+    Mapped(Mmap),
+}
+
+impl Deref for ByteStorage {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            ByteStorage::Owned(v) => v,
+            ByteStorage::Mapped(m) => m,
+        }
+    }
+}
+
+impl ByteStorage {
+    /// Create from owned bytes.
+    pub fn from_vec(bytes: Vec<u8>) -> Self {
+        Self::Owned(bytes)
+    }
+
+    /// Memory-map a file.
+    ///
+    /// # Safety
+    /// The file must not be modified while the mapping is active.
+    pub fn from_file(file: &File) -> io::Result<Self> {
+        // SAFETY: Caller ensures the file is not modified while mapped.
+        let mmap = unsafe { Mmap::map(file)? };
+        Ok(Self::Mapped(mmap))
+    }
+}
+
+/// Decoded instruction from bytecode.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum Instruction {
+    Match(Match),
+    Call(Call),
+    Return(Return),
+}
+
+impl Instruction {
+    /// Decode the instruction at the start of `bytes`.
+    ///
+    /// The low nibble of the first byte selects the opcode. `Call` and `Return`
+    /// are decoded from the first 8 bytes; any other opcode is passed to
+    /// `Match::from_bytes` together with the whole slice.
+    ///
+    /// # Panics
+    /// Panics with "instruction too short" if `bytes` has fewer than 8 bytes.
+    pub fn from_bytes(bytes: &[u8]) -> Self {
+        assert!(bytes.len() >= 8, "instruction too short");
+
+        // Copies the fixed-size 8-byte prefix used by `Call` and `Return`.
+        let head = || -> [u8; 8] { bytes[..8].try_into().unwrap() };
+        match Opcode::from_u8(bytes[0] & 0xF) {
+            Opcode::Call => Self::Call(Call::from_bytes(head())),
+            Opcode::Return => Self::Return(Return::from_bytes(head())),
+            _ => Self::Match(Match::from_bytes(bytes)),
+        }
+    }
+}
+
+/// Zero-copy instruction view for efficient VM execution.
+///
+/// Unlike `Instruction`, this doesn't allocate for Match instructions.
+#[derive(Clone, Copy, Debug)]
+pub enum InstructionView<'a> {
+    Match(MatchView<'a>),
+    Call(Call),
+    Return(Return),
+}
+
+impl<'a> InstructionView<'a> {
+    /// Decode the instruction at the start of `bytes` as a borrowed view.
+    ///
+    /// The low nibble of the first byte selects the opcode. `Call` and `Return`
+    /// are decoded from the first 8 bytes; any other opcode yields a
+    /// `MatchView` built from the whole slice.
+    ///
+    /// The slice is expected to hold at least 8 bytes; that length is only
+    /// asserted in builds with debug assertions enabled.
+    #[inline]
+    pub fn from_bytes(bytes: &'a [u8]) -> Self {
+        debug_assert!(bytes.len() >= 8, "instruction too short");
+
+        // Copies the fixed-size 8-byte prefix used by `Call` and `Return`.
+        let head = || -> [u8; 8] { bytes[..8].try_into().unwrap() };
+        match Opcode::from_u8(bytes[0] & 0xF) {
+            Opcode::Call => Self::Call(Call::from_bytes(head())),
+            Opcode::Return => Self::Return(Return::from_bytes(head())),
+            _ => Self::Match(MatchView::from_bytes(bytes)),
+        }
+    }
+}
+
+/// Module load error.
+#[derive(Debug, thiserror::Error)]
+pub enum ModuleError {
+    #[error("invalid magic: expected PTKQ")]
+    InvalidMagic,
+    #[error("unsupported version: {0} (expected {VERSION})")]
+    UnsupportedVersion(u32),
+    #[error("file too small: {0} bytes (minimum 64)")]
+    FileTooSmall(usize),
+    #[error("size mismatch: header says {header} bytes, got {actual}")]
+    SizeMismatch { header: u32, actual: usize },
+    #[error("io error: {0}")]
+    Io(#[from] io::Error),
+}
+
+/// A compiled bytecode module.
+///
+/// Instructions are decoded lazily via [`decode_step`](Self::decode_step).
+/// Cold data (strings, symbols, types) is accessed through view methods.
+#[derive(Debug)]
+pub struct Module {
+    storage: ByteStorage,
+    header: Header,
+}
+
+impl Module {
+    /// Load a module from owned bytes.
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
+        Self::from_storage(ByteStorage::Owned(bytes))
+    }
+
+    /// Load a module from a file path (memory-mapped).
+    pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
+        let file = File::open(path)?;
+        let storage = ByteStorage::from_file(&file)?;
+        Self::from_storage(storage)
+    }
+
+    /// Load a module from storage.
+    fn from_storage(storage: ByteStorage) -> Result<Self, ModuleError> {
+        if storage.len() < 64 {
+            return Err(ModuleError::FileTooSmall(storage.len()));
+        }
+
+        let header = Header::from_bytes(&storage[..64]);
+
+        if !header.validate_magic() {
+            return Err(ModuleError::InvalidMagic);
+        }
+        if !header.validate_version() {
+            return Err(ModuleError::UnsupportedVersion(header.version));
+        }
+        if header.total_size as usize != storage.len() {
+            return Err(ModuleError::SizeMismatch {
+                header: header.total_size,
+                actual: storage.len(),
+            });
+        }
+
+        Ok(Self { storage, header })
+    }
+
+    /// Get the parsed header.
+    pub fn header(&self) -> &Header {
+        &self.header
+    }
+
+    /// Get the raw bytes.
+    pub fn bytes(&self) -> &[u8] {
+        &self.storage
+    }
+
+    /// Decode an instruction at the given step ID.
+    ///
+    /// This allocates for Match instructions. For zero-allocation decoding,
+    /// use [`decode_step_view`](Self::decode_step_view) instead.
+    pub fn decode_step(&self, step_id: StepId) -> Instruction {
+        let offset = self.header.transitions_offset as usize + step_id.byte_offset();
+        Instruction::from_bytes(&self.storage[offset..])
+    }
+
+    /// Decode an instruction view at the given step ID without allocating.
+    ///
+    /// This is the VM's main access point for fetching instructions efficiently.
+    #[inline]
+    pub fn decode_step_view(&self, step_id: StepId) -> InstructionView<'_> {
+        let offset = self.header.transitions_offset as usize + step_id.byte_offset();
+        InstructionView::from_bytes(&self.storage[offset..])
+    }
+
+    /// Get a view into the string table.
+    pub fn strings(&self) -> StringsView<'_> {
+        StringsView {
+            blob: &self.storage[self.header.str_blob_offset as usize..],
+            table: self.string_table_slice(),
+        }
+    }
+
+    /// Get a view into the node type symbols.
+    pub fn node_types(&self) -> SymbolsView<'_, NodeSymbol> {
+        let offset = self.header.node_types_offset as usize;
+        let count = self.header.node_types_count as usize;
+        SymbolsView {
+            bytes: &self.storage[offset..offset + count * 4],
+            count,
+            _marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Get a view into the node field symbols.
+    pub fn node_fields(&self) -> SymbolsView<'_, FieldSymbol> {
+        let offset = self.header.node_fields_offset as usize;
+        let count = self.header.node_fields_count as usize;
+        SymbolsView {
+            bytes: &self.storage[offset..offset + count * 4],
+            count,
+            _marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Get a view into the trivia entries.
+    pub fn trivia(&self) -> TriviaView<'_> {
+        let offset = self.header.trivia_offset as usize;
+        let count = self.header.trivia_count as usize;
+        TriviaView {
+            bytes: &self.storage[offset..offset + count * 2],
+            count,
+        }
+    }
+
+    /// Get a view into the type metadata.
+    pub fn types(&self) -> TypesView<'_> {
+        let meta_offset = self.header.type_meta_offset as usize;
+        let meta_header = TypeMetaHeader::from_bytes(&self.storage[meta_offset..]);
+
+        // Sub-section offsets (each aligned to 64-byte boundary)
+        let defs_offset = align64(meta_offset + 8);
+        let defs_count = meta_header.type_defs_count as usize;
+        let members_offset = align64(defs_offset + defs_count * 4);
+        let members_count = meta_header.type_members_count as usize;
+        let names_offset = align64(members_offset + members_count * 4);
+        let names_count = meta_header.type_names_count as usize;
+
+        TypesView {
+            defs_bytes: &self.storage[defs_offset..defs_offset + defs_count * 4],
+            members_bytes: &self.storage[members_offset..members_offset + members_count * 4],
+            names_bytes: &self.storage[names_offset..names_offset + names_count * 4],
+            defs_count,
+            members_count,
+            names_count,
+        }
+    }
+
+    /// Get a view into the entrypoints.
+    pub fn entrypoints(&self) -> EntrypointsView<'_> {
+        let offset = self.header.entrypoints_offset as usize;
+        let count = self.header.entrypoints_count as usize;
+        EntrypointsView {
+            bytes: &self.storage[offset..offset + count * 8],
+            count,
+        }
+    }
+
+    /// Raw bytes of the string offset table, sentinel entry included.
+    /// Spans `str_table_count + 1` entries of 4 bytes each, starting at `str_table_offset`.
+    fn string_table_slice(&self) -> &[u8] {
+        let offset = self.header.str_table_offset as usize;
+        let count = self.header.str_table_count as usize;
+        &self.storage[offset..offset + (count + 1) * 4]
+    }
+}
+
+/// Round `offset` up to the nearest multiple of `SECTION_ALIGN`.
+///
+/// An offset that is already aligned is returned unchanged. `Module::types`
+/// uses this to step from one type-metadata sub-section to the next.
+fn align64(offset: usize) -> usize {
+    // `usize::next_multiple_of` is the standard-library form of the manual
+    // "add the missing remainder" computation.
+    offset.next_multiple_of(SECTION_ALIGN)
+}
+
+/// View into the string table for lazy string lookup.
+pub struct StringsView<'a> {
+    blob: &'a [u8],
+    table: &'a [u8],
+}
+
+impl<'a> StringsView<'a> {
+    /// Look up the string with the given ID.
+    ///
+    /// The table holds sequential u32 byte offsets into the blob, so string `i`
+    /// occupies `blob[table[i]..table[i + 1]]`. Panics on out-of-range or non-UTF-8 data.
+    pub fn get(&self, id: StringId) -> &'a str {
+        let idx = id.0 as usize;
+        let start = read_u32_le(self.table, idx * 4) as usize;
+        let end = read_u32_le(self.table, (idx + 1) * 4) as usize;
+        std::str::from_utf8(&self.blob[start..end]).expect("invalid UTF-8 in string table")
+    }
+}
+
+/// View into symbol tables (node types or field names).
+pub struct SymbolsView<'a, T> {
+    bytes: &'a [u8],
+    count: usize,
+    _marker: std::marker::PhantomData<T>,
+}
+
+impl<'a> SymbolsView<'a, NodeSymbol> {
+    /// Get a node symbol by index.
+    pub fn get(&self, idx: usize) -> NodeSymbol {
+        assert!(idx < self.count, "node symbol index out of bounds");
+        let offset = idx * 4;
+        NodeSymbol {
+            id: read_u16_le(self.bytes, offset),
+            name: StringId(read_u16_le(self.bytes, offset + 2)),
+        }
+    }
+
+    /// Number of entries.
+    pub fn len(&self) -> usize {
+        self.count
+    }
+
+    /// Check if empty.
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
+}
+
+impl<'a> SymbolsView<'a, FieldSymbol> {
+    /// Get a field symbol by index.
+    pub fn get(&self, idx: usize) -> FieldSymbol {
+        assert!(idx < self.count, "field symbol index out of bounds");
+        let offset = idx * 4;
+        FieldSymbol {
+            id: read_u16_le(self.bytes, offset),
+            name: StringId(read_u16_le(self.bytes, offset + 2)),
+        }
+    }
+
+    /// Number of entries.
+    pub fn len(&self) -> usize {
+        self.count
+    }
+
+    /// Check if empty.
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
+}
+
+/// View into trivia entries.
+pub struct TriviaView<'a> {
+    bytes: &'a [u8],
+    count: usize,
+}
+
+impl<'a> TriviaView<'a> {
+    /// Decode the trivia entry at `idx`; panics if `idx >= len()`.
+    pub fn get(&self, idx: usize) -> TriviaEntry {
+        assert!(idx < self.count, "trivia index out of bounds");
+        let byte_offset = idx * 2; // each entry is one little-endian u16
+        let node_type = read_u16_le(self.bytes, byte_offset);
+        TriviaEntry { node_type }
+    }
+
+    /// Number of trivia entries in the view.
+    pub fn len(&self) -> usize {
+        self.count
+    }
+
+    /// Returns `true` when the view holds no entries.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns `true` if some entry's node type equals `node_type` (linear scan).
+    pub fn contains(&self, node_type: u16) -> bool {
+        (0..self.len()).map(|i| self.get(i)).any(|entry| entry.node_type == node_type)
+    }
+}
+
+/// View into type metadata.
+///
+/// The TypeMeta section contains three sub-sections:
+/// - TypeDefs: structural topology (4 bytes each)
+/// - TypeMembers: fields and variants (4 bytes each)
+/// - TypeNames: name → TypeId mapping (4 bytes each)
+pub struct TypesView<'a> {
+    defs_bytes: &'a [u8],
+    members_bytes: &'a [u8],
+    names_bytes: &'a [u8],
+    defs_count: usize,
+    members_count: usize,
+    names_count: usize,
+}
+
+impl<'a> TypesView<'a> {
+    /// Get a type definition by index.
+    pub fn get_def(&self, idx: usize) -> TypeDef {
+        assert!(idx < self.defs_count, "type def index out of bounds");
+        let offset = idx * 4;
+        TypeDef {
+            data: read_u16_le(self.defs_bytes, offset),
+            count: self.defs_bytes[offset + 2],
+            kind: self.defs_bytes[offset + 3],
+        }
+    }
+
+    /// Get a type definition by QTypeId.
+    pub fn get(&self, id: QTypeId) -> Option<TypeDef> {
+        id.custom_index().map(|idx| self.get_def(idx))
+    }
+
+    /// Get a type member by index.
+    pub fn get_member(&self, idx: usize) -> TypeMember {
+        assert!(idx < self.members_count, "type member index out of bounds");
+        let offset = idx * 4;
+        TypeMember {
+            name: StringId(read_u16_le(self.members_bytes, offset)),
+            type_id: QTypeId(read_u16_le(self.members_bytes, offset + 2)),
+        }
+    }
+
+    /// Get a type name entry by index.
+    pub fn get_name(&self, idx: usize) -> TypeName {
+        assert!(idx < self.names_count, "type name index out of bounds");
+        let offset = idx * 4;
+        TypeName {
+            name: StringId(read_u16_le(self.names_bytes, offset)),
+            type_id: QTypeId(read_u16_le(self.names_bytes, offset + 2)),
+        }
+    }
+
+    /// Number of type definitions.
+    pub fn defs_count(&self) -> usize {
+        self.defs_count
+    }
+
+    /// Number of type members.
+    pub fn members_count(&self) -> usize {
+        self.members_count
+    }
+
+    /// Number of type names.
+    pub fn names_count(&self) -> usize {
+        self.names_count
+    }
+
+    /// Iterate over members of a struct or enum type.
+    pub fn members_of(&self, def: &TypeDef) -> impl Iterator<Item = TypeMember> + '_ {
+        let start = def.data as usize;
+        let count = def.count as usize;
+        (0..count).map(move |i| self.get_member(start + i))
+    }
+}
+
+/// View into entrypoints.
+pub struct EntrypointsView<'a> {
+    bytes: &'a [u8],
+    count: usize,
+}
+
+impl<'a> EntrypointsView<'a> {
+    /// Decode the entrypoint at `idx` from its 8-byte record; panics if `idx >= len()`.
+    pub fn get(&self, idx: usize) -> Entrypoint {
+        assert!(idx < self.count, "entrypoint index out of bounds");
+        let offset = idx * 8;
+        Entrypoint {
+            name: StringId(read_u16_le(self.bytes, offset)),
+            target: StepId(read_u16_le(self.bytes, offset + 2)),
+            result_type: QTypeId(read_u16_le(self.bytes, offset + 4)),
+            ..Default::default() // bytes 6..8 of the record are not read; other fields keep defaults
+        }
+    }
+
+    /// Number of entrypoints in the view.
+    pub fn len(&self) -> usize {
+        self.count
+    }
+
+    /// Returns `true` when the view holds no entrypoints.
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
+
+    /// Return the first entrypoint whose name, resolved through `strings`, equals `name`.
+    pub fn find_by_name(&self, name: &str, strings: &StringsView<'_>) -> Option<Entrypoint> {
+        (0..self.count)
+            .map(|i| self.get(i))
+            .find(|e| strings.get(e.name) == name)
+    }
+}
diff --git a/crates/plotnik-lib/src/bytecode/module_tests.rs b/crates/plotnik-lib/src/bytecode/module_tests.rs
new file mode 100644
index 00000000..c4b8be1f
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/module_tests.rs
@@ -0,0 +1,338 @@
+//! Tests for the bytecode module.
+
+use super::*;
+use crate::bytecode::nav::Nav;
+use crate::bytecode::{Header, MAGIC, Match, TypeMetaHeader, VERSION};
+
+/// Build a minimal valid bytecode for testing.
+fn build_test_bytecode() -> Vec<u8> {
+    // Layout (all sections 64-byte aligned):
+    // [0..64)     Header
+    // [64..128)   StringBlob + padding
+    // [128..192)  StringTable + padding (needs 2 u32 entries: offset + sentinel)
+    // [192..256)  NodeTypes + padding
+    // [256..320)  NodeFields + padding
+    // [320..384)  Trivia + padding
+    // [384..448)  TypeMeta: TypeMetaHeader (8 bytes) + padding
+    // [448..512)  TypeDefs sub-section (aligned)
+    // [512..576)  TypeMembers sub-section (aligned, empty)
+    // [576..640)  TypeNames sub-section (aligned, empty)
+    // [640..704)  Entrypoints + padding
+    // [704..768)  Transitions + padding
+
+    let mut bytes = vec![0u8; 768];
+
+    // String blob: "Test" at offset 0
+    let str_blob_offset = 64;
+    bytes[64] = b'T';
+    bytes[65] = b'e';
+    bytes[66] = b's';
+    bytes[67] = b't';
+
+    // String table: sequential u32 offsets with sentinel
+    // Entry 0: offset 0 (start of "Test")
+    // Entry 1: offset 4 (sentinel = end of blob)
+    let str_table_offset = 128;
+    bytes[128..132].copy_from_slice(&0u32.to_le_bytes()); // offset of string 0
+    bytes[132..136].copy_from_slice(&4u32.to_le_bytes()); // sentinel (end of blob)
+
+    // Node types: one entry (id=42, name=StringId(0))
+    let node_types_offset = 192;
+    bytes[192..194].copy_from_slice(&42u16.to_le_bytes());
+    bytes[194..196].copy_from_slice(&0u16.to_le_bytes());
+
+    // Node fields: one entry (id=7, name=StringId(0))
+    let node_fields_offset = 256;
+    bytes[256..258].copy_from_slice(&7u16.to_le_bytes());
+    bytes[258..260].copy_from_slice(&0u16.to_le_bytes());
+
+    // Trivia: one entry (node_type=100)
+    let trivia_offset = 320;
+    bytes[320..322].copy_from_slice(&100u16.to_le_bytes());
+
+    // TypeMeta section
+    let type_meta_offset = 384;
+
+    // TypeMetaHeader (8 bytes): type_defs_count=1, type_members_count=0, type_names_count=0
+    let type_meta_header = TypeMetaHeader {
+        type_defs_count: 1,
+        type_members_count: 0,
+        type_names_count: 0,
+        _pad: 0,
+    };
+    bytes[384..392].copy_from_slice(&type_meta_header.to_bytes());
+
+    // TypeDefs sub-section at aligned offset (448)
+    // One TypeDef (4 bytes): data=0, count=0, kind=3 (Struct)
+    bytes[448..450].copy_from_slice(&0u16.to_le_bytes()); // data (member index)
+    bytes[450] = 0; // count
+    bytes[451] = 3; // kind=Struct
+
+    // TypeMembers sub-section at 512 (empty)
+    // TypeNames sub-section at 576 (empty)
+
+    // Entrypoints: one entry (name=StringId(0), target=StepId(0), result_type=QTypeId(0))
+    let entrypoints_offset = 640;
+    bytes[640..642].copy_from_slice(&0u16.to_le_bytes()); // name
+    bytes[642..644].copy_from_slice(&0u16.to_le_bytes()); // target
+    bytes[644..646].copy_from_slice(&0u16.to_le_bytes()); // result_type
+    bytes[646..648].copy_from_slice(&0u16.to_le_bytes()); // padding
+
+    // Transitions: one Match8 instruction (accept state)
+    let transitions_offset = 704;
+    // type_id=0x00 (Match8, segment 0)
+    bytes[704] = 0x00;
+    // nav=Stay
+    bytes[705] = Nav::Stay.to_byte();
+    // node_type=None (0)
+    bytes[706..708].copy_from_slice(&0u16.to_le_bytes());
+    // node_field=None (0)
+    bytes[708..710].copy_from_slice(&0u16.to_le_bytes());
+    // next=0 (accept)
+    bytes[710..712].copy_from_slice(&0u16.to_le_bytes());
+
+    // Build header
+    let header = Header {
+        magic: MAGIC,
+        version: VERSION,
+        checksum: 0,
+        total_size: 768,
+        str_blob_offset: str_blob_offset as u32,
+        str_table_offset: str_table_offset as u32,
+        node_types_offset: node_types_offset as u32,
+        node_fields_offset: node_fields_offset as u32,
+        trivia_offset: trivia_offset as u32,
+        type_meta_offset: type_meta_offset as u32,
+        entrypoints_offset: entrypoints_offset as u32,
+        transitions_offset: transitions_offset as u32,
+        str_table_count: 1,
+        node_types_count: 1,
+        node_fields_count: 1,
+        trivia_count: 1,
+        entrypoints_count: 1,
+        transitions_count: 1,
+        ..Default::default()
+    };
+
+    bytes[0..64].copy_from_slice(&header.to_bytes());
+    bytes
+}
+
+#[test]
+fn module_from_bytes_valid() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    assert!(module.header().validate_magic());
+    assert!(module.header().validate_version());
+    assert_eq!(module.header().total_size, 768);
+}
+
+#[test]
+fn module_from_bytes_too_small() {
+    let bytes = vec![0u8; 32];
+    let err = Module::from_bytes(bytes).unwrap_err();
+    assert!(matches!(err, ModuleError::FileTooSmall(32)));
+}
+
+#[test]
+fn module_from_bytes_invalid_magic() {
+    let mut bytes = build_test_bytecode();
+    bytes[0] = b'X'; // Corrupt magic
+    let err = Module::from_bytes(bytes).unwrap_err();
+    assert!(matches!(err, ModuleError::InvalidMagic));
+}
+
+#[test]
+fn module_from_bytes_wrong_version() {
+    let mut bytes = build_test_bytecode();
+    bytes[4..8].copy_from_slice(&999u32.to_le_bytes()); // Wrong version
+    let err = Module::from_bytes(bytes).unwrap_err();
+    assert!(matches!(err, ModuleError::UnsupportedVersion(999)));
+}
+
+#[test]
+fn module_from_bytes_size_mismatch() {
+    let mut bytes = build_test_bytecode();
+    bytes[12..16].copy_from_slice(&1000u32.to_le_bytes()); // Wrong total_size
+    let err = Module::from_bytes(bytes).unwrap_err();
+    assert!(matches!(
+        err,
+        ModuleError::SizeMismatch {
+            header: 1000,
+            actual: 768
+        }
+    ));
+}
+
+#[test]
+fn module_decode_step() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let instr = module.decode_step(StepId(0));
+    match instr {
+        Instruction::Match(m) => {
+            assert_eq!(m.nav, Nav::Stay);
+            assert!(m.is_epsilon());
+            assert!(m.is_terminal());
+        }
+        _ => panic!("expected Match instruction"),
+    }
+}
+
+#[test]
+fn module_strings_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let strings = module.strings();
+    assert_eq!(strings.get(StringId(0)), "Test");
+}
+
+#[test]
+fn module_node_types_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let node_types = module.node_types();
+    assert_eq!(node_types.len(), 1);
+    assert!(!node_types.is_empty());
+
+    let sym = node_types.get(0);
+    assert_eq!(sym.id, 42);
+    assert_eq!(sym.name, StringId(0));
+}
+
+#[test]
+fn module_node_fields_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let fields = module.node_fields();
+    assert_eq!(fields.len(), 1);
+
+    let sym = fields.get(0);
+    assert_eq!(sym.id, 7);
+    assert_eq!(sym.name, StringId(0));
+}
+
+#[test]
+fn module_trivia_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let trivia = module.trivia();
+    assert_eq!(trivia.len(), 1);
+    assert!(trivia.contains(100));
+    assert!(!trivia.contains(42));
+}
+
+#[test]
+fn module_types_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let types = module.types();
+    assert_eq!(types.defs_count(), 1);
+    assert_eq!(types.members_count(), 0);
+    assert_eq!(types.names_count(), 0);
+
+    let def = types.get_def(0);
+    assert_eq!(def.kind, 3); // Struct
+    assert_eq!(def.data, 0); // member index
+    assert_eq!(def.count, 0); // member count
+}
+
+#[test]
+fn module_entrypoints_view() {
+    let bytes = build_test_bytecode();
+    let module = Module::from_bytes(bytes).unwrap();
+
+    let entrypoints = module.entrypoints();
+    assert_eq!(entrypoints.len(), 1);
+    assert!(!entrypoints.is_empty());
+
+    let ep = entrypoints.get(0);
+    assert_eq!(ep.name, StringId(0));
+    assert_eq!(ep.target, StepId(0));
+
+    let strings = module.strings();
+    let found = entrypoints.find_by_name("Test", &strings);
+    assert!(found.is_some());
+    assert_eq!(found.unwrap().target, StepId(0));
+}
+
+#[test]
+fn instruction_from_bytes_dispatch() {
+    // Test Match8
+    let match8 = Match {
+        segment: 0,
+        nav: Nav::Down,
+        node_type: std::num::NonZeroU16::new(42),
+        node_field: None,
+        pre_effects: vec![],
+        neg_fields: vec![],
+        post_effects: vec![],
+        successors: vec![StepId(10)],
+    };
+    let bytes = match8.to_bytes().unwrap();
+    let instr = Instruction::from_bytes(&bytes);
+    assert!(matches!(instr, Instruction::Match(_)));
+
+    // Test Call
+    let call = Call {
+        segment: 0,
+        next: StepId(5),
+        target: StepId(100),
+        ref_id: 1,
+    };
+    let bytes = call.to_bytes();
+    let instr = Instruction::from_bytes(&bytes);
+    assert!(matches!(instr, Instruction::Call(_)));
+
+    // Test Return
+    let ret = Return {
+        segment: 0,
+        ref_id: 1,
+    };
+    let bytes = ret.to_bytes();
+    let instr = Instruction::from_bytes(&bytes);
+    assert!(matches!(instr, Instruction::Return(_)));
+}
+
+#[test]
+fn byte_storage_deref() {
+    let data = vec![1, 2, 3, 4, 5];
+    let storage = ByteStorage::from_vec(data.clone());
+
+    assert_eq!(&*storage, &data[..]);
+    assert_eq!(storage.len(), 5);
+    assert_eq!(storage[2], 3);
+}
+
+#[test]
+fn module_from_path_mmap() {
+    use std::io::Write;
+
+    let bytes = build_test_bytecode();
+
+    // Write to temp file
+    let mut tmpfile = tempfile::NamedTempFile::new().unwrap();
+    tmpfile.write_all(&bytes).unwrap();
+    tmpfile.flush().unwrap();
+
+    // Load via mmap
+    let module = Module::from_path(tmpfile.path()).unwrap();
+
+    assert!(module.header().validate_magic());
+    assert_eq!(module.header().total_size, 768);
+
+    // Verify we can decode instructions
+    let instr = module.decode_step(StepId(0));
+    assert!(matches!(instr, Instruction::Match(_)));
+
+    // Verify string lookup works through mmap
+    let strings = module.strings();
+    assert_eq!(strings.get(StringId(0)), "Test");
+}

From 4973f1280ec7b0826886b8cf46cfff7ddcb789c8 Mon Sep 17 00:00:00 2001
From: Sergei Zharinov <zharinov@users.noreply.github.com>
Date: Mon, 29 Dec 2025 15:07:32 -0300
Subject: [PATCH 2/2] fixup! feat: add bytecode module loader

---
 Cargo.lock                                | 10 ------
 crates/plotnik-lib/Cargo.toml             |  1 -
 crates/plotnik-lib/src/bytecode/module.rs | 42 +++++++----------------
 3 files changed, 13 insertions(+), 40 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4c7f1bf3..325559bf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1474,15 +1474,6 @@ version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
-[[package]]
-name = "memmap2"
-version = "0.9.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -1638,7 +1629,6 @@ dependencies = [
  "indoc",
  "insta",
  "logos",
- "memmap2",
  "plotnik-core",
  "plotnik-langs",
  "rowan",
diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml
index 7f698d86..f905a3bf 100644
--- a/crates/plotnik-lib/Cargo.toml
+++ b/crates/plotnik-lib/Cargo.toml
@@ -21,7 +21,6 @@ rowan = "0.16.1"
 serde = { version = "1.0.228", features = ["derive"] }
 thiserror = "2.0.17"
 arborium-tree-sitter = "2.3.2"
-memmap2 = "0.9"
 plotnik-core = { version = "0.1", path = "../plotnik-core" }
 plotnik-langs = { version = "0.1", path = "../plotnik-langs", optional = true }
 
diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs
index 5c75f23f..82cefde1 100644
--- a/crates/plotnik-lib/src/bytecode/module.rs
+++ b/crates/plotnik-lib/src/bytecode/module.rs
@@ -1,15 +1,12 @@
 //! Bytecode module with unified storage.
 //!
-//! The [`Module`] struct holds compiled bytecode in either owned or memory-mapped
-//! form, decoding instructions lazily when the VM steps into them.
+//! The [`Module`] struct holds compiled bytecode, decoding instructions lazily
+//! when the VM steps into them.
 
-use std::fs::File;
 use std::io;
 use std::ops::Deref;
 use std::path::Path;
 
-use memmap2::Mmap;
-
 use super::header::Header;
 use super::ids::{QTypeId, StepId, StringId};
 use super::instructions::{Call, Match, MatchView, Opcode, Return};
@@ -34,40 +31,28 @@ fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
     ])
 }
 
-/// Storage for bytecode bytes—either owned or memory-mapped.
+/// Storage for bytecode bytes: an owned, in-memory `Vec<u8>`.
 #[derive(Debug)]
-pub enum ByteStorage {
-    /// Owned byte vector (from compilation or read into memory).
-    Owned(Vec<u8>),
-    /// Memory-mapped file.
-    Mapped(Mmap),
-}
+pub struct ByteStorage(Vec<u8>);
 
 impl Deref for ByteStorage {
     type Target = [u8];
 
     fn deref(&self) -> &Self::Target {
-        match self {
-            ByteStorage::Owned(v) => v,
-            ByteStorage::Mapped(m) => m,
-        }
+        &self.0
     }
 }
 
 impl ByteStorage {
     /// Create from owned bytes.
     pub fn from_vec(bytes: Vec<u8>) -> Self {
-        Self::Owned(bytes)
+        Self(bytes)
     }
 
-    /// Memory-map a file.
-    ///
-    /// # Safety
-    /// The file must not be modified while the mapping is active.
-    pub fn from_file(file: &File) -> io::Result<Self> {
-        // SAFETY: Caller ensures the file is not modified while mapped.
-        let mmap = unsafe { Mmap::map(file)? };
-        Ok(Self::Mapped(mmap))
+    /// Read the whole file at `path` into memory, returning any I/O error.
+    pub fn from_file(path: impl AsRef<Path>) -> io::Result<Self> {
+        let bytes = std::fs::read(path)?;
+        Ok(Self(bytes))
     }
 }
 
@@ -162,13 +147,12 @@ pub struct Module {
 impl Module {
     /// Load a module from owned bytes.
     pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
-        Self::from_storage(ByteStorage::Owned(bytes))
+        Self::from_storage(ByteStorage::from_vec(bytes))
     }
 
-    /// Load a module from a file path (memory-mapped).
+    /// Load a module from a file path.
     pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
-        let file = File::open(path)?;
-        let storage = ByteStorage::from_file(&file)?;
+        let storage = ByteStorage::from_file(&path)?;
         Self::from_storage(storage)
     }