From 173033c05da72a6e5b3fe294476a205487458cd8 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 5 Jan 2026 11:19:53 -0300 Subject: [PATCH] refactor: VM receives bootstrap address as runtime parameter --- crates/plotnik-cli/src/commands/exec.rs | 2 +- crates/plotnik-cli/src/commands/trace.rs | 2 +- crates/plotnik-lib/src/bytecode/dump.rs | 9 ++-- crates/plotnik-lib/src/bytecode/entrypoint.rs | 7 ++-- crates/plotnik-lib/src/bytecode/ids.rs | 41 ------------------- .../plotnik-lib/src/bytecode/instructions.rs | 34 +++++++++++++-- .../src/bytecode/instructions_tests.rs | 3 +- crates/plotnik-lib/src/bytecode/ir.rs | 39 ++++++++++-------- crates/plotnik-lib/src/bytecode/mod.rs | 5 ++- crates/plotnik-lib/src/bytecode/module.rs | 4 +- crates/plotnik-lib/src/emit/layout.rs | 3 +- crates/plotnik-lib/src/emit/layout_tests.rs | 15 ++++--- crates/plotnik-lib/src/emit/mod.rs | 6 +-- crates/plotnik-lib/src/engine/engine_tests.rs | 4 +- crates/plotnik-lib/src/engine/trace.rs | 2 +- crates/plotnik-lib/src/engine/vm.rs | 19 +++++---- 16 files changed, 95 insertions(+), 100 deletions(-) diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index 5c1cf85c..1d2e2857 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -38,7 +38,7 @@ pub fn run(args: ExecArgs) { }); let vm = VM::new(&tree, trivia_types, FuelLimits::default()); - let effects = match vm.execute(&module, &entrypoint) { + let effects = match vm.execute(&module, 0, &entrypoint) { Ok(effects) => effects, Err(RuntimeError::NoMatch) => { std::process::exit(1); diff --git a/crates/plotnik-cli/src/commands/trace.rs b/crates/plotnik-cli/src/commands/trace.rs index 4c22fb42..902a5dc4 100644 --- a/crates/plotnik-cli/src/commands/trace.rs +++ b/crates/plotnik-cli/src/commands/trace.rs @@ -48,7 +48,7 @@ pub fn run(args: TraceArgs) { let colors = Colors::new(args.color); let mut tracer = PrintTracer::new(&source_code, &module, args.verbosity, colors); - let effects = match vm.execute_with(&module, &entrypoint, &mut tracer) { + let effects = match vm.execute_with(&module, 0, &entrypoint, &mut tracer) { Ok(effects) => { tracer.print(); effects diff --git a/crates/plotnik-lib/src/bytecode/dump.rs b/crates/plotnik-lib/src/bytecode/dump.rs index 1232f62c..14ead7c1 100644 --- a/crates/plotnik-lib/src/bytecode/dump.rs +++ b/crates/plotnik-lib/src/bytecode/dump.rs @@ -8,7 +8,8 @@ use std::fmt::Write as _; use crate::colors::Colors; use super::format::{LineBuilder, Symbol, format_effect, nav_symbol_epsilon, width_for_count}; -use super::ids::{QTypeId, StepId}; +use super::ids::QTypeId; +use super::instructions::StepId; use super::module::{Instruction, Module}; use super::type_meta::TypeKind; use super::{Call, Match, Return, Trampoline}; @@ -73,12 +74,12 @@ impl DumpContext { let node_fields = module.node_fields(); let mut step_labels = BTreeMap::new(); - // Preamble always starts at step 0 + // Preamble always at step 0 (first in layout) step_labels.insert(0, "_ObjWrap".to_string()); for i in 0..entrypoints.len() { let ep = entrypoints.get(i); let name = strings.get(ep.name).to_string(); - step_labels.insert(ep.target.get(), name); + step_labels.insert(ep.target, name); } let mut node_type_names = BTreeMap::new(); @@ -319,7 +320,7 @@ fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) { .map(|i| { let ep = entrypoints.get(i); let name = strings.get(ep.name); - (name, ep.target.0, ep.result_type.0) + (name, ep.target, ep.result_type.0) }) .collect(); entries.sort_by_key(|(name, _, _)| *name); diff --git a/crates/plotnik-lib/src/bytecode/entrypoint.rs b/crates/plotnik-lib/src/bytecode/entrypoint.rs index 3550cf21..7f256855 100644 --- a/crates/plotnik-lib/src/bytecode/entrypoint.rs +++ b/crates/plotnik-lib/src/bytecode/entrypoint.rs @@ -1,6 +1,7 @@ //! Entrypoint section types. -use super::{QTypeId, StepId, StringId}; +use super::instructions::StepAddr; +use super::{QTypeId, StringId}; /// Named query definition entry point (8 bytes). #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -8,8 +9,8 @@ use super::{QTypeId, StepId, StringId}; pub struct Entrypoint { /// Definition name. pub name: StringId, - /// Starting instruction (StepId). - pub target: StepId, + /// Starting instruction address. + pub target: StepAddr, /// Result type. pub result_type: QTypeId, pub(crate) _pad: u16, diff --git a/crates/plotnik-lib/src/bytecode/ids.rs b/crates/plotnik-lib/src/bytecode/ids.rs index bd87d70d..1e004293 100644 --- a/crates/plotnik-lib/src/bytecode/ids.rs +++ b/crates/plotnik-lib/src/bytecode/ids.rs @@ -2,35 +2,6 @@ use std::num::NonZeroU16; -use super::constants::STEP_SIZE; - -/// Index into the Transitions section (8-byte steps). -/// -/// Step 0 is a valid address (preamble starts there). -/// In successor fields, raw value 0 means "terminal" — this sentinel -/// is handled by decoding logic, not by the type. -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -#[repr(transparent)] -pub struct StepId(pub u16); - -impl StepId { - #[inline] - pub fn new(n: u16) -> Self { - Self(n) - } - - /// Get the raw u16 value. - #[inline] - pub fn get(self) -> u16 { - self.0 - } - - #[inline] - pub fn byte_offset(self) -> usize { - self.0 as usize * STEP_SIZE - } -} - /// Index into the String Table. /// /// Uses NonZeroU16 to make StringId(0) unrepresentable - index 0 is @@ -58,15 +29,3 @@ impl StringId { #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default)] #[repr(transparent)] pub struct QTypeId(pub u16); - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn step_id_byte_offset() { - assert_eq!(StepId::new(0).byte_offset(), 0); - assert_eq!(StepId::new(1).byte_offset(), 8); - assert_eq!(StepId::new(10).byte_offset(), 80); - } -} diff --git a/crates/plotnik-lib/src/bytecode/instructions.rs b/crates/plotnik-lib/src/bytecode/instructions.rs index 1ddb7dd0..8fa5ad07 100644 --- a/crates/plotnik-lib/src/bytecode/instructions.rs +++ b/crates/plotnik-lib/src/bytecode/instructions.rs @@ -7,9 +7,37 @@ use std::num::NonZeroU16; use super::constants::{SECTION_ALIGN, STEP_SIZE}; use super::effects::EffectOp; -use super::ids::StepId; use super::nav::Nav; +/// Step address in bytecode (raw u16). +/// +/// Used for layout addresses, entrypoint targets, bootstrap parameter, etc. +/// For decoded instruction successors (where 0 = terminal), use [`StepId`] instead. +pub type StepAddr = u16; + +/// Successor step address in decoded instructions. +/// +/// Uses NonZeroU16 because raw 0 means "terminal" (no successor). +/// This type is only for decoded instruction successors - use raw `u16` +/// for addresses in layout, entrypoints, and VM internals. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +#[repr(transparent)] +pub struct StepId(pub NonZeroU16); + +impl StepId { + /// Create a new StepId. Panics if n == 0. + #[inline] + pub fn new(n: u16) -> Self { + Self(NonZeroU16::new(n).expect("StepId cannot be 0")) + } + + /// Get the raw u16 value. + #[inline] + pub fn get(self) -> u16 { + self.0.get() + } +} + /// Read `count` little-endian u16 values from bytes starting at `offset`. /// Advances `offset` by `count * 2`. #[inline] @@ -178,7 +206,7 @@ impl Match { let successors = if next_raw == 0 { vec![] // terminal } else { - vec![StepId(next_raw)] + vec![StepId::new(next_raw)] }; Self { @@ -417,7 +445,7 @@ impl<'a> MatchView<'a> { if self.is_match8 { debug_assert!(idx == 0); debug_assert!(self.match8_next != 0, "terminal has no successors"); - StepId(self.match8_next) + StepId::new(self.match8_next) } else { let offset = self.succ_offset() + idx * 2; StepId::new(u16::from_le_bytes([ diff --git a/crates/plotnik-lib/src/bytecode/instructions_tests.rs b/crates/plotnik-lib/src/bytecode/instructions_tests.rs index 60d0d8ba..186d4db6 100644 --- a/crates/plotnik-lib/src/bytecode/instructions_tests.rs +++ b/crates/plotnik-lib/src/bytecode/instructions_tests.rs @@ -3,9 +3,8 @@ use std::num::NonZeroU16; use super::effects::{EffectOp, EffectOpcode}; -use super::ids::StepId; use super::instructions::{ - Call, Match, MatchView, Opcode, Return, align_to_section, select_match_opcode, + Call, Match, MatchView, Opcode, Return, StepId, align_to_section, select_match_opcode, }; use super::nav::Nav; diff --git a/crates/plotnik-lib/src/bytecode/ir.rs b/crates/plotnik-lib/src/bytecode/ir.rs index 8f7dc3df..2c7d9be6 100644 --- a/crates/plotnik-lib/src/bytecode/ir.rs +++ b/crates/plotnik-lib/src/bytecode/ir.rs @@ -1,26 +1,25 @@ //! Instruction IR with symbolic labels. //! //! Pre-layout instructions use `Label` for symbolic references. -//! After layout, labels are resolved to `StepId` for serialization. +//! After layout, labels are resolved to step addresses (u16) for serialization. //! Member indices use deferred resolution via `MemberRef`. use std::collections::BTreeMap; use std::num::NonZeroU16; use super::effects::{EffectOp, EffectOpcode}; -use super::ids::StepId; -use super::instructions::{Call, Match, Return, Trampoline, select_match_opcode}; +use super::instructions::{Call, Match, Return, StepAddr, StepId, Trampoline, select_match_opcode}; use super::nav::Nav; use crate::analyze::type_check::TypeId; -/// Symbolic reference, resolved to StepId at layout time. +/// Symbolic reference, resolved to step address at layout time. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Label(pub u32); impl Label { - /// Resolve this label to a StepId using the layout mapping. + /// Resolve this label to a step address using the layout mapping. #[inline] - pub fn resolve(self, map: &BTreeMap) -> StepId { + pub fn resolve(self, map: &BTreeMap) -> StepAddr { *map.get(&self).expect("label not in layout") } } @@ -197,7 +196,7 @@ impl Instruction { /// - `get_member_base`: maps parent TypeId to member base index pub fn resolve( &self, - map: &BTreeMap, + map: &BTreeMap, lookup_member: F, get_member_base: G, ) -> Vec @@ -263,7 +262,7 @@ impl MatchIR { /// - `get_member_base`: maps parent TypeId to member base index pub fn resolve( &self, - map: &BTreeMap, + map: &BTreeMap, lookup_member: F, get_member_base: G, ) -> Vec @@ -271,7 +270,11 @@ impl MatchIR { F: Fn(plotnik_core::Symbol, TypeId) -> Option, G: Fn(TypeId) -> Option, { - let successors: Vec = self.successors.iter().map(|&l| l.resolve(map)).collect(); + let successors: Vec = self + .successors + .iter() + .map(|&l| StepId::new(l.resolve(map))) + .collect(); // Resolve effect member references to absolute indices let pre_effects: Vec = self @@ -323,13 +326,13 @@ pub struct CallIR { impl CallIR { /// Resolve labels and serialize to bytecode bytes. - pub fn resolve(&self, map: &BTreeMap) -> [u8; 8] { + pub fn resolve(&self, map: &BTreeMap) -> [u8; 8] { let c = Call { segment: 0, nav: self.nav, node_field: self.node_field, - next: self.next.resolve(map), - target: self.target.resolve(map), + next: StepId::new(self.next.resolve(map)), + target: StepId::new(self.target.resolve(map)), }; c.to_bytes() } @@ -364,20 +367,20 @@ pub struct TrampolineIR { impl TrampolineIR { /// Resolve labels and serialize to bytecode bytes. - pub fn resolve(&self, map: &BTreeMap) -> [u8; 8] { + pub fn resolve(&self, map: &BTreeMap) -> [u8; 8] { let t = Trampoline { segment: 0, - next: self.next.resolve(map), + next: StepId::new(self.next.resolve(map)), }; t.to_bytes() } } -/// Result of layout: maps labels to step IDs. +/// Result of layout: maps labels to step addresses. #[derive(Clone, Debug)] pub struct LayoutResult { - /// Mapping from symbolic labels to concrete step IDs. - pub label_to_step: BTreeMap, + /// Mapping from symbolic labels to concrete step addresses (raw u16). + pub label_to_step: BTreeMap, /// Total number of steps (for header). pub total_steps: u16, } @@ -464,7 +467,7 @@ mod tests { }; let mut map = BTreeMap::new(); - map.insert(Label(0), StepId::new(1)); + map.insert(Label(0), 1u16); let bytes = m.resolve(&map, |_, _| None, |_| None); assert_eq!(bytes.len(), 8); diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index 049acbf0..0e11fad3 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -18,7 +18,7 @@ mod type_meta; pub use constants::{MAGIC, SECTION_ALIGN, STEP_SIZE, VERSION}; -pub use ids::{QTypeId, StepId, StringId}; +pub use ids::{QTypeId, StringId}; pub use header::{Header, flags}; @@ -33,7 +33,8 @@ pub use nav::Nav; pub use effects::{EffectOp, EffectOpcode}; pub use instructions::{ - Call, Match, MatchView, Opcode, Return, Trampoline, align_to_section, select_match_opcode, + Call, Match, MatchView, Opcode, Return, StepAddr, StepId, Trampoline, align_to_section, + select_match_opcode, }; pub use module::{ diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs index b9535588..ac253230 100644 --- a/crates/plotnik-lib/src/bytecode/module.rs +++ b/crates/plotnik-lib/src/bytecode/module.rs @@ -8,7 +8,7 @@ use std::ops::Deref; use std::path::Path; use super::header::Header; -use super::ids::{QTypeId, StepId, StringId}; +use super::ids::{QTypeId, StringId}; use super::instructions::{Call, Match, MatchView, Opcode, Return, Trampoline}; use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry}; use super::type_meta::{TypeDef, TypeMember, TypeMetaHeader, TypeName}; @@ -521,7 +521,7 @@ impl<'a> EntrypointsView<'a> { let offset = idx * 8; Entrypoint { name: StringId::new(read_u16_le(self.bytes, offset)), - target: StepId::new(read_u16_le(self.bytes, offset + 2)), + target: read_u16_le(self.bytes, offset + 2), result_type: QTypeId(read_u16_le(self.bytes, offset + 4)), _pad: 0, } diff --git a/crates/plotnik-lib/src/emit/layout.rs b/crates/plotnik-lib/src/emit/layout.rs index 7f79a49d..0871b8ba 100644 --- a/crates/plotnik-lib/src/emit/layout.rs +++ b/crates/plotnik-lib/src/emit/layout.rs @@ -5,7 +5,6 @@ use std::collections::{BTreeMap, HashSet}; -use crate::bytecode::StepId; use crate::bytecode::ir::{Instruction, Label, LayoutResult}; const CACHE_LINE: usize = 64; @@ -177,7 +176,7 @@ fn assign_step_ids( } } - mapping.insert(label, StepId::new(current_step)); + mapping.insert(label, current_step); let step_count = (size / STEP_SIZE) as u16; current_step += step_count; current_offset += size; diff --git a/crates/plotnik-lib/src/emit/layout_tests.rs b/crates/plotnik-lib/src/emit/layout_tests.rs index c8bf558e..5656ce7b 100644 --- a/crates/plotnik-lib/src/emit/layout_tests.rs +++ b/crates/plotnik-lib/src/emit/layout_tests.rs @@ -3,7 +3,6 @@ use std::num::NonZeroU16; use super::layout::CacheAligned; use crate::bytecode::EffectOpcode; use crate::bytecode::Nav; -use crate::bytecode::StepId; use crate::bytecode::ir::{CallIR, EffectIR, Instruction, Label, MatchIR, ReturnIR}; #[test] @@ -29,7 +28,7 @@ fn layout_single_instruction() { let result = CacheAligned::layout(&instructions, &[Label(0)]); - assert_eq!(result.label_to_step.get(&Label(0)), Some(&StepId::new(0))); + assert_eq!(result.label_to_step.get(&Label(0)), Some(&0u16)); assert_eq!(result.total_steps, 1); } @@ -72,9 +71,9 @@ fn layout_linear_chain() { let result = CacheAligned::layout(&instructions, &[Label(0)]); // Should be contiguous: 0, 1, 2 - assert_eq!(result.label_to_step.get(&Label(0)), Some(&StepId::new(0))); - assert_eq!(result.label_to_step.get(&Label(1)), Some(&StepId::new(1))); - assert_eq!(result.label_to_step.get(&Label(2)), Some(&StepId::new(2))); + assert_eq!(result.label_to_step.get(&Label(0)), Some(&0u16)); + assert_eq!(result.label_to_step.get(&Label(1)), Some(&1u16)); + assert_eq!(result.label_to_step.get(&Label(2)), Some(&2u16)); } #[test] @@ -233,9 +232,9 @@ fn layout_large_instruction_cache_alignment() { let result = CacheAligned::layout(&instructions, &[Label(0)]); // Label 0 at step 0 (offset 0) - assert_eq!(result.label_to_step.get(&Label(0)), Some(&StepId::new(0))); + assert_eq!(result.label_to_step.get(&Label(0)), Some(&0u16)); // Label 1 should be aligned - either at step 1 or padded to cache line - let step1 = result.label_to_step.get(&Label(1)).unwrap(); - assert!(step1.get() >= 1); + let step1 = *result.label_to_step.get(&Label(1)).unwrap(); + assert!(step1 >= 1); } diff --git a/crates/plotnik-lib/src/emit/mod.rs b/crates/plotnik-lib/src/emit/mod.rs index 86918690..f66a55f3 100644 --- a/crates/plotnik-lib/src/emit/mod.rs +++ b/crates/plotnik-lib/src/emit/mod.rs @@ -753,7 +753,7 @@ fn emit_inner( .map_err(EmitError::Compile)?; // Layout with cache alignment - // Preamble entry FIRST ensures it gets the lowest address (step 1) + // Preamble entry FIRST ensures it gets the lowest address (step 0) let mut entry_labels: Vec