From 31a867cadc01d3db24d468a31dbcaa511c5d134d Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 7 Jan 2026 11:45:20 -0300 Subject: [PATCH] refactor: Eliminate intermediate `Match` struct allocation --- crates/plotnik-lib/src/bytecode/dump.rs | 50 ++-- .../plotnik-lib/src/bytecode/instructions.rs | 283 ++---------------- .../src/bytecode/instructions_tests.rs | 222 +++++--------- crates/plotnik-lib/src/bytecode/ir.rs | 94 ++++-- crates/plotnik-lib/src/bytecode/mod.rs | 6 +- crates/plotnik-lib/src/bytecode/module.rs | 69 +---- .../plotnik-lib/src/bytecode/module_tests.rs | 6 +- crates/plotnik-lib/src/engine/trace.rs | 24 +- crates/plotnik-lib/src/engine/vm.rs | 23 +- 9 files changed, 219 insertions(+), 558 deletions(-) diff --git a/crates/plotnik-lib/src/bytecode/dump.rs b/crates/plotnik-lib/src/bytecode/dump.rs index 3da907aa..759373f4 100644 --- a/crates/plotnik-lib/src/bytecode/dump.rs +++ b/crates/plotnik-lib/src/bytecode/dump.rs @@ -364,7 +364,7 @@ fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { } } - let instr = module.decode_step_alloc(step); + let instr = module.decode_step(step); let line = format_instruction(step, &instr, module, ctx, step_width); out.push_str(&line); out.push('\n'); @@ -378,16 +378,13 @@ fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { fn instruction_step_count(instr: &Instruction) -> u16 { match instr { Instruction::Match(m) => { - let slots = m.pre_effects.len() - + m.neg_fields.len() - + m.post_effects.len() - + m.successors.len(); - - if m.pre_effects.is_empty() - && m.neg_fields.is_empty() - && m.post_effects.is_empty() - && m.successors.len() <= 1 - { + let pre = m.pre_effects().count(); + let neg = m.neg_fields().count(); + let post = m.post_effects().count(); + let succ = m.succ_count(); + let slots = pre + neg + post + succ; + + if pre == 0 && neg == 0 && post == 0 && succ <= 1 { 1 // Match8 } else if slots <= 4 { 2 // Match16 @@ -432,24 +429,21 @@ fn format_match( let prefix = format!(" {:0sw$} {} ", step, symbol.format(), sw = step_width); let content = format_match_content(m, ctx); - let successors = format_successors(&m.successors, ctx, step_width); + let successors = format_match_successors(m, ctx, step_width); let base = format!("{prefix}{content}"); builder.pad_successors(base, &successors) } -/// Format Match instruction content (effects, node pattern, etc.) fn format_match_content(m: &Match, ctx: &DumpContext) -> String { let mut parts = Vec::new(); - // Pre-effects - if !m.pre_effects.is_empty() { - let effects: Vec<_> = m.pre_effects.iter().map(format_effect).collect(); - parts.push(format!("[{}]", effects.join(" "))); + let pre: Vec<_> = m.pre_effects().map(|e| format_effect(&e)).collect(); + if !pre.is_empty() { + parts.push(format!("[{}]", pre.join(" "))); } - // Negated fields - for &field_id in &m.neg_fields { + for field_id in m.neg_fields() { let name = ctx .node_field_name(field_id) .map(String::from) @@ -457,16 +451,14 @@ fn format_match_content(m: &Match, ctx: &DumpContext) -> String { parts.push(format!("-{name}")); } - // Field constraint and node type let node_part = format_node_pattern(m, ctx); if !node_part.is_empty() { parts.push(node_part); } - // Post-effects - if !m.post_effects.is_empty() { - let effects: Vec<_> = m.post_effects.iter().map(format_effect).collect(); - parts.push(format!("[{}]", effects.join(" "))); + let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect(); + if !post.is_empty() { + parts.push(format!("[{}]", post.join(" "))); } parts.join(" ") @@ -505,14 +497,12 @@ fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String { result } -/// Format successors list or terminal symbol. -fn format_successors(successors: &[StepId], ctx: &DumpContext, step_width: usize) -> String { - if successors.is_empty() { +fn format_match_successors(m: &Match, ctx: &DumpContext, step_width: usize) -> String { + if m.is_terminal() { "◼".to_string() } else { - successors - .iter() - .map(|s| format_step(*s, ctx, step_width)) + m.successors() + .map(|s| format_step(s, ctx, step_width)) .collect::>() .join(", ") } diff --git a/crates/plotnik-lib/src/bytecode/instructions.rs b/crates/plotnik-lib/src/bytecode/instructions.rs index 8fa5ad07..701dd4f0 100644 --- a/crates/plotnik-lib/src/bytecode/instructions.rs +++ b/crates/plotnik-lib/src/bytecode/instructions.rs @@ -38,19 +38,6 @@ impl StepId { } } -/// Read `count` little-endian u16 values from bytes starting at `offset`. -/// Advances `offset` by `count * 2`. -#[inline] -fn read_u16_vec(bytes: &[u8], offset: &mut usize, count: usize) -> Vec { - (0..count) - .map(|_| { - let v = u16::from_le_bytes([bytes[*offset], bytes[*offset + 1]]); - *offset += 2; - v - }) - .collect() -} - /// Instruction opcodes (4-bit). #[derive(Clone, Copy, PartialEq, Eq, Debug)] #[repr(u8)] @@ -136,215 +123,11 @@ impl Opcode { } } -/// Match instruction for pattern matching in the VM. +/// Match instruction decoded from bytecode. /// -/// Unifies Match8 (fast-path) and Match16-64 (extended) wire formats into -/// a single runtime-friendly struct. -#[derive(Clone, PartialEq, Eq, Debug)] -pub struct Match { - /// Segment index (0-15, currently only 0 is used). - pub segment: u8, - /// Navigation command. - pub nav: Nav, - /// Node type constraint (None = wildcard). - pub node_type: Option, - /// Field constraint (None = wildcard). - pub node_field: Option, - /// Effects to execute before match attempt. - pub pre_effects: Vec, - /// Fields that must NOT be present on the node. - pub neg_fields: Vec, - /// Effects to execute after successful match. - pub post_effects: Vec, - /// Successor step IDs (empty = accept, 1 = linear, 2+ = branch). - pub successors: Vec, -} - -impl Match { - /// Check if this is a terminal (accept) state. - #[inline] - pub fn is_terminal(&self) -> bool { - self.successors.is_empty() - } - - /// Check if this is an epsilon transition (no node interaction). - #[inline] - pub fn is_epsilon(&self) -> bool { - self.nav == Nav::Stay && self.node_type.is_none() && self.node_field.is_none() - } - - /// Decode from bytecode bytes. - /// - /// The slice must start at the instruction and contain at least - /// the full instruction size (determined by opcode). - pub fn from_bytes(bytes: &[u8]) -> Self { - assert!(bytes.len() >= 8, "Match instruction too short"); - - let type_id_byte = bytes[0]; - let segment = type_id_byte >> 4; - assert!( - segment == 0, - "non-zero segment not yet supported: {segment}" - ); - let opcode = Opcode::from_u8(type_id_byte & 0xF); - - assert!(opcode.is_match(), "expected Match opcode, got {opcode:?}"); - assert!( - bytes.len() >= opcode.size(), - "Match instruction truncated: expected {} bytes, got {}", - opcode.size(), - bytes.len() - ); - - let nav = Nav::from_byte(bytes[1]); - let node_type = NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]])); - let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]])); - - if opcode == Opcode::Match8 { - // Match8: single successor in bytes 6-7 (0 = terminal) - let next_raw = u16::from_le_bytes([bytes[6], bytes[7]]); - let successors = if next_raw == 0 { - vec![] // terminal - } else { - vec![StepId::new(next_raw)] - }; - - Self { - segment, - nav, - node_type, - node_field, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors, - } - } else { - // Extended match: parse counts and payload - let counts = u16::from_le_bytes([bytes[6], bytes[7]]); - let pre_count = ((counts >> 13) & 0x7) as usize; - let neg_count = ((counts >> 10) & 0x7) as usize; - let post_count = ((counts >> 7) & 0x7) as usize; - let succ_count = ((counts >> 1) & 0x3F) as usize; - - let payload = &bytes[8..]; - let mut offset = 0; - - let pre_effects = read_u16_vec(payload, &mut offset, pre_count) - .into_iter() - .map(|v| EffectOp::from_bytes(v.to_le_bytes())) - .collect(); - let neg_fields = read_u16_vec(payload, &mut offset, neg_count); - let post_effects = read_u16_vec(payload, &mut offset, post_count) - .into_iter() - .map(|v| EffectOp::from_bytes(v.to_le_bytes())) - .collect(); - let successors = read_u16_vec(payload, &mut offset, succ_count) - .into_iter() - .map(StepId::new) - .collect(); - - Self { - segment, - nav, - node_type, - node_field, - pre_effects, - neg_fields, - post_effects, - successors, - } - } - } - - /// Encode to bytecode bytes. - /// - /// Automatically selects the smallest opcode that fits the payload. - /// Returns None if the payload is too large (> 28 u16 slots). - pub fn to_bytes(&self) -> Option> { - // Match8 can be used if: no effects, no neg_fields, and at most 1 successor - let can_use_match8 = self.pre_effects.is_empty() - && self.neg_fields.is_empty() - && self.post_effects.is_empty() - && self.successors.len() <= 1; - - let opcode = if can_use_match8 { - Opcode::Match8 - } else { - // Extended match: count all payload slots - let slots_needed = self.pre_effects.len() - + self.neg_fields.len() - + self.post_effects.len() - + self.successors.len(); - select_match_opcode(slots_needed)? - }; - let size = opcode.size(); - let mut bytes = vec![0u8; size]; - - // Type ID byte - bytes[0] = (self.segment << 4) | (opcode as u8); - bytes[1] = self.nav.to_byte(); - - // Node type/field - let node_type_val = self.node_type.map(|n| n.get()).unwrap_or(0); - bytes[2..4].copy_from_slice(&node_type_val.to_le_bytes()); - let node_field_val = self.node_field.map(|n| n.get()).unwrap_or(0); - bytes[4..6].copy_from_slice(&node_field_val.to_le_bytes()); - - if opcode == Opcode::Match8 { - // Match8: single successor or terminal (0) - let next = self.successors.first().map(|s| s.get()).unwrap_or(0); - bytes[6..8].copy_from_slice(&next.to_le_bytes()); - } else { - // Extended match: pack counts and payload - let pre_count = self.pre_effects.len() as u16; - let neg_count = self.neg_fields.len() as u16; - let post_count = self.post_effects.len() as u16; - let succ_count = self.successors.len() as u16; - - let counts = - (pre_count << 13) | (neg_count << 10) | (post_count << 7) | (succ_count << 1); - bytes[6..8].copy_from_slice(&counts.to_le_bytes()); - - let mut offset = 8; - - // Write pre_effects - for effect in &self.pre_effects { - bytes[offset..offset + 2].copy_from_slice(&effect.to_bytes()); - offset += 2; - } - - // Write neg_fields - for &field in &self.neg_fields { - bytes[offset..offset + 2].copy_from_slice(&field.to_le_bytes()); - offset += 2; - } - - // Write post_effects - for effect in &self.post_effects { - bytes[offset..offset + 2].copy_from_slice(&effect.to_bytes()); - offset += 2; - } - - // Write successors - for succ in &self.successors { - bytes[offset..offset + 2].copy_from_slice(&succ.get().to_le_bytes()); - offset += 2; - } - - // Remaining bytes are already zero (padding) - } - - Some(bytes) - } -} - -/// Zero-copy view into a Match instruction for efficient VM execution. -/// -/// Unlike `Match`, this doesn't allocate - it stores a reference to the -/// bytecode and provides iterator methods for accessing effects and successors. +/// Provides iterator-based access to effects and successors without allocating. #[derive(Clone, Copy, Debug)] -pub struct MatchView<'a> { +pub struct Match<'a> { bytes: &'a [u8], /// Segment index (0-15, currently only 0 is used). pub segment: u8, @@ -365,7 +148,7 @@ pub struct MatchView<'a> { succ_count: u8, } -impl<'a> MatchView<'a> { +impl<'a> Match<'a> { /// Parse a Match instruction from bytecode without allocating. /// /// The slice must start at the instruction and contain at least @@ -384,36 +167,34 @@ impl<'a> MatchView<'a> { let node_type = NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]])); let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]])); - if opcode == Opcode::Match8 { - let next = u16::from_le_bytes([bytes[6], bytes[7]]); - Self { - bytes, - segment, - nav, - node_type, - node_field, - is_match8: true, - match8_next: next, - pre_count: 0, - neg_count: 0, - post_count: 0, - succ_count: if next == 0 { 0 } else { 1 }, - } - } else { - let counts = u16::from_le_bytes([bytes[6], bytes[7]]); - Self { - bytes, - segment, - nav, - node_type, - node_field, - is_match8: false, - match8_next: 0, - pre_count: ((counts >> 13) & 0x7) as u8, - neg_count: ((counts >> 10) & 0x7) as u8, - post_count: ((counts >> 7) & 0x7) as u8, - succ_count: ((counts >> 1) & 0x3F) as u8, - } + let (is_match8, match8_next, pre_count, neg_count, post_count, succ_count) = + if opcode == Opcode::Match8 { + let next = u16::from_le_bytes([bytes[6], bytes[7]]); + (true, next, 0, 0, 0, if next == 0 { 0 } else { 1 }) + } else { + let counts = u16::from_le_bytes([bytes[6], bytes[7]]); + ( + false, + 0, + ((counts >> 13) & 0x7) as u8, + ((counts >> 10) & 0x7) as u8, + ((counts >> 7) & 0x7) as u8, + ((counts >> 1) & 0x3F) as u8, + ) + }; + + Self { + bytes, + segment, + nav, + node_type, + node_field, + is_match8, + match8_next, + pre_count, + neg_count, + post_count, + succ_count, } } diff --git a/crates/plotnik-lib/src/bytecode/instructions_tests.rs b/crates/plotnik-lib/src/bytecode/instructions_tests.rs index 186d4db6..57bdeae2 100644 --- a/crates/plotnik-lib/src/bytecode/instructions_tests.rs +++ b/crates/plotnik-lib/src/bytecode/instructions_tests.rs @@ -1,11 +1,13 @@ //! Tests for bytecode instructions. +use std::collections::BTreeMap; use std::num::NonZeroU16; -use super::effects::{EffectOp, EffectOpcode}; +use super::effects::EffectOpcode; use super::instructions::{ - Call, Match, MatchView, Opcode, Return, StepId, align_to_section, select_match_opcode, + Call, Match, Opcode, Return, StepId, align_to_section, select_match_opcode, }; +use super::ir::{EffectIR, Label, MatchIR}; use super::nav::Nav; #[test] @@ -59,81 +61,6 @@ fn align_to_section_works() { assert_eq!(align_to_section(100), 128); } -#[test] -fn match8_roundtrip() { - let m = Match { - segment: 0, - nav: Nav::Down, - node_type: NonZeroU16::new(42), - node_field: NonZeroU16::new(7), - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: vec![StepId::new(10)], - }; - - let bytes = m.to_bytes().unwrap(); - assert_eq!(bytes.len(), 8); - - let decoded = Match::from_bytes(&bytes); - assert_eq!(decoded, m); -} - -#[test] -fn match8_terminal_roundtrip() { - let m = Match { - segment: 0, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: vec![], - }; - - let bytes = m.to_bytes().unwrap(); - assert_eq!(bytes.len(), 8); - - let decoded = Match::from_bytes(&bytes); - assert_eq!(decoded, m); - assert!(decoded.is_terminal()); - assert!(decoded.is_epsilon()); -} - -#[test] -fn match_extended_roundtrip() { - let m = Match { - segment: 0, - nav: Nav::Next, - node_type: NonZeroU16::new(100), - node_field: None, - pre_effects: vec![EffectOp { - opcode: EffectOpcode::Obj, - payload: 0, - }], - neg_fields: vec![5, 6], - post_effects: vec![ - EffectOp { - opcode: EffectOpcode::Node, - payload: 0, - }, - EffectOp { - opcode: EffectOpcode::Set, - payload: 42, - }, - ], - successors: vec![StepId::new(20), StepId::new(30)], - }; - - let bytes = m.to_bytes().unwrap(); - // 1 pre + 2 neg + 2 post + 2 succ = 7 slots → Match24 (8 slots capacity) - assert_eq!(bytes.len(), 24); - - let decoded = Match::from_bytes(&bytes); - assert_eq!(decoded, m); -} - #[test] fn call_roundtrip() { let c = Call { @@ -158,107 +85,92 @@ fn return_roundtrip() { assert_eq!(decoded, r); } +fn label_map(pairs: &[(u32, u16)]) -> BTreeMap { + pairs.iter().map(|&(l, s)| (Label(l), s)).collect() +} + #[test] -fn match_view_match8() { - let m = Match { - segment: 0, - nav: Nav::Down, - node_type: NonZeroU16::new(42), - node_field: NonZeroU16::new(7), - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: vec![StepId::new(10)], - }; +fn match_basic() { + let map = label_map(&[(0, 1), (1, 10)]); - let bytes = m.to_bytes().unwrap(); - let view = MatchView::from_bytes(&bytes); + let bytes = MatchIR::at(Label(0)) + .nav(Nav::Down) + .node_type(NonZeroU16::new(42)) + .node_field(NonZeroU16::new(7)) + .next(Label(1)) + .resolve(&map, |_, _| None, |_| None); - assert_eq!(view.nav, Nav::Down); - assert_eq!(view.node_type, NonZeroU16::new(42)); - assert_eq!(view.node_field, NonZeroU16::new(7)); - assert!(!view.is_terminal()); - assert!(!view.is_epsilon()); - assert_eq!(view.succ_count(), 1); - assert_eq!(view.successor(0), StepId::new(10)); - assert_eq!(view.pre_effects().count(), 0); - assert_eq!(view.neg_fields().count(), 0); - assert_eq!(view.post_effects().count(), 0); + assert_eq!(bytes.len(), 8); + + let m = Match::from_bytes(&bytes); + assert_eq!(m.nav, Nav::Down); + assert_eq!(m.node_type, NonZeroU16::new(42)); + assert_eq!(m.node_field, NonZeroU16::new(7)); + assert!(!m.is_terminal()); + assert!(!m.is_epsilon()); + assert_eq!(m.succ_count(), 1); + assert_eq!(m.successor(0), StepId::new(10)); + assert_eq!(m.pre_effects().count(), 0); + assert_eq!(m.neg_fields().count(), 0); + assert_eq!(m.post_effects().count(), 0); } #[test] -fn match_view_terminal() { - let m = Match { - segment: 0, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: vec![], - }; +fn match_terminal() { + let map = label_map(&[(0, 1)]); + + let bytes = MatchIR::terminal(Label(0)).resolve(&map, |_, _| None, |_| None); - let bytes = m.to_bytes().unwrap(); - let view = MatchView::from_bytes(&bytes); + assert_eq!(bytes.len(), 8); - assert!(view.is_terminal()); - assert!(view.is_epsilon()); - assert_eq!(view.succ_count(), 0); + let m = Match::from_bytes(&bytes); + assert!(m.is_terminal()); + assert!(m.is_epsilon()); + assert_eq!(m.succ_count(), 0); } #[test] -fn match_view_extended() { - let m = Match { - segment: 0, - nav: Nav::Next, - node_type: NonZeroU16::new(100), - node_field: None, - pre_effects: vec![EffectOp { - opcode: EffectOpcode::Obj, - payload: 0, - }], - neg_fields: vec![5, 6], - post_effects: vec![ - EffectOp { - opcode: EffectOpcode::Node, - payload: 0, - }, - EffectOp { - opcode: EffectOpcode::Set, - payload: 42, - }, - ], - successors: vec![StepId::new(20), StepId::new(30)], - }; +fn match_extended() { + let map = label_map(&[(0, 1), (1, 20), (2, 30)]); + + let bytes = MatchIR::at(Label(0)) + .nav(Nav::Next) + .node_type(NonZeroU16::new(100)) + .pre_effect(EffectIR::start_obj()) + .neg_field(5) + .neg_field(6) + .post_effect(EffectIR::node()) + .post_effect(EffectIR::with_member( + EffectOpcode::Set, + super::ir::MemberRef::absolute(42), + )) + .next_many(vec![Label(1), Label(2)]) + .resolve(&map, |_, _| None, |_| None); - let bytes = m.to_bytes().unwrap(); - let view = MatchView::from_bytes(&bytes); + // 1 pre + 2 neg + 2 post + 2 succ = 7 slots → Match24 (8 slots capacity) + assert_eq!(bytes.len(), 24); - assert_eq!(view.nav, Nav::Next); - assert_eq!(view.node_type, NonZeroU16::new(100)); - assert!(!view.is_terminal()); + let m = Match::from_bytes(&bytes); + assert_eq!(m.nav, Nav::Next); + assert_eq!(m.node_type, NonZeroU16::new(100)); + assert!(!m.is_terminal()); - // Check pre_effects - let pre: Vec<_> = view.pre_effects().collect(); + let pre: Vec<_> = m.pre_effects().collect(); assert_eq!(pre.len(), 1); assert_eq!(pre[0].opcode, EffectOpcode::Obj); - // Check neg_fields - let neg: Vec<_> = view.neg_fields().collect(); + let neg: Vec<_> = m.neg_fields().collect(); assert_eq!(neg, vec![5, 6]); - // Check post_effects - let post: Vec<_> = view.post_effects().collect(); + let post: Vec<_> = m.post_effects().collect(); assert_eq!(post.len(), 2); assert_eq!(post[0].opcode, EffectOpcode::Node); assert_eq!(post[1].opcode, EffectOpcode::Set); assert_eq!(post[1].payload, 42); - // Check successors - assert_eq!(view.succ_count(), 2); - assert_eq!(view.successor(0), StepId::new(20)); - assert_eq!(view.successor(1), StepId::new(30)); - let succs: Vec<_> = view.successors().collect(); + assert_eq!(m.succ_count(), 2); + assert_eq!(m.successor(0), StepId::new(20)); + assert_eq!(m.successor(1), StepId::new(30)); + let succs: Vec<_> = m.successors().collect(); assert_eq!(succs, vec![StepId::new(20), StepId::new(30)]); } diff --git a/crates/plotnik-lib/src/bytecode/ir.rs b/crates/plotnik-lib/src/bytecode/ir.rs index a32355e9..5369aa8d 100644 --- a/crates/plotnik-lib/src/bytecode/ir.rs +++ b/crates/plotnik-lib/src/bytecode/ir.rs @@ -8,7 +8,9 @@ use std::collections::BTreeMap; use std::num::NonZeroU16; use super::effects::{EffectOp, EffectOpcode}; -use super::instructions::{Call, Match, Return, StepAddr, StepId, Trampoline, select_match_opcode}; +use super::instructions::{ + Call, Opcode, Return, StepAddr, StepId, Trampoline, select_match_opcode, +}; use super::nav::Nav; use crate::analyze::type_check::TypeId; @@ -420,36 +422,70 @@ impl MatchIR { F: Fn(plotnik_core::Symbol, TypeId) -> Option, G: Fn(TypeId) -> Option, { - let successors: Vec = self - .successors - .iter() - .map(|&l| StepId::new(l.resolve(map))) - .collect(); - - // Resolve effect member references to absolute indices - let pre_effects: Vec = self - .pre_effects - .iter() - .map(|e| e.resolve(&lookup_member, &get_member_base)) - .collect(); - let post_effects: Vec = self - .post_effects - .iter() - .map(|e| e.resolve(&lookup_member, &get_member_base)) - .collect(); - - let m = Match { - segment: 0, - nav: self.nav, - node_type: self.node_type, - node_field: self.node_field, - pre_effects, - neg_fields: self.neg_fields.clone(), - post_effects, - successors, + let can_use_match8 = self.pre_effects.is_empty() + && self.neg_fields.is_empty() + && self.post_effects.is_empty() + && self.successors.len() <= 1; + + let opcode = if can_use_match8 { + Opcode::Match8 + } else { + let slots_needed = self.pre_effects.len() + + self.neg_fields.len() + + self.post_effects.len() + + self.successors.len(); + select_match_opcode(slots_needed).expect("instruction too large") }; - m.to_bytes().expect("instruction too large") + let size = opcode.size(); + let mut bytes = vec![0u8; size]; + + bytes[0] = opcode as u8; // segment 0 + bytes[1] = self.nav.to_byte(); + let node_type_val = self.node_type.map(|n| n.get()).unwrap_or(0); + bytes[2..4].copy_from_slice(&node_type_val.to_le_bytes()); + let node_field_val = self.node_field.map(|n| n.get()).unwrap_or(0); + bytes[4..6].copy_from_slice(&node_field_val.to_le_bytes()); + + if opcode == Opcode::Match8 { + let next = self + .successors + .first() + .map(|&l| l.resolve(map)) + .unwrap_or(0); + bytes[6..8].copy_from_slice(&next.to_le_bytes()); + } else { + let pre_count = self.pre_effects.len() as u16; + let neg_count = self.neg_fields.len() as u16; + let post_count = self.post_effects.len() as u16; + let succ_count = self.successors.len() as u16; + let counts = + (pre_count << 13) | (neg_count << 10) | (post_count << 7) | (succ_count << 1); + bytes[6..8].copy_from_slice(&counts.to_le_bytes()); + + let mut offset = 8; + for effect in &self.pre_effects { + let resolved = effect.resolve(&lookup_member, &get_member_base); + bytes[offset..offset + 2].copy_from_slice(&resolved.to_bytes()); + offset += 2; + } + for &field in &self.neg_fields { + bytes[offset..offset + 2].copy_from_slice(&field.to_le_bytes()); + offset += 2; + } + for effect in &self.post_effects { + let resolved = effect.resolve(&lookup_member, &get_member_base); + bytes[offset..offset + 2].copy_from_slice(&resolved.to_bytes()); + offset += 2; + } + for &label in &self.successors { + let addr = label.resolve(map); + bytes[offset..offset + 2].copy_from_slice(&addr.to_le_bytes()); + offset += 2; + } + } + + bytes } /// Check if this is an epsilon transition (no node interaction). diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index 8c88e1e4..0b6af1cf 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -35,13 +35,13 @@ pub use nav::Nav; pub use effects::{EffectOp, EffectOpcode}; pub use instructions::{ - Call, Match, MatchView, Opcode, Return, StepAddr, StepId, Trampoline, align_to_section, + Call, Match, Opcode, Return, StepAddr, StepId, Trampoline, align_to_section, select_match_opcode, }; pub use module::{ - ByteStorage, EntrypointsView, Instruction, InstructionView, Module, ModuleError, StringsView, - SymbolsView, TriviaView, TypesView, + ByteStorage, EntrypointsView, Instruction, Module, ModuleError, StringsView, SymbolsView, + TriviaView, TypesView, }; pub use dump::dump; diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs index d0f80883..f965d1c9 100644 --- a/crates/plotnik-lib/src/bytecode/module.rs +++ b/crates/plotnik-lib/src/bytecode/module.rs @@ -9,7 +9,7 @@ use std::path::Path; use super::header::Header; use super::ids::{StringId, TypeId}; -use super::instructions::{Call, Match, MatchView, Opcode, Return, Trampoline}; +use super::instructions::{Call, Match, Opcode, Return, Trampoline}; use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry}; use super::type_meta::{TypeDef, TypeMember, TypeMetaHeader, TypeName}; use super::{Entrypoint, SECTION_ALIGN, STEP_SIZE, VERSION}; @@ -57,55 +57,16 @@ impl ByteStorage { } /// Decoded instruction from bytecode. -#[derive(Clone, PartialEq, Eq, Debug)] -pub enum Instruction { - Match(Match), - Call(Call), - Return(Return), - Trampoline(Trampoline), -} - -impl Instruction { - /// Decode an instruction from bytecode bytes. - /// - /// The slice must start at the instruction and contain at least 8 bytes. - pub fn from_bytes(bytes: &[u8]) -> Self { - assert!(bytes.len() >= 8, "instruction too short"); - - let opcode = Opcode::from_u8(bytes[0] & 0xF); - match opcode { - Opcode::Call => { - let arr: [u8; 8] = bytes[..8].try_into().unwrap(); - Self::Call(Call::from_bytes(arr)) - } - Opcode::Return => { - let arr: [u8; 8] = bytes[..8].try_into().unwrap(); - Self::Return(Return::from_bytes(arr)) - } - Opcode::Trampoline => { - let arr: [u8; 8] = bytes[..8].try_into().unwrap(); - Self::Trampoline(Trampoline::from_bytes(arr)) - } - _ => Self::Match(Match::from_bytes(bytes)), - } - } -} - -/// Zero-copy instruction view for efficient VM execution. -/// -/// Unlike `Instruction`, this doesn't allocate for Match instructions. #[derive(Clone, Copy, Debug)] -pub enum InstructionView<'a> { - Match(MatchView<'a>), +pub enum Instruction<'a> { + Match(Match<'a>), Call(Call), Return(Return), Trampoline(Trampoline), } -impl<'a> InstructionView<'a> { - /// Decode an instruction view from bytecode bytes without allocating. - /// - /// The slice must start at the instruction and contain at least 8 bytes. +impl<'a> Instruction<'a> { + /// Decode an instruction from bytecode bytes. #[inline] pub fn from_bytes(bytes: &'a [u8]) -> Self { debug_assert!(bytes.len() >= 8, "instruction too short"); @@ -124,7 +85,7 @@ impl<'a> InstructionView<'a> { let arr: [u8; 8] = bytes[..8].try_into().unwrap(); Self::Trampoline(Trampoline::from_bytes(arr)) } - _ => Self::Match(MatchView::from_bytes(bytes)), + _ => Self::Match(Match::from_bytes(bytes)), } } } @@ -200,23 +161,11 @@ impl Module { &self.storage } - /// Decode an instruction at the given step index (raw u16). - /// - /// This allocates for Match instructions. For zero-allocation decoding, - /// use [`decode_step`](Self::decode_step) instead. - pub fn decode_step_alloc(&self, step: u16) -> Instruction { - let offset = self.header.transitions_offset as usize + (step as usize) * STEP_SIZE; - Instruction::from_bytes(&self.storage[offset..]) - } - - /// Decode an instruction view at the given step index (raw u16) without allocating. - /// - /// This is the VM's main access point for fetching instructions efficiently. - /// Step 0 is valid at runtime (though bytecode never jumps to it). + /// Decode an instruction at the given step index. #[inline] - pub fn decode_step(&self, step: u16) -> InstructionView<'_> { + pub fn decode_step(&self, step: u16) -> Instruction<'_> { let offset = self.header.transitions_offset as usize + (step as usize) * STEP_SIZE; - InstructionView::from_bytes(&self.storage[offset..]) + Instruction::from_bytes(&self.storage[offset..]) } /// Get a view into the string table. diff --git a/crates/plotnik-lib/src/bytecode/module_tests.rs b/crates/plotnik-lib/src/bytecode/module_tests.rs index f08f4754..aa280968 100644 --- a/crates/plotnik-lib/src/bytecode/module_tests.rs +++ b/crates/plotnik-lib/src/bytecode/module_tests.rs @@ -161,8 +161,7 @@ fn module_decode_step() { let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); - // Step 0 is the preamble entry (Obj instruction) - let instr = module.decode_step_alloc(0); + let instr = module.decode_step(0); assert!(matches!(instr, crate::bytecode::Instruction::Match(_))); } @@ -184,8 +183,7 @@ fn module_from_path_mmap() { assert!(module.header().validate_magic()); - // Verify we can decode instructions (step 0 is the preamble entry) - let instr = module.decode_step_alloc(0); + let instr = module.decode_step(0); assert!(matches!(instr, crate::bytecode::Instruction::Match(_))); // Verify string lookup works through mmap diff --git a/crates/plotnik-lib/src/engine/trace.rs b/crates/plotnik-lib/src/engine/trace.rs index be8be94d..4787272c 100644 --- a/crates/plotnik-lib/src/engine/trace.rs +++ b/crates/plotnik-lib/src/engine/trace.rs @@ -25,8 +25,8 @@ use arborium_tree_sitter::Node; use crate::Colors; use crate::bytecode::{ - EffectOpcode, InstructionView, LineBuilder, MatchView, Module, Nav, Symbol, cols, - format_effect, trace, truncate_text, width_for_count, + EffectOpcode, Instruction, LineBuilder, Match, Module, Nav, Symbol, cols, format_effect, trace, + truncate_text, width_for_count, }; use super::effect::RuntimeEffect; @@ -63,7 +63,7 @@ pub enum Verbosity { /// - `trace_enter_entrypoint` - when entering an entrypoint (for labels) pub trait Tracer { /// Called before executing an instruction. - fn trace_instruction(&mut self, ip: u16, instr: &InstructionView<'_>); + fn trace_instruction(&mut self, ip: u16, instr: &Instruction<'_>); /// Called after navigation succeeds. fn trace_nav(&mut self, nav: Nav, node: Node<'_>); @@ -114,7 +114,7 @@ pub struct NoopTracer; impl Tracer for NoopTracer { #[inline(always)] - fn trace_instruction(&mut self, _ip: u16, _instr: &InstructionView<'_>) {} + fn trace_instruction(&mut self, _ip: u16, _instr: &Instruction<'_>) {} #[inline(always)] fn trace_nav(&mut self, _nav: Nav, _node: Node<'_>) {} @@ -322,7 +322,7 @@ impl<'s> PrintTracer<'s> { /// Format match content for instruction line (matches dump format exactly). /// /// Order: [pre-effects] !neg_fields field: (type) [post-effects] - fn format_match_content(&self, m: &MatchView<'_>) -> String { + fn format_match_content(&self, m: &Match<'_>) -> String { let mut parts = Vec::new(); // Pre-effects: [Effect1 Effect2] @@ -353,7 +353,7 @@ impl<'s> PrintTracer<'s> { } /// Format node pattern: `field: (type)` or `(type)` or `field: _` or empty. - fn format_node_pattern(&self, m: &MatchView<'_>) -> String { + fn format_node_pattern(&self, m: &Match<'_>) -> String { let mut result = String::new(); if let Some(f) = m.node_field { @@ -423,9 +423,9 @@ impl<'s> PrintTracer<'s> { } impl Tracer for PrintTracer<'_> { - fn trace_instruction(&mut self, ip: u16, instr: &InstructionView<'_>) { + fn trace_instruction(&mut self, ip: u16, instr: &Instruction<'_>) { match instr { - InstructionView::Match(m) => { + Instruction::Match(m) => { // Show ε for epsilon transitions, empty otherwise (nav shown in sublines) let symbol = if m.is_epsilon() { Symbol::EPSILON @@ -436,16 +436,16 @@ impl Tracer for PrintTracer<'_> { let successors = format_match_successors(m); self.add_instruction(ip, symbol, &content, &successors); } - InstructionView::Call(c) => { + Instruction::Call(c) => { let name = self.entrypoint_name(c.target.get()); let content = self.format_def_name(name); let successors = format!("{:02} : {:02}", c.target.get(), c.next.get()); self.add_instruction(ip, Symbol::EMPTY, &content, &successors); } - InstructionView::Return(_) => { + Instruction::Return(_) => { self.pending_return_ip = Some(ip); } - InstructionView::Trampoline(t) => { + Instruction::Trampoline(t) => { // Trampoline shows as a call to the entrypoint target let content = "Trampoline"; let successors = format!("{:02}", t.next.get()); @@ -613,7 +613,7 @@ impl Tracer for PrintTracer<'_> { } /// Format match successors for instruction line. -fn format_match_successors(m: &MatchView<'_>) -> String { +fn format_match_successors(m: &Match<'_>) -> String { if m.is_terminal() { "◼".to_string() } else if m.succ_count() == 1 { diff --git a/crates/plotnik-lib/src/engine/vm.rs b/crates/plotnik-lib/src/engine/vm.rs index c932ed25..2d3ee8b6 100644 --- a/crates/plotnik-lib/src/engine/vm.rs +++ b/crates/plotnik-lib/src/engine/vm.rs @@ -4,8 +4,7 @@ use arborium_tree_sitter::{Node, Tree}; use crate::bytecode::NAMED_WILDCARD; use crate::bytecode::{ - Call, EffectOp, EffectOpcode, Entrypoint, InstructionView, MatchView, Module, Nav, StepAddr, - Trampoline, + Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, StepAddr, Trampoline, }; /// Get the nav for continue_search (always a sibling move). @@ -151,10 +150,10 @@ impl<'t> VM<'t> { tracer.trace_instruction(self.ip, &instr); let result = match instr { - InstructionView::Match(m) => self.exec_match(m, tracer), - InstructionView::Call(c) => self.exec_call(c, tracer), - InstructionView::Return(_) => self.exec_return(tracer), - InstructionView::Trampoline(t) => self.exec_trampoline(t, tracer), + Instruction::Match(m) => self.exec_match(m, tracer), + Instruction::Call(c) => self.exec_call(c, tracer), + Instruction::Return(_) => self.exec_return(tracer), + Instruction::Trampoline(t) => self.exec_trampoline(t, tracer), }; match result { @@ -165,11 +164,7 @@ impl<'t> VM<'t> { } } - fn exec_match( - &mut self, - m: MatchView<'_>, - tracer: &mut T, - ) -> Result<(), RuntimeError> { + fn exec_match(&mut self, m: Match<'_>, tracer: &mut T) -> Result<(), RuntimeError> { for effect_op in m.pre_effects() { self.emit_effect(effect_op, tracer); } @@ -190,7 +185,7 @@ impl<'t> VM<'t> { fn navigate_and_match( &mut self, - m: MatchView<'_>, + m: Match<'_>, tracer: &mut T, ) -> Result<(), RuntimeError> { let Some(policy) = self.cursor.navigate(m.nav) else { @@ -224,7 +219,7 @@ impl<'t> VM<'t> { } /// Check if current node matches type and field constraints. - fn node_matches(&self, m: MatchView<'_>, tracer: &mut T) -> bool { + fn node_matches(&self, m: Match<'_>, tracer: &mut T) -> bool { if let Some(expected) = m.node_type { if expected.get() == NAMED_WILDCARD { // Special case: `(_)` wildcard matches any named node @@ -248,7 +243,7 @@ impl<'t> VM<'t> { fn branch_to_successors( &mut self, - m: MatchView<'_>, + m: Match<'_>, tracer: &mut T, ) -> Result<(), RuntimeError> { if m.succ_count() == 0 {