From 7c38b8da88d08e5128b87978f2bc587ee700dc2e Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 5 Jan 2026 09:17:44 -0300 Subject: [PATCH] fix: implement call-site scoping for captured refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Definitions are normalized (no Obj/EndObj wrapper) - Call sites decide scoping based on capture and return type - Universal preamble wraps every entrypoint: Obj → Trampoline → EndObj → Return - Layout starts at address 0 (preamble is first instruction) - StepId now uses u16 (0 is valid address, terminal handled by decoding logic) --- crates/plotnik-cli/src/commands/check.rs | 6 +- crates/plotnik-cli/src/commands/dump.rs | 16 +- crates/plotnik-cli/src/commands/exec.rs | 4 +- crates/plotnik-cli/src/commands/infer.rs | 14 +- .../plotnik-cli/src/commands/lang_resolver.rs | 7 +- crates/plotnik-cli/src/commands/run_common.rs | 2 +- crates/plotnik-cli/src/commands/trace.rs | 6 +- crates/plotnik-cli/src/commands/tree.rs | 11 +- .../src/analyze/type_check/infer.rs | 4 +- crates/plotnik-lib/src/bytecode/dump.rs | 47 ++-- crates/plotnik-lib/src/bytecode/format.rs | 2 +- crates/plotnik-lib/src/bytecode/ids.rs | 16 +- .../plotnik-lib/src/bytecode/instructions.rs | 57 ++++- crates/plotnik-lib/src/bytecode/ir.rs | 228 ++++++++++++++---- crates/plotnik-lib/src/bytecode/mod.rs | 8 +- crates/plotnik-lib/src/bytecode/module.rs | 12 +- .../plotnik-lib/src/bytecode/module_tests.rs | 8 +- crates/plotnik-lib/src/colors.rs | 6 +- crates/plotnik-lib/src/compile/capture.rs | 32 ++- crates/plotnik-lib/src/compile/expressions.rs | 112 ++++++--- crates/plotnik-lib/src/compile/mod.rs | 129 +++++----- crates/plotnik-lib/src/compile/navigation.rs | 31 ++- crates/plotnik-lib/src/compile/quantifier.rs | 79 ++++-- crates/plotnik-lib/src/compile/scope.rs | 96 ++++++-- crates/plotnik-lib/src/compile/sequences.rs | 32 ++- crates/plotnik-lib/src/diagnostics/message.rs | 8 +- crates/plotnik-lib/src/emit/layout.rs | 25 +- crates/plotnik-lib/src/emit/layout_tests.rs | 33 ++- crates/plotnik-lib/src/emit/mod.rs | 66 ++++- ..._codegen_tests__alternations_captured.snap | 20 +- ...n_tests__alternations_captured_tagged.snap | 28 ++- ...gen_tests__alternations_in_quantifier.snap | 62 ++--- ...__codegen_tests__alternations_labeled.snap | 26 +- ...ts__alternations_no_internal_captures.snap | 24 +- ...en_tests__alternations_null_injection.snap | 24 +- ...ternations_tagged_with_definition_ref.snap | 38 +-- ...codegen_tests__alternations_unlabeled.snap | 24 +- ...degen_tests__anchors_between_siblings.snap | 20 +- ...t__codegen_tests__anchors_first_child.snap | 18 +- ...it__codegen_tests__anchors_last_child.snap | 18 +- ...mit__codegen_tests__anchors_no_anchor.snap | 20 +- ...codegen_tests__anchors_with_anonymous.snap | 20 +- ...__emit__codegen_tests__captures_basic.snap | 16 +- ...codegen_tests__captures_deeply_nested.snap | 28 ++- ...mit__codegen_tests__captures_multiple.snap | 22 +- ...__codegen_tests__captures_nested_flat.snap | 24 +- ...sts__captures_optional_wrapper_struct.snap | 36 +-- ..._codegen_tests__captures_struct_scope.snap | 22 +- ...egen_tests__captures_with_type_custom.snap | 16 +- ...egen_tests__captures_with_type_string.snap | 16 +- ...odegen_tests__captures_wrapper_struct.snap | 50 ++-- ...tests__comprehensive_multi_definition.snap | 54 ++--- ...__codegen_tests__definitions_multiple.snap | 26 +- ...gen_tests__definitions_nested_capture.snap | 68 +++--- ..._codegen_tests__definitions_reference.snap | 36 +-- ...it__codegen_tests__definitions_single.snap | 16 +- ...it__codegen_tests__fields_alternation.snap | 30 +-- ..._emit__codegen_tests__fields_multiple.snap | 22 +- ...__emit__codegen_tests__fields_negated.snap | 20 +- ...b__emit__codegen_tests__fields_single.snap | 20 +- ..._emit__codegen_tests__nodes_anonymous.snap | 20 +- ...lib__emit__codegen_tests__nodes_error.snap | 16 +- ...b__emit__codegen_tests__nodes_missing.snap | 16 +- ...lib__emit__codegen_tests__nodes_named.snap | 16 +- ...it__codegen_tests__nodes_wildcard_any.snap | 20 +- ...__codegen_tests__nodes_wildcard_named.snap | 20 +- ...__codegen_tests__optional_first_child.snap | 36 +-- ...odegen_tests__optional_null_injection.snap | 32 +-- ..._tests__quantifiers_first_child_array.snap | 50 ++-- ...__codegen_tests__quantifiers_optional.snap | 32 +-- ...tests__quantifiers_optional_nongreedy.snap | 32 +-- ...emit__codegen_tests__quantifiers_plus.snap | 36 +-- ...gen_tests__quantifiers_plus_nongreedy.snap | 36 +-- ..._tests__quantifiers_repeat_navigation.snap | 46 ++-- ...s__quantifiers_sequence_in_called_def.snap | 70 +++--- ...emit__codegen_tests__quantifiers_star.snap | 40 +-- ...gen_tests__quantifiers_star_nongreedy.snap | 40 +-- ...degen_tests__quantifiers_struct_array.snap | 52 ++-- ...emit__codegen_tests__recursion_simple.snap | 34 +-- ...sts__recursion_with_structured_result.snap | 50 ++-- ..._emit__codegen_tests__sequences_basic.snap | 20 +- ...odegen_tests__sequences_in_quantifier.snap | 48 ++-- ...emit__codegen_tests__sequences_nested.snap | 24 +- ...odegen_tests__sequences_with_captures.snap | 22 +- crates/plotnik-lib/src/engine/engine_tests.rs | 4 +- crates/plotnik-lib/src/engine/materializer.rs | 13 +- crates/plotnik-lib/src/engine/trace.rs | 50 +++- crates/plotnik-lib/src/engine/verify.rs | 2 +- crates/plotnik-lib/src/engine/verify_tests.rs | 4 +- crates/plotnik-lib/src/engine/vm.rs | 53 +++- .../plotnik-lib/src/parser/grammar/fields.rs | 4 +- crates/plotnik-lib/src/query/printer.rs | 2 +- crates/plotnik-lib/src/typegen/typescript.rs | 39 ++- 93 files changed, 1801 insertions(+), 1139 deletions(-) diff --git a/crates/plotnik-cli/src/commands/check.rs b/crates/plotnik-cli/src/commands/check.rs index d4075e89..9d233afd 100644 --- a/crates/plotnik-cli/src/commands/check.rs +++ b/crates/plotnik-cli/src/commands/check.rs @@ -14,10 +14,8 @@ pub struct CheckArgs { } pub fn run(args: CheckArgs) { - let source_map = match load_query_source( - args.query_path.as_deref(), - args.query_text.as_deref(), - ) { + let source_map = match load_query_source(args.query_path.as_deref(), args.query_text.as_deref()) + { Ok(map) => map, Err(msg) => { eprintln!("error: {}", msg); diff --git a/crates/plotnik-cli/src/commands/dump.rs b/crates/plotnik-cli/src/commands/dump.rs index 55d65c7d..59b28d8d 100644 --- a/crates/plotnik-cli/src/commands/dump.rs +++ b/crates/plotnik-cli/src/commands/dump.rs @@ -1,8 +1,8 @@ use std::path::PathBuf; +use plotnik_lib::Colors; use plotnik_lib::QueryBuilder; use plotnik_lib::bytecode::{Module, dump}; -use plotnik_lib::Colors; use super::lang_resolver::{resolve_lang, resolve_lang_required, suggest_language}; use super::query_loader::load_query_source; @@ -15,10 +15,8 @@ pub struct DumpArgs { } pub fn run(args: DumpArgs) { - let source_map = match load_query_source( - args.query_path.as_deref(), - args.query_text.as_deref(), - ) { + let source_map = match load_query_source(args.query_path.as_deref(), args.query_text.as_deref()) + { Ok(map) => map, Err(msg) => { eprintln!("error: {}", msg); @@ -64,7 +62,9 @@ pub fn run(args: DumpArgs) { if !linked.is_valid() { eprint!( "{}", - linked.diagnostics().render_colored(linked.source_map(), args.color) + linked + .diagnostics() + .render_colored(linked.source_map(), args.color) ); std::process::exit(1); } @@ -73,7 +73,9 @@ pub fn run(args: DumpArgs) { if !query.is_valid() { eprint!( "{}", - query.diagnostics().render_colored(query.source_map(), args.color) + query + .diagnostics() + .render_colored(query.source_map(), args.color) ); std::process::exit(1); } diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index 702838bf..5c1cf85c 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -2,10 +2,10 @@ use std::path::PathBuf; +use plotnik_lib::Colors; use plotnik_lib::engine::{ - debug_verify_type, FuelLimits, Materializer, RuntimeError, ValueMaterializer, VM, + FuelLimits, Materializer, RuntimeError, VM, ValueMaterializer, debug_verify_type, }; -use plotnik_lib::Colors; use super::run_common::{self, PreparedQuery, QueryInput}; diff --git a/crates/plotnik-cli/src/commands/infer.rs b/crates/plotnik-cli/src/commands/infer.rs index 51d471e3..a0140c27 100644 --- a/crates/plotnik-cli/src/commands/infer.rs +++ b/crates/plotnik-cli/src/commands/infer.rs @@ -31,10 +31,8 @@ pub fn run(args: InferArgs) { std::process::exit(1); } - let source_map = match load_query_source( - args.query_path.as_deref(), - args.query_text.as_deref(), - ) { + let source_map = match load_query_source(args.query_path.as_deref(), args.query_text.as_deref()) + { Ok(map) => map, Err(msg) => { eprintln!("error: {}", msg); @@ -80,7 +78,9 @@ pub fn run(args: InferArgs) { if !linked.is_valid() { eprint!( "{}", - linked.diagnostics().render_colored(linked.source_map(), args.color) + linked + .diagnostics() + .render_colored(linked.source_map(), args.color) ); std::process::exit(1); } @@ -89,7 +89,9 @@ pub fn run(args: InferArgs) { if !query.is_valid() { eprint!( "{}", - query.diagnostics().render_colored(query.source_map(), args.color) + query + .diagnostics() + .render_colored(query.source_map(), args.color) ); std::process::exit(1); } diff --git a/crates/plotnik-cli/src/commands/lang_resolver.rs b/crates/plotnik-cli/src/commands/lang_resolver.rs index 59362eca..33c2ed6a 100644 --- a/crates/plotnik-cli/src/commands/lang_resolver.rs +++ b/crates/plotnik-cli/src/commands/lang_resolver.rs @@ -25,8 +25,7 @@ pub fn resolve_lang(explicit: Option<&str>, query_path: Option<&Path>) -> Option /// Resolve language, returning an error message if unknown. pub fn resolve_lang_required(lang_name: &str) -> Result { - plotnik_langs::from_name(lang_name) - .ok_or_else(|| format!("unknown language: '{}'", lang_name)) + plotnik_langs::from_name(lang_name).ok_or_else(|| format!("unknown language: '{}'", lang_name)) } /// Suggest similar language names for typos. @@ -59,9 +58,7 @@ fn levenshtein(a: &str, b: &str) -> usize { curr[0] = i; for j in 1..=n { let cost = usize::from(a_chars[i - 1] != b_chars[j - 1]); - curr[j] = (prev[j] + 1) - .min(curr[j - 1] + 1) - .min(prev[j - 1] + cost); + curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost); } std::mem::swap(&mut prev, &mut curr); } diff --git a/crates/plotnik-cli/src/commands/run_common.rs b/crates/plotnik-cli/src/commands/run_common.rs index a55dfb49..f6dee92c 100644 --- a/crates/plotnik-cli/src/commands/run_common.rs +++ b/crates/plotnik-cli/src/commands/run_common.rs @@ -6,9 +6,9 @@ use std::path::Path; use arborium_tree_sitter as tree_sitter; use plotnik_langs::Lang; +use plotnik_lib::QueryBuilder; use plotnik_lib::bytecode::{Entrypoint, Module}; use plotnik_lib::emit::emit_linked; -use plotnik_lib::QueryBuilder; use super::lang_resolver::{resolve_lang_required, suggest_language}; use super::query_loader::load_query_source; diff --git a/crates/plotnik-cli/src/commands/trace.rs b/crates/plotnik-cli/src/commands/trace.rs index 8830bb68..4c22fb42 100644 --- a/crates/plotnik-cli/src/commands/trace.rs +++ b/crates/plotnik-cli/src/commands/trace.rs @@ -2,11 +2,11 @@ use std::path::PathBuf; +use plotnik_lib::Colors; use plotnik_lib::engine::{ - debug_verify_type, FuelLimits, Materializer, PrintTracer, RuntimeError, ValueMaterializer, - Verbosity, VM, + FuelLimits, Materializer, PrintTracer, RuntimeError, VM, ValueMaterializer, Verbosity, + debug_verify_type, }; -use plotnik_lib::Colors; use super::run_common::{self, PreparedQuery, QueryInput}; diff --git a/crates/plotnik-cli/src/commands/tree.rs b/crates/plotnik-cli/src/commands/tree.rs index 34b055d5..55cd4ab5 100644 --- a/crates/plotnik-cli/src/commands/tree.rs +++ b/crates/plotnik-cli/src/commands/tree.rs @@ -27,7 +27,11 @@ pub fn run(args: TreeArgs) { } }; - let lang = resolve_lang(&args.lang, args.source_path.as_deref(), args.source_text.is_some()); + let lang = resolve_lang( + &args.lang, + args.source_path.as_deref(), + args.source_text.is_some(), + ); let tree = lang.parse(&source); print!("{}", dump_tree(&tree, &source, args.raw, args.spans)); } @@ -107,7 +111,10 @@ fn format_node_with_field( let span_suffix = if show_spans { let start = node.start_position(); let end = node.end_position(); - format!(" [{}:{}-{}:{}]", start.row, start.column, end.row, end.column) + format!( + " [{}:{}-{}:{}]", + start.row, start.column, end.row, end.column + ) } else { String::new() }; diff --git a/crates/plotnik-lib/src/analyze/type_check/infer.rs b/crates/plotnik-lib/src/analyze/type_check/infer.rs index cbe08e3a..4e423b44 100644 --- a/crates/plotnik-lib/src/analyze/type_check/infer.rs +++ b/crates/plotnik-lib/src/analyze/type_check/infer.rs @@ -512,7 +512,9 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TypeFlow::Void => { // Scalar list: void inner -> array of Node (or Ref) let element = self.get_recursive_ref_type(inner).unwrap_or(TYPE_NODE); - let array_type = self.ctx.intern_type(TypeShape::Array { element, non_empty }); + let array_type = self + .ctx + .intern_type(TypeShape::Array { element, non_empty }); TypeFlow::Scalar(array_type) } TypeFlow::Scalar(t) => { diff --git a/crates/plotnik-lib/src/bytecode/dump.rs b/crates/plotnik-lib/src/bytecode/dump.rs index 94e5450f..1232f62c 100644 --- a/crates/plotnik-lib/src/bytecode/dump.rs +++ b/crates/plotnik-lib/src/bytecode/dump.rs @@ -7,11 +7,11 @@ use std::fmt::Write as _; use crate::colors::Colors; -use super::format::{format_effect, nav_symbol_epsilon, width_for_count, LineBuilder, Symbol}; +use super::format::{LineBuilder, Symbol, format_effect, nav_symbol_epsilon, width_for_count}; use super::ids::{QTypeId, StepId}; use super::module::{Instruction, Module}; use super::type_meta::TypeKind; -use super::{Call, Match, Return}; +use super::{Call, Match, Return, Trampoline}; /// Generate a human-readable dump of the bytecode module. pub fn dump(module: &Module, colors: Colors) -> String { @@ -37,7 +37,6 @@ fn dump_header(out: &mut String, module: &Module, ctx: &DumpContext) { out.push('\n'); } - /// Context for dump formatting, precomputes lookups for O(1) access. struct DumpContext { /// Whether the bytecode is linked (contains grammar IDs vs StringIds). @@ -74,6 +73,8 @@ impl DumpContext { let node_fields = module.node_fields(); let mut step_labels = BTreeMap::new(); + // Preamble always starts at step 0 + step_labels.insert(0, "_ObjWrap".to_string()); for i in 0..entrypoints.len() { let ep = entrypoints.get(i); let name = strings.get(ep.name).to_string(); @@ -249,10 +250,7 @@ fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) { writeln!( out, "M{i:0mw$}: S{:0sw$} → T{:0tw$} {}; {name}: {type_name}{}", - member.name.0, - member.type_id.0, - c.dim, - c.reset + member.name.0, member.type_id.0, c.dim, c.reset ) .unwrap(); } @@ -274,11 +272,7 @@ fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) { writeln!( out, "N{i:0nw$}: S{:0sw$} → T{:0tw$} {}; {}{name}{}", - entry.name.0, - entry.type_id.0, - c.dim, - c.blue, - c.reset + entry.name.0, entry.type_id.0, c.dim, c.blue, c.reset ) .unwrap(); } @@ -356,10 +350,16 @@ fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap(); let mut step = 0u16; + let mut first_label = true; while (step as usize) < transitions_count { // Check if this step has a label (using raw u16) if let Some(label) = ctx.step_labels.get(&step) { - writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap(); + if first_label { + writeln!(out, "{}{label}{}:", c.blue, c.reset).unwrap(); + first_label = false; + } else { + writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap(); + } } let instr = module.decode_step_alloc(step); @@ -373,7 +373,6 @@ fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { } } - fn instruction_step_count(instr: &Instruction) -> u16 { match instr { Instruction::Match(m) => { @@ -400,11 +399,10 @@ fn instruction_step_count(instr: &Instruction) -> u16 { 8 // Match64 } } - Instruction::Call(_) | Instruction::Return(_) => 1, + Instruction::Call(_) | Instruction::Return(_) | Instruction::Trampoline(_) => 1, } } - fn format_instruction( step: u16, instr: &Instruction, @@ -416,10 +414,10 @@ fn format_instruction( Instruction::Match(m) => format_match(step, m, module, ctx, step_width), Instruction::Call(c) => format_call(step, c, module, ctx, step_width), Instruction::Return(r) => format_return(step, r, module, ctx, step_width), + Instruction::Trampoline(t) => format_trampoline(step, t, ctx, step_width), } } - fn format_match( step: u16, m: &Match, @@ -571,6 +569,20 @@ fn format_return( builder.pad_successors(prefix, "▶") } +fn format_trampoline(step: u16, t: &Trampoline, _ctx: &DumpContext, step_width: usize) -> String { + let builder = LineBuilder::new(step_width); + let prefix = format!( + " {:0sw$} {} ", + step, + Symbol::EMPTY.format(), + sw = step_width + ); + let content = "Trampoline"; + let successors = format!("{:0w$}", t.next.get(), w = step_width); + let base = format!("{prefix}{content}"); + builder.pad_successors(base, &successors) +} + /// Format a step ID, showing entrypoint label or numeric ID. fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String { let c = &ctx.colors; @@ -580,4 +592,3 @@ fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String { format!("{:0w$}", step.get(), w = step_width) } } - diff --git a/crates/plotnik-lib/src/bytecode/format.rs b/crates/plotnik-lib/src/bytecode/format.rs index fc025f7e..c1802e22 100644 --- a/crates/plotnik-lib/src/bytecode/format.rs +++ b/crates/plotnik-lib/src/bytecode/format.rs @@ -6,9 +6,9 @@ //! | | pad | | (sym) | | | | | //! ``` +use super::EffectOp; use super::effects::EffectOpcode; use super::nav::Nav; -use super::EffectOp; // ============================================================================= // Column Layout diff --git a/crates/plotnik-lib/src/bytecode/ids.rs b/crates/plotnik-lib/src/bytecode/ids.rs index ce4ebc32..bd87d70d 100644 --- a/crates/plotnik-lib/src/bytecode/ids.rs +++ b/crates/plotnik-lib/src/bytecode/ids.rs @@ -6,29 +6,28 @@ use super::constants::STEP_SIZE; /// Index into the Transitions section (8-byte steps). /// -/// Uses NonZeroU16 to make StepId(0) unrepresentable - terminal state -/// is expressed through absence (empty successors, None) rather than -/// a magic value. +/// Step 0 is a valid address (preamble starts there). +/// In successor fields, raw value 0 means "terminal" — this sentinel +/// is handled by decoding logic, not by the type. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] #[repr(transparent)] -pub struct StepId(pub NonZeroU16); +pub struct StepId(pub u16); impl StepId { - /// Create a new StepId. Panics if n == 0. #[inline] pub fn new(n: u16) -> Self { - Self(NonZeroU16::new(n).expect("StepId cannot be 0")) + Self(n) } /// Get the raw u16 value. #[inline] pub fn get(self) -> u16 { - self.0.get() + self.0 } #[inline] pub fn byte_offset(self) -> usize { - self.0.get() as usize * STEP_SIZE + self.0 as usize * STEP_SIZE } } @@ -66,6 +65,7 @@ mod tests { #[test] fn step_id_byte_offset() { + assert_eq!(StepId::new(0).byte_offset(), 0); assert_eq!(StepId::new(1).byte_offset(), 8); assert_eq!(StepId::new(10).byte_offset(), 80); } diff --git a/crates/plotnik-lib/src/bytecode/instructions.rs b/crates/plotnik-lib/src/bytecode/instructions.rs index e375f20c..1ddb7dd0 100644 --- a/crates/plotnik-lib/src/bytecode/instructions.rs +++ b/crates/plotnik-lib/src/bytecode/instructions.rs @@ -35,6 +35,7 @@ pub enum Opcode { Match64 = 0x5, Call = 0x6, Return = 0x7, + Trampoline = 0x8, } impl Opcode { @@ -48,6 +49,7 @@ impl Opcode { 0x5 => Self::Match64, 0x6 => Self::Call, 0x7 => Self::Return, + 0x8 => Self::Trampoline, _ => panic!("invalid opcode: {v}"), } } @@ -63,6 +65,7 @@ impl Opcode { Self::Match64 => 64, Self::Call => 8, Self::Return => 8, + Self::Trampoline => 8, } } @@ -172,9 +175,11 @@ impl Match { if opcode == Opcode::Match8 { // Match8: single successor in bytes 6-7 (0 = terminal) let next_raw = u16::from_le_bytes([bytes[6], bytes[7]]); - let successors = NonZeroU16::new(next_raw) - .map(|n| vec![StepId(n)]) - .unwrap_or_default(); + let successors = if next_raw == 0 { + vec![] // terminal + } else { + vec![StepId(next_raw)] + }; Self { segment, @@ -411,8 +416,8 @@ impl<'a> MatchView<'a> { ); if self.is_match8 { debug_assert!(idx == 0); - // Safe: we only call this when succ_count > 0, meaning match8_next != 0 - StepId(NonZeroU16::new(self.match8_next).unwrap()) + debug_assert!(self.match8_next != 0, "terminal has no successors"); + StepId(self.match8_next) } else { let offset = self.succ_offset() + idx * 2; StepId::new(u16::from_le_bytes([ @@ -544,6 +549,48 @@ impl Return { } } +/// Trampoline instruction for universal entry. +/// +/// Like Call, but the target comes from VM context (external parameter) +/// rather than being encoded in the instruction. Used at address 0 for +/// the entry preamble: `Obj → Trampoline → EndObj → Accept`. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub struct Trampoline { + /// Segment index (0-15). + pub segment: u8, + /// Return address (where to continue after entrypoint returns). + pub next: StepId, +} + +impl Trampoline { + /// Decode from 8-byte bytecode. + pub fn from_bytes(bytes: [u8; 8]) -> Self { + let type_id_byte = bytes[0]; + let segment = type_id_byte >> 4; + assert!( + segment == 0, + "non-zero segment not yet supported: {segment}" + ); + let opcode = Opcode::from_u8(type_id_byte & 0xF); + assert_eq!(opcode, Opcode::Trampoline, "expected Trampoline opcode"); + + Self { + segment, + next: StepId::new(u16::from_le_bytes([bytes[2], bytes[3]])), + } + } + + /// Encode to 8-byte bytecode. + pub fn to_bytes(&self) -> [u8; 8] { + let mut bytes = [0u8; 8]; + bytes[0] = (self.segment << 4) | (Opcode::Trampoline as u8); + // bytes[1] is padding + bytes[2..4].copy_from_slice(&self.next.get().to_le_bytes()); + // bytes[4..8] are reserved/padding + bytes + } +} + /// Select the smallest Match variant that fits the given payload. pub fn select_match_opcode(slots_needed: usize) -> Option { if slots_needed == 0 { diff --git a/crates/plotnik-lib/src/bytecode/ir.rs b/crates/plotnik-lib/src/bytecode/ir.rs index fc6c838f..8f7dc3df 100644 --- a/crates/plotnik-lib/src/bytecode/ir.rs +++ b/crates/plotnik-lib/src/bytecode/ir.rs @@ -9,7 +9,7 @@ use std::num::NonZeroU16; use super::effects::{EffectOp, EffectOpcode}; use super::ids::StepId; -use super::instructions::{Call, Match, Return, select_match_opcode}; +use super::instructions::{Call, Match, Return, Trampoline, select_match_opcode}; use super::nav::Nav; use crate::analyze::type_check::TypeId; @@ -27,12 +27,33 @@ impl Label { /// Symbolic reference to a struct field or enum variant. /// Resolved to absolute member index during bytecode emission. +/// +/// Struct field indices are deduplicated globally: same (name, type) pair → same index. +/// This enables call-site scoping where uncaptured refs share the caller's scope. +/// +/// Enum variant indices use the traditional (parent_type, relative_index) approach +/// since enum variants don't bubble between scopes. #[derive(Clone, Copy, Debug)] pub enum MemberRef { /// Already resolved to absolute index (for cases where it's known). Absolute(u16), - /// Deferred resolution: (struct/enum type, relative field/variant index). - Deferred { type_id: TypeId, relative_index: u16 }, + /// Deferred resolution by field identity (for struct fields). + /// The same (field_name, field_type) pair resolves to the same member index + /// regardless of which struct type contains it. + Deferred { + /// The Symbol of the field name (from query interner). + field_name: plotnik_core::Symbol, + /// The TypeId of the field's value type (from query TypeContext). + field_type: TypeId, + }, + /// Deferred resolution by parent type + relative index (for enum variants). + /// Uses the parent enum's member_base + relative_index. + DeferredByIndex { + /// The TypeId of the parent enum type. + parent_type: TypeId, + /// Relative index within the parent type's members. + relative_index: u16, + }, } impl MemberRef { @@ -41,21 +62,41 @@ impl MemberRef { Self::Absolute(index) } - /// Create a deferred reference. - pub fn deferred(type_id: TypeId, relative_index: u16) -> Self { - Self::Deferred { type_id, relative_index } + /// Create a deferred reference by field identity (for struct fields). + pub fn deferred(field_name: plotnik_core::Symbol, field_type: TypeId) -> Self { + Self::Deferred { + field_name, + field_type, + } + } + + /// Create a deferred reference by parent type + index (for enum variants). + pub fn deferred_by_index(parent_type: TypeId, relative_index: u16) -> Self { + Self::DeferredByIndex { + parent_type, + relative_index, + } } - /// Resolve this reference using a member base lookup function. - pub fn resolve(self, get_member_base: F) -> u16 + /// Resolve this reference using lookup functions. + /// + /// - `lookup_member`: maps (field_name Symbol, field_type TypeId) to member index + /// - `get_member_base`: maps parent TypeId to member base index + pub fn resolve(self, lookup_member: F, get_member_base: G) -> u16 where - F: Fn(TypeId) -> Option, + F: Fn(plotnik_core::Symbol, TypeId) -> Option, + G: Fn(TypeId) -> Option, { match self { Self::Absolute(n) => n, - Self::Deferred { type_id, relative_index } => { - get_member_base(type_id).unwrap_or(0) + relative_index - } + Self::Deferred { + field_name, + field_type, + } => lookup_member(field_name, field_type).unwrap_or(0), + Self::DeferredByIndex { + parent_type, + relative_index, + } => get_member_base(parent_type).unwrap_or(0) + relative_index, } } } @@ -74,25 +115,40 @@ pub struct EffectIR { impl EffectIR { /// Create a simple effect without member reference. pub fn simple(opcode: EffectOpcode, payload: usize) -> Self { - Self { opcode, payload, member_ref: None } + Self { + opcode, + payload, + member_ref: None, + } } /// Create an effect with a member reference. pub fn with_member(opcode: EffectOpcode, member_ref: MemberRef) -> Self { - Self { opcode, payload: 0, member_ref: Some(member_ref) } + Self { + opcode, + payload: 0, + member_ref: Some(member_ref), + } } /// Resolve this IR effect to a concrete EffectOp. - pub fn resolve(&self, get_member_base: F) -> EffectOp + /// + /// - `lookup_member`: maps (field_name Symbol, field_type TypeId) to member index + /// - `get_member_base`: maps parent TypeId to member base index + pub fn resolve(&self, lookup_member: F, get_member_base: G) -> EffectOp where - F: Fn(TypeId) -> Option, + F: Fn(plotnik_core::Symbol, TypeId) -> Option, + G: Fn(TypeId) -> Option, { let payload = if let Some(member_ref) = self.member_ref { - member_ref.resolve(&get_member_base) as usize + member_ref.resolve(&lookup_member, &get_member_base) as usize } else { self.payload }; - EffectOp { opcode: self.opcode, payload } + EffectOp { + opcode: self.opcode, + payload, + } } } @@ -102,6 +158,7 @@ pub enum Instruction { Match(MatchIR), Call(CallIR), Return(ReturnIR), + Trampoline(TrampolineIR), } impl Instruction { @@ -112,6 +169,7 @@ impl Instruction { Self::Match(m) => m.label, Self::Call(c) => c.label, Self::Return(r) => r.label, + Self::Trampoline(t) => t.label, } } @@ -119,7 +177,7 @@ impl Instruction { pub fn size(&self) -> usize { match self { Self::Match(m) => m.size(), - Self::Call(_) | Self::Return(_) => 8, + Self::Call(_) | Self::Return(_) | Self::Trampoline(_) => 8, } } @@ -129,18 +187,29 @@ impl Instruction { Self::Match(m) => m.successors.clone(), Self::Call(c) => vec![c.next], Self::Return(_) => vec![], + Self::Trampoline(t) => vec![t.next], } } /// Resolve labels and serialize to bytecode bytes. - pub fn resolve(&self, map: &BTreeMap, get_member_base: F) -> Vec + /// + /// - `lookup_member`: maps (field_name Symbol, field_type TypeId) to member index + /// - `get_member_base`: maps parent TypeId to member base index + pub fn resolve( + &self, + map: &BTreeMap, + lookup_member: F, + get_member_base: G, + ) -> Vec where - F: Fn(TypeId) -> Option, + F: Fn(plotnik_core::Symbol, TypeId) -> Option, + G: Fn(TypeId) -> Option, { match self { - Self::Match(m) => m.resolve(map, get_member_base), + Self::Match(m) => m.resolve(map, lookup_member, get_member_base), Self::Call(c) => c.resolve(map).to_vec(), Self::Return(r) => r.resolve().to_vec(), + Self::Trampoline(t) => t.resolve(map).to_vec(), } } } @@ -185,15 +254,22 @@ impl MatchIR { + self.post_effects.len() + self.successors.len(); - select_match_opcode(slots) - .map(|op| op.size()) - .unwrap_or(64) + select_match_opcode(slots).map(|op| op.size()).unwrap_or(64) } /// Resolve labels and serialize to bytecode bytes. - pub fn resolve(&self, map: &BTreeMap, get_member_base: F) -> Vec + /// + /// - `lookup_member`: maps (field_name Symbol, field_type TypeId) to member index + /// - `get_member_base`: maps parent TypeId to member base index + pub fn resolve( + &self, + map: &BTreeMap, + lookup_member: F, + get_member_base: G, + ) -> Vec where - F: Fn(TypeId) -> Option, + F: Fn(plotnik_core::Symbol, TypeId) -> Option, + G: Fn(TypeId) -> Option, { let successors: Vec = self.successors.iter().map(|&l| l.resolve(map)).collect(); @@ -201,12 +277,12 @@ impl MatchIR { let pre_effects: Vec = self .pre_effects .iter() - .map(|e| e.resolve(&get_member_base)) + .map(|e| e.resolve(&lookup_member, &get_member_base)) .collect(); let post_effects: Vec = self .post_effects .iter() - .map(|e| e.resolve(&get_member_base)) + .map(|e| e.resolve(&lookup_member, &get_member_base)) .collect(); let m = Match { @@ -274,6 +350,29 @@ impl ReturnIR { } } +/// Trampoline instruction IR with symbolic return address. +/// +/// Trampoline is like Call, but the target comes from VM context (external parameter) +/// rather than being encoded in the instruction. Used for universal entry preamble. +#[derive(Clone, Debug)] +pub struct TrampolineIR { + /// Where this instruction lives. + pub label: Label, + /// Return address (where to continue after entrypoint returns). + pub next: Label, +} + +impl TrampolineIR { + /// Resolve labels and serialize to bytecode bytes. + pub fn resolve(&self, map: &BTreeMap) -> [u8; 8] { + let t = Trampoline { + segment: 0, + next: self.next.resolve(map), + }; + t.to_bytes() + } +} + /// Result of layout: maps labels to step IDs. #[derive(Clone, Debug)] pub struct LayoutResult { @@ -345,9 +444,7 @@ mod tests { assert_eq!(c.successors(), vec![Label(4)]); - let r = Instruction::Return(ReturnIR { - label: Label(6), - }); + let r = Instruction::Return(ReturnIR { label: Label(6) }); assert!(r.successors().is_empty()); } @@ -369,7 +466,7 @@ mod tests { let mut map = BTreeMap::new(); map.insert(Label(0), StepId::new(1)); - let bytes = m.resolve(&map, |_| None); + let bytes = m.resolve(&map, |_, _| None, |_| None); assert_eq!(bytes.len(), 8); // Verify opcode is Match8 (0x0) @@ -380,33 +477,76 @@ mod tests { #[test] fn member_ref_resolution() { + use plotnik_core::Symbol; + + // Create test symbols (these are just integer handles) + let field_name = Symbol::from_raw(100); + let field_type = TypeId(10); + let parent_type = TypeId(20); + // Test absolute reference let abs = MemberRef::absolute(42); - assert_eq!(abs.resolve(|_| None), 42); - - // Test deferred reference with base lookup - let deferred = MemberRef::deferred(TypeId(10), 2); - assert_eq!(deferred.resolve(|id| if id.0 == 10 { Some(100) } else { None }), 102); + assert_eq!(abs.resolve(|_, _| None, |_| None), 42); + + // Test deferred reference with lookup (struct field) + let deferred = MemberRef::deferred(field_name, field_type); + assert_eq!( + deferred.resolve( + |name, ty| { + if name == field_name && ty == field_type { + Some(77) + } else { + None + } + }, + |_| None + ), + 77 + ); - // Test deferred reference with no base (defaults to 0) - assert_eq!(deferred.resolve(|_| None), 2); + // Test deferred reference with no match (defaults to 0) + assert_eq!(deferred.resolve(|_, _| None, |_| None), 0); + + // Test deferred by index reference (enum variant) + let by_index = MemberRef::deferred_by_index(parent_type, 3); + assert_eq!( + by_index.resolve( + |_, _| None, + |ty| if ty == parent_type { Some(50) } else { None } + ), + 53 // base 50 + relative 3 + ); } #[test] fn effect_ir_resolution() { + use plotnik_core::Symbol; + + let field_name = Symbol::from_raw(200); + let field_type = TypeId(10); + // Simple effect without member ref let simple = EffectIR::simple(EffectOpcode::Node, 5); - let resolved = simple.resolve(|_| None); + let resolved = simple.resolve(|_, _| None, |_| None); assert_eq!(resolved.opcode, EffectOpcode::Node); assert_eq!(resolved.payload, 5); // Effect with deferred member ref let set_effect = EffectIR::with_member( EffectOpcode::Set, - MemberRef::deferred(TypeId(10), 1), + MemberRef::deferred(field_name, field_type), + ); + let resolved = set_effect.resolve( + |name, ty| { + if name == field_name && ty == field_type { + Some(51) + } else { + None + } + }, + |_| None, ); - let resolved = set_effect.resolve(|id| if id.0 == 10 { Some(50) } else { None }); assert_eq!(resolved.opcode, EffectOpcode::Set); - assert_eq!(resolved.payload, 51); // base 50 + relative 1 + assert_eq!(resolved.payload, 51); } } diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index 8c52a27e..049acbf0 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -20,7 +20,7 @@ pub use constants::{MAGIC, SECTION_ALIGN, STEP_SIZE, VERSION}; pub use ids::{QTypeId, StepId, StringId}; -pub use header::{flags, Header}; +pub use header::{Header, flags}; pub use sections::{FieldSymbol, NodeSymbol, Slice, TriviaEntry}; @@ -33,7 +33,7 @@ pub use nav::Nav; pub use effects::{EffectOp, EffectOpcode}; pub use instructions::{ - Call, Match, MatchView, Opcode, Return, align_to_section, select_match_opcode, + Call, Match, MatchView, Opcode, Return, Trampoline, align_to_section, select_match_opcode, }; pub use module::{ @@ -44,8 +44,8 @@ pub use module::{ pub use dump::dump; pub use format::{ - cols, format_effect, nav_symbol, nav_symbol_epsilon, superscript, trace, truncate_text, - width_for_count, LineBuilder, Symbol, + LineBuilder, Symbol, cols, format_effect, nav_symbol, nav_symbol_epsilon, superscript, trace, + truncate_text, width_for_count, }; #[cfg(test)] diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs index 12c946d5..b9535588 100644 --- a/crates/plotnik-lib/src/bytecode/module.rs +++ b/crates/plotnik-lib/src/bytecode/module.rs @@ -9,7 +9,7 @@ use std::path::Path; use super::header::Header; use super::ids::{QTypeId, StepId, StringId}; -use super::instructions::{Call, Match, MatchView, Opcode, Return}; +use super::instructions::{Call, Match, MatchView, Opcode, Return, Trampoline}; use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry}; use super::type_meta::{TypeDef, TypeMember, TypeMetaHeader, TypeName}; use super::{Entrypoint, SECTION_ALIGN, STEP_SIZE, VERSION}; @@ -62,6 +62,7 @@ pub enum Instruction { Match(Match), Call(Call), Return(Return), + Trampoline(Trampoline), } impl Instruction { @@ -81,6 +82,10 @@ impl Instruction { let arr: [u8; 8] = bytes[..8].try_into().unwrap(); Self::Return(Return::from_bytes(arr)) } + Opcode::Trampoline => { + let arr: [u8; 8] = bytes[..8].try_into().unwrap(); + Self::Trampoline(Trampoline::from_bytes(arr)) + } _ => Self::Match(Match::from_bytes(bytes)), } } @@ -94,6 +99,7 @@ pub enum InstructionView<'a> { Match(MatchView<'a>), Call(Call), Return(Return), + Trampoline(Trampoline), } impl<'a> InstructionView<'a> { @@ -114,6 +120,10 @@ impl<'a> InstructionView<'a> { let arr: [u8; 8] = bytes[..8].try_into().unwrap(); Self::Return(Return::from_bytes(arr)) } + Opcode::Trampoline => { + let arr: [u8; 8] = bytes[..8].try_into().unwrap(); + Self::Trampoline(Trampoline::from_bytes(arr)) + } _ => Self::Match(MatchView::from_bytes(bytes)), } } diff --git a/crates/plotnik-lib/src/bytecode/module_tests.rs b/crates/plotnik-lib/src/bytecode/module_tests.rs index 08262ed6..f08f4754 100644 --- a/crates/plotnik-lib/src/bytecode/module_tests.rs +++ b/crates/plotnik-lib/src/bytecode/module_tests.rs @@ -161,8 +161,8 @@ fn module_decode_step() { let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); - // Step 1 is the first instruction (step 0 is reserved) - let instr = module.decode_step_alloc(1); + // Step 0 is the preamble entry (Obj instruction) + let instr = module.decode_step_alloc(0); assert!(matches!(instr, crate::bytecode::Instruction::Match(_))); } @@ -184,8 +184,8 @@ fn module_from_path_mmap() { assert!(module.header().validate_magic()); - // Verify we can decode instructions (step 1 is the first instruction) - let instr = module.decode_step_alloc(1); + // Verify we can decode instructions (step 0 is the preamble entry) + let instr = module.decode_step_alloc(0); assert!(matches!(instr, crate::bytecode::Instruction::Match(_))); // Verify string lookup works through mmap diff --git a/crates/plotnik-lib/src/colors.rs b/crates/plotnik-lib/src/colors.rs index 644f43e3..11b6a103 100644 --- a/crates/plotnik-lib/src/colors.rs +++ b/crates/plotnik-lib/src/colors.rs @@ -43,11 +43,7 @@ impl Colors { /// Create colors based on enabled flag. pub fn new(enabled: bool) -> Self { - if enabled { - Self::ON - } else { - Self::OFF - } + if enabled { Self::ON } else { Self::OFF } } /// Check if colors are enabled. diff --git a/crates/plotnik-lib/src/compile/capture.rs b/crates/plotnik-lib/src/compile/capture.rs index 3eeafd9e..3e8b9d07 100644 --- a/crates/plotnik-lib/src/compile/capture.rs +++ b/crates/plotnik-lib/src/compile/capture.rs @@ -5,13 +5,13 @@ use std::collections::HashSet; -use crate::bytecode::ir::EffectIR; +use crate::analyze::type_check::{TypeContext, TypeId, TypeShape}; use crate::bytecode::EffectOpcode; +use crate::bytecode::ir::EffectIR; use crate::parser::ast::{self, Expr}; -use crate::analyze::type_check::{TypeContext, TypeId, TypeShape}; -use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier}; use super::Compiler; +use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier}; /// Capture effects to attach to the innermost match instruction. /// @@ -55,10 +55,14 @@ impl Compiler<'_> { }); if !is_structured_ref && !creates_structured_scope && !is_array { - let is_text = cap.type_annotation().is_some_and(|t| { - t.name().is_some_and(|n| n.text() == "string") - }); - let opcode = if is_text { EffectOpcode::Text } else { EffectOpcode::Node }; + let is_text = cap + .type_annotation() + .is_some_and(|t| t.name().is_some_and(|n| n.text() == "string")); + let opcode = if is_text { + EffectOpcode::Text + } else { + EffectOpcode::Node + }; effects.push(EffectIR::simple(opcode, 0)); } @@ -122,7 +126,10 @@ impl Compiler<'_> { .and_then(|def_id| self.type_ctx.get_def_type(def_id)) .and_then(|def_type| self.type_ctx.get_type(def_type)) .is_some_and(|shape| { - matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_) | TypeShape::Array { .. }) + matches!( + shape, + TypeShape::Struct(_) | TypeShape::Enum(_) | TypeShape::Array { .. } + ) }) } @@ -148,10 +155,7 @@ impl Compiler<'_> { /// /// Returns true when inner is a scope-creating expression (sequence/alternation) /// that produces an untagged struct (not an enum). Enums use Enum/EndEnum instead. -pub fn check_needs_struct_wrapper( - inner: &Expr, - type_ctx: &TypeContext, -) -> bool { +pub fn check_needs_struct_wrapper(inner: &Expr, type_ctx: &TypeContext) -> bool { let inner_info = type_ctx.get_term_info(inner); let inner_creates_scope = inner_creates_scope(inner); let inner_is_untagged_bubble = inner_info.as_ref().is_some_and(|info| { @@ -171,5 +175,7 @@ pub fn check_needs_struct_wrapper( /// Get row type ID for array element scoping. pub fn get_row_type_id(inner: &Expr, type_ctx: &TypeContext) -> Option { - type_ctx.get_term_info(inner).and_then(|info| info.flow.type_id()) + type_ctx + .get_term_info(inner) + .and_then(|info| info.flow.type_id()) } diff --git a/crates/plotnik-lib/src/compile/expressions.rs b/crates/plotnik-lib/src/compile/expressions.rs index c140a1f3..b2342380 100644 --- a/crates/plotnik-lib/src/compile/expressions.rs +++ b/crates/plotnik-lib/src/compile/expressions.rs @@ -9,13 +9,16 @@ use std::num::NonZeroU16; -use crate::bytecode::ir::{CallIR, EffectIR, Instruction, Label, MatchIR}; +use crate::analyze::type_check::TypeShape; +use crate::bytecode::ir::{EffectIR, Instruction, Label, MatchIR}; use crate::bytecode::{EffectOpcode, Nav}; use crate::parser::ast::{self, Expr}; -use super::capture::CaptureEffects; -use super::navigation::{check_trailing_anchor, inner_creates_scope, is_skippable_quantifier, is_star_or_plus_quantifier}; use super::Compiler; +use super::capture::CaptureEffects; +use super::navigation::{ + check_trailing_anchor, inner_creates_scope, is_skippable_quantifier, is_star_or_plus_quantifier, +}; impl Compiler<'_> { /// Compile a named node with capture effects. @@ -75,8 +78,14 @@ impl Compiler<'_> { // If first item is skippable: skip path → exit (bypass Up), match path → Up → exit let up_label = self.fresh_label(); let skip_exit = first_is_skippable.then_some(exit); - let items_entry = - self.compile_seq_items_inner(&items, up_label, true, None, CaptureEffects::default(), skip_exit); + let items_entry = self.compile_seq_items_inner( + &items, + up_label, + true, + None, + CaptureEffects::default(), + skip_exit, + ); self.instructions.push(Instruction::Match(MatchIR { label: up_label, @@ -137,8 +146,12 @@ impl Compiler<'_> { /// Compile a reference with capture effects. /// - /// For Call instructions, capture effects are placed in an epsilon after the call, - /// since the call returns a structured value that the effects operate on. + /// Call-site scoping: the caller decides whether to wrap with Obj/EndObj based on + /// whether the ref is captured and the called definition returns a struct. + /// + /// - Captured ref returning struct: `Obj → Call → EndObj → Set → exit` + /// - Captured ref returning scalar: `Call → Set → exit` + /// - Uncaptured ref: `Call → exit` (def's Sets go to parent scope) pub(super) fn compile_ref_inner( &mut self, r: &ast::Ref, @@ -160,25 +173,34 @@ impl Compiler<'_> { return exit; }; - // Determine return address: if capture effects present, emit epsilon for them - let return_addr = if capture.post.is_empty() { - exit - } else { - self.emit_effects_epsilon(exit, capture.post, CaptureEffects::default()) - }; + // Check if the called definition returns a struct (needs scope isolation when captured) + let def_type_id = self.type_ctx.get_def_type(def_id); + let ref_returns_struct = def_type_id + .and_then(|tid| self.type_ctx.get_type(tid)) + .is_some_and(|shape| matches!(shape, TypeShape::Struct(_))); - // Emit Call instruction with caller-provided navigation and field constraint. - // Return is emitted at definition exit, not here. - let call_label = self.fresh_label(); - self.instructions.push(Instruction::Call(CallIR { - label: call_label, - nav: nav_override.unwrap_or(Nav::Stay), - node_field: field_override, - next: return_addr, - target, - })); + // Determine if this is a captured ref that needs scope isolation + let is_captured = !capture.post.is_empty(); + let needs_scope = is_captured && ref_returns_struct; - call_label + let nav = nav_override.unwrap_or(Nav::Stay); + + if needs_scope { + // Captured ref returning struct: Obj → Call → EndObj → Set → exit + // The Obj creates an isolated scope for the definition's internal captures. + let set_step = self.emit_effects_epsilon(exit, capture.post, CaptureEffects::default()); + let endobj_step = self.emit_endobj_step(set_step); + let call_label = self.emit_call(nav, field_override, endobj_step, target); + self.emit_obj_step(call_label) + } else if is_captured { + // Captured ref returning scalar: Call → Set → exit + let return_addr = + self.emit_effects_epsilon(exit, capture.post, CaptureEffects::default()); + self.emit_call(nav, field_override, return_addr, target) + } else { + // Uncaptured ref: just Call → exit (def's Sets go to parent scope) + self.emit_call(nav, field_override, exit, target) + } } /// Compile a field constraint with capture effects (passed to inner pattern). @@ -236,7 +258,10 @@ impl Compiler<'_> { // If we have a field constraint, try to merge it into the value's instruction if let Some(field_id) = node_field { // Try to find and merge with the instruction we just created - if let Some(instr) = self.instructions.iter_mut().find(|i| i.label() == value_entry) + if let Some(instr) = self + .instructions + .iter_mut() + .find(|i| i.label() == value_entry) && let Instruction::Match(m) = instr && m.node_field.is_none() { @@ -285,7 +310,9 @@ impl Compiler<'_> { let inner = cap.inner(); let inner_info = inner.as_ref().and_then(|i| self.type_ctx.get_term_info(i)); - let inner_is_bubble = inner_info.as_ref().is_some_and(|info| info.flow.is_bubble()); + let inner_is_bubble = inner_info + .as_ref() + .is_some_and(|info| info.flow.is_bubble()); let inner_creates_scope = inner.as_ref().is_some_and(inner_creates_scope); // Scope type for inner compilation: @@ -311,10 +338,24 @@ impl Compiler<'_> { if inner_is_bubble { return if inner_creates_scope { // Sequence/alternation: capture effects after EndObj (value is the struct) - self.compile_struct_scope(&inner, exit, nav_override, scope_type_id, capture_effects, outer_capture) + self.compile_struct_scope( + &inner, + exit, + nav_override, + scope_type_id, + capture_effects, + outer_capture, + ) } else { // Node with bubbles: scope wrapper for inner captures, but capture on inner match - self.compile_bubble_with_node_capture(&inner, exit, nav_override, scope_type_id, capture_effects, outer_capture) + self.compile_bubble_with_node_capture( + &inner, + exit, + nav_override, + scope_type_id, + capture_effects, + outer_capture, + ) }; } @@ -323,13 +364,24 @@ impl Compiler<'_> { let inner_is_array = is_star_or_plus_quantifier(Some(&inner)); if inner_is_array { - return self.compile_array_scope(&inner, exit, nav_override, capture_effects, outer_capture); + return self.compile_array_scope( + &inner, + exit, + nav_override, + capture_effects, + outer_capture, + ); } // Scalar: capture effects go directly on the match instruction let mut combined = capture_effects; combined.extend(outer_capture.post); - self.compile_expr_inner(&inner, exit, nav_override, CaptureEffects { post: combined }) + self.compile_expr_inner( + &inner, + exit, + nav_override, + CaptureEffects { post: combined }, + ) } /// Compile a suppressive capture (@_ or @_name). diff --git a/crates/plotnik-lib/src/compile/mod.rs b/crates/plotnik-lib/src/compile/mod.rs index 2f648838..b1733f3c 100644 --- a/crates/plotnik-lib/src/compile/mod.rs +++ b/crates/plotnik-lib/src/compile/mod.rs @@ -24,13 +24,13 @@ mod sequences; use indexmap::IndexMap; use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; -use crate::bytecode::ir::{EffectIR, Instruction, Label, MatchIR, ReturnIR}; -use crate::bytecode::{EffectOpcode, Nav}; +use crate::bytecode::Nav; +use crate::bytecode::ir::{Instruction, Label, ReturnIR, TrampolineIR}; use crate::parser::ast::Expr; -use crate::emit::StringTableBuilder; use crate::analyze::symbol_table::SymbolTable; -use crate::analyze::type_check::{DefId, TypeContext, TypeShape}; +use crate::analyze::type_check::{DefId, TypeContext}; +use crate::emit::StringTableBuilder; pub use capture::CaptureEffects; use scope::StructScope; @@ -62,6 +62,9 @@ pub struct CompileResult { pub instructions: Vec, /// Entry labels for each definition (in definition order). pub def_entries: IndexMap, + /// Entry label for the universal preamble. + /// The preamble wraps any entrypoint: Obj → Trampoline → EndObj → Return + pub preamble_entry: Label, } /// Compiler state for Thompson construction. @@ -113,7 +116,18 @@ impl<'a> Compiler<'a> { node_type_ids: Option<&'a IndexMap>, node_field_ids: Option<&'a IndexMap>, ) -> Result { - let mut compiler = Self::new(interner, type_ctx, symbol_table, strings, node_type_ids, node_field_ids); + let mut compiler = Self::new( + interner, + type_ctx, + symbol_table, + strings, + node_type_ids, + node_field_ids, + ); + + // Emit universal preamble first: Obj → Trampoline → EndObj → Return + // This wraps any entrypoint to create the top-level scope. + let preamble_entry = compiler.emit_preamble(); // Pre-allocate entry labels for all definitions for (def_id, _) in type_ctx.iter_def_types() { @@ -129,9 +143,34 @@ impl<'a> Compiler<'a> { Ok(CompileResult { instructions: compiler.instructions, def_entries: compiler.def_entries, + preamble_entry, }) } + /// Emit the universal preamble: Obj → Trampoline → EndObj → Return + /// + /// The preamble creates a scope for the entrypoint's captures. + /// The Trampoline instruction jumps to the actual entrypoint (set via VM context). + fn emit_preamble(&mut self) -> Label { + // Return (stack is empty after preamble, so this means Accept) + let return_label = self.fresh_label(); + self.instructions.push(Instruction::Return(ReturnIR { + label: return_label, + })); + + // Chain: Obj → Trampoline → EndObj → Return + let endobj_label = self.emit_endobj_step(return_label); + + let trampoline_label = self.fresh_label(); + self.instructions + .push(Instruction::Trampoline(TrampolineIR { + label: trampoline_label, + next: endobj_label, + })); + + self.emit_obj_step(trampoline_label) + } + /// Generate a fresh label. pub(super) fn fresh_label(&mut self) -> Label { let l = Label(self.next_label_id); @@ -154,24 +193,9 @@ impl<'a> Compiler<'a> { // When stack is empty, Return means Accept (top-level match completed). // When stack is non-empty, Return pops frame and jumps to return address. let return_label = self.fresh_label(); - self.instructions - .push(Instruction::Return(ReturnIR { label: return_label })); - - // Check if definition needs Obj/EndObj wrapper. - // A definition needs its own scope when: - // 1. It returns a struct type, AND - // 2. It has direct captures (CapturedExpr not inside a Ref) - // - // When captures come only from Refs (called definitions), those definitions - // already handle their own Obj/EndObj scopes. Adding another wrapper would - // create nested scopes where the inner result gets lost. - let def_returns_struct = self - .type_ctx - .get_def_type(def_id) - .and_then(|tid| self.type_ctx.get_type(tid)) - .is_some_and(|shape| matches!(shape, TypeShape::Struct(_))); - let has_direct_captures = !Self::collect_captures(body).is_empty(); - let needs_obj_wrapper = def_returns_struct && has_direct_captures; + self.instructions.push(Instruction::Return(ReturnIR { + label: return_label, + })); // Definition bodies use StayExact navigation: match at current position only. // The caller (alternation, sequence, quantifier, or VM top-level) owns the search. @@ -179,43 +203,13 @@ impl<'a> Compiler<'a> { // alternation branches should try. let body_nav = Some(Nav::StayExact); - let body_entry = if needs_obj_wrapper { - let type_id = self.type_ctx.get_def_type(def_id).expect("checked above"); - - // Emit EndObj → Return - let endobj_label = self.fresh_label(); - self.instructions.push(Instruction::Match(MatchIR { - label: endobj_label, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![EffectIR::simple(EffectOpcode::EndObj, 0)], - successors: vec![return_label], - })); - - // Compile body with scope, targeting EndObj - let inner_entry = self.with_scope(type_id, |this| { - this.compile_expr_with_nav(body, endobj_label, body_nav) - }); - - // Emit Obj → inner_entry - let obj_label = self.fresh_label(); - self.instructions.push(Instruction::Match(MatchIR { - label: obj_label, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![EffectIR::simple(EffectOpcode::Obj, 0)], - neg_fields: vec![], - post_effects: vec![], - successors: vec![inner_entry], - })); - - obj_label - } else if let Some(type_id) = self.type_ctx.get_def_type(def_id) { - self.with_scope(type_id, |this| this.compile_expr_with_nav(body, return_label, body_nav)) + // Definitions are compiled in normalized form: body → Return + // No Obj/EndObj wrapper - that's the caller's responsibility (call-site scoping). + // We still use with_scope for member index lookup during compilation. + let body_entry = if let Some(type_id) = self.type_ctx.get_def_type(def_id) { + self.with_scope(type_id, |this| { + this.compile_expr_with_nav(body, return_label, body_nav) + }) } else { self.compile_expr_with_nav(body, return_label, body_nav) }; @@ -229,7 +223,12 @@ impl<'a> Compiler<'a> { } /// Compile an expression with an optional navigation override. - pub(super) fn compile_expr_with_nav(&mut self, expr: &Expr, exit: Label, nav_override: Option