From 7adc3f37dd9c43dfdd16aebb84e18a0ac27e8973 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 5 Jan 2026 22:28:20 -0300 Subject: [PATCH] refactor: merge alternation wrapper effects into body instructions --- crates/plotnik-lib/src/compile/capture.rs | 21 ++- crates/plotnik-lib/src/compile/expressions.rs | 23 ++- crates/plotnik-lib/src/compile/quantifier.rs | 2 + crates/plotnik-lib/src/compile/scope.rs | 63 ++++--- crates/plotnik-lib/src/compile/sequences.rs | 164 ++++++++---------- ...n_tests__alternations_captured_tagged.snap | 10 +- ...gen_tests__alternations_in_quantifier.snap | 30 ++-- ...__codegen_tests__alternations_labeled.snap | 10 +- ...en_tests__alternations_null_injection.snap | 8 +- ...ternations_tagged_in_field_constraint.snap | 12 +- ...ternations_tagged_with_definition_ref.snap | 12 +- ...codegen_tests__alternations_unlabeled.snap | 8 +- ...s__captures_enum_with_type_annotation.snap | 10 +- ...tests__comprehensive_multi_definition.snap | 10 +- ..._codegen_tests__definitions_reference.snap | 8 +- ...it__codegen_tests__fields_alternation.snap | 10 +- ...emit__codegen_tests__recursion_simple.snap | 16 +- ...sts__recursion_with_structured_result.snap | 16 +- ..._engine_tests__recursion_member_chain.snap | 60 +++---- ...on_call_searches_for_field_constraint.snap | 52 +++--- 20 files changed, 246 insertions(+), 299 deletions(-) diff --git a/crates/plotnik-lib/src/compile/capture.rs b/crates/plotnik-lib/src/compile/capture.rs index 507df877..0fc7c943 100644 --- a/crates/plotnik-lib/src/compile/capture.rs +++ b/crates/plotnik-lib/src/compile/capture.rs @@ -13,15 +13,26 @@ use crate::parser::ast::{self, Expr}; use super::Compiler; use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier, is_truly_empty_scope}; -/// Capture effects to attach to the innermost match instruction. +/// Capture effects to attach to match instructions. /// -/// Instead of emitting a separate epsilon transition for capture effects, +/// Instead of emitting separate epsilon transitions for wrapper effects, /// these effects are propagated through the compilation chain and attached -/// directly to the match instruction that captures the node. +/// directly to match instructions. +/// +/// For sequences `{a b c}`: +/// - `pre` effects go on the first item (entry) +/// - `post` effects go on the last item (exit) +/// +/// For tagged alternations `[A: body]`: +/// - `pre` contains `Enum(variant)` for branch entry +/// - `post` contains `EndEnum` for branch exit #[derive(Clone, Default)] pub struct CaptureEffects { - /// Effects to place as post_effects on the matching instruction. - /// Typically: [Node/Text, Set(member)] or [Node/Text, Push] + /// Effects to place as pre_effects on the entry instruction. + /// Used for: Enum(variant) in tagged alternations. + pub pre: Vec, + /// Effects to place as post_effects on the exit instruction. + /// Typically: [Node/Text, Set(member)], [Push], or [EndEnum]. pub post: Vec, } diff --git a/crates/plotnik-lib/src/compile/expressions.rs b/crates/plotnik-lib/src/compile/expressions.rs index a7f5b1d4..97fba557 100644 --- a/crates/plotnik-lib/src/compile/expressions.rs +++ b/crates/plotnik-lib/src/compile/expressions.rs @@ -46,7 +46,7 @@ impl Compiler<'_> { nav, node_type, node_field: None, - pre_effects: vec![], + pre_effects: capture.pre, neg_fields, post_effects: capture.post, successors: vec![exit], @@ -115,7 +115,7 @@ impl Compiler<'_> { nav, node_type, node_field: None, - pre_effects: vec![], + pre_effects: capture.pre, neg_fields, post_effects: capture.post, successors: vec![items_entry], @@ -199,7 +199,7 @@ impl Compiler<'_> { nav, node_type, node_field: None, - pre_effects: vec![], + pre_effects: capture.pre, neg_fields, post_effects: capture.post, successors: vec![down_wildcard], @@ -230,7 +230,7 @@ impl Compiler<'_> { nav, node_type, node_field: None, - pre_effects: vec![], + pre_effects: capture.pre, neg_fields: vec![], post_effects: capture.post, successors: vec![exit], @@ -280,7 +280,8 @@ impl Compiler<'_> { let nav = nav_override.unwrap_or(Nav::Stay); - if needs_scope { + // Call instructions don't have pre_effects, so emit epsilon if needed + let call_entry = if needs_scope { // Captured ref returning struct: Obj → Call → EndObj → Set → exit // The Obj creates an isolated scope for the definition's internal captures. let set_step = self.emit_effects_epsilon(exit, capture.post, CaptureEffects::default()); @@ -295,7 +296,14 @@ impl Compiler<'_> { } else { // Uncaptured ref: just Call → exit (def's Sets go to parent scope) self.emit_call(nav, field_override, exit, target) + }; + + if capture.pre.is_empty() { + return call_entry; } + + // Wrap with pre-effects epsilon (e.g., Enum for tagged alternations) + self.emit_effects_epsilon(call_entry, capture.pre, CaptureEffects::default()) } /// Compile a field constraint with capture effects (passed to inner pattern). @@ -500,7 +508,10 @@ impl Compiler<'_> { &inner, exit, nav_override, - CaptureEffects { post: combined }, + CaptureEffects { + pre: outer_capture.pre, + post: combined, + }, ) } diff --git a/crates/plotnik-lib/src/compile/quantifier.rs b/crates/plotnik-lib/src/compile/quantifier.rs index 93f6b701..497852d9 100644 --- a/crates/plotnik-lib/src/compile/quantifier.rs +++ b/crates/plotnik-lib/src/compile/quantifier.rs @@ -193,6 +193,7 @@ impl Compiler<'_> { // Non-array capture: build capture effects and recurse let capture_effects = self.build_capture_effects(cap, Some(&inner)); let mut combined = CaptureEffects { + pre: capture.pre.clone(), post: capture_effects, }; combined.post.extend(capture.post); @@ -259,6 +260,7 @@ impl Compiler<'_> { let skip_endarr = self.emit_endarr_step(&capture_effects, &outer_capture.post, skip_exit); let push_effects = CaptureEffects { + pre: vec![], post: if self.quantifier_needs_node_for_push(inner) { let opcode = if cap.has_string_annotation() { EffectOpcode::Text diff --git a/crates/plotnik-lib/src/compile/scope.rs b/crates/plotnik-lib/src/compile/scope.rs index e6dfafbc..0ad621aa 100644 --- a/crates/plotnik-lib/src/compile/scope.rs +++ b/crates/plotnik-lib/src/compile/scope.rs @@ -145,6 +145,7 @@ impl Compiler<'_> { // Compile inner with capture_effects on the match instruction let inner_capture = CaptureEffects { + pre: outer_capture.pre, post: capture_effects, }; return self.compile_expr_inner(inner, actual_exit, nav_override, inner_capture); @@ -168,7 +169,9 @@ impl Compiler<'_> { })); // Compile inner WITH capture_effects on the match instruction + // Note: pre effects don't propagate through Obj/EndObj scope wrapper let inner_capture = CaptureEffects { + pre: vec![], post: capture_effects, }; let inner_entry = self.with_scope(scope_type_id.unwrap(), |this| { @@ -220,6 +223,7 @@ impl Compiler<'_> { })); let push_effects = CaptureEffects { + pre: vec![], post: if self.quantifier_needs_node_for_push(inner) { // Use Text if the capture has `:: string` annotation, else Node let opcode = if use_text_for_elements { @@ -491,39 +495,34 @@ impl Compiler<'_> { use crate::bytecode::MAX_MATCH_PAYLOAD_SLOTS; if successors.len() <= MAX_MATCH_PAYLOAD_SLOTS { - self.instructions.push(Instruction::Match(MatchIR { - label, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors, - })); - } else { - // Split: first (MAX-1) successors + intermediate for rest. - // This preserves priority order: VM tries s0, s1, ..., then intermediate. - let split_at = MAX_MATCH_PAYLOAD_SLOTS - 1; - let (first_batch, rest) = successors.split_at(split_at); - - let intermediate = self.fresh_label(); - self.emit_epsilon(intermediate, rest.to_vec()); - - let mut batch = first_batch.to_vec(); - batch.push(intermediate); - - self.instructions.push(Instruction::Match(MatchIR { - label, - nav: Nav::Stay, - node_type: None, - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: batch, - })); + self.push_epsilon(label, successors); + return; } + + // Split: first (MAX-1) successors + intermediate for rest. + // This preserves priority order: VM tries s0, s1, ..., then intermediate. + let split_at = MAX_MATCH_PAYLOAD_SLOTS - 1; + let (first_batch, rest) = successors.split_at(split_at); + + let intermediate = self.fresh_label(); + self.emit_epsilon(intermediate, rest.to_vec()); + + let mut batch = first_batch.to_vec(); + batch.push(intermediate); + self.push_epsilon(label, batch); + } + + fn push_epsilon(&mut self, label: Label, successors: Vec