diff --git a/crates/plotnik-compiler/src/bytecode/ir.rs b/crates/plotnik-compiler/src/bytecode/ir.rs index 15224c6..b042949 100644 --- a/crates/plotnik-compiler/src/bytecode/ir.rs +++ b/crates/plotnik-compiler/src/bytecode/ir.rs @@ -106,7 +106,7 @@ impl Label { /// /// Enum variant indices use the traditional (parent_type, relative_index) approach /// since enum variants don't bubble between scopes. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum MemberRef { /// Already resolved to absolute index (for cases where it's known). Absolute(u16), @@ -180,7 +180,7 @@ impl MemberRef { /// Effect operation with symbolic member references. /// Used during compilation; resolved to EffectOp during emission. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct EffectIR { pub opcode: EffectOpcode, /// Payload for effects that don't use member indices. @@ -290,7 +290,7 @@ impl EffectIR { /// /// Both variants store StringId (index into StringTable). For regex predicates, /// the pattern string is also compiled to a DFA during emit. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum PredicateValueIR { /// String comparison value. String(plotnik_bytecode::StringId), @@ -302,7 +302,7 @@ pub enum PredicateValueIR { /// /// Applied after node type/field matching. Compares node text against /// a string literal or regex pattern. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PredicateIR { pub op: PredicateOp, pub value: PredicateValueIR, diff --git a/crates/plotnik-compiler/src/compile/collapse_prefix.rs b/crates/plotnik-compiler/src/compile/collapse_prefix.rs new file mode 100644 index 0000000..f7d5d48 --- /dev/null +++ b/crates/plotnik-compiler/src/compile/collapse_prefix.rs @@ -0,0 +1,171 @@ +//! Prefix-collapse optimization: merge structurally identical successor instructions. +//! +//! 
When several successors of an instruction differ only in their own successor lists, +//! we can merge them into one instruction with combined successors. +//! +//! Before: +//! entry.successors = [A, B] +//! A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β] +//! B: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[γ] +//! +//! After: +//! entry.successors = [A] +//! A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β, γ] +//! B: unreachable → removed +//! +//! This arises after epsilon elimination when expanded targets are structurally identical. + +use std::collections::{HashMap, HashSet}; + +use crate::bytecode::{InstructionIR, Label, MatchIR}; +use crate::compile::CompileResult; + +/// Collapse structurally identical successor instructions. +/// +/// Uses collect-then-apply to avoid cascading merges from mutation during iteration. +/// Skips processing instructions that are merge targets to avoid conflicting updates. +pub fn collapse_prefix(result: &mut CompileResult) { + let label_to_idx: HashMap = result + .instructions + .iter() + .enumerate() + .map(|(i, instr)| (instr.label(), i)) + .collect(); + + // Phase 1a: Identify merge targets (instructions that will receive merged successors) + let mut merge_targets: HashSet