plotnik-lang · zharinov · Jan 15, 2026 · Jan 15, 2026
diff --git a/crates/plotnik-compiler/src/bytecode/ir.rs b/crates/plotnik-compiler/src/bytecode/ir.rs
@@ -106,7 +106,7 @@ impl Label {
 ///
 /// Enum variant indices use the traditional (parent_type, relative_index) approach
 /// since enum variants don't bubble between scopes.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum MemberRef {
     /// Already resolved to absolute index (for cases where it's known).
     Absolute(u16),
@@ -180,7 +180,7 @@ impl MemberRef {
 
 /// Effect operation with symbolic member references.
 /// Used during compilation; resolved to EffectOp during emission.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub struct EffectIR {
     pub opcode: EffectOpcode,
     /// Payload for effects that don't use member indices.
@@ -290,7 +290,7 @@ impl EffectIR {
 ///
 /// Both variants store StringId (index into StringTable). For regex predicates,
 /// the pattern string is also compiled to a DFA during emit.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub enum PredicateValueIR {
     /// String comparison value.
     String(plotnik_bytecode::StringId),
@@ -302,7 +302,7 @@ pub enum PredicateValueIR {
 ///
 /// Applied after node type/field matching. Compares node text against
 /// a string literal or regex pattern.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub struct PredicateIR {
     pub op: PredicateOp,
     pub value: PredicateValueIR,

diff --git a/crates/plotnik-compiler/src/compile/collapse_prefix.rs b/crates/plotnik-compiler/src/compile/collapse_prefix.rs
@@ -0,0 +1,171 @@
+//! Prefix-collapse optimization: merge structurally identical successor instructions.
+//!
+//! When several successors of one instruction are structurally identical and differ
+//! only in their own successor lists, they can be merged into a single instruction
+//! whose successor list is the concatenation of theirs.
+//!
+//! Before:
+//!   entry.successors = [A, B]
+//!   A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β]
+//!   B: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[γ]
+//!
+//! After:
+//!   entry.successors = [A]
+//!   A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β, γ]
+//!   B: unreachable → removed
+//!
+//! This arises after epsilon elimination when expanded targets are structurally identical.
+
+use std::collections::{HashMap, HashSet};
+
+use crate::bytecode::{InstructionIR, Label, MatchIR};
+use crate::compile::CompileResult;
+
+/// Collapse structurally identical successor instructions.
+///
+/// For every `Match` instruction with two or more successors, the successors are
+/// grouped by structural equality (see `group_by_structure`). A group is merged into
+/// its first member: that member receives the concatenated successor lists of the
+/// whole group, and the remaining members are removed from `result.instructions`.
+///
+/// A group is merged only when that is safe (see `is_mergeable_group`):
+/// - every member has exactly one incoming successor edge, so widening the first
+///   member's successors cannot change any other path, removing the other members
+///   cannot leave dangling labels, and no two parents can claim the same target;
+/// - the first member is a `Match` (the only instruction kind whose successor list
+///   is rewritten in Phase 2), or all members already have identical successors.
+///
+/// Uses collect-then-apply to avoid cascading merges from mutation during iteration.
+/// Instructions that take part in any mergeable group (as target or as absorbed
+/// member) are not processed as parents in this pass, so the successor lists read
+/// while merging are never stale.
+///
+/// NOTE(review): only successor edges are counted; confirm that an absorbed label
+/// cannot also be referenced from outside the successor graph (e.g. entry points).
+/// NOTE(review): merging moves an absorbed member's alternatives ahead of successors
+/// that sat between group members; confirm successor order carries no priority there.
+pub fn collapse_prefix(result: &mut CompileResult) {
+    let label_to_idx: HashMap<Label, usize> = result
+        .instructions
+        .iter()
+        .enumerate()
+        .map(|(i, instr)| (instr.label(), i))
+        .collect();
+
+    // Count incoming successor edges per label. Duplicate edges count separately,
+    // so a label listed twice by the same parent is never treated as mergeable.
+    let mut predecessor_count: HashMap<Label, usize> = HashMap::new();
+    for instr in &result.instructions {
+        for succ in instr.successors() {
+            *predecessor_count.entry(succ).or_default() += 1;
+        }
+    }
+
+    // Phase 1a: Identify every instruction that takes part in a mergeable group,
+    // both the merge target and the members that would be absorbed into it.
+    let mut participants: HashSet<Label> = HashSet::new();
+    for instr in &result.instructions {
+        let InstructionIR::Match(m) = instr else {
+            continue;
+        };
+        if m.successors.len() < 2 {
+            continue;
+        }
+        let groups = group_by_structure(&m.successors, &label_to_idx, &result.instructions);
+        for group in groups {
+            let mergeable = is_mergeable_group(
+                &group,
+                &label_to_idx,
+                &result.instructions,
+                &predecessor_count,
+            );
+            if mergeable {
+                participants.extend(group);
+            }
+        }
+    }
+
+    // Phase 1b: Collect updates, skipping participants
+    let mut updates: HashMap<Label, Vec<Label>> = HashMap::new();
+    let mut removed: HashSet<Label> = HashSet::new();
+
+    for instr in &result.instructions {
+        let InstructionIR::Match(m) = instr else {
+            continue;
+        };
+
+        // Skip participants: their successor lists are being rewritten (targets)
+        // or folded into another instruction (absorbed members).
+        if participants.contains(&m.label) {
+            continue;
+        }
+
+        if m.successors.len() < 2 {
+            continue;
+        }
+
+        let groups = group_by_structure(&m.successors, &label_to_idx, &result.instructions);
+
+        let mut merged_any = false;
+        for group in groups {
+            let mergeable = is_mergeable_group(
+                &group,
+                &label_to_idx,
+                &result.instructions,
+                &predecessor_count,
+            );
+            if !mergeable {
+                continue;
+            }
+
+            let first = group[0];
+            // Members of a multi-member group are always present in `label_to_idx`
+            // (unknown labels are placed in singleton groups), so indexing is safe.
+            let merged_succs: Vec<Label> = group
+                .iter()
+                .flat_map(|&label| {
+                    let idx = label_to_idx[&label];
+                    result.instructions[idx].successors()
+                })
+                .collect();
+
+            // For a non-Match target this entry is ignored in Phase 2; such groups
+            // are only mergeable when all members share the same successors anyway.
+            updates.insert(first, merged_succs);
+            removed.extend(group[1..].iter().copied());
+            merged_any = true;
+        }
+
+        if !merged_any {
+            continue;
+        }
+
+        // Keep the original successor order and drop the absorbed members. An
+        // absorbed label has a single incoming edge, so anything in `removed` that
+        // appears here was absorbed by this very parent.
+        let kept: Vec<Label> = m
+            .successors
+            .iter()
+            .copied()
+            .filter(|label| !removed.contains(label))
+            .collect();
+        updates.insert(m.label, kept);
+    }
+
+    // Phase 2: Apply all updates
+    for instr in &mut result.instructions {
+        if let InstructionIR::Match(m) = instr
+            && let Some(new_succs) = updates.remove(&m.label)
+        {
+            m.successors = new_succs;
+        }
+    }
+
+    // Phase 3: Remove absorbed instructions
+    result
+        .instructions
+        .retain(|instr| !removed.contains(&instr.label()));
+}
+
+/// Decide whether a structural-equality group may be merged into its first member.
+///
+/// Returns `true` only if the group has at least two members, every member has
+/// exactly one incoming successor edge according to `predecessor_count`, and either
+/// the first member is a `Match` or every member has the same successors as the first.
+fn is_mergeable_group(
+    group: &[Label],
+    label_to_idx: &HashMap<Label, usize>,
+    instructions: &[InstructionIR],
+    predecessor_count: &HashMap<Label, usize>,
+) -> bool {
+    if group.len() < 2 {
+        return false;
+    }
+
+    // Any member reachable from elsewhere must stay intact for its other predecessors.
+    let single_predecessor = group
+        .iter()
+        .all(|label| predecessor_count.get(label).copied().unwrap_or(0) == 1);
+    if !single_predecessor {
+        return false;
+    }
+
+    let Some(&first_idx) = label_to_idx.get(&group[0]) else {
+        return false;
+    };
+
+    // Match instructions can carry the combined successor list.
+    if matches!(instructions[first_idx], InstructionIR::Match(_)) {
+        return true;
+    }
+
+    // Other kinds are not rewritten, so dropping a member is only lossless when
+    // it continues exactly where the surviving member does.
+    let first_succs: Vec<Label> = instructions[first_idx].successors().into_iter().collect();
+    group[1..].iter().all(|label| {
+        label_to_idx.get(label).is_some_and(|&idx| {
+            let succs: Vec<Label> = instructions[idx].successors().into_iter().collect();
+            succs == first_succs
+        })
+    })
+}
+
+/// Partition `successors` into buckets of structurally equal instructions.
+///
+/// Two labels share a bucket when `structure_eq` holds between the instruction of
+/// the bucket's first label and the instruction of the candidate label. Buckets
+/// appear in order of their first member, and members keep their original order.
+/// A label missing from `label_to_idx` always gets a bucket of its own.
+fn group_by_structure(
+    successors: &[Label],
+    label_to_idx: &HashMap<Label, usize>,
+    instructions: &[InstructionIR],
+) -> Vec<Vec<Label>> {
+    let mut buckets: Vec<Vec<Label>> = Vec::new();
+
+    for &candidate in successors {
+        // Position of the first bucket whose representative matches the candidate;
+        // `None` when the candidate is unknown or nothing matches.
+        let slot = label_to_idx.get(&candidate).and_then(|&cand_idx| {
+            let cand_instr = &instructions[cand_idx];
+            buckets.iter().position(|bucket| {
+                label_to_idx
+                    .get(&bucket[0])
+                    .is_some_and(|&rep_idx| structure_eq(&instructions[rep_idx], cand_instr))
+            })
+        });
+
+        match slot {
+            Some(pos) => buckets[pos].push(candidate),
+            None => buckets.push(vec![candidate]),
+        }
+    }
+
+    buckets
+}
+
+/// Structural equality of two instructions, ignoring their labels and successors.
+///
+/// Instructions of different kinds are never equal. `Match` pairs are compared by
+/// `structure_eq_match`; `Call` pairs by `nav`, `node_field` and `target`; any two
+/// `Return`s, and any two `Trampoline`s, are considered equal.
+fn structure_eq(a: &InstructionIR, b: &InstructionIR) -> bool {
+    match (a, b) {
+        (InstructionIR::Return(_), InstructionIR::Return(_))
+        | (InstructionIR::Trampoline(_), InstructionIR::Trampoline(_)) => true,
+        (InstructionIR::Call(lhs), InstructionIR::Call(rhs)) => {
+            (&lhs.nav, &lhs.node_field, &lhs.target) == (&rhs.nav, &rhs.node_field, &rhs.target)
+        }
+        (InstructionIR::Match(lhs), InstructionIR::Match(rhs)) => structure_eq_match(lhs, rhs),
+        _ => false,
+    }
+}
+
+/// Structural equality of two `MatchIR`s: every compared field must agree.
+///
+/// Compares navigation, node type, node field, pre-effects, negated fields,
+/// post-effects and predicate, in that order. The label and the successor list are
+/// deliberately left out of the comparison.
+fn structure_eq_match(a: &MatchIR, b: &MatchIR) -> bool {
+    let lhs = (
+        &a.nav,
+        &a.node_type,
+        &a.node_field,
+        &a.pre_effects,
+        &a.neg_fields,
+        &a.post_effects,
+        &a.predicate,
+    );
+    let rhs = (
+        &b.nav,
+        &b.node_type,
+        &b.node_field,
+        &b.pre_effects,
+        &b.neg_fields,
+        &b.post_effects,
+        &b.predicate,
+    );
+    lhs == rhs
+}
diff --git a/crates/plotnik-compiler/src/compile/collapse_up.rs b/crates/plotnik-compiler/src/compile/collapse_up.rs
@@ -26,6 +26,14 @@ pub fn collapse_up(result: &mut CompileResult) {
         .map(|(i, instr)| (instr.label(), i))
         .collect();
 
+    // Count predecessors for each label - only remove labels with exactly one predecessor
+    let mut predecessor_count: HashMap<Label, usize> = HashMap::new();
+    for instr in &result.instructions {
+        for succ in instr.successors() {
+            *predecessor_count.entry(succ).or_default() += 1;
+        }
+    }
+
     let mut removed: HashSet<Label> = HashSet::new();
 
     for i in 0..result.instructions.len() {
@@ -71,6 +79,12 @@ pub fn collapse_up(result: &mut CompileResult) {
                 break;
             }
 
+            // Only absorb if this label has exactly one predecessor
+            // (otherwise other instructions still need it)
+            if predecessor_count.get(&succ_label).copied().unwrap_or(0) != 1 {
+                break;
+            }
+
             // Merge: add levels (capped at 63)
             let new_level = current_level.saturating_add(succ_level).min(MAX_UP_LEVEL);
             current_nav = set_up_level(current_nav, new_level);

diff --git a/crates/plotnik-compiler/src/compile/compiler.rs b/crates/plotnik-compiler/src/compile/compiler.rs
@@ -13,6 +13,7 @@ use crate::parser::Expr;
 use plotnik_bytecode::Nav;
 
 use super::capture::CaptureEffects;
+use super::collapse_prefix::collapse_prefix;
 use super::collapse_up::collapse_up;
 use super::dce::remove_unreachable;
 use super::epsilon_elim::eliminate_epsilons;
@@ -88,6 +89,9 @@ impl<'a> Compiler<'a> {
         // Remove unreachable instructions (bypassed epsilons, etc.)
         remove_unreachable(&mut result);
 
+        // Collapse structurally identical successor instructions
+        collapse_prefix(&mut result);
+
         // Collapse consecutive Up instructions of the same mode
         collapse_up(&mut result);
 

diff --git a/crates/plotnik-compiler/src/compile/mod.rs b/crates/plotnik-compiler/src/compile/mod.rs
@@ -15,6 +15,7 @@
 //! - `sequences`: Sequence and alternation compilation
 
 mod capture;
+mod collapse_prefix;
 mod collapse_up;
 mod compiler;
 mod dce;

diff --git a/crates/plotnik-compiler/src/emit/emit_tests.rs b/crates/plotnik-compiler/src/emit/emit_tests.rs
@@ -467,6 +467,18 @@ fn optional_null_injection() {
     "#});
 }
 
+// Optimization: prefix collapse
+
+/// Snapshot test for the prefix-collapse optimization.
+///
+/// Compiles an alternation whose two branches both start with `(object ...)` and
+/// records the emitted output as a snapshot via `snap!`.
+#[test]
+fn opt_prefix_collapse() {
+    // Alternation branches with a shared prefix: [(object ...) (object ...)].
+    // Without the optimization: two separate (object) instructions.
+    // With the optimization: a single (object) instruction whose successor list
+    // holds both continuations, (pair) and (string).
+    snap!(indoc! {r#"
+        Test = [(object (pair)) (object (string))]
+    "#});
+}
+
 // Comprehensive
 
 #[test]

diff --git a/...-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__opt_prefix_collapse.snap b/...-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__opt_prefix_collapse.snap
@@ -0,0 +1,39 @@
+---
+source: crates/plotnik-compiler/src/emit/emit_tests.rs
+---
+Test = [(object (pair)) (object (string))]
+---
+[strings]
+S0 "Beauty will save the world"
+S1 "Test"
+S2 "object"
+S3 "pair"
+S4 "string"
+
+[type_defs]
+T0 = <Void>
+
+[type_members]
+
+[type_names]
+N0: S1 → T0  ; Test
+
+[entrypoints]
+Test = 06 :: T0
+
+[transitions]
+_ObjWrap:
+  00   ε   [Obj]                            02
+  02       Trampoline                       03
+  03   ε   [EndObj]                         05
+  05                                        ▶
+
+Test:
+  06   ε                                    08
+  07  ...  
+  08   !   (object)                         10, 12
+  10   ▽   (pair)                           11
+  11   △   _                                14
+  12   ▽   (string)                         13
+  13   △   _                                14
+  14                                        ▶