Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions crates/plotnik-compiler/src/bytecode/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ impl Label {
///
/// Enum variant indices use the traditional (parent_type, relative_index) approach
/// since enum variants don't bubble between scopes.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MemberRef {
/// Already resolved to absolute index (for cases where it's known).
Absolute(u16),
Expand Down Expand Up @@ -180,7 +180,7 @@ impl MemberRef {

/// Effect operation with symbolic member references.
/// Used during compilation; resolved to EffectOp during emission.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EffectIR {
pub opcode: EffectOpcode,
/// Payload for effects that don't use member indices.
Expand Down Expand Up @@ -290,7 +290,7 @@ impl EffectIR {
///
/// Both variants store StringId (index into StringTable). For regex predicates,
/// the pattern string is also compiled to a DFA during emit.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PredicateValueIR {
/// String comparison value.
String(plotnik_bytecode::StringId),
Expand All @@ -302,7 +302,7 @@ pub enum PredicateValueIR {
///
/// Applied after node type/field matching. Compares node text against
/// a string literal or regex pattern.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PredicateIR {
pub op: PredicateOp,
pub value: PredicateValueIR,
Expand Down
171 changes: 171 additions & 0 deletions crates/plotnik-compiler/src/compile/collapse_prefix.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
//! Prefix-collapse optimization: merge structurally identical successor instructions.
//!
//! When an instruction has several successors that are identical in everything
//! except their own successor lists, we can merge them into one instruction
//! whose successor list is the concatenation of theirs.
//!
//! Before:
//! entry.successors = [A, B]
//! A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β]
//! B: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[γ]
//!
//! After:
//! entry.successors = [A]
//! A: nav=Down, pre=[e1], post=[e2], node_type=Named("x"), successors=[α, β, γ]
//! B: unreachable → removed
//!
//! This arises after epsilon elimination when expanded targets are structurally identical.

use std::collections::{HashMap, HashSet};

use crate::bytecode::{InstructionIR, Label, MatchIR};
use crate::compile::CompileResult;

/// Collapse structurally identical successor instructions.
///
/// For every `Match` instruction with at least two successors, successors that
/// are structurally equal (as decided by `group_by_structure`) are merged into
/// the first member of their group: the survivor receives the concatenated
/// successor lists of the whole group, and the other members are deleted from
/// `result.instructions`.
///
/// A successor takes part in a merge only if
/// - it is itself a `Match` instruction: the apply phase can only rewrite
///   `Match` successor lists, so absorbing any other kind would drop the
///   absorbed instruction's continuation; and
/// - it has exactly one incoming successor edge: otherwise another instruction
///   would keep pointing at a deleted label, or would unexpectedly inherit the
///   merged successors of the survivor.
///
/// Uses collect-then-apply to avoid cascading merges from mutation during iteration.
/// Instructions that take part in any merge (as survivor or as absorbed member)
/// are not processed as owners in the same pass, so no instruction is rewritten
/// as an owner and merged/removed at the same time.
pub fn collapse_prefix(result: &mut CompileResult) {
    let label_to_idx: HashMap<Label, usize> = result
        .instructions
        .iter()
        .enumerate()
        .map(|(i, instr)| (instr.label(), i))
        .collect();

    // Count incoming successor edges per label. A label listed twice by the
    // same instruction counts twice, which also keeps it out of any merge.
    let mut predecessor_count: HashMap<Label, usize> = HashMap::new();
    for instr in &result.instructions {
        for succ in instr.successors() {
            *predecessor_count.entry(succ).or_default() += 1;
        }
    }

    // Phase 1a: Plan merges per owner and record every label involved in one.
    let mut plans: Vec<(usize, Vec<Vec<Label>>)> = Vec::new();
    let mut involved: HashSet<Label> = HashSet::new();

    for (owner_idx, instr) in result.instructions.iter().enumerate() {
        let InstructionIR::Match(m) = instr else {
            continue;
        };
        if m.successors.len() < 2 {
            continue;
        }

        // Only single-predecessor `Match` successors are safe to merge.
        let candidates: Vec<Label> = m
            .successors
            .iter()
            .copied()
            .filter(|label| {
                predecessor_count.get(label).copied().unwrap_or(0) == 1
                    && label_to_idx.get(label).is_some_and(|&idx| {
                        matches!(&result.instructions[idx], InstructionIR::Match(_))
                    })
            })
            .collect();

        let mut groups = group_by_structure(&candidates, &label_to_idx, &result.instructions);
        groups.retain(|group| group.len() > 1);
        if groups.is_empty() {
            continue;
        }

        involved.extend(groups.iter().flatten().copied());
        plans.push((owner_idx, groups));
    }

    // Phase 1b: Collect updates, skipping owners that are themselves part of a merge.
    let mut updates: HashMap<Label, Vec<Label>> = HashMap::new();
    let mut removed: HashSet<Label> = HashSet::new();

    for (owner_idx, groups) in plans {
        let InstructionIR::Match(owner) = &result.instructions[owner_idx] else {
            continue;
        };

        // An owner that is a survivor or is absorbed elsewhere must keep its
        // successor list untouched in this pass to avoid conflicting updates.
        if involved.contains(&owner.label) {
            continue;
        }

        let mut absorbed: HashSet<Label> = HashSet::new();
        for group in &groups {
            let merged_succs: Vec<Label> = group
                .iter()
                .flat_map(|label| {
                    let idx = label_to_idx[label];
                    result.instructions[idx].successors()
                })
                .collect();

            updates.insert(group[0], merged_succs);
            absorbed.extend(group[1..].iter().copied());
        }

        // Keep the owner's original successor order, dropping absorbed labels.
        let new_successors: Vec<Label> = owner
            .successors
            .iter()
            .copied()
            .filter(|label| !absorbed.contains(label))
            .collect();

        updates.insert(owner.label, new_successors);
        removed.extend(absorbed);
    }

    // Phase 2: Apply all updates
    for instr in &mut result.instructions {
        if let InstructionIR::Match(m) = instr
            && let Some(new_succs) = updates.remove(&m.label)
        {
            m.successors = new_succs;
        }
    }

    // Phase 3: Remove absorbed instructions
    result
        .instructions
        .retain(|instr| !removed.contains(&instr.label()));
}

/// Partition `successors` into groups of labels whose instructions are
/// structurally equal (successor lists are ignored by the comparison).
///
/// Groups appear in order of their first member, and labels keep their
/// original relative order inside each group. Each label is compared against
/// the first member of every existing group. A label that is missing from
/// `label_to_idx` always ends up in a group of its own.
fn group_by_structure(
    successors: &[Label],
    label_to_idx: &HashMap<Label, usize>,
    instructions: &[InstructionIR],
) -> Vec<Vec<Label>> {
    let mut buckets: Vec<Vec<Label>> = Vec::new();

    for &label in successors {
        // Index of the first bucket whose head matches this label's instruction.
        let bucket_pos = label_to_idx.get(&label).and_then(|&idx| {
            let candidate = &instructions[idx];
            buckets.iter().position(|bucket| {
                label_to_idx
                    .get(&bucket[0])
                    .is_some_and(|&head_idx| structure_eq(&instructions[head_idx], candidate))
            })
        });

        match bucket_pos {
            Some(pos) => buckets[pos].push(label),
            None => buckets.push(vec![label]),
        }
    }

    buckets
}

/// Decide whether two instructions have the same structure.
///
/// Labels and successor lists are not compared. Instructions of different
/// kinds are never equal; two `Match` instructions defer to
/// `structure_eq_match`; two `Call` instructions must agree on `nav`,
/// `node_field` and `target`; any two `Return`s, and any two `Trampoline`s,
/// are considered equal.
fn structure_eq(a: &InstructionIR, b: &InstructionIR) -> bool {
    match (a, b) {
        (InstructionIR::Match(lhs), InstructionIR::Match(rhs)) => structure_eq_match(lhs, rhs),
        (InstructionIR::Call(lhs), InstructionIR::Call(rhs)) => {
            lhs.nav == rhs.nav && lhs.node_field == rhs.node_field && lhs.target == rhs.target
        }
        (InstructionIR::Return(_), InstructionIR::Return(_))
        | (InstructionIR::Trampoline(_), InstructionIR::Trampoline(_)) => true,
        _ => false,
    }
}

/// Decide whether two `MatchIR` values have the same structure.
///
/// Compares navigation, node type, node field, negated fields, pre/post
/// effects and the predicate. The label and the successor list are not compared.
fn structure_eq_match(a: &MatchIR, b: &MatchIR) -> bool {
    let same_node_shape = a.nav == b.nav
        && a.node_type == b.node_type
        && a.node_field == b.node_field
        && a.neg_fields == b.neg_fields;
    let same_effects = a.pre_effects == b.pre_effects && a.post_effects == b.post_effects;

    same_node_shape && same_effects && a.predicate == b.predicate
}
14 changes: 14 additions & 0 deletions crates/plotnik-compiler/src/compile/collapse_up.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ pub fn collapse_up(result: &mut CompileResult) {
.map(|(i, instr)| (instr.label(), i))
.collect();

// Count predecessors for each label - only remove labels with exactly one predecessor
let mut predecessor_count: HashMap<Label, usize> = HashMap::new();
for instr in &result.instructions {
for succ in instr.successors() {
*predecessor_count.entry(succ).or_default() += 1;
}
}

let mut removed: HashSet<Label> = HashSet::new();

for i in 0..result.instructions.len() {
Expand Down Expand Up @@ -71,6 +79,12 @@ pub fn collapse_up(result: &mut CompileResult) {
break;
}

// Only absorb if this label has exactly one predecessor
// (otherwise other instructions still need it)
if predecessor_count.get(&succ_label).copied().unwrap_or(0) != 1 {
break;
}

// Merge: add levels (capped at 63)
let new_level = current_level.saturating_add(succ_level).min(MAX_UP_LEVEL);
current_nav = set_up_level(current_nav, new_level);
Expand Down
4 changes: 4 additions & 0 deletions crates/plotnik-compiler/src/compile/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::parser::Expr;
use plotnik_bytecode::Nav;

use super::capture::CaptureEffects;
use super::collapse_prefix::collapse_prefix;
use super::collapse_up::collapse_up;
use super::dce::remove_unreachable;
use super::epsilon_elim::eliminate_epsilons;
Expand Down Expand Up @@ -88,6 +89,9 @@ impl<'a> Compiler<'a> {
// Remove unreachable instructions (bypassed epsilons, etc.)
remove_unreachable(&mut result);

// Collapse structurally identical successor instructions
collapse_prefix(&mut result);

// Collapse consecutive Up instructions of the same mode
collapse_up(&mut result);

Expand Down
1 change: 1 addition & 0 deletions crates/plotnik-compiler/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//! - `sequences`: Sequence and alternation compilation

mod capture;
mod collapse_prefix;
mod collapse_up;
mod compiler;
mod dce;
Expand Down
12 changes: 12 additions & 0 deletions crates/plotnik-compiler/src/emit/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,18 @@ fn optional_null_injection() {
"#});
}

// Optimization: prefix collapse

#[test]
fn opt_prefix_collapse() {
// Snapshot test for the prefix-collapse optimization.
// Both alternation branches start with the same `(object)` match and differ
// only in the child pattern that follows it: `(pair)` vs `(string)`.
// Without optimization: two separate (object) instructions
// With optimization: one (object) with merged successors
snap!(indoc! {r#"
Test = [(object (pair)) (object (string))]
"#});
}

// Comprehensive

#[test]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
source: crates/plotnik-compiler/src/emit/emit_tests.rs
---
Test = [(object (pair)) (object (string))]
---
[strings]
S0 "Beauty will save the world"
S1 "Test"
S2 "object"
S3 "pair"
S4 "string"

[type_defs]
T0 = <Void>

[type_members]

[type_names]
N0: S1 → T0 ; Test

[entrypoints]
Test = 06 :: T0

[transitions]
_ObjWrap:
00 ε [Obj] 02
02 Trampoline 03
03 ε [EndObj] 05
05 ▶

Test:
06 ε 08
07 ...
08 ! (object) 10, 12
10 ▽ (pair) 11
11 △ _ 14
12 ▽ (string) 13
13 △ _ 14
14 ▶