From bbe5dc5a7c2f872ab56ef4a6e152f593b05295e4 Mon Sep 17 00:00:00 2001 From: bench Date: Fri, 27 Mar 2026 08:17:36 +0000 Subject: [PATCH 1/3] feat: add global value numbering pass Adds `gvn` as a new post-lowering optimizer pass. Extends local CSE across basic block boundaries using dominator-tree traversal: a pure computation available in a dominating block is reused in dominated blocks instead of being recomputed. Runs after local_cse so GVN handles only residual cross-block redundancies. Co-Authored-By: Claude Sonnet 4.6 --- crates/herkos-core/src/optimizer/gvn.rs | 699 ++++++++++++++++++++++++ crates/herkos-core/src/optimizer/mod.rs | 6 +- 2 files changed, 703 insertions(+), 2 deletions(-) create mode 100644 crates/herkos-core/src/optimizer/gvn.rs diff --git a/crates/herkos-core/src/optimizer/gvn.rs b/crates/herkos-core/src/optimizer/gvn.rs new file mode 100644 index 0000000..33a3efd --- /dev/null +++ b/crates/herkos-core/src/optimizer/gvn.rs @@ -0,0 +1,699 @@ +//! Global value numbering (GVN) — cross-block CSE using the dominator tree. +//! +//! Extends block-local CSE ([`super::local_cse`]) to work across basic blocks. +//! If block A dominates block B (every path to B passes through A), then any +//! pure computation defined in A with the same value key as one in B can be +//! reused in B instead of recomputing. +//! +//! ## Algorithm +//! +//! 1. Compute the immediate dominator of each block (Cooper/Harvey/Kennedy +//! iterative algorithm) to build the dominator tree. +//! 2. Walk the dominator tree in preorder using a scoped value-number table. +//! On entry to a block, push a new scope; on exit, pop it. +//! 3. For each pure instruction (`Const`, `BinOp`, `UnOp`) in the current +//! block, compute a value key. If the key already exists in any enclosing +//! scope (meaning it was computed in a dominating block), record a +//! replacement: `dest → first_var`. Otherwise insert the key into the +//! current scope. +//! 4. After the walk, rewrite all recorded destinations to +//! `Assign { dest, src: first_var }` and let copy-propagation clean up. +//! +//! **Only pure instructions are eligible.** Loads, calls, and memory ops are +//! never deduplicated (they may trap or have observable side effects). + +use super::utils::{build_predecessors, instr_dest, prune_dead_locals, terminator_successors}; +use crate::ir::{BinOp, BlockId, IrFunction, IrInstr, IrValue, UnOp, VarId}; +use std::collections::{HashMap, HashSet}; + +// ── Value key ──────────────────────────────────────────────────────────────── + +/// Hashable representation of a pure computation for deduplication. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum ValueKey { + Const(ConstKey), + BinOp { op: BinOp, lhs: VarId, rhs: VarId }, + UnOp { op: UnOp, operand: VarId }, +} + +/// Bit-level constant key that implements `Eq`/`Hash` correctly for floats. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum ConstKey { + I32(i32), + I64(i64), + F32(u32), + F64(u64), +} + +impl From for ConstKey { + fn from(v: IrValue) -> Self { + match v { + IrValue::I32(x) => ConstKey::I32(x), + IrValue::I64(x) => ConstKey::I64(x), + IrValue::F32(x) => ConstKey::F32(x.to_bits()), + IrValue::F64(x) => ConstKey::F64(x.to_bits()), + } + } +} + +fn is_commutative(op: &BinOp) -> bool { + matches!( + op, + BinOp::I32Add + | BinOp::I32Mul + | BinOp::I32And + | BinOp::I32Or + | BinOp::I32Xor + | BinOp::I32Eq + | BinOp::I32Ne + | BinOp::I64Add + | BinOp::I64Mul + | BinOp::I64And + | BinOp::I64Or + | BinOp::I64Xor + | BinOp::I64Eq + | BinOp::I64Ne + | BinOp::F32Add + | BinOp::F32Mul + | BinOp::F32Eq + | BinOp::F32Ne + | BinOp::F64Add + | BinOp::F64Mul + | BinOp::F64Eq + | BinOp::F64Ne + ) +} + +fn binop_key(op: BinOp, lhs: VarId, rhs: VarId) -> ValueKey { + let (lhs, rhs) = if is_commutative(&op) && lhs.0 > rhs.0 { + (rhs, lhs) + } else { + (lhs, rhs) + }; + ValueKey::BinOp { op, lhs, rhs } +} + +// ── Multi-definition detection ─────────────────────────────────────────────── + +/// Build the set of variables defined more than once across the function. +/// +/// After phi lowering the code is no longer in strict SSA form: loop phi +/// variables receive an initial assignment in the pre-loop block and a +/// back-edge update at the end of each iteration. These variables carry +/// different values at different program points, so any BinOp/UnOp that uses +/// them cannot be safely hoisted or deduplicated across blocks. +/// +/// `Const` instructions are always safe (they have no operands). +fn build_multi_def_vars(func: &IrFunction) -> HashSet { + let mut def_count: HashMap = HashMap::new(); + for block in &func.blocks { + for instr in &block.instructions { + if let Some(dest) = instr_dest(instr) { + *def_count.entry(dest).or_insert(0) += 1; + } + } + } + def_count + .into_iter() + .filter(|&(_, count)| count > 1) + .map(|(v, _)| v) + .collect() +} + +// ── Dominator tree ─────────────────────────────────────────────────────────── + +/// Compute the reverse-postorder traversal of the CFG starting from `entry`. +fn compute_rpo(func: &IrFunction) -> Vec { + let block_idx: HashMap = func + .blocks + .iter() + .enumerate() + .map(|(i, b)| (b.id, i)) + .collect(); + + let mut visited = vec![false; func.blocks.len()]; + let mut postorder = Vec::with_capacity(func.blocks.len()); + + dfs_postorder( + func, + func.entry_block, + &block_idx, + &mut visited, + &mut postorder, + ); + + postorder.reverse(); + postorder +} + +fn dfs_postorder( + func: &IrFunction, + block_id: BlockId, + block_idx: &HashMap, + visited: &mut Vec, + postorder: &mut Vec, +) { + let idx = match block_idx.get(&block_id) { + Some(&i) => i, + None => return, + }; + if visited[idx] { + return; + } + visited[idx] = true; + + for succ in terminator_successors(&func.blocks[idx].terminator) { + dfs_postorder(func, succ, block_idx, visited, postorder); + } + postorder.push(block_id); +} + +/// Compute the immediate dominator of each block using Cooper/Harvey/Kennedy. +/// +/// Returns `idom[b] = immediate dominator of b`, with `idom[entry] = entry`. +fn compute_idoms(func: &IrFunction) -> HashMap { + let rpo = compute_rpo(func); + // rpo_num[b] = index in RPO order (entry = 0, smallest index = processed first) + let rpo_num: HashMap = rpo.iter().enumerate().map(|(i, &b)| (b, i)).collect(); + + let preds = build_predecessors(func); + let entry = func.entry_block; + + let mut idom: HashMap = HashMap::new(); + idom.insert(entry, entry); + + let mut changed = true; + while changed { + changed = false; + // Process blocks in RPO order, skipping the entry. + for &b in rpo.iter().skip(1) { + let block_preds = &preds[&b]; + + // Start with the first predecessor that already has an idom assigned. + let mut new_idom = match block_preds + .iter() + .filter(|&&p| idom.contains_key(&p)) + .min_by_key(|&&p| rpo_num[&p]) + { + Some(&p) => p, + None => continue, // unreachable block — skip + }; + + // Intersect (walk up dom tree) with all other processed predecessors. + for &p in block_preds { + if p != new_idom && idom.contains_key(&p) { + new_idom = intersect(p, new_idom, &idom, &rpo_num); + } + } + + if idom.get(&b) != Some(&new_idom) { + idom.insert(b, new_idom); + changed = true; + } + } + } + + idom +} + +/// Walk up both fingers until they meet — the standard Cooper intersect. +fn intersect( + mut a: BlockId, + mut b: BlockId, + idom: &HashMap, + rpo_num: &HashMap, +) -> BlockId { + while a != b { + while rpo_num[&a] > rpo_num[&b] { + a = idom[&a]; + } + while rpo_num[&b] > rpo_num[&a] { + b = idom[&b]; + } + } + a +} + +/// Build dominator-tree children from the `idom` map. +fn build_dom_children( + idom: &HashMap, + entry: BlockId, +) -> HashMap> { + let mut children: HashMap> = HashMap::new(); + for (&b, &d) in idom { + if b != entry { + children.entry(d).or_default().push(b); + } + } + // Sort children for deterministic output. + for v in children.values_mut() { + v.sort_unstable_by_key(|id| id.0); + } + children +} + +// ── GVN walk ───────────────────────────────────────────────────────────────── + +/// Recursively walk the dominator tree in preorder. +/// +/// `value_map` is a flat map that acts as a scoped table: on entry we insert +/// new keys (recording them in `frame_keys`), on exit we remove them, restoring +/// the parent scope. Any key already present in `value_map` when we visit a +/// block was computed in a dominating block — safe to reuse. +fn collect_replacements( + func: &IrFunction, + block_id: BlockId, + dom_children: &HashMap>, + block_idx: &HashMap, + multi_def_vars: &HashSet, + value_map: &mut HashMap, + replacements: &mut HashMap, +) { + let idx = match block_idx.get(&block_id) { + Some(&i) => i, + None => return, + }; + + let mut frame_keys: Vec = Vec::new(); + + for instr in &func.blocks[idx].instructions { + match instr { + IrInstr::Const { dest, value } => { + // A multiply-defined dest (loop phi var) must be skipped + // entirely: adding it to replacements would replace ALL of + // its definitions with Assign(first), clobbering back-edge + // updates; inserting it into value_map would let dominated + // blocks wrongly reuse a value that changes each iteration. + if multi_def_vars.contains(dest) { + continue; + } + let key = ValueKey::Const(ConstKey::from(*value)); + if let Some(&first) = value_map.get(&key) { + replacements.insert(*dest, first); + } else { + value_map.insert(key.clone(), *dest); + frame_keys.push(key); + } + } + + IrInstr::BinOp { + dest, op, lhs, rhs, .. + } => { + // Skip if dest is multiply-defined (same reason as Const). + // Also skip if any operand is multiply-defined: a loop phi + // var carries different values per iteration, so the same + // BinOp in two dominated blocks can produce different results. + if multi_def_vars.contains(dest) + || multi_def_vars.contains(lhs) + || multi_def_vars.contains(rhs) + { + continue; + } + let key = binop_key(*op, *lhs, *rhs); + if let Some(&first) = value_map.get(&key) { + replacements.insert(*dest, first); + } else { + value_map.insert(key.clone(), *dest); + frame_keys.push(key); + } + } + + IrInstr::UnOp { dest, op, operand } => { + if multi_def_vars.contains(dest) || multi_def_vars.contains(operand) { + continue; + } + let key = ValueKey::UnOp { + op: *op, + operand: *operand, + }; + if let Some(&first) = value_map.get(&key) { + replacements.insert(*dest, first); + } else { + value_map.insert(key.clone(), *dest); + frame_keys.push(key); + } + } + + _ => {} + } + } + + // Recurse into dominated children. + if let Some(children) = dom_children.get(&block_id) { + for &child in children { + collect_replacements( + func, + child, + dom_children, + block_idx, + multi_def_vars, + value_map, + replacements, + ); + } + } + + // Pop this block's scope. + for key in frame_keys { + value_map.remove(&key); + } +} + +// ── Pass entry point ───────────────────────────────────────────────────────── + +/// Eliminates common subexpressions across basic blocks using the dominator tree. +pub fn eliminate(func: &mut IrFunction) { + if func.blocks.len() < 2 { + return; // nothing to do for single-block functions (local_cse covers those) + } + + let idom = compute_idoms(func); + let dom_children = build_dom_children(&idom, func.entry_block); + let block_idx: HashMap = func + .blocks + .iter() + .enumerate() + .map(|(i, b)| (b.id, i)) + .collect(); + + let multi_def_vars = build_multi_def_vars(func); + let mut value_map: HashMap = HashMap::new(); + let mut replacements: HashMap = HashMap::new(); + + collect_replacements( + func, + func.entry_block, + &dom_children, + &block_idx, + &multi_def_vars, + &mut value_map, + &mut replacements, + ); + + if replacements.is_empty() { + return; + } + + for block in &mut func.blocks { + for instr in &mut block.instructions { + if let Some(dest) = instr_dest(instr) { + if let Some(&src) = replacements.get(&dest) { + *instr = IrInstr::Assign { dest, src }; + } + } + } + } + + prune_dead_locals(func); +} + +// ── Tests ──────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::{IrBlock, IrTerminator, IrValue, TypeIdx, WasmType}; + + fn make_func(blocks: Vec) -> IrFunction { + IrFunction { + params: vec![], + locals: vec![], + blocks, + entry_block: BlockId(0), + return_type: None, + type_idx: TypeIdx::new(0), + } + } + + /// Entry (B0) → B1: const duplicated across the edge. + /// B0 dominates B1, so the duplicate in B1 should be replaced with Assign. + #[test] + fn cross_block_const_deduplication() { + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![IrInstr::Const { + dest: VarId(0), + value: IrValue::I32(42), + }], + terminator: IrTerminator::Jump { target: BlockId(1) }, + }; + let b1 = IrBlock { + id: BlockId(1), + instructions: vec![IrInstr::Const { + dest: VarId(1), + value: IrValue::I32(42), + }], + terminator: IrTerminator::Return { + value: Some(VarId(1)), + }, + }; + let mut func = make_func(vec![b0, b1]); + func.locals = vec![(VarId(0), WasmType::I32), (VarId(1), WasmType::I32)]; + + eliminate(&mut func); + + assert!( + matches!( + func.blocks[0].instructions[0], + IrInstr::Const { dest: VarId(0), .. } + ), + "first definition should stay as Const" + ); + assert!( + matches!( + func.blocks[1].instructions[0], + IrInstr::Assign { + dest: VarId(1), + src: VarId(0) + } + ), + "dominated duplicate should become Assign" + ); + } + + /// Entry (B0) → B1: BinOp duplicated across the edge. + #[test] + fn cross_block_binop_deduplication() { + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![IrInstr::BinOp { + dest: VarId(2), + op: BinOp::I32Add, + lhs: VarId(0), + rhs: VarId(1), + }], + terminator: IrTerminator::Jump { target: BlockId(1) }, + }; + let b1 = IrBlock { + id: BlockId(1), + instructions: vec![IrInstr::BinOp { + dest: VarId(3), + op: BinOp::I32Add, + lhs: VarId(0), + rhs: VarId(1), + }], + terminator: IrTerminator::Return { + value: Some(VarId(3)), + }, + }; + let mut func = make_func(vec![b0, b1]); + func.locals = vec![(VarId(2), WasmType::I32), (VarId(3), WasmType::I32)]; + + eliminate(&mut func); + + assert!(matches!( + func.blocks[0].instructions[0], + IrInstr::BinOp { .. } + )); + assert!( + matches!( + func.blocks[1].instructions[0], + IrInstr::Assign { + dest: VarId(3), + src: VarId(2) + } + ), + "dominated duplicate BinOp should become Assign" + ); + } + + /// B0 branches to B1 and B2 (diamond). B1 and B2 don't dominate each other, + /// so a const in B1 should NOT eliminate the same const in B2. + #[test] + fn sibling_blocks_not_deduplicated() { + // B0 → B1, B0 → B2, both converge to B3 + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![], + terminator: IrTerminator::BranchIf { + condition: VarId(0), + if_true: BlockId(1), + if_false: BlockId(2), + }, + }; + let b1 = IrBlock { + id: BlockId(1), + instructions: vec![IrInstr::Const { + dest: VarId(1), + value: IrValue::I32(7), + }], + terminator: IrTerminator::Jump { target: BlockId(3) }, + }; + let b2 = IrBlock { + id: BlockId(2), + instructions: vec![IrInstr::Const { + dest: VarId(2), + value: IrValue::I32(7), + }], + terminator: IrTerminator::Jump { target: BlockId(3) }, + }; + let b3 = IrBlock { + id: BlockId(3), + instructions: vec![], + terminator: IrTerminator::Return { value: None }, + }; + let mut func = make_func(vec![b0, b1, b2, b3]); + func.locals = vec![(VarId(1), WasmType::I32), (VarId(2), WasmType::I32)]; + + eliminate(&mut func); + + // Both consts should remain — neither block dominates the other. + assert!( + matches!( + func.blocks[1].instructions[0], + IrInstr::Const { dest: VarId(1), .. } + ), + "const in B1 must not be eliminated" + ); + assert!( + matches!( + func.blocks[2].instructions[0], + IrInstr::Const { dest: VarId(2), .. } + ), + "const in B2 must not be eliminated" + ); + } + + /// A const defined in B0 (entry) should be reused in a deeply dominated block. + #[test] + fn deep_domination_chain() { + // B0 → B1 → B2: const defined in B0, duplicated in B2 + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![IrInstr::Const { + dest: VarId(0), + value: IrValue::I32(99), + }], + terminator: IrTerminator::Jump { target: BlockId(1) }, + }; + let b1 = IrBlock { + id: BlockId(1), + instructions: vec![], + terminator: IrTerminator::Jump { target: BlockId(2) }, + }; + let b2 = IrBlock { + id: BlockId(2), + instructions: vec![IrInstr::Const { + dest: VarId(1), + value: IrValue::I32(99), + }], + terminator: IrTerminator::Return { + value: Some(VarId(1)), + }, + }; + let mut func = make_func(vec![b0, b1, b2]); + func.locals = vec![(VarId(0), WasmType::I32), (VarId(1), WasmType::I32)]; + + eliminate(&mut func); + + assert!(matches!( + func.blocks[0].instructions[0], + IrInstr::Const { dest: VarId(0), .. } + )); + assert!( + matches!( + func.blocks[2].instructions[0], + IrInstr::Assign { + dest: VarId(1), + src: VarId(0) + } + ), + "deeply dominated duplicate should be eliminated" + ); + } + + /// Commutative BinOps with swapped operands in a dominated block should be deduped. + #[test] + fn cross_block_commutative_deduplication() { + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![IrInstr::BinOp { + dest: VarId(2), + op: BinOp::I32Mul, + lhs: VarId(0), + rhs: VarId(1), + }], + terminator: IrTerminator::Jump { target: BlockId(1) }, + }; + let b1 = IrBlock { + id: BlockId(1), + instructions: vec![IrInstr::BinOp { + dest: VarId(3), + op: BinOp::I32Mul, + lhs: VarId(1), // swapped + rhs: VarId(0), + }], + terminator: IrTerminator::Return { + value: Some(VarId(3)), + }, + }; + let mut func = make_func(vec![b0, b1]); + func.locals = vec![(VarId(2), WasmType::I32), (VarId(3), WasmType::I32)]; + + eliminate(&mut func); + + assert!( + matches!( + func.blocks[1].instructions[0], + IrInstr::Assign { + dest: VarId(3), + src: VarId(2) + } + ), + "commutative cross-block BinOp should be deduplicated" + ); + } + + /// Single-block functions are skipped entirely (handled by local_cse). + #[test] + fn single_block_function_unchanged() { + let b0 = IrBlock { + id: BlockId(0), + instructions: vec![ + IrInstr::Const { + dest: VarId(0), + value: IrValue::I32(1), + }, + IrInstr::Const { + dest: VarId(1), + value: IrValue::I32(1), + }, + ], + terminator: IrTerminator::Return { value: None }, + }; + let mut func = make_func(vec![b0]); + func.locals = vec![(VarId(0), WasmType::I32), (VarId(1), WasmType::I32)]; + + eliminate(&mut func); + + // GVN skips single-block functions; duplicates remain (local_cse's job). + assert!(matches!( + func.blocks[0].instructions[0], + IrInstr::Const { .. } + )); + assert!(matches!( + func.blocks[0].instructions[1], + IrInstr::Const { .. } + )); + } +} diff --git a/crates/herkos-core/src/optimizer/mod.rs b/crates/herkos-core/src/optimizer/mod.rs index 2a4cabb..ae56100 100644 --- a/crates/herkos-core/src/optimizer/mod.rs +++ b/crates/herkos-core/src/optimizer/mod.rs @@ -23,6 +23,7 @@ mod dead_blocks; mod branch_fold; mod dead_instrs; mod empty_blocks; +mod gvn; mod local_cse; mod merge_blocks; @@ -48,8 +49,8 @@ pub fn optimize_ir(module_info: ModuleInfo, do_opt: bool) -> Result /// Optimizes the lowered IR after phi nodes have been eliminated. /// -/// Runs post-lowering structural passes, local common subexpression elimination, -/// and branch condition folding. We repeat until reaching a fixed point +/// Runs post-lowering structural passes, redundancy elimination (local CSE, +/// GVN), and branch condition folding. We repeat until reaching a fixed point /// (typically 2 iterations). pub fn optimize_lowered_ir( module_info: LoweredModuleInfo, @@ -65,6 +66,7 @@ pub fn optimize_lowered_ir( dead_blocks::eliminate(func)?; copy_prop::eliminate(func); local_cse::eliminate(func); + gvn::eliminate(func); dead_instrs::eliminate(func); branch_fold::eliminate(func); dead_instrs::eliminate(func); From 2a23008919c08f230e5ec7ea8c592e6718da48a4 Mon Sep 17 00:00:00 2001 From: metrics Date: Sat, 28 Mar 2026 02:01:04 +0000 Subject: [PATCH 2/3] refactor: consolidate value key and utility functions into utils module --- crates/herkos-core/src/optimizer/gvn.rs | 251 ++---------------- crates/herkos-core/src/optimizer/local_cse.rs | 51 +--- crates/herkos-core/src/optimizer/utils.rs | 220 +++++++++++++++ 3 files changed, 243 insertions(+), 279 deletions(-) diff --git a/crates/herkos-core/src/optimizer/gvn.rs b/crates/herkos-core/src/optimizer/gvn.rs index 33a3efd..e5a2bfc 100644 --- a/crates/herkos-core/src/optimizer/gvn.rs +++ b/crates/herkos-core/src/optimizer/gvn.rs @@ -22,235 +22,12 @@ //! **Only pure instructions are eligible.** Loads, calls, and memory ops are //! never deduplicated (they may trap or have observable side effects). -use super::utils::{build_predecessors, instr_dest, prune_dead_locals, terminator_successors}; -use crate::ir::{BinOp, BlockId, IrFunction, IrInstr, IrValue, UnOp, VarId}; -use std::collections::{HashMap, HashSet}; - -// ── Value key ──────────────────────────────────────────────────────────────── - -/// Hashable representation of a pure computation for deduplication. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum ValueKey { - Const(ConstKey), - BinOp { op: BinOp, lhs: VarId, rhs: VarId }, - UnOp { op: UnOp, operand: VarId }, -} - -/// Bit-level constant key that implements `Eq`/`Hash` correctly for floats. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum ConstKey { - I32(i32), - I64(i64), - F32(u32), - F64(u64), -} - -impl From for ConstKey { - fn from(v: IrValue) -> Self { - match v { - IrValue::I32(x) => ConstKey::I32(x), - IrValue::I64(x) => ConstKey::I64(x), - IrValue::F32(x) => ConstKey::F32(x.to_bits()), - IrValue::F64(x) => ConstKey::F64(x.to_bits()), - } - } -} - -fn is_commutative(op: &BinOp) -> bool { - matches!( - op, - BinOp::I32Add - | BinOp::I32Mul - | BinOp::I32And - | BinOp::I32Or - | BinOp::I32Xor - | BinOp::I32Eq - | BinOp::I32Ne - | BinOp::I64Add - | BinOp::I64Mul - | BinOp::I64And - | BinOp::I64Or - | BinOp::I64Xor - | BinOp::I64Eq - | BinOp::I64Ne - | BinOp::F32Add - | BinOp::F32Mul - | BinOp::F32Eq - | BinOp::F32Ne - | BinOp::F64Add - | BinOp::F64Mul - | BinOp::F64Eq - | BinOp::F64Ne - ) -} - -fn binop_key(op: BinOp, lhs: VarId, rhs: VarId) -> ValueKey { - let (lhs, rhs) = if is_commutative(&op) && lhs.0 > rhs.0 { - (rhs, lhs) - } else { - (lhs, rhs) - }; - ValueKey::BinOp { op, lhs, rhs } -} - -// ── Multi-definition detection ─────────────────────────────────────────────── - -/// Build the set of variables defined more than once across the function. -/// -/// After phi lowering the code is no longer in strict SSA form: loop phi -/// variables receive an initial assignment in the pre-loop block and a -/// back-edge update at the end of each iteration. These variables carry -/// different values at different program points, so any BinOp/UnOp that uses -/// them cannot be safely hoisted or deduplicated across blocks. -/// -/// `Const` instructions are always safe (they have no operands). -fn build_multi_def_vars(func: &IrFunction) -> HashSet { - let mut def_count: HashMap = HashMap::new(); - for block in &func.blocks { - for instr in &block.instructions { - if let Some(dest) = instr_dest(instr) { - *def_count.entry(dest).or_insert(0) += 1; - } - } - } - def_count - .into_iter() - .filter(|&(_, count)| count > 1) - .map(|(v, _)| v) - .collect() -} - -// ── Dominator tree ─────────────────────────────────────────────────────────── - -/// Compute the reverse-postorder traversal of the CFG starting from `entry`. -fn compute_rpo(func: &IrFunction) -> Vec { - let block_idx: HashMap = func - .blocks - .iter() - .enumerate() - .map(|(i, b)| (b.id, i)) - .collect(); - - let mut visited = vec![false; func.blocks.len()]; - let mut postorder = Vec::with_capacity(func.blocks.len()); - - dfs_postorder( - func, - func.entry_block, - &block_idx, - &mut visited, - &mut postorder, - ); - - postorder.reverse(); - postorder -} - -fn dfs_postorder( - func: &IrFunction, - block_id: BlockId, - block_idx: &HashMap, - visited: &mut Vec, - postorder: &mut Vec, -) { - let idx = match block_idx.get(&block_id) { - Some(&i) => i, - None => return, - }; - if visited[idx] { - return; - } - visited[idx] = true; - - for succ in terminator_successors(&func.blocks[idx].terminator) { - dfs_postorder(func, succ, block_idx, visited, postorder); - } - postorder.push(block_id); -} - -/// Compute the immediate dominator of each block using Cooper/Harvey/Kennedy. -/// -/// Returns `idom[b] = immediate dominator of b`, with `idom[entry] = entry`. -fn compute_idoms(func: &IrFunction) -> HashMap { - let rpo = compute_rpo(func); - // rpo_num[b] = index in RPO order (entry = 0, smallest index = processed first) - let rpo_num: HashMap = rpo.iter().enumerate().map(|(i, &b)| (b, i)).collect(); - - let preds = build_predecessors(func); - let entry = func.entry_block; - - let mut idom: HashMap = HashMap::new(); - idom.insert(entry, entry); - - let mut changed = true; - while changed { - changed = false; - // Process blocks in RPO order, skipping the entry. - for &b in rpo.iter().skip(1) { - let block_preds = &preds[&b]; - - // Start with the first predecessor that already has an idom assigned. - let mut new_idom = match block_preds - .iter() - .filter(|&&p| idom.contains_key(&p)) - .min_by_key(|&&p| rpo_num[&p]) - { - Some(&p) => p, - None => continue, // unreachable block — skip - }; - - // Intersect (walk up dom tree) with all other processed predecessors. - for &p in block_preds { - if p != new_idom && idom.contains_key(&p) { - new_idom = intersect(p, new_idom, &idom, &rpo_num); - } - } - - if idom.get(&b) != Some(&new_idom) { - idom.insert(b, new_idom); - changed = true; - } - } - } - - idom -} - -/// Walk up both fingers until they meet — the standard Cooper intersect. -fn intersect( - mut a: BlockId, - mut b: BlockId, - idom: &HashMap, - rpo_num: &HashMap, -) -> BlockId { - while a != b { - while rpo_num[&a] > rpo_num[&b] { - a = idom[&a]; - } - while rpo_num[&b] > rpo_num[&a] { - b = idom[&b]; - } - } - a -} - -/// Build dominator-tree children from the `idom` map. -fn build_dom_children( - idom: &HashMap, - entry: BlockId, -) -> HashMap> { - let mut children: HashMap> = HashMap::new(); - for (&b, &d) in idom { - if b != entry { - children.entry(d).or_default().push(b); - } - } - // Sort children for deterministic output. - for v in children.values_mut() { - v.sort_unstable_by_key(|id| id.0); - } - children -} +use super::utils::{ + binop_key, build_dom_children, build_multi_def_vars, compute_idoms, instr_dest, + prune_dead_locals, ConstKey, ValueKey, +}; +use crate::ir::{BlockId, IrFunction, IrInstr, VarId}; +use std::collections::HashMap; // ── GVN walk ───────────────────────────────────────────────────────────────── @@ -265,7 +42,7 @@ fn collect_replacements( block_id: BlockId, dom_children: &HashMap>, block_idx: &HashMap, - multi_def_vars: &HashSet, + multi_def_vars: &std::collections::HashSet, value_map: &mut HashMap, replacements: &mut HashMap, ) { @@ -367,8 +144,11 @@ pub fn eliminate(func: &mut IrFunction) { return; // nothing to do for single-block functions (local_cse covers those) } + // Step 1: Build the dominator tree (idom map → children map). let idom = compute_idoms(func); let dom_children = build_dom_children(&idom, func.entry_block); + + // Step 2: Build a block-ID → slice-index map for O(1) block lookup. let block_idx: HashMap = func .blocks .iter() @@ -376,7 +156,13 @@ pub fn eliminate(func: &mut IrFunction) { .map(|(i, b)| (b.id, i)) .collect(); + // Step 3: Identify variables defined more than once (loop phi vars). + // These are ineligible for cross-block deduplication. let multi_def_vars = build_multi_def_vars(func); + + // Step 4: Walk the dominator tree in preorder, collecting replacements. + // The scoped value_map ensures only dominator-block computations + // are visible when processing each block. let mut value_map: HashMap = HashMap::new(); let mut replacements: HashMap = HashMap::new(); @@ -394,6 +180,8 @@ pub fn eliminate(func: &mut IrFunction) { return; } + // Step 5: Rewrite each redundant instruction to Assign { dest, src: first_var }. + // Copy-propagation (run as part of the pipeline) will clean up the Assigns. for block in &mut func.blocks { for instr in &mut block.instructions { if let Some(dest) = instr_dest(instr) { @@ -404,6 +192,7 @@ pub fn eliminate(func: &mut IrFunction) { } } + // Step 6: Remove locals that are no longer referenced after rewriting. prune_dead_locals(func); } @@ -412,7 +201,7 @@ pub fn eliminate(func: &mut IrFunction) { #[cfg(test)] mod tests { use super::*; - use crate::ir::{IrBlock, IrTerminator, IrValue, TypeIdx, WasmType}; + use crate::ir::{BinOp, IrBlock, IrTerminator, IrValue, TypeIdx, WasmType}; fn make_func(blocks: Vec) -> IrFunction { IrFunction { diff --git a/crates/herkos-core/src/optimizer/local_cse.rs b/crates/herkos-core/src/optimizer/local_cse.rs index 84ec026..e08b00d 100644 --- a/crates/herkos-core/src/optimizer/local_cse.rs +++ b/crates/herkos-core/src/optimizer/local_cse.rs @@ -5,55 +5,10 @@ //! considered (`BinOp`, `UnOp`, `Const`). Duplicates are replaced with //! `Assign { dest, src: previous_result }`, which copy propagation cleans up. -use crate::ir::{BinOp, IrFunction, IrInstr, IrValue, UnOp, VarId}; +use crate::ir::{IrFunction, IrInstr, VarId}; use std::collections::HashMap; -use super::utils::{is_commutative, prune_dead_locals}; - -// ── Value key ──────────────────────────────────────────────────────────────── - -/// Hashable representation of a pure computation for deduplication. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum ValueKey { - /// Constant value (using bit-level equality for floats). - Const(ConstKey), - - /// Binary operation with operand variable IDs. - BinOp { op: BinOp, lhs: VarId, rhs: VarId }, - - /// Unary operation with operand variable ID. - UnOp { op: UnOp, operand: VarId }, -} - -/// Bit-level constant key that implements Eq/Hash correctly for floats. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum ConstKey { - I32(i32), - I64(i64), - F32(u32), - F64(u64), -} - -impl From for ConstKey { - fn from(v: IrValue) -> Self { - match v { - IrValue::I32(x) => ConstKey::I32(x), - IrValue::I64(x) => ConstKey::I64(x), - IrValue::F32(x) => ConstKey::F32(x.to_bits()), - IrValue::F64(x) => ConstKey::F64(x.to_bits()), - } - } -} - -/// Build a `ValueKey` for a `BinOp`, normalizing operand order for commutative ops. -fn binop_key(op: BinOp, lhs: VarId, rhs: VarId) -> ValueKey { - let (lhs, rhs) = if is_commutative(&op) && lhs.0 > rhs.0 { - (rhs, lhs) - } else { - (lhs, rhs) - }; - ValueKey::BinOp { op, lhs, rhs } -} +use super::utils::{binop_key, prune_dead_locals, ConstKey, ValueKey}; // ── Pass entry point ───────────────────────────────────────────────────────── @@ -132,7 +87,7 @@ pub fn eliminate(func: &mut IrFunction) { #[cfg(test)] mod tests { use super::*; - use crate::ir::{BlockId, IrBlock, IrTerminator, TypeIdx, WasmType}; + use crate::ir::{BinOp, BlockId, IrBlock, IrTerminator, IrValue, TypeIdx, UnOp, WasmType}; /// Helper: create a minimal IrFunction with the given blocks. fn make_func(blocks: Vec) -> IrFunction { diff --git a/crates/herkos-core/src/optimizer/utils.rs b/crates/herkos-core/src/optimizer/utils.rs index 93e7ee1..954c2a6 100644 --- a/crates/herkos-core/src/optimizer/utils.rs +++ b/crates/herkos-core/src/optimizer/utils.rs @@ -420,6 +420,226 @@ pub fn prune_dead_locals(func: &mut IrFunction) { func.locals.retain(|(var, _)| live.contains(var)); } +// ── Value key ──────────────────────────────────────────────────────────────── + +/// Hashable representation of a pure computation for deduplication. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ValueKey { + /// Constant value (using bit-level equality for floats). + Const(ConstKey), + /// Binary operation with operand variable IDs. + BinOp { op: BinOp, lhs: VarId, rhs: VarId }, + /// Unary operation with operand variable ID. + UnOp { op: UnOp, operand: VarId }, +} + +/// Build a [`ValueKey`] for a `BinOp`, normalizing operand order for commutative ops. +pub fn binop_key(op: BinOp, lhs: VarId, rhs: VarId) -> ValueKey { + let (lhs, rhs) = if is_commutative(&op) && lhs.0 > rhs.0 { + (rhs, lhs) + } else { + (lhs, rhs) + }; + ValueKey::BinOp { op, lhs, rhs } +} + +/// Bit-level constant key that implements `Eq`/`Hash` correctly for floats. +/// +/// Floats have no `Eq` or `Hash` impl in Rust because NaN != NaN. This type +/// stores the raw bit pattern instead, so two constants with identical bits +/// hash to the same bucket and compare equal — matching Wasm's value semantics. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ConstKey { + I32(i32), + I64(i64), + F32(u32), + F64(u64), +} + +impl From for ConstKey { + fn from(v: IrValue) -> Self { + match v { + IrValue::I32(x) => ConstKey::I32(x), + IrValue::I64(x) => ConstKey::I64(x), + IrValue::F32(x) => ConstKey::F32(x.to_bits()), + IrValue::F64(x) => ConstKey::F64(x.to_bits()), + } + } +} + +// ── Multi-definition detection ──────────────────────────────────────────────── + +/// Build the set of variables defined more than once across the function. +/// +/// After phi lowering the code is no longer in strict SSA form: loop phi +/// variables receive an initial assignment in the pre-loop block and a +/// back-edge update at the end of each iteration. These variables carry +/// different values at different program points, so any `BinOp`/`UnOp` that +/// uses them cannot be safely deduplicated across blocks. +/// +/// `Const` instructions are always safe to deduplicate (they have no operands). +pub fn build_multi_def_vars(func: &IrFunction) -> HashSet { + build_global_def_count(func) + .into_iter() + .filter(|&(_, count)| count > 1) + .map(|(v, _)| v) + .collect() +} + +// ── Dominator tree ──────────────────────────────────────────────────────────── + +/// Compute the reverse-postorder (RPO) traversal of the CFG from `entry_block`. +/// +/// RPO is the reverse of the DFS postorder: nodes are visited only after all +/// their DFS-tree predecessors, so every dominator appears before the blocks it +/// dominates. This ordering is required by the Cooper/Harvey/Kennedy iterative +/// dominator algorithm, which relies on processing a block's dominators before +/// the block itself. +pub fn compute_rpo(func: &IrFunction) -> Vec { + let block_idx: HashMap = func + .blocks + .iter() + .enumerate() + .map(|(i, b)| (b.id, i)) + .collect(); + + let mut visited = vec![false; func.blocks.len()]; + let mut postorder = Vec::with_capacity(func.blocks.len()); + + dfs_postorder( + func, + func.entry_block, + &block_idx, + &mut visited, + &mut postorder, + ); + + postorder.reverse(); + postorder +} + +/// Recursive DFS helper that appends `block_id` to `postorder` after visiting +/// all of its successors. +/// +/// Each block is visited at most once (guarded by `visited`). Unreachable +/// blocks — those not reachable from the entry — are never pushed and therefore +/// absent from the final RPO, which is the desired behaviour: the dominator +/// algorithm only needs to reason about reachable blocks. +fn dfs_postorder( + func: &IrFunction, + block_id: BlockId, + block_idx: &HashMap, + visited: &mut Vec, + postorder: &mut Vec, +) { + let idx = match block_idx.get(&block_id) { + Some(&i) => i, + None => return, + }; + if visited[idx] { + return; + } + visited[idx] = true; + + for succ in terminator_successors(&func.blocks[idx].terminator) { + dfs_postorder(func, succ, block_idx, visited, postorder); + } + postorder.push(block_id); +} + +/// Compute the immediate dominator of each block using the Cooper/Harvey/Kennedy +/// iterative algorithm. +/// +/// Returns a map `idom` where `idom[b]` is the immediate dominator of `b`. +/// The entry block is its own immediate dominator: `idom[entry] = entry`. +/// +/// The algorithm works by repeatedly intersecting predecessor dominators in RPO +/// until a fixed point is reached. Because blocks are processed in RPO order, +/// most dominators converge in a single pass over the function. +pub fn compute_idoms(func: &IrFunction) -> HashMap { + let rpo = compute_rpo(func); + // rpo_num[b] = index in RPO order (entry = 0, smallest = processed first). + let rpo_num: HashMap = rpo.iter().enumerate().map(|(i, &b)| (b, i)).collect(); + + let preds = build_predecessors(func); + let entry = func.entry_block; + + let mut idom: HashMap = HashMap::new(); + idom.insert(entry, entry); + + let mut changed = true; + while changed { + changed = false; + // Process blocks in RPO order, skipping the entry. + for &b in rpo.iter().skip(1) { + let block_preds = &preds[&b]; + + // Start with the first predecessor that already has an idom assigned. + let mut new_idom = match block_preds + .iter() + .filter(|&&p| idom.contains_key(&p)) + .min_by_key(|&&p| rpo_num[&p]) + { + Some(&p) => p, + None => continue, // unreachable block — skip + }; + + // Intersect (walk up dom tree) with all other processed predecessors. + for &p in block_preds { + if p != new_idom && idom.contains_key(&p) { + new_idom = intersect(p, new_idom, &idom, &rpo_num); + } + } + + if idom.get(&b) != Some(&new_idom) { + idom.insert(b, new_idom); + changed = true; + } + } + } + + idom +} + +/// Walk up both dom-tree fingers until they meet — the standard Cooper intersect. +fn intersect( + mut a: BlockId, + mut b: BlockId, + idom: &HashMap, + rpo_num: &HashMap, +) -> BlockId { + while a != b { + while rpo_num[&a] > rpo_num[&b] { + a = idom[&a]; + } + while rpo_num[&b] > rpo_num[&a] { + b = idom[&b]; + } + } + a +} + +/// Build the dominator-tree children map from the `idom` map. +/// +/// For each block `b` (except the entry), `children[idom[b]]` gains `b` as a +/// child. Children are sorted by block ID for deterministic traversal order. +pub fn build_dom_children( + idom: &HashMap, + entry: BlockId, +) -> HashMap> { + let mut children: HashMap> = HashMap::new(); + for (&b, &d) in idom { + if b != entry { + children.entry(d).or_default().push(b); + } + } + // Sort children for deterministic output. + for v in children.values_mut() { + v.sort_unstable_by_key(|id| id.0); + } + children +} + // ── Side-effect classification ─────────────────────────────────────────────── /// Returns `true` if the instruction is side-effect-free and can be safely From e62dcef5d43aa83dff3a62337dd88c1390ae1ba0 Mon Sep 17 00:00:00 2001 From: metrics Date: Sat, 28 Mar 2026 02:45:56 +0000 Subject: [PATCH 3/3] docs: clarify scope popping in collect_replacements and enhance compute_rpo documentation with examples --- crates/herkos-core/src/optimizer/gvn.rs | 4 +- crates/herkos-core/src/optimizer/utils.rs | 50 +++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/crates/herkos-core/src/optimizer/gvn.rs b/crates/herkos-core/src/optimizer/gvn.rs index e5a2bfc..dfe0677 100644 --- a/crates/herkos-core/src/optimizer/gvn.rs +++ b/crates/herkos-core/src/optimizer/gvn.rs @@ -130,7 +130,9 @@ fn collect_replacements( } } - // Pop this block's scope. + // Pop this block's scope so sibling blocks don't inherit our entries. + // Siblings are not dominated by this block, so a computation seen here + // is not guaranteed to have executed when a sibling is reached. for key in frame_keys { value_map.remove(&key); } diff --git a/crates/herkos-core/src/optimizer/utils.rs b/crates/herkos-core/src/optimizer/utils.rs index 954c2a6..266d712 100644 --- a/crates/herkos-core/src/optimizer/utils.rs +++ b/crates/herkos-core/src/optimizer/utils.rs @@ -495,6 +495,26 @@ pub fn build_multi_def_vars(func: &IrFunction) -> HashSet { /// dominates. This ordering is required by the Cooper/Harvey/Kennedy iterative /// dominator algorithm, which relies on processing a block's dominators before /// the block itself. +/// +/// For example, given the CFG: +/// +/// ```text +/// entry +/// / \ +/// A B +/// \ / +/// C +/// ``` +/// +/// DFS visits entry → A → C → B (postorder: C, A, B, entry or C, B, A, entry +/// depending on successor order). Reversing gives RPO: +/// +/// ```text +/// [entry, A, B, C] +/// ``` +/// +/// `entry` is always first; `C` is always last because it is reachable only +/// after both `A` and `B`. pub fn compute_rpo(func: &IrFunction) -> Vec { let block_idx: HashMap = func .blocks @@ -553,6 +573,18 @@ fn dfs_postorder( /// Returns a map `idom` where `idom[b]` is the immediate dominator of `b`. /// The entry block is its own immediate dominator: `idom[entry] = entry`. /// +/// A block `d` dominates block `b` if `d` appears on *every* path from the +/// entry block to `b`. For example, given: +/// +/// ```text +/// entry → A → B +/// entry → C → B +/// ``` +/// +/// `entry` dominates `B` (it is on every path), but `A` and `C` do not. +/// The immediate dominator is the closest such dominator — the last one +/// before `b` in the dominator tree. +/// /// The algorithm works by repeatedly intersecting predecessor dominators in RPO /// until a fixed point is reached. Because blocks are processed in RPO order, /// most dominators converge in a single pass over the function. @@ -623,6 +655,24 @@ fn intersect( /// /// For each block `b` (except the entry), `children[idom[b]]` gains `b` as a /// child. Children are sorted by block ID for deterministic traversal order. +/// +/// For example, given the CFG: +/// +/// ```text +/// entry +/// / \ +/// A B +/// / \ +/// C D +/// ``` +/// +/// The `idom` map is `{ A → entry, B → entry, C → A, D → A }`, and the +/// resulting children map is: +/// +/// ```text +/// entry → [A, B] +/// A → [C, D] +/// ``` pub fn build_dom_children( idom: &HashMap, entry: BlockId,