From 4a790407245560c8677f2439cfdc3c7bf074c4dd Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 15 Jan 2026 17:24:40 -0300 Subject: [PATCH] perf: Pack successor instructions into cache-line gaps --- crates/plotnik-compiler/src/emit/layout.rs | 326 +++++++++++++++--- .../plotnik-compiler/src/emit/layout_tests.rs | 278 +++------------ ...ternations_tagged_in_field_constraint.snap | 7 +- ..._emit__emit_tests__fields_alternation.snap | 7 +- ...__emit_tests__optional_null_injection.snap | 7 +- ..._tests__quantifiers_first_child_array.snap | 13 +- ...mit__emit_tests__quantifiers_optional.snap | 7 +- ...tests__quantifiers_optional_nongreedy.snap | 7 +- ..._compiler__emit__layout_tests__branch.snap | 44 +++ ...it__layout_tests__cache_line_boundary.snap | 84 +++++ ...iler__emit__layout_tests__call_return.snap | 51 +++ ...emit__layout_tests__large_instruction.snap | 69 ++++ ...ler__emit__layout_tests__linear_chain.snap | 46 +++ ...mit__layout_tests__single_instruction.snap | 34 ++ 14 files changed, 682 insertions(+), 298 deletions(-) create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__branch.snap create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__cache_line_boundary.snap create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__call_return.snap create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__large_instruction.snap create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__linear_chain.snap create mode 100644 crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__layout_tests__single_instruction.snap diff --git a/crates/plotnik-compiler/src/emit/layout.rs b/crates/plotnik-compiler/src/emit/layout.rs index bbbc833..a1105bc 100644 --- a/crates/plotnik-compiler/src/emit/layout.rs +++ b/crates/plotnik-compiler/src/emit/layout.rs @@ -1,7 +1,8 @@ //! Cache-aligned instruction layout. //! //! Extracts linear chains from the control flow graph and places them -//! contiguously. Pads instructions to prevent cache line straddling. +//! contiguously. Packs successor instructions into free space of predecessor +//! blocks for improved d-cache locality. use std::collections::{BTreeMap, HashSet}; @@ -10,6 +11,170 @@ use crate::bytecode::{InstructionIR, Label, LayoutResult}; const CACHE_LINE: usize = 64; const STEP_SIZE: usize = 8; +/// Intermediate representation for layout optimization. +struct LayoutIR { + blocks: Vec, + label_to_block: BTreeMap, + label_to_offset: BTreeMap, +} + +/// A 64-byte cache-line block. +struct Block { + placements: Vec, + used: u8, +} + +/// An instruction placed within a block. +struct Placement { + label: Label, + offset: u8, + size: u8, +} + +impl Block { + fn new() -> Self { + Self { + placements: Vec::new(), + used: 0, + } + } + + fn free(&self) -> u8 { + CACHE_LINE as u8 - self.used + } + + fn can_fit(&self, size: u8) -> bool { + self.free() >= size + } + + fn place(&mut self, label: Label, size: u8) -> u8 { + let offset = self.used; + self.placements.push(Placement { + label, + offset, + size, + }); + self.used += size; + offset + } +} + +impl LayoutIR { + fn new() -> Self { + Self { + blocks: Vec::new(), + label_to_block: BTreeMap::new(), + label_to_offset: BTreeMap::new(), + } + } + + fn place(&mut self, label: Label, block_idx: usize, size: u8) { + let offset = self.blocks[block_idx].place(label, size); + self.label_to_block.insert(label, block_idx); + self.label_to_offset.insert(label, offset); + } + + /// Move an instruction from its current block to a new block. + fn move_to(&mut self, label: Label, new_block_idx: usize, size: u8) { + // Remove from old block + if let Some(&old_block_idx) = self.label_to_block.get(&label) + && let block = &mut self.blocks[old_block_idx] + && let Some(pos) = block.placements.iter().position(|p| p.label == label) + { + let old_placement = block.placements.remove(pos); + block.used -= old_placement.size; + + // Compact remaining placements + let mut offset = 0u8; + for p in &mut block.placements { + p.offset = offset; + offset += p.size; + } + } + + // Add to new block + let offset = self.blocks[new_block_idx].place(label, size); + self.label_to_block.insert(label, new_block_idx); + self.label_to_offset.insert(label, offset); + } + + fn finalize(self) -> LayoutResult { + let mut mapping = BTreeMap::new(); + let mut max_step_end = 0u16; + + for (block_idx, block) in self.blocks.iter().enumerate() { + let block_base_step = (block_idx * CACHE_LINE / STEP_SIZE) as u16; + for placement in &block.placements { + let step = block_base_step + (placement.offset / STEP_SIZE as u8) as u16; + mapping.insert(placement.label, step); + let step_end = step + (placement.size / STEP_SIZE as u8) as u16; + max_step_end = max_step_end.max(step_end); + } + } + + LayoutResult::new(mapping, max_step_end) + } +} + +/// Block-to-block reference counts for scoring. +struct BlockRefs { + /// (from_block, to_block) -> reference count + direct: BTreeMap<(usize, usize), usize>, + /// block -> list of predecessor blocks + predecessors: BTreeMap>, +} + +impl BlockRefs { + fn new() -> Self { + Self { + direct: BTreeMap::new(), + predecessors: BTreeMap::new(), + } + } + + fn add_ref(&mut self, from_block: usize, to_block: usize) { + *self.direct.entry((from_block, to_block)).or_default() += 1; + let preds = self.predecessors.entry(to_block).or_default(); + if !preds.contains(&from_block) { + preds.push(from_block); + } + } + + fn count(&self, from_block: usize, to_block: usize) -> usize { + self.direct.get(&(from_block, to_block)).copied().unwrap_or(0) + } + + fn predecessors(&self, block: usize) -> &[usize] { + self.predecessors + .get(&block) + .map(|v| v.as_slice()) + .unwrap_or(&[]) + } +} + +/// Score a candidate block for packing based on reference distance. +/// Direct refs count 1.0, 1-hop = 0.5, 2-hop = 0.25, capped at 3 hops. +fn block_score(target_block: usize, candidate_block: usize, refs: &BlockRefs) -> f32 { + let mut score = 0.0f32; + let mut frontier = vec![(candidate_block, 0u8)]; + let mut visited = HashSet::new(); + + while let Some((block, dist)) = frontier.pop() { + if !visited.insert(block) || dist > 3 { + continue; + } + + let direct_refs = refs.count(block, target_block); + score += direct_refs as f32 / (1u32 << dist) as f32; + + for &pred in refs.predecessors(block) { + frontier.push((pred, dist + 1)); + } + } + + score +} + /// Successor graph for layout analysis. struct Graph { /// label -> list of successor labels @@ -70,7 +235,121 @@ impl CacheAligned { let chains = extract_chains(&graph, instructions, entries); let ordered = order_chains(chains, entries); - assign_step_ids(ordered, &label_to_instr) + let mut ir = build_layout_ir(&ordered, &label_to_instr); + let refs = build_block_refs(&ir, &label_to_instr); + pack_successors(&mut ir, &refs, &label_to_instr); + + ir.finalize() + } +} + +/// Build initial LayoutIR from ordered chains. +fn build_layout_ir( + chains: &[Vec