Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/plotnik-lib/src/bytecode/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ pub const STEP_SIZE: usize = 8;
/// effects, neg_fields, and successors combined. When an epsilon
/// transition needs more successors, it must be split into a cascade.
pub const MAX_MATCH_PAYLOAD_SLOTS: usize = 28;

/// Maximum pre-effects per Match instruction.
///
/// Pre-effect count is stored in 3 bits (max 7). When exceeded,
/// overflow effects must be emitted in leading epsilon transitions.
pub const MAX_PRE_EFFECTS: usize = 7;
24 changes: 18 additions & 6 deletions crates/plotnik-lib/src/bytecode/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,24 @@ impl MatchIR {
.unwrap_or(0);
bytes[6..8].copy_from_slice(&next.to_le_bytes());
} else {
let pre_count = self.pre_effects.len() as u16;
let neg_count = self.neg_fields.len() as u16;
let post_count = self.post_effects.len() as u16;
let succ_count = self.successors.len() as u16;
let counts =
(pre_count << 13) | (neg_count << 10) | (post_count << 7) | (succ_count << 1);
let pre_count = self.pre_effects.len();
let neg_count = self.neg_fields.len();
let post_count = self.post_effects.len();
let succ_count = self.successors.len();

// Validate bit-packed field limits (3 bits for counts, 6 bits for successors)
assert!(
pre_count <= 7,
"pre_effects overflow: {pre_count} > 7 (use emit_match_with_cascade)"
);
assert!(neg_count <= 7, "neg_fields overflow: {neg_count} > 7");
assert!(post_count <= 7, "post_effects overflow: {post_count} > 7");
assert!(succ_count <= 63, "successors overflow: {succ_count} > 63");

let counts = ((pre_count as u16) << 13)
| ((neg_count as u16) << 10)
| ((post_count as u16) << 7)
| ((succ_count as u16) << 1);
bytes[6..8].copy_from_slice(&counts.to_le_bytes());

let mut offset = 8;
Expand Down
4 changes: 3 additions & 1 deletion crates/plotnik-lib/src/bytecode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ mod nav;
mod sections;
mod type_meta;

pub use constants::{MAGIC, MAX_MATCH_PAYLOAD_SLOTS, SECTION_ALIGN, STEP_SIZE, VERSION};
pub use constants::{
MAGIC, MAX_MATCH_PAYLOAD_SLOTS, MAX_PRE_EFFECTS, SECTION_ALIGN, STEP_SIZE, VERSION,
};

pub use ids::{StringId, TypeId};

Expand Down
9 changes: 6 additions & 3 deletions crates/plotnik-lib/src/bytecode/nav.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,18 @@ impl Nav {
Self::DownSkip => 7,
Self::DownExact => 8,
Self::Up(n) => {
debug_assert!((1..=63).contains(&n));
assert!((1..=63).contains(&n), "Up level overflow: {n} > 63");
0b01_000000 | n
}
Self::UpSkipTrivia(n) => {
debug_assert!((1..=63).contains(&n));
assert!(
(1..=63).contains(&n),
"UpSkipTrivia level overflow: {n} > 63"
);
0b10_000000 | n
}
Self::UpExact(n) => {
debug_assert!((1..=63).contains(&n));
assert!((1..=63).contains(&n), "UpExact level overflow: {n} > 63");
0b11_000000 | n
}
}
Expand Down
65 changes: 65 additions & 0 deletions crates/plotnik-lib/src/compile/compile_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,68 @@ fn compile_large_tagged_alternation() {

assert!(!result.instructions.is_empty());
}

#[test]
fn compile_unlabeled_alternation_5_branches_with_captures() {
// Regression test: unlabeled alternation with 5+ branches where each has
// a unique capture requires 8+ pre-effects (4 nulls + 4 sets per branch).
// This exceeds the 3-bit limit (max 7) and must cascade via epsilon chain.
let query = QueryBuilder::one_liner(
"Q = [(identifier) @a (number) @b (string) @c (binary_expression) @d (call_expression) @e]",
)
.parse()
.unwrap()
.analyze();

let mut strings = StringTableBuilder::new();
let result = Compiler::compile(
query.interner(),
query.type_context(),
&query.symbol_table,
&mut strings,
None,
None,
)
.unwrap();

assert!(!result.instructions.is_empty());

// Verify that effects cascade created extra epsilon instructions.
// With 5 branches, each branch needs 8 pre-effects (4 missing captures × 2 effects).
// This requires at least one cascade step per branch.
let epsilon_count = result
.instructions
.iter()
.filter(|i| matches!(i, crate::bytecode::InstructionIR::Match(m) if m.is_epsilon()))
.count();

// Should have more epsilon transitions than without cascade
// (5 branches + cascade steps for overflow effects)
assert!(epsilon_count >= 5, "expected cascade epsilon steps");
}

#[test]
fn compile_unlabeled_alternation_8_branches_with_captures() {
// Even more extreme: 8 branches means 14 pre-effects per branch (7 nulls + 7 sets).
// This requires 2 cascade steps per branch.
let query = QueryBuilder::one_liner(
"Q = [(identifier) @a (number) @b (string) @c (binary_expression) @d \
(call_expression) @e (member_expression) @f (array) @g (object) @h]",
)
.parse()
.unwrap()
.analyze();

let mut strings = StringTableBuilder::new();
let result = Compiler::compile(
query.interner(),
query.type_context(),
&query.symbol_table,
&mut strings,
None,
None,
)
.unwrap();

assert!(!result.instructions.is_empty());
}
21 changes: 8 additions & 13 deletions crates/plotnik-lib/src/compile/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,14 @@ impl Compiler<'_> {

// If no items, just match and exit (capture effects go here)
if items.is_empty() {
self.instructions.push(
return self.emit_match_with_cascade(
MatchIR::epsilon(entry, exit)
.nav(nav)
.node_type(node_type)
.neg_fields(neg_fields)
.pre_effects(capture.pre)
.post_effects(capture.post)
.into(),
.post_effects(capture.post),
);
return entry;
}

// Determine Up navigation based on trailing anchor
Expand Down Expand Up @@ -102,13 +100,12 @@ impl Compiler<'_> {
.push(MatchIR::epsilon(up_label, final_exit).nav(up_nav).into());

// Emit entry instruction into the node (only pre_effects here)
self.instructions.push(
self.emit_match_with_cascade(
MatchIR::epsilon(entry, items_entry)
.nav(nav)
.node_type(node_type)
.neg_fields(neg_fields)
.pre_effects(capture.pre)
.into(),
.pre_effects(capture.pre),
);

entry
Expand Down Expand Up @@ -178,13 +175,12 @@ impl Compiler<'_> {
self.emit_wildcard_nav(down_wildcard, Nav::Down, try_label);

// entry: match parent node → down_wildcard (only pre_effects here)
self.instructions.push(
self.emit_match_with_cascade(
MatchIR::epsilon(entry, down_wildcard)
.nav(nav)
.node_type(node_type)
.neg_fields(neg_fields)
.pre_effects(capture.pre)
.into(),
.pre_effects(capture.pre),
);

entry
Expand Down Expand Up @@ -220,13 +216,12 @@ impl Compiler<'_> {
None => NodeTypeIR::Any, // `_` wildcard matches any node
};

self.instructions.push(
self.emit_match_with_cascade(
MatchIR::epsilon(entry, exit)
.nav(nav)
.node_type(node_type)
.pre_effects(capture.pre)
.post_effects(capture.post)
.into(),
.post_effects(capture.post),
);

entry
Expand Down
63 changes: 63 additions & 0 deletions crates/plotnik-lib/src/compile/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,69 @@ impl Compiler<'_> {
.push(MatchIR::at(label).next_many(successors).into());
}

/// Emit a Match instruction, cascading pre-effects if they exceed the bytecode limit.
///
/// When `pre_effects.len() > MAX_PRE_EFFECTS` (7), splits overflow into leading
/// epsilon transitions to avoid bytecode encoding overflow. The original label
/// is preserved as the entry point of the cascade.
///
/// Returns the entry label (same as `instr.label`).
pub(super) fn emit_match_with_cascade(&mut self, mut instr: MatchIR) -> Label {
use crate::bytecode::MAX_PRE_EFFECTS;

let entry = instr.label;

if instr.pre_effects.len() <= MAX_PRE_EFFECTS {
self.instructions.push(instr.into());
return entry;
}

// Move all pre-effects to epsilon chain
let all_effects = std::mem::take(&mut instr.pre_effects);

// Create new label for the actual match instruction
let match_label = self.fresh_label();
instr.label = match_label;
self.instructions.push(instr.into());

// Emit cascade from entry → ... → match_label
self.emit_effects_chain(entry, match_label, all_effects);

entry
}

/// Emit a chain of epsilon transitions to execute effects in order.
///
/// Splits effects into batches of `MAX_PRE_EFFECTS` (7), emitting epsilon
/// transitions: `entry → intermediate1 → ... → exit`.
fn emit_effects_chain(&mut self, entry: Label, exit: Label, mut effects: Vec<EffectIR>) {
use crate::bytecode::MAX_PRE_EFFECTS;

if effects.is_empty() {
// Just link entry to exit
self.instructions.push(MatchIR::epsilon(entry, exit).into());
return;
}

if effects.len() <= MAX_PRE_EFFECTS {
self.instructions
.push(MatchIR::epsilon(entry, exit).pre_effects(effects).into());
return;
}

// Take first batch, recurse for rest
let first_batch: Vec<_> = effects.drain(..MAX_PRE_EFFECTS).collect();
let intermediate = self.fresh_label();

self.instructions.push(
MatchIR::epsilon(entry, intermediate)
.pre_effects(first_batch)
.into(),
);

self.emit_effects_chain(intermediate, exit, effects);
}

/// Emit a wildcard navigation step that accepts any node.
///
/// Used for skip-retry logic in quantifiers: navigates to the next position
Expand Down
44 changes: 44 additions & 0 deletions crates/plotnik-lib/src/engine/engine_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,3 +529,47 @@ fn wildcard_named_skips_anonymous() {
fn wildcard_bare_matches_anonymous() {
snap!("Q = (program (return_statement _ @x))", "return 42");
}

/// BUG: Unlabeled alternation with 5+ branches and unique captures per branch.
///
/// Each branch in an unlabeled alternation needs to inject null for captures
/// it doesn't have. With 5 branches (@a, @b, @c, @d, @e), each branch needs
/// 8 pre-effects (4 nulls + 4 sets). This exceeds the 3-bit limit (max 7)
/// and caused bytecode encoding overflow, resulting in garbage addresses.
///
/// Fix: Cascade overflow pre-effects into leading epsilon transitions.
#[test]
fn regression_unlabeled_alternation_5_branches_captures() {
snap!(
indoc! {r#"
Q = (program (expression_statement [
(identifier) @a
(number) @b
(string) @c
(binary_expression) @d
(call_expression) @e
]))
"#},
"foo"
);
}

/// Same bug with 8 branches - even more pre-effects requiring multiple cascade steps.
#[test]
fn regression_unlabeled_alternation_8_branches_captures() {
snap!(
indoc! {r#"
Q = (program (expression_statement [
(identifier) @a
(number) @b
(string) @c
(binary_expression) @d
(call_expression) @e
(member_expression) @f
(array) @g
(object) @h
]))
"#},
"42"
);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
source: crates/plotnik-lib/src/engine/engine_tests.rs
---
Q = (program (expression_statement [
(identifier) @a
(number) @b
(string) @c
(binary_expression) @d
(call_expression) @e
]))
---
foo
---
{
"b": null,
"c": null,
"d": null,
"e": null,
"a": {
"kind": "identifier",
"text": "foo",
"span": [
0,
3
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
source: crates/plotnik-lib/src/engine/engine_tests.rs
---
Q = (program (expression_statement [
(identifier) @a
(number) @b
(string) @c
(binary_expression) @d
(call_expression) @e
(member_expression) @f
(array) @g
(object) @h
]))
---
42
---
{
"a": null,
"c": null,
"d": null,
"e": null,
"f": null,
"g": null,
"h": null,
"b": {
"kind": "number",
"text": "42",
"span": [
0,
2
]
}
}