From 1daf1db57f4cd1607117136ef0512a9953dbacb3 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Tue, 16 Dec 2025 22:23:46 -0300 Subject: [PATCH 01/11] refactor: Split Query into stages --- crates/plotnik-lib/src/parser/core.rs | 79 ++-- .../parser/tests/recovery/coverage_tests.rs | 12 +- crates/plotnik-lib/src/query/alt_kinds.rs | 19 +- .../query/{recursion.rs => dependencies.rs} | 336 +++++++++++------- ...cursion_tests.rs => dependencies_tests.rs} | 0 crates/plotnik-lib/src/query/expr_arity.rs | 4 +- .../src/query/graph_master_test.rs | 196 +++++----- crates/plotnik-lib/src/query/infer.rs | 114 +----- crates/plotnik-lib/src/query/link.rs | 14 +- crates/plotnik-lib/src/query/mod.rs | 48 ++- crates/plotnik-lib/src/query/query.rs | 97 +++++ crates/plotnik-lib/src/query/symbol_table.rs | 102 +++--- crates/plotnik-lib/src/query/visitor.rs | 19 +- 13 files changed, 561 insertions(+), 479 deletions(-) rename crates/plotnik-lib/src/query/{recursion.rs => dependencies.rs} (60%) rename crates/plotnik-lib/src/query/{recursion_tests.rs => dependencies_tests.rs} (100%) create mode 100644 crates/plotnik-lib/src/query/query.rs diff --git a/crates/plotnik-lib/src/parser/core.rs b/crates/plotnik-lib/src/parser/core.rs index 6d9fd777..33a61704 100644 --- a/crates/plotnik-lib/src/parser/core.rs +++ b/crates/plotnik-lib/src/parser/core.rs @@ -10,9 +10,9 @@ use crate::diagnostics::{DiagnosticKind, Diagnostics}; #[derive(Debug)] pub struct ParseResult { - pub root: Root, - pub diagnostics: Diagnostics, - pub exec_fuel_consumed: u32, + pub ast: Root, + pub diag: Diagnostics, + pub fuel_consumed: u32, } #[derive(Debug, Clone, Copy)] @@ -34,14 +34,14 @@ pub struct Parser<'src> { pub(super) last_diagnostic_pos: Option, pub(super) delimiter_stack: Vec, pub(super) debug_fuel: std::cell::Cell, - exec_fuel_initial: Option, - exec_fuel_remaining: Option, - recursion_fuel_limit: Option, + fuel_initial: u32, + fuel_remaining: u32, + max_depth: u32, fatal_error: Option, } impl<'src> Parser<'src> { - pub fn new(source: &'src str, tokens: Vec) -> Self { + pub fn new(source: &'src str, tokens: Vec, fuel: u32, max_depth: u32) -> Self { Self { source, tokens, @@ -53,32 +53,21 @@ impl<'src> Parser<'src> { last_diagnostic_pos: None, delimiter_stack: Vec::with_capacity(8), debug_fuel: std::cell::Cell::new(256), - exec_fuel_initial: None, - exec_fuel_remaining: None, - recursion_fuel_limit: None, + fuel_initial: fuel, + fuel_remaining: fuel, + max_depth, fatal_error: None, } } - pub fn with_exec_fuel(mut self, limit: Option) -> Self { - self.exec_fuel_initial = limit; - self.exec_fuel_remaining = limit; - self - } - - pub fn with_recursion_fuel(mut self, limit: Option) -> Self { - self.recursion_fuel_limit = limit; - self - } - pub fn parse(mut self) -> Result { self.parse_root(); let (cst, diagnostics, exec_fuel_consumed) = self.finish()?; let root = Root::cast(SyntaxNode::new_root(cst)).expect("parser always produces Root"); Ok(ParseResult { - root, - diagnostics, - exec_fuel_consumed, + ast: root, + diag: diagnostics, + fuel_consumed: exec_fuel_consumed, }) } @@ -87,11 +76,8 @@ impl<'src> Parser<'src> { if let Some(err) = self.fatal_error { return Err(err); } - let exec_fuel_consumed = match (self.exec_fuel_initial, self.exec_fuel_remaining) { - (Some(initial), Some(remaining)) => initial.saturating_sub(remaining), - _ => 0, - }; - Ok((self.builder.finish(), self.diagnostics, exec_fuel_consumed)) + let fuel_consumed = self.fuel_initial.saturating_sub(self.fuel_remaining); + Ok((self.builder.finish(), self.diagnostics, 
fuel_consumed)) } pub(super) fn has_fatal_error(&self) -> bool { @@ -115,14 +101,13 @@ impl<'src> Parser<'src> { } fn consume_exec_fuel(&mut self) { - if let Some(ref mut remaining) = self.exec_fuel_remaining { - if *remaining == 0 { - if self.fatal_error.is_none() { - self.fatal_error = Some(Error::ExecFuelExhausted); - } - return; - } - *remaining -= 1; + if self.fuel_remaining > 0 { + self.fuel_remaining -= 1; + return; + } + + if self.fatal_error.is_none() { + self.fatal_error = Some(Error::ExecFuelExhausted); } } @@ -337,17 +322,17 @@ impl<'src> Parser<'src> { } pub(super) fn enter_recursion(&mut self) -> bool { - if let Some(limit) = self.recursion_fuel_limit - && self.depth >= limit - { - if self.fatal_error.is_none() { - self.fatal_error = Some(Error::RecursionLimitExceeded); - } - return false; + if self.depth < self.max_depth { + self.depth += 1; + self.reset_debug_fuel(); + return true; } - self.depth += 1; - self.reset_debug_fuel(); - true + + if self.fatal_error.is_none() { + self.fatal_error = Some(Error::RecursionLimitExceeded); + } + + false } pub(super) fn exit_recursion(&mut self) { diff --git a/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs b/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs index 52c33c7f..c6544e4a 100644 --- a/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs +++ b/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs @@ -12,7 +12,7 @@ fn deeply_nested_trees_hit_recursion_limit() { input.push(')'); } - let result = Query::new(&input).with_recursion_fuel(Some(depth)).exec(); + let result = Query::new(&input).with_recursion_fuel(depth).exec(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -32,7 +32,7 @@ fn deeply_nested_sequences_hit_recursion_limit() { input.push('}'); } - let result = Query::new(&input).with_recursion_fuel(Some(depth)).exec(); + let result = Query::new(&input).with_recursion_fuel(depth).exec(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -52,7 +52,7 @@ fn deeply_nested_alternations_hit_recursion_limit() { input.push(']'); } - let result = Query::new(&input).with_recursion_fuel(Some(depth)).exec(); + let result = Query::new(&input).with_recursion_fuel(depth).exec(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -69,7 +69,7 @@ fn many_trees_exhaust_exec_fuel() { input.push_str("(a) "); } - let result = Query::new(&input).with_exec_fuel(Some(100)).exec(); + let result = Query::new(&input).with_exec_fuel(100).exec(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), @@ -91,7 +91,7 @@ fn many_branches_exhaust_exec_fuel() { } input.push(']'); - let result = Query::new(&input).with_exec_fuel(Some(100)).exec(); + let result = Query::new(&input).with_exec_fuel(100).exec(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), @@ -113,7 +113,7 @@ fn many_fields_exhaust_exec_fuel() { } input.push(')'); - let result = Query::new(&input).with_exec_fuel(Some(100)).exec(); + let result = Query::new(&input).with_exec_fuel(100).exec(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), diff --git a/crates/plotnik-lib/src/query/alt_kinds.rs b/crates/plotnik-lib/src/query/alt_kinds.rs index 26968472..91d6a604 100644 --- a/crates/plotnik-lib/src/query/alt_kinds.rs +++ b/crates/plotnik-lib/src/query/alt_kinds.rs @@ -7,30 +7,35 @@ use rowan::TextRange; use super::Query; use super::invariants::ensure_both_branch_kinds; -use super::visitor::{Visitor, walk_alt_expr, walk_root}; +use 
super::visitor::{Visitor, walk, walk_alt_expr}; use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::{AltExpr, AltKind, Branch, Root}; impl Query<'_> { pub(super) fn validate_alt_kinds(&mut self) { let mut visitor = AltKindsValidator { - diagnostics: &mut self.alt_kind_diagnostics, + diag: &mut self.alt_kind_diagnostics, }; - visitor.visit_root(&self.ast); + visitor.visit(&self.ast); } } +pub fn validate_alt_kinds(ast: &Root, diag: &mut Diagnostics) { + let mut visitor = AltKindsValidator { diag }; + visitor.visit(ast); +} + struct AltKindsValidator<'a> { - diagnostics: &'a mut Diagnostics, + diag: &'a mut Diagnostics, } impl Visitor for AltKindsValidator<'_> { - fn visit_root(&mut self, root: &Root) { + fn visit(&mut self, root: &Root) { assert!( root.exprs().next().is_none(), "alt_kind: unexpected bare Expr in Root (parser should wrap in Def)" ); - walk_root(self, root); + walk(self, root); } fn visit_alt_expr(&mut self, alt: &AltExpr) { @@ -63,7 +68,7 @@ impl AltKindsValidator<'_> { let untagged_range = branch_range(untagged_branch); - self.diagnostics + self.diag .report(DiagnosticKind::MixedAltBranches, untagged_range) .related_to("tagged branch here", tagged_range) .emit(); diff --git a/crates/plotnik-lib/src/query/recursion.rs b/crates/plotnik-lib/src/query/dependencies.rs similarity index 60% rename from crates/plotnik-lib/src/query/recursion.rs rename to crates/plotnik-lib/src/query/dependencies.rs index fd52844f..a86059ff 100644 --- a/crates/plotnik-lib/src/query/recursion.rs +++ b/crates/plotnik-lib/src/query/dependencies.rs @@ -1,35 +1,98 @@ -//! Escape path analysis for recursive definitions. +//! Dependency analysis and recursion validation. //! -//! Detects patterns that can never match because they require -//! infinitely nested structures (recursion with no escape path), -//! or infinite runtime loops where the cursor never advances (left recursion). +//! This module computes the dependency graph of definitions, identifies +//! Strongly Connected Components (SCCs), and validates that recursive +//! definitions are well-formed (guarded and escapable). +//! +//! The computed SCCs are exposed in reverse topological order (leaves first), +//! which is useful for passes that need to process dependencies before +//! dependents (like type inference). use indexmap::{IndexMap, IndexSet}; use rowan::TextRange; -use super::Query; -use super::visitor::{Visitor, walk_expr}; +use crate::Diagnostics; use crate::diagnostics::DiagnosticKind; -use crate::parser::{AnonymousNode, Def, Expr, NamedNode, Ref, SeqExpr}; +use crate::parser::{AnonymousNode, Def, Expr, NamedNode, Ref, Root, SeqExpr}; +use crate::query::symbol_table::SymbolTable; +use crate::query::visitor::{Visitor, walk_expr}; + +/// Result of dependency analysis. +#[derive(Debug, Clone, Default)] +pub struct DependencyAnalysis<'q> { + /// Strongly connected components in reverse topological order. + /// + /// - `sccs[0]` has no dependencies (or depends only on things not in this list). + /// - `sccs.last()` depends on everything else. + /// - Definitions within an SCC are mutually recursive. + /// - Every definition in the symbol table appears exactly once. + pub sccs: Vec>, +} + +/// Analyze dependencies between definitions. +/// +/// Returns the SCCs in reverse topological order. 
+pub fn analyze_dependencies<'q>(symbol_table: &SymbolTable<'q>) -> DependencyAnalysis<'q> { + let sccs = SccFinder::find(symbol_table); + DependencyAnalysis { sccs } +} + +/// Validate recursion using the pre-computed dependency analysis. +pub fn validate_recursion<'q>( + analysis: &DependencyAnalysis<'q>, + ast: &Root, + symbol_table: &SymbolTable<'q>, + diag: &mut Diagnostics, +) { + let mut validator = RecursionValidator { + ast, + symbol_table, + diag, + }; + validator.validate(&analysis.sccs); +} -impl Query<'_> { - pub(super) fn validate_recursion(&mut self) { - let sccs = SccFinder::find(self); +// ----------------------------------------------------------------------------- +// Recursion Validator +// ----------------------------------------------------------------------------- +struct RecursionValidator<'a, 'q, 'd> { + ast: &'a Root, + symbol_table: &'a SymbolTable<'q>, + diag: &'d mut Diagnostics, +} + +impl<'a, 'q, 'd> RecursionValidator<'a, 'q, 'd> { + fn validate(&mut self, sccs: &[Vec<&'q str>]) { for scc in sccs { self.validate_scc(scc); } } - fn validate_scc(&mut self, scc: Vec) { - let scc_set: IndexSet<&str> = scc.iter().map(|s| s.as_str()).collect(); + fn validate_scc(&mut self, scc: &[&'q str]) { + // Filter out trivial non-recursive components. + // A component is recursive if it has >1 node, or 1 node that references itself. + if scc.len() == 1 { + let name = scc[0]; + let is_self_recursive = self + .symbol_table + .get(name) + .map(|body| collect_refs(body, self.symbol_table).contains(name)) + .unwrap_or(false); + + if !is_self_recursive { + return; + } + } + + let scc_set: IndexSet<&'q str> = scc.iter().copied().collect(); // 1. Check for infinite tree structure (Escape Analysis) // A valid recursive definition must have a non-recursive path. // If NO definition in the SCC has an escape path, the whole group is invalid. let has_escape = scc.iter().any(|name| { self.symbol_table - .get(name.as_str()) + .get(*name) .map(|body| expr_has_escape(body, &scc_set)) .unwrap_or(true) }); @@ -37,11 +100,11 @@ impl Query<'_> { if !has_escape { // Find a cycle to report. Any cycle within the SCC is an infinite recursion loop // because there are no escape paths. - if let Some(raw_chain) = self.find_cycle(&scc, &scc_set, |_, expr, target| { + if let Some(raw_chain) = self.find_cycle(scc, &scc_set, |_, expr, target| { find_ref_range(expr, target) }) { let chain = self.format_chain(raw_chain, false); - self.report_cycle(DiagnosticKind::RecursionNoEscape, &scc, chain); + self.report_cycle(DiagnosticKind::RecursionNoEscape, scc, chain); } return; } @@ -49,11 +112,11 @@ impl Query<'_> { // 2. Check for infinite loops (Guarded Recursion Analysis) // Even if there is an escape, every recursive cycle must consume input (be guarded). // We look for a cycle composed entirely of unguarded references. - if let Some(raw_chain) = self.find_cycle(&scc, &scc_set, |_, expr, target| { + if let Some(raw_chain) = self.find_cycle(scc, &scc_set, |_, expr, target| { find_unguarded_ref_range(expr, target) }) { let chain = self.format_chain(raw_chain, true); - self.report_cycle(DiagnosticKind::DirectRecursion, &scc, chain); + self.report_cycle(DiagnosticKind::DirectRecursion, scc, chain); } } @@ -61,21 +124,20 @@ impl Query<'_> { /// `get_edge_location` returns the location of a reference from `expr` to `target`. 
fn find_cycle( &self, - nodes: &[String], - domain: &IndexSet<&str>, - get_edge_location: impl Fn(&Query, &Expr, &str) -> Option, - ) -> Option> { + nodes: &[&'q str], + domain: &IndexSet<&'q str>, + get_edge_location: impl Fn(&Self, &Expr, &str) -> Option, + ) -> Option> { let mut adj = IndexMap::new(); for name in nodes { - if let Some(body) = self.symbol_table.get(name.as_str()) { + if let Some(body) = self.symbol_table.get(*name) { let neighbors = domain .iter() .filter_map(|target| { - get_edge_location(self, body, target) - .map(|range| (target.to_string(), range)) + get_edge_location(self, body, target).map(|range| (*target, range)) }) .collect::>(); - adj.insert(name.clone(), neighbors); + adj.insert(*name, neighbors); } } @@ -84,7 +146,7 @@ impl Query<'_> { fn format_chain( &self, - chain: Vec<(TextRange, String)>, + chain: Vec<(TextRange, &'q str)>, is_unguarded: bool, ) -> Vec<(TextRange, String)> { if chain.len() == 1 { @@ -115,7 +177,7 @@ impl Query<'_> { fn report_cycle( &mut self, kind: DiagnosticKind, - scc: &[String], + scc: &[&'q str], chain: Vec<(TextRange, String)>, ) { let primary_loc = chain @@ -129,7 +191,7 @@ impl Query<'_> { None }; - let mut builder = self.recursion_diagnostics.report(kind, primary_loc); + let mut builder = self.diag.report(kind, primary_loc); for (range, msg) in chain { builder = builder.related_to(msg, range); @@ -144,13 +206,13 @@ impl Query<'_> { fn find_def_info_containing( &self, - scc: &[String], + scc: &[&'q str], range: TextRange, ) -> Option<(String, TextRange)> { scc.iter() .find(|name| { self.symbol_table - .get(name.as_str()) + .get(*name) .map(|body| body.text_range().contains_range(range)) .unwrap_or(false) }) @@ -169,19 +231,98 @@ impl Query<'_> { } } -struct CycleFinder<'a> { - adj: &'a IndexMap>, - visited: IndexSet, - on_path: IndexMap, - path: Vec, +// ----------------------------------------------------------------------------- +// SCC Finder (Tarjan's Algorithm) +// ----------------------------------------------------------------------------- + +struct SccFinder<'a, 'q> { + symbol_table: &'a SymbolTable<'q>, + index: usize, + stack: Vec<&'q str>, + on_stack: IndexSet<&'q str>, + indices: IndexMap<&'q str, usize>, + lowlinks: IndexMap<&'q str, usize>, + sccs: Vec>, +} + +impl<'a, 'q> SccFinder<'a, 'q> { + fn find(symbol_table: &'a SymbolTable<'q>) -> Vec> { + let mut finder = Self { + symbol_table, + index: 0, + stack: Vec::new(), + on_stack: IndexSet::new(), + indices: IndexMap::new(), + lowlinks: IndexMap::new(), + sccs: Vec::new(), + }; + + for &name in symbol_table.keys() { + if !finder.indices.contains_key(name) { + finder.strongconnect(name); + } + } + + finder.sccs + } + + fn strongconnect(&mut self, name: &'q str) { + self.indices.insert(name, self.index); + self.lowlinks.insert(name, self.index); + self.index += 1; + self.stack.push(name); + self.on_stack.insert(name); + + if let Some(body) = self.symbol_table.get(name) { + let refs = collect_refs(body, self.symbol_table); + for ref_name in refs { + // We've already resolved to canonical &'q str in collect_refs + // so we can use it directly. 
+ if !self.indices.contains_key(ref_name) { + self.strongconnect(ref_name); + let ref_lowlink = self.lowlinks[ref_name]; + let my_lowlink = self.lowlinks.get_mut(name).unwrap(); + *my_lowlink = (*my_lowlink).min(ref_lowlink); + } else if self.on_stack.contains(ref_name) { + let ref_index = self.indices[ref_name]; + let my_lowlink = self.lowlinks.get_mut(name).unwrap(); + *my_lowlink = (*my_lowlink).min(ref_index); + } + } + } + + if self.lowlinks[name] == self.indices[name] { + let mut scc = Vec::new(); + loop { + let w = self.stack.pop().unwrap(); + self.on_stack.swap_remove(w); + scc.push(w); + if w == name { + break; + } + } + self.sccs.push(scc); + } + } +} + +// ----------------------------------------------------------------------------- +// Cycle Finder +// ----------------------------------------------------------------------------- + +struct CycleFinder<'a, 'q> { + adj: &'a IndexMap<&'q str, Vec<(&'q str, TextRange)>>, + visited: IndexSet<&'q str>, + on_path: IndexMap<&'q str, usize>, + path: Vec<&'q str>, edges: Vec, } -impl<'a> CycleFinder<'a> { +impl<'a, 'q> CycleFinder<'a, 'q> { fn find( - nodes: &[String], - adj: &'a IndexMap>, - ) -> Option> { + nodes: &[&'q str], + adj: &'a IndexMap<&'q str, Vec<(&'q str, TextRange)>>, + ) -> Option> { let mut finder = Self { adj, visited: IndexSet::new(), @@ -198,7 +339,7 @@ impl<'a> CycleFinder<'a> { None } - fn dfs(&mut self, current: &String) -> Option> { + fn dfs(&mut self, current: &'q str) -> Option> { if self.on_path.contains_key(current) { return None; } @@ -207,9 +348,9 @@ impl<'a> CycleFinder<'a> { return None; } - self.visited.insert(current.clone()); - self.on_path.insert(current.clone(), self.path.len()); - self.path.push(current.clone()); + self.visited.insert(current); + self.on_path.insert(current, self.path.len()); + self.path.push(current); if let Some(neighbors) = self.adj.get(current) { for (target, range) in neighbors { @@ -217,9 +358,9 @@ impl<'a> CycleFinder<'a> { // Cycle detected! 
let mut chain = Vec::new(); for i in start_index..self.path.len() - 1 { - chain.push((self.edges[i], self.path[i + 1].clone())); + chain.push((self.edges[i], self.path[i + 1])); } - chain.push((*range, target.clone())); + chain.push((*range, *target)); return Some(chain); } @@ -237,89 +378,9 @@ impl<'a> CycleFinder<'a> { } } -struct SccFinder<'a, 'src> { - query: &'a Query<'src>, - index: usize, - stack: Vec, - on_stack: IndexSet, - indices: IndexMap, - lowlinks: IndexMap, - sccs: Vec>, -} - -impl<'a, 'src> SccFinder<'a, 'src> { - fn find(query: &'a Query<'src>) -> Vec> { - let mut finder = Self { - query, - index: 0, - stack: Vec::new(), - on_stack: IndexSet::new(), - indices: IndexMap::new(), - lowlinks: IndexMap::new(), - sccs: Vec::new(), - }; - - for name in query.symbol_table.keys() { - if !finder.indices.contains_key(*name) { - finder.strongconnect(name); - } - } - - finder - .sccs - .into_iter() - .filter(|scc| { - scc.len() > 1 - || query - .symbol_table - .get(scc[0].as_str()) - .map(|body| collect_refs(body).contains(scc[0].as_str())) - .unwrap_or(false) - }) - .collect() - } - - fn strongconnect(&mut self, name: &str) { - self.indices.insert(name.to_string(), self.index); - self.lowlinks.insert(name.to_string(), self.index); - self.index += 1; - self.stack.push(name.to_string()); - self.on_stack.insert(name.to_string()); - - if let Some(body) = self.query.symbol_table.get(name) { - let refs = collect_refs(body); - for ref_name in refs { - if !self.query.symbol_table.contains_key(ref_name.as_str()) { - continue; - } - - if !self.indices.contains_key(&ref_name) { - self.strongconnect(&ref_name); - let ref_lowlink = self.lowlinks[&ref_name]; - let my_lowlink = self.lowlinks.get_mut(name).unwrap(); - *my_lowlink = (*my_lowlink).min(ref_lowlink); - } else if self.on_stack.contains(&ref_name) { - let ref_index = self.indices[&ref_name]; - let my_lowlink = self.lowlinks.get_mut(name).unwrap(); - *my_lowlink = (*my_lowlink).min(ref_index); - } - } - } - - if self.lowlinks[name] == self.indices[name] { - let mut scc = Vec::new(); - loop { - let w = self.stack.pop().unwrap(); - self.on_stack.swap_remove(&w); - scc.push(w.clone()); - if w == name { - break; - } - } - self.sccs.push(scc); - } - } -} +// ----------------------------------------------------------------------------- +// Helper Visitors +// ----------------------------------------------------------------------------- fn expr_has_escape(expr: &Expr, scc: &IndexSet<&str>) -> bool { match expr { @@ -368,21 +429,28 @@ fn expr_guarantees_consumption(expr: &Expr) -> bool { } } -struct RefCollector<'a> { - refs: &'a mut IndexSet, +struct RefCollector<'a, 'q> { + symbol_table: &'a SymbolTable<'q>, + refs: &'a mut IndexSet<&'q str>, } -impl Visitor for RefCollector<'_> { +impl<'a, 'q> Visitor for RefCollector<'a, 'q> { fn visit_ref(&mut self, r: &Ref) { if let Some(name) = r.name() { - self.refs.insert(name.text().to_string()); + // We immediately resolve to canonical &'q str keys to avoid allocations + if let Some((&k, _)) = self.symbol_table.get_key_value(name.text()) { + self.refs.insert(k); + } } } } -fn collect_refs(expr: &Expr) -> IndexSet { +fn collect_refs<'q>(expr: &Expr, symbol_table: &SymbolTable<'q>) -> IndexSet<&'q str> { let mut refs = IndexSet::new(); - let mut visitor = RefCollector { refs: &mut refs }; + let mut visitor = RefCollector { + symbol_table, + refs: &mut refs, + }; visitor.visit_expr(expr); refs } diff --git a/crates/plotnik-lib/src/query/recursion_tests.rs 
b/crates/plotnik-lib/src/query/dependencies_tests.rs similarity index 100% rename from crates/plotnik-lib/src/query/recursion_tests.rs rename to crates/plotnik-lib/src/query/dependencies_tests.rs diff --git a/crates/plotnik-lib/src/query/expr_arity.rs b/crates/plotnik-lib/src/query/expr_arity.rs index af2a3dd1..2157f2cb 100644 --- a/crates/plotnik-lib/src/query/expr_arity.rs +++ b/crates/plotnik-lib/src/query/expr_arity.rs @@ -24,10 +24,10 @@ impl Query<'_> { let root = self.ast.clone(); let mut computer = ArityComputer { query: self }; - computer.visit_root(&root); + computer.visit(&root); let mut validator = ArityValidator { query: self }; - validator.visit_root(&root); + validator.visit(&root); } pub(super) fn get_arity(&self, node: &SyntaxNode) -> Option { diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index 7cce57e8..d21166ad 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -511,109 +511,109 @@ fn golden_master_comprehensive() { TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ - Identifier = Node - RefSimple = () - WildcardCapture = Node - UntaggedSymmetric = Node - UntaggedCaptured = UntaggedCapturedScope3 - TaggedCaptured = TaggedCapturedScope13 - StringLiteral = Node + SimpleCapture = Node StringCapture = str + AnchorFirst = Node + AnchorLast = Node + DeepNest = Node StarQuant = [Node] - SimpleCapture = Node - RefChain = () - RefCaptured = Node - QisSequence = T16 - QisNode = T18 PlusQuant = [Node]⁺ OptQuant = Node? + QisNode = T09 + QisSequence = T11 NoQis = [Node] - NoCaptures = () - NestedScopes = NestedScopesScope24 - NestedQuant = T27 - DeepNest = Node + TaggedCaptured = TaggedCapturedScope14 + UntaggedSymmetric = Node + UntaggedCaptured = UntaggedCapturedScope20 + CapturedSeq = CapturedSeqScope23 + NestedScopes = NestedScopesScope27 + Identifier = Node + RefSimple = () + RefCaptured = Node + RefChain = () CardinalityJoin = [Node]⁺ - CapturedSeq = CapturedSeqScope42 - AnchorLast = Node - AnchorFirst = Node + NestedQuant = T31 + WildcardCapture = Node + StringLiteral = Node + NoCaptures = () - UntaggedCapturedScope3 = { - x: Node? - y: Node? + MultiCapture = { + fn_name: str + fn_body: Node } - UntaggedAsymmetric = { - x: Node? - y: Node? + AnchorSibling = { + left: Node + right: Node } - UncapturedSeq = { - x: Node - y: Node + QisNodeScope8 = { + name: Node + body: Node } + T09 = [QisNodeScope8] + QisSequenceScope10 = { + key: Node + value: Node + } + T11 = [QisSequenceScope10] TaggedRoot = { Ok => Node Err => str } - TaggedMultiScope11 = { + TaggedCapturedScope14 = { + Left => Node + Right => Node + } + TaggedMultiScope15 = { k: Node v: Node } TaggedMulti = { Simple => Node - Complex => TaggedMultiScope11 + Complex => TaggedMultiScope15 } - TaggedCapturedScope13 = { - Left => Node - Right => Node - } - QisSequenceScope15 = { - key: Node - value: Node + UntaggedAsymmetric = { + x: Node? + y: Node? } - T16 = [QisSequenceScope15] - QisNodeScope17 = { - name: Node - body: Node + UntaggedCapturedScope20 = { + x: Node? + y: Node? 
} - T18 = [QisNodeScope17] - NestedScopesScope22 = { a: Node } - NestedScopesScope23 = { b: Node } - NestedScopesScope24 = { - inner1: NestedScopesScope22 - inner2: NestedScopesScope23 + CapturedSeqScope23 = { + x: Node + y: Node } - NestedQuantScope25 = { inner: [Node] } - T27 = [NestedQuantScope25]⁺ - MultiCapture = { - fn_name: str - fn_body: Node + UncapturedSeq = { + x: Node + y: Node } - EmptyBranch = { - Some => Node - None => () + NestedScopesScope25 = { a: Node } + NestedScopesScope26 = { b: Node } + NestedScopesScope27 = { + inner1: NestedScopesScope25 + inner2: NestedScopesScope26 } - ComplexScope30 = { p: Node } - T31 = [ComplexScope30] - T33 = T31? - ComplexScope32 = { + NestedQuantScope29 = { inner: [Node] } + T31 = [NestedQuantScope29]⁺ + ComplexScope32 = { p: Node } + T33 = [ComplexScope32] + T35 = T33? + ComplexScope34 = { fn_name: str? - params: T33 + params: T35 fn_body: Node? cls_name: str? cls_body: Node? } - T38 = [ComplexScope32] + T40 = [ComplexScope34] Complex = { mod_name: str imports: [Node] - items: T38 + items: T40 } - CapturedSeqScope42 = { - x: Node - y: Node - } - AnchorSibling = { - left: Node - right: Node + EmptyBranch = { + Some => Node + None => () } "#); } @@ -714,18 +714,18 @@ fn golden_navigation_patterns() { TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ - NavUpMulti = Node - NavUpAnchor = Node - NavUp = Node NavStay = Node - NavDownAnchor = Node NavDown = Node + NavDownAnchor = Node + NavUp = Node + NavUpAnchor = Node + NavUpMulti = Node - NavNextAnchor = { + NavNext = { a: Node b: Node } - NavNext = { + NavNextAnchor = { a: Node b: Node } @@ -864,24 +864,23 @@ fn golden_type_inference() { TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ + FlatScope = Node BaseWithCapture = Node - SyntheticNames = SyntheticNamesScope7 RefOpaque = () RefCaptured = Node + CardMult = [Node] QisTwo = T09 NoQisOne = [Node] - FlatScope = Node - CardMult = [Node] + SyntheticNames = SyntheticNamesScope13 - TaggedInline = { - x: Node? - y: Node? - } TaggedAtRoot = { A => Node B => Node } - SyntheticNamesScope7 = { bar: Node } + TaggedInline = { + x: Node? + y: Node? + } QisTwoScope8 = { x: Node y: Node @@ -896,6 +895,7 @@ fn golden_type_inference() { Full => MissingFieldScope11 Partial => Node } + SyntheticNamesScope13 = { bar: Node } "); } @@ -964,21 +964,21 @@ fn golden_effect_patterns() { TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ - EffVariant = EffVariantScope3 + EffCapture = Node EffToString = str + EffArray = [Node] EffObject = EffObjectScope4 + EffVariant = EffVariantScope5 EffClear = Node? - EffCapture = Node - EffArray = [Node] - EffVariantScope3 = { - A => Node - B => Node - } EffObjectScope4 = { x: Node y: Node } + EffVariantScope5 = { + A => Node + B => Node + } "); } @@ -1074,20 +1074,20 @@ fn golden_quantifier_graphs() { TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ - QuantSeq = T04 + GreedyStar = [Node] + GreedyPlus = [Node]⁺ Optional = Node? 
- NestedQuant = T08 LazyStar = [Node] LazyPlus = [Node]⁺ - GreedyStar = [Node] - GreedyPlus = [Node]⁺ + QuantSeq = T09 + NestedQuant = T12 - QuantSeqScope3 = { + QuantSeqScope8 = { x: Node y: Node } - T04 = [QuantSeqScope3] - NestedQuantScope6 = { inners: [Node] } - T08 = [NestedQuantScope6]⁺ + T09 = [QuantSeqScope8] + NestedQuantScope10 = { inners: [Node] } + T12 = [NestedQuantScope10]⁺ "); } diff --git a/crates/plotnik-lib/src/query/infer.rs b/crates/plotnik-lib/src/query/infer.rs index df0882e8..d802e4ae 100644 --- a/crates/plotnik-lib/src/query/infer.rs +++ b/crates/plotnik-lib/src/query/infer.rs @@ -785,18 +785,21 @@ impl<'a> Query<'a> { pub(super) fn infer_types(&mut self) { // Collect QIS triggers upfront to avoid borrowing issues let qis_triggers: HashSet<_> = self.qis_triggers.keys().cloned().collect(); - let sorted = self.topological_sort_definitions_ast(); let mut ctx = InferenceContext::new(self.source, qis_triggers); // Process definitions in dependency order - for (name, body) in &sorted { - let type_id = ctx.infer_definition(name, body); - ctx.definition_types.insert(name, type_id); + for scc in &self.dependency_analysis.sccs { + for name in scc { + if let Some(body) = self.symbol_table.get(name) { + let type_id = ctx.infer_definition(name, body); + ctx.definition_types.insert(name, type_id); + } + } } // Preserve symbol table order for entrypoints - for (name, _) in &sorted { + for (name, _) in self.symbol_table.iter() { if let Some(&type_id) = ctx.definition_types.get(name) { self.type_info.entrypoint_types.insert(*name, type_id); } @@ -805,105 +808,4 @@ impl<'a> Query<'a> { self.type_info.diagnostics = ctx.diagnostics; self.type_info.errors = ctx.errors; } - - /// Topologically sort definitions for processing order. - fn topological_sort_definitions_ast(&self) -> Vec<(&'a str, ast::Expr)> { - use std::collections::{HashSet, VecDeque}; - - let definitions: Vec<_> = self - .symbol_table - .iter() - .map(|(&name, body)| (name, body.clone())) - .collect(); - let def_names: HashSet<&str> = definitions.iter().map(|(name, _)| *name).collect(); - - // Build dependency graph from AST references - let mut deps: HashMap<&str, Vec<&str>> = HashMap::new(); - for (name, body) in &definitions { - let refs = Self::collect_ast_references(body, &def_names); - deps.insert(name, refs); - } - - // Kahn's algorithm - let mut in_degree: HashMap<&str, usize> = HashMap::new(); - for (name, _) in &definitions { - in_degree.insert(name, 0); - } - for refs in deps.values() { - for &dep in refs { - *in_degree.entry(dep).or_insert(0) += 1; - } - } - - let mut zero_degree: Vec<&str> = in_degree - .iter() - .filter(|(_, deg)| **deg == 0) - .map(|(&name, _)| name) - .collect(); - zero_degree.sort(); - let mut queue: VecDeque<&str> = zero_degree.into_iter().collect(); - - let mut sorted_names = Vec::new(); - while let Some(name) = queue.pop_front() { - sorted_names.push(name); - if let Some(refs) = deps.get(name) { - for &dep in refs { - if let Some(deg) = in_degree.get_mut(dep) { - *deg = deg.saturating_sub(1); - if *deg == 0 { - queue.push_back(dep); - } - } - } - } - } - - // Reverse so dependencies come first - sorted_names.reverse(); - - // Add any remaining (cyclic) definitions - for (name, _) in &definitions { - if !sorted_names.contains(name) { - sorted_names.push(name); - } - } - - // Build result with bodies - sorted_names - .into_iter() - .filter_map(|name| self.symbol_table.get(name).map(|body| (name, body.clone()))) - .collect() - } - - /// Collect references from an AST expression. 
- fn collect_ast_references<'b>(expr: &Expr, def_names: &HashSet<&'b str>) -> Vec<&'b str> { - let mut refs = Vec::new(); - Self::collect_ast_references_impl(expr, def_names, &mut refs); - refs - } - - fn collect_ast_references_impl<'b>( - expr: &Expr, - def_names: &HashSet<&'b str>, - refs: &mut Vec<&'b str>, - ) { - match expr { - Expr::Ref(r) => { - if let Some(name_token) = r.name() { - let name = name_token.text(); - if def_names.contains(name) && !refs.contains(&name) { - // Find the actual &'b str from the set - if let Some(&found) = def_names.iter().find(|&&n| n == name) { - refs.push(found); - } - } - } - } - _ => { - for child in expr.children() { - Self::collect_ast_references_impl(&child, def_names, refs); - } - } - } - } } diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index 105be07d..ff07c68c 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -16,7 +16,7 @@ use crate::parser::token_src; use super::Query; use super::utils::find_similar; -use super::visitor::{Visitor, walk_root}; +use super::visitor::{Visitor, walk}; /// Check if `child` is a subtype of `supertype`, recursively handling nested supertypes. #[allow(dead_code)] @@ -128,7 +128,7 @@ impl<'a> Query<'a> { fn resolve_node_types(&mut self, lang: &Lang) { let root = self.ast.clone(); let mut collector = NodeTypeCollector { query: self, lang }; - collector.visit_root(&root); + collector.visit(&root); } fn resolve_named_node(&mut self, node: &NamedNode, lang: &Lang) { @@ -171,7 +171,7 @@ impl<'a> Query<'a> { fn resolve_fields(&mut self, lang: &Lang) { let root = self.ast.clone(); let mut collector = FieldCollector { query: self, lang }; - collector.visit_root(&root); + collector.visit(&root); } fn resolve_field_by_token(&mut self, name_token: Option, lang: &Lang) { @@ -679,8 +679,8 @@ struct NodeTypeCollector<'a, 'q> { } impl Visitor for NodeTypeCollector<'_, '_> { - fn visit_root(&mut self, root: &ast::Root) { - walk_root(self, root); + fn visit(&mut self, root: &ast::Root) { + walk(self, root); } fn visit_named_node(&mut self, node: &ast::NamedNode) { @@ -721,8 +721,8 @@ struct FieldCollector<'a, 'q> { } impl Visitor for FieldCollector<'_, '_> { - fn visit_root(&mut self, root: &ast::Root) { - walk_root(self, root); + fn visit(&mut self, root: &ast::Root) { + walk(self, root); } fn visit_named_node(&mut self, node: &ast::NamedNode) { diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 2a653b9a..6d066604 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -15,6 +15,7 @@ mod utils; pub use printer::QueryPrinter; pub mod alt_kinds; +mod dependencies; pub mod expr_arity; pub mod graph; mod graph_build; @@ -24,7 +25,8 @@ pub mod infer; mod infer_dump; #[cfg(feature = "plotnik-langs")] pub mod link; -pub mod recursion; +#[allow(clippy::module_inception)] +pub mod query; pub mod symbol_table; pub mod visitor; @@ -38,6 +40,8 @@ pub use symbol_table::UNNAMED_DEF; #[cfg(test)] mod alt_kinds_tests; #[cfg(test)] +mod dependencies_tests; +#[cfg(test)] mod expr_arity_tests; #[cfg(test)] mod graph_build_tests; @@ -54,8 +58,6 @@ mod mod_tests; #[cfg(test)] mod printer_tests; #[cfg(test)] -mod recursion_tests; -#[cfg(test)] mod symbol_table_tests; use std::collections::{HashMap, HashSet}; @@ -70,6 +72,7 @@ use crate::diagnostics::Diagnostics; use crate::parser::cst::SyntaxKind; use crate::parser::lexer::lex; use crate::parser::{ParseResult, Parser, Root, SyntaxNode, 
ast}; +use crate::query::dependencies::DependencyAnalysis; const DEFAULT_EXEC_FUEL: u32 = 1_000_000; const DEFAULT_RECURSION_FUEL: u32 = 4096; @@ -106,8 +109,8 @@ pub struct Query<'q> { node_type_ids: HashMap<&'q str, Option>, #[cfg(feature = "plotnik-langs")] node_field_ids: HashMap<&'q str, Option>, - exec_fuel: Option, - recursion_fuel: Option, + exec_fuel: u32, + recursion_fuel: u32, exec_fuel_consumed: u32, parse_diagnostics: Diagnostics, alt_kind_diagnostics: Diagnostics, @@ -116,6 +119,7 @@ pub struct Query<'q> { expr_arity_diagnostics: Diagnostics, #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics, + dependency_analysis: DependencyAnalysis<'q>, // Graph compilation fields graph: BuildGraph<'q>, dead_nodes: HashSet, @@ -154,14 +158,15 @@ impl<'a> Query<'a> { node_type_ids: HashMap::new(), #[cfg(feature = "plotnik-langs")] node_field_ids: HashMap::new(), - exec_fuel: Some(DEFAULT_EXEC_FUEL), - recursion_fuel: Some(DEFAULT_RECURSION_FUEL), + exec_fuel: DEFAULT_EXEC_FUEL, + recursion_fuel: DEFAULT_RECURSION_FUEL, exec_fuel_consumed: 0, parse_diagnostics: Diagnostics::new(), alt_kind_diagnostics: Diagnostics::new(), resolve_diagnostics: Diagnostics::new(), recursion_diagnostics: Diagnostics::new(), expr_arity_diagnostics: Diagnostics::new(), + dependency_analysis: DependencyAnalysis::default(), #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics::new(), graph: BuildGraph::default(), @@ -179,7 +184,7 @@ impl<'a> Query<'a> { /// /// Execution fuel never replenishes. It protects against large inputs. /// Returns error from [`exec`](Self::exec) when exhausted. - pub fn with_exec_fuel(mut self, limit: Option) -> Self { + pub fn with_exec_fuel(mut self, limit: u32) -> Self { self.exec_fuel = limit; self } @@ -188,7 +193,7 @@ impl<'a> Query<'a> { /// /// Recursion fuel restores when exiting recursion. It protects against /// deeply nested input. Returns error from [`exec`](Self::exec) when exhausted. 
- pub fn with_recursion_fuel(mut self, limit: Option) -> Self { + pub fn with_recursion_fuel(mut self, limit: u32) -> Self { self.recursion_fuel = limit; self } @@ -201,7 +206,16 @@ impl<'a> Query<'a> { self.try_parse()?; self.validate_alt_kinds(); self.resolve_names(); - self.validate_recursion(); + // self.validate_recursion(); + + self.dependency_analysis = dependencies::analyze_dependencies(&self.symbol_table); + dependencies::validate_recursion( + &self.dependency_analysis, + &self.ast, + &self.symbol_table, + &mut self.recursion_diagnostics, + ); + self.infer_arities(); Ok(self) } @@ -243,17 +257,15 @@ impl<'a> Query<'a> { fn try_parse(&mut self) -> Result<()> { let tokens = lex(self.source); - let parser = Parser::new(self.source, tokens) - .with_exec_fuel(self.exec_fuel) - .with_recursion_fuel(self.recursion_fuel); + let parser = Parser::new(self.source, tokens, self.exec_fuel, self.recursion_fuel); let ParseResult { - root, - diagnostics, - exec_fuel_consumed, + ast, + diag, + fuel_consumed: exec_fuel_consumed, } = parser.parse()?; - self.ast = root; - self.parse_diagnostics = diagnostics; + self.ast = ast; + self.parse_diagnostics = diag; self.exec_fuel_consumed = exec_fuel_consumed; Ok(()) } diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs new file mode 100644 index 00000000..14742288 --- /dev/null +++ b/crates/plotnik-lib/src/query/query.rs @@ -0,0 +1,97 @@ +#![allow(unused)] +use crate::parser::{ParseResult, Parser, lexer::lex}; +use crate::query::alt_kinds::validate_alt_kinds; +use crate::query::dependencies; +use crate::query::symbol_table::{SymbolTable, resolve_names}; +use crate::{Diagnostics, parser::Root}; + +const DEFAULT_QUERY_PARSE_FUEL: u32 = 1_000_000; +const DEFAULT_QUERY_PARSE_MAX_DEPTH: u32 = 4096; + +pub struct QueryConfig { + pub query_parse_fuel: u32, + pub query_parse_max_depth: u32, +} + +pub struct QueryBuilder<'q> { + pub src: &'q str, + config: QueryConfig, +} + +impl<'q> QueryBuilder<'q> { + pub fn new(src: &'q str) -> Self { + let config = QueryConfig { + query_parse_fuel: DEFAULT_QUERY_PARSE_FUEL, + query_parse_max_depth: DEFAULT_QUERY_PARSE_MAX_DEPTH, + }; + + Self { src, config } + } + + pub fn with_query_parse_fuel(mut self, fuel: u32) -> Self { + self.config.query_parse_fuel = fuel; + self + } + + pub fn with_query_parse_recursion_limit(mut self, limit: u32) -> Self { + self.config.query_parse_max_depth = limit; + self + } + + pub fn parse(self) -> crate::Result> { + let src = self.src; + let tokens = lex(src); + let parser = Parser::new( + self.src, + tokens, + self.config.query_parse_fuel, + self.config.query_parse_max_depth, + ); + + let ParseResult { + ast, + mut diag, + fuel_consumed, + } = parser.parse()?; + + validate_alt_kinds(&ast, &mut diag); + + Ok(QueryParsed { + src, + diag, + ast, + fuel_consumed, + }) + } +} + +pub struct QueryParsed<'q> { + src: &'q str, + diag: Diagnostics, + ast: Root, + pub fuel_consumed: u32, +} + +impl<'q> QueryParsed<'q> { + pub fn analyze(mut self) -> QueryAnalyzed<'q> { + let symbol_table = resolve_names(&self.ast, self.src, &mut self.diag); + + let dependency_analysis = dependencies::analyze_dependencies(&symbol_table); + dependencies::validate_recursion( + &dependency_analysis, + &self.ast, + &symbol_table, + &mut self.diag, + ); + + QueryAnalyzed { + query_parsed: self, + symbol_table, + } + } +} + +pub struct QueryAnalyzed<'q> { + query_parsed: QueryParsed<'q>, + symbol_table: SymbolTable<'q>, +} diff --git a/crates/plotnik-lib/src/query/symbol_table.rs 
b/crates/plotnik-lib/src/query/symbol_table.rs index 88f85b93..45081f66 100644 --- a/crates/plotnik-lib/src/query/symbol_table.rs +++ b/crates/plotnik-lib/src/query/symbol_table.rs @@ -10,72 +10,90 @@ use indexmap::IndexMap; /// Code generators can emit whatever name they want for this. pub const UNNAMED_DEF: &str = "_"; +use crate::Diagnostics; use crate::diagnostics::DiagnosticKind; -use crate::parser::{ast, token_src}; +use crate::parser::{Root, ast, token_src}; use super::Query; -use super::visitor::{Visitor, walk_root}; +use super::visitor::Visitor; pub type SymbolTable<'src> = IndexMap<&'src str, ast::Expr>; impl<'a> Query<'a> { pub(super) fn resolve_names(&mut self) { - // Pass 1: collect definitions - for def in self.ast.defs() { - let Some(body) = def.body() else { continue }; - - if let Some(token) = def.name() { - // Named definition: `Name = ...` - let name = token_src(&token, self.source); - if self.symbol_table.contains_key(name) { - self.resolve_diagnostics - .report(DiagnosticKind::DuplicateDefinition, token.text_range()) - .message(name) - .emit(); - } else { - self.symbol_table.insert(name, body); - } + self.symbol_table = resolve_names(&self.ast, self.source, &mut self.resolve_diagnostics); + } +} + +pub fn resolve_names<'q>(ast: &Root, src: &'q str, diag: &mut Diagnostics) -> SymbolTable<'q> { + let symbol_table = SymbolTable::default(); + let ctx = Context { + src, + diag, + symbol_table, + }; + + let mut resolver = ReferenceResolver { ctx }; + resolver.visit(ast); + let ctx = resolver.ctx; + + let mut validator = ReferenceValidator { ctx }; + validator.visit(ast); + validator.ctx.symbol_table +} + +struct Context<'q, 'd> { + src: &'q str, + diag: &'d mut Diagnostics, + symbol_table: SymbolTable<'q>, +} + +struct ReferenceResolver<'q, 'd> { + pub ctx: Context<'q, 'd>, +} + +impl Visitor for ReferenceResolver<'_, '_> { + fn visit_def(&mut self, def: &ast::Def) { + let Some(body) = def.body() else { return }; + + if let Some(token) = def.name() { + // Named definition: `Name = ...` + let name = token_src(&token, self.ctx.src); + if self.ctx.symbol_table.contains_key(name) { + self.ctx + .diag + .report(DiagnosticKind::DuplicateDefinition, token.text_range()) + .message(name) + .emit(); } else { - // Unnamed definition: `...` (root expression) - // Parser already validates multiple unnamed defs; we keep the last one. - if self.symbol_table.contains_key(UNNAMED_DEF) { - self.symbol_table.shift_remove(UNNAMED_DEF); - } - self.symbol_table.insert(UNNAMED_DEF, body); + self.ctx.symbol_table.insert(name, body); } + } else { + // Unnamed definition: `...` (root expression) + // Parser already validates multiple unnamed defs; we keep the last one. 
+ if self.ctx.symbol_table.contains_key(UNNAMED_DEF) { + self.ctx.symbol_table.shift_remove(UNNAMED_DEF); + } + self.ctx.symbol_table.insert(UNNAMED_DEF, body); } - - // Pass 2: check references - let root = self.ast.clone(); - let mut validator = ReferenceValidator { query: self }; - validator.visit_root(&root); } } -struct ReferenceValidator<'a, 'q> { - query: &'a mut Query<'q>, +struct ReferenceValidator<'q, 'd> { + pub ctx: Context<'q, 'd>, } impl Visitor for ReferenceValidator<'_, '_> { - fn visit_root(&mut self, root: &ast::Root) { - // Parser wraps all top-level exprs in Def nodes, so this should be empty - assert!( - root.exprs().next().is_none(), - "symbol_table: unexpected bare Expr in Root (parser should wrap in Def)" - ); - walk_root(self, root); - } - fn visit_ref(&mut self, r: &ast::Ref) { let Some(name_token) = r.name() else { return }; let name = name_token.text(); - if self.query.symbol_table.contains_key(name) { + if self.ctx.symbol_table.contains_key(name) { return; } - self.query - .resolve_diagnostics + self.ctx + .diag .report(DiagnosticKind::UndefinedReference, name_token.text_range()) .message(name) .emit(); diff --git a/crates/plotnik-lib/src/query/visitor.rs b/crates/plotnik-lib/src/query/visitor.rs index 927befeb..7022ee62 100644 --- a/crates/plotnik-lib/src/query/visitor.rs +++ b/crates/plotnik-lib/src/query/visitor.rs @@ -21,8 +21,8 @@ use crate::parser::ast::{ }; pub trait Visitor: Sized { - fn visit_root(&mut self, root: &Root) { - walk_root(self, root); + fn visit(&mut self, ast: &Root) { + walk(self, ast); } fn visit_def(&mut self, def: &Def) { @@ -37,13 +37,9 @@ pub trait Visitor: Sized { walk_named_node(self, node); } - fn visit_anonymous_node(&mut self, _node: &AnonymousNode) { - // Leaf node - } + fn visit_anonymous_node(&mut self, _node: &AnonymousNode) {} - fn visit_ref(&mut self, _ref: &Ref) { - // Leaf node in AST structure (semantic traversal happens via SymbolTable lookup) - } + fn visit_ref(&mut self, _ref: &Ref) {} fn visit_alt_expr(&mut self, alt: &AltExpr) { walk_alt_expr(self, alt); @@ -66,8 +62,8 @@ pub trait Visitor: Sized { } } -pub fn walk_root(visitor: &mut V, root: &Root) { - for def in root.defs() { +pub fn walk(visitor: &mut V, ast: &Root) { + for def in ast.defs() { visitor.visit_def(&def); } } @@ -92,7 +88,6 @@ pub fn walk_expr(visitor: &mut V, expr: &Expr) { } pub fn walk_named_node(visitor: &mut V, node: &NamedNode) { - // We iterate specific children to avoid Expr::children() Vec allocation for child in node.children() { visitor.visit_expr(&child); } @@ -104,7 +99,7 @@ pub fn walk_alt_expr(visitor: &mut V, alt: &AltExpr) { visitor.visit_expr(&body); } } - // Also visit bare exprs in untagged/mixed alts if any exist unwrapped + for expr in alt.exprs() { visitor.visit_expr(&expr); } From a03cd58e47cdc63a53aa4c2c740e7667e41e9a4d Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Tue, 16 Dec 2025 22:36:02 -0300 Subject: [PATCH 02/11] Refactor arities --- crates/plotnik-lib/src/query/expr_arity.rs | 152 ++++++++++++--------- crates/plotnik-lib/src/query/query.rs | 9 +- 2 files changed, 99 insertions(+), 62 deletions(-) diff --git a/crates/plotnik-lib/src/query/expr_arity.rs b/crates/plotnik-lib/src/query/expr_arity.rs index 2157f2cb..ae7643a0 100644 --- a/crates/plotnik-lib/src/query/expr_arity.rs +++ b/crates/plotnik-lib/src/query/expr_arity.rs @@ -7,10 +7,13 @@ //! `Invalid` marks nodes where arity cannot be determined (error nodes, //! undefined refs, etc.). 
+use std::collections::HashMap; + use super::Query; +use super::symbol_table::SymbolTable; use super::visitor::{Visitor, walk_expr, walk_field_expr}; -use crate::diagnostics::DiagnosticKind; -use crate::parser::{Expr, FieldExpr, Ref, SeqExpr, SyntaxKind, SyntaxNode, ast}; +use crate::diagnostics::{DiagnosticKind, Diagnostics}; +use crate::parser::{Expr, FieldExpr, Ref, Root, SeqExpr, SyntaxKind, SyntaxNode, ast}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ExprArity { @@ -19,87 +22,92 @@ pub enum ExprArity { Invalid, } +pub type ExprArityTable = HashMap; + impl Query<'_> { pub(super) fn infer_arities(&mut self) { - let root = self.ast.clone(); - - let mut computer = ArityComputer { query: self }; - computer.visit(&root); - - let mut validator = ArityValidator { query: self }; - validator.visit(&root); + self.expr_arity_table = infer_arities( + &self.ast, + &self.symbol_table, + &mut self.expr_arity_diagnostics, + ); } pub(super) fn get_arity(&self, node: &SyntaxNode) -> Option { - if node.kind() == SyntaxKind::Error { - return Some(ExprArity::Invalid); - } - - // Try casting to Expr first as it's the most common query - if let Some(expr) = ast::Expr::cast(node.clone()) { - return self.expr_arity_table.get(&expr).copied(); - } + resolve_arity(node, &self.expr_arity_table) + } +} - // Root: arity based on definition count - if let Some(root) = ast::Root::cast(node.clone()) { - return Some(if root.defs().nth(1).is_some() { - ExprArity::Many - } else { - ExprArity::One - }); - } +pub fn infer_arities( + root: &Root, + symbol_table: &SymbolTable, + diag: &mut Diagnostics, +) -> ExprArityTable { + let ctx = ArityContext { + symbol_table, + arity_table: HashMap::new(), + diag, + }; + + let mut computer = ArityComputer { ctx }; + computer.visit(root); + let ctx = computer.ctx; + + let mut validator = ArityValidator { ctx }; + validator.visit(root); + let ctx = validator.ctx; + + ctx.arity_table +} - // Def: delegate to body's arity - if let Some(def) = ast::Def::cast(node.clone()) { - return def - .body() - .and_then(|b| self.expr_arity_table.get(&b).copied()); - } +pub fn resolve_arity(node: &SyntaxNode, table: &ExprArityTable) -> Option { + if node.kind() == SyntaxKind::Error { + return Some(ExprArity::Invalid); + } - // Branch: delegate to body's arity - if let Some(branch) = ast::Branch::cast(node.clone()) { - return branch - .body() - .and_then(|b| self.expr_arity_table.get(&b).copied()); - } + // Try casting to Expr first as it's the most common query + if let Some(expr) = ast::Expr::cast(node.clone()) { + return table.get(&expr).copied(); + } - None + // Root: arity based on definition count + if let Some(root) = ast::Root::cast(node.clone()) { + return Some(if root.defs().nth(1).is_some() { + ExprArity::Many + } else { + ExprArity::One + }); } -} -struct ArityComputer<'a, 'q> { - query: &'a mut Query<'q>, -} + // Def: delegate to body's arity + if let Some(def) = ast::Def::cast(node.clone()) { + return def.body().and_then(|b| table.get(&b).copied()); + } -impl Visitor for ArityComputer<'_, '_> { - fn visit_expr(&mut self, expr: &Expr) { - self.query.compute_arity(expr); - walk_expr(self, expr); + // Branch: delegate to body's arity + if let Some(branch) = ast::Branch::cast(node.clone()) { + return branch.body().and_then(|b| table.get(&b).copied()); } -} -struct ArityValidator<'a, 'q> { - query: &'a mut Query<'q>, + None } -impl Visitor for ArityValidator<'_, '_> { - fn visit_field_expr(&mut self, field: &FieldExpr) { - self.query.validate_field(field); - 
walk_field_expr(self, field); - } +struct ArityContext<'a, 'd> { + symbol_table: &'a SymbolTable<'a>, + arity_table: ExprArityTable, + diag: &'d mut Diagnostics, } -impl Query<'_> { +impl ArityContext<'_, '_> { fn compute_arity(&mut self, expr: &Expr) -> ExprArity { - if let Some(&c) = self.expr_arity_table.get(expr) { + if let Some(&c) = self.arity_table.get(expr) { return c; } // Insert sentinel to break cycles (e.g., `Foo = (Foo)`) - self.expr_arity_table - .insert(expr.clone(), ExprArity::Invalid); + self.arity_table.insert(expr.clone(), ExprArity::Invalid); let c = self.compute_single_arity(expr); - self.expr_arity_table.insert(expr.clone(), c); + self.arity_table.insert(expr.clone(), c); c } @@ -161,7 +169,7 @@ impl Query<'_> { }; let card = self - .expr_arity_table + .arity_table .get(&value) .copied() .unwrap_or(ExprArity::One); @@ -172,10 +180,32 @@ impl Query<'_> { .map(|t| t.text().to_string()) .unwrap_or_else(|| "field".to_string()); - self.expr_arity_diagnostics + self.diag .report(DiagnosticKind::FieldSequenceValue, value.text_range()) .message(field_name) .emit(); } } } + +struct ArityComputer<'a, 'd> { + ctx: ArityContext<'a, 'd>, +} + +impl Visitor for ArityComputer<'_, '_> { + fn visit_expr(&mut self, expr: &Expr) { + self.ctx.compute_arity(expr); + walk_expr(self, expr); + } +} + +struct ArityValidator<'a, 'd> { + ctx: ArityContext<'a, 'd>, +} + +impl Visitor for ArityValidator<'_, '_> { + fn visit_field_expr(&mut self, field: &FieldExpr) { + self.ctx.validate_field(field); + walk_field_expr(self, field); + } +} diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 14742288..5a676324 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -1,7 +1,8 @@ #![allow(unused)] use crate::parser::{ParseResult, Parser, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; -use crate::query::dependencies; +use crate::query::dependencies::{self, DependencyAnalysis}; +use crate::query::expr_arity::{ExprArityTable, infer_arities}; use crate::query::symbol_table::{SymbolTable, resolve_names}; use crate::{Diagnostics, parser::Root}; @@ -84,9 +85,13 @@ impl<'q> QueryParsed<'q> { &mut self.diag, ); + let arity_table = infer_arities(&self.ast, &symbol_table, &mut self.diag); + QueryAnalyzed { query_parsed: self, symbol_table, + dependency_analysis, + arity_table, } } } @@ -94,4 +99,6 @@ impl<'q> QueryParsed<'q> { pub struct QueryAnalyzed<'q> { query_parsed: QueryParsed<'q>, symbol_table: SymbolTable<'q>, + dependency_analysis: DependencyAnalysis<'q>, + arity_table: ExprArityTable, } From 5f5d89b3cd14ded31704681094b29a2c93c84df2 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Tue, 16 Dec 2025 23:04:08 -0300 Subject: [PATCH 03/11] Error handling --- crates/plotnik-lib/src/lib.rs | 6 ++++++ crates/plotnik-lib/src/query/query.rs | 14 +++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 62d6c381..2ce50b74 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -41,6 +41,12 @@ pub enum Error { /// Recursion fuel exhausted (input nested too deeply). #[error("recursion limit exceeded")] RecursionLimitExceeded, + + #[error("query parsing failed with {} errors", .0.error_count())] + QueryParseError(Diagnostics), + + #[error("query analysis failed with {} errors", .0.error_count())] + QueryAnalyzeError(Diagnostics), } /// Result type for query operations. 
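
Taken together with the staged builder from the previous patches, the two new error variants above give callers a structured failure path for each stage. The following is a minimal usage sketch, not part of this patch series: it assumes `QueryBuilder` is publicly reachable at `plotnik_lib::query::query`, and the `check` helper, the fuel/depth values, and the diagnostic printing are illustrative only (the actual wiring of `QueryParseError`/`QueryAnalyzeError` into the builder is in the `query.rs` hunk below).

use plotnik_lib::query::query::QueryBuilder;
use plotnik_lib::Error;

fn check(src: &str) {
    // Stage 1: lex + parse + alt-kind validation.
    let parsed = match QueryBuilder::new(src)
        .with_query_parse_fuel(100_000)
        .with_query_parse_recursion_limit(256)
        .parse()
    {
        Ok(parsed) => parsed,
        Err(Error::QueryParseError(diag)) => {
            // Structural errors in the query text itself.
            eprintln!("parse failed with {} errors", diag.error_count());
            return;
        }
        Err(other) => {
            // Fuel exhaustion / recursion limit and other fatal errors.
            eprintln!("fatal: {other}");
            return;
        }
    };

    // Stage 2: name resolution, dependency/recursion analysis, arity inference.
    match parsed.analyze() {
        Ok(_analyzed) => println!("query ok"),
        Err(Error::QueryAnalyzeError(diag)) => {
            eprintln!("analysis failed with {} errors", diag.error_count());
        }
        Err(other) => eprintln!("fatal: {other}"),
    }
}
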
diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 5a676324..d08b6902 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -57,6 +57,10 @@ impl<'q> QueryBuilder<'q> { validate_alt_kinds(&ast, &mut diag); + if diag.has_errors() { + return Err(crate::Error::QueryParseError(diag)); + } + Ok(QueryParsed { src, diag, @@ -74,7 +78,7 @@ pub struct QueryParsed<'q> { } impl<'q> QueryParsed<'q> { - pub fn analyze(mut self) -> QueryAnalyzed<'q> { + pub fn analyze(mut self) -> crate::Result> { let symbol_table = resolve_names(&self.ast, self.src, &mut self.diag); let dependency_analysis = dependencies::analyze_dependencies(&symbol_table); @@ -87,12 +91,16 @@ impl<'q> QueryParsed<'q> { let arity_table = infer_arities(&self.ast, &symbol_table, &mut self.diag); - QueryAnalyzed { + if self.diag.has_errors() { + return Err(crate::Error::QueryAnalyzeError(self.diag)); + } + + Ok(QueryAnalyzed { query_parsed: self, symbol_table, dependency_analysis, arity_table, - } + }) } } From 7ac6328cc536b1f7b812620441240f1ea5a2c4b2 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Tue, 16 Dec 2025 23:49:56 -0300 Subject: [PATCH 04/11] Linker --- Cargo.lock | 2 + crates/plotnik-cli/Cargo.toml | 1 + crates/plotnik-cli/src/commands/exec.rs | 3 +- crates/plotnik-cli/src/commands/types.rs | 3 +- crates/plotnik-langs/src/lib.rs | 2 +- crates/plotnik-lib/Cargo.toml | 1 + crates/plotnik-lib/src/engine/interpreter.rs | 6 +- .../src/engine/interpreter_tests.rs | 3 +- .../plotnik-lib/src/engine/validate_tests.rs | 3 +- crates/plotnik-lib/src/ir/compiled.rs | 5 +- crates/plotnik-lib/src/ir/emit.rs | 4 +- crates/plotnik-lib/src/ir/ids.rs | 9 - crates/plotnik-lib/src/ir/matcher.rs | 4 +- crates/plotnik-lib/src/ir/mod.rs | 5 +- crates/plotnik-lib/src/query/link.rs | 398 ++++++++++-------- crates/plotnik-lib/src/query/mod.rs | 6 +- crates/plotnik-lib/src/query/query.rs | 37 ++ 17 files changed, 281 insertions(+), 211 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47e9edd2..75206542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -337,6 +337,7 @@ name = "plotnik-cli" version = "0.1.1" dependencies = [ "clap", + "plotnik-core", "plotnik-langs", "plotnik-lib", "serde_json", @@ -396,6 +397,7 @@ dependencies = [ "indoc", "insta", "logos", + "plotnik-core", "plotnik-langs", "rowan", "serde", diff --git a/crates/plotnik-cli/Cargo.toml b/crates/plotnik-cli/Cargo.toml index c80184d8..f1e51527 100644 --- a/crates/plotnik-cli/Cargo.toml +++ b/crates/plotnik-cli/Cargo.toml @@ -71,6 +71,7 @@ yaml = ["plotnik-langs/yaml"] [dependencies] clap = { version = "4.5", features = ["derive"] } +plotnik-core = { version = "0.1.0", path = "../plotnik-core" } plotnik-langs = { version = "0.1.0", path = "../plotnik-langs", default-features = false } plotnik-lib = { version = "0.1.0", path = "../plotnik-lib" } serde_json = "1.0" diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index fdd4fcc7..e4e65e39 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -2,7 +2,8 @@ use std::fs; use std::io::{self, Read}; use std::path::PathBuf; -use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::Lang; use plotnik_lib::Query; use plotnik_lib::engine::interpreter::QueryInterpreter; use plotnik_lib::engine::validate::validate as validate_result; diff --git a/crates/plotnik-cli/src/commands/types.rs 
b/crates/plotnik-cli/src/commands/types.rs index 39f543b7..490c35e4 100644 --- a/crates/plotnik-cli/src/commands/types.rs +++ b/crates/plotnik-cli/src/commands/types.rs @@ -3,7 +3,8 @@ use std::fs; use std::io::{self, Read}; use std::path::PathBuf; -use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::Lang; use plotnik_lib::Query; use plotnik_lib::ir::{ CompiledQuery, NodeKindResolver, QueryEmitter, STRING_NONE, TYPE_NODE, TYPE_STR, TYPE_VOID, diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index 581ae00d..f13f093e 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use tree_sitter::Language; -pub use plotnik_core::{Cardinality, NodeFieldId, NodeTypeId, NodeTypes, StaticNodeTypes}; +use plotnik_core::{Cardinality, NodeFieldId, NodeTypeId, NodeTypes, StaticNodeTypes}; pub mod builtin; pub mod dynamic; diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml index 6ee3c056..ef8ccfd4 100644 --- a/crates/plotnik-lib/Cargo.toml +++ b/crates/plotnik-lib/Cargo.toml @@ -21,6 +21,7 @@ rowan = "0.16.1" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.17" tree-sitter = "0.26" +plotnik-core = { version = "0.1", path = "../plotnik-core" } plotnik-langs = { version = "0.1", path = "../plotnik-langs", optional = true } [features] diff --git a/crates/plotnik-lib/src/engine/interpreter.rs b/crates/plotnik-lib/src/engine/interpreter.rs index e7643f62..a0b14e1e 100644 --- a/crates/plotnik-lib/src/engine/interpreter.rs +++ b/crates/plotnik-lib/src/engine/interpreter.rs @@ -9,10 +9,8 @@ use std::collections::HashSet; use tree_sitter::{Node, TreeCursor}; -use crate::ir::{ - CompiledQuery, EffectOp, Matcher, Nav, NavKind, NodeFieldId, NodeTypeId, RefTransition, - TransitionId, -}; +use crate::ir::{CompiledQuery, EffectOp, Matcher, Nav, NavKind, RefTransition, TransitionId}; +use plotnik_core::{NodeFieldId, NodeTypeId}; use super::effect_stream::EffectStream; use super::error::RuntimeError; diff --git a/crates/plotnik-lib/src/engine/interpreter_tests.rs b/crates/plotnik-lib/src/engine/interpreter_tests.rs index 6ce32a15..a73d4107 100644 --- a/crates/plotnik-lib/src/engine/interpreter_tests.rs +++ b/crates/plotnik-lib/src/engine/interpreter_tests.rs @@ -1,4 +1,5 @@ -use plotnik_langs::{Lang, NodeFieldId, NodeTypeId, javascript}; +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::{Lang, javascript}; use crate::engine::interpreter::QueryInterpreter; use crate::engine::value::Value; diff --git a/crates/plotnik-lib/src/engine/validate_tests.rs b/crates/plotnik-lib/src/engine/validate_tests.rs index 8a427a58..1aaebf5a 100644 --- a/crates/plotnik-lib/src/engine/validate_tests.rs +++ b/crates/plotnik-lib/src/engine/validate_tests.rs @@ -1,6 +1,7 @@ //! End-to-end tests for runtime type validation. 
-use plotnik_langs::{Lang, NodeFieldId, NodeTypeId, javascript}; +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::{Lang, javascript}; use crate::engine::interpreter::QueryInterpreter; use crate::engine::validate::validate; diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs index adfd710c..0d7b4ab7 100644 --- a/crates/plotnik-lib/src/ir/compiled.rs +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -7,9 +7,10 @@ use std::alloc::{Layout, alloc, dealloc}; use std::fmt::Write; use std::ptr; +use plotnik_core::{NodeFieldId, NodeTypeId}; + use super::{ - EffectOp, Entrypoint, NodeFieldId, NodeTypeId, Slice, StringId, StringRef, Transition, - TransitionId, TypeDef, TypeMember, + EffectOp, Entrypoint, Slice, StringId, StringRef, Transition, TransitionId, TypeDef, TypeMember, }; /// Buffer alignment for cache-line efficiency. diff --git a/crates/plotnik-lib/src/ir/emit.rs b/crates/plotnik-lib/src/ir/emit.rs index f59986cb..9a69e610 100644 --- a/crates/plotnik-lib/src/ir/emit.rs +++ b/crates/plotnik-lib/src/ir/emit.rs @@ -8,8 +8,10 @@ use std::collections::HashMap; use std::ptr; +use plotnik_core::{NodeFieldId, NodeTypeId}; + use super::compiled::{CompiledQuery, CompiledQueryBuffer, align_up}; -use super::ids::{NodeFieldId, NodeTypeId, RefId, StringId, TYPE_NODE, TransitionId}; +use super::ids::{RefId, StringId, TYPE_NODE, TransitionId}; use super::strings::StringInterner; use super::{ EffectOp, Entrypoint, MAX_INLINE_SUCCESSORS, Matcher, RefTransition, Slice, StringRef, diff --git a/crates/plotnik-lib/src/ir/ids.rs b/crates/plotnik-lib/src/ir/ids.rs index f97b17ba..cfcc0d2f 100644 --- a/crates/plotnik-lib/src/ir/ids.rs +++ b/crates/plotnik-lib/src/ir/ids.rs @@ -3,18 +3,9 @@ //! These are lightweight wrappers/aliases for indices and identifiers //! used throughout the IR. They provide type safety without runtime cost. -use std::num::NonZeroU16; - /// Index into the transitions segment. pub type TransitionId = u32; -/// Node type ID from tree-sitter. Do not change the underlying type. -pub type NodeTypeId = u16; - -/// Node field ID from tree-sitter. Uses `NonZeroU16` so `Option` -/// is the same size as `NodeFieldId` (niche optimization with 0 = None). -pub type NodeFieldId = NonZeroU16; - /// Index into the string_refs segment. pub type StringId = u16; diff --git a/crates/plotnik-lib/src/ir/matcher.rs b/crates/plotnik-lib/src/ir/matcher.rs index 0a968895..be171f14 100644 --- a/crates/plotnik-lib/src/ir/matcher.rs +++ b/crates/plotnik-lib/src/ir/matcher.rs @@ -2,7 +2,9 @@ //! //! Matchers are purely for node matching - navigation is handled by `Nav`. -use super::{NodeFieldId, NodeTypeId, Slice}; +use plotnik_core::{NodeFieldId, NodeTypeId}; + +use super::Slice; /// Discriminant for matcher variants. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/crates/plotnik-lib/src/ir/mod.rs b/crates/plotnik-lib/src/ir/mod.rs index c50fce7a..6a1e421d 100644 --- a/crates/plotnik-lib/src/ir/mod.rs +++ b/crates/plotnik-lib/src/ir/mod.rs @@ -36,10 +36,7 @@ mod slice_tests; mod string_ref_tests; // Re-export ID types -pub use ids::{ - DataFieldId, NodeFieldId, NodeTypeId, RefId, STRING_NONE, StringId, TransitionId, TypeId, - VariantTagId, -}; +pub use ids::{DataFieldId, RefId, STRING_NONE, StringId, TransitionId, TypeId, VariantTagId}; // Re-export TypeId constants pub use ids::{TYPE_INVALID, TYPE_NODE, TYPE_STR, TYPE_VOID}; diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index ff07c68c..8610e1d0 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -5,133 +5,87 @@ //! 2. Collect and resolve all field names (FieldExpr, NegatedField) //! 3. Validate structural constraints (field on node type, child type for field) +use std::collections::HashMap; + use indexmap::IndexSet; -use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::Lang; use rowan::TextRange; -use crate::diagnostics::DiagnosticKind; +use crate::diagnostics::{DiagnosticKind, Diagnostics}; +use crate::parser::Root; use crate::parser::ast::{self, Expr, NamedNode}; use crate::parser::cst::{SyntaxKind, SyntaxToken}; use crate::parser::token_src; use super::Query; +use super::symbol_table::SymbolTable; use super::utils::find_similar; use super::visitor::{Visitor, walk}; -/// Check if `child` is a subtype of `supertype`, recursively handling nested supertypes. -#[allow(dead_code)] -fn is_subtype_of(lang: &Lang, child: NodeTypeId, supertype: NodeTypeId) -> bool { - let subtypes = lang.subtypes(supertype); - for &subtype in subtypes { - if subtype == child { - return true; - } - if lang.is_supertype(subtype) && is_subtype_of(lang, child, subtype) { - return true; - } - } - false -} - -/// Check if `child` is a valid non-field child of `parent`, expanding supertypes. -#[allow(dead_code)] -fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) -> bool { - let valid_types = lang.valid_child_types(parent); - for &allowed in valid_types { - if allowed == child { - return true; - } - if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { - return true; - } +impl<'a> Query<'a> { + /// Link query against a language grammar. + /// + /// Resolves node types and fields, validates structural constraints. + pub fn link(&mut self, lang: &Lang) { + link( + &self.ast, + self.source, + lang, + &self.symbol_table, + &mut self.node_type_ids, + &mut self.node_field_ids, + &mut self.link_diagnostics, + ); } - false } -/// Check if `child` is a valid field value type, expanding supertypes. -#[allow(dead_code)] -fn is_valid_field_type_expanded( +/// Link query against a language grammar. +/// +/// This function is decoupled from `Query` to allow easier testing and +/// modularity. It orchestrates the resolution and validation phases. 
+pub fn link<'q>( + root: &Root, + source: &'q str, lang: &Lang, - parent: NodeTypeId, - field: NodeFieldId, - child: NodeTypeId, -) -> bool { - if lang.is_valid_field_type(parent, field, child) { - return true; - } - let valid_types = lang.valid_field_types(parent, field); - for &allowed in valid_types { - if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { - return true; - } - } - false + symbol_table: &SymbolTable<'q>, + node_type_ids: &mut HashMap<&'q str, Option>, + node_field_ids: &mut HashMap<&'q str, Option>, + diagnostics: &mut Diagnostics, +) { + let mut linker = Linker { + source, + lang, + symbol_table, + node_type_ids, + node_field_ids, + diagnostics, + }; + linker.link(root); } -/// Format a list of items for display, truncating if too long. -#[allow(dead_code)] -fn format_list(items: &[&str], max_items: usize) -> String { - if items.is_empty() { - return String::new(); - } - if items.len() <= max_items { - items - .iter() - .map(|s| format!("`{}`", s)) - .collect::>() - .join(", ") - } else { - let shown: Vec<_> = items[..max_items] - .iter() - .map(|s| format!("`{}`", s)) - .collect(); - format!( - "{}, ... ({} more)", - shown.join(", "), - items.len() - max_items - ) - } -} - -/// Context for validating child types. -#[allow(dead_code)] -#[derive(Clone, Copy)] -struct ValidationContext<'a> { - /// The parent node type being validated against. - parent_id: NodeTypeId, - /// The parent node's name for error messages. - parent_name: &'a str, - /// The parent node type token range for related_to. - parent_range: TextRange, - /// If validating a field value, the field info. - field: Option>, -} - -#[allow(dead_code)] -#[derive(Clone, Copy)] -struct FieldContext<'a> { - name: &'a str, - id: NodeFieldId, - range: TextRange, +struct Linker<'a, 'q> { + source: &'q str, + lang: &'a Lang, + symbol_table: &'a SymbolTable<'q>, + node_type_ids: &'a mut HashMap<&'q str, Option>, + node_field_ids: &'a mut HashMap<&'q str, Option>, + diagnostics: &'a mut Diagnostics, } -impl<'a> Query<'a> { - /// Link query against a language grammar. - /// - /// Resolves node types and fields, validates structural constraints. 
- pub fn link(&mut self, lang: &Lang) { - self.resolve_node_types(lang); - self.resolve_fields(lang); - self.validate_structure(lang); +impl<'a, 'q> Linker<'a, 'q> { + fn link(&mut self, root: &Root) { + self.resolve_node_types(root); + self.resolve_fields(root); + self.validate_structure(root); } - fn resolve_node_types(&mut self, lang: &Lang) { - let root = self.ast.clone(); - let mut collector = NodeTypeCollector { query: self, lang }; - collector.visit(&root); + fn resolve_node_types(&mut self, root: &Root) { + let mut collector = NodeTypeCollector { linker: self }; + collector.visit(root); } - fn resolve_named_node(&mut self, node: &NamedNode, lang: &Lang) { + fn resolve_named_node(&mut self, node: &NamedNode) { if node.is_any() { return; } @@ -148,16 +102,16 @@ impl<'a> Query<'a> { if self.node_type_ids.contains_key(type_name) { return; } - let resolved = lang.resolve_named_node(type_name); + let resolved = self.lang.resolve_named_node(type_name); self.node_type_ids .insert(token_src(&type_token, self.source), resolved); if resolved.is_none() { - let all_types = lang.all_named_node_kinds(); + let all_types = self.lang.all_named_node_kinds(); let max_dist = (type_name.len() / 3).clamp(2, 4); let suggestion = find_similar(type_name, &all_types, max_dist); let mut builder = self - .link_diagnostics + .diagnostics .report(DiagnosticKind::UnknownNodeType, type_token.text_range()) .message(type_name); @@ -168,13 +122,12 @@ impl<'a> Query<'a> { } } - fn resolve_fields(&mut self, lang: &Lang) { - let root = self.ast.clone(); - let mut collector = FieldCollector { query: self, lang }; - collector.visit(&root); + fn resolve_fields(&mut self, root: &Root) { + let mut collector = FieldCollector { linker: self }; + collector.visit(root); } - fn resolve_field_by_token(&mut self, name_token: Option, lang: &Lang) { + fn resolve_field_by_token(&mut self, name_token: Option) { let Some(name_token) = name_token else { return; }; @@ -182,18 +135,18 @@ impl<'a> Query<'a> { if self.node_field_ids.contains_key(field_name) { return; } - let resolved = lang.resolve_field(field_name); + let resolved = self.lang.resolve_field(field_name); self.node_field_ids .insert(token_src(&name_token, self.source), resolved); if resolved.is_some() { return; } - let all_fields = lang.all_field_names(); + let all_fields = self.lang.all_field_names(); let max_dist = (field_name.len() / 3).clamp(2, 4); let suggestion = find_similar(field_name, &all_fields, max_dist); let mut builder = self - .link_diagnostics + .diagnostics .report(DiagnosticKind::UnknownField, name_token.text_range()) .message(field_name); @@ -203,12 +156,12 @@ impl<'a> Query<'a> { builder.emit(); } - fn validate_structure(&mut self, lang: &Lang) { - let defs: Vec<_> = self.ast.defs().collect(); + fn validate_structure(&mut self, root: &Root) { + let defs: Vec<_> = root.defs().collect(); for def in defs { let Some(body) = def.body() else { continue }; let mut visited = IndexSet::new(); - self.validate_expr_structure(&body, None, lang, &mut visited); + self.validate_expr_structure(&body, None, &mut visited); } } @@ -216,31 +169,30 @@ impl<'a> Query<'a> { &mut self, expr: &Expr, ctx: Option>, - lang: &Lang, visited: &mut IndexSet, ) { match expr { Expr::NamedNode(node) => { // Validate this node against the context (if any) if let Some(ref ctx) = ctx { - self.validate_terminal_type(expr, ctx, lang, visited); + self.validate_terminal_type(expr, ctx, visited); } // Set up context for children - let child_ctx = self.make_node_context(node, lang); + let child_ctx = 
self.make_node_context(node); for child in node.children() { match &child { Expr::FieldExpr(f) => { // Fields get special handling - self.validate_field_expr(f, child_ctx.as_ref(), lang, visited); + self.validate_field_expr(f, child_ctx.as_ref(), visited); } _ => { // Non-field children: validate as non-field children - if let Some(ctx) = child_ctx { - self.validate_non_field_children(&child, &ctx, lang, visited); + if let Some(ref ctx) = child_ctx { + self.validate_non_field_children(&child, ctx, visited); } - self.validate_expr_structure(&child, child_ctx, lang, visited); + self.validate_expr_structure(&child, child_ctx, visited); } } } @@ -249,7 +201,7 @@ impl<'a> Query<'a> { if let Some(ctx) = child_ctx { for child in node.as_cst().children() { if let Some(neg) = ast::NegatedField::cast(child) { - self.validate_negated_field(&neg, &ctx, lang); + self.validate_negated_field(&neg, &ctx); } } } @@ -257,31 +209,31 @@ impl<'a> Query<'a> { Expr::AnonymousNode(_) => { // Validate this anonymous node against the context (if any) if let Some(ref ctx) = ctx { - self.validate_terminal_type(expr, ctx, lang, visited); + self.validate_terminal_type(expr, ctx, visited); } } Expr::FieldExpr(f) => { // Should be handled by parent NamedNode, but handle gracefully - self.validate_field_expr(f, ctx.as_ref(), lang, visited); + self.validate_field_expr(f, ctx.as_ref(), visited); } Expr::AltExpr(alt) => { for branch in alt.branches() { let Some(body) = branch.body() else { continue }; - self.validate_expr_structure(&body, ctx, lang, visited); + self.validate_expr_structure(&body, ctx, visited); } } Expr::SeqExpr(seq) => { for child in seq.children() { - self.validate_expr_structure(&child, ctx, lang, visited); + self.validate_expr_structure(&child, ctx, visited); } } Expr::CapturedExpr(cap) => { let Some(inner) = cap.inner() else { return }; - self.validate_expr_structure(&inner, ctx, lang, visited); + self.validate_expr_structure(&inner, ctx, visited); } Expr::QuantifiedExpr(q) => { let Some(inner) = q.inner() else { return }; - self.validate_expr_structure(&inner, ctx, lang, visited); + self.validate_expr_structure(&inner, ctx, visited); } Expr::Ref(r) => { let Some(name_token) = r.name() else { return }; @@ -293,14 +245,14 @@ impl<'a> Query<'a> { visited.swap_remove(name); return; }; - self.validate_expr_structure(&body, ctx, lang, visited); + self.validate_expr_structure(&body, ctx, visited); visited.swap_remove(name); } } } /// Create validation context for a named node's children. 
- fn make_node_context(&self, node: &NamedNode, lang: &Lang) -> Option> { + fn make_node_context(&self, node: &NamedNode) -> Option> { if node.is_any() { return None; } @@ -313,7 +265,7 @@ impl<'a> Query<'a> { } let type_name = type_token.text(); let parent_id = self.node_type_ids.get(type_name).copied().flatten()?; - let parent_name = lang.node_type_name(parent_id)?; + let parent_name = self.lang.node_type_name(parent_id)?; Some(ValidationContext { parent_id, parent_name, @@ -327,7 +279,6 @@ impl<'a> Query<'a> { &mut self, field: &ast::FieldExpr, ctx: Option<&ValidationContext<'a>>, - lang: &Lang, visited: &mut IndexSet, ) { let Some(name_token) = field.name() else { @@ -344,13 +295,12 @@ impl<'a> Query<'a> { }; // Check field exists on parent - if !lang.has_field(ctx.parent_id, field_id) { + if !self.lang.has_field(ctx.parent_id, field_id) { self.emit_field_not_on_node( name_token.text_range(), field_name, ctx.parent_id, ctx.parent_range, - lang, ); return; } @@ -373,7 +323,7 @@ impl<'a> Query<'a> { // Validate field value - this will traverse through alt/seq/quantifier/capture // and validate each terminal type against the field requirements - self.validate_expr_structure(&value, Some(field_ctx), lang, visited); + self.validate_expr_structure(&value, Some(field_ctx), visited); } /// Validate non-field children. Called for direct children of a NamedNode that aren't fields. @@ -382,19 +332,18 @@ impl<'a> Query<'a> { &mut self, expr: &Expr, ctx: &ValidationContext<'a>, - lang: &Lang, visited: &mut IndexSet, ) { // Collect all terminal types from this expression (follows refs) let terminals = self.collect_terminal_types(expr, visited); // Check if parent allows any non-field children - let valid_types = lang.valid_child_types(ctx.parent_id); + let valid_types = self.lang.valid_child_types(ctx.parent_id); let parent_only_fields = valid_types.is_empty(); for (child_id, child_name, child_range) in terminals { if parent_only_fields { - self.link_diagnostics + self.diagnostics .report(DiagnosticKind::InvalidChildType, child_range) .message(child_name) .related_to( @@ -405,17 +354,17 @@ impl<'a> Query<'a> { continue; } - if is_valid_child_expanded(lang, ctx.parent_id, child_id) { + if is_valid_child_expanded(self.lang, ctx.parent_id, child_id) { continue; } let valid_names: Vec<&str> = valid_types .iter() - .filter_map(|&id| lang.node_type_name(id)) + .filter_map(|&id| self.lang.node_type_name(id)) .collect(); let mut builder = self - .link_diagnostics + .diagnostics .report(DiagnosticKind::InvalidChildType, child_range) .message(child_name) .related_to(format!("inside `{}`", ctx.parent_name), ctx.parent_range); @@ -436,7 +385,6 @@ impl<'a> Query<'a> { &mut self, _expr: &Expr, _ctx: &ValidationContext<'a>, - _lang: &Lang, _visited: &mut IndexSet, ) { } @@ -447,7 +395,6 @@ impl<'a> Query<'a> { &mut self, expr: &Expr, ctx: &ValidationContext<'a>, - lang: &Lang, visited: &mut IndexSet, ) { // Handle refs by following them @@ -461,7 +408,7 @@ impl<'a> Query<'a> { visited.swap_remove(name); return; }; - self.validate_terminal_type(&body, ctx, lang, visited); + self.validate_terminal_type(&body, ctx, visited); visited.swap_remove(name); return; } @@ -472,18 +419,18 @@ impl<'a> Query<'a> { if let Some(ref field) = ctx.field { // Validating a field value - if is_valid_field_type_expanded(lang, ctx.parent_id, field.id, child_id) { + if is_valid_field_type_expanded(self.lang, ctx.parent_id, field.id, child_id) { return; } - let valid_types = lang.valid_field_types(ctx.parent_id, field.id); + let 
valid_types = self.lang.valid_field_types(ctx.parent_id, field.id); let valid_names: Vec<&str> = valid_types .iter() - .filter_map(|&id| lang.node_type_name(id)) + .filter_map(|&id| self.lang.node_type_name(id)) .collect(); let mut builder = self - .link_diagnostics + .diagnostics .report(DiagnosticKind::InvalidFieldChildType, child_range) .message(child_name) .related_to( @@ -508,7 +455,6 @@ impl<'a> Query<'a> { &mut self, _expr: &Expr, _ctx: &ValidationContext<'a>, - _lang: &Lang, _visited: &mut IndexSet, ) { } @@ -612,12 +558,7 @@ impl<'a> Query<'a> { } } - fn validate_negated_field( - &mut self, - neg: &ast::NegatedField, - ctx: &ValidationContext<'a>, - lang: &Lang, - ) { + fn validate_negated_field(&mut self, neg: &ast::NegatedField, ctx: &ValidationContext<'a>) { let Some(name_token) = neg.name() else { return; }; @@ -627,7 +568,7 @@ impl<'a> Query<'a> { return; }; - if lang.has_field(ctx.parent_id, field_id) { + if self.lang.has_field(ctx.parent_id, field_id) { return; } self.emit_field_not_on_node( @@ -635,7 +576,6 @@ impl<'a> Query<'a> { field_name, ctx.parent_id, ctx.parent_range, - lang, ); } @@ -645,13 +585,12 @@ impl<'a> Query<'a> { field_name: &str, parent_id: NodeTypeId, parent_range: TextRange, - lang: &Lang, ) { - let valid_fields = lang.fields_for_node_type(parent_id); - let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); + let valid_fields = self.lang.fields_for_node_type(parent_id); + let parent_name = self.lang.node_type_name(parent_id).unwrap_or("(unknown)"); let mut builder = self - .link_diagnostics + .diagnostics .report(DiagnosticKind::FieldNotOnNodeType, range) .message(field_name) .related_to(format!("on `{}`", parent_name), parent_range); @@ -673,18 +612,114 @@ impl<'a> Query<'a> { } } -struct NodeTypeCollector<'a, 'q> { - query: &'a mut Query<'q>, - lang: &'a Lang, +/// Check if `child` is a subtype of `supertype`, recursively handling nested supertypes. +#[allow(dead_code)] +fn is_subtype_of(lang: &Lang, child: NodeTypeId, supertype: NodeTypeId) -> bool { + let subtypes = lang.subtypes(supertype); + for &subtype in subtypes { + if subtype == child { + return true; + } + if lang.is_supertype(subtype) && is_subtype_of(lang, child, subtype) { + return true; + } + } + false +} + +/// Check if `child` is a valid non-field child of `parent`, expanding supertypes. +#[allow(dead_code)] +fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) -> bool { + let valid_types = lang.valid_child_types(parent); + for &allowed in valid_types { + if allowed == child { + return true; + } + if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { + return true; + } + } + false } -impl Visitor for NodeTypeCollector<'_, '_> { +/// Check if `child` is a valid field value type, expanding supertypes. +#[allow(dead_code)] +fn is_valid_field_type_expanded( + lang: &Lang, + parent: NodeTypeId, + field: NodeFieldId, + child: NodeTypeId, +) -> bool { + if lang.is_valid_field_type(parent, field, child) { + return true; + } + let valid_types = lang.valid_field_types(parent, field); + for &allowed in valid_types { + if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { + return true; + } + } + false +} + +/// Format a list of items for display, truncating if too long. 
+#[allow(dead_code)] +fn format_list(items: &[&str], max_items: usize) -> String { + if items.is_empty() { + return String::new(); + } + if items.len() <= max_items { + items + .iter() + .map(|s| format!("`{}`", s)) + .collect::>() + .join(", ") + } else { + let shown: Vec<_> = items[..max_items] + .iter() + .map(|s| format!("`{}`", s)) + .collect(); + format!( + "{}, ... ({} more)", + shown.join(", "), + items.len() - max_items + ) + } +} + +/// Context for validating child types. +#[allow(dead_code)] +#[derive(Clone, Copy)] +struct ValidationContext<'a> { + /// The parent node type being validated against. + parent_id: NodeTypeId, + /// The parent node's name for error messages. + parent_name: &'a str, + /// The parent node type token range for related_to. + parent_range: TextRange, + /// If validating a field value, the field info. + field: Option>, +} + +#[allow(dead_code)] +#[derive(Clone, Copy)] +struct FieldContext<'a> { + name: &'a str, + id: NodeFieldId, + range: TextRange, +} + +struct NodeTypeCollector<'l, 'a, 'q> { + linker: &'l mut Linker<'a, 'q>, +} + +impl Visitor for NodeTypeCollector<'_, '_, '_> { fn visit(&mut self, root: &ast::Root) { walk(self, root); } fn visit_named_node(&mut self, node: &ast::NamedNode) { - self.query.resolve_named_node(node, self.lang); + self.linker.resolve_named_node(node); super::visitor::walk_named_node(self, node); } @@ -696,18 +731,18 @@ impl Visitor for NodeTypeCollector<'_, '_> { return; }; let value = value_token.text(); - if self.query.node_type_ids.contains_key(value) { + if self.linker.node_type_ids.contains_key(value) { return; } - let resolved = self.lang.resolve_anonymous_node(value); - self.query + let resolved = self.linker.lang.resolve_anonymous_node(value); + self.linker .node_type_ids - .insert(token_src(&value_token, self.query.source), resolved); + .insert(token_src(&value_token, self.linker.source), resolved); if resolved.is_none() { - self.query - .link_diagnostics + self.linker + .diagnostics .report(DiagnosticKind::UnknownNodeType, value_token.text_range()) .message(value) .emit(); @@ -715,12 +750,11 @@ impl Visitor for NodeTypeCollector<'_, '_> { } } -struct FieldCollector<'a, 'q> { - query: &'a mut Query<'q>, - lang: &'a Lang, +struct FieldCollector<'l, 'a, 'q> { + linker: &'l mut Linker<'a, 'q>, } -impl Visitor for FieldCollector<'_, '_> { +impl Visitor for FieldCollector<'_, '_, '_> { fn visit(&mut self, root: &ast::Root) { walk(self, root); } @@ -728,7 +762,7 @@ impl Visitor for FieldCollector<'_, '_> { fn visit_named_node(&mut self, node: &ast::NamedNode) { for child in node.as_cst().children() { if let Some(neg) = ast::NegatedField::cast(child) { - self.query.resolve_field_by_token(neg.name(), self.lang); + self.linker.resolve_field_by_token(neg.name()); } } @@ -736,7 +770,7 @@ impl Visitor for FieldCollector<'_, '_> { } fn visit_field_expr(&mut self, field: &ast::FieldExpr) { - self.query.resolve_field_by_token(field.name(), self.lang); + self.linker.resolve_field_by_token(field.name()); super::visitor::walk_field_expr(self, field); } diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 6d066604..5bffca16 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -12,6 +12,9 @@ mod graph_qis; mod invariants; mod printer; mod utils; +#[cfg(feature = "plotnik-langs")] +use plotnik_core::NodeFieldId; +use plotnik_core::NodeTypeId; pub use printer::QueryPrinter; pub mod alt_kinds; @@ -62,9 +65,6 @@ mod symbol_table_tests; use 
std::collections::{HashMap, HashSet}; -#[cfg(feature = "plotnik-langs")] -use plotnik_langs::{NodeFieldId, NodeTypeId}; - use rowan::GreenNodeBuilder; use crate::Result; diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index d08b6902..9225287a 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -1,8 +1,15 @@ #![allow(unused)] +use std::collections::HashMap; +use std::ops::{Deref, DerefMut}; + +use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_langs::Lang; + use crate::parser::{ParseResult, Parser, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; use crate::query::dependencies::{self, DependencyAnalysis}; use crate::query::expr_arity::{ExprArityTable, infer_arities}; +use crate::query::link; use crate::query::symbol_table::{SymbolTable, resolve_names}; use crate::{Diagnostics, parser::Root}; @@ -110,3 +117,33 @@ pub struct QueryAnalyzed<'q> { dependency_analysis: DependencyAnalysis<'q>, arity_table: ExprArityTable, } + +impl<'q> QueryAnalyzed<'q> { + pub fn link(mut self, lang: &Lang) { + let mut node_type_ids: HashMap<&'q str, Option> = HashMap::new(); + let mut node_field_ids: HashMap<&'q str, Option> = HashMap::new(); + link::link( + &self.query_parsed.ast, + self.query_parsed.src, + lang, + &self.symbol_table, + &mut node_type_ids, + &mut node_field_ids, + &mut self.query_parsed.diag, + ); + } +} + +impl<'q> Deref for QueryAnalyzed<'q> { + type Target = QueryParsed<'q>; + + fn deref(&self) -> &Self::Target { + &self.query_parsed + } +} + +impl<'q> DerefMut for QueryAnalyzed<'q> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.query_parsed + } +} From 03e8516850f53dbfd70df44da221a7c5fdf543b1 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 17 Dec 2025 07:55:53 -0300 Subject: [PATCH 05/11] Remove feature flags for plotnik-langs The `plotnik-langs` feature flags were not necessary as the functionality they guarded was always intended to be present. Removing these flags simplifies the codebase and ensures consistent behavior across all builds. 
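For context, here is a minimal sketch of how the staged pipeline from the preceding
commits fits together now that linking is always compiled in. The `parse()` name for
the `QueryBuilder` step is an assumption for illustration; only `analyze()` and
`link()` are visible in these diffs, and `analyze()` returns a `Result` since the
error-handling commit.

    // Hypothetical driver, not part of this patch series.
    fn build_query<'q>(builder: QueryBuilder<'q>, lang: &Lang) -> crate::Result<()> {
        let parsed: QueryParsed<'q> = builder.parse()?;      // lex/parse + alternation-kind checks
        let analyzed: QueryAnalyzed<'q> = parsed.analyze()?; // names, dependencies, arity
        analyzed.link(lang);                                  // grammar linking, no feature gate
        Ok(())
    }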
--- crates/plotnik-lib/src/query/mod.rs | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 5bffca16..796d3245 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -12,7 +12,6 @@ mod graph_qis; mod invariants; mod printer; mod utils; -#[cfg(feature = "plotnik-langs")] use plotnik_core::NodeFieldId; use plotnik_core::NodeTypeId; pub use printer::QueryPrinter; @@ -26,7 +25,6 @@ mod graph_dump; mod graph_optimize; pub mod infer; mod infer_dump; -#[cfg(feature = "plotnik-langs")] pub mod link; #[allow(clippy::module_inception)] pub mod query; @@ -105,9 +103,7 @@ pub struct Query<'q> { ast: Root, symbol_table: SymbolTable<'q>, expr_arity_table: HashMap, - #[cfg(feature = "plotnik-langs")] node_type_ids: HashMap<&'q str, Option>, - #[cfg(feature = "plotnik-langs")] node_field_ids: HashMap<&'q str, Option>, exec_fuel: u32, recursion_fuel: u32, @@ -117,7 +113,6 @@ pub struct Query<'q> { resolve_diagnostics: Diagnostics, recursion_diagnostics: Diagnostics, expr_arity_diagnostics: Diagnostics, - #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics, dependency_analysis: DependencyAnalysis<'q>, // Graph compilation fields @@ -154,9 +149,7 @@ impl<'a> Query<'a> { ast: empty_root(), symbol_table: SymbolTable::default(), expr_arity_table: HashMap::new(), - #[cfg(feature = "plotnik-langs")] node_type_ids: HashMap::new(), - #[cfg(feature = "plotnik-langs")] node_field_ids: HashMap::new(), exec_fuel: DEFAULT_EXEC_FUEL, recursion_fuel: DEFAULT_RECURSION_FUEL, @@ -167,7 +160,6 @@ impl<'a> Query<'a> { recursion_diagnostics: Diagnostics::new(), expr_arity_diagnostics: Diagnostics::new(), dependency_analysis: DependencyAnalysis::default(), - #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics::new(), graph: BuildGraph::default(), dead_nodes: HashSet::new(), @@ -318,7 +310,6 @@ impl<'a> Query<'a> { all.extend(self.resolve_diagnostics.clone()); all.extend(self.recursion_diagnostics.clone()); all.extend(self.expr_arity_diagnostics.clone()); - #[cfg(feature = "plotnik-langs")] all.extend(self.link_diagnostics.clone()); all.extend(self.type_info.diagnostics.clone()); all @@ -333,7 +324,6 @@ impl<'a> Query<'a> { } /// Query is valid if there are no error-severity diagnostics (warnings are allowed). - #[cfg(feature = "plotnik-langs")] pub fn is_valid(&self) -> bool { !self.parse_diagnostics.has_errors() && !self.alt_kind_diagnostics.has_errors() @@ -343,16 +333,6 @@ impl<'a> Query<'a> { && !self.link_diagnostics.has_errors() } - /// Query is valid if there are no error-severity diagnostics (warnings are allowed). - #[cfg(not(feature = "plotnik-langs"))] - pub fn is_valid(&self) -> bool { - !self.parse_diagnostics.has_errors() - && !self.alt_kind_diagnostics.has_errors() - && !self.resolve_diagnostics.has_errors() - && !self.recursion_diagnostics.has_errors() - && !self.shapes_diagnostics.has_errors() - } - /// Check if graph compilation produced type errors. 
pub fn has_type_errors(&self) -> bool { self.type_info.has_errors() From b54c4b0aedd05cf2a717ee65be8c35c47b011141 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 17 Dec 2025 10:02:36 -0300 Subject: [PATCH 06/11] Update query.rs --- crates/plotnik-lib/src/query/query.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 9225287a..d6bc70f8 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -119,18 +119,25 @@ pub struct QueryAnalyzed<'q> { } impl<'q> QueryAnalyzed<'q> { - pub fn link(mut self, lang: &Lang) { - let mut node_type_ids: HashMap<&'q str, Option> = HashMap::new(); - let mut node_field_ids: HashMap<&'q str, Option> = HashMap::new(); + pub fn link(mut self, lang: &Lang) -> LinkedQuery<'q> { + let mut type_ids: HashMap<&'q str, Option> = HashMap::new(); + let mut field_ids: HashMap<&'q str, Option> = HashMap::new(); + link::link( &self.query_parsed.ast, self.query_parsed.src, lang, &self.symbol_table, - &mut node_type_ids, - &mut node_field_ids, + &mut type_ids, + &mut field_ids, &mut self.query_parsed.diag, ); + + LinkedQuery { + inner: self, + type_ids, + field_ids, + } } } @@ -147,3 +154,12 @@ impl<'q> DerefMut for QueryAnalyzed<'q> { &mut self.query_parsed } } + +type NodeTypeIdTable<'q> = HashMap<&'q str, Option>; +type NodeFieldIdTable<'q> = HashMap<&'q str, Option>; + +pub struct LinkedQuery<'q> { + inner: QueryAnalyzed<'q>, + type_ids: NodeTypeIdTable<'q>, + field_ids: NodeFieldIdTable<'q>, +} From 941ab7edf135ea93f3323bd448ca2669dde3bfea Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 17 Dec 2025 11:07:29 -0300 Subject: [PATCH 07/11] QIS --- crates/plotnik-lib/src/query/graph_build.rs | 15 +- crates/plotnik-lib/src/query/graph_qis.rs | 260 +++++++++--------- .../plotnik-lib/src/query/graph_qis_tests.rs | 1 + crates/plotnik-lib/src/query/infer.rs | 2 +- crates/plotnik-lib/src/query/mod.rs | 30 +- crates/plotnik-lib/src/query/query.rs | 5 + 6 files changed, 163 insertions(+), 150 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 3f675114..fbff3ec6 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -10,6 +10,7 @@ use crate::parser::{ AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Expr, FieldExpr, NamedNode, NegatedField, QuantifiedExpr, Ref, SeqExpr, SeqItem, SyntaxKind, token_src, }; +use crate::query::graph_qis::collect_propagating_captures; use super::Query; use super::graph::{BuildEffect, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; @@ -101,7 +102,7 @@ impl<'a> Query<'a> { let fragment = self.construct_expr(&body, NavContext::Root); // Multi-capture definitions need struct wrapping at root - let entry = if self.multi_capture_defs.contains(name) { + let entry = if self.qis_ctx.multi_capture_defs.contains(name) { let start_id = self.graph.add_epsilon(); self.graph .node_mut(start_id) @@ -522,11 +523,17 @@ impl<'a> Query<'a> { // Single-capture definitions unwrap: no Field effect, type is capture's type directly. // Only the specific propagating capture should unwrap, not nested captures. 
- let is_single_capture = self.is_single_capture(self.current_def_name, name); + let is_single_capture = self + .qis_ctx + .single_capture_defs + .get(self.current_def_name) + .map(|c| *c == name) + .unwrap_or(false); if is_single_capture && needs_object_wrapper { // Captured container at single-capture definition root - let inner_captures = self.collect_propagating_captures(&inner_expr); + // let inner_captures = self.collect_propagating_captures(&inner_expr); + let inner_captures = collect_propagating_captures(&inner_expr, self.source); if inner_captures.is_empty() { // No inner captures → Void (per ADR-0009 Payload Rule). // Return epsilon for matching only, discard inner effects. @@ -604,7 +611,7 @@ impl<'a> Query<'a> { // Build inner with Stay nav; the repetition combinator handles initial/re-entry nav let f = self.construct_expr(&inner_expr, NavContext::Root); let nav = ctx.to_nav(); - let qis = self.qis_triggers.contains_key(quant); + let qis = self.qis_ctx.qis_triggers.contains_key(quant); match (op.kind(), qis) { (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f, nav), diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs index 84fea8b9..ae750fed 100644 --- a/crates/plotnik-lib/src/query/graph_qis.rs +++ b/crates/plotnik-lib/src/query/graph_qis.rs @@ -5,153 +5,159 @@ //! //! See ADR-0009 for full specification. +use std::collections::{HashMap, HashSet}; + use crate::parser::{ast, token_src}; +use crate::query::symbol_table::SymbolTable; +use crate::query::visitor::Visitor; -use super::{QisTrigger, Query}; - -impl<'a> Query<'a> { - /// Detect capture scopes: QIS triggers and single-capture definitions. - /// - /// - QIS triggers when quantified expression has ≥2 propagating captures - /// - Single-capture definitions unwrap (no Field effect, type is capture's type) - pub(super) fn detect_capture_scopes(&mut self) { - let entries: Vec<_> = self - .symbol_table - .iter() - .map(|(n, b)| (*n, b.clone())) - .collect(); - for (name, body) in &entries { - // Detect single-capture and multi-capture definitions - let captures = self.collect_propagating_captures(body); - if captures.len() == 1 { - self.single_capture_defs.insert(*name, captures[0]); - } else if captures.len() >= 2 { - self.multi_capture_defs.insert(*name); - } - // Detect QIS within this definition - self.detect_qis_in_expr(body); - } - } +#[derive(Debug, Clone)] +pub struct QisTrigger<'a> { + #[allow(unused)] + pub captures: Vec<&'a str>, +} - fn detect_qis_in_expr(&mut self, expr: &ast::Expr) { - match expr { - ast::Expr::QuantifiedExpr(q) => { - if let Some(inner) = q.inner() { - let captures = self.collect_propagating_captures(&inner); - if captures.len() >= 2 { - self.qis_triggers.insert(q.clone(), QisTrigger { captures }); - } - self.detect_qis_in_expr(&inner); - } - } - ast::Expr::CapturedExpr(c) => { - // Captures on sequences/alternations absorb inner captures, - // but we still recurse to find nested quantifiers. - // Special case: captured quantifier with ≥1 nested capture needs QIS - // to wrap each iteration with StartObject/EndObject for proper field scoping. 
- if let Some(inner) = c.inner() { - // Check if this capture wraps a quantifier with nested captures - if let ast::Expr::QuantifiedExpr(q) = &inner - && let Some(quant_inner) = q.inner() - { - let captures = self.collect_propagating_captures(&quant_inner); - // Trigger QIS if there's at least 1 capture (not already covered by ≥2 rule) - if !captures.is_empty() && !self.qis_triggers.contains_key(q) { - self.qis_triggers.insert(q.clone(), QisTrigger { captures }); - } - } - self.detect_qis_in_expr(&inner); - } - } - _ => { - for child in expr.children() { - self.detect_qis_in_expr(&child); - } - } +pub type QisTriggerTable<'q> = HashMap>; + +#[derive(Debug, Default)] +pub struct QisContext<'q> { + pub qis_triggers: QisTriggerTable<'q>, + /// Definitions with exactly 1 propagating capture: def name → capture name. + pub single_capture_defs: HashMap<&'q str, &'q str>, + /// Definitions with 2+ propagating captures (need struct wrapping at root). + pub multi_capture_defs: HashSet<&'q str>, +} + +/// Detect capture scopes: QIS triggers and single-capture definitions. +/// +/// - QIS triggers when quantified expression has ≥2 propagating captures +/// - Single-capture definitions unwrap (no Field effect, type is capture's type) +pub fn detect_capture_scopes<'q>( + source: &'q str, + symbol_table: &SymbolTable<'q>, +) -> QisContext<'q> { + let mut ctx: QisContext<'q> = QisContext::default(); + + let mut visitor = QisVisitor { + source, + qis_triggers: &mut ctx.qis_triggers, + }; + + // Collect entries to decouple from self for the iteration + let entries: Vec<_> = symbol_table.iter().map(|(n, b)| (*n, b.clone())).collect(); + + for (name, body) in entries { + // 1. Detect single/multi capture definitions + let captures = collect_propagating_captures(&body, source); + + if captures.len() == 1 { + ctx.single_capture_defs.insert(name, captures[0]); + } else if captures.len() >= 2 { + ctx.multi_capture_defs.insert(name); } - } - /// Collect captures that propagate out of an expression (not absorbed by inner scopes). - pub(super) fn collect_propagating_captures(&self, expr: &ast::Expr) -> Vec<&'a str> { - let mut captures = Vec::new(); - self.collect_propagating_captures_impl(expr, &mut captures); - captures + // 2. Detect QIS within this definition + visitor.visit_expr(&body); } - fn collect_propagating_captures_impl(&self, expr: &ast::Expr, out: &mut Vec<&'a str>) { - match expr { - ast::Expr::CapturedExpr(c) => { - if let Some(name_token) = c.name() { - let name = token_src(&name_token, self.source); - out.push(name); - } - // Captured sequence/alternation absorbs inner captures. - // Captured quantifiers with nested captures also absorb (they become QIS). 
- if let Some(inner) = c.inner() - && !self.is_scope_container(&inner) - { - self.collect_propagating_captures_impl(&inner, out); - } - } - ast::Expr::QuantifiedExpr(q) => { - // Nested quantifier: its captures propagate (with modified cardinality) - if let Some(inner) = q.inner() { - self.collect_propagating_captures_impl(&inner, out); - } - } - _ => { - for child in expr.children() { - self.collect_propagating_captures_impl(&child, out); - } + ctx +} + +struct QisVisitor<'a, 'map> { + source: &'a str, + qis_triggers: &'map mut HashMap>, +} + +impl<'a, 'map> Visitor for QisVisitor<'a, 'map> { + fn visit_quantified_expr(&mut self, q: &ast::QuantifiedExpr) { + if let Some(inner) = q.inner() { + let captures = collect_propagating_captures(&inner, self.source); + if captures.len() >= 2 { + self.qis_triggers.insert(q.clone(), QisTrigger { captures }); } + // Recurse + self.visit_expr(&inner); } } - /// Check if an expression is a scope container that absorbs inner captures. - /// - Sequences and alternations always absorb - /// - Quantifiers absorb if they have nested captures (will become QIS) - fn is_scope_container(&self, expr: &ast::Expr) -> bool { - match expr { - ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, - ast::Expr::QuantifiedExpr(q) => { - if let Some(inner) = q.inner() { - // Quantifier with nested captures acts as scope container - // (will be treated as QIS, wrapping each element in an object) - let nested_captures = self.collect_propagating_captures(&inner); - if !nested_captures.is_empty() { - return true; - } - // Otherwise check if inner is a scope container - self.is_scope_container(&inner) - } else { - false + fn visit_captured_expr(&mut self, c: &ast::CapturedExpr) { + // Captures on sequences/alternations absorb inner captures, + // but we still recurse to find nested quantifiers. + if let Some(inner) = c.inner() { + // Special case: captured quantifier with ≥1 nested capture needs QIS + // to wrap each iteration with StartObject/EndObject for proper field scoping. + if let ast::Expr::QuantifiedExpr(q) = &inner + && let Some(quant_inner) = q.inner() + { + let captures = collect_propagating_captures(&quant_inner, self.source); + // Trigger QIS if there's at least 1 capture (not already covered by ≥2 rule) + if !captures.is_empty() && !self.qis_triggers.contains_key(q) { + self.qis_triggers.insert(q.clone(), QisTrigger { captures }); } } - _ => false, + self.visit_expr(&inner); } } +} - /// Check if a quantified expression triggers QIS. - pub fn is_qis_trigger(&self, q: &ast::QuantifiedExpr) -> bool { - self.qis_triggers.contains_key(q) - } +pub fn collect_propagating_captures<'a>(expr: &ast::Expr, source: &'a str) -> Vec<&'a str> { + let mut collector = CaptureCollector { + source, + captures: Vec::new(), + }; + collector.visit_expr(expr); + collector.captures +} - /// Get QIS trigger info for a quantified expression. - pub fn qis_trigger(&self, q: &ast::QuantifiedExpr) -> Option<&QisTrigger<'a>> { - self.qis_triggers.get(q) +struct CaptureCollector<'a> { + source: &'a str, + captures: Vec<&'a str>, +} + +impl<'a> Visitor for CaptureCollector<'a> { + fn visit_captured_expr(&mut self, c: &ast::CapturedExpr) { + if let Some(name_token) = c.name() { + let name = token_src(&name_token, self.source); + self.captures.push(name); + } + + // Captured sequence/alternation absorbs inner captures. + // Captured quantifiers with nested captures also absorb (they become QIS). 
+ if let Some(inner) = c.inner() + && !is_scope_container(&inner, self.source) + { + self.visit_expr(&inner); + } } - /// Check if this capture is the single propagating capture for its definition. - /// Only that specific capture should unwrap (skip Field effect). - pub fn is_single_capture(&self, def_name: &str, capture_name: &str) -> bool { - self.single_capture_defs - .get(def_name) - .map(|c| *c == capture_name) - .unwrap_or(false) + fn visit_quantified_expr(&mut self, q: &ast::QuantifiedExpr) { + // Nested quantifier: its captures propagate (with modified cardinality) + if let Some(inner) = q.inner() { + self.visit_expr(&inner); + } } +} - /// Check if definition has 2+ propagating captures (needs struct wrapping). - pub fn is_multi_capture_def(&self, name: &str) -> bool { - self.multi_capture_defs.contains(name) +/// Check if an expression is a scope container that absorbs inner captures. +/// - Sequences and alternations always absorb +/// - Quantifiers absorb if they have nested captures (will become QIS) +fn is_scope_container(expr: &ast::Expr, source: &str) -> bool { + match expr { + ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, + ast::Expr::QuantifiedExpr(q) => { + if let Some(inner) = q.inner() { + // Quantifier with nested captures acts as scope container + // (will be treated as QIS, wrapping each element in an object) + let nested_captures = collect_propagating_captures(&inner, source); + if !nested_captures.is_empty() { + return true; + } + // Otherwise check if inner is a scope container + is_scope_container(&inner, source) + } else { + false + } + } + _ => false, } } diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs index d2b7ce8c..49388bf8 100644 --- a/crates/plotnik-lib/src/query/graph_qis_tests.rs +++ b/crates/plotnik-lib/src/query/graph_qis_tests.rs @@ -9,6 +9,7 @@ fn check_qis(source: &str) -> String { for def in query.root().defs() { let def_name = def.name().map(|t| t.text().to_string()).unwrap_or_default(); let mut triggers: Vec<_> = query + .qis_ctx .qis_triggers .iter() .filter_map(|(q, trigger)| { diff --git a/crates/plotnik-lib/src/query/infer.rs b/crates/plotnik-lib/src/query/infer.rs index d802e4ae..25c71b4c 100644 --- a/crates/plotnik-lib/src/query/infer.rs +++ b/crates/plotnik-lib/src/query/infer.rs @@ -784,7 +784,7 @@ impl<'a> Query<'a> { /// Run type inference on the query AST. pub(super) fn infer_types(&mut self) { // Collect QIS triggers upfront to avoid borrowing issues - let qis_triggers: HashSet<_> = self.qis_triggers.keys().cloned().collect(); + let qis_triggers: HashSet<_> = self.qis_ctx.qis_triggers.keys().cloned().collect(); let mut ctx = InferenceContext::new(self.source, qis_triggers); diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 796d3245..17c1660a 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -71,6 +71,8 @@ use crate::parser::cst::SyntaxKind; use crate::parser::lexer::lex; use crate::parser::{ParseResult, Parser, Root, SyntaxNode, ast}; use crate::query::dependencies::DependencyAnalysis; +use crate::query::graph_qis::QisContext; +use crate::query::graph_qis::detect_capture_scopes; const DEFAULT_EXEC_FUEL: u32 = 1_000_000; const DEFAULT_RECURSION_FUEL: u32 = 4096; @@ -91,12 +93,6 @@ use symbol_table::SymbolTable; /// /// When a quantified expression has ≥2 propagating captures, QIS creates /// an implicit object scope so captures stay coupled per-iteration. 
-#[derive(Debug, Clone)] -pub struct QisTrigger<'a> { - /// Capture names that propagate from this quantified expression. - pub captures: Vec<&'a str>, -} - #[derive(Debug)] pub struct Query<'q> { source: &'q str, @@ -120,11 +116,7 @@ pub struct Query<'q> { dead_nodes: HashSet, type_info: TypeInferenceResult<'q>, /// QIS triggers: quantified expressions with ≥2 propagating captures. - qis_triggers: HashMap>, - /// Definitions with exactly 1 propagating capture: def name → capture name. - single_capture_defs: HashMap<&'q str, &'q str>, - /// Definitions with 2+ propagating captures (need struct wrapping at root). - multi_capture_defs: HashSet<&'q str>, + qis_ctx: QisContext<'q>, /// Current definition name during graph construction. current_def_name: &'q str, /// Counter for generating unique ref IDs during graph construction. @@ -164,9 +156,7 @@ impl<'a> Query<'a> { graph: BuildGraph::default(), dead_nodes: HashSet::new(), type_info: TypeInferenceResult::default(), - qis_triggers: HashMap::new(), - single_capture_defs: HashMap::new(), - multi_capture_defs: HashSet::new(), + qis_ctx: QisContext::default(), current_def_name: "", next_ref_id: 0, } @@ -198,7 +188,6 @@ impl<'a> Query<'a> { self.try_parse()?; self.validate_alt_kinds(); self.resolve_names(); - // self.validate_recursion(); self.dependency_analysis = dependencies::analyze_dependencies(&self.symbol_table); dependencies::validate_recursion( @@ -209,6 +198,11 @@ impl<'a> Query<'a> { ); self.infer_arities(); + + self.qis_ctx = detect_capture_scopes(self.source, &self.symbol_table); + + self.infer_types(); + Ok(self) } @@ -222,9 +216,7 @@ impl<'a> Query<'a> { if !self.is_valid() { return self; } - self.detect_capture_scopes(); self.construct_graph(); - self.infer_types(); // Run before optimization to avoid merged effects self.optimize_graph(); self } @@ -236,7 +228,9 @@ impl<'a> Query<'a> { if !self.is_valid() { return (self, String::new()); } - self.detect_capture_scopes(); + + self.qis_ctx = detect_capture_scopes(self.source, &self.symbol_table); + self.construct_graph(); if let Some(root) = root_kind { self.graph.wrap_definitions_with_root(root); diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index d6bc70f8..fe452f89 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -9,6 +9,7 @@ use crate::parser::{ParseResult, Parser, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; use crate::query::dependencies::{self, DependencyAnalysis}; use crate::query::expr_arity::{ExprArityTable, infer_arities}; +use crate::query::graph_qis::{QisContext, detect_capture_scopes}; use crate::query::link; use crate::query::symbol_table::{SymbolTable, resolve_names}; use crate::{Diagnostics, parser::Root}; @@ -102,11 +103,14 @@ impl<'q> QueryParsed<'q> { return Err(crate::Error::QueryAnalyzeError(self.diag)); } + let qis_ctx = detect_capture_scopes(self.src, &symbol_table); + Ok(QueryAnalyzed { query_parsed: self, symbol_table, dependency_analysis, arity_table, + qis_ctx, }) } } @@ -116,6 +120,7 @@ pub struct QueryAnalyzed<'q> { symbol_table: SymbolTable<'q>, dependency_analysis: DependencyAnalysis<'q>, arity_table: ExprArityTable, + qis_ctx: QisContext<'q>, } impl<'q> QueryAnalyzed<'q> { From 8f214fe57a1777e70ca4477d13ca34540b31eaab Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 17 Dec 2025 13:10:00 -0300 Subject: [PATCH 08/11] Graph builder --- crates/plotnik-lib/src/query/graph_build.rs | 52 +++++++++++++++++---- 
crates/plotnik-lib/src/query/mod.rs | 6 --- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index fbff3ec6..943eb5d4 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -10,10 +10,10 @@ use crate::parser::{ AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Expr, FieldExpr, NamedNode, NegatedField, QuantifiedExpr, Ref, SeqExpr, SeqItem, SyntaxKind, token_src, }; -use crate::query::graph_qis::collect_propagating_captures; +use crate::query::graph_qis::{QisContext, collect_propagating_captures}; -use super::Query; -use super::graph::{BuildEffect, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; +use super::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; +use super::{Query, SymbolTable}; /// Context for navigation determination. /// When `anchored` is true, `prev_anonymous` indicates whether the preceding @@ -90,8 +90,43 @@ impl<'a> Query<'a> { /// This method reuses the symbol_table from name resolution and /// qis_triggers from QIS detection. pub(super) fn construct_graph(&mut self) { - self.next_ref_id = 0; + let mut builder = GraphBuilder::new( + self.source, + &mut self.graph, + &self.symbol_table, + &self.qis_ctx, + ); + builder.construct(); + } +} + +struct GraphBuilder<'a, 'q> { + source: &'q str, + graph: &'a mut BuildGraph<'q>, + symbol_table: &'a SymbolTable<'q>, + qis_ctx: &'a QisContext<'q>, + current_def_name: &'q str, + next_ref_id: u32, +} +impl<'a, 'q> GraphBuilder<'a, 'q> { + fn new( + source: &'q str, + graph: &'a mut BuildGraph<'q>, + symbol_table: &'a SymbolTable<'q>, + qis_ctx: &'a QisContext<'q>, + ) -> Self { + Self { + source, + graph, + symbol_table, + qis_ctx, + current_def_name: "", + next_ref_id: 0, + } + } + + fn construct(&mut self) { let entries: Vec<_> = self .symbol_table .iter() @@ -130,7 +165,7 @@ impl<'a> Query<'a> { /// Link Enter nodes to their definition entry points. fn link_references(&mut self) { - let mut links: Vec<(NodeId, &'a str, Option)> = Vec::new(); + let mut links: Vec<(NodeId, &'q str, Option)> = Vec::new(); for (id, node) in self.graph.iter() { if let RefMarker::Enter { .. } = &node.ref_marker @@ -284,13 +319,13 @@ impl<'a> Query<'a> { (fragments, exit_ctx) } - fn build_named_matcher(&self, node: &NamedNode) -> BuildMatcher<'a> { + fn build_named_matcher(&self, node: &NamedNode) -> BuildMatcher<'q> { let kind = node .node_type() .map(|t| token_src(&t, self.source)) .unwrap_or("_"); - let negated_fields: Vec<&'a str> = node + let negated_fields: Vec<&'q str> = node .as_cst() .children() .filter_map(NegatedField::cast) @@ -637,7 +672,7 @@ impl<'a> Query<'a> { self.construct_expr(&value_expr, ctx) } - fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'a str> { + fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'q str> { let parent = node.parent()?; let field_expr = FieldExpr::cast(parent)?; let name_token = field_expr.name()?; @@ -686,6 +721,7 @@ impl<'a> Query<'a> { self.collect_matchers(succ, result, visited); } } + /// Count Field effects reachable from a node (for variant flattening). 
fn count_field_effects(&self, start: NodeId) -> usize { self.nodes_with_field_effects(start) diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 17c1660a..06704a5f 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -117,10 +117,6 @@ pub struct Query<'q> { type_info: TypeInferenceResult<'q>, /// QIS triggers: quantified expressions with ≥2 propagating captures. qis_ctx: QisContext<'q>, - /// Current definition name during graph construction. - current_def_name: &'q str, - /// Counter for generating unique ref IDs during graph construction. - next_ref_id: u32, } fn empty_root() -> Root { @@ -157,8 +153,6 @@ impl<'a> Query<'a> { dead_nodes: HashSet::new(), type_info: TypeInferenceResult::default(), qis_ctx: QisContext::default(), - current_def_name: "", - next_ref_id: 0, } } From f393eba7e23c7d318e1f8a90da6d6e9e52079edb Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 17 Dec 2025 13:26:31 -0300 Subject: [PATCH 09/11] Update graph_optimize.rs --- .../plotnik-lib/src/query/graph_optimize.rs | 89 ++++++++++++------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index 87976fc4..7eb8f705 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -29,7 +29,7 @@ impl Query<'_> { /// /// Populates `dead_nodes` with eliminated node IDs. pub(super) fn optimize_graph(&mut self) { - let (dead, _stats) = eliminate_epsilons(&mut self.graph); + let (dead, _stats) = optimize_graph(&mut self.graph); self.dead_nodes = dead; } } @@ -37,7 +37,7 @@ impl Query<'_> { /// Run epsilon elimination on a BuildGraph. /// /// Returns the set of dead node IDs that should be skipped during emission. -pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeStats) { +pub fn optimize_graph(graph: &mut BuildGraph) -> (HashSet, OptimizeStats) { let mut stats = OptimizeStats::default(); let mut dead_nodes: HashSet = HashSet::new(); @@ -50,39 +50,44 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS continue; } - let node = graph.node(id); - if !is_eliminable_epsilon(node, graph, &predecessors) { + // We need to clone specific fields or compute conditions before mutating to avoid borrow checker issues + // if we were to hold a reference to 'node'. + // Here we just inspect via indexing which is fine until we start mutating. + + // Check if eliminable + if !is_eliminable_epsilon(id, graph, &predecessors) { + let node = graph.node(id); if node.is_epsilon() { stats.epsilons_kept += 1; } continue; } - let successor_id = node.successors[0]; - let effects_to_prepend = graph.node(id).effects.clone(); - let nav_to_transfer = graph.node(id).nav; - let preds = predecessors.get(&id).cloned().unwrap_or_default(); + let node_effects = graph.node(id).effects.clone(); + let node_nav = graph.node(id).nav; + let successor_id = graph.node(id).successors[0]; - // Prepend effects to successor - if !effects_to_prepend.is_empty() { + // 1. Prepend effects to successor + if !node_effects.is_empty() { let succ = graph.node_mut(successor_id); - let mut new_effects = effects_to_prepend; + let mut new_effects = node_effects; new_effects.append(&mut succ.effects); succ.effects = new_effects; } - // Transfer or merge nav + // 2. 
Transfer or merge nav let successor_nav = graph.node(successor_id).nav; - if !nav_to_transfer.is_stay() { + if !node_nav.is_stay() { if successor_nav.is_stay() { - graph.node_mut(successor_id).nav = nav_to_transfer; - } else if can_merge_up(nav_to_transfer, successor_nav) { - let merged = Nav::up(nav_to_transfer.level + successor_nav.level); + graph.node_mut(successor_id).nav = node_nav; + } else if can_merge_up(node_nav, successor_nav) { + let merged = Nav::up(node_nav.level + successor_nav.level); graph.node_mut(successor_id).nav = merged; } } - // Redirect predecessors to successor + // 3. Redirect predecessors to successor + let preds = predecessors.get(&id).cloned().unwrap_or_default(); for pred_id in &preds { if dead_nodes.contains(pred_id) { continue; @@ -101,6 +106,7 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS succ_preds.retain(|&p| p != id); } + // 4. Update definitions that pointed to the eliminated node redirect_definitions(graph, id, successor_id); dead_nodes.insert(id); @@ -111,10 +117,12 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS } fn is_eliminable_epsilon( - node: &super::graph::BuildNode, + id: NodeId, graph: &BuildGraph, predecessors: &HashMap<NodeId, Vec<NodeId>>, ) -> bool { + let node = graph.node(id); + if !matches!(node.matcher, BuildMatcher::Epsilon) { return false; } @@ -130,12 +138,13 @@ fn is_eliminable_epsilon( let successor_id = node.successors[0]; let successor = graph.node(successor_id); + // Nav merge check if !node.nav.is_stay() && !successor.nav.is_stay() && !can_merge_up(node.nav, successor.nav) { return false; } // Don't eliminate if node has nav and successor is a join point. - // Different paths may need different navigation (e.g., first iteration vs loop re-entry). + // Different paths may need different navigation. if !node.nav.is_stay() { let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); if succ_pred_count > 1 { return false; } } @@ -143,26 +152,46 @@ fn is_eliminable_epsilon( + // Don't eliminate if node has effects and successor is a join point. + if !node.effects.is_empty() { + let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); + if succ_pred_count > 1 { + return false; + } + } + + // Don't eliminate if node has effects and successor has a ref marker. + // Effects run when the edge into their node is traversed; merging would move them onto the + // successor and reorder them relative to the Enter/Exit boundary. if !node.effects.is_empty() && successor.ref_marker.is_some() { return false; } // Don't eliminate if epsilon has effects and successor has navigation. + // Per-transition execution order is Nav -> Match -> Effects: before merging, this node's effects + // run before the successor's nav; after merging they would run after it, so bail out. if !node.effects.is_empty() && !successor.nav.is_stay() { return false; } - // Don't eliminate if node has effects and successor is a join point. - // Merging effects onto a join point changes execution count (e.g., loop entry vs per-iteration). - if !node.effects.is_empty() { - let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); - if succ_pred_count > 1 { - return false; - } - } - true }
From 957cfa3e561bd85ae61132e0ae008acd4b2c22e4 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Fri, 19 Dec 2025 09:05:03 -0300 Subject: [PATCH 10/11] updates --- AGENTS.md | 383 ++++++++++--------------- crates/plotnik-cli/src/cli.rs | 4 +- crates/plotnik-lib/src/ir/compiled.rs | 2 +- crates/plotnik-lib/src/ir/serialize.rs | 2 +- 4 files changed, 150 insertions(+), 241 deletions(-)
diff --git a/AGENTS.md b/AGENTS.md index 8f76bc22..98e04219 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,148 +1,146 @@ # Ethos -- `AGENTS.md` (this file) is our constitution. You're welcome to propose useful amendments. -- We implement resilient parser, provides user-friendly error messages. -- We call error messages "diagnostics" to avoid confusion with other errors (see `diagnostics/` folder). -- We strive to achieve excellent stability by enforcing invariants in the code: - - `panic!`, `assert!` or `.expect()` for simple cases - - `invariants.rs` otherwise, to skip the coverage of unreachable code -- We maintain the architecture decision records (ADRs) - - AI agent is responsible for creating new ADR when such decision was made during agentic coding session +- `AGENTS.md` is our constitution. Propose useful amendments. +- Resilient parser with user-friendly error messages called "diagnostics" (see `diagnostics/`) +- Stability via invariants: `panic!`/`assert!`/`.expect()` for simple cases, `invariants.rs` otherwise +- AI agents create ADRs when architectural decisions are made -# Architecture Decision Records (ADRs) +# Documentation -- **Location**: `docs/adr/` -- **Naming**: `ADR-XXXX-short-title-in-kebab-case.md` (`XXXX` is a sequential number). -- **Index**: - - _(no ADRs yet)_ -- **Template**: +[docs/README.md](docs/README.md) | [Language Reference](docs/lang-reference.md) | [Type System](docs/type-system.md) | [Runtime Engine](docs/runtime-engine.md) | [Binary Format](docs/binary-format/01-overview.md) - ```markdown - # ADR-XXXX: Title of the Decision +# Query Syntax Quick Reference - - **Status**: Proposed | Accepted | Deprecated | Superseded by [ADR-YYYY](ADR-YYYY-...)
- - **Date**: YYYY-MM-DD +## Core Constructs - ## Context +| Syntax | Meaning | +| ------------------- | ------------------------------ | +| `(node_kind)` | Named node | +| `"text"` / `'text'` | Anonymous node (literal token) | +| `(_)` | Any named node | +| `_` | Any node | +| `@name` | Capture (snake_case only) | +| `@x :: T` | Type annotation | +| `@x :: string` | Extract node text | +| `field: pattern` | Field constraint | +| `!field` | Negated field (assert absent) | +| `?` `*` `+` | Quantifiers (0-1, 0+, 1+) | +| `??` `*?` `+?` | Non-greedy variants | +| `.` | Anchor (adjacency) | +| `{...}` | Sequence (siblings in order) | +| `[...]` | Alternation (first match wins) | +| `Name = ...` | Named expression (internal) | +| `pub Name = ...` | Public entrypoint | +| `(Name)` | Use named expression | - Describe the issue, problem, or driving force. +## Data Model Rules - ## Decision +- Captures are flat by default: nesting in pattern ≠ nesting in output +- `{...} @x` or `[...] @x` creates a nested scope +- Quantifier on captured pattern → array: `(x)* @a` → `a: T[]` - Clearly state the decision that was made. +## Alternations - ## Consequences +Unlabeled (merge style): - - **Positive**: Benefits, alignment with goals. - - **Negative**: Drawbacks, trade-offs, future challenges. - - **Considered Alternatives**: Describe rejected options and why. - ``` +``` +[(identifier) @x (number) @y] → { x?: Node, y?: Node } +``` -## How to write ADRs +Labeled (tagged union): -ADRs must be succint and straight to the point. -They must contain examples with high information density and pedagogical value. -These are docs people usually don't want to read, but when they do, they find it quite fascinating. -Don't write imperative code, describe structure definitions, their purpose and how to use them properly (and how to NOT use). +``` +[A: (id) @x B: (num) @y] → { $tag: "A", $data: { x: ... } } | { $tag: "B", $data: { y: ... } } +``` -# Plotnik Query Language +## Common Patterns -Plotnik is a strongly-typed, whitespace-delimited pattern matching language for syntax trees (similar to Tree-sitter but stricter). +``` +; Match with field +(binary_expression left: (identifier) @left) -## Grammar Synopsis +; Sequence of siblings +{(comment) (function_declaration) @fn} -- **Root**: List of definitions (`Def = expr`). -- **Nodes**: `(kind child1 child2)` or `(kind)`. -- **Strings**: `"literal"`, `'literal'`. -- **Wildcards**: `_` (matches any node). -- **Sequences**: `{ expr1 expr2 }`. -- **Alternations**: `[ expr1 expr2 ]` (untagged) OR `[ Label: expr1 Label: expr2 ]` (tagged). -- **References**: `(DefName)` (Must be PascalCase, no children). +; Optional child +(function (decorator)? @dec) -## Modifiers & Constraints +; Recursion +Nested = (call function: [(id) @name (Nested) @inner]) +``` -| Feature | Syntax | Constraint | -| :------------- | :--------------- | :----------------------------------------------------- | -| **Field** | `name: expr` | `expr` must match exactly **one** node (no multi-seq). | -| **Negation** | `!name` | Asserts field `name` is absent. | -| **Capture** | `expr @name` | `snake_case`. Suffix. | -| **Type** | `expr ::Type` | `PascalCase` or `::string`. Suffix. | -| **Quantifier** | `*`, `+`, `?` | Greedy. Suffix. | -| **Non-Greedy** | `*?`, `+?`, `??` | Suffix. | -| **Anchor** | `.` | Immediate child anchor. | +## Anti-patterns -## CRITICAL RULES (Strict Enforcement) +``` +; WRONG: groups can't be field values +(x field: {...}) -1. 
**CASING MATTERS**: - - **Definitions/Refs**: `PascalCase` (e.g., `MethodDecl`, `(MethodDecl)`). - - **Node Kinds**: `snake_case` (e.g., `(identifier)`). - - **Fields/Captures**: `snake_case` (e.g., `name:`, `@val`). - - **Branch Labels**: `PascalCase` (e.g., `[ Ok: (true) Err: (false) ]`). -2. **NO MIXED ALTS**: Alternations must be ALL labeled or ALL unlabeled. -3. **REFS HAVE NO CHILDREN**: - - Does not work: `(MyDef child)` +; WRONG: dot capture syntax +@function.name ; use @function_name + +; WRONG: predicates (unsupported) +(id) @x (#eq? @x "foo") +``` -## Examples +## Type System Gotchas + +**Columnar output**: Quantifiers produce parallel arrays, not list of objects: + +``` +{(A) @a (B) @b}* → { a: Node[], b: Node[] } // NOT [{a,b}, {a,b}] +``` -```plotnik -// Definition -Function = (function_definition - name: (identifier) @name - parameters: (parameters { - (identifier)* - }) - body: (Block) -) +For list of objects, wrap in sequence: `({(A) @a (B) @b} @row)*` -// Reference usage -Block = (block { - [ - Stmt: (Statement) - Expr: (Expression) - ]* -}) +**Row integrity**: Can't mix `*`/`+` with `1`/`?` in same quantified scope: -// Alternation with labels -Boolean = [ - True: "true" - False: "false" -] ``` +{(A)* @a (B) @b}* ; ERROR: @a desync, @b sync +{(A)? @a (B) @b}* ; OK: both synchronized (? emits null) +``` + +**Recursion rules**: + +``` +Loop = (Loop) ; ERROR: no escape path +Expr = [Lit: (n) @n Rec: (Expr)] ; OK: Lit escapes + +A = (B) B = (A) ; ERROR: no input consumed +A = (foo (B)) B = (bar (A)) ; OK: descends each step +``` + +## ⚠️ Sequence Syntax (Tree-sitter vs Plotnik) + +Tree-sitter: `((a) (b))` — Plotnik: `{(a) (b)}`. The #1 syntax mistake. + +`((a) (b))` in Plotnik means "node `(a)` with child `(b)`", NOT a sequence. + +# Architecture Decision Records (ADRs) + +- **Location**: `docs/adr/` +- **Naming**: `ADR-XXXX-short-title-in-kebab-case.md` (`XXXX` is a sequential number). +- **Index**: + - _(no ADRs yet)_ +- **Template**: + +[ADR-0001](docs/adr/ADR-0001-query-parser.md) | [ADR-0002](docs/adr/ADR-0002-diagnostics-system.md) | [ADR-0004](docs/adr/ADR-0004-query-ir-binary-format.md) | [ADR-0005](docs/adr/ADR-0005-transition-graph-format.md) | [ADR-0006](docs/adr/ADR-0006-dynamic-query-execution.md) | [ADR-0007](docs/adr/ADR-0007-type-metadata-format.md) | [ADR-0008](docs/adr/ADR-0008-tree-navigation.md) | [ADR-0009](docs/adr/ADR-0009-type-system.md) | [ADR-0010](docs/adr/ADR-0010-type-system-v2.md) | [ADR-0012](docs/adr/ADR-0012-variable-length-ir.md) -# Plotnik Query Data Model and Type Inference - -1. **Flat Scoping (Golden Rule)** - - Query nesting doesn't create data nesting - - `(A (B (C @val)))` → `{ val: Node }`. Intermediate nodes are ignored. - - **New Scope** is created _only_ by capturing a container: `{...} @name` or `[...] @name`. - -2. **Field Generation** - - Only explicit `@capture` creates a field. - - `key: (pattern)` is a structural constraint, **NOT** an extraction. It has nothing to do with tree-sitter fields. - -3. **Cardinality** - - `(x) @k` → `k: T` (Required) - - `(x)? @k` → `k: T?` (Optional) - - `(x)* @k` → `k: T[]` (List) - - `(x)+ @k` → `k: [T, ...T[]]` (Non-empty List) - -4. **Types** - - `(some_node) @x` (default) → `Node` (AST reference). 
- - `{...} @x` → receives some synthetic name based on the type of parent scope and capture name - - `Query = { (foo) @foo (bar) @bar (baz) @baz } @qux`: - - `@foo`, `@bar`, `@baz`: `Node` for - - `@qux`: `struct QueryQux { foo: Node, bar: Node, baz: Node }` - - entry point: `struct Query { qux : QueryQux }` - - `@x :: string` → `string` (extracts source text). - - `@x :: Type` → `Type` (assigns nominal type to the structure). - -5. **Alternations** - - Tagged: `[ L1: (a) @x L2: (b) @y ]` - → Discriminated Union: `{ "$tag": "L1", "$data": { x: Node } } | { "$tag": "L2", "$data": { y: Node } }`. - - Untagged: `[ (a) @x (b) @x ]` - → Merged Struct: `{ x: Node }`. Captures must be type-compatible across branches. - - Mixed: `[ (a) @x (b) ]` (invalid) - the diagnostics will be reported, but we infer as for untagged - → Merged Struct: `{ x: Node }`. Captures must be type-compatible across branches. +## Template + +```markdown +# ADR-XXXX: Title + +- **Status**: Proposed | Accepted | Deprecated | Superseded by [ADR-YYYY](ADR-YYYY-...) +- **Date**: YYYY-MM-DD + +## Context + +## Decision + +## Consequences + +- **Positive** | **Negative** | **Alternatives Considered** +``` # Project Structure @@ -163,108 +161,53 @@ docs/ lang-reference.md # Language specification ``` -# CLI +# CLI Reference Run: `cargo run -p plotnik-cli -- ` -- `debug` — Inspect queries and source file ASTs - - Example: `cargo run -p plotnik-cli -- debug -q '(foo) @bar'` -- `exec` — Execute query against source, output JSON - - Example: `cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js` -- `types` — Generate TypeScript type definitions from query - - Example: `cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript` -- `langs` — List supported languages - -Inputs: `-q/--query `, `--query-file `, `--source `, `-s/--source-file `, `-l/--lang ` +| Command | Purpose | +| ------- | ------------------------------- | +| `debug` | Inspect queries and source ASTs | +| `exec` | Execute query, output JSON | +| `types` | Generate TypeScript types | +| `langs` | List supported languages | -### `debug` output flags +Common: `-q/--query `, `--query-file `, `--source `, `-s/--source-file `, `-l/--lang ` -- `--only-symbols` — Show only symbol table (requires query) -- `--cst` — Show query CST instead of AST -- `--raw` — Include trivia tokens (whitespace, comments) -- `--spans` — Show source spans -- `--arities` — Show node arities -- `--graph` — Show compiled transition graph -- `--graph-raw` — Show unoptimized graph (before epsilon elimination) -- `--types` — Show inferred types +`debug`: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--graph`, `--graph-raw`, `--types` +`exec`: `--pretty`, `--verbose-nodes`, `--check`, `--entry ` +`types`: `--format `, `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` ```sh -cargo run -p plotnik-cli -- debug -q '(identifier) @id' -cargo run -p plotnik-cli -- debug -q '(identifier) @id' --only-symbols cargo run -p plotnik-cli -- debug -q '(identifier) @id' --graph -l javascript -cargo run -p plotnik-cli -- debug -q '(identifier) @id' --types -l javascript -cargo run -p plotnik-cli -- debug -s app.ts -cargo run -p plotnik-cli -- debug -s app.ts --raw -cargo run -p plotnik-cli -- debug -q '(function_declaration) @fn' -s app.ts -l typescript -``` - -### `exec` output flags - -- `--pretty` — Pretty-print JSON output -- `--verbose-nodes` — Include line/column positions in nodes -- `--check` — Validate output against inferred types -- `--entry ` — 
Entry point name (definition to match from) - -```sh -cargo run -p plotnik-cli -- exec -q '(program (expression_statement (identifier) @name))' --source 'x' -l javascript cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --pretty -cargo run -p plotnik-cli -- exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes -cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --check -cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --verbose-nodes --pretty -cargo run -p plotnik-cli -- exec -q 'A = (identifier) @id B = (string) @str' -s app.js --entry B -``` - -### `types` output flags - -- `--format ` — Output format: `typescript` or `ts` (default: typescript) -- `--root-type ` — Name for root type of anonymous expressions (default: Query) -- `--verbose-nodes` — Use verbose Node shape (matches `exec --verbose-nodes`) -- `--no-node-type` — Don't emit Node/Point type definitions -- `--no-export` — Don't add `export` keyword to types -- `-o/--output ` — Write output to file instead of stdout - -```sh -cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript -cargo run -p plotnik-cli -- types -q 'Func = (function_declaration name: (identifier) @name body: (statement_block) @body)' -l js -cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript --verbose-nodes -cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript --no-node-type cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript -o types.d.ts ``` -# Coding rules - -- Avoid nesting logic: prefer early exit in functions (return) and loops (continue/break) -- Write code comments for seniors, not for juniors +# Coding Rules -# Testing rules +- Early exit (`return`, `continue`, `break`) over deep nesting +- Comments for seniors, not juniors +- Rust 2024 `let` chains: `if let Some(x) = a && let Some(y) = b { ... }` -## File organization +# Testing Rules -- Code lives in `foo.rs`, tests live in `foo_tests.rs` -- Test module included via `#[cfg(test)] mod foo_tests;` in parent +Code: `foo.rs` → tests: `foo_tests.rs` (include via `#[cfg(test)] mod foo_tests;`) -## CLI commands - -- IMPORTANT: the `debug` is your first tool you should use to test your changes -- Run tests: `make test` -- We use snapshot testing (`insta`) heavily - - Accept snapshots: `make shot` - -## Test structure +```sh +make test # Run tests +make shot # Accept insta snapshots +``` -- Separate AAA (Arrange-Act-Assert) parts by blank lines - - Exception: when the test is 3 or less lines total -- Desired structure: input is string, output is string (snapshot of something) -- Single-line input: plain string literal -- Multi-line input: `indoc!` macro -- IMPORTANT: never write snapshots manually — always use `@""` and then `cargo insta accept` +- AAA sections separated by blank lines (unless ≤3 lines) +- Single-line input: literal; Multi-line: `indoc!` +- Never write snapshots manually — use `@""` then `cargo insta accept` ```rust #[test] fn valid_query() { let input = indoc! 
{r#" - (function_declaration - name: (identifier) @name) + (function_declaration name: (identifier) @name) "#}; let query = Query::try_from(input).unwrap(); @@ -272,47 +215,13 @@ fn valid_query() { assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @""); } - -#[test] -fn simple_case() { - let query = Query::try_from("(identifier)").unwrap(); - assert!(query.is_valid()); - insta::assert_snapshot!(query.dump_ast(), @""); -} - -#[test] -fn error_case() { - let query = Query::try_from("(unclosed").unwrap(); - assert!(!query.is_valid()); - insta::assert_snapshot!(query.dump_diagnostics(), @""); -} ``` -## Patterns by test type - -- Valid parsing: `assert!(query.is_valid())` + snapshot `dump_*()` output -- Error recovery: `assert!(!query.is_valid())` + snapshot `dump_diagnostics()` only -- Lexer tests: use helper functions `snapshot(input)` / `snapshot_raw(input)` - -## Coverage +| Test Type | Pattern | +| -------------- | ------------------------------------------------------------ | +| Valid parsing | `assert!(query.is_valid())` + snapshot `dump_*()` | +| Error recovery | `assert!(!query.is_valid())` + snapshot `dump_diagnostics()` | -Uses `cargo-llvm-cov` (already installed) - -Find uncovered lines per file: - -```sh -$ make coverage-lines | grep recursion -crates/plotnik-lib/src/query/recursion.rs: 78, 210, 214, ... -``` +Coverage: `make coverage-lines | grep recursion` -### `invariants.rs` - -- The goal of this file is to exclude coverage of the unreachable code branches -- It contains functions and `impl` blocks for invariant check functionality -- Each function panics on invariant violation -- The naming convention: `ensure_something(...)`, where something refers the return value -- It doesn't make sense to put the `panic!(...)`, `assert!()` or `.expect()` because they don't cause coverage problems: - - `panic!()` usually is called in catch-all `match` branches - - eventually we extract the whole `match` to the `invariants.rs`, for well-established code - - `assert!()` is coverage-friendly alternative for `if condition { panic!(...) }` - - `.expect()` is useful for unwrapping `Result`/`Option` values +`invariants.rs`: `ensure_*()` functions for unreachable code exclusion from coverage. 
diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index 771b0ca2..ebf2718a 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -61,7 +61,7 @@ pub enum Command { plotnik exec -q '(identifier) @id' -s app.js --pretty plotnik exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes plotnik exec -q '(identifier) @id' -s app.js --check - plotnik exec --query-file query.plnk -s app.js --entry FunctionDef"#)] + plotnik exec --query-file query.ptk -s app.js --entry FunctionDef"#)] Exec { #[command(flatten)] query: QueryArgs, @@ -80,7 +80,7 @@ pub enum Command { /// Generate type definitions from a query #[command(after_help = r#"EXAMPLES: plotnik types -q '(identifier) @id' -l javascript - plotnik types --query-file query.plnk -l typescript + plotnik types --query-file query.ptk -l typescript plotnik types -q '(function_declaration) @fn' -l js --format ts plotnik types -q '(identifier) @id' -l js --verbose-nodes plotnik types -q '(identifier) @id' -l js -o types.d.ts diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs index 0d7b4ab7..75567698 100644 --- a/crates/plotnik-lib/src/ir/compiled.rs +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -17,7 +17,7 @@ use super::{ pub const BUFFER_ALIGN: usize = 64; /// Magic bytes identifying a compiled query file. -pub const MAGIC: [u8; 4] = *b"PLNK"; +pub const MAGIC: [u8; 4] = *b"PTKQ"; /// Current format version. pub const FORMAT_VERSION: u32 = 1; diff --git a/crates/plotnik-lib/src/ir/serialize.rs b/crates/plotnik-lib/src/ir/serialize.rs index 850b3682..a5b49f87 100644 --- a/crates/plotnik-lib/src/ir/serialize.rs +++ b/crates/plotnik-lib/src/ir/serialize.rs @@ -3,7 +3,7 @@ //! Binary format (see ADR-0004): //! ```text //! Header (64 bytes): -//! magic: [u8; 4] b"PLNK" +//! magic: [u8; 4] b"PTKQ" //! version: u32 format version //! checksum: u32 CRC32(header[12..64] || buffer_data) //! 
buffer_len: u32 From afbcabac057e4711093a4d72a07a9431a2512bfe Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Fri, 19 Dec 2025 11:55:42 -0300 Subject: [PATCH 11/11] Remove old crap --- crates/plotnik-cli/src/commands/debug/mod.rs | 44 +- crates/plotnik-cli/src/commands/exec.rs | 132 +- crates/plotnik-cli/src/commands/types.rs | 324 +---- .../plotnik-lib/src/engine/effect_stream.rs | 146 --- crates/plotnik-lib/src/engine/error.rs | 12 - crates/plotnik-lib/src/engine/interpreter.rs | 571 --------- .../src/engine/interpreter_tests.rs | 114 -- crates/plotnik-lib/src/engine/materializer.rs | 117 -- .../engine/materializer/materializer_tests.rs | 157 --- crates/plotnik-lib/src/engine/mod.rs | 13 - crates/plotnik-lib/src/engine/validate.rs | 324 ----- .../plotnik-lib/src/engine/validate_tests.rs | 111 -- crates/plotnik-lib/src/engine/value.rs | 145 --- crates/plotnik-lib/src/ir/compiled.rs | 742 ----------- crates/plotnik-lib/src/ir/effect.rs | 49 - crates/plotnik-lib/src/ir/effect_tests.rs | 22 - crates/plotnik-lib/src/ir/emit.rs | 969 --------------- crates/plotnik-lib/src/ir/entrypoint.rs | 56 - crates/plotnik-lib/src/ir/ids.rs | 31 - crates/plotnik-lib/src/ir/matcher.rs | 91 -- crates/plotnik-lib/src/ir/matcher_tests.rs | 27 - crates/plotnik-lib/src/ir/mod.rs | 86 -- crates/plotnik-lib/src/ir/nav.rs | 180 --- crates/plotnik-lib/src/ir/ref_transition.rs | 48 - .../src/ir/ref_transition_tests.rs | 26 - crates/plotnik-lib/src/ir/serialize.rs | 414 ------- crates/plotnik-lib/src/ir/slice.rs | 97 -- crates/plotnik-lib/src/ir/slice_tests.rs | 49 - crates/plotnik-lib/src/ir/string_ref.rs | 31 - crates/plotnik-lib/src/ir/string_ref_tests.rs | 14 - crates/plotnik-lib/src/ir/strings.rs | 140 --- crates/plotnik-lib/src/ir/transition.rs | 125 -- crates/plotnik-lib/src/ir/type_metadata.rs | 122 -- crates/plotnik-lib/src/lib.rs | 4 +- .../parser/tests/recovery/coverage_tests.rs | 19 +- crates/plotnik-lib/src/query/alt_kinds.rs | 10 - crates/plotnik-lib/src/query/dump.rs | 4 +- crates/plotnik-lib/src/query/expr_arity.rs | 15 - crates/plotnik-lib/src/query/graph.rs | 723 ----------- crates/plotnik-lib/src/query/graph_build.rs | 759 ------------ .../src/query/graph_build_tests.rs | 349 ------ crates/plotnik-lib/src/query/graph_dump.rs | 261 ---- .../src/query/graph_master_test.rs | 1093 ----------------- .../plotnik-lib/src/query/graph_optimize.rs | 224 ---- crates/plotnik-lib/src/query/graph_qis.rs | 163 --- .../plotnik-lib/src/query/graph_qis_tests.rs | 234 ---- crates/plotnik-lib/src/query/infer.rs | 811 ------------ crates/plotnik-lib/src/query/infer_dump.rs | 232 ---- crates/plotnik-lib/src/query/infer_tests.rs | 446 ------- crates/plotnik-lib/src/query/link.rs | 18 - crates/plotnik-lib/src/query/link_tests.rs | 235 ++-- crates/plotnik-lib/src/query/mod.rs | 316 +---- crates/plotnik-lib/src/query/query.rs | 78 +- crates/plotnik-lib/src/query/symbol_table.rs | 7 - 54 files changed, 250 insertions(+), 11280 deletions(-) delete mode 100644 crates/plotnik-lib/src/engine/effect_stream.rs delete mode 100644 crates/plotnik-lib/src/engine/error.rs delete mode 100644 crates/plotnik-lib/src/engine/interpreter.rs delete mode 100644 crates/plotnik-lib/src/engine/interpreter_tests.rs delete mode 100644 crates/plotnik-lib/src/engine/materializer.rs delete mode 100644 crates/plotnik-lib/src/engine/materializer/materializer_tests.rs delete mode 100644 crates/plotnik-lib/src/engine/mod.rs delete mode 100644 crates/plotnik-lib/src/engine/validate.rs delete mode 100644 crates/plotnik-lib/src/engine/validate_tests.rs 
delete mode 100644 crates/plotnik-lib/src/engine/value.rs delete mode 100644 crates/plotnik-lib/src/ir/compiled.rs delete mode 100644 crates/plotnik-lib/src/ir/effect.rs delete mode 100644 crates/plotnik-lib/src/ir/effect_tests.rs delete mode 100644 crates/plotnik-lib/src/ir/emit.rs delete mode 100644 crates/plotnik-lib/src/ir/entrypoint.rs delete mode 100644 crates/plotnik-lib/src/ir/ids.rs delete mode 100644 crates/plotnik-lib/src/ir/matcher.rs delete mode 100644 crates/plotnik-lib/src/ir/matcher_tests.rs delete mode 100644 crates/plotnik-lib/src/ir/mod.rs delete mode 100644 crates/plotnik-lib/src/ir/nav.rs delete mode 100644 crates/plotnik-lib/src/ir/ref_transition.rs delete mode 100644 crates/plotnik-lib/src/ir/ref_transition_tests.rs delete mode 100644 crates/plotnik-lib/src/ir/serialize.rs delete mode 100644 crates/plotnik-lib/src/ir/slice.rs delete mode 100644 crates/plotnik-lib/src/ir/slice_tests.rs delete mode 100644 crates/plotnik-lib/src/ir/string_ref.rs delete mode 100644 crates/plotnik-lib/src/ir/string_ref_tests.rs delete mode 100644 crates/plotnik-lib/src/ir/strings.rs delete mode 100644 crates/plotnik-lib/src/ir/transition.rs delete mode 100644 crates/plotnik-lib/src/ir/type_metadata.rs delete mode 100644 crates/plotnik-lib/src/query/graph.rs delete mode 100644 crates/plotnik-lib/src/query/graph_build.rs delete mode 100644 crates/plotnik-lib/src/query/graph_build_tests.rs delete mode 100644 crates/plotnik-lib/src/query/graph_dump.rs delete mode 100644 crates/plotnik-lib/src/query/graph_master_test.rs delete mode 100644 crates/plotnik-lib/src/query/graph_optimize.rs delete mode 100644 crates/plotnik-lib/src/query/graph_qis.rs delete mode 100644 crates/plotnik-lib/src/query/graph_qis_tests.rs delete mode 100644 crates/plotnik-lib/src/query/infer.rs delete mode 100644 crates/plotnik-lib/src/query/infer_dump.rs delete mode 100644 crates/plotnik-lib/src/query/infer_tests.rs diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 1d1ad6fb..2bd351c9 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] pub mod source; use std::fs; @@ -40,7 +41,7 @@ pub fn run(args: DebugArgs) { }; let mut query = query_source.as_ref().map(|src| { - Query::try_from(src).unwrap_or_else(|e| { + Query::try_from(src.as_str()).unwrap_or_else(|e| { eprintln!("error: {}", e); std::process::exit(1); }) @@ -48,15 +49,13 @@ pub fn run(args: DebugArgs) { // Auto-link when --lang is provided with a query if args.lang.is_some() - && let Some(ref mut q) = query + && let Some(ref mut _q) = query { - let lang = resolve_lang_for_link(&args.lang); - q.link(&lang); + unimplemented!(); } let show_query = has_query_input && !args.symbols && !args.graph && !args.types; let show_source = has_source_input; - let show_both_graphs = args.graph_raw && args.graph; if show_query && let Some(ref q) = query { print!( @@ -84,40 +83,9 @@ pub fn run(args: DebugArgs) { // Build graph if needed for --graph, --graph-raw, or --types if (args.graph || args.graph_raw || args.types) - && let Some(q) = query.take() + && let Some(_) = query.take() { - // Determine root kind for auto-wrapping - let root_kind = args.lang.as_ref().and_then(|lang_name| { - let lang = resolve_lang_for_link(&Some(lang_name.clone())); - lang.root().and_then(|root_id| lang.node_type_name(root_id)) - }); - - let (q, pre_opt_dump) = q.build_graph_with_pre_opt_dump(root_kind); - let mut needs_separator = false; - if 
args.graph_raw { - if show_both_graphs { - println!("(pre-optimization)"); - } - print!("{}", pre_opt_dump); - needs_separator = true; - } - if args.graph { - if needs_separator { - println!(); - } - if show_both_graphs { - println!("(post-optimization)"); - } - print!("{}", q.graph().dump_live(q.dead_nodes())); - needs_separator = true; - } - if args.types { - if needs_separator { - println!(); - } - print!("{}", q.type_info().dump()); - } - return; + unimplemented!(); } if show_source { diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index e4e65e39..d1c87595 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -2,13 +2,7 @@ use std::fs; use std::io::{self, Read}; use std::path::PathBuf; -use plotnik_core::{NodeFieldId, NodeTypeId}; -use plotnik_langs::Lang; -use plotnik_lib::Query; -use plotnik_lib::engine::interpreter::QueryInterpreter; -use plotnik_lib::engine::validate::validate as validate_result; -use plotnik_lib::engine::value::{ResolvedValue, VerboseResolvedValue}; -use plotnik_lib::ir::{NodeKindResolver, QueryEmitter}; +use plotnik_lib::QueryBuilder; use super::debug::source::resolve_lang; @@ -24,18 +18,6 @@ pub struct ExecArgs { pub entry: Option, } -struct LangResolver(Lang); - -impl NodeKindResolver for LangResolver { - fn resolve_kind(&self, name: &str) -> Option { - self.0.resolve_named_node(name) - } - - fn resolve_field(&self, name: &str) -> Option { - self.0.resolve_field(name) - } -} - pub fn run(args: ExecArgs) { if let Err(msg) = validate(&args) { eprintln!("error: {}", msg); @@ -47,113 +29,33 @@ pub fn run(args: ExecArgs) { eprintln!("error: query cannot be empty"); std::process::exit(1); } - let source_code = load_source(&args); + let _source_code = load_source(&args); let lang = resolve_lang(&args.lang, &args.source_text, &args.source_file); - // Parse and validate query - let mut query = Query::new(&query_source).exec().unwrap_or_else(|e| { + // Parse query + let query_parsed = QueryBuilder::new(&query_source) + .parse() + .unwrap_or_else(|e| { + eprintln!("error: {}", e); + std::process::exit(1); + }); + + // Analyze query + let query_analyzed = query_parsed.analyze().unwrap_or_else(|e| { eprintln!("error: {}", e); std::process::exit(1); }); - if !query.is_valid() { - eprint!("{}", query.diagnostics().render(&query_source)); - std::process::exit(1); - } - // Link query against language - query.link(&lang); - if !query.is_valid() { - eprint!("{}", query.diagnostics().render(&query_source)); + let linked = query_analyzed.link(&lang); + if !linked.is_valid() { + eprint!("{}", linked.diagnostics().render(&query_source)); std::process::exit(1); } - // Build transition graph and type info - let mut query = query.build_graph(); - if query.has_type_errors() { - eprint!("{}", query.diagnostics().render(&query_source)); - std::process::exit(1); - } - - // Auto-wrap definitions with root node if available - if let Some(root_id) = lang.root() - && let Some(root_kind) = lang.node_type_name(root_id) - { - query = query.wrap_with_root(root_kind); - } + let _ = (args.pretty, args.verbose_nodes, args.check, args.entry); - // Emit compiled query - let resolver = LangResolver(lang.clone()); - let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); - let compiled = emitter.emit().unwrap_or_else(|e| { - eprintln!("error: emit failed: {:?}", e); - std::process::exit(1); - }); - - // Parse source - let tree = lang.parse(&source_code); - let cursor = tree.walk(); - - // 
Find entry point - let entrypoint = match &args.entry { - Some(name) => compiled - .entrypoints() - .iter() - .find(|ep| compiled.string(ep.name_id()) == name) - .unwrap_or_else(|| { - let available: Vec<_> = compiled - .entrypoints() - .iter() - .map(|ep| compiled.string(ep.name_id())) - .collect(); - eprintln!( - "error: entry point '{}' not found. Available: {}", - name, - available.join(", ") - ); - std::process::exit(1); - }), - None => compiled.entrypoints().last().unwrap_or_else(|| { - eprintln!("error: no entry points in query"); - std::process::exit(1); - }), - }; - - // Run interpreter - let interpreter = QueryInterpreter::new(&compiled, cursor, &source_code); - let result = interpreter - .run_from(entrypoint.target()) - .unwrap_or_else(|e| { - eprintln!("error: {}", e); - std::process::exit(1); - }); - - // Type checking against inferred types - if args.check { - let expected_type = Some(entrypoint.result_type()); - if let Some(type_id) = expected_type - && let Err(e) = validate_result(&result, type_id, &compiled) - { - eprintln!("type error: {}", e); - std::process::exit(1); - } - } - - // Output JSON - let output = match (args.verbose_nodes, args.pretty) { - (true, true) => serde_json::to_string_pretty(&VerboseResolvedValue(&result, &compiled)), - (true, false) => serde_json::to_string(&VerboseResolvedValue(&result, &compiled)), - (false, true) => serde_json::to_string_pretty(&ResolvedValue(&result, &compiled)), - (false, false) => serde_json::to_string(&ResolvedValue(&result, &compiled)), - }; - - match output { - Ok(json) => println!("{}", json), - Err(e) => { - eprintln!("error: JSON serialization failed: {}", e); - std::process::exit(1); - } - } + todo!("IR emission and query execution not yet implemented") } fn load_query(args: &ExecArgs) -> String { diff --git a/crates/plotnik-cli/src/commands/types.rs b/crates/plotnik-cli/src/commands/types.rs index 490c35e4..ca26907d 100644 --- a/crates/plotnik-cli/src/commands/types.rs +++ b/crates/plotnik-cli/src/commands/types.rs @@ -1,15 +1,11 @@ -use std::fmt::Write; +#![allow(dead_code)] + use std::fs; use std::io::{self, Read}; use std::path::PathBuf; -use plotnik_core::{NodeFieldId, NodeTypeId}; use plotnik_langs::Lang; use plotnik_lib::Query; -use plotnik_lib::ir::{ - CompiledQuery, NodeKindResolver, QueryEmitter, STRING_NONE, TYPE_NODE, TYPE_STR, TYPE_VOID, - TypeId, TypeKind, -}; pub struct TypesArgs { pub query_text: Option, @@ -23,18 +19,6 @@ pub struct TypesArgs { pub output: Option, } -struct LangResolver(Lang); - -impl NodeKindResolver for LangResolver { - fn resolve_kind(&self, name: &str) -> Option { - self.0.resolve_named_node(name) - } - - fn resolve_field(&self, name: &str) -> Option { - self.0.resolve_field(name) - } -} - pub fn run(args: TypesArgs) { if let Err(msg) = validate(&args) { eprintln!("error: {}", msg); @@ -49,10 +33,12 @@ pub fn run(args: TypesArgs) { let lang = resolve_lang_required(&args.lang); // Parse and validate query - let mut query = Query::new(&query_source).exec().unwrap_or_else(|e| { - eprintln!("error: {}", e); - std::process::exit(1); - }); + let query = Query::try_from(query_source.as_str()) + .unwrap_or_else(|e| { + eprintln!("error: {}", e); + std::process::exit(1); + }) + .link(&lang); if !query.is_valid() { eprint!("{}", query.diagnostics().render(&query_source)); @@ -60,304 +46,12 @@ pub fn run(args: TypesArgs) { } // Link query against language - query.link(&lang); if !query.is_valid() { eprint!("{}", query.diagnostics().render(&query_source)); std::process::exit(1); } - // Build 
transition graph and type info - let mut query = query.build_graph(); - if query.has_type_errors() { - eprint!("{}", query.diagnostics().render(&query_source)); - std::process::exit(1); - } - - // Auto-wrap definitions with root node if available - if let Some(root_id) = lang.root() - && let Some(root_kind) = lang.node_type_name(root_id) - { - query = query.wrap_with_root(root_kind); - } - - // Emit compiled query (IR) - let resolver = LangResolver(lang.clone()); - let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); - let compiled = emitter.emit().unwrap_or_else(|e| { - eprintln!("error: emit failed: {:?}", e); - std::process::exit(1); - }); - - // Generate TypeScript - let output = generate_typescript(&compiled, &args); - - // Write output - if let Some(path) = &args.output { - fs::write(path, &output).unwrap_or_else(|e| { - eprintln!("error: failed to write {}: {}", path.display(), e); - std::process::exit(1); - }); - } else { - print!("{}", output); - } -} - -fn generate_typescript(ir: &CompiledQuery, args: &TypesArgs) -> String { - let mut out = String::new(); - let export_prefix = if args.export { "export " } else { "" }; - - // Emit Node and Point types unless --no-node-type - if !args.no_node_type { - if args.verbose_nodes { - writeln!(out, "{}interface Point {{", export_prefix).unwrap(); - writeln!(out, " row: number;").unwrap(); - writeln!(out, " column: number;").unwrap(); - writeln!(out, "}}").unwrap(); - writeln!(out).unwrap(); - writeln!(out, "{}interface Node {{", export_prefix).unwrap(); - writeln!(out, " kind: string;").unwrap(); - writeln!(out, " text: string;").unwrap(); - writeln!(out, " start_byte: number;").unwrap(); - writeln!(out, " end_byte: number;").unwrap(); - writeln!(out, " start_point: Point;").unwrap(); - writeln!(out, " end_point: Point;").unwrap(); - writeln!(out, "}}").unwrap(); - } else { - writeln!(out, "{}interface Node {{", export_prefix).unwrap(); - writeln!(out, " kind: string;").unwrap(); - writeln!(out, " text: string;").unwrap(); - writeln!(out, " range: [number, number];").unwrap(); - writeln!(out, "}}").unwrap(); - } - } - - let emitter = TypeScriptEmitter::new(ir, export_prefix); - - // Emit composite types that are named and not inlinable - for (idx, type_def) in ir.type_defs().iter().enumerate() { - let type_id = idx as TypeId + 3; // TYPE_COMPOSITE_START - if !emitter.should_emit_as_interface(type_id) { - continue; - } - - if !out.is_empty() { - writeln!(out).unwrap(); - } - emitter.emit_type_def(&mut out, type_id, type_def); - } - - // Emit entrypoints as type aliases if they differ from their type name - for entry in ir.entrypoints() { - let raw_entry_name = ir.string(entry.name_id()); - // Replace anonymous entrypoint "_" with --root-type name - let entry_name = if raw_entry_name == "_" { - args.root_type.as_str() - } else { - raw_entry_name - }; - let type_id = entry.result_type(); - let type_name = emitter.get_type_name(type_id); - - // Skip if entrypoint name matches type name (redundant alias) - if type_name == entry_name { - continue; - } - - if !out.is_empty() { - writeln!(out).unwrap(); - } - writeln!( - out, - "{}type {} = {};", - export_prefix, - entry_name, - emitter.format_type(type_id) - ) - .unwrap(); - } - - out -} - -struct TypeScriptEmitter<'a> { - ir: &'a CompiledQuery, - export_prefix: &'a str, -} - -impl<'a> TypeScriptEmitter<'a> { - fn new(ir: &'a CompiledQuery, export_prefix: &'a str) -> Self { - Self { ir, export_prefix } - } - - /// Returns true if this type should be emitted as a standalone 
interface. - fn should_emit_as_interface(&self, type_id: TypeId) -> bool { - if type_id < 3 { - return false; // primitives - } - - let idx = (type_id - 3) as usize; - let Some(def) = self.ir.type_defs().get(idx) else { - return false; - }; - - // Wrapper types are always inlined - if def.is_wrapper() { - return false; - } - - // Named composites get their own interface - def.name != STRING_NONE - } - - /// Get the type name for a composite type, or generate one. - fn get_type_name(&self, type_id: TypeId) -> String { - match type_id { - TYPE_VOID => "null".to_string(), - TYPE_NODE => "Node".to_string(), - TYPE_STR => "string".to_string(), - _ => { - let idx = (type_id - 3) as usize; - if let Some(def) = self.ir.type_defs().get(idx) - && def.name != STRING_NONE - { - return self.ir.string(def.name).to_string(); - } - // Fallback for anonymous types - format!("T{}", type_id) - } - } - } - - /// Format a type reference (may be inline or named). - fn format_type(&self, type_id: TypeId) -> String { - match type_id { - TYPE_VOID => "null".to_string(), - TYPE_NODE => "Node".to_string(), - TYPE_STR => "string".to_string(), - _ => { - let idx = (type_id - 3) as usize; - let Some(def) = self.ir.type_defs().get(idx) else { - return format!("unknown /* T{} */", type_id); - }; - - // Wrapper types: inline - if let Some(inner) = def.inner_type() { - let inner_fmt = self.format_type(inner); - return match def.kind { - TypeKind::Optional => format!("{} | null", inner_fmt), - TypeKind::ArrayStar => format!("{}[]", self.wrap_if_union(&inner_fmt)), - TypeKind::ArrayPlus => { - format!("[{}, ...{}[]]", inner_fmt, self.wrap_if_union(&inner_fmt)) - } - _ => unreachable!(), - }; - } - - // Named composite: reference by name - if def.name != STRING_NONE { - return self.ir.string(def.name).to_string(); - } - - // Anonymous composite: inline - self.format_inline_composite(type_id, def.kind) - } - } - } - - /// Wrap type in parens if it contains a union (for array element types). - fn wrap_if_union(&self, ty: &str) -> String { - if ty.contains(" | ") { - format!("({})", ty) - } else { - ty.to_string() - } - } - - /// Format an anonymous composite type inline. - fn format_inline_composite(&self, type_id: TypeId, kind: TypeKind) -> String { - let idx = (type_id - 3) as usize; - let Some(def) = self.ir.type_defs().get(idx) else { - return "unknown".to_string(); - }; - - let Some(members_slice) = def.members_slice() else { - return "unknown".to_string(); - }; - - let members = self.ir.resolve_type_members(members_slice); - - match kind { - TypeKind::Record => { - let fields: Vec = members - .iter() - .map(|m| format!("{}: {}", self.ir.string(m.name), self.format_type(m.ty))) - .collect(); - format!("{{ {} }}", fields.join("; ")) - } - TypeKind::Enum => { - let variants: Vec = members - .iter() - .map(|m| { - let tag = self.ir.string(m.name); - let data = self.format_type(m.ty); - format!("{{ $tag: \"{}\"; $data: {} }}", tag, data) - }) - .collect(); - variants.join(" | ") - } - _ => "unknown".to_string(), - } - } - - /// Emit a type definition as an interface or type alias. 
- fn emit_type_def(&self, out: &mut String, type_id: TypeId, def: &plotnik_lib::ir::TypeDef) { - let name = if def.name != STRING_NONE { - self.ir.string(def.name).to_string() - } else { - format!("T{}", type_id) - }; - - let Some(members_slice) = def.members_slice() else { - return; - }; - - let members = self.ir.resolve_type_members(members_slice); - - match def.kind { - TypeKind::Record => { - writeln!(out, "{}interface {} {{", self.export_prefix, name).unwrap(); - for m in members { - writeln!( - out, - " {}: {};", - self.ir.string(m.name), - self.format_type(m.ty) - ) - .unwrap(); - } - writeln!(out, "}}").unwrap(); - } - TypeKind::Enum => { - let variants: Vec = members - .iter() - .map(|m| { - let tag = self.ir.string(m.name); - let data = self.format_type(m.ty); - format!("{{ $tag: \"{}\"; $data: {} }}", tag, data) - }) - .collect(); - writeln!( - out, - "{}type {} =\n | {};", - self.export_prefix, - name, - variants.join("\n | ") - ) - .unwrap(); - } - _ => {} - } - } + unimplemented!(); } fn load_query(args: &TypesArgs) -> String { diff --git a/crates/plotnik-lib/src/engine/effect_stream.rs b/crates/plotnik-lib/src/engine/effect_stream.rs deleted file mode 100644 index 7073da85..00000000 --- a/crates/plotnik-lib/src/engine/effect_stream.rs +++ /dev/null @@ -1,146 +0,0 @@ -//! Effect stream recorded during query execution. - -use crate::ir::EffectOp; -use serde::Serialize; -use serde::ser::SerializeStruct; -use tree_sitter::Node; - -/// A captured AST node with a reference to the source. -#[derive(Debug, Clone, Copy)] -pub struct CapturedNode<'tree> { - node: Node<'tree>, - source: &'tree str, -} - -impl<'tree> CapturedNode<'tree> { - /// Create from a tree-sitter node and source text. - pub fn new(node: Node<'tree>, source: &'tree str) -> Self { - Self { node, source } - } - - /// Returns the underlying tree-sitter node. - pub fn node(&self) -> Node<'tree> { - self.node - } - - /// Returns the source text of the node. - pub fn text(&self) -> &'tree str { - self.node - .utf8_text(self.source.as_bytes()) - .unwrap_or("") - } - - pub fn start_byte(&self) -> usize { - self.node.start_byte() - } - - pub fn end_byte(&self) -> usize { - self.node.end_byte() - } - - pub fn start_point(&self) -> (usize, usize) { - let p = self.node.start_position(); - (p.row, p.column) - } - - pub fn end_point(&self) -> (usize, usize) { - let p = self.node.end_position(); - (p.row, p.column) - } - - pub fn kind(&self) -> &'tree str { - self.node.kind() - } -} - -impl PartialEq for CapturedNode<'_> { - fn eq(&self, other: &Self) -> bool { - // Compare by node identity (same position in same tree) - self.node.id() == other.node.id() - && self.start_byte() == other.start_byte() - && self.end_byte() == other.end_byte() - } -} - -impl Eq for CapturedNode<'_> {} - -impl Serialize for CapturedNode<'_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let mut state = serializer.serialize_struct("CapturedNode", 3)?; - state.serialize_field("kind", self.kind())?; - state.serialize_field("text", self.text())?; - state.serialize_field("range", &[self.start_byte(), self.end_byte()])?; - state.end() - } -} - -/// Wrapper for verbose serialization of a captured node. -/// Includes full positional information (bytes + line/column). 
-pub struct VerboseNode<'a, 'tree>(pub &'a CapturedNode<'tree>); - -impl Serialize for VerboseNode<'_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let node = self.0; - let mut state = serializer.serialize_struct("CapturedNode", 6)?; - state.serialize_field("kind", node.kind())?; - state.serialize_field("text", node.text())?; - state.serialize_field("start_byte", &node.start_byte())?; - state.serialize_field("end_byte", &node.end_byte())?; - state.serialize_field("start_point", &node.start_point())?; - state.serialize_field("end_point", &node.end_point())?; - state.end() - } -} - -/// A log of effects to be replayed by the materializer. -/// See ADR-0006 for details. -#[derive(Debug, Clone, Default)] -pub struct EffectStream<'tree> { - /// The sequence of operations to perform. - ops: Vec, - /// The sequence of nodes captured, one for each `CaptureNode` op. - nodes: Vec>, -} - -impl<'tree> EffectStream<'tree> { - pub fn new() -> Self { - Self::default() - } - - /// Appends an effect operation to the stream. - pub fn push_op(&mut self, op: EffectOp) { - self.ops.push(op); - } - - /// Appends a captured node to the stream. - pub fn push_node(&mut self, node: Node<'tree>, source: &'tree str) { - self.nodes.push(CapturedNode::new(node, source)); - } - - /// Appends a captured node directly. - pub fn push_captured_node(&mut self, node: CapturedNode<'tree>) { - self.nodes.push(node); - } - - /// Returns the operations. - pub fn ops(&self) -> &[EffectOp] { - &self.ops - } - - /// Returns the captured nodes. - pub fn nodes(&self) -> &[CapturedNode<'tree>] { - &self.nodes - } - - /// Truncate streams to watermarks (for backtracking). - pub fn truncate(&mut self, ops_len: usize, nodes_len: usize) { - self.ops.truncate(ops_len); - self.nodes.truncate(nodes_len); - } -} diff --git a/crates/plotnik-lib/src/engine/error.rs b/crates/plotnik-lib/src/engine/error.rs deleted file mode 100644 index 229f3369..00000000 --- a/crates/plotnik-lib/src/engine/error.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! Errors that can occur during query execution. - -#[derive(Debug, Clone, thiserror::Error)] -pub enum RuntimeError { - /// Execution fuel exhausted (too many interpreter operations). - #[error("runtime execution limit exceeded")] - ExecFuelExhausted, - - /// Recursion fuel exhausted (too many nested definition calls). - #[error("runtime recursion limit exceeded")] - RecursionLimitExceeded, -} diff --git a/crates/plotnik-lib/src/engine/interpreter.rs b/crates/plotnik-lib/src/engine/interpreter.rs deleted file mode 100644 index a0b14e1e..00000000 --- a/crates/plotnik-lib/src/engine/interpreter.rs +++ /dev/null @@ -1,571 +0,0 @@ -//! The core query interpreter. -//! -//! Executes a compiled query against a tree-sitter AST, producing an effect stream -//! that can be materialized into a structured value. -//! -//! See ADR-0006 for detailed execution semantics. - -use std::collections::HashSet; - -use tree_sitter::{Node, TreeCursor}; - -use crate::ir::{CompiledQuery, EffectOp, Matcher, Nav, NavKind, RefTransition, TransitionId}; -use plotnik_core::{NodeFieldId, NodeTypeId}; - -use super::effect_stream::EffectStream; -use super::error::RuntimeError; -use super::materializer::Materializer; -use super::value::Value; - -/// A saved execution state for backtracking. -#[derive(Debug, Clone)] -struct Checkpoint { - /// Tree-sitter descendant index for cursor restoration. - cursor_checkpoint: usize, - /// Number of ops in effect stream at save time. 
- effect_ops_watermark: usize, - /// Number of nodes in effect stream at save time. - effect_nodes_watermark: usize, - /// Current frame index at save time. - recursion_frame: Option, - /// Previous max_frame_watermark (for O(1) restore). - prev_max_watermark: Option, - /// Source transition for alternatives. - transition_id: TransitionId, - /// Index of next alternative to try. - next_alt: u32, -} - -/// Stack of checkpoints with O(1) watermark maintenance. -#[derive(Debug, Default)] -struct CheckpointStack { - points: Vec, - /// Highest frame index referenced by any checkpoint. - max_frame_watermark: Option, -} - -impl CheckpointStack { - fn new() -> Self { - Self::default() - } - - fn push(&mut self, mut point: Checkpoint) { - point.prev_max_watermark = self.max_frame_watermark; - if let Some(frame) = point.recursion_frame { - self.max_frame_watermark = Some(match self.max_frame_watermark { - Some(max) => max.max(frame), - None => frame, - }); - } - self.points.push(point); - } - - fn pop(&mut self) -> Option { - let point = self.points.pop()?; - self.max_frame_watermark = point.prev_max_watermark; - Some(point) - } -} - -/// A call frame for definition references. -#[derive(Debug, Clone)] -struct Frame { - /// Index of caller's frame (None if called from top level). - parent: Option, - /// Ref ID to verify Exit matches Enter. - ref_id: u16, - /// Transition that entered this call (to retrieve returns via successors()[1..]). - enter_transition: TransitionId, -} - -/// Append-only arena of call frames. -#[derive(Debug, Default)] -struct FrameArena { - frames: Vec, - /// Index of current frame (the "stack pointer"). - current: Option, -} - -impl FrameArena { - fn new() -> Self { - Self::default() - } - - /// Push a new frame, returns its index. - fn push(&mut self, parent: Option, ref_id: u16, enter_transition: TransitionId) -> u32 { - let idx = self.frames.len() as u32; - self.frames.push(Frame { - parent, - ref_id, - enter_transition, - }); - self.current = Some(idx); - idx - } - - /// Get current frame. - fn current_frame(&self) -> Option<&Frame> { - self.current.map(|idx| &self.frames[idx as usize]) - } - - /// Exit current frame (set current to parent). - fn exit(&mut self) -> Option<&Frame> { - let frame = self.current_frame()?; - let parent = frame.parent; - let idx = self.current?; - self.current = parent; - Some(&self.frames[idx as usize]) - } - - /// Prune frames above the high-water mark. - fn prune(&mut self, checkpoints: &CheckpointStack) { - let high_water = match (self.current, checkpoints.max_frame_watermark) { - (None, None) => return, - (Some(c), None) => c, - (None, Some(m)) => m, - (Some(c), Some(m)) => c.max(m), - }; - self.frames.truncate((high_water + 1) as usize); - } -} - -/// Default execution fuel (transitions). -const DEFAULT_EXEC_FUEL: u32 = 1_000_000; -/// Default recursion fuel (Enter operations). -const DEFAULT_RECURSION_FUEL: u32 = 1024; - -/// Query interpreter that executes a compiled query against an AST. -pub struct QueryInterpreter<'q, 'tree> { - query: &'q CompiledQuery, - cursor: TreeCursor<'tree>, - source: &'tree str, - checkpoints: CheckpointStack, - frames: FrameArena, - effects: EffectStream<'tree>, - /// Trivia node type IDs (for skip-trivia navigation). - trivia_kinds: HashSet, - /// Matched node slot (cleared at start of each transition). - matched_node: Option>, - /// Execution fuel remaining. - exec_fuel: u32, - /// Recursion fuel remaining. 
- recursion_fuel: u32, -} - -impl<'q, 'tree> QueryInterpreter<'q, 'tree> { - /// Creates a new interpreter. - /// - /// The cursor should be positioned at the tree root. - pub fn new(query: &'q CompiledQuery, cursor: TreeCursor<'tree>, source: &'tree str) -> Self { - let trivia_kinds: HashSet<_> = query.trivia_kinds().iter().copied().collect(); - Self { - query, - cursor, - source, - checkpoints: CheckpointStack::new(), - frames: FrameArena::new(), - effects: EffectStream::new(), - trivia_kinds, - matched_node: None, - exec_fuel: DEFAULT_EXEC_FUEL, - recursion_fuel: DEFAULT_RECURSION_FUEL, - } - } - - /// Set execution fuel limit. - pub fn with_exec_fuel(mut self, fuel: u32) -> Self { - self.exec_fuel = fuel; - self - } - - /// Set recursion fuel limit. - pub fn with_recursion_fuel(mut self, fuel: u32) -> Self { - self.recursion_fuel = fuel; - self - } - - /// Run the query and return the result. - pub fn run(self) -> Result, RuntimeError> { - // Get the entry transition from the last entrypoint (main definition) - let start_transition = self - .query - .entrypoints() - .last() - .map(|ep| ep.target()) - .unwrap_or(0); - - self.run_from(start_transition) - } - - /// Run the query from a specific transition and return the result. - pub fn run_from(mut self, start: TransitionId) -> Result, RuntimeError> { - match self.execute(start) { - Ok(true) => Ok(Materializer::materialize(&self.effects)), - Ok(false) => Ok(Value::Null), // No match - Err(e) => Err(e), - } - } - - /// Execute from a given transition, returns true if matched. - fn execute(&mut self, start: TransitionId) -> Result { - let mut current = start; - - loop { - // Check fuel - if self.exec_fuel == 0 { - return Err(RuntimeError::ExecFuelExhausted); - } - self.exec_fuel -= 1; - - // Clear matched_node slot at start of each transition - self.matched_node = None; - - let view = self.query.transition_view(current); - let nav = view.nav(); - let matcher = view.matcher(); - let ref_marker = view.ref_marker(); - let successors = view.successors(); - - // Step 1: Execute navigation - let nav_ok = self.execute_nav(nav); - if !nav_ok { - // Navigation failed, backtrack - if let Some(next) = self.backtrack()? { - current = next; - continue; - } - return Ok(false); - } - - // Step 2: Try matcher (with skip policy from nav) - let match_ok = self.execute_matcher(matcher, nav); - if !match_ok { - // Match failed, backtrack - if let Some(next) = self.backtrack()? { - current = next; - continue; - } - return Ok(false); - } - - // Step 3: Execute effects - for &effect in view.effects() { - self.execute_effect(effect); - } - - // Step 4: Process ref_marker - match ref_marker { - RefTransition::None => {} - RefTransition::Enter(ref_id) => { - if self.recursion_fuel == 0 { - return Err(RuntimeError::RecursionLimitExceeded); - } - self.recursion_fuel -= 1; - - // Push frame with returns = successors[1..] 
- self.frames.push(self.frames.current, ref_id, current); - - // Jump to definition entry = successors[0] - if successors.is_empty() { - panic!("Enter transition must have at least one successor"); - } - current = successors[0]; - continue; - } - RefTransition::Exit(ref_id) => { - // Verify ref_id matches - let frame = self.frames.current_frame().expect("Exit without frame"); - assert_eq!(frame.ref_id, ref_id, "Exit ref_id mismatch"); - - // Get returns from enter transition - let enter_trans = frame.enter_transition; - let enter_view = self.query.transition_view(enter_trans); - let returns = &enter_view.successors()[1..]; - - // Pop frame - self.frames.exit(); - - // Prune frames if possible - self.frames.prune(&self.checkpoints); - - // Continue with returns as successors - if returns.is_empty() { - // Definition matched, no returns = we're done with this path - // This shouldn't happen in well-formed graphs - if let Some(next) = self.backtrack()? { - current = next; - continue; - } - return Ok(true); - } - - // Save checkpoint for alternatives if multiple returns - if returns.len() > 1 { - self.save_checkpoint(enter_trans, 2); // Skip successors[0] and [1] - } - - current = returns[0]; - continue; - } - } - - // Step 5: Process successors - if successors.is_empty() { - // Terminal transition - match succeeded - return Ok(true); - } - - // Save checkpoint for alternatives - if successors.len() > 1 { - self.save_checkpoint(current, 1); - } - - current = successors[0]; - } - } - - /// Save a checkpoint for backtracking. - fn save_checkpoint(&mut self, transition_id: TransitionId, next_alt: u32) { - let checkpoint = Checkpoint { - cursor_checkpoint: self.cursor.descendant_index(), - effect_ops_watermark: self.effects.ops().len(), - effect_nodes_watermark: self.effects.nodes().len(), - recursion_frame: self.frames.current, - prev_max_watermark: None, // Set by CheckpointStack::push - transition_id, - next_alt, - }; - self.checkpoints.push(checkpoint); - } - - /// Backtrack to the next alternative. Returns the transition to try. - fn backtrack(&mut self) -> Result, RuntimeError> { - loop { - let Some(mut checkpoint) = self.checkpoints.pop() else { - return Ok(None); - }; - - // Restore cursor - self.cursor.goto_descendant(checkpoint.cursor_checkpoint); - - // Restore effects - self.effects.truncate( - checkpoint.effect_ops_watermark, - checkpoint.effect_nodes_watermark, - ); - - // Restore frame - self.frames.current = checkpoint.recursion_frame; - - // Get next alternative - let view = self.query.transition_view(checkpoint.transition_id); - let successors = view.successors(); - - if (checkpoint.next_alt as usize) < successors.len() { - let next = successors[checkpoint.next_alt as usize]; - checkpoint.next_alt += 1; - - // Re-save if more alternatives remain - if (checkpoint.next_alt as usize) < successors.len() { - self.checkpoints.push(checkpoint); - } - - return Ok(Some(next)); - } - // No more alternatives at this checkpoint, try next - } - } - - /// Execute navigation, returns true if successful. 
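// Editorial sketch (not part of this patch): a stripped-down model of the
// checkpoint-based backtracking implemented by `save_checkpoint`/`backtrack`
// above. The real interpreter also restores the tree cursor, effect stream,
// and frame pointer from each checkpoint; this sketch keeps only a path plus
// the (transition, next_alt) bookkeeping. All names here are illustrative.
struct AltCheckpoint {
    transition: usize, // transition whose successors are being explored
    next_alt: usize,   // index of the next untried successor
    path_len: usize,   // watermark: how much of `path` to keep on restore
}

/// Depth-first search over `successors`, returning the first path from
/// `start` to `goal`. A dead end pops the most recent checkpoint, exactly
/// like a failed match does in `execute`.
fn find_path(successors: &[Vec<usize>], start: usize, goal: usize) -> Option<Vec<usize>> {
    let mut checkpoints: Vec<AltCheckpoint> = Vec::new();
    let mut path = vec![start];
    let mut current = start;
    loop {
        if current == goal {
            return Some(path);
        }
        let succs = &successors[current];
        if !succs.is_empty() {
            if succs.len() > 1 {
                // Mirrors `save_checkpoint(current, 1)`.
                checkpoints.push(AltCheckpoint {
                    transition: current,
                    next_alt: 1,
                    path_len: path.len(),
                });
            }
            current = succs[0];
            path.push(current);
            continue;
        }
        // Dead end: backtrack to the newest checkpoint with an untried
        // alternative (mirrors the loop in `backtrack`).
        loop {
            let Some(mut cp) = checkpoints.pop() else {
                return None; // no alternatives left anywhere
            };
            let alts = &successors[cp.transition];
            if cp.next_alt < alts.len() {
                path.truncate(cp.path_len); // restore state to the save point
                current = alts[cp.next_alt];
                path.push(current);
                cp.next_alt += 1;
                if cp.next_alt < alts.len() {
                    checkpoints.push(cp); // more alternatives remain
                }
                break;
            }
        }
    }
}

fn main() {
    // 0 -> {1, 2}; 1 is a dead end; 2 -> 3 (the goal).
    let successors = vec![vec![1, 2], vec![], vec![3], vec![]];
    assert_eq!(find_path(&successors, 0, 3), Some(vec![0, 2, 3]));
}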
- fn execute_nav(&mut self, nav: Nav) -> bool { - match nav.kind { - NavKind::Stay => true, - - NavKind::Next => self.cursor.goto_next_sibling(), - - NavKind::NextSkipTrivia => { - while self.cursor.goto_next_sibling() { - if !self.is_trivia(self.cursor.node()) { - return true; - } - } - false - } - - NavKind::NextExact => self.cursor.goto_next_sibling(), - - NavKind::Down => self.cursor.goto_first_child(), - - NavKind::DownSkipTrivia => { - if !self.cursor.goto_first_child() { - return false; - } - while self.is_trivia(self.cursor.node()) { - if !self.cursor.goto_next_sibling() { - return false; - } - } - true - } - - NavKind::DownExact => self.cursor.goto_first_child(), - - NavKind::Up => { - for _ in 0..nav.level { - if !self.cursor.goto_parent() { - return false; - } - } - true - } - - NavKind::UpSkipTrivia => { - // Validate we're at last non-trivia child before ascending - let current_id = self.cursor.node().id(); - if let Some(parent) = self.cursor.node().parent() { - let child_count = parent.child_count() as u32; - let mut found_current = false; - for i in 0..child_count { - if let Some(child) = parent.child(i) { - if child.id() == current_id { - found_current = true; - continue; - } - if found_current && !self.is_trivia(child) { - return false; - } - } - } - } - self.cursor.goto_parent() - } - - NavKind::UpExact => { - // Validate we're at last child - let node = self.cursor.node(); - if let Some(parent) = node.parent() { - let child_count = parent.child_count(); - if child_count > 0 { - let last_child = parent.child((child_count - 1) as u32); - if last_child.map(|c| c.id()) != Some(node.id()) { - return false; - } - } - } - self.cursor.goto_parent() - } - } - } - - /// Execute matcher with skip policy, returns true if matched. - fn execute_matcher(&mut self, matcher: &Matcher, nav: Nav) -> bool { - match matcher { - Matcher::Epsilon => true, - - Matcher::Node { - kind, - field, - negated_fields, - } => { - let matched = self.try_match_node(*kind, *field, *negated_fields, true, nav); - if matched { - self.matched_node = Some(self.cursor.node()); - } - matched - } - - Matcher::Anonymous { - kind, - field, - negated_fields, - } => { - let matched = self.try_match_node(*kind, *field, *negated_fields, false, nav); - if matched { - self.matched_node = Some(self.cursor.node()); - } - matched - } - - Matcher::Wildcard => { - self.matched_node = Some(self.cursor.node()); - true - } - } - } - - /// Try to match a node with the given constraints. 
- fn try_match_node( - &mut self, - kind: NodeTypeId, - field: Option, - negated_fields: crate::ir::Slice, - named: bool, - nav: Nav, - ) -> bool { - // Determine skip policy - let can_skip = match nav.kind { - NavKind::Next | NavKind::Down => true, - NavKind::NextSkipTrivia | NavKind::DownSkipTrivia => false, // Already handled trivia - _ => false, - }; - - loop { - let node = self.cursor.node(); - - // Check named/anonymous - if named != node.is_named() { - if can_skip && self.cursor.goto_next_sibling() { - continue; - } - return false; - } - - // Check kind - if node.kind_id() != kind { - if can_skip && self.cursor.goto_next_sibling() { - continue; - } - return false; - } - - // Check field constraint - if let Some(field_id) = field { - let actual_field = self.cursor.field_id(); - if actual_field != Some(field_id) { - if can_skip && self.cursor.goto_next_sibling() { - continue; - } - return false; - } - } - - // Check negated fields - let neg_fields = self.query.resolve_negated_fields(negated_fields); - for &neg_field in neg_fields { - if node.child_by_field_id(neg_field.get()).is_some() { - if can_skip && self.cursor.goto_next_sibling() { - continue; - } - return false; - } - } - - return true; - } - } - - /// Execute an effect operation. - fn execute_effect(&mut self, effect: EffectOp) { - self.effects.push_op(effect); - - if matches!(effect, EffectOp::CaptureNode) { - let node = self.matched_node.expect("CaptureNode without matched node"); - self.effects.push_node(node, self.source); - } - } - - /// Check if a node is trivia. - fn is_trivia(&self, node: Node) -> bool { - self.trivia_kinds.contains(&node.kind_id()) - } -} diff --git a/crates/plotnik-lib/src/engine/interpreter_tests.rs b/crates/plotnik-lib/src/engine/interpreter_tests.rs deleted file mode 100644 index a73d4107..00000000 --- a/crates/plotnik-lib/src/engine/interpreter_tests.rs +++ /dev/null @@ -1,114 +0,0 @@ -use plotnik_core::{NodeFieldId, NodeTypeId}; -use plotnik_langs::{Lang, javascript}; - -use crate::engine::interpreter::QueryInterpreter; -use crate::engine::value::Value; -use crate::ir::{NodeKindResolver, QueryEmitter}; -use crate::query::Query; - -struct LangResolver(Lang); - -impl NodeKindResolver for LangResolver { - fn resolve_kind(&self, name: &str) -> Option { - self.0.resolve_named_node(name) - } - - fn resolve_field(&self, name: &str) -> Option { - self.0.resolve_field(name) - } -} - -fn run(query_src: &str, source: &str) -> String { - let lang = javascript(); - - // Parse, link, build graph - let mut query = Query::new(query_src).exec().expect("query parse failed"); - - if !query.is_valid() { - return format!("QUERY ERROR:\n{}", query.diagnostics().render(query_src)); - } - - query.link(&lang); - if !query.is_valid() { - return format!("LINK ERROR:\n{}", query.diagnostics().render(query_src)); - } - - let query = query.build_graph(); - if query.has_type_errors() { - return format!("TYPE ERROR:\n{}", query.diagnostics().render(query_src)); - } - - // Emit compiled query - let resolver = LangResolver(lang.clone()); - let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); - let compiled = match emitter.emit() { - Ok(c) => c, - Err(e) => return format!("EMIT ERROR: {:?}", e), - }; - - // Parse source - let tree = lang.parse(source); - let cursor = tree.walk(); - - // Run interpreter - let interpreter = QueryInterpreter::new(&compiled, cursor, source); - match interpreter.run() { - Ok(value) => format_value(&value), - Err(e) => format!("RUNTIME ERROR: {}", e), - } -} - -fn 
format_value(value: &Value) -> String { - serde_json::to_string_pretty(value).unwrap_or_else(|e| format!("JSON ERROR: {}", e)) -} - -#[test] -fn capture_identifier() { - // AST: (program (expression_statement (identifier "x"))) - let query = "(program (expression_statement (identifier) @name))"; - let src = "x"; - - let result = run(query, src); - - insta::assert_snapshot!(result, @r#" - { - "kind": "identifier", - "text": "x", - "range": [ - 0, - 1 - ] - } - "#); -} - -#[test] -fn capture_number() { - // AST: (program (expression_statement (number "42"))) - let query = "(program (expression_statement (number) @num))"; - let src = "42"; - - let result = run(query, src); - - insta::assert_snapshot!(result, @r#" - { - "kind": "number", - "text": "42", - "range": [ - 0, - 2 - ] - } - "#); -} - -#[test] -fn no_match_wrong_root() { - // Query expects function_declaration at root, but AST root is program - let query = "(function_declaration) @fn"; - let src = "function foo() {}"; - - let result = run(query, src); - - insta::assert_snapshot!(result, @"null"); -} diff --git a/crates/plotnik-lib/src/engine/materializer.rs b/crates/plotnik-lib/src/engine/materializer.rs deleted file mode 100644 index 6dd43cc7..00000000 --- a/crates/plotnik-lib/src/engine/materializer.rs +++ /dev/null @@ -1,117 +0,0 @@ -//! Replays an effect stream to materialize a `Value`. - -use super::effect_stream::{CapturedNode, EffectStream}; -use super::value::Value; -use crate::ir::{DataFieldId, EffectOp, VariantTagId}; -use std::collections::BTreeMap; - -/// A container being built on the materializer's value stack. -enum Container<'tree> { - Array(Vec>), - Object(BTreeMap>), - Variant(VariantTagId), -} - -pub struct Materializer<'a, 'tree> { - /// The current value being processed. - current: Option>, - /// A stack of containers (arrays, objects, variants) being built. - stack: Vec>, - /// An iterator over the captured nodes from the effect stream. - nodes: std::slice::Iter<'a, CapturedNode<'tree>>, -} - -impl<'a, 'tree> Materializer<'a, 'tree> { - /// Creates a new materializer for a given effect stream. - fn new(stream: &'a EffectStream<'tree>) -> Self { - Self { - current: None, - stack: Vec::new(), - nodes: stream.nodes().iter(), - } - } - - /// Consumes the materializer and returns the final value. - fn finish(mut self) -> Value<'tree> { - self.current.take().unwrap_or(Value::Null) - } - - /// Replays an effect stream to produce a final `Value`. - pub fn materialize(stream: &'a EffectStream<'tree>) -> Value<'tree> { - let mut materializer = Materializer::new(stream); - - for op in stream.ops() { - materializer.apply_op(*op); - } - - materializer.finish() - } - - /// Applies a single effect operation to the materializer's state. 
- fn apply_op(&mut self, op: EffectOp) { - match op { - EffectOp::CaptureNode => { - let node = *self.nodes.next().expect("mismatched node capture"); - self.current = Some(Value::Node(node)); - } - EffectOp::StartObject => { - self.stack.push(Container::Object(BTreeMap::new())); - } - EffectOp::EndObject => match self.stack.pop() { - Some(Container::Object(obj)) => self.current = Some(Value::Object(obj)), - _ => panic!("invalid EndObject operation"), - }, - EffectOp::Field(id) => { - let value = self.current.take().unwrap_or(Value::Null); - if let Some(Container::Object(map)) = self.stack.last_mut() { - map.insert(id, value); - } else { - panic!("invalid Field operation without object on stack"); - } - } - EffectOp::StartArray => { - self.stack.push(Container::Array(Vec::new())); - } - EffectOp::EndArray => match self.stack.pop() { - Some(Container::Array(arr)) => self.current = Some(Value::Array(arr)), - _ => panic!("invalid EndArray operation"), - }, - EffectOp::PushElement => { - let value = self.current.take().unwrap_or(Value::Null); - if let Some(Container::Array(arr)) = self.stack.last_mut() { - arr.push(value); - } else { - panic!("invalid PushElement operation without array on stack"); - } - } - EffectOp::ClearCurrent => { - self.current = None; - } - EffectOp::StartVariant(tag) => { - self.stack.push(Container::Variant(tag)); - } - EffectOp::EndVariant => { - let value = self.current.take().unwrap_or(Value::Null); - match self.stack.pop() { - Some(Container::Variant(tag)) => { - self.current = Some(Value::Variant { - tag, - value: Box::new(value), - }); - } - _ => panic!("invalid EndVariant operation"), - } - } - EffectOp::ToString => { - if let Some(Value::Node(node)) = self.current.take() { - self.current = Some(Value::String(node.text().to_string())); - } else { - panic!("invalid ToString operation without a node"); - } - } - } - } -} - -#[cfg(test)] -mod materializer_tests; diff --git a/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs b/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs deleted file mode 100644 index 11b6fd47..00000000 --- a/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs +++ /dev/null @@ -1,157 +0,0 @@ -use plotnik_langs::javascript; - -use crate::engine::effect_stream::{CapturedNode, EffectStream}; -use crate::engine::materializer::Materializer; -use crate::engine::value::Value; -use crate::ir::EffectOp; - -fn capture_node<'tree>( - tree: &'tree tree_sitter::Tree, - source: &'tree str, - index: usize, -) -> CapturedNode<'tree> { - let mut cursor = tree.walk(); - cursor.goto_first_child(); - for _ in 0..index { - cursor.goto_next_sibling(); - } - CapturedNode::new(cursor.node(), source) -} - -#[test] -fn materialize_simple_object() { - let lang = javascript(); - let source = "a; b;"; - let tree = lang.parse(source); - - let node0 = capture_node(&tree, source, 0); - let node1 = capture_node(&tree, source, 1); - - let mut stream = EffectStream::new(); - stream.push_op(EffectOp::StartObject); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node0); - stream.push_op(EffectOp::Field(10)); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node1); - stream.push_op(EffectOp::Field(20)); - stream.push_op(EffectOp::EndObject); - - let value = Materializer::materialize(&stream); - - match value { - Value::Object(map) => { - assert_eq!(map.len(), 2); - assert!(map.contains_key(&10)); - assert!(map.contains_key(&20)); - } - _ => panic!("expected Object"), - } -} - -#[test] -fn 
materialize_simple_array() { - let lang = javascript(); - let source = "a; b;"; - let tree = lang.parse(source); - - let node0 = capture_node(&tree, source, 0); - let node1 = capture_node(&tree, source, 1); - - let mut stream = EffectStream::new(); - stream.push_op(EffectOp::StartArray); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node0); - stream.push_op(EffectOp::PushElement); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node1); - stream.push_op(EffectOp::PushElement); - stream.push_op(EffectOp::EndArray); - - let value = Materializer::materialize(&stream); - - match value { - Value::Array(arr) => { - assert_eq!(arr.len(), 2); - assert!(matches!(arr[0], Value::Node(_))); - assert!(matches!(arr[1], Value::Node(_))); - } - _ => panic!("expected Array"), - } -} - -#[test] -fn materialize_object_with_optional_field() { - let lang = javascript(); - let source = "a;"; - let tree = lang.parse(source); - - let node0 = capture_node(&tree, source, 0); - - let mut stream = EffectStream::new(); - stream.push_op(EffectOp::StartObject); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node0); - stream.push_op(EffectOp::Field(10)); - stream.push_op(EffectOp::ClearCurrent); - stream.push_op(EffectOp::Field(30)); - stream.push_op(EffectOp::EndObject); - - let value = Materializer::materialize(&stream); - - match value { - Value::Object(map) => { - assert_eq!(map.len(), 2); - assert!(matches!(map.get(&10), Some(Value::Node(_)))); - assert!(matches!(map.get(&30), Some(Value::Null))); - } - _ => panic!("expected Object"), - } -} - -#[test] -fn materialize_variant() { - let lang = javascript(); - let source = "a;"; - let tree = lang.parse(source); - - let node0 = capture_node(&tree, source, 0); - - let mut stream = EffectStream::new(); - stream.push_op(EffectOp::StartVariant(100)); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node0); - stream.push_op(EffectOp::EndVariant); - - let value = Materializer::materialize(&stream); - - match value { - Value::Variant { tag, value } => { - assert_eq!(tag, 100); - assert!(matches!(*value, Value::Node(_))); - } - _ => panic!("expected Variant"), - } -} - -#[test] -fn materialize_to_string() { - let lang = javascript(); - let source = "hello"; - let tree = lang.parse(source); - - // Get the identifier node (program -> expression_statement -> identifier) - let root = tree.root_node(); - let expr_stmt = root.child(0).unwrap(); - let ident = expr_stmt.child(0).unwrap(); - let node = CapturedNode::new(ident, source); - - let mut stream = EffectStream::new(); - stream.push_op(EffectOp::CaptureNode); - stream.push_captured_node(node); - stream.push_op(EffectOp::ToString); - - let value = Materializer::materialize(&stream); - - assert_eq!(value, Value::String("hello".to_string())); -} diff --git a/crates/plotnik-lib/src/engine/mod.rs b/crates/plotnik-lib/src/engine/mod.rs deleted file mode 100644 index 1f9bf0d0..00000000 --- a/crates/plotnik-lib/src/engine/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Query execution engine. - -pub mod effect_stream; -pub mod error; -pub mod interpreter; -pub mod materializer; -pub mod validate; -pub mod value; - -#[cfg(test)] -mod interpreter_tests; -#[cfg(test)] -mod validate_tests; diff --git a/crates/plotnik-lib/src/engine/validate.rs b/crates/plotnik-lib/src/engine/validate.rs deleted file mode 100644 index 39e1044c..00000000 --- a/crates/plotnik-lib/src/engine/validate.rs +++ /dev/null @@ -1,324 +0,0 @@ -//! 
Runtime validation of query results against type metadata. -//! -//! Validates that `Value` produced by the materializer matches the expected -//! type from the IR. A mismatch indicates an IR construction bug. - -use std::fmt; - -use crate::ir::{ - CompiledQuery, TYPE_COMPOSITE_START, TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind, -}; - -use super::value::Value; - -/// Error returned when validation fails. -#[derive(Debug)] -pub struct TypeError { - pub expected: TypeDescription, - pub actual: TypeDescription, - pub path: Vec, -} - -impl fmt::Display for TypeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "type mismatch at ")?; - if self.path.is_empty() { - write!(f, "")?; - } else { - for (i, seg) in self.path.iter().enumerate() { - if i > 0 { - write!(f, ".")?; - } - match seg { - PathSegment::Field(name) => write!(f, "{}", name)?, - PathSegment::Index(i) => write!(f, "[{}]", i)?, - PathSegment::Variant(tag) => write!(f, "<{}>", tag)?, - } - } - } - write!(f, ": expected {}, got {}", self.expected, self.actual) - } -} - -/// Segment in the path to a type error. -#[derive(Debug, Clone)] -pub enum PathSegment { - Field(String), - Index(usize), - Variant(String), -} - -/// Human-readable type description for error messages. -#[derive(Debug, Clone)] -pub enum TypeDescription { - Void, - Node, - String, - Optional(Box), - Array(Box), - NonEmptyArray(Box), - Record(String), - Enum(String), - // Actual value descriptions - ActualNull, - ActualNode, - ActualString, - ActualArray(usize), - ActualObject, - ActualVariant(String), -} - -impl fmt::Display for TypeDescription { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TypeDescription::Void => write!(f, "void"), - TypeDescription::Node => write!(f, "Node"), - TypeDescription::String => write!(f, "string"), - TypeDescription::Optional(inner) => write!(f, "{}?", inner), - TypeDescription::Array(inner) => write!(f, "{}*", inner), - TypeDescription::NonEmptyArray(inner) => write!(f, "{}+", inner), - TypeDescription::Record(name) => write!(f, "struct {}", name), - TypeDescription::Enum(name) => write!(f, "enum {}", name), - TypeDescription::ActualNull => write!(f, "null"), - TypeDescription::ActualNode => write!(f, "Node"), - TypeDescription::ActualString => write!(f, "string"), - TypeDescription::ActualArray(len) => write!(f, "array[{}]", len), - TypeDescription::ActualObject => write!(f, "object"), - TypeDescription::ActualVariant(tag) => write!(f, "variant({})", tag), - } - } -} - -/// Validates a value against the expected type. 
-pub fn validate( - value: &Value<'_>, - expected: TypeId, - query: &CompiledQuery, -) -> Result<(), TypeError> { - let mut ctx = ValidationContext { - query, - path: Vec::new(), - }; - ctx.validate_value(value, expected) -} - -struct ValidationContext<'a> { - query: &'a CompiledQuery, - path: Vec, -} - -impl ValidationContext<'_> { - fn validate_value(&mut self, value: &Value<'_>, expected: TypeId) -> Result<(), TypeError> { - match expected { - TYPE_VOID => self.expect_null(value), - TYPE_NODE => self.expect_node(value), - TYPE_STR => self.expect_string(value), - id if id >= TYPE_COMPOSITE_START => self.validate_composite(value, id), - _ => Ok(()), // Unknown primitive, skip validation - } - } - - fn expect_null(&self, value: &Value<'_>) -> Result<(), TypeError> { - match value { - Value::Null => Ok(()), - _ => Err(self.type_error(TypeDescription::Void, self.describe_value(value))), - } - } - - fn expect_node(&self, value: &Value<'_>) -> Result<(), TypeError> { - match value { - Value::Node(_) => Ok(()), - _ => Err(self.type_error(TypeDescription::Node, self.describe_value(value))), - } - } - - fn expect_string(&self, value: &Value<'_>) -> Result<(), TypeError> { - match value { - Value::String(_) => Ok(()), - _ => Err(self.type_error(TypeDescription::String, self.describe_value(value))), - } - } - - fn validate_composite(&mut self, value: &Value<'_>, type_id: TypeId) -> Result<(), TypeError> { - let idx = (type_id - TYPE_COMPOSITE_START) as usize; - let Some(def) = self.query.type_defs().get(idx) else { - return Ok(()); // Unknown type, skip - }; - - match def.kind { - TypeKind::Optional => self.validate_optional(value, def.inner_type().unwrap()), - TypeKind::ArrayStar => self.validate_array(value, def.inner_type().unwrap(), false), - TypeKind::ArrayPlus => self.validate_array(value, def.inner_type().unwrap(), true), - TypeKind::Record => self.validate_record(value, type_id, def), - TypeKind::Enum => self.validate_enum(value, type_id, def), - } - } - - fn validate_optional(&mut self, value: &Value<'_>, inner: TypeId) -> Result<(), TypeError> { - match value { - Value::Null => Ok(()), - _ => self.validate_value(value, inner), - } - } - - fn validate_array( - &mut self, - value: &Value<'_>, - element: TypeId, - non_empty: bool, - ) -> Result<(), TypeError> { - let Value::Array(items) = value else { - let expected = if non_empty { - TypeDescription::NonEmptyArray(Box::new(self.describe_type(element))) - } else { - TypeDescription::Array(Box::new(self.describe_type(element))) - }; - return Err(self.type_error(expected, self.describe_value(value))); - }; - - if non_empty && items.is_empty() { - return Err(self.type_error( - TypeDescription::NonEmptyArray(Box::new(self.describe_type(element))), - TypeDescription::ActualArray(0), - )); - } - - for (i, item) in items.iter().enumerate() { - self.path.push(PathSegment::Index(i)); - self.validate_value(item, element)?; - self.path.pop(); - } - - Ok(()) - } - - fn validate_record( - &mut self, - value: &Value<'_>, - type_id: TypeId, - def: &crate::ir::TypeDef, - ) -> Result<(), TypeError> { - let Value::Object(fields) = value else { - return Err(self.type_error(self.describe_type(type_id), self.describe_value(value))); - }; - - let Some(members_slice) = def.members_slice() else { - return Ok(()); - }; - let members = self.query.resolve_type_members(members_slice); - - for member in members { - let field_name = self.query.string(member.name); - self.path.push(PathSegment::Field(field_name.to_string())); - - // Field ID in the object is the index, 
need to find it - if let Some(field_value) = fields.get(&member.name) { - self.validate_value(field_value, member.ty)?; - } - // Missing field is OK if it's optional (would be Null) - - self.path.pop(); - } - - Ok(()) - } - - fn validate_enum( - &mut self, - value: &Value<'_>, - type_id: TypeId, - def: &crate::ir::TypeDef, - ) -> Result<(), TypeError> { - let Value::Variant { tag, value: inner } = value else { - return Err(self.type_error(self.describe_type(type_id), self.describe_value(value))); - }; - - let Some(members_slice) = def.members_slice() else { - return Ok(()); - }; - let members = self.query.resolve_type_members(members_slice); - - // Find the variant by tag - let variant = members.iter().find(|m| m.name == *tag); - let Some(variant) = variant else { - // Unknown variant tag - let tag_name = self.query.string(*tag); - return Err(self.type_error( - self.describe_type(type_id), - TypeDescription::ActualVariant(tag_name.to_string()), - )); - }; - - let tag_name = self.query.string(variant.name); - self.path.push(PathSegment::Variant(tag_name.to_string())); - self.validate_value(inner, variant.ty)?; - self.path.pop(); - - Ok(()) - } - - fn describe_type(&self, type_id: TypeId) -> TypeDescription { - match type_id { - TYPE_VOID => TypeDescription::Void, - TYPE_NODE => TypeDescription::Node, - TYPE_STR => TypeDescription::String, - id if id >= TYPE_COMPOSITE_START => { - let idx = (id - TYPE_COMPOSITE_START) as usize; - if let Some(def) = self.query.type_defs().get(idx) { - match def.kind { - TypeKind::Optional => TypeDescription::Optional(Box::new( - self.describe_type(def.inner_type().unwrap()), - )), - TypeKind::ArrayStar => TypeDescription::Array(Box::new( - self.describe_type(def.inner_type().unwrap()), - )), - TypeKind::ArrayPlus => TypeDescription::NonEmptyArray(Box::new( - self.describe_type(def.inner_type().unwrap()), - )), - TypeKind::Record => { - let name = if def.name != crate::ir::STRING_NONE { - self.query.string(def.name).to_string() - } else { - format!("T{}", type_id) - }; - TypeDescription::Record(name) - } - TypeKind::Enum => { - let name = if def.name != crate::ir::STRING_NONE { - self.query.string(def.name).to_string() - } else { - format!("T{}", type_id) - }; - TypeDescription::Enum(name) - } - } - } else { - TypeDescription::Node - } - } - _ => TypeDescription::Node, - } - } - - fn describe_value(&self, value: &Value<'_>) -> TypeDescription { - match value { - Value::Null => TypeDescription::ActualNull, - Value::Node(_) => TypeDescription::ActualNode, - Value::String(_) => TypeDescription::ActualString, - Value::Array(items) => TypeDescription::ActualArray(items.len()), - Value::Object(_) => TypeDescription::ActualObject, - Value::Variant { tag, .. } => { - let tag_name = self.query.string(*tag); - TypeDescription::ActualVariant(tag_name.to_string()) - } - } - } - - fn type_error(&self, expected: TypeDescription, actual: TypeDescription) -> TypeError { - TypeError { - expected, - actual, - path: self.path.clone(), - } - } -} diff --git a/crates/plotnik-lib/src/engine/validate_tests.rs b/crates/plotnik-lib/src/engine/validate_tests.rs deleted file mode 100644 index 1aaebf5a..00000000 --- a/crates/plotnik-lib/src/engine/validate_tests.rs +++ /dev/null @@ -1,111 +0,0 @@ -//! End-to-end tests for runtime type validation. 
- -use plotnik_core::{NodeFieldId, NodeTypeId}; -use plotnik_langs::{Lang, javascript}; - -use crate::engine::interpreter::QueryInterpreter; -use crate::engine::validate::validate; -use crate::ir::{NodeKindResolver, QueryEmitter}; -use crate::query::Query; - -struct LangResolver(Lang); - -impl NodeKindResolver for LangResolver { - fn resolve_kind(&self, name: &str) -> Option { - self.0.resolve_named_node(name) - } - - fn resolve_field(&self, name: &str) -> Option { - self.0.resolve_field(name) - } -} - -fn run_and_validate(query_src: &str, source: &str) -> String { - let lang = javascript(); - - let mut query = Query::new(query_src).exec().expect("query parse failed"); - assert!( - query.is_valid(), - "query invalid: {}", - query.diagnostics().render(query_src) - ); - - query.link(&lang); - assert!( - query.is_valid(), - "link failed: {}", - query.diagnostics().render(query_src) - ); - - let query = query.build_graph(); - assert!( - !query.has_type_errors(), - "type error: {}", - query.diagnostics().render(query_src) - ); - - let resolver = LangResolver(lang.clone()); - let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); - let compiled = emitter.emit().expect("emit failed"); - - let tree = lang.parse(source); - let cursor = tree.walk(); - - let interpreter = QueryInterpreter::new(&compiled, cursor, source); - let result = interpreter.run().expect("runtime error"); - - let expected_type = compiled.entrypoints().first().unwrap().result_type(); - - match validate(&result, expected_type, &compiled) { - Ok(()) => "OK".to_string(), - Err(e) => format!("VALIDATION ERROR: {}", e), - } -} - -#[test] -fn validate_simple_capture() { - let result = run_and_validate("(program (expression_statement (identifier) @name))", "x"); - insta::assert_snapshot!(result, @"OK"); -} - -#[test] -fn validate_string_annotation() { - let result = run_and_validate( - "(program (expression_statement (identifier) @name :: string))", - "x", - ); - insta::assert_snapshot!(result, @"OK"); -} - -#[test] -fn validate_sequence_star() { - let result = run_and_validate( - "(program { (expression_statement (identifier) @id)* })", - "x; y; z", - ); - insta::assert_snapshot!(result, @"OK"); -} - -#[test] -fn validate_sequence_plus() { - let result = run_and_validate( - "(program { (expression_statement (identifier) @id)+ })", - "x; y", - ); - insta::assert_snapshot!(result, @"OK"); -} - -#[test] -fn validate_optional_present() { - let result = run_and_validate("(program (expression_statement (identifier)? @maybe))", "x"); - insta::assert_snapshot!(result, @"OK"); -} - -#[test] -fn validate_optional_absent() { - let result = run_and_validate( - "(program (expression_statement (number)? @maybe (identifier)))", - "x", - ); - insta::assert_snapshot!(result, @"OK"); -} diff --git a/crates/plotnik-lib/src/engine/value.rs b/crates/plotnik-lib/src/engine/value.rs deleted file mode 100644 index a6288eda..00000000 --- a/crates/plotnik-lib/src/engine/value.rs +++ /dev/null @@ -1,145 +0,0 @@ -//! Types for representing query results. - -use super::effect_stream::{CapturedNode, VerboseNode}; -use crate::ir::{CompiledQuery, DataFieldId, VariantTagId}; -use serde::Serialize; -use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct}; -use std::collections::BTreeMap; - -/// A structured value produced by a query. -#[derive(Debug, Clone, PartialEq, Serialize)] -#[serde(untagged)] -pub enum Value<'tree> { - /// Represents a missing optional value. Serializes to `null`. - Null, - /// An AST node capture. 
- Node(CapturedNode<'tree>), - /// A string, typically from a `:: string` conversion. - String(String), - /// A list of values, from a `*` or `+` capture. - Array(Vec>), - /// A map of field names to values, from a `{...}` capture. - Object(BTreeMap>), - /// A tagged union, from a `[...]` capture with labels. - Variant { - tag: VariantTagId, - value: Box>, - }, -} - -/// Wrapper for verbose serialization of a Value. -/// Nodes include full positional information (bytes + line/column). -pub struct VerboseValue<'a, 'tree>(pub &'a Value<'tree>); - -impl Serialize for VerboseValue<'_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - match self.0 { - Value::Null => serializer.serialize_none(), - Value::Node(node) => VerboseNode(node).serialize(serializer), - Value::String(s) => serializer.serialize_str(s), - Value::Array(arr) => { - let mut seq = serializer.serialize_seq(Some(arr.len()))?; - for item in arr { - seq.serialize_element(&VerboseValue(item))?; - } - seq.end() - } - Value::Object(obj) => { - let mut map = serializer.serialize_map(Some(obj.len()))?; - for (k, v) in obj { - map.serialize_entry(&k, &VerboseValue(v))?; - } - map.end() - } - Value::Variant { tag, value } => { - let mut state = serializer.serialize_struct("Variant", 2)?; - state.serialize_field("$tag", tag)?; - state.serialize_field("$data", &VerboseValue(value))?; - state.end() - } - } - } -} - -/// Wrapper for serialization that resolves string IDs to actual strings. -/// Object field names and variant tags are resolved via CompiledQuery. -pub struct ResolvedValue<'a, 'tree>(pub &'a Value<'tree>, pub &'a CompiledQuery); - -impl Serialize for ResolvedValue<'_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let ResolvedValue(value, query) = self; - match value { - Value::Null => serializer.serialize_none(), - Value::Node(node) => node.serialize(serializer), - Value::String(s) => serializer.serialize_str(s), - Value::Array(arr) => { - let mut seq = serializer.serialize_seq(Some(arr.len()))?; - for item in arr { - seq.serialize_element(&ResolvedValue(item, query))?; - } - seq.end() - } - Value::Object(obj) => { - let mut map = serializer.serialize_map(Some(obj.len()))?; - for (k, v) in obj { - let key = query.string(*k); - map.serialize_entry(key, &ResolvedValue(v, query))?; - } - map.end() - } - Value::Variant { tag, value } => { - let mut state = serializer.serialize_struct("Variant", 2)?; - let tag_str = query.string(*tag); - state.serialize_field("$tag", tag_str)?; - state.serialize_field("$data", &ResolvedValue(value, query))?; - state.end() - } - } - } -} - -/// Wrapper for verbose serialization with resolved string IDs. -/// Combines VerboseValue (full node positions) with ResolvedValue (string resolution). 
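// Editorial sketch (not part of this patch): the wrapper-per-view pattern used
// by VerboseValue / ResolvedValue / VerboseResolvedValue above, reduced to a
// self-contained example. A newtype borrowing the value (plus whatever lookup
// context it needs) implements Serialize, so a single data model can be
// rendered in several shapes without duplicating it. Assumes the serde and
// serde_json crates, which this crate already uses; names are illustrative.
use serde::ser::{Serialize, SerializeMap, Serializer};
use std::collections::BTreeMap;

enum Val {
    Text(String),
    Object(BTreeMap<u16, Val>), // keys are interned string ids
}

/// Serializes `Val` with object keys resolved through a string table,
/// analogous to how ResolvedValue resolves ids via the compiled query.
struct Resolved<'a>(&'a Val, &'a [&'a str]);

impl Serialize for Resolved<'_> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Resolved(value, strings) = self;
        match value {
            Val::Text(s) => serializer.serialize_str(s),
            Val::Object(obj) => {
                let mut map = serializer.serialize_map(Some(obj.len()))?;
                for (k, v) in obj {
                    map.serialize_entry(strings[*k as usize], &Resolved(v, strings))?;
                }
                map.end()
            }
        }
    }
}

fn main() {
    let mut obj = BTreeMap::new();
    obj.insert(0u16, Val::Text("x".into()));
    let strings = ["name"];
    let json = serde_json::to_string(&Resolved(&Val::Object(obj), &strings)).unwrap();
    assert_eq!(json, r#"{"name":"x"}"#);
}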
-pub struct VerboseResolvedValue<'a, 'tree>(pub &'a Value<'tree>, pub &'a CompiledQuery); - -impl Serialize for VerboseResolvedValue<'_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let VerboseResolvedValue(value, query) = self; - match value { - Value::Null => serializer.serialize_none(), - Value::Node(node) => VerboseNode(node).serialize(serializer), - Value::String(s) => serializer.serialize_str(s), - Value::Array(arr) => { - let mut seq = serializer.serialize_seq(Some(arr.len()))?; - for item in arr { - seq.serialize_element(&VerboseResolvedValue(item, query))?; - } - seq.end() - } - Value::Object(obj) => { - let mut map = serializer.serialize_map(Some(obj.len()))?; - for (k, v) in obj { - let key = query.string(*k); - map.serialize_entry(key, &VerboseResolvedValue(v, query))?; - } - map.end() - } - Value::Variant { tag, value } => { - let mut state = serializer.serialize_struct("Variant", 2)?; - let tag_str = query.string(*tag); - state.serialize_field("$tag", tag_str)?; - state.serialize_field("$data", &VerboseResolvedValue(value, query))?; - state.end() - } - } - } -} diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs deleted file mode 100644 index 75567698..00000000 --- a/crates/plotnik-lib/src/ir/compiled.rs +++ /dev/null @@ -1,742 +0,0 @@ -//! Compiled query container and buffer. -//! -//! The compiled query lives in a single contiguous allocation—cache-friendly, -//! zero fragmentation, portable to WASM. See ADR-0004 for format details. - -use std::alloc::{Layout, alloc, dealloc}; -use std::fmt::Write; -use std::ptr; - -use plotnik_core::{NodeFieldId, NodeTypeId}; - -use super::{ - EffectOp, Entrypoint, Slice, StringId, StringRef, Transition, TransitionId, TypeDef, TypeMember, -}; - -/// Buffer alignment for cache-line efficiency. -pub const BUFFER_ALIGN: usize = 64; - -/// Magic bytes identifying a compiled query file. -pub const MAGIC: [u8; 4] = *b"PTKQ"; - -/// Current format version. -pub const FORMAT_VERSION: u32 = 1; - -/// Aligned buffer for compiled query data. -/// -/// Allocated via `Layout::from_size_align(len, BUFFER_ALIGN)`. Standard `Box<[u8]>` -/// won't work—it assumes 1-byte alignment and corrupts `dealloc`. -pub struct CompiledQueryBuffer { - ptr: *mut u8, - len: usize, - /// `true` if allocated, `false` if mmap'd or external. - owned: bool, -} - -impl CompiledQueryBuffer { - /// Allocate a new buffer with 64-byte alignment. - pub fn allocate(len: usize) -> Self { - if len == 0 { - return Self { - ptr: ptr::null_mut(), - len: 0, - owned: true, - }; - } - - let layout = Layout::from_size_align(len, BUFFER_ALIGN).expect("invalid layout"); - - // SAFETY: layout is non-zero size, properly aligned - let ptr = unsafe { alloc(layout) }; - if ptr.is_null() { - std::alloc::handle_alloc_error(layout); - } - - Self { - ptr, - len, - owned: true, - } - } - - /// Create a view into external memory (mmap'd or borrowed). - /// - /// # Safety - /// - `ptr` must be valid for reads of `len` bytes - /// - `ptr` must be aligned to `BUFFER_ALIGN` - /// - The backing memory must outlive the returned buffer - pub unsafe fn from_external(ptr: *mut u8, len: usize) -> Self { - debug_assert!( - (ptr as usize).is_multiple_of(BUFFER_ALIGN), - "buffer must be 64-byte aligned" - ); - Self { - ptr, - len, - owned: false, - } - } - - /// Returns a pointer to the buffer start. - #[inline] - pub fn as_ptr(&self) -> *const u8 { - self.ptr - } - - /// Returns a mutable pointer to the buffer start. 
- #[inline] - pub fn as_mut_ptr(&mut self) -> *mut u8 { - self.ptr - } - - /// Returns the buffer length in bytes. - #[inline] - pub fn len(&self) -> usize { - self.len - } - - /// Returns true if the buffer is empty. - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the buffer as a byte slice. - #[inline] - pub fn as_slice(&self) -> &[u8] { - if self.ptr.is_null() { - &[] - } else { - // SAFETY: ptr is valid for len bytes if non-null - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } - } - - /// Returns the buffer as a mutable byte slice. - #[inline] - pub fn as_mut_slice(&mut self) -> &mut [u8] { - if self.ptr.is_null() { - &mut [] - } else { - // SAFETY: ptr is valid for len bytes if non-null, and we have &mut self - unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } - } - } -} - -impl Drop for CompiledQueryBuffer { - fn drop(&mut self) { - if self.owned && !self.ptr.is_null() { - let layout = Layout::from_size_align(self.len, BUFFER_ALIGN) - .expect("layout was valid at allocation"); - // SAFETY: ptr was allocated with this exact layout - unsafe { dealloc(self.ptr, layout) }; - } - } -} - -// SAFETY: The buffer is just raw bytes, safe to send across threads -unsafe impl Send for CompiledQueryBuffer {} -unsafe impl Sync for CompiledQueryBuffer {} - -/// A compiled query ready for execution. -/// -/// Contains a single contiguous buffer with all segments, plus offset indices -/// for O(1) access to each segment. -pub struct CompiledQuery { - buffer: CompiledQueryBuffer, - // Segment offsets (byte offsets into buffer) - successors_offset: u32, - effects_offset: u32, - negated_fields_offset: u32, - string_refs_offset: u32, - string_bytes_offset: u32, - type_defs_offset: u32, - type_members_offset: u32, - entrypoints_offset: u32, - trivia_kinds_offset: u32, // 0 = no trivia kinds - // Segment counts (number of elements) - transition_count: u32, - successor_count: u32, - effect_count: u32, - negated_field_count: u16, - string_ref_count: u16, - type_def_count: u16, - type_member_count: u16, - entrypoint_count: u16, - trivia_kind_count: u16, -} - -impl CompiledQuery { - /// Creates a new compiled query from pre-built components. - /// - /// This is typically called by the emitter after layout computation. - #[allow(clippy::too_many_arguments)] - pub fn new( - buffer: CompiledQueryBuffer, - successors_offset: u32, - effects_offset: u32, - negated_fields_offset: u32, - string_refs_offset: u32, - string_bytes_offset: u32, - type_defs_offset: u32, - type_members_offset: u32, - entrypoints_offset: u32, - trivia_kinds_offset: u32, - transition_count: u32, - successor_count: u32, - effect_count: u32, - negated_field_count: u16, - string_ref_count: u16, - type_def_count: u16, - type_member_count: u16, - entrypoint_count: u16, - trivia_kind_count: u16, - ) -> Self { - Self { - buffer, - successors_offset, - effects_offset, - negated_fields_offset, - string_refs_offset, - string_bytes_offset, - type_defs_offset, - type_members_offset, - entrypoints_offset, - trivia_kinds_offset, - transition_count, - successor_count, - effect_count, - negated_field_count, - string_ref_count, - type_def_count, - type_member_count, - entrypoint_count, - trivia_kind_count, - } - } - - /// Returns the transitions segment. 
- #[inline] - pub fn transitions(&self) -> &[Transition] { - // Transitions start at offset 0 - // SAFETY: buffer is properly aligned, transitions are at offset 0 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr() as *const Transition, - self.transition_count as usize, - ) - } - } - - /// Returns the successors segment. - #[inline] - pub fn successors(&self) -> &[TransitionId] { - // SAFETY: offset is aligned to 4 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.successors_offset as usize) as *const TransitionId, - self.successor_count as usize, - ) - } - } - - /// Returns the effects segment. - #[inline] - pub fn effects(&self) -> &[EffectOp] { - // SAFETY: offset is aligned to 2 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.effects_offset as usize) as *const EffectOp, - self.effect_count as usize, - ) - } - } - - /// Returns the negated fields segment. - #[inline] - pub fn negated_fields(&self) -> &[NodeFieldId] { - // SAFETY: offset is aligned to 2 - unsafe { - std::slice::from_raw_parts( - self.buffer - .as_ptr() - .add(self.negated_fields_offset as usize) as *const NodeFieldId, - self.negated_field_count as usize, - ) - } - } - - /// Returns the string refs segment. - #[inline] - pub fn string_refs(&self) -> &[StringRef] { - // SAFETY: offset is aligned to 4 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.string_refs_offset as usize) as *const StringRef, - self.string_ref_count as usize, - ) - } - } - - /// Returns the raw string bytes. - #[inline] - pub fn string_bytes(&self) -> &[u8] { - let end = if self.type_defs_offset > 0 { - self.type_defs_offset as usize - } else { - self.buffer.len() - }; - let start = self.string_bytes_offset as usize; - &self.buffer.as_slice()[start..end] - } - - /// Returns the type definitions segment. - #[inline] - pub fn type_defs(&self) -> &[TypeDef] { - // SAFETY: offset is aligned to 4 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.type_defs_offset as usize) as *const TypeDef, - self.type_def_count as usize, - ) - } - } - - /// Returns the type members segment. - #[inline] - pub fn type_members(&self) -> &[TypeMember] { - // SAFETY: offset is aligned to 2 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.type_members_offset as usize) as *const TypeMember, - self.type_member_count as usize, - ) - } - } - - /// Returns the entrypoints segment. - #[inline] - pub fn entrypoints(&self) -> &[Entrypoint] { - // SAFETY: offset is aligned to 4 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.entrypoints_offset as usize) as *const Entrypoint, - self.entrypoint_count as usize, - ) - } - } - - /// Returns the trivia kinds segment (node types to skip). - #[inline] - pub fn trivia_kinds(&self) -> &[NodeTypeId] { - if self.trivia_kinds_offset == 0 { - return &[]; - } - // SAFETY: offset is aligned to 2 - unsafe { - std::slice::from_raw_parts( - self.buffer.as_ptr().add(self.trivia_kinds_offset as usize) as *const NodeTypeId, - self.trivia_kind_count as usize, - ) - } - } - - /// Returns a transition by ID. - #[inline] - pub fn transition(&self, id: TransitionId) -> &Transition { - &self.transitions()[id as usize] - } - - /// Returns a view of a transition with resolved slices. - #[inline] - pub fn transition_view(&self, id: TransitionId) -> TransitionView<'_> { - TransitionView { - query: self, - raw: self.transition(id), - } - } - - /// Resolves a string ID to its UTF-8 content. 
- #[inline] - pub fn string(&self, id: StringId) -> &str { - let refs = self.string_refs(); - let string_ref = &refs[id as usize]; - let bytes = self.string_bytes(); - let start = string_ref.offset as usize; - let end = start + string_ref.len as usize; - // SAFETY: emitter ensures valid UTF-8 - unsafe { std::str::from_utf8_unchecked(&bytes[start..end]) } - } - - /// Resolves a slice of effects. - #[inline] - pub fn resolve_effects(&self, slice: Slice) -> &[EffectOp] { - let effects = self.effects(); - let start = slice.start_index() as usize; - let end = start + slice.len() as usize; - &effects[start..end] - } - - /// Resolves a slice of negated fields. - #[inline] - pub fn resolve_negated_fields(&self, slice: Slice) -> &[NodeFieldId] { - let fields = self.negated_fields(); - let start = slice.start_index() as usize; - let end = start + slice.len() as usize; - &fields[start..end] - } - - /// Resolves a slice of type members. - #[inline] - pub fn resolve_type_members(&self, slice: Slice) -> &[TypeMember] { - let members = self.type_members(); - let start = slice.start_index() as usize; - let end = start + slice.len() as usize; - &members[start..end] - } - - /// Resolves successors for a transition by ID, handling both inline and spilled cases. - #[inline] - pub fn resolve_successors_by_id(&self, id: TransitionId) -> &[TransitionId] { - let transition = self.transition(id); - if transition.has_inline_successors() { - // Return from transitions segment - inline data is part of the transition - let count = transition.successor_count as usize; - &self.transitions()[id as usize].successor_data[..count] - } else { - let start = transition.spilled_successors_index() as usize; - let count = transition.successor_count as usize; - &self.successors()[start..start + count] - } - } - - /// Returns the number of transitions. - #[inline] - pub fn transition_count(&self) -> u32 { - self.transition_count - } - - /// Returns the number of entrypoints. - #[inline] - pub fn entrypoint_count(&self) -> u16 { - self.entrypoint_count - } - - /// Returns the raw buffer for serialization. - #[inline] - pub fn buffer(&self) -> &CompiledQueryBuffer { - &self.buffer - } - - /// Returns offset metadata for serialization. - pub fn offsets(&self) -> CompiledQueryOffsets { - CompiledQueryOffsets { - successors_offset: self.successors_offset, - effects_offset: self.effects_offset, - negated_fields_offset: self.negated_fields_offset, - string_refs_offset: self.string_refs_offset, - string_bytes_offset: self.string_bytes_offset, - type_defs_offset: self.type_defs_offset, - type_members_offset: self.type_members_offset, - entrypoints_offset: self.entrypoints_offset, - trivia_kinds_offset: self.trivia_kinds_offset, - } - } - - /// Dumps the compiled query in human-readable format for debugging. 
- pub fn dump(&self) -> String { - let mut out = String::new(); - - // Header - writeln!(out, "CompiledQuery {{").unwrap(); - writeln!(out, " buffer_len: {}", self.buffer.len()).unwrap(); - writeln!(out, " transitions: {}", self.transition_count).unwrap(); - writeln!(out, " successors: {} (spilled)", self.successor_count).unwrap(); - writeln!(out, " effects: {}", self.effect_count).unwrap(); - writeln!(out, " strings: {}", self.string_ref_count).unwrap(); - writeln!(out, " type_defs: {}", self.type_def_count).unwrap(); - writeln!(out, " entrypoints: {}", self.entrypoint_count).unwrap(); - writeln!(out).unwrap(); - - // Entrypoints - writeln!(out, " Entrypoints:").unwrap(); - for ep in self.entrypoints() { - let name = self.string(ep.name_id()); - writeln!( - out, - " {} -> T{} (type {})", - name, - ep.target(), - ep.result_type() - ) - .unwrap(); - } - writeln!(out).unwrap(); - - // Transitions - writeln!(out, " Transitions:").unwrap(); - for i in 0..self.transition_count { - let view = self.transition_view(i); - write!(out, " T{}: ", i).unwrap(); - - // Matcher - match view.matcher() { - super::Matcher::Epsilon => write!(out, "ε").unwrap(), - super::Matcher::Node { kind, field, .. } => { - write!(out, "Node({})", kind).unwrap(); - if let Some(f) = field { - write!(out, " field={}", f).unwrap(); - } - } - super::Matcher::Anonymous { kind, field, .. } => { - write!(out, "Anon({})", kind).unwrap(); - if let Some(f) = field { - write!(out, " field={}", f).unwrap(); - } - } - super::Matcher::Wildcard => write!(out, "_").unwrap(), - } - - // Nav - let nav = view.nav(); - if !nav.is_stay() { - write!(out, " nav={:?}", nav.kind).unwrap(); - if nav.level > 0 { - write!(out, "({})", nav.level).unwrap(); - } - } - - // Ref marker - match view.ref_marker() { - super::RefTransition::None => {} - super::RefTransition::Enter(id) => write!(out, " Enter({})", id).unwrap(), - super::RefTransition::Exit(id) => write!(out, " Exit({})", id).unwrap(), - } - - // Effects - let effects = view.effects(); - if !effects.is_empty() { - write!(out, " [").unwrap(); - for (j, eff) in effects.iter().enumerate() { - if j > 0 { - write!(out, ", ").unwrap(); - } - match eff { - EffectOp::CaptureNode => write!(out, "Capture").unwrap(), - EffectOp::ClearCurrent => write!(out, "Clear").unwrap(), - EffectOp::StartArray => write!(out, "StartArr").unwrap(), - EffectOp::PushElement => write!(out, "Push").unwrap(), - EffectOp::EndArray => write!(out, "EndArr").unwrap(), - EffectOp::StartObject => write!(out, "StartObj").unwrap(), - EffectOp::EndObject => write!(out, "EndObj").unwrap(), - EffectOp::Field(id) => write!(out, "Field({})", self.string(*id)).unwrap(), - EffectOp::StartVariant(id) => { - write!(out, "Var({})", self.string(*id)).unwrap() - } - EffectOp::EndVariant => write!(out, "EndVar").unwrap(), - EffectOp::ToString => write!(out, "ToStr").unwrap(), - } - } - write!(out, "]").unwrap(); - } - - // Successors - let succs = view.successors(); - if !succs.is_empty() { - write!(out, " -> [").unwrap(); - for (j, s) in succs.iter().enumerate() { - if j > 0 { - write!(out, ", ").unwrap(); - } - write!(out, "T{}", s).unwrap(); - } - write!(out, "]").unwrap(); - } - - writeln!(out).unwrap(); - } - - // Strings - if self.string_ref_count > 0 { - writeln!(out).unwrap(); - writeln!(out, " Strings:").unwrap(); - for i in 0..self.string_ref_count { - let s = self.string(i); - writeln!(out, " S{}: {:?}", i, s).unwrap(); - } - } - - // Types - if self.type_def_count > 0 { - writeln!(out).unwrap(); - writeln!(out, " Types:").unwrap(); - 
for (i, td) in self.type_defs().iter().enumerate() { - let type_id = i as u16 + super::TYPE_COMPOSITE_START; - let name = if td.name != super::STRING_NONE { - self.string(td.name) - } else { - "" - }; - write!(out, " Ty{}: {} {:?}", type_id, name, td.kind).unwrap(); - if td.is_wrapper() { - if let Some(inner) = td.inner_type() { - write!(out, " inner=Ty{}", inner).unwrap(); - } - } else if let Some(members) = td.members_slice() { - let resolved = self.resolve_type_members(members); - write!(out, " {{").unwrap(); - for (j, m) in resolved.iter().enumerate() { - if j > 0 { - write!(out, ", ").unwrap(); - } - write!(out, "{}: Ty{}", self.string(m.name), m.ty).unwrap(); - } - write!(out, "}}").unwrap(); - } - writeln!(out).unwrap(); - } - } - - writeln!(out, "}}").unwrap(); - out - } -} - -/// Offset metadata extracted from CompiledQuery. -#[derive(Debug, Clone, Copy)] -pub struct CompiledQueryOffsets { - pub successors_offset: u32, - pub effects_offset: u32, - pub negated_fields_offset: u32, - pub string_refs_offset: u32, - pub string_bytes_offset: u32, - pub type_defs_offset: u32, - pub type_members_offset: u32, - pub entrypoints_offset: u32, - pub trivia_kinds_offset: u32, -} - -/// A view of a transition with resolved slices. -/// -/// Hides offset arithmetic and inline/spilled distinction from callers. -pub struct TransitionView<'a> { - query: &'a CompiledQuery, - raw: &'a Transition, -} - -impl<'a> TransitionView<'a> { - /// Returns the raw transition. - #[inline] - pub fn raw(&self) -> &'a Transition { - self.raw - } - - /// Returns resolved successor IDs. - #[inline] - pub fn successors(&self) -> &'a [TransitionId] { - if self.raw.has_inline_successors() { - let count = self.raw.successor_count as usize; - &self.raw.successor_data[..count] - } else { - let start = self.raw.spilled_successors_index() as usize; - let count = self.raw.successor_count as usize; - &self.query.successors()[start..start + count] - } - } - - /// Returns resolved effect operations. - #[inline] - pub fn effects(&self) -> &'a [EffectOp] { - self.query.resolve_effects(self.raw.effects()) - } - - /// Returns the matcher. - #[inline] - pub fn matcher(&self) -> &super::Matcher { - &self.raw.matcher - } - - /// Returns a view of the matcher with resolved slices. - #[inline] - pub fn matcher_view(&self) -> MatcherView<'a> { - MatcherView { - query: self.query, - raw: &self.raw.matcher, - } - } - - /// Returns the navigation instruction. - #[inline] - pub fn nav(&self) -> super::Nav { - self.raw.nav - } - - /// Returns the ref transition marker. - #[inline] - pub fn ref_marker(&self) -> super::RefTransition { - self.raw.ref_marker - } -} - -/// A view of a matcher with resolved slices. -pub struct MatcherView<'a> { - query: &'a CompiledQuery, - raw: &'a super::Matcher, -} - -impl<'a> MatcherView<'a> { - /// Returns the raw matcher. - #[inline] - pub fn raw(&self) -> &'a super::Matcher { - self.raw - } - - /// Returns resolved negated fields. - #[inline] - pub fn negated_fields(&self) -> &'a [NodeFieldId] { - self.query.resolve_negated_fields(self.raw.negated_fields()) - } - - /// Returns the matcher kind. - #[inline] - pub fn kind(&self) -> super::MatcherKind { - self.raw.kind() - } -} - -/// Aligns an offset up to the given alignment. 
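// Editorial sketch (not part of this patch): how an align_up helper like the
// one below is typically used when packing variable-sized segments into one
// contiguous buffer, as the emitter's layout pass does. The segment sizes and
// alignments here are made up for illustration.
const fn align_up(offset: u32, align: u32) -> u32 {
    (offset + align - 1) & !(align - 1)
}

fn main() {
    // Pack three segments with (size, alignment) requirements back to back.
    let segments = [(100u32, 4u32), (7, 2), (33, 4)];
    let mut offset = 0u32;
    for (len, align) in segments {
        offset = align_up(offset, align); // pad up to the segment's alignment
        println!("segment at {offset} ({len} bytes)");
        offset += len;
    }
    let total = align_up(offset, 64); // whole buffer rounded to a cache line
    assert_eq!(total, 192);
}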
-#[inline] -pub const fn align_up(offset: u32, align: u32) -> u32 { - (offset + align - 1) & !(align - 1) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn buffer_alignment() { - let buf = CompiledQueryBuffer::allocate(128); - assert_eq!(buf.as_ptr() as usize % BUFFER_ALIGN, 0); - assert_eq!(buf.len(), 128); - } - - #[test] - fn buffer_empty() { - let buf = CompiledQueryBuffer::allocate(0); - assert!(buf.is_empty()); - assert_eq!(buf.as_slice(), &[] as &[u8]); - } - - #[test] - fn align_up_values() { - assert_eq!(align_up(0, 4), 0); - assert_eq!(align_up(1, 4), 4); - assert_eq!(align_up(4, 4), 4); - assert_eq!(align_up(5, 4), 8); - assert_eq!(align_up(63, 64), 64); - assert_eq!(align_up(64, 64), 64); - assert_eq!(align_up(65, 64), 128); - } -} diff --git a/crates/plotnik-lib/src/ir/effect.rs b/crates/plotnik-lib/src/ir/effect.rs deleted file mode 100644 index ff6d39bf..00000000 --- a/crates/plotnik-lib/src/ir/effect.rs +++ /dev/null @@ -1,49 +0,0 @@ -//! Effect operations for the query IR. -//! -//! Effects are recorded during transition execution and replayed -//! during materialization to construct the output value. - -use super::ids::{DataFieldId, VariantTagId}; - -/// Effect operation in the IR effect stream. -/// -/// Effects are executed sequentially after a successful match. -/// They manipulate a value stack to construct structured output. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(C, u16)] -pub enum EffectOp { - /// Store matched node as current value. - /// Only valid on transitions with Node/Anonymous/Wildcard matcher. - CaptureNode, - - /// Clear current value (set to None). - /// Used on skip paths for optional captures. - ClearCurrent, - - /// Push empty array onto stack. - StartArray, - - /// Move current value into top array. - PushElement, - - /// Pop array from stack into current. - EndArray, - - /// Push empty object onto stack. - StartObject, - - /// Pop object from stack into current. - EndObject, - - /// Move current value into top object at field. - Field(DataFieldId), - - /// Push variant container with tag onto stack. - StartVariant(VariantTagId), - - /// Pop variant, wrap current, set as current. - EndVariant, - - /// Replace current Node with its source text. - ToString, -} diff --git a/crates/plotnik-lib/src/ir/effect_tests.rs b/crates/plotnik-lib/src/ir/effect_tests.rs deleted file mode 100644 index 9a1a355a..00000000 --- a/crates/plotnik-lib/src/ir/effect_tests.rs +++ /dev/null @@ -1,22 +0,0 @@ -use super::*; - -#[test] -fn effect_op_size_and_align() { - assert_eq!(size_of::(), 4); - assert_eq!(align_of::(), 2); -} - -#[test] -fn effect_op_variants() { - // Ensure all variants exist and are constructible - let _ = EffectOp::CaptureNode; - let _ = EffectOp::StartArray; - let _ = EffectOp::PushElement; - let _ = EffectOp::EndArray; - let _ = EffectOp::StartObject; - let _ = EffectOp::EndObject; - let _ = EffectOp::Field(0); - let _ = EffectOp::StartVariant(0); - let _ = EffectOp::EndVariant; - let _ = EffectOp::ToString; -} diff --git a/crates/plotnik-lib/src/ir/emit.rs b/crates/plotnik-lib/src/ir/emit.rs deleted file mode 100644 index 9a69e610..00000000 --- a/crates/plotnik-lib/src/ir/emit.rs +++ /dev/null @@ -1,969 +0,0 @@ -//! Query emitter: transforms BuildGraph + TypeInferenceResult into CompiledQuery. -//! -//! Three-pass construction: -//! 1. Analysis: count elements, intern strings, collect data -//! 2. Layout: compute aligned offsets, allocate once -//! 3. 
Emission: write via ptr::write - -use std::collections::HashMap; -use std::ptr; - -use plotnik_core::{NodeFieldId, NodeTypeId}; - -use super::compiled::{CompiledQuery, CompiledQueryBuffer, align_up}; -use super::ids::{RefId, StringId, TYPE_NODE, TransitionId}; -use super::strings::StringInterner; -use super::{ - EffectOp, Entrypoint, MAX_INLINE_SUCCESSORS, Matcher, RefTransition, Slice, StringRef, - Transition, TypeDef, TypeMember, -}; - -use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, RefMarker}; -use crate::query::infer::TypeInferenceResult; - -/// Callback for resolving node kind names to IDs. -pub trait NodeKindResolver { - /// Resolves a named node kind to its ID. Returns `None` if unknown. - fn resolve_kind(&self, name: &str) -> Option; - - /// Resolves a field name to its ID. Returns `None` if unknown. - fn resolve_field(&self, name: &str) -> Option; -} - -/// A resolver that always fails (for testing without tree-sitter). -pub struct NullResolver; - -impl NodeKindResolver for NullResolver { - fn resolve_kind(&self, _name: &str) -> Option { - None - } - fn resolve_field(&self, _name: &str) -> Option { - None - } -} - -/// Map-based resolver for testing. -pub struct MapResolver { - kinds: HashMap, - fields: HashMap, -} - -impl MapResolver { - pub fn new() -> Self { - Self { - kinds: HashMap::new(), - fields: HashMap::new(), - } - } - - pub fn add_kind(&mut self, name: impl Into, id: NodeTypeId) { - self.kinds.insert(name.into(), id); - } - - pub fn add_field(&mut self, name: impl Into, id: NodeFieldId) { - self.fields.insert(name.into(), id); - } -} - -impl Default for MapResolver { - fn default() -> Self { - Self::new() - } -} - -impl NodeKindResolver for MapResolver { - fn resolve_kind(&self, name: &str) -> Option { - self.kinds.get(name).copied() - } - - fn resolve_field(&self, name: &str) -> Option { - self.fields.get(name).copied() - } -} - -/// Query emitter error. -#[derive(Debug, Clone)] -pub enum EmitError { - /// Unknown node kind encountered. - UnknownNodeKind(String), - /// Unknown field name encountered. - UnknownField(String), - /// Too many transitions (exceeds u32::MAX). - TooManyTransitions, - /// Too many successors (exceeds u32::MAX). - TooManySuccessors, - /// Too many effects (exceeds u32::MAX). - TooManyEffects, - /// Internal consistency error. - InternalError(String), -} - -impl std::fmt::Display for EmitError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - EmitError::UnknownNodeKind(s) => write!(f, "unknown node kind: {}", s), - EmitError::UnknownField(s) => write!(f, "unknown field: {}", s), - EmitError::TooManyTransitions => write!(f, "too many transitions"), - EmitError::TooManySuccessors => write!(f, "too many successors"), - EmitError::TooManyEffects => write!(f, "too many effects"), - EmitError::InternalError(s) => write!(f, "internal error: {}", s), - } - } -} - -impl std::error::Error for EmitError {} - -/// Result type for emit operations. -pub type EmitResult = Result; - -/// Emitter state during analysis phase. 
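The emitter context defined next collects the `EffectOp` stream documented in `effect.rs` above; at run time those ops are replayed sequentially against a value stack to build the structured output. Below is a standalone sketch of that replay loop with simplified stand-in types, not the crate's own (in particular, the real `CaptureNode` carries no payload because the matched node comes from the cursor, so the inline node id here is purely illustrative):

```rust
/// Simplified stand-in for the materialized output value.
#[derive(Debug, Clone, PartialEq)]
enum Value {
    None,
    Node(u32),
    Array(Vec<Value>),
    Object(Vec<(String, Value)>),
}

/// Simplified stand-in for `EffectOp`; unused variants are kept to mirror the enum above.
#[allow(dead_code)]
enum Op {
    CaptureNode(u32),
    ClearCurrent,
    StartArray,
    PushElement,
    EndArray,
    StartObject,
    EndObject,
    Field(&'static str),
}

/// Replays an effect stream against a value stack, following the semantics
/// described in the `EffectOp` doc comments.
fn replay(ops: &[Op]) -> Value {
    let mut stack: Vec<Value> = Vec::new();
    let mut current = Value::None;
    for op in ops {
        match op {
            Op::CaptureNode(id) => current = Value::Node(*id),
            Op::ClearCurrent => current = Value::None,
            Op::StartArray => stack.push(Value::Array(Vec::new())),
            Op::StartObject => stack.push(Value::Object(Vec::new())),
            Op::PushElement => {
                if let Some(Value::Array(items)) = stack.last_mut() {
                    items.push(std::mem::replace(&mut current, Value::None));
                }
            }
            Op::Field(name) => {
                if let Some(Value::Object(fields)) = stack.last_mut() {
                    let value = std::mem::replace(&mut current, Value::None);
                    fields.push(((*name).to_string(), value));
                }
            }
            Op::EndArray | Op::EndObject => current = stack.pop().unwrap_or(Value::None),
        }
    }
    current
}

fn main() {
    // The shape a single captured node stored under an object field would produce.
    let ops = [Op::StartObject, Op::CaptureNode(7), Op::Field("name"), Op::EndObject];
    assert_eq!(
        replay(&ops),
        Value::Object(vec![("name".to_string(), Value::Node(7))])
    );
}
```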
-struct EmitContext<'src, 'g> { - graph: &'g BuildGraph<'src>, - type_info: &'g TypeInferenceResult<'src>, - strings: StringInterner<'src>, - - // Collected data - effects: Vec, - negated_fields: Vec, - /// Spilled successors (for transitions with >8 successors) - spilled_successors: Vec, - - // Maps from BuildGraph to IR - /// For each transition, its effects slice - transition_effects: Vec>, - /// For each transition, its negated fields slice - transition_negated_fields: Vec>, - /// For each transition, if successors spill: (start_index in spilled_successors, count) - transition_spilled: Vec>, -} - -impl<'src, 'g> EmitContext<'src, 'g> { - fn new(graph: &'g BuildGraph<'src>, type_info: &'g TypeInferenceResult<'src>) -> Self { - let node_count = graph.len(); - Self { - graph, - type_info, - strings: StringInterner::new(), - effects: Vec::new(), - negated_fields: Vec::new(), - spilled_successors: Vec::new(), - transition_effects: Vec::with_capacity(node_count), - transition_negated_fields: Vec::with_capacity(node_count), - transition_spilled: Vec::with_capacity(node_count), - } - } - - fn intern(&mut self, s: &'src str) -> StringId { - self.strings.intern(s) - } -} - -/// Layout information computed in pass 2. -struct LayoutInfo { - buffer_len: usize, - successors_offset: u32, - effects_offset: u32, - negated_fields_offset: u32, - string_refs_offset: u32, - string_bytes_offset: u32, - type_defs_offset: u32, - type_members_offset: u32, - entrypoints_offset: u32, - trivia_kinds_offset: u32, - - // Counts - transition_count: u32, - successor_count: u32, - effect_count: u32, - negated_field_count: u16, - string_ref_count: u16, - type_def_count: u16, - type_member_count: u16, - entrypoint_count: u16, - trivia_kind_count: u16, -} - -/// Emits a compiled query from a BuildGraph. -pub struct QueryEmitter<'src, 'g, R> { - ctx: EmitContext<'src, 'g>, - resolver: R, - trivia_kinds: Vec, -} - -impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> { - /// Creates a new emitter. - pub fn new( - graph: &'g BuildGraph<'src>, - type_info: &'g TypeInferenceResult<'src>, - resolver: R, - ) -> Self { - Self { - ctx: EmitContext::new(graph, type_info), - resolver, - trivia_kinds: Vec::new(), - } - } - - /// Sets trivia node kinds (e.g., comments) to skip during execution. - pub fn with_trivia_kinds(mut self, kinds: Vec) -> Self { - self.trivia_kinds = kinds; - self - } - - /// Emits the compiled query. 
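`emit` below drives the three passes; the middle pass (`compute_layout`, further down) turns the counts gathered during analysis into aligned segment offsets. A toy standalone walk-through of that accumulation for the first few segments, with made-up counts, just to show how the offsets fall out:

```rust
fn align_up(offset: u32, align: u32) -> u32 {
    (offset + align - 1) & !(align - 1)
}

fn main() {
    // Transitions start at offset 0 and are 64 bytes each.
    let transition_count = 3u32;
    let mut offset = transition_count * 64; // 192

    // Spilled successors: 4-byte elements, 4-byte aligned.
    let successor_count = 5u32;
    let successors_offset = align_up(offset, 4); // 192 (already aligned)
    offset = successors_offset + successor_count * 4; // 212

    // Negated fields: 2-byte elements, 2-byte aligned.
    let negated_field_count = 3u32;
    let negated_fields_offset = align_up(offset, 2); // 212
    offset = negated_fields_offset + negated_field_count * 2; // 218

    // The whole buffer is rounded up to 64 bytes for mmap-friendly storage.
    let buffer_len = align_up(offset, 64); // 256
    assert_eq!((successors_offset, negated_fields_offset, buffer_len), (192, 212, 256));
}
```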
- pub fn emit(mut self) -> EmitResult { - // Pass 1: Analysis - self.analyze()?; - - // Pass 2: Layout - let layout = self.compute_layout()?; - - // Pass 3: Emission - self.emit_buffer(layout) - } - - fn analyze(&mut self) -> EmitResult<()> { - // Pre-intern definition names for entrypoints - for (name, _) in self.ctx.graph.definitions() { - self.ctx.intern(name); - } - - // Pre-intern type names - for type_def in &self.ctx.type_info.type_defs { - if let Some(name) = type_def.name { - self.ctx.intern(name); - } - for member in &type_def.members { - self.ctx.intern(member.name); - } - } - - // Analyze each transition - for (_, node) in self.ctx.graph.iter() { - self.analyze_node(node)?; - } - - Ok(()) - } - - fn analyze_node(&mut self, node: &BuildNode<'src>) -> EmitResult<()> { - // Collect effects - let effects_start = self.ctx.effects.len() as u32; - for effect in &node.effects { - let ir_effect = self.convert_effect(effect)?; - self.ctx.effects.push(ir_effect); - } - let effects_len = (self.ctx.effects.len() as u32 - effects_start) as u16; - self.ctx - .transition_effects - .push(Slice::new(effects_start, effects_len)); - - // Collect negated fields - let negated_start = self.ctx.negated_fields.len() as u32; - if let BuildMatcher::Node { negated_fields, .. } = &node.matcher { - for field_name in negated_fields { - let field_id = self - .resolver - .resolve_field(field_name) - .ok_or_else(|| EmitError::UnknownField((*field_name).to_string()))?; - self.ctx.negated_fields.push(field_id); - } - } - let negated_len = (self.ctx.negated_fields.len() as u32 - negated_start) as u16; - self.ctx - .transition_negated_fields - .push(Slice::new(negated_start, negated_len)); - - // Check if successors need to spill - if node.successors.len() > MAX_INLINE_SUCCESSORS { - let start = self.ctx.spilled_successors.len() as u32; - for &succ in &node.successors { - self.ctx.spilled_successors.push(succ); - } - self.ctx - .transition_spilled - .push(Some((start, node.successors.len() as u32))); - } else { - self.ctx.transition_spilled.push(None); - } - - Ok(()) - } - - fn convert_effect(&mut self, effect: &BuildEffect<'src>) -> EmitResult { - Ok(match effect { - BuildEffect::CaptureNode => EffectOp::CaptureNode, - BuildEffect::ClearCurrent => EffectOp::ClearCurrent, - BuildEffect::StartArray { .. } => EffectOp::StartArray, - BuildEffect::PushElement => EffectOp::PushElement, - BuildEffect::EndArray => EffectOp::EndArray, - BuildEffect::StartObject { .. } => EffectOp::StartObject, - BuildEffect::EndObject => EffectOp::EndObject, - BuildEffect::Field { name, .. 
} => { - let id = self.ctx.intern(name); - EffectOp::Field(id) - } - BuildEffect::StartVariant(tag) => { - let id = self.ctx.intern(tag); - EffectOp::StartVariant(id) - } - BuildEffect::EndVariant => EffectOp::EndVariant, - BuildEffect::ToString => EffectOp::ToString, - }) - } - - fn compute_layout(&self) -> EmitResult { - let transition_count = self.ctx.graph.len() as u32; - let successor_count = self.ctx.spilled_successors.len() as u32; - let effect_count = self.ctx.effects.len() as u32; - let negated_field_count = self.ctx.negated_fields.len() as u16; - let string_ref_count = self.ctx.strings.len() as u16; - let type_def_count = self.ctx.type_info.type_defs.len() as u16; - let type_member_count: u16 = self - .ctx - .type_info - .type_defs - .iter() - .map(|td| td.members.len() as u16) - .sum(); - let entrypoint_count = self.ctx.graph.definitions().count() as u16; - let trivia_kind_count = self.trivia_kinds.len() as u16; - - // Compute offsets with proper alignment - let mut offset: u32 = 0; - - // Transitions at offset 0, 64-byte aligned - offset += transition_count * 64; - - // Successors: align 4 - let successors_offset = align_up(offset, 4); - offset = successors_offset + successor_count * 4; - - // Effects: align 4 (EffectOp is 4 bytes with repr(C, u16) but discriminant+payload) - let effects_offset = align_up(offset, 4); - offset = effects_offset + effect_count * 4; - - // Negated fields: align 2 - let negated_fields_offset = align_up(offset, 2); - offset = negated_fields_offset + (negated_field_count as u32) * 2; - - // String refs: align 4 - let string_refs_offset = align_up(offset, 4); - offset = string_refs_offset + (string_ref_count as u32) * 8; - - // String bytes: align 1 - let string_bytes_offset = offset; - offset += self.ctx.strings.total_bytes() as u32; - - // Type defs: align 4 - let type_defs_offset = align_up(offset, 4); - offset = type_defs_offset + (type_def_count as u32) * 12; - - // Type members: align 2 - let type_members_offset = align_up(offset, 2); - offset = type_members_offset + (type_member_count as u32) * 4; - - // Entrypoints: align 4 - let entrypoints_offset = align_up(offset, 4); - offset = entrypoints_offset + (entrypoint_count as u32) * 12; - - // Trivia kinds: align 2 - let trivia_kinds_offset = if trivia_kind_count > 0 { - let aligned = align_up(offset, 2); - offset = aligned + (trivia_kind_count as u32) * 2; - aligned - } else { - 0 - }; - - // Final buffer size, aligned to 64 for potential mmap - let buffer_len = align_up(offset, 64) as usize; - - Ok(LayoutInfo { - buffer_len, - successors_offset, - effects_offset, - negated_fields_offset, - string_refs_offset, - string_bytes_offset, - type_defs_offset, - type_members_offset, - entrypoints_offset, - trivia_kinds_offset, - transition_count, - successor_count, - effect_count, - negated_field_count, - string_ref_count, - type_def_count, - type_member_count, - entrypoint_count, - trivia_kind_count, - }) - } - - fn emit_buffer(self, layout: LayoutInfo) -> EmitResult { - let mut buffer = CompiledQueryBuffer::allocate(layout.buffer_len); - let base = buffer.as_mut_ptr(); - - // Emit transitions - self.emit_transitions(base, &layout)?; - - // Emit successors - self.emit_successors(base, &layout); - - // Emit effects - self.emit_effects(base, &layout); - - // Emit negated fields - self.emit_negated_fields(base, &layout); - - // Emit strings - self.emit_strings(base, &layout); - - // Emit type metadata - self.emit_types(base, &layout); - - // Emit entrypoints - self.emit_entrypoints(base, &layout)?; - - // 
Emit trivia kinds - self.emit_trivia_kinds(base, &layout); - - Ok(CompiledQuery::new( - buffer, - layout.successors_offset, - layout.effects_offset, - layout.negated_fields_offset, - layout.string_refs_offset, - layout.string_bytes_offset, - layout.type_defs_offset, - layout.type_members_offset, - layout.entrypoints_offset, - layout.trivia_kinds_offset, - layout.transition_count, - layout.successor_count, - layout.effect_count, - layout.negated_field_count, - layout.string_ref_count, - layout.type_def_count, - layout.type_member_count, - layout.entrypoint_count, - layout.trivia_kind_count, - )) - } - - fn emit_transitions(&self, base: *mut u8, _layout: &LayoutInfo) -> EmitResult<()> { - let transitions_ptr = base as *mut Transition; - - for (idx, (_, node)) in self.ctx.graph.iter().enumerate() { - let transition = self.build_transition(node, idx)?; - // SAFETY: buffer is properly sized and aligned - unsafe { - ptr::write(transitions_ptr.add(idx), transition); - } - } - - Ok(()) - } - - fn build_transition(&self, node: &BuildNode<'src>, idx: usize) -> EmitResult { - let matcher = self.convert_matcher(&node.matcher)?; - let ref_marker = self.convert_ref_marker(&node.ref_marker); - let effects = self.ctx.transition_effects[idx]; - let negated_fields_slice = self.ctx.transition_negated_fields[idx]; - - // Build successor data - let (successor_count, successor_data) = - if let Some((start, count)) = self.ctx.transition_spilled[idx] { - // Spilled: store index in successor_data[0] - let mut data = [0u32; MAX_INLINE_SUCCESSORS]; - data[0] = start; - (count, data) - } else { - // Inline - let mut data = [0u32; MAX_INLINE_SUCCESSORS]; - for (i, &succ) in node.successors.iter().enumerate() { - data[i] = succ; - } - (node.successors.len() as u32, data) - }; - - // Inject negated_fields into matcher if applicable - let matcher = match matcher { - Matcher::Node { kind, field, .. } => Matcher::Node { - kind, - field, - negated_fields: negated_fields_slice, - }, - Matcher::Anonymous { kind, field, .. } => Matcher::Anonymous { - kind, - field, - negated_fields: Slice::empty(), - }, - other => other, - }; - - let transition = Transition::new( - matcher, - ref_marker, - node.nav, - effects, - successor_count, - successor_data, - ); - - Ok(transition) - } - - fn convert_matcher(&self, matcher: &BuildMatcher<'src>) -> EmitResult { - Ok(match matcher { - BuildMatcher::Epsilon => Matcher::Epsilon, - BuildMatcher::Node { kind, field, .. 
} => { - let kind_id = self - .resolver - .resolve_kind(kind) - .ok_or_else(|| EmitError::UnknownNodeKind((*kind).to_string()))?; - let field_id = match field { - Some(f) => self.resolver.resolve_field(f), - None => None, - }; - Matcher::Node { - kind: kind_id, - field: field_id, - negated_fields: Slice::empty(), // Will be filled in build_transition - } - } - BuildMatcher::Anonymous { literal, field } => { - // For anonymous nodes, we use the literal as a synthetic kind ID - // In practice, this would be resolved differently - let kind_id = self.resolver.resolve_kind(literal).unwrap_or(0); - let field_id = match field { - Some(f) => self.resolver.resolve_field(f), - None => None, - }; - Matcher::Anonymous { - kind: kind_id, - field: field_id, - negated_fields: Slice::empty(), - } - } - BuildMatcher::Wildcard { field } => { - // Wildcard doesn't use field in IR representation - let _ = field; - Matcher::Wildcard - } - }) - } - - fn convert_ref_marker(&self, marker: &RefMarker) -> RefTransition { - match marker { - RefMarker::None => RefTransition::None, - RefMarker::Enter { ref_id } => RefTransition::Enter(*ref_id as RefId), - RefMarker::Exit { ref_id } => RefTransition::Exit(*ref_id as RefId), - } - } - - fn emit_successors(&self, base: *mut u8, layout: &LayoutInfo) { - if self.ctx.spilled_successors.is_empty() { - return; - } - - let ptr = unsafe { base.add(layout.successors_offset as usize) } as *mut TransitionId; - for (i, &succ) in self.ctx.spilled_successors.iter().enumerate() { - unsafe { - ptr::write(ptr.add(i), succ); - } - } - } - - fn emit_effects(&self, base: *mut u8, layout: &LayoutInfo) { - if self.ctx.effects.is_empty() { - return; - } - - let ptr = unsafe { base.add(layout.effects_offset as usize) } as *mut EffectOp; - for (i, effect) in self.ctx.effects.iter().enumerate() { - unsafe { - ptr::write(ptr.add(i), *effect); - } - } - } - - fn emit_negated_fields(&self, base: *mut u8, layout: &LayoutInfo) { - if self.ctx.negated_fields.is_empty() { - return; - } - - let ptr = unsafe { base.add(layout.negated_fields_offset as usize) } as *mut NodeFieldId; - for (i, &field) in self.ctx.negated_fields.iter().enumerate() { - unsafe { - ptr::write(ptr.add(i), field); - } - } - } - - fn emit_strings(&self, base: *mut u8, layout: &LayoutInfo) { - // Emit string refs - let refs_ptr = unsafe { base.add(layout.string_refs_offset as usize) } as *mut StringRef; - let bytes_ptr = unsafe { base.add(layout.string_bytes_offset as usize) }; - - let mut byte_offset: u32 = 0; - for (i, (_, s)) in self.ctx.strings.iter().enumerate() { - // Write StringRef - let string_ref = StringRef::new(byte_offset, s.len() as u16); - unsafe { - ptr::write(refs_ptr.add(i), string_ref); - } - - // Write string bytes - unsafe { - ptr::copy_nonoverlapping(s.as_ptr(), bytes_ptr.add(byte_offset as usize), s.len()); - } - - byte_offset += s.len() as u32; - } - } - - fn emit_types(&self, base: *mut u8, layout: &LayoutInfo) { - let defs_ptr = unsafe { base.add(layout.type_defs_offset as usize) } as *mut TypeDef; - let members_ptr = - unsafe { base.add(layout.type_members_offset as usize) } as *mut TypeMember; - - let mut member_idx: u32 = 0; - - for (i, type_def) in self.ctx.type_info.type_defs.iter().enumerate() { - let name_id = type_def - .name - .and_then(|n| self.ctx.strings.get(n)) - .unwrap_or(super::ids::STRING_NONE); - - let ir_def = if let Some(inner) = type_def.inner_type { - TypeDef::wrapper(type_def.kind, inner) - } else { - let members_start = member_idx; - let members_len = type_def.members.len() as u16; - 
- // Emit members - for member in &type_def.members { - let member_name_id = self - .ctx - .strings - .get(member.name) - .expect("member name should be interned"); - let ir_member = TypeMember::new(member_name_id, member.ty); - unsafe { - ptr::write(members_ptr.add(member_idx as usize), ir_member); - } - member_idx += 1; - } - - TypeDef::composite( - type_def.kind, - name_id, - Slice::new(members_start, members_len), - ) - }; - - unsafe { - ptr::write(defs_ptr.add(i), ir_def); - } - } - } - - fn emit_entrypoints(&self, base: *mut u8, layout: &LayoutInfo) -> EmitResult<()> { - let ptr = unsafe { base.add(layout.entrypoints_offset as usize) } as *mut Entrypoint; - - for (i, (name, entry_node)) in self.ctx.graph.definitions().enumerate() { - let name_id = self - .ctx - .strings - .get(name) - .expect("definition name should be interned"); - - // Look up the result type for this definition - let result_type = self - .ctx - .type_info - .entrypoint_types - .get(name) - .copied() - .unwrap_or(TYPE_NODE); - - let entrypoint = Entrypoint::new(name_id, entry_node, result_type); - unsafe { - ptr::write(ptr.add(i), entrypoint); - } - } - - Ok(()) - } - - fn emit_trivia_kinds(&self, base: *mut u8, layout: &LayoutInfo) { - if self.trivia_kinds.is_empty() { - return; - } - - let ptr = unsafe { base.add(layout.trivia_kinds_offset as usize) } as *mut NodeTypeId; - for (i, &kind) in self.trivia_kinds.iter().enumerate() { - unsafe { - ptr::write(ptr.add(i), kind); - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode}; - use crate::query::infer::TypeInferenceResult; - use std::num::NonZeroU16; - - fn make_resolver() -> MapResolver { - let mut r = MapResolver::new(); - r.add_kind("identifier", 1); - r.add_kind("function_declaration", 2); - r.add_field("name", NonZeroU16::new(1).unwrap()); - r.add_field("body", NonZeroU16::new(2).unwrap()); - r - } - - #[test] - fn emit_simple_query() { - let mut graph = BuildGraph::new(); - - // Create a simple: (identifier) @id - let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(node).add_effect(BuildEffect::CaptureNode); - graph.add_definition("Main", node); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - assert_eq!(compiled.transition_count(), 1); - assert_eq!(compiled.entrypoint_count(), 1); - - let t = compiled.transition(0); - assert!(matches!(t.matcher, Matcher::Node { kind: 1, .. 
})); - } - - #[test] - fn emit_with_effects() { - let mut graph = BuildGraph::new(); - - let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(node).add_effect(BuildEffect::CaptureNode); - graph.node_mut(node).add_effect(BuildEffect::Field { - name: "name", - span: Default::default(), - }); - graph.add_definition("Main", node); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - let view = compiled.transition_view(0); - let effects = view.effects(); - assert_eq!(effects.len(), 2); - assert!(matches!(effects[0], EffectOp::CaptureNode)); - assert!(matches!(effects[1], EffectOp::Field(_))); - - // Verify string was interned - if let EffectOp::Field(id) = effects[1] { - assert_eq!(compiled.string(id), "name"); - } - } - - #[test] - fn emit_with_successors() { - let mut graph = BuildGraph::new(); - - // Create: entry -> branch -> [a, b] - let entry = graph.add_epsilon(); - let a = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - let b = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( - "function_declaration", - ))); - graph.connect(entry, a); - graph.connect(entry, b); - graph.add_definition("Main", entry); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - assert_eq!(compiled.transition_count(), 3); - - let view = compiled.transition_view(0); - let successors = view.successors(); - assert_eq!(successors.len(), 2); - assert_eq!(successors[0], 1); - assert_eq!(successors[1], 2); - } - - #[test] - fn emit_many_successors_spills() { - let mut graph = BuildGraph::new(); - - // Create entry with 10 successors (exceeds MAX_INLINE_SUCCESSORS) - let entry = graph.add_epsilon(); - for _ in 0..10 { - let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.connect(entry, node); - } - graph.add_definition("Main", entry); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - let t = compiled.transition(0); - assert!(!t.has_inline_successors()); - assert_eq!(t.successor_count, 10); - - let view = compiled.transition_view(0); - let successors = view.successors(); - assert_eq!(successors.len(), 10); - } - - #[test] - fn string_interning_deduplicates() { - let mut graph = BuildGraph::new(); - - // Two fields with same name - let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(n1).add_effect(BuildEffect::Field { - name: "value", - span: Default::default(), - }); - - let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(n2).add_effect(BuildEffect::Field { - name: "value", - span: Default::default(), - }); - graph.connect(n1, n2); - - graph.add_definition("Main", n1); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - // Both should reference the same string ID - let e1 = compiled.transition_view(0).effects(); - let e2 = 
compiled.transition_view(1).effects(); - - let id1 = match e1[0] { - EffectOp::Field(id) => id, - _ => panic!(), - }; - let id2 = match e2[0] { - EffectOp::Field(id) => id, - _ => panic!(), - }; - - assert_eq!(id1, id2); - assert_eq!(compiled.string(id1), "value"); - } - - #[test] - fn unknown_node_kind_errors() { - let mut graph = BuildGraph::new(); - let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("unknown_kind"))); - graph.add_definition("Main", node); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let result = emitter.emit(); - - assert!(matches!(result, Err(EmitError::UnknownNodeKind(_)))); - } - - #[test] - fn serialize_deserialize_roundtrip() { - let mut graph = BuildGraph::new(); - - // Build a small graph with effects - let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(n1).add_effect(BuildEffect::CaptureNode); - graph.node_mut(n1).add_effect(BuildEffect::Field { - name: "id", - span: Default::default(), - }); - - let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( - "function_declaration", - ))); - graph.node_mut(n2).add_effect(BuildEffect::CaptureNode); - graph.connect(n1, n2); - - graph.add_definition("Main", n1); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - // Emit - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - // Serialize - let bytes = crate::ir::to_bytes(&compiled).expect("serialize should succeed"); - - // Deserialize - let restored = crate::ir::from_bytes(&bytes).expect("deserialize should succeed"); - - // Verify counts - assert_eq!(restored.transition_count(), compiled.transition_count()); - assert_eq!(restored.entrypoint_count(), compiled.entrypoint_count()); - - // Check transitions match - for i in 0..compiled.transition_count() { - let orig = compiled.transition_view(i); - let rest = restored.transition_view(i); - - assert_eq!(orig.successors(), rest.successors()); - assert_eq!(orig.effects().len(), rest.effects().len()); - } - - // Check strings match - let ep = restored.entrypoints()[0]; - assert_eq!(restored.string(ep.name_id()), "Main"); - } - - #[test] - fn dump_produces_output() { - let mut graph = BuildGraph::new(); - let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); - graph.node_mut(node).add_effect(BuildEffect::CaptureNode); - graph.add_definition("Test", node); - - let type_info = TypeInferenceResult::default(); - let resolver = make_resolver(); - - let emitter = QueryEmitter::new(&graph, &type_info, resolver); - let compiled = emitter.emit().expect("emit should succeed"); - - let dump = compiled.dump(); - - assert!(dump.contains("CompiledQuery")); - assert!(dump.contains("Test")); - assert!(dump.contains("Capture")); - assert!(dump.contains("Node(1)")); - } -} diff --git a/crates/plotnik-lib/src/ir/entrypoint.rs b/crates/plotnik-lib/src/ir/entrypoint.rs deleted file mode 100644 index e5ef5936..00000000 --- a/crates/plotnik-lib/src/ir/entrypoint.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! Named entrypoints for multi-definition queries. -//! -//! Entrypoints provide named exports for definitions. The default entrypoint -//! is always Transition 0; this table enables accessing other definitions by name. - -use super::ids::{StringId, TransitionId, TypeId}; - -/// Named entrypoint into the query graph. 
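The 12-byte record defined next pairs an interned name with a target transition and a result type; a consumer resolves a definition by comparing the name through the string table. A standalone sketch of that lookup over a toy string table (the types and data here are simplified stand-ins, not the crate's):

```rust
/// Simplified stand-in for `Entrypoint`: an interned name plus a target transition.
#[derive(Debug, Clone, Copy)]
struct Entrypoint {
    name_id: u16,
    target: u32,
}

/// Finds the entrypoint whose interned name resolves to `name`.
fn find_entrypoint(strings: &[&str], entrypoints: &[Entrypoint], name: &str) -> Option<u32> {
    entrypoints
        .iter()
        .find(|ep| strings[ep.name_id as usize] == name)
        .map(|ep| ep.target)
}

fn main() {
    let strings = ["Main", "Imports"];
    let entrypoints = [
        Entrypoint { name_id: 0, target: 0 }, // the default entrypoint is always transition 0
        Entrypoint { name_id: 1, target: 7 },
    ];
    assert_eq!(find_entrypoint(&strings, &entrypoints, "Imports"), Some(7));
    assert_eq!(find_entrypoint(&strings, &entrypoints, "Missing"), None);
}
```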
-/// -/// Layout: 12 bytes, align 4. -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct Entrypoint { - /// String ID for the entrypoint name. - name_id: StringId, - _pad: u16, - /// Target transition (definition entry point). - target: TransitionId, - /// Result type of this definition (see ADR-0007). - result_type: TypeId, - _pad2: u16, -} - -const _: () = assert!(size_of::() == 12); -const _: () = assert!(align_of::() == 4); - -impl Entrypoint { - /// Creates a new entrypoint. - pub const fn new(name_id: StringId, target: TransitionId, result_type: TypeId) -> Self { - Self { - name_id, - _pad: 0, - target, - result_type, - _pad2: 0, - } - } - - /// Returns the string ID of the entrypoint name. - #[inline] - pub const fn name_id(&self) -> StringId { - self.name_id - } - - /// Returns the target transition ID. - #[inline] - pub const fn target(&self) -> TransitionId { - self.target - } - - /// Returns the result type ID. - #[inline] - pub const fn result_type(&self) -> TypeId { - self.result_type - } -} diff --git a/crates/plotnik-lib/src/ir/ids.rs b/crates/plotnik-lib/src/ir/ids.rs deleted file mode 100644 index cfcc0d2f..00000000 --- a/crates/plotnik-lib/src/ir/ids.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! ID types for the compiled query IR. -//! -//! These are lightweight wrappers/aliases for indices and identifiers -//! used throughout the IR. They provide type safety without runtime cost. - -/// Index into the transitions segment. -pub type TransitionId = u32; - -/// Index into the string_refs segment. -pub type StringId = u16; - -/// Sentinel value for unnamed types (wrapper types have no explicit name). -pub const STRING_NONE: StringId = 0xFFFF; - -/// Field name in effects (alias for type safety). -pub type DataFieldId = StringId; - -/// Variant tag in effects (alias for type safety). -pub type VariantTagId = StringId; - -/// Index for definition references (Enter/Exit). -pub type RefId = u16; - -/// Index into type_defs segment (with reserved primitives 0-2). -pub type TypeId = u16; - -// TypeId reserved constants -pub const TYPE_VOID: TypeId = 0; -pub const TYPE_NODE: TypeId = 1; -pub const TYPE_STR: TypeId = 2; -pub const TYPE_INVALID: TypeId = 0xFFFF; diff --git a/crates/plotnik-lib/src/ir/matcher.rs b/crates/plotnik-lib/src/ir/matcher.rs deleted file mode 100644 index be171f14..00000000 --- a/crates/plotnik-lib/src/ir/matcher.rs +++ /dev/null @@ -1,91 +0,0 @@ -//! Node matchers for transition graph. -//! -//! Matchers are purely for node matching - navigation is handled by `Nav`. - -use plotnik_core::{NodeFieldId, NodeTypeId}; - -use super::Slice; - -/// Discriminant for matcher variants. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum MatcherKind { - Epsilon, - Node, - Anonymous, - Wildcard, -} - -/// Matcher determines what node satisfies a transition. -/// -/// Navigation (descend/ascend) is handled by `Nav`, not matchers. -#[repr(C, u32)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Matcher { - /// Matches without consuming input. Used for control flow transitions. - Epsilon, - - /// Matches a named node by kind, optionally constrained by field. - Node { - kind: NodeTypeId, - field: Option, - negated_fields: Slice, - }, - - /// Matches an anonymous node by kind, optionally constrained by field. - Anonymous { - kind: NodeTypeId, - field: Option, - negated_fields: Slice, - }, - - /// Matches any node (named or anonymous). - Wildcard, -} - -impl Matcher { - /// Returns true if this matcher consumes a node. 
- #[inline] - pub fn consumes_node(&self) -> bool { - !matches!(self, Matcher::Epsilon) - } - - /// Returns the discriminant kind. - #[inline] - pub fn kind(&self) -> MatcherKind { - match self { - Matcher::Epsilon => MatcherKind::Epsilon, - Matcher::Node { .. } => MatcherKind::Node, - Matcher::Anonymous { .. } => MatcherKind::Anonymous, - Matcher::Wildcard => MatcherKind::Wildcard, - } - } - - /// Returns the node type ID for Node/Anonymous variants, `None` otherwise. - #[inline] - pub fn node_kind(&self) -> Option { - match self { - Matcher::Node { kind, .. } | Matcher::Anonymous { kind, .. } => Some(*kind), - _ => None, - } - } - - /// Returns the field constraint, if any. - #[inline] - pub fn field(&self) -> Option { - match self { - Matcher::Node { field, .. } | Matcher::Anonymous { field, .. } => *field, - _ => None, - } - } - - /// Returns the negated fields slice. Empty for Epsilon/Wildcard. - #[inline] - pub fn negated_fields(&self) -> Slice { - match self { - Matcher::Node { negated_fields, .. } | Matcher::Anonymous { negated_fields, .. } => { - *negated_fields - } - _ => Slice::empty(), - } - } -} diff --git a/crates/plotnik-lib/src/ir/matcher_tests.rs b/crates/plotnik-lib/src/ir/matcher_tests.rs deleted file mode 100644 index 74410678..00000000 --- a/crates/plotnik-lib/src/ir/matcher_tests.rs +++ /dev/null @@ -1,27 +0,0 @@ -use super::*; - -#[test] -fn matcher_size_and_alignment() { - assert_eq!(size_of::(), 16); - assert_eq!(align_of::(), 4); -} - -#[test] -fn consumes_node() { - assert!(!Matcher::Epsilon.consumes_node()); - assert!(Matcher::Wildcard.consumes_node()); - - let node_matcher = Matcher::Node { - kind: 42, - field: None, - negated_fields: Slice::empty(), - }; - assert!(node_matcher.consumes_node()); - - let anon_matcher = Matcher::Anonymous { - kind: 1, - field: None, - negated_fields: Slice::empty(), - }; - assert!(anon_matcher.consumes_node()); -} diff --git a/crates/plotnik-lib/src/ir/mod.rs b/crates/plotnik-lib/src/ir/mod.rs deleted file mode 100644 index 6a1e421d..00000000 --- a/crates/plotnik-lib/src/ir/mod.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! Intermediate Representation (IR) for compiled queries. -//! -//! This module contains the in-memory representation of compiled queries -//! as defined in ADR-0004 through ADR-0008. The IR is designed for: -//! - Cache-efficient execution (64-byte aligned transitions) -//! - Zero-copy access patterns -//! - WASM compatibility -//! -//! Note: This module contains only type definitions. Query execution -//! lives elsewhere. 
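The cache-line and zero-copy guarantees this module documents are kept honest with the `const _: () = assert!(…)` pattern that recurs throughout these files: if a struct's size or alignment drifts, compilation fails instead of silently corrupting the binary layout. A minimal standalone illustration of the pattern (`Packed` is an invented type for the example, not one of the IR structs):

```rust
use std::mem::{align_of, size_of};

/// An invented 64-byte, cache-line-aligned record, in the spirit of `Transition`.
#[repr(C, align(64))]
struct Packed {
    header: u32,
    payload: [u8; 60],
}

// Checked at compile time: a layout regression becomes a build error, not a runtime surprise.
const _: () = assert!(size_of::<Packed>() == 64);
const _: () = assert!(align_of::<Packed>() == 64);

fn main() {
    println!("Packed is {} bytes, {}-aligned", size_of::<Packed>(), align_of::<Packed>());
}
```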
- -mod compiled; -mod effect; -mod emit; -mod entrypoint; -mod ids; -mod matcher; -mod nav; -mod ref_transition; -mod serialize; -mod slice; -mod string_ref; -mod strings; -mod transition; -mod type_metadata; - -#[cfg(test)] -mod effect_tests; -#[cfg(test)] -mod matcher_tests; -#[cfg(test)] -mod ref_transition_tests; -#[cfg(test)] -mod slice_tests; -#[cfg(test)] -mod string_ref_tests; - -// Re-export ID types -pub use ids::{DataFieldId, RefId, STRING_NONE, StringId, TransitionId, TypeId, VariantTagId}; - -// Re-export TypeId constants -pub use ids::{TYPE_INVALID, TYPE_NODE, TYPE_STR, TYPE_VOID}; - -// Re-export Slice -pub use slice::Slice; - -// Re-export navigation -pub use nav::{Nav, NavKind}; - -// Re-export matcher -pub use matcher::{Matcher, MatcherKind}; - -// Re-export effects -pub use effect::EffectOp; - -// Re-export ref transition -pub use ref_transition::RefTransition; - -// Re-export transition -pub use transition::{MAX_INLINE_SUCCESSORS, Transition}; - -// Re-export type metadata -pub use type_metadata::{TYPE_COMPOSITE_START, TypeDef, TypeKind, TypeMember}; - -// Re-export string ref -pub use string_ref::StringRef; - -// Re-export entrypoint -pub use entrypoint::Entrypoint; - -// Re-export compiled query types -pub use compiled::{ - BUFFER_ALIGN, CompiledQuery, CompiledQueryBuffer, CompiledQueryOffsets, FORMAT_VERSION, MAGIC, - MatcherView, TransitionView, align_up, -}; - -// Re-export string interner -pub use strings::StringInterner; - -// Re-export emitter -pub use emit::{EmitError, EmitResult, MapResolver, NodeKindResolver, NullResolver, QueryEmitter}; - -// Re-export serialization -pub use serialize::{ - HEADER_SIZE, SerializeError, SerializeResult, deserialize, from_bytes, serialize, to_bytes, -}; diff --git a/crates/plotnik-lib/src/ir/nav.rs b/crates/plotnik-lib/src/ir/nav.rs deleted file mode 100644 index 76d74ebf..00000000 --- a/crates/plotnik-lib/src/ir/nav.rs +++ /dev/null @@ -1,180 +0,0 @@ -//! Tree navigation instructions for query execution. -//! -//! Navigation decisions are resolved at graph construction time, not runtime. -//! Each transition carries its own `Nav` instruction. - -/// Navigation instruction determining cursor movement and skip policy. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(C)] -pub struct Nav { - pub kind: NavKind, - /// Ascent level count for `Up*` variants, ignored otherwise. - pub level: u8, -} - -impl Nav { - pub const fn stay() -> Self { - Self { - kind: NavKind::Stay, - level: 0, - } - } - - pub const fn next() -> Self { - Self { - kind: NavKind::Next, - level: 0, - } - } - - pub const fn next_skip_trivia() -> Self { - Self { - kind: NavKind::NextSkipTrivia, - level: 0, - } - } - - pub const fn next_exact() -> Self { - Self { - kind: NavKind::NextExact, - level: 0, - } - } - - pub const fn down() -> Self { - Self { - kind: NavKind::Down, - level: 0, - } - } - - pub const fn down_skip_trivia() -> Self { - Self { - kind: NavKind::DownSkipTrivia, - level: 0, - } - } - - pub const fn down_exact() -> Self { - Self { - kind: NavKind::DownExact, - level: 0, - } - } - - pub const fn up(level: u8) -> Self { - Self { - kind: NavKind::Up, - level, - } - } - - /// Constrained ascent requires `level == 1`. Multi-level ascent with - /// intermediate constraints must decompose into separate transitions. 
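The two constructors that follow assert this invariant. When an ascent has to check a constraint at every intermediate level, graph construction is expected to emit one level-1 transition per level rather than a single multi-level one. A standalone sketch of that decomposition over a deliberately simplified `Nav` (the helper is illustrative, not part of the crate):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Nav {
    /// Unconstrained ascent can cross several levels in one step.
    Up { level: u8 },
    /// Constrained ascent validates each boundary, so it moves one level at a time.
    UpExact,
}

/// Expands an n-level constrained ascent into n single-level transitions.
fn decompose_constrained_ascent(levels: u8) -> Vec<Nav> {
    (0..levels).map(|_| Nav::UpExact).collect()
}

fn main() {
    let unconstrained = Nav::Up { level: 3 };
    let constrained = decompose_constrained_ascent(3);
    assert_eq!(constrained, vec![Nav::UpExact, Nav::UpExact, Nav::UpExact]);
    println!("{:?} vs {:?}", unconstrained, constrained);
}
```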
- pub const fn up_skip_trivia(level: u8) -> Self { - assert!( - level == 1, - "UpSkipTrivia requires level == 1; decompose for intermediate constraints" - ); - Self { - kind: NavKind::UpSkipTrivia, - level, - } - } - - /// Constrained ascent requires `level == 1`. Multi-level ascent with - /// intermediate constraints must decompose into separate transitions. - pub const fn up_exact(level: u8) -> Self { - assert!( - level == 1, - "UpExact requires level == 1; decompose for intermediate constraints" - ); - Self { - kind: NavKind::UpExact, - level, - } - } - - /// Returns true if this is a Stay navigation (no movement). - #[inline] - pub const fn is_stay(&self) -> bool { - matches!(self.kind, NavKind::Stay) - } - - /// Returns true if this is a horizontal sibling traversal (Next*). - #[inline] - pub const fn is_next(&self) -> bool { - matches!( - self.kind, - NavKind::Next | NavKind::NextSkipTrivia | NavKind::NextExact - ) - } - - /// Returns true if this descends into children (Down*). - #[inline] - pub const fn is_down(&self) -> bool { - matches!( - self.kind, - NavKind::Down | NavKind::DownSkipTrivia | NavKind::DownExact - ) - } - - /// Returns true if this ascends to parent(s) (Up*). - #[inline] - pub const fn is_up(&self) -> bool { - matches!( - self.kind, - NavKind::Up | NavKind::UpSkipTrivia | NavKind::UpExact - ) - } - - /// Returns true if this navigation skips only trivia nodes. - #[inline] - pub const fn is_skip_trivia(&self) -> bool { - matches!( - self.kind, - NavKind::NextSkipTrivia | NavKind::DownSkipTrivia | NavKind::UpSkipTrivia - ) - } - - /// Returns true if this navigation requires exact position (no skipping). - #[inline] - pub const fn is_exact(&self) -> bool { - matches!( - self.kind, - NavKind::NextExact | NavKind::DownExact | NavKind::UpExact - ) - } -} - -/// Navigation kind determining movement direction and skip policy. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -pub enum NavKind { - /// No movement. Used only for first transition when cursor is at root. - Stay = 0, - - // Sibling traversal (horizontal) - /// Skip any nodes to find match. - Next = 1, - /// Skip trivia only, fail if non-trivia skipped. - NextSkipTrivia = 2, - /// No skipping, current sibling must match. - NextExact = 3, - - // Enter children (descend) - /// Skip any among children. - Down = 4, - /// Skip trivia only among children. - DownSkipTrivia = 5, - /// First child must match, no skip. - DownExact = 6, - - // Exit children (ascend) - /// Ascend `level` levels, no constraint. - Up = 7, - /// Validate last non-trivia, ascend `level` levels. - UpSkipTrivia = 8, - /// Validate last child, ascend `level` levels. - UpExact = 9, -} diff --git a/crates/plotnik-lib/src/ir/ref_transition.rs b/crates/plotnik-lib/src/ir/ref_transition.rs deleted file mode 100644 index 1316f144..00000000 --- a/crates/plotnik-lib/src/ir/ref_transition.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Definition call/return markers for recursive transition network. -//! -//! See ADR-0005 for semantics of Enter/Exit transitions. - -use super::RefId; - -/// Marks a transition as entering or exiting a definition reference. -/// -/// A transition can hold at most one `RefTransition`. Sequences like -/// `Enter(A) → Enter(B)` require epsilon chains. -/// -/// Layout: 1-byte discriminant + 1-byte padding + 2-byte RefId = 4 bytes, align 2. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -#[repr(C, u8)] -pub enum RefTransition { - /// No definition boundary crossing. 
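Alongside `None`, the `Enter` and `Exit` variants documented next implement definition calls: `Enter` pushes a call frame whose return transitions come from the enter transition's successor list, and `Exit` pops that frame and resumes from the stored returns. A standalone sketch of that frame discipline with plain vectors (the executor types here are invented for illustration):

```rust
/// Invented stand-in for the executor's call stack: each frame stores the
/// transitions to resume at once the called definition exits.
struct CallStack {
    frames: Vec<Vec<u32>>,
}

impl CallStack {
    fn new() -> Self {
        Self { frames: Vec::new() }
    }

    /// `Enter(ref_id)`: successors[0] is the definition entry, the rest are returns.
    fn enter(&mut self, successors: &[u32]) -> u32 {
        let (entry, returns) = successors.split_first().expect("Enter needs an entry successor");
        self.frames.push(returns.to_vec());
        *entry
    }

    /// `Exit(ref_id)`: its own successors are ignored; resume from the stored returns.
    fn exit(&mut self) -> Vec<u32> {
        self.frames.pop().unwrap_or_default()
    }
}

fn main() {
    let mut stack = CallStack::new();
    // Enter a definition whose body starts at transition 10, remembering 4 and 5 as returns.
    let entry = stack.enter(&[10, 4, 5]);
    assert_eq!(entry, 10);
    // Once the definition's Exit transition fires, execution resumes at the stored returns.
    assert_eq!(stack.exit(), vec![4, 5]);
}
```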
- #[default] - None, - - /// Push call frame with return transitions. - /// - /// For `Enter(ref_id)` transitions, successors have special structure: - /// - `successors()[0]`: definition entry point (where to jump) - /// - `successors()[1..]`: return transitions (stored in call frame) - Enter(RefId), - - /// Pop frame, continue with stored return transitions. - /// - /// Successors are ignored—returns come from the call frame pushed at `Enter`. - Exit(RefId), -} - -impl RefTransition { - /// Returns `true` if this is `None`. - #[inline] - pub fn is_none(self) -> bool { - matches!(self, Self::None) - } - - /// Returns the ref ID if this is `Enter` or `Exit`. - #[inline] - pub fn ref_id(self) -> Option { - match self { - Self::None => None, - Self::Enter(id) | Self::Exit(id) => Some(id), - } - } -} diff --git a/crates/plotnik-lib/src/ir/ref_transition_tests.rs b/crates/plotnik-lib/src/ir/ref_transition_tests.rs deleted file mode 100644 index cbb3a438..00000000 --- a/crates/plotnik-lib/src/ir/ref_transition_tests.rs +++ /dev/null @@ -1,26 +0,0 @@ -use super::*; - -#[test] -fn size_and_alignment() { - assert_eq!(size_of::(), 4); - assert_eq!(align_of::(), 2); -} - -#[test] -fn none_is_default() { - assert_eq!(RefTransition::default(), RefTransition::None); -} - -#[test] -fn is_none() { - assert!(RefTransition::None.is_none()); - assert!(!RefTransition::Enter(1).is_none()); - assert!(!RefTransition::Exit(1).is_none()); -} - -#[test] -fn ref_id_extraction() { - assert_eq!(RefTransition::None.ref_id(), None); - assert_eq!(RefTransition::Enter(42).ref_id(), Some(42)); - assert_eq!(RefTransition::Exit(123).ref_id(), Some(123)); -} diff --git a/crates/plotnik-lib/src/ir/serialize.rs b/crates/plotnik-lib/src/ir/serialize.rs deleted file mode 100644 index a5b49f87..00000000 --- a/crates/plotnik-lib/src/ir/serialize.rs +++ /dev/null @@ -1,414 +0,0 @@ -//! Serialization and deserialization for compiled queries. -//! -//! Binary format (see ADR-0004): -//! ```text -//! Header (64 bytes): -//! magic: [u8; 4] b"PTKQ" -//! version: u32 format version -//! checksum: u32 CRC32(header[12..64] || buffer_data) -//! buffer_len: u32 -//! successors_offset: u32 -//! effects_offset: u32 -//! negated_fields_offset: u32 -//! string_refs_offset: u32 -//! string_bytes_offset: u32 -//! type_defs_offset: u32 -//! type_members_offset: u32 -//! entrypoints_offset: u32 -//! trivia_kinds_offset: u32 -//! _reserved: [u8; 12] -//! ``` - -use std::io::{Read, Write}; - -use super::compiled::{CompiledQuery, CompiledQueryBuffer, FORMAT_VERSION, MAGIC}; - -/// Header size in bytes (64 bytes for cache-line alignment). -pub const HEADER_SIZE: usize = 64; - -/// Serialization error. -#[derive(Debug, Clone)] -pub enum SerializeError { - /// Invalid magic bytes. - InvalidMagic([u8; 4]), - /// Version mismatch (expected, found). - VersionMismatch { expected: u32, found: u32 }, - /// Checksum mismatch (expected, found). - ChecksumMismatch { expected: u32, found: u32 }, - /// IO error message. - Io(String), - /// Header too short. - HeaderTooShort, - /// Buffer alignment error. 
- AlignmentError, -} - -impl std::fmt::Display for SerializeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SerializeError::InvalidMagic(m) => { - write!(f, "invalid magic: {:?}", m) - } - SerializeError::VersionMismatch { expected, found } => { - write!( - f, - "version mismatch: expected {}, found {}", - expected, found - ) - } - SerializeError::ChecksumMismatch { expected, found } => { - write!( - f, - "checksum mismatch: expected {:08x}, found {:08x}", - expected, found - ) - } - SerializeError::Io(msg) => write!(f, "io error: {}", msg), - SerializeError::HeaderTooShort => write!(f, "header too short"), - SerializeError::AlignmentError => write!(f, "buffer alignment error"), - } - } -} - -impl std::error::Error for SerializeError {} - -impl From for SerializeError { - fn from(e: std::io::Error) -> Self { - SerializeError::Io(e.to_string()) - } -} - -/// Result type for serialization operations. -pub type SerializeResult = Result; - -/// Computes CRC32 checksum. -fn crc32(data: &[u8]) -> u32 { - // Simple CRC32 implementation (IEEE polynomial) - const CRC32_TABLE: [u32; 256] = generate_crc32_table(); - - let mut crc: u32 = 0xFFFFFFFF; - for &byte in data { - let index = ((crc ^ byte as u32) & 0xFF) as usize; - crc = CRC32_TABLE[index] ^ (crc >> 8); - } - !crc -} - -const fn generate_crc32_table() -> [u32; 256] { - const POLYNOMIAL: u32 = 0xEDB88320; - let mut table = [0u32; 256]; - let mut i = 0; - while i < 256 { - let mut crc = i as u32; - let mut j = 0; - while j < 8 { - if crc & 1 != 0 { - crc = (crc >> 1) ^ POLYNOMIAL; - } else { - crc >>= 1; - } - j += 1; - } - table[i] = crc; - i += 1; - } - table -} - -/// Serialized header structure (64 bytes, matching ADR-0004). -/// -/// Large counts (transition, successor, effect) are computed from offsets. -/// Small counts are stored in the reserved area since they can't be reliably -/// computed due to alignment padding. -#[repr(C)] -struct Header { - magic: [u8; 4], - version: u32, - checksum: u32, - buffer_len: u32, - successors_offset: u32, - effects_offset: u32, - negated_fields_offset: u32, - string_refs_offset: u32, - string_bytes_offset: u32, - type_defs_offset: u32, - type_members_offset: u32, - entrypoints_offset: u32, - trivia_kinds_offset: u32, - // Counts stored in reserved area (12 bytes = 6 x u16) - negated_field_count: u16, - string_ref_count: u16, - type_def_count: u16, - type_member_count: u16, - entrypoint_count: u16, - trivia_kind_count: u16, -} - -const _: () = assert!(std::mem::size_of::
() == HEADER_SIZE); - -impl Header { - fn to_bytes(&self) -> [u8; HEADER_SIZE] { - let mut bytes = [0u8; HEADER_SIZE]; - bytes[0..4].copy_from_slice(&self.magic); - bytes[4..8].copy_from_slice(&self.version.to_le_bytes()); - bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes()); - bytes[12..16].copy_from_slice(&self.buffer_len.to_le_bytes()); - bytes[16..20].copy_from_slice(&self.successors_offset.to_le_bytes()); - bytes[20..24].copy_from_slice(&self.effects_offset.to_le_bytes()); - bytes[24..28].copy_from_slice(&self.negated_fields_offset.to_le_bytes()); - bytes[28..32].copy_from_slice(&self.string_refs_offset.to_le_bytes()); - bytes[32..36].copy_from_slice(&self.string_bytes_offset.to_le_bytes()); - bytes[36..40].copy_from_slice(&self.type_defs_offset.to_le_bytes()); - bytes[40..44].copy_from_slice(&self.type_members_offset.to_le_bytes()); - bytes[44..48].copy_from_slice(&self.entrypoints_offset.to_le_bytes()); - bytes[48..52].copy_from_slice(&self.trivia_kinds_offset.to_le_bytes()); - // Counts in reserved area - bytes[52..54].copy_from_slice(&self.negated_field_count.to_le_bytes()); - bytes[54..56].copy_from_slice(&self.string_ref_count.to_le_bytes()); - bytes[56..58].copy_from_slice(&self.type_def_count.to_le_bytes()); - bytes[58..60].copy_from_slice(&self.type_member_count.to_le_bytes()); - bytes[60..62].copy_from_slice(&self.entrypoint_count.to_le_bytes()); - bytes[62..64].copy_from_slice(&self.trivia_kind_count.to_le_bytes()); - bytes - } - - fn from_bytes(bytes: &[u8; HEADER_SIZE]) -> Self { - Self { - magic: bytes[0..4].try_into().unwrap(), - version: u32::from_le_bytes(bytes[4..8].try_into().unwrap()), - checksum: u32::from_le_bytes(bytes[8..12].try_into().unwrap()), - buffer_len: u32::from_le_bytes(bytes[12..16].try_into().unwrap()), - successors_offset: u32::from_le_bytes(bytes[16..20].try_into().unwrap()), - effects_offset: u32::from_le_bytes(bytes[20..24].try_into().unwrap()), - negated_fields_offset: u32::from_le_bytes(bytes[24..28].try_into().unwrap()), - string_refs_offset: u32::from_le_bytes(bytes[28..32].try_into().unwrap()), - string_bytes_offset: u32::from_le_bytes(bytes[32..36].try_into().unwrap()), - type_defs_offset: u32::from_le_bytes(bytes[36..40].try_into().unwrap()), - type_members_offset: u32::from_le_bytes(bytes[40..44].try_into().unwrap()), - entrypoints_offset: u32::from_le_bytes(bytes[44..48].try_into().unwrap()), - trivia_kinds_offset: u32::from_le_bytes(bytes[48..52].try_into().unwrap()), - negated_field_count: u16::from_le_bytes(bytes[52..54].try_into().unwrap()), - string_ref_count: u16::from_le_bytes(bytes[54..56].try_into().unwrap()), - type_def_count: u16::from_le_bytes(bytes[56..58].try_into().unwrap()), - type_member_count: u16::from_le_bytes(bytes[58..60].try_into().unwrap()), - entrypoint_count: u16::from_le_bytes(bytes[60..62].try_into().unwrap()), - trivia_kind_count: u16::from_le_bytes(bytes[62..64].try_into().unwrap()), - } - } -} - -/// Serializes a compiled query to a writer. 
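`serialize` below stamps the header with an IEEE CRC-32 computed over `header[12..64]` followed by the buffer, using the table precomputed above. An equivalent bit-at-a-time formulation of the same polynomial, checked against the standard test vector that the tests at the end of this file also assert (this sketches only the checksum, not the header layout):

```rust
/// Bit-at-a-time IEEE CRC-32 (reflected); equivalent to a table-driven
/// implementation, just shorter and slower.
fn crc32(data: &[u8]) -> u32 {
    const POLYNOMIAL: u32 = 0xEDB8_8320;
    let mut crc = 0xFFFF_FFFFu32;
    for &byte in data {
        crc ^= byte as u32;
        for _ in 0..8 {
            crc = if crc & 1 != 0 { (crc >> 1) ^ POLYNOMIAL } else { crc >> 1 };
        }
    }
    !crc
}

fn main() {
    // The canonical CRC-32 check value for "123456789".
    assert_eq!(crc32(b"123456789"), 0xCBF4_3926);
    println!("crc32 check value matches");
}
```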
-pub fn serialize(query: &CompiledQuery, mut writer: W) -> SerializeResult<()> { - let offsets = query.offsets(); - let buffer = query.buffer(); - - // Build header (without checksum first) - let mut header = Header { - magic: MAGIC, - version: FORMAT_VERSION, - checksum: 0, // Computed below - buffer_len: buffer.len() as u32, - successors_offset: offsets.successors_offset, - effects_offset: offsets.effects_offset, - negated_fields_offset: offsets.negated_fields_offset, - string_refs_offset: offsets.string_refs_offset, - string_bytes_offset: offsets.string_bytes_offset, - type_defs_offset: offsets.type_defs_offset, - type_members_offset: offsets.type_members_offset, - entrypoints_offset: offsets.entrypoints_offset, - trivia_kinds_offset: offsets.trivia_kinds_offset, - negated_field_count: query.negated_fields().len() as u16, - string_ref_count: query.string_refs().len() as u16, - type_def_count: query.type_defs().len() as u16, - type_member_count: query.type_members().len() as u16, - entrypoint_count: query.entrypoint_count(), - trivia_kind_count: query.trivia_kinds().len() as u16, - }; - - // Compute checksum over header[12..64] + buffer - let header_bytes = header.to_bytes(); - let mut checksum_data = Vec::with_capacity(52 + buffer.len()); - checksum_data.extend_from_slice(&header_bytes[12..]); - checksum_data.extend_from_slice(buffer.as_slice()); - header.checksum = crc32(&checksum_data); - - // Write header and buffer - writer.write_all(&header.to_bytes())?; - writer.write_all(buffer.as_slice())?; - - Ok(()) -} - -/// Serializes a compiled query to a byte vector. -pub fn to_bytes(query: &CompiledQuery) -> SerializeResult> { - let mut bytes = Vec::with_capacity(HEADER_SIZE + query.buffer().len()); - serialize(query, &mut bytes)?; - Ok(bytes) -} - -/// Deserializes a compiled query from a reader. 
-pub fn deserialize(mut reader: R) -> SerializeResult { - // Read header - let mut header_bytes = [0u8; HEADER_SIZE]; - reader.read_exact(&mut header_bytes)?; - - let header = Header::from_bytes(&header_bytes); - - // Verify magic - if header.magic != MAGIC { - return Err(SerializeError::InvalidMagic(header.magic)); - } - - // Verify version - if header.version != FORMAT_VERSION { - return Err(SerializeError::VersionMismatch { - expected: FORMAT_VERSION, - found: header.version, - }); - } - - // Read buffer - let buffer_len = header.buffer_len as usize; - let mut buffer = CompiledQueryBuffer::allocate(buffer_len); - reader.read_exact(buffer.as_mut_slice())?; - - // Verify checksum - let mut checksum_data = Vec::with_capacity(52 + buffer_len); - checksum_data.extend_from_slice(&header_bytes[12..]); - checksum_data.extend_from_slice(buffer.as_slice()); - let computed_checksum = crc32(&checksum_data); - - if header.checksum != computed_checksum { - return Err(SerializeError::ChecksumMismatch { - expected: header.checksum, - found: computed_checksum, - }); - } - - // Reconstruct all counts from offsets (transitions are 64 bytes each) - let transition_count = header.successors_offset / 64; - let successor_count = compute_count_from_offsets( - header.successors_offset, - header.effects_offset, - 4, // size of TransitionId - ); - let effect_count = compute_count_from_offsets( - header.effects_offset, - header.negated_fields_offset, - 4, // size of EffectOp - ); - - // Counts are read directly from header - let negated_field_count = header.negated_field_count; - let string_ref_count = header.string_ref_count; - let type_def_count = header.type_def_count; - let type_member_count = header.type_member_count; - let entrypoint_count = header.entrypoint_count; - let trivia_kind_count = header.trivia_kind_count; - - Ok(CompiledQuery::new( - buffer, - header.successors_offset, - header.effects_offset, - header.negated_fields_offset, - header.string_refs_offset, - header.string_bytes_offset, - header.type_defs_offset, - header.type_members_offset, - header.entrypoints_offset, - header.trivia_kinds_offset, - transition_count, - successor_count, - effect_count, - negated_field_count, - string_ref_count, - type_def_count, - type_member_count, - entrypoint_count, - trivia_kind_count, - )) -} - -/// Deserializes a compiled query from a byte slice. 
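`from_bytes` and the `compute_count_from_offsets` helper that follow recover the large counts from segment geometry alone: transitions are 64 bytes each, so their count is `successors_offset / 64`, and the element counts of the following segments fall out of the distance between consecutive offsets. The smaller counts ride in the header's reserved area instead, since alignment padding would make this division unreliable for them. A standalone sketch of the arithmetic with made-up offsets:

```rust
/// Number of `element_size`-byte elements between two segment offsets.
fn count_between(start: u32, end: u32, element_size: u32) -> u32 {
    if end <= start { 0 } else { (end - start) / element_size }
}

fn main() {
    // Hypothetical layout: 3 transitions of 64 bytes, then 5 successors of 4 bytes each.
    let successors_offset = 192;
    let effects_offset = 212;

    let transition_count = successors_offset / 64;
    let successor_count = count_between(successors_offset, effects_offset, 4);

    assert_eq!((transition_count, successor_count), (3, 5));
}
```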
-pub fn from_bytes(bytes: &[u8]) -> SerializeResult { - deserialize(std::io::Cursor::new(bytes)) -} - -fn compute_count_from_offsets(start: u32, end: u32, element_size: u32) -> u32 { - if end <= start { - return 0; - } - (end - start) / element_size -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn crc32_known_value() { - // Test against known CRC32 value - let data = b"123456789"; - let crc = crc32(data); - assert_eq!(crc, 0xCBF43926); - } - - #[test] - fn header_roundtrip() { - let header = Header { - magic: MAGIC, - version: FORMAT_VERSION, - checksum: 0x12345678, - buffer_len: 1024, - successors_offset: 64, - effects_offset: 128, - negated_fields_offset: 256, - string_refs_offset: 300, - string_bytes_offset: 400, - type_defs_offset: 500, - type_members_offset: 600, - entrypoints_offset: 700, - trivia_kinds_offset: 800, - negated_field_count: 5, - string_ref_count: 8, - type_def_count: 3, - type_member_count: 12, - entrypoint_count: 2, - trivia_kind_count: 1, - }; - - let bytes = header.to_bytes(); - let parsed = Header::from_bytes(&bytes); - - assert_eq!(parsed.magic, header.magic); - assert_eq!(parsed.version, header.version); - assert_eq!(parsed.checksum, header.checksum); - assert_eq!(parsed.buffer_len, header.buffer_len); - assert_eq!(parsed.successors_offset, header.successors_offset); - assert_eq!(parsed.trivia_kinds_offset, header.trivia_kinds_offset); - assert_eq!(parsed.entrypoint_count, header.entrypoint_count); - assert_eq!(parsed.type_def_count, header.type_def_count); - } - - #[test] - fn invalid_magic_rejected() { - let mut data = vec![0u8; HEADER_SIZE + 64]; - data[0..4].copy_from_slice(b"NOTM"); - - let result = from_bytes(&data); - assert!(matches!(result, Err(SerializeError::InvalidMagic(_)))); - } - - #[test] - fn version_mismatch_rejected() { - let mut data = vec![0u8; HEADER_SIZE + 64]; - data[0..4].copy_from_slice(&MAGIC); - data[4..8].copy_from_slice(&999u32.to_le_bytes()); - - let result = from_bytes(&data); - assert!(matches!( - result, - Err(SerializeError::VersionMismatch { .. }) - )); - } -} diff --git a/crates/plotnik-lib/src/ir/slice.rs b/crates/plotnik-lib/src/ir/slice.rs deleted file mode 100644 index 13e8d717..00000000 --- a/crates/plotnik-lib/src/ir/slice.rs +++ /dev/null @@ -1,97 +0,0 @@ -//! Relative range within a segment. -//! -//! `start_index` is an **element index**, not a byte offset. This naming -//! distinguishes it from byte offsets like `StringRef.offset`. -//! -//! This struct is 8 bytes with 4-byte alignment for efficient access. -//! Type safety is provided through generic methods, not stored PhantomData. - -use std::marker::PhantomData; - -/// Relative range within a compiled query segment. -/// -/// Used for variable-length data (successors, effects, negated fields, type members). -/// The slice references elements by index into the corresponding segment array. -/// -/// Layout: 8 bytes (4 + 2 + 2), align 4. -#[repr(C)] -#[derive(Clone, Copy)] -pub struct Slice { - /// Element index into the segment array (NOT byte offset). - start_index: u32, - /// Number of elements. 65k elements per slice is sufficient. - len: u16, - _pad: u16, - _phantom: PhantomData T>, -} - -// Compile-time size/alignment verification -const _: () = assert!(size_of::>() == 8); -const _: () = assert!(align_of::>() == 4); - -impl Slice { - /// Creates a new slice. - #[inline] - pub const fn new(start_index: u32, len: u16) -> Self { - Self { - start_index, - len, - _pad: 0, - _phantom: PhantomData, - } - } - - /// Creates an empty slice. 
- #[inline] - pub const fn empty() -> Self { - Self::new(0, 0) - } - - /// Returns the start index (element index, not byte offset). - #[inline] - pub fn start_index(&self) -> u32 { - self.start_index - } - - /// Returns the number of elements. - #[inline] - pub fn len(&self) -> u16 { - self.len - } - - /// Returns true if the slice is empty. - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Creates a slice encoding an inner type ID (for wrapper TypeDef). - /// The `start_index` stores the TypeId as u32, `len` is 0. - #[inline] - pub const fn from_inner_type(type_id: u16) -> Self { - Self::new(type_id as u32, 0) - } -} - -impl Default for Slice { - fn default() -> Self { - Self::empty() - } -} - -impl PartialEq for Slice { - fn eq(&self, other: &Self) -> bool { - self.start_index == other.start_index && self.len == other.len - } -} - -impl Eq for Slice {} - -impl std::fmt::Debug for Slice { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Slice") - .field("start_index", &self.start_index) - .field("len", &self.len) - .finish() - } -} diff --git a/crates/plotnik-lib/src/ir/slice_tests.rs b/crates/plotnik-lib/src/ir/slice_tests.rs deleted file mode 100644 index c1e85f77..00000000 --- a/crates/plotnik-lib/src/ir/slice_tests.rs +++ /dev/null @@ -1,49 +0,0 @@ -use super::*; - -#[test] -fn empty_slice() { - let slice: Slice = Slice::empty(); - - assert!(slice.is_empty()); - assert_eq!(slice.start_index(), 0); - assert_eq!(slice.len(), 0); -} - -#[test] -fn new_slice() { - let slice: Slice = Slice::new(42, 10); - - assert!(!slice.is_empty()); - assert_eq!(slice.start_index(), 42); - assert_eq!(slice.len(), 10); -} - -#[test] -fn default_is_empty() { - let slice: Slice = Slice::default(); - assert!(slice.is_empty()); -} - -#[test] -fn from_inner_type() { - let slice: Slice<()> = Slice::from_inner_type(0x1234); - - assert_eq!(slice.start_index(), 0x1234); - assert_eq!(slice.len(), 0); -} - -#[test] -fn equality() { - let a: Slice = Slice::new(10, 5); - let b: Slice = Slice::new(10, 5); - let c: Slice = Slice::new(10, 6); - - assert_eq!(a, b); - assert_ne!(a, c); -} - -#[test] -fn size_is_8_bytes() { - assert_eq!(std::mem::size_of::>(), 8); - assert_eq!(std::mem::align_of::>(), 4); -} diff --git a/crates/plotnik-lib/src/ir/string_ref.rs b/crates/plotnik-lib/src/ir/string_ref.rs deleted file mode 100644 index 143d94a8..00000000 --- a/crates/plotnik-lib/src/ir/string_ref.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! String pool entry reference. -//! -//! Strings are stored in a single contiguous byte pool. `StringRef` points -//! into that pool via byte offset (not element index). - -/// Reference to a string in the string pool. -/// -/// Layout: 8 bytes, align 4. -#[repr(C)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct StringRef { - /// Byte offset into string_bytes segment. - pub offset: u32, - /// Length of the string in bytes. 
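Seen side by side, the two reference types make the indexing convention concrete: a `Slice` selects elements of a typed segment by element index, while the `StringRef` being defined here selects bytes of the string pool by byte offset. A standalone sketch contrasting the two resolutions (simplified stand-in types and illustrative data):

```rust
/// Element-index range into a typed segment, in the spirit of `Slice`.
struct Slice {
    start_index: u32,
    len: u16,
}

/// Byte-offset range into the string pool, in the spirit of `StringRef`.
struct StringRef {
    offset: u32,
    len: u16,
}

fn main() {
    // A typed segment: indices count elements, not bytes.
    let effects = ["CaptureNode", "Field", "EndObject", "ToString"];
    let slice = Slice { start_index: 1, len: 2 };
    let resolved = &effects[slice.start_index as usize..][..slice.len as usize];
    assert_eq!(resolved, &["Field", "EndObject"][..]);

    // The string pool: offsets count bytes into one contiguous buffer.
    let pool = b"Mainname";
    let name = StringRef { offset: 4, len: 4 };
    let bytes = &pool[name.offset as usize..][..name.len as usize];
    assert_eq!(std::str::from_utf8(bytes).unwrap(), "name");
}
```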
- pub len: u16, - _pad: u16, -} - -impl StringRef { - pub const fn new(offset: u32, len: u16) -> Self { - Self { - offset, - len, - _pad: 0, - } - } -} - -// Compile-time size verification -const _: () = assert!(size_of::() == 8); -const _: () = assert!(align_of::() == 4); diff --git a/crates/plotnik-lib/src/ir/string_ref_tests.rs b/crates/plotnik-lib/src/ir/string_ref_tests.rs deleted file mode 100644 index 74e7d12f..00000000 --- a/crates/plotnik-lib/src/ir/string_ref_tests.rs +++ /dev/null @@ -1,14 +0,0 @@ -use super::*; - -#[test] -fn string_ref_new() { - let r = StringRef::new(42, 10); - assert_eq!(r.offset, 42); - assert_eq!(r.len, 10); -} - -#[test] -fn string_ref_layout() { - assert_eq!(size_of::(), 8); - assert_eq!(align_of::(), 4); -} diff --git a/crates/plotnik-lib/src/ir/strings.rs b/crates/plotnik-lib/src/ir/strings.rs deleted file mode 100644 index ae09801a..00000000 --- a/crates/plotnik-lib/src/ir/strings.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! String interning for compiled queries. -//! -//! Identical strings share storage and ID. Used for field names, variant tags, -//! entrypoint names, and type names. - -use std::collections::HashMap; - -use super::ids::StringId; - -/// String interner for query compilation. -/// -/// Interns strings during the analysis phase, then emits them as a contiguous -/// byte pool with `StringRef` entries pointing into it. -#[derive(Debug, Default)] -pub struct StringInterner<'src> { - /// Map from string content to assigned ID. - map: HashMap<&'src str, StringId>, - /// Strings in ID order for emission. - strings: Vec<&'src str>, -} - -impl<'src> StringInterner<'src> { - /// Creates a new empty interner. - pub fn new() -> Self { - Self::default() - } - - /// Interns a string, returning its ID. - /// - /// If the string was previously interned, returns the existing ID. - pub fn intern(&mut self, s: &'src str) -> StringId { - if let Some(&id) = self.map.get(s) { - return id; - } - - let id = self.strings.len() as StringId; - assert!(id < 0xFFFF, "string pool overflow (>65534 strings)"); - - self.map.insert(s, id); - self.strings.push(s); - id - } - - /// Returns the ID of a previously interned string, or `None`. - pub fn get(&self, s: &str) -> Option { - self.map.get(s).copied() - } - - /// Returns the string for a given ID. - /// - /// # Panics - /// Panics if the ID is out of range. - pub fn resolve(&self, id: StringId) -> &'src str { - self.strings[id as usize] - } - - /// Returns the number of interned strings. - pub fn len(&self) -> usize { - self.strings.len() - } - - /// Returns true if no strings have been interned. - pub fn is_empty(&self) -> bool { - self.strings.is_empty() - } - - /// Returns an iterator over (id, string) pairs in ID order. - pub fn iter(&self) -> impl Iterator + '_ { - self.strings - .iter() - .enumerate() - .map(|(i, s)| (i as StringId, *s)) - } - - /// Returns the total byte size needed for all strings. - pub fn total_bytes(&self) -> usize { - self.strings.iter().map(|s| s.len()).sum() - } - - /// Consumes the interner and returns strings in ID order. 
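A standalone sketch (not the removed code) of the emission step the deleted `StringRef` and `StringInterner` describe together: strings in interner ID order are written into one contiguous byte pool, and each ID gets a (byte offset, length) reference into that pool.

// Self-contained illustration only; the function name is hypothetical.
fn emit_pool(strings_in_id_order: &[&str]) -> (Vec<u8>, Vec<(u32, u16)>) {
    let mut pool = Vec::new();
    let mut refs = Vec::with_capacity(strings_in_id_order.len());
    for s in strings_in_id_order {
        let offset = pool.len() as u32;
        pool.extend_from_slice(s.as_bytes());
        // Lengths fit in u16, matching the removed StringRef layout.
        refs.push((offset, s.len() as u16));
    }
    (pool, refs)
}

fn main() {
    // IDs as an interner would assign them: 0 = "foo", 1 = "hello".
    let (pool, refs) = emit_pool(&["foo", "hello"]);
    assert_eq!(refs, vec![(0u32, 3u16), (3, 5)]);
    assert_eq!(&pool[3..8], "hello".as_bytes());
}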
- pub fn into_strings(self) -> Vec<&'src str> { - self.strings - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn intern_deduplicates() { - let mut interner = StringInterner::new(); - - let id1 = interner.intern("foo"); - let id2 = interner.intern("bar"); - let id3 = interner.intern("foo"); - - assert_eq!(id1, 0); - assert_eq!(id2, 1); - assert_eq!(id3, 0); // same as id1 - assert_eq!(interner.len(), 2); - } - - #[test] - fn resolve_works() { - let mut interner = StringInterner::new(); - interner.intern("hello"); - interner.intern("world"); - - assert_eq!(interner.resolve(0), "hello"); - assert_eq!(interner.resolve(1), "world"); - } - - #[test] - fn get_returns_none_for_unknown() { - let interner = StringInterner::new(); - assert_eq!(interner.get("unknown"), None); - } - - #[test] - fn total_bytes() { - let mut interner = StringInterner::new(); - interner.intern("foo"); // 3 bytes - interner.intern("hello"); // 5 bytes - interner.intern("foo"); // deduplicated - - assert_eq!(interner.total_bytes(), 8); - } - - #[test] - fn iter_order() { - let mut interner = StringInterner::new(); - interner.intern("a"); - interner.intern("b"); - interner.intern("c"); - - let pairs: Vec<_> = interner.iter().collect(); - assert_eq!(pairs, vec![(0, "a"), (1, "b"), (2, "c")]); - } -} diff --git a/crates/plotnik-lib/src/ir/transition.rs b/crates/plotnik-lib/src/ir/transition.rs deleted file mode 100644 index 0d47c500..00000000 --- a/crates/plotnik-lib/src/ir/transition.rs +++ /dev/null @@ -1,125 +0,0 @@ -//! Transition struct - the fundamental unit of the query IR. -//! -//! Each transition is 64 bytes and cache-line aligned to ensure no transition -//! straddles cache lines. Transitions carry all semantics: matching, effects, -//! and successors. States are implicit junction points. - -use super::{EffectOp, Matcher, Nav, RefTransition, Slice, TransitionId}; - -/// Maximum number of inline successors before spilling to external segment. -pub const MAX_INLINE_SUCCESSORS: usize = 8; - -/// A single transition in the query graph. -/// -/// Transitions use SSO (small-size optimization) for successors: -/// - 0-8 successors: stored inline in `successor_data` -/// - 9+ successors: `successor_data[0]` is index into successors segment -/// -/// Layout (64 bytes total, 64-byte aligned): -/// ```text -/// offset 0: matcher (16 bytes) -/// offset 16: ref_marker (4 bytes) -/// offset 20: nav (2 bytes) -/// offset 22: effects_len (2 bytes) -/// offset 24: successor_count (4 bytes) -/// offset 28: effects_start (4 bytes) -/// offset 32: successor_data (32 bytes) -/// ``` -#[repr(C, align(64))] -#[derive(Clone, Copy)] -pub struct Transition { - // --- 32 bytes metadata --- - /// What this transition matches (node kind, wildcard, epsilon). - pub matcher: Matcher, // 16 bytes, offset 0 - - /// Reference call/return marker for recursive definitions. - pub ref_marker: RefTransition, // 4 bytes, offset 16 - - /// Navigation instruction (descend/ascend/sibling traversal). - pub nav: Nav, // 2 bytes, offset 20 - - /// Number of effect operations (inlined from Slice for alignment). - effects_len: u16, // 2 bytes, offset 22 - - /// Number of successor transitions. - pub successor_count: u32, // 4 bytes, offset 24 - - /// Start index into effects segment (inlined from Slice for alignment). - effects_start: u32, // 4 bytes, offset 28 - - // --- 32 bytes control flow --- - /// Successor storage (inline or spilled index). 
- /// - /// - If `successor_count <= 8`: contains `TransitionId` values directly - /// - If `successor_count > 8`: `successor_data[0]` is index into successors segment - pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes, offset 32 -} - -impl Transition { - /// Creates a new transition with all fields. - #[inline] - pub fn new( - matcher: Matcher, - ref_marker: RefTransition, - nav: Nav, - effects: Slice, - successor_count: u32, - successor_data: [u32; MAX_INLINE_SUCCESSORS], - ) -> Self { - Self { - matcher, - ref_marker, - nav, - effects_len: effects.len(), - successor_count, - effects_start: effects.start_index(), - successor_data, - } - } - - /// Returns the effects slice. - #[inline] - pub fn effects(&self) -> Slice { - Slice::new(self.effects_start, self.effects_len) - } - - /// Sets the effects slice. - #[inline] - pub fn set_effects(&mut self, effects: Slice) { - self.effects_start = effects.start_index(); - self.effects_len = effects.len(); - } - - /// Returns `true` if successors are stored inline. - #[inline] - pub fn has_inline_successors(&self) -> bool { - self.successor_count as usize <= MAX_INLINE_SUCCESSORS - } - - /// Returns inline successors if they fit, `None` if spilled. - #[inline] - pub fn inline_successors(&self) -> Option<&[TransitionId]> { - if self.has_inline_successors() { - Some(&self.successor_data[..self.successor_count as usize]) - } else { - None - } - } - - /// Returns the spilled successor segment index and count. - /// Panics if successors are inline. - #[inline] - pub fn spilled_successors_index(&self) -> u32 { - debug_assert!( - !self.has_inline_successors(), - "successors are inline, not spilled" - ); - self.successor_data[0] - } -} - -// Compile-time size/alignment verification -const _: () = { - assert!(core::mem::size_of::() == 64); - assert!(core::mem::align_of::() == 64); -}; diff --git a/crates/plotnik-lib/src/ir/type_metadata.rs b/crates/plotnik-lib/src/ir/type_metadata.rs deleted file mode 100644 index a532ad75..00000000 --- a/crates/plotnik-lib/src/ir/type_metadata.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Type metadata for code generation and validation. -//! -//! Type metadata is descriptive, not prescriptive—it describes what -//! transitions produce, not how they execute. - -use super::Slice; -use super::ids::{STRING_NONE, StringId, TypeId}; - -/// First composite type ID (after primitives 0-2). -pub const TYPE_COMPOSITE_START: TypeId = 3; - -/// Type definition in the compiled query. -/// -/// The `members` field has dual semantics based on `kind`: -/// - Wrappers (Optional/ArrayStar/ArrayPlus): `members.start_index` is inner TypeId -/// - Composites (Record/Enum): `members` is slice into type_members segment -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct TypeDef { - pub kind: TypeKind, - _pad: u8, - /// Synthetic or explicit type name. `STRING_NONE` for unnamed wrappers. - pub name: StringId, - /// See struct-level docs for dual semantics. - pub members: Slice, -} - -// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(8) = 12 -// Alignment is 4 due to Slice having align 4 -const _: () = assert!(size_of::() == 12); -const _: () = assert!(align_of::() == 4); - -impl TypeDef { - /// Create a wrapper type (Optional, ArrayStar, ArrayPlus). 
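A standalone sketch (not the removed code) of the successor small-size optimization that the deleted `Transition` doc describes: up to eight successor IDs live inline in the transition, and beyond that slot 0 holds the start index into a shared successors segment.

// Self-contained illustration only.
const MAX_INLINE: usize = 8;

fn successors<'a>(
    count: u32,
    inline: &'a [u32; MAX_INLINE],
    spilled_segment: &'a [u32],
) -> &'a [u32] {
    let count = count as usize;
    if count <= MAX_INLINE {
        &inline[..count]
    } else {
        // inline[0] is the start index into the spilled segment.
        let start = inline[0] as usize;
        &spilled_segment[start..start + count]
    }
}

fn main() {
    // Two successors: stored inline.
    let inline = [7, 9, 0, 0, 0, 0, 0, 0];
    assert_eq!(successors(2, &inline, &[]), [7, 9]);

    // Ten successors: slot 0 points into the spilled segment.
    let spilled = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
    let indirect = [0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(successors(10, &indirect, &spilled), spilled);
}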
- pub fn wrapper(kind: TypeKind, inner: TypeId) -> Self { - debug_assert!(matches!( - kind, - TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus - )); - Self { - kind, - _pad: 0, - name: STRING_NONE, - members: Slice::from_inner_type(inner), - } - } - - /// Create a composite type (Record, Enum). - pub fn composite(kind: TypeKind, name: StringId, members: Slice) -> Self { - debug_assert!(matches!(kind, TypeKind::Record | TypeKind::Enum)); - Self { - kind, - _pad: 0, - name, - members, - } - } - - /// For wrapper types, returns the inner type ID. - pub fn inner_type(&self) -> Option { - match self.kind { - TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus => { - Some(self.members.start_index() as TypeId) - } - TypeKind::Record | TypeKind::Enum => None, - } - } - - /// For composite types, returns the members slice. - pub fn members_slice(&self) -> Option> { - match self.kind { - TypeKind::Record | TypeKind::Enum => Some(self.members), - TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus => None, - } - } - - pub fn is_wrapper(&self) -> bool { - matches!( - self.kind, - TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus - ) - } - - pub fn is_composite(&self) -> bool { - matches!(self.kind, TypeKind::Record | TypeKind::Enum) - } -} - -/// Discriminant for type definitions. -#[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TypeKind { - /// `T?` — nullable wrapper - Optional = 0, - /// `T*` — zero or more elements - ArrayStar = 1, - /// `T+` — one or more elements (non-empty) - ArrayPlus = 2, - /// Struct with named fields - Record = 3, - /// Tagged union (discriminated) - Enum = 4, -} - -/// Member of a Record (field) or Enum (variant). -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct TypeMember { - /// Field name or variant tag. - pub name: StringId, - /// Field type or variant payload. `TYPE_VOID` for unit variants. - pub ty: TypeId, -} - -const _: () = assert!(size_of::() == 4); -const _: () = assert!(align_of::() == 2); - -impl TypeMember { - pub fn new(name: StringId, ty: TypeId) -> Self { - Self { name, ty } - } -} diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 2ce50b74..e3f57098 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,8 +17,6 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod diagnostics; -pub mod engine; -pub mod ir; pub mod parser; pub mod query; @@ -29,7 +27,7 @@ pub mod query; pub type PassResult = std::result::Result<(T, Diagnostics), Error>; pub use diagnostics::{Diagnostics, DiagnosticsPrinter, Severity}; -pub use query::{Query, UNNAMED_DEF}; +pub use query::{Query, QueryBuilder}; /// Errors that can occur during query parsing. 
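For context on the new surface this patch re-exports, a usage sketch pieced together from the updated coverage tests later in the patch. It assumes the two builder setters can be chained (each test sets only one) and that `Error` is the crate-root enum the tests reach as `crate::Error`; the limit values are illustrative only.

use plotnik_lib::{Error, QueryBuilder};

fn main() {
    // Call shape taken from the updated coverage tests in this patch;
    // chaining both setters and the concrete limits are assumptions.
    let result = QueryBuilder::new("Q = (identifier)")
        .with_query_parse_fuel(10_000)
        .with_query_parse_recursion_limit(64)
        .parse();

    match result {
        Ok(_) => println!("query parsed"),
        Err(Error::ExecFuelExhausted) => println!("parse fuel exhausted"),
        Err(Error::RecursionLimitExceeded) => println!("query nested too deeply"),
        Err(other) => println!("parse failed: {other:?}"),
    }
}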
#[derive(Debug, Clone, thiserror::Error)] diff --git a/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs b/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs index c6544e4a..a3a50a05 100644 --- a/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs +++ b/crates/plotnik-lib/src/parser/tests/recovery/coverage_tests.rs @@ -1,4 +1,5 @@ use crate::Query; +use crate::query::query::QueryBuilder; use indoc::indoc; #[test] @@ -12,7 +13,9 @@ fn deeply_nested_trees_hit_recursion_limit() { input.push(')'); } - let result = Query::new(&input).with_recursion_fuel(depth).exec(); + let result = QueryBuilder::new(&input) + .with_query_parse_recursion_limit(depth) + .parse(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -32,7 +35,9 @@ fn deeply_nested_sequences_hit_recursion_limit() { input.push('}'); } - let result = Query::new(&input).with_recursion_fuel(depth).exec(); + let result = QueryBuilder::new(&input) + .with_query_parse_recursion_limit(depth) + .parse(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -52,7 +57,9 @@ fn deeply_nested_alternations_hit_recursion_limit() { input.push(']'); } - let result = Query::new(&input).with_recursion_fuel(depth).exec(); + let result = QueryBuilder::new(&input) + .with_query_parse_recursion_limit(depth) + .parse(); assert!( matches!(result, Err(crate::Error::RecursionLimitExceeded)), @@ -69,7 +76,7 @@ fn many_trees_exhaust_exec_fuel() { input.push_str("(a) "); } - let result = Query::new(&input).with_exec_fuel(100).exec(); + let result = QueryBuilder::new(&input).with_query_parse_fuel(100).parse(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), @@ -91,7 +98,7 @@ fn many_branches_exhaust_exec_fuel() { } input.push(']'); - let result = Query::new(&input).with_exec_fuel(100).exec(); + let result = QueryBuilder::new(&input).with_query_parse_fuel(100).parse(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), @@ -113,7 +120,7 @@ fn many_fields_exhaust_exec_fuel() { } input.push(')'); - let result = Query::new(&input).with_exec_fuel(100).exec(); + let result = QueryBuilder::new(&input).with_query_parse_fuel(100).parse(); assert!( matches!(result, Err(crate::Error::ExecFuelExhausted)), diff --git a/crates/plotnik-lib/src/query/alt_kinds.rs b/crates/plotnik-lib/src/query/alt_kinds.rs index 91d6a604..423f43d1 100644 --- a/crates/plotnik-lib/src/query/alt_kinds.rs +++ b/crates/plotnik-lib/src/query/alt_kinds.rs @@ -5,21 +5,11 @@ use rowan::TextRange; -use super::Query; use super::invariants::ensure_both_branch_kinds; use super::visitor::{Visitor, walk, walk_alt_expr}; use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::{AltExpr, AltKind, Branch, Root}; -impl Query<'_> { - pub(super) fn validate_alt_kinds(&mut self) { - let mut visitor = AltKindsValidator { - diag: &mut self.alt_kind_diagnostics, - }; - visitor.visit(&self.ast); - } -} - pub fn validate_alt_kinds(ast: &Root, diag: &mut Diagnostics) { let mut visitor = AltKindsValidator { diag }; visitor.visit(ast); diff --git a/crates/plotnik-lib/src/query/dump.rs b/crates/plotnik-lib/src/query/dump.rs index 1b26a568..26580f57 100644 --- a/crates/plotnik-lib/src/query/dump.rs +++ b/crates/plotnik-lib/src/query/dump.rs @@ -30,11 +30,11 @@ mod test_helpers { } pub fn dump_diagnostics(&self) -> String { - self.diagnostics().render_filtered(self.source) + self.diagnostics().render_filtered(self.source()) } pub fn dump_diagnostics_raw(&self) -> String { - 
self.diagnostics_raw().render(self.source) + self.diagnostics().render(self.source()) } } } diff --git a/crates/plotnik-lib/src/query/expr_arity.rs b/crates/plotnik-lib/src/query/expr_arity.rs index ae7643a0..3d56ac71 100644 --- a/crates/plotnik-lib/src/query/expr_arity.rs +++ b/crates/plotnik-lib/src/query/expr_arity.rs @@ -9,7 +9,6 @@ use std::collections::HashMap; -use super::Query; use super::symbol_table::SymbolTable; use super::visitor::{Visitor, walk_expr, walk_field_expr}; use crate::diagnostics::{DiagnosticKind, Diagnostics}; @@ -24,20 +23,6 @@ pub enum ExprArity { pub type ExprArityTable = HashMap; -impl Query<'_> { - pub(super) fn infer_arities(&mut self) { - self.expr_arity_table = infer_arities( - &self.ast, - &self.symbol_table, - &mut self.expr_arity_diagnostics, - ); - } - - pub(super) fn get_arity(&self, node: &SyntaxNode) -> Option { - resolve_arity(node, &self.expr_arity_table) - } -} - pub fn infer_arities( root: &Root, symbol_table: &SymbolTable, diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs deleted file mode 100644 index fe649053..00000000 --- a/crates/plotnik-lib/src/query/graph.rs +++ /dev/null @@ -1,723 +0,0 @@ -//! Core types for build-time query graphs. -//! -//! The graph uses index-based node references (`NodeId`) with nodes stored -//! in a `Vec`. Strings borrow from the source (`&'src str`) until IR emission. - -use crate::ir::Nav; -use indexmap::IndexMap; -use rowan::TextRange; - -/// Index into `BuildGraph::nodes`. -pub type NodeId = u32; - -/// A graph fragment with single entry and exit points. -/// -/// Every expression compiles to a fragment. Combinators connect fragments -/// by manipulating entry/exit edges. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct Fragment { - pub entry: NodeId, - pub exit: NodeId, -} - -impl Fragment { - pub fn new(entry: NodeId, exit: NodeId) -> Self { - Self { entry, exit } - } - - pub fn single(node: NodeId) -> Self { - Self { - entry: node, - exit: node, - } - } -} - -/// Array collection mode for loop combinators. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ArrayMode { - /// No array collection (simple repetition) - None, - /// Collect elements into array (StartArray/PushElement/EndArray) - Simple, - /// Collect with object scope per iteration (for QIS) - Qis, -} - -/// Build-time graph for query compilation. -/// -/// Nodes are stored in a flat vector, referenced by `NodeId`. -/// Definitions map names to their entry points. -#[derive(Debug)] -pub struct BuildGraph<'src> { - nodes: Vec>, - definitions: IndexMap<&'src str, NodeId>, -} - -impl<'src> BuildGraph<'src> { - pub fn new() -> Self { - Self { - nodes: Vec::new(), - definitions: IndexMap::new(), - } - } - - pub fn add_node(&mut self, node: BuildNode<'src>) -> NodeId { - let id = self.nodes.len() as NodeId; - self.nodes.push(node); - id - } - - pub fn add_epsilon(&mut self) -> NodeId { - self.add_node(BuildNode::epsilon()) - } - - /// Clone a node, creating a new node with the same matcher, effects, and ref_marker, - /// but with the specified nav and copying the successors list. 
- pub fn clone_node_with_nav(&mut self, node_id: NodeId, nav: Nav) -> NodeId { - let original = &self.nodes[node_id as usize]; - let cloned = BuildNode { - matcher: original.matcher.clone(), - effects: original.effects.clone(), - ref_marker: original.ref_marker.clone(), - successors: original.successors.clone(), - nav, - ref_name: original.ref_name, - }; - self.add_node(cloned) - } - - pub fn add_matcher(&mut self, matcher: BuildMatcher<'src>) -> NodeId { - self.add_node(BuildNode::with_matcher(matcher)) - } - - pub fn add_definition(&mut self, name: &'src str, entry: NodeId) { - self.definitions.insert(name, entry); - } - - pub fn definition(&self, name: &str) -> Option { - self.definitions.get(name).copied() - } - - pub fn definitions(&self) -> impl Iterator + '_ { - self.definitions.iter().map(|(k, v)| (*k, *v)) - } - - pub fn node(&self, id: NodeId) -> &BuildNode<'src> { - &self.nodes[id as usize] - } - - pub fn node_mut(&mut self, id: NodeId) -> &mut BuildNode<'src> { - &mut self.nodes[id as usize] - } - - pub fn len(&self) -> usize { - self.nodes.len() - } - - pub fn is_empty(&self) -> bool { - self.nodes.is_empty() - } - - pub fn iter(&self) -> impl Iterator)> { - self.nodes.iter().enumerate().map(|(i, n)| (i as NodeId, n)) - } - - pub fn connect(&mut self, from: NodeId, to: NodeId) { - self.nodes[from as usize].successors.push(to); - } - - pub fn connect_exit(&mut self, fragment: Fragment, to: NodeId) { - self.connect(fragment.exit, to); - } - - pub fn matcher_fragment(&mut self, matcher: BuildMatcher<'src>) -> Fragment { - Fragment::single(self.add_matcher(matcher)) - } - - pub fn epsilon_fragment(&mut self) -> Fragment { - Fragment::single(self.add_epsilon()) - } - - /// Connect fragments in sequence: f1 → f2 → ... → fn - pub fn sequence(&mut self, fragments: &[Fragment]) -> Fragment { - match fragments.len() { - 0 => self.epsilon_fragment(), - 1 => fragments[0], - _ => { - for window in fragments.windows(2) { - self.connect(window[0].exit, window[1].entry); - } - Fragment::new(fragments[0].entry, fragments[fragments.len() - 1].exit) - } - } - } - - /// Connect fragments in parallel (alternation): entry → [f1|f2|...|fn] → exit - pub fn alternation(&mut self, fragments: &[Fragment]) -> Fragment { - if fragments.is_empty() { - return self.epsilon_fragment(); - } - if fragments.len() == 1 { - return fragments[0]; - } - - let entry = self.add_epsilon(); - let exit = self.add_epsilon(); - - for f in fragments { - self.connect(entry, f.entry); - self.connect(f.exit, exit); - } - - Fragment::new(entry, exit) - } - - /// Generic loop combinator for * and + quantifiers. 
- /// - /// - `at_least_one`: true for + (one or more), false for * (zero or more) - /// - `greedy`: true for greedy (try match first), false for lazy (try exit first) - /// - `mode`: array collection mode - fn build_repetition( - &mut self, - inner: Fragment, - at_least_one: bool, - greedy: bool, - mode: ArrayMode, - initial_nav: Nav, - ) -> Fragment { - let has_array = mode != ArrayMode::None; - let has_qis = mode == ArrayMode::Qis; - - // Array wrapper nodes - let start = if has_array { - let s = self.add_epsilon(); - self.node_mut(s).add_effect(BuildEffect::StartArray { - is_plus: at_least_one, - }); - Some(s) - } else { - None - }; - - let end = if has_array { - let e = self.add_epsilon(); - self.node_mut(e).add_effect(BuildEffect::EndArray); - Some(e) - } else { - None - }; - - // QIS object wrapper nodes - let (obj_start, obj_end) = if has_qis { - let os = self.add_epsilon(); - self.node_mut(os).add_effect(BuildEffect::StartObject { - for_alternation: false, - }); - let oe = self.add_epsilon(); - self.node_mut(oe).add_effect(BuildEffect::EndObject); - (Some(os), Some(oe)) - } else { - (None, None) - }; - - // Push node for array modes - let push = if has_array { - let p = self.add_epsilon(); - self.node_mut(p).add_effect(BuildEffect::PushElement); - Some(p) - } else { - None - }; - - // Branch node (decision point for loop continuation) - let branch = self.add_epsilon(); - - // Exit node for non-array modes - let exit = if !has_array { - Some(self.add_epsilon()) - } else { - None - }; - - // Determine the effective inner entry/exit (with QIS wrapping if needed) - let (loop_body_entry, loop_body_exit) = if has_qis { - self.connect(obj_start.unwrap(), inner.entry); - self.connect(inner.exit, obj_end.unwrap()); - (obj_start.unwrap(), obj_end.unwrap()) - } else { - (inner.entry, inner.exit) - }; - - // Set initial navigation on inner.entry (the actual matcher). - // In QIS mode, this is distinct from loop_body_entry (the object wrapper). - self.node_mut(inner.entry).set_nav(initial_nav); - - // For re-entry (subsequent iterations), clone inner.entry with Next nav. - // This creates a separate path for re-entry that can skip non-matching siblings. - // In QIS mode, we clone inner.entry (not obj_start) to avoid duplicating the wrapper. 
- let try_next = self.clone_node_with_nav(inner.entry, Nav::next()); - - // QIS object wrapper for try_next re-entry path - let (try_next_entry, try_next_exit) = if has_qis { - let os = self.add_epsilon(); - self.node_mut(os).add_effect(BuildEffect::StartObject { - for_alternation: false, - }); - let oe = self.add_epsilon(); - self.node_mut(oe).add_effect(BuildEffect::EndObject); - self.connect(os, try_next); - self.connect(try_next, oe); - (os, oe) - } else { - (try_next, try_next) - }; - - // Wire up the graph based on at_least_one and greedy - if at_least_one { - // + pattern: must match at least once - // Entry → loop_body_entry → body → push → re_entry → (try_next → body or exit) - let entry_point = start.unwrap_or(loop_body_entry); - let exit_point = end.or(exit).unwrap(); - - // re_entry is a branch point (no nav) that chooses: try more or exit - let re_entry = self.add_epsilon(); - - if let Some(s) = start { - self.connect(s, loop_body_entry); - } - - if let Some(p) = push { - self.connect(loop_body_exit, p); - // try_next also needs to connect to push after matching - self.connect(try_next_exit, p); - self.connect(p, re_entry); - } else { - self.connect(loop_body_exit, re_entry); - self.connect(try_next_exit, re_entry); - } - - // re_entry branches: try_next (Next nav) or exit - // If try_next's Next fails, backtrack finds re_entry checkpoint and tries exit - if greedy { - self.connect(re_entry, try_next_entry); - self.connect(re_entry, exit_point); - } else { - self.connect(re_entry, exit_point); - self.connect(re_entry, try_next_entry); - } - - Fragment::new(entry_point, exit_point) - } else { - // * pattern: zero or more - // Entry → branch → (loop_body_entry → body → push → re_entry → try_next → body) or exit - let entry_point = start.unwrap_or(branch); - let exit_point = end.or(exit).unwrap(); - - // re_entry is a branch point (no nav) that chooses: try more or exit - let re_entry = self.add_epsilon(); - - if let Some(s) = start { - self.connect(s, branch); - } - - if greedy { - self.connect(branch, loop_body_entry); - self.connect(branch, exit_point); - } else { - self.connect(branch, exit_point); - self.connect(branch, loop_body_entry); - } - - if let Some(p) = push { - self.connect(loop_body_exit, p); - // try_next also needs to connect to push after matching - self.connect(try_next_exit, p); - self.connect(p, re_entry); - } else { - self.connect(loop_body_exit, re_entry); - self.connect(try_next_exit, re_entry); - } - - // re_entry branches: try_next (Next nav) or exit - // If try_next's Next fails, backtrack finds re_entry checkpoint and tries exit - if greedy { - self.connect(re_entry, try_next_entry); - self.connect(re_entry, exit_point); - } else { - self.connect(re_entry, exit_point); - self.connect(re_entry, try_next_entry); - } - - Fragment::new(entry_point, exit_point) - } - } - - /// Generic optional combinator for ? quantifier. 
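A standalone sketch (not the removed code) of the zero-or-more wiring shape built by the repetition combinator above, with the array/QIS wrapper and push nodes omitted: an entry branch chooses body-vs-exit, a re-entry node chooses try-next-sibling-vs-exit, and greedy vs lazy differ only in which successor is connected (and therefore tried) first.

// Self-contained illustration only; node roles mirror the removed builder.
#[derive(Default)]
struct MiniGraph {
    successors: Vec<Vec<usize>>,
}

impl MiniGraph {
    fn add(&mut self) -> usize {
        self.successors.push(Vec::new());
        self.successors.len() - 1
    }
    fn connect(&mut self, from: usize, to: usize) {
        self.successors[from].push(to);
    }
}

fn star(greedy: bool) -> (MiniGraph, usize) {
    let mut g = MiniGraph::default();
    let branch = g.add();   // entry: try the body or skip straight to exit
    let body = g.add();     // first-iteration matcher (initial nav)
    let try_next = g.add(); // re-entry clone of the matcher with "next" nav
    let re_entry = g.add(); // after a match: loop again or leave
    let exit = g.add();

    let (first, second) = if greedy { (body, exit) } else { (exit, body) };
    g.connect(branch, first);
    g.connect(branch, second);

    g.connect(body, re_entry);
    g.connect(try_next, re_entry);

    let (first, second) = if greedy { (try_next, exit) } else { (exit, try_next) };
    g.connect(re_entry, first);
    g.connect(re_entry, second);

    (g, branch)
}

fn main() {
    let (greedy, b) = star(true);
    let (lazy, _) = star(false);
    // Greedy tries the body first; lazy tries the exit first. Node ids line
    // up because both graphs are built in the same order.
    println!("greedy branch -> {:?}", greedy.successors[b]);
    println!("lazy   branch -> {:?}", lazy.successors[b]);
    assert_ne!(greedy.successors[b], lazy.successors[b]);
}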
- /// - /// - `greedy`: true for greedy (try match first), false for lazy (try skip first) - /// - `qis`: true to wrap the optional value in an object scope - fn build_optional(&mut self, inner: Fragment, greedy: bool, qis: bool) -> Fragment { - let branch = self.add_epsilon(); - let exit = self.add_epsilon(); - - if qis { - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject { - for_alternation: false, - }); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - // Skip path needs ClearCurrent to indicate "nothing captured" - let skip = self.add_epsilon(); - self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); - - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, exit); - self.connect(skip, exit); - - if greedy { - self.connect(branch, obj_start); - self.connect(branch, skip); - } else { - self.connect(branch, skip); - self.connect(branch, obj_start); - } - } else { - let skip = self.add_epsilon(); - self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); - - self.connect(skip, exit); - self.connect(inner.exit, exit); - - if greedy { - self.connect(branch, inner.entry); - self.connect(branch, skip); - } else { - self.connect(branch, skip); - self.connect(branch, inner.entry); - } - } - - Fragment::new(branch, exit) - } - - /// Zero or more (greedy): inner* - pub fn zero_or_more(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::None, nav) - } - - /// Zero or more (non-greedy): inner*? - pub fn zero_or_more_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::None, nav) - } - - /// One or more (greedy): inner+ - pub fn one_or_more(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::None, nav) - } - - /// One or more (non-greedy): inner+? - pub fn one_or_more_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::None, nav) - } - - /// Optional (greedy): inner? - pub fn optional(&mut self, inner: Fragment) -> Fragment { - self.build_optional(inner, true, false) - } - - /// Optional (non-greedy): inner?? - pub fn optional_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_optional(inner, false, false) - } - - /// Zero or more with array collection (greedy): inner* - pub fn zero_or_more_array(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::Simple, nav) - } - - /// Zero or more with array collection (non-greedy): inner*? - pub fn zero_or_more_array_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::Simple, nav) - } - - /// One or more with array collection (greedy): inner+ - pub fn one_or_more_array(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::Simple, nav) - } - - /// One or more with array collection (non-greedy): inner+? - pub fn one_or_more_array_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::Simple, nav) - } - - /// Zero or more with QIS object wrapping (greedy): inner* - /// - /// Each iteration is wrapped in StartObject/EndObject to keep - /// multiple captures coupled per-iteration. 
- pub fn zero_or_more_array_qis(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::Qis, nav) - } - - /// Zero or more with QIS object wrapping (non-greedy): inner*? - pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::Qis, nav) - } - - /// One or more with QIS object wrapping (greedy): inner+ - pub fn one_or_more_array_qis(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::Qis, nav) - } - - /// One or more with QIS object wrapping (non-greedy): inner+? - pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::Qis, nav) - } - - /// Optional with QIS object wrapping: inner? - /// - /// Wraps the optional value in an object scope. - pub fn optional_qis(&mut self, inner: Fragment) -> Fragment { - self.build_optional(inner, true, true) - } - - /// Optional with QIS object wrapping (non-greedy): inner?? - pub fn optional_qis_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_optional(inner, false, true) - } - - /// Wrap definitions that don't already match the root node kind. - /// - /// For each definition whose entry matcher doesn't match `root_kind`, - /// prepends a transition that matches the root and descends into children. - /// This allows queries like `(function_declaration)` to work when the - /// interpreter starts at tree root (e.g., `program`). - pub fn wrap_definitions_with_root(&mut self, root_kind: &'src str) { - let def_names: Vec<&'src str> = self.definitions.keys().copied().collect(); - - for name in def_names { - let entry = self.definitions[name]; - - // Check if entry already matches root (directly or first reachable matcher) - if self.entry_matches_root(entry, root_kind) { - continue; - } - - // Create wrapper: (root_kind) with Nav::stay - let wrapper = self.add_node(BuildNode::with_matcher(BuildMatcher::node(root_kind))); - - // Add epsilon node with Nav::down between wrapper and original entry - let down_nav = self.add_epsilon(); - self.node_mut(down_nav).set_nav(Nav::down()); - - // Connect wrapper → down_nav → original entry - self.connect(wrapper, down_nav); - self.connect(down_nav, entry); - - // Update definition to point to wrapper - self.definitions.insert(name, wrapper); - } - } - - /// Check if entry (or first reachable node matcher) already matches root kind. - fn entry_matches_root(&self, entry: NodeId, root_kind: &str) -> bool { - match &self.nodes[entry as usize].matcher { - BuildMatcher::Node { kind, .. } => *kind == root_kind, - BuildMatcher::Epsilon => { - // For epsilon entries, check first reachable node matchers - for &target in &self.nodes[entry as usize].successors { - if self.entry_matches_root(target, root_kind) { - return true; - } - } - false - } - _ => false, - } - } -} - -impl Default for BuildGraph<'_> { - fn default() -> Self { - Self::new() - } -} - -/// A node in the build graph. 
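A standalone illustration (not the removed code) of the root-wrapping rule in `wrap_definitions_with_root` above, simplified in that the removed code also follows epsilon entries when checking whether a definition is already rooted: a definition whose entry matcher is not the tree root gets a synthetic root match plus a descend step prepended.

// Self-contained illustration only; `Step` is a hypothetical stand-in.
#[derive(Debug, PartialEq)]
enum Step<'a> {
    Match(&'a str),
    Descend,
}

fn wrap_with_root<'a>(root_kind: &'a str, entry: Vec<Step<'a>>) -> Vec<Step<'a>> {
    let already_rooted =
        matches!(entry.first(), Some(Step::Match(kind)) if *kind == root_kind);
    if already_rooted {
        return entry;
    }
    let mut wrapped = vec![Step::Match(root_kind), Step::Descend];
    wrapped.extend(entry);
    wrapped
}

fn main() {
    // `Q = (identifier)` against a `program` root gains the wrapper...
    let q = wrap_with_root("program", vec![Step::Match("identifier")]);
    assert_eq!(
        q,
        vec![Step::Match("program"), Step::Descend, Step::Match("identifier")]
    );
    // ...while a definition that already matches the root is left alone.
    let rooted = wrap_with_root("program", vec![Step::Match("program"), Step::Descend]);
    assert_eq!(rooted.len(), 2);
}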
-#[derive(Debug, Clone)] -pub struct BuildNode<'src> { - pub matcher: BuildMatcher<'src>, - pub effects: Vec>, - pub ref_marker: RefMarker, - pub successors: Vec, - pub nav: Nav, - pub ref_name: Option<&'src str>, -} - -impl<'src> BuildNode<'src> { - pub fn epsilon() -> Self { - Self { - matcher: BuildMatcher::Epsilon, - effects: Vec::new(), - ref_marker: RefMarker::None, - successors: Vec::new(), - nav: Nav::stay(), - ref_name: None, - } - } - - pub fn with_matcher(matcher: BuildMatcher<'src>) -> Self { - Self { - matcher, - effects: Vec::new(), - ref_marker: RefMarker::None, - successors: Vec::new(), - nav: Nav::stay(), - ref_name: None, - } - } - - pub fn add_effect(&mut self, effect: BuildEffect<'src>) { - self.effects.push(effect); - } - - pub fn set_ref_marker(&mut self, marker: RefMarker) { - self.ref_marker = marker; - } - - pub fn set_nav(&mut self, nav: Nav) { - self.nav = nav; - } - - pub fn is_epsilon(&self) -> bool { - matches!(self.matcher, BuildMatcher::Epsilon) - } -} - -/// What a transition matches. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BuildMatcher<'src> { - Epsilon, - Node { - kind: &'src str, - field: Option<&'src str>, - negated_fields: Vec<&'src str>, - }, - Anonymous { - literal: &'src str, - field: Option<&'src str>, - }, - Wildcard { - field: Option<&'src str>, - }, -} - -impl<'src> BuildMatcher<'src> { - pub fn node(kind: &'src str) -> Self { - Self::Node { - kind, - field: None, - negated_fields: Vec::new(), - } - } - - pub fn anonymous(literal: &'src str) -> Self { - Self::Anonymous { - literal, - field: None, - } - } - - pub fn wildcard() -> Self { - Self::Wildcard { field: None } - } - - pub fn with_field(mut self, field: &'src str) -> Self { - match &mut self { - BuildMatcher::Node { field: f, .. } => *f = Some(field), - BuildMatcher::Anonymous { field: f, .. } => *f = Some(field), - BuildMatcher::Wildcard { field: f } => *f = Some(field), - BuildMatcher::Epsilon => {} - } - self - } - - pub fn with_negated_field(mut self, field: &'src str) -> Self { - if let BuildMatcher::Node { negated_fields, .. } = &mut self { - negated_fields.push(field); - } - self - } -} - -/// Effect operations recorded during graph construction. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BuildEffect<'src> { - CaptureNode, - /// Clear current value (set to None). Used on skip paths for optional captures. - ClearCurrent, - /// Start array collection. `is_plus` distinguishes `+` (true) from `*` (false). - StartArray { - is_plus: bool, - }, - PushElement, - EndArray, - /// Start object scope. `for_alternation` is true when this object wraps a captured - /// tagged alternation (tags should create enum), false for QIS/sequence objects - /// (tags in inner alternations should be ignored). - StartObject { - for_alternation: bool, - }, - EndObject, - Field { - name: &'src str, - span: TextRange, - }, - StartVariant(&'src str), - EndVariant, - ToString, -} - -/// Marker for definition call/return transitions. -#[derive(Debug, Clone, PartialEq, Eq, Default)] -pub enum RefMarker { - #[default] - None, - Enter { - ref_id: u32, - }, - Exit { - ref_id: u32, - }, -} - -impl RefMarker { - pub fn enter(ref_id: u32) -> Self { - Self::Enter { ref_id } - } - - pub fn exit(ref_id: u32) -> Self { - Self::Exit { ref_id } - } - - pub fn is_none(&self) -> bool { - matches!(self, RefMarker::None) - } - - pub fn is_some(&self) -> bool { - !matches!(self, RefMarker::None) - } - - pub fn is_enter(&self) -> bool { - matches!(self, RefMarker::Enter { .. 
}) - } - - pub fn is_exit(&self) -> bool { - matches!(self, RefMarker::Exit { .. }) - } -} diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs deleted file mode 100644 index 943eb5d4..00000000 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ /dev/null @@ -1,759 +0,0 @@ -//! Graph construction integrated with Query pipeline. -//! -//! Constructs a `BuildGraph` from the parsed AST, reusing the `symbol_table` -//! and `qis_triggers` populated by earlier passes. - -use std::collections::HashSet; - -use crate::ir::Nav; -use crate::parser::{ - AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, SeqExpr, SeqItem, SyntaxKind, token_src, -}; -use crate::query::graph_qis::{QisContext, collect_propagating_captures}; - -use super::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; -use super::{Query, SymbolTable}; - -/// Context for navigation determination. -/// When `anchored` is true, `prev_anonymous` indicates whether the preceding -/// expression was anonymous (string literal), which determines Exact vs SkipTrivia mode. -#[derive(Debug, Clone, Copy)] -enum NavContext { - Root, - FirstChild { - anchored: bool, - prev_anonymous: bool, - }, - Sibling { - anchored: bool, - prev_anonymous: bool, - }, -} - -impl NavContext { - fn to_nav(self) -> Nav { - match self { - NavContext::Root => Nav::stay(), - NavContext::FirstChild { - anchored: false, .. - } => Nav::down(), - NavContext::FirstChild { - anchored: true, - prev_anonymous, - } => { - if prev_anonymous { - Nav::down_exact() - } else { - Nav::down_skip_trivia() - } - } - NavContext::Sibling { - anchored: false, .. - } => Nav::next(), - NavContext::Sibling { - anchored: true, - prev_anonymous, - } => { - if prev_anonymous { - Nav::next_exact() - } else { - Nav::next_skip_trivia() - } - } - } - } -} - -/// Tracks trailing anchor state for Up navigation. -#[derive(Debug, Clone, Copy)] -struct ExitContext { - has_trailing_anchor: bool, - last_was_anonymous: bool, -} - -impl ExitContext { - fn to_up_nav(self, level: u8) -> Nav { - if !self.has_trailing_anchor { - Nav::up(level) - } else if self.last_was_anonymous { - Nav::up_exact(level) - } else { - Nav::up_skip_trivia(level) - } - } -} - -impl<'a> Query<'a> { - /// Build the graph from the already-populated symbol_table. - /// - /// This method reuses the symbol_table from name resolution and - /// qis_triggers from QIS detection. 
- pub(super) fn construct_graph(&mut self) { - let mut builder = GraphBuilder::new( - self.source, - &mut self.graph, - &self.symbol_table, - &self.qis_ctx, - ); - builder.construct(); - } -} - -struct GraphBuilder<'a, 'q> { - source: &'q str, - graph: &'a mut BuildGraph<'q>, - symbol_table: &'a SymbolTable<'q>, - qis_ctx: &'a QisContext<'q>, - current_def_name: &'q str, - next_ref_id: u32, -} - -impl<'a, 'q> GraphBuilder<'a, 'q> { - fn new( - source: &'q str, - graph: &'a mut BuildGraph<'q>, - symbol_table: &'a SymbolTable<'q>, - qis_ctx: &'a QisContext<'q>, - ) -> Self { - Self { - source, - graph, - symbol_table, - qis_ctx, - current_def_name: "", - next_ref_id: 0, - } - } - - fn construct(&mut self) { - let entries: Vec<_> = self - .symbol_table - .iter() - .map(|(name, body)| (*name, body.clone())) - .collect(); - for (name, body) in entries { - self.current_def_name = name; - let fragment = self.construct_expr(&body, NavContext::Root); - - // Multi-capture definitions need struct wrapping at root - let entry = if self.qis_ctx.multi_capture_defs.contains(name) { - let start_id = self.graph.add_epsilon(); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject { - for_alternation: false, - }); - self.graph.connect(start_id, fragment.entry); - - let end_id = self.graph.add_epsilon(); - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - self.graph.connect(fragment.exit, end_id); - - start_id - } else { - fragment.entry - }; - - self.graph.add_definition(name, entry); - } - - self.link_references(); - } - - /// Link Enter nodes to their definition entry points. - fn link_references(&mut self) { - let mut links: Vec<(NodeId, &'q str, Option)> = Vec::new(); - - for (id, node) in self.graph.iter() { - if let RefMarker::Enter { .. 
} = &node.ref_marker - && let Some(name) = node.ref_name - { - let exit_node = self.find_exit_for_enter(id); - links.push((id, name, exit_node)); - } - } - - for (enter_id, name, exit_id) in links { - if let Some(def_entry) = self.graph.definition(name) { - self.graph.connect(enter_id, def_entry); - // Connect Enter → Exit so Exit node (with Capture effect) is traversed - if let Some(exit) = exit_id { - self.graph.connect(enter_id, exit); - } - } - } - } - - fn find_exit_for_enter(&self, enter_id: NodeId) -> Option { - let enter_node = self.graph.node(enter_id); - let RefMarker::Enter { ref_id } = enter_node.ref_marker else { - return None; - }; - - for (id, node) in self.graph.iter() { - if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker - && *exit_id == ref_id - { - return Some(id); - } - } - None - } - - fn construct_expr(&mut self, expr: &Expr, ctx: NavContext) -> Fragment { - match expr { - Expr::NamedNode(node) => self.construct_named_node(node, ctx), - Expr::AnonymousNode(node) => self.construct_anonymous_node(node, ctx), - Expr::Ref(r) => self.construct_ref(r, ctx), - Expr::AltExpr(alt) => self.construct_alt(alt, ctx), - Expr::SeqExpr(seq) => self.construct_seq(seq, ctx), - Expr::CapturedExpr(cap) => self.construct_capture(cap, ctx), - Expr::QuantifiedExpr(quant) => self.construct_quantifier(quant, ctx), - Expr::FieldExpr(field) => self.construct_field(field, ctx), - } - } - - fn construct_named_node(&mut self, node: &NamedNode, ctx: NavContext) -> Fragment { - let matcher = self.build_named_matcher(node); - let nav = ctx.to_nav(); - let node_id = self.graph.add_matcher(matcher); - self.graph.node_mut(node_id).set_nav(nav); - - let items: Vec<_> = node.items().collect(); - if items.is_empty() { - return Fragment::single(node_id); - } - - let (child_fragments, exit_ctx) = self.construct_item_sequence(&items, true); - if child_fragments.is_empty() { - return Fragment::single(node_id); - } - - let inner = self.graph.sequence(&child_fragments); - self.graph.connect(node_id, inner.entry); - - let exit_id = self.graph.add_epsilon(); - self.graph.node_mut(exit_id).set_nav(exit_ctx.to_up_nav(1)); - - // Trailing anchor retry loop: when UpSkipTrivia fails, try next sibling - if exit_ctx.has_trailing_anchor && !child_fragments.is_empty() { - let last_frag = child_fragments.last().unwrap(); - let last_entry = self.graph.node(last_frag.entry); - let last_matcher = last_entry.matcher.clone(); - let last_effects = last_entry.effects.clone(); - - // Choice point: epsilon with 2 successors (won't be eliminated) - let choice_id = self.graph.add_epsilon(); - self.graph.connect(inner.exit, choice_id); - self.graph.connect(choice_id, exit_id); // First: try UpSkipTrivia - - // Retry node: Nav::next() + same matcher + same effects - let retry_id = self.graph.add_node(BuildNode::with_matcher(last_matcher)); - self.graph.node_mut(retry_id).set_nav(Nav::next()); - for effect in last_effects { - self.graph.node_mut(retry_id).add_effect(effect); - } - self.graph.connect(choice_id, retry_id); // Second: try next sibling - self.graph.connect(retry_id, choice_id); // Loop back to choice - - return Fragment::new(node_id, exit_id); - } - - self.graph.connect(inner.exit, exit_id); - - Fragment::new(node_id, exit_id) - } - - fn construct_item_sequence( - &mut self, - items: &[SeqItem], - is_children: bool, - ) -> (Vec, ExitContext) { - let mut fragments = Vec::new(); - let mut pending_anchor = false; - let mut last_was_anonymous = false; - let mut is_first = true; - - for item in items { - match item { 
- SeqItem::Anchor(_) => { - pending_anchor = true; - } - SeqItem::Expr(expr) => { - let ctx = if is_first { - is_first = false; - if is_children { - NavContext::FirstChild { - anchored: pending_anchor, - prev_anonymous: last_was_anonymous, - } - } else { - NavContext::Sibling { - anchored: pending_anchor, - prev_anonymous: last_was_anonymous, - } - } - } else { - NavContext::Sibling { - anchored: pending_anchor, - prev_anonymous: last_was_anonymous, - } - }; - - last_was_anonymous = is_anonymous_expr(expr); - let frag = self.construct_expr(expr, ctx); - fragments.push(frag); - pending_anchor = false; - } - } - } - - let exit_ctx = ExitContext { - has_trailing_anchor: pending_anchor, - last_was_anonymous, - }; - - (fragments, exit_ctx) - } - - fn build_named_matcher(&self, node: &NamedNode) -> BuildMatcher<'q> { - let kind = node - .node_type() - .map(|t| token_src(&t, self.source)) - .unwrap_or("_"); - - let negated_fields: Vec<&'q str> = node - .as_cst() - .children() - .filter_map(NegatedField::cast) - .filter_map(|nf| nf.name()) - .map(|t| token_src(&t, self.source)) - .collect(); - - let field = self.find_field_constraint(node.as_cst()); - - if node.is_any() { - BuildMatcher::Wildcard { field } - } else { - BuildMatcher::Node { - kind, - field, - negated_fields, - } - } - } - - fn construct_anonymous_node(&mut self, node: &AnonymousNode, ctx: NavContext) -> Fragment { - let field = self.find_field_constraint(node.as_cst()); - let nav = ctx.to_nav(); - - let matcher = if node.is_any() { - BuildMatcher::Wildcard { field } - } else { - let literal = node - .value() - .map(|t| token_src(&t, self.source)) - .unwrap_or(""); - BuildMatcher::Anonymous { literal, field } - }; - - let node_id = self.graph.add_matcher(matcher); - self.graph.node_mut(node_id).set_nav(nav); - Fragment::single(node_id) - } - - fn construct_ref(&mut self, r: &Ref, ctx: NavContext) -> Fragment { - let Some(name_token) = r.name() else { - return self.graph.epsilon_fragment(); - }; - - let ref_id = self.next_ref_id; - self.next_ref_id += 1; - - let enter_id = self.graph.add_epsilon(); - let nav = ctx.to_nav(); - self.graph.node_mut(enter_id).set_nav(nav); - self.graph - .node_mut(enter_id) - .set_ref_marker(RefMarker::enter(ref_id)); - - let exit_id = self.graph.add_epsilon(); - self.graph - .node_mut(exit_id) - .set_ref_marker(RefMarker::exit(ref_id)); - - let name = token_src(&name_token, self.source); - self.graph.node_mut(enter_id).ref_name = Some(name); - - Fragment::new(enter_id, exit_id) - } - - fn construct_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { - match alt.kind() { - AltKind::Tagged => self.construct_tagged_alt(alt, ctx), - AltKind::Untagged | AltKind::Mixed => self.construct_untagged_alt(alt, ctx), - } - } - - fn construct_tagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { - let branches: Vec<_> = alt.branches().collect(); - if branches.is_empty() { - return self.graph.epsilon_fragment(); - } - - let branch_id = self.graph.add_epsilon(); - self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); - - let exit_id = self.graph.add_epsilon(); - - for branch in &branches { - let frag = self.construct_tagged_branch(branch); - self.graph.connect(branch_id, frag.entry); - self.graph.connect(frag.exit, exit_id); - } - - Fragment::new(branch_id, exit_id) - } - - fn construct_tagged_branch(&mut self, branch: &Branch) -> Fragment { - let Some(label_token) = branch.label() else { - return branch - .body() - .map(|b| self.construct_expr(&b, NavContext::Root)) - .unwrap_or_else(|| 
self.graph.epsilon_fragment()); - }; - let Some(body) = branch.body() else { - return self.graph.epsilon_fragment(); - }; - - let label = token_src(&label_token, self.source); - - let start_id = self.graph.add_epsilon(); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartVariant(label)); - - let body_frag = self.construct_expr(&body, NavContext::Root); - - // Count Field effects to determine object wrapping. - // Note: Single-capture flattening (ADR-0007) is handled in type inference, - // not here, because we don't know if the alternation is captured yet. - // Uncaptured inline tagged alternations need Field effects preserved. - let field_count = self.count_field_effects(body_frag.entry); - - if field_count > 1 { - // Multiple captures: wrap with StartObject/EndObject - // This is NOT the alternation capture object - it's the variant's scope - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject { - for_alternation: false, - }); - } - - let end_id = self.graph.add_epsilon(); - if field_count > 1 { - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - } - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndVariant); - - self.graph.connect(start_id, body_frag.entry); - self.graph.connect(body_frag.exit, end_id); - - Fragment::new(start_id, end_id) - } - - fn construct_untagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { - let branches: Vec<_> = alt.branches().filter_map(|b| b.body()).collect(); - - if branches.is_empty() { - return self.graph.epsilon_fragment(); - } - - let branch_id = self.graph.add_epsilon(); - self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); - - let exit_id = self.graph.add_epsilon(); - - for body in &branches { - let frag = self.construct_expr(body, NavContext::Root); - self.graph.connect(branch_id, frag.entry); - self.graph.connect(frag.exit, exit_id); - } - - Fragment::new(branch_id, exit_id) - } - - fn construct_seq(&mut self, seq: &SeqExpr, ctx: NavContext) -> Fragment { - let items: Vec<_> = seq.items().collect(); - - // Uncaptured sequences don't create object scope - they just group items. - // Captures propagate to parent scope. Object scope is created by: - // - Captured sequences ({...} @name) via construct_capture - // - QIS quantifiers that wrap loop body with StartObject/EndObject - - let start_id = self.graph.add_epsilon(); - self.graph.node_mut(start_id).set_nav(ctx.to_nav()); - - let (child_fragments, _exit_ctx) = self.construct_item_sequence(&items, false); - let inner = self.graph.sequence(&child_fragments); - - self.graph.connect(start_id, inner.entry); - - Fragment::new(start_id, inner.exit) - } - - fn construct_capture(&mut self, cap: &CapturedExpr, ctx: NavContext) -> Fragment { - let Some(inner_expr) = cap.inner() else { - return self.graph.epsilon_fragment(); - }; - - let inner_frag = self.construct_expr(&inner_expr, ctx); - - let capture_token = cap.name(); - let capture_name = capture_token.as_ref().map(|t| token_src(t, self.source)); - - let has_to_string = cap - .type_annotation() - .and_then(|t| t.name()) - .map(|n| n.text() == "string") - .unwrap_or(false); - - // Captured sequence/alternation creates object scope for nested fields. - // Tagged alternations use variants instead (handled in construct_tagged_alt). - // Quantifiers never need outer wrapper - QIS handles per-element wrapping inside the array. 
- let needs_object_wrapper = match &inner_expr { - Expr::SeqExpr(_) | Expr::AltExpr(_) => true, - Expr::QuantifiedExpr(_) => false, - _ => false, - }; - - // Only add CaptureNode to inner matchers when capturing a node directly. - // Captured containers (seq/alt) capture structure, not individual nodes. - if !needs_object_wrapper { - let matchers = self.find_all_matchers(inner_frag.entry); - for matcher_id in matchers { - self.graph - .node_mut(matcher_id) - .add_effect(BuildEffect::CaptureNode); - - if has_to_string { - self.graph - .node_mut(matcher_id) - .add_effect(BuildEffect::ToString); - } - } - } - - let Some(name) = capture_name else { - return inner_frag; - }; - - // Single-capture definitions unwrap: no Field effect, type is capture's type directly. - // Only the specific propagating capture should unwrap, not nested captures. - let is_single_capture = self - .qis_ctx - .single_capture_defs - .get(self.current_def_name) - .map(|c| *c == name) - .unwrap_or(false); - - if is_single_capture && needs_object_wrapper { - // Captured container at single-capture definition root - // let inner_captures = self.collect_propagating_captures(&inner_expr); - let inner_captures = collect_propagating_captures(&inner_expr, self.source); - if inner_captures.is_empty() { - // No inner captures → Void (per ADR-0009 Payload Rule). - // Return epsilon for matching only, discard inner effects. - return self.graph.epsilon_fragment(); - } - // Has inner captures → wrap with StartObject/EndObject but skip outer Field - let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); - let start_id = self.graph.add_epsilon(); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject { - for_alternation: is_alternation_capture, - }); - self.graph.connect(start_id, inner_frag.entry); - - let end_id = self.graph.add_epsilon(); - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - self.graph.connect(inner_frag.exit, end_id); - - return Fragment::new(start_id, end_id); - } - - if is_single_capture { - // Non-container single capture: unwrap directly - return inner_frag; - } - - let span = capture_token - .as_ref() - .map(|t| t.text_range()) - .unwrap_or_default(); - - // Check if we're capturing an alternation (for enum vs struct distinction) - let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); - - let (entry, exit) = if needs_object_wrapper { - // Wrap with StartObject/EndObject for composite captures - let start_id = self.graph.add_epsilon(); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject { - for_alternation: is_alternation_capture, - }); - self.graph.connect(start_id, inner_frag.entry); - - let end_id = self.graph.add_epsilon(); - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - self.graph.connect(inner_frag.exit, end_id); - - (start_id, end_id) - } else { - (inner_frag.entry, inner_frag.exit) - }; - - let field_id = self.graph.add_epsilon(); - self.graph - .node_mut(field_id) - .add_effect(BuildEffect::Field { name, span }); - self.graph.connect(exit, field_id); - Fragment::new(entry, field_id) - } - - fn construct_quantifier(&mut self, quant: &QuantifiedExpr, ctx: NavContext) -> Fragment { - let Some(inner_expr) = quant.inner() else { - return self.graph.epsilon_fragment(); - }; - let Some(op) = quant.operator() else { - return self.construct_expr(&inner_expr, ctx); - }; - - // Build inner with Stay nav; the repetition combinator handles initial/re-entry nav - let f = 
self.construct_expr(&inner_expr, NavContext::Root); - let nav = ctx.to_nav(); - let qis = self.qis_ctx.qis_triggers.contains_key(quant); - - match (op.kind(), qis) { - (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f, nav), - (SyntaxKind::Star, true) => self.graph.zero_or_more_array_qis(f, nav), - (SyntaxKind::StarQuestion, false) => self.graph.zero_or_more_array_lazy(f, nav), - (SyntaxKind::StarQuestion, true) => self.graph.zero_or_more_array_qis_lazy(f, nav), - (SyntaxKind::Plus, false) => self.graph.one_or_more_array(f, nav), - (SyntaxKind::Plus, true) => self.graph.one_or_more_array_qis(f, nav), - (SyntaxKind::PlusQuestion, false) => self.graph.one_or_more_array_lazy(f, nav), - (SyntaxKind::PlusQuestion, true) => self.graph.one_or_more_array_qis_lazy(f, nav), - (SyntaxKind::Question, false) => self.graph.optional(f), - (SyntaxKind::Question, true) => self.graph.optional_qis(f), - (SyntaxKind::QuestionQuestion, false) => self.graph.optional_lazy(f), - (SyntaxKind::QuestionQuestion, true) => self.graph.optional_qis_lazy(f), - _ => f, - } - } - - fn construct_field(&mut self, field: &FieldExpr, ctx: NavContext) -> Fragment { - let Some(value_expr) = field.value() else { - return self.graph.epsilon_fragment(); - }; - self.construct_expr(&value_expr, ctx) - } - - fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'q str> { - let parent = node.parent()?; - let field_expr = FieldExpr::cast(parent)?; - let name_token = field_expr.name()?; - Some(token_src(&name_token, self.source)) - } - - fn find_all_matchers(&self, start: NodeId) -> Vec { - let mut result = Vec::new(); - let mut visited = HashSet::new(); - self.collect_matchers(start, &mut result, &mut visited); - result - } - - fn collect_matchers( - &self, - node_id: NodeId, - result: &mut Vec, - visited: &mut HashSet, - ) { - if !visited.insert(node_id) { - return; - } - - let node = self.graph.node(node_id); - - // References are opaque to captures: don't traverse into definition body. - // Capture should happen at Exit (after reference executes, cursor at matched node). - if let RefMarker::Enter { ref_id } = node.ref_marker { - for (id, n) in self.graph.iter() { - if let RefMarker::Exit { ref_id: exit_id } = n.ref_marker - && exit_id == ref_id - { - result.push(id); - return; - } - } - return; - } - - if !node.is_epsilon() { - result.push(node_id); - // Continue through to find all matchers in loops (e.g., try_next in quantifiers) - } - - for &succ in &node.successors { - self.collect_matchers(succ, result, visited); - } - } - - /// Count Field effects reachable from a node (for variant flattening). - fn count_field_effects(&self, start: NodeId) -> usize { - self.nodes_with_field_effects(start) - .iter() - .flat_map(|&id| &self.graph.node(id).effects) - .filter(|e| matches!(e, BuildEffect::Field { .. })) - .count() - } - - fn nodes_with_field_effects(&self, start: NodeId) -> Vec { - let mut result = Vec::new(); - let mut visited = HashSet::new(); - let mut stack = vec![start]; - - while let Some(node_id) = stack.pop() { - if !visited.insert(node_id) { - continue; - } - let node = self.graph.node(node_id); - if node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::Field { .. 
})) - { - result.push(node_id); - } - stack.extend(&node.successors); - } - result - } -} - -fn is_anonymous_expr(expr: &Expr) -> bool { - matches!(expr, Expr::AnonymousNode(n) if !n.is_any()) -} diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs deleted file mode 100644 index 262fcec1..00000000 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ /dev/null @@ -1,349 +0,0 @@ -//! Tests for graph construction integrated with Query pipeline. - -use indoc::indoc; - -use crate::query::Query; - -fn snapshot(input: &str) -> String { - let query = Query::try_from(input).unwrap().build_graph(); - query.graph().dump() -} - -fn snapshot_optimized(input: &str) -> String { - let query = Query::try_from(input).unwrap().build_graph(); - query.graph().dump_live(query.dead_nodes()) -} - -#[test] -fn simple_named_node() { - insta::assert_snapshot!(snapshot("Q = (identifier)"), @r" - Q = (0) - - (0) —(identifier)→ (✓) - "); -} - -#[test] -fn named_node_with_capture() { - insta::assert_snapshot!(snapshot("Q = (identifier) @id"), @r" - Q = (0) - - (0) —(identifier)—[CaptureNode]→ (✓) - "); -} - -#[test] -fn named_node_with_children() { - insta::assert_snapshot!(snapshot("Q = (function_definition (identifier))"), @r" - Q = (0) - - (0) —(function_definition)→ (1) - (1) —{↘}—(identifier)→ (2) - (2) —{↗¹}—𝜀→ (✓) - "); -} - -#[test] -fn sequence() { - insta::assert_snapshot!(snapshot("Q = { (a) (b) }"), @r" - Q = (1) - - (0) —𝜀→ (1) - (1) —{→}—(a)→ (2) - (2) —{→}—(b)→ (✓) - "); -} - -#[test] -fn sequence_with_captures() { - insta::assert_snapshot!(snapshot("Q = { (a) @x (b) @y }"), @r" - Q = (0) - - (0) —𝜀—[StartObject]→ (1) - (1) —{→}—(a)—[CaptureNode]→ (2) - (2) —𝜀—[Field(x)]→ (3) - (3) —{→}—(b)—[CaptureNode]→ (6) - (4) —𝜀—[Field(y)]→ (6) - (5) —𝜀—[StartObject]→ (0) - (6) —𝜀—[Field(y), EndObject]→ (✓) - "); -} - -#[test] -fn alternation_untagged() { - insta::assert_snapshot!(snapshot("Q = [ (a) (b) ]"), @r" - Q = (0) - - (0) —𝜀→ (2), (3) - (1) —𝜀→ (✓) - (2) —(a)→ (1) - (3) —(b)→ (1) - "); -} - -#[test] -fn alternation_tagged() { - insta::assert_snapshot!(snapshot("Q = [ A: (a) @x B: (b) @y ]"), @r" - Q = (00) - - (00) —𝜀—[StartObject]→ (03), (07) - (01) —𝜀→ (11) - (02) —𝜀—[StartVariant(A)]→ (03) - (03) —(a)—[StartVariant(A), CaptureNode]→ (05) - (04) —𝜀—[Field(x)]→ (05) - (05) —𝜀—[Field(x), EndVariant]→ (11) - (06) —𝜀—[StartVariant(B)]→ (07) - (07) —(b)—[StartVariant(B), CaptureNode]→ (09) - (08) —𝜀—[Field(y)]→ (09) - (09) —𝜀—[Field(y), EndVariant]→ (11) - (10) —𝜀—[StartObject]→ (00) - (11) —𝜀—[EndObject]→ (✓) - "); -} - -#[test] -fn quantifier_star() { - insta::assert_snapshot!(snapshot("Q = (identifier)*"), @r" - Q = (4) - - (0) —(identifier)→ (6) - (1) —𝜀—[StartArray]→ (4) - (2) —𝜀—[EndArray]→ (✓) - (3) —𝜀—[PushElement]→ (6) - (4) —𝜀—[StartArray]→ (0), (2) - (5) —{→}—(identifier)→ (6) - (6) —𝜀—[PushElement]→ (5), (2) - "); -} - -#[test] -fn quantifier_plus() { - insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" - Q = (0) - - (0) —(identifier)—[StartArray]→ (6) - (1) —𝜀—[StartArray]→ (0) - (2) —𝜀—[EndArray]→ (✓) - (3) —𝜀—[PushElement]→ (6) - (4) —𝜀→ (✓) - (5) —{→}—(identifier)→ (6) - (6) —𝜀—[PushElement]→ (5), (2) - "); -} - -#[test] -fn quantifier_optional() { - insta::assert_snapshot!(snapshot("Q = (identifier)?"), @r" - Q = (1) - - (0) —(identifier)→ (2) - (1) —𝜀→ (0), (3) - (2) —𝜀→ (✓) - (3) —𝜀—[ClearCurrent]→ (2) - "); -} - -#[test] -fn reference() { - let input = indoc! 
{r#" - A = (identifier) - B = (A) - "#}; - insta::assert_snapshot!(snapshot(input), @r" - A = (0) - B = (1) - - (0) —(identifier)→ (✓) - (1) ——𝜀→ (0), (2) - (2) —𝜀—→ (✓) - "); -} - -#[test] -fn anonymous_node() { - insta::assert_snapshot!(snapshot(r#"Q = "hello""#), @r#" - Q = (0) - - (0) —"hello"→ (✓) - "#); -} - -#[test] -fn wildcard() { - insta::assert_snapshot!(snapshot("Q = (_)"), @r" - Q = (0) - - (0) —(🞵)→ (✓) - "); -} - -#[test] -fn field_constraint() { - insta::assert_snapshot!(snapshot("Q = (function name: (identifier))"), @r" - Q = (0) - - (0) —(function)→ (1) - (1) —{↘}—(identifier)@name→ (2) - (2) —{↗¹}—𝜀→ (✓) - "); -} - -#[test] -fn to_string_annotation() { - insta::assert_snapshot!(snapshot("Q = (identifier) @name ::string"), @r" - Q = (0) - - (0) —(identifier)—[CaptureNode, ToString]→ (✓) - "); -} - -#[test] -fn anchor_first_child() { - insta::assert_snapshot!(snapshot("Q = (parent . (child))"), @r" - Q = (0) - - (0) —(parent)→ (1) - (1) —{↘.}—(child)→ (2) - (2) —{↗¹}—𝜀→ (✓) - "); -} - -#[test] -fn anchor_sibling() { - insta::assert_snapshot!(snapshot("Q = (parent (a) . (b))"), @r" - Q = (0) - - (0) —(parent)→ (1) - (1) —{↘}—(a)→ (2) - (2) —{→·}—(b)→ (3) - (3) —{↗¹}—𝜀→ (✓) - "); -} - -#[test] -fn optimized_simple() { - insta::assert_snapshot!(snapshot_optimized("Q = (identifier) @id"), @r" - Q = (0) - - (0) —(identifier)—[CaptureNode]→ (✓) - "); -} - -#[test] -fn optimized_sequence() { - insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" - Q = (0) - - (0) —𝜀—[StartObject]→ (1) - (1) —{→}—(a)—[CaptureNode]→ (2) - (2) —𝜀—[Field(x)]→ (3) - (3) —{→}—(b)—[CaptureNode]→ (6) - (6) —𝜀—[Field(y), EndObject]→ (✓) - "); -} - -#[test] -fn symbol_table_reuse() { - let input = indoc! {r#" - Foo = (identifier) - Bar = (Foo) - Baz = (Bar) - "#}; - let query = Query::try_from(input).unwrap().build_graph(); - - assert!(query.graph().definition("Foo").is_some()); - assert!(query.graph().definition("Bar").is_some()); - assert!(query.graph().definition("Baz").is_some()); - - insta::assert_snapshot!(query.graph().dump(), @r" - Foo = (0) - Bar = (1) - Baz = (3) - - (0) —(identifier)→ (✓) - (1) ——𝜀→ (0), (2) - (2) —𝜀—→ (✓) - (3) ——𝜀→ (1), (4) - (4) —𝜀—→ (✓) - "); -} - -// ============================================================================ -// wrap_definitions_with_root -// ============================================================================ - -#[test] -fn wrap_with_root_simple() { - let query = Query::try_from("Q = (identifier)") - .unwrap() - .build_graph() - .wrap_with_root("program"); - - insta::assert_snapshot!(query.graph().dump(), @r" - Q = (1) - - (0) —{↘}—(identifier)→ (✓) - (1) —(program)→ (0) - (2) —{↘}—𝜀→ (0) - "); -} - -#[test] -fn wrap_with_root_already_matches() { - // Definition already starts with root - no wrapping needed - let query = Query::try_from("Q = (program (identifier))") - .unwrap() - .build_graph() - .wrap_with_root("program"); - - insta::assert_snapshot!(query.graph().dump(), @r" - Q = (0) - - (0) —(program)→ (1) - (1) —{↘}—(identifier)→ (2) - (2) —{↗¹}—𝜀→ (✓) - "); -} - -#[test] -fn wrap_with_root_multiple_definitions() { - let input = indoc! 
{r#" - Foo = (identifier) - Bar = (program (string)) - "#}; - let query = Query::try_from(input) - .unwrap() - .build_graph() - .wrap_with_root("program"); - - // Foo gets wrapped, Bar already matches root - insta::assert_snapshot!(query.graph().dump(), @r" - Foo = (4) - Bar = (1) - - (0) —{↘}—(identifier)→ (✓) - (1) —(program)→ (2) - (2) —{↘}—(string)→ (3) - (3) —{↗¹}—𝜀→ (✓) - (4) —(program)→ (0) - (5) —{↘}—𝜀→ (0) - "); -} - -#[test] -fn wrap_with_root_with_captures() { - let query = Query::try_from("Q = (function_declaration name: (identifier) @name)") - .unwrap() - .build_graph() - .wrap_with_root("program"); - - insta::assert_snapshot!(query.graph().dump(), @r" - Q = (3) - - (0) —{↘}—(function_declaration)→ (1) - (1) —{↘}—(identifier)@name—[CaptureNode]→ (2) - (2) —{↗¹}—𝜀→ (✓) - (3) —(program)→ (0) - (4) —{↘}—𝜀→ (0) - "); -} diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs deleted file mode 100644 index 89b3e39c..00000000 --- a/crates/plotnik-lib/src/query/graph_dump.rs +++ /dev/null @@ -1,261 +0,0 @@ -//! Dump helpers for graph inspection and testing. - -use std::collections::{HashMap, HashSet}; -use std::fmt::Write; - -use crate::ir::{Nav, NavKind}; - -use super::graph::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker}; - -/// Printer for `BuildGraph` with configurable output options. -pub struct GraphPrinter<'a, 'src> { - graph: &'a BuildGraph<'src>, - dead_nodes: Option<&'a HashSet>, - show_dead: bool, -} - -impl<'a, 'src> GraphPrinter<'a, 'src> { - pub fn new(graph: &'a BuildGraph<'src>) -> Self { - Self { - graph, - dead_nodes: None, - show_dead: false, - } - } - - pub fn with_dead_nodes(mut self, dead: &'a HashSet) -> Self { - self.dead_nodes = Some(dead); - self - } - - pub fn show_dead(mut self, show: bool) -> Self { - self.show_dead = show; - self - } - - pub fn dump(&self) -> String { - let mut out = String::new(); - self.format(&mut out).expect("String write never fails"); - out - } - - fn node_width(&self) -> usize { - let max_id = self.graph.iter().map(|(id, _)| id).max().unwrap_or(0); - if max_id == 0 { - 1 - } else { - ((max_id as f64).log10().floor() as usize) + 1 - } - } - - fn format_node_id(&self, id: NodeId, width: usize) -> String { - format!("({:0width$})", id, width = width) - } - - fn format(&self, w: &mut String) -> std::fmt::Result { - let width = self.node_width(); - - // Build ref_id → name lookup from Enter nodes - let ref_names: HashMap = self - .graph - .iter() - .filter_map(|(_, node)| { - if let RefMarker::Enter { ref_id } = &node.ref_marker { - Some((*ref_id, node.ref_name.unwrap_or("?"))) - } else { - None - } - }) - .collect(); - - for (name, entry) in self.graph.definitions() { - writeln!(w, "{} = {}", name, self.format_node_id(entry, width))?; - } - if self.graph.definitions().next().is_some() { - writeln!(w)?; - } - - for (id, node) in self.graph.iter() { - let is_dead = self.dead_nodes.map(|d| d.contains(&id)).unwrap_or(false); - - if is_dead && !self.show_dead { - continue; - } - - // Source node - write!(w, "{}", self.format_node_id(id, width))?; - - // Dead node short-circuit - if is_dead { - writeln!(w, " → (⨯)")?; - continue; - } - - write!(w, " —")?; - - // Navigation (omit for Stay) - if !node.nav.is_stay() { - write!(w, "{}—", format_nav(&node.nav))?; - } - - // Enter ref marker (before matcher) - if let RefMarker::Enter { .. 
} = &node.ref_marker { - let name = node.ref_name.unwrap_or("?"); - write!(w, "<{}>—", name)?; - } - - // Matcher - self.format_matcher(w, &node.matcher)?; - - // Exit ref marker (after matcher) - if let RefMarker::Exit { ref_id } = &node.ref_marker { - let name = ref_names.get(ref_id).copied().unwrap_or("?"); - write!(w, "—<{}>", name)?; - } - - // Effects - if !node.effects.is_empty() { - write!(w, "—[")?; - for (i, effect) in node.effects.iter().enumerate() { - if i > 0 { - write!(w, ", ")?; - } - write!(w, "{}", format_effect(effect))?; - } - write!(w, "]")?; - } - - // Successors - self.format_successors(w, &node.successors, width)?; - - writeln!(w)?; - } - - Ok(()) - } - - fn format_matcher(&self, w: &mut String, matcher: &BuildMatcher<'src>) -> std::fmt::Result { - match matcher { - BuildMatcher::Epsilon => write!(w, "𝜀"), - BuildMatcher::Node { - kind, - field, - negated_fields, - } => { - write!(w, "({})", kind)?; - if let Some(f) = field { - write!(w, "@{}", f)?; - } - for neg in negated_fields { - write!(w, "!{}", neg)?; - } - Ok(()) - } - BuildMatcher::Anonymous { literal, field } => { - write!(w, "\"{}\"", literal)?; - if let Some(f) = field { - write!(w, "@{}", f)?; - } - Ok(()) - } - BuildMatcher::Wildcard { field } => { - write!(w, "(🞵)")?; - if let Some(f) = field { - write!(w, "@{}", f)?; - } - Ok(()) - } - } - } - - fn format_successors( - &self, - w: &mut String, - successors: &[NodeId], - width: usize, - ) -> std::fmt::Result { - let live_succs: Vec<_> = successors - .iter() - .filter(|s| self.dead_nodes.map(|d| !d.contains(s)).unwrap_or(true)) - .collect(); - - if live_succs.is_empty() { - write!(w, "→ (✓)") - } else { - write!(w, "→ ")?; - for (i, s) in live_succs.iter().enumerate() { - if i > 0 { - write!(w, ", ")?; - } - write!(w, "{}", self.format_node_id(**s, width))?; - } - Ok(()) - } - } -} - -fn format_nav(nav: &Nav) -> String { - match nav.kind { - NavKind::Stay => "{˟}".to_string(), - NavKind::Next => "{→}".to_string(), - NavKind::NextSkipTrivia => "{→·}".to_string(), - NavKind::NextExact => "{→!}".to_string(), - NavKind::Down => "{↘}".to_string(), - NavKind::DownSkipTrivia => "{↘.}".to_string(), - NavKind::DownExact => "{↘!}".to_string(), - NavKind::Up => format!("{{↗{}}}", to_superscript(nav.level)), - NavKind::UpSkipTrivia => format!("{{↗·{}}}", to_superscript(nav.level)), - NavKind::UpExact => format!("{{↗!{}}}", to_superscript(nav.level)), - } -} - -fn to_superscript(n: u8) -> String { - const SUPERSCRIPTS: [char; 10] = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹']; - if n == 0 { - return "⁰".to_string(); - } - let mut result = String::new(); - let mut num = n; - while num > 0 { - result.insert(0, SUPERSCRIPTS[(num % 10) as usize]); - num /= 10; - } - result -} - -fn format_effect(effect: &BuildEffect) -> String { - match effect { - BuildEffect::CaptureNode => "CaptureNode".to_string(), - BuildEffect::ClearCurrent => "ClearCurrent".to_string(), - BuildEffect::StartArray { .. } => "StartArray".to_string(), - BuildEffect::PushElement => "PushElement".to_string(), - BuildEffect::EndArray => "EndArray".to_string(), - BuildEffect::StartObject { .. } => "StartObject".to_string(), - BuildEffect::EndObject => "EndObject".to_string(), - BuildEffect::Field { name, .. 
} => format!("Field({})", name), - BuildEffect::StartVariant(v) => format!("StartVariant({})", v), - BuildEffect::EndVariant => "EndVariant".to_string(), - BuildEffect::ToString => "ToString".to_string(), - } -} - -impl<'src> BuildGraph<'src> { - pub fn printer(&self) -> GraphPrinter<'_, 'src> { - GraphPrinter::new(self) - } - - pub fn dump(&self) -> String { - self.printer().dump() - } - - pub fn dump_with_dead(&self, dead_nodes: &HashSet) -> String { - self.printer() - .with_dead_nodes(dead_nodes) - .show_dead(true) - .dump() - } - - pub fn dump_live(&self, dead_nodes: &HashSet) -> String { - self.printer().with_dead_nodes(dead_nodes).dump() - } -} diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs deleted file mode 100644 index d21166ad..00000000 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ /dev/null @@ -1,1093 +0,0 @@ -//! Golden master test for graph construction and type inference. -//! -//! This test exercises the full spectrum of ADR-specified behaviors: -//! - ADR-0004: Binary format concepts (transitions, effects, strings, types) -//! - ADR-0005: Transition graph (matchers, nav, ref markers, quantifiers) -//! - ADR-0006: Query execution (effect stream, materialization) -//! - ADR-0007: Type metadata (TypeKind, synthetic naming, flattening) -//! - ADR-0008: Tree navigation (Nav kinds, anchor lowering) -//! - ADR-0009: Type system (cardinality, scopes, alternations, QIS, unification) - -use indoc::indoc; - -use crate::query::Query; - -fn golden_master(source: &str) -> String { - let query = Query::try_from(source) - .expect("parse should succeed") - .build_graph(); - - let mut out = String::new(); - - out.push_str( - "═══════════════════════════════════════════════════════════════════════════════\n", - ); - out.push_str(" TRANSITION GRAPH\n"); - out.push_str( - "═══════════════════════════════════════════════════════════════════════════════\n\n", - ); - out.push_str(&query.graph().dump_live(query.dead_nodes())); - - out.push_str( - "\n═══════════════════════════════════════════════════════════════════════════════\n", - ); - out.push_str(" TYPE INFERENCE\n"); - out.push_str( - "═══════════════════════════════════════════════════════════════════════════════\n\n", - ); - out.push_str(&query.type_info().dump()); - - out -} - -/// Comprehensive test covering all major ADR features. -/// -/// Query structure: -/// 1. Basic captures with ::string annotation (ADR-0007, ADR-0009) -/// 2. Field constraints and negated fields (ADR-0005) -/// 3. Anchors - first child, last child, siblings (ADR-0008) -/// 4. Quantifiers - *, +, ? with captures (ADR-0005, ADR-0009) -/// 5. QIS - multiple captures in quantified expr (ADR-0009) -/// 6. Tagged alternations - enum generation (ADR-0007, ADR-0009) -/// 7. Untagged alternations - struct merge (ADR-0009) -/// 8. Captured sequences - nested scopes (ADR-0009) -/// 9. Definition references - Enter/Exit (ADR-0005, ADR-0006) -/// 10. Cardinality propagation and joins (ADR-0009) -/// 11. Single-capture variant flattening (ADR-0007, ADR-0009) -/// 12. Deep nesting with multi-level Up (ADR-0008) -/// 13. Wildcards and string literals (ADR-0005) -#[test] -fn golden_master_comprehensive() { - let source = indoc! 
{r#" - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 1: Basic captures and type annotations - // ═══════════════════════════════════════════════════════════════════════════ - - // Simple node capture → Node type - SimpleCapture = (identifier) @name - - // String annotation → String type - StringCapture = (identifier) @name ::string - - // Multiple flat captures → Struct with multiple fields - MultiCapture = (function - name: (identifier) @fn_name ::string - body: (block) @fn_body - ) - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 2: Navigation and anchors (ADR-0008) - // ═══════════════════════════════════════════════════════════════════════════ - - // First child anchor → DownSkipTrivia - AnchorFirst = (parent . (first_child) @first) - - // Last child anchor → UpSkipTrivia - AnchorLast = (parent (last_child) @last .) - - // Adjacent siblings → NextSkipTrivia - AnchorSibling = (parent (a) @left . (b) @right) - - // Deep nesting with multi-level Up - DeepNest = (a (b (c (d) @deep))) - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 3: Quantifiers (ADR-0005, ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Star quantifier → ArrayStar - StarQuant = (container (item)* @items) - - // Plus quantifier → ArrayPlus - PlusQuant = (container (item)+ @items) - - // Optional quantifier → Optional - OptQuant = (container (item)? @maybe_item) - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 4: QIS - Quantifier-Induced Scope (ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Two captures in quantified node → QIS triggers, creates element struct - QisNode = (function - name: (identifier) @name - body: (block) @body - )* - - // Two captures in quantified sequence → QIS triggers - QisSequence = { (key) @key (value) @value }* - - // Single capture → NO QIS, standard cardinality propagation - NoQis = { (item) @item }* - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 5: Tagged alternations (ADR-0007, ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Tagged at definition root → Definition becomes Enum - // Single capture per variant → flattened payload - TaggedRoot = [ - Ok: (success) @val - Err: (error) @msg ::string - ] - - // Tagged alternation captured → creates nested Enum - TaggedCaptured = (wrapper [ - Left: (left_node) @l - Right: (right_node) @r - ] @choice) - - // Tagged with multi-capture variant → NOT flattened, creates struct - TaggedMulti = [ - Simple: (node) @val - Complex: (pair (key) @k (value) @v) - ] - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 6: Untagged alternations (ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Symmetric captures → required field - UntaggedSymmetric = [ (a) @val (b) @val ] - - // Asymmetric captures → both become Optional - UntaggedAsymmetric = [ (a) @x (b) @y ] - - // Captured untagged → creates struct scope - UntaggedCaptured = [ (a) @x (b) @y ] @data - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 7: Captured sequences and nested scopes (ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Captured 
sequence → creates nested struct - CapturedSeq = (outer { (inner) @x (inner2) @y } @nested) - - // Uncaptured sequence → captures propagate to parent - UncapturedSeq = (outer { (inner) @x (inner2) @y }) - - // Deeply nested scopes - NestedScopes = { { (a) @a } @inner1 { (b) @b } @inner2 } @outer - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 8: Definition references (ADR-0005, ADR-0006) - // ═══════════════════════════════════════════════════════════════════════════ - - // Base definition - Identifier = (identifier) @id - - // Reference to definition → Enter/Exit markers - RefSimple = (Identifier) - - // Captured reference → captures the reference result - RefCaptured = (Identifier) @captured_id - - // Chained references - RefChain = (RefSimple) - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 9: Cardinality combinations (ADR-0009) - // ═══════════════════════════════════════════════════════════════════════════ - - // Cardinality in alternation branches - // Branch 1: @item cardinality 1, Branch 2: @item cardinality + - // Join produces + - CardinalityJoin = [ (single) @item (multi (x)+ @item) ] - - // Nested quantifiers - NestedQuant = ((item)* @inner)+ @outer - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 10: Mixed patterns (comprehensive) - // ═══════════════════════════════════════════════════════════════════════════ - - // Everything combined: field constraints, anchors, quantifiers, alternations - Complex = (module - name: (identifier) @mod_name ::string - . (import)* @imports - body: (block { - [ - Func: (function - name: (identifier) @fn_name ::string - params: (parameters { (param) @p }* @params) - body: (block) @fn_body - ) - Class: (class - name: (identifier) @cls_name ::string - body: (class_body) @cls_body - ) - ] - }* @items) . 
- ) - - // ═══════════════════════════════════════════════════════════════════════════ - // SECTION 11: Edge cases - // ═══════════════════════════════════════════════════════════════════════════ - - // Wildcard capture - WildcardCapture = _ @any - - // String literal (anonymous node) - StringLiteral = "+" @op - - // No captures → Void type - NoCaptures = (identifier) - - // Empty alternation branch (unit variant) - EmptyBranch = [ - Some: (value) @val - None: (none_marker) - ] - "#}; - - insta::assert_snapshot!(golden_master(source), @r#" - ═══════════════════════════════════════════════════════════════════════════════ - TRANSITION GRAPH - ═══════════════════════════════════════════════════════════════════════════════ - - SimpleCapture = (000) - StringCapture = (001) - MultiCapture = (002) - AnchorFirst = (010) - AnchorLast = (013) - AnchorSibling = (018) - DeepNest = (026) - StarQuant = (033) - PlusQuant = (042) - OptQuant = (051) - QisNode = (068) - QisSequence = (085) - NoQis = (097) - TaggedRoot = (100) - TaggedCaptured = (112) - TaggedMulti = (126) - UntaggedSymmetric = (142) - UntaggedAsymmetric = (150) - UntaggedCaptured = (158) - CapturedSeq = (166) - UncapturedSeq = (175) - NestedScopes = (188) - Identifier = (199) - RefSimple = (200) - RefCaptured = (202) - RefChain = (204) - CardinalityJoin = (206) - NestedQuant = (222) - Complex = (242) - WildcardCapture = (306) - StringLiteral = (307) - NoCaptures = (308) - EmptyBranch = (309) - - (000) —(identifier)—[CaptureNode]→ (✓) - (001) —(identifier)—[CaptureNode, ToString]→ (✓) - (002) —(function)—[StartObject]→ (003) - (003) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (004) - (004) —𝜀—[Field(fn_name)]→ (005) - (005) —{→}—(block)@body—[CaptureNode]→ (006) - (006) —𝜀—[Field(fn_body)]→ (009) - (009) —{↗¹}—𝜀—[EndObject]→ (✓) - (010) —(parent)→ (011) - (011) —{↘.}—(first_child)—[CaptureNode]→ (012) - (012) —{↗¹}—𝜀→ (✓) - (013) —(parent)→ (014) - (014) —{↘}—(last_child)—[CaptureNode]→ (016) - (015) —{↗·¹}—𝜀→ (✓) - (016) —𝜀→ (015), (017) - (017) —{→}—(last_child)—[CaptureNode]→ (016) - (018) —(parent)—[StartObject]→ (019) - (019) —{↘}—(a)—[CaptureNode]→ (020) - (020) —𝜀—[Field(left)]→ (021) - (021) —{→·}—(b)—[CaptureNode]→ (022) - (022) —𝜀—[Field(right)]→ (025) - (025) —{↗¹}—𝜀—[EndObject]→ (✓) - (026) —(a)→ (027) - (027) —{↘}—(b)→ (028) - (028) —{↘}—(c)→ (029) - (029) —{↘}—(d)—[CaptureNode]→ (032) - (032) —{↗³}—𝜀→ (✓) - (033) —(container)→ (038) - (034) —{↘}—(item)—[CaptureNode]→ (040) - (036) —𝜀—[EndArray]→ (041) - (038) —𝜀—[StartArray]→ (034), (036) - (039) —{→}—(item)—[CaptureNode]→ (040) - (040) —𝜀—[PushElement]→ (039), (036) - (041) —{↗¹}—𝜀→ (✓) - (042) —(container)→ (044) - (043) —{↘}—(item)—[CaptureNode]→ (049) - (044) —𝜀—[StartArray]→ (043) - (045) —𝜀—[EndArray]→ (050) - (047) —𝜀→ (✓) - (048) —{→}—(item)—[CaptureNode]→ (049) - (049) —𝜀—[PushElement]→ (048), (045) - (050) —{↗¹}—𝜀→ (✓) - (051) —(container)→ (053) - (052) —(item)—[CaptureNode]→ (056) - (053) —𝜀→ (052), (055) - (055) —𝜀—[ClearCurrent]→ (056) - (056) —{↗¹}—𝜀→ (✓) - (057) —(function)—[StartObject]→ (058) - (058) —{↘}—(identifier)@name—[CaptureNode]→ (059) - (059) —𝜀—[Field(name)]→ (060) - (060) —{→}—(block)@body—[CaptureNode]→ (061) - (061) —𝜀—[Field(body)]→ (066) - (066) —{↗¹}—𝜀—[EndObject]→ (072) - (068) —𝜀—[StartObject, StartArray]→ (057), (074) - (069) —{→}—(function)→ (058), (071) - (070) —𝜀—[StartObject]→ (069) - (071) —𝜀—[EndObject]→ (072) - (072) —𝜀—[PushElement]→ (070), (074) - (074) —𝜀—[EndArray, EndObject]→ (✓) - (075) —𝜀—[StartObject]→ (076) - (076) 
—{→}—(key)—[CaptureNode]→ (077) - (077) —𝜀—[Field(key)]→ (078) - (078) —{→}—(value)—[CaptureNode]→ (083) - (083) —𝜀—[Field(value), EndObject]→ (089) - (085) —𝜀—[StartObject, StartArray]→ (075), (091) - (086) —{→}—𝜀→ (076), (088) - (087) —𝜀—[StartObject]→ (086) - (088) —𝜀—[EndObject]→ (089) - (089) —𝜀—[PushElement]→ (087), (091) - (091) —𝜀—[EndArray, EndObject]→ (✓) - (093) —{→}—(item)—[CaptureNode]→ (099) - (095) —𝜀—[EndArray]→ (✓) - (097) —𝜀—[StartArray]→ (093), (095) - (098) —{→}—𝜀→ (093), (099) - (099) —𝜀—[PushElement]→ (098), (095) - (100) —𝜀—[StartObject]→ (103), (107) - (103) —(success)—[StartVariant(Ok), CaptureNode]→ (105) - (105) —𝜀—[Field(val), EndVariant]→ (111) - (107) —(error)—[StartVariant(Err), CaptureNode, ToString]→ (109) - (109) —𝜀—[Field(msg), EndVariant]→ (111) - (111) —𝜀—[EndObject]→ (✓) - (112) —(wrapper)→ (123) - (113) —{↘}—𝜀→ (116), (120) - (116) —(left_node)—[StartVariant(Left), CaptureNode]→ (118) - (118) —𝜀—[Field(l), EndVariant]→ (124) - (120) —(right_node)—[StartVariant(Right), CaptureNode]→ (122) - (122) —𝜀—[Field(r), EndVariant]→ (124) - (123) —𝜀—[StartObject]→ (113) - (124) —𝜀—[EndObject]→ (125) - (125) —{↗¹}—𝜀→ (✓) - (126) —𝜀—[StartObject]→ (129), (133) - (129) —(node)—[StartVariant(Simple), CaptureNode]→ (131) - (131) —𝜀—[Field(val), EndVariant]→ (141) - (133) —(pair)—[StartVariant(Complex), StartObject]→ (134) - (134) —{↘}—(key)—[CaptureNode]→ (135) - (135) —𝜀—[Field(k)]→ (136) - (136) —{→}—(value)—[CaptureNode]→ (137) - (137) —𝜀—[Field(v)]→ (139) - (139) —{↗¹}—𝜀—[EndObject, EndVariant]→ (141) - (141) —𝜀—[EndObject]→ (✓) - (142) —𝜀—[StartObject]→ (144), (146) - (144) —(a)—[CaptureNode]→ (145) - (145) —𝜀—[Field(val)]→ (149) - (146) —(b)—[CaptureNode]→ (147) - (147) —𝜀—[Field(val)]→ (149) - (149) —𝜀—[EndObject]→ (✓) - (150) —𝜀—[StartObject]→ (152), (154) - (152) —(a)—[CaptureNode]→ (153) - (153) —𝜀—[Field(x)]→ (157) - (154) —(b)—[CaptureNode]→ (155) - (155) —𝜀—[Field(y)]→ (157) - (157) —𝜀—[EndObject]→ (✓) - (158) —𝜀—[StartObject]→ (160), (162) - (160) —(a)—[CaptureNode]→ (161) - (161) —𝜀—[Field(x)]→ (165) - (162) —(b)—[CaptureNode]→ (163) - (163) —𝜀—[Field(y)]→ (165) - (165) —𝜀—[EndObject]→ (✓) - (166) —(outer)→ (172) - (167) —{↘}—𝜀→ (168) - (168) —{→}—(inner)—[CaptureNode]→ (169) - (169) —𝜀—[Field(x)]→ (170) - (170) —{→}—(inner2)—[CaptureNode]→ (173) - (172) —𝜀—[StartObject]→ (167) - (173) —𝜀—[Field(y), EndObject]→ (174) - (174) —{↗¹}—𝜀→ (✓) - (175) —(outer)—[StartObject]→ (176) - (176) —{↘}—𝜀→ (177) - (177) —{→}—(inner)—[CaptureNode]→ (178) - (178) —𝜀—[Field(x)]→ (179) - (179) —{→}—(inner2)—[CaptureNode]→ (180) - (180) —𝜀—[Field(y)]→ (183) - (183) —{↗¹}—𝜀—[EndObject]→ (✓) - (185) —{→}—𝜀→ (186) - (186) —{→}—(a)—[CaptureNode]→ (194) - (188) —𝜀—[StartObject, StartObject]→ (185) - (191) —{→}—𝜀→ (192) - (192) —{→}—(b)—[CaptureNode]→ (198) - (194) —𝜀—[Field(a), EndObject, Field(inner1), StartObject]→ (191) - (198) —𝜀—[Field(b), EndObject, Field(inner2), EndObject]→ (✓) - (199) —(identifier)—[CaptureNode]→ (✓) - (200) ——𝜀→ (199), (201) - (201) —𝜀—→ (✓) - (202) ——𝜀→ (199), (203) - (203) —𝜀——[CaptureNode]→ (✓) - (204) ——𝜀→ (200), (205) - (205) —𝜀—→ (✓) - (206) —𝜀—[StartObject]→ (208), (210) - (208) —(single)—[CaptureNode]→ (209) - (209) —𝜀—[Field(item)]→ (221) - (210) —(multi)→ (212) - (211) —{↘}—(x)—[CaptureNode]→ (217) - (212) —𝜀—[StartArray]→ (211) - (215) —𝜀→ (✓) - (216) —{→}—(x)—[CaptureNode]→ (217) - (217) —𝜀—[PushElement]→ (216), (218) - (218) —𝜀—[EndArray, Field(item)]→ (219) - (219) —{↗¹}—𝜀→ (221) - (221) —𝜀—[EndObject]→ (✓) - (222) —(_)—[StartArray, 
StartObject, CaptureNode]→ (227) - (223) —{↘}—(item)—[CaptureNode, CaptureNode]→ (229) - (227) —𝜀—[StartArray]→ (223), (230) - (228) —{→}—(item)—[CaptureNode, CaptureNode]→ (229) - (229) —𝜀—[PushElement]→ (228), (230) - (230) —𝜀—[EndArray, Field(inner)]→ (235) - (233) —𝜀—[EndArray]→ (✓) - (235) —{↗¹}—𝜀—[EndObject]→ (241) - (237) —𝜀→ (✓) - (238) —{→}—(_)—[CaptureNode]→ (227), (240) - (239) —𝜀—[StartObject]→ (238) - (240) —𝜀—[EndObject]→ (241) - (241) —𝜀—[PushElement]→ (239), (233) - (242) —(module)—[StartObject]→ (243) - (243) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (249) - (245) —{→·}—(import)—[CaptureNode]→ (251) - (249) —𝜀—[Field(mod_name), StartArray]→ (245), (252) - (250) —{→}—(import)—[CaptureNode]→ (251) - (251) —𝜀—[PushElement]→ (250), (252) - (252) —𝜀—[EndArray, Field(imports)]→ (253) - (253) —{→}—(block)@body→ (294) - (254) —{↘}—𝜀→ (255) - (255) —{→}—𝜀→ (258), (282) - (258) —(function)—[StartVariant(Func), StartObject, CaptureNode]→ (259) - (259) —{↘}—(identifier)@name—[CaptureNode, ToString, CaptureNode]→ (260) - (260) —𝜀—[Field(fn_name)]→ (261) - (261) —{→}—(parameters)@params—[CaptureNode]→ (270) - (262) —{↘}—𝜀→ (263) - (263) —{→}—(param)—[CaptureNode, CaptureNode, CaptureNode]→ (268) - (267) —𝜀—[StartObject]→ (262) - (268) —𝜀—[Field(p), EndObject]→ (274) - (270) —𝜀—[StartArray]→ (267), (275) - (271) —{→}—𝜀→ (263), (273) - (272) —𝜀—[StartObject]→ (271) - (273) —𝜀—[EndObject]→ (274) - (274) —𝜀—[PushElement]→ (272), (275) - (275) —𝜀—[EndArray, Field(params)]→ (276) - (276) —{↗¹}—𝜀→ (277) - (277) —{→}—(block)@body—[CaptureNode, CaptureNode]→ (278) - (278) —𝜀—[Field(fn_body)]→ (280) - (280) —{↗¹}—𝜀—[EndObject, EndVariant]→ (292) - (282) —(class)—[StartVariant(Class), StartObject, CaptureNode]→ (283) - (283) —{↘}—(identifier)@name—[CaptureNode, ToString, CaptureNode]→ (284) - (284) —𝜀—[Field(cls_name)]→ (285) - (285) —{→}—(class_body)@body—[CaptureNode, CaptureNode]→ (286) - (286) —𝜀—[Field(cls_body)]→ (288) - (288) —{↗¹}—𝜀—[EndObject, EndVariant]→ (292) - (291) —𝜀—[StartObject]→ (254) - (292) —𝜀—[EndObject]→ (298) - (294) —𝜀—[StartArray]→ (291), (299) - (295) —{→}—𝜀→ (255), (297) - (296) —𝜀—[StartObject]→ (295) - (297) —𝜀—[EndObject]→ (298) - (298) —𝜀—[PushElement]→ (296), (299) - (299) —𝜀—[EndArray, Field(items)]→ (300) - (300) —{↗¹}—𝜀→ (302) - (302) —𝜀→ (305), (303) - (303) —{→}—(block)@body→ (302) - (305) —{↗·¹}—𝜀—[EndObject]→ (✓) - (306) —(🞵)—[CaptureNode]→ (✓) - (307) —"+"—[CaptureNode]→ (✓) - (308) —(identifier)→ (✓) - (309) —𝜀→ (312), (315) - (310) —𝜀→ (✓) - (312) —(value)—[StartVariant(Some), CaptureNode]→ (313) - (313) —𝜀—[EndVariant]→ (310) - (315) —(none_marker)—[StartVariant(None)]→ (316) - (316) —𝜀—[EndVariant]→ (310) - - ═══════════════════════════════════════════════════════════════════════════════ - TYPE INFERENCE - ═══════════════════════════════════════════════════════════════════════════════ - - SimpleCapture = Node - StringCapture = str - AnchorFirst = Node - AnchorLast = Node - DeepNest = Node - StarQuant = [Node] - PlusQuant = [Node]⁺ - OptQuant = Node? 
- QisNode = T09 - QisSequence = T11 - NoQis = [Node] - TaggedCaptured = TaggedCapturedScope14 - UntaggedSymmetric = Node - UntaggedCaptured = UntaggedCapturedScope20 - CapturedSeq = CapturedSeqScope23 - NestedScopes = NestedScopesScope27 - Identifier = Node - RefSimple = () - RefCaptured = Node - RefChain = () - CardinalityJoin = [Node]⁺ - NestedQuant = T31 - WildcardCapture = Node - StringLiteral = Node - NoCaptures = () - - MultiCapture = { - fn_name: str - fn_body: Node - } - AnchorSibling = { - left: Node - right: Node - } - QisNodeScope8 = { - name: Node - body: Node - } - T09 = [QisNodeScope8] - QisSequenceScope10 = { - key: Node - value: Node - } - T11 = [QisSequenceScope10] - TaggedRoot = { - Ok => Node - Err => str - } - TaggedCapturedScope14 = { - Left => Node - Right => Node - } - TaggedMultiScope15 = { - k: Node - v: Node - } - TaggedMulti = { - Simple => Node - Complex => TaggedMultiScope15 - } - UntaggedAsymmetric = { - x: Node? - y: Node? - } - UntaggedCapturedScope20 = { - x: Node? - y: Node? - } - CapturedSeqScope23 = { - x: Node - y: Node - } - UncapturedSeq = { - x: Node - y: Node - } - NestedScopesScope25 = { a: Node } - NestedScopesScope26 = { b: Node } - NestedScopesScope27 = { - inner1: NestedScopesScope25 - inner2: NestedScopesScope26 - } - NestedQuantScope29 = { inner: [Node] } - T31 = [NestedQuantScope29]⁺ - ComplexScope32 = { p: Node } - T33 = [ComplexScope32] - T35 = T33? - ComplexScope34 = { - fn_name: str? - params: T35 - fn_body: Node? - cls_name: str? - cls_body: Node? - } - T40 = [ComplexScope34] - Complex = { - mod_name: str - imports: [Node] - items: T40 - } - EmptyBranch = { - Some => Node - None => () - } - "#); -} - -/// Test specifically for ADR-0008 navigation lowering. -#[test] -fn golden_navigation_patterns() { - let source = indoc! {r#" - // Stay - first transition at root - NavStay = (root) @r - - // Down - descend to children (skip any) - NavDown = (parent (child) @c) - - // DownSkipTrivia - anchor at first child - NavDownAnchor = (parent . (child) @c) - - // Next - sibling traversal (skip any) - NavNext = (parent (a) @a (b) @b) - - // NextSkipTrivia - adjacent siblings - NavNextAnchor = (parent (a) @a . (b) @b) - - // Up - ascend (no constraint) - NavUp = (a (b (c) @c)) - - // UpSkipTrivia - must be last non-trivia - NavUpAnchor = (parent (child) @c .) - - // Multi-level Up - NavUpMulti = (a (b (c (d (e) @e)))) - - // Mixed anchors - NavMixed = (outer . (first) @f (middle) @m . (last) @l .) 
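// ---------------------------------------------------------------------------
// Editorial aside on the cardinality notation in the TYPE INFERENCE dumps
// above: `Node` is exactly one, `Node?` zero-or-one, `[Node]` zero-or-more,
// `[Node]⁺` one-or-more. The sketch below encodes only the rules spelled out
// in the query comments of these tests (a join of `1` and `+` gives `+`, a
// capture missing from one alternation branch becomes optional, and an outer
// `*` around an inner `+` yields `*`). The `Card` enum and the remaining
// match arms are illustrative assumptions, not the crate's inference types.
// ---------------------------------------------------------------------------
#[derive(Clone, Copy, PartialEq, Debug)]
enum Card {
    One,  // `T`
    Opt,  // `T?`
    Star, // `[T]`
    Plus, // `[T]⁺`
}

// Join the cardinalities of one capture across alternation branches.
fn join(a: Card, b: Card) -> Card {
    use Card::*;
    match (a, b) {
        (One, One) => One,
        // `CardinalityJoin` above: `1` joined with `+` stays `+`.
        (One, Plus) | (Plus, One) | (Plus, Plus) => Plus,
        // The remaining combinations are assumed to fall back to the weakest
        // guarantee that still admits both branches.
        (Star, _) | (_, Star) | (Opt, Plus) | (Plus, Opt) => Star,
        (Opt, _) | (_, Opt) => Opt,
    }
}

// Multiply an inner capture's cardinality by the quantifier wrapping it.
fn multiply(outer: Card, inner: Card) -> Card {
    use Card::*;
    match (outer, inner) {
        (One, c) | (c, One) => c,
        // `CardMult`: `((item)+ @items)*` infers `[Node]`, so `*` absorbs `+`;
        // the other arms are analogous assumptions.
        (Star, _) | (_, Star) | (Opt, Plus) | (Plus, Opt) => Star,
        (Plus, Plus) => Plus,
        (Opt, Opt) => Opt,
    }
}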
- "#}; - - insta::assert_snapshot!(golden_master(source), @r" - ═══════════════════════════════════════════════════════════════════════════════ - TRANSITION GRAPH - ═══════════════════════════════════════════════════════════════════════════════ - - NavStay = (00) - NavDown = (01) - NavDownAnchor = (04) - NavNext = (07) - NavNextAnchor = (15) - NavUp = (23) - NavUpAnchor = (28) - NavUpMulti = (33) - NavMixed = (42) - - (00) —(root)—[CaptureNode]→ (✓) - (01) —(parent)→ (02) - (02) —{↘}—(child)—[CaptureNode]→ (03) - (03) —{↗¹}—𝜀→ (✓) - (04) —(parent)→ (05) - (05) —{↘.}—(child)—[CaptureNode]→ (06) - (06) —{↗¹}—𝜀→ (✓) - (07) —(parent)—[StartObject]→ (08) - (08) —{↘}—(a)—[CaptureNode]→ (09) - (09) —𝜀—[Field(a)]→ (10) - (10) —{→}—(b)—[CaptureNode]→ (11) - (11) —𝜀—[Field(b)]→ (14) - (14) —{↗¹}—𝜀—[EndObject]→ (✓) - (15) —(parent)—[StartObject]→ (16) - (16) —{↘}—(a)—[CaptureNode]→ (17) - (17) —𝜀—[Field(a)]→ (18) - (18) —{→·}—(b)—[CaptureNode]→ (19) - (19) —𝜀—[Field(b)]→ (22) - (22) —{↗¹}—𝜀—[EndObject]→ (✓) - (23) —(a)→ (24) - (24) —{↘}—(b)→ (25) - (25) —{↘}—(c)—[CaptureNode]→ (27) - (27) —{↗²}—𝜀→ (✓) - (28) —(parent)→ (29) - (29) —{↘}—(child)—[CaptureNode]→ (31) - (30) —{↗·¹}—𝜀→ (✓) - (31) —𝜀→ (30), (32) - (32) —{→}—(child)—[CaptureNode]→ (31) - (33) —(a)→ (34) - (34) —{↘}—(b)→ (35) - (35) —{↘}—(c)→ (36) - (36) —{↘}—(d)→ (37) - (37) —{↘}—(e)—[CaptureNode]→ (41) - (41) —{↗⁴}—𝜀→ (✓) - (42) —(outer)—[StartObject]→ (43) - (43) —{↘.}—(first)—[CaptureNode]→ (44) - (44) —𝜀—[Field(f)]→ (45) - (45) —{→}—(middle)—[CaptureNode]→ (46) - (46) —𝜀—[Field(m)]→ (47) - (47) —{→·}—(last)—[CaptureNode]→ (48) - (48) —𝜀—[Field(l)]→ (50) - (50) —𝜀→ (53), (51) - (51) —{→}—(last)—[CaptureNode]→ (50) - (53) —{↗·¹}—𝜀—[EndObject]→ (✓) - - ═══════════════════════════════════════════════════════════════════════════════ - TYPE INFERENCE - ═══════════════════════════════════════════════════════════════════════════════ - - NavStay = Node - NavDown = Node - NavDownAnchor = Node - NavUp = Node - NavUpAnchor = Node - NavUpMulti = Node - - NavNext = { - a: Node - b: Node - } - NavNextAnchor = { - a: Node - b: Node - } - NavMixed = { - f: Node - m: Node - l: Node - } - "); -} - -/// Test specifically for ADR-0009 type inference edge cases. -#[test] -fn golden_type_inference() { - let source = indoc! 
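    // -----------------------------------------------------------------------
    // Editorial note: legend for the navigation glyphs in the dumps below,
    // taken from the `format_nav` helper removed earlier in this patch:
    //   {→}   Next            {→·}  NextSkipTrivia     {→!}  NextExact
    //   {↘}   Down            {↘.}  DownSkipTrivia     {↘!}  DownExact
    //   {↗ⁿ}  Up n levels     {↗·ⁿ} UpSkipTrivia       {↗!ⁿ} UpExact
    //   Stay ({˟}) is omitted from dumps entirely.
    // -----------------------------------------------------------------------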
{r#" - // Flat scoping - nesting doesn't create data nesting - FlatScope = (a (b (c (d) @val))) - - // Reference opacity - calling doesn't inherit captures - BaseWithCapture = (identifier) @name - RefOpaque = (BaseWithCapture) - RefCaptured = (BaseWithCapture) @result - - // Tagged at root vs inline - TaggedAtRoot = [ A: (a) @x B: (b) @y ] - TaggedInline = (wrapper [ A: (a) @x B: (b) @y ]) - - // Cardinality multiplication - // outer(*) * inner(+) = * - CardMult = ((item)+ @items)* - - // QIS vs non-QIS - QisTwo = { (a) @x (b) @y }* - NoQisOne = { (a) @x }* - - // Missing field rule - asymmetric → Optional - MissingField = [ - Full: (full (a) @a (b) @b (c) @c) - Partial: (partial (a) @a) - ] - - // Synthetic naming - SyntheticNames = (foo { (bar) @bar } @baz) - "#}; - - insta::assert_snapshot!(golden_master(source), @r" - ═══════════════════════════════════════════════════════════════════════════════ - TRANSITION GRAPH - ═══════════════════════════════════════════════════════════════════════════════ - - FlatScope = (000) - BaseWithCapture = (007) - RefOpaque = (008) - RefCaptured = (010) - TaggedAtRoot = (012) - TaggedInline = (024) - CardMult = (050) - QisTwo = (063) - NoQisOne = (075) - MissingField = (078) - SyntheticNames = (098) - - (000) —(a)→ (001) - (001) —{↘}—(b)→ (002) - (002) —{↘}—(c)→ (003) - (003) —{↘}—(d)—[CaptureNode]→ (006) - (006) —{↗³}—𝜀→ (✓) - (007) —(identifier)—[CaptureNode]→ (✓) - (008) ——𝜀→ (007), (009) - (009) —𝜀—→ (✓) - (010) ——𝜀→ (007), (011) - (011) —𝜀——[CaptureNode]→ (✓) - (012) —𝜀—[StartObject]→ (015), (019) - (015) —(a)—[StartVariant(A), CaptureNode]→ (017) - (017) —𝜀—[Field(x), EndVariant]→ (023) - (019) —(b)—[StartVariant(B), CaptureNode]→ (021) - (021) —𝜀—[Field(y), EndVariant]→ (023) - (023) —𝜀—[EndObject]→ (✓) - (024) —(wrapper)—[StartObject]→ (025) - (025) —{↘}—𝜀→ (028), (032) - (028) —(a)—[StartVariant(A), CaptureNode]→ (030) - (030) —𝜀—[Field(x), EndVariant]→ (037) - (032) —(b)—[StartVariant(B), CaptureNode]→ (034) - (034) —𝜀—[Field(y), EndVariant]→ (037) - (037) —{↗¹}—𝜀—[EndObject]→ (✓) - (038) —(_)→ (040) - (039) —{↘}—(item)—[CaptureNode]→ (045) - (040) —𝜀—[StartArray]→ (039) - (041) —𝜀—[EndArray]→ (046) - (043) —𝜀→ (✓) - (044) —{→}—(item)—[CaptureNode]→ (045) - (045) —𝜀—[PushElement]→ (044), (041) - (046) —{↗¹}—𝜀→ (052) - (048) —𝜀—[EndArray]→ (✓) - (050) —𝜀—[StartArray]→ (038), (048) - (051) —{→}—(_)→ (040), (052) - (052) —𝜀—[PushElement]→ (051), (048) - (053) —𝜀—[StartObject]→ (054) - (054) —{→}—(a)—[CaptureNode]→ (055) - (055) —𝜀—[Field(x)]→ (056) - (056) —{→}—(b)—[CaptureNode]→ (061) - (061) —𝜀—[Field(y), EndObject]→ (067) - (063) —𝜀—[StartObject, StartArray]→ (053), (069) - (064) —{→}—𝜀→ (054), (066) - (065) —𝜀—[StartObject]→ (064) - (066) —𝜀—[EndObject]→ (067) - (067) —𝜀—[PushElement]→ (065), (069) - (069) —𝜀—[EndArray, EndObject]→ (✓) - (071) —{→}—(a)—[CaptureNode]→ (077) - (073) —𝜀—[EndArray]→ (✓) - (075) —𝜀—[StartArray]→ (071), (073) - (076) —{→}—𝜀→ (071), (077) - (077) —𝜀—[PushElement]→ (076), (073) - (078) —𝜀—[StartObject]→ (081), (091) - (081) —(full)—[StartVariant(Full), StartObject]→ (082) - (082) —{↘}—(a)—[CaptureNode]→ (083) - (083) —𝜀—[Field(a)]→ (084) - (084) —{→}—(b)—[CaptureNode]→ (085) - (085) —𝜀—[Field(b)]→ (086) - (086) —{→}—(c)—[CaptureNode]→ (087) - (087) —𝜀—[Field(c)]→ (089) - (089) —{↗¹}—𝜀—[EndObject, EndVariant]→ (097) - (091) —(partial)—[StartVariant(Partial)]→ (092) - (092) —{↘}—(a)—[CaptureNode]→ (093) - (093) —𝜀—[Field(a)]→ (095) - (095) —{↗¹}—𝜀—[EndVariant]→ (097) - (097) —𝜀—[EndObject]→ (✓) - (098) —(foo)→ (102) - 
(099) —{↘}—𝜀→ (100) - (100) —{→}—(bar)—[CaptureNode]→ (103) - (102) —𝜀—[StartObject]→ (099) - (103) —𝜀—[Field(bar), EndObject]→ (104) - (104) —{↗¹}—𝜀→ (✓) - - ═══════════════════════════════════════════════════════════════════════════════ - TYPE INFERENCE - ═══════════════════════════════════════════════════════════════════════════════ - - FlatScope = Node - BaseWithCapture = Node - RefOpaque = () - RefCaptured = Node - CardMult = [Node] - QisTwo = T09 - NoQisOne = [Node] - SyntheticNames = SyntheticNamesScope13 - - TaggedAtRoot = { - A => Node - B => Node - } - TaggedInline = { - x: Node? - y: Node? - } - QisTwoScope8 = { - x: Node - y: Node - } - T09 = [QisTwoScope8] - MissingFieldScope11 = { - a: Node - b: Node - c: Node - } - MissingField = { - Full => MissingFieldScope11 - Partial => Node - } - SyntheticNamesScope13 = { bar: Node } - "); -} - -/// Test ADR-0005 effect stream patterns. -#[test] -fn golden_effect_patterns() { - let source = indoc! {r#" - // CaptureNode + Field - EffCapture = (node) @name - - // ToString - EffToString = (node) @name ::string - - // StartArray / Push / EndArray - EffArray = (container (item)* @items) - - // StartObject / Field / EndObject (via captured sequence) - EffObject = { (a) @x (b) @y } @obj - - // StartVariant / EndVariant (via tagged alternation) - EffVariant = [ A: (a) @x B: (b) @y ] @choice - - // Clear (via optional skip path) - EffClear = (container (item)? @maybe) - "#}; - - insta::assert_snapshot!(golden_master(source), @r" - ═══════════════════════════════════════════════════════════════════════════════ - TRANSITION GRAPH - ═══════════════════════════════════════════════════════════════════════════════ - - EffCapture = (00) - EffToString = (01) - EffArray = (02) - EffObject = (11) - EffVariant = (18) - EffClear = (30) - - (00) —(node)—[CaptureNode]→ (✓) - (01) —(node)—[CaptureNode, ToString]→ (✓) - (02) —(container)→ (07) - (03) —{↘}—(item)—[CaptureNode]→ (09) - (05) —𝜀—[EndArray]→ (10) - (07) —𝜀—[StartArray]→ (03), (05) - (08) —{→}—(item)—[CaptureNode]→ (09) - (09) —𝜀—[PushElement]→ (08), (05) - (10) —{↗¹}—𝜀→ (✓) - (11) —𝜀—[StartObject]→ (12) - (12) —{→}—(a)—[CaptureNode]→ (13) - (13) —𝜀—[Field(x)]→ (14) - (14) —{→}—(b)—[CaptureNode]→ (17) - (17) —𝜀—[Field(y), EndObject]→ (✓) - (18) —𝜀—[StartObject]→ (21), (25) - (21) —(a)—[StartVariant(A), CaptureNode]→ (23) - (23) —𝜀—[Field(x), EndVariant]→ (29) - (25) —(b)—[StartVariant(B), CaptureNode]→ (27) - (27) —𝜀—[Field(y), EndVariant]→ (29) - (29) —𝜀—[EndObject]→ (✓) - (30) —(container)→ (32) - (31) —(item)—[CaptureNode]→ (35) - (32) —𝜀→ (31), (34) - (34) —𝜀—[ClearCurrent]→ (35) - (35) —{↗¹}—𝜀→ (✓) - - ═══════════════════════════════════════════════════════════════════════════════ - TYPE INFERENCE - ═══════════════════════════════════════════════════════════════════════════════ - - EffCapture = Node - EffToString = str - EffArray = [Node] - EffObject = EffObjectScope4 - EffVariant = EffVariantScope5 - EffClear = Node? - - EffObjectScope4 = { - x: Node - y: Node - } - EffVariantScope5 = { - A => Node - B => Node - } - "); -} - -/// Test quantifier graph structure (ADR-0005). -#[test] -fn golden_quantifier_graphs() { - let source = indoc! {r#" - // Greedy star: Branch.next = [match, exit] - GreedyStar = (a)* @items - - // Greedy plus: must match at least once - GreedyPlus = (a)+ @items - - // Optional: branch to match or skip - Optional = (a)? @maybe - - // Non-greedy star: Branch.next = [exit, match] - LazyStar = (a)*? @items - - // Non-greedy plus - LazyPlus = (a)+? 
@items - - // Quantifier on sequence (QIS triggered) - QuantSeq = { (a) @x (b) @y }* - - // Nested quantifiers - NestedQuant = (outer (inner)* @inners)+ @outers - "#}; - - insta::assert_snapshot!(golden_master(source), @r" - ═══════════════════════════════════════════════════════════════════════════════ - TRANSITION GRAPH - ═══════════════════════════════════════════════════════════════════════════════ - - GreedyStar = (04) - GreedyPlus = (07) - Optional = (15) - LazyStar = (22) - LazyPlus = (25) - QuantSeq = (42) - NestedQuant = (49) - - (00) —(a)—[CaptureNode]→ (06) - (02) —𝜀—[EndArray]→ (✓) - (04) —𝜀—[StartArray]→ (00), (02) - (05) —{→}—(a)—[CaptureNode]→ (06) - (06) —𝜀—[PushElement]→ (05), (02) - (07) —(a)—[StartArray, CaptureNode]→ (13) - (09) —𝜀—[EndArray]→ (✓) - (11) —𝜀→ (✓) - (12) —{→}—(a)—[CaptureNode]→ (13) - (13) —𝜀—[PushElement]→ (12), (09) - (14) —(a)—[CaptureNode]→ (16) - (15) —𝜀→ (14), (17) - (16) —𝜀→ (✓) - (17) —𝜀—[ClearCurrent]→ (16) - (18) —(a)—[CaptureNode]→ (24) - (20) —𝜀—[EndArray]→ (✓) - (22) —𝜀—[StartArray]→ (20), (18) - (23) —{→}—(a)—[CaptureNode]→ (24) - (24) —𝜀—[PushElement]→ (20), (23) - (25) —(a)—[StartArray, CaptureNode]→ (31) - (27) —𝜀—[EndArray]→ (✓) - (29) —𝜀→ (✓) - (30) —{→}—(a)—[CaptureNode]→ (31) - (31) —𝜀—[PushElement]→ (27), (30) - (32) —𝜀—[StartObject]→ (33) - (33) —{→}—(a)—[CaptureNode]→ (34) - (34) —𝜀—[Field(x)]→ (35) - (35) —{→}—(b)—[CaptureNode]→ (40) - (40) —𝜀—[Field(y), EndObject]→ (46) - (42) —𝜀—[StartObject, StartArray]→ (32), (48) - (43) —{→}—𝜀→ (33), (45) - (44) —𝜀—[StartObject]→ (43) - (45) —𝜀—[EndObject]→ (46) - (46) —𝜀—[PushElement]→ (44), (48) - (48) —𝜀—[EndArray, EndObject]→ (✓) - (49) —(outer)—[StartArray, StartObject, CaptureNode]→ (54) - (50) —{↘}—(inner)—[CaptureNode, CaptureNode]→ (56) - (54) —𝜀—[StartArray]→ (50), (57) - (55) —{→}—(inner)—[CaptureNode, CaptureNode]→ (56) - (56) —𝜀—[PushElement]→ (55), (57) - (57) —𝜀—[EndArray, Field(inners)]→ (62) - (60) —𝜀—[EndArray]→ (✓) - (62) —{↗¹}—𝜀—[EndObject]→ (68) - (64) —𝜀→ (✓) - (65) —{→}—(outer)—[CaptureNode]→ (54), (67) - (66) —𝜀—[StartObject]→ (65) - (67) —𝜀—[EndObject]→ (68) - (68) —𝜀—[PushElement]→ (66), (60) - - ═══════════════════════════════════════════════════════════════════════════════ - TYPE INFERENCE - ═══════════════════════════════════════════════════════════════════════════════ - - GreedyStar = [Node] - GreedyPlus = [Node]⁺ - Optional = Node? - LazyStar = [Node] - LazyPlus = [Node]⁺ - QuantSeq = T09 - NestedQuant = T12 - - QuantSeqScope8 = { - x: Node - y: Node - } - T09 = [QuantSeqScope8] - NestedQuantScope10 = { inners: [Node] } - T12 = [NestedQuantScope10]⁺ - "); -} diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs deleted file mode 100644 index 7eb8f705..00000000 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Epsilon elimination optimization pass. -//! -//! Reduces graph size by removing unnecessary epsilon transitions. -//! -//! # Safety Rules (from ADR-0005) -//! -//! An epsilon node CANNOT be eliminated if: -//! - It has a `RefMarker` (Enter/Exit) -//! - It has multiple successors (branch point) -//! - Its successor already has a `RefMarker` -//! - Both have non-Stay `Nav` that can't be merged - -use std::collections::{HashMap, HashSet}; - -use crate::ir::{Nav, NavKind}; - -use super::Query; -use super::graph::{BuildGraph, BuildMatcher, NodeId}; - -/// Statistics from epsilon elimination. 
-#[derive(Debug, Default)]
-pub struct OptimizeStats {
-    pub epsilons_eliminated: usize,
-    pub epsilons_kept: usize,
-}
-
-impl Query<'_> {
-    /// Run epsilon elimination on the graph.
-    ///
-    /// Populates `dead_nodes` with eliminated node IDs.
-    pub(super) fn optimize_graph(&mut self) {
-        let (dead, _stats) = optimize_graph(&mut self.graph);
-        self.dead_nodes = dead;
-    }
-}
-
-/// Run epsilon elimination on a BuildGraph.
-///
-/// Returns the set of dead node IDs that should be skipped during emission.
-pub fn optimize_graph(graph: &mut BuildGraph) -> (HashSet<NodeId>, OptimizeStats) {
-    let mut stats = OptimizeStats::default();
-    let mut dead_nodes: HashSet<NodeId> = HashSet::new();
-
-    let mut predecessors = build_predecessor_map(graph);
-
-    // Process nodes in reverse order to handle chains
-    let node_count = graph.len() as NodeId;
-    for id in (0..node_count).rev() {
-        if dead_nodes.contains(&id) {
-            continue;
-        }
-
-        // Inspect via indexing first and copy what we need before mutating,
-        // so we never hold a borrow of the node across the edits below.
-
-        // Check if eliminable
-        if !is_eliminable_epsilon(id, graph, &predecessors) {
-            let node = graph.node(id);
-            if node.is_epsilon() {
-                stats.epsilons_kept += 1;
-            }
-            continue;
-        }
-
-        let node_effects = graph.node(id).effects.clone();
-        let node_nav = graph.node(id).nav;
-        let successor_id = graph.node(id).successors[0];
-
-        // 1. Prepend effects to successor
-        if !node_effects.is_empty() {
-            let succ = graph.node_mut(successor_id);
-            let mut new_effects = node_effects;
-            new_effects.append(&mut succ.effects);
-            succ.effects = new_effects;
-        }
-
-        // 2. Transfer or merge nav
-        let successor_nav = graph.node(successor_id).nav;
-        if !node_nav.is_stay() {
-            if successor_nav.is_stay() {
-                graph.node_mut(successor_id).nav = node_nav;
-            } else if can_merge_up(node_nav, successor_nav) {
-                let merged = Nav::up(node_nav.level + successor_nav.level);
-                graph.node_mut(successor_id).nav = merged;
-            }
-        }
-
-        // 3. Redirect predecessors to successor
-        let preds = predecessors.get(&id).cloned().unwrap_or_default();
-        for pred_id in &preds {
-            if dead_nodes.contains(pred_id) {
-                continue;
-            }
-            let pred = graph.node_mut(*pred_id);
-            for succ in &mut pred.successors {
-                if *succ == id {
-                    *succ = successor_id;
-                }
-            }
-            // Update predecessor map: pred is now a predecessor of successor
-            predecessors.entry(successor_id).or_default().push(*pred_id);
-        }
-        // Remove eliminated node from successor's predecessors
-        if let Some(succ_preds) = predecessors.get_mut(&successor_id) {
-            succ_preds.retain(|&p| p != id);
-        }
-
-        // 4. Update definitions that pointed to the eliminated node
-        redirect_definitions(graph, id, successor_id);
-
-        dead_nodes.insert(id);
-        stats.epsilons_eliminated += 1;
-    }
-
-    (dead_nodes, stats)
-}
-
-fn is_eliminable_epsilon(
-    id: NodeId,
-    graph: &BuildGraph,
-    predecessors: &HashMap<NodeId, Vec<NodeId>>,
-) -> bool {
-    let node = graph.node(id);
-
-    if !matches!(node.matcher, BuildMatcher::Epsilon) {
-        return false;
-    }
-
-    if node.ref_marker.is_some() {
-        return false;
-    }
-
-    if node.successors.len() != 1 {
-        return false;
-    }
-
-    let successor_id = node.successors[0];
-    let successor = graph.node(successor_id);
-
-    // Nav merge check
-    if !node.nav.is_stay() && !successor.nav.is_stay() && !can_merge_up(node.nav, successor.nav) {
-        return false;
-    }
-
-    // Don't eliminate if node has nav and successor is a join point.
-    // Different paths may need different navigation.
-    if !node.nav.is_stay() {
-        let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len());
-        if succ_pred_count > 1 {
-            return false;
-        }
-    }
-
-    // Don't eliminate if node has effects and successor is a join point.
-    if !node.effects.is_empty() {
-        let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len());
-        if succ_pred_count > 1 {
-            return false;
-        }
-    }
-
-    // Don't eliminate if node has effects and successor has a ref marker.
-    // Merging would move the effects onto the Enter/Exit node, changing where
-    // they run relative to the reference boundary.
-    if !node.effects.is_empty() && successor.ref_marker.is_some() {
-        return false;
-    }
-
-    // Don't eliminate if the epsilon has effects and the successor has
-    // navigation. Today the effects run before the successor's nav; after a
-    // merge they would run after it (nav, then match, then effects), which
-    // reorders observable effects. So a non-Stay successor nav blocks the merge.
-    if !node.effects.is_empty() && !successor.nav.is_stay() {
-        return false;
-    }
-
-    true
-}
-
-fn build_predecessor_map(graph: &BuildGraph) -> HashMap<NodeId, Vec<NodeId>> {
-    let mut predecessors: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
-
-    for (id, node) in graph.iter() {
-        for &succ in &node.successors {
-            predecessors.entry(succ).or_default().push(id);
-        }
-    }
-
-    predecessors
-}
-
-fn can_merge_up(a: Nav, b: Nav) -> bool {
-    a.kind == NavKind::Up && b.kind == NavKind::Up
-}
-
-fn redirect_definitions(graph: &mut BuildGraph, old_id: NodeId, new_id: NodeId) {
-    let updates: Vec<_> = graph
-        .definitions()
-        .filter(|(_, entry)| *entry == old_id)
-        .map(|(name, _)| name)
-        .collect();
-
-    for name in updates {
-        graph.add_definition(name, new_id);
-    }
-}
diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs
deleted file mode 100644
index ae750fed..00000000
--- a/crates/plotnik-lib/src/query/graph_qis.rs
+++ /dev/null
@@ -1,163 +0,0 @@
-//! Capture scope detection: QIS and single-capture definitions.
-//!
-//! - QIS triggers when a quantified expression has ≥2 propagating captures.
-//! - Single-capture definitions unwrap to their capture's type directly.
-//!
-//! See ADR-0009 for full specification.
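// ---------------------------------------------------------------------------
// Editorial aside: a minimal illustration of the QIS trigger rule documented
// above, over a simplified expression tree. `MiniExpr`, `propagating_captures`,
// and `triggers_qis` are hypothetical names that only mirror the shape of the
// real AST; the actual detection lives in `detect_capture_scopes` and
// `collect_propagating_captures` below.
// ---------------------------------------------------------------------------
enum MiniExpr {
    Node(Vec<MiniExpr>),             // (kind child ...)
    Captured(String, Box<MiniExpr>), // expr @name
    Seq(Vec<MiniExpr>),              // { ... }
    Alt(Vec<MiniExpr>),              // [ ... ]
    Quantified(Box<MiniExpr>),       // expr*, expr+, expr?
}

// Collect captures that would propagate to the enclosing scope. A capture on
// a sequence or alternation absorbs its inner captures, so only the outer
// name counts; quantifiers pass captures through.
fn propagating_captures(e: &MiniExpr) -> Vec<&str> {
    use MiniExpr::*;
    match e {
        Captured(name, inner) => {
            let mut out = vec![name.as_str()];
            if !matches!(inner.as_ref(), Seq(_) | Alt(_)) {
                out.extend(propagating_captures(inner));
            }
            out
        }
        Node(children) | Seq(children) | Alt(children) => {
            children.iter().flat_map(propagating_captures).collect()
        }
        Quantified(inner) => propagating_captures(inner),
    }
}

// QIS fires when a quantified expression would otherwise leak two or more
// captures into the surrounding scope.
fn triggers_qis(quantified_inner: &MiniExpr) -> bool {
    propagating_captures(quantified_inner).len() >= 2
}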
- -use std::collections::{HashMap, HashSet}; - -use crate::parser::{ast, token_src}; -use crate::query::symbol_table::SymbolTable; -use crate::query::visitor::Visitor; - -#[derive(Debug, Clone)] -pub struct QisTrigger<'a> { - #[allow(unused)] - pub captures: Vec<&'a str>, -} - -pub type QisTriggerTable<'q> = HashMap>; - -#[derive(Debug, Default)] -pub struct QisContext<'q> { - pub qis_triggers: QisTriggerTable<'q>, - /// Definitions with exactly 1 propagating capture: def name → capture name. - pub single_capture_defs: HashMap<&'q str, &'q str>, - /// Definitions with 2+ propagating captures (need struct wrapping at root). - pub multi_capture_defs: HashSet<&'q str>, -} - -/// Detect capture scopes: QIS triggers and single-capture definitions. -/// -/// - QIS triggers when quantified expression has ≥2 propagating captures -/// - Single-capture definitions unwrap (no Field effect, type is capture's type) -pub fn detect_capture_scopes<'q>( - source: &'q str, - symbol_table: &SymbolTable<'q>, -) -> QisContext<'q> { - let mut ctx: QisContext<'q> = QisContext::default(); - - let mut visitor = QisVisitor { - source, - qis_triggers: &mut ctx.qis_triggers, - }; - - // Collect entries to decouple from self for the iteration - let entries: Vec<_> = symbol_table.iter().map(|(n, b)| (*n, b.clone())).collect(); - - for (name, body) in entries { - // 1. Detect single/multi capture definitions - let captures = collect_propagating_captures(&body, source); - - if captures.len() == 1 { - ctx.single_capture_defs.insert(name, captures[0]); - } else if captures.len() >= 2 { - ctx.multi_capture_defs.insert(name); - } - - // 2. Detect QIS within this definition - visitor.visit_expr(&body); - } - - ctx -} - -struct QisVisitor<'a, 'map> { - source: &'a str, - qis_triggers: &'map mut HashMap>, -} - -impl<'a, 'map> Visitor for QisVisitor<'a, 'map> { - fn visit_quantified_expr(&mut self, q: &ast::QuantifiedExpr) { - if let Some(inner) = q.inner() { - let captures = collect_propagating_captures(&inner, self.source); - if captures.len() >= 2 { - self.qis_triggers.insert(q.clone(), QisTrigger { captures }); - } - // Recurse - self.visit_expr(&inner); - } - } - - fn visit_captured_expr(&mut self, c: &ast::CapturedExpr) { - // Captures on sequences/alternations absorb inner captures, - // but we still recurse to find nested quantifiers. - if let Some(inner) = c.inner() { - // Special case: captured quantifier with ≥1 nested capture needs QIS - // to wrap each iteration with StartObject/EndObject for proper field scoping. - if let ast::Expr::QuantifiedExpr(q) = &inner - && let Some(quant_inner) = q.inner() - { - let captures = collect_propagating_captures(&quant_inner, self.source); - // Trigger QIS if there's at least 1 capture (not already covered by ≥2 rule) - if !captures.is_empty() && !self.qis_triggers.contains_key(q) { - self.qis_triggers.insert(q.clone(), QisTrigger { captures }); - } - } - self.visit_expr(&inner); - } - } -} - -pub fn collect_propagating_captures<'a>(expr: &ast::Expr, source: &'a str) -> Vec<&'a str> { - let mut collector = CaptureCollector { - source, - captures: Vec::new(), - }; - collector.visit_expr(expr); - collector.captures -} - -struct CaptureCollector<'a> { - source: &'a str, - captures: Vec<&'a str>, -} - -impl<'a> Visitor for CaptureCollector<'a> { - fn visit_captured_expr(&mut self, c: &ast::CapturedExpr) { - if let Some(name_token) = c.name() { - let name = token_src(&name_token, self.source); - self.captures.push(name); - } - - // Captured sequence/alternation absorbs inner captures. 
- // Captured quantifiers with nested captures also absorb (they become QIS). - if let Some(inner) = c.inner() - && !is_scope_container(&inner, self.source) - { - self.visit_expr(&inner); - } - } - - fn visit_quantified_expr(&mut self, q: &ast::QuantifiedExpr) { - // Nested quantifier: its captures propagate (with modified cardinality) - if let Some(inner) = q.inner() { - self.visit_expr(&inner); - } - } -} - -/// Check if an expression is a scope container that absorbs inner captures. -/// - Sequences and alternations always absorb -/// - Quantifiers absorb if they have nested captures (will become QIS) -fn is_scope_container(expr: &ast::Expr, source: &str) -> bool { - match expr { - ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, - ast::Expr::QuantifiedExpr(q) => { - if let Some(inner) = q.inner() { - // Quantifier with nested captures acts as scope container - // (will be treated as QIS, wrapping each element in an object) - let nested_captures = collect_propagating_captures(&inner, source); - if !nested_captures.is_empty() { - return true; - } - // Otherwise check if inner is a scope container - is_scope_container(&inner, source) - } else { - false - } - } - _ => false, - } -} diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs deleted file mode 100644 index 49388bf8..00000000 --- a/crates/plotnik-lib/src/query/graph_qis_tests.rs +++ /dev/null @@ -1,234 +0,0 @@ -use indoc::indoc; - -use crate::Query; - -fn check_qis(source: &str) -> String { - let query = Query::try_from(source).unwrap().build_graph(); - let mut result = Vec::new(); - - for def in query.root().defs() { - let def_name = def.name().map(|t| t.text().to_string()).unwrap_or_default(); - let mut triggers: Vec<_> = query - .qis_ctx - .qis_triggers - .iter() - .filter_map(|(q, trigger)| { - // Check if this quantifier belongs to this definition - let q_range = q.text_range(); - let def_range = def.text_range(); - if q_range.start() >= def_range.start() && q_range.end() <= def_range.end() { - Some(( - q_range.start(), - format!(" QIS: [{}]", trigger.captures.join(", ")), - )) - } else { - None - } - }) - .collect(); - triggers.sort_by_key(|(pos, _)| *pos); - let triggers: Vec<_> = triggers.into_iter().map(|(_, s)| s).collect(); - - if triggers.is_empty() { - result.push(format!("{}: no QIS", def_name)); - } else { - result.push(format!("{}:", def_name)); - result.extend(triggers); - } - } - - result.join("\n") -} - -#[test] -fn single_capture_no_qis() { - let source = "Foo = { (a) @x }*"; - - insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); -} - -#[test] -fn two_captures_triggers_qis() { - let source = "Foo = { (a) @x (b) @y }*"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - "); -} - -#[test] -fn three_captures_triggers_qis() { - let source = "Foo = { (a) @x (b) @y (c) @z }*"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y, z] - "); -} - -#[test] -fn captured_sequence_absorbs_inner() { - let source = "Foo = { { (a) @x (b) @y } @inner }*"; - - insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); -} - -#[test] -fn captured_alternation_absorbs_inner() { - let source = "Foo = { [ (a) @x (b) @y ] @choice }*"; - - insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); -} - -#[test] -fn uncaptured_alternation_propagates() { - let source = "Foo = { [ (a) @x (b) @y ] }*"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - "); -} - -#[test] -fn node_with_two_captures() { - let 
source = indoc! {r#" - Foo = (function - name: (identifier) @name - body: (block) @body - )* - "#}; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [name, body] - "); -} - -#[test] -fn plus_quantifier_triggers_qis() { - let source = "Foo = { (a) @x (b) @y }+"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - "); -} - -#[test] -fn optional_quantifier_triggers_qis() { - let source = "Foo = { (a) @x (b) @y }?"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - "); -} - -#[test] -fn nested_quantifier_inner_qis() { - let source = "Foo = { { (a) @x (b) @y }* }+"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - QIS: [x, y] - "); -} - -#[test] -fn nested_quantifier_both_qis() { - // Outer quantifier has @c and @inner (2 captures) -> QIS - // Inner quantifier has @x and @y (2 captures) -> QIS - let source = "Outer = { (c) @c { (a) @x (b) @y }* @inner }+"; - - insta::assert_snapshot!(check_qis(source), @r" - Outer: - QIS: [c, inner] - QIS: [x, y] - "); -} - -#[test] -fn multiple_definitions() { - let source = indoc! {r#" - Single = { (a) @x }* - Multi = { (a) @x (b) @y }* - "#}; - - insta::assert_snapshot!(check_qis(source), @r" - Single: no QIS - Multi: - QIS: [x, y] - "); -} - -#[test] -fn no_quantifier_no_qis() { - let source = "Foo = { (a) @x (b) @y }"; - - insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); -} - -#[test] -fn lazy_quantifier_triggers_qis() { - let source = "Foo = { (a) @x (b) @y }*?"; - - insta::assert_snapshot!(check_qis(source), @r" - Foo: - QIS: [x, y] - "); -} - -#[test] -fn qis_graph_has_object_effects() { - // Verify that QIS-triggered quantifiers emit StartObject/EndObject - let source = "Foo = { (a) @x (b) @y }*"; - let (_query, pre_opt) = Query::try_from(source) - .unwrap() - .build_graph_with_pre_opt_dump(None); - - // QIS adds StartObj/EndObj around each iteration to keep captures coupled. - // Multi-capture definitions also get wrapped in StartObj/EndObj at root. - // The loop has separate wrappers for initial entry and re-entry paths. - let start_count = pre_opt.matches("StartObj").count(); - let end_count = pre_opt.matches("EndObj").count(); - - // 1 from multi-capture def wrapper + 1 for initial loop entry + 1 for re-entry = 3 - assert_eq!( - start_count, 3, - "QIS graph should have 3 StartObj (def wrapper + initial loop + re-entry):\n{}", - pre_opt - ); - assert_eq!( - end_count, 3, - "QIS graph should have 3 EndObj (def wrapper + initial loop + re-entry):\n{}", - pre_opt - ); -} - -#[test] -fn non_qis_graph_no_object_effects() { - // Single capture should NOT trigger QIS object wrapping - let source = "Foo = { (a) @x }*"; - let (_query, pre_opt) = Query::try_from(source) - .unwrap() - .build_graph_with_pre_opt_dump(None); - - // Non-QIS quantifiers don't need object scope - captures propagate with array cardinality. - // Sequences themselves don't add object scope either. - let start_count = pre_opt.matches("StartObj").count(); - let end_count = pre_opt.matches("EndObj").count(); - - assert_eq!( - start_count, 0, - "Non-QIS graph should have no StartObj:\n{}", - pre_opt - ); - assert_eq!( - end_count, 0, - "Non-QIS graph should have no EndObj:\n{}", - pre_opt - ); -} diff --git a/crates/plotnik-lib/src/query/infer.rs b/crates/plotnik-lib/src/query/infer.rs deleted file mode 100644 index 25c71b4c..00000000 --- a/crates/plotnik-lib/src/query/infer.rs +++ /dev/null @@ -1,811 +0,0 @@ -//! AST-based type inference for Plotnik queries. -//! -//! 
Analyzes query AST to determine output types. -//! Rules follow ADR-0009 (Type System). -//! -//! # Design -//! -//! Unlike graph-based inference which must reconstruct structure from CFG traversal, -//! AST-based inference directly walks the tree structure: -//! - Sequences → `SeqExpr` -//! - Alternations → `AltExpr` with `.kind()` for tagged/untagged -//! - Quantifiers → `QuantifiedExpr` -//! - Captures → `CapturedExpr` -//! -//! This eliminates dry-run traversal, reconvergence detection, and scope stack management. - -use std::collections::{HashMap, HashSet}; - -use indexmap::IndexMap; -use rowan::TextRange; - -use crate::diagnostics::{DiagnosticKind, Diagnostics}; -use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; -use crate::parser::ast::{self, AltKind, Expr}; -use crate::parser::token_src; - -use super::Query; - -/// Result of type inference. -#[derive(Debug, Default)] -pub struct TypeInferenceResult<'src> { - pub type_defs: Vec>, - pub entrypoint_types: IndexMap<&'src str, TypeId>, - pub diagnostics: Diagnostics, - pub errors: Vec>, -} - -/// Error when types cannot be unified in alternation branches. -#[derive(Debug, Clone)] -pub struct UnificationError<'src> { - pub field: &'src str, - pub definition: &'src str, - pub types_found: Vec, - pub spans: Vec, -} - -/// Human-readable type description for error messages. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TypeDescription { - Node, - String, - Struct(Vec), -} - -impl std::fmt::Display for TypeDescription { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - TypeDescription::Node => write!(f, "Node"), - TypeDescription::String => write!(f, "String"), - TypeDescription::Struct(fields) => { - write!(f, "Struct {{ {} }}", fields.join(", ")) - } - } - } -} - -/// An inferred type definition. -#[derive(Debug, Clone)] -pub struct InferredTypeDef<'src> { - pub kind: TypeKind, - pub name: Option<&'src str>, - pub members: Vec>, - pub inner_type: Option, -} - -/// A field (for Record) or variant (for Enum). -#[derive(Debug, Clone)] -pub struct InferredMember<'src> { - pub name: &'src str, - pub ty: TypeId, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -enum Cardinality { - #[default] - One, - Optional, - Star, - Plus, -} - -impl Cardinality { - /// Join cardinalities when merging alternation branches. - fn join(self, other: Cardinality) -> Cardinality { - use Cardinality::*; - match (self, other) { - (One, One) => One, - (One, Optional) | (Optional, One) | (Optional, Optional) => Optional, - (Plus, Plus) => Plus, - (One, Plus) | (Plus, One) => Plus, - _ => Star, - } - } - - fn make_optional(self) -> Cardinality { - use Cardinality::*; - match self { - One => Optional, - Plus => Star, - x => x, - } - } - - /// Multiply cardinalities (outer * inner). 
- fn multiply(self, inner: Cardinality) -> Cardinality { - use Cardinality::*; - match (self, inner) { - (One, x) => x, - (x, One) => x, - (Optional, Optional) => Optional, - (Plus, Plus) => Plus, - _ => Star, - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum TypeShape { - Primitive(TypeId), -} - -impl TypeShape { - fn to_description(&self) -> TypeDescription { - match self { - TypeShape::Primitive(TYPE_NODE) => TypeDescription::Node, - TypeShape::Primitive(TYPE_STR) => TypeDescription::String, - TypeShape::Primitive(_) => TypeDescription::Node, - } - } -} - -#[derive(Debug, Clone)] -struct FieldInfo { - base_type: TypeId, - shape: TypeShape, - cardinality: Cardinality, - branch_count: usize, - spans: Vec, -} - -#[derive(Debug, Clone, Default)] -struct ScopeInfo<'src> { - fields: IndexMap<&'src str, FieldInfo>, - #[allow(dead_code)] // May be used for future enum variant tracking - variants: IndexMap<&'src str, ScopeInfo<'src>>, - #[allow(dead_code)] - has_variants: bool, -} - -impl<'src> ScopeInfo<'src> { - fn add_field( - &mut self, - name: &'src str, - base_type: TypeId, - cardinality: Cardinality, - span: TextRange, - ) { - let shape = TypeShape::Primitive(base_type); - if let Some(existing) = self.fields.get_mut(name) { - existing.cardinality = existing.cardinality.join(cardinality); - existing.branch_count += 1; - existing.spans.push(span); - } else { - self.fields.insert( - name, - FieldInfo { - base_type, - shape, - cardinality, - branch_count: 1, - spans: vec![span], - }, - ); - } - } - - fn merge_from(&mut self, other: ScopeInfo<'src>) -> Vec> { - let mut errors = Vec::new(); - - for (name, other_info) in other.fields { - if let Some(existing) = self.fields.get_mut(name) { - if existing.shape != other_info.shape { - errors.push(MergeError { - field: name, - shapes: vec![existing.shape.clone(), other_info.shape.clone()], - spans: existing - .spans - .iter() - .chain(&other_info.spans) - .cloned() - .collect(), - }); - } - existing.cardinality = existing.cardinality.join(other_info.cardinality); - existing.branch_count += other_info.branch_count; - existing.spans.extend(other_info.spans); - } else { - self.fields.insert(name, other_info); - } - } - - errors - } - - fn apply_optionality(&mut self, total_branches: usize) { - for info in self.fields.values_mut() { - if info.branch_count < total_branches { - info.cardinality = info.cardinality.make_optional(); - } - } - } - - #[allow(dead_code)] // May be useful for future scope analysis - fn is_empty(&self) -> bool { - self.fields.is_empty() && self.variants.is_empty() - } -} - -#[derive(Debug)] -struct MergeError<'src> { - field: &'src str, - shapes: Vec, - spans: Vec, -} - -/// What an expression produces when evaluated. -#[derive(Debug, Clone)] -struct ExprResult { - /// Base type (before cardinality wrapping). - base_type: TypeId, - /// Cardinality modifier. - cardinality: Cardinality, - /// True if this result represents a meaningful type (not just default Node). - /// Used to distinguish QIS array results from simple uncaptured expressions. 
- is_meaningful: bool, -} - -impl ExprResult { - fn node() -> Self { - Self { - base_type: TYPE_NODE, - cardinality: Cardinality::One, - is_meaningful: false, - } - } - - fn void() -> Self { - Self { - base_type: TYPE_VOID, - cardinality: Cardinality::One, - is_meaningful: false, - } - } - - fn meaningful(type_id: TypeId) -> Self { - Self { - base_type: type_id, - cardinality: Cardinality::One, - is_meaningful: true, - } - } - - /// Type is known but doesn't contribute to definition result (e.g., opaque references). - fn opaque(type_id: TypeId) -> Self { - Self { - base_type: type_id, - cardinality: Cardinality::One, - is_meaningful: false, - } - } - - fn with_cardinality(mut self, card: Cardinality) -> Self { - self.cardinality = card; - self - } -} - -struct InferenceContext<'src> { - source: &'src str, - qis_triggers: HashSet, - type_defs: Vec>, - next_type_id: TypeId, - diagnostics: Diagnostics, - errors: Vec>, - current_def_name: &'src str, - /// Map from definition name to its computed type. - definition_types: HashMap<&'src str, TypeId>, -} - -impl<'src> InferenceContext<'src> { - fn new(source: &'src str, qis_triggers: HashSet) -> Self { - Self { - source, - qis_triggers, - type_defs: Vec::new(), - next_type_id: 3, // 0=void, 1=node, 2=str - diagnostics: Diagnostics::default(), - errors: Vec::new(), - current_def_name: "", - definition_types: HashMap::new(), - } - } - - fn alloc_type_id(&mut self) -> TypeId { - let id = self.next_type_id; - self.next_type_id += 1; - id - } - - fn infer_definition(&mut self, def_name: &'src str, body: &Expr) -> TypeId { - self.current_def_name = def_name; - - let mut scope = ScopeInfo::default(); - let mut merge_errors = Vec::new(); - - // Special case: tagged alternation at definition root creates enum - if let Expr::AltExpr(alt) = body - && alt.kind() == AltKind::Tagged - { - return self.infer_tagged_alternation_as_enum(def_name, alt, &mut merge_errors); - } - - // General case: infer expression and collect captures into scope - let result = self.infer_expr(body, &mut scope, Cardinality::One, &mut merge_errors); - - self.report_merge_errors(&merge_errors); - - // Build result type from scope (Payload Rule from ADR-0009) - match scope.fields.len() { - 0 => { - if result.is_meaningful { - // QIS or other expressions that produce a meaningful type without populating scope - result.base_type - } else { - TYPE_VOID - } - } - 1 => { - // Single capture at definition root: unwrap to capture's type - let (_, info) = scope.fields.iter().next().unwrap(); - self.wrap_with_cardinality(info.base_type, info.cardinality) - } - _ => { - // Multiple captures: create struct - self.create_struct_type(def_name, &scope) - } - } - } - - fn infer_expr( - &mut self, - expr: &Expr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - match expr { - Expr::CapturedExpr(c) => self.infer_captured(c, scope, outer_card, errors), - Expr::QuantifiedExpr(q) => self.infer_quantified(q, scope, outer_card, errors), - Expr::SeqExpr(s) => self.infer_sequence(s, scope, outer_card, errors), - Expr::AltExpr(a) => self.infer_alternation(a, scope, outer_card, errors), - Expr::NamedNode(n) => self.infer_named_node(n, scope, outer_card, errors), - Expr::FieldExpr(f) => self.infer_field_expr(f, scope, outer_card, errors), - Expr::Ref(r) => self.infer_ref(r), - Expr::AnonymousNode(_) => ExprResult::node(), - } - } - - fn infer_captured( - &mut self, - c: &ast::CapturedExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut 
Vec>, - ) -> ExprResult { - let capture_name = c.name().map(|t| token_src(&t, self.source)).unwrap_or("_"); - let span = c.text_range(); - let has_string_annotation = c - .type_annotation() - .and_then(|t| t.name()) - .is_some_and(|n| n.text() == "string"); - - let Some(inner) = c.inner() else { - return ExprResult::node(); - }; - - // Check if inner is a scope container (seq/alt) - let is_scope_container = matches!(inner, Expr::SeqExpr(_) | Expr::AltExpr(_)); - - if is_scope_container { - // Captured scope container: creates nested type - let nested_type = self.infer_captured_container(capture_name, &inner, errors); - let result = ExprResult::meaningful(nested_type); - let effective_card = outer_card.multiply(result.cardinality); - scope.add_field(capture_name, result.base_type, effective_card, span); - result - } else { - // Simple capture: just capture the result - let result = self.infer_expr(&inner, scope, outer_card, errors); - let base_type = if has_string_annotation { - TYPE_STR - } else { - result.base_type - }; - let effective_card = outer_card.multiply(result.cardinality); - scope.add_field(capture_name, base_type, effective_card, span); - ExprResult::meaningful(base_type).with_cardinality(result.cardinality) - } - } - - fn infer_captured_container( - &mut self, - _capture_name: &'src str, - inner: &Expr, - errors: &mut Vec>, - ) -> TypeId { - match inner { - Expr::SeqExpr(s) => { - let mut nested_scope = ScopeInfo::default(); - for child in s.children() { - self.infer_expr(&child, &mut nested_scope, Cardinality::One, errors); - } - // Per ADR-0009 Payload Rule: 0 captures → Void - if nested_scope.is_empty() { - return TYPE_VOID; - } - let type_name = self.generate_scope_name(); - self.create_struct_type(type_name, &nested_scope) - } - Expr::AltExpr(a) => { - if a.kind() == AltKind::Tagged { - // Captured tagged alternation → Enum - let type_name = self.generate_scope_name(); - self.infer_tagged_alternation_as_enum(type_name, a, errors) - } else { - // Captured untagged alternation → Struct with merged fields - let mut nested_scope = ScopeInfo::default(); - self.infer_untagged_alternation(a, &mut nested_scope, Cardinality::One, errors); - // Per ADR-0009 Payload Rule: 0 captures → Void - if nested_scope.is_empty() { - return TYPE_VOID; - } - let type_name = self.generate_scope_name(); - self.create_struct_type(type_name, &nested_scope) - } - } - _ => { - // Not a container - shouldn't reach here - TYPE_NODE - } - } - } - - fn infer_quantified( - &mut self, - q: &ast::QuantifiedExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - let Some(inner) = q.inner() else { - return ExprResult::node(); - }; - - let quant_card = self.quantifier_cardinality(q); - let is_qis = self.qis_triggers.contains(q); - - if is_qis { - // QIS: create implicit scope for multiple captures - let mut nested_scope = ScopeInfo::default(); - self.infer_expr(&inner, &mut nested_scope, Cardinality::One, errors); - - let element_type = if !nested_scope.fields.is_empty() { - let type_name = self.generate_scope_name(); - self.create_struct_type(type_name, &nested_scope) - } else { - TYPE_NODE - }; - - // Wrap with array type - this is a meaningful result - let array_type = self.wrap_with_cardinality(element_type, quant_card); - ExprResult::meaningful(array_type) - } else { - // No QIS: captures propagate with multiplied cardinality - let combined_card = outer_card.multiply(quant_card); - let result = self.infer_expr(&inner, scope, combined_card, errors); - // 
Return result with quantifier's cardinality so captured quantifiers work correctly - ExprResult { - base_type: result.base_type, - cardinality: quant_card.multiply(result.cardinality), - is_meaningful: result.is_meaningful, - } - } - } - - fn infer_sequence( - &mut self, - s: &ast::SeqExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - // Uncaptured sequence: captures propagate to parent scope - let mut last_result = ExprResult::void(); - for child in s.children() { - last_result = self.infer_expr(&child, scope, outer_card, errors); - } - last_result - } - - fn infer_alternation( - &mut self, - a: &ast::AltExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - // Uncaptured alternation (tagged or untagged): captures propagate with optionality - self.infer_untagged_alternation(a, scope, outer_card, errors) - } - - fn infer_untagged_alternation( - &mut self, - a: &ast::AltExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - let branches: Vec<_> = a.branches().collect(); - let total_branches = branches.len(); - - if total_branches == 0 { - return ExprResult::void(); - } - - let mut merged_scope = ScopeInfo::default(); - - for branch in &branches { - let Some(body) = branch.body() else { - continue; - }; - let mut branch_scope = ScopeInfo::default(); - self.infer_expr(&body, &mut branch_scope, outer_card, errors); - errors.extend(merged_scope.merge_from(branch_scope)); - } - - // Apply optionality for fields not present in all branches - merged_scope.apply_optionality(total_branches); - - // Merge into parent scope - errors.extend(scope.merge_from(merged_scope)); - - ExprResult::node() - } - - fn infer_tagged_alternation_as_enum( - &mut self, - type_name: &'src str, - a: &ast::AltExpr, - errors: &mut Vec>, - ) -> TypeId { - let mut variants = IndexMap::new(); - - for branch in a.branches() { - let tag = branch - .label() - .map(|t| token_src(&t, self.source)) - .unwrap_or("_"); - let Some(body) = branch.body() else { - variants.insert(tag, ScopeInfo::default()); - continue; - }; - - let mut variant_scope = ScopeInfo::default(); - self.infer_expr(&body, &mut variant_scope, Cardinality::One, errors); - variants.insert(tag, variant_scope); - } - - self.create_enum_type_from_variants(type_name, &variants) - } - - fn infer_named_node( - &mut self, - n: &ast::NamedNode, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - // Named nodes have children - recurse into them - for child in n.children() { - self.infer_expr(&child, scope, outer_card, errors); - } - ExprResult::node() - } - - fn infer_field_expr( - &mut self, - f: &ast::FieldExpr, - scope: &mut ScopeInfo<'src>, - outer_card: Cardinality, - errors: &mut Vec>, - ) -> ExprResult { - // Field constraint (name: expr) - just recurse - if let Some(value) = f.value() { - return self.infer_expr(&value, scope, outer_card, errors); - } - ExprResult::node() - } - - fn infer_ref(&self, r: &ast::Ref) -> ExprResult { - // References are opaque - captures don't propagate from referenced definition. - // Return the type (for use when captured) but mark as not meaningful - // so uncaptured refs don't affect definition's result type. 
- let ref_name = r.name().map(|t| t.text().to_string()); - if let Some(name) = ref_name - && let Some(&type_id) = self.definition_types.get(name.as_str()) - { - return ExprResult::opaque(type_id); - } - ExprResult::node() - } - - fn quantifier_cardinality(&self, q: &ast::QuantifiedExpr) -> Cardinality { - let Some(op) = q.operator() else { - return Cardinality::One; - }; - use crate::parser::cst::SyntaxKind; - match op.kind() { - SyntaxKind::Star | SyntaxKind::StarQuestion => Cardinality::Star, - SyntaxKind::Plus | SyntaxKind::PlusQuestion => Cardinality::Plus, - SyntaxKind::Question | SyntaxKind::QuestionQuestion => Cardinality::Optional, - _ => Cardinality::One, - } - } - - fn generate_scope_name(&self) -> &'src str { - let name = format!("{}Scope{}", self.current_def_name, self.next_type_id); - Box::leak(name.into_boxed_str()) - } - - fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { - let members: Vec<_> = scope - .fields - .iter() - .map(|(field_name, info)| { - let member_type = self.wrap_with_cardinality(info.base_type, info.cardinality); - InferredMember { - name: field_name, - ty: member_type, - } - }) - .collect(); - - let type_id = self.alloc_type_id(); - - self.type_defs.push(InferredTypeDef { - kind: TypeKind::Record, - name: Some(name), - members, - inner_type: None, - }); - - type_id - } - - fn create_enum_type_from_variants( - &mut self, - name: &'src str, - variants: &IndexMap<&'src str, ScopeInfo<'src>>, - ) -> TypeId { - let mut members = Vec::new(); - - for (tag, variant_scope) in variants { - let variant_type = if variant_scope.fields.is_empty() { - TYPE_VOID - } else if variant_scope.fields.len() == 1 { - // Single-capture variant: flatten (ADR-0007) - let (_, info) = variant_scope.fields.iter().next().unwrap(); - self.wrap_with_cardinality(info.base_type, info.cardinality) - } else { - let variant_name = self.generate_scope_name(); - self.create_struct_type(variant_name, variant_scope) - }; - members.push(InferredMember { - name: tag, - ty: variant_type, - }); - } - - let type_id = self.alloc_type_id(); - - self.type_defs.push(InferredTypeDef { - kind: TypeKind::Enum, - name: Some(name), - members, - inner_type: None, - }); - - type_id - } - - fn wrap_with_cardinality(&mut self, base: TypeId, card: Cardinality) -> TypeId { - match card { - Cardinality::One => base, - Cardinality::Optional => { - let type_id = self.alloc_type_id(); - self.type_defs.push(InferredTypeDef { - kind: TypeKind::Optional, - name: None, - members: Vec::new(), - inner_type: Some(base), - }); - type_id - } - Cardinality::Star => { - let type_id = self.alloc_type_id(); - self.type_defs.push(InferredTypeDef { - kind: TypeKind::ArrayStar, - name: None, - members: Vec::new(), - inner_type: Some(base), - }); - type_id - } - Cardinality::Plus => { - let type_id = self.alloc_type_id(); - self.type_defs.push(InferredTypeDef { - kind: TypeKind::ArrayPlus, - name: None, - members: Vec::new(), - inner_type: Some(base), - }); - type_id - } - } - } - - fn report_merge_errors(&mut self, merge_errors: &[MergeError<'src>]) { - for err in merge_errors { - let types_str = err - .shapes - .iter() - .map(|s| s.to_description().to_string()) - .collect::>() - .join(" vs "); - - let primary_span = err.spans.first().copied().unwrap_or_default(); - let mut builder = self - .diagnostics - .report(DiagnosticKind::IncompatibleTypes, primary_span) - .message(types_str); - - for span in err.spans.iter().skip(1) { - builder = builder.related_to("also captured here", *span); - } - builder 
- .hint(format!( - "capture `{}` has incompatible types across branches", - err.field - )) - .emit(); - - self.errors.push(UnificationError { - field: err.field, - definition: self.current_def_name, - types_found: err.shapes.iter().map(|s| s.to_description()).collect(), - spans: err.spans.clone(), - }); - } - } -} - -impl<'a> Query<'a> { - /// Run type inference on the query AST. - pub(super) fn infer_types(&mut self) { - // Collect QIS triggers upfront to avoid borrowing issues - let qis_triggers: HashSet<_> = self.qis_ctx.qis_triggers.keys().cloned().collect(); - - let mut ctx = InferenceContext::new(self.source, qis_triggers); - - // Process definitions in dependency order - for scc in &self.dependency_analysis.sccs { - for name in scc { - if let Some(body) = self.symbol_table.get(name) { - let type_id = ctx.infer_definition(name, body); - ctx.definition_types.insert(name, type_id); - } - } - } - - // Preserve symbol table order for entrypoints - for (name, _) in self.symbol_table.iter() { - if let Some(&type_id) = ctx.definition_types.get(name) { - self.type_info.entrypoint_types.insert(*name, type_id); - } - } - self.type_info.type_defs = ctx.type_defs; - self.type_info.diagnostics = ctx.diagnostics; - self.type_info.errors = ctx.errors; - } -} diff --git a/crates/plotnik-lib/src/query/infer_dump.rs b/crates/plotnik-lib/src/query/infer_dump.rs deleted file mode 100644 index 0e6af2f9..00000000 --- a/crates/plotnik-lib/src/query/infer_dump.rs +++ /dev/null @@ -1,232 +0,0 @@ -//! Dump helpers for type inference inspection and testing. - -use std::fmt::Write; - -use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; - -use super::infer::TypeInferenceResult; - -impl TypeInferenceResult<'_> { - pub fn dump(&self) -> String { - let mut out = String::new(); - let printer = TypePrinter::new(self); - printer.format(&mut out).expect("String write never fails"); - out - } - - pub fn dump_diagnostics(&self, source: &str) -> String { - self.diagnostics.render_filtered(source) - } - - pub fn has_errors(&self) -> bool { - !self.errors.is_empty() - } -} - -struct TypePrinter<'a, 'src> { - result: &'a TypeInferenceResult<'src>, - width: usize, -} - -impl<'a, 'src> TypePrinter<'a, 'src> { - fn new(result: &'a TypeInferenceResult<'src>) -> Self { - let total_types = 3 + result.type_defs.len(); - let width = if total_types == 0 { - 1 - } else { - ((total_types as f64).log10().floor() as usize) + 1 - }; - Self { result, width } - } - - fn format(&self, w: &mut String) -> std::fmt::Result { - // Entrypoints (skip redundant Foo = Foo) - for (name, type_id) in &self.result.entrypoint_types { - let type_name = self.get_type_name(*type_id); - if type_name.as_deref() == Some(*name) { - continue; - } - writeln!(w, "{} = {}", name, self.format_type(*type_id))?; - } - - let has_entrypoints = self - .result - .entrypoint_types - .iter() - .any(|(name, id)| self.get_type_name(*id).as_deref() != Some(*name)); - - // Type definitions (skip inlinable types) - let mut first_typedef = true; - for (idx, def) in self.result.type_defs.iter().enumerate() { - let type_id = 3 + idx as TypeId; - - if self.is_inlinable(type_id) { - continue; - } - - if first_typedef && has_entrypoints { - writeln!(w)?; - } - first_typedef = false; - - let header = self.format_type_header(type_id, def.name); - - match def.kind { - TypeKind::Record => { - if def.members.len() == 1 { - let m = &def.members[0]; - writeln!( - w, - "{} = {{ {}: {} }}", - header, - m.name, - self.format_type(m.ty) - )?; - } else { - writeln!(w, "{} = {{", 
header)?; - for member in &def.members { - writeln!(w, " {}: {}", member.name, self.format_type(member.ty))?; - } - writeln!(w, "}}")?; - } - } - TypeKind::Enum => { - if def.members.len() == 1 { - let m = &def.members[0]; - writeln!( - w, - "{} = {{ {} => {} }}", - header, - m.name, - self.format_type(m.ty) - )?; - } else { - writeln!(w, "{} = {{", header)?; - for member in &def.members { - writeln!(w, " {} => {}", member.name, self.format_type(member.ty))?; - } - writeln!(w, "}}")?; - } - } - TypeKind::Optional => { - let inner = def - .inner_type - .map(|t| self.format_type(t)) - .unwrap_or_default(); - writeln!(w, "{} = {}?", header, inner)?; - } - TypeKind::ArrayStar => { - let inner = def - .inner_type - .map(|t| self.format_type(t)) - .unwrap_or_default(); - writeln!(w, "{} = [{}]", header, inner)?; - } - TypeKind::ArrayPlus => { - let inner = def - .inner_type - .map(|t| self.format_type(t)) - .unwrap_or_default(); - writeln!(w, "{} = [{}]⁺", header, inner)?; - } - } - } - - // Errors - if !self.result.errors.is_empty() { - if has_entrypoints || !first_typedef { - writeln!(w)?; - } - writeln!(w, "Errors:")?; - for err in &self.result.errors { - let types = err - .types_found - .iter() - .map(|t| t.to_string()) - .collect::>() - .join(", "); - writeln!( - w, - " field `{}` in `{}`: incompatible types [{}]", - err.field, err.definition, types - )?; - } - } - - Ok(()) - } - - fn get_type_name(&self, id: TypeId) -> Option { - if id < 3 { - return None; - } - let idx = (id - 3) as usize; - self.result - .type_defs - .get(idx) - .and_then(|def| def.name.map(|s| s.to_string())) - } - - /// Returns true if the type should be inlined rather than shown as separate definition. - /// Inlinable: wrapper types (Optional/Array*) around primitives or other inlinable types. 
- fn is_inlinable(&self, id: TypeId) -> bool { - if id < 3 { - return true; // primitives are always inlinable - } - let idx = (id - 3) as usize; - let Some(def) = self.result.type_defs.get(idx) else { - return false; - }; - - // Named types are not inlined (they have semantic meaning) - if def.name.is_some() { - return false; - } - - match def.kind { - TypeKind::Record | TypeKind::Enum => false, - TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus => { - def.inner_type.map(|t| self.is_inlinable(t)).unwrap_or(true) - } - } - } - - fn format_type_header(&self, type_id: TypeId, name: Option<&str>) -> String { - match name { - Some(n) => n.to_string(), - None => format!("T{:0width$}", type_id, width = self.width), - } - } - - fn format_type(&self, id: TypeId) -> String { - match id { - TYPE_VOID => "()".to_string(), - TYPE_NODE => "Node".to_string(), - TYPE_STR => "str".to_string(), - _ => { - let idx = (id - 3) as usize; - if let Some(def) = self.result.type_defs.get(idx) { - // Named types: use name - if let Some(name) = def.name { - return name.to_string(); - } - - // Inlinable wrappers: format inline - if self.is_inlinable(id) { - let inner = def - .inner_type - .map(|t| self.format_type(t)) - .unwrap_or_default(); - return match def.kind { - TypeKind::Optional => format!("{}?", inner), - TypeKind::ArrayStar => format!("[{}]", inner), - TypeKind::ArrayPlus => format!("[{}]⁺", inner), - _ => format!("T{:0width$}", id, width = self.width), - }; - } - } - format!("T{:0width$}", id, width = self.width) - } - } - } -} diff --git a/crates/plotnik-lib/src/query/infer_tests.rs b/crates/plotnik-lib/src/query/infer_tests.rs deleted file mode 100644 index 291e4707..00000000 --- a/crates/plotnik-lib/src/query/infer_tests.rs +++ /dev/null @@ -1,446 +0,0 @@ -//! Tests for type inference. 
- -use indoc::indoc; - -use crate::query::Query; - -fn infer(source: &str) -> String { - let query = Query::try_from(source) - .expect("parse should succeed") - .build_graph(); - query.type_info().dump() -} - -fn infer_with_graph(source: &str) -> String { - let query = Query::try_from(source) - .expect("parse should succeed") - .build_graph(); - let mut out = String::new(); - out.push_str(&query.graph().dump_live(query.dead_nodes())); - out.push('\n'); - out.push_str(&query.type_info().dump()); - out -} - -#[test] -fn debug_star_quantifier_graph() { - // See graph BEFORE optimization (what type inference actually sees) - let (query, pre_opt_dump) = Query::try_from("Foo = ((item) @items)*") - .expect("parse should succeed") - .build_graph_with_pre_opt_dump(None); - let mut out = String::new(); - out.push_str("(pre-optimization)\n"); - out.push_str(&pre_opt_dump); - out.push_str("\n(post-optimization)\n"); - out.push_str(&query.graph().dump_live(query.dead_nodes())); - out.push('\n'); - out.push_str(&query.type_info().dump()); - insta::assert_snapshot!(out, @r" - (pre-optimization) - Foo = (3) - - (0) —(_)→ (1) - (1) —{↘}—(item)—[CaptureNode]→ (2) - (2) —{↗¹}—𝜀→ (5) - (3) —𝜀—[StartArray]→ (6) - (4) —𝜀—[EndArray]→ (✓) - (5) —𝜀—[PushElement]→ (8) - (6) —𝜀→ (0), (4) - (7) —{→}—(_)→ (1), (5) - (8) —𝜀→ (7), (4) - - (post-optimization) - Foo = (6) - - (0) —(_)→ (1) - (1) —{↘}—(item)—[CaptureNode]→ (2) - (2) —{↗¹}—𝜀→ (8) - (4) —𝜀—[EndArray]→ (✓) - (6) —𝜀—[StartArray]→ (0), (4) - (7) —{→}—(_)→ (1), (8) - (8) —𝜀—[PushElement]→ (7), (4) - - Foo = [Node] - "); -} - -#[test] -fn debug_graph_structure() { - let result = infer_with_graph("Foo = (identifier) @name"); - insta::assert_snapshot!(result, @r" - Foo = (0) - - (0) —(identifier)—[CaptureNode]→ (✓) - - Foo = Node - "); -} - -#[test] -fn debug_incompatible_types_graph() { - let input = indoc! {r#" - Foo = [ (a) @v (b) @v ::string ] - "#}; - - let query = Query::new(input) - .exec() - .expect("parse should succeed") - .build_graph(); - - let mut out = String::new(); - out.push_str(&query.graph().dump_live(query.dead_nodes())); - out.push_str(&format!("\n(dead nodes: {})\n\n", query.dead_nodes().len())); - out.push_str(&query.type_info().dump()); - insta::assert_snapshot!(out, @r" - Foo = (0) - - (0) —𝜀—[StartObject]→ (2), (4) - (2) —(a)—[CaptureNode]→ (3) - (3) —𝜀—[Field(v)]→ (7) - (4) —(b)—[CaptureNode, ToString]→ (5) - (5) —𝜀—[Field(v)]→ (7) - (7) —𝜀—[EndObject]→ (✓) - - (dead nodes: 2) - - Foo = Node - - Errors: - field `v` in `Foo`: incompatible types [Node, String] - "); -} - -#[test] -fn single_node_capture() { - let result = infer("Foo = (identifier) @name"); - insta::assert_snapshot!(result, @"Foo = Node"); -} - -#[test] -fn string_capture() { - let result = infer("Foo = (identifier) @name ::string"); - insta::assert_snapshot!(result, @"Foo = str"); -} - -#[test] -fn multiple_captures_flat() { - let result = infer("Foo = (a (b) @x (c) @y)"); - insta::assert_snapshot!(result, @r" - Foo = { - x: Node - y: Node - } - "); -} - -#[test] -fn no_captures_void() { - let result = infer("Foo = (identifier)"); - insta::assert_snapshot!(result, @"Foo = ()"); -} - -#[test] -fn captured_sequence_creates_struct() { - let input = indoc! {r#" - Foo = { (a) @x (b) @y } @z - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = FooScope3 - - FooScope3 = { - x: Node - y: Node - } - "); -} - -#[test] -fn nested_captured_sequence() { - let input = indoc! 
{r#" - Foo = { (outer) @a { (inner) @b } @nested } @root - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = FooScope4 - - FooScope3 = { b: Node } - FooScope4 = { - a: Node - nested: FooScope3 - } - "); -} - -#[test] -fn sequence_without_capture_propagates() { - let input = indoc! {r#" - Foo = { (a) @x (b) @y } - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = { - x: Node - y: Node - } - "); -} - -#[test] -fn untagged_alternation_symmetric() { - let input = indoc! {r#" - Foo = [ (a) @v (b) @v ] - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @"Foo = Node"); -} - -#[test] -fn untagged_alternation_asymmetric() { - let input = indoc! {r#" - Foo = [ (a) @x (b) @y ] - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = { - x: Node? - y: Node? - } - "); -} - -#[test] -fn tagged_alternation_uncaptured_propagates() { - let input = indoc! {r#" - Foo = [ A: (a) @x B: (b) @y ] - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = { - A => Node - B => Node - } - "); -} - -#[test] -fn tagged_alternation_captured_creates_enum() { - let input = indoc! {r#" - Foo = [ A: (a) @x B: (b) @y ] @choice - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = FooScope3 - - FooScope3 = { - A => Node - B => Node - } - "); -} - -#[test] -fn captured_untagged_alternation_creates_struct() { - let input = indoc! {r#" - Foo = [ (a) @x (b) @y ] @val - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = FooScope3 - - FooScope3 = { - x: Node? - y: Node? - } - "); -} - -#[test] -fn star_quantifier() { - let result = infer("Foo = ((item) @items)*"); - insta::assert_snapshot!(result, @"Foo = [Node]"); -} - -#[test] -fn plus_quantifier() { - let result = infer("Foo = ((item) @items)+"); - insta::assert_snapshot!(result, @"Foo = [Node]⁺"); -} - -#[test] -fn optional_quantifier() { - let result = infer("Foo = ((item) @maybe)?"); - insta::assert_snapshot!(result, @"Foo = Node?"); -} - -#[test] -fn quantifier_on_sequence() { - // QIS triggered: ≥2 captures inside quantified expression - let input = indoc! {r#" - Foo = { (a) @x (b) @y }* - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = T4 - - FooScope3 = { - x: Node - y: Node - } - T4 = [FooScope3] - "); -} - -#[test] -fn qis_single_capture_no_trigger() { - // Single capture inside sequence - no QIS - // Note: The sequence creates its own scope, so the capture goes there. - // Without explicit capture on the sequence, the struct is orphaned. - let input = indoc! {r#" - Single = { (a) @item }* - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @"Single = [Node]"); -} - -#[test] -fn qis_alternation_in_sequence() { - // Alternation with asymmetric captures inside quantified sequence - // QIS triggered (2 captures), creates element struct - // Note: Current impl doesn't apply optionality for alternation branches in QIS - let input = indoc! {r#" - Foo = { [ (a) @x (b) @y ] }* - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Foo = T6 - - FooScope3 = { - x: Node? - y: Node? - } - T6 = [FooScope3] - "); -} - -#[test] -fn quantified_seq_with_inline_tagged_alt() { - // Issue #5: captures from inline tagged alternation inside quantified sequence - // The tagged alternation is uncaptured, so it should behave like untagged. - // All captures should propagate with Optional cardinality. - let input = indoc! 
{r#" - Test = { [ A: (a) @x B: (b) @y ] }* @items - "#}; - - let result = infer_with_graph(input); - insta::assert_snapshot!(result, @r" - Test = (16) - - (00) —𝜀—[StartObject]→ (01) - (01) —{→}—𝜀→ (04), (08) - (04) —(a)—[StartVariant(A), CaptureNode, CaptureNode]→ (06) - (06) —𝜀—[Field(x), EndVariant]→ (14) - (08) —(b)—[StartVariant(B), CaptureNode, CaptureNode]→ (10) - (10) —𝜀—[Field(y), EndVariant]→ (14) - (12) —𝜀—[EndArray]→ (✓) - (14) —𝜀—[EndObject]→ (20) - (16) —𝜀—[StartArray]→ (00), (12) - (17) —{→}—𝜀→ (01), (19) - (18) —𝜀—[StartObject]→ (17) - (19) —𝜀—[EndObject]→ (20) - (20) —𝜀—[PushElement]→ (18), (12) - - Test = T6 - - TestScope3 = { - x: Node? - y: Node? - } - T6 = [TestScope3] - "); -} - -#[test] -fn compatible_types_in_alternation() { - let input = indoc! {r#" - Foo = [ (a) @v (b) @v ] - "#}; - - let query = Query::try_from(input).expect("parse").build_graph(); - assert!(query.type_info().errors.is_empty()); -} - -#[test] -fn incompatible_types_in_alternation() { - let input = indoc! {r#" - Foo = [ (a) @v (b) @v ::string ] - "#}; - - let result = infer_with_graph(input); - insta::assert_snapshot!(result, @r" - Foo = (0) - - (0) —𝜀—[StartObject]→ (2), (4) - (2) —(a)—[CaptureNode]→ (3) - (3) —𝜀—[Field(v)]→ (7) - (4) —(b)—[CaptureNode, ToString]→ (5) - (5) —𝜀—[Field(v)]→ (7) - (7) —𝜀—[EndObject]→ (✓) - - Foo = Node - - Errors: - field `v` in `Foo`: incompatible types [Node, String] - "); -} - -#[test] -fn multiple_definitions() { - let input = indoc! {r#" - Func = (function_declaration name: (identifier) @name) - Class = (class_declaration name: (identifier) @name body: (class_body) @body) - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @r" - Func = Node - - Class = { - name: Node - body: Node - } - "); -} - -#[test] -fn deeply_nested_node() { - let input = indoc! {r#" - Foo = (a (b (c (d) @val))) - "#}; - - let result = infer(input); - insta::assert_snapshot!(result, @"Foo = Node"); -} - -#[test] -fn wildcard_capture() { - let result = infer("Foo = _ @any"); - insta::assert_snapshot!(result, @"Foo = Node"); -} - -#[test] -fn string_literal_capture() { - let result = infer(r#"Foo = "+" @op"#); - insta::assert_snapshot!(result, @"Foo = Node"); -} diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index 8610e1d0..102fff6a 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -18,28 +18,10 @@ use crate::parser::ast::{self, Expr, NamedNode}; use crate::parser::cst::{SyntaxKind, SyntaxToken}; use crate::parser::token_src; -use super::Query; use super::symbol_table::SymbolTable; use super::utils::find_similar; use super::visitor::{Visitor, walk}; -impl<'a> Query<'a> { - /// Link query against a language grammar. - /// - /// Resolves node types and fields, validates structural constraints. - pub fn link(&mut self, lang: &Lang) { - link( - &self.ast, - self.source, - lang, - &self.symbol_table, - &mut self.node_type_ids, - &mut self.node_field_ids, - &mut self.link_diagnostics, - ); - } -} - /// Link query against a language grammar. 
/// /// This function is decoupled from `Query` to allow easier testing and diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 203a6858..3ecc7dea 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -8,8 +8,9 @@ fn valid_query_with_field() { name: (identifier) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @r" @@ -29,8 +30,9 @@ fn unknown_node_type_with_suggestion() { (function_declaraton) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -49,8 +51,9 @@ fn unknown_node_type_no_suggestion() { (xyzzy_foobar_baz) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -68,8 +71,9 @@ fn unknown_field_with_suggestion() { nme: (identifier) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -89,8 +93,9 @@ fn unknown_field_no_suggestion() { xyzzy: (identifier) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -108,8 +113,9 @@ fn field_not_on_node_type() { condition: (identifier) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -131,8 +137,9 @@ fn field_not_on_node_type_with_suggestion() { parameter: (formal_parameters) @params) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::typescript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::typescript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -154,8 +161,9 @@ fn negated_field_unknown() { (function_declaration !nme) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -174,8 +182,9 @@ fn negated_field_not_on_node_type() { (function_declaration !condition) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -196,8 +205,9 @@ fn 
negated_field_not_on_node_type_with_suggestion() { (function_declaration !parameter) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::typescript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::typescript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -219,8 +229,9 @@ fn negated_field_valid() { (function_declaration !name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @r" @@ -238,8 +249,9 @@ fn anonymous_node_unknown() { (function_declaration "xyzzy_fake_token") @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r#" @@ -256,8 +268,9 @@ fn error_and_missing_nodes_skip_validation() { (ERROR) @err "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); @@ -265,8 +278,9 @@ fn error_and_missing_nodes_skip_validation() { (MISSING) @miss "#}; - let mut query2 = Query::try_from(input2).unwrap(); - query2.link(&plotnik_langs::javascript()); + let query2 = Query::try_from(input2) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query2.is_valid()); } @@ -278,8 +292,9 @@ fn multiple_errors_in_query() { nme: (identifer) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -314,8 +329,9 @@ fn nested_field_validation() { (return_statement) @ret) @body) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @r" @@ -339,8 +355,9 @@ fn invalid_child_type_for_field() { name: (statement_block) @name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -362,8 +379,9 @@ fn alternation_with_link_errors() { (class_declaraton)] @decl "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -392,8 +410,9 @@ fn sequence_with_link_errors() { (statement_block)} @body) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -421,8 +440,9 @@ fn quantified_expr_validation() { (function_declaration)+ @fns) @block "#}; - let mut query = 
Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @r" @@ -442,8 +462,9 @@ fn wildcard_node_skips_validation() { (_) @any "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -456,8 +477,9 @@ fn def_reference_with_link() { (program (Func)+) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); insta::assert_snapshot!(query.dump_ast(), @r" @@ -482,8 +504,9 @@ fn field_on_node_without_fields() { name: (identifier) @inner) @id "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -506,8 +529,9 @@ fn invalid_child_type_no_children_allowed() { (class_declaration)) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -528,8 +552,9 @@ fn invalid_child_type_wrong_type() { (identifier)) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -551,8 +576,9 @@ fn valid_child_via_supertype() { (function_declaration)) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -564,8 +590,9 @@ fn valid_child_via_nested_supertype() { (function_declaration)) @prog "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -578,8 +605,9 @@ fn invalid_anonymous_child() { "function") @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r#" @@ -602,8 +630,9 @@ fn invalid_child_in_alternation() { [(function_declaration) (identifier)]) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -626,8 +655,9 @@ fn invalid_child_in_sequence() { {(function_declaration) (identifier)}) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); 
insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -648,8 +678,9 @@ fn deeply_nested_sequences_valid() { (statement_block {{{(function_declaration)}}}) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -661,8 +692,9 @@ fn deeply_nested_sequences_invalid() { (statement_block {{{(identifier)}}}) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -683,8 +715,9 @@ fn deeply_nested_alternations_in_field_valid() { (function_declaration name: [[[(identifier)]]]) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -696,8 +729,9 @@ fn deeply_nested_alternations_in_field_invalid() { (function_declaration name: [[[(statement_block)]]]) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -719,8 +753,9 @@ fn deeply_nested_no_fields_allowed() { (function_declaration {{{(class_declaration)}}}) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -742,8 +777,9 @@ fn mixed_nested_with_capture_and_quantifier() { (identifier) @id]*}) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -767,8 +803,9 @@ fn field_with_captured_and_quantified_invalid_type() { name: (statement_block)? 
@name) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -791,8 +828,9 @@ fn multiple_invalid_types_in_alternation_field() { name: [(statement_block) (class_declaration)]) @fn "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -824,8 +862,9 @@ fn multiple_invalid_types_in_sequence_child() { {(identifier) (number)}) @block "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -857,8 +896,9 @@ fn ref_followed_for_child_validation() { (function_declaration (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -886,8 +926,9 @@ fn ref_followed_for_field_validation() { (function_declaration name: (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -918,8 +959,9 @@ fn ref_followed_valid_case() { (function_declaration name: (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(query.is_valid()); } @@ -932,8 +974,9 @@ fn ref_followed_recursive_with_invalid_type() { (function_declaration name: (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -962,8 +1005,9 @@ fn ref_followed_recursive_valid() { (function_declaration name: (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -985,8 +1029,9 @@ fn ref_followed_mutual_recursion() { (function_declaration name: (Foo)) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -1029,8 +1074,9 @@ fn ref_followed_in_sequence() { (statement_block {(Foo) (string)}) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" @@ -1064,8 +1110,9 @@ fn 
ref_validated_in_multiple_contexts() { body: (statement_block (Foo))) "#}; - let mut query = Query::try_from(input).unwrap(); - query.link(&plotnik_langs::javascript()); + let query = Query::try_from(input) + .unwrap() + .link(&plotnik_langs::javascript()); assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics_raw(), @r" diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 06704a5f..ff0b2dc5 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -1,57 +1,25 @@ -//! Query processing pipeline. -//! -//! Stages: parse → alt_kinds → symbol_table → recursion → shapes → [qis → build_graph]. -//! Each stage populates its own diagnostics. Use `is_valid()` to check -//! if any stage produced errors. -//! -//! The `build_graph` stage is optional and constructs the transition graph -//! for compilation to binary IR. QIS detection runs as part of this stage. - mod dump; -mod graph_qis; mod invariants; mod printer; mod utils; -use plotnik_core::NodeFieldId; -use plotnik_core::NodeTypeId; pub use printer::QueryPrinter; +pub use query::{Query, QueryBuilder}; pub mod alt_kinds; mod dependencies; pub mod expr_arity; -pub mod graph; -mod graph_build; -mod graph_dump; -mod graph_optimize; -pub mod infer; -mod infer_dump; pub mod link; #[allow(clippy::module_inception)] pub mod query; pub mod symbol_table; pub mod visitor; -pub use graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; -pub use graph_optimize::OptimizeStats; -pub use infer::{ - InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, -}; -pub use symbol_table::UNNAMED_DEF; - #[cfg(test)] mod alt_kinds_tests; #[cfg(test)] mod dependencies_tests; #[cfg(test)] mod expr_arity_tests; -#[cfg(test)] -mod graph_build_tests; -#[cfg(test)] -mod graph_master_test; -#[cfg(test)] -mod graph_qis_tests; -#[cfg(test)] -mod infer_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] @@ -60,285 +28,3 @@ mod mod_tests; mod printer_tests; #[cfg(test)] mod symbol_table_tests; - -use std::collections::{HashMap, HashSet}; - -use rowan::GreenNodeBuilder; - -use crate::Result; -use crate::diagnostics::Diagnostics; -use crate::parser::cst::SyntaxKind; -use crate::parser::lexer::lex; -use crate::parser::{ParseResult, Parser, Root, SyntaxNode, ast}; -use crate::query::dependencies::DependencyAnalysis; -use crate::query::graph_qis::QisContext; -use crate::query::graph_qis::detect_capture_scopes; - -const DEFAULT_EXEC_FUEL: u32 = 1_000_000; -const DEFAULT_RECURSION_FUEL: u32 = 4096; - -use expr_arity::ExprArity; -use symbol_table::SymbolTable; - -/// A parsed and analyzed query. -/// -/// Create with [`new`](Self::new), optionally configure fuel limits, -/// then call [`exec`](Self::exec) to run analysis. -/// -/// For compilation, call [`build_graph`](Self::build_graph) after `exec`. -/// -/// Check [`is_valid`](Self::is_valid) or [`diagnostics`](Self::diagnostics) -/// to determine if the query has syntax/semantic issues. -/// Quantifier-Induced Scope trigger info. -/// -/// When a quantified expression has ≥2 propagating captures, QIS creates -/// an implicit object scope so captures stay coupled per-iteration. 
-#[derive(Debug)]
-pub struct Query<'q> {
-    source: &'q str,
-    ast: Root,
-    symbol_table: SymbolTable<'q>,
-    expr_arity_table: HashMap,
-    node_type_ids: HashMap<&'q str, Option<NodeTypeId>>,
-    node_field_ids: HashMap<&'q str, Option<NodeFieldId>>,
-    exec_fuel: u32,
-    recursion_fuel: u32,
-    exec_fuel_consumed: u32,
-    parse_diagnostics: Diagnostics,
-    alt_kind_diagnostics: Diagnostics,
-    resolve_diagnostics: Diagnostics,
-    recursion_diagnostics: Diagnostics,
-    expr_arity_diagnostics: Diagnostics,
-    link_diagnostics: Diagnostics,
-    dependency_analysis: DependencyAnalysis<'q>,
-    // Graph compilation fields
-    graph: BuildGraph<'q>,
-    dead_nodes: HashSet<NodeId>,
-    type_info: TypeInferenceResult<'q>,
-    /// QIS triggers: quantified expressions with ≥2 propagating captures.
-    qis_ctx: QisContext<'q>,
-}
-
-fn empty_root() -> Root {
-    let mut builder = GreenNodeBuilder::new();
-    builder.start_node(SyntaxKind::Root.into());
-    builder.finish_node();
-    let green = builder.finish();
-    Root::cast(SyntaxNode::new_root(green)).expect("we just built a Root node")
-}
-
-impl<'a> Query<'a> {
-    /// Create a new query from source text.
-    ///
-    /// Call [`exec`](Self::exec) to run analysis passes.
-    pub fn new(source: &'a str) -> Self {
-        Self {
-            source,
-            ast: empty_root(),
-            symbol_table: SymbolTable::default(),
-            expr_arity_table: HashMap::new(),
-            node_type_ids: HashMap::new(),
-            node_field_ids: HashMap::new(),
-            exec_fuel: DEFAULT_EXEC_FUEL,
-            recursion_fuel: DEFAULT_RECURSION_FUEL,
-            exec_fuel_consumed: 0,
-            parse_diagnostics: Diagnostics::new(),
-            alt_kind_diagnostics: Diagnostics::new(),
-            resolve_diagnostics: Diagnostics::new(),
-            recursion_diagnostics: Diagnostics::new(),
-            expr_arity_diagnostics: Diagnostics::new(),
-            dependency_analysis: DependencyAnalysis::default(),
-            link_diagnostics: Diagnostics::new(),
-            graph: BuildGraph::default(),
-            dead_nodes: HashSet::new(),
-            type_info: TypeInferenceResult::default(),
-            qis_ctx: QisContext::default(),
-        }
-    }
-
-    /// Set execution fuel limit. None = infinite.
-    ///
-    /// Execution fuel never replenishes. It protects against large inputs.
-    /// Returns error from [`exec`](Self::exec) when exhausted.
-    pub fn with_exec_fuel(mut self, limit: u32) -> Self {
-        self.exec_fuel = limit;
-        self
-    }
-
-    /// Set recursion depth limit. None = infinite.
-    ///
-    /// Recursion fuel restores when exiting recursion. It protects against
-    /// deeply nested input. Returns error from [`exec`](Self::exec) when exhausted.
-    pub fn with_recursion_fuel(mut self, limit: u32) -> Self {
-        self.recursion_fuel = limit;
-        self
-    }
-
-    /// Run all analysis passes.
-    ///
-    /// Returns `Err` if fuel limits are exceeded.
-    /// Syntax/semantic diagnostics are collected and accessible via [`diagnostics`](Self::diagnostics).
-    pub fn exec(mut self) -> Result<Self> {
-        self.try_parse()?;
-        self.validate_alt_kinds();
-        self.resolve_names();
-
-        self.dependency_analysis = dependencies::analyze_dependencies(&self.symbol_table);
-        dependencies::validate_recursion(
-            &self.dependency_analysis,
-            &self.ast,
-            &self.symbol_table,
-            &mut self.recursion_diagnostics,
-        );
-
-        self.infer_arities();
-
-        self.qis_ctx = detect_capture_scopes(self.source, &self.symbol_table);
-
-        self.infer_types();
-
-        Ok(self)
-    }
-
-    /// Build the transition graph for compilation.
-    ///
-    /// This is an optional step after `exec`. It detects QIS triggers,
-    /// constructs the graph, runs epsilon elimination, and infers types.
-    ///
-    /// Only runs if the query is valid (no errors from previous passes).
-    pub fn build_graph(mut self) -> Self {
-        if !self.is_valid() {
-            return self;
-        }
-        self.construct_graph();
-        self.optimize_graph();
-        self
-    }
-
-    /// Build graph and return dump of graph before optimization (for debugging).
-    ///
-    /// If `root_kind` is provided, definitions are wrapped before dumping.
-    pub fn build_graph_with_pre_opt_dump(mut self, root_kind: Option<&'a str>) -> (Self, String) {
-        if !self.is_valid() {
-            return (self, String::new());
-        }
-
-        self.qis_ctx = detect_capture_scopes(self.source, &self.symbol_table);
-
-        self.construct_graph();
-        if let Some(root) = root_kind {
-            self.graph.wrap_definitions_with_root(root);
-        }
-        let pre_opt_dump = self.graph.dump();
-        self.infer_types();
-        self.optimize_graph();
-        (self, pre_opt_dump)
-    }
-
-    fn try_parse(&mut self) -> Result<()> {
-        let tokens = lex(self.source);
-        let parser = Parser::new(self.source, tokens, self.exec_fuel, self.recursion_fuel);
-
-        let ParseResult {
-            ast,
-            diag,
-            fuel_consumed: exec_fuel_consumed,
-        } = parser.parse()?;
-        self.ast = ast;
-        self.parse_diagnostics = diag;
-        self.exec_fuel_consumed = exec_fuel_consumed;
-        Ok(())
-    }
-
-    pub(crate) fn as_cst(&self) -> &SyntaxNode {
-        self.ast.as_cst()
-    }
-
-    pub(crate) fn root(&self) -> &Root {
-        &self.ast
-    }
-
-    /// Access the constructed graph.
-    pub fn graph(&self) -> &BuildGraph<'a> {
-        &self.graph
-    }
-
-    /// Wrap definitions that don't already match the root node kind.
-    ///
-    /// Call this after `build_graph()` to allow queries like `(function_declaration)`
-    /// to work when the interpreter starts at tree root (e.g., `program`).
-    ///
-    /// The `root_kind` should be the language's root node kind (e.g., "program" for JS).
-    pub fn wrap_with_root(mut self, root_kind: &'a str) -> Self {
-        self.graph.wrap_definitions_with_root(root_kind);
-        // Re-run type inference and optimization on wrapped graph
-        self.infer_types();
-        self.optimize_graph();
-        self
-    }
-
-    /// Access the set of dead nodes (eliminated by optimization).
-    pub fn dead_nodes(&self) -> &HashSet<NodeId> {
-        &self.dead_nodes
-    }
-
-    /// Access the type inference result.
-    pub fn type_info(&self) -> &TypeInferenceResult<'a> {
-        &self.type_info
-    }
-
-    /// All diagnostics combined from all passes (unfiltered).
-    ///
-    /// Use this for debugging or when you need to see all diagnostics
-    /// including cascading errors.
-    pub fn diagnostics_raw(&self) -> Diagnostics {
-        let mut all = Diagnostics::new();
-        all.extend(self.parse_diagnostics.clone());
-        all.extend(self.alt_kind_diagnostics.clone());
-        all.extend(self.resolve_diagnostics.clone());
-        all.extend(self.recursion_diagnostics.clone());
-        all.extend(self.expr_arity_diagnostics.clone());
-        all.extend(self.link_diagnostics.clone());
-        all.extend(self.type_info.diagnostics.clone());
-        all
-    }
-
-    /// All diagnostics combined from all passes.
-    ///
-    /// Returns diagnostics with cascading errors suppressed.
-    /// For raw access, use [`diagnostics_raw`](Self::diagnostics_raw).
-    pub fn diagnostics(&self) -> Diagnostics {
-        self.diagnostics_raw()
-    }
-
-    /// Query is valid if there are no error-severity diagnostics (warnings are allowed).
-    pub fn is_valid(&self) -> bool {
-        !self.parse_diagnostics.has_errors()
-            && !self.alt_kind_diagnostics.has_errors()
-            && !self.resolve_diagnostics.has_errors()
-            && !self.recursion_diagnostics.has_errors()
-            && !self.expr_arity_diagnostics.has_errors()
-            && !self.link_diagnostics.has_errors()
-    }
-
-    /// Check if graph compilation produced type errors.
-    pub fn has_type_errors(&self) -> bool {
-        self.type_info.has_errors()
-    }
-}
-
-impl<'a> TryFrom<&'a str> for Query<'a> {
-    type Error = crate::Error;
-
-    fn try_from(source: &'a str) -> Result<Self> {
-        Self::new(source).exec()
-    }
-}
-
-impl<'a> TryFrom<&'a String> for Query<'a> {
-    type Error = crate::Error;
-
-    fn try_from(source: &'a String) -> Result<Self> {
-        Self::new(source.as_str()).exec()
-    }
-}
diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs
index fe452f89..f199238d 100644
--- a/crates/plotnik-lib/src/query/query.rs
+++ b/crates/plotnik-lib/src/query/query.rs
@@ -5,14 +5,13 @@ use std::ops::{Deref, DerefMut};
 use plotnik_core::{NodeFieldId, NodeTypeId};
 use plotnik_langs::Lang;
 
-use crate::parser::{ParseResult, Parser, lexer::lex};
+use crate::Diagnostics;
+use crate::parser::{ParseResult, Parser, Root, SyntaxNode, lexer::lex};
 use crate::query::alt_kinds::validate_alt_kinds;
 use crate::query::dependencies::{self, DependencyAnalysis};
-use crate::query::expr_arity::{ExprArityTable, infer_arities};
-use crate::query::graph_qis::{QisContext, detect_capture_scopes};
+use crate::query::expr_arity::{ExprArity, ExprArityTable, infer_arities, resolve_arity};
 use crate::query::link;
 use crate::query::symbol_table::{SymbolTable, resolve_names};
-use crate::{Diagnostics, parser::Root};
 
 const DEFAULT_QUERY_PARSE_FUEL: u32 = 1_000_000;
 const DEFAULT_QUERY_PARSE_MAX_DEPTH: u32 = 4096;
@@ -65,10 +64,6 @@ impl<'q> QueryBuilder<'q> {
 
         validate_alt_kinds(&ast, &mut diag);
 
-        if diag.has_errors() {
-            return Err(crate::Error::QueryParseError(diag));
-        }
-
         Ok(QueryParsed {
             src,
             diag,
@@ -78,11 +73,18 @@
     }
 }
 
+#[derive(Debug)]
 pub struct QueryParsed<'q> {
     src: &'q str,
     diag: Diagnostics,
     ast: Root,
-    pub fuel_consumed: u32,
+    fuel_consumed: u32,
+}
+
+impl<'q> QueryParsed<'q> {
+    pub fn query_parser_fuel_consumed(&self) -> u32 {
+        self.fuel_consumed
+    }
 }
 
 impl<'q> QueryParsed<'q> {
@@ -99,31 +101,49 @@
         let arity_table = infer_arities(&self.ast, &symbol_table, &mut self.diag);
 
-        if self.diag.has_errors() {
-            return Err(crate::Error::QueryAnalyzeError(self.diag));
-        }
-
-        let qis_ctx = detect_capture_scopes(self.src, &symbol_table);
-
         Ok(QueryAnalyzed {
             query_parsed: self,
             symbol_table,
             dependency_analysis,
             arity_table,
-            qis_ctx,
         })
     }
+
+    pub fn source(&self) -> &'q str {
+        self.src
+    }
+
+    pub fn diagnostics(&self) -> Diagnostics {
+        self.diag.clone()
+    }
+
+    pub fn root(&self) -> &Root {
+        &self.ast
+    }
+
+    pub fn as_cst(&self) -> &SyntaxNode {
+        self.ast.as_cst()
+    }
 }
 
+pub type Query<'q> = QueryAnalyzed<'q>;
+
 pub struct QueryAnalyzed<'q> {
     query_parsed: QueryParsed<'q>,
-    symbol_table: SymbolTable<'q>,
+    pub symbol_table: SymbolTable<'q>,
    dependency_analysis: DependencyAnalysis<'q>,
     arity_table: ExprArityTable,
-    qis_ctx: QisContext<'q>,
 }
 
 impl<'q> QueryAnalyzed<'q> {
+    pub fn is_valid(&self) -> bool {
+        !self.diag.has_errors()
+    }
+
+    pub fn get_arity(&self, node: &SyntaxNode) -> Option<ExprArity> {
+        resolve_arity(node, &self.arity_table)
+    }
+
     pub fn link(mut self, lang: &Lang) -> LinkedQuery<'q> {
         let mut type_ids: HashMap<&'q str, Option<NodeTypeId>> = HashMap::new();
         let mut field_ids: HashMap<&'q str, Option<NodeFieldId>> = HashMap::new();
@@ -160,6 +180,14 @@ impl<'q> DerefMut for QueryAnalyzed<'q> {
     }
 }
 
+impl<'q> TryFrom<&'q str> for QueryAnalyzed<'q> {
+    type Error = crate::Error;
+
+    fn try_from(src: &'q str) -> crate::Result<Self> {
+        QueryBuilder::new(src).parse()?.analyze()
+    }
+}
+
 type NodeTypeIdTable<'q> = HashMap<&'q str, Option<NodeTypeId>>;
 type NodeFieldIdTable<'q> = HashMap<&'q str, Option<NodeFieldId>>;
@@ -168,3 +196,17 @@ pub struct LinkedQuery<'q> {
     type_ids: NodeTypeIdTable<'q>,
     field_ids: NodeFieldIdTable<'q>,
 }
+
+impl<'q> Deref for LinkedQuery<'q> {
+    type Target = QueryAnalyzed<'q>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<'q> DerefMut for LinkedQuery<'q> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
diff --git a/crates/plotnik-lib/src/query/symbol_table.rs b/crates/plotnik-lib/src/query/symbol_table.rs
index 45081f66..529c0a0d 100644
--- a/crates/plotnik-lib/src/query/symbol_table.rs
+++ b/crates/plotnik-lib/src/query/symbol_table.rs
@@ -14,17 +14,10 @@
 use crate::Diagnostics;
 use crate::diagnostics::DiagnosticKind;
 use crate::parser::{Root, ast, token_src};
-use super::Query;
 use super::visitor::Visitor;
 
 pub type SymbolTable<'src> = IndexMap<&'src str, ast::Expr>;
 
-impl<'a> Query<'a> {
-    pub(super) fn resolve_names(&mut self) {
-        self.symbol_table = resolve_names(&self.ast, self.source, &mut self.resolve_diagnostics);
-    }
-}
-
 pub fn resolve_names<'q>(ast: &Root, src: &'q str, diag: &mut Diagnostics) -> SymbolTable<'q> {
     let symbol_table = SymbolTable::default();
     let ctx = Context {