From a6bcb2f3c870b249ed3c3dd587cd04516a149c2b Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 11:21:26 -0300 Subject: [PATCH 01/23] feat: IR compiler --- AGENTS.md | 1 + crates/plotnik-lib/src/graph/build.rs | 588 +++++++++++++++ crates/plotnik-lib/src/graph/build_tests.rs | 440 +++++++++++ crates/plotnik-lib/src/graph/construct.rs | 546 ++++++++++++++ .../plotnik-lib/src/graph/construct_tests.rs | 686 ++++++++++++++++++ crates/plotnik-lib/src/graph/mod.rs | 25 + crates/plotnik-lib/src/ir/ids.rs | 3 + crates/plotnik-lib/src/ir/nav.rs | 12 + crates/plotnik-lib/src/ir/slice.rs | 21 +- crates/plotnik-lib/src/ir/slice_tests.rs | 5 +- crates/plotnik-lib/src/ir/transition.rs | 46 +- crates/plotnik-lib/src/ir/type_metadata.rs | 14 +- crates/plotnik-lib/src/lib.rs | 1 + crates/plotnik-lib/src/parser/ast.rs | 53 ++ crates/plotnik-lib/src/parser/mod.rs | 2 +- docs/adr/ADR-0004-query-ir-binary-format.md | 7 +- docs/adr/ADR-0005-transition-graph-format.md | 27 +- docs/adr/ADR-0006-dynamic-query-execution.md | 9 +- docs/adr/ADR-0007-type-metadata-format.md | 19 +- docs/adr/ADR-0009-type-system.md | 441 +++++++++++ 20 files changed, 2892 insertions(+), 54 deletions(-) create mode 100644 crates/plotnik-lib/src/graph/build.rs create mode 100644 crates/plotnik-lib/src/graph/build_tests.rs create mode 100644 crates/plotnik-lib/src/graph/construct.rs create mode 100644 crates/plotnik-lib/src/graph/construct_tests.rs create mode 100644 crates/plotnik-lib/src/graph/mod.rs create mode 100644 docs/adr/ADR-0009-type-system.md diff --git a/AGENTS.md b/AGENTS.md index 587080e8..cc5b3248 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,6 +22,7 @@ - [ADR-0006: Dynamic Query Execution](docs/adr/ADR-0006-dynamic-query-execution.md) - [ADR-0007: Type Metadata Format](docs/adr/ADR-0007-type-metadata-format.md) - [ADR-0008: Tree Navigation](docs/adr/ADR-0008-tree-navigation.md) + - [ADR-0009: Type System](docs/adr/ADR-0009-type-system.md) - **Template**: ```markdown diff --git 
a/crates/plotnik-lib/src/graph/build.rs b/crates/plotnik-lib/src/graph/build.rs new file mode 100644 index 00000000..d656f7d2 --- /dev/null +++ b/crates/plotnik-lib/src/graph/build.rs @@ -0,0 +1,588 @@ +//! Core types and construction for build-time query graphs. +//! +//! The graph uses index-based node references (`NodeId`) with nodes stored +//! in a `Vec`. Strings borrow from the source (`&'src str`) until IR emission. + +use crate::ir::Nav; +use indexmap::IndexMap; + +/// Index into `BuildGraph::nodes`. +pub type NodeId = u32; + +/// A graph fragment with single entry and exit points. +/// +/// Every expression compiles to a fragment. Combinators connect fragments +/// by manipulating entry/exit edges. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Fragment { + pub entry: NodeId, + pub exit: NodeId, +} + +impl Fragment { + pub fn new(entry: NodeId, exit: NodeId) -> Self { + Self { entry, exit } + } + + /// Single-node fragment where entry equals exit. + pub fn single(node: NodeId) -> Self { + Self { + entry: node, + exit: node, + } + } +} + +/// Build-time graph for query compilation. +/// +/// Nodes are stored in a flat vector, referenced by `NodeId`. +/// Definitions map names to their entry points. +#[derive(Debug)] +pub struct BuildGraph<'src> { + nodes: Vec>, + definitions: IndexMap<&'src str, NodeId>, +} + +impl<'src> BuildGraph<'src> { + pub fn new() -> Self { + Self { + nodes: Vec::new(), + definitions: IndexMap::new(), + } + } + + /// Add a node, returning its ID. + pub fn add_node(&mut self, node: BuildNode<'src>) -> NodeId { + let id = self.nodes.len() as NodeId; + self.nodes.push(node); + id + } + + /// Add an epsilon node (no matcher, no effects). + pub fn add_epsilon(&mut self) -> NodeId { + self.add_node(BuildNode::epsilon()) + } + + /// Add a matcher node. + pub fn add_matcher(&mut self, matcher: BuildMatcher<'src>) -> NodeId { + self.add_node(BuildNode::with_matcher(matcher)) + } + + /// Register a definition entry point. 
+ pub fn add_definition(&mut self, name: &'src str, entry: NodeId) { + self.definitions.insert(name, entry); + } + + /// Get definition entry point by name. + pub fn definition(&self, name: &str) -> Option { + self.definitions.get(name).copied() + } + + /// Iterate over all definitions. + pub fn definitions(&self) -> impl Iterator + '_ { + self.definitions.iter().map(|(k, v)| (*k, *v)) + } + + /// Get node by ID. + pub fn node(&self, id: NodeId) -> &BuildNode<'src> { + &self.nodes[id as usize] + } + + /// Get mutable node by ID. + pub fn node_mut(&mut self, id: NodeId) -> &mut BuildNode<'src> { + &mut self.nodes[id as usize] + } + + /// Number of nodes in the graph. + pub fn len(&self) -> usize { + self.nodes.len() + } + + /// Returns true if graph has no nodes. + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + /// Iterate over all nodes with their IDs. + pub fn iter(&self) -> impl Iterator)> { + self.nodes.iter().enumerate().map(|(i, n)| (i as NodeId, n)) + } + + /// Connect source node to target (add edge). + pub fn connect(&mut self, from: NodeId, to: NodeId) { + self.nodes[from as usize].successors.push(to); + } + + /// Connect a fragment's exit to another node. + pub fn connect_exit(&mut self, fragment: Fragment, to: NodeId) { + self.connect(fragment.exit, to); + } + + // ───────────────────────────────────────────────────────────────────── + // Fragment Combinators + // ───────────────────────────────────────────────────────────────────── + + /// Create a single-node fragment from a matcher. + pub fn matcher_fragment(&mut self, matcher: BuildMatcher<'src>) -> Fragment { + Fragment::single(self.add_matcher(matcher)) + } + + /// Create an epsilon fragment. + pub fn epsilon_fragment(&mut self) -> Fragment { + Fragment::single(self.add_epsilon()) + } + + /// Connect fragments in sequence: f1 → f2 → ... → fn + /// + /// Returns fragment spanning from first entry to last exit. 
+ pub fn sequence(&mut self, fragments: &[Fragment]) -> Fragment { + match fragments.len() { + 0 => self.epsilon_fragment(), + 1 => fragments[0], + _ => { + for window in fragments.windows(2) { + self.connect(window[0].exit, window[1].entry); + } + Fragment::new(fragments[0].entry, fragments[fragments.len() - 1].exit) + } + } + } + + /// Connect fragments in parallel (alternation): entry → [f1|f2|...|fn] → exit + /// + /// Creates shared epsilon entry and exit nodes. + pub fn alternation(&mut self, fragments: &[Fragment]) -> Fragment { + if fragments.is_empty() { + return self.epsilon_fragment(); + } + if fragments.len() == 1 { + return fragments[0]; + } + + let entry = self.add_epsilon(); + let exit = self.add_epsilon(); + + for f in fragments { + self.connect(entry, f.entry); + self.connect(f.exit, exit); + } + + Fragment::new(entry, exit) + } + + /// Zero or more (greedy): inner* + /// + /// ```text + /// ┌──────────────┐ + /// ↓ │ + /// entry ─→ branch ─→ inner ─┘ + /// │ + /// └─→ exit + /// ``` + pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + // Greedy: try inner first + self.connect(branch, inner.entry); + self.connect(branch, exit); + self.connect(inner.exit, branch); + + Fragment::new(branch, exit) + } + + /// Zero or more (non-greedy): inner*? 
+ pub fn zero_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + // Non-greedy: try exit first + self.connect(branch, exit); + self.connect(branch, inner.entry); + self.connect(inner.exit, branch); + + Fragment::new(branch, exit) + } + + /// One or more (greedy): inner+ + /// + /// ```text + /// ┌──────────────┐ + /// ↓ │ + /// entry ─→ inner ─→ branch ─┘ + /// │ + /// └─→ exit + /// ``` + pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + self.connect(inner.exit, branch); + // Greedy: try inner first + self.connect(branch, inner.entry); + self.connect(branch, exit); + + Fragment::new(inner.entry, exit) + } + + /// One or more (non-greedy): inner+? + pub fn one_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + self.connect(inner.exit, branch); + // Non-greedy: try exit first + self.connect(branch, exit); + self.connect(branch, inner.entry); + + Fragment::new(inner.entry, exit) + } + + /// Optional (greedy): inner? + /// + /// ```text + /// entry ─→ branch ─→ inner ─→ exit + /// │ ↑ + /// └──────────────────┘ + /// ``` + pub fn optional(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + // Greedy: try inner first + self.connect(branch, inner.entry); + self.connect(branch, exit); + self.connect(inner.exit, exit); + + Fragment::new(branch, exit) + } + + /// Optional (non-greedy): inner?? 
+ pub fn optional_lazy(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + // Non-greedy: try skip first + self.connect(branch, exit); + self.connect(branch, inner.entry); + self.connect(inner.exit, exit); + + Fragment::new(branch, exit) + } + + // ───────────────────────────────────────────────────────────────────── + // Array-Collecting Loop Combinators + // + // These place PushElement on the back-edge so each iteration pushes. + // ───────────────────────────────────────────────────────────────────── + + /// Zero or more with array collection (greedy): inner* + /// + /// ```text + /// StartArray → branch → inner → PushElement ─┐ + /// │ │ + /// └─→ EndArray ←──────────────┘ + /// ``` + pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let branch = self.add_epsilon(); + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, branch); + // Greedy: try inner first + self.connect(branch, inner.entry); + self.connect(branch, end); + // Back-edge with push + self.connect(inner.exit, push); + self.connect(push, branch); + + Fragment::new(start, end) + } + + /// Zero or more with array collection (non-greedy): inner*? 
+ pub fn zero_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let branch = self.add_epsilon(); + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, branch); + // Non-greedy: try exit first + self.connect(branch, end); + self.connect(branch, inner.entry); + // Back-edge with push + self.connect(inner.exit, push); + self.connect(push, branch); + + Fragment::new(start, end) + } + + /// One or more with array collection (greedy): inner+ + /// + /// ```text + /// StartArray → inner → PushElement → branch ─┐ + /// │ │ + /// └─→ EndArray + /// ``` + pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let branch = self.add_epsilon(); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, inner.entry); + self.connect(inner.exit, push); + self.connect(push, branch); + // Greedy: try inner first + self.connect(branch, inner.entry); + self.connect(branch, end); + + Fragment::new(start, end) + } + + /// One or more with array collection (non-greedy): inner+? 
+ pub fn one_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let branch = self.add_epsilon(); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, inner.entry); + self.connect(inner.exit, push); + self.connect(push, branch); + // Non-greedy: try exit first + self.connect(branch, end); + self.connect(branch, inner.entry); + + Fragment::new(start, end) + } +} + +impl Default for BuildGraph<'_> { + fn default() -> Self { + Self::new() + } +} + +/// A node in the build graph. +#[derive(Debug, Clone)] +pub struct BuildNode<'src> { + pub matcher: BuildMatcher<'src>, + pub effects: Vec>, + pub ref_marker: RefMarker, + pub successors: Vec, + /// Navigation instruction for this transition (see ADR-0008). + pub nav: Nav, + /// Reference name for Enter nodes (resolved during linking). + pub ref_name: Option<&'src str>, +} + +impl<'src> BuildNode<'src> { + /// Create an epsilon node (pass-through, no match). + pub fn epsilon() -> Self { + Self { + matcher: BuildMatcher::Epsilon, + effects: Vec::new(), + ref_marker: RefMarker::None, + successors: Vec::new(), + nav: Nav::stay(), + ref_name: None, + } + } + + /// Create a node with a matcher. + pub fn with_matcher(matcher: BuildMatcher<'src>) -> Self { + Self { + matcher, + effects: Vec::new(), + ref_marker: RefMarker::None, + successors: Vec::new(), + nav: Nav::stay(), + ref_name: None, + } + } + + /// Add an effect to this node. + pub fn add_effect(&mut self, effect: BuildEffect<'src>) { + self.effects.push(effect); + } + + /// Set the ref marker. + pub fn set_ref_marker(&mut self, marker: RefMarker) { + self.ref_marker = marker; + } + + /// Set the navigation instruction. 
+ pub fn set_nav(&mut self, nav: Nav) { + self.nav = nav; + } + + /// Returns true if this is an epsilon node. + pub fn is_epsilon(&self) -> bool { + matches!(self.matcher, BuildMatcher::Epsilon) + } +} + +/// What a transition matches. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BuildMatcher<'src> { + /// Matches without consuming input. Control flow only. + Epsilon, + + /// Matches a named node by kind. + Node { + kind: &'src str, + field: Option<&'src str>, + negated_fields: Vec<&'src str>, + }, + + /// Matches an anonymous node (string literal). + Anonymous { + literal: &'src str, + field: Option<&'src str>, + }, + + /// Matches any node. + Wildcard { field: Option<&'src str> }, +} + +impl<'src> BuildMatcher<'src> { + pub fn node(kind: &'src str) -> Self { + Self::Node { + kind, + field: None, + negated_fields: Vec::new(), + } + } + + pub fn anonymous(literal: &'src str) -> Self { + Self::Anonymous { + literal, + field: None, + } + } + + pub fn wildcard() -> Self { + Self::Wildcard { field: None } + } + + /// Set field constraint. + pub fn with_field(mut self, field: &'src str) -> Self { + match &mut self { + BuildMatcher::Node { field: f, .. } => *f = Some(field), + BuildMatcher::Anonymous { field: f, .. } => *f = Some(field), + BuildMatcher::Wildcard { field: f } => *f = Some(field), + BuildMatcher::Epsilon => {} + } + self + } + + /// Add negated field (Node matcher only). + pub fn with_negated_field(mut self, field: &'src str) -> Self { + if let BuildMatcher::Node { negated_fields, .. } = &mut self { + negated_fields.push(field); + } + self + } +} + +/// Effect operations recorded during graph construction. +/// +/// These mirror `ir::EffectOp` but use borrowed strings. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BuildEffect<'src> { + /// Store matched node as current value. + CaptureNode, + + /// Push empty array onto stack. + StartArray, + + /// Move current value into top array. + PushElement, + + /// Pop array from stack into current. 
+ EndArray, + + /// Push empty object onto stack. + StartObject, + + /// Pop object from stack into current. + EndObject, + + /// Move current value into top object at field. + Field(&'src str), + + /// Push variant container with tag onto stack. + StartVariant(&'src str), + + /// Pop variant, wrap current, set as current. + EndVariant, + + /// Replace current Node with its source text. + ToString, +} + +/// Marker for definition call/return transitions. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum RefMarker { + /// Not a reference transition. + #[default] + None, + + /// Enter a definition call. Stores return points for Exit. + Enter { + /// Index identifying this ref (for matching Enter/Exit pairs). + ref_id: u32, + }, + + /// Exit a definition call. Returns to points stored at Enter. + Exit { + /// Must match corresponding Enter's ref_id. + ref_id: u32, + }, +} + +impl RefMarker { + pub fn enter(ref_id: u32) -> Self { + Self::Enter { ref_id } + } + + pub fn exit(ref_id: u32) -> Self { + Self::Exit { ref_id } + } + + pub fn is_none(&self) -> bool { + matches!(self, RefMarker::None) + } + + pub fn is_enter(&self) -> bool { + matches!(self, RefMarker::Enter { .. }) + } + + pub fn is_exit(&self) -> bool { + matches!(self, RefMarker::Exit { .. }) + } +} diff --git a/crates/plotnik-lib/src/graph/build_tests.rs b/crates/plotnik-lib/src/graph/build_tests.rs new file mode 100644 index 00000000..b5e524a1 --- /dev/null +++ b/crates/plotnik-lib/src/graph/build_tests.rs @@ -0,0 +1,440 @@ +//! Tests for BuildGraph construction and fragment combinators. 
+ +use super::*; + +fn dump_graph(graph: &BuildGraph) -> String { + let mut out = String::new(); + + for (name, entry) in graph.definitions() { + out.push_str(&format!("{} = N{}\n", name, entry)); + } + if graph.definitions().next().is_some() { + out.push('\n'); + } + + for (id, node) in graph.iter() { + out.push_str(&format!("N{}: ", id)); + + // Matcher + match &node.matcher { + BuildMatcher::Epsilon => out.push('ε'), + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + out.push_str(&format!("({})", kind)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + for neg in negated_fields { + out.push_str(&format!(" !{}", neg)); + } + } + BuildMatcher::Anonymous { literal, field } => { + out.push_str(&format!("\"{}\"", literal)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + BuildMatcher::Wildcard { field } => { + out.push('_'); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + } + + // Ref marker + match &node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", ref_id)), + RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), + } + + // Effects + for effect in &node.effects { + let eff = match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + }; + out.push_str(&format!(" [{}]", eff)); + } + + // Successors + if node.successors.is_empty() { + out.push_str(" → ∅"); + } else { + out.push_str(" → "); + let succs: Vec<_> = 
node.successors.iter().map(|s| format!("N{}", s)).collect(); + out.push_str(&succs.join(", ")); + } + + out.push('\n'); + } + + out +} + +#[test] +fn single_matcher() { + let mut g = BuildGraph::new(); + + let frag = g.matcher_fragment(BuildMatcher::node("identifier")); + + assert_eq!(frag.entry, frag.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → ∅ + "#); +} + +#[test] +fn epsilon_fragment() { + let mut g = BuildGraph::new(); + + let frag = g.epsilon_fragment(); + + assert_eq!(frag.entry, frag.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: ε → ∅ + "#); +} + +#[test] +fn sequence_empty() { + let mut g = BuildGraph::new(); + + let frag = g.sequence(&[]); + + assert_eq!(frag.entry, frag.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: ε → ∅ + "#); +} + +#[test] +fn sequence_single() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + + let frag = g.sequence(&[f1]); + + assert_eq!(frag, f1); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → ∅ + "#); +} + +#[test] +fn sequence_two() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + let f2 = g.matcher_fragment(BuildMatcher::node("number")); + + let frag = g.sequence(&[f1, f2]); + + assert_eq!(frag.entry, f1.entry); + assert_eq!(frag.exit, f2.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → N1 + N1: (number) → ∅ + "#); +} + +#[test] +fn sequence_three() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("a")); + let f2 = g.matcher_fragment(BuildMatcher::node("b")); + let f3 = g.matcher_fragment(BuildMatcher::node("c")); + + let frag = g.sequence(&[f1, f2, f3]); + + assert_eq!(frag.entry, f1.entry); + assert_eq!(frag.exit, f3.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (a) → N1 + N1: (b) → N2 + N2: (c) → ∅ + "#); +} + +#[test] +fn alternation_empty() { + let mut g = 
BuildGraph::new(); + + let frag = g.alternation(&[]); + + assert_eq!(frag.entry, frag.exit); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: ε → ∅ + "#); +} + +#[test] +fn alternation_single() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + + let frag = g.alternation(&[f1]); + + assert_eq!(frag, f1); + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → ∅ + "#); +} + +#[test] +fn alternation_two() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + let f2 = g.matcher_fragment(BuildMatcher::node("number")); + + let frag = g.alternation(&[f1, f2]); + + // Entry connects to both branches, both branches connect to exit + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → N3 + N1: (number) → N3 + N2: ε → N0, N1 + N3: ε → ∅ + "#); + assert_eq!(frag.entry, 2); + assert_eq!(frag.exit, 3); +} + +#[test] +fn zero_or_more_greedy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.zero_or_more(inner); + + // Greedy: branch tries inner first, then exit + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N1 + N1: ε → N0, N2 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 1); // branch node + assert_eq!(frag.exit, 2); +} + +#[test] +fn zero_or_more_lazy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.zero_or_more_lazy(inner); + + // Non-greedy: branch tries exit first, then inner + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N1 + N1: ε → N2, N0 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 1); + assert_eq!(frag.exit, 2); +} + +#[test] +fn one_or_more_greedy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.one_or_more(inner); + + // Entry is inner, greedy branch after + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → 
N1 + N1: ε → N0, N2 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 0); // inner node + assert_eq!(frag.exit, 2); +} + +#[test] +fn one_or_more_lazy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.one_or_more_lazy(inner); + + // Entry is inner, non-greedy branch after + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N1 + N1: ε → N2, N0 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 0); + assert_eq!(frag.exit, 2); +} + +#[test] +fn optional_greedy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.optional(inner); + + // Greedy: branch tries inner first + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N2 + N1: ε → N0, N2 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 1); + assert_eq!(frag.exit, 2); +} + +#[test] +fn optional_lazy() { + let mut g = BuildGraph::new(); + let inner = g.matcher_fragment(BuildMatcher::node("item")); + + let frag = g.optional_lazy(inner); + + // Non-greedy: branch skips first + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N2 + N1: ε → N2, N0 + N2: ε → ∅ + "#); + assert_eq!(frag.entry, 1); + assert_eq!(frag.exit, 2); +} + +#[test] +fn matcher_with_field() { + let mut g = BuildGraph::new(); + + g.matcher_fragment(BuildMatcher::node("identifier").with_field("name")); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) @name → ∅ + "#); +} + +#[test] +fn matcher_with_negated_fields() { + let mut g = BuildGraph::new(); + + g.matcher_fragment( + BuildMatcher::node("call") + .with_negated_field("arguments") + .with_negated_field("type_arguments"), + ); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (call) !arguments !type_arguments → ∅ + "#); +} + +#[test] +fn anonymous_matcher() { + let mut g = BuildGraph::new(); + + g.matcher_fragment(BuildMatcher::anonymous("+")); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: "+" → ∅ + "#); +} + +#[test] +fn 
wildcard_matcher() { + let mut g = BuildGraph::new(); + + g.matcher_fragment(BuildMatcher::wildcard()); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: _ → ∅ + "#); +} + +#[test] +fn node_with_effects() { + let mut g = BuildGraph::new(); + let id = g.add_matcher(BuildMatcher::node("identifier")); + g.node_mut(id).add_effect(BuildEffect::CaptureNode); + g.node_mut(id).add_effect(BuildEffect::Field("name")); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) [Capture] [Field(name)] → ∅ + "#); +} + +#[test] +fn node_with_ref_marker() { + let mut g = BuildGraph::new(); + let enter = g.add_epsilon(); + g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); + + let exit = g.add_epsilon(); + g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); + + g.connect(enter, exit); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: ε +Enter(0) → N1 + N1: ε +Exit(0) → ∅ + "#); +} + +#[test] +fn definition_registration() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + g.add_definition("Ident", f1.entry); + + let f2 = g.matcher_fragment(BuildMatcher::node("number")); + g.add_definition("Num", f2.entry); + + assert_eq!(g.definition("Ident"), Some(0)); + assert_eq!(g.definition("Num"), Some(1)); + assert_eq!(g.definition("Unknown"), None); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Ident = N0 + Num = N1 + + N0: (identifier) → ∅ + N1: (number) → ∅ + "#); +} + +#[test] +fn complex_nested_structure() { + let mut g = BuildGraph::new(); + + // Build: (func { (identifier)+ (block) }) + let ident = g.matcher_fragment(BuildMatcher::node("identifier")); + let idents = g.one_or_more(ident); + + let block = g.matcher_fragment(BuildMatcher::node("block")); + let body = g.sequence(&[idents, block]); + + let func = g.matcher_fragment(BuildMatcher::node("func")); + g.connect_exit(func, body.entry); + + g.add_definition("Func", func.entry); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Func = N4 + + N0: 
(identifier) → N1 + N1: ε → N0, N2 + N2: ε → N3 + N3: (block) → ∅ + N4: (func) → N0 + "#); +} diff --git a/crates/plotnik-lib/src/graph/construct.rs b/crates/plotnik-lib/src/graph/construct.rs new file mode 100644 index 00000000..cf9454e5 --- /dev/null +++ b/crates/plotnik-lib/src/graph/construct.rs @@ -0,0 +1,546 @@ +//! AST-to-graph construction. +//! +//! Translates the parsed and analyzed AST into a `BuildGraph`. +//! This is the bridge between `parser::ast` and `graph::BuildGraph`. + +use crate::ir::Nav; +use crate::parser::{ + AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, + NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, SyntaxKind, token_src, +}; + +use super::{BuildEffect, BuildGraph, BuildMatcher, Fragment, NodeId, RefMarker}; + +/// Constructs a `BuildGraph` from a parsed query AST. +pub struct GraphConstructor<'src> { + source: &'src str, + graph: BuildGraph<'src>, + next_ref_id: u32, +} + +/// Context for navigation determination. +#[derive(Debug, Clone, Copy)] +enum NavContext { + /// First expression at definition root level. + Root, + /// First child inside a parent node. + FirstChild { anchored: bool }, + /// Sibling after previous expression. + Sibling { anchored: bool }, +} + +impl NavContext { + /// Determine the Nav based on context and expression type. + fn to_nav(self, is_anonymous: bool) -> Nav { + match self { + NavContext::Root => Nav::stay(), + NavContext::FirstChild { anchored: false } => Nav::down(), + NavContext::FirstChild { anchored: true } => { + if is_anonymous { + Nav::down_exact() + } else { + Nav::down_skip_trivia() + } + } + NavContext::Sibling { anchored: false } => Nav::next(), + NavContext::Sibling { anchored: true } => { + if is_anonymous { + Nav::next_exact() + } else { + Nav::next_skip_trivia() + } + } + } + } +} + +/// Tracks trailing anchor state for Up navigation. +#[derive(Debug, Clone, Copy)] +struct ExitContext { + /// Whether there's a trailing anchor before exit. 
+ has_trailing_anchor: bool, + /// Whether the last expression was anonymous (for Exact vs SkipTrivia). + last_was_anonymous: bool, +} + +impl ExitContext { + fn to_up_nav(self, level: u8) -> Nav { + if !self.has_trailing_anchor { + Nav::up(level) + } else if self.last_was_anonymous { + Nav::up_exact(level) + } else { + Nav::up_skip_trivia(level) + } + } +} + +impl<'src> GraphConstructor<'src> { + pub fn new(source: &'src str) -> Self { + Self { + source, + graph: BuildGraph::new(), + next_ref_id: 0, + } + } + + /// Construct graph from a parsed Root AST. + pub fn construct(mut self, root: &Root) -> BuildGraph<'src> { + for def in root.defs() { + self.construct_def(&def); + } + self.link_references(); + self.graph + } + + /// Link Enter nodes to their definition entry points. + /// + /// Per ADR-0005, Enter's successors should be: + /// - successors[0]: definition entry point + /// - successors[1..]: return transitions (the Exit node's successor) + fn link_references(&mut self) { + // Collect all Enter nodes with their ref_name and corresponding Exit successors + let mut links: Vec<(NodeId, &'src str, Vec)> = Vec::new(); + + for (id, node) in self.graph.iter() { + if let RefMarker::Enter { ref_id } = &node.ref_marker { + if let Some(name) = node.ref_name { + // Find the corresponding Exit node and its successors + let exit_successors = self.find_exit_successors(*ref_id); + links.push((id, name, exit_successors)); + } + } + } + + // Apply links + for (enter_id, name, return_transitions) in links { + if let Some(def_entry) = self.graph.definition(name) { + // Connect Enter to definition entry point + self.graph.connect(enter_id, def_entry); + // Add return transitions + for ret in return_transitions { + self.graph.connect(enter_id, ret); + } + } + } + } + + /// Find successors of the Exit node matching the given ref_id. 
+ fn find_exit_successors(&self, ref_id: u32) -> Vec { + for (_, node) in self.graph.iter() { + if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker { + if *exit_id == ref_id { + return node.successors.clone(); + } + } + } + Vec::new() + } + + fn construct_def(&mut self, def: &Def) { + let Some(name_token) = def.name() else { + return; + }; + let Some(body) = def.body() else { + return; + }; + + let name = token_src(&name_token, self.source); + let fragment = self.construct_expr(&body, NavContext::Root); + self.graph.add_definition(name, fragment.entry); + } + + fn construct_expr(&mut self, expr: &Expr, ctx: NavContext) -> Fragment { + match expr { + Expr::NamedNode(node) => self.construct_named_node(node, ctx), + Expr::AnonymousNode(node) => self.construct_anonymous_node(node, ctx), + Expr::Ref(r) => self.construct_ref(r, ctx), + Expr::AltExpr(alt) => self.construct_alt(alt, ctx), + Expr::SeqExpr(seq) => self.construct_seq(seq, ctx), + Expr::CapturedExpr(cap) => self.construct_capture(cap, ctx), + Expr::QuantifiedExpr(quant) => self.construct_quantifier(quant, ctx), + Expr::FieldExpr(field) => self.construct_field(field, ctx), + } + } + + fn construct_named_node(&mut self, node: &NamedNode, ctx: NavContext) -> Fragment { + let matcher = self.build_named_matcher(node); + let nav = ctx.to_nav(false); + let node_id = self.graph.add_matcher(matcher); + self.graph.node_mut(node_id).set_nav(nav); + + // Process children with anchor tracking + let items: Vec<_> = node.items().collect(); + if items.is_empty() { + return Fragment::single(node_id); + } + + let (child_fragments, exit_ctx) = self.construct_item_sequence(&items, true); + if child_fragments.is_empty() { + return Fragment::single(node_id); + } + + let inner = self.graph.sequence(&child_fragments); + self.graph.connect(node_id, inner.entry); + + // Add exit transition with appropriate Up nav + let exit_id = self.graph.add_epsilon(); + self.graph.node_mut(exit_id).set_nav(exit_ctx.to_up_nav(1)); + 
self.graph.connect(inner.exit, exit_id); + + Fragment::new(node_id, exit_id) + } + + /// Construct a sequence of items (expressions and anchors). + /// Returns fragments and exit context for trailing anchor handling. + fn construct_item_sequence( + &mut self, + items: &[SeqItem], + is_children: bool, + ) -> (Vec, ExitContext) { + let mut fragments = Vec::new(); + let mut pending_anchor = false; + let mut last_was_anonymous = false; + let mut is_first = true; + + for item in items { + match item { + SeqItem::Anchor(_) => { + pending_anchor = true; + } + SeqItem::Expr(expr) => { + let ctx = if is_first { + is_first = false; + if is_children { + NavContext::FirstChild { + anchored: pending_anchor, + } + } else { + // For sequences at root level, first item inherits parent context + NavContext::Sibling { + anchored: pending_anchor, + } + } + } else { + NavContext::Sibling { + anchored: pending_anchor, + } + }; + + last_was_anonymous = is_anonymous_expr(expr); + let frag = self.construct_expr(expr, ctx); + fragments.push(frag); + pending_anchor = false; + } + } + } + + let exit_ctx = ExitContext { + has_trailing_anchor: pending_anchor, + last_was_anonymous, + }; + + (fragments, exit_ctx) + } + + fn build_named_matcher(&mut self, node: &NamedNode) -> BuildMatcher<'src> { + let kind = node + .node_type() + .map(|t| token_src(&t, self.source)) + .unwrap_or("_"); + + let negated_fields: Vec<&'src str> = node + .as_cst() + .children() + .filter_map(NegatedField::cast) + .filter_map(|nf| nf.name()) + .map(|t| token_src(&t, self.source)) + .collect(); + + let field = self.find_field_constraint(node.as_cst()); + + if node.is_any() { + BuildMatcher::Wildcard { field } + } else { + BuildMatcher::Node { + kind, + field, + negated_fields, + } + } + } + + fn construct_anonymous_node(&mut self, node: &AnonymousNode, ctx: NavContext) -> Fragment { + let field = self.find_field_constraint(node.as_cst()); + let nav = ctx.to_nav(true); + + let matcher = if node.is_any() { + 
BuildMatcher::Wildcard { field } + } else { + let literal = node + .value() + .map(|t| token_src(&t, self.source)) + .unwrap_or(""); + BuildMatcher::Anonymous { literal, field } + }; + + let node_id = self.graph.add_matcher(matcher); + self.graph.node_mut(node_id).set_nav(nav); + Fragment::single(node_id) + } + + fn construct_ref(&mut self, r: &Ref, ctx: NavContext) -> Fragment { + let Some(name_token) = r.name() else { + return self.graph.epsilon_fragment(); + }; + + let ref_id = self.next_ref_id; + self.next_ref_id += 1; + + // Create Enter node with navigation from context + let enter_id = self.graph.add_epsilon(); + let nav = ctx.to_nav(false); + self.graph.node_mut(enter_id).set_nav(nav); + self.graph + .node_mut(enter_id) + .set_ref_marker(RefMarker::enter(ref_id)); + + // Create Exit node (nav will be set during linking based on definition structure) + let exit_id = self.graph.add_epsilon(); + self.graph + .node_mut(exit_id) + .set_ref_marker(RefMarker::exit(ref_id)); + + // Store ref name for later resolution + let name = token_src(&name_token, self.source); + self.graph.node_mut(enter_id).ref_name = Some(name); + + Fragment::new(enter_id, exit_id) + } + + fn construct_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + match alt.kind() { + AltKind::Tagged => self.construct_tagged_alt(alt, ctx), + AltKind::Untagged | AltKind::Mixed => self.construct_untagged_alt(alt, ctx), + } + } + + fn construct_tagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + let branches: Vec<_> = alt.branches().collect(); + if branches.is_empty() { + return self.graph.epsilon_fragment(); + } + + // Branch node inherits context nav + let branch_id = self.graph.add_epsilon(); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); + + let exit_id = self.graph.add_epsilon(); + + for branch in &branches { + let frag = self.construct_tagged_branch(branch); + self.graph.connect(branch_id, frag.entry); + self.graph.connect(frag.exit, exit_id); + } + + 
Fragment::new(branch_id, exit_id) + } + + fn construct_tagged_branch(&mut self, branch: &Branch) -> Fragment { + let Some(label_token) = branch.label() else { + return branch + .body() + .map(|b| self.construct_expr(&b, NavContext::Root)) + .unwrap_or_else(|| self.graph.epsilon_fragment()); + }; + let Some(body) = branch.body() else { + return self.graph.epsilon_fragment(); + }; + + let label = token_src(&label_token, self.source); + + // StartVariant (epsilon, no nav change) + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartVariant(label)); + + // Body inherits root context (alternation resets nav context) + let body_frag = self.construct_expr(&body, NavContext::Root); + + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndVariant); + + self.graph.connect(start_id, body_frag.entry); + self.graph.connect(body_frag.exit, end_id); + + Fragment::new(start_id, end_id) + } + + fn construct_untagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + let branches: Vec<_> = alt.branches().filter_map(|b| b.body()).collect(); + + if branches.is_empty() { + return self.graph.epsilon_fragment(); + } + + // Branch node inherits context nav + let branch_id = self.graph.add_epsilon(); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); + + let exit_id = self.graph.add_epsilon(); + + for body in &branches { + // Each branch resets to root context + let frag = self.construct_expr(body, NavContext::Root); + self.graph.connect(branch_id, frag.entry); + self.graph.connect(frag.exit, exit_id); + } + + Fragment::new(branch_id, exit_id) + } + + fn construct_seq(&mut self, seq: &SeqExpr, ctx: NavContext) -> Fragment { + let items: Vec<_> = seq.items().collect(); + + // Wrap sequence in StartObject/EndObject + let start_id = self.graph.add_epsilon(); + self.graph.node_mut(start_id).set_nav(ctx.to_nav(false)); + self.graph + .node_mut(start_id) + 
.add_effect(BuildEffect::StartObject); + + let (child_fragments, _exit_ctx) = self.construct_item_sequence(&items, false); + let inner = self.graph.sequence(&child_fragments); + + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + + self.graph.connect(start_id, inner.entry); + self.graph.connect(inner.exit, end_id); + + Fragment::new(start_id, end_id) + } + + fn construct_capture(&mut self, cap: &CapturedExpr, ctx: NavContext) -> Fragment { + let Some(inner_expr) = cap.inner() else { + return self.graph.epsilon_fragment(); + }; + + let inner_frag = self.construct_expr(&inner_expr, ctx); + + let capture_name = cap.name().map(|t| token_src(&t, self.source)); + + let has_to_string = cap + .type_annotation() + .and_then(|t| t.name()) + .map(|n| n.text() == "string") + .unwrap_or(false); + + // Attach CaptureNode to all reachable matchers + let matchers = self.find_all_matchers(inner_frag.entry); + for matcher_id in matchers { + self.graph + .node_mut(matcher_id) + .add_effect(BuildEffect::CaptureNode); + + if has_to_string { + self.graph + .node_mut(matcher_id) + .add_effect(BuildEffect::ToString); + } + } + + // Add Field effect at exit + if let Some(name) = capture_name { + let field_id = self.graph.add_epsilon(); + self.graph + .node_mut(field_id) + .add_effect(BuildEffect::Field(name)); + self.graph.connect(inner_frag.exit, field_id); + Fragment::new(inner_frag.entry, field_id) + } else { + inner_frag + } + } + + fn construct_quantifier(&mut self, quant: &QuantifiedExpr, ctx: NavContext) -> Fragment { + let Some(inner_expr) = quant.inner() else { + return self.graph.epsilon_fragment(); + }; + let Some(op) = quant.operator() else { + return self.construct_expr(&inner_expr, ctx); + }; + + // First iteration uses parent context, subsequent use Sibling + let inner_frag = self.construct_expr(&inner_expr, ctx); + + match op.kind() { + SyntaxKind::Star => self.graph.zero_or_more_array(inner_frag), + 
SyntaxKind::StarQuestion => self.graph.zero_or_more_array_lazy(inner_frag), + SyntaxKind::Plus => self.graph.one_or_more_array(inner_frag), + SyntaxKind::PlusQuestion => self.graph.one_or_more_array_lazy(inner_frag), + SyntaxKind::Question => self.graph.optional(inner_frag), + SyntaxKind::QuestionQuestion => self.graph.optional_lazy(inner_frag), + _ => inner_frag, + } + } + + fn construct_field(&mut self, field: &FieldExpr, ctx: NavContext) -> Fragment { + let Some(value_expr) = field.value() else { + return self.graph.epsilon_fragment(); + }; + self.construct_expr(&value_expr, ctx) + } + + /// Find field constraint from parent FieldExpr. + fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'src str> { + let parent = node.parent()?; + let field_expr = FieldExpr::cast(parent)?; + let name_token = field_expr.name()?; + Some(token_src(&name_token, self.source)) + } + + /// Find all non-epsilon matcher nodes reachable from start. + fn find_all_matchers(&self, start: NodeId) -> Vec { + let mut result = Vec::new(); + let mut visited = std::collections::HashSet::new(); + self.collect_matchers(start, &mut result, &mut visited); + result + } + + fn collect_matchers( + &self, + node_id: NodeId, + result: &mut Vec, + visited: &mut std::collections::HashSet, + ) { + if !visited.insert(node_id) { + return; + } + + let node = self.graph.node(node_id); + if !node.is_epsilon() { + result.push(node_id); + return; + } + + for &succ in &node.successors { + self.collect_matchers(succ, result, visited); + } + } +} + +/// Returns true if expression is an anonymous node (string literal). +fn is_anonymous_expr(expr: &Expr) -> bool { + matches!(expr, Expr::AnonymousNode(n) if !n.is_any()) +} + +/// Convenience function to construct a graph from source and AST. 
+pub fn construct_graph<'src>(source: &'src str, root: &Root) -> BuildGraph<'src> { + GraphConstructor::new(source).construct(root) +} diff --git a/crates/plotnik-lib/src/graph/construct_tests.rs b/crates/plotnik-lib/src/graph/construct_tests.rs new file mode 100644 index 00000000..17f1bc5c --- /dev/null +++ b/crates/plotnik-lib/src/graph/construct_tests.rs @@ -0,0 +1,686 @@ +//! Tests for AST-to-graph construction. + +use crate::graph::{BuildEffect, BuildGraph, BuildMatcher, RefMarker}; +use crate::ir::{Nav, NavKind}; +use crate::parser::Parser; +use crate::parser::lexer::lex; + +use super::construct_graph; + +fn parse_and_construct(source: &str) -> BuildGraph<'_> { + let tokens = lex(source); + let parser = Parser::new(source, tokens); + let result = parser.parse().expect("parse should succeed"); + construct_graph(source, &result.root) +} + +fn dump_graph(graph: &BuildGraph) -> String { + let mut out = String::new(); + + for (name, entry) in graph.definitions() { + out.push_str(&format!("{} = N{}\n", name, entry)); + } + if graph.definitions().next().is_some() { + out.push('\n'); + } + + for (id, node) in graph.iter() { + out.push_str(&format!("N{}: ", id)); + + // Nav (skip Stay as it's the default) + if !node.nav.is_stay() { + let nav_str = format_nav(&node.nav); + out.push_str(&format!("[{}] ", nav_str)); + } + + // Matcher + match &node.matcher { + BuildMatcher::Epsilon => out.push('ε'), + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + out.push_str(&format!("({})", kind)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + for neg in negated_fields { + out.push_str(&format!(" !{}", neg)); + } + } + BuildMatcher::Anonymous { literal, field } => { + out.push_str(&format!("\"{}\"", literal)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + BuildMatcher::Wildcard { field } => { + out.push('_'); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + } + + // Ref marker + match 
&node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => { + let name = node.ref_name.unwrap_or("?"); + out.push_str(&format!(" +Enter({}, {})", ref_id, name)); + } + RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), + } + + // Effects + for effect in &node.effects { + let eff = match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + }; + out.push_str(&format!(" [{}]", eff)); + } + + // Successors + if node.successors.is_empty() { + out.push_str(" → ∅"); + } else { + out.push_str(" → "); + let succs: Vec<_> = node.successors.iter().map(|s| format!("N{}", s)).collect(); + out.push_str(&succs.join(", ")); + } + + out.push('\n'); + } + + out +} + +fn format_nav(nav: &Nav) -> String { + match nav.kind { + NavKind::Stay => "Stay".to_string(), + NavKind::Next => "Next".to_string(), + NavKind::NextSkipTrivia => "Next.".to_string(), + NavKind::NextExact => "Next!".to_string(), + NavKind::Down => "Down".to_string(), + NavKind::DownSkipTrivia => "Down.".to_string(), + NavKind::DownExact => "Down!".to_string(), + NavKind::Up => format!("Up({})", nav.level), + NavKind::UpSkipTrivia => format!("Up.({})", nav.level), + NavKind::UpExact => format!("Up!({})", nav.level), + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Basic Expressions +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn simple_named_node() { + let g = parse_and_construct("Foo = (identifier)"); + 
+ insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (identifier) → ∅ + "); +} + +#[test] +fn anonymous_string() { + let g = parse_and_construct(r#"Op = "+""#); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Op = N0 + + N0: "+" → ∅ + "#); +} + +#[test] +fn wildcard() { + let g = parse_and_construct("Any = (_)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Any = N0 + + N0: _ → ∅ + "); +} + +#[test] +fn wildcard_underscore_literal() { + let g = parse_and_construct("Any = _"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Any = N0 + + N0: _ → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Nested Nodes +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn nested_node() { + let g = parse_and_construct("Foo = (call (identifier))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) → N1 + N1: [Down] (identifier) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn deeply_nested() { + let g = parse_and_construct("Foo = (a (b (c)))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (a) → N1 + N1: [Down] (b) → N2 + N2: [Down] (c) → N3 + N3: [Up(1)] ε → N4 + N4: [Up(1)] ε → ∅ + "); +} + +#[test] +fn sibling_nodes() { + let g = parse_and_construct("Foo = (call (identifier) (arguments))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) → N1 + N1: [Down] (identifier) → N2 + N2: [Next] (arguments) → N3 + N3: [Up(1)] ε → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Anchors +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn anchor_first_child() { + // . before first child → DownSkipTrivia + let g = parse_and_construct("Foo = (block . (statement))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (block) → N1 + N1: [Down.] 
(statement) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn anchor_last_child() { + // . after last child → UpSkipTrivia + let g = parse_and_construct("Foo = (block (statement) .)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (block) → N1 + N1: [Down] (statement) → N2 + N2: [Up.(1)] ε → ∅ + "); +} + +#[test] +fn anchor_adjacent_siblings() { + // . between siblings → NextSkipTrivia + let g = parse_and_construct("Foo = (block (a) . (b))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (block) → N1 + N1: [Down] (a) → N2 + N2: [Next.] (b) → N3 + N3: [Up(1)] ε → ∅ + "); +} + +#[test] +fn anchor_both_ends() { + // . at start and end + let g = parse_and_construct("Foo = (array . (element) .)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (array) → N1 + N1: [Down.] (element) → N2 + N2: [Up.(1)] ε → ∅ + "); +} + +#[test] +fn anchor_string_literal_first() { + // . before string literal → DownExact + let g = parse_and_construct(r#"Foo = (pair . ":" (value))"#); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Foo = N0 + + N0: (pair) → N1 + N1: [Down!] ":" → N2 + N2: [Next] (value) → N3 + N3: [Up(1)] ε → ∅ + "#); +} + +#[test] +fn anchor_string_literal_adjacent() { + // . between "=" and (value): the anchor constrains the FOLLOWING node, + // so "=" keeps plain [Next] and (value) gets [Next.] (NextSkipTrivia, named node) + let g = parse_and_construct(r#"Foo = (assignment (id) "=" . (value))"#); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Foo = N0 + + N0: (assignment) → N1 + N1: [Down] (id) → N2 + N2: [Next] "=" → N3 + N3: [Next.] (value) → N4 + N4: [Up(1)] ε → ∅ + "#); +} + +#[test] +fn anchor_string_literal_last() { + // . 
after string literal at end → UpExact + let g = parse_and_construct(r#"Foo = (semi (stmt) ";" .)"#); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Foo = N0 + + N0: (semi) → N1 + N1: [Down] (stmt) → N2 + N2: [Next] ";" → N3 + N3: [Up!(1)] ε → ∅ + "#); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Fields +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn field_constraint() { + let g = parse_and_construct("Foo = (call name: (identifier))"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) → N1 + N1: [Down] (identifier) @name → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn negated_field() { + let g = parse_and_construct("Foo = (call !arguments)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) !arguments → ∅ + "); +} + +#[test] +fn multiple_negated_fields() { + let g = parse_and_construct("Foo = (call !arguments !type_arguments)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) !arguments !type_arguments → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Sequences +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn sequence_expr() { + let g = parse_and_construct("Foo = { (a) (b) }"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: ε [StartObj] → N1 + N1: [Next] (a) → N2 + N2: [Next] (b) → N3 + N3: ε [EndObj] → ∅ + "); +} + +#[test] +fn empty_sequence() { + let g = parse_and_construct("Foo = { }"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: ε [StartObj] → N1 + N1: ε → N2 + N2: ε [EndObj] → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Alternations +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn untagged_alternation() { + let g = parse_and_construct("Foo = 
[(identifier) (number)]"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: ε → N2, N3 + N1: ε → ∅ + N2: (identifier) → N1 + N3: (number) → N1 + "); +} + +#[test] +fn tagged_alternation() { + let g = parse_and_construct("Foo = [Ident: (identifier) Num: (number)]"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: ε → N2, N5 + N1: ε → ∅ + N2: ε [Variant(Ident)] → N3 + N3: (identifier) → N4 + N4: ε [EndVariant] → N1 + N5: ε [Variant(Num)] → N6 + N6: (number) → N7 + N7: ε [EndVariant] → N1 + "); +} + +#[test] +fn single_branch_alt() { + let g = parse_and_construct("Foo = [(identifier)]"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: ε → N2 + N1: ε → ∅ + N2: (identifier) → N1 + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Captures +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn simple_capture() { + let g = parse_and_construct("Foo = (identifier) @name"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(name)] → ∅ + "); +} + +#[test] +fn capture_with_string_type() { + let g = parse_and_construct("Foo = (identifier) @name ::string"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (identifier) [Capture] [ToString] → N1 + N1: ε [Field(name)] → ∅ + "); +} + +#[test] +fn nested_capture() { + let g = parse_and_construct("Foo = (call name: (identifier) @fn_name)"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N0 + + N0: (call) → N1 + N1: [Down] (identifier) @name [Capture] → N2 + N2: ε [Field(fn_name)] → N3 + N3: [Up(1)] ε → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Quantifiers +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn zero_or_more() { + let g = parse_and_construct("Foo = (identifier)*"); + + 
insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N1 + + N0: (identifier) → N3 + N1: ε [StartArray] → N2 + N2: ε → N0, N4 + N3: ε [Push] → N2 + N4: ε [EndArray] → ∅ + "); +} + +#[test] +fn one_or_more() { + let g = parse_and_construct("Foo = (identifier)+"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N1 + + N0: (identifier) → N2 + N1: ε [StartArray] → N0 + N2: ε [Push] → N3 + N3: ε → N0, N4 + N4: ε [EndArray] → ∅ + "); +} + +#[test] +fn optional() { + let g = parse_and_construct("Foo = (identifier)?"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N1 + + N0: (identifier) → N2 + N1: ε → N0, N2 + N2: ε → ∅ + "); +} + +#[test] +fn lazy_zero_or_more() { + let g = parse_and_construct("Foo = (identifier)*?"); + + insta::assert_snapshot!(dump_graph(&g), @r" + Foo = N1 + + N0: (identifier) → N3 + N1: ε [StartArray] → N2 + N2: ε → N4, N0 + N3: ε [Push] → N2 + N4: ε [EndArray] → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// References +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn simple_reference() { + let g = parse_and_construct( + " + Ident = (identifier) + Foo = (call (Ident)) + ", + ); + + insta::assert_snapshot!(dump_graph(&g), @r" + Ident = N0 + Foo = N1 + + N0: (identifier) → ∅ + N1: (call) → N2 + N2: [Down] ε +Enter(0, Ident) → N0, N4 + N3: ε +Exit(0) → N4 + N4: [Up(1)] ε → ∅ + "); +} + +#[test] +fn multiple_references() { + let g = parse_and_construct( + " + Expr = [(identifier) (number)] + Foo = (binary left: (Expr) right: (Expr)) + ", + ); + + insta::assert_snapshot!(dump_graph(&g), @r" + Expr = N0 + Foo = N4 + + N0: ε → N2, N3 + N1: ε → ∅ + N2: (identifier) → N1 + N3: (number) → N1 + N4: (binary) → N5 + N5: [Down] ε +Enter(0, Expr) → N0, N7 + N6: ε +Exit(0) → N7 + N7: [Next] ε +Enter(1, Expr) → N0, N9 + N8: ε +Exit(1) → N9 + N9: [Up(1)] ε → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── 
+// Multiple Definitions +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn multiple_definitions() { + let g = parse_and_construct( + " + Ident = (identifier) + Num = (number) + Str = (string) + ", + ); + + insta::assert_snapshot!(dump_graph(&g), @r" + Ident = N0 + Num = N1 + Str = N2 + + N0: (identifier) → ∅ + N1: (number) → ∅ + N2: (string) → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Complex Examples +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn function_pattern() { + let g = parse_and_construct( + " + Func = (function_definition + name: (identifier) @name + parameters: (parameters (identifier)* @params) + body: (block)) + ", + ); + + insta::assert_snapshot!(dump_graph(&g), @r" + Func = N0 + + N0: (function_definition) → N1 + N1: [Down] (identifier) @name [Capture] → N2 + N2: ε [Field(name)] → N3 + N3: [Next] (parameters) @parameters → N5 + N4: [Down] (identifier) [Capture] → N7 + N5: ε [StartArray] → N6 + N6: ε → N4, N8 + N7: ε [Push] → N6 + N8: ε [EndArray] → N9 + N9: ε [Field(params)] → N10 + N10: [Up(1)] ε → N11 + N11: [Next] (block) @body → N12 + N12: [Up(1)] ε → ∅ + "); +} + +#[test] +fn binary_expression_pattern() { + let g = parse_and_construct( + r#" + BinOp = (binary_expression + left: (_) @left + operator: ["+" "-" "*" "/"] @op ::string + right: (_) @right) + "#, + ); + + insta::assert_snapshot!(dump_graph(&g), @r#" + BinOp = N0 + + N0: (binary_expression) → N1 + N1: [Down] _ @left [Capture] → N2 + N2: ε [Field(left)] → N3 + N3: [Next] ε → N5, N6, N7, N8 + N4: ε → N9 + N5: "+" [Capture] [ToString] → N4 + N6: "-" [Capture] [ToString] → N4 + N7: "*" [Capture] [ToString] → N4 + N8: "/" [Capture] [ToString] → N4 + N9: ε [Field(op)] → N10 + N10: [Next] _ @right [Capture] → N11 + N11: ε [Field(right)] → N12 + N12: [Up(1)] ε → ∅ + "#); +} diff --git a/crates/plotnik-lib/src/graph/mod.rs 
b/crates/plotnik-lib/src/graph/mod.rs new file mode 100644 index 00000000..40fee28b --- /dev/null +++ b/crates/plotnik-lib/src/graph/mod.rs @@ -0,0 +1,25 @@ +//! Build-time graph representation for query compilation. +//! +//! This module provides an intermediate graph representation between +//! the parsed AST and the final compiled IR. The graph is mutable during +//! construction and supports analysis passes like epsilon elimination. +//! +//! # Architecture +//! +//! ```text +//! AST (parser) → BuildGraph → [analysis passes] → CompiledQuery (ir) +//! ``` +//! +//! The `BuildGraph` borrows strings from the source (`&'src str`). +//! String interning happens during emission to `CompiledQuery`. + +mod build; +mod construct; + +#[cfg(test)] +mod build_tests; +#[cfg(test)] +mod construct_tests; + +pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; +pub use construct::{GraphConstructor, construct_graph}; diff --git a/crates/plotnik-lib/src/ir/ids.rs b/crates/plotnik-lib/src/ir/ids.rs index dcb88265..f97b17ba 100644 --- a/crates/plotnik-lib/src/ir/ids.rs +++ b/crates/plotnik-lib/src/ir/ids.rs @@ -18,6 +18,9 @@ pub type NodeFieldId = NonZeroU16; /// Index into the string_refs segment. pub type StringId = u16; +/// Sentinel value for unnamed types (wrapper types have no explicit name). +pub const STRING_NONE: StringId = 0xFFFF; + /// Field name in effects (alias for type safety). pub type DataFieldId = StringId; diff --git a/crates/plotnik-lib/src/ir/nav.rs b/crates/plotnik-lib/src/ir/nav.rs index 5630a426..76d74ebf 100644 --- a/crates/plotnik-lib/src/ir/nav.rs +++ b/crates/plotnik-lib/src/ir/nav.rs @@ -69,14 +69,26 @@ impl Nav { } } + /// Constrained ascent requires `level == 1`. Multi-level ascent with + /// intermediate constraints must decompose into separate transitions. 
pub const fn up_skip_trivia(level: u8) -> Self { + assert!( + level == 1, + "UpSkipTrivia requires level == 1; decompose for intermediate constraints" + ); Self { kind: NavKind::UpSkipTrivia, level, } } + /// Constrained ascent requires `level == 1`. Multi-level ascent with + /// intermediate constraints must decompose into separate transitions. pub const fn up_exact(level: u8) -> Self { + assert!( + level == 1, + "UpExact requires level == 1; decompose for intermediate constraints" + ); Self { kind: NavKind::UpExact, level, diff --git a/crates/plotnik-lib/src/ir/slice.rs b/crates/plotnik-lib/src/ir/slice.rs index c3abbfb2..13e8d717 100644 --- a/crates/plotnik-lib/src/ir/slice.rs +++ b/crates/plotnik-lib/src/ir/slice.rs @@ -3,7 +3,7 @@ //! `start_index` is an **element index**, not a byte offset. This naming //! distinguishes it from byte offsets like `StringRef.offset`. //! -//! This struct is 6 bytes to fit the Transition layout requirements. +//! This struct is 8 bytes with 4-byte alignment for efficient access. //! Type safety is provided through generic methods, not stored PhantomData. use std::marker::PhantomData; @@ -13,20 +13,21 @@ use std::marker::PhantomData; /// Used for variable-length data (successors, effects, negated fields, type members). /// The slice references elements by index into the corresponding segment array. /// -/// Layout: 6 bytes (4 + 2), no padding due to `repr(C, packed)`. -/// Alignment is 1 due to packing, so reads may be unaligned on some platforms. -#[repr(C, packed)] +/// Layout: 8 bytes (4 + 2 + 2), align 4. +#[repr(C)] #[derive(Clone, Copy)] pub struct Slice { /// Element index into the segment array (NOT byte offset). start_index: u32, /// Number of elements. 65k elements per slice is sufficient. 
len: u16, + _pad: u16, _phantom: PhantomData T>, } -// Compile-time size verification -const _: () = assert!(size_of::>() == 6); +// Compile-time size/alignment verification +const _: () = assert!(size_of::>() == 8); +const _: () = assert!(align_of::>() == 4); impl Slice { /// Creates a new slice. @@ -35,6 +36,7 @@ impl Slice { Self { start_index, len, + _pad: 0, _phantom: PhantomData, } } @@ -48,7 +50,6 @@ impl Slice { /// Returns the start index (element index, not byte offset). #[inline] pub fn start_index(&self) -> u32 { - // Packed struct - field may be unaligned, so copy out self.start_index } @@ -80,7 +81,7 @@ impl Default for Slice { impl PartialEq for Slice { fn eq(&self, other: &Self) -> bool { - self.start_index() == other.start_index() && self.len() == other.len() + self.start_index == other.start_index && self.len == other.len } } @@ -89,8 +90,8 @@ impl Eq for Slice {} impl std::fmt::Debug for Slice { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Slice") - .field("start_index", &self.start_index()) - .field("len", &self.len()) + .field("start_index", &self.start_index) + .field("len", &self.len) .finish() } } diff --git a/crates/plotnik-lib/src/ir/slice_tests.rs b/crates/plotnik-lib/src/ir/slice_tests.rs index dee8ae26..c1e85f77 100644 --- a/crates/plotnik-lib/src/ir/slice_tests.rs +++ b/crates/plotnik-lib/src/ir/slice_tests.rs @@ -43,6 +43,7 @@ fn equality() { } #[test] -fn size_is_6_bytes() { - assert_eq!(std::mem::size_of::>(), 6); +fn size_is_8_bytes() { + assert_eq!(std::mem::size_of::>(), 8); + assert_eq!(std::mem::align_of::>(), 4); } diff --git a/crates/plotnik-lib/src/ir/transition.rs b/crates/plotnik-lib/src/ir/transition.rs index a452d239..34376da2 100644 --- a/crates/plotnik-lib/src/ir/transition.rs +++ b/crates/plotnik-lib/src/ir/transition.rs @@ -14,35 +14,61 @@ pub const MAX_INLINE_SUCCESSORS: usize = 8; /// Transitions use SSO (small-size optimization) for successors: /// - 0-8 successors: stored 
inline in `successor_data` /// - 9+ successors: `successor_data[0]` is index into successors segment +/// +/// Layout (64 bytes total, 64-byte aligned): +/// ```text +/// offset 0: matcher (16 bytes) +/// offset 16: ref_marker (4 bytes) +/// offset 20: nav (2 bytes) +/// offset 22: effects_len (2 bytes) +/// offset 24: successor_count (4 bytes) +/// offset 28: effects_start (4 bytes) +/// offset 32: successor_data (32 bytes) +/// ``` #[repr(C, align(64))] #[derive(Clone, Copy)] pub struct Transition { // --- 32 bytes metadata --- /// What this transition matches (node kind, wildcard, epsilon). - pub matcher: Matcher, // 16 bytes + pub matcher: Matcher, // 16 bytes, offset 0 /// Reference call/return marker for recursive definitions. - pub ref_marker: RefTransition, // 4 bytes + pub ref_marker: RefTransition, // 4 bytes, offset 16 - /// Number of successor transitions. - pub successor_count: u32, // 4 bytes + /// Navigation instruction (descend/ascend/sibling traversal). + pub nav: Nav, // 2 bytes, offset 20 - /// Effects to execute on successful match. - /// When empty: start_index=0, len=0. - pub effects: Slice, // 6 bytes + /// Number of effect operations (inlined from Slice for alignment). + effects_len: u16, // 2 bytes, offset 22 - /// Navigation instruction (descend/ascend/sibling traversal). - pub nav: Nav, // 2 bytes + /// Number of successor transitions. + pub successor_count: u32, // 4 bytes, offset 24 + + /// Start index into effects segment (inlined from Slice for alignment). + effects_start: u32, // 4 bytes, offset 28 // --- 32 bytes control flow --- /// Successor storage (inline or spilled index). /// /// - If `successor_count <= 8`: contains `TransitionId` values directly /// - If `successor_count > 8`: `successor_data[0]` is index into successors segment - pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes + pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes, offset 32 } impl Transition { + /// Returns the effects slice. 
+ #[inline] + pub fn effects(&self) -> Slice { + Slice::new(self.effects_start, self.effects_len) + } + + /// Sets the effects slice. + #[inline] + pub fn set_effects(&mut self, effects: Slice) { + self.effects_start = effects.start_index(); + self.effects_len = effects.len(); + } + /// Returns `true` if successors are stored inline. #[inline] pub fn has_inline_successors(&self) -> bool { diff --git a/crates/plotnik-lib/src/ir/type_metadata.rs b/crates/plotnik-lib/src/ir/type_metadata.rs index 46fda12a..a532ad75 100644 --- a/crates/plotnik-lib/src/ir/type_metadata.rs +++ b/crates/plotnik-lib/src/ir/type_metadata.rs @@ -4,7 +4,7 @@ //! transitions produce, not how they execute. use super::Slice; -use super::ids::{StringId, TypeId}; +use super::ids::{STRING_NONE, StringId, TypeId}; /// First composite type ID (after primitives 0-2). pub const TYPE_COMPOSITE_START: TypeId = 3; @@ -19,16 +19,16 @@ pub const TYPE_COMPOSITE_START: TypeId = 3; pub struct TypeDef { pub kind: TypeKind, _pad: u8, - /// Synthetic or explicit type name. `0xFFFF` for unnamed wrappers. + /// Synthetic or explicit type name. `STRING_NONE` for unnamed wrappers. pub name: StringId, /// See struct-level docs for dual semantics. pub members: Slice, - _pad2: u16, } -// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(6) + pad2(2) -// Alignment is 2 due to packed Slice having align 1 +// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(8) = 12 +// Alignment is 4 due to Slice having align 4 const _: () = assert!(size_of::() == 12); +const _: () = assert!(align_of::() == 4); impl TypeDef { /// Create a wrapper type (Optional, ArrayStar, ArrayPlus). 
@@ -40,9 +40,8 @@ impl TypeDef { Self { kind, _pad: 0, - name: 0xFFFF, + name: STRING_NONE, members: Slice::from_inner_type(inner), - _pad2: 0, } } @@ -54,7 +53,6 @@ impl TypeDef { _pad: 0, name, members, - _pad2: 0, } } diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 418441dd..31a0f89c 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,6 +17,7 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod diagnostics; +pub mod graph; pub mod infer; pub mod ir; pub mod parser; diff --git a/crates/plotnik-lib/src/parser/ast.rs b/crates/plotnik-lib/src/parser/ast.rs index 420aa78b..680d2889 100644 --- a/crates/plotnik-lib/src/parser/ast.rs +++ b/crates/plotnik-lib/src/parser/ast.rs @@ -98,6 +98,39 @@ ast_node!(FieldExpr, Field); ast_node!(NegatedField, NegatedField); ast_node!(Anchor, Anchor); +/// Either an expression or an anchor in a sequence. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SeqItem { + Expr(Expr), + Anchor(Anchor), +} + +impl SeqItem { + pub fn cast(node: SyntaxNode) -> Option { + if let Some(expr) = Expr::cast(node.clone()) { + return Some(SeqItem::Expr(expr)); + } + if let Some(anchor) = Anchor::cast(node) { + return Some(SeqItem::Anchor(anchor)); + } + None + } + + pub fn as_anchor(&self) -> Option<&Anchor> { + match self { + SeqItem::Anchor(a) => Some(a), + _ => None, + } + } + + pub fn as_expr(&self) -> Option<&Expr> { + match self { + SeqItem::Expr(e) => Some(e), + _ => None, + } + } +} + /// Anonymous node: string literal (`"+"`) or wildcard (`_`). /// Maps from CST `Str` or `Wildcard`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -204,6 +237,16 @@ impl NamedNode { pub fn children(&self) -> impl Iterator + '_ { self.0.children().filter_map(Expr::cast) } + + /// Returns all anchors in this node. 
+ pub fn anchors(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Anchor::cast) + } + + /// Returns children interleaved with anchors, preserving order. + pub fn items(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SeqItem::cast) + } } impl Ref { @@ -266,6 +309,16 @@ impl SeqExpr { pub fn children(&self) -> impl Iterator + '_ { self.0.children().filter_map(Expr::cast) } + + /// Returns all anchors in this sequence. + pub fn anchors(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Anchor::cast) + } + + /// Returns children interleaved with anchors, preserving order. + pub fn items(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SeqItem::cast) + } } impl CapturedExpr { diff --git a/crates/plotnik-lib/src/parser/mod.rs b/crates/plotnik-lib/src/parser/mod.rs index 4e3ff52e..0d0f44fb 100644 --- a/crates/plotnik-lib/src/parser/mod.rs +++ b/crates/plotnik-lib/src/parser/mod.rs @@ -42,7 +42,7 @@ pub use cst::{SyntaxKind, SyntaxNode, SyntaxToken}; pub use ast::{ AltExpr, AltKind, Anchor, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, Type, token_src, + NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, Type, token_src, }; pub use core::{ParseResult, Parser}; diff --git a/docs/adr/ADR-0004-query-ir-binary-format.md b/docs/adr/ADR-0004-query-ir-binary-format.md index ebcd2a3f..72f0eb3e 100644 --- a/docs/adr/ADR-0004-query-ir-binary-format.md +++ b/docs/adr/ADR-0004-query-ir-binary-format.md @@ -79,6 +79,7 @@ Single pool for all strings (field names, variant tags, entrypoint names, type n ```rust type StringId = u16; +const STRING_NONE: StringId = 0xFFFF; // sentinel for unnamed types #[repr(C)] struct StringRef { @@ -118,7 +119,7 @@ struct Entrypoint { Header (64 bytes): magic: [u8; 4] b"PLNK" version: u32 format version + ABI hash - checksum: u32 CRC32(offsets || buffer_data) + checksum: u32 CRC32(header[12..64] || buffer_data) buffer_len: 
u32 successors_offset: u32 effects_offset: u32 @@ -138,6 +139,8 @@ Header is 64 bytes to ensure buffer data starts at a 64-byte aligned offset. Thi Little-endian always. UTF-8 strings. Version mismatch or checksum failure → recompile. +**Checksum coverage**: The checksum covers bytes 12–63 of the header (everything after the checksum field) plus all buffer data. The magic and version are verified independently before checksum validation—a version mismatch triggers recompile without checking the checksum. + ### Construction Three passes: @@ -166,7 +169,7 @@ Buffer layout: 0x0280 Negated Fields [] 0x0280 String Refs [{0,4}, {4,5}, {9,5}, ...] 0x02C0 String Bytes "namevalueIdentNumFuncExpr" -0x0300 Type Defs [Record{...}, Enum{...}, ...] +0x0300 Type Defs [Struct{...}, Enum{...}, ...] 0x0340 Type Members [{name,Str}, {Ident,Ty5}, ...] 0x0380 Entrypoints [{name=Func, target=Tr0, type=Ty3}, ...] 0x03A0 Trivia Kinds [comment, ...] diff --git a/docs/adr/ADR-0005-transition-graph-format.md b/docs/adr/ADR-0005-transition-graph-format.md index e6ea9513..eb8e19a8 100644 --- a/docs/adr/ADR-0005-transition-graph-format.md +++ b/docs/adr/ADR-0005-transition-graph-format.md @@ -25,30 +25,33 @@ type RefId = u16; Relative range within a segment: ```rust -#[repr(C, packed)] +#[repr(C)] struct Slice { start_index: u32, // element index into segment array (NOT byte offset) len: u16, // 65k elements per slice is sufficient + _pad: u16, _phantom: PhantomData T>, } -// 6 bytes, align 1 (packed to avoid padding) +// 8 bytes, align 4 ``` -**Note**: `repr(C, packed)` is required to achieve 6 bytes. Standard `repr(C)` would pad to 8 bytes for alignment. The packed repr means field access may be unaligned on some platforms—accessors should copy values out rather than returning references. - `start_index` is an **element index**, not a byte offset. This naming distinguishes it from byte offsets like `StringRef.offset` and `CompiledQuery.*_offset`. 
The distinction matters for typed array access. ### Transition ```rust +/// Transitions use SSO (small-size optimization) for successors: +/// - 0-8 successors: stored inline in `successor_data` +/// - 9+ successors: `successor_data[0]` is index into successors segment #[repr(C, align(64))] struct Transition { // --- 32 bytes metadata --- matcher: Matcher, // 16 (offset 0) ref_marker: RefTransition, // 4 (offset 16) - successor_count: u32, // 4 (offset 20) - effects: Slice, // 6 (offset 24, when no effects: start and len are zero) - nav: Nav, // 2 (offset 30, see ADR-0008) + nav: Nav, // 2 (offset 20, see ADR-0008) + effects_len: u16, // 2 (offset 22, inlined from Slice) + successor_count: u32, // 4 (offset 24) + effects_start: u32, // 4 (offset 28, inlined from Slice) // --- 32 bytes control flow --- successor_data: [u32; 8], // 32 (offset 32) @@ -56,6 +59,8 @@ struct Transition { // 64 bytes, align 64 (cache-line aligned) ``` +The `effects_start` and `effects_len` fields are inlined rather than using `Slice` to maintain 64-byte alignment without sacrificing inline successor slots. Accessors reconstruct a `Slice` on demand. + Navigation is fully determined by `nav`—no runtime dispatch based on previous matcher. See [ADR-0008](ADR-0008-tree-navigation.md) for `Nav` definition and semantics. Single `ref_marker` slot—sequences like `Enter(A) → Enter(B)` remain as epsilon chains. @@ -94,16 +99,16 @@ enum Matcher { Node { kind: NodeTypeId, // 2 field: Option, // 2 - negated_fields: Slice, // 8 + negated_fields: Slice, // 8 (align 4, starts at offset 8) }, Anonymous { kind: NodeTypeId, // 2 field: Option, // 2 - negated_fields: Slice, // 8 + negated_fields: Slice, // 8 (align 4, starts at offset 8) }, Wildcard, } -// 16 bytes, align 4 +// 16 bytes, align 4 (discriminant 4 + payload 12, but payload naturally aligns) ``` `Option` uses 0 for `None` (niche optimization). @@ -126,6 +131,8 @@ Layout: 1-byte discriminant + 1-byte padding + 2-byte `RefId` payload = 4 bytes. 
 Explicit `None` ensures stable binary layout (`Option` niche is unspecified).
 
+**RefId semantics**: `RefId` is a unique identifier assigned per definition reference during graph construction. It is **not** an index into the `Entrypoints` table (which is for named exports). The actual jump target comes from `successors()[0]` of the `Enter` transition. `RefId` exists solely to verify that `Exit(id)` matches the corresponding `Enter(id)` at runtime—a mismatch indicates an IR bug.
+
 ### Enter/Exit Semantics
 
 **Problem**: A definition can be called from multiple sites. Naively, `Exit.next` would contain all possible return points from all call sites, requiring O(N) filtering at runtime to find which return is valid for the current call.
diff --git a/docs/adr/ADR-0006-dynamic-query-execution.md b/docs/adr/ADR-0006-dynamic-query-execution.md
index 81be2c82..e4664b8c 100644
--- a/docs/adr/ADR-0006-dynamic-query-execution.md
+++ b/docs/adr/ADR-0006-dynamic-query-execution.md
@@ -17,10 +17,17 @@ For each transition:
 1. Execute `nav` initial movement (e.g., goto_first_child, goto_next_sibling)
 2. Search loop: try matcher, on fail apply skip policy (advance or fail)
 3. On match success: store matched node, execute `effects` sequentially
-4. Process successors with backtracking
+4. Process `ref_marker` (see below)
+5. Process successors with backtracking
 
 For `Up*` variants, step 2 becomes: validate exit constraint, ascend N levels (no search loop).
 
+**RefTransition handling** (step 4):
+
+- `None`: no action, proceed to step 5
+- `Enter(ref_id)`: push frame onto `FrameArena`, store `successors()[1..]` as returns, then jump to `successors()[0]` (definition entry)—step 5 is skipped
+- `Exit(ref_id)`: verify `ref_id` matches current frame, pop frame, use stored returns as successors—step 5 uses these instead of the transition's own successors
+
 Navigation is fully determined by `nav`—no runtime dispatch based on previous matcher. 
See [ADR-0008](ADR-0008-tree-navigation.md) for detailed semantics. The matched node is stored in a temporary slot (`matched_node`) accessible to `CaptureNode` effect. Effects execute in order—`CaptureNode` reads from this slot and sets `executor.current`. diff --git a/docs/adr/ADR-0007-type-metadata-format.md b/docs/adr/ADR-0007-type-metadata-format.md index 0f0e6c28..353cd69b 100644 --- a/docs/adr/ADR-0007-type-metadata-format.md +++ b/docs/adr/ADR-0007-type-metadata-format.md @@ -53,10 +53,9 @@ struct TypeDef { kind: TypeKind, // 1 _pad: u8, // 1 name: StringId, // 2 - synthetic or explicit, 0xFFFF for wrappers - members: Slice, // 6 - see interpretation below - _pad2: u16, // 2 + members: Slice, // 8 - see interpretation below } -// 12 bytes, align 2 (due to packed Slice having align 1) +// 12 bytes, align 4 ``` The `members` field has dual semantics based on `kind`: @@ -64,7 +63,7 @@ The `members` field has dual semantics based on `kind`: | Kind | `members.start_index` | `members.len` | | ---------------------------------- | ----------------------- | ------------- | | Wrappers (Optional/Array\*/Array+) | Inner `TypeId` (as u32) | 0 | -| Composites (Record/Enum) | Index into type_members | Member count | +| Composites (Struct/Enum) | Index into type_members | Member count | This reuses `Slice` for consistency with [ADR-0005](ADR-0005-transition-graph-format.md), while keeping TypeDef compact. @@ -76,7 +75,7 @@ enum TypeKind { Optional = 0, // T? 
— members.start = inner TypeId ArrayStar = 1, // T* — members.start = element TypeId ArrayPlus = 2, // T+ — members.start = element TypeId - Record = 3, // struct — members = slice into type_members + Struct = 3, // struct — members = slice into type_members Enum = 4, // tagged union — members = slice into type_members } ``` @@ -86,12 +85,12 @@ enum TypeKind { | Optional | `expr?` | Nullable wrapper | | ArrayStar | `expr*` | Zero or more elements | | ArrayPlus | `expr+` | One or more elements (non-empty) | -| Record | `{ ... } @name` | Named fields | +| Struct | `{ ... } @name` | Named fields | | Enum | `[ A: ... B: ... ]` | Tagged union (discriminated) | ### TypeMember -Shared structure for Record fields and Enum variants: +Shared structure for Struct fields and Enum variants: ```rust #[repr(C)] @@ -132,10 +131,10 @@ Func = (function_declaration Type graph: ``` -T3: Record "Func" → [name: Str, body: T4] +T3: Struct "Func" → [name: Str, body: T4] T4: Enum "FuncBody" → [Stmt: T5, Expr: T6] -T5: Record "FuncBodyStmt" → [stmt: Node] -T6: Record "FuncBodyExpr" → [expr: Node] +T5: Struct "FuncBodyStmt" → [stmt: Node] +T6: Struct "FuncBodyExpr" → [expr: Node] Entrypoint: Func → result_type: T3 ``` diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md new file mode 100644 index 00000000..c82ec09e --- /dev/null +++ b/docs/adr/ADR-0009-type-system.md @@ -0,0 +1,441 @@ +# ADR-0009: Type System + +- **Status**: Proposed +- **Date**: 2025-01-14 + +## Context + +Type inference transforms a `BuildGraph` into `TypeDef`/`TypeMember` structures (ADR-0007). This ADR formalizes the inference rules, particularly the semantics of alternations. 
+ +## Decision + +### Type Universe + +``` +τ ::= Void -- definition with no captures (TypeId = 0) + | Node -- AST node reference (TypeId = 1) + | String -- extracted source text (TypeId = 2) + | Optional(τ) -- nullable wrapper + | ArrayStar(τ) -- zero or more + | ArrayPlus(τ) -- one or more + | Struct(fields) -- struct with named fields + | Enum(variants) -- tagged union +``` + +### Cardinality + +Cardinality describes how many values a capture produces: + +| Cardinality | Notation | Wrapper | Semantics | +| ----------- | -------- | ----------- | ------------ | +| Required | `1` | none | exactly one | +| Optional | `?` | `Optional` | zero or one | +| Star | `*` | `ArrayStar` | zero or more | +| Plus | `+` | `ArrayPlus` | one or more | + +Cardinality propagates through nesting: + +``` +outer * inner = result +────────────────────── + 1 * 1 = 1 + 1 * ? = ? + 1 * * = * + 1 * + = + + ? * 1 = ? + ? * ? = ? + ? * * = * + ? * + = * + * * 1 = * + * * ? = * + * * * = * + * * + = * + + * 1 = + + + * ? = * + + * * = * + + * + = + +``` + +### Scope Rules + +A **scope** is a container that collects captures into fields. + +Scopes are created by: + +1. **Definition root**: inherits the scope type of its root expression (see below) +2. **Captured sequence**: `{...} @name` creates a nested Struct scope +3. **Captured tagged alternation**: `[A: ... B: ...] @name` creates an Enum; each variant has its own scope +4. **Captured untagged alternation**: `[...] @name` creates a Struct; captures from branches merge + +**Definition root semantics**: A definition `Foo = expr` is equivalent to capturing the root expression with the definition name. Therefore: + +- `Foo = [ A: ... B: ... ]` → `Foo` is an Enum (tagged alternation at root) +- `Foo = { ... }` or `Foo = (node ...)` → `Foo` is a Struct (captures propagate to root scope) +- `Foo = (node) @x` → `Foo` is a Struct with field `x` + +**Critical rule**: Tags only have effect when the alternation is captured. 
An _inline_ uncaptured tagged alternation behaves identically to an untagged one—captures propagate to parent scope. + +### Flat Scoping Principle + +Query nesting does NOT create data nesting. Intermediate structure is invisible: + +```plotnik +Query = (a (b (c) @val)) +``` + +Result type: `Struct { val: Node }` — the `(a ...)` and `(b ...)` wrappers contribute nothing. + +Only explicit scope markers (`{...} @x`, `[...] @x` with tags) introduce nesting in the output type. + +### Type Inference for Captures + +| Pattern | Inferred Type | +| ----------------------------- | -------------------- | +| `(node) @x` | `Node` | +| `"literal" @x` | `Node` | +| `@x ::string` | `String` | +| `@x ::TypeName` | `TypeName` (nominal) | +| `{...} @x` | synthetic Struct | +| `[A: ... B: ...] @x` (tagged) | Enum with variants | +| `[...] @x` (untagged) | merged Struct | + +### Alternation Semantics + +This is the most complex part of type inference. The key insight: + +> **Tags only matter when the alternation is captured.** + +#### Case 1: Uncaptured Alternation (Tagged or Untagged) + +Captures propagate to the parent scope. Asymmetric captures become Optional. + +```plotnik +Foo = [ A: (a) @x B: (b) @y ] +``` + +Despite tags, this is uncaptured. Behavior: + +- `@x` appears only in branch A → propagates as `Optional(Node)` +- `@y` appears only in branch B → propagates as `Optional(Node)` +- Result: `Foo { x: Optional(Node), y: Optional(Node) }` +- Diagnostic (warning): asymmetric captures + +```plotnik +Bar = [ (a) @v (b) @v ] +``` + +Untagged, uncaptured. Both branches have `@v`: + +- `@v` appears in all branches with type `Node` → propagates as `Node` +- Result: `Bar { v: Node }` + +#### Case 2: Captured Untagged Alternation + +Creates a Struct scope. Captures from branches merge into it. 
+ +```plotnik +Foo = [ (a) @x (b) @y ] @z +``` + +- `@z` creates a Struct scope +- `@x` and `@y` are asymmetric → both become Optional within `@z`'s scope +- Result: `Foo { z: FooZ }` where `FooZ { x: Optional(Node), y: Optional(Node) }` + +```plotnik +Bar = [ (a) @v (b) @v ] @z +``` + +- `@z` creates a Struct scope +- `@v` appears in all branches → required within `@z`'s scope +- Result: `Bar { z: BarZ }` where `BarZ { v: Node }` + +#### Case 3: Captured Tagged Alternation + +Creates an Enum. Each variant has its own independent scope. + +```plotnik +Foo = [ A: (a) @x B: (b) @y ] @z +``` + +- `@z` creates an Enum because tags are present AND alternation is captured +- Variant `A` has scope with `@x: Node` +- Variant `B` has scope with `@y: Node` +- Result: `Foo { z: FooZ }` where `FooZ` is: + ``` + Enum FooZ { + A: FooZA { x: Node } + B: FooZB { y: Node } + } + ``` + +### Unification Rules (for merging) + +When merging captures across untagged alternation branches: + +``` +unify(τ, τ) = τ +unify(Node, Node) = Node +unify(String, String) = String +unify(Struct(f₁), Struct(f₂)) = Struct(f₁) if f₁ = f₂ +unify(τ₁, τ₂) = ⊥ (error) +``` + +### Cardinality Join (for merging) + +When the same capture appears in multiple branches with different cardinalities: + +``` + + + /|\ + * | (arrays collapse to *) + \| + ? + | + 1 +``` + +| Left | Right | Join | +| ---- | ----- | ---- | +| 1 | 1 | 1 | +| 1 | ? | ? | +| 1 | \* | \* | +| 1 | + | + | +| ? | ? | ? | +| ? | \* | \* | +| ? | + | \* | +| \* | \* | \* | +| \* | + | \* | +| + | + | + | + +### Cardinality Lifting Coercion + +When cardinality join produces an array type (`*` or `+`) but a branch has scalar cardinality (`1` or `?`), the compiler inserts coercion effects to wrap the scalar in a singleton array. 
+ +| Original | Lifted to | Effect transformation | +| -------- | ---------- | ------------------------------------------------------------------------------------------- | +| `1` | `*` or `+` | `CaptureNode` → `StartArray, CaptureNode, PushElement, EndArray` | +| `?` | `*` | absent → `StartArray, EndArray`; present → `StartArray, CaptureNode, PushElement, EndArray` | + +This ensures the materializer always receives homogeneous values matching the declared type. + +Example: + +```plotnik +Items = [ (single) @item (multi { (x)+ @item }) ] +``` + +Branch 1 has `@item: 1`, branch 2 has `@item: +`. Join is `+`. Branch 1's effects are lifted: + +``` +// Before lifting: +CaptureNode, Field("item") + +// After lifting: +StartArray, CaptureNode, PushElement, EndArray, Field("item") +``` + +### Quantifier-Induced Scope (QIS) + +When a quantified expression contains multiple captures, they must stay coupled per-iteration. QIS creates an implicit scope to preserve this structural relationship. + +**Trigger**: Quantifier `Q ∈ {*, +, ?}` applied to expression `E`, where `E` has **≥2 propagating captures** (captures not absorbed by inner scopes). + +**Mechanism**: QIS creates an implicit scope around `E`. Captures propagate to this scope (not the parent), forming a struct element type. 
+ +**Containers**: Any expression can trigger QIS: + +- Node: `(node ...)Q` +- Sequence: `{...}Q` +- Alternation: `[...]Q` + +**Naming**: + +| Context | Element Type Name | +| ---------------------------- | ----------------------------------- | +| At definition root | `{Def}Item` | +| Explicit capture `E Q @name` | `{Parent}{Name}` | +| Neither | **Error**: require explicit `@name` | + +**Result Type**: + +| Q | Result | +| --- | ------------------------ | +| `*` | `ArrayStar(ElementType)` | +| `+` | `ArrayPlus(ElementType)` | +| `?` | `Optional(ElementType)` | + +**Interior rules**: Standard type inference within the implicit scope: + +- Uncaptured alternations (tagged or not): asymmetric captures → Optional +- Captured tagged alternations: Enum with variant scopes + +**Non-trigger** (≤1 propagating capture): No QIS. Single capture propagates with cardinality multiplication `Q × innerCard`. + +**Examples**: + +```plotnik +// Node as container - keeps name/body paired +Functions = (function_declaration + name: (identifier) @name + body: (block) @body +)* +// → Functions = ArrayStar(FunctionsItem) +// → FunctionsItem = { name: Node, body: Node } + +// Alternation in quantified sequence +Foo = { [ (a) @x (b) @y ] }* +// → Foo = ArrayStar(FooItem) +// → FooItem = { x: Optional(Node), y: Optional(Node) } + +// Tagged but uncaptured (tags ignored, same result) +Bar = { [ A: (a) @x B: (b) @y ] }* +// → Bar = ArrayStar(BarItem) +// → BarItem = { x: Optional(Node), y: Optional(Node) } + +// Tagged AND captured (no QIS - single propagating capture) +Baz = { [ A: (a) @x B: (b) @y ] @choice }* +// → Baz = ArrayStar(BazChoice) +// → BazChoice = Enum { A: { x: Node }, B: { y: Node } } + +// Nested with explicit capture +Outer = (parent { [ (a) @x (b) @y ] }* @items) +// → Outer = { items: ArrayStar(OuterItems) } +// → OuterItems = { x: Optional(Node), y: Optional(Node) } + +// Single capture - no QIS, standard rules +Single = { (a) @item }* +// → Single = { item: 
ArrayStar(Node) } + +// Error: QIS triggered but no capture, not at root +Bad = (parent { [ (a) @x (b) @y ] }* (other) @z) +// → Error: quantified expression with multiple captures requires @name +``` + +### Missing Field Rule + +If a capture appears in some branches but not all, the field becomes `Optional` (or `*` if original was array). + +This is intentional: users can have common fields be required across all branches, while branch-specific fields become optional. + +### Synthetic Naming + +Types without explicit `::Name` receive synthetic names: + +| Context | Pattern | Example | +| -------------------- | ----------------- | ------------ | +| Definition root | `{DefName}` | `Func` | +| Captured sequence | `{Def}{Capture}` | `FuncParams` | +| Captured alternation | `{Def}{Capture}` | `FuncBody` | +| Enum variant payload | `{Enum}{Variant}` | `FuncBodyOk` | + +Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`, ...). + +### Error Conditions + +| Condition | Severity | Recovery | Diagnostic Kind (future) | +| ------------------------------------ | -------- | ----------------------------- | ------------------------------ | +| Type mismatch in untagged alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Duplicate capture in same scope | Error | Keep first, ignore duplicates | `DuplicateCapture` | +| Empty definition (no captures) | Info | Type is `Void` (TypeId = 0) | (no diagnostic) | +| Inline uncaptured tagged alternation | Warning | Treat as untagged | `UnusedBranchLabels` | +| QIS without capture (not at root) | Error | Cannot infer element type | `MultiCaptureQuantifierNoName` | + +The last warning applies only to literal tagged alternations, not references. If `Foo = [ A: ... ]` is used as `(Foo)`, no warning—the user intentionally reuses a definition. But `(parent [ A: ... B: ... ])` inline without capture likely indicates a forgotten `@name`. 
+ +## Examples + +### Example 1: Captured Sequence + +```plotnik +Foo = (foo {(bar) @bar} @baz) +``` + +- `@bar` captures `(bar)` → `Node` +- `@baz` captures the sequence containing `@bar` → creates scope +- Types: + - `@bar: Node` + - `@baz: FooBaz { bar: Node }` + - `Foo: { baz: FooBaz }` + +### Example 2: Uncaptured Sequence + +```plotnik +Foo = (foo {(bar) @bar}) +``` + +- `@bar` captures `(bar)` → `Node` +- Sequence `{...}` is NOT captured → `@bar` propagates to `Foo`'s scope +- Types: + - `Foo: { bar: Node }` + +### Example 3: Tagged Alternation at Definition Root + +```plotnik +Result = [ + Ok: (value) @val + Err: (error) @msg ::string +] +``` + +- Tagged alternation at definition root → `Result` is an Enum +- Types: + - `Result: Enum { Ok: ResultOk, Err: ResultErr }` + - `ResultOk: { val: Node }` + - `ResultErr: { msg: String }` + +### Example 4: Tagged Alternation (Inline, Uncaptured) + +```plotnik +Foo = (parent [ + Ok: (value) @val + Err: (error) @msg ::string +]) +``` + +- Tagged alternation is inline and uncaptured → tags ignored, behaves like untagged +- `@val` only in Ok branch → `Optional(Node)` +- `@msg` only in Err branch → `Optional(String)` +- Types: + - `Foo: { val: Optional(Node), msg: Optional(String) }` +- Diagnostic: warning (inline uncaptured tagged alternation) + +### Example 5: Cardinality in Alternation + +```plotnik +Items = [ (single) @item (multi { (x)+ @item }) ] +``` + +- Branch 1: `@item` cardinality `1`, type `Node` +- Branch 2: `@item` cardinality `+`, type `Node` +- Join: cardinality `+` (both present, LUB of `1` and `+`) +- Types: + - `Items: { item: ArrayPlus(Node) }` + +### Example 6: Nested Quantifier + +```plotnik +Funcs = (module { (function)* @fns }) +``` + +- `@fns` has cardinality `*` from quantifier +- Sequence not captured → propagates to root +- Types: + - `Funcs: { fns: ArrayStar(Node) }` + +## Consequences + +**Positive**: + +- Explicit rules enable deterministic inference +- "Tags only matter when captured" is a 
simple mental model +- Warning on asymmetric captures catches likely bugs +- Definition root inherits type naturally—no wrapper structs for top-level enums + +**Negative**: + +- LUB cardinality join can lose precision + +**Alternatives Considered**: + +- Error on uncaptured tagged alternations (rejected: too restrictive for incremental development) +- Definition root always Struct (rejected: forces wrapper types for enums, e.g., `struct Expr { val: ExprEnum }` instead of `enum Expr`) From 253aaf47e4a0d6fd1f2502d3f2e6efae9f6ceb12 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 11:42:27 -0300 Subject: [PATCH 02/23] optimize and analysis --- crates/plotnik-lib/src/graph/analysis.rs | 204 +++++ .../plotnik-lib/src/graph/analysis_tests.rs | 251 ++++++ crates/plotnik-lib/src/graph/mod.rs | 8 + crates/plotnik-lib/src/graph/optimize.rs | 192 +++++ .../plotnik-lib/src/graph/optimize_tests.rs | 589 +++++++++++++ crates/plotnik-lib/src/infer/emit/mod.rs | 14 - crates/plotnik-lib/src/infer/emit/rust.rs | 247 ------ .../plotnik-lib/src/infer/emit/rust_tests.rs | 592 ------------- .../plotnik-lib/src/infer/emit/typescript.rs | 300 ------- .../src/infer/emit/typescript_tests.rs | 793 ------------------ crates/plotnik-lib/src/infer/mod.rs | 21 - crates/plotnik-lib/src/infer/types.rs | 280 ------- crates/plotnik-lib/src/infer/types_tests.rs | 377 --------- crates/plotnik-lib/src/infer/tyton.rs | 547 ------------ crates/plotnik-lib/src/infer/tyton_tests.rs | 599 ------------- crates/plotnik-lib/src/lib.rs | 1 - 16 files changed, 1244 insertions(+), 3771 deletions(-) create mode 100644 crates/plotnik-lib/src/graph/analysis.rs create mode 100644 crates/plotnik-lib/src/graph/analysis_tests.rs create mode 100644 crates/plotnik-lib/src/graph/optimize.rs create mode 100644 crates/plotnik-lib/src/graph/optimize_tests.rs delete mode 100644 crates/plotnik-lib/src/infer/emit/mod.rs delete mode 100644 crates/plotnik-lib/src/infer/emit/rust.rs delete mode 100644 
crates/plotnik-lib/src/infer/emit/rust_tests.rs delete mode 100644 crates/plotnik-lib/src/infer/emit/typescript.rs delete mode 100644 crates/plotnik-lib/src/infer/emit/typescript_tests.rs delete mode 100644 crates/plotnik-lib/src/infer/mod.rs delete mode 100644 crates/plotnik-lib/src/infer/types.rs delete mode 100644 crates/plotnik-lib/src/infer/types_tests.rs delete mode 100644 crates/plotnik-lib/src/infer/tyton.rs delete mode 100644 crates/plotnik-lib/src/infer/tyton_tests.rs diff --git a/crates/plotnik-lib/src/graph/analysis.rs b/crates/plotnik-lib/src/graph/analysis.rs new file mode 100644 index 00000000..d7bde1f8 --- /dev/null +++ b/crates/plotnik-lib/src/graph/analysis.rs @@ -0,0 +1,204 @@ +//! Analysis pass for emission preparation. +//! +//! This module prepares a `BuildGraph` for emission to the binary format +//! by computing counts, interning strings, and mapping node IDs. +//! +//! # Three-Phase Construction (ADR-0004) +//! +//! 1. **Analysis** (this module): Count elements, intern strings +//! 2. **Layout**: Compute aligned offsets, allocate once +//! 3. **Emission**: Write to buffer +//! +//! # String Interning +//! +//! All strings (field names, variant tags, node kinds, definition names) +//! are deduplicated. Identical strings share storage and `StringId`. + +use super::{BuildEffect, BuildGraph, BuildMatcher, NodeId}; +use crate::ir::StringId; +use indexmap::IndexMap; +use std::collections::HashSet; + +/// Result of analyzing a BuildGraph for emission. +#[derive(Debug)] +pub struct AnalysisResult<'src> { + /// String interner with all unique strings. + pub strings: StringInterner<'src>, + + /// Mapping from BuildGraph NodeId to emission index. + /// Dead nodes map to `None`. + pub node_map: Vec>, + + /// Number of live transitions to emit. + pub transition_count: u32, + + /// Total successor slots needed in the spill segment. + /// (Only for nodes with >8 successors) + pub spilled_successor_count: u32, + + /// Total effects across all nodes. 
+ pub effect_count: u32, + + /// Total negated fields across all matchers. + pub negated_field_count: u32, + + /// Number of definition entrypoints. + pub entrypoint_count: u32, +} + +/// String interner for deduplication. +/// +/// Strings are stored in insertion order. `StringId` is the index. +#[derive(Debug, Default)] +pub struct StringInterner<'src> { + /// Map from string content to its ID. + index: IndexMap<&'src str, StringId>, +} + +impl<'src> StringInterner<'src> { + pub fn new() -> Self { + Self { + index: IndexMap::new(), + } + } + + /// Intern a string, returning its ID. + /// Returns existing ID if already interned. + pub fn intern(&mut self, s: &'src str) -> StringId { + let next_id = self.index.len() as StringId; + *self.index.entry(s).or_insert(next_id) + } + + /// Get the ID of an already-interned string. + pub fn get(&self, s: &str) -> Option { + self.index.get(s).copied() + } + + /// Iterate over all strings in insertion order. + pub fn iter(&self) -> impl Iterator + '_ { + self.index.iter().map(|(s, id)| (*s, *id)) + } + + /// Number of interned strings. + pub fn len(&self) -> usize { + self.index.len() + } + + /// Returns true if no strings have been interned. + pub fn is_empty(&self) -> bool { + self.index.is_empty() + } + + /// Total byte length of all strings. + pub fn total_bytes(&self) -> usize { + self.index.keys().map(|s| s.len()).sum() + } +} + +/// Analyze a BuildGraph for emission. +/// +/// The `dead_nodes` set contains nodes eliminated by optimization passes. +/// These are skipped during analysis and won't appear in the output. 
+pub fn analyze<'src>( + graph: &BuildGraph<'src>, + dead_nodes: &HashSet, +) -> AnalysisResult<'src> { + let mut strings = StringInterner::new(); + let mut node_map: Vec> = vec![None; graph.len()]; + + let mut transition_count: u32 = 0; + let mut spilled_successor_count: u32 = 0; + let mut effect_count: u32 = 0; + let mut negated_field_count: u32 = 0; + + // First pass: map live nodes to emission indices and count elements + for (id, node) in graph.iter() { + if dead_nodes.contains(&id) { + continue; + } + + node_map[id as usize] = Some(transition_count); + transition_count += 1; + + // Count successors that spill (>8) + let live_successors = count_live_successors(node, dead_nodes); + if live_successors > 8 { + spilled_successor_count += live_successors as u32; + } + + // Count effects + effect_count += node.effects.len() as u32; + + // Intern strings and count negated fields from matcher + match &node.matcher { + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + strings.intern(kind); + if let Some(f) = field { + strings.intern(f); + } + for nf in negated_fields { + strings.intern(nf); + } + negated_field_count += negated_fields.len() as u32; + } + BuildMatcher::Anonymous { literal, field } => { + strings.intern(literal); + if let Some(f) = field { + strings.intern(f); + } + } + BuildMatcher::Wildcard { field } => { + if let Some(f) = field { + strings.intern(f); + } + } + BuildMatcher::Epsilon => {} + } + + // Intern strings from effects + for effect in &node.effects { + match effect { + BuildEffect::Field(name) => { + strings.intern(name); + } + BuildEffect::StartVariant(tag) => { + strings.intern(tag); + } + _ => {} + } + } + + // Intern ref name if present + if let Some(name) = node.ref_name { + strings.intern(name); + } + } + + // Intern definition names + let entrypoint_count = graph.definitions().count() as u32; + for (name, _) in graph.definitions() { + strings.intern(name); + } + + AnalysisResult { + strings, + node_map, + 
transition_count, + spilled_successor_count, + effect_count, + negated_field_count, + entrypoint_count, + } +} + +/// Count live successors (excluding dead nodes). +fn count_live_successors(node: &super::BuildNode, dead_nodes: &HashSet) -> usize { + node.successors + .iter() + .filter(|s| !dead_nodes.contains(s)) + .count() +} diff --git a/crates/plotnik-lib/src/graph/analysis_tests.rs b/crates/plotnik-lib/src/graph/analysis_tests.rs new file mode 100644 index 00000000..4370048c --- /dev/null +++ b/crates/plotnik-lib/src/graph/analysis_tests.rs @@ -0,0 +1,251 @@ +//! Tests for analysis module. + +use std::collections::HashSet; + +use super::*; +use crate::graph::{BuildEffect, BuildGraph, BuildMatcher, RefMarker}; + +#[test] +fn string_interner_deduplicates() { + let mut interner = StringInterner::new(); + + let id1 = interner.intern("name"); + let id2 = interner.intern("value"); + let id3 = interner.intern("name"); // duplicate + + assert_eq!(id1, id3); + assert_ne!(id1, id2); + assert_eq!(interner.len(), 2); +} + +#[test] +fn string_interner_preserves_order() { + let mut interner = StringInterner::new(); + + interner.intern("alpha"); + interner.intern("beta"); + interner.intern("gamma"); + + let strings: Vec<_> = interner.iter().collect(); + + assert_eq!(strings, vec![("alpha", 0), ("beta", 1), ("gamma", 2)]); +} + +#[test] +fn string_interner_total_bytes() { + let mut interner = StringInterner::new(); + + interner.intern("foo"); + interner.intern("bar"); + interner.intern("foo"); // duplicate, not counted twice + + assert_eq!(interner.total_bytes(), 6); // "foo" + "bar" +} + +#[test] +fn analyze_empty_graph() { + let g = BuildGraph::new(); + let dead = HashSet::new(); + + let result = analyze(&g, &dead); + + assert_eq!(result.transition_count, 0); + assert_eq!(result.effect_count, 0); + assert_eq!(result.entrypoint_count, 0); + assert!(result.strings.is_empty()); +} + +#[test] +fn analyze_single_matcher() { + let mut g = BuildGraph::new(); + 
g.add_matcher(BuildMatcher::node("identifier")); + let dead = HashSet::new(); + + let result = analyze(&g, &dead); + + assert_eq!(result.transition_count, 1); + assert_eq!(result.node_map[0], Some(0)); + assert_eq!(result.strings.len(), 1); + assert_eq!(result.strings.get("identifier"), Some(0)); +} + +#[test] +fn analyze_skips_dead_nodes() { + let mut g = BuildGraph::new(); + let n0 = g.add_matcher(BuildMatcher::node("a")); + let n1 = g.add_epsilon(); // will be dead + let n2 = g.add_matcher(BuildMatcher::node("b")); + g.connect(n0, n1); + g.connect(n1, n2); + + let mut dead = HashSet::new(); + dead.insert(n1); + + let result = analyze(&g, &dead); + + assert_eq!(result.transition_count, 2); + assert_eq!(result.node_map[0], Some(0)); + assert_eq!(result.node_map[1], None); // dead + assert_eq!(result.node_map[2], Some(1)); +} + +#[test] +fn analyze_counts_effects() { + let mut g = BuildGraph::new(); + let id = g.add_matcher(BuildMatcher::node("identifier")); + g.node_mut(id).add_effect(BuildEffect::CaptureNode); + g.node_mut(id).add_effect(BuildEffect::Field("name")); + g.node_mut(id).add_effect(BuildEffect::ToString); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.effect_count, 3); + // "identifier" and "name" interned + assert_eq!(result.strings.len(), 2); +} + +#[test] +fn analyze_counts_negated_fields() { + let mut g = BuildGraph::new(); + g.add_matcher( + BuildMatcher::node("call") + .with_negated_field("arguments") + .with_negated_field("type_arguments"), + ); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.negated_field_count, 2); + // "call", "arguments", "type_arguments" interned + assert_eq!(result.strings.len(), 3); +} + +#[test] +fn analyze_interns_field_constraints() { + let mut g = BuildGraph::new(); + g.add_matcher(BuildMatcher::node("function").with_field("name")); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.strings.len(), 2); 
+ assert!(result.strings.get("function").is_some()); + assert!(result.strings.get("name").is_some()); +} + +#[test] +fn analyze_interns_anonymous_literals() { + let mut g = BuildGraph::new(); + g.add_matcher(BuildMatcher::anonymous("+")); + g.add_matcher(BuildMatcher::anonymous("-")); + g.add_matcher(BuildMatcher::anonymous("+")); // duplicate + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.transition_count, 3); + assert_eq!(result.strings.len(), 2); // "+" and "-" +} + +#[test] +fn analyze_interns_variant_tags() { + let mut g = BuildGraph::new(); + let n0 = g.add_epsilon(); + g.node_mut(n0).add_effect(BuildEffect::StartVariant("True")); + + let n1 = g.add_epsilon(); + g.node_mut(n1) + .add_effect(BuildEffect::StartVariant("False")); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.strings.len(), 2); + assert!(result.strings.get("True").is_some()); + assert!(result.strings.get("False").is_some()); +} + +#[test] +fn analyze_counts_entrypoints() { + let mut g = BuildGraph::new(); + let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); + g.add_definition("Ident", f1.entry); + + let f2 = g.matcher_fragment(BuildMatcher::node("number")); + g.add_definition("Num", f2.entry); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.entrypoint_count, 2); + // "identifier", "number", "Ident", "Num" interned + assert_eq!(result.strings.len(), 4); +} + +#[test] +fn analyze_deduplicates_across_sources() { + let mut g = BuildGraph::new(); + + // "name" appears as: node kind, field constraint, effect field, definition name + let n0 = g.add_matcher(BuildMatcher::node("name").with_field("name")); + g.node_mut(n0).add_effect(BuildEffect::Field("name")); + g.add_definition("name", n0); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + // All "name" references should resolve to same StringId + assert_eq!(result.strings.len(), 1); + 
assert_eq!(result.strings.get("name"), Some(0)); +} + +#[test] +fn analyze_wildcard_with_field() { + let mut g = BuildGraph::new(); + g.add_matcher(BuildMatcher::wildcard().with_field("body")); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.strings.len(), 1); + assert!(result.strings.get("body").is_some()); +} + +#[test] +fn analyze_ref_names() { + let mut g = BuildGraph::new(); + let enter = g.add_epsilon(); + g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); + g.node_mut(enter).ref_name = Some("Function"); + + let dead = HashSet::new(); + let result = analyze(&g, &dead); + + assert_eq!(result.strings.len(), 1); + assert!(result.strings.get("Function").is_some()); +} + +#[test] +fn node_map_indices_are_contiguous() { + let mut g = BuildGraph::new(); + g.add_matcher(BuildMatcher::node("a")); // 0 -> 0 + g.add_epsilon(); // 1 -> dead + g.add_matcher(BuildMatcher::node("b")); // 2 -> 1 + g.add_epsilon(); // 3 -> dead + g.add_matcher(BuildMatcher::node("c")); // 4 -> 2 + + let mut dead = HashSet::new(); + dead.insert(1); + dead.insert(3); + + let result = analyze(&g, &dead); + + assert_eq!(result.transition_count, 3); + assert_eq!(result.node_map[0], Some(0)); + assert_eq!(result.node_map[1], None); + assert_eq!(result.node_map[2], Some(1)); + assert_eq!(result.node_map[3], None); + assert_eq!(result.node_map[4], Some(2)); +} diff --git a/crates/plotnik-lib/src/graph/mod.rs b/crates/plotnik-lib/src/graph/mod.rs index 40fee28b..8676f86d 100644 --- a/crates/plotnik-lib/src/graph/mod.rs +++ b/crates/plotnik-lib/src/graph/mod.rs @@ -13,13 +13,21 @@ //! The `BuildGraph` borrows strings from the source (`&'src str`). //! String interning happens during emission to `CompiledQuery`. 
+mod analysis; mod build; mod construct; +mod optimize; +#[cfg(test)] +mod analysis_tests; #[cfg(test)] mod build_tests; #[cfg(test)] mod construct_tests; +#[cfg(test)] +mod optimize_tests; +pub use analysis::{AnalysisResult, StringInterner, analyze}; pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; pub use construct::{GraphConstructor, construct_graph}; +pub use optimize::{OptimizeStats, eliminate_epsilons}; diff --git a/crates/plotnik-lib/src/graph/optimize.rs b/crates/plotnik-lib/src/graph/optimize.rs new file mode 100644 index 00000000..fd21017b --- /dev/null +++ b/crates/plotnik-lib/src/graph/optimize.rs @@ -0,0 +1,192 @@ +//! Epsilon elimination optimization pass. +//! +//! Reduces graph size by removing unnecessary epsilon transitions. +//! This simplifies the graph for subsequent analysis passes and reduces +//! runtime traversal overhead. +//! +//! # Safety Rules (from ADR-0005) +//! +//! An epsilon node CANNOT be eliminated if: +//! - It has a `RefMarker` (Enter/Exit) — single slot constraint +//! - It has multiple successors (branch point) +//! - Its successor already has a `RefMarker` (would lose one) +//! - Both have non-Stay `Nav` that can't be merged (only unconstrained Up can merge) +//! +//! # Algorithm +//! +//! 1. Build predecessor map +//! 2. Identify eliminable epsilon nodes +//! 3. For each eliminable epsilon: +//! - Prepend its effects to successor +//! - Redirect all predecessors to successor +//! - Mark epsilon as dead (will be skipped in emission) + +use super::{BuildGraph, BuildMatcher, NodeId}; +use crate::ir::{Nav, NavKind}; +use std::collections::{HashMap, HashSet}; + +/// Statistics from epsilon elimination. +#[derive(Debug, Default)] +pub struct OptimizeStats { + /// Number of epsilon nodes eliminated. + pub epsilons_eliminated: usize, + /// Number of epsilon nodes kept (branch points, ref markers, etc). + pub epsilons_kept: usize, +} + +/// Run epsilon elimination on the graph. 
+/// +/// Returns the set of dead node IDs that should be skipped during emission. +pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeStats) { + let mut stats = OptimizeStats::default(); + let mut dead_nodes: HashSet = HashSet::new(); + + // Build predecessor map: node -> list of predecessors + let predecessors = build_predecessor_map(graph); + + // Process nodes in reverse order to handle chains + // (eliminates inner epsilons before outer ones see them) + let node_count = graph.len() as NodeId; + for id in (0..node_count).rev() { + if dead_nodes.contains(&id) { + continue; + } + + let node = graph.node(id); + if !is_eliminable_epsilon(node, graph) { + if node.is_epsilon() { + stats.epsilons_kept += 1; + } + continue; + } + + // Get the single successor (already verified in is_eliminable_epsilon) + let successor_id = node.successors[0]; + + // Skip if successor has a RefMarker and we have effects + // (can't merge effects into a ref transition) + let successor = graph.node(successor_id); + if !successor.ref_marker.is_none() && !node.effects.is_empty() { + stats.epsilons_kept += 1; + continue; + } + + // Collect data needed for the merge + let effects_to_prepend = graph.node(id).effects.clone(); + let nav_to_transfer = graph.node(id).nav; + let preds = predecessors.get(&id).cloned().unwrap_or_default(); + + // Prepend effects to successor + if !effects_to_prepend.is_empty() { + let succ = graph.node_mut(successor_id); + let mut new_effects = effects_to_prepend; + new_effects.append(&mut succ.effects); + succ.effects = new_effects; + } + + // Transfer or merge nav + let successor_nav = graph.node(successor_id).nav; + if !nav_to_transfer.is_stay() { + if successor_nav.is_stay() { + // Simple transfer + graph.node_mut(successor_id).nav = nav_to_transfer; + } else if can_merge_up(nav_to_transfer, successor_nav) { + // Merge unconstrained Up levels + let merged = Nav::up(nav_to_transfer.level + successor_nav.level); + 
graph.node_mut(successor_id).nav = merged; + } + } + + // Redirect predecessors to successor + for pred_id in &preds { + if dead_nodes.contains(pred_id) { + continue; + } + let pred = graph.node_mut(*pred_id); + for succ in &mut pred.successors { + if *succ == id { + *succ = successor_id; + } + } + } + + // Update definition entry points + redirect_definitions(graph, id, successor_id); + + // Mark as dead + dead_nodes.insert(id); + stats.epsilons_eliminated += 1; + } + + (dead_nodes, stats) +} + +/// Check if an epsilon node can be eliminated. +fn is_eliminable_epsilon(node: &super::BuildNode, graph: &BuildGraph) -> bool { + // Must be epsilon + if !matches!(node.matcher, BuildMatcher::Epsilon) { + return false; + } + + // Must not have RefMarker + if !node.ref_marker.is_none() { + return false; + } + + // Must have exactly one successor (not a branch point) + if node.successors.len() != 1 { + return false; + } + + let successor_id = node.successors[0]; + let successor = graph.node(successor_id); + + // Can't merge if both have non-Stay nav, UNLESS both are unconstrained Up + // (Up(n) + Up(m) = Up(n+m)) + if !node.nav.is_stay() && !successor.nav.is_stay() { + if !can_merge_up(node.nav, successor.nav) { + return false; + } + } + + // Can't merge if both have effects and successor has RefMarker + // (effects must stay ordered relative to ref transitions) + if !node.effects.is_empty() && !successor.ref_marker.is_none() { + return false; + } + + true +} + +/// Build a map from each node to its predecessors. +fn build_predecessor_map(graph: &BuildGraph) -> HashMap> { + let mut predecessors: HashMap> = HashMap::new(); + + for (id, node) in graph.iter() { + for &succ in &node.successors { + predecessors.entry(succ).or_default().push(id); + } + } + + predecessors +} + +/// Check if two Nav instructions can be merged (only unconstrained Up). 
+fn can_merge_up(a: Nav, b: Nav) -> bool { + a.kind == NavKind::Up && b.kind == NavKind::Up +} + +/// Update definition entry points if they pointed to eliminated node. +fn redirect_definitions(graph: &mut BuildGraph, old_id: NodeId, new_id: NodeId) { + // Collect definitions that need updating + let updates: Vec<_> = graph + .definitions() + .filter(|(_, entry)| *entry == old_id) + .map(|(name, _)| name) + .collect(); + + // Apply updates + for name in updates { + graph.add_definition(name, new_id); + } +} diff --git a/crates/plotnik-lib/src/graph/optimize_tests.rs b/crates/plotnik-lib/src/graph/optimize_tests.rs new file mode 100644 index 00000000..de915ebb --- /dev/null +++ b/crates/plotnik-lib/src/graph/optimize_tests.rs @@ -0,0 +1,589 @@ +//! Tests for epsilon elimination optimization pass. + +use std::collections::HashSet; + +use super::*; +use crate::graph::{BuildEffect, BuildMatcher, NodeId, RefMarker}; + +fn dump_graph(graph: &BuildGraph) -> String { + let mut out = String::new(); + + for (name, entry) in graph.definitions() { + out.push_str(&format!("{} = N{}\n", name, entry)); + } + if graph.definitions().next().is_some() { + out.push('\n'); + } + + for (id, node) in graph.iter() { + out.push_str(&format!("N{}: ", id)); + + match &node.matcher { + BuildMatcher::Epsilon => out.push('ε'), + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + out.push_str(&format!("({})", kind)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + for neg in negated_fields { + out.push_str(&format!(" !{}", neg)); + } + } + BuildMatcher::Anonymous { literal, field } => { + out.push_str(&format!("\"{}\"", literal)); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + BuildMatcher::Wildcard { field } => { + out.push('_'); + if let Some(f) = field { + out.push_str(&format!(" @{}", f)); + } + } + } + + match &node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", 
ref_id)), + RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), + } + + for effect in &node.effects { + let eff = match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + }; + out.push_str(&format!(" [{}]", eff)); + } + + if node.successors.is_empty() { + out.push_str(" → ∅"); + } else { + out.push_str(" → "); + let succs: Vec<_> = node.successors.iter().map(|s| format!("N{}", s)).collect(); + out.push_str(&succs.join(", ")); + } + + out.push('\n'); + } + + out +} + +fn dump_live_graph(graph: &BuildGraph, dead: &HashSet) -> String { + let mut out = String::new(); + + for (name, entry) in graph.definitions() { + out.push_str(&format!("{} = N{}\n", name, entry)); + } + if graph.definitions().next().is_some() { + out.push('\n'); + } + + for (id, node) in graph.iter() { + if dead.contains(&id) { + continue; + } + + out.push_str(&format!("N{}: ", id)); + + match &node.matcher { + BuildMatcher::Epsilon => out.push('ε'), + BuildMatcher::Node { kind, .. } => out.push_str(&format!("({})", kind)), + BuildMatcher::Anonymous { literal, .. } => out.push_str(&format!("\"{}\"", literal)), + BuildMatcher::Wildcard { .. 
} => out.push('_'), + } + + match &node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", ref_id)), + RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), + } + + for effect in &node.effects { + let eff = match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + }; + out.push_str(&format!(" [{}]", eff)); + } + + if node.successors.is_empty() { + out.push_str(" → ∅"); + } else { + out.push_str(" → "); + let succs: Vec<_> = node + .successors + .iter() + .filter(|s| !dead.contains(s)) + .map(|s| format!("N{}", s)) + .collect(); + out.push_str(&succs.join(", ")); + } + + out.push('\n'); + } + + out +} + +#[test] +fn eliminates_simple_epsilon_chain() { + let mut g = BuildGraph::new(); + + // Build: ε → ε → (identifier) + let id = g.add_matcher(BuildMatcher::node("identifier")); + let e1 = g.add_epsilon(); + let e2 = g.add_epsilon(); + g.connect(e2, e1); + g.connect(e1, id); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (identifier) → ∅ + N1: ε → N0 + N2: ε → N1 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 2); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (identifier) → ∅ + "#); +} + +#[test] +fn keeps_branch_point_epsilon() { + let mut g = BuildGraph::new(); + + // Build alternation: ε → [A, B] + let a = g.add_matcher(BuildMatcher::node("a")); + let b = g.add_matcher(BuildMatcher::node("b")); + let branch = g.add_epsilon(); + 
g.connect(branch, a); + g.connect(branch, b); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (a) → ∅ + N1: (b) → ∅ + N2: ε → N0, N1 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 0); + assert_eq!(stats.epsilons_kept, 1); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (a) → ∅ + N1: (b) → ∅ + N2: ε → N0, N1 + "#); +} + +#[test] +fn keeps_epsilon_with_enter_marker() { + let mut g = BuildGraph::new(); + + let target = g.add_matcher(BuildMatcher::node("target")); + let enter = g.add_epsilon(); + g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); + g.connect(enter, target); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (target) → ∅ + N1: ε +Enter(0) → N0 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 0); + assert_eq!(stats.epsilons_kept, 1); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (target) → ∅ + N1: ε +Enter(0) → N0 + "#); +} + +#[test] +fn keeps_epsilon_with_exit_marker() { + let mut g = BuildGraph::new(); + + let target = g.add_matcher(BuildMatcher::node("target")); + let exit = g.add_epsilon(); + g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); + g.connect(exit, target); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 0); + assert_eq!(stats.epsilons_kept, 1); + assert!(dead.is_empty()); +} + +#[test] +fn merges_effects_into_successor() { + let mut g = BuildGraph::new(); + + // ε[StartArray] → ε[EndArray] → (identifier)[Capture] + let id = g.add_matcher(BuildMatcher::node("identifier")); + g.node_mut(id).add_effect(BuildEffect::CaptureNode); + + let end_arr = g.add_epsilon(); + g.node_mut(end_arr).add_effect(BuildEffect::EndArray); + g.connect(end_arr, id); + + let start_arr = g.add_epsilon(); + g.node_mut(start_arr).add_effect(BuildEffect::StartArray); + g.connect(start_arr, end_arr); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: 
(identifier) [Capture] → ∅ + N1: ε [EndArray] → N0 + N2: ε [StartArray] → N1 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 2); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (identifier) [StartArray] [EndArray] [Capture] → ∅ + "#); +} + +#[test] +fn redirects_multiple_predecessors() { + let mut g = BuildGraph::new(); + + // A → ε → C + // B ↗ + let c = g.add_matcher(BuildMatcher::node("c")); + let eps = g.add_epsilon(); + let a = g.add_matcher(BuildMatcher::node("a")); + let b = g.add_matcher(BuildMatcher::node("b")); + + g.connect(eps, c); + g.connect(a, eps); + g.connect(b, eps); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (c) → ∅ + N1: ε → N0 + N2: (a) → N1 + N3: (b) → N1 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 1); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (c) → ∅ + N2: (a) → N0 + N3: (b) → N0 + "#); +} + +#[test] +fn updates_definition_entry_point() { + let mut g = BuildGraph::new(); + + // Def = ε → (identifier) + let id = g.add_matcher(BuildMatcher::node("identifier")); + let eps = g.add_epsilon(); + g.connect(eps, id); + g.add_definition("Def", eps); + + insta::assert_snapshot!(dump_graph(&g), @r#" + Def = N1 + + N0: (identifier) → ∅ + N1: ε → N0 + "#); + + let (dead, _stats) = eliminate_epsilons(&mut g); + + // Definition should now point to identifier node + assert_eq!(g.definition("Def"), Some(0)); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + Def = N0 + + N0: (identifier) → ∅ + "#); +} + +#[test] +fn keeps_exit_epsilon_with_no_successor() { + let mut g = BuildGraph::new(); + + // (a) → ε (terminal) + let eps = g.add_epsilon(); + let a = g.add_matcher(BuildMatcher::node("a")); + g.connect(a, eps); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // Epsilon with no successors cannot be eliminated + assert_eq!(stats.epsilons_kept, 1); + assert!(dead.is_empty()); +} + 
+#[test] +fn quantifier_preserves_branch_structure() { + let mut g = BuildGraph::new(); + + // Typical zero_or_more structure: entry(branch) → [inner → branch, exit] + let inner = g.matcher_fragment(BuildMatcher::node("item")); + let _frag = g.zero_or_more(inner); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (item) → N1 + N1: ε → N0, N2 + N2: ε → ∅ + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // Branch (N1) must remain, exit (N2) can't be eliminated (no successor) + assert_eq!(stats.epsilons_kept, 2); + assert_eq!(stats.epsilons_eliminated, 0); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (item) → N1 + N1: ε → N0, N2 + N2: ε → ∅ + "#); +} + +#[test] +fn alternation_exit_epsilon_eliminated() { + let mut g = BuildGraph::new(); + + let f1 = g.matcher_fragment(BuildMatcher::node("a")); + let f2 = g.matcher_fragment(BuildMatcher::node("b")); + let frag = g.alternation(&[f1, f2]); + + // Add a successor to the exit so it can be eliminated + let final_node = g.add_matcher(BuildMatcher::node("end")); + g.connect(frag.exit, final_node); + + insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (a) → N3 + N1: (b) → N3 + N2: ε → N0, N1 + N3: ε → N4 + N4: (end) → ∅ + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // Exit epsilon (N3) should be eliminated, branch (N2) kept + assert_eq!(stats.epsilons_eliminated, 1); + assert_eq!(stats.epsilons_kept, 1); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (a) → N4 + N1: (b) → N4 + N2: ε → N0, N1 + N4: (end) → ∅ + "#); +} + +#[test] +fn does_not_merge_effects_into_ref_marker() { + let mut g = BuildGraph::new(); + + // ε[Field] → ε+Exit(0) → (target) + let target = g.add_matcher(BuildMatcher::node("target")); + let exit = g.add_epsilon(); + g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); + g.connect(exit, target); + + let field_eps = g.add_epsilon(); + g.node_mut(field_eps).add_effect(BuildEffect::Field("name")); + g.connect(field_eps, exit); + + 
insta::assert_snapshot!(dump_graph(&g), @r#" + N0: (target) → ∅ + N1: ε +Exit(0) → N0 + N2: ε [Field(name)] → N1 + "#); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // Should NOT merge Field effect into Exit node + assert_eq!(stats.epsilons_kept, 2); + assert_eq!(stats.epsilons_eliminated, 0); + insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + N0: (target) → ∅ + N1: ε +Exit(0) → N0 + N2: ε [Field(name)] → N1 + "#); +} + +#[test] +fn transfers_nav_to_stay_successor() { + use crate::ir::Nav; + + let mut g = BuildGraph::new(); + + // ε[UpSkipTrivia(1)] → (target)[Stay] + // Nav can be transferred to target, epsilon eliminated + let target = g.add_matcher(BuildMatcher::node("end")); + let up_epsilon = g.add_epsilon(); + g.node_mut(up_epsilon).set_nav(Nav::up_skip_trivia(1)); + g.connect(up_epsilon, target); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // Epsilon eliminated, nav transferred to target + assert_eq!(stats.epsilons_eliminated, 1); + assert!(dead.contains(&1)); + assert_eq!(g.node(0).nav, Nav::up_skip_trivia(1)); +} + +#[test] +fn keeps_epsilon_when_both_have_nav() { + use crate::ir::Nav; + + let mut g = BuildGraph::new(); + + // ε[UpSkipTrivia(1)] → ε[UpSkipTrivia(1)] → (target) + // Can't merge two non-Stay navs + let target = g.add_matcher(BuildMatcher::node("end")); + + let up1 = g.add_epsilon(); + g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); + g.connect(up1, target); + + let up2 = g.add_epsilon(); + g.node_mut(up2).set_nav(Nav::up_skip_trivia(1)); + g.connect(up2, up1); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // First epsilon (up1) eliminated (successor has Stay) + // Second epsilon (up2) kept (successor up1 has non-Stay nav) + assert_eq!(stats.epsilons_eliminated, 1); + assert_eq!(stats.epsilons_kept, 1); + assert!(dead.contains(&1)); // up1 eliminated + assert!(!dead.contains(&2)); // up2 kept +} + +#[test] +fn eliminates_epsilon_with_stay_nav() { + use crate::ir::Nav; + + let mut g = 
BuildGraph::new(); + + // ε[Stay] → (target) - Stay is the default, can be eliminated + let target = g.add_matcher(BuildMatcher::node("target")); + let eps = g.add_epsilon(); + g.node_mut(eps).set_nav(Nav::stay()); // explicit Stay + g.connect(eps, target); + + let (dead, stats) = eliminate_epsilons(&mut g); + + assert_eq!(stats.epsilons_eliminated, 1); + assert!(dead.contains(&1)); // epsilon was eliminated +} + +#[test] +fn merges_unconstrained_up_levels() { + use crate::ir::Nav; + + let mut g = BuildGraph::new(); + + // Simulates: ((((foo)))) - no anchors + // ε[Up(1)] → ε[Up(1)] → ε[Up(1)] → (target) + let target = g.add_matcher(BuildMatcher::node("end")); + + let up1 = g.add_epsilon(); + g.node_mut(up1).set_nav(Nav::up(1)); + g.connect(up1, target); + + let up2 = g.add_epsilon(); + g.node_mut(up2).set_nav(Nav::up(1)); + g.connect(up2, up1); + + let up3 = g.add_epsilon(); + g.node_mut(up3).set_nav(Nav::up(1)); + g.connect(up3, up2); + + let (_dead, stats) = eliminate_epsilons(&mut g); + + // All epsilons eliminated, levels merged into target + assert_eq!(stats.epsilons_eliminated, 3); + assert_eq!(g.node(0).nav, Nav::up(3)); +} + +#[test] +fn does_not_merge_constrained_up() { + use crate::ir::Nav; + + let mut g = BuildGraph::new(); + + // Simulates: ((((foo) .) .) .) 
- anchors at each level + // ε[UpSkipTrivia(1)] → ε[UpSkipTrivia(1)] → (target) + let target = g.add_matcher(BuildMatcher::node("end")); + + let up1 = g.add_epsilon(); + g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); + g.connect(up1, target); + + let up2 = g.add_epsilon(); + g.node_mut(up2).set_nav(Nav::up_skip_trivia(1)); + g.connect(up2, up1); + + let (dead, stats) = eliminate_epsilons(&mut g); + + // First epsilon eliminated (transfers to target) + // Second kept (can't merge UpSkipTrivia) + assert_eq!(stats.epsilons_eliminated, 1); + assert_eq!(stats.epsilons_kept, 1); + assert!(dead.contains(&1)); + assert!(!dead.contains(&2)); +} + +#[test] +fn does_not_merge_mixed_up_kinds() { + use crate::ir::Nav; + + let mut g = BuildGraph::new(); + + // ε[Up(1)] → ε[UpSkipTrivia(1)] → (target) + // Different Up kinds cannot merge + let target = g.add_matcher(BuildMatcher::node("end")); + + let up1 = g.add_epsilon(); + g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); + g.connect(up1, target); + + let up2 = g.add_epsilon(); + g.node_mut(up2).set_nav(Nav::up(1)); // unconstrained + g.connect(up2, up1); + + let (_dead, stats) = eliminate_epsilons(&mut g); + + // First epsilon eliminated (transfers to target) + // Second kept (can't merge Up with UpSkipTrivia) + assert_eq!(stats.epsilons_eliminated, 1); + assert_eq!(stats.epsilons_kept, 1); +} diff --git a/crates/plotnik-lib/src/infer/emit/mod.rs b/crates/plotnik-lib/src/infer/emit/mod.rs deleted file mode 100644 index 2131fffe..00000000 --- a/crates/plotnik-lib/src/infer/emit/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Code emitters for inferred types. -//! -//! This module provides language-specific code generation from a `TypeTable`. 
- -pub mod rust; -pub mod typescript; - -#[cfg(test)] -mod rust_tests; -#[cfg(test)] -mod typescript_tests; - -pub use rust::{Indirection, RustEmitConfig, emit_rust}; -pub use typescript::{OptionalStyle, TypeScriptEmitConfig, emit_typescript}; diff --git a/crates/plotnik-lib/src/infer/emit/rust.rs b/crates/plotnik-lib/src/infer/emit/rust.rs deleted file mode 100644 index b3680273..00000000 --- a/crates/plotnik-lib/src/infer/emit/rust.rs +++ /dev/null @@ -1,247 +0,0 @@ -//! Rust code emitter for inferred types. -//! -//! Emits Rust struct and enum definitions from a `TypeTable`. - -use indexmap::IndexMap; - -use super::super::types::{TypeKey, TypeTable, TypeValue}; - -/// Configuration for Rust emission. -#[derive(Debug, Clone)] -pub struct RustEmitConfig { - /// Indirection type for cyclic references. - pub indirection: Indirection, - /// Whether to derive common traits. - pub derive_debug: bool, - pub derive_clone: bool, - pub derive_partial_eq: bool, - /// Name for the default (unnamed) query entry point type. - pub default_query_name: String, -} - -/// How to handle cyclic type references. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Indirection { - Box, - Rc, - Arc, -} - -impl Default for RustEmitConfig { - fn default() -> Self { - Self { - indirection: Indirection::Box, - derive_debug: true, - derive_clone: true, - derive_partial_eq: false, - default_query_name: "QueryResult".to_string(), - } - } -} - -/// Emit Rust code from a type table. 
-pub fn emit_rust(table: &TypeTable<'_>, config: &RustEmitConfig) -> String { - let mut output = String::new(); - let sorted = topological_sort(table); - - for key in sorted { - let Some(value) = table.get(&key) else { - continue; - }; - - // Skip built-in types - if matches!(key, TypeKey::Node | TypeKey::String | TypeKey::Unit) { - continue; - } - - let type_def = emit_type_def(&key, value, table, config); - if !type_def.is_empty() { - output.push_str(&type_def); - output.push_str("\n\n"); - } - } - - output.trim_end().to_string() -} - -fn emit_type_def( - key: &TypeKey<'_>, - value: &TypeValue<'_>, - table: &TypeTable<'_>, - config: &RustEmitConfig, -) -> String { - let name = match key { - TypeKey::DefaultQuery => config.default_query_name.clone(), - _ => key.to_pascal_case(), - }; - - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => String::new(), - - TypeValue::Struct(fields) => { - let mut out = emit_derives(config); - if fields.is_empty() { - out.push_str(&format!("pub struct {};", name)); - } else { - out.push_str(&format!("pub struct {} {{\n", name)); - for (field_name, field_type) in fields { - let type_str = emit_type_ref(field_type, table, config); - out.push_str(&format!(" pub {}: {},\n", field_name, type_str)); - } - out.push('}'); - } - out - } - - TypeValue::TaggedUnion(variants) => { - let mut out = emit_derives(config); - out.push_str(&format!("pub enum {} {{\n", name)); - for (variant_name, variant_key) in variants { - let fields = match table.get(variant_key) { - Some(TypeValue::Struct(f)) => Some(f), - Some(TypeValue::Unit) | None => None, - _ => None, - }; - match fields { - Some(f) if !f.is_empty() => { - out.push_str(&format!(" {} {{\n", variant_name)); - for (field_name, field_type) in f { - let type_str = emit_type_ref(field_type, table, config); - out.push_str(&format!(" {}: {},\n", field_name, type_str)); - } - out.push_str(" },\n"); - } - _ => { - out.push_str(&format!(" {},\n", 
variant_name)); - } - } - } - out.push('}'); - out - } - - TypeValue::Optional(_) | TypeValue::List(_) | TypeValue::NonEmptyList(_) => { - // Wrapper types become type aliases - let mut out = String::new(); - let inner_type = emit_type_ref(key, table, config); - out.push_str(&format!("pub type {} = {};", name, inner_type)); - out - } - } -} - -pub(crate) fn emit_type_ref( - key: &TypeKey<'_>, - table: &TypeTable<'_>, - config: &RustEmitConfig, -) -> String { - let is_cyclic = table.is_cyclic(key); - - let base = match table.get(key) { - Some(TypeValue::Node) => "Node".to_string(), - Some(TypeValue::String) => "String".to_string(), - Some(TypeValue::Unit) | Some(TypeValue::Invalid) => "()".to_string(), - Some(TypeValue::Optional(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Option<{}>", inner_str) - } - Some(TypeValue::List(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Vec<{}>", inner_str) - } - Some(TypeValue::NonEmptyList(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Vec<{}>", inner_str) - } - // Struct, TaggedUnion, or undefined forward reference - use pascal-cased name - Some(TypeValue::Struct(_)) | Some(TypeValue::TaggedUnion(_)) | None => match key { - TypeKey::DefaultQuery => config.default_query_name.clone(), - _ => key.to_pascal_case(), - }, - }; - - if is_cyclic { - wrap_indirection(&base, config.indirection) - } else { - base - } -} - -pub(crate) fn wrap_indirection(type_str: &str, indirection: Indirection) -> String { - match indirection { - Indirection::Box => format!("Box<{}>", type_str), - Indirection::Rc => format!("Rc<{}>", type_str), - Indirection::Arc => format!("Arc<{}>", type_str), - } -} - -pub(crate) fn emit_derives(config: &RustEmitConfig) -> String { - let mut derives = Vec::new(); - if config.derive_debug { - derives.push("Debug"); - } - if config.derive_clone { - derives.push("Clone"); - } - if config.derive_partial_eq { - 
derives.push("PartialEq"); - } - - if derives.is_empty() { - String::new() - } else { - format!("#[derive({})]\n", derives.join(", ")) - } -} - -/// Topologically sort types so dependencies come before dependents. -pub(crate) fn topological_sort<'src>(table: &TypeTable<'src>) -> Vec> { - let mut result = Vec::new(); - let mut visited = IndexMap::new(); - - for key in table.types.keys() { - visit(key, table, &mut visited, &mut result); - } - - result -} - -fn visit<'src>( - key: &TypeKey<'src>, - table: &TypeTable<'src>, - visited: &mut IndexMap, bool>, - result: &mut Vec>, -) { - if visited.contains_key(key) { - return; - } - - visited.insert(key.clone(), true); - - let Some(value) = table.get(key) else { - visited.insert(key.clone(), false); - result.push(key.clone()); - return; - }; - - for dep in dependencies(value) { - visit(&dep, table, visited, result); - } - - visited.insert(key.clone(), false); - result.push(key.clone()); -} - -pub(crate) fn dependencies<'src>(value: &TypeValue<'src>) -> Vec> { - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => vec![], - - TypeValue::Struct(fields) => fields.values().cloned().collect(), - - TypeValue::TaggedUnion(variants) => variants.values().cloned().collect(), - - TypeValue::Optional(inner) | TypeValue::List(inner) | TypeValue::NonEmptyList(inner) => { - vec![inner.clone()] - } - } -} diff --git a/crates/plotnik-lib/src/infer/emit/rust_tests.rs b/crates/plotnik-lib/src/infer/emit/rust_tests.rs deleted file mode 100644 index 932d64a1..00000000 --- a/crates/plotnik-lib/src/infer/emit/rust_tests.rs +++ /dev/null @@ -1,592 +0,0 @@ -use super::rust::{Indirection, RustEmitConfig, emit_rust}; -use crate::infer::tyton::parse; -use indoc::indoc; - -fn emit(input: &str) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_rust(&table, &RustEmitConfig::default()) -} - -fn emit_with_config(input: &str, config: &RustEmitConfig) -> String { - let table = 
parse(input).expect("tyton parse failed"); - emit_rust(&table, config) -} - -fn emit_cyclic(input: &str, cyclic_types: &[&str]) -> String { - let mut table = parse(input).expect("tyton parse failed"); - for name in cyclic_types { - table.mark_cyclic(crate::infer::TypeKey::Named(name)); - } - emit_rust(&table, &RustEmitConfig::default()) -} - -// --- Simple Structs --- - -#[test] -fn emit_struct_single_field() { - let input = "Foo = { #Node @value }"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_struct_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Func { - pub name: String, - pub body: Node, - pub params: Node, - } - "); -} - -#[test] -fn emit_struct_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Empty; - "); -} - -#[test] -fn emit_struct_with_unit_field() { - let input = "Wrapper = { () @marker }"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Wrapper { - pub marker: (), - } - "); -} - -#[test] -fn emit_struct_nested_refs() { - let input = indoc! {r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Inner { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub struct Outer { - pub inner: Inner, - pub label: String, - } - "); -} - -// --- Tagged Unions --- - -#[test] -fn emit_tagged_union_simple() { - let input = indoc! 
{r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct AssignStmt { - pub target: Node, - pub value: Node, - } - - #[derive(Debug, Clone)] - pub struct CallStmt { - pub func: Node, - } - - #[derive(Debug, Clone)] - pub enum Stmt { - Assign { - target: Node, - value: Node, - }, - Call { - func: Node, - }, - } - "); -} - -#[test] -fn emit_tagged_union_with_empty_variant() { - let input = indoc! {r#" - ValueVariant = { #Node @value } - Expr = [ Some: ValueVariant None: () ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct ValueVariant { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub enum Expr { - Some { - value: Node, - }, - None, - } - "); -} - -#[test] -fn emit_tagged_union_all_empty() { - let input = "Token = [ Comma: () Dot: () Semi: () ]"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub enum Token { - Comma, - Dot, - Semi, - } - "); -} - -#[test] -fn emit_tagged_union_with_builtins() { - let input = "Value = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub enum Value { - Text, - Code, - Empty, - } - "); -} - -// --- Wrapper Types --- - -#[test] -fn emit_optional() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(emit(input), @"pub type MaybeNode = Option;"); -} - -#[test] -fn emit_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(emit(input), @"pub type Nodes = Vec;"); -} - -#[test] -fn emit_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(emit(input), @"pub type Nodes = Vec;"); -} - -#[test] -fn emit_optional_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - MaybeStmt = Stmt? 
- "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Stmt { - pub value: Node, - } - - pub type MaybeStmt = Option; - "); -} - -#[test] -fn emit_list_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - Stmts = Stmt* - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Stmt { - pub value: Node, - } - - pub type Stmts = Vec; - "); -} - -#[test] -fn emit_nested_wrappers() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - MaybeItems = Items? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type Items = Vec; - - pub type MaybeItems = Option>; - "); -} - -// --- Cyclic Types --- - -#[test] -fn emit_cyclic_box() { - let input = indoc! {r#" - TreeNode = { #Node @value TreeNode @left TreeNode @right } - "#}; - insta::assert_snapshot!(emit_cyclic(input, &["TreeNode"]), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub left: Box, - pub right: Box, - } - "); -} - -#[test] -fn emit_cyclic_rc() { - let input = "TreeNode = { #Node @value TreeNode @child }"; - let config = RustEmitConfig { - indirection: Indirection::Rc, - ..Default::default() - }; - let mut table = parse(input).expect("tyton parse failed"); - table.mark_cyclic(crate::infer::TypeKey::Named("TreeNode")); - insta::assert_snapshot!(emit_rust(&table, &config), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub child: Rc, - } - "); -} - -#[test] -fn emit_cyclic_arc() { - let input = "TreeNode = { #Node @value TreeNode @child }"; - let config = RustEmitConfig { - indirection: Indirection::Arc, - ..Default::default() - }; - let mut table = parse(input).expect("tyton parse failed"); - table.mark_cyclic(crate::infer::TypeKey::Named("TreeNode")); - insta::assert_snapshot!(emit_rust(&table, &config), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub child: 
Arc, - } - "); -} - -// --- Config Variations --- - -#[test] -fn emit_no_derives() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: false, - derive_clone: false, - derive_partial_eq: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_debug_only() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: true, - derive_clone: false, - derive_partial_eq: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug)] - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_all_derives() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: true, - derive_clone: true, - derive_partial_eq: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug, Clone, PartialEq)] - pub struct Foo { - pub value: Node, - } - "); -} - -// --- Complex Scenarios --- - -#[test] -fn emit_complex_program() { - let input = indoc! 
{r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - ExprStmt = { #Node @expr } - Stmt = [ Func: FuncDecl Expr: ExprStmt ] - Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct FuncInfo { - pub name: String, - pub body: Node, - } - - #[derive(Debug, Clone)] - pub struct Param { - pub name: String, - pub type_annotation: String, - } - - pub type Params = Vec; - - #[derive(Debug, Clone)] - pub struct FuncDecl { - pub info: FuncInfo, - pub params: Vec, - } - - #[derive(Debug, Clone)] - pub struct ExprStmt { - pub expr: Node, - } - - #[derive(Debug, Clone)] - pub enum Stmt { - Func { - info: FuncInfo, - params: Vec, - }, - Expr { - expr: Node, - }, - } - - #[derive(Debug, Clone)] - pub struct Program { - pub statements: Stmt, - } - "); -} - -#[test] -fn emit_synthetic_keys() { - let input = indoc! {r#" - Container = { @inner } - InnerWrapper = ? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Container { - pub inner: InnerField, - } - - pub type InnerWrapper = Option; - "); -} - -#[test] -fn emit_mixed_wrappers_and_structs() { - let input = indoc! {r#" - Leaf = { #string @text } - Branch = { #Node @left #Node @right } - Tree = [ Leaf: Leaf Branch: Branch ] - Forest = Tree* - MaybeForest = Forest? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Leaf { - pub text: String, - } - - #[derive(Debug, Clone)] - pub struct Branch { - pub left: Node, - pub right: Node, - } - - #[derive(Debug, Clone)] - pub enum Tree { - Leaf { - text: String, - }, - Branch { - left: Node, - right: Node, - }, - } - - pub type Forest = Vec; - - pub type MaybeForest = Option>; - "); -} - -// --- Edge Cases --- - -#[test] -fn emit_single_variant_union() { - let input = indoc! 
{r#" - OnlyVariant = { #Node @value } - Single = [ Only: OnlyVariant ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct OnlyVariant { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub enum Single { - Only { - value: Node, - }, - } - "); -} - -#[test] -fn emit_deeply_nested() { - let input = indoc! {r#" - A = { #Node @val } - B = { A @a } - C = { B @b } - D = { C @c } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct A { - pub val: Node, - } - - #[derive(Debug, Clone)] - pub struct B { - pub a: A, - } - - #[derive(Debug, Clone)] - pub struct C { - pub b: B, - } - - #[derive(Debug, Clone)] - pub struct D { - pub c: C, - } - "); -} - -#[test] -fn emit_list_of_optionals() { - let input = indoc! {r#" - Item = { #Node @value } - MaybeItem = Item? - Items = MaybeItem* - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type MaybeItem = Option; - - pub type Items = Vec>; - "); -} - -#[test] -fn emit_builtin_value_with_named_key() { - let input = indoc! {r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - insta::assert_snapshot!(emit(input), @""); -} - -// --- DefaultQuery --- - -#[test] -fn emit_default_query_struct() { - let input = "#DefaultQuery = { #Node @value }"; - - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct QueryResult { - pub value: Node, - } - "); -} - -#[test] -fn emit_default_query_custom_name() { - let input = "#DefaultQuery = { #Node @value }"; - let config = RustEmitConfig { - default_query_name: "MyResult".to_string(), - ..Default::default() - }; - - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug, Clone)] - pub struct MyResult { - pub value: Node, - } - "); -} - -#[test] -fn emit_default_query_referenced() { - let input = indoc! 
{r#" - Item = { #Node @value } - Items = Item* - #DefaultQuery = { Items @items } - "#}; - - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type Items = Vec; - - #[derive(Debug, Clone)] - pub struct QueryResult { - pub items: Vec, - } - "); -} diff --git a/crates/plotnik-lib/src/infer/emit/typescript.rs b/crates/plotnik-lib/src/infer/emit/typescript.rs deleted file mode 100644 index 72621fd1..00000000 --- a/crates/plotnik-lib/src/infer/emit/typescript.rs +++ /dev/null @@ -1,300 +0,0 @@ -//! TypeScript code emitter for inferred types. -//! -//! Emits TypeScript interface and type definitions from a `TypeTable`. - -use indexmap::IndexMap; - -use super::super::types::{TypeKey, TypeTable, TypeValue}; - -/// Configuration for TypeScript emission. -#[derive(Debug, Clone)] -pub struct TypeScriptEmitConfig { - /// How to represent optional values. - pub optional_style: OptionalStyle, - /// Whether to export types. - pub export: bool, - /// Whether to make fields readonly. - pub readonly: bool, - /// Whether to inline synthetic types. - pub inline_synthetic: bool, - /// Name for the Node type. - pub node_type_name: String, - /// Whether to emit `type Foo = ...` instead of `interface Foo { ... }`. - pub use_type_alias: bool, - /// Name for the default (unnamed) query entry point. - pub default_query_name: String, -} - -/// How to represent optional types. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OptionalStyle { - /// `T | null` - Null, - /// `T | undefined` - Undefined, - /// `T?` (optional property) - QuestionMark, -} - -impl Default for TypeScriptEmitConfig { - fn default() -> Self { - Self { - optional_style: OptionalStyle::Null, - export: false, - readonly: false, - inline_synthetic: true, - node_type_name: "SyntaxNode".to_string(), - use_type_alias: false, - default_query_name: "QueryResult".to_string(), - } - } -} - -/// Emit TypeScript code from a type table. 
-pub fn emit_typescript(table: &TypeTable<'_>, config: &TypeScriptEmitConfig) -> String { - let mut output = String::new(); - let sorted = topological_sort(table); - - for key in sorted { - let Some(value) = table.get(&key) else { - continue; - }; - - // Skip built-in types - if matches!(key, TypeKey::Node | TypeKey::String | TypeKey::Unit) { - continue; - } - - // Skip synthetic types if inlining - if config.inline_synthetic && matches!(key, TypeKey::Synthetic(_)) { - continue; - } - - let type_def = emit_type_def(&key, value, table, config); - if !type_def.is_empty() { - output.push_str(&type_def); - output.push_str("\n\n"); - } - } - - output.trim_end().to_string() -} - -fn emit_type_def( - key: &TypeKey<'_>, - value: &TypeValue<'_>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> String { - let name = type_name(key, config); - let export_prefix = if config.export && !matches!(key, TypeKey::Synthetic(_)) { - "export " - } else { - "" - }; - - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => String::new(), - - TypeValue::Struct(fields) => { - if config.use_type_alias { - let inline = emit_inline_struct(fields, table, config); - format!("{}type {} = {};", export_prefix, name, inline) - } else if fields.is_empty() { - format!("{}interface {} {{}}", export_prefix, name) - } else { - let mut out = format!("{}interface {} {{\n", export_prefix, name); - for (field_name, field_type) in fields { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let readonly = if config.readonly { "readonly " } else { "" }; - let optional = - if is_optional && config.optional_style == OptionalStyle::QuestionMark { - "?" 
- } else { - "" - }; - out.push_str(&format!( - " {}{}{}: {};\n", - readonly, field_name, optional, type_str - )); - } - out.push('}'); - out - } - } - - TypeValue::TaggedUnion(variants) => { - let mut out = format!("{}type {} =\n", export_prefix, name); - let variant_count = variants.len(); - for (i, (variant_name, variant_key)) in variants.iter().enumerate() { - out.push_str(" | { tag: \""); - out.push_str(variant_name); - out.push('"'); - // Look up variant type to get fields - if let Some(TypeValue::Struct(fields)) = table.get(variant_key) { - for (field_name, field_type) in fields { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let optional = if is_optional - && config.optional_style == OptionalStyle::QuestionMark - { - "?" - } else { - "" - }; - out.push_str(&format!("; {}{}: {}", field_name, optional, type_str)); - } - } - out.push_str(" }"); - if i < variant_count - 1 { - out.push('\n'); - } - } - out.push(';'); - out - } - - TypeValue::Optional(_) | TypeValue::List(_) | TypeValue::NonEmptyList(_) => { - let (type_str, _) = emit_field_type(key, table, config); - format!("{}type {} = {};", export_prefix, name, type_str) - } - } -} - -/// Returns (type_string, is_optional) -pub(crate) fn emit_field_type( - key: &TypeKey<'_>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> (String, bool) { - match table.get(key) { - Some(TypeValue::Node) => (config.node_type_name.clone(), false), - Some(TypeValue::String) => ("string".to_string(), false), - Some(TypeValue::Unit) | Some(TypeValue::Invalid) => ("{}".to_string(), false), - - Some(TypeValue::Optional(inner)) => { - let (inner_str, _) = emit_field_type(inner, table, config); - let type_str = match config.optional_style { - OptionalStyle::Null => format!("{} | null", inner_str), - OptionalStyle::Undefined => format!("{} | undefined", inner_str), - OptionalStyle::QuestionMark => inner_str, - }; - (type_str, true) - } - - Some(TypeValue::List(inner)) => { - let 
(inner_str, _) = emit_field_type(inner, table, config); - (format!("{}[]", wrap_if_union(&inner_str)), false) - } - - Some(TypeValue::NonEmptyList(inner)) => { - let (inner_str, _) = emit_field_type(inner, table, config); - (format!("[{}, ...{}[]]", inner_str, inner_str), false) - } - - Some(TypeValue::Struct(fields)) => { - if config.inline_synthetic && matches!(key, TypeKey::Synthetic(_)) { - (emit_inline_struct(fields, table, config), false) - } else { - (type_name(key, config), false) - } - } - - Some(TypeValue::TaggedUnion(_)) => (type_name(key, config), false), - - None => (type_name(key, config), false), - } -} - -pub(crate) fn emit_inline_struct( - fields: &IndexMap<&str, TypeKey<'_>>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> String { - if fields.is_empty() { - return "{}".to_string(); - } - - let mut out = String::from("{ "); - for (i, (field_name, field_type)) in fields.iter().enumerate() { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let optional = if is_optional && config.optional_style == OptionalStyle::QuestionMark { - "?" - } else { - "" - }; - out.push_str(field_name); - out.push_str(optional); - out.push_str(": "); - out.push_str(&type_str); - if i < fields.len() - 1 { - out.push_str("; "); - } - } - out.push_str(" }"); - out -} - -fn type_name(key: &TypeKey<'_>, config: &TypeScriptEmitConfig) -> String { - if key.is_default_query() { - config.default_query_name.clone() - } else { - key.to_pascal_case() - } -} - -pub(crate) fn wrap_if_union(type_str: &str) -> String { - if type_str.contains('|') { - format!("({})", type_str) - } else { - type_str.to_string() - } -} - -/// Topologically sort types so dependencies come before dependents. 
-pub(crate) fn topological_sort<'src>(table: &TypeTable<'src>) -> Vec> { - let mut result = Vec::new(); - let mut visited = IndexMap::new(); - - for key in table.types.keys() { - visit(key, table, &mut visited, &mut result); - } - - result -} - -fn visit<'src>( - key: &TypeKey<'src>, - table: &TypeTable<'src>, - visited: &mut IndexMap, bool>, - result: &mut Vec>, -) { - if visited.contains_key(key) { - return; - } - - visited.insert(key.clone(), true); - - let Some(value) = table.get(key) else { - visited.insert(key.clone(), false); - result.push(key.clone()); - return; - }; - - for dep in dependencies(value) { - visit(&dep, table, visited, result); - } - - visited.insert(key.clone(), false); - result.push(key.clone()); -} - -pub(crate) fn dependencies<'src>(value: &TypeValue<'src>) -> Vec> { - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => vec![], - TypeValue::Struct(fields) => fields.values().cloned().collect(), - TypeValue::TaggedUnion(variants) => variants.values().cloned().collect(), - TypeValue::Optional(inner) | TypeValue::List(inner) | TypeValue::NonEmptyList(inner) => { - vec![inner.clone()] - } - } -} diff --git a/crates/plotnik-lib/src/infer/emit/typescript_tests.rs b/crates/plotnik-lib/src/infer/emit/typescript_tests.rs deleted file mode 100644 index 5aae21dc..00000000 --- a/crates/plotnik-lib/src/infer/emit/typescript_tests.rs +++ /dev/null @@ -1,793 +0,0 @@ -use super::typescript::{OptionalStyle, TypeScriptEmitConfig, emit_typescript}; -use crate::infer::tyton::parse; -use indoc::indoc; - -fn emit(input: &str) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_typescript(&table, &TypeScriptEmitConfig::default()) -} - -fn emit_with_config(input: &str, config: &TypeScriptEmitConfig) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_typescript(&table, config) -} - -// --- Simple Structs (Interfaces) --- - -#[test] -fn emit_interface_single_field() { - let 
input = "Foo = { #Node @value }"; - insta::assert_snapshot!(emit(input), @r" - interface Foo { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_interface_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(emit(input), @r" - interface Func { - name: string; - body: SyntaxNode; - params: SyntaxNode; - } - "); -} - -#[test] -fn emit_interface_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(emit(input), @"interface Empty {}"); -} - -#[test] -fn emit_interface_with_unit_field() { - let input = "Wrapper = { () @marker }"; - insta::assert_snapshot!(emit(input), @r" - interface Wrapper { - marker: {}; - } - "); -} - -#[test] -fn emit_interface_nested_refs() { - let input = indoc! {r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Inner { - value: SyntaxNode; - } - - interface Outer { - inner: Inner; - label: string; - } - "); -} - -// --- Tagged Unions --- - -#[test] -fn emit_tagged_union_simple() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface AssignStmt { - target: SyntaxNode; - value: SyntaxNode; - } - - interface CallStmt { - func: SyntaxNode; - } - - type Stmt = - | { tag: "Assign"; target: SyntaxNode; value: SyntaxNode } - | { tag: "Call"; func: SyntaxNode }; - "#); -} - -#[test] -fn emit_tagged_union_with_empty_variant() { - let input = indoc! 
{r#" - ValueVariant = { #Node @value } - Expr = [ Some: ValueVariant None: () ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface ValueVariant { - value: SyntaxNode; - } - - type Expr = - | { tag: "Some"; value: SyntaxNode } - | { tag: "None" }; - "#); -} - -#[test] -fn emit_tagged_union_all_empty() { - let input = "Token = [ Comma: () Dot: () Semi: () ]"; - insta::assert_snapshot!(emit(input), @r#" - type Token = - | { tag: "Comma" } - | { tag: "Dot" } - | { tag: "Semi" }; - "#); -} - -#[test] -fn emit_tagged_union_with_builtins() { - let input = "Value = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(emit(input), @r#" - type Value = - | { tag: "Text" } - | { tag: "Code" } - | { tag: "Empty" }; - "#); -} - -// --- Wrapper Types --- - -#[test] -fn emit_optional_null() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(emit(input), @"type MaybeNode = SyntaxNode | null;"); -} - -#[test] -fn emit_optional_undefined() { - let input = "MaybeNode = #Node?"; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::Undefined, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type MaybeNode = SyntaxNode | undefined;"); -} - -#[test] -fn emit_optional_question_mark() { - let input = indoc! {r#" - MaybeNode = #Node? - Foo = { MaybeNode @maybe } - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode; - - interface Foo { - maybe?: SyntaxNode; - } - "); -} - -#[test] -fn emit_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(emit(input), @"type Nodes = SyntaxNode[];"); -} - -#[test] -fn emit_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(emit(input), @"type Nodes = [SyntaxNode, ...SyntaxNode[]];"); -} - -#[test] -fn emit_optional_named() { - let input = indoc! 
{r#" - Stmt = { #Node @value } - MaybeStmt = Stmt? - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Stmt { - value: SyntaxNode; - } - - type MaybeStmt = Stmt | null; - "); -} - -#[test] -fn emit_list_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - Stmts = Stmt* - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Stmt { - value: SyntaxNode; - } - - type Stmts = Stmt[]; - "); -} - -#[test] -fn emit_nested_wrappers() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - MaybeItems = Items? - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type Items = Item[]; - - type MaybeItems = Item[] | null; - "); -} - -#[test] -fn emit_list_of_optionals() { - let input = indoc! {r#" - Item = { #Node @value } - MaybeItem = Item? - Items = MaybeItem* - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type MaybeItem = Item | null; - - type Items = (Item | null)[]; - "); -} - -// --- Config Variations --- - -#[test] -fn emit_with_export() { - let input = "Foo = { #Node @value }"; - let config = TypeScriptEmitConfig { - export: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - export interface Foo { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_readonly_fields() { - let input = "Foo = { #Node @value #string @name }"; - let config = TypeScriptEmitConfig { - readonly: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Foo { - readonly value: SyntaxNode; - readonly name: string; - } - "); -} - -#[test] -fn emit_custom_node_type() { - let input = "Foo = { #Node @value }"; - let config = TypeScriptEmitConfig { - node_type_name: "TSNode".to_string(), - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Foo { - value: TSNode; - } - "); -} - -#[test] -fn 
emit_type_alias_instead_of_interface() { - let input = "Foo = { #Node @value #string @name }"; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type Foo = { value: SyntaxNode; name: string };"); -} - -#[test] -fn emit_type_alias_empty() { - let input = "Empty = {}"; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type Empty = {};"); -} - -#[test] -fn emit_type_alias_nested() { - let input = indoc! {r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type Inner = { value: SyntaxNode }; - - type Outer = { inner: Inner; label: string }; - "); -} - -#[test] -fn emit_no_inline_synthetic() { - let input = indoc! {r#" - Container = { @inner } - "#}; - let config = TypeScriptEmitConfig { - inline_synthetic: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Container { - inner: InnerField; - } - "); -} - -#[test] -fn emit_inline_synthetic() { - let input = indoc! {r#" - Container = { @inner } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Container { - inner: InnerField; - } - "); -} - -// --- Complex Scenarios --- - -#[test] -fn emit_complex_program() { - let input = indoc! 
{r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - ExprStmt = { #Node @expr } - Stmt = [ Func: FuncDecl Expr: ExprStmt ] - Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface FuncInfo { - name: string; - body: SyntaxNode; - } - - interface Param { - name: string; - type_annotation: string; - } - - type Params = Param[]; - - interface FuncDecl { - info: FuncInfo; - params: Param[]; - } - - interface ExprStmt { - expr: SyntaxNode; - } - - type Stmt = - | { tag: "Func"; info: FuncInfo; params: Param[] } - | { tag: "Expr"; expr: SyntaxNode }; - - interface Program { - statements: Stmt; - } - "#); -} - -#[test] -fn emit_mixed_wrappers_and_structs() { - let input = indoc! {r#" - Leaf = { #string @text } - Branch = { #Node @left #Node @right } - Tree = [ Leaf: Leaf Branch: Branch ] - Forest = Tree* - MaybeForest = Forest? - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface Leaf { - text: string; - } - - interface Branch { - left: SyntaxNode; - right: SyntaxNode; - } - - type Tree = - | { tag: "Leaf"; text: string } - | { tag: "Branch"; left: SyntaxNode; right: SyntaxNode }; - - type Forest = Tree[]; - - type MaybeForest = Tree[] | null; - "#); -} - -#[test] -fn emit_all_config_options() { - let input = indoc! {r#" - MaybeNode = #Node? 
- Item = { #Node @value MaybeNode @maybe } - Items = Item* - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - export: true, - readonly: true, - inline_synthetic: true, - node_type_name: "ASTNode".to_string(), - use_type_alias: false, - default_query_name: "QueryResult".to_string(), - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - export type MaybeNode = ASTNode; - - export interface Item { - readonly value: ASTNode; - readonly maybe?: ASTNode; - } - - export type Items = Item[]; - "); -} - -// --- Edge Cases --- - -#[test] -fn emit_single_variant_union() { - let input = indoc! {r#" - OnlyVariant = { #Node @value } - Single = [ Only: OnlyVariant ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface OnlyVariant { - value: SyntaxNode; - } - - type Single = - | { tag: "Only"; value: SyntaxNode }; - "#); -} - -#[test] -fn emit_deeply_nested() { - let input = indoc! {r#" - A = { #Node @val } - B = { A @a } - C = { B @b } - D = { C @c } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface A { - val: SyntaxNode; - } - - interface B { - a: A; - } - - interface C { - b: B; - } - - interface D { - c: C; - } - "); -} - -#[test] -fn emit_union_in_list() { - let input = indoc! {r#" - A = { #Node @a } - B = { #Node @b } - Choice = [ A: A B: B ] - Choices = Choice* - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface A { - a: SyntaxNode; - } - - interface B { - b: SyntaxNode; - } - - type Choice = - | { tag: "A"; a: SyntaxNode } - | { tag: "B"; b: SyntaxNode }; - - type Choices = Choice[]; - "#); -} - -#[test] -fn emit_optional_in_struct_null_style() { - let input = indoc! {r#" - MaybeNode = #Node? 
- Container = { MaybeNode @item #string @name } - "#}; - insta::assert_snapshot!(emit(input), @r" - type MaybeNode = SyntaxNode | null; - - interface Container { - item: SyntaxNode | null; - name: string; - } - "); -} - -#[test] -fn emit_optional_in_struct_undefined_style() { - let input = indoc! {r#" - MaybeNode = #Node? - Container = { MaybeNode @item #string @name } - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::Undefined, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode | undefined; - - interface Container { - item: SyntaxNode | undefined; - name: string; - } - "); -} - -#[test] -fn emit_tagged_union_with_optional_field_question_mark() { - let input = indoc! {r#" - MaybeNode = #Node? - VariantA = { MaybeNode @value } - VariantB = { #Node @item } - Choice = [ A: VariantA B: VariantB ] - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r#" - type MaybeNode = SyntaxNode; - - interface VariantA { - value?: SyntaxNode; - } - - interface VariantB { - item: SyntaxNode; - } - - type Choice = - | { tag: "A"; value?: SyntaxNode } - | { tag: "B"; item: SyntaxNode }; - "#); -} - -#[test] -fn emit_struct_with_union_field() { - let input = indoc! {r#" - A = { #Node @a } - B = { #Node @b } - Choice = [ A: A B: B ] - Container = { Choice @choice #string @name } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface A { - a: SyntaxNode; - } - - interface B { - b: SyntaxNode; - } - - type Choice = - | { tag: "A"; a: SyntaxNode } - | { tag: "B"; b: SyntaxNode }; - - interface Container { - choice: Choice; - name: string; - } - "#); -} - -#[test] -fn emit_struct_with_forward_ref() { - let input = indoc! 
{r#" - Container = { Later @item } - Later = { #Node @value } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Later { - value: SyntaxNode; - } - - interface Container { - item: Later; - } - "); -} - -#[test] -fn emit_synthetic_type_no_inline() { - let input = " = { #Node @value }"; - let config = TypeScriptEmitConfig { - inline_synthetic: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface FooBar { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_synthetic_type_with_inline() { - let input = " = { #Node @value }"; - let config = TypeScriptEmitConfig { - inline_synthetic: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @""); -} - -#[test] -fn emit_field_referencing_tagged_union() { - let input = indoc! {r#" - VarA = { #Node @x } - VarB = { #Node @y } - Choice = [ A: VarA B: VarB ] - Container = { Choice @choice } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface VarA { - x: SyntaxNode; - } - - interface VarB { - y: SyntaxNode; - } - - type Choice = - | { tag: "A"; x: SyntaxNode } - | { tag: "B"; y: SyntaxNode }; - - interface Container { - choice: Choice; - } - "#); -} - -#[test] -fn emit_field_referencing_unknown_type() { - let input = "Container = { DoesNotExist @unknown }"; - insta::assert_snapshot!(emit(input), @r" - interface Container { - unknown: DoesNotExist; - } - "); -} - -#[test] -fn emit_empty_interface_no_type_alias() { - let input = "Empty = {}"; - let config = TypeScriptEmitConfig { - use_type_alias: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"interface Empty {}"); -} - -#[test] -fn emit_inline_synthetic_struct_with_optional_field() { - let input = indoc! {r#" - MaybeNode = #Node? 
- = { #Node @value MaybeNode @maybe } - Container = { @inner } - "#}; - let config = TypeScriptEmitConfig { - inline_synthetic: true, - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode; - - interface Container { - inner: { value: SyntaxNode; maybe?: SyntaxNode }; - } - "); -} - -#[test] -fn emit_builtin_value_with_named_key() { - let input = indoc! {r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - insta::assert_snapshot!(emit(input), @""); -} - -// --- DefaultQuery --- - -#[test] -fn emit_default_query_interface() { - let input = "#DefaultQuery = { #Node @value }"; - - insta::assert_snapshot!(emit(input), @r" - interface QueryResult { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_default_query_custom_name() { - let input = "#DefaultQuery = { #Node @value }"; - let config = TypeScriptEmitConfig { - default_query_name: "MyResult".to_string(), - ..Default::default() - }; - - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface MyResult { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_default_query_referenced() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - #DefaultQuery = { Items @items } - "#}; - - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type Items = Item[]; - - interface QueryResult { - items: Item[]; - } - "); -} diff --git a/crates/plotnik-lib/src/infer/mod.rs b/crates/plotnik-lib/src/infer/mod.rs deleted file mode 100644 index 46471372..00000000 --- a/crates/plotnik-lib/src/infer/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Type inference for query output types. -//! -//! This module provides: -//! - `TypeTable`: collection of inferred types -//! - `TypeKey` / `TypeValue`: type representation -//! - `emit_rust`: Rust code emitter -//! 
- `emit_typescript`: TypeScript code emitter - -pub mod emit; -mod types; -pub mod tyton; - -#[cfg(test)] -mod types_tests; -#[cfg(test)] -mod tyton_tests; - -pub use emit::{ - Indirection, OptionalStyle, RustEmitConfig, TypeScriptEmitConfig, emit_rust, emit_typescript, -}; -pub use types::{TypeKey, TypeTable, TypeValue}; diff --git a/crates/plotnik-lib/src/infer/types.rs b/crates/plotnik-lib/src/infer/types.rs deleted file mode 100644 index 6e9081bd..00000000 --- a/crates/plotnik-lib/src/infer/types.rs +++ /dev/null @@ -1,280 +0,0 @@ -//! Type representation for inferred query output types. -//! -//! # Overview -//! -//! The type system is flat: all types live in a `TypeTable` keyed by `TypeKey`. -//! Wrapper types (Optional, List, NonEmptyList) reference inner types by key. -//! -//! # Design Decisions -//! -//! ## Alternation Handling -//! -//! Alternations (`[A: ... B: ...]` or `[... ...]`) produce different type structures: -//! -//! - **Tagged alternations** (`[A: expr B: expr]`): Become `TaggedUnion` with named variants. -//! Each branch gets its own struct type, discriminated by the tag name. -//! -//! - **Untagged/mixed alternations** (`[expr expr]`): Branches are "merged" into a single -//! struct where fields are combined. The merge rules: -//! 1. Field present in all branches with same type → field has that type -//! 2. Field present in some branches only → field becomes Optional -//! 3. Field present in all branches but with different types → field gets Invalid type -//! -//! ## Invalid Type -//! -//! The `Invalid` type represents a type conflict that couldn't be resolved (e.g., field -//! has `Node` in one branch and `String` in another). It is emitted the same as `Unit` -//! in code generators—this keeps output valid while signaling the user made a questionable -//! query. Diagnostics should warn about Invalid types during inference. -//! -//! ## Type Keys vs Type Values -//! -//! - `TypeKey`: Identity/reference to a type. 
Used in field types, wrapper inner types. -//! - `TypeValue`: The actual type definition. Stored in the table. -//! -//! Built-in types (Node, String, Unit, Invalid) have both a key and value variant for -//! consistency—the key is what you reference, the value is what gets stored. -//! -//! ## DefaultQuery Key -//! -//! `TypeKey::DefaultQuery` represents the unnamed entry point query (the last definition -//! without a name). It has no corresponding `TypeValue` variant—it's purely a key that -//! maps to a Struct or other value. The emitted name ("QueryResult" by default) is -//! configurable per code generator. -//! -//! ## Synthetic Keys -//! -//! For nested captures like `(function @fn { (param @p) @params })`, we need unique type -//! names. Synthetic keys use path segments: `["fn", "params"]` → `FnParams`. This avoids -//! name collisions while keeping names readable. - -use indexmap::IndexMap; - -/// Identity of a type in the type table. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TypeKey<'src> { - /// Tree-sitter node (built-in) - Node, - /// String value from `:: string` annotation (built-in) - String, - /// Unit type for empty captures (built-in) - Unit, - /// Invalid type for unresolvable conflicts (built-in) - /// Emitted same as Unit in code generators. - Invalid, - /// The unnamed entry point query (last definition without a name). - /// Default emitted name is "QueryResult", but emitters may override. - DefaultQuery, - /// User-provided type name via `:: TypeName` - Named(&'src str), - /// Path-based synthetic name: ["Foo", "bar"] → FooBar - Synthetic(Vec<&'src str>), -} - -impl TypeKey<'_> { - /// Render as PascalCase type name. 
- pub fn to_pascal_case(&self) -> String { - match self { - TypeKey::Node => "Node".to_string(), - TypeKey::String => "String".to_string(), - TypeKey::Unit => "Unit".to_string(), - TypeKey::Invalid => "Unit".to_string(), // Invalid emits as Unit - TypeKey::DefaultQuery => "DefaultQuery".to_string(), - TypeKey::Named(name) => (*name).to_string(), - TypeKey::Synthetic(segments) => segments.iter().map(|s| to_pascal(s)).collect(), - } - } - - /// Returns true if this is a built-in primitive type. - pub fn is_builtin(&self) -> bool { - matches!( - self, - TypeKey::Node | TypeKey::String | TypeKey::Unit | TypeKey::Invalid - ) - } - - /// Returns true if this is the default query entry point. - pub fn is_default_query(&self) -> bool { - matches!(self, TypeKey::DefaultQuery) - } -} - -/// Convert snake_case or lowercase to PascalCase. -pub(crate) fn to_pascal(s: &str) -> String { - s.split('_') - .map(|part| { - let mut chars = part.chars(); - match chars.next() { - None => String::new(), - Some(first) => first.to_uppercase().chain(chars).collect(), - } - }) - .collect() -} - -/// Type definition stored in the type table. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TypeValue<'src> { - /// Tree-sitter node primitive - Node, - /// String primitive - String, - /// Unit type (empty struct) - Unit, - /// Invalid type (conflicting types in untagged union) - /// Emitted same as Unit. Presence indicates a diagnostic should be emitted. - Invalid, - /// Struct with named fields - Struct(IndexMap<&'src str, TypeKey<'src>>), - /// Tagged union: variant name → variant type (must resolve to Struct or Unit) - TaggedUnion(IndexMap<&'src str, TypeKey<'src>>), - /// Optional wrapper - Optional(TypeKey<'src>), - /// Zero-or-more list wrapper - List(TypeKey<'src>), - /// One-or-more list wrapper - NonEmptyList(TypeKey<'src>), -} - -/// Result of merging a single field across branches. 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub enum MergedField<'src> { - /// Field has same type in all branches where present - Same(TypeKey<'src>), - /// Field has same type but missing in some branches → needs Optional wrapper - Optional(TypeKey<'src>), - /// Field has conflicting types across branches → Invalid - Conflict, -} - -/// Collection of all inferred types for a query. -#[derive(Debug, Clone)] -pub struct TypeTable<'src> { - /// All type definitions, keyed by their identity. - /// Pre-populated with built-in types (Node, String, Unit, Invalid). - pub types: IndexMap, TypeValue<'src>>, - /// Types that contain cyclic references (need Box in Rust). - pub cyclic: Vec>, -} - -impl<'src> TypeTable<'src> { - /// Create a new type table with built-in types pre-populated. - pub fn new() -> Self { - let mut types = IndexMap::new(); - types.insert(TypeKey::Node, TypeValue::Node); - types.insert(TypeKey::String, TypeValue::String); - types.insert(TypeKey::Unit, TypeValue::Unit); - types.insert(TypeKey::Invalid, TypeValue::Invalid); - Self { - types, - cyclic: Vec::new(), - } - } - - /// Insert a type definition. Returns the key for chaining. - pub fn insert(&mut self, key: TypeKey<'src>, value: TypeValue<'src>) -> TypeKey<'src> { - self.types.insert(key.clone(), value); - key - } - - /// Mark a type as cyclic (requires indirection in Rust). - pub fn mark_cyclic(&mut self, key: TypeKey<'src>) { - if !self.cyclic.contains(&key) { - self.cyclic.push(key); - } - } - - /// Check if a type is cyclic. - pub fn is_cyclic(&self, key: &TypeKey<'src>) -> bool { - self.cyclic.contains(key) - } - - /// Get a type by key. - pub fn get(&self, key: &TypeKey<'src>) -> Option<&TypeValue<'src>> { - self.types.get(key) - } - - /// Iterate over all types in insertion order. - pub fn iter(&self) -> impl Iterator, &TypeValue<'src>)> { - self.types.iter() - } - - /// Merge fields from multiple struct branches (for untagged unions). 
- /// - /// Given a list of field maps (one per branch), produces a merged field map where: - /// - Fields present in all branches with the same type keep that type - /// - Fields present in only some branches become Optional - /// - Fields with conflicting types across branches become Invalid - /// - /// # Example - /// - /// Branch 1: `{ name: String, value: Node }` - /// Branch 2: `{ name: String, extra: Node }` - /// - /// Merged: `{ name: String, value: Optional, extra: Optional }` - /// - /// # Type Conflict Example - /// - /// Branch 1: `{ x: String }` - /// Branch 2: `{ x: Node }` - /// - /// Merged: `{ x: Invalid }` (with diagnostic warning) - pub fn merge_fields( - branches: &[IndexMap<&'src str, TypeKey<'src>>], - ) -> IndexMap<&'src str, MergedField<'src>> { - if branches.is_empty() { - return IndexMap::new(); - } - - // Collect all field names across all branches - let mut all_fields: IndexMap<&'src str, ()> = IndexMap::new(); - for branch in branches { - for field_name in branch.keys() { - all_fields.entry(*field_name).or_insert(()); - } - } - - let mut result = IndexMap::new(); - let branch_count = branches.len(); - - for field_name in all_fields.keys() { - // Collect (type, count) for this field across branches - let mut type_occurrences: Vec<&TypeKey<'src>> = Vec::new(); - for branch in branches { - if let Some(ty) = branch.get(field_name) { - type_occurrences.push(ty); - } - } - - let present_count = type_occurrences.len(); - if present_count == 0 { - continue; - } - - // Check if all occurrences have the same type - let first_type = type_occurrences[0]; - let all_same_type = type_occurrences.iter().all(|t| *t == first_type); - - let merged = if !all_same_type { - // Type conflict - MergedField::Conflict - } else if present_count == branch_count { - // Present in all branches with same type - MergedField::Same(first_type.clone()) - } else { - // Present in some branches only - MergedField::Optional(first_type.clone()) - }; - - 
result.insert(*field_name, merged); - } - - result - } -} - -impl Default for TypeTable<'_> { - fn default() -> Self { - Self::new() - } -} diff --git a/crates/plotnik-lib/src/infer/types_tests.rs b/crates/plotnik-lib/src/infer/types_tests.rs deleted file mode 100644 index 32299deb..00000000 --- a/crates/plotnik-lib/src/infer/types_tests.rs +++ /dev/null @@ -1,377 +0,0 @@ -use super::types::{MergedField, TypeKey, TypeTable, TypeValue, to_pascal}; -use indexmap::IndexMap; - -#[test] -fn type_key_to_pascal_case_builtins() { - assert_eq!(TypeKey::Node.to_pascal_case(), "Node"); - assert_eq!(TypeKey::String.to_pascal_case(), "String"); - assert_eq!(TypeKey::Unit.to_pascal_case(), "Unit"); - assert_eq!(TypeKey::Invalid.to_pascal_case(), "Unit"); // Invalid emits as Unit -} - -#[test] -fn type_key_to_pascal_case_named() { - assert_eq!( - TypeKey::Named("FunctionInfo").to_pascal_case(), - "FunctionInfo" - ); - assert_eq!(TypeKey::Named("Stmt").to_pascal_case(), "Stmt"); -} - -#[test] -fn type_key_to_pascal_case_synthetic() { - assert_eq!(TypeKey::Synthetic(vec!["Foo"]).to_pascal_case(), "Foo"); - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar"]).to_pascal_case(), - "FooBar" - ); - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar", "baz"]).to_pascal_case(), - "FooBarBaz" - ); -} - -#[test] -fn type_key_to_pascal_case_snake_case_segments() { - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar_baz"]).to_pascal_case(), - "FooBarBaz" - ); - assert_eq!( - TypeKey::Synthetic(vec!["function_info", "params"]).to_pascal_case(), - "FunctionInfoParams" - ); -} - -#[test] -fn type_table_new_has_builtins() { - let table = TypeTable::new(); - assert_eq!(table.get(&TypeKey::Node), Some(&TypeValue::Node)); - assert_eq!(table.get(&TypeKey::String), Some(&TypeValue::String)); - assert_eq!(table.get(&TypeKey::Unit), Some(&TypeValue::Unit)); - assert_eq!(table.get(&TypeKey::Invalid), Some(&TypeValue::Invalid)); -} - -#[test] -fn type_table_insert_and_get() { - let mut table = 
TypeTable::new(); - let key = TypeKey::Named("Foo"); - let value = TypeValue::Struct(IndexMap::new()); - table.insert(key.clone(), value.clone()); - assert_eq!(table.get(&key), Some(&value)); -} - -#[test] -fn type_table_cyclic_tracking() { - let mut table = TypeTable::new(); - let key = TypeKey::Named("Recursive"); - - assert!(!table.is_cyclic(&key)); - table.mark_cyclic(key.clone()); - assert!(table.is_cyclic(&key)); - - // Double marking is idempotent - table.mark_cyclic(key.clone()); - assert_eq!(table.cyclic.len(), 1); -} - -#[test] -fn type_table_iter_preserves_order() { - let mut table = TypeTable::new(); - table.insert(TypeKey::Named("A"), TypeValue::Unit); - table.insert(TypeKey::Named("B"), TypeValue::Unit); - table.insert(TypeKey::Named("C"), TypeValue::Unit); - - let keys: Vec<_> = table.iter().map(|(k, _)| k.clone()).collect(); - // Builtins first (Node, String, Unit, Invalid), then inserted order - assert_eq!(keys[0], TypeKey::Node); - assert_eq!(keys[1], TypeKey::String); - assert_eq!(keys[2], TypeKey::Unit); - assert_eq!(keys[3], TypeKey::Invalid); - assert_eq!(keys[4], TypeKey::Named("A")); - assert_eq!(keys[5], TypeKey::Named("B")); - assert_eq!(keys[6], TypeKey::Named("C")); -} - -#[test] -fn type_table_default() { - let table: TypeTable = Default::default(); - assert!(table.get(&TypeKey::Node).is_some()); -} - -#[test] -fn type_value_equality() { - let s1 = TypeValue::Struct(IndexMap::new()); - let s2 = TypeValue::Struct(IndexMap::new()); - assert_eq!(s1, s2); - - let mut fields = IndexMap::new(); - fields.insert("x", TypeKey::Node); - let s3 = TypeValue::Struct(fields); - assert_ne!(s1, s3); -} - -#[test] -fn type_value_wrapper_types() { - let opt = TypeValue::Optional(TypeKey::Node); - let list = TypeValue::List(TypeKey::Node); - let ne_list = TypeValue::NonEmptyList(TypeKey::Node); - - assert_ne!(opt, list); - assert_ne!(list, ne_list); -} - -#[test] -fn type_value_tagged_union() { - let mut table = TypeTable::new(); - - let mut assign_fields 
= IndexMap::new(); - assign_fields.insert("target", TypeKey::String); - table.insert( - TypeKey::Synthetic(vec!["Stmt", "Assign"]), - TypeValue::Struct(assign_fields), - ); - - let mut call_fields = IndexMap::new(); - call_fields.insert("func", TypeKey::String); - table.insert( - TypeKey::Synthetic(vec!["Stmt", "Call"]), - TypeValue::Struct(call_fields), - ); - - let mut variants = IndexMap::new(); - variants.insert("Assign", TypeKey::Synthetic(vec!["Stmt", "Assign"])); - variants.insert("Call", TypeKey::Synthetic(vec!["Stmt", "Call"])); - - let union = TypeValue::TaggedUnion(variants); - table.insert(TypeKey::Named("Stmt"), union); - - let TypeValue::TaggedUnion(v) = table.get(&TypeKey::Named("Stmt")).unwrap() else { - panic!("expected TaggedUnion"); - }; - assert_eq!(v.len(), 2); - assert!(v.contains_key("Assign")); - assert!(v.contains_key("Call")); - assert!(table.get(&v["Assign"]).is_some()); -} - -#[test] -fn type_value_tagged_union_empty_variant() { - let mut table = TypeTable::new(); - - let mut variants = IndexMap::new(); - variants.insert("Empty", TypeKey::Unit); - table.insert( - TypeKey::Named("MaybeEmpty"), - TypeValue::TaggedUnion(variants), - ); - - let TypeValue::TaggedUnion(v) = table.get(&TypeKey::Named("MaybeEmpty")).unwrap() else { - panic!("expected TaggedUnion"); - }; - assert_eq!(v["Empty"], TypeKey::Unit); -} - -#[test] -fn to_pascal_empty_string() { - assert_eq!(to_pascal(""), ""); -} - -#[test] -fn to_pascal_single_char() { - assert_eq!(to_pascal("a"), "A"); - assert_eq!(to_pascal("Z"), "Z"); -} - -#[test] -fn to_pascal_already_pascal() { - assert_eq!(to_pascal("FooBar"), "FooBar"); -} - -#[test] -fn to_pascal_multiple_underscores() { - assert_eq!(to_pascal("foo__bar"), "FooBar"); - assert_eq!(to_pascal("_foo_"), "Foo"); -} - -#[test] -fn type_key_equality() { - assert_eq!(TypeKey::Node, TypeKey::Node); - assert_ne!(TypeKey::Node, TypeKey::String); - assert_eq!(TypeKey::Named("Foo"), TypeKey::Named("Foo")); - 
assert_ne!(TypeKey::Named("Foo"), TypeKey::Named("Bar")); - assert_eq!( - TypeKey::Synthetic(vec!["a", "b"]), - TypeKey::Synthetic(vec!["a", "b"]) - ); - assert_ne!( - TypeKey::Synthetic(vec!["a", "b"]), - TypeKey::Synthetic(vec!["a", "c"]) - ); -} - -#[test] -fn type_key_hash_consistency() { - use std::collections::HashSet; - let mut set = HashSet::new(); - set.insert(TypeKey::Node); - set.insert(TypeKey::Named("Foo")); - set.insert(TypeKey::Synthetic(vec!["a", "b"])); - - assert!(set.contains(&TypeKey::Node)); - assert!(set.contains(&TypeKey::Named("Foo"))); - assert!(set.contains(&TypeKey::Synthetic(vec!["a", "b"]))); - assert!(!set.contains(&TypeKey::String)); -} - -#[test] -fn type_key_is_builtin() { - assert!(TypeKey::Node.is_builtin()); - assert!(TypeKey::String.is_builtin()); - assert!(TypeKey::Unit.is_builtin()); - assert!(TypeKey::Invalid.is_builtin()); - assert!(!TypeKey::Named("Foo").is_builtin()); - assert!(!TypeKey::Synthetic(vec!["a"]).is_builtin()); -} - -#[test] -fn type_value_invalid() { - assert_eq!(TypeValue::Invalid, TypeValue::Invalid); - assert_ne!(TypeValue::Invalid, TypeValue::Unit); -} - -#[test] -fn merge_fields_empty_branches() { - let branches: Vec> = vec![]; - - let merged = TypeTable::merge_fields(&branches); - - assert!(merged.is_empty()); -} - -#[test] -fn merge_fields_single_branch() { - let mut branch = IndexMap::new(); - branch.insert("name", TypeKey::String); - branch.insert("value", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Same(TypeKey::Node)); -} - -#[test] -fn merge_fields_identical_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 1); - 
assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); -} - -#[test] -fn merge_fields_missing_in_some_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - branch1.insert("value", TypeKey::Node); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - // value missing - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_disjoint_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("a", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("b", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["a"], MergedField::Optional(TypeKey::String)); - assert_eq!(merged["b"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_type_conflict() { - let mut branch1 = IndexMap::new(); - branch1.insert("x", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("x", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 1); - assert_eq!(merged["x"], MergedField::Conflict); -} - -#[test] -fn merge_fields_partial_conflict() { - // Three branches: x is String in branch 1 and 2, Node in branch 3 - let mut branch1 = IndexMap::new(); - branch1.insert("x", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("x", TypeKey::String); - - let mut branch3 = IndexMap::new(); - branch3.insert("x", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2, branch3]); - - assert_eq!(merged["x"], MergedField::Conflict); -} - -#[test] -fn merge_fields_complex_scenario() { - // Branch 1: { name: String, value: Node } - // Branch 2: { name: String, extra: Node } - // Result: { 
name: String, value: Optional, extra: Optional } - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - branch1.insert("value", TypeKey::Node); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - branch2.insert("extra", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 3); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Optional(TypeKey::Node)); - assert_eq!(merged["extra"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_preserves_order() { - let mut branch1 = IndexMap::new(); - branch1.insert("z", TypeKey::String); - branch1.insert("a", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("m", TypeKey::String); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - let keys: Vec<_> = merged.keys().collect(); - // Order follows first occurrence across branches - assert_eq!(keys, vec![&"z", &"a", &"m"]); -} diff --git a/crates/plotnik-lib/src/infer/tyton.rs b/crates/plotnik-lib/src/infer/tyton.rs deleted file mode 100644 index e3a89364..00000000 --- a/crates/plotnik-lib/src/infer/tyton.rs +++ /dev/null @@ -1,547 +0,0 @@ -//! Tyton: Types Testing Object Notation -//! -//! A compact DSL for constructing `TypeTable` test fixtures. -//! Supports both parsing (text → TypeTable) and emitting (TypeTable → text). -//! -//! # Design -//! -//! Tyton uses a **flattened structure** mirroring `TypeTable`: all types are -//! top-level definitions referenced by name. No inline nesting is supported. -//! -//! ```text -//! // ✗ Invalid: inline optional -//! Foo = { #Node? @maybe } -//! -//! // ✓ Valid: separate definition + reference -//! MaybeNode = #Node? -//! Foo = { MaybeNode @maybe } -//! ``` -//! -//! # Syntax -//! -//! Keys: -//! - `#Node` — built-in node type -//! - `#string` — built-in string type -//! - `#Invalid` — built-in invalid type -//! 
- `#DefaultQuery` — unnamed entry point query -//! - `()` — built-in unit type -//! - `PascalName` — named type -//! - `` — synthetic key from path segments -//! -//! Values: -//! - `{ Type @field ... }` — struct with fields -//! - `[ Tag: Type ... ]` — tagged union -//! - `Key?` — optional wrapper -//! - `Key*` — list wrapper -//! - `Key+` — non-empty list wrapper -//! - `#Node` / `#string` / `()` — bare builtin alias -//! -//! Definitions: -//! - `Name = { ... }` — define a struct -//! - `Name = [ ... ]` — define a tagged union -//! - `Name = Other?` — define an optional -//! - ` = { ... }` — define with synthetic key -//! - `#DefaultQuery = { ... }` — define unnamed entry point -//! - `AliasNode = #Node` — alias to builtin -//! -//! # Example -//! -//! ```text -//! FuncInfo = { #string @name #Node @body } -//! Stmt = [ Assign: AssignStmt Call: CallStmt ] -//! Stmts = Stmt* -//! ``` - -use std::fmt::Write; - -use indexmap::IndexMap; -use logos::Logos; - -use super::{TypeKey, TypeTable, TypeValue}; - -#[derive(Logos, Debug, Clone, PartialEq)] -#[logos(skip r"[ \t\n\r]+")] -enum Token<'src> { - // Built-in type keywords (prefixed with #) - #[token("#Node")] - Node, - - #[token("#string")] - String, - - #[token("#Invalid")] - Invalid, - - #[token("#DefaultQuery")] - DefaultQuery, - - #[token("()")] - Unit, - - // Symbols - #[token("=")] - Eq, - - #[token("{")] - LBrace, - - #[token("}")] - RBrace, - - #[token("[")] - LBracket, - - #[token("]")] - RBracket, - - #[token("<")] - LAngle, - - #[token(">")] - RAngle, - - #[token(":")] - Colon, - - #[token("@")] - At, - - #[token("?")] - Question, - - #[token("*")] - Star, - - #[token("+")] - Plus, - - // Identifiers: PascalCase for type names, snake_case for fields/segments - #[regex(r"[A-Z][a-zA-Z0-9]*", |lex| lex.slice())] - UpperIdent(&'src str), - - #[regex(r"[a-z][a-z0-9_]*", |lex| lex.slice())] - LowerIdent(&'src str), -} - -struct Parser<'src> { - tokens: Vec<(Token<'src>, std::ops::Range)>, - pos: usize, - input: 
&'src str, -} - -#[derive(Debug)] -pub struct ParseError { - pub message: String, - pub span: std::ops::Range, -} - -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} at {:?}", self.message, self.span) - } -} - -impl std::error::Error for ParseError {} - -impl<'src> Parser<'src> { - fn new(input: &'src str) -> Result { - let lexer = Token::lexer(input); - let mut tokens = Vec::new(); - - for (result, span) in lexer.spanned() { - match result { - Ok(token) => tokens.push((token, span)), - Err(_) => { - return Err(ParseError { - message: format!("unexpected character: {:?}", &input[span.clone()]), - span, - }); - } - } - } - - Ok(Self { - tokens, - pos: 0, - input, - }) - } - - fn peek(&self) -> Option<&Token<'src>> { - self.tokens.get(self.pos).map(|(t, _)| t) - } - - fn advance(&mut self) -> Option<&Token<'src>> { - let token = self.tokens.get(self.pos).map(|(t, _)| t); - if token.is_some() { - self.pos += 1; - } - token - } - - fn current_span(&self) -> std::ops::Range { - self.tokens - .get(self.pos) - .map(|(_, s)| s.clone()) - .unwrap_or(self.input.len()..self.input.len()) - } - - fn expect(&mut self, expected: Token<'src>) -> Result<(), ParseError> { - let span = self.current_span(); - match self.advance() { - Some(t) if std::mem::discriminant(t) == std::mem::discriminant(&expected) => Ok(()), - Some(t) => Err(ParseError { - message: format!("expected {:?}, got {:?}", expected, t), - span, - }), - None => Err(ParseError { - message: format!("expected {:?}, got EOF", expected), - span, - }), - } - } - - fn parse_type_key(&mut self) -> Result, ParseError> { - let span = self.current_span(); - match self.peek() { - Some(Token::Node) => { - self.advance(); - Ok(TypeKey::Node) - } - Some(Token::String) => { - self.advance(); - Ok(TypeKey::String) - } - Some(Token::Invalid) => { - self.advance(); - Ok(TypeKey::Invalid) - } - Some(Token::DefaultQuery) => { - self.advance(); - 
Ok(TypeKey::DefaultQuery) - } - Some(Token::Unit) => { - self.advance(); - Ok(TypeKey::Unit) - } - Some(Token::UpperIdent(name)) => { - let name = *name; - self.advance(); - Ok(TypeKey::Named(name)) - } - Some(Token::LAngle) => self.parse_synthetic_key(), - _ => Err(ParseError { - message: "expected type key".to_string(), - span, - }), - } - } - - fn parse_synthetic_key(&mut self) -> Result, ParseError> { - self.expect(Token::LAngle)?; - let mut segments = Vec::new(); - - loop { - let span = self.current_span(); - match self.peek() { - Some(Token::RAngle) => { - self.advance(); - break; - } - Some(Token::UpperIdent(s)) => { - let s = *s; - self.advance(); - segments.push(s); - } - Some(Token::LowerIdent(s)) => { - let s = *s; - self.advance(); - segments.push(s); - } - _ => { - return Err(ParseError { - message: "expected identifier or '>'".to_string(), - span, - }); - } - } - } - - if segments.is_empty() { - return Err(ParseError { - message: "synthetic key cannot be empty".to_string(), - span: self.current_span(), - }); - } - - Ok(TypeKey::Synthetic(segments)) - } - - fn parse_type_value(&mut self) -> Result, ParseError> { - let span = self.current_span(); - match self.peek() { - Some(Token::LBrace) => self.parse_struct(), - Some(Token::LBracket) => self.parse_tagged_union(), - Some(Token::Node) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Node, TypeValue::Node) - } - Some(Token::String) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::String, TypeValue::String) - } - Some(Token::Invalid) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Invalid, TypeValue::Invalid) - } - Some(Token::Unit) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Unit, TypeValue::Unit) - } - Some(Token::UpperIdent(_)) | Some(Token::LAngle) => { - let key = self.parse_type_key()?; - self.parse_wrapper(key) - } - _ => Err(ParseError { - message: "expected type value".to_string(), - span, - }), - } - } - - fn parse_wrapper_or_bare( - &mut 
self, - key: TypeKey<'src>, - bare: TypeValue<'src>, - ) -> Result, ParseError> { - match self.peek() { - Some(Token::Question) => { - self.advance(); - Ok(TypeValue::Optional(key)) - } - Some(Token::Star) => { - self.advance(); - Ok(TypeValue::List(key)) - } - Some(Token::Plus) => { - self.advance(); - Ok(TypeValue::NonEmptyList(key)) - } - _ => Ok(bare), - } - } - - fn parse_struct(&mut self) -> Result, ParseError> { - self.expect(Token::LBrace)?; - let mut fields = IndexMap::new(); - - loop { - if matches!(self.peek(), Some(Token::RBrace)) { - self.advance(); - break; - } - - let type_key = self.parse_type_key()?; - self.expect(Token::At)?; - - let span = self.current_span(); - let field_name = match self.advance() { - Some(Token::LowerIdent(name)) => *name, - _ => { - return Err(ParseError { - message: "expected field name (lowercase)".to_string(), - span, - }); - } - }; - - fields.insert(field_name, type_key); - } - - Ok(TypeValue::Struct(fields)) - } - - fn parse_tagged_union(&mut self) -> Result, ParseError> { - self.expect(Token::LBracket)?; - let mut variants = IndexMap::new(); - - loop { - if matches!(self.peek(), Some(Token::RBracket)) { - self.advance(); - break; - } - - let span = self.current_span(); - let tag = match self.advance() { - Some(Token::UpperIdent(name)) => *name, - _ => { - return Err(ParseError { - message: "expected variant tag (uppercase)".to_string(), - span, - }); - } - }; - - self.expect(Token::Colon)?; - let type_key = self.parse_type_key()?; - variants.insert(tag, type_key); - } - - Ok(TypeValue::TaggedUnion(variants)) - } - - fn parse_wrapper(&mut self, inner: TypeKey<'src>) -> Result, ParseError> { - match self.peek() { - Some(Token::Question) => { - self.advance(); - Ok(TypeValue::Optional(inner)) - } - Some(Token::Star) => { - self.advance(); - Ok(TypeValue::List(inner)) - } - Some(Token::Plus) => { - self.advance(); - Ok(TypeValue::NonEmptyList(inner)) - } - _ => Err(ParseError { - message: "expected quantifier (?, *, +) 
after type key".to_string(), - span: self.current_span(), - }), - } - } - - fn parse_definition(&mut self) -> Result<(TypeKey<'src>, TypeValue<'src>), ParseError> { - let span = self.current_span(); - let key = match self.peek() { - Some(Token::UpperIdent(name)) => { - let name = *name; - self.advance(); - TypeKey::Named(name) - } - Some(Token::DefaultQuery) => { - self.advance(); - TypeKey::DefaultQuery - } - Some(Token::LAngle) => self.parse_synthetic_key()?, - _ => { - return Err(ParseError { - message: "expected type name (uppercase) or synthetic key".to_string(), - span, - }); - } - }; - - self.expect(Token::Eq)?; - let value = self.parse_type_value()?; - - Ok((key, value)) - } - - fn parse_all(&mut self) -> Result, ParseError> { - let mut table = TypeTable::new(); - - while self.peek().is_some() { - let (key, value) = self.parse_definition()?; - table.insert(key, value); - } - - Ok(table) - } -} - -/// Parse tyton notation into a TypeTable. -pub fn parse(input: &str) -> Result, ParseError> { - let mut parser = Parser::new(input)?; - parser.parse_all() -} - -/// Emit TypeTable as tyton notation. 
-pub fn emit(table: &TypeTable<'_>) -> String { - let mut out = String::new(); - - for (key, value) in table.iter() { - if is_builtin(key) { - continue; - } - if !out.is_empty() { - out.push('\n'); - } - emit_key(&mut out, key); - out.push_str(" = "); - emit_value(&mut out, value); - } - - out -} - -fn is_builtin(key: &TypeKey<'_>) -> bool { - matches!( - key, - TypeKey::Node | TypeKey::String | TypeKey::Unit | TypeKey::Invalid - ) -} - -fn emit_key(out: &mut String, key: &TypeKey<'_>) { - match key { - TypeKey::Node => out.push_str("#Node"), - TypeKey::String => out.push_str("#string"), - TypeKey::Invalid => out.push_str("#Invalid"), - TypeKey::Unit => out.push_str("()"), - TypeKey::DefaultQuery => out.push_str("#DefaultQuery"), - TypeKey::Named(name) => out.push_str(name), - TypeKey::Synthetic(segments) => { - out.push('<'); - for (i, seg) in segments.iter().enumerate() { - if i > 0 { - out.push(' '); - } - out.push_str(seg); - } - out.push('>'); - } - } -} - -fn emit_value(out: &mut String, value: &TypeValue<'_>) { - match value { - TypeValue::Node => out.push_str("#Node"), - TypeValue::String => out.push_str("#string"), - TypeValue::Invalid => out.push_str("#Invalid"), - TypeValue::Unit => out.push_str("()"), - TypeValue::Struct(fields) => { - out.push_str("{ "); - for (i, (field, key)) in fields.iter().enumerate() { - if i > 0 { - out.push(' '); - } - emit_key(out, key); - write!(out, " @{}", field).unwrap(); - } - out.push_str(" }"); - } - TypeValue::TaggedUnion(variants) => { - out.push_str("[ "); - for (i, (tag, key)) in variants.iter().enumerate() { - if i > 0 { - out.push(' '); - } - write!(out, "{}: ", tag).unwrap(); - emit_key(out, key); - } - out.push_str(" ]"); - } - TypeValue::Optional(key) => { - emit_key(out, key); - out.push('?'); - } - TypeValue::List(key) => { - emit_key(out, key); - out.push('*'); - } - TypeValue::NonEmptyList(key) => { - emit_key(out, key); - out.push('+'); - } - } -} diff --git a/crates/plotnik-lib/src/infer/tyton_tests.rs 
b/crates/plotnik-lib/src/infer/tyton_tests.rs deleted file mode 100644 index c948f295..00000000 --- a/crates/plotnik-lib/src/infer/tyton_tests.rs +++ /dev/null @@ -1,599 +0,0 @@ -use super::tyton::{emit, parse}; -use indoc::indoc; - -fn dump_table(input: &str) -> String { - match parse(input) { - Ok(table) => { - let mut out = String::new(); - for (key, value) in table.iter() { - out.push_str(&format!("{:?} = {:?}\n", key, value)); - } - out - } - Err(e) => format!("ERROR: {}", e), - } -} - -#[test] -fn parse_empty() { - insta::assert_snapshot!(dump_table(""), @r" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - "); -} - -#[test] -fn parse_struct_simple() { - let input = "Foo = { #Node @name }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Foo") = Struct({"name": Node}) - "#); -} - -#[test] -fn parse_struct_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Func") = Struct({"name": String, "body": Node, "params": Node}) - "#); -} - -#[test] -fn parse_struct_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Empty") = Struct({}) - "#); -} - -#[test] -fn parse_struct_with_unit() { - let input = "Wrapper = { () @unit }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = Struct({"unit": Unit}) - "#); -} - -#[test] -fn parse_tagged_union() { - let input = "Stmt = [ Assign: AssignStmt Call: CallStmt ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Stmt") = TaggedUnion({"Assign": Named("AssignStmt"), "Call": Named("CallStmt")}) - "#); -} - 
-#[test] -fn parse_tagged_union_single() { - let input = "Single = [ Only: OnlyVariant ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Single") = TaggedUnion({"Only": Named("OnlyVariant")}) - "#); -} - -#[test] -fn parse_tagged_union_with_builtins() { - let input = "Mixed = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Mixed") = TaggedUnion({"Text": String, "Code": Node, "Empty": Unit}) - "#); -} - -#[test] -fn parse_optional() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeNode") = Optional(Node) - "#); -} - -#[test] -fn parse_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Nodes") = List(Node) - "#); -} - -#[test] -fn parse_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Nodes") = NonEmptyList(Node) - "#); -} - -#[test] -fn parse_optional_named() { - let input = "MaybeStmt = Stmt?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeStmt") = Optional(Named("Stmt")) - "#); -} - -#[test] -fn parse_list_named() { - let input = "Stmts = Stmt*"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Stmts") = List(Named("Stmt")) - "#); -} - -#[test] -fn parse_synthetic_key_simple() { - let input = "Wrapper = ?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = 
Optional(Synthetic(["Foo", "bar"])) - "#); -} - -#[test] -fn parse_synthetic_key_multiple_segments() { - let input = "Wrapper = *"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = List(Synthetic(["Foo", "bar", "baz"])) - "#); -} - -#[test] -fn parse_struct_with_synthetic() { - let input = "Container = { @inner }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Container") = Struct({"inner": Synthetic(["Inner", "field"])}) - "#); -} - -#[test] -fn parse_union_with_synthetic() { - let input = "Choice = [ First: Second: ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Choice") = TaggedUnion({"First": Synthetic(["Choice", "first"]), "Second": Synthetic(["Choice", "second"])}) - "#); -} - -#[test] -fn parse_multiple_definitions() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AssignStmt") = Struct({"target": Node, "value": Node}) - Named("CallStmt") = Struct({"func": Node, "args": Node}) - Named("Stmt") = TaggedUnion({"Assign": Named("AssignStmt"), "Call": Named("CallStmt")}) - Named("Stmts") = List(Named("Stmt")) - "#); -} - -#[test] -fn parse_complex_example() { - let input = indoc! {r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - Stmt = [ Func: FuncDecl Expr: #Node ] - MaybeStmt = Stmt? 
- Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("FuncInfo") = Struct({"name": String, "body": Node}) - Named("Param") = Struct({"name": String, "type_annotation": String}) - Named("Params") = List(Named("Param")) - Named("FuncDecl") = Struct({"info": Named("FuncInfo"), "params": Named("Params")}) - Named("Stmt") = TaggedUnion({"Func": Named("FuncDecl"), "Expr": Node}) - Named("MaybeStmt") = Optional(Named("Stmt")) - Named("Program") = Struct({"statements": Named("Stmt")}) - "#); -} - -#[test] -fn parse_all_builtins() { - let input = indoc! {r#" - AllBuiltins = { #Node @node #string @str () @unit } - OptNode = #Node? - ListStr = #string* - NonEmptyUnit = ()+ - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AllBuiltins") = Struct({"node": Node, "str": String, "unit": Unit}) - Named("OptNode") = Optional(Node) - Named("ListStr") = List(String) - Named("NonEmptyUnit") = NonEmptyList(Unit) - "#); -} - -#[test] -fn parse_invalid_builtin() { - let input = "HasInvalid = { #Invalid @bad }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("HasInvalid") = Struct({"bad": Invalid}) - "#); -} - -#[test] -fn parse_invalid_wrapper() { - let input = "MaybeInvalid = #Invalid?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeInvalid") = Optional(Invalid) - "#); -} - -#[test] -fn error_missing_eq() { - let input = "Foo { #Node @x }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected Eq, got LBrace at 4..5"); -} - -#[test] -fn error_missing_at() { - let input = "Foo = { #Node name }"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: expected At, got LowerIdent("name") at 14..18"#); -} - -#[test] -fn 
error_missing_colon_in_union() { - let input = "Foo = [ A B ]"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: expected Colon, got UpperIdent("B") at 10..11"#); -} - -#[test] -fn error_empty_synthetic() { - let input = "Foo = <>?"; - insta::assert_snapshot!(dump_table(input), @"ERROR: synthetic key cannot be empty at 8..9"); -} - -#[test] -fn error_unclosed_brace() { - let input = "Foo = { #Node @x"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type key at 16..16"); -} - -#[test] -fn error_unclosed_bracket() { - let input = "Foo = [ A: B"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected variant tag (uppercase) at 12..12"); -} - -#[test] -fn error_lowercase_type_name() { - let input = "foo = { #Node @x }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type name (uppercase) or synthetic key at 0..3"); -} - -#[test] -fn error_uppercase_field_name() { - let input = "Foo = { #Node @Name }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected field name (lowercase) at 15..19"); -} - -#[test] -fn parse_bare_builtin_alias_node() { - let input = "AliasNode = #Node"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasNode") = Node - "#); -} - -#[test] -fn parse_bare_builtin_alias_string() { - let input = "AliasString = #string"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasString") = String - "#); -} - -#[test] -fn parse_bare_builtin_alias_unit() { - let input = "AliasUnit = ()"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasUnit") = Unit - "#); -} - -#[test] -fn parse_bare_builtin_alias_invalid() { - let input = "AliasInvalid = #Invalid"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - 
Named("AliasInvalid") = Invalid - "#); -} - -#[test] -fn parse_synthetic_definition_struct() { - let input = " = { #Node @value }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Foo", "bar"]) = Struct({"value": Node}) - "#); -} - -#[test] -fn parse_synthetic_definition_union() { - let input = " = [ A: #Node B: #string ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Choice", "first"]) = TaggedUnion({"A": Node, "B": String}) - "#); -} - -#[test] -fn parse_synthetic_definition_wrapper() { - let input = " = #Node?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Inner", "nested"]) = Optional(Node) - "#); -} - -#[test] -fn error_invalid_char() { - let input = "Foo = { #Node @x $ }"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: unexpected character: "$" at 17..18"#); -} - -#[test] -fn error_eof_in_struct() { - let input = "Foo = { #Node @x"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type key at 16..16"); -} - -#[test] -fn error_eof_expecting_colon() { - let input = "Foo = [ A"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected Colon, got EOF at 9..9"); -} - -#[test] -fn error_invalid_token_in_synthetic() { - let input = "Foo = ?"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected identifier or '>' at 9..10"); -} - -#[test] -fn error_invalid_type_value() { - let input = "Foo = @bar"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type value at 6..7"); -} - -#[test] -fn error_unprefixed_node() { - let input = "Foo = { Node @x }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Foo") = Struct({"x": Named("Node")}) - "#); -} - -#[test] -fn error_unprefixed_string() { - let input 
= "Foo = string"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type value at 6..12"); -} - -// === emit tests === - -#[test] -fn emit_empty() { - let table = parse("").unwrap(); - insta::assert_snapshot!(emit(&table), @""); -} - -#[test] -fn emit_struct_simple() { - let table = parse("Foo = { #Node @name }").unwrap(); - insta::assert_snapshot!(emit(&table), @"Foo = { #Node @name }"); -} - -#[test] -fn emit_struct_multiple_fields() { - let table = parse("Func = { #string @name #Node @body #Node @params }").unwrap(); - insta::assert_snapshot!(emit(&table), @"Func = { #string @name #Node @body #Node @params }"); -} - -#[test] -fn emit_struct_empty() { - let table = parse("Empty = {}").unwrap(); - insta::assert_snapshot!(emit(&table), @"Empty = { }"); -} - -#[test] -fn emit_tagged_union() { - let table = parse("Stmt = [ Assign: AssignStmt Call: CallStmt ]").unwrap(); - insta::assert_snapshot!(emit(&table), @"Stmt = [ Assign: AssignStmt Call: CallStmt ]"); -} - -#[test] -fn emit_optional() { - let table = parse("MaybeNode = #Node?").unwrap(); - insta::assert_snapshot!(emit(&table), @"MaybeNode = #Node?"); -} - -#[test] -fn emit_list() { - let table = parse("Nodes = #Node*").unwrap(); - insta::assert_snapshot!(emit(&table), @"Nodes = #Node*"); -} - -#[test] -fn emit_non_empty_list() { - let table = parse("Nodes = #Node+").unwrap(); - insta::assert_snapshot!(emit(&table), @"Nodes = #Node+"); -} - -#[test] -fn emit_synthetic_key() { - let table = parse(" = { #Node @value }").unwrap(); - insta::assert_snapshot!(emit(&table), @" = { #Node @value }"); -} - -#[test] -fn emit_synthetic_in_wrapper() { - let table = parse("Wrapper = ?").unwrap(); - insta::assert_snapshot!(emit(&table), @"Wrapper = ?"); -} - -#[test] -fn emit_bare_builtins() { - let input = indoc! 
{r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - let table = parse(input).unwrap(); - insta::assert_snapshot!(emit(&table), @r" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "); -} - -#[test] -fn emit_multiple_definitions() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "#}; - let table = parse(input).unwrap(); - insta::assert_snapshot!(emit(&table), @r" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "); -} - -#[test] -fn emit_roundtrip() { - let input = indoc! {r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - Stmt = [ Func: FuncDecl Expr: #Node ] - MaybeStmt = Stmt? - "#}; - - let table1 = parse(input).unwrap(); - let emitted = emit(&table1); - let table2 = parse(&emitted).unwrap(); - - assert_eq!(table1.types, table2.types); -} diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 31a0f89c..0e075319 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -18,7 +18,6 @@ pub mod diagnostics; pub mod graph; -pub mod infer; pub mod ir; pub mod parser; pub mod query; From 5e4d6a43f3abe35530cb40507144c3d049e0eb93 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 13:21:03 -0300 Subject: [PATCH 03/23] Add typing module to graph module --- crates/plotnik-lib/src/graph/mod.rs | 4 + crates/plotnik-lib/src/graph/typing.rs | 471 +++++++++++++++++++ crates/plotnik-lib/src/graph/typing_tests.rs | 289 ++++++++++++ 3 files changed, 764 insertions(+) create mode 100644 crates/plotnik-lib/src/graph/typing.rs create mode 100644 crates/plotnik-lib/src/graph/typing_tests.rs diff --git a/crates/plotnik-lib/src/graph/mod.rs 
b/crates/plotnik-lib/src/graph/mod.rs index 8676f86d..764a99af 100644 --- a/crates/plotnik-lib/src/graph/mod.rs +++ b/crates/plotnik-lib/src/graph/mod.rs @@ -17,6 +17,7 @@ mod analysis; mod build; mod construct; mod optimize; +mod typing; #[cfg(test)] mod analysis_tests; @@ -26,8 +27,11 @@ mod build_tests; mod construct_tests; #[cfg(test)] mod optimize_tests; +#[cfg(test)] +mod typing_tests; pub use analysis::{AnalysisResult, StringInterner, analyze}; pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; pub use construct::{GraphConstructor, construct_graph}; pub use optimize::{OptimizeStats, eliminate_epsilons}; +pub use typing::{InferredMember, InferredTypeDef, TypeInferenceResult, dump_types, infer_types}; diff --git a/crates/plotnik-lib/src/graph/typing.rs b/crates/plotnik-lib/src/graph/typing.rs new file mode 100644 index 00000000..95733729 --- /dev/null +++ b/crates/plotnik-lib/src/graph/typing.rs @@ -0,0 +1,471 @@ +//! Type inference for BuildGraph. +//! +//! This module analyzes a BuildGraph and infers the output type structure +//! for each definition. The inference follows rules from ADR-0007 and ADR-0009. +//! +//! # Algorithm Overview +//! +//! 1. Walk graph from each definition entry point +//! 2. Track "pending value" - the captured value waiting for a Field assignment +//! 3. When Field(name) is encountered, record the pending value as a field +//! 4. Handle branching by merging field sets from all branches +//! 5. Handle quantifiers via array cardinality markers + +use super::{BuildEffect, BuildGraph, NodeId}; +use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID}; +use crate::ir::{TypeId, TypeKind}; +use indexmap::IndexMap; +use std::collections::HashSet; + +/// Result of type inference on a BuildGraph. +#[derive(Debug)] +pub struct TypeInferenceResult<'src> { + /// All inferred type definitions (composite types only). + pub type_defs: Vec>, + /// Mapping from definition name to its result TypeId. 
+ pub entrypoint_types: IndexMap<&'src str, TypeId>, +} + +/// An inferred type definition (before emission). +#[derive(Debug, Clone)] +pub struct InferredTypeDef<'src> { + pub kind: TypeKind, + pub name: Option<&'src str>, + /// For Record/Enum: fields or variants. For wrappers: empty. + pub members: Vec>, + /// For wrapper types: the inner TypeId. + pub inner_type: Option, +} + +/// A field (for Record) or variant (for Enum). +#[derive(Debug, Clone)] +pub struct InferredMember<'src> { + pub name: &'src str, + pub ty: TypeId, +} + +/// Cardinality of a capture. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Cardinality { + One, + Optional, + Star, + Plus, +} + +impl Cardinality { + /// Join cardinalities (for alternation branches). + fn join(self, other: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, other) { + (One, One) => One, + (One, Optional) | (Optional, One) | (Optional, Optional) => Optional, + (Plus, Plus) => Plus, + (One, Plus) | (Plus, One) => Plus, + _ => Star, + } + } + + /// Make optional (for fields missing in some alternation branches). + fn make_optional(self) -> Cardinality { + use Cardinality::*; + match self { + One => Optional, + Plus => Star, + x => x, + } + } +} + +/// Inferred field information collected during traversal. +#[derive(Debug, Clone)] +struct FieldInfo { + base_type: TypeId, + cardinality: Cardinality, + /// Number of branches this field appears in (for optional detection). + branch_count: usize, +} + +/// Collected scope information from traversal. +#[derive(Debug, Clone, Default)] +struct ScopeInfo<'src> { + fields: IndexMap<&'src str, FieldInfo>, + /// Variants for tagged alternations. + variants: IndexMap<&'src str, ScopeInfo<'src>>, + /// Whether we've seen variant markers (StartVariant/EndVariant). 
+ has_variants: bool, +} + +impl<'src> ScopeInfo<'src> { + fn add_field(&mut self, name: &'src str, base_type: TypeId, cardinality: Cardinality) { + if let Some(existing) = self.fields.get_mut(name) { + existing.cardinality = existing.cardinality.join(cardinality); + existing.branch_count += 1; + } else { + self.fields.insert( + name, + FieldInfo { + base_type, + cardinality, + branch_count: 1, + }, + ); + } + } + + fn merge_from(&mut self, other: ScopeInfo<'src>, total_branches: usize) { + for (name, info) in other.fields { + if let Some(existing) = self.fields.get_mut(name) { + existing.cardinality = existing.cardinality.join(info.cardinality); + existing.branch_count += info.branch_count; + } else { + self.fields.insert(name, info); + } + } + + // Merge variants - don't overwrite, merge fields into existing + for (tag, variant_info) in other.variants { + if let Some(existing) = self.variants.get_mut(tag) { + // Merge fields from child scope into existing variant + for (name, info) in variant_info.fields { + if let Some(existing_field) = existing.fields.get_mut(name) { + existing_field.cardinality = + existing_field.cardinality.join(info.cardinality); + existing_field.branch_count += info.branch_count; + } else { + existing.fields.insert(name, info); + } + } + } else { + self.variants.insert(tag, variant_info); + } + self.has_variants = true; + } + + // Mark fields as optional if they don't appear in all branches + for info in self.fields.values_mut() { + if info.branch_count < total_branches { + info.cardinality = info.cardinality.make_optional(); + } + } + } +} + +/// State during graph traversal. +#[derive(Debug, Clone, Copy)] +struct TraversalState<'src> { + /// The type of the current pending value (after CaptureNode). + pending_type: Option, + /// Current cardinality wrapper (from array effects). + cardinality: Cardinality, + /// Current variant tag (inside StartVariant..EndVariant). 
+ current_variant: Option<&'src str>, + /// Depth counter for nested objects. + object_depth: u32, +} + +impl Default for TraversalState<'_> { + fn default() -> Self { + Self { + pending_type: None, + cardinality: Cardinality::One, + current_variant: None, + object_depth: 0, + } + } +} + +/// Context for type inference. +struct InferenceContext<'src, 'g> { + graph: &'g BuildGraph<'src>, + dead_nodes: &'g HashSet, + type_defs: Vec>, + next_type_id: TypeId, +} + +impl<'src, 'g> InferenceContext<'src, 'g> { + fn new(graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet) -> Self { + Self { + graph, + dead_nodes, + type_defs: Vec::new(), + next_type_id: 3, // TYPE_COMPOSITE_START + } + } + + fn alloc_type_id(&mut self) -> TypeId { + let id = self.next_type_id; + self.next_type_id += 1; + id + } + + fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { + let mut visited = HashSet::new(); + let scope = self.traverse(entry_id, TraversalState::default(), &mut visited, 0); + + if scope.has_variants && !scope.variants.is_empty() { + self.create_enum_type(def_name, &scope) + } else if !scope.fields.is_empty() { + self.create_struct_type(def_name, &scope) + } else { + TYPE_VOID + } + } + + fn traverse( + &mut self, + node_id: NodeId, + mut state: TraversalState<'src>, + visited: &mut HashSet, + depth: usize, + ) -> ScopeInfo<'src> { + if self.dead_nodes.contains(&node_id) || depth > 200 { + return ScopeInfo::default(); + } + + // Cycle detection - but allow revisiting at different depths for quantifiers + if !visited.insert(node_id) && depth > 50 { + return ScopeInfo::default(); + } + + let node = self.graph.node(node_id); + let mut scope = ScopeInfo::default(); + + // Process effects on this node + for effect in &node.effects { + match effect { + BuildEffect::CaptureNode => { + state.pending_type = Some(TYPE_NODE); + } + BuildEffect::ToString => { + state.pending_type = Some(TYPE_STR); + } + BuildEffect::Field(name) => { + if let Some(base_type) = 
state.pending_type.take() { + if let Some(tag) = state.current_variant { + // Inside a variant - add to variant scope + let variant_scope = scope.variants.entry(tag).or_default(); + variant_scope.add_field(*name, base_type, state.cardinality); + } else { + scope.add_field(*name, base_type, state.cardinality); + } + } + state.cardinality = Cardinality::One; + } + BuildEffect::StartArray => { + // Mark that we're collecting into an array + } + BuildEffect::PushElement => { + // Element pushed to array + } + BuildEffect::EndArray => { + state.cardinality = Cardinality::Star; + } + BuildEffect::StartObject => { + state.object_depth += 1; + } + BuildEffect::EndObject => { + state.object_depth = state.object_depth.saturating_sub(1); + } + BuildEffect::StartVariant(tag) => { + state.current_variant = Some(*tag); + scope.has_variants = true; + } + BuildEffect::EndVariant => { + if let Some(tag) = state.current_variant.take() { + // Ensure variant exists even if empty + scope.variants.entry(tag).or_default(); + } + } + } + } + + // Process successors + let live_successors: Vec<_> = node + .successors + .iter() + .filter(|s| !self.dead_nodes.contains(s)) + .copied() + .collect(); + + if live_successors.is_empty() { + // Terminal node + } else if live_successors.len() == 1 { + // Linear path - continue with same state + let child_scope = self.traverse(live_successors[0], state, visited, depth + 1); + scope.merge_from(child_scope, 1); + } else { + // Branching - traverse each branch and merge results + let total_branches = live_successors.len(); + for succ in live_successors { + let child_scope = self.traverse(succ, state.clone(), visited, depth + 1); + scope.merge_from(child_scope, total_branches); + } + } + + scope + } + + fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { + // Create members first - this may allocate wrapper types + let members: Vec<_> = scope + .fields + .iter() + .map(|(field_name, info)| { + let member_type = 
self.wrap_with_cardinality(info.base_type, info.cardinality); + InferredMember { + name: field_name, + ty: member_type, + } + }) + .collect(); + + // Now allocate struct type_id - this ensures proper ordering + let type_id = self.alloc_type_id(); + + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Record, + name: Some(name), + members, + inner_type: None, + }); + + type_id + } + + fn create_enum_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { + // Create variant payloads first - this may allocate nested types + let mut members = Vec::new(); + for (tag, variant_scope) in &scope.variants { + let variant_type = if variant_scope.fields.is_empty() { + TYPE_VOID + } else { + // Create synthetic name for variant payload + let variant_name = format!("{}{}", name, tag); + let leaked: &'src str = Box::leak(variant_name.into_boxed_str()); + self.create_struct_type(leaked, variant_scope) + }; + members.push(InferredMember { + name: tag, + ty: variant_type, + }); + } + + // Now allocate enum type_id - this ensures proper ordering + let type_id = self.alloc_type_id(); + + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Enum, + name: Some(name), + members, + inner_type: None, + }); + + type_id + } + + fn wrap_with_cardinality(&mut self, base: TypeId, card: Cardinality) -> TypeId { + match card { + Cardinality::One => base, + Cardinality::Optional => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Optional, + name: None, + members: Vec::new(), + inner_type: Some(base), + }); + type_id + } + Cardinality::Star => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::ArrayStar, + name: None, + members: Vec::new(), + inner_type: Some(base), + }); + type_id + } + Cardinality::Plus => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::ArrayPlus, + name: None, + members: Vec::new(), + inner_type: Some(base), + 
}); + type_id + } + } + } +} + +/// Infer types for all definitions in a BuildGraph. +pub fn infer_types<'src>( + graph: &BuildGraph<'src>, + dead_nodes: &HashSet, +) -> TypeInferenceResult<'src> { + let mut ctx = InferenceContext::new(graph, dead_nodes); + let mut entrypoint_types = IndexMap::new(); + + for (name, entry_id) in graph.definitions() { + let type_id = ctx.infer_definition(name, entry_id); + entrypoint_types.insert(name, type_id); + } + + TypeInferenceResult { + type_defs: ctx.type_defs, + entrypoint_types, + } +} + +/// Format inferred types for debugging/testing. +pub fn dump_types(result: &TypeInferenceResult) -> String { + let mut out = String::new(); + + out.push_str("=== Entrypoints ===\n"); + for (name, type_id) in &result.entrypoint_types { + out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); + } + + if !result.type_defs.is_empty() { + out.push_str("\n=== Types ===\n"); + for (idx, def) in result.type_defs.iter().enumerate() { + let type_id = idx as TypeId + 3; + let name = def.name.unwrap_or(""); + out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); + + if let Some(inner) = def.inner_type { + out.push_str(&format!(" → {}", format_type_id(inner))); + } + + if !def.members.is_empty() { + out.push_str(" {\n"); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push('}'); + } + out.push('\n'); + } + } + + out +} + +fn format_type_id(id: TypeId) -> String { + if id == TYPE_VOID { + "Void".to_string() + } else if id == TYPE_NODE { + "Node".to_string() + } else if id == TYPE_STR { + "String".to_string() + } else { + format!("T{}", id) + } +} diff --git a/crates/plotnik-lib/src/graph/typing_tests.rs b/crates/plotnik-lib/src/graph/typing_tests.rs new file mode 100644 index 00000000..3aec563c --- /dev/null +++ b/crates/plotnik-lib/src/graph/typing_tests.rs @@ -0,0 +1,289 @@ +//! Tests for type inference. 
+ +use crate::graph::{construct_graph, infer_types}; +use crate::parser::Parser; +use crate::parser::lexer::lex; +use std::collections::HashSet; + +use super::dump_types; + +fn infer(source: &str) -> String { + let tokens = lex(source); + let parser = Parser::new(source, tokens); + let result = parser.parse().expect("parse should succeed"); + let graph = construct_graph(source, &result.root); + let dead_nodes = HashSet::new(); + + let inference = infer_types(&graph, &dead_nodes); + dump_types(&inference) +} + +#[test] +fn simple_capture() { + let result = infer("Foo = (identifier) @name"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + } + "); +} + +#[test] +fn capture_with_string_type() { + let result = infer("Foo = (identifier) @name ::string"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: String + } + "); +} + +#[test] +fn multiple_captures() { + let result = infer("Foo = (function name: (identifier) @name body: (block) @body)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + body: Node + } + "); +} + +#[test] +fn no_captures() { + let result = infer("Foo = (identifier)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → Void + "); +} + +#[test] +fn optional_quantifier() { + let result = infer("Foo = (identifier)? 
@name"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Optional → Node + T4: Record Foo { + name: T3 + } + "); +} + +#[test] +fn star_quantifier() { + let result = infer("Foo = (identifier)* @names"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + names: T3 + } + "); +} + +#[test] +fn plus_quantifier() { + let result = infer("Foo = (identifier)+ @names"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + names: T3 + } + "); +} + +#[test] +fn tagged_alternation() { + let result = infer("Foo = [ Ok: (value) @val Err: (error) @err ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Record FooOk { + val: Node + } + T4: Record FooErr { + err: Node + } + T5: Enum Foo { + Ok: T3 + Err: T4 + } + "); +} + +#[test] +fn untagged_alternation_symmetric() { + let result = infer("Foo = [ (a) @x (b) @x ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Optional → Node + T4: Record Foo { + x: T3 + } + "); +} + +#[test] +fn untagged_alternation_asymmetric() { + let result = infer("Foo = [ (a) @x (b) @y ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + x: T3 + y: T4 + } + "); +} + +#[test] +fn sequence_capture() { + let result = infer("Foo = { (a) @x (b) @y } @seq"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + x: Node + y: Node + } + "); +} + +#[test] +fn nested_captures() { + let result = infer("Foo = (outer (inner) @inner) @outer"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + inner: Node + } + "); +} + +#[test] +fn 
multiple_definitions() { + let result = infer( + r#" + Func = (function name: (identifier) @name) + Call = (call function: (identifier) @fn) + "#, + ); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Func → T3 + Call → T4 + + === Types === + T3: Record Func { + name: Node + } + T4: Record Call { + fn: Node + } + "); +} + +/// Documents the graph structure for a captured plus quantifier. +/// Used to understand effect ordering for type inference. +#[test] +fn graph_structure_captured_plus() { + use crate::graph::construct_graph; + use crate::parser::Parser; + use crate::parser::lexer::lex; + + let source = "Foo = (identifier)+ @names"; + let tokens = lex(source); + let parser = Parser::new(source, tokens); + let result = parser.parse().expect("parse should succeed"); + let graph = construct_graph(source, &result.root); + + let mut out = String::new(); + for (id, node) in graph.iter() { + out.push_str(&format!("N{}: ", id)); + for effect in &node.effects { + out.push_str(&format!("{:?} ", effect)); + } + out.push_str(&format!("→ {:?}\n", node.successors)); + } + insta::assert_snapshot!(out, @r#" + N0: CaptureNode → [2] + N1: StartArray → [0] + N2: PushElement → [3] + N3: → [0, 4] + N4: EndArray → [5] + N5: Field("names") → [] + "#); +} + +/// Documents the graph structure for a tagged alternation. +/// Used to understand variant effect ordering for type inference. 
+#[test] +fn graph_structure_tagged_alternation() { + use crate::graph::construct_graph; + use crate::parser::Parser; + use crate::parser::lexer::lex; + + let source = "Foo = [ Ok: (value) @val Err: (error) @err ]"; + let tokens = lex(source); + let parser = Parser::new(source, tokens); + let result = parser.parse().expect("parse should succeed"); + let graph = construct_graph(source, &result.root); + + let mut out = String::new(); + for (id, node) in graph.iter() { + out.push_str(&format!("N{}: ", id)); + for effect in &node.effects { + out.push_str(&format!("{:?} ", effect)); + } + out.push_str(&format!("→ {:?}\n", node.successors)); + } + insta::assert_snapshot!(out, @r#" + N0: → [2, 6] + N1: → [] + N2: StartVariant("Ok") → [3] + N3: CaptureNode → [4] + N4: Field("val") → [5] + N5: EndVariant → [1] + N6: StartVariant("Err") → [7] + N7: CaptureNode → [8] + N8: Field("err") → [9] + N9: EndVariant → [1] + "#); +} From b30a8ea1e471739f21a037ffba2a267b4fcdc3e7 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 13:29:49 -0300 Subject: [PATCH 04/23] Add type unification and error reporting to type inference --- crates/plotnik-lib/src/graph/mod.rs | 5 +- crates/plotnik-lib/src/graph/typing.rs | 218 +++++++++++++++++-- crates/plotnik-lib/src/graph/typing_tests.rs | 152 ++++++++++++- docs/adr/ADR-0009-type-system.md | 85 +++++++- 4 files changed, 422 insertions(+), 38 deletions(-) diff --git a/crates/plotnik-lib/src/graph/mod.rs b/crates/plotnik-lib/src/graph/mod.rs index 764a99af..6d956b36 100644 --- a/crates/plotnik-lib/src/graph/mod.rs +++ b/crates/plotnik-lib/src/graph/mod.rs @@ -34,4 +34,7 @@ pub use analysis::{AnalysisResult, StringInterner, analyze}; pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; pub use construct::{GraphConstructor, construct_graph}; pub use optimize::{OptimizeStats, eliminate_epsilons}; -pub use typing::{InferredMember, InferredTypeDef, TypeInferenceResult, dump_types, 
infer_types}; +pub use typing::{ + InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, + dump_types, infer_types, +}; diff --git a/crates/plotnik-lib/src/graph/typing.rs b/crates/plotnik-lib/src/graph/typing.rs index 95733729..462200ba 100644 --- a/crates/plotnik-lib/src/graph/typing.rs +++ b/crates/plotnik-lib/src/graph/typing.rs @@ -8,12 +8,19 @@ //! 1. Walk graph from each definition entry point //! 2. Track "pending value" - the captured value waiting for a Field assignment //! 3. When Field(name) is encountered, record the pending value as a field -//! 4. Handle branching by merging field sets from all branches +//! 4. Handle branching by merging field sets from all branches (1-level merge) //! 5. Handle quantifiers via array cardinality markers +//! +//! # 1-Level Merge Semantics +//! +//! When merging captures across alternation branches: +//! - Top-level fields merge with optionality for asymmetric captures +//! - Base types (Node, String) must match exactly +//! - Nested structs must be structurally identical (not recursively merged) +//! - All incompatibilities are reported, not just the first use super::{BuildEffect, BuildGraph, NodeId}; -use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID}; -use crate::ir::{TypeId, TypeKind}; +use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; use indexmap::IndexMap; use std::collections::HashSet; @@ -24,6 +31,39 @@ pub struct TypeInferenceResult<'src> { pub type_defs: Vec>, /// Mapping from definition name to its result TypeId. pub entrypoint_types: IndexMap<&'src str, TypeId>, + /// Type unification errors (incompatible types in alternation branches). + pub errors: Vec>, +} + +/// Error when types cannot be unified in alternation branches. +#[derive(Debug, Clone)] +pub struct UnificationError<'src> { + /// The field name where incompatibility was detected. + pub field: &'src str, + /// Definition context where the error occurred. 
+ pub definition: &'src str, + /// Types found across branches (for error message). + pub types_found: Vec, +} + +/// Human-readable type description for error messages. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TypeDescription { + Node, + String, + Struct(Vec), // field names for identification +} + +impl std::fmt::Display for TypeDescription { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TypeDescription::Node => write!(f, "Node"), + TypeDescription::String => write!(f, "String"), + TypeDescription::Struct(fields) => { + write!(f, "Struct {{ {} }}", fields.join(", ")) + } + } + } } /// An inferred type definition (before emission). @@ -77,19 +117,49 @@ impl Cardinality { } } +/// Type shape for 1-level merge comparison. +/// Tracks enough information to detect incompatibilities. +#[derive(Debug, Clone, PartialEq, Eq)] +#[allow(dead_code)] // Struct variant is infrastructure for captured sequence support +enum TypeShape<'src> { + /// Primitive: Node or String + Primitive(TypeId), + /// Struct with known field names (for structural identity check) + Struct(Vec<&'src str>), +} + +impl<'src> TypeShape<'src> { + fn to_description(&self) -> TypeDescription { + match self { + TypeShape::Primitive(TYPE_NODE) => TypeDescription::Node, + TypeShape::Primitive(TYPE_STR) => TypeDescription::String, + TypeShape::Primitive(_) => TypeDescription::Node, // fallback + TypeShape::Struct(fields) => { + TypeDescription::Struct(fields.iter().map(|s| s.to_string()).collect()) + } + } + } +} + /// Inferred field information collected during traversal. #[derive(Debug, Clone)] -struct FieldInfo { +struct FieldInfo<'src> { + /// The inferred type shape (for compatibility checking). + shape: TypeShape<'src>, + /// Base TypeId (TYPE_NODE or TYPE_STR for primitives, placeholder for structs). base_type: TypeId, + /// Cardinality from quantifiers. cardinality: Cardinality, /// Number of branches this field appears in (for optional detection). 
branch_count: usize, + /// All shapes seen at this field (for error reporting). + all_shapes: Vec>, } /// Collected scope information from traversal. #[derive(Debug, Clone, Default)] struct ScopeInfo<'src> { - fields: IndexMap<&'src str, FieldInfo>, + fields: IndexMap<&'src str, FieldInfo<'src>>, /// Variants for tagged alternations. variants: IndexMap<&'src str, ScopeInfo<'src>>, /// Whether we've seen variant markers (StartVariant/EndVariant). @@ -98,24 +168,45 @@ struct ScopeInfo<'src> { impl<'src> ScopeInfo<'src> { fn add_field(&mut self, name: &'src str, base_type: TypeId, cardinality: Cardinality) { + let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; + if !existing.all_shapes.contains(&shape) { + existing.all_shapes.push(shape); + } } else { self.fields.insert( name, FieldInfo { + shape: shape.clone(), base_type, cardinality, branch_count: 1, + all_shapes: vec![shape], }, ); } } - fn merge_from(&mut self, other: ScopeInfo<'src>, total_branches: usize) { + /// Merge another scope into this one, applying 1-level merge semantics. + /// Returns errors for incompatible types. + /// Note: Does NOT apply optionality - call `apply_optionality` after all branches merged. 
+ fn merge_from(&mut self, other: ScopeInfo<'src>) -> Vec> { + let mut errors = Vec::new(); + for (name, info) in other.fields { if let Some(existing) = self.fields.get_mut(name) { + // Check type compatibility (1-level merge) + if let Some(err) = check_compatibility(&existing.shape, &info.shape, name) { + errors.push(err); + // Collect all shapes for error reporting + for shape in &info.all_shapes { + if !existing.all_shapes.contains(shape) { + existing.all_shapes.push(shape.clone()); + } + } + } existing.cardinality = existing.cardinality.join(info.cardinality); existing.branch_count += info.branch_count; } else { @@ -123,26 +214,23 @@ impl<'src> ScopeInfo<'src> { } } - // Merge variants - don't overwrite, merge fields into existing + // Merge variants for (tag, variant_info) in other.variants { if let Some(existing) = self.variants.get_mut(tag) { - // Merge fields from child scope into existing variant - for (name, info) in variant_info.fields { - if let Some(existing_field) = existing.fields.get_mut(name) { - existing_field.cardinality = - existing_field.cardinality.join(info.cardinality); - existing_field.branch_count += info.branch_count; - } else { - existing.fields.insert(name, info); - } - } + let variant_errors = existing.merge_from(variant_info); + errors.extend(variant_errors); } else { self.variants.insert(tag, variant_info); } self.has_variants = true; } - // Mark fields as optional if they don't appear in all branches + errors + } + + /// Apply optionality to fields that don't appear in all branches. + /// Must be called after all branches have been merged. + fn apply_optionality(&mut self, total_branches: usize) { for info in self.fields.values_mut() { if info.branch_count < total_branches { info.cardinality = info.cardinality.make_optional(); @@ -151,6 +239,50 @@ impl<'src> ScopeInfo<'src> { } } +/// Internal error during merge (before conversion to UnificationError). 
+#[derive(Debug)] +struct MergeError<'src> { + field: &'src str, + shapes: Vec>, +} + +/// Check if two type shapes are compatible under 1-level merge semantics. +fn check_compatibility<'src>( + a: &TypeShape<'src>, + b: &TypeShape<'src>, + field: &'src str, +) -> Option> { + match (a, b) { + // Same primitive types are compatible + (TypeShape::Primitive(t1), TypeShape::Primitive(t2)) if t1 == t2 => None, + + // Different primitives (Node vs String) are incompatible + (TypeShape::Primitive(_), TypeShape::Primitive(_)) => Some(MergeError { + field, + shapes: vec![a.clone(), b.clone()], + }), + + // Struct vs Primitive is incompatible + (TypeShape::Struct(_), TypeShape::Primitive(_)) + | (TypeShape::Primitive(_), TypeShape::Struct(_)) => Some(MergeError { + field, + shapes: vec![a.clone(), b.clone()], + }), + + // Structs: must have identical field sets (1-level, no deep merge) + (TypeShape::Struct(fields_a), TypeShape::Struct(fields_b)) => { + if fields_a == fields_b { + None + } else { + Some(MergeError { + field, + shapes: vec![a.clone(), b.clone()], + }) + } + } + } +} + /// State during graph traversal. 
#[derive(Debug, Clone, Copy)] struct TraversalState<'src> { @@ -181,6 +313,7 @@ struct InferenceContext<'src, 'g> { dead_nodes: &'g HashSet, type_defs: Vec>, next_type_id: TypeId, + errors: Vec>, } impl<'src, 'g> InferenceContext<'src, 'g> { @@ -190,6 +323,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { dead_nodes, type_defs: Vec::new(), next_type_id: 3, // TYPE_COMPOSITE_START + errors: Vec::new(), } } @@ -201,7 +335,23 @@ impl<'src, 'g> InferenceContext<'src, 'g> { fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { let mut visited = HashSet::new(); - let scope = self.traverse(entry_id, TraversalState::default(), &mut visited, 0); + let mut merge_errors = Vec::new(); + let scope = self.traverse( + entry_id, + TraversalState::default(), + &mut visited, + 0, + &mut merge_errors, + ); + + // Convert merge errors to unification errors + for err in merge_errors { + self.errors.push(UnificationError { + field: err.field, + definition: def_name, + types_found: err.shapes.iter().map(|s| s.to_description()).collect(), + }); + } if scope.has_variants && !scope.variants.is_empty() { self.create_enum_type(def_name, &scope) @@ -218,6 +368,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { mut state: TraversalState<'src>, visited: &mut HashSet, depth: usize, + errors: &mut Vec>, ) -> ScopeInfo<'src> { if self.dead_nodes.contains(&node_id) || depth > 200 { return ScopeInfo::default(); @@ -292,15 +443,19 @@ impl<'src, 'g> InferenceContext<'src, 'g> { // Terminal node } else if live_successors.len() == 1 { // Linear path - continue with same state - let child_scope = self.traverse(live_successors[0], state, visited, depth + 1); - scope.merge_from(child_scope, 1); + let child_scope = self.traverse(live_successors[0], state, visited, depth + 1, errors); + let merge_errors = scope.merge_from(child_scope); + errors.extend(merge_errors); } else { // Branching - traverse each branch and merge results let total_branches = live_successors.len(); for succ in 
live_successors { - let child_scope = self.traverse(succ, state.clone(), visited, depth + 1); - scope.merge_from(child_scope, total_branches); + let child_scope = self.traverse(succ, state.clone(), visited, depth + 1, errors); + let merge_errors = scope.merge_from(child_scope); + errors.extend(merge_errors); } + // Apply optionality after all branches merged + scope.apply_optionality(total_branches); } scope @@ -417,6 +572,7 @@ pub fn infer_types<'src>( TypeInferenceResult { type_defs: ctx.type_defs, entrypoint_types, + errors: ctx.errors, } } @@ -455,6 +611,22 @@ pub fn dump_types(result: &TypeInferenceResult) -> String { } } + if !result.errors.is_empty() { + out.push_str("\n=== Errors ===\n"); + for err in &result.errors { + out.push_str(&format!( + "field `{}` in `{}`: incompatible types [{}]\n", + err.field, + err.definition, + err.types_found + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + )); + } + } + out } diff --git a/crates/plotnik-lib/src/graph/typing_tests.rs b/crates/plotnik-lib/src/graph/typing_tests.rs index 3aec563c..090cd283 100644 --- a/crates/plotnik-lib/src/graph/typing_tests.rs +++ b/crates/plotnik-lib/src/graph/typing_tests.rs @@ -141,12 +141,11 @@ fn untagged_alternation_symmetric() { let result = infer("Foo = [ (a) @x (b) @x ]"); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T4 + Foo → T3 === Types === - T3: Optional → Node - T4: Record Foo { - x: T3 + T3: Record Foo { + x: Node } "); } @@ -287,3 +286,148 @@ fn graph_structure_tagged_alternation() { N9: EndVariant → [1] "#); } + +// ============================================================================= +// 1-Level Merge Semantics Tests (ADR-0009) +// ============================================================================= + +#[test] +fn merge_incompatible_primitives_node_vs_string() { + // Same field with Node in one branch, String in another + let result = infer("Foo = [ (a) @val (b) @val ::string ]"); + insta::assert_snapshot!(result, @r" 
+ === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + + === Errors === + field `val` in `Foo`: incompatible types [Node, String] + "); +} + +#[test] +fn merge_compatible_same_type_node() { + // Same field with Node in both branches - should merge without error + let result = infer("Foo = [ (a) @val (b) @val ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + "); +} + +#[test] +fn merge_compatible_same_type_string() { + // Same field with String in both branches - should merge without error + let result = infer("Foo = [ (a) @val ::string (b) @val ::string ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: String + } + "); +} + +#[test] +fn merge_asymmetric_fields_become_optional() { + // Different fields in each branch - both become optional (the feature) + let result = infer("Foo = [ (a) @x (b) @y ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + x: T3 + y: T4 + } + "); +} + +#[test] +fn merge_mixed_compatible_and_asymmetric() { + // @common in both branches (compatible), @x and @y asymmetric + // Note: flat scoping means nested captures propagate to root + let result = infer("Foo = [ { (a) @common (b) @x } { (a) @common (c) @y } ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + common: Node + x: T3 + y: T4 + } + "); +} + +#[test] +fn merge_multiple_incompatible_fields_reports_all() { + // Multiple fields with type mismatches - should report all errors + let result = infer("Foo = [ (a) @x (b) @y (c) @x ::string (d) @y ::string ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: 
Record Foo { + x: T3 + y: T4 + } + + === Errors === + field `x` in `Foo`: incompatible types [Node, String] + field `y` in `Foo`: incompatible types [Node, String] + "); +} + +#[test] +fn merge_three_branches_all_compatible() { + // Three branches, all with same type - no error + let result = infer("Foo = [ (a) @val (b) @val (c) @val ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + "); +} + +#[test] +fn merge_three_branches_one_incompatible() { + // Three branches, one has different type + let result = infer("Foo = [ (a) @val (b) @val (c) @val ::string ]"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + + === Errors === + field `val` in `Foo`: incompatible types [Node, String] + "); +} diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md index c82ec09e..afa1694e 100644 --- a/docs/adr/ADR-0009-type-system.md +++ b/docs/adr/ADR-0009-type-system.md @@ -1,6 +1,6 @@ # ADR-0009: Type System -- **Status**: Proposed +- **Status**: Accepted - **Date**: 2025-01-14 ## Context @@ -118,7 +118,6 @@ Despite tags, this is uncaptured. Behavior: - `@x` appears only in branch A → propagates as `Optional(Node)` - `@y` appears only in branch B → propagates as `Optional(Node)` - Result: `Foo { x: Optional(Node), y: Optional(Node) }` -- Diagnostic (warning): asymmetric captures ```plotnik Bar = [ (a) @v (b) @v ] @@ -168,16 +167,60 @@ Foo = [ A: (a) @x B: (b) @y ] @z } ``` -### Unification Rules (for merging) +### Unification Rules (1-Level Merge) -When merging captures across untagged alternation branches: +When merging captures across untagged alternation branches, we apply **1-level merge semantics**. This balances flexibility with type safety: top-level fields merge with optionality, but nested struct mismatches are errors. + +**Design rationale**: Plotnik's purpose is typed extraction. 
Deep recursive merging would produce heavily-optional types (`{ a?: { b?: { c?: Node } } }`), forcing users back to defensive checking—undermining the library's value. Tagged+captured alternations exist when precise discrimination is needed. + +**Base type compatibility**: ``` -unify(τ, τ) = τ unify(Node, Node) = Node unify(String, String) = String -unify(Struct(f₁), Struct(f₂)) = Struct(f₁) if f₁ = f₂ -unify(τ₁, τ₂) = ⊥ (error) +unify(Node, String) = ⊥ (error: incompatible primitives) +unify(Node, Struct) = ⊥ (error: primitive vs composite) +unify(String, Struct) = ⊥ (error: primitive vs composite) +``` + +**Struct merging** (1-level only): + +``` +unify(Struct(f₁), Struct(f₂)) = Struct(merged_fields) + where merged_fields: + - fields in both f₁ and f₂: unify types (must be compatible) + - fields only in f₁: become Optional + - fields only in f₂: become Optional +``` + +Nested structs are compared by **structural identity**, not recursively merged. If a field has type `Struct` in both branches but the structs differ, it's an error. + +**Cardinality interaction**: Cardinality join happens first, then type unification. If `T` and `T[]` appear at the same field, lift to array, then unify element types. + +**Error reporting**: When unification fails, the compiler reports ALL incompatibilities across all branches, not just the first. This helps users fix multiple issues in one iteration. 
+ +**Examples**: + +``` +// OK: top-level field merge +Branch 1: { x: Node, y: Node } +Branch 2: { x: Node, z: String } +Result: { x: Node, y?: Node, z?: String } + +// OK: nested structs identical +Branch 1: { data: { a: Node }, extra: Node } +Branch 2: { data: { a: Node } } +Result: { data: { a: Node }, extra?: Node } + +// ERROR: nested structs differ (no deep merge) +Branch 1: { data: { a: Node } } +Branch 2: { data: { b: Node } } +→ Error: field `data` has incompatible struct types + +// ERROR: primitive vs primitive mismatch +Branch 1: { val: String } +Branch 2: { val: Node } +→ Error: field `val` has incompatible types: `String` vs `Node` ``` ### Cardinality Join (for merging) @@ -334,7 +377,9 @@ Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`, ...). | Condition | Severity | Recovery | Diagnostic Kind (future) | | ------------------------------------ | -------- | ----------------------------- | ------------------------------ | -| Type mismatch in untagged alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Incompatible primitives in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Primitive vs Struct in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Nested struct mismatch in alt | Error | Use `TYPE_INVALID`, continue | `StructMismatchInAlt` | | Duplicate capture in same scope | Error | Keep first, ignore duplicates | `DuplicateCapture` | | Empty definition (no captures) | Info | Type is `Void` (TypeId = 0) | (no diagnostic) | | Inline uncaptured tagged alternation | Warning | Treat as untagged | `UnusedBranchLabels` | @@ -342,6 +387,21 @@ Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`, ...). The last warning applies only to literal tagged alternations, not references. If `Foo = [ A: ... ]` is used as `(Foo)`, no warning—the user intentionally reuses a definition. But `(parent [ A: ... B: ... ])` inline without capture likely indicates a forgotten `@name`. 
+**Exhaustive error reporting**: When type unification fails, the compiler explores all branches and reports all incompatibilities. Example diagnostic: + +``` +error: incompatible types in alternation branches + --> query.plot:3:5 + | + 3 | (a { (x) @val ::string }) @data + | ^^^ `String` here + 4 | (b { (x { (y) @inner }) @val }) @data + | ^^^ `Node` here + | + = note: capture `val` has incompatible types across branches + = help: use tagged alternation `[ A: ... B: ... ]` for precise discrimination +``` + ## Examples ### Example 1: Captured Sequence @@ -397,7 +457,7 @@ Foo = (parent [ - `@msg` only in Err branch → `Optional(String)` - Types: - `Foo: { val: Optional(Node), msg: Optional(String) }` -- Diagnostic: warning (inline uncaptured tagged alternation) +- Diagnostic: warning `UnusedBranchLabels` (inline uncaptured tagged alternation) ### Example 5: Cardinality in Alternation @@ -428,14 +488,19 @@ Funcs = (module { (function)* @fns }) - Explicit rules enable deterministic inference - "Tags only matter when captured" is a simple mental model -- Warning on asymmetric captures catches likely bugs +- 1-level merge provides flexibility while preserving type safety +- Asymmetric fields becoming Optional is intuitive ("match any branch, get what's available") - Definition root inherits type naturally—no wrapper structs for top-level enums +- Exhaustive error reporting helps users fix all issues in one iteration **Negative**: - LUB cardinality join can lose precision +- 1-level merge is less flexible than deep merge (intentional trade-off) **Alternatives Considered**: - Error on uncaptured tagged alternations (rejected: too restrictive for incremental development) - Definition root always Struct (rejected: forces wrapper types for enums, e.g., `struct Expr { val: ExprEnum }` instead of `enum Expr`) +- Deep recursive merge for nested structs (rejected: produces heavily-optional types that defeat the purpose of typed extraction; users who need flexibility at depth 
should use tagged+captured alternations for precision) +- Strict struct equality for merging (rejected: too restrictive for common patterns like `[ (a) @x (b) @y ]`) From 588bd26ee7cb68dac1e3ee654f462fcd6994a1c8 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 13:43:02 -0300 Subject: [PATCH 05/23] Move graph dump utilities to separate module --- crates/plotnik-lib/src/graph/build_tests.rs | 174 +++------- .../plotnik-lib/src/graph/construct_tests.rs | 177 ++-------- crates/plotnik-lib/src/graph/dump.rs | 322 ++++++++++++++++++ crates/plotnik-lib/src/graph/mod.rs | 4 +- .../plotnik-lib/src/graph/optimize_tests.rs | 223 +++--------- crates/plotnik-lib/src/graph/typing.rs | 66 ---- crates/plotnik-lib/src/graph/typing_tests.rs | 4 +- 7 files changed, 446 insertions(+), 524 deletions(-) create mode 100644 crates/plotnik-lib/src/graph/dump.rs diff --git a/crates/plotnik-lib/src/graph/build_tests.rs b/crates/plotnik-lib/src/graph/build_tests.rs index b5e524a1..69cde419 100644 --- a/crates/plotnik-lib/src/graph/build_tests.rs +++ b/crates/plotnik-lib/src/graph/build_tests.rs @@ -2,88 +2,6 @@ use super::*; -fn dump_graph(graph: &BuildGraph) -> String { - let mut out = String::new(); - - for (name, entry) in graph.definitions() { - out.push_str(&format!("{} = N{}\n", name, entry)); - } - if graph.definitions().next().is_some() { - out.push('\n'); - } - - for (id, node) in graph.iter() { - out.push_str(&format!("N{}: ", id)); - - // Matcher - match &node.matcher { - BuildMatcher::Epsilon => out.push('ε'), - BuildMatcher::Node { - kind, - field, - negated_fields, - } => { - out.push_str(&format!("({})", kind)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - for neg in negated_fields { - out.push_str(&format!(" !{}", neg)); - } - } - BuildMatcher::Anonymous { literal, field } => { - out.push_str(&format!("\"{}\"", literal)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - BuildMatcher::Wildcard { field } => { 
- out.push('_'); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - } - - // Ref marker - match &node.ref_marker { - RefMarker::None => {} - RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", ref_id)), - RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), - } - - // Effects - for effect in &node.effects { - let eff = match effect { - BuildEffect::CaptureNode => "Capture".to_string(), - BuildEffect::StartArray => "StartArray".to_string(), - BuildEffect::PushElement => "Push".to_string(), - BuildEffect::EndArray => "EndArray".to_string(), - BuildEffect::StartObject => "StartObj".to_string(), - BuildEffect::EndObject => "EndObj".to_string(), - BuildEffect::Field(f) => format!("Field({})", f), - BuildEffect::StartVariant(v) => format!("Variant({})", v), - BuildEffect::EndVariant => "EndVariant".to_string(), - BuildEffect::ToString => "ToString".to_string(), - }; - out.push_str(&format!(" [{}]", eff)); - } - - // Successors - if node.successors.is_empty() { - out.push_str(" → ∅"); - } else { - out.push_str(" → "); - let succs: Vec<_> = node.successors.iter().map(|s| format!("N{}", s)).collect(); - out.push_str(&succs.join(", ")); - } - - out.push('\n'); - } - - out -} - #[test] fn single_matcher() { let mut g = BuildGraph::new(); @@ -91,9 +9,9 @@ fn single_matcher() { let frag = g.matcher_fragment(BuildMatcher::node("identifier")); assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → ∅ - "#); + "); } #[test] @@ -103,9 +21,9 @@ fn epsilon_fragment() { let frag = g.epsilon_fragment(); assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: ε → ∅ - "#); + "); } #[test] @@ -115,9 +33,9 @@ fn sequence_empty() { let frag = g.sequence(&[]); assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), 
@r" N0: ε → ∅ - "#); + "); } #[test] @@ -128,9 +46,9 @@ fn sequence_single() { let frag = g.sequence(&[f1]); assert_eq!(frag, f1); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → ∅ - "#); + "); } #[test] @@ -143,10 +61,10 @@ fn sequence_two() { assert_eq!(frag.entry, f1.entry); assert_eq!(frag.exit, f2.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → N1 N1: (number) → ∅ - "#); + "); } #[test] @@ -160,11 +78,11 @@ fn sequence_three() { assert_eq!(frag.entry, f1.entry); assert_eq!(frag.exit, f3.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (a) → N1 N1: (b) → N2 N2: (c) → ∅ - "#); + "); } #[test] @@ -174,9 +92,9 @@ fn alternation_empty() { let frag = g.alternation(&[]); assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: ε → ∅ - "#); + "); } #[test] @@ -187,9 +105,9 @@ fn alternation_single() { let frag = g.alternation(&[f1]); assert_eq!(frag, f1); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → ∅ - "#); + "); } #[test] @@ -201,12 +119,12 @@ fn alternation_two() { let frag = g.alternation(&[f1, f2]); // Entry connects to both branches, both branches connect to exit - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → N3 N1: (number) → N3 N2: ε → N0, N1 N3: ε → ∅ - "#); + "); assert_eq!(frag.entry, 2); assert_eq!(frag.exit, 3); } @@ -219,11 +137,11 @@ fn zero_or_more_greedy() { let frag = g.zero_or_more(inner); // Greedy: branch tries inner first, then exit - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N1 N1: ε → N0, N2 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 1); // branch node assert_eq!(frag.exit, 2); } @@ -236,11 +154,11 @@ fn zero_or_more_lazy() { let 
frag = g.zero_or_more_lazy(inner); // Non-greedy: branch tries exit first, then inner - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N1 N1: ε → N2, N0 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 1); assert_eq!(frag.exit, 2); } @@ -253,11 +171,11 @@ fn one_or_more_greedy() { let frag = g.one_or_more(inner); // Entry is inner, greedy branch after - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N1 N1: ε → N0, N2 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 0); // inner node assert_eq!(frag.exit, 2); } @@ -270,11 +188,11 @@ fn one_or_more_lazy() { let frag = g.one_or_more_lazy(inner); // Entry is inner, non-greedy branch after - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N1 N1: ε → N2, N0 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 0); assert_eq!(frag.exit, 2); } @@ -287,11 +205,11 @@ fn optional_greedy() { let frag = g.optional(inner); // Greedy: branch tries inner first - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N2 N1: ε → N0, N2 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 1); assert_eq!(frag.exit, 2); } @@ -304,11 +222,11 @@ fn optional_lazy() { let frag = g.optional_lazy(inner); // Non-greedy: branch skips first - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N2 N1: ε → N2, N0 N2: ε → ∅ - "#); + "); assert_eq!(frag.entry, 1); assert_eq!(frag.exit, 2); } @@ -319,9 +237,9 @@ fn matcher_with_field() { g.matcher_fragment(BuildMatcher::node("identifier").with_field("name")); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) @name → ∅ - "#); + "); } #[test] @@ -334,9 +252,9 @@ fn matcher_with_negated_fields() { .with_negated_field("type_arguments"), ); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: 
(call) !arguments !type_arguments → ∅ - "#); + "); } #[test] @@ -345,7 +263,7 @@ fn anonymous_matcher() { g.matcher_fragment(BuildMatcher::anonymous("+")); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" N0: "+" → ∅ "#); } @@ -356,9 +274,9 @@ fn wildcard_matcher() { g.matcher_fragment(BuildMatcher::wildcard()); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: _ → ∅ - "#); + "); } #[test] @@ -368,9 +286,9 @@ fn node_with_effects() { g.node_mut(id).add_effect(BuildEffect::CaptureNode); g.node_mut(id).add_effect(BuildEffect::Field("name")); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) [Capture] [Field(name)] → ∅ - "#); + "); } #[test] @@ -384,10 +302,10 @@ fn node_with_ref_marker() { g.connect(enter, exit); - insta::assert_snapshot!(dump_graph(&g), @r#" - N0: ε +Enter(0) → N1 + insta::assert_snapshot!(g.dump(), @r" + N0: ε +Enter(0, ?) → N1 N1: ε +Exit(0) → ∅ - "#); + "); } #[test] @@ -403,13 +321,13 @@ fn definition_registration() { assert_eq!(g.definition("Num"), Some(1)); assert_eq!(g.definition("Unknown"), None); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" Ident = N0 Num = N1 N0: (identifier) → ∅ N1: (number) → ∅ - "#); + "); } #[test] @@ -428,7 +346,7 @@ fn complex_nested_structure() { g.add_definition("Func", func.entry); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" Func = N4 N0: (identifier) → N1 @@ -436,5 +354,5 @@ fn complex_nested_structure() { N2: ε → N3 N3: (block) → ∅ N4: (func) → N0 - "#); + "); } diff --git a/crates/plotnik-lib/src/graph/construct_tests.rs b/crates/plotnik-lib/src/graph/construct_tests.rs index 17f1bc5c..e136a964 100644 --- a/crates/plotnik-lib/src/graph/construct_tests.rs +++ b/crates/plotnik-lib/src/graph/construct_tests.rs @@ -1,7 +1,6 @@ //! Tests for AST-to-graph construction. 
-use crate::graph::{BuildEffect, BuildGraph, BuildMatcher, RefMarker}; -use crate::ir::{Nav, NavKind}; +use crate::graph::BuildGraph; use crate::parser::Parser; use crate::parser::lexer::lex; @@ -14,112 +13,6 @@ fn parse_and_construct(source: &str) -> BuildGraph<'_> { construct_graph(source, &result.root) } -fn dump_graph(graph: &BuildGraph) -> String { - let mut out = String::new(); - - for (name, entry) in graph.definitions() { - out.push_str(&format!("{} = N{}\n", name, entry)); - } - if graph.definitions().next().is_some() { - out.push('\n'); - } - - for (id, node) in graph.iter() { - out.push_str(&format!("N{}: ", id)); - - // Nav (skip Stay as it's the default) - if !node.nav.is_stay() { - let nav_str = format_nav(&node.nav); - out.push_str(&format!("[{}] ", nav_str)); - } - - // Matcher - match &node.matcher { - BuildMatcher::Epsilon => out.push('ε'), - BuildMatcher::Node { - kind, - field, - negated_fields, - } => { - out.push_str(&format!("({})", kind)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - for neg in negated_fields { - out.push_str(&format!(" !{}", neg)); - } - } - BuildMatcher::Anonymous { literal, field } => { - out.push_str(&format!("\"{}\"", literal)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - BuildMatcher::Wildcard { field } => { - out.push('_'); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - } - - // Ref marker - match &node.ref_marker { - RefMarker::None => {} - RefMarker::Enter { ref_id } => { - let name = node.ref_name.unwrap_or("?"); - out.push_str(&format!(" +Enter({}, {})", ref_id, name)); - } - RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), - } - - // Effects - for effect in &node.effects { - let eff = match effect { - BuildEffect::CaptureNode => "Capture".to_string(), - BuildEffect::StartArray => "StartArray".to_string(), - BuildEffect::PushElement => "Push".to_string(), - BuildEffect::EndArray => "EndArray".to_string(), - 
BuildEffect::StartObject => "StartObj".to_string(), - BuildEffect::EndObject => "EndObj".to_string(), - BuildEffect::Field(f) => format!("Field({})", f), - BuildEffect::StartVariant(v) => format!("Variant({})", v), - BuildEffect::EndVariant => "EndVariant".to_string(), - BuildEffect::ToString => "ToString".to_string(), - }; - out.push_str(&format!(" [{}]", eff)); - } - - // Successors - if node.successors.is_empty() { - out.push_str(" → ∅"); - } else { - out.push_str(" → "); - let succs: Vec<_> = node.successors.iter().map(|s| format!("N{}", s)).collect(); - out.push_str(&succs.join(", ")); - } - - out.push('\n'); - } - - out -} - -fn format_nav(nav: &Nav) -> String { - match nav.kind { - NavKind::Stay => "Stay".to_string(), - NavKind::Next => "Next".to_string(), - NavKind::NextSkipTrivia => "Next.".to_string(), - NavKind::NextExact => "Next!".to_string(), - NavKind::Down => "Down".to_string(), - NavKind::DownSkipTrivia => "Down.".to_string(), - NavKind::DownExact => "Down!".to_string(), - NavKind::Up => format!("Up({})", nav.level), - NavKind::UpSkipTrivia => format!("Up.({})", nav.level), - NavKind::UpExact => format!("Up!({})", nav.level), - } -} - // ───────────────────────────────────────────────────────────────────────────── // Basic Expressions // ───────────────────────────────────────────────────────────────────────────── @@ -128,7 +21,7 @@ fn format_nav(nav: &Nav) -> String { fn simple_named_node() { let g = parse_and_construct("Foo = (identifier)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (identifier) → ∅ @@ -139,7 +32,7 @@ fn simple_named_node() { fn anonymous_string() { let g = parse_and_construct(r#"Op = "+""#); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" Op = N0 N0: "+" → ∅ @@ -150,7 +43,7 @@ fn anonymous_string() { fn wildcard() { let g = parse_and_construct("Any = (_)"); - insta::assert_snapshot!(dump_graph(&g), @r" + 
insta::assert_snapshot!(g.dump(), @r" Any = N0 N0: _ → ∅ @@ -161,7 +54,7 @@ fn wildcard() { fn wildcard_underscore_literal() { let g = parse_and_construct("Any = _"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Any = N0 N0: _ → ∅ @@ -176,7 +69,7 @@ fn wildcard_underscore_literal() { fn nested_node() { let g = parse_and_construct("Foo = (call (identifier))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) → N1 @@ -189,7 +82,7 @@ fn nested_node() { fn deeply_nested() { let g = parse_and_construct("Foo = (a (b (c)))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (a) → N1 @@ -204,7 +97,7 @@ fn deeply_nested() { fn sibling_nodes() { let g = parse_and_construct("Foo = (call (identifier) (arguments))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) → N1 @@ -223,7 +116,7 @@ fn anchor_first_child() { // . before first child → DownSkipTrivia let g = parse_and_construct("Foo = (block . (statement))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (block) → N1 @@ -237,7 +130,7 @@ fn anchor_last_child() { // . after last child → UpSkipTrivia let g = parse_and_construct("Foo = (block (statement) .)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (block) → N1 @@ -251,7 +144,7 @@ fn anchor_adjacent_siblings() { // . between siblings → NextSkipTrivia let g = parse_and_construct("Foo = (block (a) . (b))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (block) → N1 @@ -266,7 +159,7 @@ fn anchor_both_ends() { // . at start and end let g = parse_and_construct("Foo = (array . 
(element) .)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (array) → N1 @@ -280,7 +173,7 @@ fn anchor_string_literal_first() { // . before string literal → DownExact let g = parse_and_construct(r#"Foo = (pair . ":" (value))"#); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" Foo = N0 N0: (pair) → N1 @@ -296,7 +189,7 @@ fn anchor_string_literal_adjacent() { // Actually the anchor affects the FOLLOWING node, so ":" has Down, "=" has Next! let g = parse_and_construct(r#"Foo = (assignment (id) "=" . (value))"#); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" Foo = N0 N0: (assignment) → N1 @@ -312,7 +205,7 @@ fn anchor_string_literal_last() { // . after string literal at end → UpExact let g = parse_and_construct(r#"Foo = (semi (stmt) ";" .)"#); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" Foo = N0 N0: (semi) → N1 @@ -330,7 +223,7 @@ fn anchor_string_literal_last() { fn field_constraint() { let g = parse_and_construct("Foo = (call name: (identifier))"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) → N1 @@ -343,7 +236,7 @@ fn field_constraint() { fn negated_field() { let g = parse_and_construct("Foo = (call !arguments)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) !arguments → ∅ @@ -354,7 +247,7 @@ fn negated_field() { fn multiple_negated_fields() { let g = parse_and_construct("Foo = (call !arguments !type_arguments)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) !arguments !type_arguments → ∅ @@ -369,7 +262,7 @@ fn multiple_negated_fields() { fn sequence_expr() { let g = parse_and_construct("Foo = { (a) (b) }"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: ε 
[StartObj] → N1 @@ -383,7 +276,7 @@ fn sequence_expr() { fn empty_sequence() { let g = parse_and_construct("Foo = { }"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: ε [StartObj] → N1 @@ -400,7 +293,7 @@ fn empty_sequence() { fn untagged_alternation() { let g = parse_and_construct("Foo = [(identifier) (number)]"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: ε → N2, N3 @@ -414,7 +307,7 @@ fn untagged_alternation() { fn tagged_alternation() { let g = parse_and_construct("Foo = [Ident: (identifier) Num: (number)]"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: ε → N2, N5 @@ -432,7 +325,7 @@ fn tagged_alternation() { fn single_branch_alt() { let g = parse_and_construct("Foo = [(identifier)]"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: ε → N2 @@ -449,7 +342,7 @@ fn single_branch_alt() { fn simple_capture() { let g = parse_and_construct("Foo = (identifier) @name"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (identifier) [Capture] → N1 @@ -461,7 +354,7 @@ fn simple_capture() { fn capture_with_string_type() { let g = parse_and_construct("Foo = (identifier) @name ::string"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (identifier) [Capture] [ToString] → N1 @@ -473,7 +366,7 @@ fn capture_with_string_type() { fn nested_capture() { let g = parse_and_construct("Foo = (call name: (identifier) @fn_name)"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N0 N0: (call) → N1 @@ -491,7 +384,7 @@ fn nested_capture() { fn zero_or_more() { let g = parse_and_construct("Foo = (identifier)*"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N1 N0: (identifier) → N3 @@ -506,7 
+399,7 @@ fn zero_or_more() { fn one_or_more() { let g = parse_and_construct("Foo = (identifier)+"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N1 N0: (identifier) → N2 @@ -521,7 +414,7 @@ fn one_or_more() { fn optional() { let g = parse_and_construct("Foo = (identifier)?"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N1 N0: (identifier) → N2 @@ -534,7 +427,7 @@ fn optional() { fn lazy_zero_or_more() { let g = parse_and_construct("Foo = (identifier)*?"); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Foo = N1 N0: (identifier) → N3 @@ -558,7 +451,7 @@ fn simple_reference() { ", ); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Ident = N0 Foo = N1 @@ -579,7 +472,7 @@ fn multiple_references() { ", ); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Expr = N0 Foo = N4 @@ -610,7 +503,7 @@ fn multiple_definitions() { ", ); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Ident = N0 Num = N1 Str = N2 @@ -636,7 +529,7 @@ fn function_pattern() { ", ); - insta::assert_snapshot!(dump_graph(&g), @r" + insta::assert_snapshot!(g.dump(), @r" Func = N0 N0: (function_definition) → N1 @@ -666,7 +559,7 @@ fn binary_expression_pattern() { "#, ); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r#" BinOp = N0 N0: (binary_expression) → N1 diff --git a/crates/plotnik-lib/src/graph/dump.rs b/crates/plotnik-lib/src/graph/dump.rs new file mode 100644 index 00000000..23b385e7 --- /dev/null +++ b/crates/plotnik-lib/src/graph/dump.rs @@ -0,0 +1,322 @@ +//! Dump helpers for graph inspection and testing. +//! +//! Provides formatted output for `BuildGraph` and `TypeInferenceResult` +//! suitable for snapshot testing and debugging. 
+ +use super::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker, TypeInferenceResult}; +use crate::ir::{Nav, NavKind, TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId}; +use std::collections::HashSet; +use std::fmt::Write; + +/// Printer for `BuildGraph` with configurable output options. +pub struct GraphPrinter<'a, 'src> { + graph: &'a BuildGraph<'src>, + dead_nodes: Option<&'a HashSet>, + show_dead: bool, +} + +impl<'a, 'src> GraphPrinter<'a, 'src> { + pub fn new(graph: &'a BuildGraph<'src>) -> Self { + Self { + graph, + dead_nodes: None, + show_dead: false, + } + } + + /// Mark nodes as dead (from optimization pass). + pub fn with_dead_nodes(mut self, dead: &'a HashSet) -> Self { + self.dead_nodes = Some(dead); + self + } + + /// Show dead nodes (struck through or marked). + pub fn show_dead(mut self, show: bool) -> Self { + self.show_dead = show; + self + } + + /// Filter dead nodes from successor lists. + pub fn filter_dead_successors(self) -> Self { + // This is controlled by dead_nodes being set + self + } + + pub fn dump(&self) -> String { + let mut out = String::new(); + self.format(&mut out).expect("String write never fails"); + out + } + + fn format(&self, w: &mut String) -> std::fmt::Result { + // Definitions header + for (name, entry) in self.graph.definitions() { + writeln!(w, "{} = N{}", name, entry)?; + } + if self.graph.definitions().next().is_some() { + writeln!(w)?; + } + + // Nodes + for (id, node) in self.graph.iter() { + let is_dead = self.dead_nodes.map(|d| d.contains(&id)).unwrap_or(false); + + if is_dead && !self.show_dead { + continue; + } + + // Node header + if is_dead { + write!(w, "N{}: ✗ ", id)?; + } else { + write!(w, "N{}: ", id)?; + } + + // Navigation (skip Stay) + if !node.nav.is_stay() { + write!(w, "[{}] ", format_nav(&node.nav))?; + } + + // Matcher + self.format_matcher(w, &node.matcher)?; + + // Ref marker + match &node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => { + let name = 
node.ref_name.unwrap_or("?"); + write!(w, " +Enter({}, {})", ref_id, name)?; + } + RefMarker::Exit { ref_id } => { + write!(w, " +Exit({})", ref_id)?; + } + } + + // Effects + for effect in &node.effects { + write!(w, " [{}]", format_effect(effect))?; + } + + // Successors (filter dead nodes from list) + self.format_successors(w, &node.successors)?; + + writeln!(w)?; + } + + Ok(()) + } + + fn format_matcher(&self, w: &mut String, matcher: &BuildMatcher<'src>) -> std::fmt::Result { + match matcher { + BuildMatcher::Epsilon => write!(w, "ε"), + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + write!(w, "({})", kind)?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + for neg in negated_fields { + write!(w, " !{}", neg)?; + } + Ok(()) + } + BuildMatcher::Anonymous { literal, field } => { + write!(w, "\"{}\"", literal)?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + Ok(()) + } + BuildMatcher::Wildcard { field } => { + write!(w, "_")?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + Ok(()) + } + } + } + + fn format_successors(&self, w: &mut String, successors: &[NodeId]) -> std::fmt::Result { + // Filter out dead nodes from successor list + let live_succs: Vec<_> = successors + .iter() + .filter(|s| self.dead_nodes.map(|d| !d.contains(s)).unwrap_or(true)) + .collect(); + + if live_succs.is_empty() { + write!(w, " → ∅") + } else { + write!(w, " → ")?; + let succs: Vec<_> = live_succs.iter().map(|s| format!("N{}", s)).collect(); + write!(w, "{}", succs.join(", ")) + } + } +} + +fn format_nav(nav: &Nav) -> String { + match nav.kind { + NavKind::Stay => "Stay".to_string(), + NavKind::Next => "Next".to_string(), + NavKind::NextSkipTrivia => "Next.".to_string(), + NavKind::NextExact => "Next!".to_string(), + NavKind::Down => "Down".to_string(), + NavKind::DownSkipTrivia => "Down.".to_string(), + NavKind::DownExact => "Down!".to_string(), + NavKind::Up => format!("Up({})", nav.level), + NavKind::UpSkipTrivia => format!("Up.({})", 
nav.level), + NavKind::UpExact => format!("Up!({})", nav.level), + } +} + +fn format_effect(effect: &BuildEffect) -> String { + match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// BuildGraph dump methods +// ───────────────────────────────────────────────────────────────────────────── + +impl<'src> BuildGraph<'src> { + /// Create a printer for this graph. + pub fn printer(&self) -> GraphPrinter<'_, 'src> { + GraphPrinter::new(self) + } + + /// Dump graph in default format. + pub fn dump(&self) -> String { + self.printer().dump() + } + + /// Dump graph showing dead nodes from optimization. + pub fn dump_with_dead(&self, dead_nodes: &HashSet) -> String { + self.printer() + .with_dead_nodes(dead_nodes) + .show_dead(true) + .dump() + } + + /// Dump only live nodes (dead nodes filtered out completely). + pub fn dump_live(&self, dead_nodes: &HashSet) -> String { + self.printer().with_dead_nodes(dead_nodes).dump() + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// TypeInferenceResult dump +// ───────────────────────────────────────────────────────────────────────────── + +impl TypeInferenceResult<'_> { + /// Dump inferred types for debugging/testing. 
+ pub fn dump(&self) -> String { + let mut out = String::new(); + + out.push_str("=== Entrypoints ===\n"); + for (name, type_id) in &self.entrypoint_types { + out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); + } + + if !self.type_defs.is_empty() { + out.push_str("\n=== Types ===\n"); + for (idx, def) in self.type_defs.iter().enumerate() { + let type_id = idx as TypeId + 3; + let name = def.name.unwrap_or(""); + out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); + + if let Some(inner) = def.inner_type { + out.push_str(&format!(" → {}", format_type_id(inner))); + } + + if !def.members.is_empty() { + out.push_str(" {\n"); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push('}'); + } + out.push('\n'); + } + } + + if !self.errors.is_empty() { + out.push_str("\n=== Errors ===\n"); + for err in &self.errors { + out.push_str(&format!( + "field `{}` in `{}`: incompatible types [{}]\n", + err.field, + err.definition, + err.types_found + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + )); + } + } + + out + } +} + +fn format_type_id(id: TypeId) -> String { + if id == TYPE_VOID { + "Void".to_string() + } else if id == TYPE_NODE { + "Node".to_string() + } else if id == TYPE_STR { + "String".to_string() + } else { + format!("T{}", id) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Test-only dump helpers +// ───────────────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod test_helpers { + use super::*; + + impl<'src> BuildGraph<'src> { + /// Dump graph for snapshot tests. + pub fn dump_graph(&self) -> String { + self.dump() + } + + /// Dump graph with optimization info. + pub fn dump_optimized(&self, dead_nodes: &HashSet) -> String { + self.printer().with_dead_nodes(dead_nodes).dump() + } + } + + impl TypeInferenceResult<'_> { + /// Dump types for snapshot tests. 
+ pub fn dump_types(&self) -> String { + self.dump() + } + + /// Check if inference has errors. + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } + } +} diff --git a/crates/plotnik-lib/src/graph/mod.rs b/crates/plotnik-lib/src/graph/mod.rs index 6d956b36..9cef1e42 100644 --- a/crates/plotnik-lib/src/graph/mod.rs +++ b/crates/plotnik-lib/src/graph/mod.rs @@ -16,6 +16,7 @@ mod analysis; mod build; mod construct; +mod dump; mod optimize; mod typing; @@ -33,8 +34,9 @@ mod typing_tests; pub use analysis::{AnalysisResult, StringInterner, analyze}; pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; pub use construct::{GraphConstructor, construct_graph}; +pub use dump::GraphPrinter; pub use optimize::{OptimizeStats, eliminate_epsilons}; pub use typing::{ InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, - dump_types, infer_types, + infer_types, }; diff --git a/crates/plotnik-lib/src/graph/optimize_tests.rs b/crates/plotnik-lib/src/graph/optimize_tests.rs index de915ebb..d7c3d59d 100644 --- a/crates/plotnik-lib/src/graph/optimize_tests.rs +++ b/crates/plotnik-lib/src/graph/optimize_tests.rs @@ -1,152 +1,7 @@ //! Tests for epsilon elimination optimization pass. 
-use std::collections::HashSet; - use super::*; -use crate::graph::{BuildEffect, BuildMatcher, NodeId, RefMarker}; - -fn dump_graph(graph: &BuildGraph) -> String { - let mut out = String::new(); - - for (name, entry) in graph.definitions() { - out.push_str(&format!("{} = N{}\n", name, entry)); - } - if graph.definitions().next().is_some() { - out.push('\n'); - } - - for (id, node) in graph.iter() { - out.push_str(&format!("N{}: ", id)); - - match &node.matcher { - BuildMatcher::Epsilon => out.push('ε'), - BuildMatcher::Node { - kind, - field, - negated_fields, - } => { - out.push_str(&format!("({})", kind)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - for neg in negated_fields { - out.push_str(&format!(" !{}", neg)); - } - } - BuildMatcher::Anonymous { literal, field } => { - out.push_str(&format!("\"{}\"", literal)); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - BuildMatcher::Wildcard { field } => { - out.push('_'); - if let Some(f) = field { - out.push_str(&format!(" @{}", f)); - } - } - } - - match &node.ref_marker { - RefMarker::None => {} - RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", ref_id)), - RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), - } - - for effect in &node.effects { - let eff = match effect { - BuildEffect::CaptureNode => "Capture".to_string(), - BuildEffect::StartArray => "StartArray".to_string(), - BuildEffect::PushElement => "Push".to_string(), - BuildEffect::EndArray => "EndArray".to_string(), - BuildEffect::StartObject => "StartObj".to_string(), - BuildEffect::EndObject => "EndObj".to_string(), - BuildEffect::Field(f) => format!("Field({})", f), - BuildEffect::StartVariant(v) => format!("Variant({})", v), - BuildEffect::EndVariant => "EndVariant".to_string(), - BuildEffect::ToString => "ToString".to_string(), - }; - out.push_str(&format!(" [{}]", eff)); - } - - if node.successors.is_empty() { - out.push_str(" → ∅"); - } else { - 
out.push_str(" → "); - let succs: Vec<_> = node.successors.iter().map(|s| format!("N{}", s)).collect(); - out.push_str(&succs.join(", ")); - } - - out.push('\n'); - } - - out -} - -fn dump_live_graph(graph: &BuildGraph, dead: &HashSet) -> String { - let mut out = String::new(); - - for (name, entry) in graph.definitions() { - out.push_str(&format!("{} = N{}\n", name, entry)); - } - if graph.definitions().next().is_some() { - out.push('\n'); - } - - for (id, node) in graph.iter() { - if dead.contains(&id) { - continue; - } - - out.push_str(&format!("N{}: ", id)); - - match &node.matcher { - BuildMatcher::Epsilon => out.push('ε'), - BuildMatcher::Node { kind, .. } => out.push_str(&format!("({})", kind)), - BuildMatcher::Anonymous { literal, .. } => out.push_str(&format!("\"{}\"", literal)), - BuildMatcher::Wildcard { .. } => out.push('_'), - } - - match &node.ref_marker { - RefMarker::None => {} - RefMarker::Enter { ref_id } => out.push_str(&format!(" +Enter({})", ref_id)), - RefMarker::Exit { ref_id } => out.push_str(&format!(" +Exit({})", ref_id)), - } - - for effect in &node.effects { - let eff = match effect { - BuildEffect::CaptureNode => "Capture".to_string(), - BuildEffect::StartArray => "StartArray".to_string(), - BuildEffect::PushElement => "Push".to_string(), - BuildEffect::EndArray => "EndArray".to_string(), - BuildEffect::StartObject => "StartObj".to_string(), - BuildEffect::EndObject => "EndObj".to_string(), - BuildEffect::Field(f) => format!("Field({})", f), - BuildEffect::StartVariant(v) => format!("Variant({})", v), - BuildEffect::EndVariant => "EndVariant".to_string(), - BuildEffect::ToString => "ToString".to_string(), - }; - out.push_str(&format!(" [{}]", eff)); - } - - if node.successors.is_empty() { - out.push_str(" → ∅"); - } else { - out.push_str(" → "); - let succs: Vec<_> = node - .successors - .iter() - .filter(|s| !dead.contains(s)) - .map(|s| format!("N{}", s)) - .collect(); - out.push_str(&succs.join(", ")); - } - - out.push('\n'); - } - - 
out -} +use crate::graph::{BuildEffect, BuildMatcher, RefMarker}; #[test] fn eliminates_simple_epsilon_chain() { @@ -159,18 +14,18 @@ fn eliminates_simple_epsilon_chain() { g.connect(e2, e1); g.connect(e1, id); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) → ∅ N1: ε → N0 N2: ε → N1 - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); assert_eq!(stats.epsilons_eliminated, 2); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (identifier) → ∅ - "#); + "); } #[test] @@ -184,21 +39,21 @@ fn keeps_branch_point_epsilon() { g.connect(branch, a); g.connect(branch, b); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (a) → ∅ N1: (b) → ∅ N2: ε → N0, N1 - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); assert_eq!(stats.epsilons_eliminated, 0); assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (a) → ∅ N1: (b) → ∅ N2: ε → N0, N1 - "#); + "); } #[test] @@ -210,19 +65,19 @@ fn keeps_epsilon_with_enter_marker() { g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); g.connect(enter, target); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (target) → ∅ - N1: ε +Enter(0) → N0 - "#); + N1: ε +Enter(0, ?) → N0 + "); let (dead, stats) = eliminate_epsilons(&mut g); assert_eq!(stats.epsilons_eliminated, 0); assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (target) → ∅ - N1: ε +Enter(0) → N0 - "#); + N1: ε +Enter(0, ?) 
→ N0 + "); } #[test] @@ -257,18 +112,18 @@ fn merges_effects_into_successor() { g.node_mut(start_arr).add_effect(BuildEffect::StartArray); g.connect(start_arr, end_arr); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (identifier) [Capture] → ∅ N1: ε [EndArray] → N0 N2: ε [StartArray] → N1 - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); assert_eq!(stats.epsilons_eliminated, 2); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (identifier) [StartArray] [EndArray] [Capture] → ∅ - "#); + "); } #[test] @@ -286,21 +141,21 @@ fn redirects_multiple_predecessors() { g.connect(a, eps); g.connect(b, eps); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (c) → ∅ N1: ε → N0 N2: (a) → N1 N3: (b) → N1 - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); assert_eq!(stats.epsilons_eliminated, 1); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (c) → ∅ N2: (a) → N0 N3: (b) → N0 - "#); + "); } #[test] @@ -313,22 +168,22 @@ fn updates_definition_entry_point() { g.connect(eps, id); g.add_definition("Def", eps); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" Def = N1 N0: (identifier) → ∅ N1: ε → N0 - "#); + "); let (dead, _stats) = eliminate_epsilons(&mut g); // Definition should now point to identifier node assert_eq!(g.definition("Def"), Some(0)); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" Def = N0 N0: (identifier) → ∅ - "#); + "); } #[test] @@ -355,22 +210,22 @@ fn quantifier_preserves_branch_structure() { let inner = g.matcher_fragment(BuildMatcher::node("item")); let _frag = g.zero_or_more(inner); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (item) → N1 N1: ε → N0, N2 N2: ε → ∅ - "#); + "); let (dead, 
stats) = eliminate_epsilons(&mut g); // Branch (N1) must remain, exit (N2) can't be eliminated (no successor) assert_eq!(stats.epsilons_kept, 2); assert_eq!(stats.epsilons_eliminated, 0); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (item) → N1 N1: ε → N0, N2 N2: ε → ∅ - "#); + "); } #[test] @@ -385,25 +240,25 @@ fn alternation_exit_epsilon_eliminated() { let final_node = g.add_matcher(BuildMatcher::node("end")); g.connect(frag.exit, final_node); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (a) → N3 N1: (b) → N3 N2: ε → N0, N1 N3: ε → N4 N4: (end) → ∅ - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); // Exit epsilon (N3) should be eliminated, branch (N2) kept assert_eq!(stats.epsilons_eliminated, 1); assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (a) → N4 N1: (b) → N4 N2: ε → N0, N1 N4: (end) → ∅ - "#); + "); } #[test] @@ -420,22 +275,22 @@ fn does_not_merge_effects_into_ref_marker() { g.node_mut(field_eps).add_effect(BuildEffect::Field("name")); g.connect(field_eps, exit); - insta::assert_snapshot!(dump_graph(&g), @r#" + insta::assert_snapshot!(g.dump(), @r" N0: (target) → ∅ N1: ε +Exit(0) → N0 N2: ε [Field(name)] → N1 - "#); + "); let (dead, stats) = eliminate_epsilons(&mut g); // Should NOT merge Field effect into Exit node assert_eq!(stats.epsilons_kept, 2); assert_eq!(stats.epsilons_eliminated, 0); - insta::assert_snapshot!(dump_live_graph(&g, &dead), @r#" + insta::assert_snapshot!(g.dump_live(&dead), @r" N0: (target) → ∅ N1: ε +Exit(0) → N0 N2: ε [Field(name)] → N1 - "#); + "); } #[test] diff --git a/crates/plotnik-lib/src/graph/typing.rs b/crates/plotnik-lib/src/graph/typing.rs index 462200ba..26073878 100644 --- a/crates/plotnik-lib/src/graph/typing.rs +++ b/crates/plotnik-lib/src/graph/typing.rs @@ -575,69 +575,3 @@ pub fn 
infer_types<'src>( errors: ctx.errors, } } - -/// Format inferred types for debugging/testing. -pub fn dump_types(result: &TypeInferenceResult) -> String { - let mut out = String::new(); - - out.push_str("=== Entrypoints ===\n"); - for (name, type_id) in &result.entrypoint_types { - out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); - } - - if !result.type_defs.is_empty() { - out.push_str("\n=== Types ===\n"); - for (idx, def) in result.type_defs.iter().enumerate() { - let type_id = idx as TypeId + 3; - let name = def.name.unwrap_or(""); - out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); - - if let Some(inner) = def.inner_type { - out.push_str(&format!(" → {}", format_type_id(inner))); - } - - if !def.members.is_empty() { - out.push_str(" {\n"); - for member in &def.members { - out.push_str(&format!( - " {}: {}\n", - member.name, - format_type_id(member.ty) - )); - } - out.push('}'); - } - out.push('\n'); - } - } - - if !result.errors.is_empty() { - out.push_str("\n=== Errors ===\n"); - for err in &result.errors { - out.push_str(&format!( - "field `{}` in `{}`: incompatible types [{}]\n", - err.field, - err.definition, - err.types_found - .iter() - .map(|t| t.to_string()) - .collect::>() - .join(", ") - )); - } - } - - out -} - -fn format_type_id(id: TypeId) -> String { - if id == TYPE_VOID { - "Void".to_string() - } else if id == TYPE_NODE { - "Node".to_string() - } else if id == TYPE_STR { - "String".to_string() - } else { - format!("T{}", id) - } -} diff --git a/crates/plotnik-lib/src/graph/typing_tests.rs b/crates/plotnik-lib/src/graph/typing_tests.rs index 090cd283..883cbd4b 100644 --- a/crates/plotnik-lib/src/graph/typing_tests.rs +++ b/crates/plotnik-lib/src/graph/typing_tests.rs @@ -5,8 +5,6 @@ use crate::parser::Parser; use crate::parser::lexer::lex; use std::collections::HashSet; -use super::dump_types; - fn infer(source: &str) -> String { let tokens = lex(source); let parser = Parser::new(source, tokens); @@ -15,7 +13,7 
@@ fn infer(source: &str) -> String { let dead_nodes = HashSet::new(); let inference = infer_types(&graph, &dead_nodes); - dump_types(&inference) + inference.dump() } #[test] From 60812219d24e474b9fa5bb149f60ad16818f6a03 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 13:54:05 -0300 Subject: [PATCH 06/23] Add type inference error tracking with spans --- crates/plotnik-lib/src/diagnostics/message.rs | 7 ++ crates/plotnik-lib/src/graph/analysis.rs | 2 +- .../plotnik-lib/src/graph/analysis_tests.rs | 12 ++- crates/plotnik-lib/src/graph/build.rs | 3 +- crates/plotnik-lib/src/graph/build_tests.rs | 7 +- crates/plotnik-lib/src/graph/construct.rs | 9 +- crates/plotnik-lib/src/graph/dump.rs | 17 ++-- .../plotnik-lib/src/graph/optimize_tests.rs | 7 +- crates/plotnik-lib/src/graph/typing.rs | 70 +++++++++++++-- crates/plotnik-lib/src/graph/typing_tests.rs | 90 +++++++++++-------- 10 files changed, 165 insertions(+), 59 deletions(-) diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index 027c5468..0106c6c3 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -61,6 +61,9 @@ pub enum DiagnosticKind { DirectRecursion, FieldSequenceValue, + // Type inference errors + IncompatibleTypes, + // Link pass - grammar validation UnknownNodeType, UnknownField, @@ -166,6 +169,9 @@ impl DiagnosticKind { Self::DirectRecursion => "infinite recursion: cycle consumes no input", Self::FieldSequenceValue => "field must match exactly one node", + // Type inference + Self::IncompatibleTypes => "incompatible types in alternation branches", + // Link pass - grammar validation Self::UnknownNodeType => "unknown node type", Self::UnknownField => "unknown field", @@ -192,6 +198,7 @@ impl DiagnosticKind { // Semantic errors with name context Self::DuplicateDefinition => "`{}` is already defined".to_string(), Self::UndefinedReference => "`{}` is not 
defined".to_string(), + Self::IncompatibleTypes => "incompatible types: {}".to_string(), // Link pass errors with context Self::UnknownNodeType => "`{}` is not a valid node type".to_string(), diff --git a/crates/plotnik-lib/src/graph/analysis.rs b/crates/plotnik-lib/src/graph/analysis.rs index d7bde1f8..0484ba17 100644 --- a/crates/plotnik-lib/src/graph/analysis.rs +++ b/crates/plotnik-lib/src/graph/analysis.rs @@ -162,7 +162,7 @@ pub fn analyze<'src>( // Intern strings from effects for effect in &node.effects { match effect { - BuildEffect::Field(name) => { + BuildEffect::Field { name, .. } => { strings.intern(name); } BuildEffect::StartVariant(tag) => { diff --git a/crates/plotnik-lib/src/graph/analysis_tests.rs b/crates/plotnik-lib/src/graph/analysis_tests.rs index 4370048c..c6143444 100644 --- a/crates/plotnik-lib/src/graph/analysis_tests.rs +++ b/crates/plotnik-lib/src/graph/analysis_tests.rs @@ -2,6 +2,8 @@ use std::collections::HashSet; +use rowan::TextRange; + use super::*; use crate::graph::{BuildEffect, BuildGraph, BuildMatcher, RefMarker}; @@ -94,7 +96,10 @@ fn analyze_counts_effects() { let mut g = BuildGraph::new(); let id = g.add_matcher(BuildMatcher::node("identifier")); g.node_mut(id).add_effect(BuildEffect::CaptureNode); - g.node_mut(id).add_effect(BuildEffect::Field("name")); + g.node_mut(id).add_effect(BuildEffect::Field { + name: "name", + span: TextRange::default(), + }); g.node_mut(id).add_effect(BuildEffect::ToString); let dead = HashSet::new(); @@ -190,7 +195,10 @@ fn analyze_deduplicates_across_sources() { // "name" appears as: node kind, field constraint, effect field, definition name let n0 = g.add_matcher(BuildMatcher::node("name").with_field("name")); - g.node_mut(n0).add_effect(BuildEffect::Field("name")); + g.node_mut(n0).add_effect(BuildEffect::Field { + name: "name", + span: TextRange::default(), + }); g.add_definition("name", n0); let dead = HashSet::new(); diff --git a/crates/plotnik-lib/src/graph/build.rs 
b/crates/plotnik-lib/src/graph/build.rs index d656f7d2..465addad 100644 --- a/crates/plotnik-lib/src/graph/build.rs +++ b/crates/plotnik-lib/src/graph/build.rs @@ -5,6 +5,7 @@ use crate::ir::Nav; use indexmap::IndexMap; +use rowan::TextRange; /// Index into `BuildGraph::nodes`. pub type NodeId = u32; @@ -533,7 +534,7 @@ pub enum BuildEffect<'src> { EndObject, /// Move current value into top object at field. - Field(&'src str), + Field { name: &'src str, span: TextRange }, /// Push variant container with tag onto stack. StartVariant(&'src str), diff --git a/crates/plotnik-lib/src/graph/build_tests.rs b/crates/plotnik-lib/src/graph/build_tests.rs index 69cde419..26aae297 100644 --- a/crates/plotnik-lib/src/graph/build_tests.rs +++ b/crates/plotnik-lib/src/graph/build_tests.rs @@ -1,5 +1,7 @@ //! Tests for BuildGraph construction and fragment combinators. +use rowan::TextRange; + use super::*; #[test] @@ -284,7 +286,10 @@ fn node_with_effects() { let mut g = BuildGraph::new(); let id = g.add_matcher(BuildMatcher::node("identifier")); g.node_mut(id).add_effect(BuildEffect::CaptureNode); - g.node_mut(id).add_effect(BuildEffect::Field("name")); + g.node_mut(id).add_effect(BuildEffect::Field { + name: "name", + span: TextRange::default(), + }); insta::assert_snapshot!(g.dump(), @r" N0: (identifier) [Capture] [Field(name)] → ∅ diff --git a/crates/plotnik-lib/src/graph/construct.rs b/crates/plotnik-lib/src/graph/construct.rs index cf9454e5..7d71b34e 100644 --- a/crates/plotnik-lib/src/graph/construct.rs +++ b/crates/plotnik-lib/src/graph/construct.rs @@ -433,7 +433,8 @@ impl<'src> GraphConstructor<'src> { let inner_frag = self.construct_expr(&inner_expr, ctx); - let capture_name = cap.name().map(|t| token_src(&t, self.source)); + let capture_token = cap.name(); + let capture_name = capture_token.as_ref().map(|t| token_src(t, self.source)); let has_to_string = cap .type_annotation() @@ -457,10 +458,14 @@ impl<'src> GraphConstructor<'src> { // Add Field effect at exit if let 
Some(name) = capture_name { + let span = capture_token + .as_ref() + .map(|t| t.text_range()) + .unwrap_or_default(); let field_id = self.graph.add_epsilon(); self.graph .node_mut(field_id) - .add_effect(BuildEffect::Field(name)); + .add_effect(BuildEffect::Field { name, span }); self.graph.connect(inner_frag.exit, field_id); Fragment::new(inner_frag.entry, field_id) } else { diff --git a/crates/plotnik-lib/src/graph/dump.rs b/crates/plotnik-lib/src/graph/dump.rs index 23b385e7..393cecfd 100644 --- a/crates/plotnik-lib/src/graph/dump.rs +++ b/crates/plotnik-lib/src/graph/dump.rs @@ -180,7 +180,7 @@ fn format_effect(effect: &BuildEffect) -> String { BuildEffect::EndArray => "EndArray".to_string(), BuildEffect::StartObject => "StartObj".to_string(), BuildEffect::EndObject => "EndObj".to_string(), - BuildEffect::Field(f) => format!("Field({})", f), + BuildEffect::Field { name, .. } => format!("Field({})", name), BuildEffect::StartVariant(v) => format!("Variant({})", v), BuildEffect::EndVariant => "EndVariant".to_string(), BuildEffect::ToString => "ToString".to_string(), @@ -274,6 +274,16 @@ impl TypeInferenceResult<'_> { out } + + /// Render diagnostics for display (used in tests and CLI). + pub fn dump_diagnostics(&self, source: &str) -> String { + self.diagnostics.render_filtered(source) + } + + /// Check if inference produced any errors. + pub fn has_errors(&self) -> bool { + self.diagnostics.has_errors() + } } fn format_type_id(id: TypeId) -> String { @@ -313,10 +323,5 @@ mod test_helpers { pub fn dump_types(&self) -> String { self.dump() } - - /// Check if inference has errors. - pub fn has_errors(&self) -> bool { - !self.errors.is_empty() - } } } diff --git a/crates/plotnik-lib/src/graph/optimize_tests.rs b/crates/plotnik-lib/src/graph/optimize_tests.rs index d7c3d59d..c5ce7e98 100644 --- a/crates/plotnik-lib/src/graph/optimize_tests.rs +++ b/crates/plotnik-lib/src/graph/optimize_tests.rs @@ -1,5 +1,7 @@ //! Tests for epsilon elimination optimization pass. 
+use rowan::TextRange; + use super::*; use crate::graph::{BuildEffect, BuildMatcher, RefMarker}; @@ -272,7 +274,10 @@ fn does_not_merge_effects_into_ref_marker() { g.connect(exit, target); let field_eps = g.add_epsilon(); - g.node_mut(field_eps).add_effect(BuildEffect::Field("name")); + g.node_mut(field_eps).add_effect(BuildEffect::Field { + name: "name", + span: TextRange::default(), + }); g.connect(field_eps, exit); insta::assert_snapshot!(g.dump(), @r" diff --git a/crates/plotnik-lib/src/graph/typing.rs b/crates/plotnik-lib/src/graph/typing.rs index 26073878..8aff4b2f 100644 --- a/crates/plotnik-lib/src/graph/typing.rs +++ b/crates/plotnik-lib/src/graph/typing.rs @@ -20,8 +20,10 @@ //! - All incompatibilities are reported, not just the first use super::{BuildEffect, BuildGraph, NodeId}; +use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; use indexmap::IndexMap; +use rowan::TextRange; use std::collections::HashSet; /// Result of type inference on a BuildGraph. @@ -31,7 +33,10 @@ pub struct TypeInferenceResult<'src> { pub type_defs: Vec>, /// Mapping from definition name to its result TypeId. pub entrypoint_types: IndexMap<&'src str, TypeId>, + /// Type inference diagnostics. + pub diagnostics: Diagnostics, /// Type unification errors (incompatible types in alternation branches). + /// Kept for backward compatibility; diagnostics is the primary error channel. pub errors: Vec>, } @@ -44,6 +49,8 @@ pub struct UnificationError<'src> { pub definition: &'src str, /// Types found across branches (for error message). pub types_found: Vec, + /// Spans of the conflicting captures. + pub spans: Vec, } /// Human-readable type description for error messages. @@ -154,6 +161,8 @@ struct FieldInfo<'src> { branch_count: usize, /// All shapes seen at this field (for error reporting). all_shapes: Vec>, + /// Spans where this field was captured (for error reporting). 
+ spans: Vec, } /// Collected scope information from traversal. @@ -167,14 +176,21 @@ struct ScopeInfo<'src> { } impl<'src> ScopeInfo<'src> { - fn add_field(&mut self, name: &'src str, base_type: TypeId, cardinality: Cardinality) { + fn add_field( + &mut self, + name: &'src str, + base_type: TypeId, + cardinality: Cardinality, + span: TextRange, + ) { let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; if !existing.all_shapes.contains(&shape) { - existing.all_shapes.push(shape); + existing.all_shapes.push(shape.clone()); } + existing.spans.push(span); } else { self.fields.insert( name, @@ -184,6 +200,7 @@ impl<'src> ScopeInfo<'src> { cardinality, branch_count: 1, all_shapes: vec![shape], + spans: vec![span], }, ); } @@ -198,7 +215,10 @@ impl<'src> ScopeInfo<'src> { for (name, info) in other.fields { if let Some(existing) = self.fields.get_mut(name) { // Check type compatibility (1-level merge) - if let Some(err) = check_compatibility(&existing.shape, &info.shape, name) { + if let Some(mut err) = check_compatibility(&existing.shape, &info.shape, name) { + // Attach spans from both sides + err.spans = existing.spans.clone(); + err.spans.extend(info.spans.iter().cloned()); errors.push(err); // Collect all shapes for error reporting for shape in &info.all_shapes { @@ -207,6 +227,8 @@ impl<'src> ScopeInfo<'src> { } } } + // Always merge spans + existing.spans.extend(info.spans); existing.cardinality = existing.cardinality.join(info.cardinality); existing.branch_count += info.branch_count; } else { @@ -244,6 +266,7 @@ impl<'src> ScopeInfo<'src> { struct MergeError<'src> { field: &'src str, shapes: Vec>, + spans: Vec, } /// Check if two type shapes are compatible under 1-level merge semantics. 
@@ -260,6 +283,7 @@ fn check_compatibility<'src>( (TypeShape::Primitive(_), TypeShape::Primitive(_)) => Some(MergeError { field, shapes: vec![a.clone(), b.clone()], + spans: vec![], // Filled in by caller }), // Struct vs Primitive is incompatible @@ -267,6 +291,7 @@ fn check_compatibility<'src>( | (TypeShape::Primitive(_), TypeShape::Struct(_)) => Some(MergeError { field, shapes: vec![a.clone(), b.clone()], + spans: vec![], // Filled in by caller }), // Structs: must have identical field sets (1-level, no deep merge) @@ -277,6 +302,7 @@ fn check_compatibility<'src>( Some(MergeError { field, shapes: vec![a.clone(), b.clone()], + spans: vec![], // Filled in by caller }) } } @@ -313,6 +339,7 @@ struct InferenceContext<'src, 'g> { dead_nodes: &'g HashSet, type_defs: Vec>, next_type_id: TypeId, + diagnostics: Diagnostics, errors: Vec>, } @@ -323,6 +350,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { dead_nodes, type_defs: Vec::new(), next_type_id: 3, // TYPE_COMPOSITE_START + diagnostics: Diagnostics::new(), errors: Vec::new(), } } @@ -344,12 +372,39 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut merge_errors, ); - // Convert merge errors to unification errors + // Convert merge errors to unification errors and diagnostics for err in merge_errors { + let types_str = err + .shapes + .iter() + .map(|s| s.to_description().to_string()) + .collect::>() + .join(" vs "); + + // Use first span as primary, others as related + let primary_span = err.spans.first().copied().unwrap_or_default(); + let mut builder = self + .diagnostics + .report(DiagnosticKind::IncompatibleTypes, primary_span) + .message(types_str); + + // Add related spans + for span in err.spans.iter().skip(1) { + builder = builder.related_to("also captured here", *span); + } + builder + .hint(format!( + "capture `{}` has incompatible types across branches", + err.field + )) + .emit(); + + // Keep legacy error for backward compat self.errors.push(UnificationError { field: err.field, definition: def_name, 
types_found: err.shapes.iter().map(|s| s.to_description()).collect(), + spans: err.spans, }); } @@ -391,14 +446,14 @@ impl<'src, 'g> InferenceContext<'src, 'g> { BuildEffect::ToString => { state.pending_type = Some(TYPE_STR); } - BuildEffect::Field(name) => { + BuildEffect::Field { name, span } => { if let Some(base_type) = state.pending_type.take() { if let Some(tag) = state.current_variant { // Inside a variant - add to variant scope let variant_scope = scope.variants.entry(tag).or_default(); - variant_scope.add_field(*name, base_type, state.cardinality); + variant_scope.add_field(*name, base_type, state.cardinality, *span); } else { - scope.add_field(*name, base_type, state.cardinality); + scope.add_field(*name, base_type, state.cardinality, *span); } } state.cardinality = Cardinality::One; @@ -572,6 +627,7 @@ pub fn infer_types<'src>( TypeInferenceResult { type_defs: ctx.type_defs, entrypoint_types, + diagnostics: ctx.diagnostics, errors: ctx.errors, } } diff --git a/crates/plotnik-lib/src/graph/typing_tests.rs b/crates/plotnik-lib/src/graph/typing_tests.rs index 883cbd4b..a0d51fed 100644 --- a/crates/plotnik-lib/src/graph/typing_tests.rs +++ b/crates/plotnik-lib/src/graph/typing_tests.rs @@ -1,6 +1,6 @@ //! Tests for type inference. 
-use crate::graph::{construct_graph, infer_types}; +use crate::graph::{TypeInferenceResult, construct_graph, infer_types}; use crate::parser::Parser; use crate::parser::lexer::lex; use std::collections::HashSet; @@ -16,6 +16,21 @@ fn infer(source: &str) -> String { inference.dump() } +fn infer_full(source: &str) -> TypeInferenceResult<'_> { + let tokens = lex(source); + let parser = Parser::new(source, tokens); + let result = parser.parse().expect("parse should succeed"); + let graph = construct_graph(source, &result.root); + let dead_nodes = HashSet::new(); + + infer_types(&graph, &dead_nodes) +} + +fn infer_diagnostics(source: &str) -> String { + let inference = infer_full(source); + inference.dump_diagnostics(source) +} + #[test] fn simple_capture() { let result = infer("Foo = (identifier) @name"); @@ -231,22 +246,16 @@ fn graph_structure_captured_plus() { let result = parser.parse().expect("parse should succeed"); let graph = construct_graph(source, &result.root); - let mut out = String::new(); - for (id, node) in graph.iter() { - out.push_str(&format!("N{}: ", id)); - for effect in &node.effects { - out.push_str(&format!("{:?} ", effect)); - } - out.push_str(&format!("→ {:?}\n", node.successors)); - } - insta::assert_snapshot!(out, @r#" - N0: CaptureNode → [2] - N1: StartArray → [0] - N2: PushElement → [3] - N3: → [0, 4] - N4: EndArray → [5] - N5: Field("names") → [] - "#); + insta::assert_snapshot!(graph.dump(), @r" + Foo = N1 + + N0: (identifier) [Capture] → N2 + N1: ε [StartArray] → N0 + N2: ε [Push] → N3 + N3: ε → N0, N4 + N4: ε [EndArray] → N5 + N5: ε [Field(names)] → ∅ + "); } /// Documents the graph structure for a tagged alternation. 
@@ -263,26 +272,20 @@ fn graph_structure_tagged_alternation() { let result = parser.parse().expect("parse should succeed"); let graph = construct_graph(source, &result.root); - let mut out = String::new(); - for (id, node) in graph.iter() { - out.push_str(&format!("N{}: ", id)); - for effect in &node.effects { - out.push_str(&format!("{:?} ", effect)); - } - out.push_str(&format!("→ {:?}\n", node.successors)); - } - insta::assert_snapshot!(out, @r#" - N0: → [2, 6] - N1: → [] - N2: StartVariant("Ok") → [3] - N3: CaptureNode → [4] - N4: Field("val") → [5] - N5: EndVariant → [1] - N6: StartVariant("Err") → [7] - N7: CaptureNode → [8] - N8: Field("err") → [9] - N9: EndVariant → [1] - "#); + insta::assert_snapshot!(graph.dump(), @r" + Foo = N0 + + N0: ε → N2, N6 + N1: ε → ∅ + N2: ε [Variant(Ok)] → N3 + N3: (value) [Capture] → N4 + N4: ε [Field(val)] → N5 + N5: ε [EndVariant] → N1 + N6: ε [Variant(Err)] → N7 + N7: (error) [Capture] → N8 + N8: ε [Field(err)] → N9 + N9: ε [EndVariant] → N1 + "); } // ============================================================================= @@ -292,7 +295,8 @@ fn graph_structure_tagged_alternation() { #[test] fn merge_incompatible_primitives_node_vs_string() { // Same field with Node in one branch, String in another - let result = infer("Foo = [ (a) @val (b) @val ::string ]"); + let source = "Foo = [ (a) @val (b) @val ::string ]"; + let result = infer(source); insta::assert_snapshot!(result, @r" === Entrypoints === Foo → T3 @@ -305,6 +309,16 @@ fn merge_incompatible_primitives_node_vs_string() { === Errors === field `val` in `Foo`: incompatible types [Node, String] "); + + // Verify diagnostic output with proper spans + insta::assert_snapshot!(infer_diagnostics(source), @r" + error: incompatible types: Node vs String + | + 1 | Foo = [ (a) @val (b) @val ::string ] + | ^^^ --- also captured here + | + help: capture `val` has incompatible types across branches + "); } #[test] From bf1ca42a264b8eab9875a2bf1d2d5d00d8e389c0 Mon Sep 17 
00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 14:35:31 -0300 Subject: [PATCH 07/23] Move to query --- crates/plotnik-lib/src/diagnostics/message.rs | 11 +- crates/plotnik-lib/src/graph/analysis.rs | 204 ------ .../plotnik-lib/src/graph/analysis_tests.rs | 259 -------- crates/plotnik-lib/src/graph/build_tests.rs | 363 ----------- .../plotnik-lib/src/graph/construct_tests.rs | 579 ------------------ crates/plotnik-lib/src/graph/mod.rs | 42 -- .../plotnik-lib/src/graph/optimize_tests.rs | 449 -------------- crates/plotnik-lib/src/graph/typing_tests.rs | 445 -------------- .../{graph/build.rs => query/build_graph.rs} | 119 +--- .../src/{graph => query}/construct.rs | 117 ++-- .../plotnik-lib/src/query/construct_tests.rs | 267 ++++++++ .../{graph/dump.rs => query/graph_dump.rs} | 144 +---- crates/plotnik-lib/src/query/mod.rs | 70 ++- .../src/{graph => query}/optimize.rs | 65 +- .../src/{graph => query}/typing.rs | 230 ++++--- 15 files changed, 524 insertions(+), 2840 deletions(-) delete mode 100644 crates/plotnik-lib/src/graph/analysis.rs delete mode 100644 crates/plotnik-lib/src/graph/analysis_tests.rs delete mode 100644 crates/plotnik-lib/src/graph/build_tests.rs delete mode 100644 crates/plotnik-lib/src/graph/construct_tests.rs delete mode 100644 crates/plotnik-lib/src/graph/mod.rs delete mode 100644 crates/plotnik-lib/src/graph/optimize_tests.rs delete mode 100644 crates/plotnik-lib/src/graph/typing_tests.rs rename crates/plotnik-lib/src/{graph/build.rs => query/build_graph.rs} (77%) rename crates/plotnik-lib/src/{graph => query}/construct.rs (81%) create mode 100644 crates/plotnik-lib/src/query/construct_tests.rs rename crates/plotnik-lib/src/{graph/dump.rs => query/graph_dump.rs} (50%) rename crates/plotnik-lib/src/{graph => query}/optimize.rs (67%) rename crates/plotnik-lib/src/{graph => query}/typing.rs (73%) diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index 0106c6c3..f29853fa 100644 --- 
a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -63,6 +63,8 @@ pub enum DiagnosticKind { // Type inference errors IncompatibleTypes, + MultiCaptureQuantifierNoName, + UnusedBranchLabels, // Link pass - grammar validation UnknownNodeType, @@ -78,7 +80,10 @@ pub enum DiagnosticKind { impl DiagnosticKind { /// Default severity for this kind. Can be overridden by policy. pub fn default_severity(&self) -> Severity { - Severity::Error + match self { + Self::UnusedBranchLabels => Severity::Warning, + _ => Severity::Error, + } } /// Whether this kind suppresses `other` when spans overlap. @@ -171,6 +176,10 @@ impl DiagnosticKind { // Type inference Self::IncompatibleTypes => "incompatible types in alternation branches", + Self::MultiCaptureQuantifierNoName => { + "quantified expression with multiple captures requires `@name`" + } + Self::UnusedBranchLabels => "branch labels have no effect without capture", // Link pass - grammar validation Self::UnknownNodeType => "unknown node type", diff --git a/crates/plotnik-lib/src/graph/analysis.rs b/crates/plotnik-lib/src/graph/analysis.rs deleted file mode 100644 index 0484ba17..00000000 --- a/crates/plotnik-lib/src/graph/analysis.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! Analysis pass for emission preparation. -//! -//! This module prepares a `BuildGraph` for emission to the binary format -//! by computing counts, interning strings, and mapping node IDs. -//! -//! # Three-Phase Construction (ADR-0004) -//! -//! 1. **Analysis** (this module): Count elements, intern strings -//! 2. **Layout**: Compute aligned offsets, allocate once -//! 3. **Emission**: Write to buffer -//! -//! # String Interning -//! -//! All strings (field names, variant tags, node kinds, definition names) -//! are deduplicated. Identical strings share storage and `StringId`. 
- -use super::{BuildEffect, BuildGraph, BuildMatcher, NodeId}; -use crate::ir::StringId; -use indexmap::IndexMap; -use std::collections::HashSet; - -/// Result of analyzing a BuildGraph for emission. -#[derive(Debug)] -pub struct AnalysisResult<'src> { - /// String interner with all unique strings. - pub strings: StringInterner<'src>, - - /// Mapping from BuildGraph NodeId to emission index. - /// Dead nodes map to `None`. - pub node_map: Vec>, - - /// Number of live transitions to emit. - pub transition_count: u32, - - /// Total successor slots needed in the spill segment. - /// (Only for nodes with >8 successors) - pub spilled_successor_count: u32, - - /// Total effects across all nodes. - pub effect_count: u32, - - /// Total negated fields across all matchers. - pub negated_field_count: u32, - - /// Number of definition entrypoints. - pub entrypoint_count: u32, -} - -/// String interner for deduplication. -/// -/// Strings are stored in insertion order. `StringId` is the index. -#[derive(Debug, Default)] -pub struct StringInterner<'src> { - /// Map from string content to its ID. - index: IndexMap<&'src str, StringId>, -} - -impl<'src> StringInterner<'src> { - pub fn new() -> Self { - Self { - index: IndexMap::new(), - } - } - - /// Intern a string, returning its ID. - /// Returns existing ID if already interned. - pub fn intern(&mut self, s: &'src str) -> StringId { - let next_id = self.index.len() as StringId; - *self.index.entry(s).or_insert(next_id) - } - - /// Get the ID of an already-interned string. - pub fn get(&self, s: &str) -> Option { - self.index.get(s).copied() - } - - /// Iterate over all strings in insertion order. - pub fn iter(&self) -> impl Iterator + '_ { - self.index.iter().map(|(s, id)| (*s, *id)) - } - - /// Number of interned strings. - pub fn len(&self) -> usize { - self.index.len() - } - - /// Returns true if no strings have been interned. 
- pub fn is_empty(&self) -> bool { - self.index.is_empty() - } - - /// Total byte length of all strings. - pub fn total_bytes(&self) -> usize { - self.index.keys().map(|s| s.len()).sum() - } -} - -/// Analyze a BuildGraph for emission. -/// -/// The `dead_nodes` set contains nodes eliminated by optimization passes. -/// These are skipped during analysis and won't appear in the output. -pub fn analyze<'src>( - graph: &BuildGraph<'src>, - dead_nodes: &HashSet, -) -> AnalysisResult<'src> { - let mut strings = StringInterner::new(); - let mut node_map: Vec> = vec![None; graph.len()]; - - let mut transition_count: u32 = 0; - let mut spilled_successor_count: u32 = 0; - let mut effect_count: u32 = 0; - let mut negated_field_count: u32 = 0; - - // First pass: map live nodes to emission indices and count elements - for (id, node) in graph.iter() { - if dead_nodes.contains(&id) { - continue; - } - - node_map[id as usize] = Some(transition_count); - transition_count += 1; - - // Count successors that spill (>8) - let live_successors = count_live_successors(node, dead_nodes); - if live_successors > 8 { - spilled_successor_count += live_successors as u32; - } - - // Count effects - effect_count += node.effects.len() as u32; - - // Intern strings and count negated fields from matcher - match &node.matcher { - BuildMatcher::Node { - kind, - field, - negated_fields, - } => { - strings.intern(kind); - if let Some(f) = field { - strings.intern(f); - } - for nf in negated_fields { - strings.intern(nf); - } - negated_field_count += negated_fields.len() as u32; - } - BuildMatcher::Anonymous { literal, field } => { - strings.intern(literal); - if let Some(f) = field { - strings.intern(f); - } - } - BuildMatcher::Wildcard { field } => { - if let Some(f) = field { - strings.intern(f); - } - } - BuildMatcher::Epsilon => {} - } - - // Intern strings from effects - for effect in &node.effects { - match effect { - BuildEffect::Field { name, .. 
} => { - strings.intern(name); - } - BuildEffect::StartVariant(tag) => { - strings.intern(tag); - } - _ => {} - } - } - - // Intern ref name if present - if let Some(name) = node.ref_name { - strings.intern(name); - } - } - - // Intern definition names - let entrypoint_count = graph.definitions().count() as u32; - for (name, _) in graph.definitions() { - strings.intern(name); - } - - AnalysisResult { - strings, - node_map, - transition_count, - spilled_successor_count, - effect_count, - negated_field_count, - entrypoint_count, - } -} - -/// Count live successors (excluding dead nodes). -fn count_live_successors(node: &super::BuildNode, dead_nodes: &HashSet) -> usize { - node.successors - .iter() - .filter(|s| !dead_nodes.contains(s)) - .count() -} diff --git a/crates/plotnik-lib/src/graph/analysis_tests.rs b/crates/plotnik-lib/src/graph/analysis_tests.rs deleted file mode 100644 index c6143444..00000000 --- a/crates/plotnik-lib/src/graph/analysis_tests.rs +++ /dev/null @@ -1,259 +0,0 @@ -//! Tests for analysis module. 
- -use std::collections::HashSet; - -use rowan::TextRange; - -use super::*; -use crate::graph::{BuildEffect, BuildGraph, BuildMatcher, RefMarker}; - -#[test] -fn string_interner_deduplicates() { - let mut interner = StringInterner::new(); - - let id1 = interner.intern("name"); - let id2 = interner.intern("value"); - let id3 = interner.intern("name"); // duplicate - - assert_eq!(id1, id3); - assert_ne!(id1, id2); - assert_eq!(interner.len(), 2); -} - -#[test] -fn string_interner_preserves_order() { - let mut interner = StringInterner::new(); - - interner.intern("alpha"); - interner.intern("beta"); - interner.intern("gamma"); - - let strings: Vec<_> = interner.iter().collect(); - - assert_eq!(strings, vec![("alpha", 0), ("beta", 1), ("gamma", 2)]); -} - -#[test] -fn string_interner_total_bytes() { - let mut interner = StringInterner::new(); - - interner.intern("foo"); - interner.intern("bar"); - interner.intern("foo"); // duplicate, not counted twice - - assert_eq!(interner.total_bytes(), 6); // "foo" + "bar" -} - -#[test] -fn analyze_empty_graph() { - let g = BuildGraph::new(); - let dead = HashSet::new(); - - let result = analyze(&g, &dead); - - assert_eq!(result.transition_count, 0); - assert_eq!(result.effect_count, 0); - assert_eq!(result.entrypoint_count, 0); - assert!(result.strings.is_empty()); -} - -#[test] -fn analyze_single_matcher() { - let mut g = BuildGraph::new(); - g.add_matcher(BuildMatcher::node("identifier")); - let dead = HashSet::new(); - - let result = analyze(&g, &dead); - - assert_eq!(result.transition_count, 1); - assert_eq!(result.node_map[0], Some(0)); - assert_eq!(result.strings.len(), 1); - assert_eq!(result.strings.get("identifier"), Some(0)); -} - -#[test] -fn analyze_skips_dead_nodes() { - let mut g = BuildGraph::new(); - let n0 = g.add_matcher(BuildMatcher::node("a")); - let n1 = g.add_epsilon(); // will be dead - let n2 = g.add_matcher(BuildMatcher::node("b")); - g.connect(n0, n1); - g.connect(n1, n2); - - let mut dead = 
HashSet::new(); - dead.insert(n1); - - let result = analyze(&g, &dead); - - assert_eq!(result.transition_count, 2); - assert_eq!(result.node_map[0], Some(0)); - assert_eq!(result.node_map[1], None); // dead - assert_eq!(result.node_map[2], Some(1)); -} - -#[test] -fn analyze_counts_effects() { - let mut g = BuildGraph::new(); - let id = g.add_matcher(BuildMatcher::node("identifier")); - g.node_mut(id).add_effect(BuildEffect::CaptureNode); - g.node_mut(id).add_effect(BuildEffect::Field { - name: "name", - span: TextRange::default(), - }); - g.node_mut(id).add_effect(BuildEffect::ToString); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.effect_count, 3); - // "identifier" and "name" interned - assert_eq!(result.strings.len(), 2); -} - -#[test] -fn analyze_counts_negated_fields() { - let mut g = BuildGraph::new(); - g.add_matcher( - BuildMatcher::node("call") - .with_negated_field("arguments") - .with_negated_field("type_arguments"), - ); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.negated_field_count, 2); - // "call", "arguments", "type_arguments" interned - assert_eq!(result.strings.len(), 3); -} - -#[test] -fn analyze_interns_field_constraints() { - let mut g = BuildGraph::new(); - g.add_matcher(BuildMatcher::node("function").with_field("name")); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.strings.len(), 2); - assert!(result.strings.get("function").is_some()); - assert!(result.strings.get("name").is_some()); -} - -#[test] -fn analyze_interns_anonymous_literals() { - let mut g = BuildGraph::new(); - g.add_matcher(BuildMatcher::anonymous("+")); - g.add_matcher(BuildMatcher::anonymous("-")); - g.add_matcher(BuildMatcher::anonymous("+")); // duplicate - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.transition_count, 3); - assert_eq!(result.strings.len(), 2); // "+" and "-" -} - -#[test] -fn 
analyze_interns_variant_tags() { - let mut g = BuildGraph::new(); - let n0 = g.add_epsilon(); - g.node_mut(n0).add_effect(BuildEffect::StartVariant("True")); - - let n1 = g.add_epsilon(); - g.node_mut(n1) - .add_effect(BuildEffect::StartVariant("False")); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.strings.len(), 2); - assert!(result.strings.get("True").is_some()); - assert!(result.strings.get("False").is_some()); -} - -#[test] -fn analyze_counts_entrypoints() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - g.add_definition("Ident", f1.entry); - - let f2 = g.matcher_fragment(BuildMatcher::node("number")); - g.add_definition("Num", f2.entry); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.entrypoint_count, 2); - // "identifier", "number", "Ident", "Num" interned - assert_eq!(result.strings.len(), 4); -} - -#[test] -fn analyze_deduplicates_across_sources() { - let mut g = BuildGraph::new(); - - // "name" appears as: node kind, field constraint, effect field, definition name - let n0 = g.add_matcher(BuildMatcher::node("name").with_field("name")); - g.node_mut(n0).add_effect(BuildEffect::Field { - name: "name", - span: TextRange::default(), - }); - g.add_definition("name", n0); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - // All "name" references should resolve to same StringId - assert_eq!(result.strings.len(), 1); - assert_eq!(result.strings.get("name"), Some(0)); -} - -#[test] -fn analyze_wildcard_with_field() { - let mut g = BuildGraph::new(); - g.add_matcher(BuildMatcher::wildcard().with_field("body")); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.strings.len(), 1); - assert!(result.strings.get("body").is_some()); -} - -#[test] -fn analyze_ref_names() { - let mut g = BuildGraph::new(); - let enter = g.add_epsilon(); - 
g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); - g.node_mut(enter).ref_name = Some("Function"); - - let dead = HashSet::new(); - let result = analyze(&g, &dead); - - assert_eq!(result.strings.len(), 1); - assert!(result.strings.get("Function").is_some()); -} - -#[test] -fn node_map_indices_are_contiguous() { - let mut g = BuildGraph::new(); - g.add_matcher(BuildMatcher::node("a")); // 0 -> 0 - g.add_epsilon(); // 1 -> dead - g.add_matcher(BuildMatcher::node("b")); // 2 -> 1 - g.add_epsilon(); // 3 -> dead - g.add_matcher(BuildMatcher::node("c")); // 4 -> 2 - - let mut dead = HashSet::new(); - dead.insert(1); - dead.insert(3); - - let result = analyze(&g, &dead); - - assert_eq!(result.transition_count, 3); - assert_eq!(result.node_map[0], Some(0)); - assert_eq!(result.node_map[1], None); - assert_eq!(result.node_map[2], Some(1)); - assert_eq!(result.node_map[3], None); - assert_eq!(result.node_map[4], Some(2)); -} diff --git a/crates/plotnik-lib/src/graph/build_tests.rs b/crates/plotnik-lib/src/graph/build_tests.rs deleted file mode 100644 index 26aae297..00000000 --- a/crates/plotnik-lib/src/graph/build_tests.rs +++ /dev/null @@ -1,363 +0,0 @@ -//! Tests for BuildGraph construction and fragment combinators. 
- -use rowan::TextRange; - -use super::*; - -#[test] -fn single_matcher() { - let mut g = BuildGraph::new(); - - let frag = g.matcher_fragment(BuildMatcher::node("identifier")); - - assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → ∅ - "); -} - -#[test] -fn epsilon_fragment() { - let mut g = BuildGraph::new(); - - let frag = g.epsilon_fragment(); - - assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: ε → ∅ - "); -} - -#[test] -fn sequence_empty() { - let mut g = BuildGraph::new(); - - let frag = g.sequence(&[]); - - assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: ε → ∅ - "); -} - -#[test] -fn sequence_single() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - - let frag = g.sequence(&[f1]); - - assert_eq!(frag, f1); - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → ∅ - "); -} - -#[test] -fn sequence_two() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - let f2 = g.matcher_fragment(BuildMatcher::node("number")); - - let frag = g.sequence(&[f1, f2]); - - assert_eq!(frag.entry, f1.entry); - assert_eq!(frag.exit, f2.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → N1 - N1: (number) → ∅ - "); -} - -#[test] -fn sequence_three() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("a")); - let f2 = g.matcher_fragment(BuildMatcher::node("b")); - let f3 = g.matcher_fragment(BuildMatcher::node("c")); - - let frag = g.sequence(&[f1, f2, f3]); - - assert_eq!(frag.entry, f1.entry); - assert_eq!(frag.exit, f3.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: (a) → N1 - N1: (b) → N2 - N2: (c) → ∅ - "); -} - -#[test] -fn alternation_empty() { - let mut g = BuildGraph::new(); - - let frag = g.alternation(&[]); - - assert_eq!(frag.entry, frag.exit); - insta::assert_snapshot!(g.dump(), @r" - N0: ε → ∅ - "); -} 
- -#[test] -fn alternation_single() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - - let frag = g.alternation(&[f1]); - - assert_eq!(frag, f1); - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → ∅ - "); -} - -#[test] -fn alternation_two() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - let f2 = g.matcher_fragment(BuildMatcher::node("number")); - - let frag = g.alternation(&[f1, f2]); - - // Entry connects to both branches, both branches connect to exit - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → N3 - N1: (number) → N3 - N2: ε → N0, N1 - N3: ε → ∅ - "); - assert_eq!(frag.entry, 2); - assert_eq!(frag.exit, 3); -} - -#[test] -fn zero_or_more_greedy() { - let mut g = BuildGraph::new(); - let inner = g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.zero_or_more(inner); - - // Greedy: branch tries inner first, then exit - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N1 - N1: ε → N0, N2 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 1); // branch node - assert_eq!(frag.exit, 2); -} - -#[test] -fn zero_or_more_lazy() { - let mut g = BuildGraph::new(); - let inner = g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.zero_or_more_lazy(inner); - - // Non-greedy: branch tries exit first, then inner - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N1 - N1: ε → N2, N0 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 1); - assert_eq!(frag.exit, 2); -} - -#[test] -fn one_or_more_greedy() { - let mut g = BuildGraph::new(); - let inner = g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.one_or_more(inner); - - // Entry is inner, greedy branch after - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N1 - N1: ε → N0, N2 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 0); // inner node - assert_eq!(frag.exit, 2); -} - -#[test] -fn one_or_more_lazy() { - let mut g = BuildGraph::new(); - let inner = 
g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.one_or_more_lazy(inner); - - // Entry is inner, non-greedy branch after - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N1 - N1: ε → N2, N0 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 0); - assert_eq!(frag.exit, 2); -} - -#[test] -fn optional_greedy() { - let mut g = BuildGraph::new(); - let inner = g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.optional(inner); - - // Greedy: branch tries inner first - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N2 - N1: ε → N0, N2 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 1); - assert_eq!(frag.exit, 2); -} - -#[test] -fn optional_lazy() { - let mut g = BuildGraph::new(); - let inner = g.matcher_fragment(BuildMatcher::node("item")); - - let frag = g.optional_lazy(inner); - - // Non-greedy: branch skips first - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N2 - N1: ε → N2, N0 - N2: ε → ∅ - "); - assert_eq!(frag.entry, 1); - assert_eq!(frag.exit, 2); -} - -#[test] -fn matcher_with_field() { - let mut g = BuildGraph::new(); - - g.matcher_fragment(BuildMatcher::node("identifier").with_field("name")); - - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) @name → ∅ - "); -} - -#[test] -fn matcher_with_negated_fields() { - let mut g = BuildGraph::new(); - - g.matcher_fragment( - BuildMatcher::node("call") - .with_negated_field("arguments") - .with_negated_field("type_arguments"), - ); - - insta::assert_snapshot!(g.dump(), @r" - N0: (call) !arguments !type_arguments → ∅ - "); -} - -#[test] -fn anonymous_matcher() { - let mut g = BuildGraph::new(); - - g.matcher_fragment(BuildMatcher::anonymous("+")); - - insta::assert_snapshot!(g.dump(), @r#" - N0: "+" → ∅ - "#); -} - -#[test] -fn wildcard_matcher() { - let mut g = BuildGraph::new(); - - g.matcher_fragment(BuildMatcher::wildcard()); - - insta::assert_snapshot!(g.dump(), @r" - N0: _ → ∅ - "); -} - -#[test] -fn node_with_effects() { - let mut g = BuildGraph::new(); - let id = 
g.add_matcher(BuildMatcher::node("identifier")); - g.node_mut(id).add_effect(BuildEffect::CaptureNode); - g.node_mut(id).add_effect(BuildEffect::Field { - name: "name", - span: TextRange::default(), - }); - - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) [Capture] [Field(name)] → ∅ - "); -} - -#[test] -fn node_with_ref_marker() { - let mut g = BuildGraph::new(); - let enter = g.add_epsilon(); - g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); - - let exit = g.add_epsilon(); - g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); - - g.connect(enter, exit); - - insta::assert_snapshot!(g.dump(), @r" - N0: ε +Enter(0, ?) → N1 - N1: ε +Exit(0) → ∅ - "); -} - -#[test] -fn definition_registration() { - let mut g = BuildGraph::new(); - let f1 = g.matcher_fragment(BuildMatcher::node("identifier")); - g.add_definition("Ident", f1.entry); - - let f2 = g.matcher_fragment(BuildMatcher::node("number")); - g.add_definition("Num", f2.entry); - - assert_eq!(g.definition("Ident"), Some(0)); - assert_eq!(g.definition("Num"), Some(1)); - assert_eq!(g.definition("Unknown"), None); - - insta::assert_snapshot!(g.dump(), @r" - Ident = N0 - Num = N1 - - N0: (identifier) → ∅ - N1: (number) → ∅ - "); -} - -#[test] -fn complex_nested_structure() { - let mut g = BuildGraph::new(); - - // Build: (func { (identifier)+ (block) }) - let ident = g.matcher_fragment(BuildMatcher::node("identifier")); - let idents = g.one_or_more(ident); - - let block = g.matcher_fragment(BuildMatcher::node("block")); - let body = g.sequence(&[idents, block]); - - let func = g.matcher_fragment(BuildMatcher::node("func")); - g.connect_exit(func, body.entry); - - g.add_definition("Func", func.entry); - - insta::assert_snapshot!(g.dump(), @r" - Func = N4 - - N0: (identifier) → N1 - N1: ε → N0, N2 - N2: ε → N3 - N3: (block) → ∅ - N4: (func) → N0 - "); -} diff --git a/crates/plotnik-lib/src/graph/construct_tests.rs b/crates/plotnik-lib/src/graph/construct_tests.rs deleted file mode 100644 index 
e136a964..00000000 --- a/crates/plotnik-lib/src/graph/construct_tests.rs +++ /dev/null @@ -1,579 +0,0 @@ -//! Tests for AST-to-graph construction. - -use crate::graph::BuildGraph; -use crate::parser::Parser; -use crate::parser::lexer::lex; - -use super::construct_graph; - -fn parse_and_construct(source: &str) -> BuildGraph<'_> { - let tokens = lex(source); - let parser = Parser::new(source, tokens); - let result = parser.parse().expect("parse should succeed"); - construct_graph(source, &result.root) -} - -// ───────────────────────────────────────────────────────────────────────────── -// Basic Expressions -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn simple_named_node() { - let g = parse_and_construct("Foo = (identifier)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (identifier) → ∅ - "); -} - -#[test] -fn anonymous_string() { - let g = parse_and_construct(r#"Op = "+""#); - - insta::assert_snapshot!(g.dump(), @r#" - Op = N0 - - N0: "+" → ∅ - "#); -} - -#[test] -fn wildcard() { - let g = parse_and_construct("Any = (_)"); - - insta::assert_snapshot!(g.dump(), @r" - Any = N0 - - N0: _ → ∅ - "); -} - -#[test] -fn wildcard_underscore_literal() { - let g = parse_and_construct("Any = _"); - - insta::assert_snapshot!(g.dump(), @r" - Any = N0 - - N0: _ → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Nested Nodes -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn nested_node() { - let g = parse_and_construct("Foo = (call (identifier))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) → N1 - N1: [Down] (identifier) → N2 - N2: [Up(1)] ε → ∅ - "); -} - -#[test] -fn deeply_nested() { - let g = parse_and_construct("Foo = (a (b (c)))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (a) → N1 - N1: [Down] (b) → N2 - N2: [Down] (c) → N3 - N3: [Up(1)] ε → N4 - N4: [Up(1)] ε → ∅ - "); 
-} - -#[test] -fn sibling_nodes() { - let g = parse_and_construct("Foo = (call (identifier) (arguments))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) → N1 - N1: [Down] (identifier) → N2 - N2: [Next] (arguments) → N3 - N3: [Up(1)] ε → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Anchors -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn anchor_first_child() { - // . before first child → DownSkipTrivia - let g = parse_and_construct("Foo = (block . (statement))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (block) → N1 - N1: [Down.] (statement) → N2 - N2: [Up(1)] ε → ∅ - "); -} - -#[test] -fn anchor_last_child() { - // . after last child → UpSkipTrivia - let g = parse_and_construct("Foo = (block (statement) .)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (block) → N1 - N1: [Down] (statement) → N2 - N2: [Up.(1)] ε → ∅ - "); -} - -#[test] -fn anchor_adjacent_siblings() { - // . between siblings → NextSkipTrivia - let g = parse_and_construct("Foo = (block (a) . (b))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (block) → N1 - N1: [Down] (a) → N2 - N2: [Next.] (b) → N3 - N3: [Up(1)] ε → ∅ - "); -} - -#[test] -fn anchor_both_ends() { - // . at start and end - let g = parse_and_construct("Foo = (array . (element) .)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (array) → N1 - N1: [Down.] (element) → N2 - N2: [Up.(1)] ε → ∅ - "); -} - -#[test] -fn anchor_string_literal_first() { - // . before string literal → DownExact - let g = parse_and_construct(r#"Foo = (pair . ":" (value))"#); - - insta::assert_snapshot!(g.dump(), @r#" - Foo = N0 - - N0: (pair) → N1 - N1: [Down!] ":" → N2 - N2: [Next] (value) → N3 - N3: [Up(1)] ε → ∅ - "#); -} - -#[test] -fn anchor_string_literal_adjacent() { - // . 
after string literal before node → NextExact on string, but string is prev - // Actually the anchor affects the FOLLOWING node, so ":" has Down, "=" has Next! - let g = parse_and_construct(r#"Foo = (assignment (id) "=" . (value))"#); - - insta::assert_snapshot!(g.dump(), @r#" - Foo = N0 - - N0: (assignment) → N1 - N1: [Down] (id) → N2 - N2: [Next] "=" → N3 - N3: [Next.] (value) → N4 - N4: [Up(1)] ε → ∅ - "#); -} - -#[test] -fn anchor_string_literal_last() { - // . after string literal at end → UpExact - let g = parse_and_construct(r#"Foo = (semi (stmt) ";" .)"#); - - insta::assert_snapshot!(g.dump(), @r#" - Foo = N0 - - N0: (semi) → N1 - N1: [Down] (stmt) → N2 - N2: [Next] ";" → N3 - N3: [Up!(1)] ε → ∅ - "#); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Fields -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn field_constraint() { - let g = parse_and_construct("Foo = (call name: (identifier))"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) → N1 - N1: [Down] (identifier) @name → N2 - N2: [Up(1)] ε → ∅ - "); -} - -#[test] -fn negated_field() { - let g = parse_and_construct("Foo = (call !arguments)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) !arguments → ∅ - "); -} - -#[test] -fn multiple_negated_fields() { - let g = parse_and_construct("Foo = (call !arguments !type_arguments)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) !arguments !type_arguments → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Sequences -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn sequence_expr() { - let g = parse_and_construct("Foo = { (a) (b) }"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: ε [StartObj] → N1 - N1: [Next] (a) → N2 - N2: [Next] (b) → N3 - N3: ε [EndObj] → ∅ - "); -} - -#[test] -fn empty_sequence() { 
- let g = parse_and_construct("Foo = { }"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: ε [StartObj] → N1 - N1: ε → N2 - N2: ε [EndObj] → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Alternations -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn untagged_alternation() { - let g = parse_and_construct("Foo = [(identifier) (number)]"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: ε → N2, N3 - N1: ε → ∅ - N2: (identifier) → N1 - N3: (number) → N1 - "); -} - -#[test] -fn tagged_alternation() { - let g = parse_and_construct("Foo = [Ident: (identifier) Num: (number)]"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: ε → N2, N5 - N1: ε → ∅ - N2: ε [Variant(Ident)] → N3 - N3: (identifier) → N4 - N4: ε [EndVariant] → N1 - N5: ε [Variant(Num)] → N6 - N6: (number) → N7 - N7: ε [EndVariant] → N1 - "); -} - -#[test] -fn single_branch_alt() { - let g = parse_and_construct("Foo = [(identifier)]"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: ε → N2 - N1: ε → ∅ - N2: (identifier) → N1 - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Captures -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn simple_capture() { - let g = parse_and_construct("Foo = (identifier) @name"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (identifier) [Capture] → N1 - N1: ε [Field(name)] → ∅ - "); -} - -#[test] -fn capture_with_string_type() { - let g = parse_and_construct("Foo = (identifier) @name ::string"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (identifier) [Capture] [ToString] → N1 - N1: ε [Field(name)] → ∅ - "); -} - -#[test] -fn nested_capture() { - let g = parse_and_construct("Foo = (call name: (identifier) @fn_name)"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N0 - - N0: (call) → N1 - N1: [Down] 
(identifier) @name [Capture] → N2 - N2: ε [Field(fn_name)] → N3 - N3: [Up(1)] ε → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Quantifiers -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn zero_or_more() { - let g = parse_and_construct("Foo = (identifier)*"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N1 - - N0: (identifier) → N3 - N1: ε [StartArray] → N2 - N2: ε → N0, N4 - N3: ε [Push] → N2 - N4: ε [EndArray] → ∅ - "); -} - -#[test] -fn one_or_more() { - let g = parse_and_construct("Foo = (identifier)+"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N1 - - N0: (identifier) → N2 - N1: ε [StartArray] → N0 - N2: ε [Push] → N3 - N3: ε → N0, N4 - N4: ε [EndArray] → ∅ - "); -} - -#[test] -fn optional() { - let g = parse_and_construct("Foo = (identifier)?"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N1 - - N0: (identifier) → N2 - N1: ε → N0, N2 - N2: ε → ∅ - "); -} - -#[test] -fn lazy_zero_or_more() { - let g = parse_and_construct("Foo = (identifier)*?"); - - insta::assert_snapshot!(g.dump(), @r" - Foo = N1 - - N0: (identifier) → N3 - N1: ε [StartArray] → N2 - N2: ε → N4, N0 - N3: ε [Push] → N2 - N4: ε [EndArray] → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// References -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn simple_reference() { - let g = parse_and_construct( - " - Ident = (identifier) - Foo = (call (Ident)) - ", - ); - - insta::assert_snapshot!(g.dump(), @r" - Ident = N0 - Foo = N1 - - N0: (identifier) → ∅ - N1: (call) → N2 - N2: [Down] ε +Enter(0, Ident) → N0, N4 - N3: ε +Exit(0) → N4 - N4: [Up(1)] ε → ∅ - "); -} - -#[test] -fn multiple_references() { - let g = parse_and_construct( - " - Expr = [(identifier) (number)] - Foo = (binary left: (Expr) right: (Expr)) - ", - ); - - insta::assert_snapshot!(g.dump(), @r" - Expr = N0 - Foo = N4 - - 
N0: ε → N2, N3 - N1: ε → ∅ - N2: (identifier) → N1 - N3: (number) → N1 - N4: (binary) → N5 - N5: [Down] ε +Enter(0, Expr) → N0, N7 - N6: ε +Exit(0) → N7 - N7: [Next] ε +Enter(1, Expr) → N0, N9 - N8: ε +Exit(1) → N9 - N9: [Up(1)] ε → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Multiple Definitions -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn multiple_definitions() { - let g = parse_and_construct( - " - Ident = (identifier) - Num = (number) - Str = (string) - ", - ); - - insta::assert_snapshot!(g.dump(), @r" - Ident = N0 - Num = N1 - Str = N2 - - N0: (identifier) → ∅ - N1: (number) → ∅ - N2: (string) → ∅ - "); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Complex Examples -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn function_pattern() { - let g = parse_and_construct( - " - Func = (function_definition - name: (identifier) @name - parameters: (parameters (identifier)* @params) - body: (block)) - ", - ); - - insta::assert_snapshot!(g.dump(), @r" - Func = N0 - - N0: (function_definition) → N1 - N1: [Down] (identifier) @name [Capture] → N2 - N2: ε [Field(name)] → N3 - N3: [Next] (parameters) @parameters → N5 - N4: [Down] (identifier) [Capture] → N7 - N5: ε [StartArray] → N6 - N6: ε → N4, N8 - N7: ε [Push] → N6 - N8: ε [EndArray] → N9 - N9: ε [Field(params)] → N10 - N10: [Up(1)] ε → N11 - N11: [Next] (block) @body → N12 - N12: [Up(1)] ε → ∅ - "); -} - -#[test] -fn binary_expression_pattern() { - let g = parse_and_construct( - r#" - BinOp = (binary_expression - left: (_) @left - operator: ["+" "-" "*" "/"] @op ::string - right: (_) @right) - "#, - ); - - insta::assert_snapshot!(g.dump(), @r#" - BinOp = N0 - - N0: (binary_expression) → N1 - N1: [Down] _ @left [Capture] → N2 - N2: ε [Field(left)] → N3 - N3: [Next] ε → N5, N6, N7, N8 - N4: ε → N9 - N5: "+" [Capture] [ToString] 
→ N4 - N6: "-" [Capture] [ToString] → N4 - N7: "*" [Capture] [ToString] → N4 - N8: "/" [Capture] [ToString] → N4 - N9: ε [Field(op)] → N10 - N10: [Next] _ @right [Capture] → N11 - N11: ε [Field(right)] → N12 - N12: [Up(1)] ε → ∅ - "#); -} diff --git a/crates/plotnik-lib/src/graph/mod.rs b/crates/plotnik-lib/src/graph/mod.rs deleted file mode 100644 index 9cef1e42..00000000 --- a/crates/plotnik-lib/src/graph/mod.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Build-time graph representation for query compilation. -//! -//! This module provides an intermediate graph representation between -//! the parsed AST and the final compiled IR. The graph is mutable during -//! construction and supports analysis passes like epsilon elimination. -//! -//! # Architecture -//! -//! ```text -//! AST (parser) → BuildGraph → [analysis passes] → CompiledQuery (ir) -//! ``` -//! -//! The `BuildGraph` borrows strings from the source (`&'src str`). -//! String interning happens during emission to `CompiledQuery`. - -mod analysis; -mod build; -mod construct; -mod dump; -mod optimize; -mod typing; - -#[cfg(test)] -mod analysis_tests; -#[cfg(test)] -mod build_tests; -#[cfg(test)] -mod construct_tests; -#[cfg(test)] -mod optimize_tests; -#[cfg(test)] -mod typing_tests; - -pub use analysis::{AnalysisResult, StringInterner, analyze}; -pub use build::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; -pub use construct::{GraphConstructor, construct_graph}; -pub use dump::GraphPrinter; -pub use optimize::{OptimizeStats, eliminate_epsilons}; -pub use typing::{ - InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, - infer_types, -}; diff --git a/crates/plotnik-lib/src/graph/optimize_tests.rs b/crates/plotnik-lib/src/graph/optimize_tests.rs deleted file mode 100644 index c5ce7e98..00000000 --- a/crates/plotnik-lib/src/graph/optimize_tests.rs +++ /dev/null @@ -1,449 +0,0 @@ -//! Tests for epsilon elimination optimization pass. 
- -use rowan::TextRange; - -use super::*; -use crate::graph::{BuildEffect, BuildMatcher, RefMarker}; - -#[test] -fn eliminates_simple_epsilon_chain() { - let mut g = BuildGraph::new(); - - // Build: ε → ε → (identifier) - let id = g.add_matcher(BuildMatcher::node("identifier")); - let e1 = g.add_epsilon(); - let e2 = g.add_epsilon(); - g.connect(e2, e1); - g.connect(e1, id); - - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) → ∅ - N1: ε → N0 - N2: ε → N1 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 2); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (identifier) → ∅ - "); -} - -#[test] -fn keeps_branch_point_epsilon() { - let mut g = BuildGraph::new(); - - // Build alternation: ε → [A, B] - let a = g.add_matcher(BuildMatcher::node("a")); - let b = g.add_matcher(BuildMatcher::node("b")); - let branch = g.add_epsilon(); - g.connect(branch, a); - g.connect(branch, b); - - insta::assert_snapshot!(g.dump(), @r" - N0: (a) → ∅ - N1: (b) → ∅ - N2: ε → N0, N1 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 0); - assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (a) → ∅ - N1: (b) → ∅ - N2: ε → N0, N1 - "); -} - -#[test] -fn keeps_epsilon_with_enter_marker() { - let mut g = BuildGraph::new(); - - let target = g.add_matcher(BuildMatcher::node("target")); - let enter = g.add_epsilon(); - g.node_mut(enter).set_ref_marker(RefMarker::enter(0)); - g.connect(enter, target); - - insta::assert_snapshot!(g.dump(), @r" - N0: (target) → ∅ - N1: ε +Enter(0, ?) → N0 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 0); - assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (target) → ∅ - N1: ε +Enter(0, ?) 
→ N0 - "); -} - -#[test] -fn keeps_epsilon_with_exit_marker() { - let mut g = BuildGraph::new(); - - let target = g.add_matcher(BuildMatcher::node("target")); - let exit = g.add_epsilon(); - g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); - g.connect(exit, target); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 0); - assert_eq!(stats.epsilons_kept, 1); - assert!(dead.is_empty()); -} - -#[test] -fn merges_effects_into_successor() { - let mut g = BuildGraph::new(); - - // ε[StartArray] → ε[EndArray] → (identifier)[Capture] - let id = g.add_matcher(BuildMatcher::node("identifier")); - g.node_mut(id).add_effect(BuildEffect::CaptureNode); - - let end_arr = g.add_epsilon(); - g.node_mut(end_arr).add_effect(BuildEffect::EndArray); - g.connect(end_arr, id); - - let start_arr = g.add_epsilon(); - g.node_mut(start_arr).add_effect(BuildEffect::StartArray); - g.connect(start_arr, end_arr); - - insta::assert_snapshot!(g.dump(), @r" - N0: (identifier) [Capture] → ∅ - N1: ε [EndArray] → N0 - N2: ε [StartArray] → N1 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 2); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (identifier) [StartArray] [EndArray] [Capture] → ∅ - "); -} - -#[test] -fn redirects_multiple_predecessors() { - let mut g = BuildGraph::new(); - - // A → ε → C - // B ↗ - let c = g.add_matcher(BuildMatcher::node("c")); - let eps = g.add_epsilon(); - let a = g.add_matcher(BuildMatcher::node("a")); - let b = g.add_matcher(BuildMatcher::node("b")); - - g.connect(eps, c); - g.connect(a, eps); - g.connect(b, eps); - - insta::assert_snapshot!(g.dump(), @r" - N0: (c) → ∅ - N1: ε → N0 - N2: (a) → N1 - N3: (b) → N1 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 1); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (c) → ∅ - N2: (a) → N0 - N3: (b) → N0 - "); -} - -#[test] -fn updates_definition_entry_point() { - 
let mut g = BuildGraph::new(); - - // Def = ε → (identifier) - let id = g.add_matcher(BuildMatcher::node("identifier")); - let eps = g.add_epsilon(); - g.connect(eps, id); - g.add_definition("Def", eps); - - insta::assert_snapshot!(g.dump(), @r" - Def = N1 - - N0: (identifier) → ∅ - N1: ε → N0 - "); - - let (dead, _stats) = eliminate_epsilons(&mut g); - - // Definition should now point to identifier node - assert_eq!(g.definition("Def"), Some(0)); - insta::assert_snapshot!(g.dump_live(&dead), @r" - Def = N0 - - N0: (identifier) → ∅ - "); -} - -#[test] -fn keeps_exit_epsilon_with_no_successor() { - let mut g = BuildGraph::new(); - - // (a) → ε (terminal) - let eps = g.add_epsilon(); - let a = g.add_matcher(BuildMatcher::node("a")); - g.connect(a, eps); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // Epsilon with no successors cannot be eliminated - assert_eq!(stats.epsilons_kept, 1); - assert!(dead.is_empty()); -} - -#[test] -fn quantifier_preserves_branch_structure() { - let mut g = BuildGraph::new(); - - // Typical zero_or_more structure: entry(branch) → [inner → branch, exit] - let inner = g.matcher_fragment(BuildMatcher::node("item")); - let _frag = g.zero_or_more(inner); - - insta::assert_snapshot!(g.dump(), @r" - N0: (item) → N1 - N1: ε → N0, N2 - N2: ε → ∅ - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // Branch (N1) must remain, exit (N2) can't be eliminated (no successor) - assert_eq!(stats.epsilons_kept, 2); - assert_eq!(stats.epsilons_eliminated, 0); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (item) → N1 - N1: ε → N0, N2 - N2: ε → ∅ - "); -} - -#[test] -fn alternation_exit_epsilon_eliminated() { - let mut g = BuildGraph::new(); - - let f1 = g.matcher_fragment(BuildMatcher::node("a")); - let f2 = g.matcher_fragment(BuildMatcher::node("b")); - let frag = g.alternation(&[f1, f2]); - - // Add a successor to the exit so it can be eliminated - let final_node = g.add_matcher(BuildMatcher::node("end")); - 
g.connect(frag.exit, final_node); - - insta::assert_snapshot!(g.dump(), @r" - N0: (a) → N3 - N1: (b) → N3 - N2: ε → N0, N1 - N3: ε → N4 - N4: (end) → ∅ - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // Exit epsilon (N3) should be eliminated, branch (N2) kept - assert_eq!(stats.epsilons_eliminated, 1); - assert_eq!(stats.epsilons_kept, 1); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (a) → N4 - N1: (b) → N4 - N2: ε → N0, N1 - N4: (end) → ∅ - "); -} - -#[test] -fn does_not_merge_effects_into_ref_marker() { - let mut g = BuildGraph::new(); - - // ε[Field] → ε+Exit(0) → (target) - let target = g.add_matcher(BuildMatcher::node("target")); - let exit = g.add_epsilon(); - g.node_mut(exit).set_ref_marker(RefMarker::exit(0)); - g.connect(exit, target); - - let field_eps = g.add_epsilon(); - g.node_mut(field_eps).add_effect(BuildEffect::Field { - name: "name", - span: TextRange::default(), - }); - g.connect(field_eps, exit); - - insta::assert_snapshot!(g.dump(), @r" - N0: (target) → ∅ - N1: ε +Exit(0) → N0 - N2: ε [Field(name)] → N1 - "); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // Should NOT merge Field effect into Exit node - assert_eq!(stats.epsilons_kept, 2); - assert_eq!(stats.epsilons_eliminated, 0); - insta::assert_snapshot!(g.dump_live(&dead), @r" - N0: (target) → ∅ - N1: ε +Exit(0) → N0 - N2: ε [Field(name)] → N1 - "); -} - -#[test] -fn transfers_nav_to_stay_successor() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // ε[UpSkipTrivia(1)] → (target)[Stay] - // Nav can be transferred to target, epsilon eliminated - let target = g.add_matcher(BuildMatcher::node("end")); - let up_epsilon = g.add_epsilon(); - g.node_mut(up_epsilon).set_nav(Nav::up_skip_trivia(1)); - g.connect(up_epsilon, target); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // Epsilon eliminated, nav transferred to target - assert_eq!(stats.epsilons_eliminated, 1); - assert!(dead.contains(&1)); - assert_eq!(g.node(0).nav, 
Nav::up_skip_trivia(1)); -} - -#[test] -fn keeps_epsilon_when_both_have_nav() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // ε[UpSkipTrivia(1)] → ε[UpSkipTrivia(1)] → (target) - // Can't merge two non-Stay navs - let target = g.add_matcher(BuildMatcher::node("end")); - - let up1 = g.add_epsilon(); - g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); - g.connect(up1, target); - - let up2 = g.add_epsilon(); - g.node_mut(up2).set_nav(Nav::up_skip_trivia(1)); - g.connect(up2, up1); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // First epsilon (up1) eliminated (successor has Stay) - // Second epsilon (up2) kept (successor up1 has non-Stay nav) - assert_eq!(stats.epsilons_eliminated, 1); - assert_eq!(stats.epsilons_kept, 1); - assert!(dead.contains(&1)); // up1 eliminated - assert!(!dead.contains(&2)); // up2 kept -} - -#[test] -fn eliminates_epsilon_with_stay_nav() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // ε[Stay] → (target) - Stay is the default, can be eliminated - let target = g.add_matcher(BuildMatcher::node("target")); - let eps = g.add_epsilon(); - g.node_mut(eps).set_nav(Nav::stay()); // explicit Stay - g.connect(eps, target); - - let (dead, stats) = eliminate_epsilons(&mut g); - - assert_eq!(stats.epsilons_eliminated, 1); - assert!(dead.contains(&1)); // epsilon was eliminated -} - -#[test] -fn merges_unconstrained_up_levels() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // Simulates: ((((foo)))) - no anchors - // ε[Up(1)] → ε[Up(1)] → ε[Up(1)] → (target) - let target = g.add_matcher(BuildMatcher::node("end")); - - let up1 = g.add_epsilon(); - g.node_mut(up1).set_nav(Nav::up(1)); - g.connect(up1, target); - - let up2 = g.add_epsilon(); - g.node_mut(up2).set_nav(Nav::up(1)); - g.connect(up2, up1); - - let up3 = g.add_epsilon(); - g.node_mut(up3).set_nav(Nav::up(1)); - g.connect(up3, up2); - - let (_dead, stats) = eliminate_epsilons(&mut g); - - // All epsilons eliminated, levels merged into 
target - assert_eq!(stats.epsilons_eliminated, 3); - assert_eq!(g.node(0).nav, Nav::up(3)); -} - -#[test] -fn does_not_merge_constrained_up() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // Simulates: ((((foo) .) .) .) - anchors at each level - // ε[UpSkipTrivia(1)] → ε[UpSkipTrivia(1)] → (target) - let target = g.add_matcher(BuildMatcher::node("end")); - - let up1 = g.add_epsilon(); - g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); - g.connect(up1, target); - - let up2 = g.add_epsilon(); - g.node_mut(up2).set_nav(Nav::up_skip_trivia(1)); - g.connect(up2, up1); - - let (dead, stats) = eliminate_epsilons(&mut g); - - // First epsilon eliminated (transfers to target) - // Second kept (can't merge UpSkipTrivia) - assert_eq!(stats.epsilons_eliminated, 1); - assert_eq!(stats.epsilons_kept, 1); - assert!(dead.contains(&1)); - assert!(!dead.contains(&2)); -} - -#[test] -fn does_not_merge_mixed_up_kinds() { - use crate::ir::Nav; - - let mut g = BuildGraph::new(); - - // ε[Up(1)] → ε[UpSkipTrivia(1)] → (target) - // Different Up kinds cannot merge - let target = g.add_matcher(BuildMatcher::node("end")); - - let up1 = g.add_epsilon(); - g.node_mut(up1).set_nav(Nav::up_skip_trivia(1)); - g.connect(up1, target); - - let up2 = g.add_epsilon(); - g.node_mut(up2).set_nav(Nav::up(1)); // unconstrained - g.connect(up2, up1); - - let (_dead, stats) = eliminate_epsilons(&mut g); - - // First epsilon eliminated (transfers to target) - // Second kept (can't merge Up with UpSkipTrivia) - assert_eq!(stats.epsilons_eliminated, 1); - assert_eq!(stats.epsilons_kept, 1); -} diff --git a/crates/plotnik-lib/src/graph/typing_tests.rs b/crates/plotnik-lib/src/graph/typing_tests.rs deleted file mode 100644 index a0d51fed..00000000 --- a/crates/plotnik-lib/src/graph/typing_tests.rs +++ /dev/null @@ -1,445 +0,0 @@ -//! Tests for type inference. 
- -use crate::graph::{TypeInferenceResult, construct_graph, infer_types}; -use crate::parser::Parser; -use crate::parser::lexer::lex; -use std::collections::HashSet; - -fn infer(source: &str) -> String { - let tokens = lex(source); - let parser = Parser::new(source, tokens); - let result = parser.parse().expect("parse should succeed"); - let graph = construct_graph(source, &result.root); - let dead_nodes = HashSet::new(); - - let inference = infer_types(&graph, &dead_nodes); - inference.dump() -} - -fn infer_full(source: &str) -> TypeInferenceResult<'_> { - let tokens = lex(source); - let parser = Parser::new(source, tokens); - let result = parser.parse().expect("parse should succeed"); - let graph = construct_graph(source, &result.root); - let dead_nodes = HashSet::new(); - - infer_types(&graph, &dead_nodes) -} - -fn infer_diagnostics(source: &str) -> String { - let inference = infer_full(source); - inference.dump_diagnostics(source) -} - -#[test] -fn simple_capture() { - let result = infer("Foo = (identifier) @name"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - name: Node - } - "); -} - -#[test] -fn capture_with_string_type() { - let result = infer("Foo = (identifier) @name ::string"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - name: String - } - "); -} - -#[test] -fn multiple_captures() { - let result = infer("Foo = (function name: (identifier) @name body: (block) @body)"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - name: Node - body: Node - } - "); -} - -#[test] -fn no_captures() { - let result = infer("Foo = (identifier)"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → Void - "); -} - -#[test] -fn optional_quantifier() { - let result = infer("Foo = (identifier)? 
@name"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T4 - - === Types === - T3: Optional → Node - T4: Record Foo { - name: T3 - } - "); -} - -#[test] -fn star_quantifier() { - let result = infer("Foo = (identifier)* @names"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T4 - - === Types === - T3: ArrayStar → Node - T4: Record Foo { - names: T3 - } - "); -} - -#[test] -fn plus_quantifier() { - let result = infer("Foo = (identifier)+ @names"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T4 - - === Types === - T3: ArrayStar → Node - T4: Record Foo { - names: T3 - } - "); -} - -#[test] -fn tagged_alternation() { - let result = infer("Foo = [ Ok: (value) @val Err: (error) @err ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T5 - - === Types === - T3: Record FooOk { - val: Node - } - T4: Record FooErr { - err: Node - } - T5: Enum Foo { - Ok: T3 - Err: T4 - } - "); -} - -#[test] -fn untagged_alternation_symmetric() { - let result = infer("Foo = [ (a) @x (b) @x ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - x: Node - } - "); -} - -#[test] -fn untagged_alternation_asymmetric() { - let result = infer("Foo = [ (a) @x (b) @y ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T5 - - === Types === - T3: Optional → Node - T4: Optional → Node - T5: Record Foo { - x: T3 - y: T4 - } - "); -} - -#[test] -fn sequence_capture() { - let result = infer("Foo = { (a) @x (b) @y } @seq"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - x: Node - y: Node - } - "); -} - -#[test] -fn nested_captures() { - let result = infer("Foo = (outer (inner) @inner) @outer"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - inner: Node - } - "); -} - -#[test] -fn multiple_definitions() { - let result = 
infer( - r#" - Func = (function name: (identifier) @name) - Call = (call function: (identifier) @fn) - "#, - ); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Func → T3 - Call → T4 - - === Types === - T3: Record Func { - name: Node - } - T4: Record Call { - fn: Node - } - "); -} - -/// Documents the graph structure for a captured plus quantifier. -/// Used to understand effect ordering for type inference. -#[test] -fn graph_structure_captured_plus() { - use crate::graph::construct_graph; - use crate::parser::Parser; - use crate::parser::lexer::lex; - - let source = "Foo = (identifier)+ @names"; - let tokens = lex(source); - let parser = Parser::new(source, tokens); - let result = parser.parse().expect("parse should succeed"); - let graph = construct_graph(source, &result.root); - - insta::assert_snapshot!(graph.dump(), @r" - Foo = N1 - - N0: (identifier) [Capture] → N2 - N1: ε [StartArray] → N0 - N2: ε [Push] → N3 - N3: ε → N0, N4 - N4: ε [EndArray] → N5 - N5: ε [Field(names)] → ∅ - "); -} - -/// Documents the graph structure for a tagged alternation. -/// Used to understand variant effect ordering for type inference. 
-#[test] -fn graph_structure_tagged_alternation() { - use crate::graph::construct_graph; - use crate::parser::Parser; - use crate::parser::lexer::lex; - - let source = "Foo = [ Ok: (value) @val Err: (error) @err ]"; - let tokens = lex(source); - let parser = Parser::new(source, tokens); - let result = parser.parse().expect("parse should succeed"); - let graph = construct_graph(source, &result.root); - - insta::assert_snapshot!(graph.dump(), @r" - Foo = N0 - - N0: ε → N2, N6 - N1: ε → ∅ - N2: ε [Variant(Ok)] → N3 - N3: (value) [Capture] → N4 - N4: ε [Field(val)] → N5 - N5: ε [EndVariant] → N1 - N6: ε [Variant(Err)] → N7 - N7: (error) [Capture] → N8 - N8: ε [Field(err)] → N9 - N9: ε [EndVariant] → N1 - "); -} - -// ============================================================================= -// 1-Level Merge Semantics Tests (ADR-0009) -// ============================================================================= - -#[test] -fn merge_incompatible_primitives_node_vs_string() { - // Same field with Node in one branch, String in another - let source = "Foo = [ (a) @val (b) @val ::string ]"; - let result = infer(source); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - val: Node - } - - === Errors === - field `val` in `Foo`: incompatible types [Node, String] - "); - - // Verify diagnostic output with proper spans - insta::assert_snapshot!(infer_diagnostics(source), @r" - error: incompatible types: Node vs String - | - 1 | Foo = [ (a) @val (b) @val ::string ] - | ^^^ --- also captured here - | - help: capture `val` has incompatible types across branches - "); -} - -#[test] -fn merge_compatible_same_type_node() { - // Same field with Node in both branches - should merge without error - let result = infer("Foo = [ (a) @val (b) @val ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - val: Node - } - "); -} - -#[test] -fn 
merge_compatible_same_type_string() { - // Same field with String in both branches - should merge without error - let result = infer("Foo = [ (a) @val ::string (b) @val ::string ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - val: String - } - "); -} - -#[test] -fn merge_asymmetric_fields_become_optional() { - // Different fields in each branch - both become optional (the feature) - let result = infer("Foo = [ (a) @x (b) @y ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T5 - - === Types === - T3: Optional → Node - T4: Optional → Node - T5: Record Foo { - x: T3 - y: T4 - } - "); -} - -#[test] -fn merge_mixed_compatible_and_asymmetric() { - // @common in both branches (compatible), @x and @y asymmetric - // Note: flat scoping means nested captures propagate to root - let result = infer("Foo = [ { (a) @common (b) @x } { (a) @common (c) @y } ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T5 - - === Types === - T3: Optional → Node - T4: Optional → Node - T5: Record Foo { - common: Node - x: T3 - y: T4 - } - "); -} - -#[test] -fn merge_multiple_incompatible_fields_reports_all() { - // Multiple fields with type mismatches - should report all errors - let result = infer("Foo = [ (a) @x (b) @y (c) @x ::string (d) @y ::string ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T5 - - === Types === - T3: Optional → Node - T4: Optional → Node - T5: Record Foo { - x: T3 - y: T4 - } - - === Errors === - field `x` in `Foo`: incompatible types [Node, String] - field `y` in `Foo`: incompatible types [Node, String] - "); -} - -#[test] -fn merge_three_branches_all_compatible() { - // Three branches, all with same type - no error - let result = infer("Foo = [ (a) @val (b) @val (c) @val ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - val: Node - } - "); -} - -#[test] -fn 
merge_three_branches_one_incompatible() { - // Three branches, one has different type - let result = infer("Foo = [ (a) @val (b) @val (c) @val ::string ]"); - insta::assert_snapshot!(result, @r" - === Entrypoints === - Foo → T3 - - === Types === - T3: Record Foo { - val: Node - } - - === Errors === - field `val` in `Foo`: incompatible types [Node, String] - "); -} diff --git a/crates/plotnik-lib/src/graph/build.rs b/crates/plotnik-lib/src/query/build_graph.rs similarity index 77% rename from crates/plotnik-lib/src/graph/build.rs rename to crates/plotnik-lib/src/query/build_graph.rs index 465addad..a53716f0 100644 --- a/crates/plotnik-lib/src/graph/build.rs +++ b/crates/plotnik-lib/src/query/build_graph.rs @@ -1,4 +1,4 @@ -//! Core types and construction for build-time query graphs. +//! Core types for build-time query graphs. //! //! The graph uses index-based node references (`NodeId`) with nodes stored //! in a `Vec`. Strings borrow from the source (`&'src str`) until IR emission. @@ -25,7 +25,6 @@ impl Fragment { Self { entry, exit } } - /// Single-node fragment where entry equals exit. pub fn single(node: NodeId) -> Self { Self { entry: node, @@ -52,69 +51,56 @@ impl<'src> BuildGraph<'src> { } } - /// Add a node, returning its ID. pub fn add_node(&mut self, node: BuildNode<'src>) -> NodeId { let id = self.nodes.len() as NodeId; self.nodes.push(node); id } - /// Add an epsilon node (no matcher, no effects). pub fn add_epsilon(&mut self) -> NodeId { self.add_node(BuildNode::epsilon()) } - /// Add a matcher node. pub fn add_matcher(&mut self, matcher: BuildMatcher<'src>) -> NodeId { self.add_node(BuildNode::with_matcher(matcher)) } - /// Register a definition entry point. pub fn add_definition(&mut self, name: &'src str, entry: NodeId) { self.definitions.insert(name, entry); } - /// Get definition entry point by name. pub fn definition(&self, name: &str) -> Option { self.definitions.get(name).copied() } - /// Iterate over all definitions. 
pub fn definitions(&self) -> impl Iterator + '_ { self.definitions.iter().map(|(k, v)| (*k, *v)) } - /// Get node by ID. pub fn node(&self, id: NodeId) -> &BuildNode<'src> { &self.nodes[id as usize] } - /// Get mutable node by ID. pub fn node_mut(&mut self, id: NodeId) -> &mut BuildNode<'src> { &mut self.nodes[id as usize] } - /// Number of nodes in the graph. pub fn len(&self) -> usize { self.nodes.len() } - /// Returns true if graph has no nodes. pub fn is_empty(&self) -> bool { self.nodes.is_empty() } - /// Iterate over all nodes with their IDs. pub fn iter(&self) -> impl Iterator)> { self.nodes.iter().enumerate().map(|(i, n)| (i as NodeId, n)) } - /// Connect source node to target (add edge). pub fn connect(&mut self, from: NodeId, to: NodeId) { self.nodes[from as usize].successors.push(to); } - /// Connect a fragment's exit to another node. pub fn connect_exit(&mut self, fragment: Fragment, to: NodeId) { self.connect(fragment.exit, to); } @@ -123,19 +109,15 @@ impl<'src> BuildGraph<'src> { // Fragment Combinators // ───────────────────────────────────────────────────────────────────── - /// Create a single-node fragment from a matcher. pub fn matcher_fragment(&mut self, matcher: BuildMatcher<'src>) -> Fragment { Fragment::single(self.add_matcher(matcher)) } - /// Create an epsilon fragment. pub fn epsilon_fragment(&mut self) -> Fragment { Fragment::single(self.add_epsilon()) } /// Connect fragments in sequence: f1 → f2 → ... → fn - /// - /// Returns fragment spanning from first entry to last exit. pub fn sequence(&mut self, fragments: &[Fragment]) -> Fragment { match fragments.len() { 0 => self.epsilon_fragment(), @@ -150,8 +132,6 @@ impl<'src> BuildGraph<'src> { } /// Connect fragments in parallel (alternation): entry → [f1|f2|...|fn] → exit - /// - /// Creates shared epsilon entry and exit nodes. 
pub fn alternation(&mut self, fragments: &[Fragment]) -> Fragment { if fragments.is_empty() { return self.epsilon_fragment(); @@ -172,19 +152,10 @@ impl<'src> BuildGraph<'src> { } /// Zero or more (greedy): inner* - /// - /// ```text - /// ┌──────────────┐ - /// ↓ │ - /// entry ─→ branch ─→ inner ─┘ - /// │ - /// └─→ exit - /// ``` pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { let branch = self.add_epsilon(); let exit = self.add_epsilon(); - // Greedy: try inner first self.connect(branch, inner.entry); self.connect(branch, exit); self.connect(inner.exit, branch); @@ -197,7 +168,6 @@ impl<'src> BuildGraph<'src> { let branch = self.add_epsilon(); let exit = self.add_epsilon(); - // Non-greedy: try exit first self.connect(branch, exit); self.connect(branch, inner.entry); self.connect(inner.exit, branch); @@ -206,20 +176,11 @@ impl<'src> BuildGraph<'src> { } /// One or more (greedy): inner+ - /// - /// ```text - /// ┌──────────────┐ - /// ↓ │ - /// entry ─→ inner ─→ branch ─┘ - /// │ - /// └─→ exit - /// ``` pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { let branch = self.add_epsilon(); let exit = self.add_epsilon(); self.connect(inner.exit, branch); - // Greedy: try inner first self.connect(branch, inner.entry); self.connect(branch, exit); @@ -232,7 +193,6 @@ impl<'src> BuildGraph<'src> { let exit = self.add_epsilon(); self.connect(inner.exit, branch); - // Non-greedy: try exit first self.connect(branch, exit); self.connect(branch, inner.entry); @@ -240,17 +200,10 @@ impl<'src> BuildGraph<'src> { } /// Optional (greedy): inner? 
- /// - /// ```text - /// entry ─→ branch ─→ inner ─→ exit - /// │ ↑ - /// └──────────────────┘ - /// ``` pub fn optional(&mut self, inner: Fragment) -> Fragment { let branch = self.add_epsilon(); let exit = self.add_epsilon(); - // Greedy: try inner first self.connect(branch, inner.entry); self.connect(branch, exit); self.connect(inner.exit, exit); @@ -263,7 +216,6 @@ impl<'src> BuildGraph<'src> { let branch = self.add_epsilon(); let exit = self.add_epsilon(); - // Non-greedy: try skip first self.connect(branch, exit); self.connect(branch, inner.entry); self.connect(inner.exit, exit); @@ -273,17 +225,9 @@ impl<'src> BuildGraph<'src> { // ───────────────────────────────────────────────────────────────────── // Array-Collecting Loop Combinators - // - // These place PushElement on the back-edge so each iteration pushes. // ───────────────────────────────────────────────────────────────────── /// Zero or more with array collection (greedy): inner* - /// - /// ```text - /// StartArray → branch → inner → PushElement ─┐ - /// │ │ - /// └─→ EndArray ←──────────────┘ - /// ``` pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); self.node_mut(start).add_effect(BuildEffect::StartArray); @@ -296,10 +240,8 @@ impl<'src> BuildGraph<'src> { self.node_mut(end).add_effect(BuildEffect::EndArray); self.connect(start, branch); - // Greedy: try inner first self.connect(branch, inner.entry); self.connect(branch, end); - // Back-edge with push self.connect(inner.exit, push); self.connect(push, branch); @@ -319,10 +261,8 @@ impl<'src> BuildGraph<'src> { self.node_mut(end).add_effect(BuildEffect::EndArray); self.connect(start, branch); - // Non-greedy: try exit first self.connect(branch, end); self.connect(branch, inner.entry); - // Back-edge with push self.connect(inner.exit, push); self.connect(push, branch); @@ -330,12 +270,6 @@ impl<'src> BuildGraph<'src> { } /// One or more with array collection (greedy): inner+ - /// - /// ```text - 
/// StartArray → inner → PushElement → branch ─┐ - /// │ │ - /// └─→ EndArray - /// ``` pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); self.node_mut(start).add_effect(BuildEffect::StartArray); @@ -351,7 +285,6 @@ impl<'src> BuildGraph<'src> { self.connect(start, inner.entry); self.connect(inner.exit, push); self.connect(push, branch); - // Greedy: try inner first self.connect(branch, inner.entry); self.connect(branch, end); @@ -374,7 +307,6 @@ impl<'src> BuildGraph<'src> { self.connect(start, inner.entry); self.connect(inner.exit, push); self.connect(push, branch); - // Non-greedy: try exit first self.connect(branch, end); self.connect(branch, inner.entry); @@ -395,14 +327,11 @@ pub struct BuildNode<'src> { pub effects: Vec>, pub ref_marker: RefMarker, pub successors: Vec, - /// Navigation instruction for this transition (see ADR-0008). pub nav: Nav, - /// Reference name for Enter nodes (resolved during linking). pub ref_name: Option<&'src str>, } impl<'src> BuildNode<'src> { - /// Create an epsilon node (pass-through, no match). pub fn epsilon() -> Self { Self { matcher: BuildMatcher::Epsilon, @@ -414,7 +343,6 @@ impl<'src> BuildNode<'src> { } } - /// Create a node with a matcher. pub fn with_matcher(matcher: BuildMatcher<'src>) -> Self { Self { matcher, @@ -426,22 +354,18 @@ impl<'src> BuildNode<'src> { } } - /// Add an effect to this node. pub fn add_effect(&mut self, effect: BuildEffect<'src>) { self.effects.push(effect); } - /// Set the ref marker. pub fn set_ref_marker(&mut self, marker: RefMarker) { self.ref_marker = marker; } - /// Set the navigation instruction. pub fn set_nav(&mut self, nav: Nav) { self.nav = nav; } - /// Returns true if this is an epsilon node. pub fn is_epsilon(&self) -> bool { matches!(self.matcher, BuildMatcher::Epsilon) } @@ -450,24 +374,19 @@ impl<'src> BuildNode<'src> { /// What a transition matches. 
#[derive(Debug, Clone, PartialEq, Eq)] pub enum BuildMatcher<'src> { - /// Matches without consuming input. Control flow only. Epsilon, - - /// Matches a named node by kind. Node { kind: &'src str, field: Option<&'src str>, negated_fields: Vec<&'src str>, }, - - /// Matches an anonymous node (string literal). Anonymous { literal: &'src str, field: Option<&'src str>, }, - - /// Matches any node. - Wildcard { field: Option<&'src str> }, + Wildcard { + field: Option<&'src str>, + }, } impl<'src> BuildMatcher<'src> { @@ -490,7 +409,6 @@ impl<'src> BuildMatcher<'src> { Self::Wildcard { field: None } } - /// Set field constraint. pub fn with_field(mut self, field: &'src str) -> Self { match &mut self { BuildMatcher::Node { field: f, .. } => *f = Some(field), @@ -501,7 +419,6 @@ impl<'src> BuildMatcher<'src> { self } - /// Add negated field (Node matcher only). pub fn with_negated_field(mut self, field: &'src str) -> Self { if let BuildMatcher::Node { negated_fields, .. } = &mut self { negated_fields.push(field); @@ -511,57 +428,29 @@ impl<'src> BuildMatcher<'src> { } /// Effect operations recorded during graph construction. -/// -/// These mirror `ir::EffectOp` but use borrowed strings. #[derive(Debug, Clone, PartialEq, Eq)] pub enum BuildEffect<'src> { - /// Store matched node as current value. CaptureNode, - - /// Push empty array onto stack. StartArray, - - /// Move current value into top array. PushElement, - - /// Pop array from stack into current. EndArray, - - /// Push empty object onto stack. StartObject, - - /// Pop object from stack into current. EndObject, - - /// Move current value into top object at field. Field { name: &'src str, span: TextRange }, - - /// Push variant container with tag onto stack. StartVariant(&'src str), - - /// Pop variant, wrap current, set as current. EndVariant, - - /// Replace current Node with its source text. ToString, } /// Marker for definition call/return transitions. 
#[derive(Debug, Clone, PartialEq, Eq, Default)] pub enum RefMarker { - /// Not a reference transition. #[default] None, - - /// Enter a definition call. Stores return points for Exit. Enter { - /// Index identifying this ref (for matching Enter/Exit pairs). ref_id: u32, }, - - /// Exit a definition call. Returns to points stored at Enter. Exit { - /// Must match corresponding Enter's ref_id. ref_id: u32, }, } diff --git a/crates/plotnik-lib/src/graph/construct.rs b/crates/plotnik-lib/src/query/construct.rs similarity index 81% rename from crates/plotnik-lib/src/graph/construct.rs rename to crates/plotnik-lib/src/query/construct.rs index 7d71b34e..703f9a7c 100644 --- a/crates/plotnik-lib/src/graph/construct.rs +++ b/crates/plotnik-lib/src/query/construct.rs @@ -1,36 +1,28 @@ -//! AST-to-graph construction. +//! Graph construction integrated with Query pipeline. //! -//! Translates the parsed and analyzed AST into a `BuildGraph`. -//! This is the bridge between `parser::ast` and `graph::BuildGraph`. +//! Constructs a `BuildGraph` from the parsed AST, reusing the `symbol_table` +//! populated by earlier passes. + +use std::collections::HashSet; use crate::ir::Nav; use crate::parser::{ - AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, SyntaxKind, token_src, + AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Expr, FieldExpr, NamedNode, + NegatedField, QuantifiedExpr, Ref, SeqExpr, SeqItem, SyntaxKind, token_src, }; -use super::{BuildEffect, BuildGraph, BuildMatcher, Fragment, NodeId, RefMarker}; - -/// Constructs a `BuildGraph` from a parsed query AST. -pub struct GraphConstructor<'src> { - source: &'src str, - graph: BuildGraph<'src>, - next_ref_id: u32, -} +use super::Query; +use super::build_graph::{BuildEffect, BuildGraph, BuildMatcher, Fragment, NodeId, RefMarker}; /// Context for navigation determination. 
#[derive(Debug, Clone, Copy)] enum NavContext { - /// First expression at definition root level. Root, - /// First child inside a parent node. FirstChild { anchored: bool }, - /// Sibling after previous expression. Sibling { anchored: bool }, } impl NavContext { - /// Determine the Nav based on context and expression type. fn to_nav(self, is_anonymous: bool) -> Nav { match self { NavContext::Root => Nav::stay(), @@ -57,9 +49,7 @@ impl NavContext { /// Tracks trailing anchor state for Up navigation. #[derive(Debug, Clone, Copy)] struct ExitContext { - /// Whether there's a trailing anchor before exit. has_trailing_anchor: bool, - /// Whether the last expression was anonymous (for Exact vs SkipTrivia). last_was_anonymous: bool, } @@ -75,8 +65,34 @@ impl ExitContext { } } +impl<'a> Query<'a> { + /// Build the graph from the already-populated symbol_table. + /// + /// This method reuses the symbol_table from name resolution, + /// avoiding duplicate iteration over definitions. + pub(super) fn construct_graph(&mut self) { + let mut constructor = GraphConstructor::new(self.source); + + // Reuse symbol_table: iterate name -> body pairs + for (name, body) in &self.symbol_table { + let fragment = constructor.construct_expr(body, NavContext::Root); + constructor.graph.add_definition(name, fragment.entry); + } + + constructor.link_references(); + self.graph = constructor.graph; + } +} + +/// Internal constructor that builds the graph. +struct GraphConstructor<'src> { + source: &'src str, + graph: BuildGraph<'src>, + next_ref_id: u32, +} + impl<'src> GraphConstructor<'src> { - pub fn new(source: &'src str) -> Self { + fn new(source: &'src str) -> Self { Self { source, graph: BuildGraph::new(), @@ -84,40 +100,22 @@ impl<'src> GraphConstructor<'src> { } } - /// Construct graph from a parsed Root AST. 
- pub fn construct(mut self, root: &Root) -> BuildGraph<'src> { - for def in root.defs() { - self.construct_def(&def); - } - self.link_references(); - self.graph - } - /// Link Enter nodes to their definition entry points. - /// - /// Per ADR-0005, Enter's successors should be: - /// - successors[0]: definition entry point - /// - successors[1..]: return transitions (the Exit node's successor) fn link_references(&mut self) { - // Collect all Enter nodes with their ref_name and corresponding Exit successors let mut links: Vec<(NodeId, &'src str, Vec)> = Vec::new(); for (id, node) in self.graph.iter() { if let RefMarker::Enter { ref_id } = &node.ref_marker { if let Some(name) = node.ref_name { - // Find the corresponding Exit node and its successors let exit_successors = self.find_exit_successors(*ref_id); links.push((id, name, exit_successors)); } } } - // Apply links for (enter_id, name, return_transitions) in links { if let Some(def_entry) = self.graph.definition(name) { - // Connect Enter to definition entry point self.graph.connect(enter_id, def_entry); - // Add return transitions for ret in return_transitions { self.graph.connect(enter_id, ret); } @@ -125,7 +123,6 @@ impl<'src> GraphConstructor<'src> { } } - /// Find successors of the Exit node matching the given ref_id. 
fn find_exit_successors(&self, ref_id: u32) -> Vec { for (_, node) in self.graph.iter() { if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker { @@ -137,19 +134,6 @@ impl<'src> GraphConstructor<'src> { Vec::new() } - fn construct_def(&mut self, def: &Def) { - let Some(name_token) = def.name() else { - return; - }; - let Some(body) = def.body() else { - return; - }; - - let name = token_src(&name_token, self.source); - let fragment = self.construct_expr(&body, NavContext::Root); - self.graph.add_definition(name, fragment.entry); - } - fn construct_expr(&mut self, expr: &Expr, ctx: NavContext) -> Fragment { match expr { Expr::NamedNode(node) => self.construct_named_node(node, ctx), @@ -169,7 +153,6 @@ impl<'src> GraphConstructor<'src> { let node_id = self.graph.add_matcher(matcher); self.graph.node_mut(node_id).set_nav(nav); - // Process children with anchor tracking let items: Vec<_> = node.items().collect(); if items.is_empty() { return Fragment::single(node_id); @@ -183,7 +166,6 @@ impl<'src> GraphConstructor<'src> { let inner = self.graph.sequence(&child_fragments); self.graph.connect(node_id, inner.entry); - // Add exit transition with appropriate Up nav let exit_id = self.graph.add_epsilon(); self.graph.node_mut(exit_id).set_nav(exit_ctx.to_up_nav(1)); self.graph.connect(inner.exit, exit_id); @@ -191,8 +173,6 @@ impl<'src> GraphConstructor<'src> { Fragment::new(node_id, exit_id) } - /// Construct a sequence of items (expressions and anchors). - /// Returns fragments and exit context for trailing anchor handling. 
fn construct_item_sequence( &mut self, items: &[SeqItem], @@ -216,7 +196,6 @@ impl<'src> GraphConstructor<'src> { anchored: pending_anchor, } } else { - // For sequences at root level, first item inherits parent context NavContext::Sibling { anchored: pending_anchor, } @@ -297,7 +276,6 @@ impl<'src> GraphConstructor<'src> { let ref_id = self.next_ref_id; self.next_ref_id += 1; - // Create Enter node with navigation from context let enter_id = self.graph.add_epsilon(); let nav = ctx.to_nav(false); self.graph.node_mut(enter_id).set_nav(nav); @@ -305,13 +283,11 @@ impl<'src> GraphConstructor<'src> { .node_mut(enter_id) .set_ref_marker(RefMarker::enter(ref_id)); - // Create Exit node (nav will be set during linking based on definition structure) let exit_id = self.graph.add_epsilon(); self.graph .node_mut(exit_id) .set_ref_marker(RefMarker::exit(ref_id)); - // Store ref name for later resolution let name = token_src(&name_token, self.source); self.graph.node_mut(enter_id).ref_name = Some(name); @@ -331,7 +307,6 @@ impl<'src> GraphConstructor<'src> { return self.graph.epsilon_fragment(); } - // Branch node inherits context nav let branch_id = self.graph.add_epsilon(); self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); @@ -359,13 +334,11 @@ impl<'src> GraphConstructor<'src> { let label = token_src(&label_token, self.source); - // StartVariant (epsilon, no nav change) let start_id = self.graph.add_epsilon(); self.graph .node_mut(start_id) .add_effect(BuildEffect::StartVariant(label)); - // Body inherits root context (alternation resets nav context) let body_frag = self.construct_expr(&body, NavContext::Root); let end_id = self.graph.add_epsilon(); @@ -386,14 +359,12 @@ impl<'src> GraphConstructor<'src> { return self.graph.epsilon_fragment(); } - // Branch node inherits context nav let branch_id = self.graph.add_epsilon(); self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); let exit_id = self.graph.add_epsilon(); for body in &branches { - // Each branch 
resets to root context let frag = self.construct_expr(body, NavContext::Root); self.graph.connect(branch_id, frag.entry); self.graph.connect(frag.exit, exit_id); @@ -405,7 +376,6 @@ impl<'src> GraphConstructor<'src> { fn construct_seq(&mut self, seq: &SeqExpr, ctx: NavContext) -> Fragment { let items: Vec<_> = seq.items().collect(); - // Wrap sequence in StartObject/EndObject let start_id = self.graph.add_epsilon(); self.graph.node_mut(start_id).set_nav(ctx.to_nav(false)); self.graph @@ -442,7 +412,6 @@ impl<'src> GraphConstructor<'src> { .map(|n| n.text() == "string") .unwrap_or(false); - // Attach CaptureNode to all reachable matchers let matchers = self.find_all_matchers(inner_frag.entry); for matcher_id in matchers { self.graph @@ -456,7 +425,6 @@ impl<'src> GraphConstructor<'src> { } } - // Add Field effect at exit if let Some(name) = capture_name { let span = capture_token .as_ref() @@ -481,7 +449,6 @@ impl<'src> GraphConstructor<'src> { return self.construct_expr(&inner_expr, ctx); }; - // First iteration uses parent context, subsequent use Sibling let inner_frag = self.construct_expr(&inner_expr, ctx); match op.kind() { @@ -502,7 +469,6 @@ impl<'src> GraphConstructor<'src> { self.construct_expr(&value_expr, ctx) } - /// Find field constraint from parent FieldExpr. fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'src str> { let parent = node.parent()?; let field_expr = FieldExpr::cast(parent)?; @@ -510,10 +476,9 @@ impl<'src> GraphConstructor<'src> { Some(token_src(&name_token, self.source)) } - /// Find all non-epsilon matcher nodes reachable from start. 
fn find_all_matchers(&self, start: NodeId) -> Vec { let mut result = Vec::new(); - let mut visited = std::collections::HashSet::new(); + let mut visited = HashSet::new(); self.collect_matchers(start, &mut result, &mut visited); result } @@ -522,7 +487,7 @@ impl<'src> GraphConstructor<'src> { &self, node_id: NodeId, result: &mut Vec, - visited: &mut std::collections::HashSet, + visited: &mut HashSet, ) { if !visited.insert(node_id) { return; @@ -540,12 +505,6 @@ impl<'src> GraphConstructor<'src> { } } -/// Returns true if expression is an anonymous node (string literal). fn is_anonymous_expr(expr: &Expr) -> bool { matches!(expr, Expr::AnonymousNode(n) if !n.is_any()) } - -/// Convenience function to construct a graph from source and AST. -pub fn construct_graph<'src>(source: &'src str, root: &Root) -> BuildGraph<'src> { - GraphConstructor::new(source).construct(root) -} diff --git a/crates/plotnik-lib/src/query/construct_tests.rs b/crates/plotnik-lib/src/query/construct_tests.rs new file mode 100644 index 00000000..3877409c --- /dev/null +++ b/crates/plotnik-lib/src/query/construct_tests.rs @@ -0,0 +1,267 @@ +//! Tests for graph construction integrated with Query pipeline. 
+ +use indoc::indoc; + +use crate::query::Query; + +fn snapshot(input: &str) -> String { + let query = Query::try_from(input).unwrap().build_graph(); + query.graph().dump() +} + +fn snapshot_optimized(input: &str) -> String { + let query = Query::try_from(input).unwrap().build_graph(); + query.graph().dump_live(query.dead_nodes()) +} + +#[test] +fn simple_named_node() { + insta::assert_snapshot!(snapshot("Q = (identifier)"), @r" + Q = N0 + + N0: (identifier) → ∅ + "); +} + +#[test] +fn named_node_with_capture() { + insta::assert_snapshot!(snapshot("Q = (identifier) @id"), @r" + Q = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(id)] → ∅ + "); +} + +#[test] +fn named_node_with_children() { + insta::assert_snapshot!(snapshot("Q = (function_definition (identifier))"), @r" + Q = N0 + + N0: (function_definition) → N1 + N1: [Down] (identifier) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn sequence() { + insta::assert_snapshot!(snapshot("Q = { (a) (b) }"), @r" + Q = N1 + + N0: ε [StartObj] → N1 + N1: [Next] (a) [StartObj] → N2 + N2: [Next] (b) → N3 + N3: ε [EndObj] → ∅ + "); +} + +#[test] +fn sequence_with_captures() { + insta::assert_snapshot!(snapshot("Q = { (a) @x (b) @y }"), @r" + Q = N1 + + N0: ε [StartObj] → N1 + N1: [Next] (a) [StartObj] [Capture] → N3 + N2: ε [Field(x)] → N3 + N3: [Next] (b) [Field(x)] [Capture] → N5 + N4: ε [Field(y)] → N5 + N5: ε [Field(y)] [EndObj] → ∅ + "); +} + +#[test] +fn alternation_untagged() { + insta::assert_snapshot!(snapshot("Q = [ (a) (b) ]"), @r" + Q = N0 + + N0: ε → N2, N3 + N1: ε → ∅ + N2: (a) → N1 + N3: (b) → N1 + "); +} + +#[test] +fn alternation_tagged() { + insta::assert_snapshot!(snapshot("Q = [ A: (a) @x B: (b) @y ]"), @r" + Q = N0 + + N0: ε → N3, N7 + N1: ε [Field(x)] [EndVariant] [Field(y)] [EndVariant] → ∅ + N2: ε [Variant(A)] → N3 + N3: (a) [Variant(A)] [Capture] → N1 + N4: ε [Field(x)] → N1 + N5: ε [EndVariant] → N1 + N6: ε [Variant(B)] → N7 + N7: (b) [Variant(B)] [Capture] → N1 + N8: ε [Field(y)] → N1 + N9: ε 
[EndVariant] → N1 + "); +} + +#[test] +fn quantifier_star() { + insta::assert_snapshot!(snapshot("Q = (identifier)*"), @r" + Q = N2 + + N0: (identifier) → N2 + N1: ε [StartArray] → N2 + N2: ε [StartArray] [Push] → N0, N4 + N3: ε [Push] → N2 + N4: ε [EndArray] → ∅ + "); +} + +#[test] +fn quantifier_plus() { + insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" + Q = N0 + + N0: (identifier) [StartArray] → N3 + N1: ε [StartArray] → N0 + N2: ε [Push] → N3 + N3: ε [Push] → N0, N4 + N4: ε [EndArray] → ∅ + "); +} + +#[test] +fn quantifier_optional() { + insta::assert_snapshot!(snapshot("Q = (identifier)?"), @r" + Q = N1 + + N0: (identifier) → N2 + N1: ε → N0, N2 + N2: ε → ∅ + "); +} + +#[test] +fn reference() { + let input = indoc! {r#" + A = (identifier) + B = (A) + "#}; + insta::assert_snapshot!(snapshot(input), @r" + A = N0 + B = N1 + + N0: (identifier) → ∅ + N1: ε +Enter(0, A) → N0 + N2: ε +Exit(0) → ∅ + "); +} + +#[test] +fn anonymous_node() { + insta::assert_snapshot!(snapshot(r#"Q = "hello""#), @r#" + Q = N0 + + N0: "hello" → ∅ + "#); +} + +#[test] +fn wildcard() { + insta::assert_snapshot!(snapshot("Q = (_)"), @r" + Q = N0 + + N0: _ → ∅ + "); +} + +#[test] +fn field_constraint() { + insta::assert_snapshot!(snapshot("Q = (function name: (identifier))"), @r" + Q = N0 + + N0: (function) → N1 + N1: [Down] (identifier) @name → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn to_string_annotation() { + insta::assert_snapshot!(snapshot("Q = (identifier) @name ::string"), @r" + Q = N0 + + N0: (identifier) [Capture] [ToString] → N1 + N1: ε [Field(name)] → ∅ + "); +} + +#[test] +fn anchor_first_child() { + insta::assert_snapshot!(snapshot("Q = (parent . (child))"), @r" + Q = N0 + + N0: (parent) → N1 + N1: [Down.] (child) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn anchor_sibling() { + insta::assert_snapshot!(snapshot("Q = (parent (a) . (b))"), @r" + Q = N0 + + N0: (parent) → N1 + N1: [Down] (a) → N2 + N2: [Next.] 
(b) → N3 + N3: [Up(1)] ε → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Optimization tests +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn optimized_simple() { + insta::assert_snapshot!(snapshot_optimized("Q = (identifier) @id"), @r" + Q = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(id)] → ∅ + "); +} + +#[test] +fn optimized_sequence() { + insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" + Q = N1 + + N1: [Next] (a) [StartObj] [Capture] → N3 + N3: [Next] (b) [Field(x)] [Capture] → N5 + N5: ε [Field(y)] [EndObj] → ∅ + "); +} + +#[test] +fn symbol_table_reuse() { + let input = indoc! {r#" + Foo = (identifier) + Bar = (Foo) + Baz = (Bar) + "#}; + let query = Query::try_from(input).unwrap().build_graph(); + + assert!(query.graph().definition("Foo").is_some()); + assert!(query.graph().definition("Bar").is_some()); + assert!(query.graph().definition("Baz").is_some()); + + insta::assert_snapshot!(query.graph().dump(), @r" + Foo = N0 + Bar = N1 + Baz = N3 + + N0: (identifier) → ∅ + N1: ε +Enter(0, Foo) → N0 + N2: ε +Exit(0) → ∅ + N3: ε +Enter(1, Bar) → N1 + N4: ε +Exit(1) → ∅ + "); +} diff --git a/crates/plotnik-lib/src/graph/dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs similarity index 50% rename from crates/plotnik-lib/src/graph/dump.rs rename to crates/plotnik-lib/src/query/graph_dump.rs index 393cecfd..3d668204 100644 --- a/crates/plotnik-lib/src/graph/dump.rs +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -1,13 +1,12 @@ //! Dump helpers for graph inspection and testing. -//! -//! Provides formatted output for `BuildGraph` and `TypeInferenceResult` -//! suitable for snapshot testing and debugging. 
-use super::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker, TypeInferenceResult}; -use crate::ir::{Nav, NavKind, TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId}; use std::collections::HashSet; use std::fmt::Write; +use crate::ir::{Nav, NavKind}; + +use super::build_graph::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker}; + /// Printer for `BuildGraph` with configurable output options. pub struct GraphPrinter<'a, 'src> { graph: &'a BuildGraph<'src>, @@ -24,24 +23,16 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { } } - /// Mark nodes as dead (from optimization pass). pub fn with_dead_nodes(mut self, dead: &'a HashSet) -> Self { self.dead_nodes = Some(dead); self } - /// Show dead nodes (struck through or marked). pub fn show_dead(mut self, show: bool) -> Self { self.show_dead = show; self } - /// Filter dead nodes from successor lists. - pub fn filter_dead_successors(self) -> Self { - // This is controlled by dead_nodes being set - self - } - pub fn dump(&self) -> String { let mut out = String::new(); self.format(&mut out).expect("String write never fails"); @@ -49,7 +40,6 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { } fn format(&self, w: &mut String) -> std::fmt::Result { - // Definitions header for (name, entry) in self.graph.definitions() { writeln!(w, "{} = N{}", name, entry)?; } @@ -57,7 +47,6 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { writeln!(w)?; } - // Nodes for (id, node) in self.graph.iter() { let is_dead = self.dead_nodes.map(|d| d.contains(&id)).unwrap_or(false); @@ -65,22 +54,18 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { continue; } - // Node header if is_dead { write!(w, "N{}: ✗ ", id)?; } else { write!(w, "N{}: ", id)?; } - // Navigation (skip Stay) if !node.nav.is_stay() { write!(w, "[{}] ", format_nav(&node.nav))?; } - // Matcher self.format_matcher(w, &node.matcher)?; - // Ref marker match &node.ref_marker { RefMarker::None => {} RefMarker::Enter { ref_id } => { @@ -92,12 +77,10 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { } } - // Effects for 
effect in &node.effects { write!(w, " [{}]", format_effect(effect))?; } - // Successors (filter dead nodes from list) self.format_successors(w, &node.successors)?; writeln!(w)?; @@ -141,7 +124,6 @@ impl<'a, 'src> GraphPrinter<'a, 'src> { } fn format_successors(&self, w: &mut String, successors: &[NodeId]) -> std::fmt::Result { - // Filter out dead nodes from successor list let live_succs: Vec<_> = successors .iter() .filter(|s| self.dead_nodes.map(|d| !d.contains(s)).unwrap_or(true)) @@ -187,22 +169,15 @@ fn format_effect(effect: &BuildEffect) -> String { } } -// ───────────────────────────────────────────────────────────────────────────── -// BuildGraph dump methods -// ───────────────────────────────────────────────────────────────────────────── - impl<'src> BuildGraph<'src> { - /// Create a printer for this graph. pub fn printer(&self) -> GraphPrinter<'_, 'src> { GraphPrinter::new(self) } - /// Dump graph in default format. pub fn dump(&self) -> String { self.printer().dump() } - /// Dump graph showing dead nodes from optimization. pub fn dump_with_dead(&self, dead_nodes: &HashSet) -> String { self.printer() .with_dead_nodes(dead_nodes) @@ -210,118 +185,7 @@ impl<'src> BuildGraph<'src> { .dump() } - /// Dump only live nodes (dead nodes filtered out completely). pub fn dump_live(&self, dead_nodes: &HashSet) -> String { self.printer().with_dead_nodes(dead_nodes).dump() } } - -// ───────────────────────────────────────────────────────────────────────────── -// TypeInferenceResult dump -// ───────────────────────────────────────────────────────────────────────────── - -impl TypeInferenceResult<'_> { - /// Dump inferred types for debugging/testing. 
- pub fn dump(&self) -> String { - let mut out = String::new(); - - out.push_str("=== Entrypoints ===\n"); - for (name, type_id) in &self.entrypoint_types { - out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); - } - - if !self.type_defs.is_empty() { - out.push_str("\n=== Types ===\n"); - for (idx, def) in self.type_defs.iter().enumerate() { - let type_id = idx as TypeId + 3; - let name = def.name.unwrap_or(""); - out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); - - if let Some(inner) = def.inner_type { - out.push_str(&format!(" → {}", format_type_id(inner))); - } - - if !def.members.is_empty() { - out.push_str(" {\n"); - for member in &def.members { - out.push_str(&format!( - " {}: {}\n", - member.name, - format_type_id(member.ty) - )); - } - out.push('}'); - } - out.push('\n'); - } - } - - if !self.errors.is_empty() { - out.push_str("\n=== Errors ===\n"); - for err in &self.errors { - out.push_str(&format!( - "field `{}` in `{}`: incompatible types [{}]\n", - err.field, - err.definition, - err.types_found - .iter() - .map(|t| t.to_string()) - .collect::>() - .join(", ") - )); - } - } - - out - } - - /// Render diagnostics for display (used in tests and CLI). - pub fn dump_diagnostics(&self, source: &str) -> String { - self.diagnostics.render_filtered(source) - } - - /// Check if inference produced any errors. - pub fn has_errors(&self) -> bool { - self.diagnostics.has_errors() - } -} - -fn format_type_id(id: TypeId) -> String { - if id == TYPE_VOID { - "Void".to_string() - } else if id == TYPE_NODE { - "Node".to_string() - } else if id == TYPE_STR { - "String".to_string() - } else { - format!("T{}", id) - } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Test-only dump helpers -// ───────────────────────────────────────────────────────────────────────────── - -#[cfg(test)] -mod test_helpers { - use super::*; - - impl<'src> BuildGraph<'src> { - /// Dump graph for snapshot tests. 
- pub fn dump_graph(&self) -> String { - self.dump() - } - - /// Dump graph with optimization info. - pub fn dump_optimized(&self, dead_nodes: &HashSet) -> String { - self.printer().with_dead_nodes(dead_nodes).dump() - } - } - - impl TypeInferenceResult<'_> { - /// Dump types for snapshot tests. - pub fn dump_types(&self) -> String { - self.dump() - } - } -} diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 203b4197..dbb821b0 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -1,8 +1,11 @@ //! Query processing pipeline. //! -//! Stages: parse → alt_kinds → symbol_table → recursion → shapes. +//! Stages: parse → alt_kinds → symbol_table → recursion → shapes → [build_graph]. //! Each stage populates its own diagnostics. Use `is_valid()` to check //! if any stage produced errors. +//! +//! The `build_graph` stage is optional and constructs the transition graph +//! for compilation to binary IR. mod dump; mod invariants; @@ -10,14 +13,29 @@ mod printer; pub use printer::QueryPrinter; pub mod alt_kinds; +pub mod build_graph; +mod construct; +mod graph_dump; #[cfg(feature = "plotnik-langs")] pub mod link; +mod optimize; pub mod recursion; pub mod shapes; pub mod symbol_table; +pub mod typing; + +pub use build_graph::{ + BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker, +}; +pub use optimize::OptimizeStats; +pub use typing::{ + InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, +}; #[cfg(test)] mod alt_kinds_tests; +#[cfg(test)] +mod construct_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] @@ -31,7 +49,7 @@ mod shapes_tests; #[cfg(test)] mod symbol_table_tests; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; #[cfg(feature = "plotnik-langs")] use plotnik_langs::{NodeFieldId, NodeTypeId}; @@ -55,9 +73,11 @@ use symbol_table::SymbolTable; /// Create with 
[`new`](Self::new), optionally configure fuel limits, /// then call [`exec`](Self::exec) to run analysis. /// +/// For compilation, call [`build_graph`](Self::build_graph) after `exec`. +/// /// Check [`is_valid`](Self::is_valid) or [`diagnostics`](Self::diagnostics) /// to determine if the query has syntax/semantic issues. -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Query<'a> { source: &'a str, ast: Root, @@ -77,6 +97,10 @@ pub struct Query<'a> { shapes_diagnostics: Diagnostics, #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics, + // Graph compilation fields + graph: BuildGraph<'a>, + dead_nodes: HashSet, + type_info: TypeInferenceResult<'a>, } fn empty_root() -> Root { @@ -111,6 +135,9 @@ impl<'a> Query<'a> { shapes_diagnostics: Diagnostics::new(), #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics::new(), + graph: BuildGraph::default(), + dead_nodes: HashSet::new(), + type_info: TypeInferenceResult::default(), } } @@ -145,6 +172,22 @@ impl<'a> Query<'a> { Ok(self) } + /// Build the transition graph for compilation. + /// + /// This is an optional step after `exec`. It constructs the graph, + /// runs epsilon elimination, and infers types. + /// + /// Only runs if the query is valid (no errors from previous passes). + pub fn build_graph(mut self) -> Self { + if !self.is_valid() { + return self; + } + self.construct_graph(); + self.optimize_graph(); + self.infer_types(); + self + } + fn try_parse(&mut self) -> Result<()> { let tokens = lex(self.source); let parser = Parser::new(self.source, tokens) @@ -170,6 +213,21 @@ impl<'a> Query<'a> { &self.ast } + /// Access the constructed graph. + pub fn graph(&self) -> &BuildGraph<'a> { + &self.graph + } + + /// Access the set of dead nodes (eliminated by optimization). + pub fn dead_nodes(&self) -> &HashSet { + &self.dead_nodes + } + + /// Access the type inference result. 
+ pub fn type_info(&self) -> &TypeInferenceResult<'a> { + &self.type_info + } + pub(crate) fn shape_cardinality(&self, node: &SyntaxNode) -> ShapeCardinality { // Error nodes are invalid if node.kind() == SyntaxKind::Error { @@ -220,6 +278,7 @@ impl<'a> Query<'a> { all.extend(self.shapes_diagnostics.clone()); #[cfg(feature = "plotnik-langs")] all.extend(self.link_diagnostics.clone()); + all.extend(self.type_info.diagnostics.clone()); all } @@ -251,6 +310,11 @@ impl<'a> Query<'a> { && !self.recursion_diagnostics.has_errors() && !self.shapes_diagnostics.has_errors() } + + /// Check if graph compilation produced type errors. + pub fn has_type_errors(&self) -> bool { + self.type_info.has_errors() + } } impl<'a> TryFrom<&'a str> for Query<'a> { diff --git a/crates/plotnik-lib/src/graph/optimize.rs b/crates/plotnik-lib/src/query/optimize.rs similarity index 67% rename from crates/plotnik-lib/src/graph/optimize.rs rename to crates/plotnik-lib/src/query/optimize.rs index fd21017b..5c7ffbb2 100644 --- a/crates/plotnik-lib/src/graph/optimize.rs +++ b/crates/plotnik-lib/src/query/optimize.rs @@ -1,51 +1,49 @@ //! Epsilon elimination optimization pass. //! //! Reduces graph size by removing unnecessary epsilon transitions. -//! This simplifies the graph for subsequent analysis passes and reduces -//! runtime traversal overhead. //! //! # Safety Rules (from ADR-0005) //! //! An epsilon node CANNOT be eliminated if: -//! - It has a `RefMarker` (Enter/Exit) — single slot constraint +//! - It has a `RefMarker` (Enter/Exit) //! - It has multiple successors (branch point) -//! - Its successor already has a `RefMarker` (would lose one) -//! - Both have non-Stay `Nav` that can't be merged (only unconstrained Up can merge) -//! -//! # Algorithm -//! -//! 1. Build predecessor map -//! 2. Identify eliminable epsilon nodes -//! 3. For each eliminable epsilon: -//! - Prepend its effects to successor -//! - Redirect all predecessors to successor -//! 
- Mark epsilon as dead (will be skipped in emission) - -use super::{BuildGraph, BuildMatcher, NodeId}; -use crate::ir::{Nav, NavKind}; +//! - Its successor already has a `RefMarker` +//! - Both have non-Stay `Nav` that can't be merged + use std::collections::{HashMap, HashSet}; +use crate::ir::{Nav, NavKind}; + +use super::Query; +use super::build_graph::{BuildGraph, BuildMatcher, NodeId}; + /// Statistics from epsilon elimination. #[derive(Debug, Default)] pub struct OptimizeStats { - /// Number of epsilon nodes eliminated. pub epsilons_eliminated: usize, - /// Number of epsilon nodes kept (branch points, ref markers, etc). pub epsilons_kept: usize, } -/// Run epsilon elimination on the graph. +impl Query<'_> { + /// Run epsilon elimination on the graph. + /// + /// Populates `dead_nodes` with eliminated node IDs. + pub(super) fn optimize_graph(&mut self) { + let (dead, _stats) = eliminate_epsilons(&mut self.graph); + self.dead_nodes = dead; + } +} + +/// Run epsilon elimination on a BuildGraph. /// /// Returns the set of dead node IDs that should be skipped during emission. 
pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeStats) { let mut stats = OptimizeStats::default(); let mut dead_nodes: HashSet = HashSet::new(); - // Build predecessor map: node -> list of predecessors let predecessors = build_predecessor_map(graph); // Process nodes in reverse order to handle chains - // (eliminates inner epsilons before outer ones see them) let node_count = graph.len() as NodeId; for id in (0..node_count).rev() { if dead_nodes.contains(&id) { @@ -60,18 +58,14 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS continue; } - // Get the single successor (already verified in is_eliminable_epsilon) let successor_id = node.successors[0]; - // Skip if successor has a RefMarker and we have effects - // (can't merge effects into a ref transition) let successor = graph.node(successor_id); if !successor.ref_marker.is_none() && !node.effects.is_empty() { stats.epsilons_kept += 1; continue; } - // Collect data needed for the merge let effects_to_prepend = graph.node(id).effects.clone(); let nav_to_transfer = graph.node(id).nav; let preds = predecessors.get(&id).cloned().unwrap_or_default(); @@ -88,10 +82,8 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS let successor_nav = graph.node(successor_id).nav; if !nav_to_transfer.is_stay() { if successor_nav.is_stay() { - // Simple transfer graph.node_mut(successor_id).nav = nav_to_transfer; } else if can_merge_up(nav_to_transfer, successor_nav) { - // Merge unconstrained Up levels let merged = Nav::up(nav_to_transfer.level + successor_nav.level); graph.node_mut(successor_id).nav = merged; } @@ -110,10 +102,8 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS } } - // Update definition entry points redirect_definitions(graph, id, successor_id); - // Mark as dead dead_nodes.insert(id); stats.epsilons_eliminated += 1; } @@ -121,19 +111,15 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS 
(dead_nodes, stats) } -/// Check if an epsilon node can be eliminated. -fn is_eliminable_epsilon(node: &super::BuildNode, graph: &BuildGraph) -> bool { - // Must be epsilon +fn is_eliminable_epsilon(node: &super::build_graph::BuildNode, graph: &BuildGraph) -> bool { if !matches!(node.matcher, BuildMatcher::Epsilon) { return false; } - // Must not have RefMarker if !node.ref_marker.is_none() { return false; } - // Must have exactly one successor (not a branch point) if node.successors.len() != 1 { return false; } @@ -141,16 +127,12 @@ fn is_eliminable_epsilon(node: &super::BuildNode, graph: &BuildGraph) -> bool { let successor_id = node.successors[0]; let successor = graph.node(successor_id); - // Can't merge if both have non-Stay nav, UNLESS both are unconstrained Up - // (Up(n) + Up(m) = Up(n+m)) if !node.nav.is_stay() && !successor.nav.is_stay() { if !can_merge_up(node.nav, successor.nav) { return false; } } - // Can't merge if both have effects and successor has RefMarker - // (effects must stay ordered relative to ref transitions) if !node.effects.is_empty() && !successor.ref_marker.is_none() { return false; } @@ -158,7 +140,6 @@ fn is_eliminable_epsilon(node: &super::BuildNode, graph: &BuildGraph) -> bool { true } -/// Build a map from each node to its predecessors. fn build_predecessor_map(graph: &BuildGraph) -> HashMap> { let mut predecessors: HashMap> = HashMap::new(); @@ -171,21 +152,17 @@ fn build_predecessor_map(graph: &BuildGraph) -> HashMap> { predecessors } -/// Check if two Nav instructions can be merged (only unconstrained Up). fn can_merge_up(a: Nav, b: Nav) -> bool { a.kind == NavKind::Up && b.kind == NavKind::Up } -/// Update definition entry points if they pointed to eliminated node. 
fn redirect_definitions(graph: &mut BuildGraph, old_id: NodeId, new_id: NodeId) { - // Collect definitions that need updating let updates: Vec<_> = graph .definitions() .filter(|(_, entry)| *entry == old_id) .map(|(name, _)| name) .collect(); - // Apply updates for name in updates { graph.add_definition(name, new_id); } diff --git a/crates/plotnik-lib/src/graph/typing.rs b/crates/plotnik-lib/src/query/typing.rs similarity index 73% rename from crates/plotnik-lib/src/graph/typing.rs rename to crates/plotnik-lib/src/query/typing.rs index 8aff4b2f..457f9de3 100644 --- a/crates/plotnik-lib/src/graph/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -1,7 +1,7 @@ -//! Type inference for BuildGraph. +//! Type inference for Query's BuildGraph. //! -//! This module analyzes a BuildGraph and infers the output type structure -//! for each definition. The inference follows rules from ADR-0007 and ADR-0009. +//! Analyzes the graph and infers output type structure for each definition. +//! Follows rules from ADR-0007 and ADR-0009. //! //! # Algorithm Overview //! @@ -10,46 +10,33 @@ //! 3. When Field(name) is encountered, record the pending value as a field //! 4. Handle branching by merging field sets from all branches (1-level merge) //! 5. Handle quantifiers via array cardinality markers -//! -//! # 1-Level Merge Semantics -//! -//! When merging captures across alternation branches: -//! - Top-level fields merge with optionality for asymmetric captures -//! - Base types (Node, String) must match exactly -//! - Nested structs must be structurally identical (not recursively merged) -//! - All incompatibilities are reported, not just the first -use super::{BuildEffect, BuildGraph, NodeId}; -use crate::diagnostics::{DiagnosticKind, Diagnostics}; -use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; +use std::collections::HashSet; + use indexmap::IndexMap; use rowan::TextRange; -use std::collections::HashSet; -/// Result of type inference on a BuildGraph. 
-#[derive(Debug)] +use crate::diagnostics::{DiagnosticKind, Diagnostics}; +use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; + +use super::Query; +use super::build_graph::{BuildEffect, BuildGraph, NodeId}; + +/// Result of type inference. +#[derive(Debug, Default)] pub struct TypeInferenceResult<'src> { - /// All inferred type definitions (composite types only). pub type_defs: Vec>, - /// Mapping from definition name to its result TypeId. pub entrypoint_types: IndexMap<&'src str, TypeId>, - /// Type inference diagnostics. pub diagnostics: Diagnostics, - /// Type unification errors (incompatible types in alternation branches). - /// Kept for backward compatibility; diagnostics is the primary error channel. pub errors: Vec>, } /// Error when types cannot be unified in alternation branches. #[derive(Debug, Clone)] pub struct UnificationError<'src> { - /// The field name where incompatibility was detected. pub field: &'src str, - /// Definition context where the error occurred. pub definition: &'src str, - /// Types found across branches (for error message). pub types_found: Vec, - /// Spans of the conflicting captures. pub spans: Vec, } @@ -58,7 +45,7 @@ pub struct UnificationError<'src> { pub enum TypeDescription { Node, String, - Struct(Vec), // field names for identification + Struct(Vec), } impl std::fmt::Display for TypeDescription { @@ -73,14 +60,12 @@ impl std::fmt::Display for TypeDescription { } } -/// An inferred type definition (before emission). +/// An inferred type definition. #[derive(Debug, Clone)] pub struct InferredTypeDef<'src> { pub kind: TypeKind, pub name: Option<&'src str>, - /// For Record/Enum: fields or variants. For wrappers: empty. pub members: Vec>, - /// For wrapper types: the inner TypeId. pub inner_type: Option, } @@ -91,7 +76,6 @@ pub struct InferredMember<'src> { pub ty: TypeId, } -/// Cardinality of a capture. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] enum Cardinality { One, @@ -101,7 +85,6 @@ enum Cardinality { } impl Cardinality { - /// Join cardinalities (for alternation branches). fn join(self, other: Cardinality) -> Cardinality { use Cardinality::*; match (self, other) { @@ -113,7 +96,6 @@ impl Cardinality { } } - /// Make optional (for fields missing in some alternation branches). fn make_optional(self) -> Cardinality { use Cardinality::*; match self { @@ -124,14 +106,10 @@ impl Cardinality { } } -/// Type shape for 1-level merge comparison. -/// Tracks enough information to detect incompatibilities. #[derive(Debug, Clone, PartialEq, Eq)] -#[allow(dead_code)] // Struct variant is infrastructure for captured sequence support +#[allow(dead_code)] enum TypeShape<'src> { - /// Primitive: Node or String Primitive(TypeId), - /// Struct with known field names (for structural identity check) Struct(Vec<&'src str>), } @@ -140,7 +118,7 @@ impl<'src> TypeShape<'src> { match self { TypeShape::Primitive(TYPE_NODE) => TypeDescription::Node, TypeShape::Primitive(TYPE_STR) => TypeDescription::String, - TypeShape::Primitive(_) => TypeDescription::Node, // fallback + TypeShape::Primitive(_) => TypeDescription::Node, TypeShape::Struct(fields) => { TypeDescription::Struct(fields.iter().map(|s| s.to_string()).collect()) } @@ -148,30 +126,20 @@ impl<'src> TypeShape<'src> { } } -/// Inferred field information collected during traversal. #[derive(Debug, Clone)] struct FieldInfo<'src> { - /// The inferred type shape (for compatibility checking). shape: TypeShape<'src>, - /// Base TypeId (TYPE_NODE or TYPE_STR for primitives, placeholder for structs). base_type: TypeId, - /// Cardinality from quantifiers. cardinality: Cardinality, - /// Number of branches this field appears in (for optional detection). branch_count: usize, - /// All shapes seen at this field (for error reporting). all_shapes: Vec>, - /// Spans where this field was captured (for error reporting). 
spans: Vec, } -/// Collected scope information from traversal. #[derive(Debug, Clone, Default)] struct ScopeInfo<'src> { fields: IndexMap<&'src str, FieldInfo<'src>>, - /// Variants for tagged alternations. variants: IndexMap<&'src str, ScopeInfo<'src>>, - /// Whether we've seen variant markers (StartVariant/EndVariant). has_variants: bool, } @@ -206,28 +174,21 @@ impl<'src> ScopeInfo<'src> { } } - /// Merge another scope into this one, applying 1-level merge semantics. - /// Returns errors for incompatible types. - /// Note: Does NOT apply optionality - call `apply_optionality` after all branches merged. fn merge_from(&mut self, other: ScopeInfo<'src>) -> Vec> { let mut errors = Vec::new(); for (name, info) in other.fields { if let Some(existing) = self.fields.get_mut(name) { - // Check type compatibility (1-level merge) if let Some(mut err) = check_compatibility(&existing.shape, &info.shape, name) { - // Attach spans from both sides err.spans = existing.spans.clone(); err.spans.extend(info.spans.iter().cloned()); errors.push(err); - // Collect all shapes for error reporting for shape in &info.all_shapes { if !existing.all_shapes.contains(shape) { existing.all_shapes.push(shape.clone()); } } } - // Always merge spans existing.spans.extend(info.spans); existing.cardinality = existing.cardinality.join(info.cardinality); existing.branch_count += info.branch_count; @@ -236,7 +197,6 @@ impl<'src> ScopeInfo<'src> { } } - // Merge variants for (tag, variant_info) in other.variants { if let Some(existing) = self.variants.get_mut(tag) { let variant_errors = existing.merge_from(variant_info); @@ -250,8 +210,6 @@ impl<'src> ScopeInfo<'src> { errors } - /// Apply optionality to fields that don't appear in all branches. - /// Must be called after all branches have been merged. 
fn apply_optionality(&mut self, total_branches: usize) { for info in self.fields.values_mut() { if info.branch_count < total_branches { @@ -261,7 +219,6 @@ impl<'src> ScopeInfo<'src> { } } -/// Internal error during merge (before conversion to UnificationError). #[derive(Debug)] struct MergeError<'src> { field: &'src str, @@ -269,32 +226,24 @@ struct MergeError<'src> { spans: Vec, } -/// Check if two type shapes are compatible under 1-level merge semantics. fn check_compatibility<'src>( a: &TypeShape<'src>, b: &TypeShape<'src>, field: &'src str, ) -> Option> { match (a, b) { - // Same primitive types are compatible (TypeShape::Primitive(t1), TypeShape::Primitive(t2)) if t1 == t2 => None, - - // Different primitives (Node vs String) are incompatible (TypeShape::Primitive(_), TypeShape::Primitive(_)) => Some(MergeError { field, shapes: vec![a.clone(), b.clone()], - spans: vec![], // Filled in by caller + spans: vec![], }), - - // Struct vs Primitive is incompatible (TypeShape::Struct(_), TypeShape::Primitive(_)) | (TypeShape::Primitive(_), TypeShape::Struct(_)) => Some(MergeError { field, shapes: vec![a.clone(), b.clone()], - spans: vec![], // Filled in by caller + spans: vec![], }), - - // Structs: must have identical field sets (1-level, no deep merge) (TypeShape::Struct(fields_a), TypeShape::Struct(fields_b)) => { if fields_a == fields_b { None @@ -302,23 +251,18 @@ fn check_compatibility<'src>( Some(MergeError { field, shapes: vec![a.clone(), b.clone()], - spans: vec![], // Filled in by caller + spans: vec![], }) } } } } -/// State during graph traversal. #[derive(Debug, Clone, Copy)] struct TraversalState<'src> { - /// The type of the current pending value (after CaptureNode). pending_type: Option, - /// Current cardinality wrapper (from array effects). cardinality: Cardinality, - /// Current variant tag (inside StartVariant..EndVariant). current_variant: Option<&'src str>, - /// Depth counter for nested objects. 
object_depth: u32, } @@ -333,7 +277,6 @@ impl Default for TraversalState<'_> { } } -/// Context for type inference. struct InferenceContext<'src, 'g> { graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet, @@ -372,7 +315,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut merge_errors, ); - // Convert merge errors to unification errors and diagnostics for err in merge_errors { let types_str = err .shapes @@ -381,14 +323,12 @@ impl<'src, 'g> InferenceContext<'src, 'g> { .collect::>() .join(" vs "); - // Use first span as primary, others as related let primary_span = err.spans.first().copied().unwrap_or_default(); let mut builder = self .diagnostics .report(DiagnosticKind::IncompatibleTypes, primary_span) .message(types_str); - // Add related spans for span in err.spans.iter().skip(1) { builder = builder.related_to("also captured here", *span); } @@ -399,7 +339,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { )) .emit(); - // Keep legacy error for backward compat self.errors.push(UnificationError { field: err.field, definition: def_name, @@ -429,7 +368,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { return ScopeInfo::default(); } - // Cycle detection - but allow revisiting at different depths for quantifiers if !visited.insert(node_id) && depth > 50 { return ScopeInfo::default(); } @@ -437,7 +375,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let node = self.graph.node(node_id); let mut scope = ScopeInfo::default(); - // Process effects on this node for effect in &node.effects { match effect { BuildEffect::CaptureNode => { @@ -449,7 +386,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { BuildEffect::Field { name, span } => { if let Some(base_type) = state.pending_type.take() { if let Some(tag) = state.current_variant { - // Inside a variant - add to variant scope let variant_scope = scope.variants.entry(tag).or_default(); variant_scope.add_field(*name, base_type, state.cardinality, *span); } else { @@ -458,12 +394,8 @@ impl<'src, 'g> InferenceContext<'src, 'g> { 
} state.cardinality = Cardinality::One; } - BuildEffect::StartArray => { - // Mark that we're collecting into an array - } - BuildEffect::PushElement => { - // Element pushed to array - } + BuildEffect::StartArray => {} + BuildEffect::PushElement => {} BuildEffect::EndArray => { state.cardinality = Cardinality::Star; } @@ -479,14 +411,12 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } BuildEffect::EndVariant => { if let Some(tag) = state.current_variant.take() { - // Ensure variant exists even if empty scope.variants.entry(tag).or_default(); } } } } - // Process successors let live_successors: Vec<_> = node .successors .iter() @@ -497,19 +427,16 @@ impl<'src, 'g> InferenceContext<'src, 'g> { if live_successors.is_empty() { // Terminal node } else if live_successors.len() == 1 { - // Linear path - continue with same state let child_scope = self.traverse(live_successors[0], state, visited, depth + 1, errors); let merge_errors = scope.merge_from(child_scope); errors.extend(merge_errors); } else { - // Branching - traverse each branch and merge results let total_branches = live_successors.len(); for succ in live_successors { - let child_scope = self.traverse(succ, state.clone(), visited, depth + 1, errors); + let child_scope = self.traverse(succ, state, visited, depth + 1, errors); let merge_errors = scope.merge_from(child_scope); errors.extend(merge_errors); } - // Apply optionality after all branches merged scope.apply_optionality(total_branches); } @@ -517,7 +444,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { - // Create members first - this may allocate wrapper types let members: Vec<_> = scope .fields .iter() @@ -530,7 +456,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { }) .collect(); - // Now allocate struct type_id - this ensures proper ordering let type_id = self.alloc_type_id(); self.type_defs.push(InferredTypeDef { @@ -544,13 +469,11 @@ impl<'src, 'g> 
InferenceContext<'src, 'g> { } fn create_enum_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { - // Create variant payloads first - this may allocate nested types let mut members = Vec::new(); for (tag, variant_scope) in &scope.variants { let variant_type = if variant_scope.fields.is_empty() { TYPE_VOID } else { - // Create synthetic name for variant payload let variant_name = format!("{}{}", name, tag); let leaked: &'src str = Box::leak(variant_name.into_boxed_str()); self.create_struct_type(leaked, variant_scope) @@ -561,7 +484,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { }); } - // Now allocate enum type_id - this ensures proper ordering let type_id = self.alloc_type_id(); self.type_defs.push(InferredTypeDef { @@ -611,23 +533,97 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } -/// Infer types for all definitions in a BuildGraph. -pub fn infer_types<'src>( - graph: &BuildGraph<'src>, - dead_nodes: &HashSet, -) -> TypeInferenceResult<'src> { - let mut ctx = InferenceContext::new(graph, dead_nodes); - let mut entrypoint_types = IndexMap::new(); - - for (name, entry_id) in graph.definitions() { - let type_id = ctx.infer_definition(name, entry_id); - entrypoint_types.insert(name, type_id); +impl<'a> Query<'a> { + /// Run type inference on the graph. 
+ pub(super) fn infer_types(&mut self) { + let mut ctx = InferenceContext::new(&self.graph, &self.dead_nodes); + + for (name, entry_id) in self.graph.definitions() { + let type_id = ctx.infer_definition(name, entry_id); + self.type_info.entrypoint_types.insert(name, type_id); + } + + self.type_info.type_defs = ctx.type_defs; + self.type_info.diagnostics = ctx.diagnostics; + self.type_info.errors = ctx.errors; } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Dump helpers +// ───────────────────────────────────────────────────────────────────────────── + +impl TypeInferenceResult<'_> { + pub fn dump(&self) -> String { + let mut out = String::new(); + + out.push_str("=== Entrypoints ===\n"); + for (name, type_id) in &self.entrypoint_types { + out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); + } + + if !self.type_defs.is_empty() { + out.push_str("\n=== Types ===\n"); + for (idx, def) in self.type_defs.iter().enumerate() { + let type_id = idx as TypeId + 3; + let name = def.name.unwrap_or(""); + out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); + + if let Some(inner) = def.inner_type { + out.push_str(&format!(" → {}", format_type_id(inner))); + } + + if !def.members.is_empty() { + out.push_str(" {\n"); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push('}'); + } + out.push('\n'); + } + } + + if !self.errors.is_empty() { + out.push_str("\n=== Errors ===\n"); + for err in &self.errors { + out.push_str(&format!( + "field `{}` in `{}`: incompatible types [{}]\n", + err.field, + err.definition, + err.types_found + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + )); + } + } + + out + } + + pub fn dump_diagnostics(&self, source: &str) -> String { + self.diagnostics.render_filtered(source) + } + + pub fn has_errors(&self) -> bool { + self.diagnostics.has_errors() + } +} - TypeInferenceResult { - 
type_defs: ctx.type_defs, - entrypoint_types, - diagnostics: ctx.diagnostics, - errors: ctx.errors, +fn format_type_id(id: TypeId) -> String { + if id == TYPE_VOID { + "Void".to_string() + } else if id == TYPE_NODE { + "Node".to_string() + } else if id == TYPE_STR { + "String".to_string() + } else { + format!("T{}", id) } } From f2d655465f6a6af87b92c426c66d15e93fea0066 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 14:55:38 -0300 Subject: [PATCH 08/23] Fixes --- crates/plotnik-lib/src/lib.rs | 1 - crates/plotnik-lib/src/query/mod.rs | 4 +- crates/plotnik-lib/src/query/typing.rs | 314 +++++++++-- crates/plotnik-lib/src/query/typing_tests.rs | 517 +++++++++++++++++++ 4 files changed, 778 insertions(+), 58 deletions(-) create mode 100644 crates/plotnik-lib/src/query/typing_tests.rs diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 0e075319..cdfa048f 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,7 +17,6 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod diagnostics; -pub mod graph; pub mod ir; pub mod parser; pub mod query; diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index dbb821b0..19f8d4fd 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -48,6 +48,8 @@ mod recursion_tests; mod shapes_tests; #[cfg(test)] mod symbol_table_tests; +#[cfg(test)] +mod typing_tests; use std::collections::{HashMap, HashSet}; @@ -183,8 +185,8 @@ impl<'a> Query<'a> { return self; } self.construct_graph(); + self.infer_types(); // Run before optimization to avoid merged effects self.optimize_graph(); - self.infer_types(); self } diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 457f9de3..a4f77603 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -5,11 +5,11 @@ //! //! # Algorithm Overview //! -//! 
1. Walk graph from each definition entry point -//! 2. Track "pending value" - the captured value waiting for a Field assignment -//! 3. When Field(name) is encountered, record the pending value as a field -//! 4. Handle branching by merging field sets from all branches (1-level merge) -//! 5. Handle quantifiers via array cardinality markers +//! 1. Walk graph from each definition entry point using stack-based scope tracking +//! 2. StartObject/EndObject delimit scopes that may become composite types +//! 3. When EndObject is hit, the scope is resolved into a pending type +//! 4. QIS (Quantifier-Induced Scope) creates implicit structs for multi-capture quantifiers +//! 5. Field(name) consumes pending type and records it in current scope use std::collections::HashSet; @@ -104,13 +104,26 @@ impl Cardinality { x => x, } } + + fn multiply(self, other: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, other) { + (One, x) | (x, One) => x, + (Optional, Optional) => Optional, + (Optional, Plus) | (Plus, Optional) => Star, + (Optional, Star) | (Star, Optional) => Star, + (Star, _) | (_, Star) => Star, + (Plus, Plus) => Plus, + } + } } +/// Shape includes type information for proper compatibility checking. 
#[derive(Debug, Clone, PartialEq, Eq)] -#[allow(dead_code)] enum TypeShape<'src> { Primitive(TypeId), - Struct(Vec<&'src str>), + Struct(Vec<(&'src str, TypeId)>), + Composite(TypeId), } impl<'src> TypeShape<'src> { @@ -120,8 +133,9 @@ impl<'src> TypeShape<'src> { TypeShape::Primitive(TYPE_STR) => TypeDescription::String, TypeShape::Primitive(_) => TypeDescription::Node, TypeShape::Struct(fields) => { - TypeDescription::Struct(fields.iter().map(|s| s.to_string()).collect()) + TypeDescription::Struct(fields.iter().map(|(n, _)| n.to_string()).collect()) } + TypeShape::Composite(_) => TypeDescription::Struct(vec!["...".to_string()]), } } } @@ -148,15 +162,15 @@ impl<'src> ScopeInfo<'src> { &mut self, name: &'src str, base_type: TypeId, + shape: TypeShape<'src>, cardinality: Cardinality, span: TextRange, ) { - let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; if !existing.all_shapes.contains(&shape) { - existing.all_shapes.push(shape.clone()); + existing.all_shapes.push(shape); } existing.spans.push(span); } else { @@ -217,6 +231,10 @@ impl<'src> ScopeInfo<'src> { } } } + + fn is_empty(&self) -> bool { + self.fields.is_empty() && self.variants.is_empty() + } } #[derive(Debug)] @@ -238,45 +256,107 @@ fn check_compatibility<'src>( shapes: vec![a.clone(), b.clone()], spans: vec![], }), - (TypeShape::Struct(_), TypeShape::Primitive(_)) - | (TypeShape::Primitive(_), TypeShape::Struct(_)) => Some(MergeError { - field, - shapes: vec![a.clone(), b.clone()], - spans: vec![], - }), + (TypeShape::Composite(t1), TypeShape::Composite(t2)) if t1 == t2 => None, (TypeShape::Struct(fields_a), TypeShape::Struct(fields_b)) => { - if fields_a == fields_b { - None - } else { - Some(MergeError { + // Compare field names AND types + if fields_a.len() != fields_b.len() { + return Some(MergeError { field, shapes: vec![a.clone(), b.clone()], spans: vec![], - 
}) + }); + } + for ((name_a, type_a), (name_b, type_b)) in fields_a.iter().zip(fields_b.iter()) { + if name_a != name_b || type_a != type_b { + return Some(MergeError { + field, + shapes: vec![a.clone(), b.clone()], + spans: vec![], + }); + } } + None } + // Struct vs Primitive or Composite mismatch + _ => Some(MergeError { + field, + shapes: vec![a.clone(), b.clone()], + spans: vec![], + }), } } -#[derive(Debug, Clone, Copy)] -struct TraversalState<'src> { - pending_type: Option, +/// Entry on the scope stack during traversal. +#[derive(Debug, Clone)] +struct ScopeStackEntry<'src> { + scope: ScopeInfo<'src>, + is_object: bool, + /// Captures pending type before StartObject (for sequences captured as a whole) + outer_pending: Option>, +} + +impl<'src> ScopeStackEntry<'src> { + fn new_root() -> Self { + Self { + scope: ScopeInfo::default(), + is_object: false, + outer_pending: None, + } + } + + fn new_object(outer_pending: Option>) -> Self { + Self { + scope: ScopeInfo::default(), + is_object: true, + outer_pending, + } + } +} + +/// Pending type waiting for a Field assignment. 
+#[derive(Debug, Clone)] +struct PendingType<'src> { + shape: TypeShape<'src>, + base_type: TypeId, cardinality: Cardinality, +} + +impl<'src> PendingType<'src> { + fn primitive(ty: TypeId) -> Self { + Self { + shape: TypeShape::Primitive(ty), + base_type: ty, + cardinality: Cardinality::One, + } + } +} + +#[derive(Debug, Clone)] +struct TraversalState<'src> { + pending: Option>, current_variant: Option<&'src str>, - object_depth: u32, + /// Stack of array cardinalities (for nested arrays) + array_cardinality_stack: Vec, } impl Default for TraversalState<'_> { fn default() -> Self { Self { - pending_type: None, - cardinality: Cardinality::One, + pending: None, current_variant: None, - object_depth: 0, + array_cardinality_stack: Vec::new(), } } } +impl TraversalState<'_> { + fn current_array_cardinality(&self) -> Cardinality { + self.array_cardinality_stack + .iter() + .fold(Cardinality::One, |acc, c| acc.multiply(*c)) + } +} + struct InferenceContext<'src, 'g> { graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet, @@ -284,6 +364,7 @@ struct InferenceContext<'src, 'g> { next_type_id: TypeId, diagnostics: Diagnostics, errors: Vec>, + current_def_name: &'src str, } impl<'src, 'g> InferenceContext<'src, 'g> { @@ -295,6 +376,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { next_type_id: 3, // TYPE_COMPOSITE_START diagnostics: Diagnostics::new(), errors: Vec::new(), + current_def_name: "", } } @@ -305,16 +387,26 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { + self.current_def_name = def_name; let mut visited = HashSet::new(); let mut merge_errors = Vec::new(); - let scope = self.traverse( + let mut scope_stack = vec![ScopeStackEntry::new_root()]; + + self.traverse( entry_id, TraversalState::default(), &mut visited, 0, &mut merge_errors, + &mut scope_stack, ); + // Pop to get final root scope + let root_entry = scope_stack + .pop() + .unwrap_or_else(|| ScopeStackEntry::new_root()); + let 
scope = root_entry.scope; + for err in merge_errors { let types_str = err .shapes @@ -363,55 +455,127 @@ impl<'src, 'g> InferenceContext<'src, 'g> { visited: &mut HashSet, depth: usize, errors: &mut Vec>, - ) -> ScopeInfo<'src> { + scope_stack: &mut Vec>, + ) { if self.dead_nodes.contains(&node_id) || depth > 200 { - return ScopeInfo::default(); + return; } - if !visited.insert(node_id) && depth > 50 { - return ScopeInfo::default(); + // Don't re-visit nodes - each node's effects should only be processed once + if !visited.insert(node_id) { + return; } let node = self.graph.node(node_id); - let mut scope = ScopeInfo::default(); for effect in &node.effects { match effect { BuildEffect::CaptureNode => { - state.pending_type = Some(TYPE_NODE); + state.pending = Some(PendingType::primitive(TYPE_NODE)); } BuildEffect::ToString => { - state.pending_type = Some(TYPE_STR); + state.pending = Some(PendingType::primitive(TYPE_STR)); } BuildEffect::Field { name, span } => { - if let Some(base_type) = state.pending_type.take() { + if let Some(pending) = state.pending.take() { + let current_scope = scope_stack + .last_mut() + .map(|e| &mut e.scope) + .expect("scope stack should not be empty"); + + let effective_cardinality = pending + .cardinality + .multiply(state.current_array_cardinality()); if let Some(tag) = state.current_variant { - let variant_scope = scope.variants.entry(tag).or_default(); - variant_scope.add_field(*name, base_type, state.cardinality, *span); + let variant_scope = current_scope.variants.entry(tag).or_default(); + variant_scope.add_field( + *name, + pending.base_type, + pending.shape, + effective_cardinality, + *span, + ); } else { - scope.add_field(*name, base_type, state.cardinality, *span); + current_scope.add_field( + *name, + pending.base_type, + pending.shape, + effective_cardinality, + *span, + ); } } - state.cardinality = Cardinality::One; } - BuildEffect::StartArray => {} + BuildEffect::StartArray => { + // Push Star cardinality onto the stack 
when entering an array + state.array_cardinality_stack.push(Cardinality::Star); + } BuildEffect::PushElement => {} BuildEffect::EndArray => { - state.cardinality = Cardinality::Star; + // Pop cardinality when exiting array + state.array_cardinality_stack.pop(); } BuildEffect::StartObject => { - state.object_depth += 1; + // Push new object scope, saving outer pending type + let entry = ScopeStackEntry::new_object(state.pending.take()); + scope_stack.push(entry); } BuildEffect::EndObject => { - state.object_depth = state.object_depth.saturating_sub(1); + // Pop the object scope + if let Some(finished_entry) = scope_stack.pop() { + if finished_entry.is_object { + let finished_scope = finished_entry.scope; + + if !finished_scope.is_empty() { + // Create a struct type for this scope + let type_name = self.generate_scope_name(); + let type_id = self.create_struct_type(type_name, &finished_scope); + + // Collect field info for shape + let field_types: Vec<(&'src str, TypeId)> = finished_scope + .fields + .iter() + .map(|(name, info)| (*name, info.base_type)) + .collect(); + + state.pending = Some(PendingType { + shape: TypeShape::Composite(type_id), + base_type: type_id, + cardinality: Cardinality::One, + }); + + // If there were fields, update shape to include them + if !field_types.is_empty() { + if let Some(ref mut p) = state.pending { + p.shape = TypeShape::Struct(field_types); + } + } + } else { + // Empty object - restore outer pending if any + state.pending = finished_entry.outer_pending; + } + } else { + // Shouldn't happen - mismatched StartObject/EndObject + // Put it back + scope_stack.push(finished_entry); + } + } } BuildEffect::StartVariant(tag) => { state.current_variant = Some(*tag); - scope.has_variants = true; + let current_scope = scope_stack + .last_mut() + .map(|e| &mut e.scope) + .expect("scope stack should not be empty"); + current_scope.has_variants = true; } BuildEffect::EndVariant => { if let Some(tag) = state.current_variant.take() { - 
scope.variants.entry(tag).or_default(); + let current_scope = scope_stack + .last_mut() + .map(|e| &mut e.scope) + .expect("scope stack should not be empty"); + current_scope.variants.entry(tag).or_default(); } } } @@ -427,20 +591,58 @@ impl<'src, 'g> InferenceContext<'src, 'g> { if live_successors.is_empty() { // Terminal node } else if live_successors.len() == 1 { - let child_scope = self.traverse(live_successors[0], state, visited, depth + 1, errors); - let merge_errors = scope.merge_from(child_scope); - errors.extend(merge_errors); + self.traverse( + live_successors[0], + state, + visited, + depth + 1, + errors, + scope_stack, + ); } else { + // Branching: collect results from all branches, then merge let total_branches = live_successors.len(); - for succ in live_successors { - let child_scope = self.traverse(succ, state, visited, depth + 1, errors); - let merge_errors = scope.merge_from(child_scope); - errors.extend(merge_errors); + let initial_scope_len = scope_stack.len(); + let mut branch_scopes: Vec> = Vec::new(); + + // Traverse each branch independently + for succ in &live_successors { + let mut branch_stack = scope_stack.clone(); + + self.traverse( + *succ, + state.clone(), + &mut visited.clone(), + depth + 1, + errors, + &mut branch_stack, + ); + + // Extract scope from this branch (pop any nested scopes first) + while branch_stack.len() > initial_scope_len { + branch_stack.pop(); + } + if let Some(entry) = branch_stack.last() { + branch_scopes.push(entry.scope.clone()); + } + } + + // Merge all branch scopes into main scope + if let Some(main_entry) = scope_stack.last_mut() { + for branch_scope in branch_scopes { + let merge_errs = main_entry.scope.merge_from(branch_scope); + errors.extend(merge_errs); + } + // Apply optionality for fields not present in all branches + main_entry.scope.apply_optionality(total_branches); } - scope.apply_optionality(total_branches); } + } - scope + fn generate_scope_name(&self) -> &'src str { + // Generate synthetic 
name - leak for simplicity + let name = format!("{}Scope{}", self.current_def_name, self.next_type_id); + Box::leak(name.into_boxed_str()) } fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs new file mode 100644 index 00000000..b4917f33 --- /dev/null +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -0,0 +1,517 @@ +//! Tests for type inference. + +use indoc::indoc; + +use crate::query::Query; + +fn infer(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + query.type_info().dump() +} + +fn infer_with_graph(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + let mut out = String::new(); + out.push_str("=== Graph ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push_str("\n"); + out.push_str(&query.type_info().dump()); + out +} + +#[test] +fn debug_graph_structure() { + let result = infer_with_graph("Foo = (identifier) @name"); + insta::assert_snapshot!(result, @r" + === Graph === + Foo = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(name)] → ∅ + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + } + "); +} + +#[test] +fn debug_incompatible_types_graph() { + let input = indoc! 
{r#" + Foo = [ (a) @v (b) @v ::string ] + "#}; + + let query = Query::new(input) + .exec() + .expect("parse should succeed") + .build_graph(); + + let mut out = String::new(); + out.push_str("=== Graph (after optimization) ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push_str("\n=== Dead nodes count: "); + out.push_str(&query.dead_nodes().len().to_string()); + out.push_str(" ===\n\n"); + out.push_str(&query.type_info().dump()); + insta::assert_snapshot!(out, @r" + === Graph (after optimization) === + Foo = N0 + + N0: ε → N2, N4 + N1: ε [Field(v)] [Field(v)] → ∅ + N2: (a) [Capture] → N1 + N4: (b) [Capture] [ToString] → N1 + + === Dead nodes count: 2 === + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + + === Errors === + field `v` in `Foo`: incompatible types [Node, String] + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Basic captures +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn single_node_capture() { + let result = infer("Foo = (identifier) @name"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + } + "); +} + +#[test] +fn string_capture() { + let result = infer("Foo = (identifier) @name ::string"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: String + } + "); +} + +#[test] +fn multiple_captures_flat() { + let result = infer("Foo = (a (b) @x (c) @y)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + x: Node + y: Node + } + "); +} + +#[test] +fn no_captures_void() { + let result = infer("Foo = (identifier)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → Void + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Captured 
sequences (composite types) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn captured_sequence_creates_struct() { + let input = indoc! {r#" + Foo = { (a) @x (b) @y } @z + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Record FooScope3 { + x: Node + y: Node + } + T4: Record Foo { + z: T3 + } + "); +} + +#[test] +fn nested_captured_sequence() { + let input = indoc! {r#" + Foo = { (outer) @a { (inner) @b } @nested } @root + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Record FooScope3 { + b: Node + } + T4: Record FooScope4 { + a: Node + nested: T3 + } + T5: Record Foo { + root: T4 + } + "); +} + +#[test] +fn sequence_without_capture_propagates() { + let input = indoc! {r#" + Foo = { (a) @x (b) @y } + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → Void + + === Types === + T3: Record FooScope3 { + x: Node + y: Node + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Alternations +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn untagged_alternation_symmetric() { + let input = indoc! {r#" + Foo = [ (a) @v (b) @v ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + "); +} + +#[test] +fn untagged_alternation_asymmetric() { + let input = indoc! {r#" + Foo = [ (a) @x (b) @y ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + x: T3 + y: T4 + } + "); +} + +#[test] +fn tagged_alternation_uncaptured_propagates() { + let input = indoc! 
{r#" + Foo = [ A: (a) @x B: (b) @y ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Record FooA { + x: Node + } + T4: Record FooB { + y: Node + } + T5: Enum Foo { + A: T3 + B: T4 + } + "); +} + +#[test] +fn tagged_alternation_captured_creates_enum() { + let input = indoc! {r#" + Foo = [ A: (a) @x B: (b) @y ] @choice + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Record FooA { + x: Node + } + T4: Record FooB { + y: Node + } + T5: Enum Foo { + A: T3 + B: T4 + } + "); +} + +#[test] +fn captured_untagged_alternation_creates_struct() { + let input = indoc! {r#" + Foo = [ (a) @x (b) @y ] @val + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + x: T3 + y: T4 + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Quantifiers +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn star_quantifier() { + let result = infer("Foo = ((item) @items)*"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + items: T3 + } + "); +} + +#[test] +fn plus_quantifier() { + let result = infer("Foo = ((item) @items)+"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + items: T3 + } + "); +} + +#[test] +fn optional_quantifier() { + let result = infer("Foo = ((item) @maybe)?"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Optional → Node + T4: Record Foo { + maybe: T3 + } + "); +} + +#[test] +fn quantifier_on_sequence() { + let input = indoc! 
{r#" + Foo = { (a) @x (b) @y }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → Void + + === Types === + T3: ArrayStar → Node + T4: ArrayStar → Node + T5: Record FooScope3 { + x: T3 + y: T4 + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Type compatibility +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn compatible_types_in_alternation() { + let input = indoc! {r#" + Foo = [ (a) @v (b) @v ] + "#}; + + let query = Query::try_from(input).expect("parse").build_graph(); + assert!(query.type_info().errors.is_empty()); +} + +#[test] +fn incompatible_types_in_alternation() { + let input = indoc! {r#" + Foo = [ (a) @v (b) @v ::string ] + "#}; + + let result = infer_with_graph(input); + insta::assert_snapshot!(result, @r" + === Graph === + Foo = N0 + + N0: ε → N2, N4 + N1: ε [Field(v)] [Field(v)] → ∅ + N2: (a) [Capture] → N1 + N4: (b) [Capture] [ToString] → N1 + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + + === Errors === + field `v` in `Foo`: incompatible types [Node, String] + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Multiple definitions +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn multiple_definitions() { + let input = indoc! 
{r#" + Func = (function_declaration name: (identifier) @name) + Class = (class_declaration name: (identifier) @name body: (class_body) @body) + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Func → T3 + Class → T4 + + === Types === + T3: Record Func { + name: Node + } + T4: Record Class { + name: Node + body: Node + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Edge cases +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn deeply_nested_node() { + let input = indoc! {r#" + Foo = (a (b (c (d) @val))) + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + "); +} + +#[test] +fn wildcard_capture() { + let result = infer("Foo = _ @any"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + any: Node + } + "); +} + +#[test] +fn string_literal_capture() { + let result = infer(r#"Foo = "+" @op"#); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + op: Node + } + "); +} From 841cd0d2a4a129d48447dbcbb59c66893faaac56 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 15:53:02 -0300 Subject: [PATCH 09/23] QIS --- .../src/query/{build_graph.rs => graph.rs} | 177 +++++++++ .../query/{construct.rs => graph_build.rs} | 70 ++-- ...onstruct_tests.rs => graph_build_tests.rs} | 24 +- crates/plotnik-lib/src/query/graph_dump.rs | 2 +- .../query/{optimize.rs => graph_optimize.rs} | 19 +- crates/plotnik-lib/src/query/graph_qis.rs | 107 ++++++ .../plotnik-lib/src/query/graph_qis_tests.rs | 230 ++++++++++++ crates/plotnik-lib/src/query/mod.rs | 56 ++- crates/plotnik-lib/src/query/typing.rs | 337 ++++++++++++++++-- crates/plotnik-lib/src/query/typing_tests.rs | 124 ++++++- 10 files changed, 1036 
insertions(+), 110 deletions(-) rename crates/plotnik-lib/src/query/{build_graph.rs => graph.rs} (68%) rename crates/plotnik-lib/src/query/{construct.rs => graph_build.rs} (89%) rename crates/plotnik-lib/src/query/{construct_tests.rs => graph_build_tests.rs} (93%) rename crates/plotnik-lib/src/query/{optimize.rs => graph_optimize.rs} (88%) create mode 100644 crates/plotnik-lib/src/query/graph_qis.rs create mode 100644 crates/plotnik-lib/src/query/graph_qis_tests.rs diff --git a/crates/plotnik-lib/src/query/build_graph.rs b/crates/plotnik-lib/src/query/graph.rs similarity index 68% rename from crates/plotnik-lib/src/query/build_graph.rs rename to crates/plotnik-lib/src/query/graph.rs index a53716f0..b2c8baaa 100644 --- a/crates/plotnik-lib/src/query/build_graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -312,6 +312,183 @@ impl<'src> BuildGraph<'src> { Fragment::new(start, end) } + + // ───────────────────────────────────────────────────────────────────── + // QIS-Aware Array Combinators (wrap each iteration with object scope) + // ───────────────────────────────────────────────────────────────────── + + /// Zero or more with QIS object wrapping (greedy): inner* + /// + /// Each iteration is wrapped in StartObject/EndObject to keep + /// multiple captures coupled per-iteration. 
+ pub fn zero_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let branch = self.add_epsilon(); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, branch); + self.connect(branch, obj_start); + self.connect(branch, end); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, push); + self.connect(push, branch); + + Fragment::new(start, end) + } + + /// Zero or more with QIS object wrapping (non-greedy): inner*? + pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let branch = self.add_epsilon(); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, branch); + self.connect(branch, end); + self.connect(branch, obj_start); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, push); + self.connect(push, branch); + + Fragment::new(start, end) + } + + /// One or more with QIS object wrapping (greedy): inner+ + pub fn one_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + 
self.node_mut(start).add_effect(BuildEffect::StartArray); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let branch = self.add_epsilon(); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, obj_start); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, push); + self.connect(push, branch); + self.connect(branch, obj_start); + self.connect(branch, end); + + Fragment::new(start, end) + } + + /// One or more with QIS object wrapping (non-greedy): inner+? + pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { + let start = self.add_epsilon(); + self.node_mut(start).add_effect(BuildEffect::StartArray); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let push = self.add_epsilon(); + self.node_mut(push).add_effect(BuildEffect::PushElement); + + let branch = self.add_epsilon(); + + let end = self.add_epsilon(); + self.node_mut(end).add_effect(BuildEffect::EndArray); + + self.connect(start, obj_start); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, push); + self.connect(push, branch); + self.connect(branch, end); + self.connect(branch, obj_start); + + Fragment::new(start, end) + } + + /// Optional with QIS object wrapping: inner? + /// + /// Wraps the optional value in an object scope. 
+ pub fn optional_qis(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let exit = self.add_epsilon(); + + self.connect(branch, obj_start); + self.connect(branch, exit); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, exit); + + Fragment::new(branch, exit) + } + + /// Optional with QIS object wrapping (non-greedy): inner?? + pub fn optional_qis_lazy(&mut self, inner: Fragment) -> Fragment { + let branch = self.add_epsilon(); + + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + let exit = self.add_epsilon(); + + self.connect(branch, exit); + self.connect(branch, obj_start); + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, exit); + + Fragment::new(branch, exit) + } } impl Default for BuildGraph<'_> { diff --git a/crates/plotnik-lib/src/query/construct.rs b/crates/plotnik-lib/src/query/graph_build.rs similarity index 89% rename from crates/plotnik-lib/src/query/construct.rs rename to crates/plotnik-lib/src/query/graph_build.rs index 703f9a7c..5207fe3c 100644 --- a/crates/plotnik-lib/src/query/construct.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -1,7 +1,7 @@ //! Graph construction integrated with Query pipeline. //! //! Constructs a `BuildGraph` from the parsed AST, reusing the `symbol_table` -//! populated by earlier passes. +//! and `qis_triggers` populated by earlier passes. 
use std::collections::HashSet; @@ -12,7 +12,7 @@ use crate::parser::{ }; use super::Query; -use super::build_graph::{BuildEffect, BuildGraph, BuildMatcher, Fragment, NodeId, RefMarker}; +use super::graph::{BuildEffect, BuildMatcher, Fragment, NodeId, RefMarker}; /// Context for navigation determination. #[derive(Debug, Clone, Copy)] @@ -68,46 +68,32 @@ impl ExitContext { impl<'a> Query<'a> { /// Build the graph from the already-populated symbol_table. /// - /// This method reuses the symbol_table from name resolution, - /// avoiding duplicate iteration over definitions. + /// This method reuses the symbol_table from name resolution and + /// qis_triggers from QIS detection. pub(super) fn construct_graph(&mut self) { - let mut constructor = GraphConstructor::new(self.source); + self.next_ref_id = 0; - // Reuse symbol_table: iterate name -> body pairs - for (name, body) in &self.symbol_table { - let fragment = constructor.construct_expr(body, NavContext::Root); - constructor.graph.add_definition(name, fragment.entry); + let entries: Vec<_> = self + .symbol_table + .iter() + .map(|(name, body)| (*name, body.clone())) + .collect(); + for (name, body) in entries { + let fragment = self.construct_expr(&body, NavContext::Root); + self.graph.add_definition(name, fragment.entry); } - constructor.link_references(); - self.graph = constructor.graph; - } -} - -/// Internal constructor that builds the graph. -struct GraphConstructor<'src> { - source: &'src str, - graph: BuildGraph<'src>, - next_ref_id: u32, -} - -impl<'src> GraphConstructor<'src> { - fn new(source: &'src str) -> Self { - Self { - source, - graph: BuildGraph::new(), - next_ref_id: 0, - } + self.link_references(); } /// Link Enter nodes to their definition entry points. 
fn link_references(&mut self) { - let mut links: Vec<(NodeId, &'src str, Vec)> = Vec::new(); + let mut links: Vec<(NodeId, &'a str, Vec)> = Vec::new(); for (id, node) in self.graph.iter() { - if let RefMarker::Enter { ref_id } = &node.ref_marker { + if let RefMarker::Enter { .. } = &node.ref_marker { if let Some(name) = node.ref_name { - let exit_successors = self.find_exit_successors(*ref_id); + let exit_successors = self.find_exit_successors_for_enter(id); links.push((id, name, exit_successors)); } } @@ -123,7 +109,12 @@ impl<'src> GraphConstructor<'src> { } } - fn find_exit_successors(&self, ref_id: u32) -> Vec { + fn find_exit_successors_for_enter(&self, enter_id: NodeId) -> Vec { + let enter_node = self.graph.node(enter_id); + let RefMarker::Enter { ref_id } = enter_node.ref_marker else { + return Vec::new(); + }; + for (_, node) in self.graph.iter() { if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker { if *exit_id == ref_id { @@ -222,13 +213,13 @@ impl<'src> GraphConstructor<'src> { (fragments, exit_ctx) } - fn build_named_matcher(&mut self, node: &NamedNode) -> BuildMatcher<'src> { + fn build_named_matcher(&self, node: &NamedNode) -> BuildMatcher<'a> { let kind = node .node_type() .map(|t| token_src(&t, self.source)) .unwrap_or("_"); - let negated_fields: Vec<&'src str> = node + let negated_fields: Vec<&'a str> = node .as_cst() .children() .filter_map(NegatedField::cast) @@ -450,13 +441,22 @@ impl<'src> GraphConstructor<'src> { }; let inner_frag = self.construct_expr(&inner_expr, ctx); + let is_qis = self.qis_triggers.contains_key(quant); match op.kind() { + SyntaxKind::Star if is_qis => self.graph.zero_or_more_array_qis(inner_frag), SyntaxKind::Star => self.graph.zero_or_more_array(inner_frag), + SyntaxKind::StarQuestion if is_qis => { + self.graph.zero_or_more_array_qis_lazy(inner_frag) + } SyntaxKind::StarQuestion => self.graph.zero_or_more_array_lazy(inner_frag), + SyntaxKind::Plus if is_qis => self.graph.one_or_more_array_qis(inner_frag), 
SyntaxKind::Plus => self.graph.one_or_more_array(inner_frag), + SyntaxKind::PlusQuestion if is_qis => self.graph.one_or_more_array_qis_lazy(inner_frag), SyntaxKind::PlusQuestion => self.graph.one_or_more_array_lazy(inner_frag), + SyntaxKind::Question if is_qis => self.graph.optional_qis(inner_frag), SyntaxKind::Question => self.graph.optional(inner_frag), + SyntaxKind::QuestionQuestion if is_qis => self.graph.optional_qis_lazy(inner_frag), SyntaxKind::QuestionQuestion => self.graph.optional_lazy(inner_frag), _ => inner_frag, } @@ -469,7 +469,7 @@ impl<'src> GraphConstructor<'src> { self.construct_expr(&value_expr, ctx) } - fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'src str> { + fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'a str> { let parent = node.parent()?; let field_expr = FieldExpr::cast(parent)?; let name_token = field_expr.name()?; diff --git a/crates/plotnik-lib/src/query/construct_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs similarity index 93% rename from crates/plotnik-lib/src/query/construct_tests.rs rename to crates/plotnik-lib/src/query/graph_build_tests.rs index 3877409c..8002c819 100644 --- a/crates/plotnik-lib/src/query/construct_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -88,26 +88,26 @@ fn alternation_tagged() { Q = N0 N0: ε → N3, N7 - N1: ε [Field(x)] [EndVariant] [Field(y)] [EndVariant] → ∅ + N1: ε → ∅ N2: ε [Variant(A)] → N3 - N3: (a) [Variant(A)] [Capture] → N1 - N4: ε [Field(x)] → N1 - N5: ε [EndVariant] → N1 + N3: (a) [Variant(A)] [Capture] → N5 + N4: ε [Field(x)] → N5 + N5: ε [Field(x)] [EndVariant] → N1 N6: ε [Variant(B)] → N7 - N7: (b) [Variant(B)] [Capture] → N1 - N8: ε [Field(y)] → N1 - N9: ε [EndVariant] → N1 + N7: (b) [Variant(B)] [Capture] → N9 + N8: ε [Field(y)] → N9 + N9: ε [Field(y)] [EndVariant] → N1 "); } #[test] fn quantifier_star() { insta::assert_snapshot!(snapshot("Q = (identifier)*"), @r" - Q = N2 + Q = N1 - N0: 
(identifier) → N2 + N0: (identifier) → N3 N1: ε [StartArray] → N2 - N2: ε [StartArray] [Push] → N0, N4 + N2: ε → N0, N4 N3: ε [Push] → N2 N4: ε [EndArray] → ∅ "); @@ -116,9 +116,9 @@ fn quantifier_star() { #[test] fn quantifier_plus() { insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" - Q = N0 + Q = N1 - N0: (identifier) [StartArray] → N3 + N0: (identifier) → N3 N1: ε [StartArray] → N0 N2: ε [Push] → N3 N3: ε [Push] → N0, N4 diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs index 3d668204..ed2868c6 100644 --- a/crates/plotnik-lib/src/query/graph_dump.rs +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -5,7 +5,7 @@ use std::fmt::Write; use crate::ir::{Nav, NavKind}; -use super::build_graph::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker}; +use super::graph::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker}; /// Printer for `BuildGraph` with configurable output options. pub struct GraphPrinter<'a, 'src> { diff --git a/crates/plotnik-lib/src/query/optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs similarity index 88% rename from crates/plotnik-lib/src/query/optimize.rs rename to crates/plotnik-lib/src/query/graph_optimize.rs index 5c7ffbb2..afda8bfe 100644 --- a/crates/plotnik-lib/src/query/optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -15,7 +15,7 @@ use std::collections::{HashMap, HashSet}; use crate::ir::{Nav, NavKind}; use super::Query; -use super::build_graph::{BuildGraph, BuildMatcher, NodeId}; +use super::graph::{BuildGraph, BuildMatcher, NodeId}; /// Statistics from epsilon elimination. 
#[derive(Debug, Default)] @@ -51,7 +51,7 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS } let node = graph.node(id); - if !is_eliminable_epsilon(node, graph) { + if !is_eliminable_epsilon(node, graph, &predecessors) { if node.is_epsilon() { stats.epsilons_kept += 1; } @@ -111,7 +111,11 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS (dead_nodes, stats) } -fn is_eliminable_epsilon(node: &super::build_graph::BuildNode, graph: &BuildGraph) -> bool { +fn is_eliminable_epsilon( + node: &super::graph::BuildNode, + graph: &BuildGraph, + predecessors: &HashMap>, +) -> bool { if !matches!(node.matcher, BuildMatcher::Epsilon) { return false; } @@ -137,6 +141,15 @@ fn is_eliminable_epsilon(node: &super::build_graph::BuildNode, graph: &BuildGrap return false; } + // Don't eliminate if node has effects and successor is a join point. + // Merging effects onto a join point changes execution count (e.g., loop entry vs per-iteration). + if !node.effects.is_empty() { + let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); + if succ_pred_count > 1 { + return false; + } + } + true } diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs new file mode 100644 index 00000000..334d42c4 --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_qis.rs @@ -0,0 +1,107 @@ +//! Quantifier-Induced Scope (QIS) detection. +//! +//! QIS triggers when a quantified expression has ≥2 propagating captures. +//! This creates an implicit object scope so captures stay coupled per-iteration. +//! +//! See ADR-0009 for full specification. + +use crate::parser::{ast, token_src}; + +use super::{QisTrigger, Query}; + +impl<'a> Query<'a> { + /// Detect Quantifier-Induced Scope triggers. + /// + /// QIS triggers when a quantified expression has ≥2 propagating captures + /// (captures not absorbed by inner scopes like `{...} @x` or `[A: ...] @x`). 
+ pub(super) fn detect_qis(&mut self) { + let bodies: Vec<_> = self.symbol_table.values().cloned().collect(); + for body in &bodies { + self.detect_qis_in_expr(body); + } + } + + fn detect_qis_in_expr(&mut self, expr: &ast::Expr) { + match expr { + ast::Expr::QuantifiedExpr(q) => { + if let Some(inner) = q.inner() { + let captures = self.collect_propagating_captures(&inner); + if captures.len() >= 2 { + self.qis_triggers.insert(q.clone(), QisTrigger { captures }); + } + self.detect_qis_in_expr(&inner); + } + } + ast::Expr::CapturedExpr(c) => { + // Captures on sequences/alternations absorb inner captures, + // but we still recurse to find nested quantifiers + if let Some(inner) = c.inner() { + self.detect_qis_in_expr(&inner); + } + } + _ => { + for child in expr.children() { + self.detect_qis_in_expr(&child); + } + } + } + } + + /// Collect captures that propagate out of an expression (not absorbed by inner scopes). + fn collect_propagating_captures(&self, expr: &ast::Expr) -> Vec<&'a str> { + let mut captures = Vec::new(); + self.collect_propagating_captures_impl(expr, &mut captures); + captures + } + + fn collect_propagating_captures_impl(&self, expr: &ast::Expr, out: &mut Vec<&'a str>) { + match expr { + ast::Expr::CapturedExpr(c) => { + if let Some(name_token) = c.name() { + let name = token_src(&name_token, self.source); + out.push(name); + } + // Captured sequence/alternation absorbs inner captures. + // Need to look through quantifiers to find the actual container. 
+ if let Some(inner) = c.inner() { + if !Self::is_scope_container(&inner) { + self.collect_propagating_captures_impl(&inner, out); + } + } + } + ast::Expr::QuantifiedExpr(q) => { + // Nested quantifier: its captures propagate (with modified cardinality) + if let Some(inner) = q.inner() { + self.collect_propagating_captures_impl(&inner, out); + } + } + _ => { + for child in expr.children() { + self.collect_propagating_captures_impl(&child, out); + } + } + } + } + + /// Check if an expression is a scope container (seq/alt), looking through quantifiers. + fn is_scope_container(expr: &ast::Expr) -> bool { + match expr { + ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, + ast::Expr::QuantifiedExpr(q) => q + .inner() + .map(|i| Self::is_scope_container(&i)) + .unwrap_or(false), + _ => false, + } + } + + /// Check if a quantified expression triggers QIS. + pub fn is_qis_trigger(&self, q: &ast::QuantifiedExpr) -> bool { + self.qis_triggers.contains_key(q) + } + + /// Get QIS trigger info for a quantified expression. 
+ pub fn qis_trigger(&self, q: &ast::QuantifiedExpr) -> Option<&QisTrigger<'a>> { + self.qis_triggers.get(q) + } +} diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs new file mode 100644 index 00000000..7004a738 --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_qis_tests.rs @@ -0,0 +1,230 @@ +use indoc::indoc; + +use crate::Query; + +fn check_qis(source: &str) -> String { + let query = Query::try_from(source).unwrap().build_graph(); + let mut result = Vec::new(); + + for def in query.root().defs() { + let def_name = def.name().map(|t| t.text().to_string()).unwrap_or_default(); + let mut triggers: Vec<_> = query + .qis_triggers + .iter() + .filter_map(|(q, trigger)| { + // Check if this quantifier belongs to this definition + let q_range = q.text_range(); + let def_range = def.text_range(); + if q_range.start() >= def_range.start() && q_range.end() <= def_range.end() { + Some(( + q_range.start(), + format!(" QIS: [{}]", trigger.captures.join(", ")), + )) + } else { + None + } + }) + .collect(); + triggers.sort_by_key(|(pos, _)| *pos); + let triggers: Vec<_> = triggers.into_iter().map(|(_, s)| s).collect(); + + if triggers.is_empty() { + result.push(format!("{}: no QIS", def_name)); + } else { + result.push(format!("{}:", def_name)); + result.extend(triggers); + } + } + + result.join("\n") +} + +#[test] +fn single_capture_no_qis() { + let source = "Foo = { (a) @x }*"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn two_captures_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn three_captures_triggers_qis() { + let source = "Foo = { (a) @x (b) @y (c) @z }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y, z] + "); +} + +#[test] +fn captured_sequence_absorbs_inner() { + let source = "Foo = { { (a) @x (b) @y } @inner }*"; + + 
insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn captured_alternation_absorbs_inner() { + let source = "Foo = { [ (a) @x (b) @y ] @choice }*"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn uncaptured_alternation_propagates() { + let source = "Foo = { [ (a) @x (b) @y ] }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn node_with_two_captures() { + let source = indoc! {r#" + Foo = (function + name: (identifier) @name + body: (block) @body + )* + "#}; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [name, body] + "); +} + +#[test] +fn plus_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn optional_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }?"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn nested_quantifier_inner_qis() { + let source = "Foo = { { (a) @x (b) @y }* }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + QIS: [x, y] + "); +} + +#[test] +fn nested_quantifier_both_qis() { + // Outer quantifier has @c and @inner (2 captures) -> QIS + // Inner quantifier has @x and @y (2 captures) -> QIS + let source = "Outer = { (c) @c { (a) @x (b) @y }* @inner }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Outer: + QIS: [c, inner] + QIS: [x, y] + "); +} + +#[test] +fn multiple_definitions() { + let source = indoc! 
{r#" + Single = { (a) @x }* + Multi = { (a) @x (b) @y }* + "#}; + + insta::assert_snapshot!(check_qis(source), @r" + Single: no QIS + Multi: + QIS: [x, y] + "); +} + +#[test] +fn no_quantifier_no_qis() { + let source = "Foo = { (a) @x (b) @y }"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn lazy_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }*?"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn qis_graph_has_object_effects() { + // Verify that QIS-triggered quantifiers emit StartObject/EndObject + let source = "Foo = { (a) @x (b) @y }*"; + let (_query, pre_opt) = Query::try_from(source) + .unwrap() + .build_graph_with_pre_opt_dump(); + + // QIS adds StartObj/EndObj around each iteration (in addition to sequence's pair) + // So we expect 2 of each: one from sequence, one from QIS loop + let start_count = pre_opt.matches("StartObj").count(); + let end_count = pre_opt.matches("EndObj").count(); + + assert_eq!( + start_count, 2, + "QIS graph should have 2 StartObj (sequence + QIS):\n{}", + pre_opt + ); + assert_eq!( + end_count, 2, + "QIS graph should have 2 EndObj (sequence + QIS):\n{}", + pre_opt + ); +} + +#[test] +fn non_qis_graph_no_object_effects() { + // Single capture should NOT trigger QIS object wrapping + let source = "Foo = { (a) @x }*"; + let (_query, pre_opt) = Query::try_from(source) + .unwrap() + .build_graph_with_pre_opt_dump(); + + // Count in pre-optimization graph to avoid optimizer noise + // The inner sequence { (a) @x } adds StartObj/EndObj once + // QIS would add another pair per iteration in the loop structure + let start_count = pre_opt.matches("StartObj").count(); + let end_count = pre_opt.matches("EndObj").count(); + + assert_eq!( + start_count, 1, + "Non-QIS graph should have only sequence's StartObj" + ); + assert_eq!( + end_count, 1, + "Non-QIS graph should have only sequence's EndObj" + ); +} diff --git 
a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 19f8d4fd..3a09c3d8 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -1,33 +1,32 @@ //! Query processing pipeline. //! -//! Stages: parse → alt_kinds → symbol_table → recursion → shapes → [build_graph]. +//! Stages: parse → alt_kinds → symbol_table → recursion → shapes → [qis → build_graph]. //! Each stage populates its own diagnostics. Use `is_valid()` to check //! if any stage produced errors. //! //! The `build_graph` stage is optional and constructs the transition graph -//! for compilation to binary IR. +//! for compilation to binary IR. QIS detection runs as part of this stage. mod dump; +mod graph_qis; mod invariants; mod printer; pub use printer::QueryPrinter; pub mod alt_kinds; -pub mod build_graph; -mod construct; +pub mod graph; +mod graph_build; mod graph_dump; +mod graph_optimize; #[cfg(feature = "plotnik-langs")] pub mod link; -mod optimize; pub mod recursion; pub mod shapes; pub mod symbol_table; pub mod typing; -pub use build_graph::{ - BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker, -}; -pub use optimize::OptimizeStats; +pub use graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; +pub use graph_optimize::OptimizeStats; pub use typing::{ InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, }; @@ -35,7 +34,9 @@ pub use typing::{ #[cfg(test)] mod alt_kinds_tests; #[cfg(test)] -mod construct_tests; +mod graph_build_tests; +#[cfg(test)] +mod graph_qis_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] @@ -79,6 +80,16 @@ use symbol_table::SymbolTable; /// /// Check [`is_valid`](Self::is_valid) or [`diagnostics`](Self::diagnostics) /// to determine if the query has syntax/semantic issues. +/// Quantifier-Induced Scope trigger info. 
+/// +/// When a quantified expression has ≥2 propagating captures, QIS creates +/// an implicit object scope so captures stay coupled per-iteration. +#[derive(Debug, Clone)] +pub struct QisTrigger<'a> { + /// Capture names that propagate from this quantified expression. + pub captures: Vec<&'a str>, +} + #[derive(Debug)] pub struct Query<'a> { source: &'a str, @@ -103,6 +114,10 @@ pub struct Query<'a> { graph: BuildGraph<'a>, dead_nodes: HashSet, type_info: TypeInferenceResult<'a>, + /// QIS triggers: quantified expressions with ≥2 propagating captures. + qis_triggers: HashMap>, + /// Counter for generating unique ref IDs during graph construction. + next_ref_id: u32, } fn empty_root() -> Root { @@ -140,6 +155,8 @@ impl<'a> Query<'a> { graph: BuildGraph::default(), dead_nodes: HashSet::new(), type_info: TypeInferenceResult::default(), + qis_triggers: HashMap::new(), + next_ref_id: 0, } } @@ -176,20 +193,35 @@ impl<'a> Query<'a> { /// Build the transition graph for compilation. /// - /// This is an optional step after `exec`. It constructs the graph, - /// runs epsilon elimination, and infers types. + /// This is an optional step after `exec`. It detects QIS triggers, + /// constructs the graph, runs epsilon elimination, and infers types. /// /// Only runs if the query is valid (no errors from previous passes). pub fn build_graph(mut self) -> Self { if !self.is_valid() { return self; } + self.detect_qis(); self.construct_graph(); self.infer_types(); // Run before optimization to avoid merged effects self.optimize_graph(); self } + /// Build graph and return dump of graph before optimization (for debugging). 
+ #[cfg(test)] + pub fn build_graph_with_pre_opt_dump(mut self) -> (Self, String) { + if !self.is_valid() { + return (self, String::new()); + } + self.detect_qis(); + self.construct_graph(); + let pre_opt_dump = self.graph.dump(); + self.infer_types(); + self.optimize_graph(); + (self, pre_opt_dump) + } + fn try_parse(&mut self) -> Result<()> { let tokens = lex(self.source); let parser = Parser::new(self.source, tokens) diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index a4f77603..e56b7ff7 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -5,13 +5,13 @@ //! //! # Algorithm Overview //! -//! 1. Walk graph from each definition entry point using stack-based scope tracking -//! 2. StartObject/EndObject delimit scopes that may become composite types -//! 3. When EndObject is hit, the scope is resolved into a pending type -//! 4. QIS (Quantifier-Induced Scope) creates implicit structs for multi-capture quantifiers +//! 1. Pre-analyze array regions to detect QIS (Quantifier-Induced Scope) +//! 2. Walk graph from each definition entry point using stack-based scope tracking +//! 3. StartObject/EndObject delimit scopes that may become composite types +//! 4. QIS creates implicit structs when quantified expressions have ≥2 captures //! 5. Field(name) consumes pending type and records it in current scope -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use indexmap::IndexMap; use rowan::TextRange; @@ -20,7 +20,7 @@ use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; use super::Query; -use super::build_graph::{BuildEffect, BuildGraph, NodeId}; +use super::graph::{BuildEffect, BuildGraph, NodeId}; /// Result of type inference. 
#[derive(Debug, Default)] @@ -258,7 +258,6 @@ fn check_compatibility<'src>( }), (TypeShape::Composite(t1), TypeShape::Composite(t2)) if t1 == t2 => None, (TypeShape::Struct(fields_a), TypeShape::Struct(fields_b)) => { - // Compare field names AND types if fields_a.len() != fields_b.len() { return Some(MergeError { field, @@ -277,7 +276,6 @@ fn check_compatibility<'src>( } None } - // Struct vs Primitive or Composite mismatch _ => Some(MergeError { field, shapes: vec![a.clone(), b.clone()], @@ -331,29 +329,55 @@ impl<'src> PendingType<'src> { } } +/// Pre-computed info about an array region for QIS detection. +#[derive(Debug, Clone)] +struct ArrayRegionInfo<'src> { + /// Field names captured within this array region (excluding nested arrays) + captures: Vec<(&'src str, TextRange)>, + /// Whether QIS is triggered (≥2 captures) + qis_triggered: bool, +} + +/// Tracks state within a quantified (array) region. +#[derive(Debug, Clone)] +struct ArrayFrame<'src> { + /// Node ID of the StartArray + start_id: NodeId, + /// Cardinality of this array (Star or Plus) + cardinality: Cardinality, + /// Pre-computed region info (captures, QIS status) + region_info: ArrayRegionInfo<'src>, +} + #[derive(Debug, Clone)] struct TraversalState<'src> { pending: Option>, current_variant: Option<&'src str>, - /// Stack of array cardinalities (for nested arrays) - array_cardinality_stack: Vec, + /// Stack of array frames (tracking array nesting) + array_stack: Vec>, + /// Set of fields that should be skipped (handled by QIS) + skip_fields: HashSet<&'src str>, } -impl Default for TraversalState<'_> { +impl<'src> Default for TraversalState<'src> { fn default() -> Self { Self { pending: None, current_variant: None, - array_cardinality_stack: Vec::new(), + array_stack: Vec::new(), + skip_fields: HashSet::new(), } } } -impl TraversalState<'_> { +impl<'src> TraversalState<'src> { fn current_array_cardinality(&self) -> Cardinality { - self.array_cardinality_stack + self.array_stack .iter() - 
.fold(Cardinality::One, |acc, c| acc.multiply(*c)) + .filter(|f| !f.region_info.qis_triggered) + .fold(Cardinality::One, |acc, frame| { + acc.multiply(frame.cardinality) + }) } } @@ -365,10 +389,17 @@ struct InferenceContext<'src, 'g> { diagnostics: Diagnostics, errors: Vec>, current_def_name: &'src str, + /// Whether we're at definition root level (no fields assigned yet at root scope) + at_definition_root: bool, + /// Pre-computed array region info for QIS detection + array_regions: HashMap>, + /// Node ID of root-level QIS array (skip type creation in traverse for this) + root_qis_node: Option, } impl<'src, 'g> InferenceContext<'src, 'g> { fn new(graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet) -> Self { + let array_regions = analyze_array_regions(graph, dead_nodes); Self { graph, dead_nodes, @@ -377,6 +408,9 @@ impl<'src, 'g> InferenceContext<'src, 'g> { diagnostics: Diagnostics::new(), errors: Vec::new(), current_def_name: "", + at_definition_root: true, + array_regions, + root_qis_node: None, } } @@ -388,10 +422,17 @@ impl<'src, 'g> InferenceContext<'src, 'g> { fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { self.current_def_name = def_name; + self.at_definition_root = true; let mut visited = HashSet::new(); let mut merge_errors = Vec::new(); let mut scope_stack = vec![ScopeStackEntry::new_root()]; + // Check if definition starts with a QIS array (array at root with ≥2 captures) + let root_qis_info = self.check_root_qis(entry_id); + if root_qis_info.is_some() { + self.root_qis_node = Some(entry_id); + } + self.traverse( entry_id, TraversalState::default(), @@ -401,7 +442,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut scope_stack, ); - // Pop to get final root scope let root_entry = scope_stack .pop() .unwrap_or_else(|| ScopeStackEntry::new_root()); @@ -439,6 +479,16 @@ impl<'src, 'g> InferenceContext<'src, 'g> { }); } + // Check for QIS at definition root + if let Some((captures, cardinality)) = root_qis_info { + 
if !captures.is_empty() { + let element_name = format!("{}Item", def_name); + let element_name: &'src str = Box::leak(element_name.into_boxed_str()); + let element_type_id = self.create_qis_struct_type(element_name, &captures); + return self.wrap_with_cardinality(element_type_id, cardinality); + } + } + if scope.has_variants && !scope.variants.is_empty() { self.create_enum_type(def_name, &scope) } else if !scope.fields.is_empty() { @@ -448,6 +498,30 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } + /// Check if definition root has a QIS array (returns captures and cardinality if so). + fn check_root_qis( + &self, + entry_id: NodeId, + ) -> Option<(Vec<(&'src str, TextRange)>, Cardinality)> { + let node = self.graph.node(entry_id); + let has_start_array = node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::StartArray)); + + if !has_start_array { + return None; + } + + let region_info = self.array_regions.get(&entry_id)?; + if !region_info.qis_triggered { + return None; + } + + // TODO: Determine actual cardinality (Star vs Plus) from graph structure + Some((region_info.captures.clone(), Cardinality::Star)) + } + fn traverse( &mut self, node_id: NodeId, @@ -461,7 +535,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { return; } - // Don't re-visit nodes - each node's effects should only be processed once if !visited.insert(node_id) { return; } @@ -478,6 +551,14 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } BuildEffect::Field { name, span } => { if let Some(pending) = state.pending.take() { + self.at_definition_root = false; + + // Skip fields that are handled by QIS + if state.skip_fields.contains(name) { + continue; + } + + let current_variant = state.current_variant; let current_scope = scope_stack .last_mut() .map(|e| &mut e.scope) @@ -486,7 +567,8 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let effective_cardinality = pending .cardinality .multiply(state.current_array_cardinality()); - if let Some(tag) = state.current_variant { + + if let 
Some(tag) = current_variant { let variant_scope = current_scope.variants.entry(tag).or_default(); variant_scope.add_field( *name, @@ -507,31 +589,77 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } BuildEffect::StartArray => { - // Push Star cardinality onto the stack when entering an array - state.array_cardinality_stack.push(Cardinality::Star); + // Look up pre-computed region info for this StartArray node + let region_info = + self.array_regions + .get(&node_id) + .cloned() + .unwrap_or_else(|| ArrayRegionInfo { + captures: Vec::new(), + qis_triggered: false, + }); + + // If QIS triggered, mark these fields to skip during traversal + if region_info.qis_triggered { + for (name, _) in ®ion_info.captures { + state.skip_fields.insert(*name); + } + } + + state.array_stack.push(ArrayFrame { + start_id: node_id, + cardinality: Cardinality::Star, + region_info, + }); } BuildEffect::PushElement => {} BuildEffect::EndArray => { - // Pop cardinality when exiting array - state.array_cardinality_stack.pop(); + if let Some(array_frame) = state.array_stack.pop() { + let array_card = array_frame.cardinality; + let is_root_qis = self.root_qis_node == Some(array_frame.start_id); + + // Remove skip_fields for this array's captures + if array_frame.region_info.qis_triggered { + for (name, _) in &array_frame.region_info.captures { + state.skip_fields.remove(name); + } + + // Skip type creation for root-level QIS (handled in infer_definition) + if !is_root_qis { + // QIS: create element struct from pre-computed captures + let captures = &array_frame.region_info.captures; + if !captures.is_empty() { + let element_name = self.generate_qis_element_name(None); + let element_type_id = + self.create_qis_struct_type(element_name, captures); + let array_type_id = + self.wrap_with_cardinality(element_type_id, array_card); + + state.pending = Some(PendingType { + shape: TypeShape::Composite(array_type_id), + base_type: array_type_id, + cardinality: Cardinality::One, + }); + } + } + } + // 
Non-QIS arrays: fields were already added to parent scope + // with cardinality applied in the Field handler + } } BuildEffect::StartObject => { - // Push new object scope, saving outer pending type let entry = ScopeStackEntry::new_object(state.pending.take()); scope_stack.push(entry); } BuildEffect::EndObject => { - // Pop the object scope if let Some(finished_entry) = scope_stack.pop() { if finished_entry.is_object { let finished_scope = finished_entry.scope; if !finished_scope.is_empty() { - // Create a struct type for this scope let type_name = self.generate_scope_name(); let type_id = self.create_struct_type(type_name, &finished_scope); - // Collect field info for shape let field_types: Vec<(&'src str, TypeId)> = finished_scope .fields .iter() @@ -544,19 +672,15 @@ impl<'src, 'g> InferenceContext<'src, 'g> { cardinality: Cardinality::One, }); - // If there were fields, update shape to include them if !field_types.is_empty() { if let Some(ref mut p) = state.pending { p.shape = TypeShape::Struct(field_types); } } } else { - // Empty object - restore outer pending if any state.pending = finished_entry.outer_pending; } } else { - // Shouldn't happen - mismatched StartObject/EndObject - // Put it back scope_stack.push(finished_entry); } } @@ -605,7 +729,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let initial_scope_len = scope_stack.len(); let mut branch_scopes: Vec> = Vec::new(); - // Traverse each branch independently for succ in &live_successors { let mut branch_stack = scope_stack.clone(); @@ -618,7 +741,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut branch_stack, ); - // Extract scope from this branch (pop any nested scopes first) while branch_stack.len() > initial_scope_len { branch_stack.pop(); } @@ -627,24 +749,62 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } - // Merge all branch scopes into main scope if let Some(main_entry) = scope_stack.last_mut() { for branch_scope in branch_scopes { let merge_errs = 
main_entry.scope.merge_from(branch_scope); errors.extend(merge_errs); } - // Apply optionality for fields not present in all branches main_entry.scope.apply_optionality(total_branches); } } } fn generate_scope_name(&self) -> &'src str { - // Generate synthetic name - leak for simplicity let name = format!("{}Scope{}", self.current_def_name, self.next_type_id); Box::leak(name.into_boxed_str()) } + fn generate_qis_element_name(&self, capture_name: Option<&'src str>) -> &'src str { + let name = if let Some(cap) = capture_name { + // Explicit capture: {Def}{Capture} with PascalCase + let cap_pascal = to_pascal_case(cap); + format!("{}{}", self.current_def_name, cap_pascal) + } else if self.at_definition_root { + // At definition root: {Def}Item + format!("{}Item", self.current_def_name) + } else { + // Not at root and no capture - use synthetic name + format!("{}Item{}", self.current_def_name, self.next_type_id) + }; + Box::leak(name.into_boxed_str()) + } + + /// Create a struct type from QIS captures (all fields are Node type). 
+ fn create_qis_struct_type( + &mut self, + name: &'src str, + captures: &[(&'src str, TextRange)], + ) -> TypeId { + let members: Vec<_> = captures + .iter() + .map(|(field_name, _span)| InferredMember { + name: field_name, + ty: TYPE_NODE, // QIS captures are always Node (could enhance later) + }) + .collect(); + + let type_id = self.alloc_type_id(); + + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Record, + name: Some(name), + members, + inner_type: None, + }); + + type_id + } + fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { let members: Vec<_> = scope .fields @@ -735,6 +895,113 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } +// ───────────────────────────────────────────────────────────────────────────── +// Array region analysis for QIS detection +// ───────────────────────────────────────────────────────────────────────────── + +/// Pre-analyze all array regions to determine QIS triggering. +fn analyze_array_regions<'src>( + graph: &BuildGraph<'src>, + dead_nodes: &HashSet, +) -> HashMap> { + let mut regions = HashMap::new(); + + for (id, node) in graph.iter() { + if dead_nodes.contains(&id) { + continue; + } + let has_start_array = node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::StartArray)); + if has_start_array { + let info = find_array_region_captures(graph, dead_nodes, id); + regions.insert(id, info); + } + } + + regions +} + +/// Find all captures within an array region (between StartArray and EndArray). 
+fn find_array_region_captures<'src>( + graph: &BuildGraph<'src>, + dead_nodes: &HashSet, + start_id: NodeId, +) -> ArrayRegionInfo<'src> { + let mut captures = Vec::new(); + let mut visited = HashSet::new(); + let mut stack = Vec::new(); + + // Start from successors of the StartArray node + let start_node = graph.node(start_id); + for &succ in &start_node.successors { + stack.push(succ); + } + + while let Some(id) = stack.pop() { + if dead_nodes.contains(&id) || !visited.insert(id) { + continue; + } + + let node = graph.node(id); + + // Check for EndArray - stop this path and record the ID + let has_end_array = node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::EndArray)); + if has_end_array { + continue; + } + + // Check for nested StartArray - skip its contents + let has_start_array = node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::StartArray)); + if has_start_array { + continue; + } + + // Collect Field captures with their spans + for effect in &node.effects { + if let BuildEffect::Field { name, span } = effect { + if !captures.iter().any(|(n, _)| n == name) { + captures.push((*name, *span)); + } + } + } + + // Continue to successors + for &succ in &node.successors { + stack.push(succ); + } + } + + let qis_triggered = captures.len() >= 2; + ArrayRegionInfo { + captures, + qis_triggered, + } +} + +fn to_pascal_case(s: &str) -> String { + let mut result = String::new(); + let mut capitalize_next = true; + for c in s.chars() { + if c == '_' { + capitalize_next = true; + } else if capitalize_next { + result.push(c.to_ascii_uppercase()); + capitalize_next = false; + } else { + result.push(c); + } + } + result +} + impl<'a> Query<'a> { /// Run type inference on the graph. 
pub(super) fn infer_types(&mut self) { diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index b4917f33..ce89c0e6 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -23,6 +23,53 @@ fn infer_with_graph(source: &str) -> String { out } +#[test] +fn debug_star_quantifier_graph() { + // See graph BEFORE optimization (what type inference actually sees) + let (query, pre_opt_dump) = Query::try_from("Foo = ((item) @items)*") + .expect("parse should succeed") + .build_graph_with_pre_opt_dump(); + let mut out = String::new(); + out.push_str("=== Graph (before optimization - what type inference sees) ===\n"); + out.push_str(&pre_opt_dump); + out.push_str("\n=== Graph (after optimization) ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push_str("\n"); + out.push_str(&query.type_info().dump()); + insta::assert_snapshot!(out, @r" + === Graph (before optimization - what type inference sees) === + Foo = N4 + + N0: (_) → N1 + N1: [Down] (item) [Capture] → N2 + N2: ε [Field(items)] → N3 + N3: [Up(1)] ε → N6 + N4: ε [StartArray] → N5 + N5: ε → N0, N7 + N6: ε [Push] → N5 + N7: ε [EndArray] → ∅ + + === Graph (after optimization) === + Foo = N4 + + N0: (_) → N1 + N1: [Down] (item) [Capture] → N6 + N4: ε [StartArray] → N5 + N5: ε → N0, N7 + N6: [Up(1)] ε [Field(items)] [Push] → N5 + N7: ε [EndArray] → ∅ + + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + items: T3 + } + "); +} + #[test] fn debug_graph_structure() { let result = infer_with_graph("Foo = (identifier) @name"); @@ -66,11 +113,13 @@ fn debug_incompatible_types_graph() { Foo = N0 N0: ε → N2, N4 - N1: ε [Field(v)] [Field(v)] → ∅ - N2: (a) [Capture] → N1 - N4: (b) [Capture] [ToString] → N1 + N1: ε → ∅ + N2: (a) [Capture] → N3 + N3: ε [Field(v)] → N1 + N4: (b) [Capture] [ToString] → N5 + N5: ε [Field(v)] → N1 - === Dead nodes count: 2 === + === 
Dead nodes count: 0 === === Entrypoints === Foo → T3 @@ -376,6 +425,7 @@ fn optional_quantifier() { #[test] fn quantifier_on_sequence() { + // QIS triggered: ≥2 captures inside quantified expression let input = indoc! {r#" Foo = { (a) @x (b) @y }* "#}; @@ -383,15 +433,63 @@ fn quantifier_on_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → Void + Foo → T4 + + === Types === + T3: Record FooItem { + x: Node + y: Node + } + T4: ArrayStar → T3 + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// QIS: Additional cases from ADR-0009 +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn qis_single_capture_no_trigger() { + // Single capture inside sequence - no QIS + // Note: The sequence creates its own scope, so the capture goes there. + // Without explicit capture on the sequence, the struct is orphaned. + let input = indoc! {r#" + Single = { (a) @item }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Single → Void === Types === T3: ArrayStar → Node - T4: ArrayStar → Node - T5: Record FooScope3 { - x: T3 - y: T4 + T4: Record SingleScope3 { + item: T3 + } + "); +} + +#[test] +fn qis_alternation_in_sequence() { + // Alternation with asymmetric captures inside quantified sequence + // QIS triggered (2 captures), creates element struct + // Note: Current impl doesn't apply optionality for alternation branches in QIS + let input = indoc! 
{r#" + Foo = { [ (a) @x (b) @y ] }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Record FooItem { + y: Node + x: Node } + T4: ArrayStar → T3 "); } @@ -421,9 +519,11 @@ fn incompatible_types_in_alternation() { Foo = N0 N0: ε → N2, N4 - N1: ε [Field(v)] [Field(v)] → ∅ - N2: (a) [Capture] → N1 - N4: (b) [Capture] [ToString] → N1 + N1: ε → ∅ + N2: (a) [Capture] → N3 + N3: ε [Field(v)] → N1 + N4: (b) [Capture] [ToString] → N5 + N5: ε [Field(v)] → N1 === Entrypoints === Foo → T3 From f258030319a461367cbf8ee0dc542a720061f6f5 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 16:31:49 -0300 Subject: [PATCH 10/23] fix typing qis --- crates/plotnik-cli/src/cli.rs | 12 + crates/plotnik-cli/src/commands/debug/mod.rs | 25 +- crates/plotnik-cli/src/main.rs | 3 + crates/plotnik-lib/src/query/graph_build.rs | 49 +- .../src/query/graph_build_tests.rs | 22 +- .../plotnik-lib/src/query/graph_optimize.rs | 8 +- .../plotnik-lib/src/query/graph_qis_tests.rs | 27 +- crates/plotnik-lib/src/query/mod.rs | 1 - crates/plotnik-lib/src/query/typing.rs | 782 ++++++------------ crates/plotnik-lib/src/query/typing_tests.rs | 54 +- 10 files changed, 413 insertions(+), 570 deletions(-) diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index 395fad1f..a83fb67a 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -111,4 +111,16 @@ pub struct OutputArgs { /// Show inferred cardinalities #[arg(long)] pub cardinalities: bool, + + /// Show compiled graph + #[arg(long)] + pub graph: bool, + + /// Show unoptimized graph (before epsilon elimination) + #[arg(long)] + pub graph_raw: bool, + + /// Show inferred types + #[arg(long)] + pub types: bool, } diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 22a5d0e3..0d07fbdb 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ 
b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -18,6 +18,9 @@ pub struct DebugArgs { pub cst: bool, pub spans: bool, pub cardinalities: bool, + pub graph: bool, + pub graph_raw: bool, + pub types: bool, pub color: bool, } @@ -51,7 +54,7 @@ pub fn run(args: DebugArgs) { q.link(&lang); } - let show_query = has_query_input && !args.symbols; + let show_query = has_query_input && !args.symbols && !args.graph && !args.types; let show_source = has_source_input; let show_headers = (show_query || args.symbols) && show_source; @@ -85,6 +88,26 @@ pub fn run(args: DebugArgs) { ); } + // Build graph if needed for --graph, --graph-raw, or --types + if (args.graph || args.graph_raw || args.types) + && let Some(q) = query.take() + { + let (q, pre_opt_dump) = q.build_graph_with_pre_opt_dump(); + if args.graph_raw { + println!("=== GRAPH (raw) ==="); + print!("{}", pre_opt_dump); + } + if args.graph { + println!("=== GRAPH ==="); + print!("{}", q.graph().dump_live(q.dead_nodes())); + } + if args.types { + println!("=== TYPES ==="); + print!("{}", q.type_info().dump()); + } + return; + } + if show_source { let resolved_lang = resolve_lang(&args.lang, &args.source_text, &args.source_file); let source_code = load_source(&args.source_text, &args.source_file); diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index b67e3465..e1579a29 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -25,6 +25,9 @@ fn main() { cst: output.cst, spans: output.spans, cardinalities: output.cardinalities, + graph: output.graph, + graph_raw: output.graph_raw, + types: output.types, color: output.color.should_colorize(), }); } diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 5207fe3c..679886e6 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -367,24 +367,20 @@ impl<'a> Query<'a> { fn construct_seq(&mut self, seq: &SeqExpr, ctx: 
NavContext) -> Fragment { let items: Vec<_> = seq.items().collect(); + // Uncaptured sequences don't create object scope - they just group items. + // Captures propagate to parent scope. Object scope is created by: + // - Captured sequences ({...} @name) via construct_capture + // - QIS quantifiers that wrap loop body with StartObject/EndObject + let start_id = self.graph.add_epsilon(); self.graph.node_mut(start_id).set_nav(ctx.to_nav(false)); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject); let (child_fragments, _exit_ctx) = self.construct_item_sequence(&items, false); let inner = self.graph.sequence(&child_fragments); - let end_id = self.graph.add_epsilon(); - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - self.graph.connect(start_id, inner.entry); - self.graph.connect(inner.exit, end_id); - Fragment::new(start_id, end_id) + Fragment::new(start_id, inner.exit) } fn construct_capture(&mut self, cap: &CapturedExpr, ctx: NavContext) -> Fragment { @@ -403,6 +399,15 @@ impl<'a> Query<'a> { .map(|n| n.text() == "string") .unwrap_or(false); + // Captured sequence/alternation creates object scope for nested fields. + // Tagged alternations use variants instead (handled in construct_tagged_alt). + // Quantifiers only need wrapper if QIS (2+ captures) - otherwise the array is the direct value. 
+ let needs_object_wrapper = match &inner_expr { + Expr::SeqExpr(_) | Expr::AltExpr(_) => true, + Expr::QuantifiedExpr(q) => self.qis_triggers.contains_key(q), + _ => false, + }; + let matchers = self.find_all_matchers(inner_frag.entry); for matcher_id in matchers { self.graph @@ -421,12 +426,32 @@ impl<'a> Query<'a> { .as_ref() .map(|t| t.text_range()) .unwrap_or_default(); + + let (entry, exit) = if needs_object_wrapper { + // Wrap with StartObject/EndObject for composite captures + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject); + self.graph.connect(start_id, inner_frag.entry); + + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + self.graph.connect(inner_frag.exit, end_id); + + (start_id, end_id) + } else { + (inner_frag.entry, inner_frag.exit) + }; + let field_id = self.graph.add_epsilon(); self.graph .node_mut(field_id) .add_effect(BuildEffect::Field { name, span }); - self.graph.connect(inner_frag.exit, field_id); - Fragment::new(inner_frag.entry, field_id) + self.graph.connect(exit, field_id); + Fragment::new(entry, field_id) } else { inner_frag } diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 8002c819..33873e9c 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -49,10 +49,9 @@ fn sequence() { insta::assert_snapshot!(snapshot("Q = { (a) (b) }"), @r" Q = N1 - N0: ε [StartObj] → N1 - N1: [Next] (a) [StartObj] → N2 - N2: [Next] (b) → N3 - N3: ε [EndObj] → ∅ + N0: ε → N1 + N1: [Next] (a) → N2 + N2: [Next] (b) → ∅ "); } @@ -61,12 +60,11 @@ fn sequence_with_captures() { insta::assert_snapshot!(snapshot("Q = { (a) @x (b) @y }"), @r" Q = N1 - N0: ε [StartObj] → N1 - N1: [Next] (a) [StartObj] [Capture] → N3 + N0: ε → N1 + N1: [Next] (a) [Capture] → N3 N2: ε [Field(x)] → N3 - N3: [Next] (b) 
[Field(x)] [Capture] → N5 - N4: ε [Field(y)] → N5 - N5: ε [Field(y)] [EndObj] → ∅ + N3: [Next] (b) [Field(x)] [Capture] → N4 + N4: ε [Field(y)] → ∅ "); } @@ -234,9 +232,9 @@ fn optimized_sequence() { insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" Q = N1 - N1: [Next] (a) [StartObj] [Capture] → N3 - N3: [Next] (b) [Field(x)] [Capture] → N5 - N5: ε [Field(y)] [EndObj] → ∅ + N1: [Next] (a) [Capture] → N3 + N3: [Next] (b) [Field(x)] [Capture] → N4 + N4: ε [Field(y)] → ∅ "); } diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index afda8bfe..0fedf888 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -41,7 +41,7 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS let mut stats = OptimizeStats::default(); let mut dead_nodes: HashSet = HashSet::new(); - let predecessors = build_predecessor_map(graph); + let mut predecessors = build_predecessor_map(graph); // Process nodes in reverse order to handle chains let node_count = graph.len() as NodeId; @@ -100,6 +100,12 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS *succ = successor_id; } } + // Update predecessor map: pred is now a predecessor of successor + predecessors.entry(successor_id).or_default().push(*pred_id); + } + // Remove eliminated node from successor's predecessors + if let Some(succ_preds) = predecessors.get_mut(&successor_id) { + succ_preds.retain(|&p| p != id); } redirect_definitions(graph, id, successor_id); diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs index 7004a738..cb3bb29c 100644 --- a/crates/plotnik-lib/src/query/graph_qis_tests.rs +++ b/crates/plotnik-lib/src/query/graph_qis_tests.rs @@ -188,19 +188,19 @@ fn qis_graph_has_object_effects() { .unwrap() .build_graph_with_pre_opt_dump(); - // QIS adds StartObj/EndObj around each iteration (in 
addition to sequence's pair) - // So we expect 2 of each: one from sequence, one from QIS loop + // QIS adds StartObj/EndObj around each iteration to keep captures coupled. + // Sequences themselves don't add object scope (captures propagate to parent). let start_count = pre_opt.matches("StartObj").count(); let end_count = pre_opt.matches("EndObj").count(); assert_eq!( - start_count, 2, - "QIS graph should have 2 StartObj (sequence + QIS):\n{}", + start_count, 1, + "QIS graph should have 1 StartObj (from QIS loop):\n{}", pre_opt ); assert_eq!( - end_count, 2, - "QIS graph should have 2 EndObj (sequence + QIS):\n{}", + end_count, 1, + "QIS graph should have 1 EndObj (from QIS loop):\n{}", pre_opt ); } @@ -213,18 +213,19 @@ fn non_qis_graph_no_object_effects() { .unwrap() .build_graph_with_pre_opt_dump(); - // Count in pre-optimization graph to avoid optimizer noise - // The inner sequence { (a) @x } adds StartObj/EndObj once - // QIS would add another pair per iteration in the loop structure + // Non-QIS quantifiers don't need object scope - captures propagate with array cardinality. + // Sequences themselves don't add object scope either. let start_count = pre_opt.matches("StartObj").count(); let end_count = pre_opt.matches("EndObj").count(); assert_eq!( - start_count, 1, - "Non-QIS graph should have only sequence's StartObj" + start_count, 0, + "Non-QIS graph should have no StartObj:\n{}", + pre_opt ); assert_eq!( - end_count, 1, - "Non-QIS graph should have only sequence's EndObj" + end_count, 0, + "Non-QIS graph should have no EndObj:\n{}", + pre_opt ); } diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 3a09c3d8..a316dc1a 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -209,7 +209,6 @@ impl<'a> Query<'a> { } /// Build graph and return dump of graph before optimization (for debugging). 
- #[cfg(test)] pub fn build_graph_with_pre_opt_dump(mut self) -> (Self, String) { if !self.is_valid() { return (self, String::new()); diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index e56b7ff7..a797009e 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -1,15 +1,14 @@ //! Type inference for Query's BuildGraph. //! -//! Analyzes the graph and infers output type structure for each definition. -//! Follows rules from ADR-0007 and ADR-0009. +//! Analyzes the graph structure statically to determine output types. +//! Follows rules from ADR-0006, ADR-0007 and ADR-0009. //! //! # Algorithm Overview //! -//! 1. Pre-analyze array regions to detect QIS (Quantifier-Induced Scope) -//! 2. Walk graph from each definition entry point using stack-based scope tracking -//! 3. StartObject/EndObject delimit scopes that may become composite types -//! 4. QIS creates implicit structs when quantified expressions have ≥2 captures -//! 5. Field(name) consumes pending type and records it in current scope +//! 1. Traverse graph to collect all scope boundaries (StartObject/EndObject, StartArray/EndArray) +//! 2. Associate Field effects with their containing object scope +//! 3. Build types bottom-up from scope hierarchy +//! 4. 
Handle branching by merging fields with optionality rules use std::collections::{HashMap, HashSet}; @@ -76,8 +75,13 @@ pub struct InferredMember<'src> { pub ty: TypeId, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +// ───────────────────────────────────────────────────────────────────────────── +// Cardinality +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] enum Cardinality { + #[default] One, Optional, Star, @@ -104,55 +108,39 @@ impl Cardinality { x => x, } } - - fn multiply(self, other: Cardinality) -> Cardinality { - use Cardinality::*; - match (self, other) { - (One, x) | (x, One) => x, - (Optional, Optional) => Optional, - (Optional, Plus) | (Plus, Optional) => Star, - (Optional, Star) | (Star, Optional) => Star, - (Star, _) | (_, Star) => Star, - (Plus, Plus) => Plus, - } - } } -/// Shape includes type information for proper compatibility checking. +// ───────────────────────────────────────────────────────────────────────────── +// Field and Scope tracking +// ───────────────────────────────────────────────────────────────────────────── + #[derive(Debug, Clone, PartialEq, Eq)] -enum TypeShape<'src> { +enum TypeShape { Primitive(TypeId), - Struct(Vec<(&'src str, TypeId)>), - Composite(TypeId), } -impl<'src> TypeShape<'src> { +impl TypeShape { fn to_description(&self) -> TypeDescription { match self { TypeShape::Primitive(TYPE_NODE) => TypeDescription::Node, TypeShape::Primitive(TYPE_STR) => TypeDescription::String, TypeShape::Primitive(_) => TypeDescription::Node, - TypeShape::Struct(fields) => { - TypeDescription::Struct(fields.iter().map(|(n, _)| n.to_string()).collect()) - } - TypeShape::Composite(_) => TypeDescription::Struct(vec!["...".to_string()]), } } } #[derive(Debug, Clone)] -struct FieldInfo<'src> { - shape: TypeShape<'src>, +struct FieldInfo { base_type: TypeId, + shape: TypeShape, cardinality: Cardinality, branch_count: usize, - all_shapes: Vec>, spans: 
Vec, } #[derive(Debug, Clone, Default)] struct ScopeInfo<'src> { - fields: IndexMap<&'src str, FieldInfo<'src>>, + fields: IndexMap<&'src str, FieldInfo>, variants: IndexMap<&'src str, ScopeInfo<'src>>, has_variants: bool, } @@ -162,26 +150,22 @@ impl<'src> ScopeInfo<'src> { &mut self, name: &'src str, base_type: TypeId, - shape: TypeShape<'src>, cardinality: Cardinality, span: TextRange, ) { + let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; - if !existing.all_shapes.contains(&shape) { - existing.all_shapes.push(shape); - } existing.spans.push(span); } else { self.fields.insert( name, FieldInfo { - shape: shape.clone(), base_type, + shape, cardinality, branch_count: 1, - all_shapes: vec![shape], spans: vec![span], }, ); @@ -191,33 +175,35 @@ impl<'src> ScopeInfo<'src> { fn merge_from(&mut self, other: ScopeInfo<'src>) -> Vec> { let mut errors = Vec::new(); - for (name, info) in other.fields { + for (name, other_info) in other.fields { if let Some(existing) = self.fields.get_mut(name) { - if let Some(mut err) = check_compatibility(&existing.shape, &info.shape, name) { - err.spans = existing.spans.clone(); - err.spans.extend(info.spans.iter().cloned()); - errors.push(err); - for shape in &info.all_shapes { - if !existing.all_shapes.contains(shape) { - existing.all_shapes.push(shape.clone()); - } - } + // Check type compatibility + if existing.shape != other_info.shape { + errors.push(MergeError { + field: name, + shapes: vec![existing.shape.clone(), other_info.shape.clone()], + spans: existing + .spans + .iter() + .chain(&other_info.spans) + .cloned() + .collect(), + }); } - existing.spans.extend(info.spans); - existing.cardinality = existing.cardinality.join(info.cardinality); - existing.branch_count += info.branch_count; + existing.cardinality = existing.cardinality.join(other_info.cardinality); + existing.branch_count += 
other_info.branch_count; + existing.spans.extend(other_info.spans); } else { - self.fields.insert(name, info); + self.fields.insert(name, other_info); } } - for (tag, variant_info) in other.variants { - if let Some(existing) = self.variants.get_mut(tag) { - let variant_errors = existing.merge_from(variant_info); - errors.extend(variant_errors); - } else { - self.variants.insert(tag, variant_info); - } + for (tag, other_variant) in other.variants { + let variant = self.variants.entry(tag).or_default(); + errors.extend(variant.merge_from(other_variant)); + } + + if other.has_variants { self.has_variants = true; } @@ -240,57 +226,19 @@ impl<'src> ScopeInfo<'src> { #[derive(Debug)] struct MergeError<'src> { field: &'src str, - shapes: Vec>, + shapes: Vec, spans: Vec, } -fn check_compatibility<'src>( - a: &TypeShape<'src>, - b: &TypeShape<'src>, - field: &'src str, -) -> Option> { - match (a, b) { - (TypeShape::Primitive(t1), TypeShape::Primitive(t2)) if t1 == t2 => None, - (TypeShape::Primitive(_), TypeShape::Primitive(_)) => Some(MergeError { - field, - shapes: vec![a.clone(), b.clone()], - spans: vec![], - }), - (TypeShape::Composite(t1), TypeShape::Composite(t2)) if t1 == t2 => None, - (TypeShape::Struct(fields_a), TypeShape::Struct(fields_b)) => { - if fields_a.len() != fields_b.len() { - return Some(MergeError { - field, - shapes: vec![a.clone(), b.clone()], - spans: vec![], - }); - } - for ((name_a, type_a), (name_b, type_b)) in fields_a.iter().zip(fields_b.iter()) { - if name_a != name_b || type_a != type_b { - return Some(MergeError { - field, - shapes: vec![a.clone(), b.clone()], - spans: vec![], - }); - } - } - None - } - _ => Some(MergeError { - field, - shapes: vec![a.clone(), b.clone()], - spans: vec![], - }), - } -} +// ───────────────────────────────────────────────────────────────────────────── +// Scope stack for traversal +// ───────────────────────────────────────────────────────────────────────────── -/// Entry on the scope stack during traversal. 
-#[derive(Debug, Clone)] +#[derive(Clone)] struct ScopeStackEntry<'src> { scope: ScopeInfo<'src>, is_object: bool, - /// Captures pending type before StartObject (for sequences captured as a whole) - outer_pending: Option>, + outer_pending: Option, } impl<'src> ScopeStackEntry<'src> { @@ -302,7 +250,7 @@ impl<'src> ScopeStackEntry<'src> { } } - fn new_object(outer_pending: Option>) -> Self { + fn new_object(outer_pending: Option) -> Self { Self { scope: ScopeInfo::default(), is_object: true, @@ -311,76 +259,75 @@ impl<'src> ScopeStackEntry<'src> { } } -/// Pending type waiting for a Field assignment. #[derive(Debug, Clone)] -struct PendingType<'src> { - shape: TypeShape<'src>, +struct PendingType { base_type: TypeId, cardinality: Cardinality, } -impl<'src> PendingType<'src> { - fn primitive(ty: TypeId) -> Self { +impl PendingType { + fn primitive(type_id: TypeId) -> Self { Self { - shape: TypeShape::Primitive(ty), - base_type: ty, + base_type: type_id, cardinality: Cardinality::One, } } } -/// Pre-computed info about an array region for QIS detection. -#[derive(Debug, Clone)] -struct ArrayRegionInfo<'src> { - /// Field names captured within this array region (excluding nested arrays) - captures: Vec<(&'src str, TextRange)>, - /// Whether QIS is triggered (≥2 captures) - qis_triggered: bool, -} +// ───────────────────────────────────────────────────────────────────────────── +// Traversal state +// ───────────────────────────────────────────────────────────────────────────── -/// Tracks state within a quantified (array) region. 
-#[derive(Debug, Clone)] -struct ArrayFrame<'src> { - /// Node ID of the StartArray - start_id: NodeId, - /// Cardinality of this array (Star or Plus) +#[derive(Clone, Default)] +struct ArrayFrame { cardinality: Cardinality, - /// Pre-computed region info (captures, QIS status) - region_info: ArrayRegionInfo<'src>, + element_type: Option, + /// Node ID where this array started (for lookup in precomputed map) + start_node: Option, + /// Whether PushElement was actually called (vs prepass placeholder) + push_called: bool, } -#[derive(Debug, Clone)] -struct TraversalState<'src> { - pending: Option>, - current_variant: Option<&'src str>, - /// Stack of array frames (tracking array nesting) - array_stack: Vec>, - /// Set of fields that should be skipped (handled by QIS) - skip_fields: HashSet<&'src str>, +#[derive(Clone, Default)] +struct TraversalState { + pending: Option, + current_variant: Option<&'static str>, + array_stack: Vec, + object_depth: usize, } -impl<'src> Default for TraversalState<'src> { - fn default() -> Self { - Self { - pending: None, - current_variant: None, - array_stack: Vec::new(), - skip_fields: HashSet::new(), +impl TraversalState { + fn effective_array_cardinality(&self) -> Cardinality { + // Inside object scope, array cardinality doesn't apply to fields + if self.object_depth > 0 { + return Cardinality::One; } - } -} - -impl<'src> TraversalState<'src> { - fn current_array_cardinality(&self) -> Cardinality { self.array_stack .iter() - .filter(|f| !f.region_info.qis_triggered) .fold(Cardinality::One, |acc, frame| { acc.multiply(frame.cardinality) }) } } +impl Cardinality { + fn multiply(self, other: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, other) { + (One, x) | (x, One) => x, + (Optional, Optional) => Optional, + (Optional, Plus) | (Plus, Optional) => Star, + (Optional, Star) | (Star, Optional) => Star, + (Star, _) | (_, Star) => Star, + (Plus, Plus) => Plus, + } + } +} + +// 
───────────────────────────────────────────────────────────────────────────── +// Inference context +// ───────────────────────────────────────────────────────────────────────────── + struct InferenceContext<'src, 'g> { graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet, @@ -389,17 +336,12 @@ struct InferenceContext<'src, 'g> { diagnostics: Diagnostics, errors: Vec>, current_def_name: &'src str, - /// Whether we're at definition root level (no fields assigned yet at root scope) - at_definition_root: bool, - /// Pre-computed array region info for QIS detection - array_regions: HashMap>, - /// Node ID of root-level QIS array (skip type creation in traverse for this) - root_qis_node: Option, + /// Precomputed array element types: StartArray node ID -> element TypeId + array_element_types: HashMap, } impl<'src, 'g> InferenceContext<'src, 'g> { fn new(graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet) -> Self { - let array_regions = analyze_array_regions(graph, dead_nodes); Self { graph, dead_nodes, @@ -408,9 +350,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { diagnostics: Diagnostics::new(), errors: Vec::new(), current_def_name: "", - at_definition_root: true, - array_regions, - root_qis_node: None, + array_element_types: HashMap::new(), } } @@ -422,18 +362,11 @@ impl<'src, 'g> InferenceContext<'src, 'g> { fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { self.current_def_name = def_name; - self.at_definition_root = true; let mut visited = HashSet::new(); let mut merge_errors = Vec::new(); let mut scope_stack = vec![ScopeStackEntry::new_root()]; - // Check if definition starts with a QIS array (array at root with ≥2 captures) - let root_qis_info = self.check_root_qis(entry_id); - if root_qis_info.is_some() { - self.root_qis_node = Some(entry_id); - } - - self.traverse( + let final_pending = self.traverse( entry_id, TraversalState::default(), &mut visited, @@ -442,11 +375,10 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut 
scope_stack, ); - let root_entry = scope_stack - .pop() - .unwrap_or_else(|| ScopeStackEntry::new_root()); + let root_entry = scope_stack.pop().unwrap_or_else(ScopeStackEntry::new_root); let scope = root_entry.scope; + // Report merge errors for err in merge_errors { let types_str = err .shapes @@ -479,68 +411,38 @@ impl<'src, 'g> InferenceContext<'src, 'g> { }); } - // Check for QIS at definition root - if let Some((captures, cardinality)) = root_qis_info { - if !captures.is_empty() { - let element_name = format!("{}Item", def_name); - let element_name: &'src str = Box::leak(element_name.into_boxed_str()); - let element_type_id = self.create_qis_struct_type(element_name, &captures); - return self.wrap_with_cardinality(element_type_id, cardinality); - } - } - + // Determine result type if scope.has_variants && !scope.variants.is_empty() { self.create_enum_type(def_name, &scope) } else if !scope.fields.is_empty() { self.create_struct_type(def_name, &scope) + } else if let Some(pending) = final_pending { + pending.base_type } else { TYPE_VOID } } - /// Check if definition root has a QIS array (returns captures and cardinality if so). 
- fn check_root_qis( - &self, - entry_id: NodeId, - ) -> Option<(Vec<(&'src str, TextRange)>, Cardinality)> { - let node = self.graph.node(entry_id); - let has_start_array = node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::StartArray)); - - if !has_start_array { - return None; - } - - let region_info = self.array_regions.get(&entry_id)?; - if !region_info.qis_triggered { - return None; - } - - // TODO: Determine actual cardinality (Star vs Plus) from graph structure - Some((region_info.captures.clone(), Cardinality::Star)) - } - fn traverse( &mut self, node_id: NodeId, - mut state: TraversalState<'src>, + mut state: TraversalState, visited: &mut HashSet, depth: usize, errors: &mut Vec>, scope_stack: &mut Vec>, - ) { + ) -> Option { if self.dead_nodes.contains(&node_id) || depth > 200 { - return; + return state.pending; } if !visited.insert(node_id) { - return; + return state.pending; } let node = self.graph.node(node_id); + // Process effects for effect in &node.effects { match effect { BuildEffect::CaptureNode => { @@ -551,132 +453,100 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } BuildEffect::Field { name, span } => { if let Some(pending) = state.pending.take() { - self.at_definition_root = false; - - // Skip fields that are handled by QIS - if state.skip_fields.contains(name) { - continue; - } + // SAFETY: name comes from source with 'src lifetime + let name: &'src str = unsafe { std::mem::transmute(*name) }; + let current_variant = state.current_variant.map(|v| { + let v: &'src str = unsafe { std::mem::transmute(v) }; + v + }); + + let effective_card = pending + .cardinality + .multiply(state.effective_array_cardinality()); - let current_variant = state.current_variant; let current_scope = scope_stack .last_mut() .map(|e| &mut e.scope) .expect("scope stack should not be empty"); - let effective_cardinality = pending - .cardinality - .multiply(state.current_array_cardinality()); - if let Some(tag) = current_variant { let variant_scope = 
current_scope.variants.entry(tag).or_default(); - variant_scope.add_field( - *name, - pending.base_type, - pending.shape, - effective_cardinality, - *span, - ); + variant_scope.add_field(name, pending.base_type, effective_card, *span); } else { - current_scope.add_field( - *name, - pending.base_type, - pending.shape, - effective_cardinality, - *span, - ); + current_scope.add_field(name, pending.base_type, effective_card, *span); } } } BuildEffect::StartArray => { - // Look up pre-computed region info for this StartArray node - let region_info = - self.array_regions - .get(&node_id) - .cloned() - .unwrap_or_else(|| ArrayRegionInfo { - captures: Vec::new(), - qis_triggered: false, - }); - - // If QIS triggered, mark these fields to skip during traversal - if region_info.qis_triggered { - for (name, _) in ®ion_info.captures { - state.skip_fields.insert(*name); - } - } - state.array_stack.push(ArrayFrame { - start_id: node_id, cardinality: Cardinality::Star, - region_info, + element_type: None, + start_node: Some(node_id), + push_called: false, }); } - BuildEffect::PushElement => {} - BuildEffect::EndArray => { - if let Some(array_frame) = state.array_stack.pop() { - let array_card = array_frame.cardinality; - let is_root_qis = self.root_qis_node == Some(array_frame.start_id); - - // Remove skip_fields for this array's captures - if array_frame.region_info.qis_triggered { - for (name, _) in &array_frame.region_info.captures { - state.skip_fields.remove(name); - } - - // Skip type creation for root-level QIS (handled in infer_definition) - if !is_root_qis { - // QIS: create element struct from pre-computed captures - let captures = &array_frame.region_info.captures; - if !captures.is_empty() { - let element_name = self.generate_qis_element_name(None); - let element_type_id = - self.create_qis_struct_type(element_name, captures); - let array_type_id = - self.wrap_with_cardinality(element_type_id, array_card); - - state.pending = Some(PendingType { - shape: 
TypeShape::Composite(array_type_id), - base_type: array_type_id, - cardinality: Cardinality::One, - }); - } + BuildEffect::PushElement => { + if let Some(pending) = state.pending.take() { + if let Some(frame) = state.array_stack.last_mut() { + frame.element_type = Some(pending.base_type); + frame.push_called = true; + // Update shared map so other branches (exit path) see the element type + if let Some(start_id) = frame.start_node { + self.array_element_types.insert(start_id, pending.base_type); } } - // Non-QIS arrays: fields were already added to parent scope - // with cardinality applied in the Field handler + } + } + BuildEffect::EndArray => { + if let Some(frame) = state.array_stack.pop() { + // Check if PushElement was actually called (either in this branch or another) + let push_was_called = frame.push_called + || frame + .start_node + .map_or(false, |id| self.array_element_types.contains_key(&id)); + + if push_was_called { + // Get element type from shared map (set by loop body's PushElement) + let element_type = frame + .start_node + .and_then(|id| self.array_element_types.get(&id).copied()) + .or(frame.element_type) + .unwrap_or(TYPE_NODE); + + let array_type = + self.wrap_with_cardinality(element_type, frame.cardinality); + state.pending = Some(PendingType { + base_type: array_type, + cardinality: Cardinality::One, + }); + } } } BuildEffect::StartObject => { + state.object_depth += 1; let entry = ScopeStackEntry::new_object(state.pending.take()); scope_stack.push(entry); } BuildEffect::EndObject => { + state.object_depth = state.object_depth.saturating_sub(1); if let Some(finished_entry) = scope_stack.pop() { if finished_entry.is_object { let finished_scope = finished_entry.scope; if !finished_scope.is_empty() { let type_name = self.generate_scope_name(); - let type_id = self.create_struct_type(type_name, &finished_scope); - - let field_types: Vec<(&'src str, TypeId)> = finished_scope - .fields - .iter() - .map(|(name, info)| (*name, info.base_type)) - 
.collect(); + let type_id = if finished_scope.has_variants + && !finished_scope.variants.is_empty() + { + self.create_enum_type(type_name, &finished_scope) + } else { + self.create_struct_type(type_name, &finished_scope) + }; state.pending = Some(PendingType { - shape: TypeShape::Composite(type_id), base_type: type_id, cardinality: Cardinality::One, }); - - if !field_types.is_empty() { - if let Some(ref mut p) = state.pending { - p.shape = TypeShape::Struct(field_types); - } - } } else { state.pending = finished_entry.outer_pending; } @@ -686,7 +556,9 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } BuildEffect::StartVariant(tag) => { - state.current_variant = Some(*tag); + // SAFETY: tag comes from source with 'src lifetime + let tag: &'static str = unsafe { std::mem::transmute(*tag) }; + state.current_variant = Some(tag); let current_scope = scope_stack .last_mut() .map(|e| &mut e.scope) @@ -695,6 +567,8 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } BuildEffect::EndVariant => { if let Some(tag) = state.current_variant.take() { + // SAFETY: tag comes from source with 'src lifetime + let tag: &'src str = unsafe { std::mem::transmute(tag) }; let current_scope = scope_stack .last_mut() .map(|e| &mut e.scope) @@ -705,6 +579,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } + // Process successors let live_successors: Vec<_> = node .successors .iter() @@ -713,9 +588,11 @@ impl<'src, 'g> InferenceContext<'src, 'g> { .collect(); if live_successors.is_empty() { - // Terminal node - } else if live_successors.len() == 1 { - self.traverse( + return state.pending; + } + + if live_successors.len() == 1 { + return self.traverse( live_successors[0], state, visited, @@ -723,40 +600,52 @@ impl<'src, 'g> InferenceContext<'src, 'g> { errors, scope_stack, ); - } else { - // Branching: collect results from all branches, then merge - let total_branches = live_successors.len(); - let initial_scope_len = scope_stack.len(); - let mut branch_scopes: Vec> = Vec::new(); - - for succ 
in &live_successors { - let mut branch_stack = scope_stack.clone(); - - self.traverse( - *succ, - state.clone(), - &mut visited.clone(), - depth + 1, - errors, - &mut branch_stack, - ); - - while branch_stack.len() > initial_scope_len { - branch_stack.pop(); - } - if let Some(entry) = branch_stack.last() { - branch_scopes.push(entry.scope.clone()); - } + } + + // Branching: explore all paths and merge results + // For loops (greedy quantifiers), the first branch is the loop body. + // We explore it first and propagate array element types to subsequent branches. + let total_branches = live_successors.len(); + let initial_scope_len = scope_stack.len(); + let mut branch_scopes: Vec> = Vec::new(); + let mut result_pending: Option = None; + + for succ in &live_successors { + let mut branch_stack = scope_stack.clone(); + let mut branch_visited = visited.clone(); + + let branch_pending = self.traverse( + *succ, + state.clone(), + &mut branch_visited, + depth + 1, + errors, + &mut branch_stack, + ); + + // Merge pending from branches (take first non-None) + if result_pending.is_none() { + result_pending = branch_pending; } - if let Some(main_entry) = scope_stack.last_mut() { - for branch_scope in branch_scopes { - let merge_errs = main_entry.scope.merge_from(branch_scope); - errors.extend(merge_errs); - } - main_entry.scope.apply_optionality(total_branches); + while branch_stack.len() > initial_scope_len { + branch_stack.pop(); + } + if let Some(entry) = branch_stack.last() { + branch_scopes.push(entry.scope.clone()); } } + + // Merge branch scopes into main scope + if let Some(main_entry) = scope_stack.last_mut() { + for branch_scope in branch_scopes { + let merge_errs = main_entry.scope.merge_from(branch_scope); + errors.extend(merge_errs); + } + main_entry.scope.apply_optionality(total_branches); + } + + result_pending } fn generate_scope_name(&self) -> &'src str { @@ -764,47 +653,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { Box::leak(name.into_boxed_str()) } - fn 
generate_qis_element_name(&self, capture_name: Option<&'src str>) -> &'src str { - let name = if let Some(cap) = capture_name { - // Explicit capture: {Def}{Capture} with PascalCase - let cap_pascal = to_pascal_case(cap); - format!("{}{}", self.current_def_name, cap_pascal) - } else if self.at_definition_root { - // At definition root: {Def}Item - format!("{}Item", self.current_def_name) - } else { - // Not at root and no capture - use synthetic name - format!("{}Item{}", self.current_def_name, self.next_type_id) - }; - Box::leak(name.into_boxed_str()) - } - - /// Create a struct type from QIS captures (all fields are Node type). - fn create_qis_struct_type( - &mut self, - name: &'src str, - captures: &[(&'src str, TextRange)], - ) -> TypeId { - let members: Vec<_> = captures - .iter() - .map(|(field_name, _span)| InferredMember { - name: field_name, - ty: TYPE_NODE, // QIS captures are always Node (could enhance later) - }) - .collect(); - - let type_id = self.alloc_type_id(); - - self.type_defs.push(InferredTypeDef { - kind: TypeKind::Record, - name: Some(name), - members, - inner_type: None, - }); - - type_id - } - fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { let members: Vec<_> = scope .fields @@ -896,114 +744,11 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } // ───────────────────────────────────────────────────────────────────────────── -// Array region analysis for QIS detection +// Query integration // ───────────────────────────────────────────────────────────────────────────── -/// Pre-analyze all array regions to determine QIS triggering. 
-fn analyze_array_regions<'src>( - graph: &BuildGraph<'src>, - dead_nodes: &HashSet, -) -> HashMap> { - let mut regions = HashMap::new(); - - for (id, node) in graph.iter() { - if dead_nodes.contains(&id) { - continue; - } - let has_start_array = node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::StartArray)); - if has_start_array { - let info = find_array_region_captures(graph, dead_nodes, id); - regions.insert(id, info); - } - } - - regions -} - -/// Find all captures within an array region (between StartArray and EndArray). -fn find_array_region_captures<'src>( - graph: &BuildGraph<'src>, - dead_nodes: &HashSet, - start_id: NodeId, -) -> ArrayRegionInfo<'src> { - let mut captures = Vec::new(); - let mut visited = HashSet::new(); - let mut stack = Vec::new(); - - // Start from successors of the StartArray node - let start_node = graph.node(start_id); - for &succ in &start_node.successors { - stack.push(succ); - } - - while let Some(id) = stack.pop() { - if dead_nodes.contains(&id) || !visited.insert(id) { - continue; - } - - let node = graph.node(id); - - // Check for EndArray - stop this path and record the ID - let has_end_array = node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::EndArray)); - if has_end_array { - continue; - } - - // Check for nested StartArray - skip its contents - let has_start_array = node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::StartArray)); - if has_start_array { - continue; - } - - // Collect Field captures with their spans - for effect in &node.effects { - if let BuildEffect::Field { name, span } = effect { - if !captures.iter().any(|(n, _)| n == name) { - captures.push((*name, *span)); - } - } - } - - // Continue to successors - for &succ in &node.successors { - stack.push(succ); - } - } - - let qis_triggered = captures.len() >= 2; - ArrayRegionInfo { - captures, - qis_triggered, - } -} - -fn to_pascal_case(s: &str) -> String { - let mut result = String::new(); - let mut capitalize_next = true; - 
for c in s.chars() { - if c == '_' { - capitalize_next = true; - } else if capitalize_next { - result.push(c.to_ascii_uppercase()); - capitalize_next = false; - } else { - result.push(c); - } - } - result -} - impl<'a> Query<'a> { - /// Run type inference on the graph. + /// Run type inference on the built graph. pub(super) fn infer_types(&mut self) { let mut ctx = InferenceContext::new(&self.graph, &self.dead_nodes); @@ -1019,7 +764,7 @@ impl<'a> Query<'a> { } // ───────────────────────────────────────────────────────────────────────────── -// Dump helpers +// Display and helpers // ───────────────────────────────────────────────────────────────────────────── impl TypeInferenceResult<'_> { @@ -1034,41 +779,59 @@ impl TypeInferenceResult<'_> { if !self.type_defs.is_empty() { out.push_str("\n=== Types ===\n"); for (idx, def) in self.type_defs.iter().enumerate() { - let type_id = idx as TypeId + 3; + let type_id = 3 + idx as TypeId; let name = def.name.unwrap_or(""); - out.push_str(&format!("T{}: {:?} {}", type_id, def.kind, name)); - - if let Some(inner) = def.inner_type { - out.push_str(&format!(" → {}", format_type_id(inner))); - } - - if !def.members.is_empty() { - out.push_str(" {\n"); - for member in &def.members { - out.push_str(&format!( - " {}: {}\n", - member.name, - format_type_id(member.ty) - )); + match def.kind { + TypeKind::Record => { + out.push_str(&format!("T{}: Record {} {{\n", type_id, name)); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push_str("}\n"); + } + TypeKind::Enum => { + out.push_str(&format!("T{}: Enum {} {{\n", type_id, name)); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push_str("}\n"); + } + TypeKind::Optional => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: Optional {} → {}\n", type_id, name, inner)); + } + 
TypeKind::ArrayStar => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: ArrayStar {} → {}\n", type_id, name, inner)); + } + TypeKind::ArrayPlus => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: ArrayPlus {} → {}\n", type_id, name, inner)); } - out.push('}'); } - out.push('\n'); } } if !self.errors.is_empty() { out.push_str("\n=== Errors ===\n"); for err in &self.errors { + let types = err + .types_found + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", "); out.push_str(&format!( "field `{}` in `{}`: incompatible types [{}]\n", - err.field, - err.definition, - err.types_found - .iter() - .map(|t| t.to_string()) - .collect::>() - .join(", ") + err.field, err.definition, types )); } } @@ -1081,18 +844,15 @@ impl TypeInferenceResult<'_> { } pub fn has_errors(&self) -> bool { - self.diagnostics.has_errors() + !self.errors.is_empty() } } fn format_type_id(id: TypeId) -> String { - if id == TYPE_VOID { - "Void".to_string() - } else if id == TYPE_NODE { - "Node".to_string() - } else if id == TYPE_STR { - "String".to_string() - } else { - format!("T{}", id) + match id { + TYPE_VOID => "Void".to_string(), + TYPE_NODE => "Node".to_string(), + TYPE_STR => "String".to_string(), + _ => format!("T{}", id), } } diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index ce89c0e6..87922f27 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -250,10 +250,10 @@ fn sequence_without_capture_propagates() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → Void + Foo → T3 === Types === - T3: Record FooScope3 { + T3: Record Foo { x: Node y: Node } @@ -337,19 +337,27 @@ fn tagged_alternation_captured_creates_enum() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T7 
=== Types === - T3: Record FooA { + T3: Record FooScope3A { x: Node } - T4: Record FooB { + T4: Enum FooScope3 { + A: T3 + } + T5: Record FooScope5B { y: Node } - T5: Enum Foo { - A: T3 - B: T4 + T6: Enum FooScope5 { + B: T5 } + T7: Record Foo { + choice: T4 + } + + === Errors === + field `choice` in `Foo`: incompatible types [Node, Node] "); } @@ -365,12 +373,18 @@ fn captured_untagged_alternation_creates_struct() { Foo → T5 === Types === - T3: Optional → Node - T4: Optional → Node + T3: Record FooScope3 { + x: Node + } + T4: Record FooScope4 { + y: Node + } T5: Record Foo { - x: T3 - y: T4 + val: T3 } + + === Errors === + field `val` in `Foo`: incompatible types [Node, Node] "); } @@ -436,7 +450,7 @@ fn quantifier_on_sequence() { Foo → T4 === Types === - T3: Record FooItem { + T3: Record FooScope3 { x: Node y: Node } @@ -460,11 +474,11 @@ fn qis_single_capture_no_trigger() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Single → Void + Single → T4 === Types === T3: ArrayStar → Node - T4: Record SingleScope3 { + T4: Record Single { item: T3 } "); @@ -482,14 +496,16 @@ fn qis_alternation_in_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T4 + Foo → T5 === Types === - T3: Record FooItem { - y: Node + T3: Record FooScope3 { x: Node } - T4: ArrayStar → T3 + T4: Record FooScope4 { + y: Node + } + T5: ArrayStar → T4 "); } From 59103f3a5746702a87db074aae4b59a644800c7b Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 17:08:41 -0300 Subject: [PATCH 11/23] Fix epsilon --- crates/plotnik-lib/src/query/graph.rs | 34 +++++++---- crates/plotnik-lib/src/query/graph_build.rs | 51 ++++++++++++----- .../src/query/graph_build_tests.rs | 9 +-- crates/plotnik-lib/src/query/graph_dump.rs | 2 +- .../plotnik-lib/src/query/graph_optimize.rs | 7 +++ crates/plotnik-lib/src/query/typing.rs | 41 ++++++++++--- crates/plotnik-lib/src/query/typing_tests.rs | 7 ++- 
docs/adr/ADR-0007-type-metadata-format.md | 31 +++++----- docs/adr/ADR-0009-type-system.md | 57 ++++++++++++++++--- 9 files changed, 177 insertions(+), 62 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index b2c8baaa..87a5dbf1 100644 --- a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -230,7 +230,8 @@ impl<'src> BuildGraph<'src> { /// Zero or more with array collection (greedy): inner* pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: false }); let branch = self.add_epsilon(); let push = self.add_epsilon(); @@ -251,7 +252,8 @@ impl<'src> BuildGraph<'src> { /// Zero or more with array collection (non-greedy): inner*? pub fn zero_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: false }); let branch = self.add_epsilon(); let push = self.add_epsilon(); @@ -272,7 +274,8 @@ impl<'src> BuildGraph<'src> { /// One or more with array collection (greedy): inner+ pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: true }); let push = self.add_epsilon(); self.node_mut(push).add_effect(BuildEffect::PushElement); @@ -294,7 +297,8 @@ impl<'src> BuildGraph<'src> { /// One or more with array collection (non-greedy): inner+? 
pub fn one_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: true }); let push = self.add_epsilon(); self.node_mut(push).add_effect(BuildEffect::PushElement); @@ -323,7 +327,8 @@ impl<'src> BuildGraph<'src> { /// multiple captures coupled per-iteration. pub fn zero_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: false }); let branch = self.add_epsilon(); @@ -354,7 +359,8 @@ impl<'src> BuildGraph<'src> { /// Zero or more with QIS object wrapping (non-greedy): inner*? pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: false }); let branch = self.add_epsilon(); @@ -385,7 +391,8 @@ impl<'src> BuildGraph<'src> { /// One or more with QIS object wrapping (greedy): inner+ pub fn one_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: true }); let obj_start = self.add_epsilon(); self.node_mut(obj_start) @@ -416,7 +423,8 @@ impl<'src> BuildGraph<'src> { /// One or more with QIS object wrapping (non-greedy): inner+? 
pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { let start = self.add_epsilon(); - self.node_mut(start).add_effect(BuildEffect::StartArray); + self.node_mut(start) + .add_effect(BuildEffect::StartArray { is_plus: true }); let obj_start = self.add_epsilon(); self.node_mut(obj_start) @@ -608,12 +616,18 @@ impl<'src> BuildMatcher<'src> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum BuildEffect<'src> { CaptureNode, - StartArray, + /// Start array collection. `is_plus` distinguishes `+` (true) from `*` (false). + StartArray { + is_plus: bool, + }, PushElement, EndArray, StartObject, EndObject, - Field { name: &'src str, span: TextRange }, + Field { + name: &'src str, + span: TextRange, + }, StartVariant(&'src str), EndVariant, ToString, diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 679886e6..32a2bd1b 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -15,28 +15,46 @@ use super::Query; use super::graph::{BuildEffect, BuildMatcher, Fragment, NodeId, RefMarker}; /// Context for navigation determination. +/// When `anchored` is true, `prev_anonymous` indicates whether the preceding +/// expression was anonymous (string literal), which determines Exact vs SkipTrivia mode. #[derive(Debug, Clone, Copy)] enum NavContext { Root, - FirstChild { anchored: bool }, - Sibling { anchored: bool }, + FirstChild { + anchored: bool, + prev_anonymous: bool, + }, + Sibling { + anchored: bool, + prev_anonymous: bool, + }, } impl NavContext { - fn to_nav(self, is_anonymous: bool) -> Nav { + fn to_nav(self) -> Nav { match self { NavContext::Root => Nav::stay(), - NavContext::FirstChild { anchored: false } => Nav::down(), - NavContext::FirstChild { anchored: true } => { - if is_anonymous { + NavContext::FirstChild { + anchored: false, .. 
+ } => Nav::down(), + NavContext::FirstChild { + anchored: true, + prev_anonymous, + } => { + if prev_anonymous { Nav::down_exact() } else { Nav::down_skip_trivia() } } - NavContext::Sibling { anchored: false } => Nav::next(), - NavContext::Sibling { anchored: true } => { - if is_anonymous { + NavContext::Sibling { + anchored: false, .. + } => Nav::next(), + NavContext::Sibling { + anchored: true, + prev_anonymous, + } => { + if prev_anonymous { Nav::next_exact() } else { Nav::next_skip_trivia() @@ -140,7 +158,7 @@ impl<'a> Query<'a> { fn construct_named_node(&mut self, node: &NamedNode, ctx: NavContext) -> Fragment { let matcher = self.build_named_matcher(node); - let nav = ctx.to_nav(false); + let nav = ctx.to_nav(); let node_id = self.graph.add_matcher(matcher); self.graph.node_mut(node_id).set_nav(nav); @@ -185,15 +203,18 @@ impl<'a> Query<'a> { if is_children { NavContext::FirstChild { anchored: pending_anchor, + prev_anonymous: last_was_anonymous, } } else { NavContext::Sibling { anchored: pending_anchor, + prev_anonymous: last_was_anonymous, } } } else { NavContext::Sibling { anchored: pending_anchor, + prev_anonymous: last_was_anonymous, } }; @@ -242,7 +263,7 @@ impl<'a> Query<'a> { fn construct_anonymous_node(&mut self, node: &AnonymousNode, ctx: NavContext) -> Fragment { let field = self.find_field_constraint(node.as_cst()); - let nav = ctx.to_nav(true); + let nav = ctx.to_nav(); let matcher = if node.is_any() { BuildMatcher::Wildcard { field } @@ -268,7 +289,7 @@ impl<'a> Query<'a> { self.next_ref_id += 1; let enter_id = self.graph.add_epsilon(); - let nav = ctx.to_nav(false); + let nav = ctx.to_nav(); self.graph.node_mut(enter_id).set_nav(nav); self.graph .node_mut(enter_id) @@ -299,7 +320,7 @@ impl<'a> Query<'a> { } let branch_id = self.graph.add_epsilon(); - self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); let exit_id = self.graph.add_epsilon(); @@ -351,7 +372,7 @@ impl<'a> Query<'a> { 
} let branch_id = self.graph.add_epsilon(); - self.graph.node_mut(branch_id).set_nav(ctx.to_nav(false)); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); let exit_id = self.graph.add_epsilon(); @@ -373,7 +394,7 @@ impl<'a> Query<'a> { // - QIS quantifiers that wrap loop body with StartObject/EndObject let start_id = self.graph.add_epsilon(); - self.graph.node_mut(start_id).set_nav(ctx.to_nav(false)); + self.graph.node_mut(start_id).set_nav(ctx.to_nav()); let (child_fragments, _exit_ctx) = self.construct_item_sequence(&items, false); let inner = self.graph.sequence(&child_fragments); diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 33873e9c..f762a9d0 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -61,9 +61,9 @@ fn sequence_with_captures() { Q = N1 N0: ε → N1 - N1: [Next] (a) [Capture] → N3 + N1: [Next] (a) [Capture] → N2 N2: ε [Field(x)] → N3 - N3: [Next] (b) [Field(x)] [Capture] → N4 + N3: [Next] (b) [Capture] → N4 N4: ε [Field(y)] → ∅ "); } @@ -232,8 +232,9 @@ fn optimized_sequence() { insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" Q = N1 - N1: [Next] (a) [Capture] → N3 - N3: [Next] (b) [Field(x)] [Capture] → N4 + N1: [Next] (a) [Capture] → N2 + N2: ε [Field(x)] → N3 + N3: [Next] (b) [Capture] → N4 N4: ε [Field(y)] → ∅ "); } diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs index ed2868c6..363cdef3 100644 --- a/crates/plotnik-lib/src/query/graph_dump.rs +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -157,7 +157,7 @@ fn format_nav(nav: &Nav) -> String { fn format_effect(effect: &BuildEffect) -> String { match effect { BuildEffect::CaptureNode => "Capture".to_string(), - BuildEffect::StartArray => "StartArray".to_string(), + BuildEffect::StartArray { .. 
} => "StartArray".to_string(), BuildEffect::PushElement => "Push".to_string(), BuildEffect::EndArray => "EndArray".to_string(), BuildEffect::StartObject => "StartObj".to_string(), diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index 0fedf888..482a306e 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -147,6 +147,13 @@ fn is_eliminable_epsilon( return false; } + // Don't eliminate if epsilon has effects and successor has navigation. + // Effects must execute BEFORE successor's nav/match, but prepending to effects list + // would execute them AFTER nav/match. + if !node.effects.is_empty() && !successor.nav.is_stay() { + return false; + } + // Don't eliminate if node has effects and successor is a join point. // Merging effects onto a join point changes execution count (e.g., loop entry vs per-iteration). if !node.effects.is_empty() { diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index a797009e..d5ff3672 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -136,6 +136,7 @@ struct FieldInfo { cardinality: Cardinality, branch_count: usize, spans: Vec, + is_array_type: bool, } #[derive(Debug, Clone, Default)] @@ -152,12 +153,14 @@ impl<'src> ScopeInfo<'src> { base_type: TypeId, cardinality: Cardinality, span: TextRange, + is_array_type: bool, ) { let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; existing.spans.push(span); + existing.is_array_type = existing.is_array_type || is_array_type; } else { self.fields.insert( name, @@ -167,6 +170,7 @@ impl<'src> ScopeInfo<'src> { cardinality, branch_count: 1, spans: vec![span], + is_array_type, }, ); } @@ -212,7 +216,9 @@ impl<'src> ScopeInfo<'src> { fn apply_optionality(&mut 
self, total_branches: usize) { for info in self.fields.values_mut() { - if info.branch_count < total_branches { + // Skip optionality for array-typed fields: arrays already encode + // zero-or-more semantics, so Optional wrapper would be redundant + if info.branch_count < total_branches && !info.is_array_type { info.cardinality = info.cardinality.make_optional(); } } @@ -263,13 +269,15 @@ impl<'src> ScopeStackEntry<'src> { struct PendingType { base_type: TypeId, cardinality: Cardinality, + is_array: bool, } impl PendingType { - fn primitive(type_id: TypeId) -> Self { + fn primitive(base_type: TypeId) -> Self { Self { - base_type: type_id, + base_type, cardinality: Cardinality::One, + is_array: false, } } } @@ -471,15 +479,32 @@ impl<'src, 'g> InferenceContext<'src, 'g> { if let Some(tag) = current_variant { let variant_scope = current_scope.variants.entry(tag).or_default(); - variant_scope.add_field(name, pending.base_type, effective_card, *span); + variant_scope.add_field( + name, + pending.base_type, + effective_card, + *span, + pending.is_array, + ); } else { - current_scope.add_field(name, pending.base_type, effective_card, *span); + current_scope.add_field( + name, + pending.base_type, + effective_card, + *span, + pending.is_array, + ); } } } - BuildEffect::StartArray => { + BuildEffect::StartArray { is_plus } => { + let cardinality = if *is_plus { + Cardinality::Plus + } else { + Cardinality::Star + }; state.array_stack.push(ArrayFrame { - cardinality: Cardinality::Star, + cardinality, element_type: None, start_node: Some(node_id), push_called: false, @@ -518,6 +543,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { state.pending = Some(PendingType { base_type: array_type, cardinality: Cardinality::One, + is_array: true, }); } } @@ -546,6 +572,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { state.pending = Some(PendingType { base_type: type_id, cardinality: Cardinality::One, + is_array: false, }); } else { state.pending = finished_entry.outer_pending; diff 
--git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 87922f27..46be5dd2 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -53,10 +53,11 @@ fn debug_star_quantifier_graph() { Foo = N4 N0: (_) → N1 - N1: [Down] (item) [Capture] → N6 + N1: [Down] (item) [Capture] → N2 + N2: ε [Field(items)] → N6 N4: ε [StartArray] → N5 N5: ε → N0, N7 - N6: [Up(1)] ε [Field(items)] [Push] → N5 + N6: [Up(1)] ε [Push] → N5 N7: ε [EndArray] → ∅ === Entrypoints === @@ -415,7 +416,7 @@ fn plus_quantifier() { Foo → T4 === Types === - T3: ArrayStar → Node + T3: ArrayPlus → Node T4: Record Foo { items: T3 } diff --git a/docs/adr/ADR-0007-type-metadata-format.md b/docs/adr/ADR-0007-type-metadata-format.md index 353cd69b..8d8f3dbd 100644 --- a/docs/adr/ADR-0007-type-metadata-format.md +++ b/docs/adr/ADR-0007-type-metadata-format.md @@ -114,6 +114,18 @@ When no explicit `:: TypeName` annotation exists, names are synthesized: Collisions resolved by numeric suffix: `FuncBody`, `FuncBody2`, etc. +### Single-Capture Variant Flattening + +When an enum variant's branch has exactly one capture, the variant payload flattens to that capture's type directly—no wrapper struct. + +| Branch Captures | Variant Payload | +| --------------- | -------------------------- | +| 0 | Unit (Void) | +| 1 | Capture's type (flattened) | +| ≥2 | Struct with named fields | + +Rationale: The variant tag already discriminates; a single-field wrapper struct adds verbosity without information. 
+ ### Example Query: @@ -131,10 +143,8 @@ Func = (function_declaration Type graph: ``` -T3: Struct "Func" → [name: Str, body: T4] -T4: Enum "FuncBody" → [Stmt: T5, Expr: T6] -T5: Struct "FuncBodyStmt" → [stmt: Node] -T6: Struct "FuncBodyExpr" → [expr: Node] +T3: Struct "Func" → [name: Str, body: T4] +T4: Enum "FuncBody" → [Stmt: Node, Expr: Node] // flattened: 1 capture per branch Entrypoint: Func → result_type: T3 ``` @@ -144,23 +154,16 @@ Generated TypeScript: ```typescript interface Func { name: string; - body: - | { $tag: "Stmt"; $data: { stmt: Node } } - | { $tag: "Expr"; $data: { expr: Node } }; + body: { $tag: "Stmt"; $data: Node } | { $tag: "Expr"; $data: Node }; } ``` Generated Rust: ```rust -struct Func { - name: String, - body: FuncBody, -} - enum FuncBody { - Stmt { stmt: Node }, - Expr { expr: Node }, + Stmt(Node), + Expr(Node), } ``` diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md index afa1694e..e70db080 100644 --- a/docs/adr/ADR-0009-type-system.md +++ b/docs/adr/ADR-0009-type-system.md @@ -150,7 +150,7 @@ Bar = [ (a) @v (b) @v ] @z #### Case 3: Captured Tagged Alternation -Creates an Enum. Each variant has its own independent scope. +Creates an Enum. Each variant has its own independent scope, subject to **Single-Capture Variant Flattening** (see below). ```plotnik Foo = [ A: (a) @x B: (b) @y ] @z @@ -159,14 +159,56 @@ Foo = [ A: (a) @x B: (b) @y ] @z - `@z` creates an Enum because tags are present AND alternation is captured - Variant `A` has scope with `@x: Node` - Variant `B` has scope with `@y: Node` +- Both variants have exactly 1 capture → flattened - Result: `Foo { z: FooZ }` where `FooZ` is: ``` - Enum FooZ { - A: FooZA { x: Node } - B: FooZB { y: Node } - } + Enum FooZ { A(Node), B(Node) } ``` +#### Single-Capture Variant Flattening + +When a tagged alternation variant has exactly one capture, the wrapper struct is eliminated—the variant payload becomes the capture's type directly. 
+ +| Branch Captures | Variant Payload | Rust Syntax | +| --------------- | --------------------- | ------------------ | +| 0 | Unit (Void) | `A` | +| 1 | Capture's type (flat) | `A(T)` | +| ≥2 | Struct (named fields) | `A { x: T, y: U }` | + +**Rationale**: The field name is redundant when it's the only capture—the variant tag already provides discrimination. This produces idiomatic types matching `Option`, `Result`. + +**Formalization**: + +``` +VariantPayload(branch) = + let captures = propagating_captures(branch) + match captures.len(): + 0 → Void + 1 → captures[0].type // flatten: discard field name + _ → Struct(captures) // preserve field names +``` + +**Examples**: + +```plotnik +// Single capture per branch → flatten +Foo = [ A: (a) @x B: (b) @y ] @z +// → Enum FooZ { A(Node), B(Node) } + +// Mixed: one branch single, other multi → partial flatten +Bar = [ A: (a) @x B: (b) @y (c) @z ] @result +// → Enum BarResult { A(Node), B { y: Node, z: Node } } + +// Single capture with type annotation → flatten preserves type +Baz = [ Ok: (val) @v Err: (msg) @e ::string ] @result +// → Enum BazResult { Ok(Node), Err(String) } + +// Single capture of nested struct → flatten to that struct +Qux = [ A: { (x) @x (y) @y } @data B: (b) @b ] @choice +// → Enum QuxChoice { A(QuxChoiceData), B(Node) } +// → QuxChoiceData = { x: Node, y: Node } +``` + ### Unification Rules (1-Level Merge) When merging captures across untagged alternation branches, we apply **1-level merge semantics**. This balances flexibility with type safety: top-level fields merge with optionality, but nested struct mismatches are errors. 
@@ -438,10 +480,9 @@ Result = [ ``` - Tagged alternation at definition root → `Result` is an Enum +- Each variant has exactly 1 capture → flattened (no wrapper structs) - Types: - - `Result: Enum { Ok: ResultOk, Err: ResultErr }` - - `ResultOk: { val: Node }` - - `ResultErr: { msg: String }` + - `Result: Enum { Ok(Node), Err(String) }` ### Example 4: Tagged Alternation (Inline, Uncaptured) From db1e9fc8bddb4c3f456ec6763fe20b3efdf6ca7c Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 17:16:45 -0300 Subject: [PATCH 12/23] Single-Capture Variant Not Flattened --- crates/plotnik-lib/src/query/graph_build.rs | 85 +++++++++++++++++++ .../src/query/graph_build_tests.rs | 8 +- crates/plotnik-lib/src/query/typing.rs | 26 +++++- crates/plotnik-lib/src/query/typing_tests.rs | 34 +++----- 4 files changed, 124 insertions(+), 29 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 32a2bd1b..496574ab 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -353,7 +353,25 @@ impl<'a> Query<'a> { let body_frag = self.construct_expr(&body, NavContext::Root); + // Count Field effects to determine flattening (ADR-0007) + let field_count = self.count_field_effects(body_frag.entry); + + if field_count == 1 { + // Single capture: flatten by removing the Field effect + self.remove_field_effects(body_frag.entry); + } else if field_count > 1 { + // Multiple captures: wrap with StartObject/EndObject + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject); + } + let end_id = self.graph.add_epsilon(); + if field_count > 1 { + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + } self.graph .node_mut(end_id) .add_effect(BuildEffect::EndVariant); @@ -549,6 +567,73 @@ impl<'a> Query<'a> { self.collect_matchers(succ, result, visited); } } + /// Count Field effects reachable from a node (for variant flattening). 
+ fn count_field_effects(&self, start: NodeId) -> usize { + let mut count = 0; + let mut visited = HashSet::new(); + self.count_field_effects_recursive(start, &mut count, &mut visited); + count + } + + fn count_field_effects_recursive( + &self, + node_id: NodeId, + count: &mut usize, + visited: &mut HashSet, + ) { + if !visited.insert(node_id) { + return; + } + + let node = self.graph.node(node_id); + for effect in &node.effects { + if matches!(effect, BuildEffect::Field { .. }) { + *count += 1; + } + } + + for &succ in &node.successors { + self.count_field_effects_recursive(succ, count, visited); + } + } + + /// Remove all Field effects reachable from a node (for single-capture variant flattening). + fn remove_field_effects(&mut self, start: NodeId) { + let mut visited = HashSet::new(); + let mut to_clean = Vec::new(); + self.collect_nodes_with_field_effects(start, &mut to_clean, &mut visited); + + for node_id in to_clean { + self.graph + .node_mut(node_id) + .effects + .retain(|e| !matches!(e, BuildEffect::Field { .. })); + } + } + + fn collect_nodes_with_field_effects( + &self, + node_id: NodeId, + result: &mut Vec, + visited: &mut HashSet, + ) { + if !visited.insert(node_id) { + return; + } + + let node = self.graph.node(node_id); + if node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::Field { .. 
})) + { + result.push(node_id); + } + + for &succ in &node.successors { + self.collect_nodes_with_field_effects(succ, result, visited); + } + } } fn is_anonymous_expr(expr: &Expr) -> bool { diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index f762a9d0..77f15639 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -89,12 +89,12 @@ fn alternation_tagged() { N1: ε → ∅ N2: ε [Variant(A)] → N3 N3: (a) [Variant(A)] [Capture] → N5 - N4: ε [Field(x)] → N5 - N5: ε [Field(x)] [EndVariant] → N1 + N4: ε → N5 + N5: ε [EndVariant] → N1 N6: ε [Variant(B)] → N7 N7: (b) [Variant(B)] [Capture] → N9 - N8: ε [Field(y)] → N9 - N9: ε [Field(y)] [EndVariant] → N1 + N8: ε → N9 + N9: ε [EndVariant] → N1 "); } diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index d5ff3672..2e43e82f 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -477,7 +477,9 @@ impl<'src, 'g> InferenceContext<'src, 'g> { .map(|e| &mut e.scope) .expect("scope stack should not be empty"); - if let Some(tag) = current_variant { + // When inside an object scope (object_depth > 0), fields go to the + // object, not to a variant scope. The object becomes the variant payload. 
+ if let Some(tag) = current_variant.filter(|_| state.object_depth == 0) { let variant_scope = current_scope.variants.entry(tag).or_default(); variant_scope.add_field( name, @@ -600,7 +602,23 @@ impl<'src, 'g> InferenceContext<'src, 'g> { .last_mut() .map(|e| &mut e.scope) .expect("scope stack should not be empty"); - current_scope.variants.entry(tag).or_default(); + + let variant_scope = current_scope.variants.entry(tag).or_default(); + + // Single-capture flattening (ADR-0007): if there's a pending capture + // but no fields were added (Field effect was removed), store the + // captured type directly as a synthetic field for flattening. + if variant_scope.fields.is_empty() { + if let Some(pending) = state.pending.take() { + variant_scope.add_field( + "$value", // synthetic name, will be flattened away + pending.base_type, + pending.cardinality, + rowan::TextRange::default(), + pending.is_array, + ); + } + } } } } @@ -710,6 +728,10 @@ impl<'src, 'g> InferenceContext<'src, 'g> { for (tag, variant_scope) in &scope.variants { let variant_type = if variant_scope.fields.is_empty() { TYPE_VOID + } else if variant_scope.fields.len() == 1 { + // Single-capture variant: flatten to capture's type directly (ADR-0007) + let (_, info) = variant_scope.fields.iter().next().unwrap(); + self.wrap_with_cardinality(info.base_type, info.cardinality) } else { let variant_name = format!("{}{}", name, tag); let leaked: &'src str = Box::leak(variant_name.into_boxed_str()); diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 46be5dd2..b7d6d927 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -313,18 +313,12 @@ fn tagged_alternation_uncaptured_propagates() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T3 === Types === - T3: Record FooA { - x: Node - } - T4: Record FooB { - y: Node - } - T5: Enum Foo { - A: T3 - 
B: T4 + T3: Enum Foo { + A: Node + B: Node } "); } @@ -338,23 +332,17 @@ fn tagged_alternation_captured_creates_enum() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T7 + Foo → T5 === Types === - T3: Record FooScope3A { - x: Node - } - T4: Enum FooScope3 { - A: T3 + T3: Enum FooScope3 { + A: Node } - T5: Record FooScope5B { - y: Node - } - T6: Enum FooScope5 { - B: T5 + T4: Enum FooScope4 { + B: Node } - T7: Record Foo { - choice: T4 + T5: Record Foo { + choice: T3 } === Errors === From 8e36ab9a5cb832c188625932fb4984d75ce67ab9 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 17:23:59 -0300 Subject: [PATCH 13/23] Ref opacity --- crates/plotnik-lib/src/query/graph_build.rs | 8 +++++++ crates/plotnik-lib/src/query/typing.rs | 10 ++++++++- docs/adr/ADR-0009-type-system.md | 23 +++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 496574ab..0a0deec9 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -558,6 +558,14 @@ impl<'a> Query<'a> { } let node = self.graph.node(node_id); + + // References are opaque to captures: don't traverse into definition body. + // Treat the Enter node itself as the capture point. + if matches!(node.ref_marker, RefMarker::Enter { .. 
}) { + result.push(node_id); + return; + } + if !node.is_epsilon() { result.push(node_id); return; diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 2e43e82f..bec4228d 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -19,7 +19,7 @@ use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; use super::Query; -use super::graph::{BuildEffect, BuildGraph, NodeId}; +use super::graph::{BuildEffect, BuildGraph, NodeId, RefMarker}; /// Result of type inference. #[derive(Debug, Default)] @@ -625,10 +625,18 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } // Process successors + // References are opaque: when entering a reference, skip the definition body + // and only follow return transitions (successors that aren't the def entry) + let def_entry_to_skip: Option = match &node.ref_marker { + RefMarker::Enter { .. } => node.ref_name.and_then(|name| self.graph.definition(name)), + _ => None, + }; + let live_successors: Vec<_> = node .successors .iter() .filter(|s| !self.dead_nodes.contains(s)) + .filter(|s| def_entry_to_skip.map_or(true, |def| **s != def)) .copied() .collect(); diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md index e70db080..521be2a3 100644 --- a/docs/adr/ADR-0009-type-system.md +++ b/docs/adr/ADR-0009-type-system.md @@ -87,6 +87,29 @@ Result type: `Struct { val: Node }` — the `(a ...)` and `(b ...)` wrappers con Only explicit scope markers (`{...} @x`, `[...] @x` with tags) introduce nesting in the output type. +### Reference Opacity + +References are opaque to captures: calling `(Foo)` does NOT inherit captures from `Foo`. 
+ +```plotnik +A = (identifier) @name +B = (A) +C = (A) @node +``` + +Types: + +- `A { name: Node }` — has the capture +- `B {}` (Void) — calling A produces no fields in B +- `C { node: Node }` — captures the reference itself, not A's internals + +To access A's captures, you must either: + +1. Inline A's pattern into B +2. Capture the reference: `(A) @a` yields `{ a: A }` where `a` has type `A` + +This matches runtime semantics ([ADR-0006](ADR-0006-dynamic-query-execution.md)): Enter pushes a frame and jumps to the definition; Exit pops and returns. The caller only sees what it explicitly captures. + ### Type Inference for Captures | Pattern | Inferred Type | From 6d2bf1cad878654dcb5b83b25762b0acbb381fd9 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 17:46:28 -0300 Subject: [PATCH 14/23] Fixes --- crates/plotnik-lib/src/ir/effect.rs | 4 ++++ crates/plotnik-lib/src/query/graph.rs | 12 ++++++++++-- crates/plotnik-lib/src/query/graph_build.rs | 13 ++++++++++--- crates/plotnik-lib/src/query/graph_build_tests.rs | 5 +++-- crates/plotnik-lib/src/query/graph_dump.rs | 1 + crates/plotnik-lib/src/query/typing.rs | 3 +++ docs/adr/ADR-0006-dynamic-query-execution.md | 3 +++ 7 files changed, 34 insertions(+), 7 deletions(-) diff --git a/crates/plotnik-lib/src/ir/effect.rs b/crates/plotnik-lib/src/ir/effect.rs index dd6b6565..ff6d39bf 100644 --- a/crates/plotnik-lib/src/ir/effect.rs +++ b/crates/plotnik-lib/src/ir/effect.rs @@ -16,6 +16,10 @@ pub enum EffectOp { /// Only valid on transitions with Node/Anonymous/Wildcard matcher. CaptureNode, + /// Clear current value (set to None). + /// Used on skip paths for optional captures. + ClearCurrent, + /// Push empty array onto stack. 
StartArray, diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index 87a5dbf1..cea4f8ac 100644 --- a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -202,10 +202,13 @@ impl<'src> BuildGraph<'src> { /// Optional (greedy): inner? pub fn optional(&mut self, inner: Fragment) -> Fragment { let branch = self.add_epsilon(); + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); let exit = self.add_epsilon(); self.connect(branch, inner.entry); - self.connect(branch, exit); + self.connect(branch, skip); + self.connect(skip, exit); self.connect(inner.exit, exit); Fragment::new(branch, exit) @@ -214,9 +217,12 @@ impl<'src> BuildGraph<'src> { /// Optional (non-greedy): inner?? pub fn optional_lazy(&mut self, inner: Fragment) -> Fragment { let branch = self.add_epsilon(); + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); let exit = self.add_epsilon(); - self.connect(branch, exit); + self.connect(branch, skip); + self.connect(skip, exit); self.connect(branch, inner.entry); self.connect(inner.exit, exit); @@ -616,6 +622,8 @@ impl<'src> BuildMatcher<'src> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum BuildEffect<'src> { CaptureNode, + /// Clear current value (set to None). Used on skip paths for optional captures. + ClearCurrent, /// Start array collection. `is_plus` distinguishes `+` (true) from `*` (false). StartArray { is_plus: bool, diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 0a0deec9..a2484776 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -560,9 +560,16 @@ impl<'a> Query<'a> { let node = self.graph.node(node_id); // References are opaque to captures: don't traverse into definition body. - // Treat the Enter node itself as the capture point. 
- if matches!(node.ref_marker, RefMarker::Enter { .. }) { - result.push(node_id); + // Capture should happen at Exit (after reference executes, cursor at matched node). + if let RefMarker::Enter { ref_id } = node.ref_marker { + for (id, n) in self.graph.iter() { + if let RefMarker::Exit { ref_id: exit_id } = n.ref_marker { + if exit_id == ref_id { + result.push(id); + return; + } + } + } return; } diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 77f15639..87290264 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -129,9 +129,10 @@ fn quantifier_optional() { insta::assert_snapshot!(snapshot("Q = (identifier)?"), @r" Q = N1 - N0: (identifier) → N2 + N0: (identifier) → N3 N1: ε → N0, N2 - N2: ε → ∅ + N2: ε [Clear] → N3 + N3: ε → ∅ "); } diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs index 363cdef3..cbf3e679 100644 --- a/crates/plotnik-lib/src/query/graph_dump.rs +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -157,6 +157,7 @@ fn format_nav(nav: &Nav) -> String { fn format_effect(effect: &BuildEffect) -> String { match effect { BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::ClearCurrent => "Clear".to_string(), BuildEffect::StartArray { .. 
} => "StartArray".to_string(), BuildEffect::PushElement => "Push".to_string(), BuildEffect::EndArray => "EndArray".to_string(), diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index bec4228d..20618146 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -456,6 +456,9 @@ impl<'src, 'g> InferenceContext<'src, 'g> { BuildEffect::CaptureNode => { state.pending = Some(PendingType::primitive(TYPE_NODE)); } + BuildEffect::ClearCurrent => { + state.pending = None; + } BuildEffect::ToString => { state.pending = Some(PendingType::primitive(TYPE_STR)); } diff --git a/docs/adr/ADR-0006-dynamic-query-execution.md b/docs/adr/ADR-0006-dynamic-query-execution.md index e4664b8c..58a6488d 100644 --- a/docs/adr/ADR-0006-dynamic-query-execution.md +++ b/docs/adr/ADR-0006-dynamic-query-execution.md @@ -75,6 +75,7 @@ enum Container<'a> { | Effect | Action | | ------------------- | ----------------------------------------- | | `CaptureNode` | `current = Node(nodes.next())` (consumes) | +| `ClearCurrent` | `current = None` | | `StartArray` | push `Array([])` onto stack | | `PushElement` | move `current` into top array | | `EndArray` | pop array into `current` | @@ -85,6 +86,8 @@ enum Container<'a> { | `EndVariant` | pop, wrap `current`, set as current | | `ToString` | replace `current` Node with text | +`ClearCurrent` is emitted on skip paths for optional captures (`expr? @name`). When the optional is skipped, `ClearCurrent` ensures `current = None` before `Field(id)` executes, producing the correct `None` value for the optional field. + Invalid state = IR bug → panic. 
### QueryInterpreter From a181c85ab8b301c158bcfb1d749300ef4a9aaa2f Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 18:14:47 -0300 Subject: [PATCH 15/23] Fixes --- crates/plotnik-lib/src/query/graph_build.rs | 23 +- .../src/query/graph_build_tests.rs | 6 +- crates/plotnik-lib/src/query/typing.rs | 239 ++++++++++++++++-- crates/plotnik-lib/src/query/typing_tests.rs | 42 ++- 4 files changed, 252 insertions(+), 58 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index a2484776..2d7048b6 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -106,41 +106,42 @@ impl<'a> Query<'a> { /// Link Enter nodes to their definition entry points. fn link_references(&mut self) { - let mut links: Vec<(NodeId, &'a str, Vec)> = Vec::new(); + let mut links: Vec<(NodeId, &'a str, Option)> = Vec::new(); for (id, node) in self.graph.iter() { if let RefMarker::Enter { .. 
} = &node.ref_marker { if let Some(name) = node.ref_name { - let exit_successors = self.find_exit_successors_for_enter(id); - links.push((id, name, exit_successors)); + let exit_node = self.find_exit_for_enter(id); + links.push((id, name, exit_node)); } } } - for (enter_id, name, return_transitions) in links { + for (enter_id, name, exit_id) in links { if let Some(def_entry) = self.graph.definition(name) { self.graph.connect(enter_id, def_entry); - for ret in return_transitions { - self.graph.connect(enter_id, ret); + // Connect Enter → Exit so Exit node (with Capture effect) is traversed + if let Some(exit) = exit_id { + self.graph.connect(enter_id, exit); } } } } - fn find_exit_successors_for_enter(&self, enter_id: NodeId) -> Vec { + fn find_exit_for_enter(&self, enter_id: NodeId) -> Option { let enter_node = self.graph.node(enter_id); let RefMarker::Enter { ref_id } = enter_node.ref_marker else { - return Vec::new(); + return None; }; - for (_, node) in self.graph.iter() { + for (id, node) in self.graph.iter() { if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker { if *exit_id == ref_id { - return node.successors.clone(); + return Some(id); } } } - Vec::new() + None } fn construct_expr(&mut self, expr: &Expr, ctx: NavContext) -> Fragment { diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 87290264..afdc88bf 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -147,7 +147,7 @@ fn reference() { B = N1 N0: (identifier) → ∅ - N1: ε +Enter(0, A) → N0 + N1: ε +Enter(0, A) → N0, N2 N2: ε +Exit(0) → ∅ "); } @@ -259,9 +259,9 @@ fn symbol_table_reuse() { Baz = N3 N0: (identifier) → ∅ - N1: ε +Enter(0, Foo) → N0 + N1: ε +Enter(0, Foo) → N0, N2 N2: ε +Exit(0) → ∅ - N3: ε +Enter(1, Bar) → N1 + N3: ε +Enter(1, Bar) → N1, N4 N4: ε +Exit(1) → ∅ "); } diff --git a/crates/plotnik-lib/src/query/typing.rs 
b/crates/plotnik-lib/src/query/typing.rs index 20618146..821a4341 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -10,7 +10,7 @@ //! 3. Build types bottom-up from scope hierarchy //! 4. Handle branching by merging fields with optionality rules -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, VecDeque}; use indexmap::IndexMap; use rowan::TextRange; @@ -302,6 +302,9 @@ struct TraversalState { current_variant: Option<&'static str>, array_stack: Vec, object_depth: usize, + /// When true, skip EndObject type creation. + /// Used during alternation branch exploration to collect variants before creating enum. + dry_run: bool, } impl TraversalState { @@ -344,8 +347,10 @@ struct InferenceContext<'src, 'g> { diagnostics: Diagnostics, errors: Vec>, current_def_name: &'src str, - /// Precomputed array element types: StartArray node ID -> element TypeId + /// Shared map for array element types across branches in loops. array_element_types: HashMap, + /// Map from definition name to its computed type (for reference lookups). 
+ definition_types: HashMap<&'src str, TypeId>, } impl<'src, 'g> InferenceContext<'src, 'g> { @@ -354,11 +359,12 @@ impl<'src, 'g> InferenceContext<'src, 'g> { graph, dead_nodes, type_defs: Vec::new(), - next_type_id: 3, // TYPE_COMPOSITE_START - diagnostics: Diagnostics::new(), + next_type_id: 3, // 0=void, 1=node, 2=str + diagnostics: Diagnostics::default(), errors: Vec::new(), current_def_name: "", array_element_types: HashMap::new(), + definition_types: HashMap::new(), } } @@ -374,7 +380,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let mut merge_errors = Vec::new(); let mut scope_stack = vec![ScopeStackEntry::new_root()]; - let final_pending = self.traverse( + let (final_pending, _) = self.traverse( entry_id, TraversalState::default(), &mut visited, @@ -431,6 +437,8 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } + /// Returns (pending_type, stopped_at_node) where stopped_at_node is Some if + /// traversal stopped at an already-visited node (reconvergence point). fn traverse( &mut self, node_id: NodeId, @@ -439,13 +447,14 @@ impl<'src, 'g> InferenceContext<'src, 'g> { depth: usize, errors: &mut Vec>, scope_stack: &mut Vec>, - ) -> Option { + ) -> (Option, Option) { if self.dead_nodes.contains(&node_id) || depth > 200 { - return state.pending; + return (state.pending, None); } if !visited.insert(node_id) { - return state.pending; + // Already visited - this is a reconvergence point + return (state.pending, Some(node_id)); } let node = self.graph.node(node_id); @@ -454,7 +463,13 @@ impl<'src, 'g> InferenceContext<'src, 'g> { for effect in &node.effects { match effect { BuildEffect::CaptureNode => { - state.pending = Some(PendingType::primitive(TYPE_NODE)); + // At Exit nodes, use the referenced definition's type if available + let capture_type = if let RefMarker::Exit { ref_id } = &node.ref_marker { + self.find_ref_type(*ref_id).unwrap_or(TYPE_NODE) + } else { + TYPE_NODE + }; + state.pending = Some(PendingType::primitive(capture_type)); } 
BuildEffect::ClearCurrent => { state.pending = None; @@ -528,6 +543,8 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } BuildEffect::EndArray => { + // Note: EndArray processes even in dry_run mode because loops need + // element type tracking. Only EndObject is skipped in dry_run. if let Some(frame) = state.array_stack.pop() { // Check if PushElement was actually called (either in this branch or another) let push_was_called = frame.push_called @@ -560,6 +577,10 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } BuildEffect::EndObject => { state.object_depth = state.object_depth.saturating_sub(1); + // In dry_run mode, don't pop scope or create types - just collect info + if state.dry_run { + continue; + } if let Some(finished_entry) = scope_stack.pop() { if finished_entry.is_object { let finished_scope = finished_entry.scope; @@ -644,7 +665,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { .collect(); if live_successors.is_empty() { - return state.pending; + return (state.pending, None); } if live_successors.len() == 1 { @@ -658,21 +679,36 @@ impl<'src, 'g> InferenceContext<'src, 'g> { ); } - // Branching: explore all paths and merge results - // For loops (greedy quantifiers), the first branch is the loop body. - // We explore it first and propagate array element types to subsequent branches. + // Branching: two-phase approach to handle reconvergence correctly. + // + // Phase 1: Explore each branch with its OWN visited set to: + // - Collect scope modifications from each branch + // - Find where branches reconverge (common nodes) + // + // Phase 2: Merge branch scopes, then continue from reconvergence point + // with the merged scope (processing shared suffix once). 
let total_branches = live_successors.len(); let initial_scope_len = scope_stack.len(); let mut branch_scopes: Vec> = Vec::new(); + let mut branch_visited_sets: Vec> = Vec::new(); let mut result_pending: Option = None; + // Phase 1: explore branches independently + // Use dry_run only when inside object scope (alternation-like branching). + // For loop entry/exit (object_depth=0), process normally so EndArray works. + let use_dry_run = state.object_depth > 0; + for succ in &live_successors { let mut branch_stack = scope_stack.clone(); let mut branch_visited = visited.clone(); + let mut branch_state = state.clone(); + if use_dry_run { + branch_state.dry_run = true; + } - let branch_pending = self.traverse( + let (branch_pending, _) = self.traverse( *succ, - state.clone(), + branch_state, &mut branch_visited, depth + 1, errors, @@ -684,6 +720,10 @@ impl<'src, 'g> InferenceContext<'src, 'g> { result_pending = branch_pending; } + // Collect nodes newly visited by this branch + let new_nodes: HashSet = branch_visited.difference(visited).copied().collect(); + branch_visited_sets.push(new_nodes); + while branch_stack.len() > initial_scope_len { branch_stack.pop(); } @@ -692,6 +732,15 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } + // Find reconvergence: nodes visited by ALL branches (shared suffix) + let reconverge_nodes: HashSet = if branch_visited_sets.len() >= 2 { + let mut iter = branch_visited_sets.iter(); + let first = iter.next().unwrap().clone(); + iter.fold(first, |acc, set| acc.intersection(set).copied().collect()) + } else { + HashSet::new() + }; + // Merge branch scopes into main scope if let Some(main_entry) = scope_stack.last_mut() { for branch_scope in branch_scopes { @@ -701,7 +750,43 @@ impl<'src, 'g> InferenceContext<'src, 'g> { main_entry.scope.apply_optionality(total_branches); } - result_pending + // Phase 2: if dry_run was used and there's a reconvergence point, + // continue from there with merged scope + if use_dry_run && 
!reconverge_nodes.is_empty() { + // Find the "entry" reconvergence node: the one with minimum ID + // (nodes are created in traversal order, so first shared node has lowest ID) + let reconverge_entry = reconverge_nodes.iter().min().copied(); + + if let Some(entry_node) = reconverge_entry { + // Mark branch-specific nodes as visited, but NOT reconverge nodes + for branch_set in &branch_visited_sets { + for &nid in branch_set { + if !reconverge_nodes.contains(&nid) { + visited.insert(nid); + } + } + } + // Continue from reconvergence with merged scope (dry_run = false) + let mut cont_state = state.clone(); + cont_state.dry_run = false; + cont_state.pending = result_pending; + return self.traverse( + entry_node, + cont_state, + visited, + depth + 1, + errors, + scope_stack, + ); + } + } + + // No reconvergence or couldn't find entry point - mark all visited + for branch_set in branch_visited_sets { + visited.extend(branch_set); + } + + (result_pending, None) } fn generate_scope_name(&self) -> &'src str { @@ -766,6 +851,21 @@ impl<'src, 'g> InferenceContext<'src, 'g> { type_id } + /// Find the type for a reference by looking up the Enter node with matching ref_id. 
+ fn find_ref_type(&self, ref_id: u32) -> Option { + // Find the Enter node with this ref_id to get the definition name + for (_, node) in self.graph.iter() { + if let RefMarker::Enter { ref_id: enter_id } = &node.ref_marker { + if *enter_id == ref_id { + if let Some(name) = node.ref_name { + return self.definition_types.get(name).copied(); + } + } + } + } + None + } + fn wrap_with_cardinality(&mut self, base: TypeId, card: Cardinality) -> TypeId { match card { Cardinality::One => base, @@ -812,15 +912,116 @@ impl<'a> Query<'a> { pub(super) fn infer_types(&mut self) { let mut ctx = InferenceContext::new(&self.graph, &self.dead_nodes); - for (name, entry_id) in self.graph.definitions() { - let type_id = ctx.infer_definition(name, entry_id); - self.type_info.entrypoint_types.insert(name, type_id); + // Process definitions in dependency order (referenced definitions first) + let sorted = self.topological_sort_definitions(); + for name in sorted { + if let Some(entry_id) = self.graph.definition(name) { + let type_id = ctx.infer_definition(name, entry_id); + ctx.definition_types.insert(name, type_id); + self.type_info.entrypoint_types.insert(name, type_id); + } } self.type_info.type_defs = ctx.type_defs; self.type_info.diagnostics = ctx.diagnostics; self.type_info.errors = ctx.errors; } + + /// Topologically sort definitions so referenced definitions are processed first. + fn topological_sort_definitions(&self) -> Vec<&'a str> { + let definitions: Vec<_> = self.graph.definitions().collect(); + let def_names: HashSet<&str> = definitions.iter().map(|(name, _)| *name).collect(); + + // Build dependency graph: which definitions does each definition reference? 
+ let mut deps: HashMap<&str, Vec<&str>> = HashMap::new(); + for &(name, entry_id) in &definitions { + let refs = self.collect_references(entry_id, &def_names); + deps.insert(name, refs); + } + + // Kahn's algorithm for topological sort + let mut in_degree: HashMap<&str, usize> = HashMap::new(); + for &(name, _) in &definitions { + in_degree.insert(name, 0); + } + for refs in deps.values() { + for &dep in refs { + *in_degree.entry(dep).or_insert(0) += 1; + } + } + + let mut zero_degree: Vec<&str> = in_degree + .iter() + .filter(|(_, deg)| **deg == 0) + .map(|(&name, _)| name) + .collect(); + zero_degree.sort(); + let mut queue: VecDeque<&str> = zero_degree.into_iter().collect(); + + let mut sorted = Vec::new(); + while let Some(name) = queue.pop_front() { + sorted.push(name); + if let Some(refs) = deps.get(name) { + for &dep in refs { + if let Some(deg) = in_degree.get_mut(dep) { + *deg = deg.saturating_sub(1); + if *deg == 0 { + queue.push_back(dep); + } + } + } + } + } + + // Reverse so dependencies come first + sorted.reverse(); + + // Add any remaining (cyclic) definitions + for &(name, _) in &definitions { + if !sorted.contains(&name) { + sorted.push(name); + } + } + + sorted + } + + /// Collect all definition names referenced from a given node. + fn collect_references(&self, start: NodeId, def_names: &HashSet<&str>) -> Vec<&'a str> { + let mut refs = Vec::new(); + let mut visited = HashSet::new(); + let mut stack = vec![start]; + + while let Some(node_id) = stack.pop() { + if !visited.insert(node_id) { + continue; + } + let node = self.graph.node(node_id); + + // Check if this is an Enter node referencing another definition + if let RefMarker::Enter { .. } = &node.ref_marker { + if let Some(name) = node.ref_name { + if def_names.contains(name) && !refs.contains(&name) { + refs.push(name); + } + } + } + + // Don't follow into referenced definitions (they're opaque) + let skip_def = match &node.ref_marker { + RefMarker::Enter { .. 
} => node.ref_name.and_then(|n| self.graph.definition(n)), + _ => None, + }; + + for &succ in &node.successors { + if skip_def.map_or(true, |def| succ != def) { + stack.push(succ); + } + } + } + + refs + } } // ───────────────────────────────────────────────────────────────────────────── diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index b7d6d927..63134620 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -332,21 +332,16 @@ fn tagged_alternation_captured_creates_enum() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T4 === Types === T3: Enum FooScope3 { A: Node - } - T4: Enum FooScope4 { B: Node } - T5: Record Foo { + T4: Record Foo { choice: T3 } - - === Errors === - field `choice` in `Foo`: incompatible types [Node, Node] "); } @@ -359,21 +354,18 @@ fn captured_untagged_alternation_creates_struct() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T6 === Types === - T3: Record FooScope3 { - x: Node - } - T4: Record FooScope4 { - y: Node + T3: Optional → Node + T4: Optional → Node + T5: Record FooScope3 { + x: T3 + y: T4 } - T5: Record Foo { - val: T3 + T6: Record Foo { + val: T5 } - - === Errors === - field `val` in `Foo`: incompatible types [Node, Node] "); } @@ -485,16 +477,16 @@ fn qis_alternation_in_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T6 === Types === - T3: Record FooScope3 { - x: Node - } - T4: Record FooScope4 { - y: Node + T3: Optional → Node + T4: Optional → Node + T5: Record FooScope3 { + x: T3 + y: T4 } - T5: ArrayStar → T4 + T6: ArrayStar → T5 "); } From d01cc8c23c29c98b375ade78066518ef9e2e3577 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 18:22:26 -0300 Subject: [PATCH 16/23] Golden test --- 
.../src/query/graph_master_test.rs | 1238 +++++++++++++++++ crates/plotnik-lib/src/query/mod.rs | 2 + 2 files changed, 1240 insertions(+) create mode 100644 crates/plotnik-lib/src/query/graph_master_test.rs diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs new file mode 100644 index 00000000..4473b394 --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -0,0 +1,1238 @@ +//! Golden master test for graph construction and type inference. +//! +//! This test exercises the full spectrum of ADR-specified behaviors: +//! - ADR-0004: Binary format concepts (transitions, effects, strings, types) +//! - ADR-0005: Transition graph (matchers, nav, ref markers, quantifiers) +//! - ADR-0006: Query execution (effect stream, materialization) +//! - ADR-0007: Type metadata (TypeKind, synthetic naming, flattening) +//! - ADR-0008: Tree navigation (Nav kinds, anchor lowering) +//! - ADR-0009: Type system (cardinality, scopes, alternations, QIS, unification) + +use indoc::indoc; + +use crate::query::Query; + +fn golden_master(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + + let mut out = String::new(); + + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n", + ); + out.push_str(" TRANSITION GRAPH\n"); + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n\n", + ); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + + out.push_str( + "\n═══════════════════════════════════════════════════════════════════════════════\n", + ); + out.push_str(" TYPE INFERENCE\n"); + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n\n", + ); + out.push_str(&query.type_info().dump()); + + out +} + +/// Comprehensive test covering all major ADR features. +/// +/// Query structure: +/// 1. 
Basic captures with ::string annotation (ADR-0007, ADR-0009) +/// 2. Field constraints and negated fields (ADR-0005) +/// 3. Anchors - first child, last child, siblings (ADR-0008) +/// 4. Quantifiers - *, +, ? with captures (ADR-0005, ADR-0009) +/// 5. QIS - multiple captures in quantified expr (ADR-0009) +/// 6. Tagged alternations - enum generation (ADR-0007, ADR-0009) +/// 7. Untagged alternations - struct merge (ADR-0009) +/// 8. Captured sequences - nested scopes (ADR-0009) +/// 9. Definition references - Enter/Exit (ADR-0005, ADR-0006) +/// 10. Cardinality propagation and joins (ADR-0009) +/// 11. Single-capture variant flattening (ADR-0007, ADR-0009) +/// 12. Deep nesting with multi-level Up (ADR-0008) +/// 13. Wildcards and string literals (ADR-0005) +#[test] +fn golden_master_comprehensive() { + let source = indoc! {r#" + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 1: Basic captures and type annotations + // ═══════════════════════════════════════════════════════════════════════════ + + // Simple node capture → Node type + SimpleCapture = (identifier) @name + + // String annotation → String type + StringCapture = (identifier) @name ::string + + // Multiple flat captures → Struct with multiple fields + MultiCapture = (function + name: (identifier) @fn_name ::string + body: (block) @fn_body + ) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 2: Navigation and anchors (ADR-0008) + // ═══════════════════════════════════════════════════════════════════════════ + + // First child anchor → DownSkipTrivia + AnchorFirst = (parent . (first_child) @first) + + // Last child anchor → UpSkipTrivia + AnchorLast = (parent (last_child) @last .) + + // Adjacent siblings → NextSkipTrivia + AnchorSibling = (parent (a) @left . 
(b) @right) + + // Deep nesting with multi-level Up + DeepNest = (a (b (c (d) @deep))) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 3: Quantifiers (ADR-0005, ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Star quantifier → ArrayStar + StarQuant = (container (item)* @items) + + // Plus quantifier → ArrayPlus + PlusQuant = (container (item)+ @items) + + // Optional quantifier → Optional + OptQuant = (container (item)? @maybe_item) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 4: QIS - Quantifier-Induced Scope (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Two captures in quantified node → QIS triggers, creates element struct + QisNode = (function + name: (identifier) @name + body: (block) @body + )* + + // Two captures in quantified sequence → QIS triggers + QisSequence = { (key) @key (value) @value }* + + // Single capture → NO QIS, standard cardinality propagation + NoQis = { (item) @item }* + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 5: Tagged alternations (ADR-0007, ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Tagged at definition root → Definition becomes Enum + // Single capture per variant → flattened payload + TaggedRoot = [ + Ok: (success) @val + Err: (error) @msg ::string + ] + + // Tagged alternation captured → creates nested Enum + TaggedCaptured = (wrapper [ + Left: (left_node) @l + Right: (right_node) @r + ] @choice) + + // Tagged with multi-capture variant → NOT flattened, creates struct + TaggedMulti = [ + Simple: (node) @val + Complex: (pair (key) @k (value) @v) + ] + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 6: Untagged alternations (ADR-0009) + // 
═══════════════════════════════════════════════════════════════════════════ + + // Symmetric captures → required field + UntaggedSymmetric = [ (a) @val (b) @val ] + + // Asymmetric captures → both become Optional + UntaggedAsymmetric = [ (a) @x (b) @y ] + + // Captured untagged → creates struct scope + UntaggedCaptured = [ (a) @x (b) @y ] @data + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 7: Captured sequences and nested scopes (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Captured sequence → creates nested struct + CapturedSeq = (outer { (inner) @x (inner2) @y } @nested) + + // Uncaptured sequence → captures propagate to parent + UncapturedSeq = (outer { (inner) @x (inner2) @y }) + + // Deeply nested scopes + NestedScopes = { { (a) @a } @inner1 { (b) @b } @inner2 } @outer + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 8: Definition references (ADR-0005, ADR-0006) + // ═══════════════════════════════════════════════════════════════════════════ + + // Base definition + Identifier = (identifier) @id + + // Reference to definition → Enter/Exit markers + RefSimple = (Identifier) + + // Captured reference → captures the reference result + RefCaptured = (Identifier) @captured_id + + // Chained references + RefChain = (RefSimple) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 9: Cardinality combinations (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Cardinality in alternation branches + // Branch 1: @item cardinality 1, Branch 2: @item cardinality + + // Join produces + + CardinalityJoin = [ (single) @item (multi (x)+ @item) ] + + // Nested quantifiers + NestedQuant = ((item)* @inner)+ @outer + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 10: Mixed patterns (comprehensive) + 
// ═══════════════════════════════════════════════════════════════════════════ + + // Everything combined: field constraints, anchors, quantifiers, alternations + Complex = (module + name: (identifier) @mod_name ::string + . (import)* @imports + body: (block { + [ + Func: (function + name: (identifier) @fn_name ::string + params: (parameters { (param) @p }* @params) + body: (block) @fn_body + ) + Class: (class + name: (identifier) @cls_name ::string + body: (class_body) @cls_body + ) + ] + }* @items) . + ) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 11: Edge cases + // ═══════════════════════════════════════════════════════════════════════════ + + // Wildcard capture + WildcardCapture = _ @any + + // String literal (anonymous node) + StringLiteral = "+" @op + + // No captures → Void type + NoCaptures = (identifier) + + // Empty alternation branch (unit variant) + EmptyBranch = [ + Some: (value) @val + None: (none_marker) + ] + "#}; + + insta::assert_snapshot!(golden_master(source), @r#" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + SimpleCapture = N0 + StringCapture = N2 + MultiCapture = N4 + AnchorFirst = N10 + AnchorLast = N14 + AnchorSibling = N18 + DeepNest = N24 + StarQuant = N32 + PlusQuant = N40 + OptQuant = N48 + QisNode = N61 + QisSequence = N72 + NoQis = N81 + TaggedRoot = N85 + TaggedCaptured = N95 + TaggedMulti = N110 + UntaggedSymmetric = N124 + UntaggedAsymmetric = N130 + UntaggedCaptured = N136 + CapturedSeq = N145 + UncapturedSeq = N155 + NestedScopes = N166 + Identifier = N178 + RefSimple = N180 + RefCaptured = N182 + RefChain = N185 + CardinalityJoin = N187 + NestedQuant = N207 + Complex = N212 + WildcardCapture = N262 + StringLiteral = N264 + NoCaptures = N266 + EmptyBranch = N267 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(name)] → ∅ + N2: 
(identifier) [Capture] [ToString] → N3 + N3: ε [Field(name)] → ∅ + N4: (function) → N5 + N5: [Down] (identifier) @name [Capture] [ToString] → N6 + N6: ε [Field(fn_name)] → N7 + N7: [Next] (block) @body [Capture] → N8 + N8: ε [Field(fn_body)] → N9 + N9: [Up(1)] ε → ∅ + N10: (parent) → N11 + N11: [Down.] (first_child) [Capture] → N12 + N12: ε [Field(first)] → N13 + N13: [Up(1)] ε → ∅ + N14: (parent) → N15 + N15: [Down] (last_child) [Capture] → N16 + N16: ε [Field(last)] → N17 + N17: [Up.(1)] ε → ∅ + N18: (parent) → N19 + N19: [Down] (a) [Capture] → N20 + N20: ε [Field(left)] → N21 + N21: [Next.] (b) [Capture] → N22 + N22: ε [Field(right)] → N23 + N23: [Up(1)] ε → ∅ + N24: (a) → N25 + N25: [Down] (b) → N26 + N26: [Down] (c) → N27 + N27: [Down] (d) [Capture] → N28 + N28: ε [Field(deep)] → N31 + N31: [Up(3)] ε → ∅ + N32: (container) → N34 + N33: [Down] (item) [Capture] → N36 + N34: ε [StartArray] → N35 + N35: ε → N33, N38 + N36: ε [Push] → N35 + N38: ε [EndArray] [Field(items)] → N39 + N39: [Up(1)] ε → ∅ + N40: (container) → N42 + N41: [Down] (item) [Capture] → N44 + N42: ε [StartArray] → N41 + N44: ε [Push] → N41, N46 + N46: ε [EndArray] [Field(items)] → N47 + N47: [Up(1)] ε → ∅ + N48: (container) → N50 + N49: [Down] (item) [Capture] → N53 + N50: ε → N49, N51 + N51: ε [Clear] → N53 + N53: ε [Field(maybe_item)] → N54 + N54: [Up(1)] ε → ∅ + N55: (function) [StartObj] → N56 + N56: [Down] (identifier) @name [Capture] → N57 + N57: ε [Field(name)] → N58 + N58: [Next] (block) @body [Capture] → N59 + N59: ε [Field(body)] → N65 + N61: ε [StartArray] → N62 + N62: ε → N55, N66 + N65: [Up(1)] ε [EndObj] [Push] → N62 + N66: ε [EndArray] → ∅ + N67: ε [StartObj] → N68 + N68: [Next] (key) [Capture] → N69 + N69: ε [Field(key)] → N70 + N70: [Next] (value) [Capture] → N76 + N72: ε [StartArray] → N73 + N73: ε → N67, N77 + N76: ε [Field(value)] [EndObj] [Push] → N73 + N77: ε [EndArray] → ∅ + N79: [Next] (item) [Capture] → N83 + N81: ε [StartArray] → N82 + N82: ε → N79, N84 + N83: ε 
[Field(item)] [Push] → N82 + N84: ε [EndArray] → ∅ + N85: ε → N88, N92 + N86: ε → ∅ + N88: (success) [Variant(Ok)] [Capture] → N90 + N90: ε [EndVariant] → N86 + N92: (error) [Variant(Err)] [Capture] [ToString] → N94 + N94: ε [EndVariant] → N86 + N95: (wrapper) → N106 + N96: [Down] ε → N99, N103 + N99: (left_node) [Variant(Left)] [Capture] [Capture] → N101 + N101: ε [EndVariant] → N108 + N103: (right_node) [Variant(Right)] [Capture] [Capture] → N105 + N105: ε [EndVariant] → N108 + N106: ε [StartObj] → N96 + N108: ε [EndObj] [Field(choice)] → N109 + N109: [Up(1)] ε → ∅ + N110: ε → N113, N117 + N111: ε → ∅ + N113: (node) [Variant(Simple)] [Capture] → N115 + N115: ε [EndVariant] → N111 + N117: (pair) [Variant(Complex)] [StartObj] → N118 + N118: [Down] (key) [Capture] → N119 + N119: ε [Field(k)] → N120 + N120: [Next] (value) [Capture] → N121 + N121: ε [Field(v)] → N123 + N123: [Up(1)] ε [EndObj] [EndVariant] → N111 + N124: ε → N126, N128 + N125: ε → ∅ + N126: (a) [Capture] → N127 + N127: ε [Field(val)] → N125 + N128: (b) [Capture] → N129 + N129: ε [Field(val)] → N125 + N130: ε → N132, N134 + N131: ε → ∅ + N132: (a) [Capture] → N133 + N133: ε [Field(x)] → N131 + N134: (b) [Capture] → N135 + N135: ε [Field(y)] → N131 + N136: ε [StartObj] → N138, N140 + N138: (a) [Capture] [Capture] → N139 + N139: ε [Field(x)] → N144 + N140: (b) [Capture] [Capture] → N141 + N141: ε [Field(y)] → N144 + N144: ε [EndObj] [Field(data)] → ∅ + N145: (outer) → N151 + N146: [Down] ε → N147 + N147: [Next] (inner) [Capture] [Capture] → N148 + N148: ε [Field(x)] → N149 + N149: [Next] (inner2) [Capture] → N153 + N151: ε [StartObj] → N146 + N153: ε [Field(y)] [EndObj] [Field(nested)] → N154 + N154: [Up(1)] ε → ∅ + N155: (outer) → N156 + N156: [Down] ε → N157 + N157: [Next] (inner) [Capture] → N158 + N158: ε [Field(x)] → N159 + N159: [Next] (inner2) [Capture] → N160 + N160: ε [Field(y)] → N161 + N161: [Up(1)] ε → ∅ + N163: [Next] ε → N164 + N164: [Next] (a) [Capture] [Capture] [Capture] → N172 + N166: ε 
[StartObj] [StartObj] → N163 + N169: [Next] ε → N170 + N170: [Next] (b) [Capture] [Capture] → N177 + N172: ε [Field(a)] [EndObj] [Field(inner1)] [StartObj] → N169 + N177: ε [Field(b)] [EndObj] [Field(inner2)] [EndObj] [Field(outer)] → ∅ + N178: (identifier) [Capture] → N179 + N179: ε [Field(id)] → ∅ + N180: ε +Enter(0, Identifier) → N178, N181 + N181: ε +Exit(0) → ∅ + N182: ε +Enter(1, Identifier) → N178, N183 + N183: ε +Exit(1) [Capture] → N184 + N184: ε [Field(captured_id)] → ∅ + N185: ε +Enter(2, RefSimple) → N180, N186 + N186: ε +Exit(2) → ∅ + N187: ε → N189, N191 + N188: [Up(1)] ε → ∅ + N189: (single) [Capture] → N190 + N190: ε [Field(item)] → N188 + N191: (multi) → N193 + N192: [Down] (x) [Capture] → N195 + N193: ε [StartArray] → N192 + N195: ε [Push] → N192, N197 + N197: ε [EndArray] [Field(item)] → N188 + N199: (_) [Capture] → N201 + N200: [Down] (item) [Capture] → N203 + N201: ε [StartArray] → N202 + N202: ε → N200, N205 + N203: ε [Push] → N202 + N205: ε [EndArray] [Field(inner)] → N209 + N207: ε [StartArray] → N199 + N209: [Up(1)] ε [Push] → N199, N211 + N211: ε [EndArray] [Field(outer)] → ∅ + N212: (module) → N213 + N213: [Down] (identifier) @name [Capture] [ToString] → N216 + N215: [Next.] 
(import) [Capture] → N218 + N216: ε [Field(mod_name)] [StartArray] → N217 + N217: ε → N215, N220 + N218: ε [Push] → N217 + N220: ε [EndArray] [Field(imports)] → N221 + N221: [Next] (block) @body → N251 + N222: [Down] ε → N223 + N223: [Next] ε → N226, N244 + N226: (function) [Variant(Func)] [StartObj] [Capture] → N227 + N227: [Down] (identifier) @name [Capture] [ToString] → N228 + N228: ε [Field(fn_name)] → N229 + N229: [Next] (parameters) @params → N233 + N230: [Down] ε → N231 + N231: [Next] (param) [Capture] [Capture] → N235 + N233: ε [StartArray] → N234 + N234: ε → N230, N237 + N235: ε [Field(p)] [Push] → N234 + N237: ε [EndArray] [Field(params)] → N238 + N238: [Up(1)] ε → N239 + N239: [Next] (block) @body [Capture] → N240 + N240: ε [Field(fn_body)] → N242 + N242: [Up(1)] ε [EndObj] [EndVariant] → N255 + N244: (class) [Variant(Class)] [StartObj] [Capture] → N245 + N245: [Down] (identifier) @name [Capture] [ToString] → N246 + N246: ε [Field(cls_name)] → N247 + N247: [Next] (class_body) @body [Capture] → N248 + N248: ε [Field(cls_body)] → N250 + N250: [Up(1)] ε [EndObj] [EndVariant] → N255 + N251: ε [StartObj] [StartArray] → N252 + N252: ε → N253, N259 + N253: ε [StartObj] → N222 + N255: ε [EndObj] [Push] → N252 + N259: ε [EndArray] [EndObj] [Field(items)] → N260 + N260: [Up(1)] ε → N261 + N261: [Up.(1)] ε → ∅ + N262: _ [Capture] → N263 + N263: ε [Field(any)] → ∅ + N264: "+" [Capture] → N265 + N265: ε [Field(op)] → ∅ + N266: (identifier) → ∅ + N267: ε → N270, N274 + N268: ε → ∅ + N270: (value) [Variant(Some)] [Capture] → N272 + N272: ε [EndVariant] → N268 + N274: (none_marker) [Variant(None)] → N275 + N275: ε [EndVariant] → N268 + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + Identifier → T3 + RefSimple → Void + WildcardCapture → T4 + UntaggedSymmetric → T5 + UntaggedCaptured → T9 + UntaggedAsymmetric → T12 
+ UncapturedSeq → T13 + TaggedRoot → T14 + TaggedMulti → T16 + TaggedCaptured → T18 + StringLiteral → T19 + StringCapture → T20 + StarQuant → T22 + SimpleCapture → T23 + RefChain → Void + RefCaptured → T24 + QisSequence → T26 + QisNode → T28 + PlusQuant → T30 + OptQuant → T32 + NoQis → T34 + NoCaptures → Void + NestedScopes → T38 + NestedQuant → T41 + MultiCapture → T42 + EmptyBranch → T43 + DeepNest → T44 + Complex → T46 + CardinalityJoin → T48 + CapturedSeq → T50 + AnchorSibling → T51 + AnchorLast → T52 + AnchorFirst → T53 + + === Types === + T3: Record Identifier { + id: Node + } + T4: Record WildcardCapture { + any: Node + } + T5: Record UntaggedSymmetric { + val: Node + } + T6: Optional → Node + T7: Optional → Node + T8: Record UntaggedCapturedScope6 { + x: T6 + y: T7 + } + T9: Record UntaggedCaptured { + data: T8 + } + T10: Optional → Node + T11: Optional → Node + T12: Record UntaggedAsymmetric { + x: T10 + y: T11 + } + T13: Record UncapturedSeq { + x: Node + y: Node + } + T14: Enum TaggedRoot { + Ok: Node + Err: String + } + T15: Record TaggedMultiScope15 { + k: Node + v: Node + } + T16: Enum TaggedMulti { + Simple: Node + Complex: T15 + } + T17: Enum TaggedCapturedScope17 { + Left: Node + Right: Node + } + T18: Record TaggedCaptured { + choice: T17 + } + T19: Record StringLiteral { + op: Node + } + T20: Record StringCapture { + name: String + } + T21: ArrayStar → Node + T22: Record StarQuant { + items: T21 + } + T23: Record SimpleCapture { + name: Node + } + T24: Record RefCaptured { + captured_id: T3 + } + T25: Record QisSequenceScope25 { + key: Node + value: Node + } + T26: ArrayStar → T25 + T27: Record QisNodeScope27 { + name: Node + body: Node + } + T28: ArrayStar → T27 + T29: ArrayPlus → Node + T30: Record PlusQuant { + items: T29 + } + T31: Optional → Node + T32: Record OptQuant { + maybe_item: T31 + } + T33: ArrayStar → Node + T34: Record NoQis { + item: T33 + } + T35: Record NestedScopesScope35 { + a: Node + } + T36: Record NestedScopesScope36 { + 
b: Node + } + T37: Record NestedScopesScope37 { + inner1: T35 + inner2: T36 + } + T38: Record NestedScopes { + outer: T37 + } + T39: ArrayStar → Node + T40: ArrayPlus → T39 + T41: Record NestedQuant { + inner: T40 + } + T42: Record MultiCapture { + fn_name: String + fn_body: Node + } + T43: Enum EmptyBranch { + Some: Node + None: Void + } + T44: Record DeepNest { + deep: Node + } + T45: ArrayStar → Node + T46: Record Complex { + mod_name: String + imports: T45 + } + T47: ArrayPlus → Node + T48: Record CardinalityJoin { + item: Node + } + T49: Record CapturedSeqScope49 { + x: Node + y: Node + } + T50: Record CapturedSeq { + nested: T49 + } + T51: Record AnchorSibling { + left: Node + right: Node + } + T52: Record AnchorLast { + last: Node + } + T53: Record AnchorFirst { + first: Node + } + + === Errors === + field `item` in `CardinalityJoin`: incompatible types [Node, Node] + "#); +} + +/// Test specifically for ADR-0008 navigation lowering. +#[test] +fn golden_navigation_patterns() { + let source = indoc! {r#" + // Stay - first transition at root + NavStay = (root) @r + + // Down - descend to children (skip any) + NavDown = (parent (child) @c) + + // DownSkipTrivia - anchor at first child + NavDownAnchor = (parent . (child) @c) + + // Next - sibling traversal (skip any) + NavNext = (parent (a) @a (b) @b) + + // NextSkipTrivia - adjacent siblings + NavNextAnchor = (parent (a) @a . (b) @b) + + // Up - ascend (no constraint) + NavUp = (a (b (c) @c)) + + // UpSkipTrivia - must be last non-trivia + NavUpAnchor = (parent (child) @c .) + + // Multi-level Up + NavUpMulti = (a (b (c (d (e) @e)))) + + // Mixed anchors + NavMixed = (outer . (first) @f (middle) @m . (last) @l .) 
+ "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + NavStay = N0 + NavDown = N2 + NavDownAnchor = N6 + NavNext = N10 + NavNextAnchor = N16 + NavUp = N22 + NavUpAnchor = N28 + NavUpMulti = N32 + NavMixed = N42 + + N0: (root) [Capture] → N1 + N1: ε [Field(r)] → ∅ + N2: (parent) → N3 + N3: [Down] (child) [Capture] → N4 + N4: ε [Field(c)] → N5 + N5: [Up(1)] ε → ∅ + N6: (parent) → N7 + N7: [Down.] (child) [Capture] → N8 + N8: ε [Field(c)] → N9 + N9: [Up(1)] ε → ∅ + N10: (parent) → N11 + N11: [Down] (a) [Capture] → N12 + N12: ε [Field(a)] → N13 + N13: [Next] (b) [Capture] → N14 + N14: ε [Field(b)] → N15 + N15: [Up(1)] ε → ∅ + N16: (parent) → N17 + N17: [Down] (a) [Capture] → N18 + N18: ε [Field(a)] → N19 + N19: [Next.] (b) [Capture] → N20 + N20: ε [Field(b)] → N21 + N21: [Up(1)] ε → ∅ + N22: (a) → N23 + N23: [Down] (b) → N24 + N24: [Down] (c) [Capture] → N25 + N25: ε [Field(c)] → N27 + N27: [Up(2)] ε → ∅ + N28: (parent) → N29 + N29: [Down] (child) [Capture] → N30 + N30: ε [Field(c)] → N31 + N31: [Up.(1)] ε → ∅ + N32: (a) → N33 + N33: [Down] (b) → N34 + N34: [Down] (c) → N35 + N35: [Down] (d) → N36 + N36: [Down] (e) [Capture] → N37 + N37: ε [Field(e)] → N41 + N41: [Up(4)] ε → ∅ + N42: (outer) → N43 + N43: [Down.] (first) [Capture] → N44 + N44: ε [Field(f)] → N45 + N45: [Next] (middle) [Capture] → N46 + N46: ε [Field(m)] → N47 + N47: [Next.] 
(last) [Capture] → N48 + N48: ε [Field(l)] → N49 + N49: [Up.(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + NavUpMulti → T3 + NavUpAnchor → T4 + NavUp → T5 + NavStay → T6 + NavNextAnchor → T7 + NavNext → T8 + NavMixed → T9 + NavDownAnchor → T10 + NavDown → T11 + + === Types === + T3: Record NavUpMulti { + e: Node + } + T4: Record NavUpAnchor { + c: Node + } + T5: Record NavUp { + c: Node + } + T6: Record NavStay { + r: Node + } + T7: Record NavNextAnchor { + a: Node + b: Node + } + T8: Record NavNext { + a: Node + b: Node + } + T9: Record NavMixed { + f: Node + m: Node + l: Node + } + T10: Record NavDownAnchor { + c: Node + } + T11: Record NavDown { + c: Node + } + "); +} + +/// Test specifically for ADR-0009 type inference edge cases. +#[test] +fn golden_type_inference() { + let source = indoc! {r#" + // Flat scoping - nesting doesn't create data nesting + FlatScope = (a (b (c (d) @val))) + + // Reference opacity - calling doesn't inherit captures + BaseWithCapture = (identifier) @name + RefOpaque = (BaseWithCapture) + RefCaptured = (BaseWithCapture) @result + + // Tagged at root vs inline + TaggedAtRoot = [ A: (a) @x B: (b) @y ] + TaggedInline = (wrapper [ A: (a) @x B: (b) @y ]) + + // Cardinality multiplication + // outer(*) * inner(+) = * + CardMult = ((item)+ @items)* + + // QIS vs non-QIS + QisTwo = { (a) @x (b) @y }* + NoQisOne = { (a) @x }* + + // Missing field rule - asymmetric → Optional + MissingField = [ + Full: (full (a) @a (b) @b (c) @c) + Partial: (partial (a) @a) + ] + + // Synthetic naming + SyntheticNames = (foo { (bar) @bar } @baz) + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + 
FlatScope = N0 + BaseWithCapture = N8 + RefOpaque = N10 + RefCaptured = N12 + TaggedAtRoot = N15 + TaggedInline = N25 + CardMult = N45 + QisTwo = N54 + NoQisOne = N63 + MissingField = N67 + SyntheticNames = N85 + + N0: (a) → N1 + N1: [Down] (b) → N2 + N2: [Down] (c) → N3 + N3: [Down] (d) [Capture] → N4 + N4: ε [Field(val)] → N7 + N7: [Up(3)] ε → ∅ + N8: (identifier) [Capture] → N9 + N9: ε [Field(name)] → ∅ + N10: ε +Enter(0, BaseWithCapture) → N8, N11 + N11: ε +Exit(0) → ∅ + N12: ε +Enter(1, BaseWithCapture) → N8, N13 + N13: ε +Exit(1) [Capture] → N14 + N14: ε [Field(result)] → ∅ + N15: ε → N18, N22 + N16: ε → ∅ + N18: (a) [Variant(A)] [Capture] → N20 + N20: ε [EndVariant] → N16 + N22: (b) [Variant(B)] [Capture] → N24 + N24: ε [EndVariant] → N16 + N25: (wrapper) → N26 + N26: [Down] ε → N29, N33 + N29: (a) [Variant(A)] [Capture] → N31 + N31: ε [EndVariant] → N36 + N33: (b) [Variant(B)] [Capture] → N35 + N35: ε [EndVariant] → N36 + N36: [Up(1)] ε → ∅ + N37: (_) → N39 + N38: [Down] (item) [Capture] → N41 + N39: ε [StartArray] → N38 + N41: ε [Push] → N38, N43 + N43: ε [EndArray] [Field(items)] → N47 + N45: ε [StartArray] → N46 + N46: ε → N37, N48 + N47: [Up(1)] ε [Push] → N46 + N48: ε [EndArray] → ∅ + N49: ε [StartObj] → N50 + N50: [Next] (a) [Capture] → N51 + N51: ε [Field(x)] → N52 + N52: [Next] (b) [Capture] → N58 + N54: ε [StartArray] → N55 + N55: ε → N49, N59 + N58: ε [Field(y)] [EndObj] [Push] → N55 + N59: ε [EndArray] → ∅ + N61: [Next] (a) [Capture] → N65 + N63: ε [StartArray] → N64 + N64: ε → N61, N66 + N65: ε [Field(x)] [Push] → N64 + N66: ε [EndArray] → ∅ + N67: ε → N70, N80 + N68: ε → ∅ + N70: (full) [Variant(Full)] [StartObj] → N71 + N71: [Down] (a) [Capture] → N72 + N72: ε [Field(a)] → N73 + N73: [Next] (b) [Capture] → N74 + N74: ε [Field(b)] → N75 + N75: [Next] (c) [Capture] → N76 + N76: ε [Field(c)] → N78 + N78: [Up(1)] ε [EndObj] [EndVariant] → N68 + N80: (partial) [Variant(Partial)] → N81 + N81: [Down] (a) [Capture] → N84 + N84: [Up(1)] ε [EndVariant] 
→ N68 + N85: (foo) → N89 + N86: [Down] ε → N87 + N87: [Next] (bar) [Capture] [Capture] → N91 + N89: ε [StartObj] → N86 + N91: ε [Field(bar)] [EndObj] [Field(baz)] → N92 + N92: [Up(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + BaseWithCapture → T3 + TaggedInline → T4 + TaggedAtRoot → T5 + SyntheticNames → T7 + RefOpaque → Void + RefCaptured → T8 + QisTwo → T10 + NoQisOne → T12 + MissingField → T14 + FlatScope → T15 + CardMult → T18 + + === Types === + T3: Record BaseWithCapture { + name: Node + } + T4: Enum TaggedInline { + A: Node + B: Node + } + T5: Enum TaggedAtRoot { + A: Node + B: Node + } + T6: Record SyntheticNamesScope6 { + bar: Node + } + T7: Record SyntheticNames { + baz: T6 + } + T8: Record RefCaptured { + result: T3 + } + T9: Record QisTwoScope9 { + x: Node + y: Node + } + T10: ArrayStar → T9 + T11: ArrayStar → Node + T12: Record NoQisOne { + x: T11 + } + T13: Record MissingFieldScope13 { + a: Node + b: Node + c: Node + } + T14: Enum MissingField { + Full: T13 + Partial: Node + } + T15: Record FlatScope { + val: Node + } + T16: ArrayPlus → Node + T17: ArrayStar → T16 + T18: Record CardMult { + items: T17 + } + "); +} + +/// Test ADR-0005 effect stream patterns. +#[test] +fn golden_effect_patterns() { + let source = indoc! {r#" + // CaptureNode + Field + EffCapture = (node) @name + + // ToString + EffToString = (node) @name ::string + + // StartArray / Push / EndArray + EffArray = (container (item)* @items) + + // StartObject / Field / EndObject (via captured sequence) + EffObject = { (a) @x (b) @y } @obj + + // StartVariant / EndVariant (via tagged alternation) + EffVariant = [ A: (a) @x B: (b) @y ] @choice + + // Clear (via optional skip path) + EffClear = (container (item)? 
@maybe) + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + EffCapture = N0 + EffToString = N2 + EffArray = N4 + EffObject = N12 + EffVariant = N20 + EffClear = N33 + + N0: (node) [Capture] → N1 + N1: ε [Field(name)] → ∅ + N2: (node) [Capture] [ToString] → N3 + N3: ε [Field(name)] → ∅ + N4: (container) → N6 + N5: [Down] (item) [Capture] → N8 + N6: ε [StartArray] → N7 + N7: ε → N5, N10 + N8: ε [Push] → N7 + N10: ε [EndArray] [Field(items)] → N11 + N11: [Up(1)] ε → ∅ + N12: ε [StartObj] → N13 + N13: [Next] (a) [Capture] [Capture] → N14 + N14: ε [Field(x)] → N15 + N15: [Next] (b) [Capture] → N19 + N19: ε [Field(y)] [EndObj] [Field(obj)] → ∅ + N20: ε [StartObj] → N23, N27 + N23: (a) [Variant(A)] [Capture] [Capture] → N25 + N25: ε [EndVariant] → N32 + N27: (b) [Variant(B)] [Capture] [Capture] → N29 + N29: ε [EndVariant] → N32 + N32: ε [EndObj] [Field(choice)] → ∅ + N33: (container) → N35 + N34: [Down] (item) [Capture] → N38 + N35: ε → N34, N36 + N36: ε [Clear] → N38 + N38: ε [Field(maybe)] → N39 + N39: [Up(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + EffVariant → T4 + EffToString → T5 + EffObject → T7 + EffClear → T9 + EffCapture → T10 + EffArray → T12 + + === Types === + T3: Enum EffVariantScope3 { + A: Node + B: Node + } + T4: Record EffVariant { + choice: T3 + } + T5: Record EffToString { + name: String + } + T6: Record EffObjectScope6 { + x: Node + y: Node + } + T7: Record EffObject { + obj: T6 + } + T8: Optional → Node + T9: Record EffClear { + maybe: T8 + } + T10: Record EffCapture { + name: Node + } + T11: ArrayStar → Node + T12: Record EffArray { + items: T11 + } + "); +} + +/// Test quantifier 
graph structure (ADR-0005). +#[test] +fn golden_quantifier_graphs() { + let source = indoc! {r#" + // Greedy star: Branch.next = [match, exit] + GreedyStar = (a)* @items + + // Greedy plus: must match at least once + GreedyPlus = (a)+ @items + + // Optional: branch to match or skip + Optional = (a)? @maybe + + // Non-greedy star: Branch.next = [exit, match] + LazyStar = (a)*? @items + + // Non-greedy plus + LazyPlus = (a)+? @items + + // Quantifier on sequence (QIS triggered) + QuantSeq = { (a) @x (b) @y }* + + // Nested quantifiers + NestedQuant = (outer (inner)* @inners)+ @outers + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + GreedyStar = N1 + GreedyPlus = N7 + Optional = N13 + LazyStar = N18 + LazyPlus = N24 + QuantSeq = N34 + NestedQuant = N48 + + N0: (a) [Capture] → N3 + N1: ε [StartArray] → N2 + N2: ε → N0, N5 + N3: ε [Push] → N2 + N5: ε [EndArray] [Field(items)] → ∅ + N6: (a) [Capture] → N9 + N7: ε [StartArray] → N6 + N9: ε [Push] → N6, N11 + N11: ε [EndArray] [Field(items)] → ∅ + N12: (a) [Capture] → N16 + N13: ε → N12, N14 + N14: ε [Clear] → N16 + N16: ε [Field(maybe)] → ∅ + N17: (a) [Capture] → N20 + N18: ε [StartArray] → N19 + N19: ε → N22, N17 + N20: ε [Push] → N19 + N22: ε [EndArray] [Field(items)] → ∅ + N23: (a) [Capture] → N26 + N24: ε [StartArray] → N23 + N26: ε [Push] → N28, N23 + N28: ε [EndArray] [Field(items)] → ∅ + N29: ε [StartObj] → N30 + N30: [Next] (a) [Capture] → N31 + N31: ε [Field(x)] → N32 + N32: [Next] (b) [Capture] → N38 + N34: ε [StartArray] → N35 + N35: ε → N29, N39 + N38: ε [Field(y)] [EndObj] [Push] → N35 + N39: ε [EndArray] → ∅ + N40: (outer) [Capture] → N42 + N41: [Down] (inner) [Capture] → N44 + N42: ε [StartArray] → N43 + N43: ε → N41, N46 + N44: ε [Push] → N43 + N46: ε [EndArray] [Field(inners)] → N50 + N48: ε [StartArray] → 
N40 + N50: [Up(1)] ε [Push] → N40, N52 + N52: ε [EndArray] [Field(outers)] → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + QuantSeq → T4 + Optional → T6 + NestedQuant → T9 + LazyStar → Void + LazyPlus → T11 + GreedyStar → T13 + GreedyPlus → T15 + + === Types === + T3: Record QuantSeqScope3 { + x: Node + y: Node + } + T4: ArrayStar → T3 + T5: Optional → Node + T6: Record Optional { + maybe: T5 + } + T7: ArrayStar → Node + T8: ArrayPlus → T7 + T9: Record NestedQuant { + inners: T8 + } + T10: ArrayPlus → Node + T11: Record LazyPlus { + items: T10 + } + T12: ArrayStar → Node + T13: Record GreedyStar { + items: T12 + } + T14: ArrayPlus → Node + T15: Record GreedyPlus { + items: T14 + } + "); +} diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index a316dc1a..fd15c609 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -36,6 +36,8 @@ mod alt_kinds_tests; #[cfg(test)] mod graph_build_tests; #[cfg(test)] +mod graph_master_test; +#[cfg(test)] mod graph_qis_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; From 84051e0816c8991614d04d74872e920172b257c2 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 18:43:08 -0300 Subject: [PATCH 17/23] Refactor --- crates/plotnik-lib/src/query/graph.rs | 482 +++++++---------- crates/plotnik-lib/src/query/graph_build.rs | 111 ++-- .../src/query/graph_build_tests.rs | 24 +- .../src/query/graph_master_test.rs | 150 ++--- .../plotnik-lib/src/query/graph_optimize.rs | 11 +- crates/plotnik-lib/src/query/typing.rs | 511 +++++++++--------- crates/plotnik-lib/src/query/typing_tests.rs | 16 +- 7 files changed, 613 insertions(+), 692 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index cea4f8ac..56f5b061 100644 --- 
a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -33,6 +33,17 @@ impl Fragment { } } +/// Array collection mode for loop combinators. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ArrayMode { + /// No array collection (simple repetition) + None, + /// Collect elements into array (StartArray/PushElement/EndArray) + Simple, + /// Collect with object scope per iteration (for QIS) + Qis, +} + /// Build-time graph for query compilation. /// /// Nodes are stored in a flat vector, referenced by `NodeId`. @@ -151,82 +162,217 @@ impl<'src> BuildGraph<'src> { Fragment::new(entry, exit) } - /// Zero or more (greedy): inner* - pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { + // ───────────────────────────────────────────────────────────────────── + // Generic Loop/Optional Builders + // ───────────────────────────────────────────────────────────────────── + + /// Generic loop combinator for * and + quantifiers. + /// + /// - `at_least_one`: true for + (one or more), false for * (zero or more) + /// - `greedy`: true for greedy (try match first), false for lazy (try exit first) + /// - `mode`: array collection mode + fn build_repetition( + &mut self, + inner: Fragment, + at_least_one: bool, + greedy: bool, + mode: ArrayMode, + ) -> Fragment { + let has_array = mode != ArrayMode::None; + let has_qis = mode == ArrayMode::Qis; + + // Array wrapper nodes + let start = if has_array { + let s = self.add_epsilon(); + self.node_mut(s).add_effect(BuildEffect::StartArray { + is_plus: at_least_one, + }); + Some(s) + } else { + None + }; + + let end = if has_array { + let e = self.add_epsilon(); + self.node_mut(e).add_effect(BuildEffect::EndArray); + Some(e) + } else { + None + }; + + // QIS object wrapper nodes + let (obj_start, obj_end) = if has_qis { + let os = self.add_epsilon(); + self.node_mut(os).add_effect(BuildEffect::StartObject); + let oe = self.add_epsilon(); + self.node_mut(oe).add_effect(BuildEffect::EndObject); + 
(Some(os), Some(oe)) + } else { + (None, None) + }; + + // Push node for array modes + let push = if has_array { + let p = self.add_epsilon(); + self.node_mut(p).add_effect(BuildEffect::PushElement); + Some(p) + } else { + None + }; + + // Branch node (decision point for loop continuation) let branch = self.add_epsilon(); - let exit = self.add_epsilon(); - self.connect(branch, inner.entry); - self.connect(branch, exit); - self.connect(inner.exit, branch); + // Exit node for non-array modes + let exit = if !has_array { + Some(self.add_epsilon()) + } else { + None + }; + + // Determine the effective inner entry/exit (with QIS wrapping if needed) + let (loop_body_entry, loop_body_exit) = if has_qis { + self.connect(obj_start.unwrap(), inner.entry); + self.connect(inner.exit, obj_end.unwrap()); + (obj_start.unwrap(), obj_end.unwrap()) + } else { + (inner.entry, inner.exit) + }; + + // Wire up the graph based on at_least_one and greedy + if at_least_one { + // + pattern: must match at least once + // Entry → body → push/branch → (loop back or exit) + let entry_point = start.unwrap_or(loop_body_entry); + let exit_point = end.or(exit).unwrap(); + + if let Some(s) = start { + self.connect(s, loop_body_entry); + } - Fragment::new(branch, exit) + if let Some(p) = push { + self.connect(loop_body_exit, p); + self.connect(p, branch); + } else { + self.connect(loop_body_exit, branch); + } + + if greedy { + self.connect(branch, loop_body_entry); + self.connect(branch, exit_point); + } else { + self.connect(branch, exit_point); + self.connect(branch, loop_body_entry); + } + + Fragment::new(entry_point, exit_point) + } else { + // * pattern: zero or more + // Entry → branch → (body → push → branch) or exit + let entry_point = start.unwrap_or(branch); + let exit_point = end.or(exit).unwrap(); + + if let Some(s) = start { + self.connect(s, branch); + } + + if greedy { + self.connect(branch, loop_body_entry); + self.connect(branch, exit_point); + } else { + self.connect(branch, 
exit_point); + self.connect(branch, loop_body_entry); + } + + if let Some(p) = push { + self.connect(loop_body_exit, p); + self.connect(p, branch); + } else { + self.connect(loop_body_exit, branch); + } + + Fragment::new(entry_point, exit_point) + } } - /// Zero or more (non-greedy): inner*? - pub fn zero_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + /// Generic optional combinator for ? quantifier. + /// + /// - `greedy`: true for greedy (try match first), false for lazy (try skip first) + /// - `qis`: true to wrap the optional value in an object scope + fn build_optional(&mut self, inner: Fragment, greedy: bool, qis: bool) -> Fragment { let branch = self.add_epsilon(); let exit = self.add_epsilon(); - self.connect(branch, exit); - self.connect(branch, inner.entry); - self.connect(inner.exit, branch); + if qis { + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, exit); + + if greedy { + self.connect(branch, obj_start); + self.connect(branch, exit); + } else { + self.connect(branch, exit); + self.connect(branch, obj_start); + } + } else { + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); + + self.connect(skip, exit); + self.connect(inner.exit, exit); + + if greedy { + self.connect(branch, inner.entry); + self.connect(branch, skip); + } else { + self.connect(branch, skip); + self.connect(branch, inner.entry); + } + } Fragment::new(branch, exit) } - /// One or more (greedy): inner+ - pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - let exit = self.add_epsilon(); + // ───────────────────────────────────────────────────────────────────── + // Simple Loop Combinators (no array collection) + // 
───────────────────────────────────────────────────────────────────── - self.connect(inner.exit, branch); - self.connect(branch, inner.entry); - self.connect(branch, exit); + /// Zero or more (greedy): inner* + pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::None) + } - Fragment::new(inner.entry, exit) + /// Zero or more (non-greedy): inner*? + pub fn zero_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::None) + } + + /// One or more (greedy): inner+ + pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::None) } /// One or more (non-greedy): inner+? pub fn one_or_more_lazy(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - let exit = self.add_epsilon(); - - self.connect(inner.exit, branch); - self.connect(branch, exit); - self.connect(branch, inner.entry); - - Fragment::new(inner.entry, exit) + self.build_repetition(inner, true, false, ArrayMode::None) } /// Optional (greedy): inner? pub fn optional(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - let skip = self.add_epsilon(); - self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); - let exit = self.add_epsilon(); - - self.connect(branch, inner.entry); - self.connect(branch, skip); - self.connect(skip, exit); - self.connect(inner.exit, exit); - - Fragment::new(branch, exit) + self.build_optional(inner, true, false) } /// Optional (non-greedy): inner?? 
pub fn optional_lazy(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - let skip = self.add_epsilon(); - self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); - let exit = self.add_epsilon(); - - self.connect(branch, skip); - self.connect(skip, exit); - self.connect(branch, inner.entry); - self.connect(inner.exit, exit); - - Fragment::new(branch, exit) + self.build_optional(inner, false, false) } // ───────────────────────────────────────────────────────────────────── @@ -235,92 +381,22 @@ impl<'src> BuildGraph<'src> { /// Zero or more with array collection (greedy): inner* pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: false }); - - let branch = self.add_epsilon(); - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, branch); - self.connect(branch, inner.entry); - self.connect(branch, end); - self.connect(inner.exit, push); - self.connect(push, branch); - - Fragment::new(start, end) + self.build_repetition(inner, false, true, ArrayMode::Simple) } /// Zero or more with array collection (non-greedy): inner*? 
pub fn zero_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: false }); - - let branch = self.add_epsilon(); - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, branch); - self.connect(branch, end); - self.connect(branch, inner.entry); - self.connect(inner.exit, push); - self.connect(push, branch); - - Fragment::new(start, end) + self.build_repetition(inner, false, false, ArrayMode::Simple) } /// One or more with array collection (greedy): inner+ pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: true }); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let branch = self.add_epsilon(); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, inner.entry); - self.connect(inner.exit, push); - self.connect(push, branch); - self.connect(branch, inner.entry); - self.connect(branch, end); - - Fragment::new(start, end) + self.build_repetition(inner, true, true, ArrayMode::Simple) } /// One or more with array collection (non-greedy): inner+? 
pub fn one_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: true }); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let branch = self.add_epsilon(); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, inner.entry); - self.connect(inner.exit, push); - self.connect(push, branch); - self.connect(branch, end); - self.connect(branch, inner.entry); - - Fragment::new(start, end) + self.build_repetition(inner, true, false, ArrayMode::Simple) } // ───────────────────────────────────────────────────────────────────── @@ -332,176 +408,34 @@ impl<'src> BuildGraph<'src> { /// Each iteration is wrapped in StartObject/EndObject to keep /// multiple captures coupled per-iteration. pub fn zero_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: false }); - - let branch = self.add_epsilon(); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, branch); - self.connect(branch, obj_start); - self.connect(branch, end); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, push); - self.connect(push, branch); - - Fragment::new(start, end) + self.build_repetition(inner, false, true, ArrayMode::Qis) } /// Zero or more with QIS object wrapping (non-greedy): inner*? 
pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: false }); - - let branch = self.add_epsilon(); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, branch); - self.connect(branch, end); - self.connect(branch, obj_start); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, push); - self.connect(push, branch); - - Fragment::new(start, end) + self.build_repetition(inner, false, false, ArrayMode::Qis) } /// One or more with QIS object wrapping (greedy): inner+ pub fn one_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: true }); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let branch = self.add_epsilon(); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, obj_start); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, push); - self.connect(push, branch); - self.connect(branch, obj_start); - self.connect(branch, end); - - Fragment::new(start, end) + self.build_repetition(inner, true, true, ArrayMode::Qis) } /// One or more with QIS object wrapping 
(non-greedy): inner+? pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { - let start = self.add_epsilon(); - self.node_mut(start) - .add_effect(BuildEffect::StartArray { is_plus: true }); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let push = self.add_epsilon(); - self.node_mut(push).add_effect(BuildEffect::PushElement); - - let branch = self.add_epsilon(); - - let end = self.add_epsilon(); - self.node_mut(end).add_effect(BuildEffect::EndArray); - - self.connect(start, obj_start); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, push); - self.connect(push, branch); - self.connect(branch, end); - self.connect(branch, obj_start); - - Fragment::new(start, end) + self.build_repetition(inner, true, false, ArrayMode::Qis) } /// Optional with QIS object wrapping: inner? /// /// Wraps the optional value in an object scope. pub fn optional_qis(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let exit = self.add_epsilon(); - - self.connect(branch, obj_start); - self.connect(branch, exit); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, exit); - - Fragment::new(branch, exit) + self.build_optional(inner, true, true) } /// Optional with QIS object wrapping (non-greedy): inner?? 
pub fn optional_qis_lazy(&mut self, inner: Fragment) -> Fragment { - let branch = self.add_epsilon(); - - let obj_start = self.add_epsilon(); - self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); - - let obj_end = self.add_epsilon(); - self.node_mut(obj_end).add_effect(BuildEffect::EndObject); - - let exit = self.add_epsilon(); - - self.connect(branch, exit); - self.connect(branch, obj_start); - self.connect(obj_start, inner.entry); - self.connect(inner.exit, obj_end); - self.connect(obj_end, exit); - - Fragment::new(branch, exit) + self.build_optional(inner, false, true) } } @@ -667,6 +601,10 @@ impl RefMarker { matches!(self, RefMarker::None) } + pub fn is_some(&self) -> bool { + !matches!(self, RefMarker::None) + } + pub fn is_enter(&self) -> bool { matches!(self, RefMarker::Enter { .. }) } diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 2d7048b6..e2664609 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -505,25 +505,23 @@ impl<'a> Query<'a> { return self.construct_expr(&inner_expr, ctx); }; - let inner_frag = self.construct_expr(&inner_expr, ctx); - let is_qis = self.qis_triggers.contains_key(quant); - - match op.kind() { - SyntaxKind::Star if is_qis => self.graph.zero_or_more_array_qis(inner_frag), - SyntaxKind::Star => self.graph.zero_or_more_array(inner_frag), - SyntaxKind::StarQuestion if is_qis => { - self.graph.zero_or_more_array_qis_lazy(inner_frag) - } - SyntaxKind::StarQuestion => self.graph.zero_or_more_array_lazy(inner_frag), - SyntaxKind::Plus if is_qis => self.graph.one_or_more_array_qis(inner_frag), - SyntaxKind::Plus => self.graph.one_or_more_array(inner_frag), - SyntaxKind::PlusQuestion if is_qis => self.graph.one_or_more_array_qis_lazy(inner_frag), - SyntaxKind::PlusQuestion => self.graph.one_or_more_array_lazy(inner_frag), - SyntaxKind::Question if is_qis => self.graph.optional_qis(inner_frag), - 
SyntaxKind::Question => self.graph.optional(inner_frag), - SyntaxKind::QuestionQuestion if is_qis => self.graph.optional_qis_lazy(inner_frag), - SyntaxKind::QuestionQuestion => self.graph.optional_lazy(inner_frag), - _ => inner_frag, + let f = self.construct_expr(&inner_expr, ctx); + let qis = self.qis_triggers.contains_key(quant); + + match (op.kind(), qis) { + (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f), + (SyntaxKind::Star, true) => self.graph.zero_or_more_array_qis(f), + (SyntaxKind::StarQuestion, false) => self.graph.zero_or_more_array_lazy(f), + (SyntaxKind::StarQuestion, true) => self.graph.zero_or_more_array_qis_lazy(f), + (SyntaxKind::Plus, false) => self.graph.one_or_more_array(f), + (SyntaxKind::Plus, true) => self.graph.one_or_more_array_qis(f), + (SyntaxKind::PlusQuestion, false) => self.graph.one_or_more_array_lazy(f), + (SyntaxKind::PlusQuestion, true) => self.graph.one_or_more_array_qis_lazy(f), + (SyntaxKind::Question, false) => self.graph.optional(f), + (SyntaxKind::Question, true) => self.graph.optional_qis(f), + (SyntaxKind::QuestionQuestion, false) => self.graph.optional_lazy(f), + (SyntaxKind::QuestionQuestion, true) => self.graph.optional_qis_lazy(f), + _ => f, } } @@ -585,41 +583,16 @@ impl<'a> Query<'a> { } /// Count Field effects reachable from a node (for variant flattening). fn count_field_effects(&self, start: NodeId) -> usize { - let mut count = 0; - let mut visited = HashSet::new(); - self.count_field_effects_recursive(start, &mut count, &mut visited); - count - } - - fn count_field_effects_recursive( - &self, - node_id: NodeId, - count: &mut usize, - visited: &mut HashSet, - ) { - if !visited.insert(node_id) { - return; - } - - let node = self.graph.node(node_id); - for effect in &node.effects { - if matches!(effect, BuildEffect::Field { .. 
}) { - *count += 1; - } - } - - for &succ in &node.successors { - self.count_field_effects_recursive(succ, count, visited); - } + self.nodes_with_field_effects(start) + .iter() + .flat_map(|&id| &self.graph.node(id).effects) + .filter(|e| matches!(e, BuildEffect::Field { .. })) + .count() } /// Remove all Field effects reachable from a node (for single-capture variant flattening). fn remove_field_effects(&mut self, start: NodeId) { - let mut visited = HashSet::new(); - let mut to_clean = Vec::new(); - self.collect_nodes_with_field_effects(start, &mut to_clean, &mut visited); - - for node_id in to_clean { + for node_id in self.nodes_with_field_effects(start) { self.graph .node_mut(node_id) .effects @@ -627,28 +600,26 @@ impl<'a> Query<'a> { } } - fn collect_nodes_with_field_effects( - &self, - node_id: NodeId, - result: &mut Vec, - visited: &mut HashSet, - ) { - if !visited.insert(node_id) { - return; - } - - let node = self.graph.node(node_id); - if node - .effects - .iter() - .any(|e| matches!(e, BuildEffect::Field { .. })) - { - result.push(node_id); - } + fn nodes_with_field_effects(&self, start: NodeId) -> Vec { + let mut result = Vec::new(); + let mut visited = HashSet::new(); + let mut stack = vec![start]; - for &succ in &node.successors { - self.collect_nodes_with_field_effects(succ, result, visited); + while let Some(node_id) = stack.pop() { + if !visited.insert(node_id) { + continue; + } + let node = self.graph.node(node_id); + if node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::Field { .. 
})) + { + result.push(node_id); + } + stack.extend(&node.successors); } + result } } diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index afdc88bf..39c461e1 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -104,10 +104,10 @@ fn quantifier_star() { Q = N1 N0: (identifier) → N3 - N1: ε [StartArray] → N2 - N2: ε → N0, N4 - N3: ε [Push] → N2 - N4: ε [EndArray] → ∅ + N1: ε [StartArray] → N4 + N2: ε [EndArray] → ∅ + N3: ε [Push] → N4 + N4: ε → N0, N2 "); } @@ -116,11 +116,11 @@ fn quantifier_plus() { insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" Q = N1 - N0: (identifier) → N3 + N0: (identifier) → N4 N1: ε [StartArray] → N0 - N2: ε [Push] → N3 - N3: ε [Push] → N0, N4 - N4: ε [EndArray] → ∅ + N2: ε [EndArray] → ∅ + N3: ε [Push] → N4 + N4: ε [Push] → N0, N2 "); } @@ -129,10 +129,10 @@ fn quantifier_optional() { insta::assert_snapshot!(snapshot("Q = (identifier)?"), @r" Q = N1 - N0: (identifier) → N3 - N1: ε → N0, N2 - N2: ε [Clear] → N3 - N3: ε → ∅ + N0: (identifier) → N2 + N1: ε → N0, N3 + N2: ε → ∅ + N3: ε [Clear] → N2 "); } diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index 4473b394..b47dae25 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -311,21 +311,21 @@ fn golden_master_comprehensive() { N31: [Up(3)] ε → ∅ N32: (container) → N34 N33: [Down] (item) [Capture] → N36 - N34: ε [StartArray] → N35 - N35: ε → N33, N38 - N36: ε [Push] → N35 + N34: ε [StartArray] → N37 + N36: ε [Push] → N37 + N37: ε → N33, N38 N38: ε [EndArray] [Field(items)] → N39 N39: [Up(1)] ε → ∅ N40: (container) → N42 - N41: [Down] (item) [Capture] → N44 + N41: [Down] (item) [Capture] → N45 N42: ε [StartArray] → N41 - N44: ε [Push] → N41, N46 + N45: ε [Push] → N41, N46 N46: ε [EndArray] [Field(items)] → N47 N47: 
[Up(1)] ε → ∅ N48: (container) → N50 N49: [Down] (item) [Capture] → N53 - N50: ε → N49, N51 - N51: ε [Clear] → N53 + N50: ε → N49, N52 + N52: ε [Clear] → N53 N53: ε [Field(maybe_item)] → N54 N54: [Up(1)] ε → ∅ N55: (function) [StartObj] → N56 @@ -333,23 +333,23 @@ fn golden_master_comprehensive() { N57: ε [Field(name)] → N58 N58: [Next] (block) @body [Capture] → N59 N59: ε [Field(body)] → N65 - N61: ε [StartArray] → N62 - N62: ε → N55, N66 - N65: [Up(1)] ε [EndObj] [Push] → N62 - N66: ε [EndArray] → ∅ + N61: ε [StartArray] → N66 + N62: ε [EndArray] → ∅ + N65: [Up(1)] ε [EndObj] [Push] → N66 + N66: ε → N55, N62 N67: ε [StartObj] → N68 N68: [Next] (key) [Capture] → N69 N69: ε [Field(key)] → N70 N70: [Next] (value) [Capture] → N76 - N72: ε [StartArray] → N73 - N73: ε → N67, N77 - N76: ε [Field(value)] [EndObj] [Push] → N73 - N77: ε [EndArray] → ∅ + N72: ε [StartArray] → N77 + N73: ε [EndArray] → ∅ + N76: ε [Field(value)] [EndObj] [Push] → N77 + N77: ε → N67, N73 N79: [Next] (item) [Capture] → N83 - N81: ε [StartArray] → N82 - N82: ε → N79, N84 - N83: ε [Field(item)] [Push] → N82 - N84: ε [EndArray] → ∅ + N81: ε [StartArray] → N84 + N82: ε [EndArray] → ∅ + N83: ε [Field(item)] [Push] → N84 + N84: ε → N79, N82 N85: ε → N88, N92 N86: ε → ∅ N88: (success) [Variant(Ok)] [Capture] → N90 @@ -429,25 +429,25 @@ fn golden_master_comprehensive() { N189: (single) [Capture] → N190 N190: ε [Field(item)] → N188 N191: (multi) → N193 - N192: [Down] (x) [Capture] → N195 + N192: [Down] (x) [Capture] → N196 N193: ε [StartArray] → N192 - N195: ε [Push] → N192, N197 + N196: ε [Push] → N192, N197 N197: ε [EndArray] [Field(item)] → N188 N199: (_) [Capture] → N201 N200: [Down] (item) [Capture] → N203 - N201: ε [StartArray] → N202 - N202: ε → N200, N205 - N203: ε [Push] → N202 - N205: ε [EndArray] [Field(inner)] → N209 + N201: ε [StartArray] → N204 + N203: ε [Push] → N204 + N204: ε → N200, N205 + N205: ε [EndArray] [Field(inner)] → N210 N207: ε [StartArray] → N199 - N209: [Up(1)] ε [Push] → 
N199, N211 + N210: [Up(1)] ε [Push] → N199, N211 N211: ε [EndArray] [Field(outer)] → ∅ N212: (module) → N213 N213: [Down] (identifier) @name [Capture] [ToString] → N216 N215: [Next.] (import) [Capture] → N218 - N216: ε [Field(mod_name)] [StartArray] → N217 - N217: ε → N215, N220 - N218: ε [Push] → N217 + N216: ε [Field(mod_name)] [StartArray] → N219 + N218: ε [Push] → N219 + N219: ε → N215, N220 N220: ε [EndArray] [Field(imports)] → N221 N221: [Next] (block) @body → N251 N222: [Down] ε → N223 @@ -458,9 +458,9 @@ fn golden_master_comprehensive() { N229: [Next] (parameters) @params → N233 N230: [Down] ε → N231 N231: [Next] (param) [Capture] [Capture] → N235 - N233: ε [StartArray] → N234 - N234: ε → N230, N237 - N235: ε [Field(p)] [Push] → N234 + N233: ε [StartArray] → N236 + N235: ε [Field(p)] [Push] → N236 + N236: ε → N230, N237 N237: ε [EndArray] [Field(params)] → N238 N238: [Up(1)] ε → N239 N239: [Next] (block) @body [Capture] → N240 @@ -472,10 +472,10 @@ fn golden_master_comprehensive() { N247: [Next] (class_body) @body [Capture] → N248 N248: ε [Field(cls_body)] → N250 N250: [Up(1)] ε [EndObj] [EndVariant] → N255 - N251: ε [StartObj] [StartArray] → N252 - N252: ε → N253, N259 + N251: ε [StartObj] [StartArray] → N256 N253: ε [StartObj] → N222 - N255: ε [EndObj] [Push] → N252 + N255: ε [EndObj] [Push] → N256 + N256: ε → N253, N259 N259: ε [EndArray] [EndObj] [Field(items)] → N260 N260: [Up(1)] ε → N261 N261: [Up.(1)] ε → ∅ @@ -899,27 +899,27 @@ fn golden_type_inference() { N35: ε [EndVariant] → N36 N36: [Up(1)] ε → ∅ N37: (_) → N39 - N38: [Down] (item) [Capture] → N41 + N38: [Down] (item) [Capture] → N42 N39: ε [StartArray] → N38 - N41: ε [Push] → N38, N43 + N42: ε [Push] → N38, N43 N43: ε [EndArray] [Field(items)] → N47 - N45: ε [StartArray] → N46 - N46: ε → N37, N48 - N47: [Up(1)] ε [Push] → N46 - N48: ε [EndArray] → ∅ + N45: ε [StartArray] → N48 + N46: ε [EndArray] → ∅ + N47: [Up(1)] ε [Push] → N48 + N48: ε → N37, N46 N49: ε [StartObj] → N50 N50: [Next] (a) 
[Capture] → N51 N51: ε [Field(x)] → N52 N52: [Next] (b) [Capture] → N58 - N54: ε [StartArray] → N55 - N55: ε → N49, N59 - N58: ε [Field(y)] [EndObj] [Push] → N55 - N59: ε [EndArray] → ∅ + N54: ε [StartArray] → N59 + N55: ε [EndArray] → ∅ + N58: ε [Field(y)] [EndObj] [Push] → N59 + N59: ε → N49, N55 N61: [Next] (a) [Capture] → N65 - N63: ε [StartArray] → N64 - N64: ε → N61, N66 - N65: ε [Field(x)] [Push] → N64 - N66: ε [EndArray] → ∅ + N63: ε [StartArray] → N66 + N64: ε [EndArray] → ∅ + N65: ε [Field(x)] [Push] → N66 + N66: ε → N61, N64 N67: ε → N70, N80 N68: ε → ∅ N70: (full) [Variant(Full)] [StartObj] → N71 @@ -1048,9 +1048,9 @@ fn golden_effect_patterns() { N3: ε [Field(name)] → ∅ N4: (container) → N6 N5: [Down] (item) [Capture] → N8 - N6: ε [StartArray] → N7 - N7: ε → N5, N10 - N8: ε [Push] → N7 + N6: ε [StartArray] → N9 + N8: ε [Push] → N9 + N9: ε → N5, N10 N10: ε [EndArray] [Field(items)] → N11 N11: [Up(1)] ε → ∅ N12: ε [StartObj] → N13 @@ -1066,8 +1066,8 @@ fn golden_effect_patterns() { N32: ε [EndObj] [Field(choice)] → ∅ N33: (container) → N35 N34: [Down] (item) [Capture] → N38 - N35: ε → N34, N36 - N36: ε [Clear] → N38 + N35: ε → N34, N37 + N37: ε [Clear] → N38 N38: ε [Field(maybe)] → N39 N39: [Up(1)] ε → ∅ @@ -1155,43 +1155,43 @@ fn golden_quantifier_graphs() { NestedQuant = N48 N0: (a) [Capture] → N3 - N1: ε [StartArray] → N2 - N2: ε → N0, N5 - N3: ε [Push] → N2 + N1: ε [StartArray] → N4 + N3: ε [Push] → N4 + N4: ε → N0, N5 N5: ε [EndArray] [Field(items)] → ∅ - N6: (a) [Capture] → N9 + N6: (a) [Capture] → N10 N7: ε [StartArray] → N6 - N9: ε [Push] → N6, N11 + N10: ε [Push] → N6, N11 N11: ε [EndArray] [Field(items)] → ∅ N12: (a) [Capture] → N16 - N13: ε → N12, N14 - N14: ε [Clear] → N16 + N13: ε → N12, N15 + N15: ε [Clear] → N16 N16: ε [Field(maybe)] → ∅ N17: (a) [Capture] → N20 - N18: ε [StartArray] → N19 - N19: ε → N22, N17 - N20: ε [Push] → N19 + N18: ε [StartArray] → N21 + N20: ε [Push] → N21 + N21: ε → N22, N17 N22: ε [EndArray] [Field(items)] → ∅ - 
N23: (a) [Capture] → N26 + N23: (a) [Capture] → N27 N24: ε [StartArray] → N23 - N26: ε [Push] → N28, N23 + N27: ε [Push] → N28, N23 N28: ε [EndArray] [Field(items)] → ∅ N29: ε [StartObj] → N30 N30: [Next] (a) [Capture] → N31 N31: ε [Field(x)] → N32 N32: [Next] (b) [Capture] → N38 - N34: ε [StartArray] → N35 - N35: ε → N29, N39 - N38: ε [Field(y)] [EndObj] [Push] → N35 - N39: ε [EndArray] → ∅ + N34: ε [StartArray] → N39 + N35: ε [EndArray] → ∅ + N38: ε [Field(y)] [EndObj] [Push] → N39 + N39: ε → N29, N35 N40: (outer) [Capture] → N42 N41: [Down] (inner) [Capture] → N44 - N42: ε [StartArray] → N43 - N43: ε → N41, N46 - N44: ε [Push] → N43 - N46: ε [EndArray] [Field(inners)] → N50 + N42: ε [StartArray] → N45 + N44: ε [Push] → N45 + N45: ε → N41, N46 + N46: ε [EndArray] [Field(inners)] → N51 N48: ε [StartArray] → N40 - N50: [Up(1)] ε [Push] → N40, N52 + N51: [Up(1)] ε [Push] → N40, N52 N52: ε [EndArray] [Field(outers)] → ∅ ═══════════════════════════════════════════════════════════════════════════════ diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index 482a306e..7faa0f0d 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -59,13 +59,6 @@ pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeS } let successor_id = node.successors[0]; - - let successor = graph.node(successor_id); - if !successor.ref_marker.is_none() && !node.effects.is_empty() { - stats.epsilons_kept += 1; - continue; - } - let effects_to_prepend = graph.node(id).effects.clone(); let nav_to_transfer = graph.node(id).nav; let preds = predecessors.get(&id).cloned().unwrap_or_default(); @@ -126,7 +119,7 @@ fn is_eliminable_epsilon( return false; } - if !node.ref_marker.is_none() { + if node.ref_marker.is_some() { return false; } @@ -143,7 +136,7 @@ fn is_eliminable_epsilon( } } - if !node.effects.is_empty() && !successor.ref_marker.is_none() { + if 
!node.effects.is_empty() && successor.ref_marker.is_some() { return false; } diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 821a4341..778de98e 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -19,7 +19,7 @@ use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; use super::Query; -use super::graph::{BuildEffect, BuildGraph, NodeId, RefMarker}; +use super::graph::{BuildEffect, BuildGraph, BuildNode, NodeId, RefMarker}; /// Result of type inference. #[derive(Debug, Default)] @@ -453,217 +453,17 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } if !visited.insert(node_id) { - // Already visited - this is a reconvergence point return (state.pending, Some(node_id)); } let node = self.graph.node(node_id); - // Process effects for effect in &node.effects { - match effect { - BuildEffect::CaptureNode => { - // At Exit nodes, use the referenced definition's type if available - let capture_type = if let RefMarker::Exit { ref_id } = &node.ref_marker { - self.find_ref_type(*ref_id).unwrap_or(TYPE_NODE) - } else { - TYPE_NODE - }; - state.pending = Some(PendingType::primitive(capture_type)); - } - BuildEffect::ClearCurrent => { - state.pending = None; - } - BuildEffect::ToString => { - state.pending = Some(PendingType::primitive(TYPE_STR)); - } - BuildEffect::Field { name, span } => { - if let Some(pending) = state.pending.take() { - // SAFETY: name comes from source with 'src lifetime - let name: &'src str = unsafe { std::mem::transmute(*name) }; - let current_variant = state.current_variant.map(|v| { - let v: &'src str = unsafe { std::mem::transmute(v) }; - v - }); - - let effective_card = pending - .cardinality - .multiply(state.effective_array_cardinality()); - - let current_scope = scope_stack - .last_mut() - .map(|e| &mut e.scope) - .expect("scope stack should not be empty"); - - // When inside an object 
scope (object_depth > 0), fields go to the - // object, not to a variant scope. The object becomes the variant payload. - if let Some(tag) = current_variant.filter(|_| state.object_depth == 0) { - let variant_scope = current_scope.variants.entry(tag).or_default(); - variant_scope.add_field( - name, - pending.base_type, - effective_card, - *span, - pending.is_array, - ); - } else { - current_scope.add_field( - name, - pending.base_type, - effective_card, - *span, - pending.is_array, - ); - } - } - } - BuildEffect::StartArray { is_plus } => { - let cardinality = if *is_plus { - Cardinality::Plus - } else { - Cardinality::Star - }; - state.array_stack.push(ArrayFrame { - cardinality, - element_type: None, - start_node: Some(node_id), - push_called: false, - }); - } - BuildEffect::PushElement => { - if let Some(pending) = state.pending.take() { - if let Some(frame) = state.array_stack.last_mut() { - frame.element_type = Some(pending.base_type); - frame.push_called = true; - // Update shared map so other branches (exit path) see the element type - if let Some(start_id) = frame.start_node { - self.array_element_types.insert(start_id, pending.base_type); - } - } - } - } - BuildEffect::EndArray => { - // Note: EndArray processes even in dry_run mode because loops need - // element type tracking. Only EndObject is skipped in dry_run. 
- if let Some(frame) = state.array_stack.pop() { - // Check if PushElement was actually called (either in this branch or another) - let push_was_called = frame.push_called - || frame - .start_node - .map_or(false, |id| self.array_element_types.contains_key(&id)); - - if push_was_called { - // Get element type from shared map (set by loop body's PushElement) - let element_type = frame - .start_node - .and_then(|id| self.array_element_types.get(&id).copied()) - .or(frame.element_type) - .unwrap_or(TYPE_NODE); - - let array_type = - self.wrap_with_cardinality(element_type, frame.cardinality); - state.pending = Some(PendingType { - base_type: array_type, - cardinality: Cardinality::One, - is_array: true, - }); - } - } - } - BuildEffect::StartObject => { - state.object_depth += 1; - let entry = ScopeStackEntry::new_object(state.pending.take()); - scope_stack.push(entry); - } - BuildEffect::EndObject => { - state.object_depth = state.object_depth.saturating_sub(1); - // In dry_run mode, don't pop scope or create types - just collect info - if state.dry_run { - continue; - } - if let Some(finished_entry) = scope_stack.pop() { - if finished_entry.is_object { - let finished_scope = finished_entry.scope; - - if !finished_scope.is_empty() { - let type_name = self.generate_scope_name(); - let type_id = if finished_scope.has_variants - && !finished_scope.variants.is_empty() - { - self.create_enum_type(type_name, &finished_scope) - } else { - self.create_struct_type(type_name, &finished_scope) - }; - - state.pending = Some(PendingType { - base_type: type_id, - cardinality: Cardinality::One, - is_array: false, - }); - } else { - state.pending = finished_entry.outer_pending; - } - } else { - scope_stack.push(finished_entry); - } - } - } - BuildEffect::StartVariant(tag) => { - // SAFETY: tag comes from source with 'src lifetime - let tag: &'static str = unsafe { std::mem::transmute(*tag) }; - state.current_variant = Some(tag); - let current_scope = scope_stack - .last_mut() - 
.map(|e| &mut e.scope) - .expect("scope stack should not be empty"); - current_scope.has_variants = true; - } - BuildEffect::EndVariant => { - if let Some(tag) = state.current_variant.take() { - // SAFETY: tag comes from source with 'src lifetime - let tag: &'src str = unsafe { std::mem::transmute(tag) }; - let current_scope = scope_stack - .last_mut() - .map(|e| &mut e.scope) - .expect("scope stack should not be empty"); - - let variant_scope = current_scope.variants.entry(tag).or_default(); - - // Single-capture flattening (ADR-0007): if there's a pending capture - // but no fields were added (Field effect was removed), store the - // captured type directly as a synthetic field for flattening. - if variant_scope.fields.is_empty() { - if let Some(pending) = state.pending.take() { - variant_scope.add_field( - "$value", // synthetic name, will be flattened away - pending.base_type, - pending.cardinality, - rowan::TextRange::default(), - pending.is_array, - ); - } - } - } - } - } + self.process_effect(effect, node_id, &node.ref_marker, &mut state, scope_stack); } // Process successors - // References are opaque: when entering a reference, skip the definition body - // and only follow return transitions (successors that aren't the def entry) - let def_entry_to_skip: Option = match &node.ref_marker { - RefMarker::Enter { .. } => node.ref_name.and_then(|name| self.graph.definition(name)), - _ => None, - }; - - let live_successors: Vec<_> = node - .successors - .iter() - .filter(|s| !self.dead_nodes.contains(s)) - .filter(|s| def_entry_to_skip.map_or(true, |def| **s != def)) - .copied() - .collect(); - + let live_successors = self.get_live_successors(node); if live_successors.is_empty() { return (state.pending, None); } @@ -679,32 +479,261 @@ impl<'src, 'g> InferenceContext<'src, 'g> { ); } - // Branching: two-phase approach to handle reconvergence correctly. 
- // - // Phase 1: Explore each branch with its OWN visited set to: - // - Collect scope modifications from each branch - // - Find where branches reconverge (common nodes) - // - // Phase 2: Merge branch scopes, then continue from reconvergence point - // with the merged scope (processing shared suffix once). - let total_branches = live_successors.len(); + self.explore_branches(live_successors, state, visited, depth, errors, scope_stack) + } + + /// Process a single effect, updating state and scope_stack. + fn process_effect( + &mut self, + effect: &BuildEffect<'src>, + node_id: NodeId, + ref_marker: &RefMarker, + state: &mut TraversalState, + scope_stack: &mut Vec>, + ) { + match effect { + BuildEffect::CaptureNode => { + let capture_type = match ref_marker { + RefMarker::Exit { ref_id } => self.find_ref_type(*ref_id).unwrap_or(TYPE_NODE), + _ => TYPE_NODE, + }; + state.pending = Some(PendingType::primitive(capture_type)); + } + BuildEffect::ClearCurrent => { + state.pending = None; + } + BuildEffect::ToString => { + state.pending = Some(PendingType::primitive(TYPE_STR)); + } + BuildEffect::Field { name, span } => { + self.process_field_effect(name, *span, state, scope_stack); + } + BuildEffect::StartArray { is_plus } => { + let cardinality = if *is_plus { + Cardinality::Plus + } else { + Cardinality::Star + }; + state.array_stack.push(ArrayFrame { + cardinality, + element_type: None, + start_node: Some(node_id), + push_called: false, + }); + } + BuildEffect::PushElement => { + self.process_push_element(state); + } + BuildEffect::EndArray => { + self.process_end_array(state); + } + BuildEffect::StartObject => { + state.object_depth += 1; + scope_stack.push(ScopeStackEntry::new_object(state.pending.take())); + } + BuildEffect::EndObject => { + state.object_depth = state.object_depth.saturating_sub(1); + if !state.dry_run { + self.process_end_object(state, scope_stack); + } + } + BuildEffect::StartVariant(tag) => { + // SAFETY: tag comes from source with 'src 
lifetime + let tag: &'static str = unsafe { std::mem::transmute(*tag) }; + state.current_variant = Some(tag); + if let Some(entry) = scope_stack.last_mut() { + entry.scope.has_variants = true; + } + } + BuildEffect::EndVariant => { + self.process_end_variant(state, scope_stack); + } + } + } + + fn process_field_effect( + &self, + name: &str, + span: TextRange, + state: &mut TraversalState, + scope_stack: &mut Vec>, + ) { + let Some(pending) = state.pending.take() else { + return; + }; + + // SAFETY: name comes from source with 'src lifetime + let name: &'src str = unsafe { std::mem::transmute(name) }; + let current_variant: Option<&'src str> = state + .current_variant + .map(|v| unsafe { std::mem::transmute(v) }); + + let effective_card = pending + .cardinality + .multiply(state.effective_array_cardinality()); + let Some(entry) = scope_stack.last_mut() else { + return; + }; + + // Inside object scope, fields go to object, not variant scope + let target_scope = match current_variant.filter(|_| state.object_depth == 0) { + Some(tag) => entry.scope.variants.entry(tag).or_default(), + None => &mut entry.scope, + }; + target_scope.add_field( + name, + pending.base_type, + effective_card, + span, + pending.is_array, + ); + } + + fn process_push_element(&mut self, state: &mut TraversalState) { + let Some(pending) = state.pending.take() else { + return; + }; + let Some(frame) = state.array_stack.last_mut() else { + return; + }; + + frame.element_type = Some(pending.base_type); + frame.push_called = true; + if let Some(start_id) = frame.start_node { + self.array_element_types.insert(start_id, pending.base_type); + } + } + + fn process_end_array(&mut self, state: &mut TraversalState) { + let Some(frame) = state.array_stack.pop() else { + return; + }; + + let push_was_called = frame.push_called + || frame + .start_node + .map_or(false, |id| self.array_element_types.contains_key(&id)); + + if !push_was_called { + return; + } + + let element_type = frame + .start_node + 
.and_then(|id| self.array_element_types.get(&id).copied()) + .or(frame.element_type) + .unwrap_or(TYPE_NODE); + + let array_type = self.wrap_with_cardinality(element_type, frame.cardinality); + state.pending = Some(PendingType { + base_type: array_type, + cardinality: Cardinality::One, + is_array: true, + }); + } + + fn process_end_object( + &mut self, + state: &mut TraversalState, + scope_stack: &mut Vec>, + ) { + let Some(finished_entry) = scope_stack.pop() else { + return; + }; + if !finished_entry.is_object { + scope_stack.push(finished_entry); + return; + } + + let finished_scope = finished_entry.scope; + if finished_scope.is_empty() { + state.pending = finished_entry.outer_pending; + return; + } + + let type_name = self.generate_scope_name(); + let type_id = if finished_scope.has_variants && !finished_scope.variants.is_empty() { + self.create_enum_type(type_name, &finished_scope) + } else { + self.create_struct_type(type_name, &finished_scope) + }; + + state.pending = Some(PendingType { + base_type: type_id, + cardinality: Cardinality::One, + is_array: false, + }); + } + + fn process_end_variant( + &self, + state: &mut TraversalState, + scope_stack: &mut Vec>, + ) { + let Some(tag) = state.current_variant.take() else { + return; + }; + // SAFETY: tag comes from source with 'src lifetime + let tag: &'src str = unsafe { std::mem::transmute(tag) }; + + let Some(entry) = scope_stack.last_mut() else { + return; + }; + let variant_scope = entry.scope.variants.entry(tag).or_default(); + + // Single-capture flattening (ADR-0007) + if variant_scope.fields.is_empty() { + if let Some(pending) = state.pending.take() { + variant_scope.add_field( + "$value", + pending.base_type, + pending.cardinality, + rowan::TextRange::default(), + pending.is_array, + ); + } + } + } + + /// Get live successors, filtering dead nodes and ref entry points. 
+ fn get_live_successors(&self, node: &BuildNode<'src>) -> Vec { + let def_entry_to_skip = match &node.ref_marker { + RefMarker::Enter { .. } => node.ref_name.and_then(|name| self.graph.definition(name)), + _ => None, + }; + + node.successors + .iter() + .copied() + .filter(|s| !self.dead_nodes.contains(s)) + .filter(|s| def_entry_to_skip.map_or(true, |def| *s != def)) + .collect() + } + + /// Explore multiple branches, merge scopes, handle reconvergence. + fn explore_branches( + &mut self, + successors: Vec, + state: TraversalState, + visited: &mut HashSet, + depth: usize, + errors: &mut Vec>, + scope_stack: &mut Vec>, + ) -> (Option, Option) { + let total_branches = successors.len(); let initial_scope_len = scope_stack.len(); + let use_dry_run = state.object_depth > 0; + let mut branch_scopes: Vec> = Vec::new(); let mut branch_visited_sets: Vec> = Vec::new(); let mut result_pending: Option = None; // Phase 1: explore branches independently - // Use dry_run only when inside object scope (alternation-like branching). - // For loop entry/exit (object_depth=0), process normally so EndArray works. 
- let use_dry_run = state.object_depth > 0; - - for succ in &live_successors { + for succ in &successors { let mut branch_stack = scope_stack.clone(); let mut branch_visited = visited.clone(); let mut branch_state = state.clone(); - if use_dry_run { - branch_state.dry_run = true; - } + branch_state.dry_run = use_dry_run; let (branch_pending, _) = self.traverse( *succ, @@ -715,12 +744,10 @@ impl<'src, 'g> InferenceContext<'src, 'g> { &mut branch_stack, ); - // Merge pending from branches (take first non-None) if result_pending.is_none() { result_pending = branch_pending; } - // Collect nodes newly visited by this branch let new_nodes: HashSet = branch_visited.difference(visited).copied().collect(); branch_visited_sets.push(new_nodes); @@ -732,33 +759,19 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } - // Find reconvergence: nodes visited by ALL branches (shared suffix) - let reconverge_nodes: HashSet = if branch_visited_sets.len() >= 2 { - let mut iter = branch_visited_sets.iter(); - let first = iter.next().unwrap().clone(); - iter.fold(first, |acc, set| acc.intersection(set).copied().collect()) - } else { - HashSet::new() - }; - - // Merge branch scopes into main scope + // Merge branch scopes if let Some(main_entry) = scope_stack.last_mut() { for branch_scope in branch_scopes { - let merge_errs = main_entry.scope.merge_from(branch_scope); - errors.extend(merge_errs); + errors.extend(main_entry.scope.merge_from(branch_scope)); } main_entry.scope.apply_optionality(total_branches); } - // Phase 2: if dry_run was used and there's a reconvergence point, - // continue from there with merged scope - if use_dry_run && !reconverge_nodes.is_empty() { - // Find the "entry" reconvergence node: the one with minimum ID - // (nodes are created in traversal order, so first shared node has lowest ID) - let reconverge_entry = reconverge_nodes.iter().min().copied(); + // Find and process reconvergence + let reconverge_nodes = self.find_reconvergence(&branch_visited_sets); - if 
let Some(entry_node) = reconverge_entry { - // Mark branch-specific nodes as visited, but NOT reconverge nodes + if use_dry_run && !reconverge_nodes.is_empty() { + if let Some(entry_node) = reconverge_nodes.iter().min().copied() { for branch_set in &branch_visited_sets { for &nid in branch_set { if !reconverge_nodes.contains(&nid) { @@ -766,8 +779,7 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } } - // Continue from reconvergence with merged scope (dry_run = false) - let mut cont_state = state.clone(); + let mut cont_state = state; cont_state.dry_run = false; cont_state.pending = result_pending; return self.traverse( @@ -781,14 +793,21 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } - // No reconvergence or couldn't find entry point - mark all visited for branch_set in branch_visited_sets { visited.extend(branch_set); } - (result_pending, None) } + fn find_reconvergence(&self, branch_sets: &[HashSet]) -> HashSet { + if branch_sets.len() < 2 { + return HashSet::new(); + } + let mut iter = branch_sets.iter(); + let first = iter.next().unwrap().clone(); + iter.fold(first, |acc, set| acc.intersection(set).copied().collect()) + } + fn generate_scope_name(&self) -> &'src str { let name = format!("{}Scope{}", self.current_def_name, self.next_type_id); Box::leak(name.into_boxed_str()) diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 63134620..4a2857b5 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -44,10 +44,10 @@ fn debug_star_quantifier_graph() { N1: [Down] (item) [Capture] → N2 N2: ε [Field(items)] → N3 N3: [Up(1)] ε → N6 - N4: ε [StartArray] → N5 - N5: ε → N0, N7 - N6: ε [Push] → N5 - N7: ε [EndArray] → ∅ + N4: ε [StartArray] → N7 + N5: ε [EndArray] → ∅ + N6: ε [Push] → N7 + N7: ε → N0, N5 === Graph (after optimization) === Foo = N4 @@ -55,10 +55,10 @@ fn debug_star_quantifier_graph() { N0: (_) → N1 N1: [Down] (item) [Capture] → N2 N2: ε 
[Field(items)] → N6 - N4: ε [StartArray] → N5 - N5: ε → N0, N7 - N6: [Up(1)] ε [Push] → N5 - N7: ε [EndArray] → ∅ + N4: ε [StartArray] → N7 + N5: ε [EndArray] → ∅ + N6: [Up(1)] ε [Push] → N7 + N7: ε → N0, N5 === Entrypoints === Foo → T4 From 1e1ab44eb6ca82ff5b1618fd63b78b3bfc98b9c1 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 18:50:01 -0300 Subject: [PATCH 18/23] Sentinel for default --- crates/plotnik-lib/src/lib.rs | 2 +- crates/plotnik-lib/src/query/mod.rs | 1 + crates/plotnik-lib/src/query/printer_tests.rs | 15 +++++++++++++- crates/plotnik-lib/src/query/symbol_table.rs | 20 ++++++++++++++----- .../src/query/symbol_table_tests.rs | 8 ++++++-- 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index cdfa048f..adad70f6 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -28,7 +28,7 @@ pub mod query; pub type PassResult = std::result::Result<(T, Diagnostics), Error>; pub use diagnostics::{Diagnostics, DiagnosticsPrinter, Severity}; -pub use query::Query; +pub use query::{Query, UNNAMED_DEF}; /// Errors that can occur during query parsing. 
#[derive(Debug, Clone, thiserror::Error)] diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index fd15c609..321bf01e 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -27,6 +27,7 @@ pub mod typing; pub use graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; pub use graph_optimize::OptimizeStats; +pub use symbol_table::UNNAMED_DEF; pub use typing::{ InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, }; diff --git a/crates/plotnik-lib/src/query/printer_tests.rs b/crates/plotnik-lib/src/query/printer_tests.rs index be5b8f51..b7813b96 100644 --- a/crates/plotnik-lib/src/query/printer_tests.rs +++ b/crates/plotnik-lib/src/query/printer_tests.rs @@ -160,6 +160,9 @@ fn printer_symbols_with_cardinalities() { insta::assert_snapshot!(q.printer().only_symbols(true).with_cardinalities(true).dump(), @r" A¹ B⁺ + _ + A¹ + B⁺ "); } @@ -175,6 +178,9 @@ fn printer_symbols_with_refs() { A B A + _ + B + A "); } @@ -193,6 +199,10 @@ fn printer_symbols_cycle() { B A B (cycle) + _ + A + B + A (cycle) "); } @@ -200,7 +210,10 @@ fn printer_symbols_cycle() { fn printer_symbols_undefined_ref() { let input = "(call (Undefined))"; let q = Query::try_from(input).unwrap(); - insta::assert_snapshot!(q.printer().only_symbols(true).dump(), @""); + insta::assert_snapshot!(q.printer().only_symbols(true).dump(), @r" + _ + Undefined? + "); } #[test] diff --git a/crates/plotnik-lib/src/query/symbol_table.rs b/crates/plotnik-lib/src/query/symbol_table.rs index 4066d62d..6ab31455 100644 --- a/crates/plotnik-lib/src/query/symbol_table.rs +++ b/crates/plotnik-lib/src/query/symbol_table.rs @@ -6,6 +6,10 @@ use indexmap::IndexMap; +/// Sentinel name for unnamed definitions (bare expressions at root level). +/// Code generators can emit whatever name they want for this. 
+pub const UNNAMED_DEF: &str = "_"; + use crate::diagnostics::DiagnosticKind; use crate::parser::{Expr, Ref, ast, token_src}; @@ -17,13 +21,14 @@ impl<'a> Query<'a> { pub(super) fn resolve_names(&mut self) { // Pass 1: collect definitions for def in self.ast.defs() { - let Some(name_token) = def.name() else { - continue; + let (name, is_named) = match def.name() { + Some(token) => (token_src(&token, self.source), true), + None => (UNNAMED_DEF, false), }; - let name = token_src(&name_token, self.source); - - if self.symbol_table.contains_key(name) { + // Skip duplicate check for unnamed definitions (already diagnosed by parser) + if is_named && self.symbol_table.contains_key(name) { + let name_token = def.name().unwrap(); self.resolve_diagnostics .report(DiagnosticKind::DuplicateDefinition, name_token.text_range()) .message(name) @@ -31,6 +36,11 @@ impl<'a> Query<'a> { continue; } + // For unnamed defs, only keep the last one (parser already warned about others) + if !is_named && self.symbol_table.contains_key(name) { + self.symbol_table.shift_remove(name); + } + let Some(body) = def.body() else { continue; }; diff --git a/crates/plotnik-lib/src/query/symbol_table_tests.rs b/crates/plotnik-lib/src/query/symbol_table_tests.rs index b18fc915..979d8f9e 100644 --- a/crates/plotnik-lib/src/query/symbol_table_tests.rs +++ b/crates/plotnik-lib/src/query/symbol_table_tests.rs @@ -180,7 +180,11 @@ fn entry_point_reference() { let query = Query::try_from(input).unwrap(); assert!(query.is_valid()); - insta::assert_snapshot!(query.dump_symbols(), @"Expr"); + insta::assert_snapshot!(query.dump_symbols(), @r" + Expr + _ + Expr + "); } #[test] @@ -202,7 +206,7 @@ fn no_definitions() { let input = "(identifier)"; let query = Query::try_from(input).unwrap(); assert!(query.is_valid()); - insta::assert_snapshot!(query.dump_symbols(), @""); + insta::assert_snapshot!(query.dump_symbols(), @"_"); } #[test] From 7db4c3a0b905af3cd24cc90d0649dea44ec4f15d Mon Sep 17 00:00:00 2001 From: 
Sergei Zharinov Date: Sat, 13 Dec 2025 19:11:18 -0300 Subject: [PATCH 19/23] Fix ADR mismatch --- crates/plotnik-lib/src/query/graph.rs | 15 +- crates/plotnik-lib/src/query/graph_build.rs | 32 ++- .../src/query/graph_build_tests.rs | 8 +- crates/plotnik-lib/src/query/graph_dump.rs | 2 +- .../src/query/graph_master_test.rs | 243 +++++++++--------- crates/plotnik-lib/src/query/typing.rs | 81 ++++-- crates/plotnik-lib/src/query/typing_tests.rs | 46 +++- 7 files changed, 260 insertions(+), 167 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index 56f5b061..5950c3ee 100644 --- a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -203,7 +203,9 @@ impl<'src> BuildGraph<'src> { // QIS object wrapper nodes let (obj_start, obj_end) = if has_qis { let os = self.add_epsilon(); - self.node_mut(os).add_effect(BuildEffect::StartObject); + self.node_mut(os).add_effect(BuildEffect::StartObject { + for_alternation: false, + }); let oe = self.add_epsilon(); self.node_mut(oe).add_effect(BuildEffect::EndObject); (Some(os), Some(oe)) @@ -306,7 +308,9 @@ impl<'src> BuildGraph<'src> { if qis { let obj_start = self.add_epsilon(); self.node_mut(obj_start) - .add_effect(BuildEffect::StartObject); + .add_effect(BuildEffect::StartObject { + for_alternation: false, + }); let obj_end = self.add_epsilon(); self.node_mut(obj_end).add_effect(BuildEffect::EndObject); @@ -564,7 +568,12 @@ pub enum BuildEffect<'src> { }, PushElement, EndArray, - StartObject, + /// Start object scope. `for_alternation` is true when this object wraps a captured + /// tagged alternation (tags should create enum), false for QIS/sequence objects + /// (tags in inner alternations should be ignored). 
+ StartObject { + for_alternation: bool, + }, EndObject, Field { name: &'src str, diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index e2664609..f524b446 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -354,17 +354,20 @@ impl<'a> Query<'a> { let body_frag = self.construct_expr(&body, NavContext::Root); - // Count Field effects to determine flattening (ADR-0007) + // Count Field effects to determine object wrapping. + // Note: Single-capture flattening (ADR-0007) is handled in type inference, + // not here, because we don't know if the alternation is captured yet. + // Uncaptured inline tagged alternations need Field effects preserved. let field_count = self.count_field_effects(body_frag.entry); - if field_count == 1 { - // Single capture: flatten by removing the Field effect - self.remove_field_effects(body_frag.entry); - } else if field_count > 1 { + if field_count > 1 { // Multiple captures: wrap with StartObject/EndObject + // This is NOT the alternation capture object - it's the variant's scope self.graph .node_mut(start_id) - .add_effect(BuildEffect::StartObject); + .add_effect(BuildEffect::StartObject { + for_alternation: false, + }); } let end_id = self.graph.add_epsilon(); @@ -467,12 +470,17 @@ impl<'a> Query<'a> { .map(|t| t.text_range()) .unwrap_or_default(); + // Check if we're capturing an alternation (for enum vs struct distinction) + let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); + let (entry, exit) = if needs_object_wrapper { // Wrap with StartObject/EndObject for composite captures let start_id = self.graph.add_epsilon(); self.graph .node_mut(start_id) - .add_effect(BuildEffect::StartObject); + .add_effect(BuildEffect::StartObject { + for_alternation: is_alternation_capture, + }); self.graph.connect(start_id, inner_frag.entry); let end_id = self.graph.add_epsilon(); @@ -590,16 +598,6 @@ impl<'a> Query<'a> { 
.count() } - /// Remove all Field effects reachable from a node (for single-capture variant flattening). - fn remove_field_effects(&mut self, start: NodeId) { - for node_id in self.nodes_with_field_effects(start) { - self.graph - .node_mut(node_id) - .effects - .retain(|e| !matches!(e, BuildEffect::Field { .. })); - } - } - fn nodes_with_field_effects(&self, start: NodeId) -> Vec { let mut result = Vec::new(); let mut visited = HashSet::new(); diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 39c461e1..82345f76 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -89,12 +89,12 @@ fn alternation_tagged() { N1: ε → ∅ N2: ε [Variant(A)] → N3 N3: (a) [Variant(A)] [Capture] → N5 - N4: ε → N5 - N5: ε [EndVariant] → N1 + N4: ε [Field(x)] → N5 + N5: ε [Field(x)] [EndVariant] → N1 N6: ε [Variant(B)] → N7 N7: (b) [Variant(B)] [Capture] → N9 - N8: ε → N9 - N9: ε [EndVariant] → N1 + N8: ε [Field(y)] → N9 + N9: ε [Field(y)] [EndVariant] → N1 "); } diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs index cbf3e679..0fc3f19f 100644 --- a/crates/plotnik-lib/src/query/graph_dump.rs +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -161,7 +161,7 @@ fn format_effect(effect: &BuildEffect) -> String { BuildEffect::StartArray { .. } => "StartArray".to_string(), BuildEffect::PushElement => "Push".to_string(), BuildEffect::EndArray => "EndArray".to_string(), - BuildEffect::StartObject => "StartObj".to_string(), + BuildEffect::StartObject { .. } => "StartObj".to_string(), BuildEffect::EndObject => "EndObj".to_string(), BuildEffect::Field { name, .. 
} => format!("Field({})", name), BuildEffect::StartVariant(v) => format!("Variant({})", v), diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index b47dae25..a3eb86d2 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -353,22 +353,22 @@ fn golden_master_comprehensive() { N85: ε → N88, N92 N86: ε → ∅ N88: (success) [Variant(Ok)] [Capture] → N90 - N90: ε [EndVariant] → N86 + N90: ε [Field(val)] [EndVariant] → N86 N92: (error) [Variant(Err)] [Capture] [ToString] → N94 - N94: ε [EndVariant] → N86 + N94: ε [Field(msg)] [EndVariant] → N86 N95: (wrapper) → N106 N96: [Down] ε → N99, N103 N99: (left_node) [Variant(Left)] [Capture] [Capture] → N101 - N101: ε [EndVariant] → N108 + N101: ε [Field(l)] [EndVariant] → N108 N103: (right_node) [Variant(Right)] [Capture] [Capture] → N105 - N105: ε [EndVariant] → N108 + N105: ε [Field(r)] [EndVariant] → N108 N106: ε [StartObj] → N96 N108: ε [EndObj] [Field(choice)] → N109 N109: [Up(1)] ε → ∅ N110: ε → N113, N117 N111: ε → ∅ N113: (node) [Variant(Simple)] [Capture] → N115 - N115: ε [EndVariant] → N111 + N115: ε [Field(val)] [EndVariant] → N111 N117: (pair) [Variant(Complex)] [StartObj] → N118 N118: [Down] (key) [Capture] → N119 N119: ε [Field(k)] → N120 @@ -487,7 +487,7 @@ fn golden_master_comprehensive() { N267: ε → N270, N274 N268: ε → ∅ N270: (value) [Variant(Some)] [Capture] → N272 - N272: ε [EndVariant] → N268 + N272: ε [Field(val)] [EndVariant] → N268 N274: (none_marker) [Variant(None)] → N275 N275: ε [EndVariant] → N268 @@ -512,23 +512,23 @@ fn golden_master_comprehensive() { SimpleCapture → T23 RefChain → Void RefCaptured → T24 - QisSequence → T26 - QisNode → T28 - PlusQuant → T30 - OptQuant → T32 - NoQis → T34 + QisSequence → T25 + QisNode → T26 + PlusQuant → T28 + OptQuant → T30 + NoQis → T32 NoCaptures → Void - NestedScopes → T38 - NestedQuant → T41 - MultiCapture → T42 - EmptyBranch → T43 
- DeepNest → T44 - Complex → T46 - CardinalityJoin → T48 - CapturedSeq → T50 - AnchorSibling → T51 - AnchorLast → T52 - AnchorFirst → T53 + NestedScopes → T36 + NestedQuant → T39 + MultiCapture → T40 + EmptyBranch → T41 + DeepNest → T42 + Complex → T44 + CardinalityJoin → T46 + CapturedSeq → T48 + AnchorSibling → T49 + AnchorLast → T50 + AnchorFirst → T51 === Types === T3: Record Identifier { @@ -598,82 +598,78 @@ fn golden_master_comprehensive() { key: Node value: Node } - T26: ArrayStar → T25 - T27: Record QisNodeScope27 { + T26: Record QisNodeScope26 { name: Node body: Node } - T28: ArrayStar → T27 - T29: ArrayPlus → Node - T30: Record PlusQuant { - items: T29 + T27: ArrayPlus → Node + T28: Record PlusQuant { + items: T27 } - T31: Optional → Node - T32: Record OptQuant { - maybe_item: T31 + T29: Optional → Node + T30: Record OptQuant { + maybe_item: T29 } - T33: ArrayStar → Node - T34: Record NoQis { - item: T33 + T31: ArrayStar → Node + T32: Record NoQis { + item: T31 } - T35: Record NestedScopesScope35 { + T33: Record NestedScopesScope33 { a: Node } - T36: Record NestedScopesScope36 { + T34: Record NestedScopesScope34 { b: Node } - T37: Record NestedScopesScope37 { - inner1: T35 - inner2: T36 + T35: Record NestedScopesScope35 { + inner1: T33 + inner2: T34 } - T38: Record NestedScopes { - outer: T37 + T36: Record NestedScopes { + outer: T35 } - T39: ArrayStar → Node - T40: ArrayPlus → T39 - T41: Record NestedQuant { - inner: T40 + T37: ArrayStar → Node + T38: ArrayPlus → Node + T39: Record NestedQuant { + inner: T37 + outer: T38 } - T42: Record MultiCapture { + T40: Record MultiCapture { fn_name: String fn_body: Node } - T43: Enum EmptyBranch { + T41: Enum EmptyBranch { Some: Node None: Void } - T44: Record DeepNest { + T42: Record DeepNest { deep: Node } - T45: ArrayStar → Node - T46: Record Complex { + T43: ArrayStar → Node + T44: Record Complex { mod_name: String - imports: T45 + imports: T43 } - T47: ArrayPlus → Node - T48: Record CardinalityJoin { - item: 
Node + T45: ArrayPlus → Node + T46: Record CardinalityJoin { + item: T45 } - T49: Record CapturedSeqScope49 { + T47: Record CapturedSeqScope47 { x: Node y: Node } - T50: Record CapturedSeq { - nested: T49 + T48: Record CapturedSeq { + nested: T47 } - T51: Record AnchorSibling { + T49: Record AnchorSibling { left: Node right: Node } - T52: Record AnchorLast { + T50: Record AnchorLast { last: Node } - T53: Record AnchorFirst { + T51: Record AnchorFirst { first: Node } - - === Errors === - field `item` in `CardinalityJoin`: incompatible types [Node, Node] "#); } @@ -888,15 +884,15 @@ fn golden_type_inference() { N15: ε → N18, N22 N16: ε → ∅ N18: (a) [Variant(A)] [Capture] → N20 - N20: ε [EndVariant] → N16 + N20: ε [Field(x)] [EndVariant] → N16 N22: (b) [Variant(B)] [Capture] → N24 - N24: ε [EndVariant] → N16 + N24: ε [Field(y)] [EndVariant] → N16 N25: (wrapper) → N26 N26: [Down] ε → N29, N33 N29: (a) [Variant(A)] [Capture] → N31 - N31: ε [EndVariant] → N36 + N31: ε [Field(x)] [EndVariant] → N36 N33: (b) [Variant(B)] [Capture] → N35 - N35: ε [EndVariant] → N36 + N35: ε [Field(y)] [EndVariant] → N36 N36: [Up(1)] ε → ∅ N37: (_) → N39 N38: [Down] (item) [Capture] → N42 @@ -931,7 +927,8 @@ fn golden_type_inference() { N76: ε [Field(c)] → N78 N78: [Up(1)] ε [EndObj] [EndVariant] → N68 N80: (partial) [Variant(Partial)] → N81 - N81: [Down] (a) [Capture] → N84 + N81: [Down] (a) [Capture] → N82 + N82: ε [Field(a)] → N84 N84: [Up(1)] ε [EndVariant] → N68 N85: (foo) → N89 N86: [Down] ε → N87 @@ -946,61 +943,61 @@ fn golden_type_inference() { === Entrypoints === BaseWithCapture → T3 - TaggedInline → T4 - TaggedAtRoot → T5 - SyntheticNames → T7 + TaggedInline → T6 + TaggedAtRoot → T7 + SyntheticNames → T9 RefOpaque → Void - RefCaptured → T8 - QisTwo → T10 - NoQisOne → T12 - MissingField → T14 - FlatScope → T15 + RefCaptured → T10 + QisTwo → T11 + NoQisOne → T13 + MissingField → T15 + FlatScope → T16 CardMult → T18 === Types === T3: Record BaseWithCapture { name: Node } - T4: Enum 
TaggedInline { - A: Node - B: Node + T4: Optional → Node + T5: Optional → Node + T6: Record TaggedInline { + x: T4 + y: T5 } - T5: Enum TaggedAtRoot { + T7: Enum TaggedAtRoot { A: Node B: Node } - T6: Record SyntheticNamesScope6 { + T8: Record SyntheticNamesScope8 { bar: Node } - T7: Record SyntheticNames { - baz: T6 + T9: Record SyntheticNames { + baz: T8 } - T8: Record RefCaptured { + T10: Record RefCaptured { result: T3 } - T9: Record QisTwoScope9 { + T11: Record QisTwoScope11 { x: Node y: Node } - T10: ArrayStar → T9 - T11: ArrayStar → Node - T12: Record NoQisOne { - x: T11 + T12: ArrayStar → Node + T13: Record NoQisOne { + x: T12 } - T13: Record MissingFieldScope13 { + T14: Record MissingFieldScope14 { a: Node b: Node c: Node } - T14: Enum MissingField { - Full: T13 + T15: Enum MissingField { + Full: T14 Partial: Node } - T15: Record FlatScope { + T16: Record FlatScope { val: Node } - T16: ArrayPlus → Node - T17: ArrayStar → T16 + T17: ArrayStar → Node T18: Record CardMult { items: T17 } @@ -1060,9 +1057,9 @@ fn golden_effect_patterns() { N19: ε [Field(y)] [EndObj] [Field(obj)] → ∅ N20: ε [StartObj] → N23, N27 N23: (a) [Variant(A)] [Capture] [Capture] → N25 - N25: ε [EndVariant] → N32 + N25: ε [Field(x)] [EndVariant] → N32 N27: (b) [Variant(B)] [Capture] [Capture] → N29 - N29: ε [EndVariant] → N32 + N29: ε [Field(y)] [EndVariant] → N32 N32: ε [EndObj] [Field(choice)] → ∅ N33: (container) → N35 N34: [Down] (item) [Capture] → N38 @@ -1199,40 +1196,44 @@ fn golden_quantifier_graphs() { ═══════════════════════════════════════════════════════════════════════════════ === Entrypoints === - QuantSeq → T4 - Optional → T6 - NestedQuant → T9 - LazyStar → Void - LazyPlus → T11 - GreedyStar → T13 - GreedyPlus → T15 + QuantSeq → T3 + Optional → T5 + NestedQuant → T8 + LazyStar → T10 + LazyPlus → T12 + GreedyStar → T14 + GreedyPlus → T16 === Types === T3: Record QuantSeqScope3 { x: Node y: Node } - T4: ArrayStar → T3 - T5: Optional → Node - T6: Record Optional { - maybe: T5 
+ T4: Optional → Node + T5: Record Optional { + maybe: T4 } - T7: ArrayStar → Node - T8: ArrayPlus → T7 - T9: Record NestedQuant { - inners: T8 + T6: ArrayStar → Node + T7: ArrayPlus → Node + T8: Record NestedQuant { + inners: T6 + outers: T7 } - T10: ArrayPlus → Node - T11: Record LazyPlus { - items: T10 + T9: ArrayStar → Node + T10: Record LazyStar { + items: T9 } - T12: ArrayStar → Node - T13: Record GreedyStar { - items: T12 + T11: ArrayPlus → Node + T12: Record LazyPlus { + items: T11 + } + T13: ArrayStar → Node + T14: Record GreedyStar { + items: T13 } - T14: ArrayPlus → Node - T15: Record GreedyPlus { - items: T14 + T15: ArrayPlus → Node + T16: Record GreedyPlus { + items: T15 } "); } diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 778de98e..8ce16e91 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -296,15 +296,42 @@ struct ArrayFrame { push_called: bool, } -#[derive(Clone, Default)] +#[derive(Clone)] struct TraversalState { pending: Option, current_variant: Option<&'static str>, array_stack: Vec, object_depth: usize, + /// Stack tracking whether each object scope is for a captured alternation. + /// Tags only create enums when inside an object opened for_alternation=true. + object_alt_stack: Vec, /// When true, skip EndObject type creation. /// Used during alternation branch exploration to collect variants before creating enum. dry_run: bool, + /// True when we're still at the definition root (no structural context entered). + /// Used to determine if tagged alternations should create enums (ADR-0009 §Case 1 vs §Case 3). 
+ at_definition_root: bool, +} + +impl Default for TraversalState { + fn default() -> Self { + Self { + pending: None, + current_variant: None, + array_stack: Vec::new(), + object_depth: 0, + object_alt_stack: Vec::new(), + dry_run: false, + at_definition_root: true, + } + } +} + +impl TraversalState { + /// Check if we're inside an object scope opened for a captured alternation. + fn in_alternation_object(&self) -> bool { + self.object_alt_stack.last().copied().unwrap_or(false) + } } impl TraversalState { @@ -458,6 +485,11 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let node = self.graph.node(node_id); + // Clear definition root flag when we enter structural context (non-epsilon matchers) + if !node.is_epsilon() { + state.at_definition_root = false; + } + for effect in &node.effects { self.process_effect(effect, node_id, &node.ref_marker, &mut state, scope_stack); } @@ -527,12 +559,16 @@ impl<'src, 'g> InferenceContext<'src, 'g> { BuildEffect::EndArray => { self.process_end_array(state); } - BuildEffect::StartObject => { + BuildEffect::StartObject { for_alternation } => { + // Entering an object scope means we're no longer at definition root + state.at_definition_root = false; state.object_depth += 1; + state.object_alt_stack.push(*for_alternation); scope_stack.push(ScopeStackEntry::new_object(state.pending.take())); } BuildEffect::EndObject => { state.object_depth = state.object_depth.saturating_sub(1); + state.object_alt_stack.pop(); if !state.dry_run { self.process_end_object(state, scope_stack); } @@ -541,8 +577,15 @@ impl<'src, 'g> InferenceContext<'src, 'g> { // SAFETY: tag comes from source with 'src lifetime let tag: &'static str = unsafe { std::mem::transmute(*tag) }; state.current_variant = Some(tag); - if let Some(entry) = scope_stack.last_mut() { - entry.scope.has_variants = true; + // Create enum for: + // - Definition root tagged alternations (ADR-0009 §Case 3) + // - Captured tagged alternations inside objects with for_alternation=true + // 
Uncaptured inline tagged alternations (including inside QIS objects) + // behave like untagged (ADR-0009 §Case 1). + if state.at_definition_root || state.in_alternation_object() { + if let Some(entry) = scope_stack.last_mut() { + entry.scope.has_variants = true; + } } } BuildEffect::EndVariant => { @@ -575,8 +618,14 @@ impl<'src, 'g> InferenceContext<'src, 'g> { return; }; - // Inside object scope, fields go to object, not variant scope - let target_scope = match current_variant.filter(|_| state.object_depth == 0) { + // Route fields to variant scope only when: + // 1. We're in a variant context (current_variant is set) + // 2. Either at definition root OR inside an alternation-capturing object + // 3. The scope is creating an enum (has_variants is true) + // Otherwise, fields go to the main scope (for uncaptured inline alternations). + let in_variant_context = + (state.object_depth == 0 || state.in_alternation_object()) && entry.scope.has_variants; + let target_scope = match current_variant.filter(|_| in_variant_context) { Some(tag) => entry.scope.variants.entry(tag).or_default(), None => &mut entry.scope, }; @@ -609,25 +658,21 @@ impl<'src, 'g> InferenceContext<'src, 'g> { return; }; - let push_was_called = frame.push_called - || frame - .start_node - .map_or(false, |id| self.array_element_types.contains_key(&id)); - - if !push_was_called { - return; - } - + // Get element type from recorded types or default to Node. + // For lazy quantifiers (*?, +?), the exit path may not execute the loop body, + // but we still need to produce an array type (empty array case). let element_type = frame .start_node .and_then(|id| self.array_element_types.get(&id).copied()) .or(frame.element_type) .unwrap_or(TYPE_NODE); - let array_type = self.wrap_with_cardinality(element_type, frame.cardinality); + // Keep element type with cardinality (not wrapped array type) to enable + // proper cardinality join in alternations (ADR-0009 §Cardinality Lifting Coercion). 
+ // The array wrapper is applied later in create_struct_type via wrap_with_cardinality. state.pending = Some(PendingType { - base_type: array_type, - cardinality: Cardinality::One, + base_type: element_type, + cardinality: frame.cardinality, is_array: true, }); } diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 4a2857b5..38affd9d 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -428,14 +428,13 @@ fn quantifier_on_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T4 + Foo → T3 === Types === T3: Record FooScope3 { x: Node y: Node } - T4: ArrayStar → T3 "); } @@ -477,7 +476,7 @@ fn qis_alternation_in_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T6 + Foo → T5 === Types === T3: Optional → Node @@ -486,7 +485,48 @@ fn qis_alternation_in_sequence() { x: T3 y: T4 } + "); +} + +#[test] +fn quantified_seq_with_inline_tagged_alt() { + // Issue #5: captures from inline tagged alternation inside quantified sequence + // The tagged alternation is uncaptured, so it should behave like untagged. + // All captures should propagate with Optional cardinality. + let input = indoc! 
{r#" + Test = { [ A: (a) @x B: (b) @y ] }* @items + "#}; + + let result = infer_with_graph(input); + insta::assert_snapshot!(result, @r" + === Graph === + Test = N11 + + N0: ε [StartObj] → N1 + N1: [Next] ε → N4, N8 + N4: (a) [Variant(A)] [Capture] [Capture] → N6 + N6: ε [Field(x)] [EndVariant] → N15 + N8: (b) [Variant(B)] [Capture] [Capture] → N10 + N10: ε [Field(y)] [EndVariant] → N15 + N11: ε [StartObj] [StartArray] → N16 + N15: ε [EndObj] [Push] → N16 + N16: ε → N0, N19 + N19: ε [EndArray] [EndObj] [Field(items)] → ∅ + + === Entrypoints === + Test → T7 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record TestScope3 { + x: T3 + y: T4 + } T6: ArrayStar → T5 + T7: Record Test { + items: T6 + } "); } From 089f9cc314b8f790ed0c9dcc53126332bc799e14 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 19:29:25 -0300 Subject: [PATCH 20/23] Fixes --- crates/plotnik-lib/src/query/graph.rs | 9 +- .../src/query/graph_master_test.rs | 204 +++++++++--------- crates/plotnik-lib/src/query/typing.rs | 28 ++- crates/plotnik-lib/src/query/typing_tests.rs | 6 +- 4 files changed, 139 insertions(+), 108 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index 5950c3ee..8336db2a 100644 --- a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -315,15 +315,20 @@ impl<'src> BuildGraph<'src> { let obj_end = self.add_epsilon(); self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + // Skip path needs ClearCurrent to indicate "nothing captured" + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); + self.connect(obj_start, inner.entry); self.connect(inner.exit, obj_end); self.connect(obj_end, exit); + self.connect(skip, exit); if greedy { self.connect(branch, obj_start); - self.connect(branch, exit); + self.connect(branch, skip); } else { - self.connect(branch, exit); + self.connect(branch, skip); self.connect(branch, 
obj_start); } } else { diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index a3eb86d2..e5d42bfb 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -512,23 +512,23 @@ fn golden_master_comprehensive() { SimpleCapture → T23 RefChain → Void RefCaptured → T24 - QisSequence → T25 - QisNode → T26 - PlusQuant → T28 - OptQuant → T30 - NoQis → T32 + QisSequence → T26 + QisNode → T28 + PlusQuant → T30 + OptQuant → T32 + NoQis → T34 NoCaptures → Void - NestedScopes → T36 - NestedQuant → T39 - MultiCapture → T40 - EmptyBranch → T41 - DeepNest → T42 - Complex → T44 - CardinalityJoin → T46 - CapturedSeq → T48 - AnchorSibling → T49 - AnchorLast → T50 - AnchorFirst → T51 + NestedScopes → T38 + NestedQuant → T41 + MultiCapture → T42 + EmptyBranch → T43 + DeepNest → T44 + Complex → T46 + CardinalityJoin → T48 + CapturedSeq → T50 + AnchorSibling → T51 + AnchorLast → T52 + AnchorFirst → T53 === Types === T3: Record Identifier { @@ -598,76 +598,78 @@ fn golden_master_comprehensive() { key: Node value: Node } - T26: Record QisNodeScope26 { + T26: ArrayStar → T25 + T27: Record QisNodeScope27 { name: Node body: Node } - T27: ArrayPlus → Node - T28: Record PlusQuant { - items: T27 + T28: ArrayStar → T27 + T29: ArrayPlus → Node + T30: Record PlusQuant { + items: T29 } - T29: Optional → Node - T30: Record OptQuant { - maybe_item: T29 + T31: Optional → Node + T32: Record OptQuant { + maybe_item: T31 } - T31: ArrayStar → Node - T32: Record NoQis { - item: T31 + T33: ArrayStar → Node + T34: Record NoQis { + item: T33 } - T33: Record NestedScopesScope33 { + T35: Record NestedScopesScope35 { a: Node } - T34: Record NestedScopesScope34 { + T36: Record NestedScopesScope36 { b: Node } - T35: Record NestedScopesScope35 { - inner1: T33 - inner2: T34 + T37: Record NestedScopesScope37 { + inner1: T35 + inner2: T36 } - T36: Record NestedScopes { - outer: T35 + T38: 
Record NestedScopes { + outer: T37 } - T37: ArrayStar → Node - T38: ArrayPlus → Node - T39: Record NestedQuant { - inner: T37 - outer: T38 + T39: ArrayStar → Node + T40: ArrayPlus → Node + T41: Record NestedQuant { + inner: T39 + outer: T40 } - T40: Record MultiCapture { + T42: Record MultiCapture { fn_name: String fn_body: Node } - T41: Enum EmptyBranch { + T43: Enum EmptyBranch { Some: Node None: Void } - T42: Record DeepNest { + T44: Record DeepNest { deep: Node } - T43: ArrayStar → Node - T44: Record Complex { + T45: ArrayStar → Node + T46: Record Complex { mod_name: String - imports: T43 + imports: T45 } - T45: ArrayPlus → Node - T46: Record CardinalityJoin { - item: T45 + T47: ArrayPlus → Node + T48: Record CardinalityJoin { + item: T47 } - T47: Record CapturedSeqScope47 { + T49: Record CapturedSeqScope49 { x: Node y: Node } - T48: Record CapturedSeq { - nested: T47 + T50: Record CapturedSeq { + nested: T49 } - T49: Record AnchorSibling { + T51: Record AnchorSibling { left: Node right: Node } - T50: Record AnchorLast { + T52: Record AnchorLast { last: Node } - T51: Record AnchorFirst { + T53: Record AnchorFirst { first: Node } "#); @@ -948,11 +950,11 @@ fn golden_type_inference() { SyntheticNames → T9 RefOpaque → Void RefCaptured → T10 - QisTwo → T11 - NoQisOne → T13 - MissingField → T15 - FlatScope → T16 - CardMult → T18 + QisTwo → T12 + NoQisOne → T14 + MissingField → T16 + FlatScope → T17 + CardMult → T19 === Types === T3: Record BaseWithCapture { @@ -981,25 +983,26 @@ fn golden_type_inference() { x: Node y: Node } - T12: ArrayStar → Node - T13: Record NoQisOne { - x: T12 + T12: ArrayStar → T11 + T13: ArrayStar → Node + T14: Record NoQisOne { + x: T13 } - T14: Record MissingFieldScope14 { + T15: Record MissingFieldScope15 { a: Node b: Node c: Node } - T15: Enum MissingField { - Full: T14 + T16: Enum MissingField { + Full: T15 Partial: Node } - T16: Record FlatScope { + T17: Record FlatScope { val: Node } - T17: ArrayStar → Node - T18: Record CardMult { - 
items: T17 + T18: ArrayStar → Node + T19: Record CardMult { + items: T18 } "); } @@ -1196,44 +1199,45 @@ fn golden_quantifier_graphs() { ═══════════════════════════════════════════════════════════════════════════════ === Entrypoints === - QuantSeq → T3 - Optional → T5 - NestedQuant → T8 - LazyStar → T10 - LazyPlus → T12 - GreedyStar → T14 - GreedyPlus → T16 + QuantSeq → T4 + Optional → T6 + NestedQuant → T9 + LazyStar → T11 + LazyPlus → T13 + GreedyStar → T15 + GreedyPlus → T17 === Types === T3: Record QuantSeqScope3 { x: Node y: Node } - T4: Optional → Node - T5: Record Optional { - maybe: T4 - } - T6: ArrayStar → Node - T7: ArrayPlus → Node - T8: Record NestedQuant { - inners: T6 - outers: T7 - } - T9: ArrayStar → Node - T10: Record LazyStar { - items: T9 - } - T11: ArrayPlus → Node - T12: Record LazyPlus { - items: T11 - } - T13: ArrayStar → Node - T14: Record GreedyStar { - items: T13 - } - T15: ArrayPlus → Node - T16: Record GreedyPlus { - items: T15 + T4: ArrayStar → T3 + T5: Optional → Node + T6: Record Optional { + maybe: T5 + } + T7: ArrayStar → Node + T8: ArrayPlus → Node + T9: Record NestedQuant { + inners: T7 + outers: T8 + } + T10: ArrayStar → Node + T11: Record LazyStar { + items: T10 + } + T12: ArrayPlus → Node + T13: Record LazyPlus { + items: T12 + } + T14: ArrayStar → Node + T15: Record GreedyStar { + items: T14 + } + T16: ArrayPlus → Node + T17: Record GreedyPlus { + items: T16 } "); } diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 8ce16e91..3596007f 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -305,9 +305,13 @@ struct TraversalState { /// Stack tracking whether each object scope is for a captured alternation. /// Tags only create enums when inside an object opened for_alternation=true. object_alt_stack: Vec, - /// When true, skip EndObject type creation. + /// When true, skip EndObject type creation at the dry_run_depth level. 
/// Used during alternation branch exploration to collect variants before creating enum. dry_run: bool, + /// The object_depth at which dry_run was enabled. EndObject is only skipped + /// when object_depth matches this value (after decrement), allowing nested + /// objects to be processed normally. + dry_run_depth: usize, /// True when we're still at the definition root (no structural context entered). /// Used to determine if tagged alternations should create enums (ADR-0009 §Case 1 vs §Case 3). at_definition_root: bool, @@ -322,6 +326,7 @@ impl Default for TraversalState { object_depth: 0, object_alt_stack: Vec::new(), dry_run: false, + dry_run_depth: 0, at_definition_root: true, } } @@ -458,7 +463,12 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } else if !scope.fields.is_empty() { self.create_struct_type(def_name, &scope) } else if let Some(pending) = final_pending { - pending.base_type + // Wrap with cardinality for array types (from EndArray effect) + if pending.is_array { + self.wrap_with_cardinality(pending.base_type, pending.cardinality) + } else { + pending.base_type + } } else { TYPE_VOID } @@ -567,9 +577,12 @@ impl<'src, 'g> InferenceContext<'src, 'g> { scope_stack.push(ScopeStackEntry::new_object(state.pending.take())); } BuildEffect::EndObject => { + // Only skip EndObject processing when closing the scope that was open at dry_run time. + // Check BEFORE decrementing to correctly identify which scope we're closing. + let skip_for_dry_run = state.dry_run && state.object_depth == state.dry_run_depth; state.object_depth = state.object_depth.saturating_sub(1); state.object_alt_stack.pop(); - if !state.dry_run { + if !skip_for_dry_run { self.process_end_object(state, scope_stack); } } @@ -692,7 +705,11 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let finished_scope = finished_entry.scope; if finished_scope.is_empty() { - state.pending = finished_entry.outer_pending; + // If current pending exists (from nested EndObject), keep it. 
+ // Otherwise restore what was pending before this object started. + if state.pending.is_none() { + state.pending = finished_entry.outer_pending; + } return; } @@ -779,6 +796,9 @@ impl<'src, 'g> InferenceContext<'src, 'g> { let mut branch_visited = visited.clone(); let mut branch_state = state.clone(); branch_state.dry_run = use_dry_run; + if use_dry_run { + branch_state.dry_run_depth = state.object_depth; + } let (branch_pending, _) = self.traverse( *succ, diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 38affd9d..2cb0179e 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -428,13 +428,14 @@ fn quantifier_on_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T3 + Foo → T4 === Types === T3: Record FooScope3 { x: Node y: Node } + T4: ArrayStar → T3 "); } @@ -476,7 +477,7 @@ fn qis_alternation_in_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" === Entrypoints === - Foo → T5 + Foo → T6 === Types === T3: Optional → Node @@ -485,6 +486,7 @@ fn qis_alternation_in_sequence() { x: T3 y: T4 } + T6: ArrayStar → T5 "); } From bbd64d90cb6d188f43adbbc8f5b1813531f52728 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 19:43:25 -0300 Subject: [PATCH 21/23] AST-based typing --- .../src/query/graph_master_test.rs | 52 +- crates/plotnik-lib/src/query/typing.rs | 1057 +++++++---------- 2 files changed, 471 insertions(+), 638 deletions(-) diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index e5d42bfb..1d77456a 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -523,12 +523,12 @@ fn golden_master_comprehensive() { MultiCapture → T42 EmptyBranch → T43 DeepNest → T44 - Complex → T46 - CardinalityJoin → T48 - CapturedSeq → T50 - 
AnchorSibling → T51 - AnchorLast → T52 - AnchorFirst → T53 + Complex → T54 + CardinalityJoin → T56 + CapturedSeq → T58 + AnchorSibling → T59 + AnchorLast → T60 + AnchorFirst → T61 === Types === T3: Record Identifier { @@ -646,30 +646,46 @@ fn golden_master_comprehensive() { T44: Record DeepNest { deep: Node } - T45: ArrayStar → Node - T46: Record Complex { + T45: Optional → String + T46: ArrayStar → Node + T47: ArrayStar → Node + T48: Optional → Node + T49: Optional → String + T50: Optional → Node + T51: Record ComplexScope45 { + fn_name: T45 + p: T46 + params: T47 + fn_body: T48 + cls_name: T49 + cls_body: T50 + } + T52: ArrayStar → T51 + T53: ArrayStar → Node + T54: Record Complex { mod_name: String - imports: T45 + imports: T53 + items: T52 } - T47: ArrayPlus → Node - T48: Record CardinalityJoin { - item: T47 + T55: ArrayPlus → Node + T56: Record CardinalityJoin { + item: T55 } - T49: Record CapturedSeqScope49 { + T57: Record CapturedSeqScope57 { x: Node y: Node } - T50: Record CapturedSeq { - nested: T49 + T58: Record CapturedSeq { + nested: T57 } - T51: Record AnchorSibling { + T59: Record AnchorSibling { left: Node right: Node } - T52: Record AnchorLast { + T60: Record AnchorLast { last: Node } - T53: Record AnchorFirst { + T61: Record AnchorFirst { first: Node } "#); diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index 3596007f..bfb796c8 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -1,25 +1,30 @@ -//! Type inference for Query's BuildGraph. +//! AST-based type inference for Plotnik queries. //! -//! Analyzes the graph structure statically to determine output types. -//! Follows rules from ADR-0006, ADR-0007 and ADR-0009. +//! Analyzes query AST to determine output types. +//! Rules follow ADR-0009 (Type System). //! -//! # Algorithm Overview +//! # Design //! -//! 1. 
Traverse graph to collect all scope boundaries (StartObject/EndObject, StartArray/EndArray) -//! 2. Associate Field effects with their containing object scope -//! 3. Build types bottom-up from scope hierarchy -//! 4. Handle branching by merging fields with optionality rules +//! Unlike graph-based inference which must reconstruct structure from CFG traversal, +//! AST-based inference directly walks the tree structure: +//! - Sequences → `SeqExpr` +//! - Alternations → `AltExpr` with `.kind()` for tagged/untagged +//! - Quantifiers → `QuantifiedExpr` +//! - Captures → `CapturedExpr` +//! +//! This eliminates dry-run traversal, reconvergence detection, and scope stack management. -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, HashSet}; use indexmap::IndexMap; use rowan::TextRange; use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; +use crate::parser::ast::{self, AltKind, Expr}; +use crate::parser::token_src; use super::Query; -use super::graph::{BuildEffect, BuildGraph, BuildNode, NodeId, RefMarker}; /// Result of type inference. #[derive(Debug, Default)] @@ -89,6 +94,7 @@ enum Cardinality { } impl Cardinality { + /// Join cardinalities when merging alternation branches. fn join(self, other: Cardinality) -> Cardinality { use Cardinality::*; match (self, other) { @@ -108,10 +114,22 @@ impl Cardinality { x => x, } } + + /// Multiply cardinalities (outer * inner). 
+ fn multiply(self, inner: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, inner) { + (One, x) => x, + (x, One) => x, + (Optional, Optional) => Optional, + (Plus, Plus) => Plus, + _ => Star, + } + } } // ───────────────────────────────────────────────────────────────────────────── -// Field and Scope tracking +// Type shape for unification checking // ───────────────────────────────────────────────────────────────────────────── #[derive(Debug, Clone, PartialEq, Eq)] @@ -129,6 +147,10 @@ impl TypeShape { } } +// ───────────────────────────────────────────────────────────────────────────── +// Field tracking within a scope +// ───────────────────────────────────────────────────────────────────────────── + #[derive(Debug, Clone)] struct FieldInfo { base_type: TypeId, @@ -136,13 +158,14 @@ struct FieldInfo { cardinality: Cardinality, branch_count: usize, spans: Vec, - is_array_type: bool, } #[derive(Debug, Clone, Default)] struct ScopeInfo<'src> { fields: IndexMap<&'src str, FieldInfo>, + #[allow(dead_code)] // May be used for future enum variant tracking variants: IndexMap<&'src str, ScopeInfo<'src>>, + #[allow(dead_code)] has_variants: bool, } @@ -153,14 +176,12 @@ impl<'src> ScopeInfo<'src> { base_type: TypeId, cardinality: Cardinality, span: TextRange, - is_array_type: bool, ) { let shape = TypeShape::Primitive(base_type); if let Some(existing) = self.fields.get_mut(name) { existing.cardinality = existing.cardinality.join(cardinality); existing.branch_count += 1; existing.spans.push(span); - existing.is_array_type = existing.is_array_type || is_array_type; } else { self.fields.insert( name, @@ -170,7 +191,6 @@ impl<'src> ScopeInfo<'src> { cardinality, branch_count: 1, spans: vec![span], - is_array_type, }, ); } @@ -181,7 +201,6 @@ impl<'src> ScopeInfo<'src> { for (name, other_info) in other.fields { if let Some(existing) = self.fields.get_mut(name) { - // Check type compatibility if existing.shape != other_info.shape { errors.push(MergeError { 
field: name, @@ -202,28 +221,18 @@ impl<'src> ScopeInfo<'src> { } } - for (tag, other_variant) in other.variants { - let variant = self.variants.entry(tag).or_default(); - errors.extend(variant.merge_from(other_variant)); - } - - if other.has_variants { - self.has_variants = true; - } - errors } fn apply_optionality(&mut self, total_branches: usize) { for info in self.fields.values_mut() { - // Skip optionality for array-typed fields: arrays already encode - // zero-or-more semantics, so Optional wrapper would be redundant - if info.branch_count < total_branches && !info.is_array_type { + if info.branch_count < total_branches { info.cardinality = info.cardinality.make_optional(); } } } + #[allow(dead_code)] // May be useful for future scope analysis fn is_empty(&self) -> bool { self.fields.is_empty() && self.variants.is_empty() } @@ -237,133 +246,58 @@ struct MergeError<'src> { } // ───────────────────────────────────────────────────────────────────────────── -// Scope stack for traversal +// Inference result from expression // ───────────────────────────────────────────────────────────────────────────── -#[derive(Clone)] -struct ScopeStackEntry<'src> { - scope: ScopeInfo<'src>, - is_object: bool, - outer_pending: Option, -} - -impl<'src> ScopeStackEntry<'src> { - fn new_root() -> Self { - Self { - scope: ScopeInfo::default(), - is_object: false, - outer_pending: None, - } - } - - fn new_object(outer_pending: Option) -> Self { - Self { - scope: ScopeInfo::default(), - is_object: true, - outer_pending, - } - } -} - +/// What an expression produces when evaluated. #[derive(Debug, Clone)] -struct PendingType { +struct ExprResult { + /// Base type (before cardinality wrapping). base_type: TypeId, + /// Cardinality modifier. cardinality: Cardinality, - is_array: bool, + /// True if this result represents a meaningful type (not just default Node). + /// Used to distinguish QIS array results from simple uncaptured expressions. 
+ is_meaningful: bool, } -impl PendingType { - fn primitive(base_type: TypeId) -> Self { +impl ExprResult { + fn node() -> Self { Self { - base_type, + base_type: TYPE_NODE, cardinality: Cardinality::One, - is_array: false, + is_meaningful: false, } } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Traversal state -// ───────────────────────────────────────────────────────────────────────────── - -#[derive(Clone, Default)] -struct ArrayFrame { - cardinality: Cardinality, - element_type: Option, - /// Node ID where this array started (for lookup in precomputed map) - start_node: Option, - /// Whether PushElement was actually called (vs prepass placeholder) - push_called: bool, -} - -#[derive(Clone)] -struct TraversalState { - pending: Option, - current_variant: Option<&'static str>, - array_stack: Vec, - object_depth: usize, - /// Stack tracking whether each object scope is for a captured alternation. - /// Tags only create enums when inside an object opened for_alternation=true. - object_alt_stack: Vec, - /// When true, skip EndObject type creation at the dry_run_depth level. - /// Used during alternation branch exploration to collect variants before creating enum. - dry_run: bool, - /// The object_depth at which dry_run was enabled. EndObject is only skipped - /// when object_depth matches this value (after decrement), allowing nested - /// objects to be processed normally. - dry_run_depth: usize, - /// True when we're still at the definition root (no structural context entered). - /// Used to determine if tagged alternations should create enums (ADR-0009 §Case 1 vs §Case 3). 
- at_definition_root: bool, -} -impl Default for TraversalState { - fn default() -> Self { + fn void() -> Self { Self { - pending: None, - current_variant: None, - array_stack: Vec::new(), - object_depth: 0, - object_alt_stack: Vec::new(), - dry_run: false, - dry_run_depth: 0, - at_definition_root: true, + base_type: TYPE_VOID, + cardinality: Cardinality::One, + is_meaningful: false, } } -} -impl TraversalState { - /// Check if we're inside an object scope opened for a captured alternation. - fn in_alternation_object(&self) -> bool { - self.object_alt_stack.last().copied().unwrap_or(false) + fn meaningful(type_id: TypeId) -> Self { + Self { + base_type: type_id, + cardinality: Cardinality::One, + is_meaningful: true, + } } -} -impl TraversalState { - fn effective_array_cardinality(&self) -> Cardinality { - // Inside object scope, array cardinality doesn't apply to fields - if self.object_depth > 0 { - return Cardinality::One; + /// Type is known but doesn't contribute to definition result (e.g., opaque references). 
+ fn opaque(type_id: TypeId) -> Self { + Self { + base_type: type_id, + cardinality: Cardinality::One, + is_meaningful: false, } - self.array_stack - .iter() - .fold(Cardinality::One, |acc, frame| { - acc.multiply(frame.cardinality) - }) } -} -impl Cardinality { - fn multiply(self, other: Cardinality) -> Cardinality { - use Cardinality::*; - match (self, other) { - (One, x) | (x, One) => x, - (Optional, Optional) => Optional, - (Optional, Plus) | (Plus, Optional) => Star, - (Optional, Star) | (Star, Optional) => Star, - (Star, _) | (_, Star) => Star, - (Plus, Plus) => Plus, - } + fn with_cardinality(mut self, card: Cardinality) -> Self { + self.cardinality = card; + self } } @@ -371,31 +305,28 @@ impl Cardinality { // Inference context // ───────────────────────────────────────────────────────────────────────────── -struct InferenceContext<'src, 'g> { - graph: &'g BuildGraph<'src>, - dead_nodes: &'g HashSet, +struct InferenceContext<'src> { + source: &'src str, + qis_triggers: HashSet, type_defs: Vec>, next_type_id: TypeId, diagnostics: Diagnostics, errors: Vec>, current_def_name: &'src str, - /// Shared map for array element types across branches in loops. - array_element_types: HashMap, - /// Map from definition name to its computed type (for reference lookups). + /// Map from definition name to its computed type. 
definition_types: HashMap<&'src str, TypeId>, } -impl<'src, 'g> InferenceContext<'src, 'g> { - fn new(graph: &'g BuildGraph<'src>, dead_nodes: &'g HashSet) -> Self { +impl<'src> InferenceContext<'src> { + fn new(source: &'src str, qis_triggers: HashSet) -> Self { Self { - graph, - dead_nodes, + source, + qis_triggers, type_defs: Vec::new(), next_type_id: 3, // 0=void, 1=node, 2=str diagnostics: Diagnostics::default(), errors: Vec::new(), current_def_name: "", - array_element_types: HashMap::new(), definition_types: HashMap::new(), } } @@ -406,471 +337,323 @@ impl<'src, 'g> InferenceContext<'src, 'g> { id } - fn infer_definition(&mut self, def_name: &'src str, entry_id: NodeId) -> TypeId { - self.current_def_name = def_name; - let mut visited = HashSet::new(); - let mut merge_errors = Vec::new(); - let mut scope_stack = vec![ScopeStackEntry::new_root()]; - - let (final_pending, _) = self.traverse( - entry_id, - TraversalState::default(), - &mut visited, - 0, - &mut merge_errors, - &mut scope_stack, - ); + // ───────────────────────────────────────────────────────────────────────── + // Definition inference + // ───────────────────────────────────────────────────────────────────────── - let root_entry = scope_stack.pop().unwrap_or_else(ScopeStackEntry::new_root); - let scope = root_entry.scope; - - // Report merge errors - for err in merge_errors { - let types_str = err - .shapes - .iter() - .map(|s| s.to_description().to_string()) - .collect::>() - .join(" vs "); + fn infer_definition(&mut self, def_name: &'src str, body: &Expr) -> TypeId { + self.current_def_name = def_name; - let primary_span = err.spans.first().copied().unwrap_or_default(); - let mut builder = self - .diagnostics - .report(DiagnosticKind::IncompatibleTypes, primary_span) - .message(types_str); + let mut scope = ScopeInfo::default(); + let mut merge_errors = Vec::new(); - for span in err.spans.iter().skip(1) { - builder = builder.related_to("also captured here", *span); + // Special case: tagged 
alternation at definition root creates enum + if let Expr::AltExpr(alt) = body { + if alt.kind() == AltKind::Tagged { + return self.infer_tagged_alternation_as_enum(def_name, alt, &mut merge_errors); } - builder - .hint(format!( - "capture `{}` has incompatible types across branches", - err.field - )) - .emit(); - - self.errors.push(UnificationError { - field: err.field, - definition: def_name, - types_found: err.shapes.iter().map(|s| s.to_description()).collect(), - spans: err.spans, - }); } - // Determine result type - if scope.has_variants && !scope.variants.is_empty() { - self.create_enum_type(def_name, &scope) - } else if !scope.fields.is_empty() { + // General case: infer expression and collect captures into scope + let result = self.infer_expr(body, &mut scope, Cardinality::One, &mut merge_errors); + + self.report_merge_errors(&merge_errors); + + // Build result type from scope + if !scope.fields.is_empty() { self.create_struct_type(def_name, &scope) - } else if let Some(pending) = final_pending { - // Wrap with cardinality for array types (from EndArray effect) - if pending.is_array { - self.wrap_with_cardinality(pending.base_type, pending.cardinality) - } else { - pending.base_type - } + } else if result.is_meaningful { + // QIS or other expressions that produce a meaningful type without populating scope + result.base_type } else { TYPE_VOID } } - /// Returns (pending_type, stopped_at_node) where stopped_at_node is Some if - /// traversal stopped at an already-visited node (reconvergence point). 
- fn traverse( + // ───────────────────────────────────────────────────────────────────────── + // Expression inference + // ───────────────────────────────────────────────────────────────────────── + + fn infer_expr( &mut self, - node_id: NodeId, - mut state: TraversalState, - visited: &mut HashSet, - depth: usize, + expr: &Expr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, errors: &mut Vec>, - scope_stack: &mut Vec>, - ) -> (Option, Option) { - if self.dead_nodes.contains(&node_id) || depth > 200 { - return (state.pending, None); - } - - if !visited.insert(node_id) { - return (state.pending, Some(node_id)); - } - - let node = self.graph.node(node_id); - - // Clear definition root flag when we enter structural context (non-epsilon matchers) - if !node.is_epsilon() { - state.at_definition_root = false; + ) -> ExprResult { + match expr { + Expr::CapturedExpr(c) => self.infer_captured(c, scope, outer_card, errors), + Expr::QuantifiedExpr(q) => self.infer_quantified(q, scope, outer_card, errors), + Expr::SeqExpr(s) => self.infer_sequence(s, scope, outer_card, errors), + Expr::AltExpr(a) => self.infer_alternation(a, scope, outer_card, errors), + Expr::NamedNode(n) => self.infer_named_node(n, scope, outer_card, errors), + Expr::FieldExpr(f) => self.infer_field_expr(f, scope, outer_card, errors), + Expr::Ref(r) => self.infer_ref(r), + Expr::AnonymousNode(_) => ExprResult::node(), } + } - for effect in &node.effects { - self.process_effect(effect, node_id, &node.ref_marker, &mut state, scope_stack); - } + fn infer_captured( + &mut self, + c: &ast::CapturedExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let capture_name = c.name().map(|t| token_src(&t, self.source)).unwrap_or("_"); + let span = c.text_range(); + let has_string_annotation = c + .type_annotation() + .and_then(|t| t.name()) + .is_some_and(|n| n.text() == "string"); + + let Some(inner) = c.inner() else { + return ExprResult::node(); + }; - 
// Process successors - let live_successors = self.get_live_successors(node); - if live_successors.is_empty() { - return (state.pending, None); - } + // Check if inner is a scope container (seq/alt) + let is_scope_container = matches!(inner, Expr::SeqExpr(_) | Expr::AltExpr(_)); - if live_successors.len() == 1 { - return self.traverse( - live_successors[0], - state, - visited, - depth + 1, - errors, - scope_stack, - ); + if is_scope_container { + // Captured scope container: creates nested type + let nested_type = self.infer_captured_container(capture_name, &inner, errors); + let result = ExprResult::meaningful(nested_type); + let effective_card = outer_card.multiply(result.cardinality); + scope.add_field(capture_name, result.base_type, effective_card, span); + result + } else { + // Simple capture: just capture the result + let result = self.infer_expr(&inner, scope, outer_card, errors); + let base_type = if has_string_annotation { + TYPE_STR + } else { + result.base_type + }; + let effective_card = outer_card.multiply(result.cardinality); + scope.add_field(capture_name, base_type, effective_card, span); + ExprResult::meaningful(base_type).with_cardinality(result.cardinality) } - - self.explore_branches(live_successors, state, visited, depth, errors, scope_stack) } - /// Process a single effect, updating state and scope_stack. 
- fn process_effect( + fn infer_captured_container( &mut self, - effect: &BuildEffect<'src>, - node_id: NodeId, - ref_marker: &RefMarker, - state: &mut TraversalState, - scope_stack: &mut Vec>, - ) { - match effect { - BuildEffect::CaptureNode => { - let capture_type = match ref_marker { - RefMarker::Exit { ref_id } => self.find_ref_type(*ref_id).unwrap_or(TYPE_NODE), - _ => TYPE_NODE, - }; - state.pending = Some(PendingType::primitive(capture_type)); - } - BuildEffect::ClearCurrent => { - state.pending = None; - } - BuildEffect::ToString => { - state.pending = Some(PendingType::primitive(TYPE_STR)); - } - BuildEffect::Field { name, span } => { - self.process_field_effect(name, *span, state, scope_stack); - } - BuildEffect::StartArray { is_plus } => { - let cardinality = if *is_plus { - Cardinality::Plus - } else { - Cardinality::Star - }; - state.array_stack.push(ArrayFrame { - cardinality, - element_type: None, - start_node: Some(node_id), - push_called: false, - }); - } - BuildEffect::PushElement => { - self.process_push_element(state); - } - BuildEffect::EndArray => { - self.process_end_array(state); - } - BuildEffect::StartObject { for_alternation } => { - // Entering an object scope means we're no longer at definition root - state.at_definition_root = false; - state.object_depth += 1; - state.object_alt_stack.push(*for_alternation); - scope_stack.push(ScopeStackEntry::new_object(state.pending.take())); - } - BuildEffect::EndObject => { - // Only skip EndObject processing when closing the scope that was open at dry_run time. - // Check BEFORE decrementing to correctly identify which scope we're closing. 
- let skip_for_dry_run = state.dry_run && state.object_depth == state.dry_run_depth; - state.object_depth = state.object_depth.saturating_sub(1); - state.object_alt_stack.pop(); - if !skip_for_dry_run { - self.process_end_object(state, scope_stack); + _capture_name: &'src str, + inner: &Expr, + errors: &mut Vec>, + ) -> TypeId { + match inner { + Expr::SeqExpr(s) => { + let mut nested_scope = ScopeInfo::default(); + for child in s.children() { + self.infer_expr(&child, &mut nested_scope, Cardinality::One, errors); } - } - BuildEffect::StartVariant(tag) => { - // SAFETY: tag comes from source with 'src lifetime - let tag: &'static str = unsafe { std::mem::transmute(*tag) }; - state.current_variant = Some(tag); - // Create enum for: - // - Definition root tagged alternations (ADR-0009 §Case 3) - // - Captured tagged alternations inside objects with for_alternation=true - // Uncaptured inline tagged alternations (including inside QIS objects) - // behave like untagged (ADR-0009 §Case 1). - if state.at_definition_root || state.in_alternation_object() { - if let Some(entry) = scope_stack.last_mut() { - entry.scope.has_variants = true; - } + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) + } + Expr::AltExpr(a) => { + if a.kind() == AltKind::Tagged { + // Captured tagged alternation → Enum + let type_name = self.generate_scope_name(); + self.infer_tagged_alternation_as_enum(type_name, a, errors) + } else { + // Captured untagged alternation → Struct with merged fields + let mut nested_scope = ScopeInfo::default(); + self.infer_untagged_alternation(a, &mut nested_scope, Cardinality::One, errors); + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) } } - BuildEffect::EndVariant => { - self.process_end_variant(state, scope_stack); + _ => { + // Not a container - shouldn't reach here + TYPE_NODE } } } - fn process_field_effect( - &self, - name: &str, - span: TextRange, - state: &mut 
TraversalState, - scope_stack: &mut Vec>, - ) { - let Some(pending) = state.pending.take() else { - return; + fn infer_quantified( + &mut self, + q: &ast::QuantifiedExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let Some(inner) = q.inner() else { + return ExprResult::node(); }; - // SAFETY: name comes from source with 'src lifetime - let name: &'src str = unsafe { std::mem::transmute(name) }; - let current_variant: Option<&'src str> = state - .current_variant - .map(|v| unsafe { std::mem::transmute(v) }); - - let effective_card = pending - .cardinality - .multiply(state.effective_array_cardinality()); - let Some(entry) = scope_stack.last_mut() else { - return; - }; + let quant_card = self.quantifier_cardinality(q); + let is_qis = self.qis_triggers.contains(q); - // Route fields to variant scope only when: - // 1. We're in a variant context (current_variant is set) - // 2. Either at definition root OR inside an alternation-capturing object - // 3. The scope is creating an enum (has_variants is true) - // Otherwise, fields go to the main scope (for uncaptured inline alternations). 
- let in_variant_context = - (state.object_depth == 0 || state.in_alternation_object()) && entry.scope.has_variants; - let target_scope = match current_variant.filter(|_| in_variant_context) { - Some(tag) => entry.scope.variants.entry(tag).or_default(), - None => &mut entry.scope, - }; - target_scope.add_field( - name, - pending.base_type, - effective_card, - span, - pending.is_array, - ); - } + if is_qis { + // QIS: create implicit scope for multiple captures + let mut nested_scope = ScopeInfo::default(); + self.infer_expr(&inner, &mut nested_scope, Cardinality::One, errors); - fn process_push_element(&mut self, state: &mut TraversalState) { - let Some(pending) = state.pending.take() else { - return; - }; - let Some(frame) = state.array_stack.last_mut() else { - return; - }; + let element_type = if !nested_scope.fields.is_empty() { + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) + } else { + TYPE_NODE + }; - frame.element_type = Some(pending.base_type); - frame.push_called = true; - if let Some(start_id) = frame.start_node { - self.array_element_types.insert(start_id, pending.base_type); + // Wrap with array type - this is a meaningful result + let array_type = self.wrap_with_cardinality(element_type, quant_card); + ExprResult::meaningful(array_type) + } else { + // No QIS: captures propagate with multiplied cardinality + let combined_card = outer_card.multiply(quant_card); + let result = self.infer_expr(&inner, scope, combined_card, errors); + // Return result with quantifier's cardinality so captured quantifiers work correctly + ExprResult { + base_type: result.base_type, + cardinality: quant_card.multiply(result.cardinality), + is_meaningful: result.is_meaningful, + } } } - fn process_end_array(&mut self, state: &mut TraversalState) { - let Some(frame) = state.array_stack.pop() else { - return; - }; + fn infer_sequence( + &mut self, + s: &ast::SeqExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + 
errors: &mut Vec>, + ) -> ExprResult { + // Uncaptured sequence: captures propagate to parent scope + let mut last_result = ExprResult::void(); + for child in s.children() { + last_result = self.infer_expr(&child, scope, outer_card, errors); + } + last_result + } - // Get element type from recorded types or default to Node. - // For lazy quantifiers (*?, +?), the exit path may not execute the loop body, - // but we still need to produce an array type (empty array case). - let element_type = frame - .start_node - .and_then(|id| self.array_element_types.get(&id).copied()) - .or(frame.element_type) - .unwrap_or(TYPE_NODE); - - // Keep element type with cardinality (not wrapped array type) to enable - // proper cardinality join in alternations (ADR-0009 §Cardinality Lifting Coercion). - // The array wrapper is applied later in create_struct_type via wrap_with_cardinality. - state.pending = Some(PendingType { - base_type: element_type, - cardinality: frame.cardinality, - is_array: true, - }); + fn infer_alternation( + &mut self, + a: &ast::AltExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Uncaptured alternation (tagged or untagged): captures propagate with optionality + self.infer_untagged_alternation(a, scope, outer_card, errors) } - fn process_end_object( + fn infer_untagged_alternation( &mut self, - state: &mut TraversalState, - scope_stack: &mut Vec>, - ) { - let Some(finished_entry) = scope_stack.pop() else { - return; - }; - if !finished_entry.is_object { - scope_stack.push(finished_entry); - return; - } + a: &ast::AltExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let branches: Vec<_> = a.branches().collect(); + let total_branches = branches.len(); - let finished_scope = finished_entry.scope; - if finished_scope.is_empty() { - // If current pending exists (from nested EndObject), keep it. 
- // Otherwise restore what was pending before this object started. - if state.pending.is_none() { - state.pending = finished_entry.outer_pending; - } - return; + if total_branches == 0 { + return ExprResult::void(); } - let type_name = self.generate_scope_name(); - let type_id = if finished_scope.has_variants && !finished_scope.variants.is_empty() { - self.create_enum_type(type_name, &finished_scope) - } else { - self.create_struct_type(type_name, &finished_scope) - }; - - state.pending = Some(PendingType { - base_type: type_id, - cardinality: Cardinality::One, - is_array: false, - }); - } + let mut merged_scope = ScopeInfo::default(); - fn process_end_variant( - &self, - state: &mut TraversalState, - scope_stack: &mut Vec>, - ) { - let Some(tag) = state.current_variant.take() else { - return; - }; - // SAFETY: tag comes from source with 'src lifetime - let tag: &'src str = unsafe { std::mem::transmute(tag) }; - - let Some(entry) = scope_stack.last_mut() else { - return; - }; - let variant_scope = entry.scope.variants.entry(tag).or_default(); - - // Single-capture flattening (ADR-0007) - if variant_scope.fields.is_empty() { - if let Some(pending) = state.pending.take() { - variant_scope.add_field( - "$value", - pending.base_type, - pending.cardinality, - rowan::TextRange::default(), - pending.is_array, - ); - } + for branch in &branches { + let Some(body) = branch.body() else { + continue; + }; + let mut branch_scope = ScopeInfo::default(); + self.infer_expr(&body, &mut branch_scope, outer_card, errors); + errors.extend(merged_scope.merge_from(branch_scope)); } - } - /// Get live successors, filtering dead nodes and ref entry points. - fn get_live_successors(&self, node: &BuildNode<'src>) -> Vec { - let def_entry_to_skip = match &node.ref_marker { - RefMarker::Enter { .. 
} => node.ref_name.and_then(|name| self.graph.definition(name)), - _ => None, - }; + // Apply optionality for fields not present in all branches + merged_scope.apply_optionality(total_branches); - node.successors - .iter() - .copied() - .filter(|s| !self.dead_nodes.contains(s)) - .filter(|s| def_entry_to_skip.map_or(true, |def| *s != def)) - .collect() + // Merge into parent scope + errors.extend(scope.merge_from(merged_scope)); + + ExprResult::node() } - /// Explore multiple branches, merge scopes, handle reconvergence. - fn explore_branches( + fn infer_tagged_alternation_as_enum( &mut self, - successors: Vec, - state: TraversalState, - visited: &mut HashSet, - depth: usize, + type_name: &'src str, + a: &ast::AltExpr, errors: &mut Vec>, - scope_stack: &mut Vec>, - ) -> (Option, Option) { - let total_branches = successors.len(); - let initial_scope_len = scope_stack.len(); - let use_dry_run = state.object_depth > 0; - - let mut branch_scopes: Vec> = Vec::new(); - let mut branch_visited_sets: Vec> = Vec::new(); - let mut result_pending: Option = None; - - // Phase 1: explore branches independently - for succ in &successors { - let mut branch_stack = scope_stack.clone(); - let mut branch_visited = visited.clone(); - let mut branch_state = state.clone(); - branch_state.dry_run = use_dry_run; - if use_dry_run { - branch_state.dry_run_depth = state.object_depth; - } - - let (branch_pending, _) = self.traverse( - *succ, - branch_state, - &mut branch_visited, - depth + 1, - errors, - &mut branch_stack, - ); + ) -> TypeId { + let mut variants = IndexMap::new(); + + for branch in a.branches() { + let tag = branch + .label() + .map(|t| token_src(&t, self.source)) + .unwrap_or("_"); + let Some(body) = branch.body() else { + variants.insert(tag, ScopeInfo::default()); + continue; + }; - if result_pending.is_none() { - result_pending = branch_pending; - } + let mut variant_scope = ScopeInfo::default(); + self.infer_expr(&body, &mut variant_scope, Cardinality::One, errors); + 
variants.insert(tag, variant_scope); + } - let new_nodes: HashSet = branch_visited.difference(visited).copied().collect(); - branch_visited_sets.push(new_nodes); + self.create_enum_type_from_variants(type_name, &variants) + } - while branch_stack.len() > initial_scope_len { - branch_stack.pop(); - } - if let Some(entry) = branch_stack.last() { - branch_scopes.push(entry.scope.clone()); - } + fn infer_named_node( + &mut self, + n: &ast::NamedNode, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Named nodes have children - recurse into them + for child in n.children() { + self.infer_expr(&child, scope, outer_card, errors); } + ExprResult::node() + } - // Merge branch scopes - if let Some(main_entry) = scope_stack.last_mut() { - for branch_scope in branch_scopes { - errors.extend(main_entry.scope.merge_from(branch_scope)); - } - main_entry.scope.apply_optionality(total_branches); + fn infer_field_expr( + &mut self, + f: &ast::FieldExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Field constraint (name: expr) - just recurse + if let Some(value) = f.value() { + return self.infer_expr(&value, scope, outer_card, errors); } + ExprResult::node() + } - // Find and process reconvergence - let reconverge_nodes = self.find_reconvergence(&branch_visited_sets); - - if use_dry_run && !reconverge_nodes.is_empty() { - if let Some(entry_node) = reconverge_nodes.iter().min().copied() { - for branch_set in &branch_visited_sets { - for &nid in branch_set { - if !reconverge_nodes.contains(&nid) { - visited.insert(nid); - } - } - } - let mut cont_state = state; - cont_state.dry_run = false; - cont_state.pending = result_pending; - return self.traverse( - entry_node, - cont_state, - visited, - depth + 1, - errors, - scope_stack, - ); + fn infer_ref(&self, r: &ast::Ref) -> ExprResult { + // References are opaque - captures don't propagate from referenced definition. 
+ // Return the type (for use when captured) but mark as not meaningful + // so uncaptured refs don't affect definition's result type. + let ref_name = r.name().map(|t| t.text().to_string()); + if let Some(name) = ref_name { + if let Some(&type_id) = self.definition_types.get(name.as_str()) { + return ExprResult::opaque(type_id); } } - - for branch_set in branch_visited_sets { - visited.extend(branch_set); - } - (result_pending, None) + ExprResult::node() } - fn find_reconvergence(&self, branch_sets: &[HashSet]) -> HashSet { - if branch_sets.len() < 2 { - return HashSet::new(); + // ───────────────────────────────────────────────────────────────────────── + // Helpers + // ───────────────────────────────────────────────────────────────────────── + + fn quantifier_cardinality(&self, q: &ast::QuantifiedExpr) -> Cardinality { + let Some(op) = q.operator() else { + return Cardinality::One; + }; + use crate::parser::cst::SyntaxKind; + match op.kind() { + SyntaxKind::Star | SyntaxKind::StarQuestion => Cardinality::Star, + SyntaxKind::Plus | SyntaxKind::PlusQuestion => Cardinality::Plus, + SyntaxKind::Question | SyntaxKind::QuestionQuestion => Cardinality::Optional, + _ => Cardinality::One, } - let mut iter = branch_sets.iter(); - let first = iter.next().unwrap().clone(); - iter.fold(first, |acc, set| acc.intersection(set).copied().collect()) } fn generate_scope_name(&self) -> &'src str { @@ -903,19 +686,23 @@ impl<'src, 'g> InferenceContext<'src, 'g> { type_id } - fn create_enum_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { + fn create_enum_type_from_variants( + &mut self, + name: &'src str, + variants: &IndexMap<&'src str, ScopeInfo<'src>>, + ) -> TypeId { let mut members = Vec::new(); - for (tag, variant_scope) in &scope.variants { + + for (tag, variant_scope) in variants { let variant_type = if variant_scope.fields.is_empty() { TYPE_VOID } else if variant_scope.fields.len() == 1 { - // Single-capture variant: flatten to capture's type directly 
(ADR-0007) + // Single-capture variant: flatten (ADR-0007) let (_, info) = variant_scope.fields.iter().next().unwrap(); self.wrap_with_cardinality(info.base_type, info.cardinality) } else { - let variant_name = format!("{}{}", name, tag); - let leaked: &'src str = Box::leak(variant_name.into_boxed_str()); - self.create_struct_type(leaked, variant_scope) + let variant_name = self.generate_scope_name(); + self.create_struct_type(variant_name, variant_scope) }; members.push(InferredMember { name: tag, @@ -935,21 +722,6 @@ impl<'src, 'g> InferenceContext<'src, 'g> { type_id } - /// Find the type for a reference by looking up the Enter node with matching ref_id. - fn find_ref_type(&self, ref_id: u32) -> Option { - // Find the Enter node with this ref_id to get the definition name - for (_, node) in self.graph.iter() { - if let RefMarker::Enter { ref_id: enter_id } = &node.ref_marker { - if *enter_id == ref_id { - if let Some(name) = node.ref_name { - return self.definition_types.get(name).copied(); - } - } - } - } - None - } - fn wrap_with_cardinality(&mut self, base: TypeId, card: Cardinality) -> TypeId { match card { Cardinality::One => base, @@ -985,6 +757,40 @@ impl<'src, 'g> InferenceContext<'src, 'g> { } } } + + fn report_merge_errors(&mut self, merge_errors: &[MergeError<'src>]) { + for err in merge_errors { + let types_str = err + .shapes + .iter() + .map(|s| s.to_description().to_string()) + .collect::>() + .join(" vs "); + + let primary_span = err.spans.first().copied().unwrap_or_default(); + let mut builder = self + .diagnostics + .report(DiagnosticKind::IncompatibleTypes, primary_span) + .message(types_str); + + for span in err.spans.iter().skip(1) { + builder = builder.related_to("also captured here", *span); + } + builder + .hint(format!( + "capture `{}` has incompatible types across branches", + err.field + )) + .emit(); + + self.errors.push(UnificationError { + field: err.field, + definition: self.current_def_name, + types_found: 
err.shapes.iter().map(|s| s.to_description()).collect(), + spans: err.spans.clone(), + }); + } + } } // ───────────────────────────────────────────────────────────────────────────── @@ -992,40 +798,52 @@ impl<'src, 'g> InferenceContext<'src, 'g> { // ───────────────────────────────────────────────────────────────────────────── impl<'a> Query<'a> { - /// Run type inference on the built graph. + /// Run type inference on the query AST. pub(super) fn infer_types(&mut self) { - let mut ctx = InferenceContext::new(&self.graph, &self.dead_nodes); - - // Process definitions in dependency order (referenced definitions first) - let sorted = self.topological_sort_definitions(); - for name in sorted { - if let Some(entry_id) = self.graph.definition(name) { - let type_id = ctx.infer_definition(name, entry_id); - ctx.definition_types.insert(name, type_id); - self.type_info.entrypoint_types.insert(name, type_id); - } + // Collect QIS triggers upfront to avoid borrowing issues + let qis_triggers: HashSet<_> = self.qis_triggers.keys().cloned().collect(); + let sorted = self.topological_sort_definitions_ast(); + + let mut ctx = InferenceContext::new(self.source, qis_triggers); + + // Process definitions in dependency order + for (name, body) in &sorted { + let type_id = ctx.infer_definition(*name, body); + ctx.definition_types.insert(name, type_id); } + // Preserve symbol table order for entrypoints + for (name, _) in &sorted { + if let Some(&type_id) = ctx.definition_types.get(name) { + self.type_info.entrypoint_types.insert(*name, type_id); + } + } self.type_info.type_defs = ctx.type_defs; self.type_info.diagnostics = ctx.diagnostics; self.type_info.errors = ctx.errors; } - /// Topologically sort definitions so referenced definitions are processed first. - fn topological_sort_definitions(&self) -> Vec<&'a str> { - let definitions: Vec<_> = self.graph.definitions().collect(); + /// Topologically sort definitions for processing order. 
+ fn topological_sort_definitions_ast(&self) -> Vec<(&'a str, ast::Expr)> { + use std::collections::{HashSet, VecDeque}; + + let definitions: Vec<_> = self + .symbol_table + .iter() + .map(|(&name, body)| (name, body.clone())) + .collect(); let def_names: HashSet<&str> = definitions.iter().map(|(name, _)| *name).collect(); - // Build dependency graph: which definitions does each definition reference? + // Build dependency graph from AST references let mut deps: HashMap<&str, Vec<&str>> = HashMap::new(); - for &(name, entry_id) in &definitions { - let refs = self.collect_references(entry_id, &def_names); + for (name, body) in &definitions { + let refs = Self::collect_ast_references(body, &def_names); deps.insert(name, refs); } - // Kahn's algorithm for topological sort + // Kahn's algorithm let mut in_degree: HashMap<&str, usize> = HashMap::new(); - for &(name, _) in &definitions { + for (name, _) in &definitions { in_degree.insert(name, 0); } for refs in deps.values() { @@ -1042,9 +860,9 @@ impl<'a> Query<'a> { zero_degree.sort(); let mut queue: VecDeque<&str> = zero_degree.into_iter().collect(); - let mut sorted = Vec::new(); + let mut sorted_names = Vec::new(); while let Some(name) = queue.pop_front() { - sorted.push(name); + sorted_names.push(name); if let Some(refs) = deps.get(name) { for &dep in refs { if let Some(deg) = in_degree.get_mut(dep) { @@ -1058,53 +876,52 @@ impl<'a> Query<'a> { } // Reverse so dependencies come first - sorted.reverse(); + sorted_names.reverse(); // Add any remaining (cyclic) definitions - for &(name, _) in &definitions { - if !sorted.contains(&name) { - sorted.push(name); + for (name, _) in &definitions { + if !sorted_names.contains(name) { + sorted_names.push(name); } } - sorted + // Build result with bodies + sorted_names + .into_iter() + .filter_map(|name| self.symbol_table.get(name).map(|body| (name, body.clone()))) + .collect() } - /// Collect all definition names referenced from a given node. 
- fn collect_references(&self, start: NodeId, def_names: &HashSet<&str>) -> Vec<&'a str> { + /// Collect references from an AST expression. + fn collect_ast_references<'b>(expr: &Expr, def_names: &HashSet<&'b str>) -> Vec<&'b str> { let mut refs = Vec::new(); - let mut visited = HashSet::new(); - let mut stack = vec![start]; - - while let Some(node_id) = stack.pop() { - if !visited.insert(node_id) { - continue; - } - let node = self.graph.node(node_id); + Self::collect_ast_references_impl(expr, def_names, &mut refs); + refs + } - // Check if this is an Enter node referencing another definition - if let RefMarker::Enter { .. } = &node.ref_marker { - if let Some(name) = node.ref_name { - if def_names.contains(name) && !refs.contains(&name) { - refs.push(name); + fn collect_ast_references_impl<'b>( + expr: &Expr, + def_names: &HashSet<&'b str>, + refs: &mut Vec<&'b str>, + ) { + match expr { + Expr::Ref(r) => { + if let Some(name_token) = r.name() { + let name = name_token.text(); + if def_names.contains(name) && !refs.iter().any(|&r| r == name) { + // Find the actual &'b str from the set + if let Some(&found) = def_names.iter().find(|&&n| n == name) { + refs.push(found); + } } } } - - // Don't follow into referenced definitions (they're opaque) - let skip_def = match &node.ref_marker { - RefMarker::Enter { .. 
} => node.ref_name.and_then(|n| self.graph.definition(n)), - _ => None, - }; - - for &succ in &node.successors { - if skip_def.map_or(true, |def| succ != def) { - stack.push(succ); + _ => { + for child in expr.children() { + Self::collect_ast_references_impl(&child, def_names, refs); } } } - - refs } } From 631ac5ab612465f5e36b195eb5636f0658daadaa Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 19:58:51 -0300 Subject: [PATCH 22/23] Emitter --- crates/plotnik-lib/src/ir/compiled.rs | 757 ++++++++++++++++++ crates/plotnik-lib/src/ir/emit.rs | 979 ++++++++++++++++++++++++ crates/plotnik-lib/src/ir/mod.rs | 24 +- crates/plotnik-lib/src/ir/serialize.rs | 414 ++++++++++ crates/plotnik-lib/src/ir/strings.rs | 140 ++++ crates/plotnik-lib/src/ir/transition.rs | 21 + 6 files changed, 2334 insertions(+), 1 deletion(-) create mode 100644 crates/plotnik-lib/src/ir/compiled.rs create mode 100644 crates/plotnik-lib/src/ir/emit.rs create mode 100644 crates/plotnik-lib/src/ir/serialize.rs create mode 100644 crates/plotnik-lib/src/ir/strings.rs diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs new file mode 100644 index 00000000..7933454c --- /dev/null +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -0,0 +1,757 @@ +//! Compiled query container and buffer. +//! +//! The compiled query lives in a single contiguous allocation—cache-friendly, +//! zero fragmentation, portable to WASM. See ADR-0004 for format details. + +use std::alloc::{Layout, alloc, dealloc}; +use std::fmt::Write; +use std::ptr; + +use super::{ + EffectOp, Entrypoint, NodeFieldId, NodeTypeId, Slice, StringId, StringRef, Transition, + TransitionId, TypeDef, TypeMember, +}; + +/// Buffer alignment for cache-line efficiency. +pub const BUFFER_ALIGN: usize = 64; + +/// Magic bytes identifying a compiled query file. +pub const MAGIC: [u8; 4] = *b"PLNK"; + +/// Current format version. 
+pub const FORMAT_VERSION: u32 = 1; + +/// Aligned buffer for compiled query data. +/// +/// Allocated via `Layout::from_size_align(len, BUFFER_ALIGN)`. Standard `Box<[u8]>` +/// won't work—it assumes 1-byte alignment and corrupts `dealloc`. +pub struct CompiledQueryBuffer { + ptr: *mut u8, + len: usize, + /// `true` if allocated, `false` if mmap'd or external. + owned: bool, +} + +impl CompiledQueryBuffer { + /// Allocate a new buffer with 64-byte alignment. + pub fn allocate(len: usize) -> Self { + if len == 0 { + return Self { + ptr: ptr::null_mut(), + len: 0, + owned: true, + }; + } + + let layout = Layout::from_size_align(len, BUFFER_ALIGN).expect("invalid layout"); + + // SAFETY: layout is non-zero size, properly aligned + let ptr = unsafe { alloc(layout) }; + if ptr.is_null() { + std::alloc::handle_alloc_error(layout); + } + + Self { + ptr, + len, + owned: true, + } + } + + /// Create a view into external memory (mmap'd or borrowed). + /// + /// # Safety + /// - `ptr` must be valid for reads of `len` bytes + /// - `ptr` must be aligned to `BUFFER_ALIGN` + /// - The backing memory must outlive the returned buffer + pub unsafe fn from_external(ptr: *mut u8, len: usize) -> Self { + debug_assert!( + ptr as usize % BUFFER_ALIGN == 0, + "buffer must be 64-byte aligned" + ); + Self { + ptr, + len, + owned: false, + } + } + + /// Returns a pointer to the buffer start. + #[inline] + pub fn as_ptr(&self) -> *const u8 { + self.ptr + } + + /// Returns a mutable pointer to the buffer start. + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.ptr + } + + /// Returns the buffer length in bytes. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns true if the buffer is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the buffer as a byte slice. 
+ #[inline] + pub fn as_slice(&self) -> &[u8] { + if self.ptr.is_null() { + &[] + } else { + // SAFETY: ptr is valid for len bytes if non-null + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } + } + + /// Returns the buffer as a mutable byte slice. + #[inline] + pub fn as_mut_slice(&mut self) -> &mut [u8] { + if self.ptr.is_null() { + &mut [] + } else { + // SAFETY: ptr is valid for len bytes if non-null, and we have &mut self + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } + } +} + +impl Drop for CompiledQueryBuffer { + fn drop(&mut self) { + if self.owned && !self.ptr.is_null() { + let layout = Layout::from_size_align(self.len, BUFFER_ALIGN) + .expect("layout was valid at allocation"); + // SAFETY: ptr was allocated with this exact layout + unsafe { dealloc(self.ptr, layout) }; + } + } +} + +// SAFETY: The buffer is just raw bytes, safe to send across threads +unsafe impl Send for CompiledQueryBuffer {} +unsafe impl Sync for CompiledQueryBuffer {} + +/// A compiled query ready for execution. +/// +/// Contains a single contiguous buffer with all segments, plus offset indices +/// for O(1) access to each segment. +pub struct CompiledQuery { + buffer: CompiledQueryBuffer, + // Segment offsets (byte offsets into buffer) + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, // 0 = no trivia kinds + // Segment counts (number of elements) + transition_count: u32, + successor_count: u32, + effect_count: u32, + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, +} + +impl CompiledQuery { + /// Creates a new compiled query from pre-built components. + /// + /// This is typically called by the emitter after layout computation. 
+ #[allow(clippy::too_many_arguments)] + pub fn new( + buffer: CompiledQueryBuffer, + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, + transition_count: u32, + successor_count: u32, + effect_count: u32, + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, + ) -> Self { + Self { + buffer, + successors_offset, + effects_offset, + negated_fields_offset, + string_refs_offset, + string_bytes_offset, + type_defs_offset, + type_members_offset, + entrypoints_offset, + trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + } + } + + // ───────────────────────────────────────────────────────────────────── + // Segment accessors + // ───────────────────────────────────────────────────────────────────── + + /// Returns the transitions segment. + #[inline] + pub fn transitions(&self) -> &[Transition] { + // Transitions start at offset 0 + // SAFETY: buffer is properly aligned, transitions are at offset 0 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr() as *const Transition, + self.transition_count as usize, + ) + } + } + + /// Returns the successors segment. + #[inline] + pub fn successors(&self) -> &[TransitionId] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.successors_offset as usize) as *const TransitionId, + self.successor_count as usize, + ) + } + } + + /// Returns the effects segment. 
+ #[inline] + pub fn effects(&self) -> &[EffectOp] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.effects_offset as usize) as *const EffectOp, + self.effect_count as usize, + ) + } + } + + /// Returns the negated fields segment. + #[inline] + pub fn negated_fields(&self) -> &[NodeFieldId] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer + .as_ptr() + .add(self.negated_fields_offset as usize) as *const NodeFieldId, + self.negated_field_count as usize, + ) + } + } + + /// Returns the string refs segment. + #[inline] + pub fn string_refs(&self) -> &[StringRef] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.string_refs_offset as usize) as *const StringRef, + self.string_ref_count as usize, + ) + } + } + + /// Returns the raw string bytes. + #[inline] + pub fn string_bytes(&self) -> &[u8] { + let end = if self.type_defs_offset > 0 { + self.type_defs_offset as usize + } else { + self.buffer.len() + }; + let start = self.string_bytes_offset as usize; + &self.buffer.as_slice()[start..end] + } + + /// Returns the type definitions segment. + #[inline] + pub fn type_defs(&self) -> &[TypeDef] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.type_defs_offset as usize) as *const TypeDef, + self.type_def_count as usize, + ) + } + } + + /// Returns the type members segment. + #[inline] + pub fn type_members(&self) -> &[TypeMember] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.type_members_offset as usize) as *const TypeMember, + self.type_member_count as usize, + ) + } + } + + /// Returns the entrypoints segment. 
+ #[inline] + pub fn entrypoints(&self) -> &[Entrypoint] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.entrypoints_offset as usize) as *const Entrypoint, + self.entrypoint_count as usize, + ) + } + } + + /// Returns the trivia kinds segment (node types to skip). + #[inline] + pub fn trivia_kinds(&self) -> &[NodeTypeId] { + if self.trivia_kinds_offset == 0 { + return &[]; + } + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.trivia_kinds_offset as usize) as *const NodeTypeId, + self.trivia_kind_count as usize, + ) + } + } + + // ───────────────────────────────────────────────────────────────────── + // High-level accessors + // ───────────────────────────────────────────────────────────────────── + + /// Returns a transition by ID. + #[inline] + pub fn transition(&self, id: TransitionId) -> &Transition { + &self.transitions()[id as usize] + } + + /// Returns a view of a transition with resolved slices. + #[inline] + pub fn transition_view(&self, id: TransitionId) -> TransitionView<'_> { + TransitionView { + query: self, + raw: self.transition(id), + } + } + + /// Resolves a string ID to its UTF-8 content. + #[inline] + pub fn string(&self, id: StringId) -> &str { + let refs = self.string_refs(); + let string_ref = &refs[id as usize]; + let bytes = self.string_bytes(); + let start = string_ref.offset as usize; + let end = start + string_ref.len as usize; + // SAFETY: emitter ensures valid UTF-8 + unsafe { std::str::from_utf8_unchecked(&bytes[start..end]) } + } + + /// Resolves a slice of effects. + #[inline] + pub fn resolve_effects(&self, slice: Slice) -> &[EffectOp] { + let effects = self.effects(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &effects[start..end] + } + + /// Resolves a slice of negated fields. 
+ #[inline] + pub fn resolve_negated_fields(&self, slice: Slice) -> &[NodeFieldId] { + let fields = self.negated_fields(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &fields[start..end] + } + + /// Resolves a slice of type members. + #[inline] + pub fn resolve_type_members(&self, slice: Slice) -> &[TypeMember] { + let members = self.type_members(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &members[start..end] + } + + /// Resolves successors for a transition by ID, handling both inline and spilled cases. + #[inline] + pub fn resolve_successors_by_id(&self, id: TransitionId) -> &[TransitionId] { + let transition = self.transition(id); + if transition.has_inline_successors() { + // Return from transitions segment - inline data is part of the transition + let count = transition.successor_count as usize; + &self.transitions()[id as usize].successor_data[..count] + } else { + let start = transition.spilled_successors_index() as usize; + let count = transition.successor_count as usize; + &self.successors()[start..start + count] + } + } + + /// Returns the number of transitions. + #[inline] + pub fn transition_count(&self) -> u32 { + self.transition_count + } + + /// Returns the number of entrypoints. + #[inline] + pub fn entrypoint_count(&self) -> u16 { + self.entrypoint_count + } + + /// Returns the raw buffer for serialization. + #[inline] + pub fn buffer(&self) -> &CompiledQueryBuffer { + &self.buffer + } + + /// Returns offset metadata for serialization. 
+ pub fn offsets(&self) -> CompiledQueryOffsets { + CompiledQueryOffsets { + successors_offset: self.successors_offset, + effects_offset: self.effects_offset, + negated_fields_offset: self.negated_fields_offset, + string_refs_offset: self.string_refs_offset, + string_bytes_offset: self.string_bytes_offset, + type_defs_offset: self.type_defs_offset, + type_members_offset: self.type_members_offset, + entrypoints_offset: self.entrypoints_offset, + trivia_kinds_offset: self.trivia_kinds_offset, + } + } + + /// Dumps the compiled query in human-readable format for debugging. + pub fn dump(&self) -> String { + let mut out = String::new(); + + // Header + writeln!(out, "CompiledQuery {{").unwrap(); + writeln!(out, " buffer_len: {}", self.buffer.len()).unwrap(); + writeln!(out, " transitions: {}", self.transition_count).unwrap(); + writeln!(out, " successors: {} (spilled)", self.successor_count).unwrap(); + writeln!(out, " effects: {}", self.effect_count).unwrap(); + writeln!(out, " strings: {}", self.string_ref_count).unwrap(); + writeln!(out, " type_defs: {}", self.type_def_count).unwrap(); + writeln!(out, " entrypoints: {}", self.entrypoint_count).unwrap(); + writeln!(out).unwrap(); + + // Entrypoints + writeln!(out, " Entrypoints:").unwrap(); + for ep in self.entrypoints() { + let name = self.string(ep.name_id()); + writeln!( + out, + " {} -> T{} (type {})", + name, + ep.target(), + ep.result_type() + ) + .unwrap(); + } + writeln!(out).unwrap(); + + // Transitions + writeln!(out, " Transitions:").unwrap(); + for i in 0..self.transition_count { + let view = self.transition_view(i); + write!(out, " T{}: ", i).unwrap(); + + // Matcher + match view.matcher() { + super::Matcher::Epsilon => write!(out, "ε").unwrap(), + super::Matcher::Node { kind, field, .. } => { + write!(out, "Node({})", kind).unwrap(); + if let Some(f) = field { + write!(out, " field={}", f).unwrap(); + } + } + super::Matcher::Anonymous { kind, field, .. 
} => { + write!(out, "Anon({})", kind).unwrap(); + if let Some(f) = field { + write!(out, " field={}", f).unwrap(); + } + } + super::Matcher::Wildcard => write!(out, "_").unwrap(), + } + + // Nav + let nav = view.nav(); + if !nav.is_stay() { + write!(out, " nav={:?}", nav.kind).unwrap(); + if nav.level > 0 { + write!(out, "({})", nav.level).unwrap(); + } + } + + // Ref marker + match view.ref_marker() { + super::RefTransition::None => {} + super::RefTransition::Enter(id) => write!(out, " Enter({})", id).unwrap(), + super::RefTransition::Exit(id) => write!(out, " Exit({})", id).unwrap(), + } + + // Effects + let effects = view.effects(); + if !effects.is_empty() { + write!(out, " [").unwrap(); + for (j, eff) in effects.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + match eff { + EffectOp::CaptureNode => write!(out, "Capture").unwrap(), + EffectOp::ClearCurrent => write!(out, "Clear").unwrap(), + EffectOp::StartArray => write!(out, "StartArr").unwrap(), + EffectOp::PushElement => write!(out, "Push").unwrap(), + EffectOp::EndArray => write!(out, "EndArr").unwrap(), + EffectOp::StartObject => write!(out, "StartObj").unwrap(), + EffectOp::EndObject => write!(out, "EndObj").unwrap(), + EffectOp::Field(id) => write!(out, "Field({})", self.string(*id)).unwrap(), + EffectOp::StartVariant(id) => { + write!(out, "Var({})", self.string(*id)).unwrap() + } + EffectOp::EndVariant => write!(out, "EndVar").unwrap(), + EffectOp::ToString => write!(out, "ToStr").unwrap(), + } + } + write!(out, "]").unwrap(); + } + + // Successors + let succs = view.successors(); + if !succs.is_empty() { + write!(out, " -> [").unwrap(); + for (j, s) in succs.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + write!(out, "T{}", s).unwrap(); + } + write!(out, "]").unwrap(); + } + + writeln!(out).unwrap(); + } + + // Strings + if self.string_ref_count > 0 { + writeln!(out).unwrap(); + writeln!(out, " Strings:").unwrap(); + for i in 0..self.string_ref_count { + 
let s = self.string(i); + writeln!(out, " S{}: {:?}", i, s).unwrap(); + } + } + + // Types + if self.type_def_count > 0 { + writeln!(out).unwrap(); + writeln!(out, " Types:").unwrap(); + for (i, td) in self.type_defs().iter().enumerate() { + let type_id = i as u16 + super::TYPE_COMPOSITE_START; + let name = if td.name != super::STRING_NONE { + self.string(td.name) + } else { + "" + }; + write!(out, " Ty{}: {} {:?}", type_id, name, td.kind).unwrap(); + if td.is_wrapper() { + if let Some(inner) = td.inner_type() { + write!(out, " inner=Ty{}", inner).unwrap(); + } + } else if let Some(members) = td.members_slice() { + let resolved = self.resolve_type_members(members); + write!(out, " {{").unwrap(); + for (j, m) in resolved.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + write!(out, "{}: Ty{}", self.string(m.name), m.ty).unwrap(); + } + write!(out, "}}").unwrap(); + } + writeln!(out).unwrap(); + } + } + + writeln!(out, "}}").unwrap(); + out + } +} + +/// Offset metadata extracted from CompiledQuery. +#[derive(Debug, Clone, Copy)] +pub struct CompiledQueryOffsets { + pub successors_offset: u32, + pub effects_offset: u32, + pub negated_fields_offset: u32, + pub string_refs_offset: u32, + pub string_bytes_offset: u32, + pub type_defs_offset: u32, + pub type_members_offset: u32, + pub entrypoints_offset: u32, + pub trivia_kinds_offset: u32, +} + +// ───────────────────────────────────────────────────────────────────────────── +// View types +// ───────────────────────────────────────────────────────────────────────────── + +/// A view of a transition with resolved slices. +/// +/// Hides offset arithmetic and inline/spilled distinction from callers. +pub struct TransitionView<'a> { + query: &'a CompiledQuery, + raw: &'a Transition, +} + +impl<'a> TransitionView<'a> { + /// Returns the raw transition. + #[inline] + pub fn raw(&self) -> &'a Transition { + self.raw + } + + /// Returns resolved successor IDs. 
+ #[inline] + pub fn successors(&self) -> &'a [TransitionId] { + if self.raw.has_inline_successors() { + let count = self.raw.successor_count as usize; + &self.raw.successor_data[..count] + } else { + let start = self.raw.spilled_successors_index() as usize; + let count = self.raw.successor_count as usize; + &self.query.successors()[start..start + count] + } + } + + /// Returns resolved effect operations. + #[inline] + pub fn effects(&self) -> &'a [EffectOp] { + self.query.resolve_effects(self.raw.effects()) + } + + /// Returns the matcher. + #[inline] + pub fn matcher(&self) -> &super::Matcher { + &self.raw.matcher + } + + /// Returns a view of the matcher with resolved slices. + #[inline] + pub fn matcher_view(&self) -> MatcherView<'a> { + MatcherView { + query: self.query, + raw: &self.raw.matcher, + } + } + + /// Returns the navigation instruction. + #[inline] + pub fn nav(&self) -> super::Nav { + self.raw.nav + } + + /// Returns the ref transition marker. + #[inline] + pub fn ref_marker(&self) -> super::RefTransition { + self.raw.ref_marker + } +} + +/// A view of a matcher with resolved slices. +pub struct MatcherView<'a> { + query: &'a CompiledQuery, + raw: &'a super::Matcher, +} + +impl<'a> MatcherView<'a> { + /// Returns the raw matcher. + #[inline] + pub fn raw(&self) -> &'a super::Matcher { + self.raw + } + + /// Returns resolved negated fields. + #[inline] + pub fn negated_fields(&self) -> &'a [NodeFieldId] { + self.query.resolve_negated_fields(self.raw.negated_fields()) + } + + /// Returns the matcher kind. + #[inline] + pub fn kind(&self) -> super::MatcherKind { + self.raw.kind() + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Alignment helpers +// ───────────────────────────────────────────────────────────────────────────── + +/// Aligns an offset up to the given alignment. 
+#[inline] +pub const fn align_up(offset: u32, align: u32) -> u32 { + (offset + align - 1) & !(align - 1) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn buffer_alignment() { + let buf = CompiledQueryBuffer::allocate(128); + assert_eq!(buf.as_ptr() as usize % BUFFER_ALIGN, 0); + assert_eq!(buf.len(), 128); + } + + #[test] + fn buffer_empty() { + let buf = CompiledQueryBuffer::allocate(0); + assert!(buf.is_empty()); + assert_eq!(buf.as_slice(), &[] as &[u8]); + } + + #[test] + fn align_up_values() { + assert_eq!(align_up(0, 4), 0); + assert_eq!(align_up(1, 4), 4); + assert_eq!(align_up(4, 4), 4); + assert_eq!(align_up(5, 4), 8); + assert_eq!(align_up(63, 64), 64); + assert_eq!(align_up(64, 64), 64); + assert_eq!(align_up(65, 64), 128); + } +} diff --git a/crates/plotnik-lib/src/ir/emit.rs b/crates/plotnik-lib/src/ir/emit.rs new file mode 100644 index 00000000..c0e72c64 --- /dev/null +++ b/crates/plotnik-lib/src/ir/emit.rs @@ -0,0 +1,979 @@ +//! Query emitter: transforms BuildGraph + TypeInferenceResult into CompiledQuery. +//! +//! Three-pass construction: +//! 1. Analysis: count elements, intern strings, collect data +//! 2. Layout: compute aligned offsets, allocate once +//! 3. Emission: write via ptr::write + +use std::collections::HashMap; +use std::ptr; + +use super::compiled::{CompiledQuery, CompiledQueryBuffer, align_up}; +use super::ids::{NodeFieldId, NodeTypeId, RefId, StringId, TYPE_NODE, TransitionId}; +use super::strings::StringInterner; +use super::{ + EffectOp, Entrypoint, MAX_INLINE_SUCCESSORS, Matcher, RefTransition, Slice, StringRef, + Transition, TypeDef, TypeMember, +}; + +use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, RefMarker}; +use crate::query::typing::TypeInferenceResult; + +/// Callback for resolving node kind names to IDs. +pub trait NodeKindResolver { + /// Resolves a named node kind to its ID. Returns `None` if unknown. 
+ fn resolve_kind(&self, name: &str) -> Option; + + /// Resolves a field name to its ID. Returns `None` if unknown. + fn resolve_field(&self, name: &str) -> Option; +} + +/// A resolver that always fails (for testing without tree-sitter). +pub struct NullResolver; + +impl NodeKindResolver for NullResolver { + fn resolve_kind(&self, _name: &str) -> Option { + None + } + fn resolve_field(&self, _name: &str) -> Option { + None + } +} + +/// Map-based resolver for testing. +pub struct MapResolver { + kinds: HashMap, + fields: HashMap, +} + +impl MapResolver { + pub fn new() -> Self { + Self { + kinds: HashMap::new(), + fields: HashMap::new(), + } + } + + pub fn add_kind(&mut self, name: impl Into, id: NodeTypeId) { + self.kinds.insert(name.into(), id); + } + + pub fn add_field(&mut self, name: impl Into, id: NodeFieldId) { + self.fields.insert(name.into(), id); + } +} + +impl Default for MapResolver { + fn default() -> Self { + Self::new() + } +} + +impl NodeKindResolver for MapResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.kinds.get(name).copied() + } + + fn resolve_field(&self, name: &str) -> Option { + self.fields.get(name).copied() + } +} + +/// Query emitter error. +#[derive(Debug, Clone)] +pub enum EmitError { + /// Unknown node kind encountered. + UnknownNodeKind(String), + /// Unknown field name encountered. + UnknownField(String), + /// Too many transitions (exceeds u32::MAX). + TooManyTransitions, + /// Too many successors (exceeds u32::MAX). + TooManySuccessors, + /// Too many effects (exceeds u32::MAX). + TooManyEffects, + /// Internal consistency error. 
+ InternalError(String), +} + +impl std::fmt::Display for EmitError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + EmitError::UnknownNodeKind(s) => write!(f, "unknown node kind: {}", s), + EmitError::UnknownField(s) => write!(f, "unknown field: {}", s), + EmitError::TooManyTransitions => write!(f, "too many transitions"), + EmitError::TooManySuccessors => write!(f, "too many successors"), + EmitError::TooManyEffects => write!(f, "too many effects"), + EmitError::InternalError(s) => write!(f, "internal error: {}", s), + } + } +} + +impl std::error::Error for EmitError {} + +/// Result type for emit operations. +pub type EmitResult = Result; + +/// Emitter state during analysis phase. +struct EmitContext<'src, 'g> { + graph: &'g BuildGraph<'src>, + type_info: &'g TypeInferenceResult<'src>, + strings: StringInterner<'src>, + + // Collected data + effects: Vec, + negated_fields: Vec, + /// Spilled successors (for transitions with >8 successors) + spilled_successors: Vec, + + // Maps from BuildGraph to IR + /// For each transition, its effects slice + transition_effects: Vec>, + /// For each transition, its negated fields slice + transition_negated_fields: Vec>, + /// For each transition, if successors spill: (start_index in spilled_successors, count) + transition_spilled: Vec>, +} + +impl<'src, 'g> EmitContext<'src, 'g> { + fn new(graph: &'g BuildGraph<'src>, type_info: &'g TypeInferenceResult<'src>) -> Self { + let node_count = graph.len(); + Self { + graph, + type_info, + strings: StringInterner::new(), + effects: Vec::new(), + negated_fields: Vec::new(), + spilled_successors: Vec::new(), + transition_effects: Vec::with_capacity(node_count), + transition_negated_fields: Vec::with_capacity(node_count), + transition_spilled: Vec::with_capacity(node_count), + } + } + + fn intern(&mut self, s: &'src str) -> StringId { + self.strings.intern(s) + } +} + +/// Layout information computed in pass 2. 
struct LayoutInfo {
    /// Total buffer size in bytes (64-byte aligned).
    buffer_len: usize,
    // Byte offsets of each segment.
    successors_offset: u32,
    effects_offset: u32,
    negated_fields_offset: u32,
    string_refs_offset: u32,
    string_bytes_offset: u32,
    type_defs_offset: u32,
    type_members_offset: u32,
    entrypoints_offset: u32,
    trivia_kinds_offset: u32,

    // Counts
    transition_count: u32,
    successor_count: u32,
    effect_count: u32,
    negated_field_count: u16,
    string_ref_count: u16,
    type_def_count: u16,
    type_member_count: u16,
    entrypoint_count: u16,
    trivia_kind_count: u16,
}

/// Emits a compiled query from a BuildGraph.
pub struct QueryEmitter<'src, 'g, R> {
    ctx: EmitContext<'src, 'g>,
    resolver: R,
    trivia_kinds: Vec<NodeTypeId>,
}

impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> {
    /// Creates a new emitter over `graph` with the given type information
    /// and node-kind/field resolver.
    pub fn new(
        graph: &'g BuildGraph<'src>,
        type_info: &'g TypeInferenceResult<'src>,
        resolver: R,
    ) -> Self {
        Self {
            ctx: EmitContext::new(graph, type_info),
            resolver,
            trivia_kinds: Vec::new(),
        }
    }

    /// Sets trivia node kinds (e.g., comments) to skip during execution.
    pub fn with_trivia_kinds(mut self, kinds: Vec<NodeTypeId>) -> Self {
        self.trivia_kinds = kinds;
        self
    }

    /// Emits the compiled query.
+ pub fn emit(mut self) -> EmitResult { + // Pass 1: Analysis + self.analyze()?; + + // Pass 2: Layout + let layout = self.compute_layout()?; + + // Pass 3: Emission + self.emit_buffer(layout) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 1: Analysis + // ───────────────────────────────────────────────────────────────────── + + fn analyze(&mut self) -> EmitResult<()> { + // Pre-intern definition names for entrypoints + for (name, _) in self.ctx.graph.definitions() { + self.ctx.intern(name); + } + + // Pre-intern type names + for type_def in &self.ctx.type_info.type_defs { + if let Some(name) = type_def.name { + self.ctx.intern(name); + } + for member in &type_def.members { + self.ctx.intern(member.name); + } + } + + // Analyze each transition + for (_, node) in self.ctx.graph.iter() { + self.analyze_node(node)?; + } + + Ok(()) + } + + fn analyze_node(&mut self, node: &BuildNode<'src>) -> EmitResult<()> { + // Collect effects + let effects_start = self.ctx.effects.len() as u32; + for effect in &node.effects { + let ir_effect = self.convert_effect(effect)?; + self.ctx.effects.push(ir_effect); + } + let effects_len = (self.ctx.effects.len() as u32 - effects_start) as u16; + self.ctx + .transition_effects + .push(Slice::new(effects_start, effects_len)); + + // Collect negated fields + let negated_start = self.ctx.negated_fields.len() as u32; + if let BuildMatcher::Node { negated_fields, .. 
} = &node.matcher { + for field_name in negated_fields { + let field_id = self + .resolver + .resolve_field(field_name) + .ok_or_else(|| EmitError::UnknownField((*field_name).to_string()))?; + self.ctx.negated_fields.push(field_id); + } + } + let negated_len = (self.ctx.negated_fields.len() as u32 - negated_start) as u16; + self.ctx + .transition_negated_fields + .push(Slice::new(negated_start, negated_len)); + + // Check if successors need to spill + if node.successors.len() > MAX_INLINE_SUCCESSORS { + let start = self.ctx.spilled_successors.len() as u32; + for &succ in &node.successors { + self.ctx.spilled_successors.push(succ); + } + self.ctx + .transition_spilled + .push(Some((start, node.successors.len() as u32))); + } else { + self.ctx.transition_spilled.push(None); + } + + Ok(()) + } + + fn convert_effect(&mut self, effect: &BuildEffect<'src>) -> EmitResult { + Ok(match effect { + BuildEffect::CaptureNode => EffectOp::CaptureNode, + BuildEffect::ClearCurrent => EffectOp::ClearCurrent, + BuildEffect::StartArray { .. } => EffectOp::StartArray, + BuildEffect::PushElement => EffectOp::PushElement, + BuildEffect::EndArray => EffectOp::EndArray, + BuildEffect::StartObject { .. } => EffectOp::StartObject, + BuildEffect::EndObject => EffectOp::EndObject, + BuildEffect::Field { name, .. 
} => { + let id = self.ctx.intern(name); + EffectOp::Field(id) + } + BuildEffect::StartVariant(tag) => { + let id = self.ctx.intern(tag); + EffectOp::StartVariant(id) + } + BuildEffect::EndVariant => EffectOp::EndVariant, + BuildEffect::ToString => EffectOp::ToString, + }) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 2: Layout + // ───────────────────────────────────────────────────────────────────── + + fn compute_layout(&self) -> EmitResult { + let transition_count = self.ctx.graph.len() as u32; + let successor_count = self.ctx.spilled_successors.len() as u32; + let effect_count = self.ctx.effects.len() as u32; + let negated_field_count = self.ctx.negated_fields.len() as u16; + let string_ref_count = self.ctx.strings.len() as u16; + let type_def_count = self.ctx.type_info.type_defs.len() as u16; + let type_member_count: u16 = self + .ctx + .type_info + .type_defs + .iter() + .map(|td| td.members.len() as u16) + .sum(); + let entrypoint_count = self.ctx.graph.definitions().count() as u16; + let trivia_kind_count = self.trivia_kinds.len() as u16; + + // Compute offsets with proper alignment + let mut offset: u32 = 0; + + // Transitions at offset 0, 64-byte aligned + offset += transition_count * 64; + + // Successors: align 4 + let successors_offset = align_up(offset, 4); + offset = successors_offset + successor_count * 4; + + // Effects: align 4 (EffectOp is 4 bytes with repr(C, u16) but discriminant+payload) + let effects_offset = align_up(offset, 4); + offset = effects_offset + effect_count * 4; + + // Negated fields: align 2 + let negated_fields_offset = align_up(offset, 2); + offset = negated_fields_offset + (negated_field_count as u32) * 2; + + // String refs: align 4 + let string_refs_offset = align_up(offset, 4); + offset = string_refs_offset + (string_ref_count as u32) * 8; + + // String bytes: align 1 + let string_bytes_offset = offset; + offset += self.ctx.strings.total_bytes() as u32; + + // Type defs: align 
4 + let type_defs_offset = align_up(offset, 4); + offset = type_defs_offset + (type_def_count as u32) * 12; + + // Type members: align 2 + let type_members_offset = align_up(offset, 2); + offset = type_members_offset + (type_member_count as u32) * 4; + + // Entrypoints: align 4 + let entrypoints_offset = align_up(offset, 4); + offset = entrypoints_offset + (entrypoint_count as u32) * 12; + + // Trivia kinds: align 2 + let trivia_kinds_offset = if trivia_kind_count > 0 { + let aligned = align_up(offset, 2); + offset = aligned + (trivia_kind_count as u32) * 2; + aligned + } else { + 0 + }; + + // Final buffer size, aligned to 64 for potential mmap + let buffer_len = align_up(offset, 64) as usize; + + Ok(LayoutInfo { + buffer_len, + successors_offset, + effects_offset, + negated_fields_offset, + string_refs_offset, + string_bytes_offset, + type_defs_offset, + type_members_offset, + entrypoints_offset, + trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + }) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 3: Emission + // ───────────────────────────────────────────────────────────────────── + + fn emit_buffer(self, layout: LayoutInfo) -> EmitResult { + let mut buffer = CompiledQueryBuffer::allocate(layout.buffer_len); + let base = buffer.as_mut_ptr(); + + // Emit transitions + self.emit_transitions(base, &layout)?; + + // Emit successors + self.emit_successors(base, &layout); + + // Emit effects + self.emit_effects(base, &layout); + + // Emit negated fields + self.emit_negated_fields(base, &layout); + + // Emit strings + self.emit_strings(base, &layout); + + // Emit type metadata + self.emit_types(base, &layout); + + // Emit entrypoints + self.emit_entrypoints(base, &layout)?; + + // Emit trivia kinds + self.emit_trivia_kinds(base, &layout); + + Ok(CompiledQuery::new( + buffer, + 
layout.successors_offset, + layout.effects_offset, + layout.negated_fields_offset, + layout.string_refs_offset, + layout.string_bytes_offset, + layout.type_defs_offset, + layout.type_members_offset, + layout.entrypoints_offset, + layout.trivia_kinds_offset, + layout.transition_count, + layout.successor_count, + layout.effect_count, + layout.negated_field_count, + layout.string_ref_count, + layout.type_def_count, + layout.type_member_count, + layout.entrypoint_count, + layout.trivia_kind_count, + )) + } + + fn emit_transitions(&self, base: *mut u8, _layout: &LayoutInfo) -> EmitResult<()> { + let transitions_ptr = base as *mut Transition; + + for (idx, (_, node)) in self.ctx.graph.iter().enumerate() { + let transition = self.build_transition(node, idx)?; + // SAFETY: buffer is properly sized and aligned + unsafe { + ptr::write(transitions_ptr.add(idx), transition); + } + } + + Ok(()) + } + + fn build_transition(&self, node: &BuildNode<'src>, idx: usize) -> EmitResult { + let matcher = self.convert_matcher(&node.matcher)?; + let ref_marker = self.convert_ref_marker(&node.ref_marker); + let effects = self.ctx.transition_effects[idx]; + let negated_fields_slice = self.ctx.transition_negated_fields[idx]; + + // Build successor data + let (successor_count, successor_data) = + if let Some((start, count)) = self.ctx.transition_spilled[idx] { + // Spilled: store index in successor_data[0] + let mut data = [0u32; MAX_INLINE_SUCCESSORS]; + data[0] = start; + (count, data) + } else { + // Inline + let mut data = [0u32; MAX_INLINE_SUCCESSORS]; + for (i, &succ) in node.successors.iter().enumerate() { + data[i] = succ; + } + (node.successors.len() as u32, data) + }; + + // Inject negated_fields into matcher if applicable + let matcher = match matcher { + Matcher::Node { kind, field, .. } => Matcher::Node { + kind, + field, + negated_fields: negated_fields_slice, + }, + Matcher::Anonymous { kind, field, .. 
} => Matcher::Anonymous { + kind, + field, + negated_fields: Slice::empty(), + }, + other => other, + }; + + let transition = Transition::new( + matcher, + ref_marker, + node.nav, + effects, + successor_count, + successor_data, + ); + + Ok(transition) + } + + fn convert_matcher(&self, matcher: &BuildMatcher<'src>) -> EmitResult { + Ok(match matcher { + BuildMatcher::Epsilon => Matcher::Epsilon, + BuildMatcher::Node { kind, field, .. } => { + let kind_id = self + .resolver + .resolve_kind(kind) + .ok_or_else(|| EmitError::UnknownNodeKind((*kind).to_string()))?; + let field_id = match field { + Some(f) => self.resolver.resolve_field(f), + None => None, + }; + Matcher::Node { + kind: kind_id, + field: field_id, + negated_fields: Slice::empty(), // Will be filled in build_transition + } + } + BuildMatcher::Anonymous { literal, field } => { + // For anonymous nodes, we use the literal as a synthetic kind ID + // In practice, this would be resolved differently + let kind_id = self.resolver.resolve_kind(literal).unwrap_or(0); + let field_id = match field { + Some(f) => self.resolver.resolve_field(f), + None => None, + }; + Matcher::Anonymous { + kind: kind_id, + field: field_id, + negated_fields: Slice::empty(), + } + } + BuildMatcher::Wildcard { field } => { + // Wildcard doesn't use field in IR representation + let _ = field; + Matcher::Wildcard + } + }) + } + + fn convert_ref_marker(&self, marker: &RefMarker) -> RefTransition { + match marker { + RefMarker::None => RefTransition::None, + RefMarker::Enter { ref_id } => RefTransition::Enter(*ref_id as RefId), + RefMarker::Exit { ref_id } => RefTransition::Exit(*ref_id as RefId), + } + } + + fn emit_successors(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.spilled_successors.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.successors_offset as usize) } as *mut TransitionId; + for (i, &succ) in self.ctx.spilled_successors.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), succ); + } + } + 
} + + fn emit_effects(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.effects.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.effects_offset as usize) } as *mut EffectOp; + for (i, effect) in self.ctx.effects.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), *effect); + } + } + } + + fn emit_negated_fields(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.negated_fields.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.negated_fields_offset as usize) } as *mut NodeFieldId; + for (i, &field) in self.ctx.negated_fields.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), field); + } + } + } + + fn emit_strings(&self, base: *mut u8, layout: &LayoutInfo) { + // Emit string refs + let refs_ptr = unsafe { base.add(layout.string_refs_offset as usize) } as *mut StringRef; + let bytes_ptr = unsafe { base.add(layout.string_bytes_offset as usize) }; + + let mut byte_offset: u32 = 0; + for (i, (_, s)) in self.ctx.strings.iter().enumerate() { + // Write StringRef + let string_ref = StringRef::new(byte_offset, s.len() as u16); + unsafe { + ptr::write(refs_ptr.add(i), string_ref); + } + + // Write string bytes + unsafe { + ptr::copy_nonoverlapping(s.as_ptr(), bytes_ptr.add(byte_offset as usize), s.len()); + } + + byte_offset += s.len() as u32; + } + } + + fn emit_types(&self, base: *mut u8, layout: &LayoutInfo) { + let defs_ptr = unsafe { base.add(layout.type_defs_offset as usize) } as *mut TypeDef; + let members_ptr = + unsafe { base.add(layout.type_members_offset as usize) } as *mut TypeMember; + + let mut member_idx: u32 = 0; + + for (i, type_def) in self.ctx.type_info.type_defs.iter().enumerate() { + let name_id = type_def + .name + .and_then(|n| self.ctx.strings.get(n)) + .unwrap_or(super::ids::STRING_NONE); + + let ir_def = if let Some(inner) = type_def.inner_type { + TypeDef::wrapper(type_def.kind, inner) + } else { + let members_start = member_idx; + let members_len = type_def.members.len() as u16; + + // 
Emit members + for member in &type_def.members { + let member_name_id = self + .ctx + .strings + .get(member.name) + .expect("member name should be interned"); + let ir_member = TypeMember::new(member_name_id, member.ty); + unsafe { + ptr::write(members_ptr.add(member_idx as usize), ir_member); + } + member_idx += 1; + } + + TypeDef::composite( + type_def.kind, + name_id, + Slice::new(members_start, members_len), + ) + }; + + unsafe { + ptr::write(defs_ptr.add(i), ir_def); + } + } + } + + fn emit_entrypoints(&self, base: *mut u8, layout: &LayoutInfo) -> EmitResult<()> { + let ptr = unsafe { base.add(layout.entrypoints_offset as usize) } as *mut Entrypoint; + + for (i, (name, entry_node)) in self.ctx.graph.definitions().enumerate() { + let name_id = self + .ctx + .strings + .get(name) + .expect("definition name should be interned"); + + // Look up the result type for this definition + let result_type = self + .ctx + .type_info + .entrypoint_types + .get(name) + .copied() + .unwrap_or(TYPE_NODE); + + let entrypoint = Entrypoint::new(name_id, entry_node, result_type); + unsafe { + ptr::write(ptr.add(i), entrypoint); + } + } + + Ok(()) + } + + fn emit_trivia_kinds(&self, base: *mut u8, layout: &LayoutInfo) { + if self.trivia_kinds.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.trivia_kinds_offset as usize) } as *mut NodeTypeId; + for (i, &kind) in self.trivia_kinds.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), kind); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode}; + use crate::query::typing::TypeInferenceResult; + use std::num::NonZeroU16; + + fn make_resolver() -> MapResolver { + let mut r = MapResolver::new(); + r.add_kind("identifier", 1); + r.add_kind("function_declaration", 2); + r.add_field("name", NonZeroU16::new(1).unwrap()); + r.add_field("body", NonZeroU16::new(2).unwrap()); + r + } + + #[test] + fn emit_simple_query() { + let mut graph = 
BuildGraph::new(); + + // Create a simple: (identifier) @id + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + assert_eq!(compiled.transition_count(), 1); + assert_eq!(compiled.entrypoint_count(), 1); + + let t = compiled.transition(0); + assert!(matches!(t.matcher, Matcher::Node { kind: 1, .. })); + } + + #[test] + fn emit_with_effects() { + let mut graph = BuildGraph::new(); + + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.node_mut(node).add_effect(BuildEffect::Field { + name: "name", + span: Default::default(), + }); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let view = compiled.transition_view(0); + let effects = view.effects(); + assert_eq!(effects.len(), 2); + assert!(matches!(effects[0], EffectOp::CaptureNode)); + assert!(matches!(effects[1], EffectOp::Field(_))); + + // Verify string was interned + if let EffectOp::Field(id) = effects[1] { + assert_eq!(compiled.string(id), "name"); + } + } + + #[test] + fn emit_with_successors() { + let mut graph = BuildGraph::new(); + + // Create: entry -> branch -> [a, b] + let entry = graph.add_epsilon(); + let a = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + let b = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( + "function_declaration", + ))); + graph.connect(entry, a); + graph.connect(entry, b); + 
graph.add_definition("Main", entry); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + assert_eq!(compiled.transition_count(), 3); + + let view = compiled.transition_view(0); + let successors = view.successors(); + assert_eq!(successors.len(), 2); + assert_eq!(successors[0], 1); + assert_eq!(successors[1], 2); + } + + #[test] + fn emit_many_successors_spills() { + let mut graph = BuildGraph::new(); + + // Create entry with 10 successors (exceeds MAX_INLINE_SUCCESSORS) + let entry = graph.add_epsilon(); + for _ in 0..10 { + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.connect(entry, node); + } + graph.add_definition("Main", entry); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let t = compiled.transition(0); + assert!(!t.has_inline_successors()); + assert_eq!(t.successor_count, 10); + + let view = compiled.transition_view(0); + let successors = view.successors(); + assert_eq!(successors.len(), 10); + } + + #[test] + fn string_interning_deduplicates() { + let mut graph = BuildGraph::new(); + + // Two fields with same name + let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n1).add_effect(BuildEffect::Field { + name: "value", + span: Default::default(), + }); + + let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n2).add_effect(BuildEffect::Field { + name: "value", + span: Default::default(), + }); + graph.connect(n1, n2); + + graph.add_definition("Main", n1); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = 
QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + // Both should reference the same string ID + let e1 = compiled.transition_view(0).effects(); + let e2 = compiled.transition_view(1).effects(); + + let id1 = match e1[0] { + EffectOp::Field(id) => id, + _ => panic!(), + }; + let id2 = match e2[0] { + EffectOp::Field(id) => id, + _ => panic!(), + }; + + assert_eq!(id1, id2); + assert_eq!(compiled.string(id1), "value"); + } + + #[test] + fn unknown_node_kind_errors() { + let mut graph = BuildGraph::new(); + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("unknown_kind"))); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let result = emitter.emit(); + + assert!(matches!(result, Err(EmitError::UnknownNodeKind(_)))); + } + + #[test] + fn serialize_deserialize_roundtrip() { + let mut graph = BuildGraph::new(); + + // Build a small graph with effects + let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n1).add_effect(BuildEffect::CaptureNode); + graph.node_mut(n1).add_effect(BuildEffect::Field { + name: "id", + span: Default::default(), + }); + + let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( + "function_declaration", + ))); + graph.node_mut(n2).add_effect(BuildEffect::CaptureNode); + graph.connect(n1, n2); + + graph.add_definition("Main", n1); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + // Emit + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + // Serialize + let bytes = crate::ir::to_bytes(&compiled).expect("serialize should succeed"); + + // Deserialize + let restored = crate::ir::from_bytes(&bytes).expect("deserialize should succeed"); + + // 
Verify counts + assert_eq!(restored.transition_count(), compiled.transition_count()); + assert_eq!(restored.entrypoint_count(), compiled.entrypoint_count()); + + // Check transitions match + for i in 0..compiled.transition_count() { + let orig = compiled.transition_view(i); + let rest = restored.transition_view(i); + + assert_eq!(orig.successors(), rest.successors()); + assert_eq!(orig.effects().len(), rest.effects().len()); + } + + // Check strings match + let ep = restored.entrypoints()[0]; + assert_eq!(restored.string(ep.name_id()), "Main"); + } + + #[test] + fn dump_produces_output() { + let mut graph = BuildGraph::new(); + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.add_definition("Test", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let dump = compiled.dump(); + + assert!(dump.contains("CompiledQuery")); + assert!(dump.contains("Test")); + assert!(dump.contains("Capture")); + assert!(dump.contains("Node(1)")); + } +} diff --git a/crates/plotnik-lib/src/ir/mod.rs b/crates/plotnik-lib/src/ir/mod.rs index df4de243..c50fce7a 100644 --- a/crates/plotnik-lib/src/ir/mod.rs +++ b/crates/plotnik-lib/src/ir/mod.rs @@ -9,14 +9,18 @@ //! Note: This module contains only type definitions. Query execution //! lives elsewhere. 
+mod compiled; mod effect; +mod emit; mod entrypoint; mod ids; mod matcher; mod nav; mod ref_transition; +mod serialize; mod slice; mod string_ref; +mod strings; mod transition; mod type_metadata; @@ -33,7 +37,8 @@ mod string_ref_tests; // Re-export ID types pub use ids::{ - DataFieldId, NodeFieldId, NodeTypeId, RefId, StringId, TransitionId, TypeId, VariantTagId, + DataFieldId, NodeFieldId, NodeTypeId, RefId, STRING_NONE, StringId, TransitionId, TypeId, + VariantTagId, }; // Re-export TypeId constants @@ -65,3 +70,20 @@ pub use string_ref::StringRef; // Re-export entrypoint pub use entrypoint::Entrypoint; + +// Re-export compiled query types +pub use compiled::{ + BUFFER_ALIGN, CompiledQuery, CompiledQueryBuffer, CompiledQueryOffsets, FORMAT_VERSION, MAGIC, + MatcherView, TransitionView, align_up, +}; + +// Re-export string interner +pub use strings::StringInterner; + +// Re-export emitter +pub use emit::{EmitError, EmitResult, MapResolver, NodeKindResolver, NullResolver, QueryEmitter}; + +// Re-export serialization +pub use serialize::{ + HEADER_SIZE, SerializeError, SerializeResult, deserialize, from_bytes, serialize, to_bytes, +}; diff --git a/crates/plotnik-lib/src/ir/serialize.rs b/crates/plotnik-lib/src/ir/serialize.rs new file mode 100644 index 00000000..850b3682 --- /dev/null +++ b/crates/plotnik-lib/src/ir/serialize.rs @@ -0,0 +1,414 @@ +//! Serialization and deserialization for compiled queries. +//! +//! Binary format (see ADR-0004): +//! ```text +//! Header (64 bytes): +//! magic: [u8; 4] b"PLNK" +//! version: u32 format version +//! checksum: u32 CRC32(header[12..64] || buffer_data) +//! buffer_len: u32 +//! successors_offset: u32 +//! effects_offset: u32 +//! negated_fields_offset: u32 +//! string_refs_offset: u32 +//! string_bytes_offset: u32 +//! type_defs_offset: u32 +//! type_members_offset: u32 +//! entrypoints_offset: u32 +//! trivia_kinds_offset: u32 +//! _reserved: [u8; 12] +//! 
``` + +use std::io::{Read, Write}; + +use super::compiled::{CompiledQuery, CompiledQueryBuffer, FORMAT_VERSION, MAGIC}; + +/// Header size in bytes (64 bytes for cache-line alignment). +pub const HEADER_SIZE: usize = 64; + +/// Serialization error. +#[derive(Debug, Clone)] +pub enum SerializeError { + /// Invalid magic bytes. + InvalidMagic([u8; 4]), + /// Version mismatch (expected, found). + VersionMismatch { expected: u32, found: u32 }, + /// Checksum mismatch (expected, found). + ChecksumMismatch { expected: u32, found: u32 }, + /// IO error message. + Io(String), + /// Header too short. + HeaderTooShort, + /// Buffer alignment error. + AlignmentError, +} + +impl std::fmt::Display for SerializeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SerializeError::InvalidMagic(m) => { + write!(f, "invalid magic: {:?}", m) + } + SerializeError::VersionMismatch { expected, found } => { + write!( + f, + "version mismatch: expected {}, found {}", + expected, found + ) + } + SerializeError::ChecksumMismatch { expected, found } => { + write!( + f, + "checksum mismatch: expected {:08x}, found {:08x}", + expected, found + ) + } + SerializeError::Io(msg) => write!(f, "io error: {}", msg), + SerializeError::HeaderTooShort => write!(f, "header too short"), + SerializeError::AlignmentError => write!(f, "buffer alignment error"), + } + } +} + +impl std::error::Error for SerializeError {} + +impl From for SerializeError { + fn from(e: std::io::Error) -> Self { + SerializeError::Io(e.to_string()) + } +} + +/// Result type for serialization operations. +pub type SerializeResult = Result; + +/// Computes CRC32 checksum. 
+fn crc32(data: &[u8]) -> u32 { + // Simple CRC32 implementation (IEEE polynomial) + const CRC32_TABLE: [u32; 256] = generate_crc32_table(); + + let mut crc: u32 = 0xFFFFFFFF; + for &byte in data { + let index = ((crc ^ byte as u32) & 0xFF) as usize; + crc = CRC32_TABLE[index] ^ (crc >> 8); + } + !crc +} + +const fn generate_crc32_table() -> [u32; 256] { + const POLYNOMIAL: u32 = 0xEDB88320; + let mut table = [0u32; 256]; + let mut i = 0; + while i < 256 { + let mut crc = i as u32; + let mut j = 0; + while j < 8 { + if crc & 1 != 0 { + crc = (crc >> 1) ^ POLYNOMIAL; + } else { + crc >>= 1; + } + j += 1; + } + table[i] = crc; + i += 1; + } + table +} + +/// Serialized header structure (64 bytes, matching ADR-0004). +/// +/// Large counts (transition, successor, effect) are computed from offsets. +/// Small counts are stored in the reserved area since they can't be reliably +/// computed due to alignment padding. +#[repr(C)] +struct Header { + magic: [u8; 4], + version: u32, + checksum: u32, + buffer_len: u32, + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, + // Counts stored in reserved area (12 bytes = 6 x u16) + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, +} + +const _: () = assert!(std::mem::size_of::
() == HEADER_SIZE); + +impl Header { + fn to_bytes(&self) -> [u8; HEADER_SIZE] { + let mut bytes = [0u8; HEADER_SIZE]; + bytes[0..4].copy_from_slice(&self.magic); + bytes[4..8].copy_from_slice(&self.version.to_le_bytes()); + bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes()); + bytes[12..16].copy_from_slice(&self.buffer_len.to_le_bytes()); + bytes[16..20].copy_from_slice(&self.successors_offset.to_le_bytes()); + bytes[20..24].copy_from_slice(&self.effects_offset.to_le_bytes()); + bytes[24..28].copy_from_slice(&self.negated_fields_offset.to_le_bytes()); + bytes[28..32].copy_from_slice(&self.string_refs_offset.to_le_bytes()); + bytes[32..36].copy_from_slice(&self.string_bytes_offset.to_le_bytes()); + bytes[36..40].copy_from_slice(&self.type_defs_offset.to_le_bytes()); + bytes[40..44].copy_from_slice(&self.type_members_offset.to_le_bytes()); + bytes[44..48].copy_from_slice(&self.entrypoints_offset.to_le_bytes()); + bytes[48..52].copy_from_slice(&self.trivia_kinds_offset.to_le_bytes()); + // Counts in reserved area + bytes[52..54].copy_from_slice(&self.negated_field_count.to_le_bytes()); + bytes[54..56].copy_from_slice(&self.string_ref_count.to_le_bytes()); + bytes[56..58].copy_from_slice(&self.type_def_count.to_le_bytes()); + bytes[58..60].copy_from_slice(&self.type_member_count.to_le_bytes()); + bytes[60..62].copy_from_slice(&self.entrypoint_count.to_le_bytes()); + bytes[62..64].copy_from_slice(&self.trivia_kind_count.to_le_bytes()); + bytes + } + + fn from_bytes(bytes: &[u8; HEADER_SIZE]) -> Self { + Self { + magic: bytes[0..4].try_into().unwrap(), + version: u32::from_le_bytes(bytes[4..8].try_into().unwrap()), + checksum: u32::from_le_bytes(bytes[8..12].try_into().unwrap()), + buffer_len: u32::from_le_bytes(bytes[12..16].try_into().unwrap()), + successors_offset: u32::from_le_bytes(bytes[16..20].try_into().unwrap()), + effects_offset: u32::from_le_bytes(bytes[20..24].try_into().unwrap()), + negated_fields_offset: 
u32::from_le_bytes(bytes[24..28].try_into().unwrap()), + string_refs_offset: u32::from_le_bytes(bytes[28..32].try_into().unwrap()), + string_bytes_offset: u32::from_le_bytes(bytes[32..36].try_into().unwrap()), + type_defs_offset: u32::from_le_bytes(bytes[36..40].try_into().unwrap()), + type_members_offset: u32::from_le_bytes(bytes[40..44].try_into().unwrap()), + entrypoints_offset: u32::from_le_bytes(bytes[44..48].try_into().unwrap()), + trivia_kinds_offset: u32::from_le_bytes(bytes[48..52].try_into().unwrap()), + negated_field_count: u16::from_le_bytes(bytes[52..54].try_into().unwrap()), + string_ref_count: u16::from_le_bytes(bytes[54..56].try_into().unwrap()), + type_def_count: u16::from_le_bytes(bytes[56..58].try_into().unwrap()), + type_member_count: u16::from_le_bytes(bytes[58..60].try_into().unwrap()), + entrypoint_count: u16::from_le_bytes(bytes[60..62].try_into().unwrap()), + trivia_kind_count: u16::from_le_bytes(bytes[62..64].try_into().unwrap()), + } + } +} + +/// Serializes a compiled query to a writer. 
+pub fn serialize(query: &CompiledQuery, mut writer: W) -> SerializeResult<()> { + let offsets = query.offsets(); + let buffer = query.buffer(); + + // Build header (without checksum first) + let mut header = Header { + magic: MAGIC, + version: FORMAT_VERSION, + checksum: 0, // Computed below + buffer_len: buffer.len() as u32, + successors_offset: offsets.successors_offset, + effects_offset: offsets.effects_offset, + negated_fields_offset: offsets.negated_fields_offset, + string_refs_offset: offsets.string_refs_offset, + string_bytes_offset: offsets.string_bytes_offset, + type_defs_offset: offsets.type_defs_offset, + type_members_offset: offsets.type_members_offset, + entrypoints_offset: offsets.entrypoints_offset, + trivia_kinds_offset: offsets.trivia_kinds_offset, + negated_field_count: query.negated_fields().len() as u16, + string_ref_count: query.string_refs().len() as u16, + type_def_count: query.type_defs().len() as u16, + type_member_count: query.type_members().len() as u16, + entrypoint_count: query.entrypoint_count(), + trivia_kind_count: query.trivia_kinds().len() as u16, + }; + + // Compute checksum over header[12..64] + buffer + let header_bytes = header.to_bytes(); + let mut checksum_data = Vec::with_capacity(52 + buffer.len()); + checksum_data.extend_from_slice(&header_bytes[12..]); + checksum_data.extend_from_slice(buffer.as_slice()); + header.checksum = crc32(&checksum_data); + + // Write header and buffer + writer.write_all(&header.to_bytes())?; + writer.write_all(buffer.as_slice())?; + + Ok(()) +} + +/// Serializes a compiled query to a byte vector. +pub fn to_bytes(query: &CompiledQuery) -> SerializeResult> { + let mut bytes = Vec::with_capacity(HEADER_SIZE + query.buffer().len()); + serialize(query, &mut bytes)?; + Ok(bytes) +} + +/// Deserializes a compiled query from a reader. 
+pub fn deserialize(mut reader: R) -> SerializeResult { + // Read header + let mut header_bytes = [0u8; HEADER_SIZE]; + reader.read_exact(&mut header_bytes)?; + + let header = Header::from_bytes(&header_bytes); + + // Verify magic + if header.magic != MAGIC { + return Err(SerializeError::InvalidMagic(header.magic)); + } + + // Verify version + if header.version != FORMAT_VERSION { + return Err(SerializeError::VersionMismatch { + expected: FORMAT_VERSION, + found: header.version, + }); + } + + // Read buffer + let buffer_len = header.buffer_len as usize; + let mut buffer = CompiledQueryBuffer::allocate(buffer_len); + reader.read_exact(buffer.as_mut_slice())?; + + // Verify checksum + let mut checksum_data = Vec::with_capacity(52 + buffer_len); + checksum_data.extend_from_slice(&header_bytes[12..]); + checksum_data.extend_from_slice(buffer.as_slice()); + let computed_checksum = crc32(&checksum_data); + + if header.checksum != computed_checksum { + return Err(SerializeError::ChecksumMismatch { + expected: header.checksum, + found: computed_checksum, + }); + } + + // Reconstruct all counts from offsets (transitions are 64 bytes each) + let transition_count = header.successors_offset / 64; + let successor_count = compute_count_from_offsets( + header.successors_offset, + header.effects_offset, + 4, // size of TransitionId + ); + let effect_count = compute_count_from_offsets( + header.effects_offset, + header.negated_fields_offset, + 4, // size of EffectOp + ); + + // Counts are read directly from header + let negated_field_count = header.negated_field_count; + let string_ref_count = header.string_ref_count; + let type_def_count = header.type_def_count; + let type_member_count = header.type_member_count; + let entrypoint_count = header.entrypoint_count; + let trivia_kind_count = header.trivia_kind_count; + + Ok(CompiledQuery::new( + buffer, + header.successors_offset, + header.effects_offset, + header.negated_fields_offset, + header.string_refs_offset, + 
header.string_bytes_offset, + header.type_defs_offset, + header.type_members_offset, + header.entrypoints_offset, + header.trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + )) +} + +/// Deserializes a compiled query from a byte slice. +pub fn from_bytes(bytes: &[u8]) -> SerializeResult { + deserialize(std::io::Cursor::new(bytes)) +} + +fn compute_count_from_offsets(start: u32, end: u32, element_size: u32) -> u32 { + if end <= start { + return 0; + } + (end - start) / element_size +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn crc32_known_value() { + // Test against known CRC32 value + let data = b"123456789"; + let crc = crc32(data); + assert_eq!(crc, 0xCBF43926); + } + + #[test] + fn header_roundtrip() { + let header = Header { + magic: MAGIC, + version: FORMAT_VERSION, + checksum: 0x12345678, + buffer_len: 1024, + successors_offset: 64, + effects_offset: 128, + negated_fields_offset: 256, + string_refs_offset: 300, + string_bytes_offset: 400, + type_defs_offset: 500, + type_members_offset: 600, + entrypoints_offset: 700, + trivia_kinds_offset: 800, + negated_field_count: 5, + string_ref_count: 8, + type_def_count: 3, + type_member_count: 12, + entrypoint_count: 2, + trivia_kind_count: 1, + }; + + let bytes = header.to_bytes(); + let parsed = Header::from_bytes(&bytes); + + assert_eq!(parsed.magic, header.magic); + assert_eq!(parsed.version, header.version); + assert_eq!(parsed.checksum, header.checksum); + assert_eq!(parsed.buffer_len, header.buffer_len); + assert_eq!(parsed.successors_offset, header.successors_offset); + assert_eq!(parsed.trivia_kinds_offset, header.trivia_kinds_offset); + assert_eq!(parsed.entrypoint_count, header.entrypoint_count); + assert_eq!(parsed.type_def_count, header.type_def_count); + } + + #[test] + fn invalid_magic_rejected() { + let mut data = vec![0u8; HEADER_SIZE + 
64]; + data[0..4].copy_from_slice(b"NOTM"); + + let result = from_bytes(&data); + assert!(matches!(result, Err(SerializeError::InvalidMagic(_)))); + } + + #[test] + fn version_mismatch_rejected() { + let mut data = vec![0u8; HEADER_SIZE + 64]; + data[0..4].copy_from_slice(&MAGIC); + data[4..8].copy_from_slice(&999u32.to_le_bytes()); + + let result = from_bytes(&data); + assert!(matches!( + result, + Err(SerializeError::VersionMismatch { .. }) + )); + } +} diff --git a/crates/plotnik-lib/src/ir/strings.rs b/crates/plotnik-lib/src/ir/strings.rs new file mode 100644 index 00000000..ae09801a --- /dev/null +++ b/crates/plotnik-lib/src/ir/strings.rs @@ -0,0 +1,140 @@ +//! String interning for compiled queries. +//! +//! Identical strings share storage and ID. Used for field names, variant tags, +//! entrypoint names, and type names. + +use std::collections::HashMap; + +use super::ids::StringId; + +/// String interner for query compilation. +/// +/// Interns strings during the analysis phase, then emits them as a contiguous +/// byte pool with `StringRef` entries pointing into it. +#[derive(Debug, Default)] +pub struct StringInterner<'src> { + /// Map from string content to assigned ID. + map: HashMap<&'src str, StringId>, + /// Strings in ID order for emission. + strings: Vec<&'src str>, +} + +impl<'src> StringInterner<'src> { + /// Creates a new empty interner. + pub fn new() -> Self { + Self::default() + } + + /// Interns a string, returning its ID. + /// + /// If the string was previously interned, returns the existing ID. + pub fn intern(&mut self, s: &'src str) -> StringId { + if let Some(&id) = self.map.get(s) { + return id; + } + + let id = self.strings.len() as StringId; + assert!(id < 0xFFFF, "string pool overflow (>65534 strings)"); + + self.map.insert(s, id); + self.strings.push(s); + id + } + + /// Returns the ID of a previously interned string, or `None`. 
+ pub fn get(&self, s: &str) -> Option { + self.map.get(s).copied() + } + + /// Returns the string for a given ID. + /// + /// # Panics + /// Panics if the ID is out of range. + pub fn resolve(&self, id: StringId) -> &'src str { + self.strings[id as usize] + } + + /// Returns the number of interned strings. + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Returns true if no strings have been interned. + pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } + + /// Returns an iterator over (id, string) pairs in ID order. + pub fn iter(&self) -> impl Iterator + '_ { + self.strings + .iter() + .enumerate() + .map(|(i, s)| (i as StringId, *s)) + } + + /// Returns the total byte size needed for all strings. + pub fn total_bytes(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Consumes the interner and returns strings in ID order. + pub fn into_strings(self) -> Vec<&'src str> { + self.strings + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_deduplicates() { + let mut interner = StringInterner::new(); + + let id1 = interner.intern("foo"); + let id2 = interner.intern("bar"); + let id3 = interner.intern("foo"); + + assert_eq!(id1, 0); + assert_eq!(id2, 1); + assert_eq!(id3, 0); // same as id1 + assert_eq!(interner.len(), 2); + } + + #[test] + fn resolve_works() { + let mut interner = StringInterner::new(); + interner.intern("hello"); + interner.intern("world"); + + assert_eq!(interner.resolve(0), "hello"); + assert_eq!(interner.resolve(1), "world"); + } + + #[test] + fn get_returns_none_for_unknown() { + let interner = StringInterner::new(); + assert_eq!(interner.get("unknown"), None); + } + + #[test] + fn total_bytes() { + let mut interner = StringInterner::new(); + interner.intern("foo"); // 3 bytes + interner.intern("hello"); // 5 bytes + interner.intern("foo"); // deduplicated + + assert_eq!(interner.total_bytes(), 8); + } + + #[test] + fn iter_order() { + let mut interner = 
StringInterner::new(); + interner.intern("a"); + interner.intern("b"); + interner.intern("c"); + + let pairs: Vec<_> = interner.iter().collect(); + assert_eq!(pairs, vec![(0, "a"), (1, "b"), (2, "c")]); + } +} diff --git a/crates/plotnik-lib/src/ir/transition.rs b/crates/plotnik-lib/src/ir/transition.rs index 34376da2..0d47c500 100644 --- a/crates/plotnik-lib/src/ir/transition.rs +++ b/crates/plotnik-lib/src/ir/transition.rs @@ -56,6 +56,27 @@ pub struct Transition { } impl Transition { + /// Creates a new transition with all fields. + #[inline] + pub fn new( + matcher: Matcher, + ref_marker: RefTransition, + nav: Nav, + effects: Slice, + successor_count: u32, + successor_data: [u32; MAX_INLINE_SUCCESSORS], + ) -> Self { + Self { + matcher, + ref_marker, + nav, + effects_len: effects.len(), + successor_count, + effects_start: effects.start_index(), + successor_data, + } + } + /// Returns the effects slice. #[inline] pub fn effects(&self) -> Slice { From 8862893adc9ddd61ad0be2979aa0881e77e1f819 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sun, 14 Dec 2025 08:55:39 -0300 Subject: [PATCH 23/23] Fix clippy --- crates/plotnik-lib/src/ir/compiled.rs | 2 +- crates/plotnik-lib/src/query/graph_build.rs | 28 +++++++++---------- .../plotnik-lib/src/query/graph_optimize.rs | 6 ++-- crates/plotnik-lib/src/query/graph_qis.rs | 8 +++--- crates/plotnik-lib/src/query/typing.rs | 20 ++++++------- crates/plotnik-lib/src/query/typing_tests.rs | 4 +-- 6 files changed, 33 insertions(+), 35 deletions(-) diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs index 7933454c..4463898b 100644 --- a/crates/plotnik-lib/src/ir/compiled.rs +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -66,7 +66,7 @@ impl CompiledQueryBuffer { /// - The backing memory must outlive the returned buffer pub unsafe fn from_external(ptr: *mut u8, len: usize) -> Self { debug_assert!( - ptr as usize % BUFFER_ALIGN == 0, + (ptr as usize).is_multiple_of(BUFFER_ALIGN), "buffer 
must be 64-byte aligned" ); Self { diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index f524b446..8def0eef 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -109,11 +109,11 @@ impl<'a> Query<'a> { let mut links: Vec<(NodeId, &'a str, Option)> = Vec::new(); for (id, node) in self.graph.iter() { - if let RefMarker::Enter { .. } = &node.ref_marker { - if let Some(name) = node.ref_name { - let exit_node = self.find_exit_for_enter(id); - links.push((id, name, exit_node)); - } + if let RefMarker::Enter { .. } = &node.ref_marker + && let Some(name) = node.ref_name + { + let exit_node = self.find_exit_for_enter(id); + links.push((id, name, exit_node)); } } @@ -135,10 +135,10 @@ impl<'a> Query<'a> { }; for (id, node) in self.graph.iter() { - if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker { - if *exit_id == ref_id { - return Some(id); - } + if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker + && *exit_id == ref_id + { + return Some(id); } } None @@ -570,11 +570,11 @@ impl<'a> Query<'a> { // Capture should happen at Exit (after reference executes, cursor at matched node). 
if let RefMarker::Enter { ref_id } = node.ref_marker { for (id, n) in self.graph.iter() { - if let RefMarker::Exit { ref_id: exit_id } = n.ref_marker { - if exit_id == ref_id { - result.push(id); - return; - } + if let RefMarker::Exit { ref_id: exit_id } = n.ref_marker + && exit_id == ref_id + { + result.push(id); + return; } } return; diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index 7faa0f0d..952df895 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -130,10 +130,8 @@ fn is_eliminable_epsilon( let successor_id = node.successors[0]; let successor = graph.node(successor_id); - if !node.nav.is_stay() && !successor.nav.is_stay() { - if !can_merge_up(node.nav, successor.nav) { - return false; - } + if !node.nav.is_stay() && !successor.nav.is_stay() && !can_merge_up(node.nav, successor.nav) { + return false; } if !node.effects.is_empty() && successor.ref_marker.is_some() { diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs index 334d42c4..a3be746e 100644 --- a/crates/plotnik-lib/src/query/graph_qis.rs +++ b/crates/plotnik-lib/src/query/graph_qis.rs @@ -63,10 +63,10 @@ impl<'a> Query<'a> { } // Captured sequence/alternation absorbs inner captures. // Need to look through quantifiers to find the actual container. 
- if let Some(inner) = c.inner() { - if !Self::is_scope_container(&inner) { - self.collect_propagating_captures_impl(&inner, out); - } + if let Some(inner) = c.inner() + && !Self::is_scope_container(&inner) + { + self.collect_propagating_captures_impl(&inner, out); } } ast::Expr::QuantifiedExpr(q) => { diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs index bfb796c8..d6faf758 100644 --- a/crates/plotnik-lib/src/query/typing.rs +++ b/crates/plotnik-lib/src/query/typing.rs @@ -348,10 +348,10 @@ impl<'src> InferenceContext<'src> { let mut merge_errors = Vec::new(); // Special case: tagged alternation at definition root creates enum - if let Expr::AltExpr(alt) = body { - if alt.kind() == AltKind::Tagged { - return self.infer_tagged_alternation_as_enum(def_name, alt, &mut merge_errors); - } + if let Expr::AltExpr(alt) = body + && alt.kind() == AltKind::Tagged + { + return self.infer_tagged_alternation_as_enum(def_name, alt, &mut merge_errors); } // General case: infer expression and collect captures into scope @@ -631,10 +631,10 @@ impl<'src> InferenceContext<'src> { // Return the type (for use when captured) but mark as not meaningful // so uncaptured refs don't affect definition's result type. 
let ref_name = r.name().map(|t| t.text().to_string()); - if let Some(name) = ref_name { - if let Some(&type_id) = self.definition_types.get(name.as_str()) { - return ExprResult::opaque(type_id); - } + if let Some(name) = ref_name + && let Some(&type_id) = self.definition_types.get(name.as_str()) + { + return ExprResult::opaque(type_id); } ExprResult::node() } @@ -808,7 +808,7 @@ impl<'a> Query<'a> { // Process definitions in dependency order for (name, body) in &sorted { - let type_id = ctx.infer_definition(*name, body); + let type_id = ctx.infer_definition(name, body); ctx.definition_types.insert(name, type_id); } @@ -908,7 +908,7 @@ impl<'a> Query<'a> { Expr::Ref(r) => { if let Some(name_token) = r.name() { let name = name_token.text(); - if def_names.contains(name) && !refs.iter().any(|&r| r == name) { + if def_names.contains(name) && !refs.contains(&name) { // Find the actual &'b str from the set if let Some(&found) = def_names.iter().find(|&&n| n == name) { refs.push(found); diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs index 2cb0179e..efd5735e 100644 --- a/crates/plotnik-lib/src/query/typing_tests.rs +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -18,7 +18,7 @@ fn infer_with_graph(source: &str) -> String { let mut out = String::new(); out.push_str("=== Graph ===\n"); out.push_str(&query.graph().dump_live(query.dead_nodes())); - out.push_str("\n"); + out.push('\n'); out.push_str(&query.type_info().dump()); out } @@ -34,7 +34,7 @@ fn debug_star_quantifier_graph() { out.push_str(&pre_opt_dump); out.push_str("\n=== Graph (after optimization) ===\n"); out.push_str(&query.graph().dump_live(query.dead_nodes())); - out.push_str("\n"); + out.push('\n'); out.push_str(&query.type_info().dump()); insta::assert_snapshot!(out, @r" === Graph (before optimization - what type inference sees) ===