diff --git a/AGENTS.md b/AGENTS.md index 587080e8..cc5b3248 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,6 +22,7 @@ - [ADR-0006: Dynamic Query Execution](docs/adr/ADR-0006-dynamic-query-execution.md) - [ADR-0007: Type Metadata Format](docs/adr/ADR-0007-type-metadata-format.md) - [ADR-0008: Tree Navigation](docs/adr/ADR-0008-tree-navigation.md) + - [ADR-0009: Type System](docs/adr/ADR-0009-type-system.md) - **Template**: ```markdown diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index 395fad1f..a83fb67a 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -111,4 +111,16 @@ pub struct OutputArgs { /// Show inferred cardinalities #[arg(long)] pub cardinalities: bool, + + /// Show compiled graph + #[arg(long)] + pub graph: bool, + + /// Show unoptimized graph (before epsilon elimination) + #[arg(long)] + pub graph_raw: bool, + + /// Show inferred types + #[arg(long)] + pub types: bool, } diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 22a5d0e3..0d07fbdb 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -18,6 +18,9 @@ pub struct DebugArgs { pub cst: bool, pub spans: bool, pub cardinalities: bool, + pub graph: bool, + pub graph_raw: bool, + pub types: bool, pub color: bool, } @@ -51,7 +54,7 @@ pub fn run(args: DebugArgs) { q.link(&lang); } - let show_query = has_query_input && !args.symbols; + let show_query = has_query_input && !args.symbols && !args.graph && !args.types; let show_source = has_source_input; let show_headers = (show_query || args.symbols) && show_source; @@ -85,6 +88,26 @@ pub fn run(args: DebugArgs) { ); } + // Build graph if needed for --graph, --graph-raw, or --types + if (args.graph || args.graph_raw || args.types) + && let Some(q) = query.take() + { + let (q, pre_opt_dump) = q.build_graph_with_pre_opt_dump(); + if args.graph_raw { + println!("=== GRAPH (raw) ==="); + print!("{}", pre_opt_dump); + } + if args.graph { + println!("=== GRAPH ==="); + print!("{}", q.graph().dump_live(q.dead_nodes())); + } + if args.types { + println!("=== TYPES ==="); + print!("{}", q.type_info().dump()); + } + return; + } + if show_source { let resolved_lang = resolve_lang(&args.lang, &args.source_text, &args.source_file); let source_code = load_source(&args.source_text, &args.source_file); diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index b67e3465..e1579a29 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -25,6 +25,9 @@ fn main() { cst: output.cst, spans: output.spans, cardinalities: output.cardinalities, + graph: output.graph, + graph_raw: output.graph_raw, + types: output.types, color: output.color.should_colorize(), }); } diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index 027c5468..f29853fa 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -61,6 +61,11 @@ pub enum DiagnosticKind { DirectRecursion, FieldSequenceValue, + // Type inference errors + IncompatibleTypes, + MultiCaptureQuantifierNoName, + UnusedBranchLabels, + // Link pass - grammar validation UnknownNodeType, UnknownField, @@ -75,7 +80,10 @@ pub enum DiagnosticKind { impl DiagnosticKind { /// Default severity for this kind. Can be overridden by policy. 
pub fn default_severity(&self) -> Severity { - Severity::Error + match self { + Self::UnusedBranchLabels => Severity::Warning, + _ => Severity::Error, + } } /// Whether this kind suppresses `other` when spans overlap. @@ -166,6 +174,13 @@ impl DiagnosticKind { Self::DirectRecursion => "infinite recursion: cycle consumes no input", Self::FieldSequenceValue => "field must match exactly one node", + // Type inference + Self::IncompatibleTypes => "incompatible types in alternation branches", + Self::MultiCaptureQuantifierNoName => { + "quantified expression with multiple captures requires `@name`" + } + Self::UnusedBranchLabels => "branch labels have no effect without capture", + // Link pass - grammar validation Self::UnknownNodeType => "unknown node type", Self::UnknownField => "unknown field", @@ -192,6 +207,7 @@ impl DiagnosticKind { // Semantic errors with name context Self::DuplicateDefinition => "`{}` is already defined".to_string(), Self::UndefinedReference => "`{}` is not defined".to_string(), + Self::IncompatibleTypes => "incompatible types: {}".to_string(), // Link pass errors with context Self::UnknownNodeType => "`{}` is not a valid node type".to_string(), diff --git a/crates/plotnik-lib/src/infer/emit/mod.rs b/crates/plotnik-lib/src/infer/emit/mod.rs deleted file mode 100644 index 2131fffe..00000000 --- a/crates/plotnik-lib/src/infer/emit/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Code emitters for inferred types. -//! -//! This module provides language-specific code generation from a `TypeTable`. - -pub mod rust; -pub mod typescript; - -#[cfg(test)] -mod rust_tests; -#[cfg(test)] -mod typescript_tests; - -pub use rust::{Indirection, RustEmitConfig, emit_rust}; -pub use typescript::{OptionalStyle, TypeScriptEmitConfig, emit_typescript}; diff --git a/crates/plotnik-lib/src/infer/emit/rust.rs b/crates/plotnik-lib/src/infer/emit/rust.rs deleted file mode 100644 index b3680273..00000000 --- a/crates/plotnik-lib/src/infer/emit/rust.rs +++ /dev/null @@ -1,247 +0,0 @@ -//! Rust code emitter for inferred types. -//! -//! Emits Rust struct and enum definitions from a `TypeTable`. - -use indexmap::IndexMap; - -use super::super::types::{TypeKey, TypeTable, TypeValue}; - -/// Configuration for Rust emission. -#[derive(Debug, Clone)] -pub struct RustEmitConfig { - /// Indirection type for cyclic references. - pub indirection: Indirection, - /// Whether to derive common traits. - pub derive_debug: bool, - pub derive_clone: bool, - pub derive_partial_eq: bool, - /// Name for the default (unnamed) query entry point type. - pub default_query_name: String, -} - -/// How to handle cyclic type references. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Indirection { - Box, - Rc, - Arc, -} - -impl Default for RustEmitConfig { - fn default() -> Self { - Self { - indirection: Indirection::Box, - derive_debug: true, - derive_clone: true, - derive_partial_eq: false, - default_query_name: "QueryResult".to_string(), - } - } -} - -/// Emit Rust code from a type table. 
-pub fn emit_rust(table: &TypeTable<'_>, config: &RustEmitConfig) -> String { - let mut output = String::new(); - let sorted = topological_sort(table); - - for key in sorted { - let Some(value) = table.get(&key) else { - continue; - }; - - // Skip built-in types - if matches!(key, TypeKey::Node | TypeKey::String | TypeKey::Unit) { - continue; - } - - let type_def = emit_type_def(&key, value, table, config); - if !type_def.is_empty() { - output.push_str(&type_def); - output.push_str("\n\n"); - } - } - - output.trim_end().to_string() -} - -fn emit_type_def( - key: &TypeKey<'_>, - value: &TypeValue<'_>, - table: &TypeTable<'_>, - config: &RustEmitConfig, -) -> String { - let name = match key { - TypeKey::DefaultQuery => config.default_query_name.clone(), - _ => key.to_pascal_case(), - }; - - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => String::new(), - - TypeValue::Struct(fields) => { - let mut out = emit_derives(config); - if fields.is_empty() { - out.push_str(&format!("pub struct {};", name)); - } else { - out.push_str(&format!("pub struct {} {{\n", name)); - for (field_name, field_type) in fields { - let type_str = emit_type_ref(field_type, table, config); - out.push_str(&format!(" pub {}: {},\n", field_name, type_str)); - } - out.push('}'); - } - out - } - - TypeValue::TaggedUnion(variants) => { - let mut out = emit_derives(config); - out.push_str(&format!("pub enum {} {{\n", name)); - for (variant_name, variant_key) in variants { - let fields = match table.get(variant_key) { - Some(TypeValue::Struct(f)) => Some(f), - Some(TypeValue::Unit) | None => None, - _ => None, - }; - match fields { - Some(f) if !f.is_empty() => { - out.push_str(&format!(" {} {{\n", variant_name)); - for (field_name, field_type) in f { - let type_str = emit_type_ref(field_type, table, config); - out.push_str(&format!(" {}: {},\n", field_name, type_str)); - } - out.push_str(" },\n"); - } - _ => { - out.push_str(&format!(" {},\n", variant_name)); - } - } - } - out.push('}'); - out - } - - TypeValue::Optional(_) | TypeValue::List(_) | TypeValue::NonEmptyList(_) => { - // Wrapper types become type aliases - let mut out = String::new(); - let inner_type = emit_type_ref(key, table, config); - out.push_str(&format!("pub type {} = {};", name, inner_type)); - out - } - } -} - -pub(crate) fn emit_type_ref( - key: &TypeKey<'_>, - table: &TypeTable<'_>, - config: &RustEmitConfig, -) -> String { - let is_cyclic = table.is_cyclic(key); - - let base = match table.get(key) { - Some(TypeValue::Node) => "Node".to_string(), - Some(TypeValue::String) => "String".to_string(), - Some(TypeValue::Unit) | Some(TypeValue::Invalid) => "()".to_string(), - Some(TypeValue::Optional(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Option<{}>", inner_str) - } - Some(TypeValue::List(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Vec<{}>", inner_str) - } - Some(TypeValue::NonEmptyList(inner)) => { - let inner_str = emit_type_ref(inner, table, config); - format!("Vec<{}>", inner_str) - } - // Struct, TaggedUnion, or undefined forward reference - use pascal-cased name - Some(TypeValue::Struct(_)) | Some(TypeValue::TaggedUnion(_)) | None => match key { - TypeKey::DefaultQuery => config.default_query_name.clone(), - _ => key.to_pascal_case(), - }, - }; - - if is_cyclic { - wrap_indirection(&base, config.indirection) - } else { - base - } -} - -pub(crate) fn wrap_indirection(type_str: &str, indirection: Indirection) -> String { - match 
indirection { - Indirection::Box => format!("Box<{}>", type_str), - Indirection::Rc => format!("Rc<{}>", type_str), - Indirection::Arc => format!("Arc<{}>", type_str), - } -} - -pub(crate) fn emit_derives(config: &RustEmitConfig) -> String { - let mut derives = Vec::new(); - if config.derive_debug { - derives.push("Debug"); - } - if config.derive_clone { - derives.push("Clone"); - } - if config.derive_partial_eq { - derives.push("PartialEq"); - } - - if derives.is_empty() { - String::new() - } else { - format!("#[derive({})]\n", derives.join(", ")) - } -} - -/// Topologically sort types so dependencies come before dependents. -pub(crate) fn topological_sort<'src>(table: &TypeTable<'src>) -> Vec> { - let mut result = Vec::new(); - let mut visited = IndexMap::new(); - - for key in table.types.keys() { - visit(key, table, &mut visited, &mut result); - } - - result -} - -fn visit<'src>( - key: &TypeKey<'src>, - table: &TypeTable<'src>, - visited: &mut IndexMap, bool>, - result: &mut Vec>, -) { - if visited.contains_key(key) { - return; - } - - visited.insert(key.clone(), true); - - let Some(value) = table.get(key) else { - visited.insert(key.clone(), false); - result.push(key.clone()); - return; - }; - - for dep in dependencies(value) { - visit(&dep, table, visited, result); - } - - visited.insert(key.clone(), false); - result.push(key.clone()); -} - -pub(crate) fn dependencies<'src>(value: &TypeValue<'src>) -> Vec> { - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => vec![], - - TypeValue::Struct(fields) => fields.values().cloned().collect(), - - TypeValue::TaggedUnion(variants) => variants.values().cloned().collect(), - - TypeValue::Optional(inner) | TypeValue::List(inner) | TypeValue::NonEmptyList(inner) => { - vec![inner.clone()] - } - } -} diff --git a/crates/plotnik-lib/src/infer/emit/rust_tests.rs b/crates/plotnik-lib/src/infer/emit/rust_tests.rs deleted file mode 100644 index 932d64a1..00000000 --- a/crates/plotnik-lib/src/infer/emit/rust_tests.rs +++ /dev/null @@ -1,592 +0,0 @@ -use super::rust::{Indirection, RustEmitConfig, emit_rust}; -use crate::infer::tyton::parse; -use indoc::indoc; - -fn emit(input: &str) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_rust(&table, &RustEmitConfig::default()) -} - -fn emit_with_config(input: &str, config: &RustEmitConfig) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_rust(&table, config) -} - -fn emit_cyclic(input: &str, cyclic_types: &[&str]) -> String { - let mut table = parse(input).expect("tyton parse failed"); - for name in cyclic_types { - table.mark_cyclic(crate::infer::TypeKey::Named(name)); - } - emit_rust(&table, &RustEmitConfig::default()) -} - -// --- Simple Structs --- - -#[test] -fn emit_struct_single_field() { - let input = "Foo = { #Node @value }"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_struct_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Func { - pub name: String, - pub body: Node, - pub params: Node, - } - "); -} - -#[test] -fn emit_struct_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Empty; - "); -} - -#[test] -fn emit_struct_with_unit_field() { - let input = "Wrapper = { () @marker }"; - insta::assert_snapshot!(emit(input), @r" - 
#[derive(Debug, Clone)] - pub struct Wrapper { - pub marker: (), - } - "); -} - -#[test] -fn emit_struct_nested_refs() { - let input = indoc! {r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Inner { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub struct Outer { - pub inner: Inner, - pub label: String, - } - "); -} - -// --- Tagged Unions --- - -#[test] -fn emit_tagged_union_simple() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct AssignStmt { - pub target: Node, - pub value: Node, - } - - #[derive(Debug, Clone)] - pub struct CallStmt { - pub func: Node, - } - - #[derive(Debug, Clone)] - pub enum Stmt { - Assign { - target: Node, - value: Node, - }, - Call { - func: Node, - }, - } - "); -} - -#[test] -fn emit_tagged_union_with_empty_variant() { - let input = indoc! {r#" - ValueVariant = { #Node @value } - Expr = [ Some: ValueVariant None: () ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct ValueVariant { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub enum Expr { - Some { - value: Node, - }, - None, - } - "); -} - -#[test] -fn emit_tagged_union_all_empty() { - let input = "Token = [ Comma: () Dot: () Semi: () ]"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub enum Token { - Comma, - Dot, - Semi, - } - "); -} - -#[test] -fn emit_tagged_union_with_builtins() { - let input = "Value = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub enum Value { - Text, - Code, - Empty, - } - "); -} - -// --- Wrapper Types --- - -#[test] -fn emit_optional() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(emit(input), @"pub type MaybeNode = Option;"); -} - -#[test] -fn emit_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(emit(input), @"pub type Nodes = Vec;"); -} - -#[test] -fn emit_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(emit(input), @"pub type Nodes = Vec;"); -} - -#[test] -fn emit_optional_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - MaybeStmt = Stmt? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Stmt { - pub value: Node, - } - - pub type MaybeStmt = Option; - "); -} - -#[test] -fn emit_list_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - Stmts = Stmt* - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Stmt { - pub value: Node, - } - - pub type Stmts = Vec; - "); -} - -#[test] -fn emit_nested_wrappers() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - MaybeItems = Items? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type Items = Vec; - - pub type MaybeItems = Option>; - "); -} - -// --- Cyclic Types --- - -#[test] -fn emit_cyclic_box() { - let input = indoc! 
{r#" - TreeNode = { #Node @value TreeNode @left TreeNode @right } - "#}; - insta::assert_snapshot!(emit_cyclic(input, &["TreeNode"]), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub left: Box, - pub right: Box, - } - "); -} - -#[test] -fn emit_cyclic_rc() { - let input = "TreeNode = { #Node @value TreeNode @child }"; - let config = RustEmitConfig { - indirection: Indirection::Rc, - ..Default::default() - }; - let mut table = parse(input).expect("tyton parse failed"); - table.mark_cyclic(crate::infer::TypeKey::Named("TreeNode")); - insta::assert_snapshot!(emit_rust(&table, &config), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub child: Rc, - } - "); -} - -#[test] -fn emit_cyclic_arc() { - let input = "TreeNode = { #Node @value TreeNode @child }"; - let config = RustEmitConfig { - indirection: Indirection::Arc, - ..Default::default() - }; - let mut table = parse(input).expect("tyton parse failed"); - table.mark_cyclic(crate::infer::TypeKey::Named("TreeNode")); - insta::assert_snapshot!(emit_rust(&table, &config), @r" - #[derive(Debug, Clone)] - pub struct TreeNode { - pub value: Node, - pub child: Arc, - } - "); -} - -// --- Config Variations --- - -#[test] -fn emit_no_derives() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: false, - derive_clone: false, - derive_partial_eq: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_debug_only() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: true, - derive_clone: false, - derive_partial_eq: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug)] - pub struct Foo { - pub value: Node, - } - "); -} - -#[test] -fn emit_all_derives() { - let input = "Foo = { #Node @value }"; - let config = RustEmitConfig { - derive_debug: true, - derive_clone: true, - derive_partial_eq: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug, Clone, PartialEq)] - pub struct Foo { - pub value: Node, - } - "); -} - -// --- Complex Scenarios --- - -#[test] -fn emit_complex_program() { - let input = indoc! {r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - ExprStmt = { #Node @expr } - Stmt = [ Func: FuncDecl Expr: ExprStmt ] - Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct FuncInfo { - pub name: String, - pub body: Node, - } - - #[derive(Debug, Clone)] - pub struct Param { - pub name: String, - pub type_annotation: String, - } - - pub type Params = Vec; - - #[derive(Debug, Clone)] - pub struct FuncDecl { - pub info: FuncInfo, - pub params: Vec, - } - - #[derive(Debug, Clone)] - pub struct ExprStmt { - pub expr: Node, - } - - #[derive(Debug, Clone)] - pub enum Stmt { - Func { - info: FuncInfo, - params: Vec, - }, - Expr { - expr: Node, - }, - } - - #[derive(Debug, Clone)] - pub struct Program { - pub statements: Stmt, - } - "); -} - -#[test] -fn emit_synthetic_keys() { - let input = indoc! {r#" - Container = { @inner } - InnerWrapper = ? 
- "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Container { - pub inner: InnerField, - } - - pub type InnerWrapper = Option; - "); -} - -#[test] -fn emit_mixed_wrappers_and_structs() { - let input = indoc! {r#" - Leaf = { #string @text } - Branch = { #Node @left #Node @right } - Tree = [ Leaf: Leaf Branch: Branch ] - Forest = Tree* - MaybeForest = Forest? - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Leaf { - pub text: String, - } - - #[derive(Debug, Clone)] - pub struct Branch { - pub left: Node, - pub right: Node, - } - - #[derive(Debug, Clone)] - pub enum Tree { - Leaf { - text: String, - }, - Branch { - left: Node, - right: Node, - }, - } - - pub type Forest = Vec; - - pub type MaybeForest = Option>; - "); -} - -// --- Edge Cases --- - -#[test] -fn emit_single_variant_union() { - let input = indoc! {r#" - OnlyVariant = { #Node @value } - Single = [ Only: OnlyVariant ] - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct OnlyVariant { - pub value: Node, - } - - #[derive(Debug, Clone)] - pub enum Single { - Only { - value: Node, - }, - } - "); -} - -#[test] -fn emit_deeply_nested() { - let input = indoc! {r#" - A = { #Node @val } - B = { A @a } - C = { B @b } - D = { C @c } - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct A { - pub val: Node, - } - - #[derive(Debug, Clone)] - pub struct B { - pub a: A, - } - - #[derive(Debug, Clone)] - pub struct C { - pub b: B, - } - - #[derive(Debug, Clone)] - pub struct D { - pub c: C, - } - "); -} - -#[test] -fn emit_list_of_optionals() { - let input = indoc! {r#" - Item = { #Node @value } - MaybeItem = Item? - Items = MaybeItem* - "#}; - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type MaybeItem = Option; - - pub type Items = Vec>; - "); -} - -#[test] -fn emit_builtin_value_with_named_key() { - let input = indoc! {r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - insta::assert_snapshot!(emit(input), @""); -} - -// --- DefaultQuery --- - -#[test] -fn emit_default_query_struct() { - let input = "#DefaultQuery = { #Node @value }"; - - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct QueryResult { - pub value: Node, - } - "); -} - -#[test] -fn emit_default_query_custom_name() { - let input = "#DefaultQuery = { #Node @value }"; - let config = RustEmitConfig { - default_query_name: "MyResult".to_string(), - ..Default::default() - }; - - insta::assert_snapshot!(emit_with_config(input, &config), @r" - #[derive(Debug, Clone)] - pub struct MyResult { - pub value: Node, - } - "); -} - -#[test] -fn emit_default_query_referenced() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - #DefaultQuery = { Items @items } - "#}; - - insta::assert_snapshot!(emit(input), @r" - #[derive(Debug, Clone)] - pub struct Item { - pub value: Node, - } - - pub type Items = Vec; - - #[derive(Debug, Clone)] - pub struct QueryResult { - pub items: Vec, - } - "); -} diff --git a/crates/plotnik-lib/src/infer/emit/typescript.rs b/crates/plotnik-lib/src/infer/emit/typescript.rs deleted file mode 100644 index 72621fd1..00000000 --- a/crates/plotnik-lib/src/infer/emit/typescript.rs +++ /dev/null @@ -1,300 +0,0 @@ -//! TypeScript code emitter for inferred types. -//! -//! Emits TypeScript interface and type definitions from a `TypeTable`. 
- -use indexmap::IndexMap; - -use super::super::types::{TypeKey, TypeTable, TypeValue}; - -/// Configuration for TypeScript emission. -#[derive(Debug, Clone)] -pub struct TypeScriptEmitConfig { - /// How to represent optional values. - pub optional_style: OptionalStyle, - /// Whether to export types. - pub export: bool, - /// Whether to make fields readonly. - pub readonly: bool, - /// Whether to inline synthetic types. - pub inline_synthetic: bool, - /// Name for the Node type. - pub node_type_name: String, - /// Whether to emit `type Foo = ...` instead of `interface Foo { ... }`. - pub use_type_alias: bool, - /// Name for the default (unnamed) query entry point. - pub default_query_name: String, -} - -/// How to represent optional types. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OptionalStyle { - /// `T | null` - Null, - /// `T | undefined` - Undefined, - /// `T?` (optional property) - QuestionMark, -} - -impl Default for TypeScriptEmitConfig { - fn default() -> Self { - Self { - optional_style: OptionalStyle::Null, - export: false, - readonly: false, - inline_synthetic: true, - node_type_name: "SyntaxNode".to_string(), - use_type_alias: false, - default_query_name: "QueryResult".to_string(), - } - } -} - -/// Emit TypeScript code from a type table. -pub fn emit_typescript(table: &TypeTable<'_>, config: &TypeScriptEmitConfig) -> String { - let mut output = String::new(); - let sorted = topological_sort(table); - - for key in sorted { - let Some(value) = table.get(&key) else { - continue; - }; - - // Skip built-in types - if matches!(key, TypeKey::Node | TypeKey::String | TypeKey::Unit) { - continue; - } - - // Skip synthetic types if inlining - if config.inline_synthetic && matches!(key, TypeKey::Synthetic(_)) { - continue; - } - - let type_def = emit_type_def(&key, value, table, config); - if !type_def.is_empty() { - output.push_str(&type_def); - output.push_str("\n\n"); - } - } - - output.trim_end().to_string() -} - -fn emit_type_def( - key: &TypeKey<'_>, - value: &TypeValue<'_>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> String { - let name = type_name(key, config); - let export_prefix = if config.export && !matches!(key, TypeKey::Synthetic(_)) { - "export " - } else { - "" - }; - - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => String::new(), - - TypeValue::Struct(fields) => { - if config.use_type_alias { - let inline = emit_inline_struct(fields, table, config); - format!("{}type {} = {};", export_prefix, name, inline) - } else if fields.is_empty() { - format!("{}interface {} {{}}", export_prefix, name) - } else { - let mut out = format!("{}interface {} {{\n", export_prefix, name); - for (field_name, field_type) in fields { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let readonly = if config.readonly { "readonly " } else { "" }; - let optional = - if is_optional && config.optional_style == OptionalStyle::QuestionMark { - "?" 
- } else { - "" - }; - out.push_str(&format!( - " {}{}{}: {};\n", - readonly, field_name, optional, type_str - )); - } - out.push('}'); - out - } - } - - TypeValue::TaggedUnion(variants) => { - let mut out = format!("{}type {} =\n", export_prefix, name); - let variant_count = variants.len(); - for (i, (variant_name, variant_key)) in variants.iter().enumerate() { - out.push_str(" | { tag: \""); - out.push_str(variant_name); - out.push('"'); - // Look up variant type to get fields - if let Some(TypeValue::Struct(fields)) = table.get(variant_key) { - for (field_name, field_type) in fields { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let optional = if is_optional - && config.optional_style == OptionalStyle::QuestionMark - { - "?" - } else { - "" - }; - out.push_str(&format!("; {}{}: {}", field_name, optional, type_str)); - } - } - out.push_str(" }"); - if i < variant_count - 1 { - out.push('\n'); - } - } - out.push(';'); - out - } - - TypeValue::Optional(_) | TypeValue::List(_) | TypeValue::NonEmptyList(_) => { - let (type_str, _) = emit_field_type(key, table, config); - format!("{}type {} = {};", export_prefix, name, type_str) - } - } -} - -/// Returns (type_string, is_optional) -pub(crate) fn emit_field_type( - key: &TypeKey<'_>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> (String, bool) { - match table.get(key) { - Some(TypeValue::Node) => (config.node_type_name.clone(), false), - Some(TypeValue::String) => ("string".to_string(), false), - Some(TypeValue::Unit) | Some(TypeValue::Invalid) => ("{}".to_string(), false), - - Some(TypeValue::Optional(inner)) => { - let (inner_str, _) = emit_field_type(inner, table, config); - let type_str = match config.optional_style { - OptionalStyle::Null => format!("{} | null", inner_str), - OptionalStyle::Undefined => format!("{} | undefined", inner_str), - OptionalStyle::QuestionMark => inner_str, - }; - (type_str, true) - } - - Some(TypeValue::List(inner)) => { - let (inner_str, _) = emit_field_type(inner, table, config); - (format!("{}[]", wrap_if_union(&inner_str)), false) - } - - Some(TypeValue::NonEmptyList(inner)) => { - let (inner_str, _) = emit_field_type(inner, table, config); - (format!("[{}, ...{}[]]", inner_str, inner_str), false) - } - - Some(TypeValue::Struct(fields)) => { - if config.inline_synthetic && matches!(key, TypeKey::Synthetic(_)) { - (emit_inline_struct(fields, table, config), false) - } else { - (type_name(key, config), false) - } - } - - Some(TypeValue::TaggedUnion(_)) => (type_name(key, config), false), - - None => (type_name(key, config), false), - } -} - -pub(crate) fn emit_inline_struct( - fields: &IndexMap<&str, TypeKey<'_>>, - table: &TypeTable<'_>, - config: &TypeScriptEmitConfig, -) -> String { - if fields.is_empty() { - return "{}".to_string(); - } - - let mut out = String::from("{ "); - for (i, (field_name, field_type)) in fields.iter().enumerate() { - let (type_str, is_optional) = emit_field_type(field_type, table, config); - let optional = if is_optional && config.optional_style == OptionalStyle::QuestionMark { - "?" 
- } else { - "" - }; - out.push_str(field_name); - out.push_str(optional); - out.push_str(": "); - out.push_str(&type_str); - if i < fields.len() - 1 { - out.push_str("; "); - } - } - out.push_str(" }"); - out -} - -fn type_name(key: &TypeKey<'_>, config: &TypeScriptEmitConfig) -> String { - if key.is_default_query() { - config.default_query_name.clone() - } else { - key.to_pascal_case() - } -} - -pub(crate) fn wrap_if_union(type_str: &str) -> String { - if type_str.contains('|') { - format!("({})", type_str) - } else { - type_str.to_string() - } -} - -/// Topologically sort types so dependencies come before dependents. -pub(crate) fn topological_sort<'src>(table: &TypeTable<'src>) -> Vec> { - let mut result = Vec::new(); - let mut visited = IndexMap::new(); - - for key in table.types.keys() { - visit(key, table, &mut visited, &mut result); - } - - result -} - -fn visit<'src>( - key: &TypeKey<'src>, - table: &TypeTable<'src>, - visited: &mut IndexMap, bool>, - result: &mut Vec>, -) { - if visited.contains_key(key) { - return; - } - - visited.insert(key.clone(), true); - - let Some(value) = table.get(key) else { - visited.insert(key.clone(), false); - result.push(key.clone()); - return; - }; - - for dep in dependencies(value) { - visit(&dep, table, visited, result); - } - - visited.insert(key.clone(), false); - result.push(key.clone()); -} - -pub(crate) fn dependencies<'src>(value: &TypeValue<'src>) -> Vec> { - match value { - TypeValue::Node | TypeValue::String | TypeValue::Unit | TypeValue::Invalid => vec![], - TypeValue::Struct(fields) => fields.values().cloned().collect(), - TypeValue::TaggedUnion(variants) => variants.values().cloned().collect(), - TypeValue::Optional(inner) | TypeValue::List(inner) | TypeValue::NonEmptyList(inner) => { - vec![inner.clone()] - } - } -} diff --git a/crates/plotnik-lib/src/infer/emit/typescript_tests.rs b/crates/plotnik-lib/src/infer/emit/typescript_tests.rs deleted file mode 100644 index 5aae21dc..00000000 --- a/crates/plotnik-lib/src/infer/emit/typescript_tests.rs +++ /dev/null @@ -1,793 +0,0 @@ -use super::typescript::{OptionalStyle, TypeScriptEmitConfig, emit_typescript}; -use crate::infer::tyton::parse; -use indoc::indoc; - -fn emit(input: &str) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_typescript(&table, &TypeScriptEmitConfig::default()) -} - -fn emit_with_config(input: &str, config: &TypeScriptEmitConfig) -> String { - let table = parse(input).expect("tyton parse failed"); - emit_typescript(&table, config) -} - -// --- Simple Structs (Interfaces) --- - -#[test] -fn emit_interface_single_field() { - let input = "Foo = { #Node @value }"; - insta::assert_snapshot!(emit(input), @r" - interface Foo { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_interface_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(emit(input), @r" - interface Func { - name: string; - body: SyntaxNode; - params: SyntaxNode; - } - "); -} - -#[test] -fn emit_interface_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(emit(input), @"interface Empty {}"); -} - -#[test] -fn emit_interface_with_unit_field() { - let input = "Wrapper = { () @marker }"; - insta::assert_snapshot!(emit(input), @r" - interface Wrapper { - marker: {}; - } - "); -} - -#[test] -fn emit_interface_nested_refs() { - let input = indoc! 
{r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Inner { - value: SyntaxNode; - } - - interface Outer { - inner: Inner; - label: string; - } - "); -} - -// --- Tagged Unions --- - -#[test] -fn emit_tagged_union_simple() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface AssignStmt { - target: SyntaxNode; - value: SyntaxNode; - } - - interface CallStmt { - func: SyntaxNode; - } - - type Stmt = - | { tag: "Assign"; target: SyntaxNode; value: SyntaxNode } - | { tag: "Call"; func: SyntaxNode }; - "#); -} - -#[test] -fn emit_tagged_union_with_empty_variant() { - let input = indoc! {r#" - ValueVariant = { #Node @value } - Expr = [ Some: ValueVariant None: () ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface ValueVariant { - value: SyntaxNode; - } - - type Expr = - | { tag: "Some"; value: SyntaxNode } - | { tag: "None" }; - "#); -} - -#[test] -fn emit_tagged_union_all_empty() { - let input = "Token = [ Comma: () Dot: () Semi: () ]"; - insta::assert_snapshot!(emit(input), @r#" - type Token = - | { tag: "Comma" } - | { tag: "Dot" } - | { tag: "Semi" }; - "#); -} - -#[test] -fn emit_tagged_union_with_builtins() { - let input = "Value = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(emit(input), @r#" - type Value = - | { tag: "Text" } - | { tag: "Code" } - | { tag: "Empty" }; - "#); -} - -// --- Wrapper Types --- - -#[test] -fn emit_optional_null() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(emit(input), @"type MaybeNode = SyntaxNode | null;"); -} - -#[test] -fn emit_optional_undefined() { - let input = "MaybeNode = #Node?"; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::Undefined, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type MaybeNode = SyntaxNode | undefined;"); -} - -#[test] -fn emit_optional_question_mark() { - let input = indoc! {r#" - MaybeNode = #Node? - Foo = { MaybeNode @maybe } - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode; - - interface Foo { - maybe?: SyntaxNode; - } - "); -} - -#[test] -fn emit_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(emit(input), @"type Nodes = SyntaxNode[];"); -} - -#[test] -fn emit_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(emit(input), @"type Nodes = [SyntaxNode, ...SyntaxNode[]];"); -} - -#[test] -fn emit_optional_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - MaybeStmt = Stmt? - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Stmt { - value: SyntaxNode; - } - - type MaybeStmt = Stmt | null; - "); -} - -#[test] -fn emit_list_named() { - let input = indoc! {r#" - Stmt = { #Node @value } - Stmts = Stmt* - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Stmt { - value: SyntaxNode; - } - - type Stmts = Stmt[]; - "); -} - -#[test] -fn emit_nested_wrappers() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - MaybeItems = Items? 
- "#}; - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type Items = Item[]; - - type MaybeItems = Item[] | null; - "); -} - -#[test] -fn emit_list_of_optionals() { - let input = indoc! {r#" - Item = { #Node @value } - MaybeItem = Item? - Items = MaybeItem* - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type MaybeItem = Item | null; - - type Items = (Item | null)[]; - "); -} - -// --- Config Variations --- - -#[test] -fn emit_with_export() { - let input = "Foo = { #Node @value }"; - let config = TypeScriptEmitConfig { - export: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - export interface Foo { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_readonly_fields() { - let input = "Foo = { #Node @value #string @name }"; - let config = TypeScriptEmitConfig { - readonly: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Foo { - readonly value: SyntaxNode; - readonly name: string; - } - "); -} - -#[test] -fn emit_custom_node_type() { - let input = "Foo = { #Node @value }"; - let config = TypeScriptEmitConfig { - node_type_name: "TSNode".to_string(), - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Foo { - value: TSNode; - } - "); -} - -#[test] -fn emit_type_alias_instead_of_interface() { - let input = "Foo = { #Node @value #string @name }"; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type Foo = { value: SyntaxNode; name: string };"); -} - -#[test] -fn emit_type_alias_empty() { - let input = "Empty = {}"; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"type Empty = {};"); -} - -#[test] -fn emit_type_alias_nested() { - let input = indoc! {r#" - Inner = { #Node @value } - Outer = { Inner @inner #string @label } - "#}; - let config = TypeScriptEmitConfig { - use_type_alias: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type Inner = { value: SyntaxNode }; - - type Outer = { inner: Inner; label: string }; - "); -} - -#[test] -fn emit_no_inline_synthetic() { - let input = indoc! {r#" - Container = { @inner } - "#}; - let config = TypeScriptEmitConfig { - inline_synthetic: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface Container { - inner: InnerField; - } - "); -} - -#[test] -fn emit_inline_synthetic() { - let input = indoc! {r#" - Container = { @inner } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Container { - inner: InnerField; - } - "); -} - -// --- Complex Scenarios --- - -#[test] -fn emit_complex_program() { - let input = indoc! 
{r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - ExprStmt = { #Node @expr } - Stmt = [ Func: FuncDecl Expr: ExprStmt ] - Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface FuncInfo { - name: string; - body: SyntaxNode; - } - - interface Param { - name: string; - type_annotation: string; - } - - type Params = Param[]; - - interface FuncDecl { - info: FuncInfo; - params: Param[]; - } - - interface ExprStmt { - expr: SyntaxNode; - } - - type Stmt = - | { tag: "Func"; info: FuncInfo; params: Param[] } - | { tag: "Expr"; expr: SyntaxNode }; - - interface Program { - statements: Stmt; - } - "#); -} - -#[test] -fn emit_mixed_wrappers_and_structs() { - let input = indoc! {r#" - Leaf = { #string @text } - Branch = { #Node @left #Node @right } - Tree = [ Leaf: Leaf Branch: Branch ] - Forest = Tree* - MaybeForest = Forest? - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface Leaf { - text: string; - } - - interface Branch { - left: SyntaxNode; - right: SyntaxNode; - } - - type Tree = - | { tag: "Leaf"; text: string } - | { tag: "Branch"; left: SyntaxNode; right: SyntaxNode }; - - type Forest = Tree[]; - - type MaybeForest = Tree[] | null; - "#); -} - -#[test] -fn emit_all_config_options() { - let input = indoc! {r#" - MaybeNode = #Node? - Item = { #Node @value MaybeNode @maybe } - Items = Item* - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - export: true, - readonly: true, - inline_synthetic: true, - node_type_name: "ASTNode".to_string(), - use_type_alias: false, - default_query_name: "QueryResult".to_string(), - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - export type MaybeNode = ASTNode; - - export interface Item { - readonly value: ASTNode; - readonly maybe?: ASTNode; - } - - export type Items = Item[]; - "); -} - -// --- Edge Cases --- - -#[test] -fn emit_single_variant_union() { - let input = indoc! {r#" - OnlyVariant = { #Node @value } - Single = [ Only: OnlyVariant ] - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface OnlyVariant { - value: SyntaxNode; - } - - type Single = - | { tag: "Only"; value: SyntaxNode }; - "#); -} - -#[test] -fn emit_deeply_nested() { - let input = indoc! {r#" - A = { #Node @val } - B = { A @a } - C = { B @b } - D = { C @c } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface A { - val: SyntaxNode; - } - - interface B { - a: A; - } - - interface C { - b: B; - } - - interface D { - c: C; - } - "); -} - -#[test] -fn emit_union_in_list() { - let input = indoc! {r#" - A = { #Node @a } - B = { #Node @b } - Choice = [ A: A B: B ] - Choices = Choice* - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface A { - a: SyntaxNode; - } - - interface B { - b: SyntaxNode; - } - - type Choice = - | { tag: "A"; a: SyntaxNode } - | { tag: "B"; b: SyntaxNode }; - - type Choices = Choice[]; - "#); -} - -#[test] -fn emit_optional_in_struct_null_style() { - let input = indoc! {r#" - MaybeNode = #Node? - Container = { MaybeNode @item #string @name } - "#}; - insta::assert_snapshot!(emit(input), @r" - type MaybeNode = SyntaxNode | null; - - interface Container { - item: SyntaxNode | null; - name: string; - } - "); -} - -#[test] -fn emit_optional_in_struct_undefined_style() { - let input = indoc! {r#" - MaybeNode = #Node? 
- Container = { MaybeNode @item #string @name } - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::Undefined, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode | undefined; - - interface Container { - item: SyntaxNode | undefined; - name: string; - } - "); -} - -#[test] -fn emit_tagged_union_with_optional_field_question_mark() { - let input = indoc! {r#" - MaybeNode = #Node? - VariantA = { MaybeNode @value } - VariantB = { #Node @item } - Choice = [ A: VariantA B: VariantB ] - "#}; - let config = TypeScriptEmitConfig { - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r#" - type MaybeNode = SyntaxNode; - - interface VariantA { - value?: SyntaxNode; - } - - interface VariantB { - item: SyntaxNode; - } - - type Choice = - | { tag: "A"; value?: SyntaxNode } - | { tag: "B"; item: SyntaxNode }; - "#); -} - -#[test] -fn emit_struct_with_union_field() { - let input = indoc! {r#" - A = { #Node @a } - B = { #Node @b } - Choice = [ A: A B: B ] - Container = { Choice @choice #string @name } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface A { - a: SyntaxNode; - } - - interface B { - b: SyntaxNode; - } - - type Choice = - | { tag: "A"; a: SyntaxNode } - | { tag: "B"; b: SyntaxNode }; - - interface Container { - choice: Choice; - name: string; - } - "#); -} - -#[test] -fn emit_struct_with_forward_ref() { - let input = indoc! {r#" - Container = { Later @item } - Later = { #Node @value } - "#}; - insta::assert_snapshot!(emit(input), @r" - interface Later { - value: SyntaxNode; - } - - interface Container { - item: Later; - } - "); -} - -#[test] -fn emit_synthetic_type_no_inline() { - let input = " = { #Node @value }"; - let config = TypeScriptEmitConfig { - inline_synthetic: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface FooBar { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_synthetic_type_with_inline() { - let input = " = { #Node @value }"; - let config = TypeScriptEmitConfig { - inline_synthetic: true, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @""); -} - -#[test] -fn emit_field_referencing_tagged_union() { - let input = indoc! {r#" - VarA = { #Node @x } - VarB = { #Node @y } - Choice = [ A: VarA B: VarB ] - Container = { Choice @choice } - "#}; - insta::assert_snapshot!(emit(input), @r#" - interface VarA { - x: SyntaxNode; - } - - interface VarB { - y: SyntaxNode; - } - - type Choice = - | { tag: "A"; x: SyntaxNode } - | { tag: "B"; y: SyntaxNode }; - - interface Container { - choice: Choice; - } - "#); -} - -#[test] -fn emit_field_referencing_unknown_type() { - let input = "Container = { DoesNotExist @unknown }"; - insta::assert_snapshot!(emit(input), @r" - interface Container { - unknown: DoesNotExist; - } - "); -} - -#[test] -fn emit_empty_interface_no_type_alias() { - let input = "Empty = {}"; - let config = TypeScriptEmitConfig { - use_type_alias: false, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @"interface Empty {}"); -} - -#[test] -fn emit_inline_synthetic_struct_with_optional_field() { - let input = indoc! {r#" - MaybeNode = #Node? 
- = { #Node @value MaybeNode @maybe } - Container = { @inner } - "#}; - let config = TypeScriptEmitConfig { - inline_synthetic: true, - optional_style: OptionalStyle::QuestionMark, - ..Default::default() - }; - insta::assert_snapshot!(emit_with_config(input, &config), @r" - type MaybeNode = SyntaxNode; - - interface Container { - inner: { value: SyntaxNode; maybe?: SyntaxNode }; - } - "); -} - -#[test] -fn emit_builtin_value_with_named_key() { - let input = indoc! {r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - insta::assert_snapshot!(emit(input), @""); -} - -// --- DefaultQuery --- - -#[test] -fn emit_default_query_interface() { - let input = "#DefaultQuery = { #Node @value }"; - - insta::assert_snapshot!(emit(input), @r" - interface QueryResult { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_default_query_custom_name() { - let input = "#DefaultQuery = { #Node @value }"; - let config = TypeScriptEmitConfig { - default_query_name: "MyResult".to_string(), - ..Default::default() - }; - - insta::assert_snapshot!(emit_with_config(input, &config), @r" - interface MyResult { - value: SyntaxNode; - } - "); -} - -#[test] -fn emit_default_query_referenced() { - let input = indoc! {r#" - Item = { #Node @value } - Items = Item* - #DefaultQuery = { Items @items } - "#}; - - insta::assert_snapshot!(emit(input), @r" - interface Item { - value: SyntaxNode; - } - - type Items = Item[]; - - interface QueryResult { - items: Item[]; - } - "); -} diff --git a/crates/plotnik-lib/src/infer/mod.rs b/crates/plotnik-lib/src/infer/mod.rs deleted file mode 100644 index 46471372..00000000 --- a/crates/plotnik-lib/src/infer/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Type inference for query output types. -//! -//! This module provides: -//! - `TypeTable`: collection of inferred types -//! - `TypeKey` / `TypeValue`: type representation -//! - `emit_rust`: Rust code emitter -//! - `emit_typescript`: TypeScript code emitter - -pub mod emit; -mod types; -pub mod tyton; - -#[cfg(test)] -mod types_tests; -#[cfg(test)] -mod tyton_tests; - -pub use emit::{ - Indirection, OptionalStyle, RustEmitConfig, TypeScriptEmitConfig, emit_rust, emit_typescript, -}; -pub use types::{TypeKey, TypeTable, TypeValue}; diff --git a/crates/plotnik-lib/src/infer/types.rs b/crates/plotnik-lib/src/infer/types.rs deleted file mode 100644 index 6e9081bd..00000000 --- a/crates/plotnik-lib/src/infer/types.rs +++ /dev/null @@ -1,280 +0,0 @@ -//! Type representation for inferred query output types. -//! -//! # Overview -//! -//! The type system is flat: all types live in a `TypeTable` keyed by `TypeKey`. -//! Wrapper types (Optional, List, NonEmptyList) reference inner types by key. -//! -//! # Design Decisions -//! -//! ## Alternation Handling -//! -//! Alternations (`[A: ... B: ...]` or `[... ...]`) produce different type structures: -//! -//! - **Tagged alternations** (`[A: expr B: expr]`): Become `TaggedUnion` with named variants. -//! Each branch gets its own struct type, discriminated by the tag name. -//! -//! - **Untagged/mixed alternations** (`[expr expr]`): Branches are "merged" into a single -//! struct where fields are combined. The merge rules: -//! 1. Field present in all branches with same type → field has that type -//! 2. Field present in some branches only → field becomes Optional -//! 3. Field present in all branches but with different types → field gets Invalid type -//! -//! ## Invalid Type -//! -//! The `Invalid` type represents a type conflict that couldn't be resolved (e.g., field -//! 
has `Node` in one branch and `String` in another). It is emitted the same as `Unit` -//! in code generators—this keeps output valid while signaling the user made a questionable -//! query. Diagnostics should warn about Invalid types during inference. -//! -//! ## Type Keys vs Type Values -//! -//! - `TypeKey`: Identity/reference to a type. Used in field types, wrapper inner types. -//! - `TypeValue`: The actual type definition. Stored in the table. -//! -//! Built-in types (Node, String, Unit, Invalid) have both a key and value variant for -//! consistency—the key is what you reference, the value is what gets stored. -//! -//! ## DefaultQuery Key -//! -//! `TypeKey::DefaultQuery` represents the unnamed entry point query (the last definition -//! without a name). It has no corresponding `TypeValue` variant—it's purely a key that -//! maps to a Struct or other value. The emitted name ("QueryResult" by default) is -//! configurable per code generator. -//! -//! ## Synthetic Keys -//! -//! For nested captures like `(function @fn { (param @p) @params })`, we need unique type -//! names. Synthetic keys use path segments: `["fn", "params"]` → `FnParams`. This avoids -//! name collisions while keeping names readable. - -use indexmap::IndexMap; - -/// Identity of a type in the type table. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TypeKey<'src> { - /// Tree-sitter node (built-in) - Node, - /// String value from `:: string` annotation (built-in) - String, - /// Unit type for empty captures (built-in) - Unit, - /// Invalid type for unresolvable conflicts (built-in) - /// Emitted same as Unit in code generators. - Invalid, - /// The unnamed entry point query (last definition without a name). - /// Default emitted name is "QueryResult", but emitters may override. - DefaultQuery, - /// User-provided type name via `:: TypeName` - Named(&'src str), - /// Path-based synthetic name: ["Foo", "bar"] → FooBar - Synthetic(Vec<&'src str>), -} - -impl TypeKey<'_> { - /// Render as PascalCase type name. - pub fn to_pascal_case(&self) -> String { - match self { - TypeKey::Node => "Node".to_string(), - TypeKey::String => "String".to_string(), - TypeKey::Unit => "Unit".to_string(), - TypeKey::Invalid => "Unit".to_string(), // Invalid emits as Unit - TypeKey::DefaultQuery => "DefaultQuery".to_string(), - TypeKey::Named(name) => (*name).to_string(), - TypeKey::Synthetic(segments) => segments.iter().map(|s| to_pascal(s)).collect(), - } - } - - /// Returns true if this is a built-in primitive type. - pub fn is_builtin(&self) -> bool { - matches!( - self, - TypeKey::Node | TypeKey::String | TypeKey::Unit | TypeKey::Invalid - ) - } - - /// Returns true if this is the default query entry point. - pub fn is_default_query(&self) -> bool { - matches!(self, TypeKey::DefaultQuery) - } -} - -/// Convert snake_case or lowercase to PascalCase. -pub(crate) fn to_pascal(s: &str) -> String { - s.split('_') - .map(|part| { - let mut chars = part.chars(); - match chars.next() { - None => String::new(), - Some(first) => first.to_uppercase().chain(chars).collect(), - } - }) - .collect() -} - -/// Type definition stored in the type table. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TypeValue<'src> { - /// Tree-sitter node primitive - Node, - /// String primitive - String, - /// Unit type (empty struct) - Unit, - /// Invalid type (conflicting types in untagged union) - /// Emitted same as Unit. Presence indicates a diagnostic should be emitted. 
- Invalid, - /// Struct with named fields - Struct(IndexMap<&'src str, TypeKey<'src>>), - /// Tagged union: variant name → variant type (must resolve to Struct or Unit) - TaggedUnion(IndexMap<&'src str, TypeKey<'src>>), - /// Optional wrapper - Optional(TypeKey<'src>), - /// Zero-or-more list wrapper - List(TypeKey<'src>), - /// One-or-more list wrapper - NonEmptyList(TypeKey<'src>), -} - -/// Result of merging a single field across branches. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum MergedField<'src> { - /// Field has same type in all branches where present - Same(TypeKey<'src>), - /// Field has same type but missing in some branches → needs Optional wrapper - Optional(TypeKey<'src>), - /// Field has conflicting types across branches → Invalid - Conflict, -} - -/// Collection of all inferred types for a query. -#[derive(Debug, Clone)] -pub struct TypeTable<'src> { - /// All type definitions, keyed by their identity. - /// Pre-populated with built-in types (Node, String, Unit, Invalid). - pub types: IndexMap, TypeValue<'src>>, - /// Types that contain cyclic references (need Box in Rust). - pub cyclic: Vec>, -} - -impl<'src> TypeTable<'src> { - /// Create a new type table with built-in types pre-populated. - pub fn new() -> Self { - let mut types = IndexMap::new(); - types.insert(TypeKey::Node, TypeValue::Node); - types.insert(TypeKey::String, TypeValue::String); - types.insert(TypeKey::Unit, TypeValue::Unit); - types.insert(TypeKey::Invalid, TypeValue::Invalid); - Self { - types, - cyclic: Vec::new(), - } - } - - /// Insert a type definition. Returns the key for chaining. - pub fn insert(&mut self, key: TypeKey<'src>, value: TypeValue<'src>) -> TypeKey<'src> { - self.types.insert(key.clone(), value); - key - } - - /// Mark a type as cyclic (requires indirection in Rust). - pub fn mark_cyclic(&mut self, key: TypeKey<'src>) { - if !self.cyclic.contains(&key) { - self.cyclic.push(key); - } - } - - /// Check if a type is cyclic. - pub fn is_cyclic(&self, key: &TypeKey<'src>) -> bool { - self.cyclic.contains(key) - } - - /// Get a type by key. - pub fn get(&self, key: &TypeKey<'src>) -> Option<&TypeValue<'src>> { - self.types.get(key) - } - - /// Iterate over all types in insertion order. - pub fn iter(&self) -> impl Iterator, &TypeValue<'src>)> { - self.types.iter() - } - - /// Merge fields from multiple struct branches (for untagged unions). 
- /// - /// Given a list of field maps (one per branch), produces a merged field map where: - /// - Fields present in all branches with the same type keep that type - /// - Fields present in only some branches become Optional - /// - Fields with conflicting types across branches become Invalid - /// - /// # Example - /// - /// Branch 1: `{ name: String, value: Node }` - /// Branch 2: `{ name: String, extra: Node }` - /// - /// Merged: `{ name: String, value: Optional, extra: Optional }` - /// - /// # Type Conflict Example - /// - /// Branch 1: `{ x: String }` - /// Branch 2: `{ x: Node }` - /// - /// Merged: `{ x: Invalid }` (with diagnostic warning) - pub fn merge_fields( - branches: &[IndexMap<&'src str, TypeKey<'src>>], - ) -> IndexMap<&'src str, MergedField<'src>> { - if branches.is_empty() { - return IndexMap::new(); - } - - // Collect all field names across all branches - let mut all_fields: IndexMap<&'src str, ()> = IndexMap::new(); - for branch in branches { - for field_name in branch.keys() { - all_fields.entry(*field_name).or_insert(()); - } - } - - let mut result = IndexMap::new(); - let branch_count = branches.len(); - - for field_name in all_fields.keys() { - // Collect (type, count) for this field across branches - let mut type_occurrences: Vec<&TypeKey<'src>> = Vec::new(); - for branch in branches { - if let Some(ty) = branch.get(field_name) { - type_occurrences.push(ty); - } - } - - let present_count = type_occurrences.len(); - if present_count == 0 { - continue; - } - - // Check if all occurrences have the same type - let first_type = type_occurrences[0]; - let all_same_type = type_occurrences.iter().all(|t| *t == first_type); - - let merged = if !all_same_type { - // Type conflict - MergedField::Conflict - } else if present_count == branch_count { - // Present in all branches with same type - MergedField::Same(first_type.clone()) - } else { - // Present in some branches only - MergedField::Optional(first_type.clone()) - }; - - result.insert(*field_name, merged); - } - - result - } -} - -impl Default for TypeTable<'_> { - fn default() -> Self { - Self::new() - } -} diff --git a/crates/plotnik-lib/src/infer/types_tests.rs b/crates/plotnik-lib/src/infer/types_tests.rs deleted file mode 100644 index 32299deb..00000000 --- a/crates/plotnik-lib/src/infer/types_tests.rs +++ /dev/null @@ -1,377 +0,0 @@ -use super::types::{MergedField, TypeKey, TypeTable, TypeValue, to_pascal}; -use indexmap::IndexMap; - -#[test] -fn type_key_to_pascal_case_builtins() { - assert_eq!(TypeKey::Node.to_pascal_case(), "Node"); - assert_eq!(TypeKey::String.to_pascal_case(), "String"); - assert_eq!(TypeKey::Unit.to_pascal_case(), "Unit"); - assert_eq!(TypeKey::Invalid.to_pascal_case(), "Unit"); // Invalid emits as Unit -} - -#[test] -fn type_key_to_pascal_case_named() { - assert_eq!( - TypeKey::Named("FunctionInfo").to_pascal_case(), - "FunctionInfo" - ); - assert_eq!(TypeKey::Named("Stmt").to_pascal_case(), "Stmt"); -} - -#[test] -fn type_key_to_pascal_case_synthetic() { - assert_eq!(TypeKey::Synthetic(vec!["Foo"]).to_pascal_case(), "Foo"); - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar"]).to_pascal_case(), - "FooBar" - ); - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar", "baz"]).to_pascal_case(), - "FooBarBaz" - ); -} - -#[test] -fn type_key_to_pascal_case_snake_case_segments() { - assert_eq!( - TypeKey::Synthetic(vec!["Foo", "bar_baz"]).to_pascal_case(), - "FooBarBaz" - ); - assert_eq!( - TypeKey::Synthetic(vec!["function_info", "params"]).to_pascal_case(), - "FunctionInfoParams" - ); -} - 
-#[test] -fn type_table_new_has_builtins() { - let table = TypeTable::new(); - assert_eq!(table.get(&TypeKey::Node), Some(&TypeValue::Node)); - assert_eq!(table.get(&TypeKey::String), Some(&TypeValue::String)); - assert_eq!(table.get(&TypeKey::Unit), Some(&TypeValue::Unit)); - assert_eq!(table.get(&TypeKey::Invalid), Some(&TypeValue::Invalid)); -} - -#[test] -fn type_table_insert_and_get() { - let mut table = TypeTable::new(); - let key = TypeKey::Named("Foo"); - let value = TypeValue::Struct(IndexMap::new()); - table.insert(key.clone(), value.clone()); - assert_eq!(table.get(&key), Some(&value)); -} - -#[test] -fn type_table_cyclic_tracking() { - let mut table = TypeTable::new(); - let key = TypeKey::Named("Recursive"); - - assert!(!table.is_cyclic(&key)); - table.mark_cyclic(key.clone()); - assert!(table.is_cyclic(&key)); - - // Double marking is idempotent - table.mark_cyclic(key.clone()); - assert_eq!(table.cyclic.len(), 1); -} - -#[test] -fn type_table_iter_preserves_order() { - let mut table = TypeTable::new(); - table.insert(TypeKey::Named("A"), TypeValue::Unit); - table.insert(TypeKey::Named("B"), TypeValue::Unit); - table.insert(TypeKey::Named("C"), TypeValue::Unit); - - let keys: Vec<_> = table.iter().map(|(k, _)| k.clone()).collect(); - // Builtins first (Node, String, Unit, Invalid), then inserted order - assert_eq!(keys[0], TypeKey::Node); - assert_eq!(keys[1], TypeKey::String); - assert_eq!(keys[2], TypeKey::Unit); - assert_eq!(keys[3], TypeKey::Invalid); - assert_eq!(keys[4], TypeKey::Named("A")); - assert_eq!(keys[5], TypeKey::Named("B")); - assert_eq!(keys[6], TypeKey::Named("C")); -} - -#[test] -fn type_table_default() { - let table: TypeTable = Default::default(); - assert!(table.get(&TypeKey::Node).is_some()); -} - -#[test] -fn type_value_equality() { - let s1 = TypeValue::Struct(IndexMap::new()); - let s2 = TypeValue::Struct(IndexMap::new()); - assert_eq!(s1, s2); - - let mut fields = IndexMap::new(); - fields.insert("x", TypeKey::Node); - let s3 = TypeValue::Struct(fields); - assert_ne!(s1, s3); -} - -#[test] -fn type_value_wrapper_types() { - let opt = TypeValue::Optional(TypeKey::Node); - let list = TypeValue::List(TypeKey::Node); - let ne_list = TypeValue::NonEmptyList(TypeKey::Node); - - assert_ne!(opt, list); - assert_ne!(list, ne_list); -} - -#[test] -fn type_value_tagged_union() { - let mut table = TypeTable::new(); - - let mut assign_fields = IndexMap::new(); - assign_fields.insert("target", TypeKey::String); - table.insert( - TypeKey::Synthetic(vec!["Stmt", "Assign"]), - TypeValue::Struct(assign_fields), - ); - - let mut call_fields = IndexMap::new(); - call_fields.insert("func", TypeKey::String); - table.insert( - TypeKey::Synthetic(vec!["Stmt", "Call"]), - TypeValue::Struct(call_fields), - ); - - let mut variants = IndexMap::new(); - variants.insert("Assign", TypeKey::Synthetic(vec!["Stmt", "Assign"])); - variants.insert("Call", TypeKey::Synthetic(vec!["Stmt", "Call"])); - - let union = TypeValue::TaggedUnion(variants); - table.insert(TypeKey::Named("Stmt"), union); - - let TypeValue::TaggedUnion(v) = table.get(&TypeKey::Named("Stmt")).unwrap() else { - panic!("expected TaggedUnion"); - }; - assert_eq!(v.len(), 2); - assert!(v.contains_key("Assign")); - assert!(v.contains_key("Call")); - assert!(table.get(&v["Assign"]).is_some()); -} - -#[test] -fn type_value_tagged_union_empty_variant() { - let mut table = TypeTable::new(); - - let mut variants = IndexMap::new(); - variants.insert("Empty", TypeKey::Unit); - table.insert( - TypeKey::Named("MaybeEmpty"), - 
TypeValue::TaggedUnion(variants), - ); - - let TypeValue::TaggedUnion(v) = table.get(&TypeKey::Named("MaybeEmpty")).unwrap() else { - panic!("expected TaggedUnion"); - }; - assert_eq!(v["Empty"], TypeKey::Unit); -} - -#[test] -fn to_pascal_empty_string() { - assert_eq!(to_pascal(""), ""); -} - -#[test] -fn to_pascal_single_char() { - assert_eq!(to_pascal("a"), "A"); - assert_eq!(to_pascal("Z"), "Z"); -} - -#[test] -fn to_pascal_already_pascal() { - assert_eq!(to_pascal("FooBar"), "FooBar"); -} - -#[test] -fn to_pascal_multiple_underscores() { - assert_eq!(to_pascal("foo__bar"), "FooBar"); - assert_eq!(to_pascal("_foo_"), "Foo"); -} - -#[test] -fn type_key_equality() { - assert_eq!(TypeKey::Node, TypeKey::Node); - assert_ne!(TypeKey::Node, TypeKey::String); - assert_eq!(TypeKey::Named("Foo"), TypeKey::Named("Foo")); - assert_ne!(TypeKey::Named("Foo"), TypeKey::Named("Bar")); - assert_eq!( - TypeKey::Synthetic(vec!["a", "b"]), - TypeKey::Synthetic(vec!["a", "b"]) - ); - assert_ne!( - TypeKey::Synthetic(vec!["a", "b"]), - TypeKey::Synthetic(vec!["a", "c"]) - ); -} - -#[test] -fn type_key_hash_consistency() { - use std::collections::HashSet; - let mut set = HashSet::new(); - set.insert(TypeKey::Node); - set.insert(TypeKey::Named("Foo")); - set.insert(TypeKey::Synthetic(vec!["a", "b"])); - - assert!(set.contains(&TypeKey::Node)); - assert!(set.contains(&TypeKey::Named("Foo"))); - assert!(set.contains(&TypeKey::Synthetic(vec!["a", "b"]))); - assert!(!set.contains(&TypeKey::String)); -} - -#[test] -fn type_key_is_builtin() { - assert!(TypeKey::Node.is_builtin()); - assert!(TypeKey::String.is_builtin()); - assert!(TypeKey::Unit.is_builtin()); - assert!(TypeKey::Invalid.is_builtin()); - assert!(!TypeKey::Named("Foo").is_builtin()); - assert!(!TypeKey::Synthetic(vec!["a"]).is_builtin()); -} - -#[test] -fn type_value_invalid() { - assert_eq!(TypeValue::Invalid, TypeValue::Invalid); - assert_ne!(TypeValue::Invalid, TypeValue::Unit); -} - -#[test] -fn merge_fields_empty_branches() { - let branches: Vec> = vec![]; - - let merged = TypeTable::merge_fields(&branches); - - assert!(merged.is_empty()); -} - -#[test] -fn merge_fields_single_branch() { - let mut branch = IndexMap::new(); - branch.insert("name", TypeKey::String); - branch.insert("value", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Same(TypeKey::Node)); -} - -#[test] -fn merge_fields_identical_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 1); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); -} - -#[test] -fn merge_fields_missing_in_some_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - branch1.insert("value", TypeKey::Node); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - // value missing - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_disjoint_branches() { - let mut branch1 = IndexMap::new(); - branch1.insert("a", TypeKey::String); - - let mut branch2 = 
IndexMap::new(); - branch2.insert("b", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 2); - assert_eq!(merged["a"], MergedField::Optional(TypeKey::String)); - assert_eq!(merged["b"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_type_conflict() { - let mut branch1 = IndexMap::new(); - branch1.insert("x", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("x", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 1); - assert_eq!(merged["x"], MergedField::Conflict); -} - -#[test] -fn merge_fields_partial_conflict() { - // Three branches: x is String in branch 1 and 2, Node in branch 3 - let mut branch1 = IndexMap::new(); - branch1.insert("x", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("x", TypeKey::String); - - let mut branch3 = IndexMap::new(); - branch3.insert("x", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2, branch3]); - - assert_eq!(merged["x"], MergedField::Conflict); -} - -#[test] -fn merge_fields_complex_scenario() { - // Branch 1: { name: String, value: Node } - // Branch 2: { name: String, extra: Node } - // Result: { name: String, value: Optional, extra: Optional } - let mut branch1 = IndexMap::new(); - branch1.insert("name", TypeKey::String); - branch1.insert("value", TypeKey::Node); - - let mut branch2 = IndexMap::new(); - branch2.insert("name", TypeKey::String); - branch2.insert("extra", TypeKey::Node); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - assert_eq!(merged.len(), 3); - assert_eq!(merged["name"], MergedField::Same(TypeKey::String)); - assert_eq!(merged["value"], MergedField::Optional(TypeKey::Node)); - assert_eq!(merged["extra"], MergedField::Optional(TypeKey::Node)); -} - -#[test] -fn merge_fields_preserves_order() { - let mut branch1 = IndexMap::new(); - branch1.insert("z", TypeKey::String); - branch1.insert("a", TypeKey::String); - - let mut branch2 = IndexMap::new(); - branch2.insert("m", TypeKey::String); - - let merged = TypeTable::merge_fields(&[branch1, branch2]); - - let keys: Vec<_> = merged.keys().collect(); - // Order follows first occurrence across branches - assert_eq!(keys, vec![&"z", &"a", &"m"]); -} diff --git a/crates/plotnik-lib/src/infer/tyton.rs b/crates/plotnik-lib/src/infer/tyton.rs deleted file mode 100644 index e3a89364..00000000 --- a/crates/plotnik-lib/src/infer/tyton.rs +++ /dev/null @@ -1,547 +0,0 @@ -//! Tyton: Types Testing Object Notation -//! -//! A compact DSL for constructing `TypeTable` test fixtures. -//! Supports both parsing (text → TypeTable) and emitting (TypeTable → text). -//! -//! # Design -//! -//! Tyton uses a **flattened structure** mirroring `TypeTable`: all types are -//! top-level definitions referenced by name. No inline nesting is supported. -//! -//! ```text -//! // ✗ Invalid: inline optional -//! Foo = { #Node? @maybe } -//! -//! // ✓ Valid: separate definition + reference -//! MaybeNode = #Node? -//! Foo = { MaybeNode @maybe } -//! ``` -//! -//! # Syntax -//! -//! Keys: -//! - `#Node` — built-in node type -//! - `#string` — built-in string type -//! - `#Invalid` — built-in invalid type -//! - `#DefaultQuery` — unnamed entry point query -//! - `()` — built-in unit type -//! - `PascalName` — named type -//! - `` — synthetic key from path segments -//! -//! Values: -//! - `{ Type @field ... }` — struct with fields -//! - `[ Tag: Type ... ]` — tagged union -//! 
-//! - `Key?` — optional wrapper
-//! - `Key*` — list wrapper
-//! - `Key+` — non-empty list wrapper
-//! - `#Node` / `#string` / `()` — bare builtin alias
-//!
-//! Definitions:
-//! - `Name = { ... }` — define a struct
-//! - `Name = [ ... ]` — define a tagged union
-//! - `Name = Other?` — define an optional
-//! - `<Seg1 seg2> = { ... }` — define with synthetic key
-//! - `#DefaultQuery = { ... }` — define unnamed entry point
-//! - `AliasNode = #Node` — alias to builtin
-//!
-//! # Example
-//!
-//! ```text
-//! FuncInfo = { #string @name #Node @body }
-//! Stmt = [ Assign: AssignStmt Call: CallStmt ]
-//! Stmts = Stmt*
-//! ```
-
-use std::fmt::Write;
-
-use indexmap::IndexMap;
-use logos::Logos;
-
-use super::{TypeKey, TypeTable, TypeValue};
-
-#[derive(Logos, Debug, Clone, PartialEq)]
-#[logos(skip r"[ \t\n\r]+")]
-enum Token<'src> {
-    // Built-in type keywords (prefixed with #)
-    #[token("#Node")]
-    Node,
-
-    #[token("#string")]
-    String,
-
-    #[token("#Invalid")]
-    Invalid,
-
-    #[token("#DefaultQuery")]
-    DefaultQuery,
-
-    #[token("()")]
-    Unit,
-
-    // Symbols
-    #[token("=")]
-    Eq,
-
-    #[token("{")]
-    LBrace,
-
-    #[token("}")]
-    RBrace,
-
-    #[token("[")]
-    LBracket,
-
-    #[token("]")]
-    RBracket,
-
-    #[token("<")]
-    LAngle,
-
-    #[token(">")]
-    RAngle,
-
-    #[token(":")]
-    Colon,
-
-    #[token("@")]
-    At,
-
-    #[token("?")]
-    Question,
-
-    #[token("*")]
-    Star,
-
-    #[token("+")]
-    Plus,
-
-    // Identifiers: PascalCase for type names, snake_case for fields/segments
-    #[regex(r"[A-Z][a-zA-Z0-9]*", |lex| lex.slice())]
-    UpperIdent(&'src str),
-
-    #[regex(r"[a-z][a-z0-9_]*", |lex| lex.slice())]
-    LowerIdent(&'src str),
-}
-
-struct Parser<'src> {
-    tokens: Vec<(Token<'src>, std::ops::Range<usize>)>,
-    pos: usize,
-    input: &'src str,
-}
-
-#[derive(Debug)]
-pub struct ParseError {
-    pub message: String,
-    pub span: std::ops::Range<usize>,
-}
-
-impl std::fmt::Display for ParseError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{} at {:?}", self.message, self.span)
-    }
-}
-
-impl std::error::Error for ParseError {}
-
-impl<'src> Parser<'src> {
-    fn new(input: &'src str) -> Result<Self, ParseError> {
-        let lexer = Token::lexer(input);
-        let mut tokens = Vec::new();
-
-        for (result, span) in lexer.spanned() {
-            match result {
-                Ok(token) => tokens.push((token, span)),
-                Err(_) => {
-                    return Err(ParseError {
-                        message: format!("unexpected character: {:?}", &input[span.clone()]),
-                        span,
-                    });
-                }
-            }
-        }
-
-        Ok(Self {
-            tokens,
-            pos: 0,
-            input,
-        })
-    }
-
-    fn peek(&self) -> Option<&Token<'src>> {
-        self.tokens.get(self.pos).map(|(t, _)| t)
-    }
-
-    fn advance(&mut self) -> Option<&Token<'src>> {
-        let token = self.tokens.get(self.pos).map(|(t, _)| t);
-        if token.is_some() {
-            self.pos += 1;
-        }
-        token
-    }
-
-    fn current_span(&self) -> std::ops::Range<usize> {
-        self.tokens
-            .get(self.pos)
-            .map(|(_, s)| s.clone())
-            .unwrap_or(self.input.len()..self.input.len())
-    }
-
-    fn expect(&mut self, expected: Token<'src>) -> Result<(), ParseError> {
-        let span = self.current_span();
-        match self.advance() {
-            Some(t) if std::mem::discriminant(t) == std::mem::discriminant(&expected) => Ok(()),
-            Some(t) => Err(ParseError {
-                message: format!("expected {:?}, got {:?}", expected, t),
-                span,
-            }),
-            None => Err(ParseError {
-                message: format!("expected {:?}, got EOF", expected),
-                span,
-            }),
-        }
-    }
-
-    fn parse_type_key(&mut self) -> Result<TypeKey<'src>, ParseError> {
-        let span = self.current_span();
-        match self.peek() {
-            Some(Token::Node) => {
-                self.advance();
-                Ok(TypeKey::Node)
-            }
-            Some(Token::String) => {
self.advance(); - Ok(TypeKey::String) - } - Some(Token::Invalid) => { - self.advance(); - Ok(TypeKey::Invalid) - } - Some(Token::DefaultQuery) => { - self.advance(); - Ok(TypeKey::DefaultQuery) - } - Some(Token::Unit) => { - self.advance(); - Ok(TypeKey::Unit) - } - Some(Token::UpperIdent(name)) => { - let name = *name; - self.advance(); - Ok(TypeKey::Named(name)) - } - Some(Token::LAngle) => self.parse_synthetic_key(), - _ => Err(ParseError { - message: "expected type key".to_string(), - span, - }), - } - } - - fn parse_synthetic_key(&mut self) -> Result, ParseError> { - self.expect(Token::LAngle)?; - let mut segments = Vec::new(); - - loop { - let span = self.current_span(); - match self.peek() { - Some(Token::RAngle) => { - self.advance(); - break; - } - Some(Token::UpperIdent(s)) => { - let s = *s; - self.advance(); - segments.push(s); - } - Some(Token::LowerIdent(s)) => { - let s = *s; - self.advance(); - segments.push(s); - } - _ => { - return Err(ParseError { - message: "expected identifier or '>'".to_string(), - span, - }); - } - } - } - - if segments.is_empty() { - return Err(ParseError { - message: "synthetic key cannot be empty".to_string(), - span: self.current_span(), - }); - } - - Ok(TypeKey::Synthetic(segments)) - } - - fn parse_type_value(&mut self) -> Result, ParseError> { - let span = self.current_span(); - match self.peek() { - Some(Token::LBrace) => self.parse_struct(), - Some(Token::LBracket) => self.parse_tagged_union(), - Some(Token::Node) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Node, TypeValue::Node) - } - Some(Token::String) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::String, TypeValue::String) - } - Some(Token::Invalid) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Invalid, TypeValue::Invalid) - } - Some(Token::Unit) => { - self.advance(); - self.parse_wrapper_or_bare(TypeKey::Unit, TypeValue::Unit) - } - Some(Token::UpperIdent(_)) | Some(Token::LAngle) => { - let key = self.parse_type_key()?; - self.parse_wrapper(key) - } - _ => Err(ParseError { - message: "expected type value".to_string(), - span, - }), - } - } - - fn parse_wrapper_or_bare( - &mut self, - key: TypeKey<'src>, - bare: TypeValue<'src>, - ) -> Result, ParseError> { - match self.peek() { - Some(Token::Question) => { - self.advance(); - Ok(TypeValue::Optional(key)) - } - Some(Token::Star) => { - self.advance(); - Ok(TypeValue::List(key)) - } - Some(Token::Plus) => { - self.advance(); - Ok(TypeValue::NonEmptyList(key)) - } - _ => Ok(bare), - } - } - - fn parse_struct(&mut self) -> Result, ParseError> { - self.expect(Token::LBrace)?; - let mut fields = IndexMap::new(); - - loop { - if matches!(self.peek(), Some(Token::RBrace)) { - self.advance(); - break; - } - - let type_key = self.parse_type_key()?; - self.expect(Token::At)?; - - let span = self.current_span(); - let field_name = match self.advance() { - Some(Token::LowerIdent(name)) => *name, - _ => { - return Err(ParseError { - message: "expected field name (lowercase)".to_string(), - span, - }); - } - }; - - fields.insert(field_name, type_key); - } - - Ok(TypeValue::Struct(fields)) - } - - fn parse_tagged_union(&mut self) -> Result, ParseError> { - self.expect(Token::LBracket)?; - let mut variants = IndexMap::new(); - - loop { - if matches!(self.peek(), Some(Token::RBracket)) { - self.advance(); - break; - } - - let span = self.current_span(); - let tag = match self.advance() { - Some(Token::UpperIdent(name)) => *name, - _ => { - return Err(ParseError { - message: "expected variant tag 
(uppercase)".to_string(), - span, - }); - } - }; - - self.expect(Token::Colon)?; - let type_key = self.parse_type_key()?; - variants.insert(tag, type_key); - } - - Ok(TypeValue::TaggedUnion(variants)) - } - - fn parse_wrapper(&mut self, inner: TypeKey<'src>) -> Result, ParseError> { - match self.peek() { - Some(Token::Question) => { - self.advance(); - Ok(TypeValue::Optional(inner)) - } - Some(Token::Star) => { - self.advance(); - Ok(TypeValue::List(inner)) - } - Some(Token::Plus) => { - self.advance(); - Ok(TypeValue::NonEmptyList(inner)) - } - _ => Err(ParseError { - message: "expected quantifier (?, *, +) after type key".to_string(), - span: self.current_span(), - }), - } - } - - fn parse_definition(&mut self) -> Result<(TypeKey<'src>, TypeValue<'src>), ParseError> { - let span = self.current_span(); - let key = match self.peek() { - Some(Token::UpperIdent(name)) => { - let name = *name; - self.advance(); - TypeKey::Named(name) - } - Some(Token::DefaultQuery) => { - self.advance(); - TypeKey::DefaultQuery - } - Some(Token::LAngle) => self.parse_synthetic_key()?, - _ => { - return Err(ParseError { - message: "expected type name (uppercase) or synthetic key".to_string(), - span, - }); - } - }; - - self.expect(Token::Eq)?; - let value = self.parse_type_value()?; - - Ok((key, value)) - } - - fn parse_all(&mut self) -> Result, ParseError> { - let mut table = TypeTable::new(); - - while self.peek().is_some() { - let (key, value) = self.parse_definition()?; - table.insert(key, value); - } - - Ok(table) - } -} - -/// Parse tyton notation into a TypeTable. -pub fn parse(input: &str) -> Result, ParseError> { - let mut parser = Parser::new(input)?; - parser.parse_all() -} - -/// Emit TypeTable as tyton notation. -pub fn emit(table: &TypeTable<'_>) -> String { - let mut out = String::new(); - - for (key, value) in table.iter() { - if is_builtin(key) { - continue; - } - if !out.is_empty() { - out.push('\n'); - } - emit_key(&mut out, key); - out.push_str(" = "); - emit_value(&mut out, value); - } - - out -} - -fn is_builtin(key: &TypeKey<'_>) -> bool { - matches!( - key, - TypeKey::Node | TypeKey::String | TypeKey::Unit | TypeKey::Invalid - ) -} - -fn emit_key(out: &mut String, key: &TypeKey<'_>) { - match key { - TypeKey::Node => out.push_str("#Node"), - TypeKey::String => out.push_str("#string"), - TypeKey::Invalid => out.push_str("#Invalid"), - TypeKey::Unit => out.push_str("()"), - TypeKey::DefaultQuery => out.push_str("#DefaultQuery"), - TypeKey::Named(name) => out.push_str(name), - TypeKey::Synthetic(segments) => { - out.push('<'); - for (i, seg) in segments.iter().enumerate() { - if i > 0 { - out.push(' '); - } - out.push_str(seg); - } - out.push('>'); - } - } -} - -fn emit_value(out: &mut String, value: &TypeValue<'_>) { - match value { - TypeValue::Node => out.push_str("#Node"), - TypeValue::String => out.push_str("#string"), - TypeValue::Invalid => out.push_str("#Invalid"), - TypeValue::Unit => out.push_str("()"), - TypeValue::Struct(fields) => { - out.push_str("{ "); - for (i, (field, key)) in fields.iter().enumerate() { - if i > 0 { - out.push(' '); - } - emit_key(out, key); - write!(out, " @{}", field).unwrap(); - } - out.push_str(" }"); - } - TypeValue::TaggedUnion(variants) => { - out.push_str("[ "); - for (i, (tag, key)) in variants.iter().enumerate() { - if i > 0 { - out.push(' '); - } - write!(out, "{}: ", tag).unwrap(); - emit_key(out, key); - } - out.push_str(" ]"); - } - TypeValue::Optional(key) => { - emit_key(out, key); - out.push('?'); - } - TypeValue::List(key) => { - 
emit_key(out, key); - out.push('*'); - } - TypeValue::NonEmptyList(key) => { - emit_key(out, key); - out.push('+'); - } - } -} diff --git a/crates/plotnik-lib/src/infer/tyton_tests.rs b/crates/plotnik-lib/src/infer/tyton_tests.rs deleted file mode 100644 index c948f295..00000000 --- a/crates/plotnik-lib/src/infer/tyton_tests.rs +++ /dev/null @@ -1,599 +0,0 @@ -use super::tyton::{emit, parse}; -use indoc::indoc; - -fn dump_table(input: &str) -> String { - match parse(input) { - Ok(table) => { - let mut out = String::new(); - for (key, value) in table.iter() { - out.push_str(&format!("{:?} = {:?}\n", key, value)); - } - out - } - Err(e) => format!("ERROR: {}", e), - } -} - -#[test] -fn parse_empty() { - insta::assert_snapshot!(dump_table(""), @r" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - "); -} - -#[test] -fn parse_struct_simple() { - let input = "Foo = { #Node @name }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Foo") = Struct({"name": Node}) - "#); -} - -#[test] -fn parse_struct_multiple_fields() { - let input = "Func = { #string @name #Node @body #Node @params }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Func") = Struct({"name": String, "body": Node, "params": Node}) - "#); -} - -#[test] -fn parse_struct_empty() { - let input = "Empty = {}"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Empty") = Struct({}) - "#); -} - -#[test] -fn parse_struct_with_unit() { - let input = "Wrapper = { () @unit }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = Struct({"unit": Unit}) - "#); -} - -#[test] -fn parse_tagged_union() { - let input = "Stmt = [ Assign: AssignStmt Call: CallStmt ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Stmt") = TaggedUnion({"Assign": Named("AssignStmt"), "Call": Named("CallStmt")}) - "#); -} - -#[test] -fn parse_tagged_union_single() { - let input = "Single = [ Only: OnlyVariant ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Single") = TaggedUnion({"Only": Named("OnlyVariant")}) - "#); -} - -#[test] -fn parse_tagged_union_with_builtins() { - let input = "Mixed = [ Text: #string Code: #Node Empty: () ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Mixed") = TaggedUnion({"Text": String, "Code": Node, "Empty": Unit}) - "#); -} - -#[test] -fn parse_optional() { - let input = "MaybeNode = #Node?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeNode") = Optional(Node) - "#); -} - -#[test] -fn parse_list() { - let input = "Nodes = #Node*"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Nodes") = List(Node) - "#); -} - -#[test] -fn parse_non_empty_list() { - let input = "Nodes = #Node+"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Nodes") = NonEmptyList(Node) - "#); -} - -#[test] -fn parse_optional_named() { - let input = "MaybeStmt = Stmt?"; - 
insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeStmt") = Optional(Named("Stmt")) - "#); -} - -#[test] -fn parse_list_named() { - let input = "Stmts = Stmt*"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Stmts") = List(Named("Stmt")) - "#); -} - -#[test] -fn parse_synthetic_key_simple() { - let input = "Wrapper = ?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = Optional(Synthetic(["Foo", "bar"])) - "#); -} - -#[test] -fn parse_synthetic_key_multiple_segments() { - let input = "Wrapper = *"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Wrapper") = List(Synthetic(["Foo", "bar", "baz"])) - "#); -} - -#[test] -fn parse_struct_with_synthetic() { - let input = "Container = { @inner }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Container") = Struct({"inner": Synthetic(["Inner", "field"])}) - "#); -} - -#[test] -fn parse_union_with_synthetic() { - let input = "Choice = [ First: Second: ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Choice") = TaggedUnion({"First": Synthetic(["Choice", "first"]), "Second": Synthetic(["Choice", "second"])}) - "#); -} - -#[test] -fn parse_multiple_definitions() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AssignStmt") = Struct({"target": Node, "value": Node}) - Named("CallStmt") = Struct({"func": Node, "args": Node}) - Named("Stmt") = TaggedUnion({"Assign": Named("AssignStmt"), "Call": Named("CallStmt")}) - Named("Stmts") = List(Named("Stmt")) - "#); -} - -#[test] -fn parse_complex_example() { - let input = indoc! {r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - Stmt = [ Func: FuncDecl Expr: #Node ] - MaybeStmt = Stmt? - Program = { Stmt @statements } - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("FuncInfo") = Struct({"name": String, "body": Node}) - Named("Param") = Struct({"name": String, "type_annotation": String}) - Named("Params") = List(Named("Param")) - Named("FuncDecl") = Struct({"info": Named("FuncInfo"), "params": Named("Params")}) - Named("Stmt") = TaggedUnion({"Func": Named("FuncDecl"), "Expr": Node}) - Named("MaybeStmt") = Optional(Named("Stmt")) - Named("Program") = Struct({"statements": Named("Stmt")}) - "#); -} - -#[test] -fn parse_all_builtins() { - let input = indoc! {r#" - AllBuiltins = { #Node @node #string @str () @unit } - OptNode = #Node? 
- ListStr = #string* - NonEmptyUnit = ()+ - "#}; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AllBuiltins") = Struct({"node": Node, "str": String, "unit": Unit}) - Named("OptNode") = Optional(Node) - Named("ListStr") = List(String) - Named("NonEmptyUnit") = NonEmptyList(Unit) - "#); -} - -#[test] -fn parse_invalid_builtin() { - let input = "HasInvalid = { #Invalid @bad }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("HasInvalid") = Struct({"bad": Invalid}) - "#); -} - -#[test] -fn parse_invalid_wrapper() { - let input = "MaybeInvalid = #Invalid?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("MaybeInvalid") = Optional(Invalid) - "#); -} - -#[test] -fn error_missing_eq() { - let input = "Foo { #Node @x }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected Eq, got LBrace at 4..5"); -} - -#[test] -fn error_missing_at() { - let input = "Foo = { #Node name }"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: expected At, got LowerIdent("name") at 14..18"#); -} - -#[test] -fn error_missing_colon_in_union() { - let input = "Foo = [ A B ]"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: expected Colon, got UpperIdent("B") at 10..11"#); -} - -#[test] -fn error_empty_synthetic() { - let input = "Foo = <>?"; - insta::assert_snapshot!(dump_table(input), @"ERROR: synthetic key cannot be empty at 8..9"); -} - -#[test] -fn error_unclosed_brace() { - let input = "Foo = { #Node @x"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type key at 16..16"); -} - -#[test] -fn error_unclosed_bracket() { - let input = "Foo = [ A: B"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected variant tag (uppercase) at 12..12"); -} - -#[test] -fn error_lowercase_type_name() { - let input = "foo = { #Node @x }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type name (uppercase) or synthetic key at 0..3"); -} - -#[test] -fn error_uppercase_field_name() { - let input = "Foo = { #Node @Name }"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected field name (lowercase) at 15..19"); -} - -#[test] -fn parse_bare_builtin_alias_node() { - let input = "AliasNode = #Node"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasNode") = Node - "#); -} - -#[test] -fn parse_bare_builtin_alias_string() { - let input = "AliasString = #string"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasString") = String - "#); -} - -#[test] -fn parse_bare_builtin_alias_unit() { - let input = "AliasUnit = ()"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasUnit") = Unit - "#); -} - -#[test] -fn parse_bare_builtin_alias_invalid() { - let input = "AliasInvalid = #Invalid"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("AliasInvalid") = Invalid - "#); -} - -#[test] -fn parse_synthetic_definition_struct() { - let input = " = { #Node @value }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Foo", "bar"]) = Struct({"value": Node}) - "#); -} - 
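For reference, the removed Tyton helpers were designed to round-trip. A minimal sketch (editorial; the `plotnik_lib::infer::tyton` import path and the `main` wrapper are assumptions), mirroring the `emit_roundtrip` test further below:

```rust
// Assumed import path for the module being deleted in this diff.
use plotnik_lib::infer::tyton::{emit, parse};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Whitespace is the only separator, so one line is enough for several definitions.
    let src = "AssignStmt = { #Node @target #Node @value } Stmt = [ Assign: AssignStmt ] Stmts = Stmt*";
    let table = parse(src)?;         // text -> TypeTable
    let emitted = emit(&table);      // TypeTable -> text (builtins are skipped on output)
    let reparsed = parse(&emitted)?; // parse what we emitted
    assert_eq!(table.types, reparsed.types);
    Ok(())
}
```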
-#[test] -fn parse_synthetic_definition_union() { - let input = " = [ A: #Node B: #string ]"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Choice", "first"]) = TaggedUnion({"A": Node, "B": String}) - "#); -} - -#[test] -fn parse_synthetic_definition_wrapper() { - let input = " = #Node?"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Synthetic(["Inner", "nested"]) = Optional(Node) - "#); -} - -#[test] -fn error_invalid_char() { - let input = "Foo = { #Node @x $ }"; - insta::assert_snapshot!(dump_table(input), @r#"ERROR: unexpected character: "$" at 17..18"#); -} - -#[test] -fn error_eof_in_struct() { - let input = "Foo = { #Node @x"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type key at 16..16"); -} - -#[test] -fn error_eof_expecting_colon() { - let input = "Foo = [ A"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected Colon, got EOF at 9..9"); -} - -#[test] -fn error_invalid_token_in_synthetic() { - let input = "Foo = ?"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected identifier or '>' at 9..10"); -} - -#[test] -fn error_invalid_type_value() { - let input = "Foo = @bar"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type value at 6..7"); -} - -#[test] -fn error_unprefixed_node() { - let input = "Foo = { Node @x }"; - insta::assert_snapshot!(dump_table(input), @r#" - Node = Node - String = String - Unit = Unit - Invalid = Invalid - Named("Foo") = Struct({"x": Named("Node")}) - "#); -} - -#[test] -fn error_unprefixed_string() { - let input = "Foo = string"; - insta::assert_snapshot!(dump_table(input), @"ERROR: expected type value at 6..12"); -} - -// === emit tests === - -#[test] -fn emit_empty() { - let table = parse("").unwrap(); - insta::assert_snapshot!(emit(&table), @""); -} - -#[test] -fn emit_struct_simple() { - let table = parse("Foo = { #Node @name }").unwrap(); - insta::assert_snapshot!(emit(&table), @"Foo = { #Node @name }"); -} - -#[test] -fn emit_struct_multiple_fields() { - let table = parse("Func = { #string @name #Node @body #Node @params }").unwrap(); - insta::assert_snapshot!(emit(&table), @"Func = { #string @name #Node @body #Node @params }"); -} - -#[test] -fn emit_struct_empty() { - let table = parse("Empty = {}").unwrap(); - insta::assert_snapshot!(emit(&table), @"Empty = { }"); -} - -#[test] -fn emit_tagged_union() { - let table = parse("Stmt = [ Assign: AssignStmt Call: CallStmt ]").unwrap(); - insta::assert_snapshot!(emit(&table), @"Stmt = [ Assign: AssignStmt Call: CallStmt ]"); -} - -#[test] -fn emit_optional() { - let table = parse("MaybeNode = #Node?").unwrap(); - insta::assert_snapshot!(emit(&table), @"MaybeNode = #Node?"); -} - -#[test] -fn emit_list() { - let table = parse("Nodes = #Node*").unwrap(); - insta::assert_snapshot!(emit(&table), @"Nodes = #Node*"); -} - -#[test] -fn emit_non_empty_list() { - let table = parse("Nodes = #Node+").unwrap(); - insta::assert_snapshot!(emit(&table), @"Nodes = #Node+"); -} - -#[test] -fn emit_synthetic_key() { - let table = parse(" = { #Node @value }").unwrap(); - insta::assert_snapshot!(emit(&table), @" = { #Node @value }"); -} - -#[test] -fn emit_synthetic_in_wrapper() { - let table = parse("Wrapper = ?").unwrap(); - insta::assert_snapshot!(emit(&table), @"Wrapper = ?"); -} - -#[test] -fn emit_bare_builtins() { - let input = indoc! 
{r#" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "#}; - let table = parse(input).unwrap(); - insta::assert_snapshot!(emit(&table), @r" - AliasNode = #Node - AliasString = #string - AliasUnit = () - "); -} - -#[test] -fn emit_multiple_definitions() { - let input = indoc! {r#" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "#}; - let table = parse(input).unwrap(); - insta::assert_snapshot!(emit(&table), @r" - AssignStmt = { #Node @target #Node @value } - CallStmt = { #Node @func #Node @args } - Stmt = [ Assign: AssignStmt Call: CallStmt ] - Stmts = Stmt* - "); -} - -#[test] -fn emit_roundtrip() { - let input = indoc! {r#" - FuncInfo = { #string @name #Node @body } - Param = { #string @name #string @type_annotation } - Params = Param* - FuncDecl = { FuncInfo @info Params @params } - Stmt = [ Func: FuncDecl Expr: #Node ] - MaybeStmt = Stmt? - "#}; - - let table1 = parse(input).unwrap(); - let emitted = emit(&table1); - let table2 = parse(&emitted).unwrap(); - - assert_eq!(table1.types, table2.types); -} diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs new file mode 100644 index 00000000..4463898b --- /dev/null +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -0,0 +1,757 @@ +//! Compiled query container and buffer. +//! +//! The compiled query lives in a single contiguous allocation—cache-friendly, +//! zero fragmentation, portable to WASM. See ADR-0004 for format details. + +use std::alloc::{Layout, alloc, dealloc}; +use std::fmt::Write; +use std::ptr; + +use super::{ + EffectOp, Entrypoint, NodeFieldId, NodeTypeId, Slice, StringId, StringRef, Transition, + TransitionId, TypeDef, TypeMember, +}; + +/// Buffer alignment for cache-line efficiency. +pub const BUFFER_ALIGN: usize = 64; + +/// Magic bytes identifying a compiled query file. +pub const MAGIC: [u8; 4] = *b"PLNK"; + +/// Current format version. +pub const FORMAT_VERSION: u32 = 1; + +/// Aligned buffer for compiled query data. +/// +/// Allocated via `Layout::from_size_align(len, BUFFER_ALIGN)`. Standard `Box<[u8]>` +/// won't work—it assumes 1-byte alignment and corrupts `dealloc`. +pub struct CompiledQueryBuffer { + ptr: *mut u8, + len: usize, + /// `true` if allocated, `false` if mmap'd or external. + owned: bool, +} + +impl CompiledQueryBuffer { + /// Allocate a new buffer with 64-byte alignment. + pub fn allocate(len: usize) -> Self { + if len == 0 { + return Self { + ptr: ptr::null_mut(), + len: 0, + owned: true, + }; + } + + let layout = Layout::from_size_align(len, BUFFER_ALIGN).expect("invalid layout"); + + // SAFETY: layout is non-zero size, properly aligned + let ptr = unsafe { alloc(layout) }; + if ptr.is_null() { + std::alloc::handle_alloc_error(layout); + } + + Self { + ptr, + len, + owned: true, + } + } + + /// Create a view into external memory (mmap'd or borrowed). + /// + /// # Safety + /// - `ptr` must be valid for reads of `len` bytes + /// - `ptr` must be aligned to `BUFFER_ALIGN` + /// - The backing memory must outlive the returned buffer + pub unsafe fn from_external(ptr: *mut u8, len: usize) -> Self { + debug_assert!( + (ptr as usize).is_multiple_of(BUFFER_ALIGN), + "buffer must be 64-byte aligned" + ); + Self { + ptr, + len, + owned: false, + } + } + + /// Returns a pointer to the buffer start. + #[inline] + pub fn as_ptr(&self) -> *const u8 { + self.ptr + } + + /// Returns a mutable pointer to the buffer start. 
+ #[inline] + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.ptr + } + + /// Returns the buffer length in bytes. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns true if the buffer is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the buffer as a byte slice. + #[inline] + pub fn as_slice(&self) -> &[u8] { + if self.ptr.is_null() { + &[] + } else { + // SAFETY: ptr is valid for len bytes if non-null + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } + } + + /// Returns the buffer as a mutable byte slice. + #[inline] + pub fn as_mut_slice(&mut self) -> &mut [u8] { + if self.ptr.is_null() { + &mut [] + } else { + // SAFETY: ptr is valid for len bytes if non-null, and we have &mut self + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } + } +} + +impl Drop for CompiledQueryBuffer { + fn drop(&mut self) { + if self.owned && !self.ptr.is_null() { + let layout = Layout::from_size_align(self.len, BUFFER_ALIGN) + .expect("layout was valid at allocation"); + // SAFETY: ptr was allocated with this exact layout + unsafe { dealloc(self.ptr, layout) }; + } + } +} + +// SAFETY: The buffer is just raw bytes, safe to send across threads +unsafe impl Send for CompiledQueryBuffer {} +unsafe impl Sync for CompiledQueryBuffer {} + +/// A compiled query ready for execution. +/// +/// Contains a single contiguous buffer with all segments, plus offset indices +/// for O(1) access to each segment. +pub struct CompiledQuery { + buffer: CompiledQueryBuffer, + // Segment offsets (byte offsets into buffer) + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, // 0 = no trivia kinds + // Segment counts (number of elements) + transition_count: u32, + successor_count: u32, + effect_count: u32, + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, +} + +impl CompiledQuery { + /// Creates a new compiled query from pre-built components. + /// + /// This is typically called by the emitter after layout computation. + #[allow(clippy::too_many_arguments)] + pub fn new( + buffer: CompiledQueryBuffer, + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, + transition_count: u32, + successor_count: u32, + effect_count: u32, + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, + ) -> Self { + Self { + buffer, + successors_offset, + effects_offset, + negated_fields_offset, + string_refs_offset, + string_bytes_offset, + type_defs_offset, + type_members_offset, + entrypoints_offset, + trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + } + } + + // ───────────────────────────────────────────────────────────────────── + // Segment accessors + // ───────────────────────────────────────────────────────────────────── + + /// Returns the transitions segment. 
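+    ///
+    /// The transitions segment sits at byte offset 0 of the buffer and holds
+    /// `transition_count` fixed-size `Transition` records; the 64-byte buffer
+    /// alignment guarantees the pointer cast below is suitably aligned.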
+ #[inline] + pub fn transitions(&self) -> &[Transition] { + // Transitions start at offset 0 + // SAFETY: buffer is properly aligned, transitions are at offset 0 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr() as *const Transition, + self.transition_count as usize, + ) + } + } + + /// Returns the successors segment. + #[inline] + pub fn successors(&self) -> &[TransitionId] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.successors_offset as usize) as *const TransitionId, + self.successor_count as usize, + ) + } + } + + /// Returns the effects segment. + #[inline] + pub fn effects(&self) -> &[EffectOp] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.effects_offset as usize) as *const EffectOp, + self.effect_count as usize, + ) + } + } + + /// Returns the negated fields segment. + #[inline] + pub fn negated_fields(&self) -> &[NodeFieldId] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer + .as_ptr() + .add(self.negated_fields_offset as usize) as *const NodeFieldId, + self.negated_field_count as usize, + ) + } + } + + /// Returns the string refs segment. + #[inline] + pub fn string_refs(&self) -> &[StringRef] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.string_refs_offset as usize) as *const StringRef, + self.string_ref_count as usize, + ) + } + } + + /// Returns the raw string bytes. + #[inline] + pub fn string_bytes(&self) -> &[u8] { + let end = if self.type_defs_offset > 0 { + self.type_defs_offset as usize + } else { + self.buffer.len() + }; + let start = self.string_bytes_offset as usize; + &self.buffer.as_slice()[start..end] + } + + /// Returns the type definitions segment. + #[inline] + pub fn type_defs(&self) -> &[TypeDef] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.type_defs_offset as usize) as *const TypeDef, + self.type_def_count as usize, + ) + } + } + + /// Returns the type members segment. + #[inline] + pub fn type_members(&self) -> &[TypeMember] { + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.type_members_offset as usize) as *const TypeMember, + self.type_member_count as usize, + ) + } + } + + /// Returns the entrypoints segment. + #[inline] + pub fn entrypoints(&self) -> &[Entrypoint] { + // SAFETY: offset is aligned to 4 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.entrypoints_offset as usize) as *const Entrypoint, + self.entrypoint_count as usize, + ) + } + } + + /// Returns the trivia kinds segment (node types to skip). + #[inline] + pub fn trivia_kinds(&self) -> &[NodeTypeId] { + if self.trivia_kinds_offset == 0 { + return &[]; + } + // SAFETY: offset is aligned to 2 + unsafe { + std::slice::from_raw_parts( + self.buffer.as_ptr().add(self.trivia_kinds_offset as usize) as *const NodeTypeId, + self.trivia_kind_count as usize, + ) + } + } + + // ───────────────────────────────────────────────────────────────────── + // High-level accessors + // ───────────────────────────────────────────────────────────────────── + + /// Returns a transition by ID. + #[inline] + pub fn transition(&self, id: TransitionId) -> &Transition { + &self.transitions()[id as usize] + } + + /// Returns a view of a transition with resolved slices. 
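+    ///
+    /// Illustrative sketch of the intended call pattern (not a doctest; names
+    /// follow the accessors defined in this file):
+    ///
+    /// ```ignore
+    /// for id in 0..query.transition_count() {
+    ///     let view = query.transition_view(id);
+    ///     let _successors = view.successors(); // inline or spilled, already resolved
+    ///     let _effects = view.effects();       // resolved against the effects segment
+    /// }
+    /// ```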
+ #[inline] + pub fn transition_view(&self, id: TransitionId) -> TransitionView<'_> { + TransitionView { + query: self, + raw: self.transition(id), + } + } + + /// Resolves a string ID to its UTF-8 content. + #[inline] + pub fn string(&self, id: StringId) -> &str { + let refs = self.string_refs(); + let string_ref = &refs[id as usize]; + let bytes = self.string_bytes(); + let start = string_ref.offset as usize; + let end = start + string_ref.len as usize; + // SAFETY: emitter ensures valid UTF-8 + unsafe { std::str::from_utf8_unchecked(&bytes[start..end]) } + } + + /// Resolves a slice of effects. + #[inline] + pub fn resolve_effects(&self, slice: Slice) -> &[EffectOp] { + let effects = self.effects(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &effects[start..end] + } + + /// Resolves a slice of negated fields. + #[inline] + pub fn resolve_negated_fields(&self, slice: Slice) -> &[NodeFieldId] { + let fields = self.negated_fields(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &fields[start..end] + } + + /// Resolves a slice of type members. + #[inline] + pub fn resolve_type_members(&self, slice: Slice) -> &[TypeMember] { + let members = self.type_members(); + let start = slice.start_index() as usize; + let end = start + slice.len() as usize; + &members[start..end] + } + + /// Resolves successors for a transition by ID, handling both inline and spilled cases. + #[inline] + pub fn resolve_successors_by_id(&self, id: TransitionId) -> &[TransitionId] { + let transition = self.transition(id); + if transition.has_inline_successors() { + // Return from transitions segment - inline data is part of the transition + let count = transition.successor_count as usize; + &self.transitions()[id as usize].successor_data[..count] + } else { + let start = transition.spilled_successors_index() as usize; + let count = transition.successor_count as usize; + &self.successors()[start..start + count] + } + } + + /// Returns the number of transitions. + #[inline] + pub fn transition_count(&self) -> u32 { + self.transition_count + } + + /// Returns the number of entrypoints. + #[inline] + pub fn entrypoint_count(&self) -> u16 { + self.entrypoint_count + } + + /// Returns the raw buffer for serialization. + #[inline] + pub fn buffer(&self) -> &CompiledQueryBuffer { + &self.buffer + } + + /// Returns offset metadata for serialization. + pub fn offsets(&self) -> CompiledQueryOffsets { + CompiledQueryOffsets { + successors_offset: self.successors_offset, + effects_offset: self.effects_offset, + negated_fields_offset: self.negated_fields_offset, + string_refs_offset: self.string_refs_offset, + string_bytes_offset: self.string_bytes_offset, + type_defs_offset: self.type_defs_offset, + type_members_offset: self.type_members_offset, + entrypoints_offset: self.entrypoints_offset, + trivia_kinds_offset: self.trivia_kinds_offset, + } + } + + /// Dumps the compiled query in human-readable format for debugging. 
+ pub fn dump(&self) -> String { + let mut out = String::new(); + + // Header + writeln!(out, "CompiledQuery {{").unwrap(); + writeln!(out, " buffer_len: {}", self.buffer.len()).unwrap(); + writeln!(out, " transitions: {}", self.transition_count).unwrap(); + writeln!(out, " successors: {} (spilled)", self.successor_count).unwrap(); + writeln!(out, " effects: {}", self.effect_count).unwrap(); + writeln!(out, " strings: {}", self.string_ref_count).unwrap(); + writeln!(out, " type_defs: {}", self.type_def_count).unwrap(); + writeln!(out, " entrypoints: {}", self.entrypoint_count).unwrap(); + writeln!(out).unwrap(); + + // Entrypoints + writeln!(out, " Entrypoints:").unwrap(); + for ep in self.entrypoints() { + let name = self.string(ep.name_id()); + writeln!( + out, + " {} -> T{} (type {})", + name, + ep.target(), + ep.result_type() + ) + .unwrap(); + } + writeln!(out).unwrap(); + + // Transitions + writeln!(out, " Transitions:").unwrap(); + for i in 0..self.transition_count { + let view = self.transition_view(i); + write!(out, " T{}: ", i).unwrap(); + + // Matcher + match view.matcher() { + super::Matcher::Epsilon => write!(out, "ε").unwrap(), + super::Matcher::Node { kind, field, .. } => { + write!(out, "Node({})", kind).unwrap(); + if let Some(f) = field { + write!(out, " field={}", f).unwrap(); + } + } + super::Matcher::Anonymous { kind, field, .. } => { + write!(out, "Anon({})", kind).unwrap(); + if let Some(f) = field { + write!(out, " field={}", f).unwrap(); + } + } + super::Matcher::Wildcard => write!(out, "_").unwrap(), + } + + // Nav + let nav = view.nav(); + if !nav.is_stay() { + write!(out, " nav={:?}", nav.kind).unwrap(); + if nav.level > 0 { + write!(out, "({})", nav.level).unwrap(); + } + } + + // Ref marker + match view.ref_marker() { + super::RefTransition::None => {} + super::RefTransition::Enter(id) => write!(out, " Enter({})", id).unwrap(), + super::RefTransition::Exit(id) => write!(out, " Exit({})", id).unwrap(), + } + + // Effects + let effects = view.effects(); + if !effects.is_empty() { + write!(out, " [").unwrap(); + for (j, eff) in effects.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + match eff { + EffectOp::CaptureNode => write!(out, "Capture").unwrap(), + EffectOp::ClearCurrent => write!(out, "Clear").unwrap(), + EffectOp::StartArray => write!(out, "StartArr").unwrap(), + EffectOp::PushElement => write!(out, "Push").unwrap(), + EffectOp::EndArray => write!(out, "EndArr").unwrap(), + EffectOp::StartObject => write!(out, "StartObj").unwrap(), + EffectOp::EndObject => write!(out, "EndObj").unwrap(), + EffectOp::Field(id) => write!(out, "Field({})", self.string(*id)).unwrap(), + EffectOp::StartVariant(id) => { + write!(out, "Var({})", self.string(*id)).unwrap() + } + EffectOp::EndVariant => write!(out, "EndVar").unwrap(), + EffectOp::ToString => write!(out, "ToStr").unwrap(), + } + } + write!(out, "]").unwrap(); + } + + // Successors + let succs = view.successors(); + if !succs.is_empty() { + write!(out, " -> [").unwrap(); + for (j, s) in succs.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + write!(out, "T{}", s).unwrap(); + } + write!(out, "]").unwrap(); + } + + writeln!(out).unwrap(); + } + + // Strings + if self.string_ref_count > 0 { + writeln!(out).unwrap(); + writeln!(out, " Strings:").unwrap(); + for i in 0..self.string_ref_count { + let s = self.string(i); + writeln!(out, " S{}: {:?}", i, s).unwrap(); + } + } + + // Types + if self.type_def_count > 0 { + writeln!(out).unwrap(); + writeln!(out, " Types:").unwrap(); + 
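+            // One line per composite type: "TyN: Name Kind", followed by either
+            // `inner=TyM` for wrapper types or a `{member: TyM, ...}` field list.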
for (i, td) in self.type_defs().iter().enumerate() { + let type_id = i as u16 + super::TYPE_COMPOSITE_START; + let name = if td.name != super::STRING_NONE { + self.string(td.name) + } else { + "" + }; + write!(out, " Ty{}: {} {:?}", type_id, name, td.kind).unwrap(); + if td.is_wrapper() { + if let Some(inner) = td.inner_type() { + write!(out, " inner=Ty{}", inner).unwrap(); + } + } else if let Some(members) = td.members_slice() { + let resolved = self.resolve_type_members(members); + write!(out, " {{").unwrap(); + for (j, m) in resolved.iter().enumerate() { + if j > 0 { + write!(out, ", ").unwrap(); + } + write!(out, "{}: Ty{}", self.string(m.name), m.ty).unwrap(); + } + write!(out, "}}").unwrap(); + } + writeln!(out).unwrap(); + } + } + + writeln!(out, "}}").unwrap(); + out + } +} + +/// Offset metadata extracted from CompiledQuery. +#[derive(Debug, Clone, Copy)] +pub struct CompiledQueryOffsets { + pub successors_offset: u32, + pub effects_offset: u32, + pub negated_fields_offset: u32, + pub string_refs_offset: u32, + pub string_bytes_offset: u32, + pub type_defs_offset: u32, + pub type_members_offset: u32, + pub entrypoints_offset: u32, + pub trivia_kinds_offset: u32, +} + +// ───────────────────────────────────────────────────────────────────────────── +// View types +// ───────────────────────────────────────────────────────────────────────────── + +/// A view of a transition with resolved slices. +/// +/// Hides offset arithmetic and inline/spilled distinction from callers. +pub struct TransitionView<'a> { + query: &'a CompiledQuery, + raw: &'a Transition, +} + +impl<'a> TransitionView<'a> { + /// Returns the raw transition. + #[inline] + pub fn raw(&self) -> &'a Transition { + self.raw + } + + /// Returns resolved successor IDs. + #[inline] + pub fn successors(&self) -> &'a [TransitionId] { + if self.raw.has_inline_successors() { + let count = self.raw.successor_count as usize; + &self.raw.successor_data[..count] + } else { + let start = self.raw.spilled_successors_index() as usize; + let count = self.raw.successor_count as usize; + &self.query.successors()[start..start + count] + } + } + + /// Returns resolved effect operations. + #[inline] + pub fn effects(&self) -> &'a [EffectOp] { + self.query.resolve_effects(self.raw.effects()) + } + + /// Returns the matcher. + #[inline] + pub fn matcher(&self) -> &super::Matcher { + &self.raw.matcher + } + + /// Returns a view of the matcher with resolved slices. + #[inline] + pub fn matcher_view(&self) -> MatcherView<'a> { + MatcherView { + query: self.query, + raw: &self.raw.matcher, + } + } + + /// Returns the navigation instruction. + #[inline] + pub fn nav(&self) -> super::Nav { + self.raw.nav + } + + /// Returns the ref transition marker. + #[inline] + pub fn ref_marker(&self) -> super::RefTransition { + self.raw.ref_marker + } +} + +/// A view of a matcher with resolved slices. +pub struct MatcherView<'a> { + query: &'a CompiledQuery, + raw: &'a super::Matcher, +} + +impl<'a> MatcherView<'a> { + /// Returns the raw matcher. + #[inline] + pub fn raw(&self) -> &'a super::Matcher { + self.raw + } + + /// Returns resolved negated fields. + #[inline] + pub fn negated_fields(&self) -> &'a [NodeFieldId] { + self.query.resolve_negated_fields(self.raw.negated_fields()) + } + + /// Returns the matcher kind. 
+    #[inline]
+    pub fn kind(&self) -> super::MatcherKind {
+        self.raw.kind()
+    }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Alignment helpers
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Aligns an offset up to the given alignment.
+#[inline]
+pub const fn align_up(offset: u32, align: u32) -> u32 {
+    (offset + align - 1) & !(align - 1)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn buffer_alignment() {
+        let buf = CompiledQueryBuffer::allocate(128);
+        assert_eq!(buf.as_ptr() as usize % BUFFER_ALIGN, 0);
+        assert_eq!(buf.len(), 128);
+    }
+
+    #[test]
+    fn buffer_empty() {
+        let buf = CompiledQueryBuffer::allocate(0);
+        assert!(buf.is_empty());
+        assert_eq!(buf.as_slice(), &[] as &[u8]);
+    }
+
+    #[test]
+    fn align_up_values() {
+        assert_eq!(align_up(0, 4), 0);
+        assert_eq!(align_up(1, 4), 4);
+        assert_eq!(align_up(4, 4), 4);
+        assert_eq!(align_up(5, 4), 8);
+        assert_eq!(align_up(63, 64), 64);
+        assert_eq!(align_up(64, 64), 64);
+        assert_eq!(align_up(65, 64), 128);
+    }
+}
diff --git a/crates/plotnik-lib/src/ir/effect.rs b/crates/plotnik-lib/src/ir/effect.rs
index dd6b6565..ff6d39bf 100644
--- a/crates/plotnik-lib/src/ir/effect.rs
+++ b/crates/plotnik-lib/src/ir/effect.rs
@@ -16,6 +16,10 @@ pub enum EffectOp {
     /// Only valid on transitions with Node/Anonymous/Wildcard matcher.
     CaptureNode,
 
+    /// Clear current value (set to None).
+    /// Used on skip paths for optional captures.
+    ClearCurrent,
+
     /// Push empty array onto stack.
     StartArray,
 
diff --git a/crates/plotnik-lib/src/ir/emit.rs b/crates/plotnik-lib/src/ir/emit.rs
new file mode 100644
index 00000000..c0e72c64
--- /dev/null
+++ b/crates/plotnik-lib/src/ir/emit.rs
@@ -0,0 +1,979 @@
+//! Query emitter: transforms BuildGraph + TypeInferenceResult into CompiledQuery.
+//!
+//! Three-pass construction:
+//! 1. Analysis: count elements, intern strings, collect data
+//! 2. Layout: compute aligned offsets, allocate once
+//! 3. Emission: write via ptr::write
+
+use std::collections::HashMap;
+use std::ptr;
+
+use super::compiled::{CompiledQuery, CompiledQueryBuffer, align_up};
+use super::ids::{NodeFieldId, NodeTypeId, RefId, StringId, TYPE_NODE, TransitionId};
+use super::strings::StringInterner;
+use super::{
+    EffectOp, Entrypoint, MAX_INLINE_SUCCESSORS, Matcher, RefTransition, Slice, StringRef,
+    Transition, TypeDef, TypeMember,
+};
+
+use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, RefMarker};
+use crate::query::typing::TypeInferenceResult;
+
+/// Callback for resolving node kind names to IDs.
+pub trait NodeKindResolver {
+    /// Resolves a named node kind to its ID. Returns `None` if unknown.
+    fn resolve_kind(&self, name: &str) -> Option<NodeTypeId>;
+
+    /// Resolves a field name to its ID. Returns `None` if unknown.
+    fn resolve_field(&self, name: &str) -> Option<NodeFieldId>;
+}
+
+/// A resolver that always fails (for testing without tree-sitter).
+pub struct NullResolver;
+
+impl NodeKindResolver for NullResolver {
+    fn resolve_kind(&self, _name: &str) -> Option<NodeTypeId> {
+        None
+    }
+    fn resolve_field(&self, _name: &str) -> Option<NodeFieldId> {
+        None
+    }
+}
+
+/// Map-based resolver for testing.
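+///
+/// Illustrative usage (sketch; the kind and field IDs are made up, and the
+/// emitter call mirrors `QueryEmitter::new(...).emit()` defined below):
+///
+/// ```ignore
+/// let mut resolver = MapResolver::new();
+/// resolver.add_kind("function_definition", 12);
+/// resolver.add_field("name", 3);
+/// let compiled = QueryEmitter::new(&graph, &type_info, resolver).emit()?;
+/// ```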
+pub struct MapResolver { + kinds: HashMap, + fields: HashMap, +} + +impl MapResolver { + pub fn new() -> Self { + Self { + kinds: HashMap::new(), + fields: HashMap::new(), + } + } + + pub fn add_kind(&mut self, name: impl Into, id: NodeTypeId) { + self.kinds.insert(name.into(), id); + } + + pub fn add_field(&mut self, name: impl Into, id: NodeFieldId) { + self.fields.insert(name.into(), id); + } +} + +impl Default for MapResolver { + fn default() -> Self { + Self::new() + } +} + +impl NodeKindResolver for MapResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.kinds.get(name).copied() + } + + fn resolve_field(&self, name: &str) -> Option { + self.fields.get(name).copied() + } +} + +/// Query emitter error. +#[derive(Debug, Clone)] +pub enum EmitError { + /// Unknown node kind encountered. + UnknownNodeKind(String), + /// Unknown field name encountered. + UnknownField(String), + /// Too many transitions (exceeds u32::MAX). + TooManyTransitions, + /// Too many successors (exceeds u32::MAX). + TooManySuccessors, + /// Too many effects (exceeds u32::MAX). + TooManyEffects, + /// Internal consistency error. + InternalError(String), +} + +impl std::fmt::Display for EmitError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + EmitError::UnknownNodeKind(s) => write!(f, "unknown node kind: {}", s), + EmitError::UnknownField(s) => write!(f, "unknown field: {}", s), + EmitError::TooManyTransitions => write!(f, "too many transitions"), + EmitError::TooManySuccessors => write!(f, "too many successors"), + EmitError::TooManyEffects => write!(f, "too many effects"), + EmitError::InternalError(s) => write!(f, "internal error: {}", s), + } + } +} + +impl std::error::Error for EmitError {} + +/// Result type for emit operations. +pub type EmitResult = Result; + +/// Emitter state during analysis phase. +struct EmitContext<'src, 'g> { + graph: &'g BuildGraph<'src>, + type_info: &'g TypeInferenceResult<'src>, + strings: StringInterner<'src>, + + // Collected data + effects: Vec, + negated_fields: Vec, + /// Spilled successors (for transitions with >8 successors) + spilled_successors: Vec, + + // Maps from BuildGraph to IR + /// For each transition, its effects slice + transition_effects: Vec>, + /// For each transition, its negated fields slice + transition_negated_fields: Vec>, + /// For each transition, if successors spill: (start_index in spilled_successors, count) + transition_spilled: Vec>, +} + +impl<'src, 'g> EmitContext<'src, 'g> { + fn new(graph: &'g BuildGraph<'src>, type_info: &'g TypeInferenceResult<'src>) -> Self { + let node_count = graph.len(); + Self { + graph, + type_info, + strings: StringInterner::new(), + effects: Vec::new(), + negated_fields: Vec::new(), + spilled_successors: Vec::new(), + transition_effects: Vec::with_capacity(node_count), + transition_negated_fields: Vec::with_capacity(node_count), + transition_spilled: Vec::with_capacity(node_count), + } + } + + fn intern(&mut self, s: &'src str) -> StringId { + self.strings.intern(s) + } +} + +/// Layout information computed in pass 2. 
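A hedged end-to-end sketch of driving the emitter with the map-based resolver, mirroring the tests at the bottom of this file; the graph-building calls (`BuildMatcher::node`, `add_definition`, `TypeInferenceResult::default`) are the same ones those tests use:

```rust
use std::num::NonZeroU16;

// Sketch: register the kinds/fields the query mentions, build a one-node
// graph, and surface any EmitError (e.g. UnknownNodeKind) as a string.
fn emit_sketch() -> Result<CompiledQuery, String> {
    let mut resolver = MapResolver::new();
    resolver.add_kind("identifier", 1);
    resolver.add_field("name", NonZeroU16::new(1).unwrap());

    let mut graph = BuildGraph::new();
    let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier")));
    graph.add_definition("Main", node);

    let type_info = TypeInferenceResult::default();
    QueryEmitter::new(&graph, &type_info, resolver)
        .emit()
        .map_err(|e| e.to_string())
}
```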
+struct LayoutInfo { + buffer_len: usize, + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, + + // Counts + transition_count: u32, + successor_count: u32, + effect_count: u32, + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, +} + +/// Emits a compiled query from a BuildGraph. +pub struct QueryEmitter<'src, 'g, R> { + ctx: EmitContext<'src, 'g>, + resolver: R, + trivia_kinds: Vec, +} + +impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> { + /// Creates a new emitter. + pub fn new( + graph: &'g BuildGraph<'src>, + type_info: &'g TypeInferenceResult<'src>, + resolver: R, + ) -> Self { + Self { + ctx: EmitContext::new(graph, type_info), + resolver, + trivia_kinds: Vec::new(), + } + } + + /// Sets trivia node kinds (e.g., comments) to skip during execution. + pub fn with_trivia_kinds(mut self, kinds: Vec) -> Self { + self.trivia_kinds = kinds; + self + } + + /// Emits the compiled query. + pub fn emit(mut self) -> EmitResult { + // Pass 1: Analysis + self.analyze()?; + + // Pass 2: Layout + let layout = self.compute_layout()?; + + // Pass 3: Emission + self.emit_buffer(layout) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 1: Analysis + // ───────────────────────────────────────────────────────────────────── + + fn analyze(&mut self) -> EmitResult<()> { + // Pre-intern definition names for entrypoints + for (name, _) in self.ctx.graph.definitions() { + self.ctx.intern(name); + } + + // Pre-intern type names + for type_def in &self.ctx.type_info.type_defs { + if let Some(name) = type_def.name { + self.ctx.intern(name); + } + for member in &type_def.members { + self.ctx.intern(member.name); + } + } + + // Analyze each transition + for (_, node) in self.ctx.graph.iter() { + self.analyze_node(node)?; + } + + Ok(()) + } + + fn analyze_node(&mut self, node: &BuildNode<'src>) -> EmitResult<()> { + // Collect effects + let effects_start = self.ctx.effects.len() as u32; + for effect in &node.effects { + let ir_effect = self.convert_effect(effect)?; + self.ctx.effects.push(ir_effect); + } + let effects_len = (self.ctx.effects.len() as u32 - effects_start) as u16; + self.ctx + .transition_effects + .push(Slice::new(effects_start, effects_len)); + + // Collect negated fields + let negated_start = self.ctx.negated_fields.len() as u32; + if let BuildMatcher::Node { negated_fields, .. 
} = &node.matcher { + for field_name in negated_fields { + let field_id = self + .resolver + .resolve_field(field_name) + .ok_or_else(|| EmitError::UnknownField((*field_name).to_string()))?; + self.ctx.negated_fields.push(field_id); + } + } + let negated_len = (self.ctx.negated_fields.len() as u32 - negated_start) as u16; + self.ctx + .transition_negated_fields + .push(Slice::new(negated_start, negated_len)); + + // Check if successors need to spill + if node.successors.len() > MAX_INLINE_SUCCESSORS { + let start = self.ctx.spilled_successors.len() as u32; + for &succ in &node.successors { + self.ctx.spilled_successors.push(succ); + } + self.ctx + .transition_spilled + .push(Some((start, node.successors.len() as u32))); + } else { + self.ctx.transition_spilled.push(None); + } + + Ok(()) + } + + fn convert_effect(&mut self, effect: &BuildEffect<'src>) -> EmitResult { + Ok(match effect { + BuildEffect::CaptureNode => EffectOp::CaptureNode, + BuildEffect::ClearCurrent => EffectOp::ClearCurrent, + BuildEffect::StartArray { .. } => EffectOp::StartArray, + BuildEffect::PushElement => EffectOp::PushElement, + BuildEffect::EndArray => EffectOp::EndArray, + BuildEffect::StartObject { .. } => EffectOp::StartObject, + BuildEffect::EndObject => EffectOp::EndObject, + BuildEffect::Field { name, .. } => { + let id = self.ctx.intern(name); + EffectOp::Field(id) + } + BuildEffect::StartVariant(tag) => { + let id = self.ctx.intern(tag); + EffectOp::StartVariant(id) + } + BuildEffect::EndVariant => EffectOp::EndVariant, + BuildEffect::ToString => EffectOp::ToString, + }) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 2: Layout + // ───────────────────────────────────────────────────────────────────── + + fn compute_layout(&self) -> EmitResult { + let transition_count = self.ctx.graph.len() as u32; + let successor_count = self.ctx.spilled_successors.len() as u32; + let effect_count = self.ctx.effects.len() as u32; + let negated_field_count = self.ctx.negated_fields.len() as u16; + let string_ref_count = self.ctx.strings.len() as u16; + let type_def_count = self.ctx.type_info.type_defs.len() as u16; + let type_member_count: u16 = self + .ctx + .type_info + .type_defs + .iter() + .map(|td| td.members.len() as u16) + .sum(); + let entrypoint_count = self.ctx.graph.definitions().count() as u16; + let trivia_kind_count = self.trivia_kinds.len() as u16; + + // Compute offsets with proper alignment + let mut offset: u32 = 0; + + // Transitions at offset 0, 64-byte aligned + offset += transition_count * 64; + + // Successors: align 4 + let successors_offset = align_up(offset, 4); + offset = successors_offset + successor_count * 4; + + // Effects: align 4 (EffectOp is 4 bytes with repr(C, u16) but discriminant+payload) + let effects_offset = align_up(offset, 4); + offset = effects_offset + effect_count * 4; + + // Negated fields: align 2 + let negated_fields_offset = align_up(offset, 2); + offset = negated_fields_offset + (negated_field_count as u32) * 2; + + // String refs: align 4 + let string_refs_offset = align_up(offset, 4); + offset = string_refs_offset + (string_ref_count as u32) * 8; + + // String bytes: align 1 + let string_bytes_offset = offset; + offset += self.ctx.strings.total_bytes() as u32; + + // Type defs: align 4 + let type_defs_offset = align_up(offset, 4); + offset = type_defs_offset + (type_def_count as u32) * 12; + + // Type members: align 2 + let type_members_offset = align_up(offset, 2); + offset = type_members_offset + (type_member_count as u32) * 
4; + + // Entrypoints: align 4 + let entrypoints_offset = align_up(offset, 4); + offset = entrypoints_offset + (entrypoint_count as u32) * 12; + + // Trivia kinds: align 2 + let trivia_kinds_offset = if trivia_kind_count > 0 { + let aligned = align_up(offset, 2); + offset = aligned + (trivia_kind_count as u32) * 2; + aligned + } else { + 0 + }; + + // Final buffer size, aligned to 64 for potential mmap + let buffer_len = align_up(offset, 64) as usize; + + Ok(LayoutInfo { + buffer_len, + successors_offset, + effects_offset, + negated_fields_offset, + string_refs_offset, + string_bytes_offset, + type_defs_offset, + type_members_offset, + entrypoints_offset, + trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + }) + } + + // ───────────────────────────────────────────────────────────────────── + // Pass 3: Emission + // ───────────────────────────────────────────────────────────────────── + + fn emit_buffer(self, layout: LayoutInfo) -> EmitResult { + let mut buffer = CompiledQueryBuffer::allocate(layout.buffer_len); + let base = buffer.as_mut_ptr(); + + // Emit transitions + self.emit_transitions(base, &layout)?; + + // Emit successors + self.emit_successors(base, &layout); + + // Emit effects + self.emit_effects(base, &layout); + + // Emit negated fields + self.emit_negated_fields(base, &layout); + + // Emit strings + self.emit_strings(base, &layout); + + // Emit type metadata + self.emit_types(base, &layout); + + // Emit entrypoints + self.emit_entrypoints(base, &layout)?; + + // Emit trivia kinds + self.emit_trivia_kinds(base, &layout); + + Ok(CompiledQuery::new( + buffer, + layout.successors_offset, + layout.effects_offset, + layout.negated_fields_offset, + layout.string_refs_offset, + layout.string_bytes_offset, + layout.type_defs_offset, + layout.type_members_offset, + layout.entrypoints_offset, + layout.trivia_kinds_offset, + layout.transition_count, + layout.successor_count, + layout.effect_count, + layout.negated_field_count, + layout.string_ref_count, + layout.type_def_count, + layout.type_member_count, + layout.entrypoint_count, + layout.trivia_kind_count, + )) + } + + fn emit_transitions(&self, base: *mut u8, _layout: &LayoutInfo) -> EmitResult<()> { + let transitions_ptr = base as *mut Transition; + + for (idx, (_, node)) in self.ctx.graph.iter().enumerate() { + let transition = self.build_transition(node, idx)?; + // SAFETY: buffer is properly sized and aligned + unsafe { + ptr::write(transitions_ptr.add(idx), transition); + } + } + + Ok(()) + } + + fn build_transition(&self, node: &BuildNode<'src>, idx: usize) -> EmitResult { + let matcher = self.convert_matcher(&node.matcher)?; + let ref_marker = self.convert_ref_marker(&node.ref_marker); + let effects = self.ctx.transition_effects[idx]; + let negated_fields_slice = self.ctx.transition_negated_fields[idx]; + + // Build successor data + let (successor_count, successor_data) = + if let Some((start, count)) = self.ctx.transition_spilled[idx] { + // Spilled: store index in successor_data[0] + let mut data = [0u32; MAX_INLINE_SUCCESSORS]; + data[0] = start; + (count, data) + } else { + // Inline + let mut data = [0u32; MAX_INLINE_SUCCESSORS]; + for (i, &succ) in node.successors.iter().enumerate() { + data[i] = succ; + } + (node.successors.len() as u32, data) + }; + + // Inject negated_fields into matcher if applicable + let matcher = match matcher { + Matcher::Node { kind, field, .. 
} => Matcher::Node { + kind, + field, + negated_fields: negated_fields_slice, + }, + Matcher::Anonymous { kind, field, .. } => Matcher::Anonymous { + kind, + field, + negated_fields: Slice::empty(), + }, + other => other, + }; + + let transition = Transition::new( + matcher, + ref_marker, + node.nav, + effects, + successor_count, + successor_data, + ); + + Ok(transition) + } + + fn convert_matcher(&self, matcher: &BuildMatcher<'src>) -> EmitResult { + Ok(match matcher { + BuildMatcher::Epsilon => Matcher::Epsilon, + BuildMatcher::Node { kind, field, .. } => { + let kind_id = self + .resolver + .resolve_kind(kind) + .ok_or_else(|| EmitError::UnknownNodeKind((*kind).to_string()))?; + let field_id = match field { + Some(f) => self.resolver.resolve_field(f), + None => None, + }; + Matcher::Node { + kind: kind_id, + field: field_id, + negated_fields: Slice::empty(), // Will be filled in build_transition + } + } + BuildMatcher::Anonymous { literal, field } => { + // For anonymous nodes, we use the literal as a synthetic kind ID + // In practice, this would be resolved differently + let kind_id = self.resolver.resolve_kind(literal).unwrap_or(0); + let field_id = match field { + Some(f) => self.resolver.resolve_field(f), + None => None, + }; + Matcher::Anonymous { + kind: kind_id, + field: field_id, + negated_fields: Slice::empty(), + } + } + BuildMatcher::Wildcard { field } => { + // Wildcard doesn't use field in IR representation + let _ = field; + Matcher::Wildcard + } + }) + } + + fn convert_ref_marker(&self, marker: &RefMarker) -> RefTransition { + match marker { + RefMarker::None => RefTransition::None, + RefMarker::Enter { ref_id } => RefTransition::Enter(*ref_id as RefId), + RefMarker::Exit { ref_id } => RefTransition::Exit(*ref_id as RefId), + } + } + + fn emit_successors(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.spilled_successors.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.successors_offset as usize) } as *mut TransitionId; + for (i, &succ) in self.ctx.spilled_successors.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), succ); + } + } + } + + fn emit_effects(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.effects.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.effects_offset as usize) } as *mut EffectOp; + for (i, effect) in self.ctx.effects.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), *effect); + } + } + } + + fn emit_negated_fields(&self, base: *mut u8, layout: &LayoutInfo) { + if self.ctx.negated_fields.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.negated_fields_offset as usize) } as *mut NodeFieldId; + for (i, &field) in self.ctx.negated_fields.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), field); + } + } + } + + fn emit_strings(&self, base: *mut u8, layout: &LayoutInfo) { + // Emit string refs + let refs_ptr = unsafe { base.add(layout.string_refs_offset as usize) } as *mut StringRef; + let bytes_ptr = unsafe { base.add(layout.string_bytes_offset as usize) }; + + let mut byte_offset: u32 = 0; + for (i, (_, s)) in self.ctx.strings.iter().enumerate() { + // Write StringRef + let string_ref = StringRef::new(byte_offset, s.len() as u16); + unsafe { + ptr::write(refs_ptr.add(i), string_ref); + } + + // Write string bytes + unsafe { + ptr::copy_nonoverlapping(s.as_ptr(), bytes_ptr.add(byte_offset as usize), s.len()); + } + + byte_offset += s.len() as u32; + } + } + + fn emit_types(&self, base: *mut u8, layout: &LayoutInfo) { + let defs_ptr = unsafe { 
base.add(layout.type_defs_offset as usize) } as *mut TypeDef; + let members_ptr = + unsafe { base.add(layout.type_members_offset as usize) } as *mut TypeMember; + + let mut member_idx: u32 = 0; + + for (i, type_def) in self.ctx.type_info.type_defs.iter().enumerate() { + let name_id = type_def + .name + .and_then(|n| self.ctx.strings.get(n)) + .unwrap_or(super::ids::STRING_NONE); + + let ir_def = if let Some(inner) = type_def.inner_type { + TypeDef::wrapper(type_def.kind, inner) + } else { + let members_start = member_idx; + let members_len = type_def.members.len() as u16; + + // Emit members + for member in &type_def.members { + let member_name_id = self + .ctx + .strings + .get(member.name) + .expect("member name should be interned"); + let ir_member = TypeMember::new(member_name_id, member.ty); + unsafe { + ptr::write(members_ptr.add(member_idx as usize), ir_member); + } + member_idx += 1; + } + + TypeDef::composite( + type_def.kind, + name_id, + Slice::new(members_start, members_len), + ) + }; + + unsafe { + ptr::write(defs_ptr.add(i), ir_def); + } + } + } + + fn emit_entrypoints(&self, base: *mut u8, layout: &LayoutInfo) -> EmitResult<()> { + let ptr = unsafe { base.add(layout.entrypoints_offset as usize) } as *mut Entrypoint; + + for (i, (name, entry_node)) in self.ctx.graph.definitions().enumerate() { + let name_id = self + .ctx + .strings + .get(name) + .expect("definition name should be interned"); + + // Look up the result type for this definition + let result_type = self + .ctx + .type_info + .entrypoint_types + .get(name) + .copied() + .unwrap_or(TYPE_NODE); + + let entrypoint = Entrypoint::new(name_id, entry_node, result_type); + unsafe { + ptr::write(ptr.add(i), entrypoint); + } + } + + Ok(()) + } + + fn emit_trivia_kinds(&self, base: *mut u8, layout: &LayoutInfo) { + if self.trivia_kinds.is_empty() { + return; + } + + let ptr = unsafe { base.add(layout.trivia_kinds_offset as usize) } as *mut NodeTypeId; + for (i, &kind) in self.trivia_kinds.iter().enumerate() { + unsafe { + ptr::write(ptr.add(i), kind); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::query::graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode}; + use crate::query::typing::TypeInferenceResult; + use std::num::NonZeroU16; + + fn make_resolver() -> MapResolver { + let mut r = MapResolver::new(); + r.add_kind("identifier", 1); + r.add_kind("function_declaration", 2); + r.add_field("name", NonZeroU16::new(1).unwrap()); + r.add_field("body", NonZeroU16::new(2).unwrap()); + r + } + + #[test] + fn emit_simple_query() { + let mut graph = BuildGraph::new(); + + // Create a simple: (identifier) @id + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + assert_eq!(compiled.transition_count(), 1); + assert_eq!(compiled.entrypoint_count(), 1); + + let t = compiled.transition(0); + assert!(matches!(t.matcher, Matcher::Node { kind: 1, .. 
})); + } + + #[test] + fn emit_with_effects() { + let mut graph = BuildGraph::new(); + + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.node_mut(node).add_effect(BuildEffect::Field { + name: "name", + span: Default::default(), + }); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let view = compiled.transition_view(0); + let effects = view.effects(); + assert_eq!(effects.len(), 2); + assert!(matches!(effects[0], EffectOp::CaptureNode)); + assert!(matches!(effects[1], EffectOp::Field(_))); + + // Verify string was interned + if let EffectOp::Field(id) = effects[1] { + assert_eq!(compiled.string(id), "name"); + } + } + + #[test] + fn emit_with_successors() { + let mut graph = BuildGraph::new(); + + // Create: entry -> branch -> [a, b] + let entry = graph.add_epsilon(); + let a = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + let b = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( + "function_declaration", + ))); + graph.connect(entry, a); + graph.connect(entry, b); + graph.add_definition("Main", entry); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + assert_eq!(compiled.transition_count(), 3); + + let view = compiled.transition_view(0); + let successors = view.successors(); + assert_eq!(successors.len(), 2); + assert_eq!(successors[0], 1); + assert_eq!(successors[1], 2); + } + + #[test] + fn emit_many_successors_spills() { + let mut graph = BuildGraph::new(); + + // Create entry with 10 successors (exceeds MAX_INLINE_SUCCESSORS) + let entry = graph.add_epsilon(); + for _ in 0..10 { + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.connect(entry, node); + } + graph.add_definition("Main", entry); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let t = compiled.transition(0); + assert!(!t.has_inline_successors()); + assert_eq!(t.successor_count, 10); + + let view = compiled.transition_view(0); + let successors = view.successors(); + assert_eq!(successors.len(), 10); + } + + #[test] + fn string_interning_deduplicates() { + let mut graph = BuildGraph::new(); + + // Two fields with same name + let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n1).add_effect(BuildEffect::Field { + name: "value", + span: Default::default(), + }); + + let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n2).add_effect(BuildEffect::Field { + name: "value", + span: Default::default(), + }); + graph.connect(n1, n2); + + graph.add_definition("Main", n1); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + // Both should reference the same string ID + let e1 = compiled.transition_view(0).effects(); + let e2 = 
compiled.transition_view(1).effects(); + + let id1 = match e1[0] { + EffectOp::Field(id) => id, + _ => panic!(), + }; + let id2 = match e2[0] { + EffectOp::Field(id) => id, + _ => panic!(), + }; + + assert_eq!(id1, id2); + assert_eq!(compiled.string(id1), "value"); + } + + #[test] + fn unknown_node_kind_errors() { + let mut graph = BuildGraph::new(); + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("unknown_kind"))); + graph.add_definition("Main", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let result = emitter.emit(); + + assert!(matches!(result, Err(EmitError::UnknownNodeKind(_)))); + } + + #[test] + fn serialize_deserialize_roundtrip() { + let mut graph = BuildGraph::new(); + + // Build a small graph with effects + let n1 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(n1).add_effect(BuildEffect::CaptureNode); + graph.node_mut(n1).add_effect(BuildEffect::Field { + name: "id", + span: Default::default(), + }); + + let n2 = graph.add_node(BuildNode::with_matcher(BuildMatcher::node( + "function_declaration", + ))); + graph.node_mut(n2).add_effect(BuildEffect::CaptureNode); + graph.connect(n1, n2); + + graph.add_definition("Main", n1); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + // Emit + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + // Serialize + let bytes = crate::ir::to_bytes(&compiled).expect("serialize should succeed"); + + // Deserialize + let restored = crate::ir::from_bytes(&bytes).expect("deserialize should succeed"); + + // Verify counts + assert_eq!(restored.transition_count(), compiled.transition_count()); + assert_eq!(restored.entrypoint_count(), compiled.entrypoint_count()); + + // Check transitions match + for i in 0..compiled.transition_count() { + let orig = compiled.transition_view(i); + let rest = restored.transition_view(i); + + assert_eq!(orig.successors(), rest.successors()); + assert_eq!(orig.effects().len(), rest.effects().len()); + } + + // Check strings match + let ep = restored.entrypoints()[0]; + assert_eq!(restored.string(ep.name_id()), "Main"); + } + + #[test] + fn dump_produces_output() { + let mut graph = BuildGraph::new(); + let node = graph.add_node(BuildNode::with_matcher(BuildMatcher::node("identifier"))); + graph.node_mut(node).add_effect(BuildEffect::CaptureNode); + graph.add_definition("Test", node); + + let type_info = TypeInferenceResult::default(); + let resolver = make_resolver(); + + let emitter = QueryEmitter::new(&graph, &type_info, resolver); + let compiled = emitter.emit().expect("emit should succeed"); + + let dump = compiled.dump(); + + assert!(dump.contains("CompiledQuery")); + assert!(dump.contains("Test")); + assert!(dump.contains("Capture")); + assert!(dump.contains("Node(1)")); + } +} diff --git a/crates/plotnik-lib/src/ir/ids.rs b/crates/plotnik-lib/src/ir/ids.rs index dcb88265..f97b17ba 100644 --- a/crates/plotnik-lib/src/ir/ids.rs +++ b/crates/plotnik-lib/src/ir/ids.rs @@ -18,6 +18,9 @@ pub type NodeFieldId = NonZeroU16; /// Index into the string_refs segment. pub type StringId = u16; +/// Sentinel value for unnamed types (wrapper types have no explicit name). +pub const STRING_NONE: StringId = 0xFFFF; + /// Field name in effects (alias for type safety). 
pub type DataFieldId = StringId; diff --git a/crates/plotnik-lib/src/ir/mod.rs b/crates/plotnik-lib/src/ir/mod.rs index df4de243..c50fce7a 100644 --- a/crates/plotnik-lib/src/ir/mod.rs +++ b/crates/plotnik-lib/src/ir/mod.rs @@ -9,14 +9,18 @@ //! Note: This module contains only type definitions. Query execution //! lives elsewhere. +mod compiled; mod effect; +mod emit; mod entrypoint; mod ids; mod matcher; mod nav; mod ref_transition; +mod serialize; mod slice; mod string_ref; +mod strings; mod transition; mod type_metadata; @@ -33,7 +37,8 @@ mod string_ref_tests; // Re-export ID types pub use ids::{ - DataFieldId, NodeFieldId, NodeTypeId, RefId, StringId, TransitionId, TypeId, VariantTagId, + DataFieldId, NodeFieldId, NodeTypeId, RefId, STRING_NONE, StringId, TransitionId, TypeId, + VariantTagId, }; // Re-export TypeId constants @@ -65,3 +70,20 @@ pub use string_ref::StringRef; // Re-export entrypoint pub use entrypoint::Entrypoint; + +// Re-export compiled query types +pub use compiled::{ + BUFFER_ALIGN, CompiledQuery, CompiledQueryBuffer, CompiledQueryOffsets, FORMAT_VERSION, MAGIC, + MatcherView, TransitionView, align_up, +}; + +// Re-export string interner +pub use strings::StringInterner; + +// Re-export emitter +pub use emit::{EmitError, EmitResult, MapResolver, NodeKindResolver, NullResolver, QueryEmitter}; + +// Re-export serialization +pub use serialize::{ + HEADER_SIZE, SerializeError, SerializeResult, deserialize, from_bytes, serialize, to_bytes, +}; diff --git a/crates/plotnik-lib/src/ir/nav.rs b/crates/plotnik-lib/src/ir/nav.rs index 5630a426..76d74ebf 100644 --- a/crates/plotnik-lib/src/ir/nav.rs +++ b/crates/plotnik-lib/src/ir/nav.rs @@ -69,14 +69,26 @@ impl Nav { } } + /// Constrained ascent requires `level == 1`. Multi-level ascent with + /// intermediate constraints must decompose into separate transitions. pub const fn up_skip_trivia(level: u8) -> Self { + assert!( + level == 1, + "UpSkipTrivia requires level == 1; decompose for intermediate constraints" + ); Self { kind: NavKind::UpSkipTrivia, level, } } + /// Constrained ascent requires `level == 1`. Multi-level ascent with + /// intermediate constraints must decompose into separate transitions. pub const fn up_exact(level: u8) -> Self { + assert!( + level == 1, + "UpExact requires level == 1; decompose for intermediate constraints" + ); Self { kind: NavKind::UpExact, level, diff --git a/crates/plotnik-lib/src/ir/serialize.rs b/crates/plotnik-lib/src/ir/serialize.rs new file mode 100644 index 00000000..850b3682 --- /dev/null +++ b/crates/plotnik-lib/src/ir/serialize.rs @@ -0,0 +1,414 @@ +//! Serialization and deserialization for compiled queries. +//! +//! Binary format (see ADR-0004): +//! ```text +//! Header (64 bytes): +//! magic: [u8; 4] b"PLNK" +//! version: u32 format version +//! checksum: u32 CRC32(header[12..64] || buffer_data) +//! buffer_len: u32 +//! successors_offset: u32 +//! effects_offset: u32 +//! negated_fields_offset: u32 +//! string_refs_offset: u32 +//! string_bytes_offset: u32 +//! type_defs_offset: u32 +//! type_members_offset: u32 +//! entrypoints_offset: u32 +//! trivia_kinds_offset: u32 +//! _reserved: [u8; 12] +//! ``` + +use std::io::{Read, Write}; + +use super::compiled::{CompiledQuery, CompiledQueryBuffer, FORMAT_VERSION, MAGIC}; + +/// Header size in bytes (64 bytes for cache-line alignment). +pub const HEADER_SIZE: usize = 64; + +/// Serialization error. +#[derive(Debug, Clone)] +pub enum SerializeError { + /// Invalid magic bytes. 
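Because the magic and version sit at fixed positions (bytes 0..4 and a little-endian u32 at 4..8, per the header layout above), a caller can cheaply reject foreign blobs before paying for a full deserialization. A sketch; the function name is illustrative:

```rust
// Sketch: cheap pre-check of a serialized blob using the fixed header fields.
fn looks_like_compiled_query(bytes: &[u8]) -> bool {
    bytes.len() >= HEADER_SIZE
        && bytes[0..4] == MAGIC
        && u32::from_le_bytes(bytes[4..8].try_into().unwrap()) == FORMAT_VERSION
}
```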
+ InvalidMagic([u8; 4]), + /// Version mismatch (expected, found). + VersionMismatch { expected: u32, found: u32 }, + /// Checksum mismatch (expected, found). + ChecksumMismatch { expected: u32, found: u32 }, + /// IO error message. + Io(String), + /// Header too short. + HeaderTooShort, + /// Buffer alignment error. + AlignmentError, +} + +impl std::fmt::Display for SerializeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SerializeError::InvalidMagic(m) => { + write!(f, "invalid magic: {:?}", m) + } + SerializeError::VersionMismatch { expected, found } => { + write!( + f, + "version mismatch: expected {}, found {}", + expected, found + ) + } + SerializeError::ChecksumMismatch { expected, found } => { + write!( + f, + "checksum mismatch: expected {:08x}, found {:08x}", + expected, found + ) + } + SerializeError::Io(msg) => write!(f, "io error: {}", msg), + SerializeError::HeaderTooShort => write!(f, "header too short"), + SerializeError::AlignmentError => write!(f, "buffer alignment error"), + } + } +} + +impl std::error::Error for SerializeError {} + +impl From for SerializeError { + fn from(e: std::io::Error) -> Self { + SerializeError::Io(e.to_string()) + } +} + +/// Result type for serialization operations. +pub type SerializeResult = Result; + +/// Computes CRC32 checksum. +fn crc32(data: &[u8]) -> u32 { + // Simple CRC32 implementation (IEEE polynomial) + const CRC32_TABLE: [u32; 256] = generate_crc32_table(); + + let mut crc: u32 = 0xFFFFFFFF; + for &byte in data { + let index = ((crc ^ byte as u32) & 0xFF) as usize; + crc = CRC32_TABLE[index] ^ (crc >> 8); + } + !crc +} + +const fn generate_crc32_table() -> [u32; 256] { + const POLYNOMIAL: u32 = 0xEDB88320; + let mut table = [0u32; 256]; + let mut i = 0; + while i < 256 { + let mut crc = i as u32; + let mut j = 0; + while j < 8 { + if crc & 1 != 0 { + crc = (crc >> 1) ^ POLYNOMIAL; + } else { + crc >>= 1; + } + j += 1; + } + table[i] = crc; + i += 1; + } + table +} + +/// Serialized header structure (64 bytes, matching ADR-0004). +/// +/// Large counts (transition, successor, effect) are computed from offsets. +/// Small counts are stored in the reserved area since they can't be reliably +/// computed due to alignment padding. +#[repr(C)] +struct Header { + magic: [u8; 4], + version: u32, + checksum: u32, + buffer_len: u32, + successors_offset: u32, + effects_offset: u32, + negated_fields_offset: u32, + string_refs_offset: u32, + string_bytes_offset: u32, + type_defs_offset: u32, + type_members_offset: u32, + entrypoints_offset: u32, + trivia_kinds_offset: u32, + // Counts stored in reserved area (12 bytes = 6 x u16) + negated_field_count: u16, + string_ref_count: u16, + type_def_count: u16, + type_member_count: u16, + entrypoint_count: u16, + trivia_kind_count: u16, +} + +const _: () = assert!(std::mem::size_of::
() == HEADER_SIZE); + +impl Header { + fn to_bytes(&self) -> [u8; HEADER_SIZE] { + let mut bytes = [0u8; HEADER_SIZE]; + bytes[0..4].copy_from_slice(&self.magic); + bytes[4..8].copy_from_slice(&self.version.to_le_bytes()); + bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes()); + bytes[12..16].copy_from_slice(&self.buffer_len.to_le_bytes()); + bytes[16..20].copy_from_slice(&self.successors_offset.to_le_bytes()); + bytes[20..24].copy_from_slice(&self.effects_offset.to_le_bytes()); + bytes[24..28].copy_from_slice(&self.negated_fields_offset.to_le_bytes()); + bytes[28..32].copy_from_slice(&self.string_refs_offset.to_le_bytes()); + bytes[32..36].copy_from_slice(&self.string_bytes_offset.to_le_bytes()); + bytes[36..40].copy_from_slice(&self.type_defs_offset.to_le_bytes()); + bytes[40..44].copy_from_slice(&self.type_members_offset.to_le_bytes()); + bytes[44..48].copy_from_slice(&self.entrypoints_offset.to_le_bytes()); + bytes[48..52].copy_from_slice(&self.trivia_kinds_offset.to_le_bytes()); + // Counts in reserved area + bytes[52..54].copy_from_slice(&self.negated_field_count.to_le_bytes()); + bytes[54..56].copy_from_slice(&self.string_ref_count.to_le_bytes()); + bytes[56..58].copy_from_slice(&self.type_def_count.to_le_bytes()); + bytes[58..60].copy_from_slice(&self.type_member_count.to_le_bytes()); + bytes[60..62].copy_from_slice(&self.entrypoint_count.to_le_bytes()); + bytes[62..64].copy_from_slice(&self.trivia_kind_count.to_le_bytes()); + bytes + } + + fn from_bytes(bytes: &[u8; HEADER_SIZE]) -> Self { + Self { + magic: bytes[0..4].try_into().unwrap(), + version: u32::from_le_bytes(bytes[4..8].try_into().unwrap()), + checksum: u32::from_le_bytes(bytes[8..12].try_into().unwrap()), + buffer_len: u32::from_le_bytes(bytes[12..16].try_into().unwrap()), + successors_offset: u32::from_le_bytes(bytes[16..20].try_into().unwrap()), + effects_offset: u32::from_le_bytes(bytes[20..24].try_into().unwrap()), + negated_fields_offset: u32::from_le_bytes(bytes[24..28].try_into().unwrap()), + string_refs_offset: u32::from_le_bytes(bytes[28..32].try_into().unwrap()), + string_bytes_offset: u32::from_le_bytes(bytes[32..36].try_into().unwrap()), + type_defs_offset: u32::from_le_bytes(bytes[36..40].try_into().unwrap()), + type_members_offset: u32::from_le_bytes(bytes[40..44].try_into().unwrap()), + entrypoints_offset: u32::from_le_bytes(bytes[44..48].try_into().unwrap()), + trivia_kinds_offset: u32::from_le_bytes(bytes[48..52].try_into().unwrap()), + negated_field_count: u16::from_le_bytes(bytes[52..54].try_into().unwrap()), + string_ref_count: u16::from_le_bytes(bytes[54..56].try_into().unwrap()), + type_def_count: u16::from_le_bytes(bytes[56..58].try_into().unwrap()), + type_member_count: u16::from_le_bytes(bytes[58..60].try_into().unwrap()), + entrypoint_count: u16::from_le_bytes(bytes[60..62].try_into().unwrap()), + trivia_kind_count: u16::from_le_bytes(bytes[62..64].try_into().unwrap()), + } + } +} + +/// Serializes a compiled query to a writer. 
+pub fn serialize(query: &CompiledQuery, mut writer: W) -> SerializeResult<()> { + let offsets = query.offsets(); + let buffer = query.buffer(); + + // Build header (without checksum first) + let mut header = Header { + magic: MAGIC, + version: FORMAT_VERSION, + checksum: 0, // Computed below + buffer_len: buffer.len() as u32, + successors_offset: offsets.successors_offset, + effects_offset: offsets.effects_offset, + negated_fields_offset: offsets.negated_fields_offset, + string_refs_offset: offsets.string_refs_offset, + string_bytes_offset: offsets.string_bytes_offset, + type_defs_offset: offsets.type_defs_offset, + type_members_offset: offsets.type_members_offset, + entrypoints_offset: offsets.entrypoints_offset, + trivia_kinds_offset: offsets.trivia_kinds_offset, + negated_field_count: query.negated_fields().len() as u16, + string_ref_count: query.string_refs().len() as u16, + type_def_count: query.type_defs().len() as u16, + type_member_count: query.type_members().len() as u16, + entrypoint_count: query.entrypoint_count(), + trivia_kind_count: query.trivia_kinds().len() as u16, + }; + + // Compute checksum over header[12..64] + buffer + let header_bytes = header.to_bytes(); + let mut checksum_data = Vec::with_capacity(52 + buffer.len()); + checksum_data.extend_from_slice(&header_bytes[12..]); + checksum_data.extend_from_slice(buffer.as_slice()); + header.checksum = crc32(&checksum_data); + + // Write header and buffer + writer.write_all(&header.to_bytes())?; + writer.write_all(buffer.as_slice())?; + + Ok(()) +} + +/// Serializes a compiled query to a byte vector. +pub fn to_bytes(query: &CompiledQuery) -> SerializeResult> { + let mut bytes = Vec::with_capacity(HEADER_SIZE + query.buffer().len()); + serialize(query, &mut bytes)?; + Ok(bytes) +} + +/// Deserializes a compiled query from a reader. 
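`serialize` and `deserialize` are written against `std::io::Write`/`Read` writers and readers, so plain files work, and the `From<std::io::Error>` impl above lets `?` convert IO failures. A sketch with an arbitrary example path:

```rust
use std::fs::File;

// Sketch: persist a compiled query and load it back. "query.plnk" is just an
// illustrative path; io::Error converts to SerializeError via From.
fn save_and_load(query: &CompiledQuery) -> SerializeResult<CompiledQuery> {
    serialize(query, File::create("query.plnk")?)?;
    deserialize(File::open("query.plnk")?)
}
```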
+pub fn deserialize(mut reader: R) -> SerializeResult { + // Read header + let mut header_bytes = [0u8; HEADER_SIZE]; + reader.read_exact(&mut header_bytes)?; + + let header = Header::from_bytes(&header_bytes); + + // Verify magic + if header.magic != MAGIC { + return Err(SerializeError::InvalidMagic(header.magic)); + } + + // Verify version + if header.version != FORMAT_VERSION { + return Err(SerializeError::VersionMismatch { + expected: FORMAT_VERSION, + found: header.version, + }); + } + + // Read buffer + let buffer_len = header.buffer_len as usize; + let mut buffer = CompiledQueryBuffer::allocate(buffer_len); + reader.read_exact(buffer.as_mut_slice())?; + + // Verify checksum + let mut checksum_data = Vec::with_capacity(52 + buffer_len); + checksum_data.extend_from_slice(&header_bytes[12..]); + checksum_data.extend_from_slice(buffer.as_slice()); + let computed_checksum = crc32(&checksum_data); + + if header.checksum != computed_checksum { + return Err(SerializeError::ChecksumMismatch { + expected: header.checksum, + found: computed_checksum, + }); + } + + // Reconstruct all counts from offsets (transitions are 64 bytes each) + let transition_count = header.successors_offset / 64; + let successor_count = compute_count_from_offsets( + header.successors_offset, + header.effects_offset, + 4, // size of TransitionId + ); + let effect_count = compute_count_from_offsets( + header.effects_offset, + header.negated_fields_offset, + 4, // size of EffectOp + ); + + // Counts are read directly from header + let negated_field_count = header.negated_field_count; + let string_ref_count = header.string_ref_count; + let type_def_count = header.type_def_count; + let type_member_count = header.type_member_count; + let entrypoint_count = header.entrypoint_count; + let trivia_kind_count = header.trivia_kind_count; + + Ok(CompiledQuery::new( + buffer, + header.successors_offset, + header.effects_offset, + header.negated_fields_offset, + header.string_refs_offset, + header.string_bytes_offset, + header.type_defs_offset, + header.type_members_offset, + header.entrypoints_offset, + header.trivia_kinds_offset, + transition_count, + successor_count, + effect_count, + negated_field_count, + string_ref_count, + type_def_count, + type_member_count, + entrypoint_count, + trivia_kind_count, + )) +} + +/// Deserializes a compiled query from a byte slice. 
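Recovering the large counts from offsets works because transitions always start at offset 0 and the elements of these segments are sized in multiples of the next segment's alignment, so no padding can sit between them. A worked example with made-up counts:

```rust
// Illustrative numbers only: 3 transitions, 10 spilled successors, 5 effects.
fn count_recovery_sketch() {
    let successors_offset: u32 = 3 * 64;
    let effects_offset = successors_offset + 10 * 4;
    let negated_fields_offset = effects_offset + 5 * 4;
    assert_eq!(successors_offset / 64, 3);                       // transitions
    assert_eq!((effects_offset - successors_offset) / 4, 10);    // successor ids
    assert_eq!((negated_fields_offset - effects_offset) / 4, 5); // effect ops
}
```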
+pub fn from_bytes(bytes: &[u8]) -> SerializeResult { + deserialize(std::io::Cursor::new(bytes)) +} + +fn compute_count_from_offsets(start: u32, end: u32, element_size: u32) -> u32 { + if end <= start { + return 0; + } + (end - start) / element_size +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn crc32_known_value() { + // Test against known CRC32 value + let data = b"123456789"; + let crc = crc32(data); + assert_eq!(crc, 0xCBF43926); + } + + #[test] + fn header_roundtrip() { + let header = Header { + magic: MAGIC, + version: FORMAT_VERSION, + checksum: 0x12345678, + buffer_len: 1024, + successors_offset: 64, + effects_offset: 128, + negated_fields_offset: 256, + string_refs_offset: 300, + string_bytes_offset: 400, + type_defs_offset: 500, + type_members_offset: 600, + entrypoints_offset: 700, + trivia_kinds_offset: 800, + negated_field_count: 5, + string_ref_count: 8, + type_def_count: 3, + type_member_count: 12, + entrypoint_count: 2, + trivia_kind_count: 1, + }; + + let bytes = header.to_bytes(); + let parsed = Header::from_bytes(&bytes); + + assert_eq!(parsed.magic, header.magic); + assert_eq!(parsed.version, header.version); + assert_eq!(parsed.checksum, header.checksum); + assert_eq!(parsed.buffer_len, header.buffer_len); + assert_eq!(parsed.successors_offset, header.successors_offset); + assert_eq!(parsed.trivia_kinds_offset, header.trivia_kinds_offset); + assert_eq!(parsed.entrypoint_count, header.entrypoint_count); + assert_eq!(parsed.type_def_count, header.type_def_count); + } + + #[test] + fn invalid_magic_rejected() { + let mut data = vec![0u8; HEADER_SIZE + 64]; + data[0..4].copy_from_slice(b"NOTM"); + + let result = from_bytes(&data); + assert!(matches!(result, Err(SerializeError::InvalidMagic(_)))); + } + + #[test] + fn version_mismatch_rejected() { + let mut data = vec![0u8; HEADER_SIZE + 64]; + data[0..4].copy_from_slice(&MAGIC); + data[4..8].copy_from_slice(&999u32.to_le_bytes()); + + let result = from_bytes(&data); + assert!(matches!( + result, + Err(SerializeError::VersionMismatch { .. }) + )); + } +} diff --git a/crates/plotnik-lib/src/ir/slice.rs b/crates/plotnik-lib/src/ir/slice.rs index c3abbfb2..13e8d717 100644 --- a/crates/plotnik-lib/src/ir/slice.rs +++ b/crates/plotnik-lib/src/ir/slice.rs @@ -3,7 +3,7 @@ //! `start_index` is an **element index**, not a byte offset. This naming //! distinguishes it from byte offsets like `StringRef.offset`. //! -//! This struct is 6 bytes to fit the Transition layout requirements. +//! This struct is 8 bytes with 4-byte alignment for efficient access. //! Type safety is provided through generic methods, not stored PhantomData. use std::marker::PhantomData; @@ -13,20 +13,21 @@ use std::marker::PhantomData; /// Used for variable-length data (successors, effects, negated fields, type members). /// The slice references elements by index into the corresponding segment array. /// -/// Layout: 6 bytes (4 + 2), no padding due to `repr(C, packed)`. -/// Alignment is 1 due to packing, so reads may be unaligned on some platforms. -#[repr(C, packed)] +/// Layout: 8 bytes (4 + 2 + 2), align 4. +#[repr(C)] #[derive(Clone, Copy)] pub struct Slice { /// Element index into the segment array (NOT byte offset). start_index: u32, /// Number of elements. 65k elements per slice is sufficient. 
len: u16, + _pad: u16, _phantom: PhantomData T>, } -// Compile-time size verification -const _: () = assert!(size_of::>() == 6); +// Compile-time size/alignment verification +const _: () = assert!(size_of::>() == 8); +const _: () = assert!(align_of::>() == 4); impl Slice { /// Creates a new slice. @@ -35,6 +36,7 @@ impl Slice { Self { start_index, len, + _pad: 0, _phantom: PhantomData, } } @@ -48,7 +50,6 @@ impl Slice { /// Returns the start index (element index, not byte offset). #[inline] pub fn start_index(&self) -> u32 { - // Packed struct - field may be unaligned, so copy out self.start_index } @@ -80,7 +81,7 @@ impl Default for Slice { impl PartialEq for Slice { fn eq(&self, other: &Self) -> bool { - self.start_index() == other.start_index() && self.len() == other.len() + self.start_index == other.start_index && self.len == other.len } } @@ -89,8 +90,8 @@ impl Eq for Slice {} impl std::fmt::Debug for Slice { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Slice") - .field("start_index", &self.start_index()) - .field("len", &self.len()) + .field("start_index", &self.start_index) + .field("len", &self.len) .finish() } } diff --git a/crates/plotnik-lib/src/ir/slice_tests.rs b/crates/plotnik-lib/src/ir/slice_tests.rs index dee8ae26..c1e85f77 100644 --- a/crates/plotnik-lib/src/ir/slice_tests.rs +++ b/crates/plotnik-lib/src/ir/slice_tests.rs @@ -43,6 +43,7 @@ fn equality() { } #[test] -fn size_is_6_bytes() { - assert_eq!(std::mem::size_of::>(), 6); +fn size_is_8_bytes() { + assert_eq!(std::mem::size_of::>(), 8); + assert_eq!(std::mem::align_of::>(), 4); } diff --git a/crates/plotnik-lib/src/ir/strings.rs b/crates/plotnik-lib/src/ir/strings.rs new file mode 100644 index 00000000..ae09801a --- /dev/null +++ b/crates/plotnik-lib/src/ir/strings.rs @@ -0,0 +1,140 @@ +//! String interning for compiled queries. +//! +//! Identical strings share storage and ID. Used for field names, variant tags, +//! entrypoint names, and type names. + +use std::collections::HashMap; + +use super::ids::StringId; + +/// String interner for query compilation. +/// +/// Interns strings during the analysis phase, then emits them as a contiguous +/// byte pool with `StringRef` entries pointing into it. +#[derive(Debug, Default)] +pub struct StringInterner<'src> { + /// Map from string content to assigned ID. + map: HashMap<&'src str, StringId>, + /// Strings in ID order for emission. + strings: Vec<&'src str>, +} + +impl<'src> StringInterner<'src> { + /// Creates a new empty interner. + pub fn new() -> Self { + Self::default() + } + + /// Interns a string, returning its ID. + /// + /// If the string was previously interned, returns the existing ID. + pub fn intern(&mut self, s: &'src str) -> StringId { + if let Some(&id) = self.map.get(s) { + return id; + } + + let id = self.strings.len() as StringId; + assert!(id < 0xFFFF, "string pool overflow (>65534 strings)"); + + self.map.insert(s, id); + self.strings.push(s); + id + } + + /// Returns the ID of a previously interned string, or `None`. + pub fn get(&self, s: &str) -> Option { + self.map.get(s).copied() + } + + /// Returns the string for a given ID. + /// + /// # Panics + /// Panics if the ID is out of range. + pub fn resolve(&self, id: StringId) -> &'src str { + self.strings[id as usize] + } + + /// Returns the number of interned strings. + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Returns true if no strings have been interned. 
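With the move from the packed 6-byte layout to 8 bytes at alignment 4, field reads no longer need the copy-out dance; a `Slice<T>` is simply an element range into its segment. A sketch of the resolution pattern the `resolve_*` helpers on `CompiledQuery` rely on (assuming `len()` returns the stored `u16`, as the `Transition` code below uses it):

```rust
// Sketch: a Slice<T> indexes elements (not bytes) of its backing segment.
fn resolve_slice<'a, T>(segment: &'a [T], slice: Slice<T>) -> &'a [T] {
    let start = slice.start_index() as usize;
    &segment[start..start + slice.len() as usize]
}
```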
+ pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } + + /// Returns an iterator over (id, string) pairs in ID order. + pub fn iter(&self) -> impl Iterator + '_ { + self.strings + .iter() + .enumerate() + .map(|(i, s)| (i as StringId, *s)) + } + + /// Returns the total byte size needed for all strings. + pub fn total_bytes(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Consumes the interner and returns strings in ID order. + pub fn into_strings(self) -> Vec<&'src str> { + self.strings + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_deduplicates() { + let mut interner = StringInterner::new(); + + let id1 = interner.intern("foo"); + let id2 = interner.intern("bar"); + let id3 = interner.intern("foo"); + + assert_eq!(id1, 0); + assert_eq!(id2, 1); + assert_eq!(id3, 0); // same as id1 + assert_eq!(interner.len(), 2); + } + + #[test] + fn resolve_works() { + let mut interner = StringInterner::new(); + interner.intern("hello"); + interner.intern("world"); + + assert_eq!(interner.resolve(0), "hello"); + assert_eq!(interner.resolve(1), "world"); + } + + #[test] + fn get_returns_none_for_unknown() { + let interner = StringInterner::new(); + assert_eq!(interner.get("unknown"), None); + } + + #[test] + fn total_bytes() { + let mut interner = StringInterner::new(); + interner.intern("foo"); // 3 bytes + interner.intern("hello"); // 5 bytes + interner.intern("foo"); // deduplicated + + assert_eq!(interner.total_bytes(), 8); + } + + #[test] + fn iter_order() { + let mut interner = StringInterner::new(); + interner.intern("a"); + interner.intern("b"); + interner.intern("c"); + + let pairs: Vec<_> = interner.iter().collect(); + assert_eq!(pairs, vec![(0, "a"), (1, "b"), (2, "c")]); + } +} diff --git a/crates/plotnik-lib/src/ir/transition.rs b/crates/plotnik-lib/src/ir/transition.rs index a452d239..0d47c500 100644 --- a/crates/plotnik-lib/src/ir/transition.rs +++ b/crates/plotnik-lib/src/ir/transition.rs @@ -14,35 +14,82 @@ pub const MAX_INLINE_SUCCESSORS: usize = 8; /// Transitions use SSO (small-size optimization) for successors: /// - 0-8 successors: stored inline in `successor_data` /// - 9+ successors: `successor_data[0]` is index into successors segment +/// +/// Layout (64 bytes total, 64-byte aligned): +/// ```text +/// offset 0: matcher (16 bytes) +/// offset 16: ref_marker (4 bytes) +/// offset 20: nav (2 bytes) +/// offset 22: effects_len (2 bytes) +/// offset 24: successor_count (4 bytes) +/// offset 28: effects_start (4 bytes) +/// offset 32: successor_data (32 bytes) +/// ``` #[repr(C, align(64))] #[derive(Clone, Copy)] pub struct Transition { // --- 32 bytes metadata --- /// What this transition matches (node kind, wildcard, epsilon). - pub matcher: Matcher, // 16 bytes + pub matcher: Matcher, // 16 bytes, offset 0 /// Reference call/return marker for recursive definitions. - pub ref_marker: RefTransition, // 4 bytes + pub ref_marker: RefTransition, // 4 bytes, offset 16 - /// Number of successor transitions. - pub successor_count: u32, // 4 bytes + /// Navigation instruction (descend/ascend/sibling traversal). + pub nav: Nav, // 2 bytes, offset 20 - /// Effects to execute on successful match. - /// When empty: start_index=0, len=0. - pub effects: Slice, // 6 bytes + /// Number of effect operations (inlined from Slice for alignment). + effects_len: u16, // 2 bytes, offset 22 - /// Navigation instruction (descend/ascend/sibling traversal). - pub nav: Nav, // 2 bytes + /// Number of successor transitions. 
+ pub successor_count: u32, // 4 bytes, offset 24 + + /// Start index into effects segment (inlined from Slice for alignment). + effects_start: u32, // 4 bytes, offset 28 // --- 32 bytes control flow --- /// Successor storage (inline or spilled index). /// /// - If `successor_count <= 8`: contains `TransitionId` values directly /// - If `successor_count > 8`: `successor_data[0]` is index into successors segment - pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes + pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes, offset 32 } impl Transition { + /// Creates a new transition with all fields. + #[inline] + pub fn new( + matcher: Matcher, + ref_marker: RefTransition, + nav: Nav, + effects: Slice, + successor_count: u32, + successor_data: [u32; MAX_INLINE_SUCCESSORS], + ) -> Self { + Self { + matcher, + ref_marker, + nav, + effects_len: effects.len(), + successor_count, + effects_start: effects.start_index(), + successor_data, + } + } + + /// Returns the effects slice. + #[inline] + pub fn effects(&self) -> Slice { + Slice::new(self.effects_start, self.effects_len) + } + + /// Sets the effects slice. + #[inline] + pub fn set_effects(&mut self, effects: Slice) { + self.effects_start = effects.start_index(); + self.effects_len = effects.len(); + } + /// Returns `true` if successors are stored inline. #[inline] pub fn has_inline_successors(&self) -> bool { diff --git a/crates/plotnik-lib/src/ir/type_metadata.rs b/crates/plotnik-lib/src/ir/type_metadata.rs index 46fda12a..a532ad75 100644 --- a/crates/plotnik-lib/src/ir/type_metadata.rs +++ b/crates/plotnik-lib/src/ir/type_metadata.rs @@ -4,7 +4,7 @@ //! transitions produce, not how they execute. use super::Slice; -use super::ids::{StringId, TypeId}; +use super::ids::{STRING_NONE, StringId, TypeId}; /// First composite type ID (after primitives 0-2). pub const TYPE_COMPOSITE_START: TypeId = 3; @@ -19,16 +19,16 @@ pub const TYPE_COMPOSITE_START: TypeId = 3; pub struct TypeDef { pub kind: TypeKind, _pad: u8, - /// Synthetic or explicit type name. `0xFFFF` for unnamed wrappers. + /// Synthetic or explicit type name. `STRING_NONE` for unnamed wrappers. pub name: StringId, /// See struct-level docs for dual semantics. pub members: Slice, - _pad2: u16, } -// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(6) + pad2(2) -// Alignment is 2 due to packed Slice having align 1 +// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(8) = 12 +// Alignment is 4 due to Slice having align 4 const _: () = assert!(size_of::() == 12); +const _: () = assert!(align_of::() == 4); impl TypeDef { /// Create a wrapper type (Optional, ArrayStar, ArrayPlus). @@ -40,9 +40,8 @@ impl TypeDef { Self { kind, _pad: 0, - name: 0xFFFF, + name: STRING_NONE, members: Slice::from_inner_type(inner), - _pad2: 0, } } @@ -54,7 +53,6 @@ impl TypeDef { _pad: 0, name, members, - _pad2: 0, } } diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 418441dd..adad70f6 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,7 +17,6 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod diagnostics; -pub mod infer; pub mod ir; pub mod parser; pub mod query; @@ -29,7 +28,7 @@ pub mod query; pub type PassResult = std::result::Result<(T, Diagnostics), Error>; pub use diagnostics::{Diagnostics, DiagnosticsPrinter, Severity}; -pub use query::Query; +pub use query::{Query, UNNAMED_DEF}; /// Errors that can occur during query parsing. 
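The dual semantics of `TypeDef.members` (inner type for wrappers, member slice for composites) is what the `is_wrapper`/`inner_type`/`members_slice` accessors used by the dump code earlier in this diff encode. A small sketch of the wrapper shape; `TypeKind::Optional` is one of the three wrapper kinds named above, and `inner_type()` returning `Option<TypeId>` is an assumption:

```rust
// Sketch: wrapper TypeDefs are unnamed and carry their inner type in the
// members field; composites instead carry a real member slice.
fn wrapper_sketch() {
    let opt = TypeDef::wrapper(TypeKind::Optional, TYPE_NODE);
    assert!(opt.is_wrapper());
    assert_eq!(opt.inner_type(), Some(TYPE_NODE));
    assert_eq!(opt.name, STRING_NONE); // STRING_NONE marks "no name"
}
```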
#[derive(Debug, Clone, thiserror::Error)] diff --git a/crates/plotnik-lib/src/parser/ast.rs b/crates/plotnik-lib/src/parser/ast.rs index 420aa78b..680d2889 100644 --- a/crates/plotnik-lib/src/parser/ast.rs +++ b/crates/plotnik-lib/src/parser/ast.rs @@ -98,6 +98,39 @@ ast_node!(FieldExpr, Field); ast_node!(NegatedField, NegatedField); ast_node!(Anchor, Anchor); +/// Either an expression or an anchor in a sequence. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SeqItem { + Expr(Expr), + Anchor(Anchor), +} + +impl SeqItem { + pub fn cast(node: SyntaxNode) -> Option { + if let Some(expr) = Expr::cast(node.clone()) { + return Some(SeqItem::Expr(expr)); + } + if let Some(anchor) = Anchor::cast(node) { + return Some(SeqItem::Anchor(anchor)); + } + None + } + + pub fn as_anchor(&self) -> Option<&Anchor> { + match self { + SeqItem::Anchor(a) => Some(a), + _ => None, + } + } + + pub fn as_expr(&self) -> Option<&Expr> { + match self { + SeqItem::Expr(e) => Some(e), + _ => None, + } + } +} + /// Anonymous node: string literal (`"+"`) or wildcard (`_`). /// Maps from CST `Str` or `Wildcard`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -204,6 +237,16 @@ impl NamedNode { pub fn children(&self) -> impl Iterator + '_ { self.0.children().filter_map(Expr::cast) } + + /// Returns all anchors in this node. + pub fn anchors(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Anchor::cast) + } + + /// Returns children interleaved with anchors, preserving order. + pub fn items(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SeqItem::cast) + } } impl Ref { @@ -266,6 +309,16 @@ impl SeqExpr { pub fn children(&self) -> impl Iterator + '_ { self.0.children().filter_map(Expr::cast) } + + /// Returns all anchors in this sequence. + pub fn anchors(&self) -> impl Iterator + '_ { + self.0.children().filter_map(Anchor::cast) + } + + /// Returns children interleaved with anchors, preserving order. + pub fn items(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SeqItem::cast) + } } impl CapturedExpr { diff --git a/crates/plotnik-lib/src/parser/mod.rs b/crates/plotnik-lib/src/parser/mod.rs index 4e3ff52e..0d0f44fb 100644 --- a/crates/plotnik-lib/src/parser/mod.rs +++ b/crates/plotnik-lib/src/parser/mod.rs @@ -42,7 +42,7 @@ pub use cst::{SyntaxKind, SyntaxNode, SyntaxToken}; pub use ast::{ AltExpr, AltKind, Anchor, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, Type, token_src, + NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, Type, token_src, }; pub use core::{ParseResult, Parser}; diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs new file mode 100644 index 00000000..8336db2a --- /dev/null +++ b/crates/plotnik-lib/src/query/graph.rs @@ -0,0 +1,629 @@ +//! Core types for build-time query graphs. +//! +//! The graph uses index-based node references (`NodeId`) with nodes stored +//! in a `Vec`. Strings borrow from the source (`&'src str`) until IR emission. + +use crate::ir::Nav; +use indexmap::IndexMap; +use rowan::TextRange; + +/// Index into `BuildGraph::nodes`. +pub type NodeId = u32; + +/// A graph fragment with single entry and exit points. +/// +/// Every expression compiles to a fragment. Combinators connect fragments +/// by manipulating entry/exit edges. 
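Combinator usage reads naturally once a couple of fragments exist. A sketch built from the pieces defined just below (`matcher_fragment`, `alternation`, `sequence`), roughly corresponding to `((identifier) | (string)) (number)` with made-up node kinds:

```rust
// Sketch: alternation adds epsilon entry/exit nodes around the branches;
// sequence then wires the alternation's exit to the next fragment's entry.
fn compose_sketch(g: &mut BuildGraph<'static>) -> Fragment {
    let a = g.matcher_fragment(BuildMatcher::node("identifier"));
    let b = g.matcher_fragment(BuildMatcher::node("string"));
    let alt = g.alternation(&[a, b]);
    let num = g.matcher_fragment(BuildMatcher::node("number"));
    g.sequence(&[alt, num])
}
```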
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Fragment { + pub entry: NodeId, + pub exit: NodeId, +} + +impl Fragment { + pub fn new(entry: NodeId, exit: NodeId) -> Self { + Self { entry, exit } + } + + pub fn single(node: NodeId) -> Self { + Self { + entry: node, + exit: node, + } + } +} + +/// Array collection mode for loop combinators. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ArrayMode { + /// No array collection (simple repetition) + None, + /// Collect elements into array (StartArray/PushElement/EndArray) + Simple, + /// Collect with object scope per iteration (for QIS) + Qis, +} + +/// Build-time graph for query compilation. +/// +/// Nodes are stored in a flat vector, referenced by `NodeId`. +/// Definitions map names to their entry points. +#[derive(Debug)] +pub struct BuildGraph<'src> { + nodes: Vec>, + definitions: IndexMap<&'src str, NodeId>, +} + +impl<'src> BuildGraph<'src> { + pub fn new() -> Self { + Self { + nodes: Vec::new(), + definitions: IndexMap::new(), + } + } + + pub fn add_node(&mut self, node: BuildNode<'src>) -> NodeId { + let id = self.nodes.len() as NodeId; + self.nodes.push(node); + id + } + + pub fn add_epsilon(&mut self) -> NodeId { + self.add_node(BuildNode::epsilon()) + } + + pub fn add_matcher(&mut self, matcher: BuildMatcher<'src>) -> NodeId { + self.add_node(BuildNode::with_matcher(matcher)) + } + + pub fn add_definition(&mut self, name: &'src str, entry: NodeId) { + self.definitions.insert(name, entry); + } + + pub fn definition(&self, name: &str) -> Option { + self.definitions.get(name).copied() + } + + pub fn definitions(&self) -> impl Iterator + '_ { + self.definitions.iter().map(|(k, v)| (*k, *v)) + } + + pub fn node(&self, id: NodeId) -> &BuildNode<'src> { + &self.nodes[id as usize] + } + + pub fn node_mut(&mut self, id: NodeId) -> &mut BuildNode<'src> { + &mut self.nodes[id as usize] + } + + pub fn len(&self) -> usize { + self.nodes.len() + } + + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + pub fn iter(&self) -> impl Iterator)> { + self.nodes.iter().enumerate().map(|(i, n)| (i as NodeId, n)) + } + + pub fn connect(&mut self, from: NodeId, to: NodeId) { + self.nodes[from as usize].successors.push(to); + } + + pub fn connect_exit(&mut self, fragment: Fragment, to: NodeId) { + self.connect(fragment.exit, to); + } + + // ───────────────────────────────────────────────────────────────────── + // Fragment Combinators + // ───────────────────────────────────────────────────────────────────── + + pub fn matcher_fragment(&mut self, matcher: BuildMatcher<'src>) -> Fragment { + Fragment::single(self.add_matcher(matcher)) + } + + pub fn epsilon_fragment(&mut self) -> Fragment { + Fragment::single(self.add_epsilon()) + } + + /// Connect fragments in sequence: f1 → f2 → ... 
→ fn + pub fn sequence(&mut self, fragments: &[Fragment]) -> Fragment { + match fragments.len() { + 0 => self.epsilon_fragment(), + 1 => fragments[0], + _ => { + for window in fragments.windows(2) { + self.connect(window[0].exit, window[1].entry); + } + Fragment::new(fragments[0].entry, fragments[fragments.len() - 1].exit) + } + } + } + + /// Connect fragments in parallel (alternation): entry → [f1|f2|...|fn] → exit + pub fn alternation(&mut self, fragments: &[Fragment]) -> Fragment { + if fragments.is_empty() { + return self.epsilon_fragment(); + } + if fragments.len() == 1 { + return fragments[0]; + } + + let entry = self.add_epsilon(); + let exit = self.add_epsilon(); + + for f in fragments { + self.connect(entry, f.entry); + self.connect(f.exit, exit); + } + + Fragment::new(entry, exit) + } + + // ───────────────────────────────────────────────────────────────────── + // Generic Loop/Optional Builders + // ───────────────────────────────────────────────────────────────────── + + /// Generic loop combinator for * and + quantifiers. + /// + /// - `at_least_one`: true for + (one or more), false for * (zero or more) + /// - `greedy`: true for greedy (try match first), false for lazy (try exit first) + /// - `mode`: array collection mode + fn build_repetition( + &mut self, + inner: Fragment, + at_least_one: bool, + greedy: bool, + mode: ArrayMode, + ) -> Fragment { + let has_array = mode != ArrayMode::None; + let has_qis = mode == ArrayMode::Qis; + + // Array wrapper nodes + let start = if has_array { + let s = self.add_epsilon(); + self.node_mut(s).add_effect(BuildEffect::StartArray { + is_plus: at_least_one, + }); + Some(s) + } else { + None + }; + + let end = if has_array { + let e = self.add_epsilon(); + self.node_mut(e).add_effect(BuildEffect::EndArray); + Some(e) + } else { + None + }; + + // QIS object wrapper nodes + let (obj_start, obj_end) = if has_qis { + let os = self.add_epsilon(); + self.node_mut(os).add_effect(BuildEffect::StartObject { + for_alternation: false, + }); + let oe = self.add_epsilon(); + self.node_mut(oe).add_effect(BuildEffect::EndObject); + (Some(os), Some(oe)) + } else { + (None, None) + }; + + // Push node for array modes + let push = if has_array { + let p = self.add_epsilon(); + self.node_mut(p).add_effect(BuildEffect::PushElement); + Some(p) + } else { + None + }; + + // Branch node (decision point for loop continuation) + let branch = self.add_epsilon(); + + // Exit node for non-array modes + let exit = if !has_array { + Some(self.add_epsilon()) + } else { + None + }; + + // Determine the effective inner entry/exit (with QIS wrapping if needed) + let (loop_body_entry, loop_body_exit) = if has_qis { + self.connect(obj_start.unwrap(), inner.entry); + self.connect(inner.exit, obj_end.unwrap()); + (obj_start.unwrap(), obj_end.unwrap()) + } else { + (inner.entry, inner.exit) + }; + + // Wire up the graph based on at_least_one and greedy + if at_least_one { + // + pattern: must match at least once + // Entry → body → push/branch → (loop back or exit) + let entry_point = start.unwrap_or(loop_body_entry); + let exit_point = end.or(exit).unwrap(); + + if let Some(s) = start { + self.connect(s, loop_body_entry); + } + + if let Some(p) = push { + self.connect(loop_body_exit, p); + self.connect(p, branch); + } else { + self.connect(loop_body_exit, branch); + } + + if greedy { + self.connect(branch, loop_body_entry); + self.connect(branch, exit_point); + } else { + self.connect(branch, exit_point); + self.connect(branch, loop_body_entry); + } + + 
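+            // Successor order encodes priority: the edge connected first is tried first,
+            // which is the only difference between the greedy and lazy wiring above.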
Fragment::new(entry_point, exit_point) + } else { + // * pattern: zero or more + // Entry → branch → (body → push → branch) or exit + let entry_point = start.unwrap_or(branch); + let exit_point = end.or(exit).unwrap(); + + if let Some(s) = start { + self.connect(s, branch); + } + + if greedy { + self.connect(branch, loop_body_entry); + self.connect(branch, exit_point); + } else { + self.connect(branch, exit_point); + self.connect(branch, loop_body_entry); + } + + if let Some(p) = push { + self.connect(loop_body_exit, p); + self.connect(p, branch); + } else { + self.connect(loop_body_exit, branch); + } + + Fragment::new(entry_point, exit_point) + } + } + + /// Generic optional combinator for ? quantifier. + /// + /// - `greedy`: true for greedy (try match first), false for lazy (try skip first) + /// - `qis`: true to wrap the optional value in an object scope + fn build_optional(&mut self, inner: Fragment, greedy: bool, qis: bool) -> Fragment { + let branch = self.add_epsilon(); + let exit = self.add_epsilon(); + + if qis { + let obj_start = self.add_epsilon(); + self.node_mut(obj_start) + .add_effect(BuildEffect::StartObject { + for_alternation: false, + }); + + let obj_end = self.add_epsilon(); + self.node_mut(obj_end).add_effect(BuildEffect::EndObject); + + // Skip path needs ClearCurrent to indicate "nothing captured" + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); + + self.connect(obj_start, inner.entry); + self.connect(inner.exit, obj_end); + self.connect(obj_end, exit); + self.connect(skip, exit); + + if greedy { + self.connect(branch, obj_start); + self.connect(branch, skip); + } else { + self.connect(branch, skip); + self.connect(branch, obj_start); + } + } else { + let skip = self.add_epsilon(); + self.node_mut(skip).add_effect(BuildEffect::ClearCurrent); + + self.connect(skip, exit); + self.connect(inner.exit, exit); + + if greedy { + self.connect(branch, inner.entry); + self.connect(branch, skip); + } else { + self.connect(branch, skip); + self.connect(branch, inner.entry); + } + } + + Fragment::new(branch, exit) + } + + // ───────────────────────────────────────────────────────────────────── + // Simple Loop Combinators (no array collection) + // ───────────────────────────────────────────────────────────────────── + + /// Zero or more (greedy): inner* + pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::None) + } + + /// Zero or more (non-greedy): inner*? + pub fn zero_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::None) + } + + /// One or more (greedy): inner+ + pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::None) + } + + /// One or more (non-greedy): inner+? + pub fn one_or_more_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::None) + } + + /// Optional (greedy): inner? + pub fn optional(&mut self, inner: Fragment) -> Fragment { + self.build_optional(inner, true, false) + } + + /// Optional (non-greedy): inner?? 
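+    ///
+    /// Lazy variant: the skip path is tried before matching `inner`.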
+ pub fn optional_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_optional(inner, false, false) + } + + // ───────────────────────────────────────────────────────────────────── + // Array-Collecting Loop Combinators + // ───────────────────────────────────────────────────────────────────── + + /// Zero or more with array collection (greedy): inner* + pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::Simple) + } + + /// Zero or more with array collection (non-greedy): inner*? + pub fn zero_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::Simple) + } + + /// One or more with array collection (greedy): inner+ + pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::Simple) + } + + /// One or more with array collection (non-greedy): inner+? + pub fn one_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::Simple) + } + + // ───────────────────────────────────────────────────────────────────── + // QIS-Aware Array Combinators (wrap each iteration with object scope) + // ───────────────────────────────────────────────────────────────────── + + /// Zero or more with QIS object wrapping (greedy): inner* + /// + /// Each iteration is wrapped in StartObject/EndObject to keep + /// multiple captures coupled per-iteration. + pub fn zero_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::Qis) + } + + /// Zero or more with QIS object wrapping (non-greedy): inner*? + pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::Qis) + } + + /// One or more with QIS object wrapping (greedy): inner+ + pub fn one_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::Qis) + } + + /// One or more with QIS object wrapping (non-greedy): inner+? + pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::Qis) + } + + /// Optional with QIS object wrapping: inner? + /// + /// Wraps the optional value in an object scope. + pub fn optional_qis(&mut self, inner: Fragment) -> Fragment { + self.build_optional(inner, true, true) + } + + /// Optional with QIS object wrapping (non-greedy): inner?? + pub fn optional_qis_lazy(&mut self, inner: Fragment) -> Fragment { + self.build_optional(inner, false, true) + } +} + +impl Default for BuildGraph<'_> { + fn default() -> Self { + Self::new() + } +} + +/// A node in the build graph. 
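+///
+/// Epsilon nodes (`BuildMatcher::Epsilon`) match nothing; they exist only to join
+/// fragments and carry effects, and many of them are marked dead and hidden in the
+/// optimized (`dump_live`) output.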
+#[derive(Debug, Clone)] +pub struct BuildNode<'src> { + pub matcher: BuildMatcher<'src>, + pub effects: Vec>, + pub ref_marker: RefMarker, + pub successors: Vec, + pub nav: Nav, + pub ref_name: Option<&'src str>, +} + +impl<'src> BuildNode<'src> { + pub fn epsilon() -> Self { + Self { + matcher: BuildMatcher::Epsilon, + effects: Vec::new(), + ref_marker: RefMarker::None, + successors: Vec::new(), + nav: Nav::stay(), + ref_name: None, + } + } + + pub fn with_matcher(matcher: BuildMatcher<'src>) -> Self { + Self { + matcher, + effects: Vec::new(), + ref_marker: RefMarker::None, + successors: Vec::new(), + nav: Nav::stay(), + ref_name: None, + } + } + + pub fn add_effect(&mut self, effect: BuildEffect<'src>) { + self.effects.push(effect); + } + + pub fn set_ref_marker(&mut self, marker: RefMarker) { + self.ref_marker = marker; + } + + pub fn set_nav(&mut self, nav: Nav) { + self.nav = nav; + } + + pub fn is_epsilon(&self) -> bool { + matches!(self.matcher, BuildMatcher::Epsilon) + } +} + +/// What a transition matches. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BuildMatcher<'src> { + Epsilon, + Node { + kind: &'src str, + field: Option<&'src str>, + negated_fields: Vec<&'src str>, + }, + Anonymous { + literal: &'src str, + field: Option<&'src str>, + }, + Wildcard { + field: Option<&'src str>, + }, +} + +impl<'src> BuildMatcher<'src> { + pub fn node(kind: &'src str) -> Self { + Self::Node { + kind, + field: None, + negated_fields: Vec::new(), + } + } + + pub fn anonymous(literal: &'src str) -> Self { + Self::Anonymous { + literal, + field: None, + } + } + + pub fn wildcard() -> Self { + Self::Wildcard { field: None } + } + + pub fn with_field(mut self, field: &'src str) -> Self { + match &mut self { + BuildMatcher::Node { field: f, .. } => *f = Some(field), + BuildMatcher::Anonymous { field: f, .. } => *f = Some(field), + BuildMatcher::Wildcard { field: f } => *f = Some(field), + BuildMatcher::Epsilon => {} + } + self + } + + pub fn with_negated_field(mut self, field: &'src str) -> Self { + if let BuildMatcher::Node { negated_fields, .. } = &mut self { + negated_fields.push(field); + } + self + } +} + +/// Effect operations recorded during graph construction. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BuildEffect<'src> { + CaptureNode, + /// Clear current value (set to None). Used on skip paths for optional captures. + ClearCurrent, + /// Start array collection. `is_plus` distinguishes `+` (true) from `*` (false). + StartArray { + is_plus: bool, + }, + PushElement, + EndArray, + /// Start object scope. `for_alternation` is true when this object wraps a captured + /// tagged alternation (tags should create enum), false for QIS/sequence objects + /// (tags in inner alternations should be ignored). + StartObject { + for_alternation: bool, + }, + EndObject, + Field { + name: &'src str, + span: TextRange, + }, + StartVariant(&'src str), + EndVariant, + ToString, +} + +/// Marker for definition call/return transitions. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum RefMarker { + #[default] + None, + Enter { + ref_id: u32, + }, + Exit { + ref_id: u32, + }, +} + +impl RefMarker { + pub fn enter(ref_id: u32) -> Self { + Self::Enter { ref_id } + } + + pub fn exit(ref_id: u32) -> Self { + Self::Exit { ref_id } + } + + pub fn is_none(&self) -> bool { + matches!(self, RefMarker::None) + } + + pub fn is_some(&self) -> bool { + !matches!(self, RefMarker::None) + } + + pub fn is_enter(&self) -> bool { + matches!(self, RefMarker::Enter { .. 
}) + } + + pub fn is_exit(&self) -> bool { + matches!(self, RefMarker::Exit { .. }) + } +} diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs new file mode 100644 index 00000000..8def0eef --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -0,0 +1,626 @@ +//! Graph construction integrated with Query pipeline. +//! +//! Constructs a `BuildGraph` from the parsed AST, reusing the `symbol_table` +//! and `qis_triggers` populated by earlier passes. + +use std::collections::HashSet; + +use crate::ir::Nav; +use crate::parser::{ + AltExpr, AltKind, AnonymousNode, Branch, CapturedExpr, Expr, FieldExpr, NamedNode, + NegatedField, QuantifiedExpr, Ref, SeqExpr, SeqItem, SyntaxKind, token_src, +}; + +use super::Query; +use super::graph::{BuildEffect, BuildMatcher, Fragment, NodeId, RefMarker}; + +/// Context for navigation determination. +/// When `anchored` is true, `prev_anonymous` indicates whether the preceding +/// expression was anonymous (string literal), which determines Exact vs SkipTrivia mode. +#[derive(Debug, Clone, Copy)] +enum NavContext { + Root, + FirstChild { + anchored: bool, + prev_anonymous: bool, + }, + Sibling { + anchored: bool, + prev_anonymous: bool, + }, +} + +impl NavContext { + fn to_nav(self) -> Nav { + match self { + NavContext::Root => Nav::stay(), + NavContext::FirstChild { + anchored: false, .. + } => Nav::down(), + NavContext::FirstChild { + anchored: true, + prev_anonymous, + } => { + if prev_anonymous { + Nav::down_exact() + } else { + Nav::down_skip_trivia() + } + } + NavContext::Sibling { + anchored: false, .. + } => Nav::next(), + NavContext::Sibling { + anchored: true, + prev_anonymous, + } => { + if prev_anonymous { + Nav::next_exact() + } else { + Nav::next_skip_trivia() + } + } + } + } +} + +/// Tracks trailing anchor state for Up navigation. +#[derive(Debug, Clone, Copy)] +struct ExitContext { + has_trailing_anchor: bool, + last_was_anonymous: bool, +} + +impl ExitContext { + fn to_up_nav(self, level: u8) -> Nav { + if !self.has_trailing_anchor { + Nav::up(level) + } else if self.last_was_anonymous { + Nav::up_exact(level) + } else { + Nav::up_skip_trivia(level) + } + } +} + +impl<'a> Query<'a> { + /// Build the graph from the already-populated symbol_table. + /// + /// This method reuses the symbol_table from name resolution and + /// qis_triggers from QIS detection. + pub(super) fn construct_graph(&mut self) { + self.next_ref_id = 0; + + let entries: Vec<_> = self + .symbol_table + .iter() + .map(|(name, body)| (*name, body.clone())) + .collect(); + for (name, body) in entries { + let fragment = self.construct_expr(&body, NavContext::Root); + self.graph.add_definition(name, fragment.entry); + } + + self.link_references(); + } + + /// Link Enter nodes to their definition entry points. + fn link_references(&mut self) { + let mut links: Vec<(NodeId, &'a str, Option)> = Vec::new(); + + for (id, node) in self.graph.iter() { + if let RefMarker::Enter { .. 
} = &node.ref_marker + && let Some(name) = node.ref_name + { + let exit_node = self.find_exit_for_enter(id); + links.push((id, name, exit_node)); + } + } + + for (enter_id, name, exit_id) in links { + if let Some(def_entry) = self.graph.definition(name) { + self.graph.connect(enter_id, def_entry); + // Connect Enter → Exit so Exit node (with Capture effect) is traversed + if let Some(exit) = exit_id { + self.graph.connect(enter_id, exit); + } + } + } + } + + fn find_exit_for_enter(&self, enter_id: NodeId) -> Option { + let enter_node = self.graph.node(enter_id); + let RefMarker::Enter { ref_id } = enter_node.ref_marker else { + return None; + }; + + for (id, node) in self.graph.iter() { + if let RefMarker::Exit { ref_id: exit_id } = &node.ref_marker + && *exit_id == ref_id + { + return Some(id); + } + } + None + } + + fn construct_expr(&mut self, expr: &Expr, ctx: NavContext) -> Fragment { + match expr { + Expr::NamedNode(node) => self.construct_named_node(node, ctx), + Expr::AnonymousNode(node) => self.construct_anonymous_node(node, ctx), + Expr::Ref(r) => self.construct_ref(r, ctx), + Expr::AltExpr(alt) => self.construct_alt(alt, ctx), + Expr::SeqExpr(seq) => self.construct_seq(seq, ctx), + Expr::CapturedExpr(cap) => self.construct_capture(cap, ctx), + Expr::QuantifiedExpr(quant) => self.construct_quantifier(quant, ctx), + Expr::FieldExpr(field) => self.construct_field(field, ctx), + } + } + + fn construct_named_node(&mut self, node: &NamedNode, ctx: NavContext) -> Fragment { + let matcher = self.build_named_matcher(node); + let nav = ctx.to_nav(); + let node_id = self.graph.add_matcher(matcher); + self.graph.node_mut(node_id).set_nav(nav); + + let items: Vec<_> = node.items().collect(); + if items.is_empty() { + return Fragment::single(node_id); + } + + let (child_fragments, exit_ctx) = self.construct_item_sequence(&items, true); + if child_fragments.is_empty() { + return Fragment::single(node_id); + } + + let inner = self.graph.sequence(&child_fragments); + self.graph.connect(node_id, inner.entry); + + let exit_id = self.graph.add_epsilon(); + self.graph.node_mut(exit_id).set_nav(exit_ctx.to_up_nav(1)); + self.graph.connect(inner.exit, exit_id); + + Fragment::new(node_id, exit_id) + } + + fn construct_item_sequence( + &mut self, + items: &[SeqItem], + is_children: bool, + ) -> (Vec, ExitContext) { + let mut fragments = Vec::new(); + let mut pending_anchor = false; + let mut last_was_anonymous = false; + let mut is_first = true; + + for item in items { + match item { + SeqItem::Anchor(_) => { + pending_anchor = true; + } + SeqItem::Expr(expr) => { + let ctx = if is_first { + is_first = false; + if is_children { + NavContext::FirstChild { + anchored: pending_anchor, + prev_anonymous: last_was_anonymous, + } + } else { + NavContext::Sibling { + anchored: pending_anchor, + prev_anonymous: last_was_anonymous, + } + } + } else { + NavContext::Sibling { + anchored: pending_anchor, + prev_anonymous: last_was_anonymous, + } + }; + + last_was_anonymous = is_anonymous_expr(expr); + let frag = self.construct_expr(expr, ctx); + fragments.push(frag); + pending_anchor = false; + } + } + } + + let exit_ctx = ExitContext { + has_trailing_anchor: pending_anchor, + last_was_anonymous, + }; + + (fragments, exit_ctx) + } + + fn build_named_matcher(&self, node: &NamedNode) -> BuildMatcher<'a> { + let kind = node + .node_type() + .map(|t| token_src(&t, self.source)) + .unwrap_or("_"); + + let negated_fields: Vec<&'a str> = node + .as_cst() + .children() + .filter_map(NegatedField::cast) + .filter_map(|nf| 
nf.name()) + .map(|t| token_src(&t, self.source)) + .collect(); + + let field = self.find_field_constraint(node.as_cst()); + + if node.is_any() { + BuildMatcher::Wildcard { field } + } else { + BuildMatcher::Node { + kind, + field, + negated_fields, + } + } + } + + fn construct_anonymous_node(&mut self, node: &AnonymousNode, ctx: NavContext) -> Fragment { + let field = self.find_field_constraint(node.as_cst()); + let nav = ctx.to_nav(); + + let matcher = if node.is_any() { + BuildMatcher::Wildcard { field } + } else { + let literal = node + .value() + .map(|t| token_src(&t, self.source)) + .unwrap_or(""); + BuildMatcher::Anonymous { literal, field } + }; + + let node_id = self.graph.add_matcher(matcher); + self.graph.node_mut(node_id).set_nav(nav); + Fragment::single(node_id) + } + + fn construct_ref(&mut self, r: &Ref, ctx: NavContext) -> Fragment { + let Some(name_token) = r.name() else { + return self.graph.epsilon_fragment(); + }; + + let ref_id = self.next_ref_id; + self.next_ref_id += 1; + + let enter_id = self.graph.add_epsilon(); + let nav = ctx.to_nav(); + self.graph.node_mut(enter_id).set_nav(nav); + self.graph + .node_mut(enter_id) + .set_ref_marker(RefMarker::enter(ref_id)); + + let exit_id = self.graph.add_epsilon(); + self.graph + .node_mut(exit_id) + .set_ref_marker(RefMarker::exit(ref_id)); + + let name = token_src(&name_token, self.source); + self.graph.node_mut(enter_id).ref_name = Some(name); + + Fragment::new(enter_id, exit_id) + } + + fn construct_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + match alt.kind() { + AltKind::Tagged => self.construct_tagged_alt(alt, ctx), + AltKind::Untagged | AltKind::Mixed => self.construct_untagged_alt(alt, ctx), + } + } + + fn construct_tagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + let branches: Vec<_> = alt.branches().collect(); + if branches.is_empty() { + return self.graph.epsilon_fragment(); + } + + let branch_id = self.graph.add_epsilon(); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); + + let exit_id = self.graph.add_epsilon(); + + for branch in &branches { + let frag = self.construct_tagged_branch(branch); + self.graph.connect(branch_id, frag.entry); + self.graph.connect(frag.exit, exit_id); + } + + Fragment::new(branch_id, exit_id) + } + + fn construct_tagged_branch(&mut self, branch: &Branch) -> Fragment { + let Some(label_token) = branch.label() else { + return branch + .body() + .map(|b| self.construct_expr(&b, NavContext::Root)) + .unwrap_or_else(|| self.graph.epsilon_fragment()); + }; + let Some(body) = branch.body() else { + return self.graph.epsilon_fragment(); + }; + + let label = token_src(&label_token, self.source); + + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartVariant(label)); + + let body_frag = self.construct_expr(&body, NavContext::Root); + + // Count Field effects to determine object wrapping. + // Note: Single-capture flattening (ADR-0007) is handled in type inference, + // not here, because we don't know if the alternation is captured yet. + // Uncaptured inline tagged alternations need Field effects preserved. 
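+        // For example, `Ok: (success) @val` has a single Field effect and stays flat,
+        // while `Complex: (pair (key) @k (value) @v)` has two and gets its own object scope.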
+ let field_count = self.count_field_effects(body_frag.entry); + + if field_count > 1 { + // Multiple captures: wrap with StartObject/EndObject + // This is NOT the alternation capture object - it's the variant's scope + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject { + for_alternation: false, + }); + } + + let end_id = self.graph.add_epsilon(); + if field_count > 1 { + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + } + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndVariant); + + self.graph.connect(start_id, body_frag.entry); + self.graph.connect(body_frag.exit, end_id); + + Fragment::new(start_id, end_id) + } + + fn construct_untagged_alt(&mut self, alt: &AltExpr, ctx: NavContext) -> Fragment { + let branches: Vec<_> = alt.branches().filter_map(|b| b.body()).collect(); + + if branches.is_empty() { + return self.graph.epsilon_fragment(); + } + + let branch_id = self.graph.add_epsilon(); + self.graph.node_mut(branch_id).set_nav(ctx.to_nav()); + + let exit_id = self.graph.add_epsilon(); + + for body in &branches { + let frag = self.construct_expr(body, NavContext::Root); + self.graph.connect(branch_id, frag.entry); + self.graph.connect(frag.exit, exit_id); + } + + Fragment::new(branch_id, exit_id) + } + + fn construct_seq(&mut self, seq: &SeqExpr, ctx: NavContext) -> Fragment { + let items: Vec<_> = seq.items().collect(); + + // Uncaptured sequences don't create object scope - they just group items. + // Captures propagate to parent scope. Object scope is created by: + // - Captured sequences ({...} @name) via construct_capture + // - QIS quantifiers that wrap loop body with StartObject/EndObject + + let start_id = self.graph.add_epsilon(); + self.graph.node_mut(start_id).set_nav(ctx.to_nav()); + + let (child_fragments, _exit_ctx) = self.construct_item_sequence(&items, false); + let inner = self.graph.sequence(&child_fragments); + + self.graph.connect(start_id, inner.entry); + + Fragment::new(start_id, inner.exit) + } + + fn construct_capture(&mut self, cap: &CapturedExpr, ctx: NavContext) -> Fragment { + let Some(inner_expr) = cap.inner() else { + return self.graph.epsilon_fragment(); + }; + + let inner_frag = self.construct_expr(&inner_expr, ctx); + + let capture_token = cap.name(); + let capture_name = capture_token.as_ref().map(|t| token_src(t, self.source)); + + let has_to_string = cap + .type_annotation() + .and_then(|t| t.name()) + .map(|n| n.text() == "string") + .unwrap_or(false); + + // Captured sequence/alternation creates object scope for nested fields. + // Tagged alternations use variants instead (handled in construct_tagged_alt). + // Quantifiers only need wrapper if QIS (2+ captures) - otherwise the array is the direct value. 
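+        // E.g. `{ (inner) @x (inner2) @y } @nested` gets a StartObject/EndObject pair,
+        // while `(item)* @items` attaches its Field effect directly to the array value.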
+ let needs_object_wrapper = match &inner_expr { + Expr::SeqExpr(_) | Expr::AltExpr(_) => true, + Expr::QuantifiedExpr(q) => self.qis_triggers.contains_key(q), + _ => false, + }; + + let matchers = self.find_all_matchers(inner_frag.entry); + for matcher_id in matchers { + self.graph + .node_mut(matcher_id) + .add_effect(BuildEffect::CaptureNode); + + if has_to_string { + self.graph + .node_mut(matcher_id) + .add_effect(BuildEffect::ToString); + } + } + + if let Some(name) = capture_name { + let span = capture_token + .as_ref() + .map(|t| t.text_range()) + .unwrap_or_default(); + + // Check if we're capturing an alternation (for enum vs struct distinction) + let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); + + let (entry, exit) = if needs_object_wrapper { + // Wrap with StartObject/EndObject for composite captures + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject { + for_alternation: is_alternation_capture, + }); + self.graph.connect(start_id, inner_frag.entry); + + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + self.graph.connect(inner_frag.exit, end_id); + + (start_id, end_id) + } else { + (inner_frag.entry, inner_frag.exit) + }; + + let field_id = self.graph.add_epsilon(); + self.graph + .node_mut(field_id) + .add_effect(BuildEffect::Field { name, span }); + self.graph.connect(exit, field_id); + Fragment::new(entry, field_id) + } else { + inner_frag + } + } + + fn construct_quantifier(&mut self, quant: &QuantifiedExpr, ctx: NavContext) -> Fragment { + let Some(inner_expr) = quant.inner() else { + return self.graph.epsilon_fragment(); + }; + let Some(op) = quant.operator() else { + return self.construct_expr(&inner_expr, ctx); + }; + + let f = self.construct_expr(&inner_expr, ctx); + let qis = self.qis_triggers.contains_key(quant); + + match (op.kind(), qis) { + (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f), + (SyntaxKind::Star, true) => self.graph.zero_or_more_array_qis(f), + (SyntaxKind::StarQuestion, false) => self.graph.zero_or_more_array_lazy(f), + (SyntaxKind::StarQuestion, true) => self.graph.zero_or_more_array_qis_lazy(f), + (SyntaxKind::Plus, false) => self.graph.one_or_more_array(f), + (SyntaxKind::Plus, true) => self.graph.one_or_more_array_qis(f), + (SyntaxKind::PlusQuestion, false) => self.graph.one_or_more_array_lazy(f), + (SyntaxKind::PlusQuestion, true) => self.graph.one_or_more_array_qis_lazy(f), + (SyntaxKind::Question, false) => self.graph.optional(f), + (SyntaxKind::Question, true) => self.graph.optional_qis(f), + (SyntaxKind::QuestionQuestion, false) => self.graph.optional_lazy(f), + (SyntaxKind::QuestionQuestion, true) => self.graph.optional_qis_lazy(f), + _ => f, + } + } + + fn construct_field(&mut self, field: &FieldExpr, ctx: NavContext) -> Fragment { + let Some(value_expr) = field.value() else { + return self.graph.epsilon_fragment(); + }; + self.construct_expr(&value_expr, ctx) + } + + fn find_field_constraint(&self, node: &crate::parser::SyntaxNode) -> Option<&'a str> { + let parent = node.parent()?; + let field_expr = FieldExpr::cast(parent)?; + let name_token = field_expr.name()?; + Some(token_src(&name_token, self.source)) + } + + fn find_all_matchers(&self, start: NodeId) -> Vec { + let mut result = Vec::new(); + let mut visited = HashSet::new(); + self.collect_matchers(start, &mut result, &mut visited); + result + } + + fn collect_matchers( + &self, + node_id: NodeId, + result: &mut Vec, + 
visited: &mut HashSet, + ) { + if !visited.insert(node_id) { + return; + } + + let node = self.graph.node(node_id); + + // References are opaque to captures: don't traverse into definition body. + // Capture should happen at Exit (after reference executes, cursor at matched node). + if let RefMarker::Enter { ref_id } = node.ref_marker { + for (id, n) in self.graph.iter() { + if let RefMarker::Exit { ref_id: exit_id } = n.ref_marker + && exit_id == ref_id + { + result.push(id); + return; + } + } + return; + } + + if !node.is_epsilon() { + result.push(node_id); + return; + } + + for &succ in &node.successors { + self.collect_matchers(succ, result, visited); + } + } + /// Count Field effects reachable from a node (for variant flattening). + fn count_field_effects(&self, start: NodeId) -> usize { + self.nodes_with_field_effects(start) + .iter() + .flat_map(|&id| &self.graph.node(id).effects) + .filter(|e| matches!(e, BuildEffect::Field { .. })) + .count() + } + + fn nodes_with_field_effects(&self, start: NodeId) -> Vec { + let mut result = Vec::new(); + let mut visited = HashSet::new(); + let mut stack = vec![start]; + + while let Some(node_id) = stack.pop() { + if !visited.insert(node_id) { + continue; + } + let node = self.graph.node(node_id); + if node + .effects + .iter() + .any(|e| matches!(e, BuildEffect::Field { .. })) + { + result.push(node_id); + } + stack.extend(&node.successors); + } + result + } +} + +fn is_anonymous_expr(expr: &Expr) -> bool { + matches!(expr, Expr::AnonymousNode(n) if !n.is_any()) +} diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs new file mode 100644 index 00000000..82345f76 --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -0,0 +1,267 @@ +//! Tests for graph construction integrated with Query pipeline. 
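+//!
+//! Each test parses a query, builds its graph, and snapshots the text dump:
+//! `dump()` for the full graph, `dump_live()` to hide dead nodes after optimization.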
+ +use indoc::indoc; + +use crate::query::Query; + +fn snapshot(input: &str) -> String { + let query = Query::try_from(input).unwrap().build_graph(); + query.graph().dump() +} + +fn snapshot_optimized(input: &str) -> String { + let query = Query::try_from(input).unwrap().build_graph(); + query.graph().dump_live(query.dead_nodes()) +} + +#[test] +fn simple_named_node() { + insta::assert_snapshot!(snapshot("Q = (identifier)"), @r" + Q = N0 + + N0: (identifier) → ∅ + "); +} + +#[test] +fn named_node_with_capture() { + insta::assert_snapshot!(snapshot("Q = (identifier) @id"), @r" + Q = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(id)] → ∅ + "); +} + +#[test] +fn named_node_with_children() { + insta::assert_snapshot!(snapshot("Q = (function_definition (identifier))"), @r" + Q = N0 + + N0: (function_definition) → N1 + N1: [Down] (identifier) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn sequence() { + insta::assert_snapshot!(snapshot("Q = { (a) (b) }"), @r" + Q = N1 + + N0: ε → N1 + N1: [Next] (a) → N2 + N2: [Next] (b) → ∅ + "); +} + +#[test] +fn sequence_with_captures() { + insta::assert_snapshot!(snapshot("Q = { (a) @x (b) @y }"), @r" + Q = N1 + + N0: ε → N1 + N1: [Next] (a) [Capture] → N2 + N2: ε [Field(x)] → N3 + N3: [Next] (b) [Capture] → N4 + N4: ε [Field(y)] → ∅ + "); +} + +#[test] +fn alternation_untagged() { + insta::assert_snapshot!(snapshot("Q = [ (a) (b) ]"), @r" + Q = N0 + + N0: ε → N2, N3 + N1: ε → ∅ + N2: (a) → N1 + N3: (b) → N1 + "); +} + +#[test] +fn alternation_tagged() { + insta::assert_snapshot!(snapshot("Q = [ A: (a) @x B: (b) @y ]"), @r" + Q = N0 + + N0: ε → N3, N7 + N1: ε → ∅ + N2: ε [Variant(A)] → N3 + N3: (a) [Variant(A)] [Capture] → N5 + N4: ε [Field(x)] → N5 + N5: ε [Field(x)] [EndVariant] → N1 + N6: ε [Variant(B)] → N7 + N7: (b) [Variant(B)] [Capture] → N9 + N8: ε [Field(y)] → N9 + N9: ε [Field(y)] [EndVariant] → N1 + "); +} + +#[test] +fn quantifier_star() { + insta::assert_snapshot!(snapshot("Q = (identifier)*"), @r" + Q = N1 + + N0: (identifier) → N3 + N1: ε [StartArray] → N4 + N2: ε [EndArray] → ∅ + N3: ε [Push] → N4 + N4: ε → N0, N2 + "); +} + +#[test] +fn quantifier_plus() { + insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" + Q = N1 + + N0: (identifier) → N4 + N1: ε [StartArray] → N0 + N2: ε [EndArray] → ∅ + N3: ε [Push] → N4 + N4: ε [Push] → N0, N2 + "); +} + +#[test] +fn quantifier_optional() { + insta::assert_snapshot!(snapshot("Q = (identifier)?"), @r" + Q = N1 + + N0: (identifier) → N2 + N1: ε → N0, N3 + N2: ε → ∅ + N3: ε [Clear] → N2 + "); +} + +#[test] +fn reference() { + let input = indoc! {r#" + A = (identifier) + B = (A) + "#}; + insta::assert_snapshot!(snapshot(input), @r" + A = N0 + B = N1 + + N0: (identifier) → ∅ + N1: ε +Enter(0, A) → N0, N2 + N2: ε +Exit(0) → ∅ + "); +} + +#[test] +fn anonymous_node() { + insta::assert_snapshot!(snapshot(r#"Q = "hello""#), @r#" + Q = N0 + + N0: "hello" → ∅ + "#); +} + +#[test] +fn wildcard() { + insta::assert_snapshot!(snapshot("Q = (_)"), @r" + Q = N0 + + N0: _ → ∅ + "); +} + +#[test] +fn field_constraint() { + insta::assert_snapshot!(snapshot("Q = (function name: (identifier))"), @r" + Q = N0 + + N0: (function) → N1 + N1: [Down] (identifier) @name → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn to_string_annotation() { + insta::assert_snapshot!(snapshot("Q = (identifier) @name ::string"), @r" + Q = N0 + + N0: (identifier) [Capture] [ToString] → N1 + N1: ε [Field(name)] → ∅ + "); +} + +#[test] +fn anchor_first_child() { + insta::assert_snapshot!(snapshot("Q = (parent . 
(child))"), @r" + Q = N0 + + N0: (parent) → N1 + N1: [Down.] (child) → N2 + N2: [Up(1)] ε → ∅ + "); +} + +#[test] +fn anchor_sibling() { + insta::assert_snapshot!(snapshot("Q = (parent (a) . (b))"), @r" + Q = N0 + + N0: (parent) → N1 + N1: [Down] (a) → N2 + N2: [Next.] (b) → N3 + N3: [Up(1)] ε → ∅ + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Optimization tests +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn optimized_simple() { + insta::assert_snapshot!(snapshot_optimized("Q = (identifier) @id"), @r" + Q = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(id)] → ∅ + "); +} + +#[test] +fn optimized_sequence() { + insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" + Q = N1 + + N1: [Next] (a) [Capture] → N2 + N2: ε [Field(x)] → N3 + N3: [Next] (b) [Capture] → N4 + N4: ε [Field(y)] → ∅ + "); +} + +#[test] +fn symbol_table_reuse() { + let input = indoc! {r#" + Foo = (identifier) + Bar = (Foo) + Baz = (Bar) + "#}; + let query = Query::try_from(input).unwrap().build_graph(); + + assert!(query.graph().definition("Foo").is_some()); + assert!(query.graph().definition("Bar").is_some()); + assert!(query.graph().definition("Baz").is_some()); + + insta::assert_snapshot!(query.graph().dump(), @r" + Foo = N0 + Bar = N1 + Baz = N3 + + N0: (identifier) → ∅ + N1: ε +Enter(0, Foo) → N0, N2 + N2: ε +Exit(0) → ∅ + N3: ε +Enter(1, Bar) → N1, N4 + N4: ε +Exit(1) → ∅ + "); +} diff --git a/crates/plotnik-lib/src/query/graph_dump.rs b/crates/plotnik-lib/src/query/graph_dump.rs new file mode 100644 index 00000000..0fc3f19f --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_dump.rs @@ -0,0 +1,192 @@ +//! Dump helpers for graph inspection and testing. + +use std::collections::HashSet; +use std::fmt::Write; + +use crate::ir::{Nav, NavKind}; + +use super::graph::{BuildEffect, BuildGraph, BuildMatcher, NodeId, RefMarker}; + +/// Printer for `BuildGraph` with configurable output options. 
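+///
+/// Output format: one `Name = N<entry>` line per definition, then one line per node:
+/// `N<id>: [Nav] <matcher> +Enter/+Exit [Effect] → successors`, with `∅` when there are
+/// no live successors and `✗` marking dead nodes when they are shown.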
+pub struct GraphPrinter<'a, 'src> { + graph: &'a BuildGraph<'src>, + dead_nodes: Option<&'a HashSet>, + show_dead: bool, +} + +impl<'a, 'src> GraphPrinter<'a, 'src> { + pub fn new(graph: &'a BuildGraph<'src>) -> Self { + Self { + graph, + dead_nodes: None, + show_dead: false, + } + } + + pub fn with_dead_nodes(mut self, dead: &'a HashSet) -> Self { + self.dead_nodes = Some(dead); + self + } + + pub fn show_dead(mut self, show: bool) -> Self { + self.show_dead = show; + self + } + + pub fn dump(&self) -> String { + let mut out = String::new(); + self.format(&mut out).expect("String write never fails"); + out + } + + fn format(&self, w: &mut String) -> std::fmt::Result { + for (name, entry) in self.graph.definitions() { + writeln!(w, "{} = N{}", name, entry)?; + } + if self.graph.definitions().next().is_some() { + writeln!(w)?; + } + + for (id, node) in self.graph.iter() { + let is_dead = self.dead_nodes.map(|d| d.contains(&id)).unwrap_or(false); + + if is_dead && !self.show_dead { + continue; + } + + if is_dead { + write!(w, "N{}: ✗ ", id)?; + } else { + write!(w, "N{}: ", id)?; + } + + if !node.nav.is_stay() { + write!(w, "[{}] ", format_nav(&node.nav))?; + } + + self.format_matcher(w, &node.matcher)?; + + match &node.ref_marker { + RefMarker::None => {} + RefMarker::Enter { ref_id } => { + let name = node.ref_name.unwrap_or("?"); + write!(w, " +Enter({}, {})", ref_id, name)?; + } + RefMarker::Exit { ref_id } => { + write!(w, " +Exit({})", ref_id)?; + } + } + + for effect in &node.effects { + write!(w, " [{}]", format_effect(effect))?; + } + + self.format_successors(w, &node.successors)?; + + writeln!(w)?; + } + + Ok(()) + } + + fn format_matcher(&self, w: &mut String, matcher: &BuildMatcher<'src>) -> std::fmt::Result { + match matcher { + BuildMatcher::Epsilon => write!(w, "ε"), + BuildMatcher::Node { + kind, + field, + negated_fields, + } => { + write!(w, "({})", kind)?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + for neg in negated_fields { + write!(w, " !{}", neg)?; + } + Ok(()) + } + BuildMatcher::Anonymous { literal, field } => { + write!(w, "\"{}\"", literal)?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + Ok(()) + } + BuildMatcher::Wildcard { field } => { + write!(w, "_")?; + if let Some(f) = field { + write!(w, " @{}", f)?; + } + Ok(()) + } + } + } + + fn format_successors(&self, w: &mut String, successors: &[NodeId]) -> std::fmt::Result { + let live_succs: Vec<_> = successors + .iter() + .filter(|s| self.dead_nodes.map(|d| !d.contains(s)).unwrap_or(true)) + .collect(); + + if live_succs.is_empty() { + write!(w, " → ∅") + } else { + write!(w, " → ")?; + let succs: Vec<_> = live_succs.iter().map(|s| format!("N{}", s)).collect(); + write!(w, "{}", succs.join(", ")) + } + } +} + +fn format_nav(nav: &Nav) -> String { + match nav.kind { + NavKind::Stay => "Stay".to_string(), + NavKind::Next => "Next".to_string(), + NavKind::NextSkipTrivia => "Next.".to_string(), + NavKind::NextExact => "Next!".to_string(), + NavKind::Down => "Down".to_string(), + NavKind::DownSkipTrivia => "Down.".to_string(), + NavKind::DownExact => "Down!".to_string(), + NavKind::Up => format!("Up({})", nav.level), + NavKind::UpSkipTrivia => format!("Up.({})", nav.level), + NavKind::UpExact => format!("Up!({})", nav.level), + } +} + +fn format_effect(effect: &BuildEffect) -> String { + match effect { + BuildEffect::CaptureNode => "Capture".to_string(), + BuildEffect::ClearCurrent => "Clear".to_string(), + BuildEffect::StartArray { .. 
} => "StartArray".to_string(), + BuildEffect::PushElement => "Push".to_string(), + BuildEffect::EndArray => "EndArray".to_string(), + BuildEffect::StartObject { .. } => "StartObj".to_string(), + BuildEffect::EndObject => "EndObj".to_string(), + BuildEffect::Field { name, .. } => format!("Field({})", name), + BuildEffect::StartVariant(v) => format!("Variant({})", v), + BuildEffect::EndVariant => "EndVariant".to_string(), + BuildEffect::ToString => "ToString".to_string(), + } +} + +impl<'src> BuildGraph<'src> { + pub fn printer(&self) -> GraphPrinter<'_, 'src> { + GraphPrinter::new(self) + } + + pub fn dump(&self) -> String { + self.printer().dump() + } + + pub fn dump_with_dead(&self, dead_nodes: &HashSet) -> String { + self.printer() + .with_dead_nodes(dead_nodes) + .show_dead(true) + .dump() + } + + pub fn dump_live(&self, dead_nodes: &HashSet) -> String { + self.printer().with_dead_nodes(dead_nodes).dump() + } +} diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs new file mode 100644 index 00000000..1d77456a --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -0,0 +1,1259 @@ +//! Golden master test for graph construction and type inference. +//! +//! This test exercises the full spectrum of ADR-specified behaviors: +//! - ADR-0004: Binary format concepts (transitions, effects, strings, types) +//! - ADR-0005: Transition graph (matchers, nav, ref markers, quantifiers) +//! - ADR-0006: Query execution (effect stream, materialization) +//! - ADR-0007: Type metadata (TypeKind, synthetic naming, flattening) +//! - ADR-0008: Tree navigation (Nav kinds, anchor lowering) +//! - ADR-0009: Type system (cardinality, scopes, alternations, QIS, unification) + +use indoc::indoc; + +use crate::query::Query; + +fn golden_master(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + + let mut out = String::new(); + + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n", + ); + out.push_str(" TRANSITION GRAPH\n"); + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n\n", + ); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + + out.push_str( + "\n═══════════════════════════════════════════════════════════════════════════════\n", + ); + out.push_str(" TYPE INFERENCE\n"); + out.push_str( + "═══════════════════════════════════════════════════════════════════════════════\n\n", + ); + out.push_str(&query.type_info().dump()); + + out +} + +/// Comprehensive test covering all major ADR features. +/// +/// Query structure: +/// 1. Basic captures with ::string annotation (ADR-0007, ADR-0009) +/// 2. Field constraints and negated fields (ADR-0005) +/// 3. Anchors - first child, last child, siblings (ADR-0008) +/// 4. Quantifiers - *, +, ? with captures (ADR-0005, ADR-0009) +/// 5. QIS - multiple captures in quantified expr (ADR-0009) +/// 6. Tagged alternations - enum generation (ADR-0007, ADR-0009) +/// 7. Untagged alternations - struct merge (ADR-0009) +/// 8. Captured sequences - nested scopes (ADR-0009) +/// 9. Definition references - Enter/Exit (ADR-0005, ADR-0006) +/// 10. Cardinality propagation and joins (ADR-0009) +/// 11. Single-capture variant flattening (ADR-0007, ADR-0009) +/// 12. Deep nesting with multi-level Up (ADR-0008) +/// 13. Wildcards and string literals (ADR-0005) +#[test] +fn golden_master_comprehensive() { + let source = indoc! 
{r#" + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 1: Basic captures and type annotations + // ═══════════════════════════════════════════════════════════════════════════ + + // Simple node capture → Node type + SimpleCapture = (identifier) @name + + // String annotation → String type + StringCapture = (identifier) @name ::string + + // Multiple flat captures → Struct with multiple fields + MultiCapture = (function + name: (identifier) @fn_name ::string + body: (block) @fn_body + ) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 2: Navigation and anchors (ADR-0008) + // ═══════════════════════════════════════════════════════════════════════════ + + // First child anchor → DownSkipTrivia + AnchorFirst = (parent . (first_child) @first) + + // Last child anchor → UpSkipTrivia + AnchorLast = (parent (last_child) @last .) + + // Adjacent siblings → NextSkipTrivia + AnchorSibling = (parent (a) @left . (b) @right) + + // Deep nesting with multi-level Up + DeepNest = (a (b (c (d) @deep))) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 3: Quantifiers (ADR-0005, ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Star quantifier → ArrayStar + StarQuant = (container (item)* @items) + + // Plus quantifier → ArrayPlus + PlusQuant = (container (item)+ @items) + + // Optional quantifier → Optional + OptQuant = (container (item)? @maybe_item) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 4: QIS - Quantifier-Induced Scope (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Two captures in quantified node → QIS triggers, creates element struct + QisNode = (function + name: (identifier) @name + body: (block) @body + )* + + // Two captures in quantified sequence → QIS triggers + QisSequence = { (key) @key (value) @value }* + + // Single capture → NO QIS, standard cardinality propagation + NoQis = { (item) @item }* + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 5: Tagged alternations (ADR-0007, ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Tagged at definition root → Definition becomes Enum + // Single capture per variant → flattened payload + TaggedRoot = [ + Ok: (success) @val + Err: (error) @msg ::string + ] + + // Tagged alternation captured → creates nested Enum + TaggedCaptured = (wrapper [ + Left: (left_node) @l + Right: (right_node) @r + ] @choice) + + // Tagged with multi-capture variant → NOT flattened, creates struct + TaggedMulti = [ + Simple: (node) @val + Complex: (pair (key) @k (value) @v) + ] + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 6: Untagged alternations (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Symmetric captures → required field + UntaggedSymmetric = [ (a) @val (b) @val ] + + // Asymmetric captures → both become Optional + UntaggedAsymmetric = [ (a) @x (b) @y ] + + // Captured untagged → creates struct scope + UntaggedCaptured = [ (a) @x (b) @y ] @data + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 7: Captured sequences and nested scopes (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Captured 
sequence → creates nested struct + CapturedSeq = (outer { (inner) @x (inner2) @y } @nested) + + // Uncaptured sequence → captures propagate to parent + UncapturedSeq = (outer { (inner) @x (inner2) @y }) + + // Deeply nested scopes + NestedScopes = { { (a) @a } @inner1 { (b) @b } @inner2 } @outer + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 8: Definition references (ADR-0005, ADR-0006) + // ═══════════════════════════════════════════════════════════════════════════ + + // Base definition + Identifier = (identifier) @id + + // Reference to definition → Enter/Exit markers + RefSimple = (Identifier) + + // Captured reference → captures the reference result + RefCaptured = (Identifier) @captured_id + + // Chained references + RefChain = (RefSimple) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 9: Cardinality combinations (ADR-0009) + // ═══════════════════════════════════════════════════════════════════════════ + + // Cardinality in alternation branches + // Branch 1: @item cardinality 1, Branch 2: @item cardinality + + // Join produces + + CardinalityJoin = [ (single) @item (multi (x)+ @item) ] + + // Nested quantifiers + NestedQuant = ((item)* @inner)+ @outer + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 10: Mixed patterns (comprehensive) + // ═══════════════════════════════════════════════════════════════════════════ + + // Everything combined: field constraints, anchors, quantifiers, alternations + Complex = (module + name: (identifier) @mod_name ::string + . (import)* @imports + body: (block { + [ + Func: (function + name: (identifier) @fn_name ::string + params: (parameters { (param) @p }* @params) + body: (block) @fn_body + ) + Class: (class + name: (identifier) @cls_name ::string + body: (class_body) @cls_body + ) + ] + }* @items) . 
+ ) + + // ═══════════════════════════════════════════════════════════════════════════ + // SECTION 11: Edge cases + // ═══════════════════════════════════════════════════════════════════════════ + + // Wildcard capture + WildcardCapture = _ @any + + // String literal (anonymous node) + StringLiteral = "+" @op + + // No captures → Void type + NoCaptures = (identifier) + + // Empty alternation branch (unit variant) + EmptyBranch = [ + Some: (value) @val + None: (none_marker) + ] + "#}; + + insta::assert_snapshot!(golden_master(source), @r#" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + SimpleCapture = N0 + StringCapture = N2 + MultiCapture = N4 + AnchorFirst = N10 + AnchorLast = N14 + AnchorSibling = N18 + DeepNest = N24 + StarQuant = N32 + PlusQuant = N40 + OptQuant = N48 + QisNode = N61 + QisSequence = N72 + NoQis = N81 + TaggedRoot = N85 + TaggedCaptured = N95 + TaggedMulti = N110 + UntaggedSymmetric = N124 + UntaggedAsymmetric = N130 + UntaggedCaptured = N136 + CapturedSeq = N145 + UncapturedSeq = N155 + NestedScopes = N166 + Identifier = N178 + RefSimple = N180 + RefCaptured = N182 + RefChain = N185 + CardinalityJoin = N187 + NestedQuant = N207 + Complex = N212 + WildcardCapture = N262 + StringLiteral = N264 + NoCaptures = N266 + EmptyBranch = N267 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(name)] → ∅ + N2: (identifier) [Capture] [ToString] → N3 + N3: ε [Field(name)] → ∅ + N4: (function) → N5 + N5: [Down] (identifier) @name [Capture] [ToString] → N6 + N6: ε [Field(fn_name)] → N7 + N7: [Next] (block) @body [Capture] → N8 + N8: ε [Field(fn_body)] → N9 + N9: [Up(1)] ε → ∅ + N10: (parent) → N11 + N11: [Down.] (first_child) [Capture] → N12 + N12: ε [Field(first)] → N13 + N13: [Up(1)] ε → ∅ + N14: (parent) → N15 + N15: [Down] (last_child) [Capture] → N16 + N16: ε [Field(last)] → N17 + N17: [Up.(1)] ε → ∅ + N18: (parent) → N19 + N19: [Down] (a) [Capture] → N20 + N20: ε [Field(left)] → N21 + N21: [Next.] 
(b) [Capture] → N22 + N22: ε [Field(right)] → N23 + N23: [Up(1)] ε → ∅ + N24: (a) → N25 + N25: [Down] (b) → N26 + N26: [Down] (c) → N27 + N27: [Down] (d) [Capture] → N28 + N28: ε [Field(deep)] → N31 + N31: [Up(3)] ε → ∅ + N32: (container) → N34 + N33: [Down] (item) [Capture] → N36 + N34: ε [StartArray] → N37 + N36: ε [Push] → N37 + N37: ε → N33, N38 + N38: ε [EndArray] [Field(items)] → N39 + N39: [Up(1)] ε → ∅ + N40: (container) → N42 + N41: [Down] (item) [Capture] → N45 + N42: ε [StartArray] → N41 + N45: ε [Push] → N41, N46 + N46: ε [EndArray] [Field(items)] → N47 + N47: [Up(1)] ε → ∅ + N48: (container) → N50 + N49: [Down] (item) [Capture] → N53 + N50: ε → N49, N52 + N52: ε [Clear] → N53 + N53: ε [Field(maybe_item)] → N54 + N54: [Up(1)] ε → ∅ + N55: (function) [StartObj] → N56 + N56: [Down] (identifier) @name [Capture] → N57 + N57: ε [Field(name)] → N58 + N58: [Next] (block) @body [Capture] → N59 + N59: ε [Field(body)] → N65 + N61: ε [StartArray] → N66 + N62: ε [EndArray] → ∅ + N65: [Up(1)] ε [EndObj] [Push] → N66 + N66: ε → N55, N62 + N67: ε [StartObj] → N68 + N68: [Next] (key) [Capture] → N69 + N69: ε [Field(key)] → N70 + N70: [Next] (value) [Capture] → N76 + N72: ε [StartArray] → N77 + N73: ε [EndArray] → ∅ + N76: ε [Field(value)] [EndObj] [Push] → N77 + N77: ε → N67, N73 + N79: [Next] (item) [Capture] → N83 + N81: ε [StartArray] → N84 + N82: ε [EndArray] → ∅ + N83: ε [Field(item)] [Push] → N84 + N84: ε → N79, N82 + N85: ε → N88, N92 + N86: ε → ∅ + N88: (success) [Variant(Ok)] [Capture] → N90 + N90: ε [Field(val)] [EndVariant] → N86 + N92: (error) [Variant(Err)] [Capture] [ToString] → N94 + N94: ε [Field(msg)] [EndVariant] → N86 + N95: (wrapper) → N106 + N96: [Down] ε → N99, N103 + N99: (left_node) [Variant(Left)] [Capture] [Capture] → N101 + N101: ε [Field(l)] [EndVariant] → N108 + N103: (right_node) [Variant(Right)] [Capture] [Capture] → N105 + N105: ε [Field(r)] [EndVariant] → N108 + N106: ε [StartObj] → N96 + N108: ε [EndObj] [Field(choice)] → N109 + N109: [Up(1)] ε → ∅ + N110: ε → N113, N117 + N111: ε → ∅ + N113: (node) [Variant(Simple)] [Capture] → N115 + N115: ε [Field(val)] [EndVariant] → N111 + N117: (pair) [Variant(Complex)] [StartObj] → N118 + N118: [Down] (key) [Capture] → N119 + N119: ε [Field(k)] → N120 + N120: [Next] (value) [Capture] → N121 + N121: ε [Field(v)] → N123 + N123: [Up(1)] ε [EndObj] [EndVariant] → N111 + N124: ε → N126, N128 + N125: ε → ∅ + N126: (a) [Capture] → N127 + N127: ε [Field(val)] → N125 + N128: (b) [Capture] → N129 + N129: ε [Field(val)] → N125 + N130: ε → N132, N134 + N131: ε → ∅ + N132: (a) [Capture] → N133 + N133: ε [Field(x)] → N131 + N134: (b) [Capture] → N135 + N135: ε [Field(y)] → N131 + N136: ε [StartObj] → N138, N140 + N138: (a) [Capture] [Capture] → N139 + N139: ε [Field(x)] → N144 + N140: (b) [Capture] [Capture] → N141 + N141: ε [Field(y)] → N144 + N144: ε [EndObj] [Field(data)] → ∅ + N145: (outer) → N151 + N146: [Down] ε → N147 + N147: [Next] (inner) [Capture] [Capture] → N148 + N148: ε [Field(x)] → N149 + N149: [Next] (inner2) [Capture] → N153 + N151: ε [StartObj] → N146 + N153: ε [Field(y)] [EndObj] [Field(nested)] → N154 + N154: [Up(1)] ε → ∅ + N155: (outer) → N156 + N156: [Down] ε → N157 + N157: [Next] (inner) [Capture] → N158 + N158: ε [Field(x)] → N159 + N159: [Next] (inner2) [Capture] → N160 + N160: ε [Field(y)] → N161 + N161: [Up(1)] ε → ∅ + N163: [Next] ε → N164 + N164: [Next] (a) [Capture] [Capture] [Capture] → N172 + N166: ε [StartObj] [StartObj] → N163 + N169: [Next] ε → N170 + N170: [Next] (b) [Capture] [Capture] → N177 + 
N172: ε [Field(a)] [EndObj] [Field(inner1)] [StartObj] → N169 + N177: ε [Field(b)] [EndObj] [Field(inner2)] [EndObj] [Field(outer)] → ∅ + N178: (identifier) [Capture] → N179 + N179: ε [Field(id)] → ∅ + N180: ε +Enter(0, Identifier) → N178, N181 + N181: ε +Exit(0) → ∅ + N182: ε +Enter(1, Identifier) → N178, N183 + N183: ε +Exit(1) [Capture] → N184 + N184: ε [Field(captured_id)] → ∅ + N185: ε +Enter(2, RefSimple) → N180, N186 + N186: ε +Exit(2) → ∅ + N187: ε → N189, N191 + N188: [Up(1)] ε → ∅ + N189: (single) [Capture] → N190 + N190: ε [Field(item)] → N188 + N191: (multi) → N193 + N192: [Down] (x) [Capture] → N196 + N193: ε [StartArray] → N192 + N196: ε [Push] → N192, N197 + N197: ε [EndArray] [Field(item)] → N188 + N199: (_) [Capture] → N201 + N200: [Down] (item) [Capture] → N203 + N201: ε [StartArray] → N204 + N203: ε [Push] → N204 + N204: ε → N200, N205 + N205: ε [EndArray] [Field(inner)] → N210 + N207: ε [StartArray] → N199 + N210: [Up(1)] ε [Push] → N199, N211 + N211: ε [EndArray] [Field(outer)] → ∅ + N212: (module) → N213 + N213: [Down] (identifier) @name [Capture] [ToString] → N216 + N215: [Next.] (import) [Capture] → N218 + N216: ε [Field(mod_name)] [StartArray] → N219 + N218: ε [Push] → N219 + N219: ε → N215, N220 + N220: ε [EndArray] [Field(imports)] → N221 + N221: [Next] (block) @body → N251 + N222: [Down] ε → N223 + N223: [Next] ε → N226, N244 + N226: (function) [Variant(Func)] [StartObj] [Capture] → N227 + N227: [Down] (identifier) @name [Capture] [ToString] → N228 + N228: ε [Field(fn_name)] → N229 + N229: [Next] (parameters) @params → N233 + N230: [Down] ε → N231 + N231: [Next] (param) [Capture] [Capture] → N235 + N233: ε [StartArray] → N236 + N235: ε [Field(p)] [Push] → N236 + N236: ε → N230, N237 + N237: ε [EndArray] [Field(params)] → N238 + N238: [Up(1)] ε → N239 + N239: [Next] (block) @body [Capture] → N240 + N240: ε [Field(fn_body)] → N242 + N242: [Up(1)] ε [EndObj] [EndVariant] → N255 + N244: (class) [Variant(Class)] [StartObj] [Capture] → N245 + N245: [Down] (identifier) @name [Capture] [ToString] → N246 + N246: ε [Field(cls_name)] → N247 + N247: [Next] (class_body) @body [Capture] → N248 + N248: ε [Field(cls_body)] → N250 + N250: [Up(1)] ε [EndObj] [EndVariant] → N255 + N251: ε [StartObj] [StartArray] → N256 + N253: ε [StartObj] → N222 + N255: ε [EndObj] [Push] → N256 + N256: ε → N253, N259 + N259: ε [EndArray] [EndObj] [Field(items)] → N260 + N260: [Up(1)] ε → N261 + N261: [Up.(1)] ε → ∅ + N262: _ [Capture] → N263 + N263: ε [Field(any)] → ∅ + N264: "+" [Capture] → N265 + N265: ε [Field(op)] → ∅ + N266: (identifier) → ∅ + N267: ε → N270, N274 + N268: ε → ∅ + N270: (value) [Variant(Some)] [Capture] → N272 + N272: ε [Field(val)] [EndVariant] → N268 + N274: (none_marker) [Variant(None)] → N275 + N275: ε [EndVariant] → N268 + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + Identifier → T3 + RefSimple → Void + WildcardCapture → T4 + UntaggedSymmetric → T5 + UntaggedCaptured → T9 + UntaggedAsymmetric → T12 + UncapturedSeq → T13 + TaggedRoot → T14 + TaggedMulti → T16 + TaggedCaptured → T18 + StringLiteral → T19 + StringCapture → T20 + StarQuant → T22 + SimpleCapture → T23 + RefChain → Void + RefCaptured → T24 + QisSequence → T26 + QisNode → T28 + PlusQuant → T30 + OptQuant → T32 + NoQis → T34 + NoCaptures → Void + NestedScopes → T38 + NestedQuant → T41 + MultiCapture → T42 + EmptyBranch → T43 + DeepNest → T44 + Complex → 
T54 + CardinalityJoin → T56 + CapturedSeq → T58 + AnchorSibling → T59 + AnchorLast → T60 + AnchorFirst → T61 + + === Types === + T3: Record Identifier { + id: Node + } + T4: Record WildcardCapture { + any: Node + } + T5: Record UntaggedSymmetric { + val: Node + } + T6: Optional → Node + T7: Optional → Node + T8: Record UntaggedCapturedScope6 { + x: T6 + y: T7 + } + T9: Record UntaggedCaptured { + data: T8 + } + T10: Optional → Node + T11: Optional → Node + T12: Record UntaggedAsymmetric { + x: T10 + y: T11 + } + T13: Record UncapturedSeq { + x: Node + y: Node + } + T14: Enum TaggedRoot { + Ok: Node + Err: String + } + T15: Record TaggedMultiScope15 { + k: Node + v: Node + } + T16: Enum TaggedMulti { + Simple: Node + Complex: T15 + } + T17: Enum TaggedCapturedScope17 { + Left: Node + Right: Node + } + T18: Record TaggedCaptured { + choice: T17 + } + T19: Record StringLiteral { + op: Node + } + T20: Record StringCapture { + name: String + } + T21: ArrayStar → Node + T22: Record StarQuant { + items: T21 + } + T23: Record SimpleCapture { + name: Node + } + T24: Record RefCaptured { + captured_id: T3 + } + T25: Record QisSequenceScope25 { + key: Node + value: Node + } + T26: ArrayStar → T25 + T27: Record QisNodeScope27 { + name: Node + body: Node + } + T28: ArrayStar → T27 + T29: ArrayPlus → Node + T30: Record PlusQuant { + items: T29 + } + T31: Optional → Node + T32: Record OptQuant { + maybe_item: T31 + } + T33: ArrayStar → Node + T34: Record NoQis { + item: T33 + } + T35: Record NestedScopesScope35 { + a: Node + } + T36: Record NestedScopesScope36 { + b: Node + } + T37: Record NestedScopesScope37 { + inner1: T35 + inner2: T36 + } + T38: Record NestedScopes { + outer: T37 + } + T39: ArrayStar → Node + T40: ArrayPlus → Node + T41: Record NestedQuant { + inner: T39 + outer: T40 + } + T42: Record MultiCapture { + fn_name: String + fn_body: Node + } + T43: Enum EmptyBranch { + Some: Node + None: Void + } + T44: Record DeepNest { + deep: Node + } + T45: Optional → String + T46: ArrayStar → Node + T47: ArrayStar → Node + T48: Optional → Node + T49: Optional → String + T50: Optional → Node + T51: Record ComplexScope45 { + fn_name: T45 + p: T46 + params: T47 + fn_body: T48 + cls_name: T49 + cls_body: T50 + } + T52: ArrayStar → T51 + T53: ArrayStar → Node + T54: Record Complex { + mod_name: String + imports: T53 + items: T52 + } + T55: ArrayPlus → Node + T56: Record CardinalityJoin { + item: T55 + } + T57: Record CapturedSeqScope57 { + x: Node + y: Node + } + T58: Record CapturedSeq { + nested: T57 + } + T59: Record AnchorSibling { + left: Node + right: Node + } + T60: Record AnchorLast { + last: Node + } + T61: Record AnchorFirst { + first: Node + } + "#); +} + +/// Test specifically for ADR-0008 navigation lowering. +#[test] +fn golden_navigation_patterns() { + let source = indoc! {r#" + // Stay - first transition at root + NavStay = (root) @r + + // Down - descend to children (skip any) + NavDown = (parent (child) @c) + + // DownSkipTrivia - anchor at first child + NavDownAnchor = (parent . (child) @c) + + // Next - sibling traversal (skip any) + NavNext = (parent (a) @a (b) @b) + + // NextSkipTrivia - adjacent siblings + NavNextAnchor = (parent (a) @a . (b) @b) + + // Up - ascend (no constraint) + NavUp = (a (b (c) @c)) + + // UpSkipTrivia - must be last non-trivia + NavUpAnchor = (parent (child) @c .) + + // Multi-level Up + NavUpMulti = (a (b (c (d (e) @e)))) + + // Mixed anchors + NavMixed = (outer . (first) @f (middle) @m . (last) @l .) 
+ "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + NavStay = N0 + NavDown = N2 + NavDownAnchor = N6 + NavNext = N10 + NavNextAnchor = N16 + NavUp = N22 + NavUpAnchor = N28 + NavUpMulti = N32 + NavMixed = N42 + + N0: (root) [Capture] → N1 + N1: ε [Field(r)] → ∅ + N2: (parent) → N3 + N3: [Down] (child) [Capture] → N4 + N4: ε [Field(c)] → N5 + N5: [Up(1)] ε → ∅ + N6: (parent) → N7 + N7: [Down.] (child) [Capture] → N8 + N8: ε [Field(c)] → N9 + N9: [Up(1)] ε → ∅ + N10: (parent) → N11 + N11: [Down] (a) [Capture] → N12 + N12: ε [Field(a)] → N13 + N13: [Next] (b) [Capture] → N14 + N14: ε [Field(b)] → N15 + N15: [Up(1)] ε → ∅ + N16: (parent) → N17 + N17: [Down] (a) [Capture] → N18 + N18: ε [Field(a)] → N19 + N19: [Next.] (b) [Capture] → N20 + N20: ε [Field(b)] → N21 + N21: [Up(1)] ε → ∅ + N22: (a) → N23 + N23: [Down] (b) → N24 + N24: [Down] (c) [Capture] → N25 + N25: ε [Field(c)] → N27 + N27: [Up(2)] ε → ∅ + N28: (parent) → N29 + N29: [Down] (child) [Capture] → N30 + N30: ε [Field(c)] → N31 + N31: [Up.(1)] ε → ∅ + N32: (a) → N33 + N33: [Down] (b) → N34 + N34: [Down] (c) → N35 + N35: [Down] (d) → N36 + N36: [Down] (e) [Capture] → N37 + N37: ε [Field(e)] → N41 + N41: [Up(4)] ε → ∅ + N42: (outer) → N43 + N43: [Down.] (first) [Capture] → N44 + N44: ε [Field(f)] → N45 + N45: [Next] (middle) [Capture] → N46 + N46: ε [Field(m)] → N47 + N47: [Next.] (last) [Capture] → N48 + N48: ε [Field(l)] → N49 + N49: [Up.(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + NavUpMulti → T3 + NavUpAnchor → T4 + NavUp → T5 + NavStay → T6 + NavNextAnchor → T7 + NavNext → T8 + NavMixed → T9 + NavDownAnchor → T10 + NavDown → T11 + + === Types === + T3: Record NavUpMulti { + e: Node + } + T4: Record NavUpAnchor { + c: Node + } + T5: Record NavUp { + c: Node + } + T6: Record NavStay { + r: Node + } + T7: Record NavNextAnchor { + a: Node + b: Node + } + T8: Record NavNext { + a: Node + b: Node + } + T9: Record NavMixed { + f: Node + m: Node + l: Node + } + T10: Record NavDownAnchor { + c: Node + } + T11: Record NavDown { + c: Node + } + "); +} + +/// Test specifically for ADR-0009 type inference edge cases. +#[test] +fn golden_type_inference() { + let source = indoc! 
{r#" + // Flat scoping - nesting doesn't create data nesting + FlatScope = (a (b (c (d) @val))) + + // Reference opacity - calling doesn't inherit captures + BaseWithCapture = (identifier) @name + RefOpaque = (BaseWithCapture) + RefCaptured = (BaseWithCapture) @result + + // Tagged at root vs inline + TaggedAtRoot = [ A: (a) @x B: (b) @y ] + TaggedInline = (wrapper [ A: (a) @x B: (b) @y ]) + + // Cardinality multiplication + // outer(*) * inner(+) = * + CardMult = ((item)+ @items)* + + // QIS vs non-QIS + QisTwo = { (a) @x (b) @y }* + NoQisOne = { (a) @x }* + + // Missing field rule - asymmetric → Optional + MissingField = [ + Full: (full (a) @a (b) @b (c) @c) + Partial: (partial (a) @a) + ] + + // Synthetic naming + SyntheticNames = (foo { (bar) @bar } @baz) + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + FlatScope = N0 + BaseWithCapture = N8 + RefOpaque = N10 + RefCaptured = N12 + TaggedAtRoot = N15 + TaggedInline = N25 + CardMult = N45 + QisTwo = N54 + NoQisOne = N63 + MissingField = N67 + SyntheticNames = N85 + + N0: (a) → N1 + N1: [Down] (b) → N2 + N2: [Down] (c) → N3 + N3: [Down] (d) [Capture] → N4 + N4: ε [Field(val)] → N7 + N7: [Up(3)] ε → ∅ + N8: (identifier) [Capture] → N9 + N9: ε [Field(name)] → ∅ + N10: ε +Enter(0, BaseWithCapture) → N8, N11 + N11: ε +Exit(0) → ∅ + N12: ε +Enter(1, BaseWithCapture) → N8, N13 + N13: ε +Exit(1) [Capture] → N14 + N14: ε [Field(result)] → ∅ + N15: ε → N18, N22 + N16: ε → ∅ + N18: (a) [Variant(A)] [Capture] → N20 + N20: ε [Field(x)] [EndVariant] → N16 + N22: (b) [Variant(B)] [Capture] → N24 + N24: ε [Field(y)] [EndVariant] → N16 + N25: (wrapper) → N26 + N26: [Down] ε → N29, N33 + N29: (a) [Variant(A)] [Capture] → N31 + N31: ε [Field(x)] [EndVariant] → N36 + N33: (b) [Variant(B)] [Capture] → N35 + N35: ε [Field(y)] [EndVariant] → N36 + N36: [Up(1)] ε → ∅ + N37: (_) → N39 + N38: [Down] (item) [Capture] → N42 + N39: ε [StartArray] → N38 + N42: ε [Push] → N38, N43 + N43: ε [EndArray] [Field(items)] → N47 + N45: ε [StartArray] → N48 + N46: ε [EndArray] → ∅ + N47: [Up(1)] ε [Push] → N48 + N48: ε → N37, N46 + N49: ε [StartObj] → N50 + N50: [Next] (a) [Capture] → N51 + N51: ε [Field(x)] → N52 + N52: [Next] (b) [Capture] → N58 + N54: ε [StartArray] → N59 + N55: ε [EndArray] → ∅ + N58: ε [Field(y)] [EndObj] [Push] → N59 + N59: ε → N49, N55 + N61: [Next] (a) [Capture] → N65 + N63: ε [StartArray] → N66 + N64: ε [EndArray] → ∅ + N65: ε [Field(x)] [Push] → N66 + N66: ε → N61, N64 + N67: ε → N70, N80 + N68: ε → ∅ + N70: (full) [Variant(Full)] [StartObj] → N71 + N71: [Down] (a) [Capture] → N72 + N72: ε [Field(a)] → N73 + N73: [Next] (b) [Capture] → N74 + N74: ε [Field(b)] → N75 + N75: [Next] (c) [Capture] → N76 + N76: ε [Field(c)] → N78 + N78: [Up(1)] ε [EndObj] [EndVariant] → N68 + N80: (partial) [Variant(Partial)] → N81 + N81: [Down] (a) [Capture] → N82 + N82: ε [Field(a)] → N84 + N84: [Up(1)] ε [EndVariant] → N68 + N85: (foo) → N89 + N86: [Down] ε → N87 + N87: [Next] (bar) [Capture] [Capture] → N91 + N89: ε [StartObj] → N86 + N91: ε [Field(bar)] [EndObj] [Field(baz)] → N92 + N92: [Up(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + BaseWithCapture → T3 + TaggedInline → T6 + TaggedAtRoot → T7 + 
SyntheticNames → T9 + RefOpaque → Void + RefCaptured → T10 + QisTwo → T12 + NoQisOne → T14 + MissingField → T16 + FlatScope → T17 + CardMult → T19 + + === Types === + T3: Record BaseWithCapture { + name: Node + } + T4: Optional → Node + T5: Optional → Node + T6: Record TaggedInline { + x: T4 + y: T5 + } + T7: Enum TaggedAtRoot { + A: Node + B: Node + } + T8: Record SyntheticNamesScope8 { + bar: Node + } + T9: Record SyntheticNames { + baz: T8 + } + T10: Record RefCaptured { + result: T3 + } + T11: Record QisTwoScope11 { + x: Node + y: Node + } + T12: ArrayStar → T11 + T13: ArrayStar → Node + T14: Record NoQisOne { + x: T13 + } + T15: Record MissingFieldScope15 { + a: Node + b: Node + c: Node + } + T16: Enum MissingField { + Full: T15 + Partial: Node + } + T17: Record FlatScope { + val: Node + } + T18: ArrayStar → Node + T19: Record CardMult { + items: T18 + } + "); +} + +/// Test ADR-0005 effect stream patterns. +#[test] +fn golden_effect_patterns() { + let source = indoc! {r#" + // CaptureNode + Field + EffCapture = (node) @name + + // ToString + EffToString = (node) @name ::string + + // StartArray / Push / EndArray + EffArray = (container (item)* @items) + + // StartObject / Field / EndObject (via captured sequence) + EffObject = { (a) @x (b) @y } @obj + + // StartVariant / EndVariant (via tagged alternation) + EffVariant = [ A: (a) @x B: (b) @y ] @choice + + // Clear (via optional skip path) + EffClear = (container (item)? @maybe) + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + EffCapture = N0 + EffToString = N2 + EffArray = N4 + EffObject = N12 + EffVariant = N20 + EffClear = N33 + + N0: (node) [Capture] → N1 + N1: ε [Field(name)] → ∅ + N2: (node) [Capture] [ToString] → N3 + N3: ε [Field(name)] → ∅ + N4: (container) → N6 + N5: [Down] (item) [Capture] → N8 + N6: ε [StartArray] → N9 + N8: ε [Push] → N9 + N9: ε → N5, N10 + N10: ε [EndArray] [Field(items)] → N11 + N11: [Up(1)] ε → ∅ + N12: ε [StartObj] → N13 + N13: [Next] (a) [Capture] [Capture] → N14 + N14: ε [Field(x)] → N15 + N15: [Next] (b) [Capture] → N19 + N19: ε [Field(y)] [EndObj] [Field(obj)] → ∅ + N20: ε [StartObj] → N23, N27 + N23: (a) [Variant(A)] [Capture] [Capture] → N25 + N25: ε [Field(x)] [EndVariant] → N32 + N27: (b) [Variant(B)] [Capture] [Capture] → N29 + N29: ε [Field(y)] [EndVariant] → N32 + N32: ε [EndObj] [Field(choice)] → ∅ + N33: (container) → N35 + N34: [Down] (item) [Capture] → N38 + N35: ε → N34, N37 + N37: ε [Clear] → N38 + N38: ε [Field(maybe)] → N39 + N39: [Up(1)] ε → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + EffVariant → T4 + EffToString → T5 + EffObject → T7 + EffClear → T9 + EffCapture → T10 + EffArray → T12 + + === Types === + T3: Enum EffVariantScope3 { + A: Node + B: Node + } + T4: Record EffVariant { + choice: T3 + } + T5: Record EffToString { + name: String + } + T6: Record EffObjectScope6 { + x: Node + y: Node + } + T7: Record EffObject { + obj: T6 + } + T8: Optional → Node + T9: Record EffClear { + maybe: T8 + } + T10: Record EffCapture { + name: Node + } + T11: ArrayStar → Node + T12: Record EffArray { + items: T11 + } + "); +} + +/// Test quantifier graph structure (ADR-0005). 
+#[test] +fn golden_quantifier_graphs() { + let source = indoc! {r#" + // Greedy star: Branch.next = [match, exit] + GreedyStar = (a)* @items + + // Greedy plus: must match at least once + GreedyPlus = (a)+ @items + + // Optional: branch to match or skip + Optional = (a)? @maybe + + // Non-greedy star: Branch.next = [exit, match] + LazyStar = (a)*? @items + + // Non-greedy plus + LazyPlus = (a)+? @items + + // Quantifier on sequence (QIS triggered) + QuantSeq = { (a) @x (b) @y }* + + // Nested quantifiers + NestedQuant = (outer (inner)* @inners)+ @outers + "#}; + + insta::assert_snapshot!(golden_master(source), @r" + ═══════════════════════════════════════════════════════════════════════════════ + TRANSITION GRAPH + ═══════════════════════════════════════════════════════════════════════════════ + + GreedyStar = N1 + GreedyPlus = N7 + Optional = N13 + LazyStar = N18 + LazyPlus = N24 + QuantSeq = N34 + NestedQuant = N48 + + N0: (a) [Capture] → N3 + N1: ε [StartArray] → N4 + N3: ε [Push] → N4 + N4: ε → N0, N5 + N5: ε [EndArray] [Field(items)] → ∅ + N6: (a) [Capture] → N10 + N7: ε [StartArray] → N6 + N10: ε [Push] → N6, N11 + N11: ε [EndArray] [Field(items)] → ∅ + N12: (a) [Capture] → N16 + N13: ε → N12, N15 + N15: ε [Clear] → N16 + N16: ε [Field(maybe)] → ∅ + N17: (a) [Capture] → N20 + N18: ε [StartArray] → N21 + N20: ε [Push] → N21 + N21: ε → N22, N17 + N22: ε [EndArray] [Field(items)] → ∅ + N23: (a) [Capture] → N27 + N24: ε [StartArray] → N23 + N27: ε [Push] → N28, N23 + N28: ε [EndArray] [Field(items)] → ∅ + N29: ε [StartObj] → N30 + N30: [Next] (a) [Capture] → N31 + N31: ε [Field(x)] → N32 + N32: [Next] (b) [Capture] → N38 + N34: ε [StartArray] → N39 + N35: ε [EndArray] → ∅ + N38: ε [Field(y)] [EndObj] [Push] → N39 + N39: ε → N29, N35 + N40: (outer) [Capture] → N42 + N41: [Down] (inner) [Capture] → N44 + N42: ε [StartArray] → N45 + N44: ε [Push] → N45 + N45: ε → N41, N46 + N46: ε [EndArray] [Field(inners)] → N51 + N48: ε [StartArray] → N40 + N51: [Up(1)] ε [Push] → N40, N52 + N52: ε [EndArray] [Field(outers)] → ∅ + + ═══════════════════════════════════════════════════════════════════════════════ + TYPE INFERENCE + ═══════════════════════════════════════════════════════════════════════════════ + + === Entrypoints === + QuantSeq → T4 + Optional → T6 + NestedQuant → T9 + LazyStar → T11 + LazyPlus → T13 + GreedyStar → T15 + GreedyPlus → T17 + + === Types === + T3: Record QuantSeqScope3 { + x: Node + y: Node + } + T4: ArrayStar → T3 + T5: Optional → Node + T6: Record Optional { + maybe: T5 + } + T7: ArrayStar → Node + T8: ArrayPlus → Node + T9: Record NestedQuant { + inners: T7 + outers: T8 + } + T10: ArrayStar → Node + T11: Record LazyStar { + items: T10 + } + T12: ArrayPlus → Node + T13: Record LazyPlus { + items: T12 + } + T14: ArrayStar → Node + T15: Record GreedyStar { + items: T14 + } + T16: ArrayPlus → Node + T17: Record GreedyPlus { + items: T16 + } + "); +} diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs new file mode 100644 index 00000000..952df895 --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -0,0 +1,186 @@ +//! Epsilon elimination optimization pass. +//! +//! Reduces graph size by removing unnecessary epsilon transitions. +//! +//! # Safety Rules (from ADR-0005) +//! +//! An epsilon node CANNOT be eliminated if: +//! - It has a `RefMarker` (Enter/Exit) +//! - It has multiple successors (branch point) +//! - Its successor already has a `RefMarker` +//! 
- Both have non-Stay `Nav` that can't be merged + +use std::collections::{HashMap, HashSet}; + +use crate::ir::{Nav, NavKind}; + +use super::Query; +use super::graph::{BuildGraph, BuildMatcher, NodeId}; + +/// Statistics from epsilon elimination. +#[derive(Debug, Default)] +pub struct OptimizeStats { + pub epsilons_eliminated: usize, + pub epsilons_kept: usize, +} + +impl Query<'_> { + /// Run epsilon elimination on the graph. + /// + /// Populates `dead_nodes` with eliminated node IDs. + pub(super) fn optimize_graph(&mut self) { + let (dead, _stats) = eliminate_epsilons(&mut self.graph); + self.dead_nodes = dead; + } +} + +/// Run epsilon elimination on a BuildGraph. +/// +/// Returns the set of dead node IDs that should be skipped during emission. +pub fn eliminate_epsilons(graph: &mut BuildGraph) -> (HashSet, OptimizeStats) { + let mut stats = OptimizeStats::default(); + let mut dead_nodes: HashSet = HashSet::new(); + + let mut predecessors = build_predecessor_map(graph); + + // Process nodes in reverse order to handle chains + let node_count = graph.len() as NodeId; + for id in (0..node_count).rev() { + if dead_nodes.contains(&id) { + continue; + } + + let node = graph.node(id); + if !is_eliminable_epsilon(node, graph, &predecessors) { + if node.is_epsilon() { + stats.epsilons_kept += 1; + } + continue; + } + + let successor_id = node.successors[0]; + let effects_to_prepend = graph.node(id).effects.clone(); + let nav_to_transfer = graph.node(id).nav; + let preds = predecessors.get(&id).cloned().unwrap_or_default(); + + // Prepend effects to successor + if !effects_to_prepend.is_empty() { + let succ = graph.node_mut(successor_id); + let mut new_effects = effects_to_prepend; + new_effects.append(&mut succ.effects); + succ.effects = new_effects; + } + + // Transfer or merge nav + let successor_nav = graph.node(successor_id).nav; + if !nav_to_transfer.is_stay() { + if successor_nav.is_stay() { + graph.node_mut(successor_id).nav = nav_to_transfer; + } else if can_merge_up(nav_to_transfer, successor_nav) { + let merged = Nav::up(nav_to_transfer.level + successor_nav.level); + graph.node_mut(successor_id).nav = merged; + } + } + + // Redirect predecessors to successor + for pred_id in &preds { + if dead_nodes.contains(pred_id) { + continue; + } + let pred = graph.node_mut(*pred_id); + for succ in &mut pred.successors { + if *succ == id { + *succ = successor_id; + } + } + // Update predecessor map: pred is now a predecessor of successor + predecessors.entry(successor_id).or_default().push(*pred_id); + } + // Remove eliminated node from successor's predecessors + if let Some(succ_preds) = predecessors.get_mut(&successor_id) { + succ_preds.retain(|&p| p != id); + } + + redirect_definitions(graph, id, successor_id); + + dead_nodes.insert(id); + stats.epsilons_eliminated += 1; + } + + (dead_nodes, stats) +} + +fn is_eliminable_epsilon( + node: &super::graph::BuildNode, + graph: &BuildGraph, + predecessors: &HashMap>, +) -> bool { + if !matches!(node.matcher, BuildMatcher::Epsilon) { + return false; + } + + if node.ref_marker.is_some() { + return false; + } + + if node.successors.len() != 1 { + return false; + } + + let successor_id = node.successors[0]; + let successor = graph.node(successor_id); + + if !node.nav.is_stay() && !successor.nav.is_stay() && !can_merge_up(node.nav, successor.nav) { + return false; + } + + if !node.effects.is_empty() && successor.ref_marker.is_some() { + return false; + } + + // Don't eliminate if epsilon has effects and successor has navigation. 
+ // Effects must execute BEFORE successor's nav/match, but prepending to effects list + // would execute them AFTER nav/match. + if !node.effects.is_empty() && !successor.nav.is_stay() { + return false; + } + + // Don't eliminate if node has effects and successor is a join point. + // Merging effects onto a join point changes execution count (e.g., loop entry vs per-iteration). + if !node.effects.is_empty() { + let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); + if succ_pred_count > 1 { + return false; + } + } + + true +} + +fn build_predecessor_map(graph: &BuildGraph) -> HashMap> { + let mut predecessors: HashMap> = HashMap::new(); + + for (id, node) in graph.iter() { + for &succ in &node.successors { + predecessors.entry(succ).or_default().push(id); + } + } + + predecessors +} + +fn can_merge_up(a: Nav, b: Nav) -> bool { + a.kind == NavKind::Up && b.kind == NavKind::Up +} + +fn redirect_definitions(graph: &mut BuildGraph, old_id: NodeId, new_id: NodeId) { + let updates: Vec<_> = graph + .definitions() + .filter(|(_, entry)| *entry == old_id) + .map(|(name, _)| name) + .collect(); + + for name in updates { + graph.add_definition(name, new_id); + } +} diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs new file mode 100644 index 00000000..a3be746e --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_qis.rs @@ -0,0 +1,107 @@ +//! Quantifier-Induced Scope (QIS) detection. +//! +//! QIS triggers when a quantified expression has ≥2 propagating captures. +//! This creates an implicit object scope so captures stay coupled per-iteration. +//! +//! See ADR-0009 for full specification. + +use crate::parser::{ast, token_src}; + +use super::{QisTrigger, Query}; + +impl<'a> Query<'a> { + /// Detect Quantifier-Induced Scope triggers. + /// + /// QIS triggers when a quantified expression has ≥2 propagating captures + /// (captures not absorbed by inner scopes like `{...} @x` or `[A: ...] @x`). + pub(super) fn detect_qis(&mut self) { + let bodies: Vec<_> = self.symbol_table.values().cloned().collect(); + for body in &bodies { + self.detect_qis_in_expr(body); + } + } + + fn detect_qis_in_expr(&mut self, expr: &ast::Expr) { + match expr { + ast::Expr::QuantifiedExpr(q) => { + if let Some(inner) = q.inner() { + let captures = self.collect_propagating_captures(&inner); + if captures.len() >= 2 { + self.qis_triggers.insert(q.clone(), QisTrigger { captures }); + } + self.detect_qis_in_expr(&inner); + } + } + ast::Expr::CapturedExpr(c) => { + // Captures on sequences/alternations absorb inner captures, + // but we still recurse to find nested quantifiers + if let Some(inner) = c.inner() { + self.detect_qis_in_expr(&inner); + } + } + _ => { + for child in expr.children() { + self.detect_qis_in_expr(&child); + } + } + } + } + + /// Collect captures that propagate out of an expression (not absorbed by inner scopes). + fn collect_propagating_captures(&self, expr: &ast::Expr) -> Vec<&'a str> { + let mut captures = Vec::new(); + self.collect_propagating_captures_impl(expr, &mut captures); + captures + } + + fn collect_propagating_captures_impl(&self, expr: &ast::Expr, out: &mut Vec<&'a str>) { + match expr { + ast::Expr::CapturedExpr(c) => { + if let Some(name_token) = c.name() { + let name = token_src(&name_token, self.source); + out.push(name); + } + // Captured sequence/alternation absorbs inner captures. + // Need to look through quantifiers to find the actual container. 
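+                // Illustrative cases (mirroring graph_qis_tests): in `{ { (a) @x (b) @y } @inner }*`
+                // the `@inner` capture absorbs x/y, so only `inner` propagates and no QIS is
+                // triggered; in `{ [ (a) @x (b) @y ] }*` the alternation is uncaptured, so x and y
+                // both propagate and QIS fires with [x, y].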
+ if let Some(inner) = c.inner() + && !Self::is_scope_container(&inner) + { + self.collect_propagating_captures_impl(&inner, out); + } + } + ast::Expr::QuantifiedExpr(q) => { + // Nested quantifier: its captures propagate (with modified cardinality) + if let Some(inner) = q.inner() { + self.collect_propagating_captures_impl(&inner, out); + } + } + _ => { + for child in expr.children() { + self.collect_propagating_captures_impl(&child, out); + } + } + } + } + + /// Check if an expression is a scope container (seq/alt), looking through quantifiers. + fn is_scope_container(expr: &ast::Expr) -> bool { + match expr { + ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, + ast::Expr::QuantifiedExpr(q) => q + .inner() + .map(|i| Self::is_scope_container(&i)) + .unwrap_or(false), + _ => false, + } + } + + /// Check if a quantified expression triggers QIS. + pub fn is_qis_trigger(&self, q: &ast::QuantifiedExpr) -> bool { + self.qis_triggers.contains_key(q) + } + + /// Get QIS trigger info for a quantified expression. + pub fn qis_trigger(&self, q: &ast::QuantifiedExpr) -> Option<&QisTrigger<'a>> { + self.qis_triggers.get(q) + } +} diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs new file mode 100644 index 00000000..cb3bb29c --- /dev/null +++ b/crates/plotnik-lib/src/query/graph_qis_tests.rs @@ -0,0 +1,231 @@ +use indoc::indoc; + +use crate::Query; + +fn check_qis(source: &str) -> String { + let query = Query::try_from(source).unwrap().build_graph(); + let mut result = Vec::new(); + + for def in query.root().defs() { + let def_name = def.name().map(|t| t.text().to_string()).unwrap_or_default(); + let mut triggers: Vec<_> = query + .qis_triggers + .iter() + .filter_map(|(q, trigger)| { + // Check if this quantifier belongs to this definition + let q_range = q.text_range(); + let def_range = def.text_range(); + if q_range.start() >= def_range.start() && q_range.end() <= def_range.end() { + Some(( + q_range.start(), + format!(" QIS: [{}]", trigger.captures.join(", ")), + )) + } else { + None + } + }) + .collect(); + triggers.sort_by_key(|(pos, _)| *pos); + let triggers: Vec<_> = triggers.into_iter().map(|(_, s)| s).collect(); + + if triggers.is_empty() { + result.push(format!("{}: no QIS", def_name)); + } else { + result.push(format!("{}:", def_name)); + result.extend(triggers); + } + } + + result.join("\n") +} + +#[test] +fn single_capture_no_qis() { + let source = "Foo = { (a) @x }*"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn two_captures_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn three_captures_triggers_qis() { + let source = "Foo = { (a) @x (b) @y (c) @z }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y, z] + "); +} + +#[test] +fn captured_sequence_absorbs_inner() { + let source = "Foo = { { (a) @x (b) @y } @inner }*"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn captured_alternation_absorbs_inner() { + let source = "Foo = { [ (a) @x (b) @y ] @choice }*"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn uncaptured_alternation_propagates() { + let source = "Foo = { [ (a) @x (b) @y ] }*"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn node_with_two_captures() { + let source = indoc! 
{r#" + Foo = (function + name: (identifier) @name + body: (block) @body + )* + "#}; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [name, body] + "); +} + +#[test] +fn plus_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn optional_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }?"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn nested_quantifier_inner_qis() { + let source = "Foo = { { (a) @x (b) @y }* }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + QIS: [x, y] + "); +} + +#[test] +fn nested_quantifier_both_qis() { + // Outer quantifier has @c and @inner (2 captures) -> QIS + // Inner quantifier has @x and @y (2 captures) -> QIS + let source = "Outer = { (c) @c { (a) @x (b) @y }* @inner }+"; + + insta::assert_snapshot!(check_qis(source), @r" + Outer: + QIS: [c, inner] + QIS: [x, y] + "); +} + +#[test] +fn multiple_definitions() { + let source = indoc! {r#" + Single = { (a) @x }* + Multi = { (a) @x (b) @y }* + "#}; + + insta::assert_snapshot!(check_qis(source), @r" + Single: no QIS + Multi: + QIS: [x, y] + "); +} + +#[test] +fn no_quantifier_no_qis() { + let source = "Foo = { (a) @x (b) @y }"; + + insta::assert_snapshot!(check_qis(source), @"Foo: no QIS"); +} + +#[test] +fn lazy_quantifier_triggers_qis() { + let source = "Foo = { (a) @x (b) @y }*?"; + + insta::assert_snapshot!(check_qis(source), @r" + Foo: + QIS: [x, y] + "); +} + +#[test] +fn qis_graph_has_object_effects() { + // Verify that QIS-triggered quantifiers emit StartObject/EndObject + let source = "Foo = { (a) @x (b) @y }*"; + let (_query, pre_opt) = Query::try_from(source) + .unwrap() + .build_graph_with_pre_opt_dump(); + + // QIS adds StartObj/EndObj around each iteration to keep captures coupled. + // Sequences themselves don't add object scope (captures propagate to parent). + let start_count = pre_opt.matches("StartObj").count(); + let end_count = pre_opt.matches("EndObj").count(); + + assert_eq!( + start_count, 1, + "QIS graph should have 1 StartObj (from QIS loop):\n{}", + pre_opt + ); + assert_eq!( + end_count, 1, + "QIS graph should have 1 EndObj (from QIS loop):\n{}", + pre_opt + ); +} + +#[test] +fn non_qis_graph_no_object_effects() { + // Single capture should NOT trigger QIS object wrapping + let source = "Foo = { (a) @x }*"; + let (_query, pre_opt) = Query::try_from(source) + .unwrap() + .build_graph_with_pre_opt_dump(); + + // Non-QIS quantifiers don't need object scope - captures propagate with array cardinality. + // Sequences themselves don't add object scope either. + let start_count = pre_opt.matches("StartObj").count(); + let end_count = pre_opt.matches("EndObj").count(); + + assert_eq!( + start_count, 0, + "Non-QIS graph should have no StartObj:\n{}", + pre_opt + ); + assert_eq!( + end_count, 0, + "Non-QIS graph should have no EndObj:\n{}", + pre_opt + ); +} diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 203b4197..321bf01e 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -1,23 +1,45 @@ //! Query processing pipeline. //! -//! Stages: parse → alt_kinds → symbol_table → recursion → shapes. +//! Stages: parse → alt_kinds → symbol_table → recursion → shapes → [qis → build_graph]. //! Each stage populates its own diagnostics. Use `is_valid()` to check //! if any stage produced errors. +//! +//! 
The `build_graph` stage is optional and constructs the transition graph +//! for compilation to binary IR. QIS detection runs as part of this stage. mod dump; +mod graph_qis; mod invariants; mod printer; pub use printer::QueryPrinter; pub mod alt_kinds; +pub mod graph; +mod graph_build; +mod graph_dump; +mod graph_optimize; #[cfg(feature = "plotnik-langs")] pub mod link; pub mod recursion; pub mod shapes; pub mod symbol_table; +pub mod typing; + +pub use graph::{BuildEffect, BuildGraph, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; +pub use graph_optimize::OptimizeStats; +pub use symbol_table::UNNAMED_DEF; +pub use typing::{ + InferredMember, InferredTypeDef, TypeDescription, TypeInferenceResult, UnificationError, +}; #[cfg(test)] mod alt_kinds_tests; +#[cfg(test)] +mod graph_build_tests; +#[cfg(test)] +mod graph_master_test; +#[cfg(test)] +mod graph_qis_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] @@ -30,8 +52,10 @@ mod recursion_tests; mod shapes_tests; #[cfg(test)] mod symbol_table_tests; +#[cfg(test)] +mod typing_tests; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; #[cfg(feature = "plotnik-langs")] use plotnik_langs::{NodeFieldId, NodeTypeId}; @@ -55,9 +79,21 @@ use symbol_table::SymbolTable; /// Create with [`new`](Self::new), optionally configure fuel limits, /// then call [`exec`](Self::exec) to run analysis. /// +/// For compilation, call [`build_graph`](Self::build_graph) after `exec`. +/// /// Check [`is_valid`](Self::is_valid) or [`diagnostics`](Self::diagnostics) /// to determine if the query has syntax/semantic issues. +/// Quantifier-Induced Scope trigger info. +/// +/// When a quantified expression has ≥2 propagating captures, QIS creates +/// an implicit object scope so captures stay coupled per-iteration. #[derive(Debug, Clone)] +pub struct QisTrigger<'a> { + /// Capture names that propagate from this quantified expression. + pub captures: Vec<&'a str>, +} + +#[derive(Debug)] pub struct Query<'a> { source: &'a str, ast: Root, @@ -77,6 +113,14 @@ pub struct Query<'a> { shapes_diagnostics: Diagnostics, #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics, + // Graph compilation fields + graph: BuildGraph<'a>, + dead_nodes: HashSet, + type_info: TypeInferenceResult<'a>, + /// QIS triggers: quantified expressions with ≥2 propagating captures. + qis_triggers: HashMap>, + /// Counter for generating unique ref IDs during graph construction. + next_ref_id: u32, } fn empty_root() -> Root { @@ -111,6 +155,11 @@ impl<'a> Query<'a> { shapes_diagnostics: Diagnostics::new(), #[cfg(feature = "plotnik-langs")] link_diagnostics: Diagnostics::new(), + graph: BuildGraph::default(), + dead_nodes: HashSet::new(), + type_info: TypeInferenceResult::default(), + qis_triggers: HashMap::new(), + next_ref_id: 0, } } @@ -145,6 +194,36 @@ impl<'a> Query<'a> { Ok(self) } + /// Build the transition graph for compilation. + /// + /// This is an optional step after `exec`. It detects QIS triggers, + /// constructs the graph, runs epsilon elimination, and infers types. + /// + /// Only runs if the query is valid (no errors from previous passes). + pub fn build_graph(mut self) -> Self { + if !self.is_valid() { + return self; + } + self.detect_qis(); + self.construct_graph(); + self.infer_types(); // Run before optimization to avoid merged effects + self.optimize_graph(); + self + } + + /// Build graph and return dump of graph before optimization (for debugging). 
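+    // Sketch of intended use (mirroring `qis_graph_has_object_effects` in graph_qis_tests),
+    // assuming a QIS-triggering query such as `{ (a) @x (b) @y }*`:
+    //   let (q, pre_opt) = Query::try_from(source)?.build_graph_with_pre_opt_dump();
+    //   assert_eq!(pre_opt.matches("StartObj").count(), 1);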
+ pub fn build_graph_with_pre_opt_dump(mut self) -> (Self, String) { + if !self.is_valid() { + return (self, String::new()); + } + self.detect_qis(); + self.construct_graph(); + let pre_opt_dump = self.graph.dump(); + self.infer_types(); + self.optimize_graph(); + (self, pre_opt_dump) + } + fn try_parse(&mut self) -> Result<()> { let tokens = lex(self.source); let parser = Parser::new(self.source, tokens) @@ -170,6 +249,21 @@ impl<'a> Query<'a> { &self.ast } + /// Access the constructed graph. + pub fn graph(&self) -> &BuildGraph<'a> { + &self.graph + } + + /// Access the set of dead nodes (eliminated by optimization). + pub fn dead_nodes(&self) -> &HashSet { + &self.dead_nodes + } + + /// Access the type inference result. + pub fn type_info(&self) -> &TypeInferenceResult<'a> { + &self.type_info + } + pub(crate) fn shape_cardinality(&self, node: &SyntaxNode) -> ShapeCardinality { // Error nodes are invalid if node.kind() == SyntaxKind::Error { @@ -220,6 +314,7 @@ impl<'a> Query<'a> { all.extend(self.shapes_diagnostics.clone()); #[cfg(feature = "plotnik-langs")] all.extend(self.link_diagnostics.clone()); + all.extend(self.type_info.diagnostics.clone()); all } @@ -251,6 +346,11 @@ impl<'a> Query<'a> { && !self.recursion_diagnostics.has_errors() && !self.shapes_diagnostics.has_errors() } + + /// Check if graph compilation produced type errors. + pub fn has_type_errors(&self) -> bool { + self.type_info.has_errors() + } } impl<'a> TryFrom<&'a str> for Query<'a> { diff --git a/crates/plotnik-lib/src/query/printer_tests.rs b/crates/plotnik-lib/src/query/printer_tests.rs index be5b8f51..b7813b96 100644 --- a/crates/plotnik-lib/src/query/printer_tests.rs +++ b/crates/plotnik-lib/src/query/printer_tests.rs @@ -160,6 +160,9 @@ fn printer_symbols_with_cardinalities() { insta::assert_snapshot!(q.printer().only_symbols(true).with_cardinalities(true).dump(), @r" A¹ B⁺ + _ + A¹ + B⁺ "); } @@ -175,6 +178,9 @@ fn printer_symbols_with_refs() { A B A + _ + B + A "); } @@ -193,6 +199,10 @@ fn printer_symbols_cycle() { B A B (cycle) + _ + A + B + A (cycle) "); } @@ -200,7 +210,10 @@ fn printer_symbols_cycle() { fn printer_symbols_undefined_ref() { let input = "(call (Undefined))"; let q = Query::try_from(input).unwrap(); - insta::assert_snapshot!(q.printer().only_symbols(true).dump(), @""); + insta::assert_snapshot!(q.printer().only_symbols(true).dump(), @r" + _ + Undefined? + "); } #[test] diff --git a/crates/plotnik-lib/src/query/symbol_table.rs b/crates/plotnik-lib/src/query/symbol_table.rs index 4066d62d..6ab31455 100644 --- a/crates/plotnik-lib/src/query/symbol_table.rs +++ b/crates/plotnik-lib/src/query/symbol_table.rs @@ -6,6 +6,10 @@ use indexmap::IndexMap; +/// Sentinel name for unnamed definitions (bare expressions at root level). +/// Code generators can emit whatever name they want for this. 
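+// For example, a bare root expression such as `(identifier)` is registered under this
+// sentinel, so `dump_symbols()` prints a single `_` entry (see the `no_definitions` test).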
+pub const UNNAMED_DEF: &str = "_"; + use crate::diagnostics::DiagnosticKind; use crate::parser::{Expr, Ref, ast, token_src}; @@ -17,13 +21,14 @@ impl<'a> Query<'a> { pub(super) fn resolve_names(&mut self) { // Pass 1: collect definitions for def in self.ast.defs() { - let Some(name_token) = def.name() else { - continue; + let (name, is_named) = match def.name() { + Some(token) => (token_src(&token, self.source), true), + None => (UNNAMED_DEF, false), }; - let name = token_src(&name_token, self.source); - - if self.symbol_table.contains_key(name) { + // Skip duplicate check for unnamed definitions (already diagnosed by parser) + if is_named && self.symbol_table.contains_key(name) { + let name_token = def.name().unwrap(); self.resolve_diagnostics .report(DiagnosticKind::DuplicateDefinition, name_token.text_range()) .message(name) @@ -31,6 +36,11 @@ impl<'a> Query<'a> { continue; } + // For unnamed defs, only keep the last one (parser already warned about others) + if !is_named && self.symbol_table.contains_key(name) { + self.symbol_table.shift_remove(name); + } + let Some(body) = def.body() else { continue; }; diff --git a/crates/plotnik-lib/src/query/symbol_table_tests.rs b/crates/plotnik-lib/src/query/symbol_table_tests.rs index b18fc915..979d8f9e 100644 --- a/crates/plotnik-lib/src/query/symbol_table_tests.rs +++ b/crates/plotnik-lib/src/query/symbol_table_tests.rs @@ -180,7 +180,11 @@ fn entry_point_reference() { let query = Query::try_from(input).unwrap(); assert!(query.is_valid()); - insta::assert_snapshot!(query.dump_symbols(), @"Expr"); + insta::assert_snapshot!(query.dump_symbols(), @r" + Expr + _ + Expr + "); } #[test] @@ -202,7 +206,7 @@ fn no_definitions() { let input = "(identifier)"; let query = Query::try_from(input).unwrap(); assert!(query.is_valid()); - insta::assert_snapshot!(query.dump_symbols(), @""); + insta::assert_snapshot!(query.dump_symbols(), @"_"); } #[test] diff --git a/crates/plotnik-lib/src/query/typing.rs b/crates/plotnik-lib/src/query/typing.rs new file mode 100644 index 00000000..d6faf758 --- /dev/null +++ b/crates/plotnik-lib/src/query/typing.rs @@ -0,0 +1,1020 @@ +//! AST-based type inference for Plotnik queries. +//! +//! Analyzes query AST to determine output types. +//! Rules follow ADR-0009 (Type System). +//! +//! # Design +//! +//! Unlike graph-based inference which must reconstruct structure from CFG traversal, +//! AST-based inference directly walks the tree structure: +//! - Sequences → `SeqExpr` +//! - Alternations → `AltExpr` with `.kind()` for tagged/untagged +//! - Quantifiers → `QuantifiedExpr` +//! - Captures → `CapturedExpr` +//! +//! This eliminates dry-run traversal, reconvergence detection, and scope stack management. + +use std::collections::{HashMap, HashSet}; + +use indexmap::IndexMap; +use rowan::TextRange; + +use crate::diagnostics::{DiagnosticKind, Diagnostics}; +use crate::ir::{TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind}; +use crate::parser::ast::{self, AltKind, Expr}; +use crate::parser::token_src; + +use super::Query; + +/// Result of type inference. +#[derive(Debug, Default)] +pub struct TypeInferenceResult<'src> { + pub type_defs: Vec>, + pub entrypoint_types: IndexMap<&'src str, TypeId>, + pub diagnostics: Diagnostics, + pub errors: Vec>, +} + +/// Error when types cannot be unified in alternation branches. 
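+// Sketch of a failing case: in an untagged alternation `[ (a) @val (b) @val ::string ]`,
+// `val` is a Node in one branch and a String in the other; the shapes cannot be merged
+// and the mismatch is rendered as "Node vs String".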
+#[derive(Debug, Clone)] +pub struct UnificationError<'src> { + pub field: &'src str, + pub definition: &'src str, + pub types_found: Vec, + pub spans: Vec, +} + +/// Human-readable type description for error messages. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TypeDescription { + Node, + String, + Struct(Vec), +} + +impl std::fmt::Display for TypeDescription { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TypeDescription::Node => write!(f, "Node"), + TypeDescription::String => write!(f, "String"), + TypeDescription::Struct(fields) => { + write!(f, "Struct {{ {} }}", fields.join(", ")) + } + } + } +} + +/// An inferred type definition. +#[derive(Debug, Clone)] +pub struct InferredTypeDef<'src> { + pub kind: TypeKind, + pub name: Option<&'src str>, + pub members: Vec>, + pub inner_type: Option, +} + +/// A field (for Record) or variant (for Enum). +#[derive(Debug, Clone)] +pub struct InferredMember<'src> { + pub name: &'src str, + pub ty: TypeId, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Cardinality +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +enum Cardinality { + #[default] + One, + Optional, + Star, + Plus, +} + +impl Cardinality { + /// Join cardinalities when merging alternation branches. + fn join(self, other: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, other) { + (One, One) => One, + (One, Optional) | (Optional, One) | (Optional, Optional) => Optional, + (Plus, Plus) => Plus, + (One, Plus) | (Plus, One) => Plus, + _ => Star, + } + } + + fn make_optional(self) -> Cardinality { + use Cardinality::*; + match self { + One => Optional, + Plus => Star, + x => x, + } + } + + /// Multiply cardinalities (outer * inner). 
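+    // Examples of the intended algebra (see `CardMult` in golden_type_inference):
+    //   One * x = x, Optional * Optional = Optional, Plus * Plus = Plus,
+    //   and any other mix (e.g. Star * Plus) widens to Star.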
+ fn multiply(self, inner: Cardinality) -> Cardinality { + use Cardinality::*; + match (self, inner) { + (One, x) => x, + (x, One) => x, + (Optional, Optional) => Optional, + (Plus, Plus) => Plus, + _ => Star, + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Type shape for unification checking +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq, Eq)] +enum TypeShape { + Primitive(TypeId), +} + +impl TypeShape { + fn to_description(&self) -> TypeDescription { + match self { + TypeShape::Primitive(TYPE_NODE) => TypeDescription::Node, + TypeShape::Primitive(TYPE_STR) => TypeDescription::String, + TypeShape::Primitive(_) => TypeDescription::Node, + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Field tracking within a scope +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +struct FieldInfo { + base_type: TypeId, + shape: TypeShape, + cardinality: Cardinality, + branch_count: usize, + spans: Vec, +} + +#[derive(Debug, Clone, Default)] +struct ScopeInfo<'src> { + fields: IndexMap<&'src str, FieldInfo>, + #[allow(dead_code)] // May be used for future enum variant tracking + variants: IndexMap<&'src str, ScopeInfo<'src>>, + #[allow(dead_code)] + has_variants: bool, +} + +impl<'src> ScopeInfo<'src> { + fn add_field( + &mut self, + name: &'src str, + base_type: TypeId, + cardinality: Cardinality, + span: TextRange, + ) { + let shape = TypeShape::Primitive(base_type); + if let Some(existing) = self.fields.get_mut(name) { + existing.cardinality = existing.cardinality.join(cardinality); + existing.branch_count += 1; + existing.spans.push(span); + } else { + self.fields.insert( + name, + FieldInfo { + base_type, + shape, + cardinality, + branch_count: 1, + spans: vec![span], + }, + ); + } + } + + fn merge_from(&mut self, other: ScopeInfo<'src>) -> Vec> { + let mut errors = Vec::new(); + + for (name, other_info) in other.fields { + if let Some(existing) = self.fields.get_mut(name) { + if existing.shape != other_info.shape { + errors.push(MergeError { + field: name, + shapes: vec![existing.shape.clone(), other_info.shape.clone()], + spans: existing + .spans + .iter() + .chain(&other_info.spans) + .cloned() + .collect(), + }); + } + existing.cardinality = existing.cardinality.join(other_info.cardinality); + existing.branch_count += other_info.branch_count; + existing.spans.extend(other_info.spans); + } else { + self.fields.insert(name, other_info); + } + } + + errors + } + + fn apply_optionality(&mut self, total_branches: usize) { + for info in self.fields.values_mut() { + if info.branch_count < total_branches { + info.cardinality = info.cardinality.make_optional(); + } + } + } + + #[allow(dead_code)] // May be useful for future scope analysis + fn is_empty(&self) -> bool { + self.fields.is_empty() && self.variants.is_empty() + } +} + +#[derive(Debug)] +struct MergeError<'src> { + field: &'src str, + shapes: Vec, + spans: Vec, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Inference result from expression +// ───────────────────────────────────────────────────────────────────────────── + +/// What an expression produces when evaluated. +#[derive(Debug, Clone)] +struct ExprResult { + /// Base type (before cardinality wrapping). + base_type: TypeId, + /// Cardinality modifier. 
+ cardinality: Cardinality, + /// True if this result represents a meaningful type (not just default Node). + /// Used to distinguish QIS array results from simple uncaptured expressions. + is_meaningful: bool, +} + +impl ExprResult { + fn node() -> Self { + Self { + base_type: TYPE_NODE, + cardinality: Cardinality::One, + is_meaningful: false, + } + } + + fn void() -> Self { + Self { + base_type: TYPE_VOID, + cardinality: Cardinality::One, + is_meaningful: false, + } + } + + fn meaningful(type_id: TypeId) -> Self { + Self { + base_type: type_id, + cardinality: Cardinality::One, + is_meaningful: true, + } + } + + /// Type is known but doesn't contribute to definition result (e.g., opaque references). + fn opaque(type_id: TypeId) -> Self { + Self { + base_type: type_id, + cardinality: Cardinality::One, + is_meaningful: false, + } + } + + fn with_cardinality(mut self, card: Cardinality) -> Self { + self.cardinality = card; + self + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Inference context +// ───────────────────────────────────────────────────────────────────────────── + +struct InferenceContext<'src> { + source: &'src str, + qis_triggers: HashSet, + type_defs: Vec>, + next_type_id: TypeId, + diagnostics: Diagnostics, + errors: Vec>, + current_def_name: &'src str, + /// Map from definition name to its computed type. + definition_types: HashMap<&'src str, TypeId>, +} + +impl<'src> InferenceContext<'src> { + fn new(source: &'src str, qis_triggers: HashSet) -> Self { + Self { + source, + qis_triggers, + type_defs: Vec::new(), + next_type_id: 3, // 0=void, 1=node, 2=str + diagnostics: Diagnostics::default(), + errors: Vec::new(), + current_def_name: "", + definition_types: HashMap::new(), + } + } + + fn alloc_type_id(&mut self) -> TypeId { + let id = self.next_type_id; + self.next_type_id += 1; + id + } + + // ───────────────────────────────────────────────────────────────────────── + // Definition inference + // ───────────────────────────────────────────────────────────────────────── + + fn infer_definition(&mut self, def_name: &'src str, body: &Expr) -> TypeId { + self.current_def_name = def_name; + + let mut scope = ScopeInfo::default(); + let mut merge_errors = Vec::new(); + + // Special case: tagged alternation at definition root creates enum + if let Expr::AltExpr(alt) = body + && alt.kind() == AltKind::Tagged + { + return self.infer_tagged_alternation_as_enum(def_name, alt, &mut merge_errors); + } + + // General case: infer expression and collect captures into scope + let result = self.infer_expr(body, &mut scope, Cardinality::One, &mut merge_errors); + + self.report_merge_errors(&merge_errors); + + // Build result type from scope + if !scope.fields.is_empty() { + self.create_struct_type(def_name, &scope) + } else if result.is_meaningful { + // QIS or other expressions that produce a meaningful type without populating scope + result.base_type + } else { + TYPE_VOID + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Expression inference + // ───────────────────────────────────────────────────────────────────────── + + fn infer_expr( + &mut self, + expr: &Expr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + match expr { + Expr::CapturedExpr(c) => self.infer_captured(c, scope, outer_card, errors), + Expr::QuantifiedExpr(q) => self.infer_quantified(q, scope, outer_card, errors), + Expr::SeqExpr(s) => self.infer_sequence(s, scope, outer_card, 
errors), + Expr::AltExpr(a) => self.infer_alternation(a, scope, outer_card, errors), + Expr::NamedNode(n) => self.infer_named_node(n, scope, outer_card, errors), + Expr::FieldExpr(f) => self.infer_field_expr(f, scope, outer_card, errors), + Expr::Ref(r) => self.infer_ref(r), + Expr::AnonymousNode(_) => ExprResult::node(), + } + } + + fn infer_captured( + &mut self, + c: &ast::CapturedExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let capture_name = c.name().map(|t| token_src(&t, self.source)).unwrap_or("_"); + let span = c.text_range(); + let has_string_annotation = c + .type_annotation() + .and_then(|t| t.name()) + .is_some_and(|n| n.text() == "string"); + + let Some(inner) = c.inner() else { + return ExprResult::node(); + }; + + // Check if inner is a scope container (seq/alt) + let is_scope_container = matches!(inner, Expr::SeqExpr(_) | Expr::AltExpr(_)); + + if is_scope_container { + // Captured scope container: creates nested type + let nested_type = self.infer_captured_container(capture_name, &inner, errors); + let result = ExprResult::meaningful(nested_type); + let effective_card = outer_card.multiply(result.cardinality); + scope.add_field(capture_name, result.base_type, effective_card, span); + result + } else { + // Simple capture: just capture the result + let result = self.infer_expr(&inner, scope, outer_card, errors); + let base_type = if has_string_annotation { + TYPE_STR + } else { + result.base_type + }; + let effective_card = outer_card.multiply(result.cardinality); + scope.add_field(capture_name, base_type, effective_card, span); + ExprResult::meaningful(base_type).with_cardinality(result.cardinality) + } + } + + fn infer_captured_container( + &mut self, + _capture_name: &'src str, + inner: &Expr, + errors: &mut Vec>, + ) -> TypeId { + match inner { + Expr::SeqExpr(s) => { + let mut nested_scope = ScopeInfo::default(); + for child in s.children() { + self.infer_expr(&child, &mut nested_scope, Cardinality::One, errors); + } + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) + } + Expr::AltExpr(a) => { + if a.kind() == AltKind::Tagged { + // Captured tagged alternation → Enum + let type_name = self.generate_scope_name(); + self.infer_tagged_alternation_as_enum(type_name, a, errors) + } else { + // Captured untagged alternation → Struct with merged fields + let mut nested_scope = ScopeInfo::default(); + self.infer_untagged_alternation(a, &mut nested_scope, Cardinality::One, errors); + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) + } + } + _ => { + // Not a container - shouldn't reach here + TYPE_NODE + } + } + } + + fn infer_quantified( + &mut self, + q: &ast::QuantifiedExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let Some(inner) = q.inner() else { + return ExprResult::node(); + }; + + let quant_card = self.quantifier_cardinality(q); + let is_qis = self.qis_triggers.contains(q); + + if is_qis { + // QIS: create implicit scope for multiple captures + let mut nested_scope = ScopeInfo::default(); + self.infer_expr(&inner, &mut nested_scope, Cardinality::One, errors); + + let element_type = if !nested_scope.fields.is_empty() { + let type_name = self.generate_scope_name(); + self.create_struct_type(type_name, &nested_scope) + } else { + TYPE_NODE + }; + + // Wrap with array type - this is a meaningful result + let array_type = 
self.wrap_with_cardinality(element_type, quant_card); + ExprResult::meaningful(array_type) + } else { + // No QIS: captures propagate with multiplied cardinality + let combined_card = outer_card.multiply(quant_card); + let result = self.infer_expr(&inner, scope, combined_card, errors); + // Return result with quantifier's cardinality so captured quantifiers work correctly + ExprResult { + base_type: result.base_type, + cardinality: quant_card.multiply(result.cardinality), + is_meaningful: result.is_meaningful, + } + } + } + + fn infer_sequence( + &mut self, + s: &ast::SeqExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Uncaptured sequence: captures propagate to parent scope + let mut last_result = ExprResult::void(); + for child in s.children() { + last_result = self.infer_expr(&child, scope, outer_card, errors); + } + last_result + } + + fn infer_alternation( + &mut self, + a: &ast::AltExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Uncaptured alternation (tagged or untagged): captures propagate with optionality + self.infer_untagged_alternation(a, scope, outer_card, errors) + } + + fn infer_untagged_alternation( + &mut self, + a: &ast::AltExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + let branches: Vec<_> = a.branches().collect(); + let total_branches = branches.len(); + + if total_branches == 0 { + return ExprResult::void(); + } + + let mut merged_scope = ScopeInfo::default(); + + for branch in &branches { + let Some(body) = branch.body() else { + continue; + }; + let mut branch_scope = ScopeInfo::default(); + self.infer_expr(&body, &mut branch_scope, outer_card, errors); + errors.extend(merged_scope.merge_from(branch_scope)); + } + + // Apply optionality for fields not present in all branches + merged_scope.apply_optionality(total_branches); + + // Merge into parent scope + errors.extend(scope.merge_from(merged_scope)); + + ExprResult::node() + } + + fn infer_tagged_alternation_as_enum( + &mut self, + type_name: &'src str, + a: &ast::AltExpr, + errors: &mut Vec>, + ) -> TypeId { + let mut variants = IndexMap::new(); + + for branch in a.branches() { + let tag = branch + .label() + .map(|t| token_src(&t, self.source)) + .unwrap_or("_"); + let Some(body) = branch.body() else { + variants.insert(tag, ScopeInfo::default()); + continue; + }; + + let mut variant_scope = ScopeInfo::default(); + self.infer_expr(&body, &mut variant_scope, Cardinality::One, errors); + variants.insert(tag, variant_scope); + } + + self.create_enum_type_from_variants(type_name, &variants) + } + + fn infer_named_node( + &mut self, + n: &ast::NamedNode, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Named nodes have children - recurse into them + for child in n.children() { + self.infer_expr(&child, scope, outer_card, errors); + } + ExprResult::node() + } + + fn infer_field_expr( + &mut self, + f: &ast::FieldExpr, + scope: &mut ScopeInfo<'src>, + outer_card: Cardinality, + errors: &mut Vec>, + ) -> ExprResult { + // Field constraint (name: expr) - just recurse + if let Some(value) = f.value() { + return self.infer_expr(&value, scope, outer_card, errors); + } + ExprResult::node() + } + + fn infer_ref(&self, r: &ast::Ref) -> ExprResult { + // References are opaque - captures don't propagate from referenced definition. 
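+        // For instance (from golden_type_inference): with `BaseWithCapture = (identifier) @name`,
+        // `RefOpaque = (BaseWithCapture)` infers Void, while `RefCaptured = (BaseWithCapture) @result`
+        // yields a record whose `result` field reuses BaseWithCapture's inferred type.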
+ // Return the type (for use when captured) but mark as not meaningful + // so uncaptured refs don't affect definition's result type. + let ref_name = r.name().map(|t| t.text().to_string()); + if let Some(name) = ref_name + && let Some(&type_id) = self.definition_types.get(name.as_str()) + { + return ExprResult::opaque(type_id); + } + ExprResult::node() + } + + // ───────────────────────────────────────────────────────────────────────── + // Helpers + // ───────────────────────────────────────────────────────────────────────── + + fn quantifier_cardinality(&self, q: &ast::QuantifiedExpr) -> Cardinality { + let Some(op) = q.operator() else { + return Cardinality::One; + }; + use crate::parser::cst::SyntaxKind; + match op.kind() { + SyntaxKind::Star | SyntaxKind::StarQuestion => Cardinality::Star, + SyntaxKind::Plus | SyntaxKind::PlusQuestion => Cardinality::Plus, + SyntaxKind::Question | SyntaxKind::QuestionQuestion => Cardinality::Optional, + _ => Cardinality::One, + } + } + + fn generate_scope_name(&self) -> &'src str { + let name = format!("{}Scope{}", self.current_def_name, self.next_type_id); + Box::leak(name.into_boxed_str()) + } + + fn create_struct_type(&mut self, name: &'src str, scope: &ScopeInfo<'src>) -> TypeId { + let members: Vec<_> = scope + .fields + .iter() + .map(|(field_name, info)| { + let member_type = self.wrap_with_cardinality(info.base_type, info.cardinality); + InferredMember { + name: field_name, + ty: member_type, + } + }) + .collect(); + + let type_id = self.alloc_type_id(); + + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Record, + name: Some(name), + members, + inner_type: None, + }); + + type_id + } + + fn create_enum_type_from_variants( + &mut self, + name: &'src str, + variants: &IndexMap<&'src str, ScopeInfo<'src>>, + ) -> TypeId { + let mut members = Vec::new(); + + for (tag, variant_scope) in variants { + let variant_type = if variant_scope.fields.is_empty() { + TYPE_VOID + } else if variant_scope.fields.len() == 1 { + // Single-capture variant: flatten (ADR-0007) + let (_, info) = variant_scope.fields.iter().next().unwrap(); + self.wrap_with_cardinality(info.base_type, info.cardinality) + } else { + let variant_name = self.generate_scope_name(); + self.create_struct_type(variant_name, variant_scope) + }; + members.push(InferredMember { + name: tag, + ty: variant_type, + }); + } + + let type_id = self.alloc_type_id(); + + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Enum, + name: Some(name), + members, + inner_type: None, + }); + + type_id + } + + fn wrap_with_cardinality(&mut self, base: TypeId, card: Cardinality) -> TypeId { + match card { + Cardinality::One => base, + Cardinality::Optional => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::Optional, + name: None, + members: Vec::new(), + inner_type: Some(base), + }); + type_id + } + Cardinality::Star => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::ArrayStar, + name: None, + members: Vec::new(), + inner_type: Some(base), + }); + type_id + } + Cardinality::Plus => { + let type_id = self.alloc_type_id(); + self.type_defs.push(InferredTypeDef { + kind: TypeKind::ArrayPlus, + name: None, + members: Vec::new(), + inner_type: Some(base), + }); + type_id + } + } + } + + fn report_merge_errors(&mut self, merge_errors: &[MergeError<'src>]) { + for err in merge_errors { + let types_str = err + .shapes + .iter() + .map(|s| s.to_description().to_string()) + .collect::>() + .join(" vs "); + + 
let primary_span = err.spans.first().copied().unwrap_or_default(); + let mut builder = self + .diagnostics + .report(DiagnosticKind::IncompatibleTypes, primary_span) + .message(types_str); + + for span in err.spans.iter().skip(1) { + builder = builder.related_to("also captured here", *span); + } + builder + .hint(format!( + "capture `{}` has incompatible types across branches", + err.field + )) + .emit(); + + self.errors.push(UnificationError { + field: err.field, + definition: self.current_def_name, + types_found: err.shapes.iter().map(|s| s.to_description()).collect(), + spans: err.spans.clone(), + }); + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Query integration +// ───────────────────────────────────────────────────────────────────────────── + +impl<'a> Query<'a> { + /// Run type inference on the query AST. + pub(super) fn infer_types(&mut self) { + // Collect QIS triggers upfront to avoid borrowing issues + let qis_triggers: HashSet<_> = self.qis_triggers.keys().cloned().collect(); + let sorted = self.topological_sort_definitions_ast(); + + let mut ctx = InferenceContext::new(self.source, qis_triggers); + + // Process definitions in dependency order + for (name, body) in &sorted { + let type_id = ctx.infer_definition(name, body); + ctx.definition_types.insert(name, type_id); + } + + // Preserve symbol table order for entrypoints + for (name, _) in &sorted { + if let Some(&type_id) = ctx.definition_types.get(name) { + self.type_info.entrypoint_types.insert(*name, type_id); + } + } + self.type_info.type_defs = ctx.type_defs; + self.type_info.diagnostics = ctx.diagnostics; + self.type_info.errors = ctx.errors; + } + + /// Topologically sort definitions for processing order. + fn topological_sort_definitions_ast(&self) -> Vec<(&'a str, ast::Expr)> { + use std::collections::{HashSet, VecDeque}; + + let definitions: Vec<_> = self + .symbol_table + .iter() + .map(|(&name, body)| (name, body.clone())) + .collect(); + let def_names: HashSet<&str> = definitions.iter().map(|(name, _)| *name).collect(); + + // Build dependency graph from AST references + let mut deps: HashMap<&str, Vec<&str>> = HashMap::new(); + for (name, body) in &definitions { + let refs = Self::collect_ast_references(body, &def_names); + deps.insert(name, refs); + } + + // Kahn's algorithm + let mut in_degree: HashMap<&str, usize> = HashMap::new(); + for (name, _) in &definitions { + in_degree.insert(name, 0); + } + for refs in deps.values() { + for &dep in refs { + *in_degree.entry(dep).or_insert(0) += 1; + } + } + + let mut zero_degree: Vec<&str> = in_degree + .iter() + .filter(|(_, deg)| **deg == 0) + .map(|(&name, _)| name) + .collect(); + zero_degree.sort(); + let mut queue: VecDeque<&str> = zero_degree.into_iter().collect(); + + let mut sorted_names = Vec::new(); + while let Some(name) = queue.pop_front() { + sorted_names.push(name); + if let Some(refs) = deps.get(name) { + for &dep in refs { + if let Some(deg) = in_degree.get_mut(dep) { + *deg = deg.saturating_sub(1); + if *deg == 0 { + queue.push_back(dep); + } + } + } + } + } + + // Reverse so dependencies come first + sorted_names.reverse(); + + // Add any remaining (cyclic) definitions + for (name, _) in &definitions { + if !sorted_names.contains(name) { + sorted_names.push(name); + } + } + + // Build result with bodies + sorted_names + .into_iter() + .filter_map(|name| self.symbol_table.get(name).map(|body| (name, body.clone()))) + .collect() + } + + /// Collect references from an AST expression. 
+ fn collect_ast_references<'b>(expr: &Expr, def_names: &HashSet<&'b str>) -> Vec<&'b str> { + let mut refs = Vec::new(); + Self::collect_ast_references_impl(expr, def_names, &mut refs); + refs + } + + fn collect_ast_references_impl<'b>( + expr: &Expr, + def_names: &HashSet<&'b str>, + refs: &mut Vec<&'b str>, + ) { + match expr { + Expr::Ref(r) => { + if let Some(name_token) = r.name() { + let name = name_token.text(); + if def_names.contains(name) && !refs.contains(&name) { + // Find the actual &'b str from the set + if let Some(&found) = def_names.iter().find(|&&n| n == name) { + refs.push(found); + } + } + } + } + _ => { + for child in expr.children() { + Self::collect_ast_references_impl(&child, def_names, refs); + } + } + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Display and helpers +// ───────────────────────────────────────────────────────────────────────────── + +impl TypeInferenceResult<'_> { + pub fn dump(&self) -> String { + let mut out = String::new(); + + out.push_str("=== Entrypoints ===\n"); + for (name, type_id) in &self.entrypoint_types { + out.push_str(&format!("{} → {}\n", name, format_type_id(*type_id))); + } + + if !self.type_defs.is_empty() { + out.push_str("\n=== Types ===\n"); + for (idx, def) in self.type_defs.iter().enumerate() { + let type_id = 3 + idx as TypeId; + let name = def.name.unwrap_or(""); + match def.kind { + TypeKind::Record => { + out.push_str(&format!("T{}: Record {} {{\n", type_id, name)); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push_str("}\n"); + } + TypeKind::Enum => { + out.push_str(&format!("T{}: Enum {} {{\n", type_id, name)); + for member in &def.members { + out.push_str(&format!( + " {}: {}\n", + member.name, + format_type_id(member.ty) + )); + } + out.push_str("}\n"); + } + TypeKind::Optional => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: Optional {} → {}\n", type_id, name, inner)); + } + TypeKind::ArrayStar => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: ArrayStar {} → {}\n", type_id, name, inner)); + } + TypeKind::ArrayPlus => { + let inner = def.inner_type.map(format_type_id).unwrap_or_default(); + out.push_str(&format!("T{}: ArrayPlus {} → {}\n", type_id, name, inner)); + } + } + } + } + + if !self.errors.is_empty() { + out.push_str("\n=== Errors ===\n"); + for err in &self.errors { + let types = err + .types_found + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", "); + out.push_str(&format!( + "field `{}` in `{}`: incompatible types [{}]\n", + err.field, err.definition, types + )); + } + } + + out + } + + pub fn dump_diagnostics(&self, source: &str) -> String { + self.diagnostics.render_filtered(source) + } + + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } +} + +fn format_type_id(id: TypeId) -> String { + match id { + TYPE_VOID => "Void".to_string(), + TYPE_NODE => "Node".to_string(), + TYPE_STR => "String".to_string(), + _ => format!("T{}", id), + } +} diff --git a/crates/plotnik-lib/src/query/typing_tests.rs b/crates/plotnik-lib/src/query/typing_tests.rs new file mode 100644 index 00000000..efd5735e --- /dev/null +++ b/crates/plotnik-lib/src/query/typing_tests.rs @@ -0,0 +1,656 @@ +//! Tests for type inference. 
+ +use indoc::indoc; + +use crate::query::Query; + +fn infer(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + query.type_info().dump() +} + +fn infer_with_graph(source: &str) -> String { + let query = Query::try_from(source) + .expect("parse should succeed") + .build_graph(); + let mut out = String::new(); + out.push_str("=== Graph ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push('\n'); + out.push_str(&query.type_info().dump()); + out +} + +#[test] +fn debug_star_quantifier_graph() { + // See graph BEFORE optimization (what type inference actually sees) + let (query, pre_opt_dump) = Query::try_from("Foo = ((item) @items)*") + .expect("parse should succeed") + .build_graph_with_pre_opt_dump(); + let mut out = String::new(); + out.push_str("=== Graph (before optimization - what type inference sees) ===\n"); + out.push_str(&pre_opt_dump); + out.push_str("\n=== Graph (after optimization) ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push('\n'); + out.push_str(&query.type_info().dump()); + insta::assert_snapshot!(out, @r" + === Graph (before optimization - what type inference sees) === + Foo = N4 + + N0: (_) → N1 + N1: [Down] (item) [Capture] → N2 + N2: ε [Field(items)] → N3 + N3: [Up(1)] ε → N6 + N4: ε [StartArray] → N7 + N5: ε [EndArray] → ∅ + N6: ε [Push] → N7 + N7: ε → N0, N5 + + === Graph (after optimization) === + Foo = N4 + + N0: (_) → N1 + N1: [Down] (item) [Capture] → N2 + N2: ε [Field(items)] → N6 + N4: ε [StartArray] → N7 + N5: ε [EndArray] → ∅ + N6: [Up(1)] ε [Push] → N7 + N7: ε → N0, N5 + + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + items: T3 + } + "); +} + +#[test] +fn debug_graph_structure() { + let result = infer_with_graph("Foo = (identifier) @name"); + insta::assert_snapshot!(result, @r" + === Graph === + Foo = N0 + + N0: (identifier) [Capture] → N1 + N1: ε [Field(name)] → ∅ + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + } + "); +} + +#[test] +fn debug_incompatible_types_graph() { + let input = indoc! 
{r#" + Foo = [ (a) @v (b) @v ::string ] + "#}; + + let query = Query::new(input) + .exec() + .expect("parse should succeed") + .build_graph(); + + let mut out = String::new(); + out.push_str("=== Graph (after optimization) ===\n"); + out.push_str(&query.graph().dump_live(query.dead_nodes())); + out.push_str("\n=== Dead nodes count: "); + out.push_str(&query.dead_nodes().len().to_string()); + out.push_str(" ===\n\n"); + out.push_str(&query.type_info().dump()); + insta::assert_snapshot!(out, @r" + === Graph (after optimization) === + Foo = N0 + + N0: ε → N2, N4 + N1: ε → ∅ + N2: (a) [Capture] → N3 + N3: ε [Field(v)] → N1 + N4: (b) [Capture] [ToString] → N5 + N5: ε [Field(v)] → N1 + + === Dead nodes count: 0 === + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + + === Errors === + field `v` in `Foo`: incompatible types [Node, String] + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Basic captures +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn single_node_capture() { + let result = infer("Foo = (identifier) @name"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: Node + } + "); +} + +#[test] +fn string_capture() { + let result = infer("Foo = (identifier) @name ::string"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + name: String + } + "); +} + +#[test] +fn multiple_captures_flat() { + let result = infer("Foo = (a (b) @x (c) @y)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + x: Node + y: Node + } + "); +} + +#[test] +fn no_captures_void() { + let result = infer("Foo = (identifier)"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → Void + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Captured sequences (composite types) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn captured_sequence_creates_struct() { + let input = indoc! {r#" + Foo = { (a) @x (b) @y } @z + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Record FooScope3 { + x: Node + y: Node + } + T4: Record Foo { + z: T3 + } + "); +} + +#[test] +fn nested_captured_sequence() { + let input = indoc! {r#" + Foo = { (outer) @a { (inner) @b } @nested } @root + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Record FooScope3 { + b: Node + } + T4: Record FooScope4 { + a: Node + nested: T3 + } + T5: Record Foo { + root: T4 + } + "); +} + +#[test] +fn sequence_without_capture_propagates() { + let input = indoc! {r#" + Foo = { (a) @x (b) @y } + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + x: Node + y: Node + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Alternations +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn untagged_alternation_symmetric() { + let input = indoc! 
{r#" + Foo = [ (a) @v (b) @v ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + "); +} + +#[test] +fn untagged_alternation_asymmetric() { + let input = indoc! {r#" + Foo = [ (a) @x (b) @y ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T5 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record Foo { + x: T3 + y: T4 + } + "); +} + +#[test] +fn tagged_alternation_uncaptured_propagates() { + let input = indoc! {r#" + Foo = [ A: (a) @x B: (b) @y ] + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Enum Foo { + A: Node + B: Node + } + "); +} + +#[test] +fn tagged_alternation_captured_creates_enum() { + let input = indoc! {r#" + Foo = [ A: (a) @x B: (b) @y ] @choice + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Enum FooScope3 { + A: Node + B: Node + } + T4: Record Foo { + choice: T3 + } + "); +} + +#[test] +fn captured_untagged_alternation_creates_struct() { + let input = indoc! {r#" + Foo = [ (a) @x (b) @y ] @val + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T6 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record FooScope3 { + x: T3 + y: T4 + } + T6: Record Foo { + val: T5 + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Quantifiers +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn star_quantifier() { + let result = infer("Foo = ((item) @items)*"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Foo { + items: T3 + } + "); +} + +#[test] +fn plus_quantifier() { + let result = infer("Foo = ((item) @items)+"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: ArrayPlus → Node + T4: Record Foo { + items: T3 + } + "); +} + +#[test] +fn optional_quantifier() { + let result = infer("Foo = ((item) @maybe)?"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Optional → Node + T4: Record Foo { + maybe: T3 + } + "); +} + +#[test] +fn quantifier_on_sequence() { + // QIS triggered: ≥2 captures inside quantified expression + let input = indoc! {r#" + Foo = { (a) @x (b) @y }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T4 + + === Types === + T3: Record FooScope3 { + x: Node + y: Node + } + T4: ArrayStar → T3 + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// QIS: Additional cases from ADR-0009 +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn qis_single_capture_no_trigger() { + // Single capture inside sequence - no QIS + // Note: The sequence creates its own scope, so the capture goes there. + // Without explicit capture on the sequence, the struct is orphaned. + let input = indoc! 
{r#" + Single = { (a) @item }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Single → T4 + + === Types === + T3: ArrayStar → Node + T4: Record Single { + item: T3 + } + "); +} + +#[test] +fn qis_alternation_in_sequence() { + // Alternation with asymmetric captures inside quantified sequence + // QIS triggered (2 captures), creates element struct + // Note: Current impl doesn't apply optionality for alternation branches in QIS + let input = indoc! {r#" + Foo = { [ (a) @x (b) @y ] }* + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T6 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record FooScope3 { + x: T3 + y: T4 + } + T6: ArrayStar → T5 + "); +} + +#[test] +fn quantified_seq_with_inline_tagged_alt() { + // Issue #5: captures from inline tagged alternation inside quantified sequence + // The tagged alternation is uncaptured, so it should behave like untagged. + // All captures should propagate with Optional cardinality. + let input = indoc! {r#" + Test = { [ A: (a) @x B: (b) @y ] }* @items + "#}; + + let result = infer_with_graph(input); + insta::assert_snapshot!(result, @r" + === Graph === + Test = N11 + + N0: ε [StartObj] → N1 + N1: [Next] ε → N4, N8 + N4: (a) [Variant(A)] [Capture] [Capture] → N6 + N6: ε [Field(x)] [EndVariant] → N15 + N8: (b) [Variant(B)] [Capture] [Capture] → N10 + N10: ε [Field(y)] [EndVariant] → N15 + N11: ε [StartObj] [StartArray] → N16 + N15: ε [EndObj] [Push] → N16 + N16: ε → N0, N19 + N19: ε [EndArray] [EndObj] [Field(items)] → ∅ + + === Entrypoints === + Test → T7 + + === Types === + T3: Optional → Node + T4: Optional → Node + T5: Record TestScope3 { + x: T3 + y: T4 + } + T6: ArrayStar → T5 + T7: Record Test { + items: T6 + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Type compatibility +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn compatible_types_in_alternation() { + let input = indoc! {r#" + Foo = [ (a) @v (b) @v ] + "#}; + + let query = Query::try_from(input).expect("parse").build_graph(); + assert!(query.type_info().errors.is_empty()); +} + +#[test] +fn incompatible_types_in_alternation() { + let input = indoc! {r#" + Foo = [ (a) @v (b) @v ::string ] + "#}; + + let result = infer_with_graph(input); + insta::assert_snapshot!(result, @r" + === Graph === + Foo = N0 + + N0: ε → N2, N4 + N1: ε → ∅ + N2: (a) [Capture] → N3 + N3: ε [Field(v)] → N1 + N4: (b) [Capture] [ToString] → N5 + N5: ε [Field(v)] → N1 + + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + v: Node + } + + === Errors === + field `v` in `Foo`: incompatible types [Node, String] + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Multiple definitions +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn multiple_definitions() { + let input = indoc! 
{r#" + Func = (function_declaration name: (identifier) @name) + Class = (class_declaration name: (identifier) @name body: (class_body) @body) + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Func → T3 + Class → T4 + + === Types === + T3: Record Func { + name: Node + } + T4: Record Class { + name: Node + body: Node + } + "); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Edge cases +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn deeply_nested_node() { + let input = indoc! {r#" + Foo = (a (b (c (d) @val))) + "#}; + + let result = infer(input); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + val: Node + } + "); +} + +#[test] +fn wildcard_capture() { + let result = infer("Foo = _ @any"); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + any: Node + } + "); +} + +#[test] +fn string_literal_capture() { + let result = infer(r#"Foo = "+" @op"#); + insta::assert_snapshot!(result, @r" + === Entrypoints === + Foo → T3 + + === Types === + T3: Record Foo { + op: Node + } + "); +} diff --git a/docs/adr/ADR-0004-query-ir-binary-format.md b/docs/adr/ADR-0004-query-ir-binary-format.md index ebcd2a3f..72f0eb3e 100644 --- a/docs/adr/ADR-0004-query-ir-binary-format.md +++ b/docs/adr/ADR-0004-query-ir-binary-format.md @@ -79,6 +79,7 @@ Single pool for all strings (field names, variant tags, entrypoint names, type n ```rust type StringId = u16; +const STRING_NONE: StringId = 0xFFFF; // sentinel for unnamed types #[repr(C)] struct StringRef { @@ -118,7 +119,7 @@ struct Entrypoint { Header (64 bytes): magic: [u8; 4] b"PLNK" version: u32 format version + ABI hash - checksum: u32 CRC32(offsets || buffer_data) + checksum: u32 CRC32(header[12..64] || buffer_data) buffer_len: u32 successors_offset: u32 effects_offset: u32 @@ -138,6 +139,8 @@ Header is 64 bytes to ensure buffer data starts at a 64-byte aligned offset. Thi Little-endian always. UTF-8 strings. Version mismatch or checksum failure → recompile. +**Checksum coverage**: The checksum covers bytes 12–63 of the header (everything after the checksum field) plus all buffer data. The magic and version are verified independently before checksum validation—a version mismatch triggers recompile without checking the checksum. + ### Construction Three passes: @@ -166,7 +169,7 @@ Buffer layout: 0x0280 Negated Fields [] 0x0280 String Refs [{0,4}, {4,5}, {9,5}, ...] 0x02C0 String Bytes "namevalueIdentNumFuncExpr" -0x0300 Type Defs [Record{...}, Enum{...}, ...] +0x0300 Type Defs [Struct{...}, Enum{...}, ...] 0x0340 Type Members [{name,Str}, {Ident,Ty5}, ...] 0x0380 Entrypoints [{name=Func, target=Tr0, type=Ty3}, ...] 0x03A0 Trivia Kinds [comment, ...] diff --git a/docs/adr/ADR-0005-transition-graph-format.md b/docs/adr/ADR-0005-transition-graph-format.md index e6ea9513..eb8e19a8 100644 --- a/docs/adr/ADR-0005-transition-graph-format.md +++ b/docs/adr/ADR-0005-transition-graph-format.md @@ -25,30 +25,33 @@ type RefId = u16; Relative range within a segment: ```rust -#[repr(C, packed)] +#[repr(C)] struct Slice { start_index: u32, // element index into segment array (NOT byte offset) len: u16, // 65k elements per slice is sufficient + _pad: u16, _phantom: PhantomData T>, } -// 6 bytes, align 1 (packed to avoid padding) +// 8 bytes, align 4 ``` -**Note**: `repr(C, packed)` is required to achieve 6 bytes. 
Standard `repr(C)` would pad to 8 bytes for alignment. The packed repr means field access may be unaligned on some platforms—accessors should copy values out rather than returning references. - `start_index` is an **element index**, not a byte offset. This naming distinguishes it from byte offsets like `StringRef.offset` and `CompiledQuery.*_offset`. The distinction matters for typed array access. ### Transition ```rust +/// Transitions use SSO (small-size optimization) for successors: +/// - 0-8 successors: stored inline in `successor_data` +/// - 9+ successors: `successor_data[0]` is index into successors segment #[repr(C, align(64))] struct Transition { // --- 32 bytes metadata --- matcher: Matcher, // 16 (offset 0) ref_marker: RefTransition, // 4 (offset 16) - successor_count: u32, // 4 (offset 20) - effects: Slice, // 6 (offset 24, when no effects: start and len are zero) - nav: Nav, // 2 (offset 30, see ADR-0008) + nav: Nav, // 2 (offset 20, see ADR-0008) + effects_len: u16, // 2 (offset 22, inlined from Slice) + successor_count: u32, // 4 (offset 24) + effects_start: u32, // 4 (offset 28, inlined from Slice) // --- 32 bytes control flow --- successor_data: [u32; 8], // 32 (offset 32) @@ -56,6 +59,8 @@ struct Transition { // 64 bytes, align 64 (cache-line aligned) ``` +The `effects_start` and `effects_len` fields are inlined rather than using `Slice` to maintain 64-byte alignment without sacrificing inline successor slots. Accessors reconstruct a `Slice` on demand. + Navigation is fully determined by `nav`—no runtime dispatch based on previous matcher. See [ADR-0008](ADR-0008-tree-navigation.md) for `Nav` definition and semantics. Single `ref_marker` slot—sequences like `Enter(A) → Enter(B)` remain as epsilon chains. @@ -94,16 +99,16 @@ enum Matcher { Node { kind: NodeTypeId, // 2 field: Option, // 2 - negated_fields: Slice, // 8 + negated_fields: Slice, // 8 (align 4, starts at offset 8) }, Anonymous { kind: NodeTypeId, // 2 field: Option, // 2 - negated_fields: Slice, // 8 + negated_fields: Slice, // 8 (align 4, starts at offset 8) }, Wildcard, } -// 16 bytes, align 4 +// 16 bytes, align 4 (discriminant 4 + payload 12, but payload naturally aligns) ``` `Option` uses 0 for `None` (niche optimization). @@ -126,6 +131,8 @@ Layout: 1-byte discriminant + 1-byte padding + 2-byte `RefId` payload = 4 bytes. Explicit `None` ensures stable binary layout (`Option` niche is unspecified). +**RefId semantics**: `RefId` is a unique identifier assigned per definition reference during graph construction. It is **not** an index into the `Entrypoints` table (which is for named exports). The actual jump target comes from `successors()[0]}` of the `Enter` transition. `RefId` exists solely to verify that `Exit(id)` matches the corresponding `Enter(id)` at runtime—a mismatch indicates an IR bug. + ### Enter/Exit Semantics **Problem**: A definition can be called from multiple sites. Naively, `Exit.next` would contain all possible return points from all call sites, requiring O(N) filtering at runtime to find which return is valid for the current call. diff --git a/docs/adr/ADR-0006-dynamic-query-execution.md b/docs/adr/ADR-0006-dynamic-query-execution.md index 81be2c82..58a6488d 100644 --- a/docs/adr/ADR-0006-dynamic-query-execution.md +++ b/docs/adr/ADR-0006-dynamic-query-execution.md @@ -17,10 +17,17 @@ For each transition: 1. Execute `nav` initial movement (e.g., goto_first_child, goto_next_sibling) 2. Search loop: try matcher, on fail apply skip policy (advance or fail) 3. 
On match success: store matched node, execute `effects` sequentially -4. Process successors with backtracking +4. Process `ref_marker` (see below) +5. Process successors with backtracking For `Up*` variants, step 2 becomes: validate exit constraint, ascend N levels (no search loop). +**RefTransition handling** (step 4): + +- `None`: no action, proceed to step 5 +- `Enter(ref_id)`: push frame onto `FrameArena`, store `successors()[1..]` as returns, then jump to `successors()[0]` (definition entry)—step 5 is skipped +- `Exit(ref_id)`: verify `ref_id` matches current frame, pop frame, use stored returns as successors—step 5 uses these instead of the transition's own successors + Navigation is fully determined by `nav`—no runtime dispatch based on previous matcher. See [ADR-0008](ADR-0008-tree-navigation.md) for detailed semantics. The matched node is stored in a temporary slot (`matched_node`) accessible to `CaptureNode` effect. Effects execute in order—`CaptureNode` reads from this slot and sets `executor.current`. @@ -68,6 +75,7 @@ enum Container<'a> { | Effect | Action | | ------------------- | ----------------------------------------- | | `CaptureNode` | `current = Node(nodes.next())` (consumes) | +| `ClearCurrent` | `current = None` | | `StartArray` | push `Array([])` onto stack | | `PushElement` | move `current` into top array | | `EndArray` | pop array into `current` | @@ -78,6 +86,8 @@ enum Container<'a> { | `EndVariant` | pop, wrap `current`, set as current | | `ToString` | replace `current` Node with text | +`ClearCurrent` is emitted on skip paths for optional captures (`expr? @name`). When the optional is skipped, `ClearCurrent` ensures `current = None` before `Field(id)` executes, producing the correct `None` value for the optional field. + Invalid state = IR bug → panic. ### QueryInterpreter diff --git a/docs/adr/ADR-0007-type-metadata-format.md b/docs/adr/ADR-0007-type-metadata-format.md index 0f0e6c28..8d8f3dbd 100644 --- a/docs/adr/ADR-0007-type-metadata-format.md +++ b/docs/adr/ADR-0007-type-metadata-format.md @@ -53,10 +53,9 @@ struct TypeDef { kind: TypeKind, // 1 _pad: u8, // 1 name: StringId, // 2 - synthetic or explicit, 0xFFFF for wrappers - members: Slice, // 6 - see interpretation below - _pad2: u16, // 2 + members: Slice, // 8 - see interpretation below } -// 12 bytes, align 2 (due to packed Slice having align 1) +// 12 bytes, align 4 ``` The `members` field has dual semantics based on `kind`: @@ -64,7 +63,7 @@ The `members` field has dual semantics based on `kind`: | Kind | `members.start_index` | `members.len` | | ---------------------------------- | ----------------------- | ------------- | | Wrappers (Optional/Array\*/Array+) | Inner `TypeId` (as u32) | 0 | -| Composites (Record/Enum) | Index into type_members | Member count | +| Composites (Struct/Enum) | Index into type_members | Member count | This reuses `Slice` for consistency with [ADR-0005](ADR-0005-transition-graph-format.md), while keeping TypeDef compact. @@ -76,7 +75,7 @@ enum TypeKind { Optional = 0, // T? 
— members.start = inner TypeId ArrayStar = 1, // T* — members.start = element TypeId ArrayPlus = 2, // T+ — members.start = element TypeId - Record = 3, // struct — members = slice into type_members + Struct = 3, // struct — members = slice into type_members Enum = 4, // tagged union — members = slice into type_members } ``` @@ -86,12 +85,12 @@ enum TypeKind { | Optional | `expr?` | Nullable wrapper | | ArrayStar | `expr*` | Zero or more elements | | ArrayPlus | `expr+` | One or more elements (non-empty) | -| Record | `{ ... } @name` | Named fields | +| Struct | `{ ... } @name` | Named fields | | Enum | `[ A: ... B: ... ]` | Tagged union (discriminated) | ### TypeMember -Shared structure for Record fields and Enum variants: +Shared structure for Struct fields and Enum variants: ```rust #[repr(C)] @@ -115,6 +114,18 @@ When no explicit `:: TypeName` annotation exists, names are synthesized: Collisions resolved by numeric suffix: `FuncBody`, `FuncBody2`, etc. +### Single-Capture Variant Flattening + +When an enum variant's branch has exactly one capture, the variant payload flattens to that capture's type directly—no wrapper struct. + +| Branch Captures | Variant Payload | +| --------------- | -------------------------- | +| 0 | Unit (Void) | +| 1 | Capture's type (flattened) | +| ≥2 | Struct with named fields | + +Rationale: The variant tag already discriminates; a single-field wrapper struct adds verbosity without information. + ### Example Query: @@ -132,10 +143,8 @@ Func = (function_declaration Type graph: ``` -T3: Record "Func" → [name: Str, body: T4] -T4: Enum "FuncBody" → [Stmt: T5, Expr: T6] -T5: Record "FuncBodyStmt" → [stmt: Node] -T6: Record "FuncBodyExpr" → [expr: Node] +T3: Struct "Func" → [name: Str, body: T4] +T4: Enum "FuncBody" → [Stmt: Node, Expr: Node] // flattened: 1 capture per branch Entrypoint: Func → result_type: T3 ``` @@ -145,23 +154,16 @@ Generated TypeScript: ```typescript interface Func { name: string; - body: - | { $tag: "Stmt"; $data: { stmt: Node } } - | { $tag: "Expr"; $data: { expr: Node } }; + body: { $tag: "Stmt"; $data: Node } | { $tag: "Expr"; $data: Node }; } ``` Generated Rust: ```rust -struct Func { - name: String, - body: FuncBody, -} - enum FuncBody { - Stmt { stmt: Node }, - Expr { expr: Node }, + Stmt(Node), + Expr(Node), } ``` diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md new file mode 100644 index 00000000..521be2a3 --- /dev/null +++ b/docs/adr/ADR-0009-type-system.md @@ -0,0 +1,570 @@ +# ADR-0009: Type System + +- **Status**: Accepted +- **Date**: 2025-01-14 + +## Context + +Type inference transforms a `BuildGraph` into `TypeDef`/`TypeMember` structures (ADR-0007). This ADR formalizes the inference rules, particularly the semantics of alternations. 
+ +## Decision + +### Type Universe + +``` +τ ::= Void -- definition with no captures (TypeId = 0) + | Node -- AST node reference (TypeId = 1) + | String -- extracted source text (TypeId = 2) + | Optional(τ) -- nullable wrapper + | ArrayStar(τ) -- zero or more + | ArrayPlus(τ) -- one or more + | Struct(fields) -- struct with named fields + | Enum(variants) -- tagged union +``` + +### Cardinality + +Cardinality describes how many values a capture produces: + +| Cardinality | Notation | Wrapper | Semantics | +| ----------- | -------- | ----------- | ------------ | +| Required | `1` | none | exactly one | +| Optional | `?` | `Optional` | zero or one | +| Star | `*` | `ArrayStar` | zero or more | +| Plus | `+` | `ArrayPlus` | one or more | + +Cardinality propagates through nesting: + +``` +outer * inner = result +────────────────────── + 1 * 1 = 1 + 1 * ? = ? + 1 * * = * + 1 * + = + + ? * 1 = ? + ? * ? = ? + ? * * = * + ? * + = * + * * 1 = * + * * ? = * + * * * = * + * * + = * + + * 1 = + + + * ? = * + + * * = * + + * + = + +``` + +### Scope Rules + +A **scope** is a container that collects captures into fields. + +Scopes are created by: + +1. **Definition root**: inherits the scope type of its root expression (see below) +2. **Captured sequence**: `{...} @name` creates a nested Struct scope +3. **Captured tagged alternation**: `[A: ... B: ...] @name` creates an Enum; each variant has its own scope +4. **Captured untagged alternation**: `[...] @name` creates a Struct; captures from branches merge + +**Definition root semantics**: A definition `Foo = expr` is equivalent to capturing the root expression with the definition name. Therefore: + +- `Foo = [ A: ... B: ... ]` → `Foo` is an Enum (tagged alternation at root) +- `Foo = { ... }` or `Foo = (node ...)` → `Foo` is a Struct (captures propagate to root scope) +- `Foo = (node) @x` → `Foo` is a Struct with field `x` + +**Critical rule**: Tags only have effect when the alternation is captured. An _inline_ uncaptured tagged alternation behaves identically to an untagged one—captures propagate to parent scope. + +### Flat Scoping Principle + +Query nesting does NOT create data nesting. Intermediate structure is invisible: + +```plotnik +Query = (a (b (c) @val)) +``` + +Result type: `Struct { val: Node }` — the `(a ...)` and `(b ...)` wrappers contribute nothing. + +Only explicit scope markers (`{...} @x`, `[...] @x` with tags) introduce nesting in the output type. + +### Reference Opacity + +References are opaque to captures: calling `(Foo)` does NOT inherit captures from `Foo`. + +```plotnik +A = (identifier) @name +B = (A) +C = (A) @node +``` + +Types: + +- `A { name: Node }` — has the capture +- `B {}` (Void) — calling A produces no fields in B +- `C { node: Node }` — captures the reference itself, not A's internals + +To access A's captures, you must either: + +1. Inline A's pattern into B +2. Capture the reference: `(A) @a` yields `{ a: A }` where `a` has type `A` + +This matches runtime semantics ([ADR-0006](ADR-0006-dynamic-query-execution.md)): Enter pushes a frame and jumps to the definition; Exit pops and returns. The caller only sees what it explicitly captures. + +### Type Inference for Captures + +| Pattern | Inferred Type | +| ----------------------------- | -------------------- | +| `(node) @x` | `Node` | +| `"literal" @x` | `Node` | +| `@x ::string` | `String` | +| `@x ::TypeName` | `TypeName` (nominal) | +| `{...} @x` | synthetic Struct | +| `[A: ... B: ...] @x` (tagged) | Enum with variants | +| `[...] 
@x` (untagged) | merged Struct | + +### Alternation Semantics + +This is the most complex part of type inference. The key insight: + +> **Tags only matter when the alternation is captured.** + +#### Case 1: Uncaptured Alternation (Tagged or Untagged) + +Captures propagate to the parent scope. Asymmetric captures become Optional. + +```plotnik +Foo = [ A: (a) @x B: (b) @y ] +``` + +Despite tags, this is uncaptured. Behavior: + +- `@x` appears only in branch A → propagates as `Optional(Node)` +- `@y` appears only in branch B → propagates as `Optional(Node)` +- Result: `Foo { x: Optional(Node), y: Optional(Node) }` + +```plotnik +Bar = [ (a) @v (b) @v ] +``` + +Untagged, uncaptured. Both branches have `@v`: + +- `@v` appears in all branches with type `Node` → propagates as `Node` +- Result: `Bar { v: Node }` + +#### Case 2: Captured Untagged Alternation + +Creates a Struct scope. Captures from branches merge into it. + +```plotnik +Foo = [ (a) @x (b) @y ] @z +``` + +- `@z` creates a Struct scope +- `@x` and `@y` are asymmetric → both become Optional within `@z`'s scope +- Result: `Foo { z: FooZ }` where `FooZ { x: Optional(Node), y: Optional(Node) }` + +```plotnik +Bar = [ (a) @v (b) @v ] @z +``` + +- `@z` creates a Struct scope +- `@v` appears in all branches → required within `@z`'s scope +- Result: `Bar { z: BarZ }` where `BarZ { v: Node }` + +#### Case 3: Captured Tagged Alternation + +Creates an Enum. Each variant has its own independent scope, subject to **Single-Capture Variant Flattening** (see below). + +```plotnik +Foo = [ A: (a) @x B: (b) @y ] @z +``` + +- `@z` creates an Enum because tags are present AND alternation is captured +- Variant `A` has scope with `@x: Node` +- Variant `B` has scope with `@y: Node` +- Both variants have exactly 1 capture → flattened +- Result: `Foo { z: FooZ }` where `FooZ` is: + ``` + Enum FooZ { A(Node), B(Node) } + ``` + +#### Single-Capture Variant Flattening + +When a tagged alternation variant has exactly one capture, the wrapper struct is eliminated—the variant payload becomes the capture's type directly. + +| Branch Captures | Variant Payload | Rust Syntax | +| --------------- | --------------------- | ------------------ | +| 0 | Unit (Void) | `A` | +| 1 | Capture's type (flat) | `A(T)` | +| ≥2 | Struct (named fields) | `A { x: T, y: U }` | + +**Rationale**: The field name is redundant when it's the only capture—the variant tag already provides discrimination. This produces idiomatic types matching `Option`, `Result`. 
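+
+In code, the payload choice reduces to a three-way decision over the variant's propagating captures. A simplified, self-contained sketch (names are illustrative; the cardinality-wrapping step the real pass applies to the single-capture payload is omitted here):
+
+```rust
+type TypeId = u32;
+const TYPE_VOID: TypeId = 0; // unit payload
+
+struct CaptureField {
+    base_type: TypeId,
+    // cardinality, span, ... omitted in this sketch
+}
+
+fn variant_payload(
+    captures: &[(&str, CaptureField)],
+    synthesize_struct: impl FnOnce(&[(&str, CaptureField)]) -> TypeId,
+) -> TypeId {
+    match captures {
+        [] => TYPE_VOID,                  // 0 captures → unit variant
+        [(_, field)] => field.base_type,  // 1 capture  → flatten: drop the field name
+        _ => synthesize_struct(captures), // ≥2 captures → struct payload with named fields
+    }
+}
+```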
+ +**Formalization**: + +``` +VariantPayload(branch) = + let captures = propagating_captures(branch) + match captures.len(): + 0 → Void + 1 → captures[0].type // flatten: discard field name + _ → Struct(captures) // preserve field names +``` + +**Examples**: + +```plotnik +// Single capture per branch → flatten +Foo = [ A: (a) @x B: (b) @y ] @z +// → Enum FooZ { A(Node), B(Node) } + +// Mixed: one branch single, other multi → partial flatten +Bar = [ A: (a) @x B: (b) @y (c) @z ] @result +// → Enum BarResult { A(Node), B { y: Node, z: Node } } + +// Single capture with type annotation → flatten preserves type +Baz = [ Ok: (val) @v Err: (msg) @e ::string ] @result +// → Enum BazResult { Ok(Node), Err(String) } + +// Single capture of nested struct → flatten to that struct +Qux = [ A: { (x) @x (y) @y } @data B: (b) @b ] @choice +// → Enum QuxChoice { A(QuxChoiceData), B(Node) } +// → QuxChoiceData = { x: Node, y: Node } +``` + +### Unification Rules (1-Level Merge) + +When merging captures across untagged alternation branches, we apply **1-level merge semantics**. This balances flexibility with type safety: top-level fields merge with optionality, but nested struct mismatches are errors. + +**Design rationale**: Plotnik's purpose is typed extraction. Deep recursive merging would produce heavily-optional types (`{ a?: { b?: { c?: Node } } }`), forcing users back to defensive checking—undermining the library's value. Tagged+captured alternations exist when precise discrimination is needed. + +**Base type compatibility**: + +``` +unify(Node, Node) = Node +unify(String, String) = String +unify(Node, String) = ⊥ (error: incompatible primitives) +unify(Node, Struct) = ⊥ (error: primitive vs composite) +unify(String, Struct) = ⊥ (error: primitive vs composite) +``` + +**Struct merging** (1-level only): + +``` +unify(Struct(f₁), Struct(f₂)) = Struct(merged_fields) + where merged_fields: + - fields in both f₁ and f₂: unify types (must be compatible) + - fields only in f₁: become Optional + - fields only in f₂: become Optional +``` + +Nested structs are compared by **structural identity**, not recursively merged. If a field has type `Struct` in both branches but the structs differ, it's an error. + +**Cardinality interaction**: Cardinality join happens first, then type unification. If `T` and `T[]` appear at the same field, lift to array, then unify element types. + +**Error reporting**: When unification fails, the compiler reports ALL incompatibilities across all branches, not just the first. This helps users fix multiple issues in one iteration. + +**Examples**: + +``` +// OK: top-level field merge +Branch 1: { x: Node, y: Node } +Branch 2: { x: Node, z: String } +Result: { x: Node, y?: Node, z?: String } + +// OK: nested structs identical +Branch 1: { data: { a: Node }, extra: Node } +Branch 2: { data: { a: Node } } +Result: { data: { a: Node }, extra?: Node } + +// ERROR: nested structs differ (no deep merge) +Branch 1: { data: { a: Node } } +Branch 2: { data: { b: Node } } +→ Error: field `data` has incompatible struct types + +// ERROR: primitive vs primitive mismatch +Branch 1: { val: String } +Branch 2: { val: Node } +→ Error: field `val` has incompatible types: `String` vs `Node` +``` + +### Cardinality Join (for merging) + +When the same capture appears in multiple branches with different cardinalities: + +``` + + + /|\ + * | (arrays collapse to *) + \| + ? + | + 1 +``` + +| Left | Right | Join | +| ---- | ----- | ---- | +| 1 | 1 | 1 | +| 1 | ? | ? | +| 1 | \* | \* | +| 1 | + | + | +| ? | ? 
| ? | +| ? | \* | \* | +| ? | + | \* | +| \* | \* | \* | +| \* | + | \* | +| + | + | + | + +### Cardinality Lifting Coercion + +When cardinality join produces an array type (`*` or `+`) but a branch has scalar cardinality (`1` or `?`), the compiler inserts coercion effects to wrap the scalar in a singleton array. + +| Original | Lifted to | Effect transformation | +| -------- | ---------- | ------------------------------------------------------------------------------------------- | +| `1` | `*` or `+` | `CaptureNode` → `StartArray, CaptureNode, PushElement, EndArray` | +| `?` | `*` | absent → `StartArray, EndArray`; present → `StartArray, CaptureNode, PushElement, EndArray` | + +This ensures the materializer always receives homogeneous values matching the declared type. + +Example: + +```plotnik +Items = [ (single) @item (multi { (x)+ @item }) ] +``` + +Branch 1 has `@item: 1`, branch 2 has `@item: +`. Join is `+`. Branch 1's effects are lifted: + +``` +// Before lifting: +CaptureNode, Field("item") + +// After lifting: +StartArray, CaptureNode, PushElement, EndArray, Field("item") +``` + +### Quantifier-Induced Scope (QIS) + +When a quantified expression contains multiple captures, they must stay coupled per-iteration. QIS creates an implicit scope to preserve this structural relationship. + +**Trigger**: Quantifier `Q ∈ {*, +, ?}` applied to expression `E`, where `E` has **≥2 propagating captures** (captures not absorbed by inner scopes). + +**Mechanism**: QIS creates an implicit scope around `E`. Captures propagate to this scope (not the parent), forming a struct element type. + +**Containers**: Any expression can trigger QIS: + +- Node: `(node ...)Q` +- Sequence: `{...}Q` +- Alternation: `[...]Q` + +**Naming**: + +| Context | Element Type Name | +| ---------------------------- | ----------------------------------- | +| At definition root | `{Def}Item` | +| Explicit capture `E Q @name` | `{Parent}{Name}` | +| Neither | **Error**: require explicit `@name` | + +**Result Type**: + +| Q | Result | +| --- | ------------------------ | +| `*` | `ArrayStar(ElementType)` | +| `+` | `ArrayPlus(ElementType)` | +| `?` | `Optional(ElementType)` | + +**Interior rules**: Standard type inference within the implicit scope: + +- Uncaptured alternations (tagged or not): asymmetric captures → Optional +- Captured tagged alternations: Enum with variant scopes + +**Non-trigger** (≤1 propagating capture): No QIS. Single capture propagates with cardinality multiplication `Q × innerCard`. 
+ +**Examples**: + +```plotnik +// Node as container - keeps name/body paired +Functions = (function_declaration + name: (identifier) @name + body: (block) @body +)* +// → Functions = ArrayStar(FunctionsItem) +// → FunctionsItem = { name: Node, body: Node } + +// Alternation in quantified sequence +Foo = { [ (a) @x (b) @y ] }* +// → Foo = ArrayStar(FooItem) +// → FooItem = { x: Optional(Node), y: Optional(Node) } + +// Tagged but uncaptured (tags ignored, same result) +Bar = { [ A: (a) @x B: (b) @y ] }* +// → Bar = ArrayStar(BarItem) +// → BarItem = { x: Optional(Node), y: Optional(Node) } + +// Tagged AND captured (no QIS - single propagating capture) +Baz = { [ A: (a) @x B: (b) @y ] @choice }* +// → Baz = ArrayStar(BazChoice) +// → BazChoice = Enum { A: { x: Node }, B: { y: Node } } + +// Nested with explicit capture +Outer = (parent { [ (a) @x (b) @y ] }* @items) +// → Outer = { items: ArrayStar(OuterItems) } +// → OuterItems = { x: Optional(Node), y: Optional(Node) } + +// Single capture - no QIS, standard rules +Single = { (a) @item }* +// → Single = { item: ArrayStar(Node) } + +// Error: QIS triggered but no capture, not at root +Bad = (parent { [ (a) @x (b) @y ] }* (other) @z) +// → Error: quantified expression with multiple captures requires @name +``` + +### Missing Field Rule + +If a capture appears in some branches but not all, the field becomes `Optional` (or `*` if original was array). + +This is intentional: users can have common fields be required across all branches, while branch-specific fields become optional. + +### Synthetic Naming + +Types without explicit `::Name` receive synthetic names: + +| Context | Pattern | Example | +| -------------------- | ----------------- | ------------ | +| Definition root | `{DefName}` | `Func` | +| Captured sequence | `{Def}{Capture}` | `FuncParams` | +| Captured alternation | `{Def}{Capture}` | `FuncBody` | +| Enum variant payload | `{Enum}{Variant}` | `FuncBodyOk` | + +Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`, ...). + +### Error Conditions + +| Condition | Severity | Recovery | Diagnostic Kind (future) | +| ------------------------------------ | -------- | ----------------------------- | ------------------------------ | +| Incompatible primitives in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Primitive vs Struct in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | +| Nested struct mismatch in alt | Error | Use `TYPE_INVALID`, continue | `StructMismatchInAlt` | +| Duplicate capture in same scope | Error | Keep first, ignore duplicates | `DuplicateCapture` | +| Empty definition (no captures) | Info | Type is `Void` (TypeId = 0) | (no diagnostic) | +| Inline uncaptured tagged alternation | Warning | Treat as untagged | `UnusedBranchLabels` | +| QIS without capture (not at root) | Error | Cannot infer element type | `MultiCaptureQuantifierNoName` | + +The last warning applies only to literal tagged alternations, not references. If `Foo = [ A: ... ]` is used as `(Foo)`, no warning—the user intentionally reuses a definition. But `(parent [ A: ... B: ... ])` inline without capture likely indicates a forgotten `@name`. + +**Exhaustive error reporting**: When type unification fails, the compiler explores all branches and reports all incompatibilities. 
Example diagnostic: + +``` +error: incompatible types in alternation branches + --> query.plot:3:5 + | + 3 | (a { (x) @val ::string }) @data + | ^^^ `String` here + 4 | (b { (x { (y) @inner }) @val }) @data + | ^^^ `Node` here + | + = note: capture `val` has incompatible types across branches + = help: use tagged alternation `[ A: ... B: ... ]` for precise discrimination +``` + +## Examples + +### Example 1: Captured Sequence + +```plotnik +Foo = (foo {(bar) @bar} @baz) +``` + +- `@bar` captures `(bar)` → `Node` +- `@baz` captures the sequence containing `@bar` → creates scope +- Types: + - `@bar: Node` + - `@baz: FooBaz { bar: Node }` + - `Foo: { baz: FooBaz }` + +### Example 2: Uncaptured Sequence + +```plotnik +Foo = (foo {(bar) @bar}) +``` + +- `@bar` captures `(bar)` → `Node` +- Sequence `{...}` is NOT captured → `@bar` propagates to `Foo`'s scope +- Types: + - `Foo: { bar: Node }` + +### Example 3: Tagged Alternation at Definition Root + +```plotnik +Result = [ + Ok: (value) @val + Err: (error) @msg ::string +] +``` + +- Tagged alternation at definition root → `Result` is an Enum +- Each variant has exactly 1 capture → flattened (no wrapper structs) +- Types: + - `Result: Enum { Ok(Node), Err(String) }` + +### Example 4: Tagged Alternation (Inline, Uncaptured) + +```plotnik +Foo = (parent [ + Ok: (value) @val + Err: (error) @msg ::string +]) +``` + +- Tagged alternation is inline and uncaptured → tags ignored, behaves like untagged +- `@val` only in Ok branch → `Optional(Node)` +- `@msg` only in Err branch → `Optional(String)` +- Types: + - `Foo: { val: Optional(Node), msg: Optional(String) }` +- Diagnostic: warning `UnusedBranchLabels` (inline uncaptured tagged alternation) + +### Example 5: Cardinality in Alternation + +```plotnik +Items = [ (single) @item (multi { (x)+ @item }) ] +``` + +- Branch 1: `@item` cardinality `1`, type `Node` +- Branch 2: `@item` cardinality `+`, type `Node` +- Join: cardinality `+` (both present, LUB of `1` and `+`) +- Types: + - `Items: { item: ArrayPlus(Node) }` + +### Example 6: Nested Quantifier + +```plotnik +Funcs = (module { (function)* @fns }) +``` + +- `@fns` has cardinality `*` from quantifier +- Sequence not captured → propagates to root +- Types: + - `Funcs: { fns: ArrayStar(Node) }` + +## Consequences + +**Positive**: + +- Explicit rules enable deterministic inference +- "Tags only matter when captured" is a simple mental model +- 1-level merge provides flexibility while preserving type safety +- Asymmetric fields becoming Optional is intuitive ("match any branch, get what's available") +- Definition root inherits type naturally—no wrapper structs for top-level enums +- Exhaustive error reporting helps users fix all issues in one iteration + +**Negative**: + +- LUB cardinality join can lose precision +- 1-level merge is less flexible than deep merge (intentional trade-off) + +**Alternatives Considered**: + +- Error on uncaptured tagged alternations (rejected: too restrictive for incremental development) +- Definition root always Struct (rejected: forces wrapper types for enums, e.g., `struct Expr { val: ExprEnum }` instead of `enum Expr`) +- Deep recursive merge for nested structs (rejected: produces heavily-optional types that defeat the purpose of typed extraction; users who need flexibility at depth should use tagged+captured alternations for precision) +- Strict struct equality for merging (rejected: too restrictive for common patterns like `[ (a) @x (b) @y ]`)