diff --git a/AGENTS.md b/AGENTS.md index cc5b3248..a97b4776 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -157,7 +157,7 @@ Boolean = [ ``` crates/ plotnik-cli/ # CLI tool - src/commands/ # Subcommands (debug, docs, langs) + src/commands/ # Subcommands (debug, docs, exec, langs, types) plotnik-core/ # Common code plotnik-lib/ # Plotnik as library src/ @@ -177,20 +177,68 @@ Run: `cargo run -p plotnik-cli -- ` - `debug` — Inspect queries and source file ASTs - Example: `cargo run -p plotnik-cli -- debug -q '(foo) @bar'` +- `exec` — Execute query against source, output JSON + - Example: `cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js` +- `types` — Generate TypeScript type definitions from query + - Example: `cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript` - `langs` — List supported languages Inputs: `-q/--query `, `--query-file `, `--source `, `-s/--source-file `, `-l/--lang ` -Output (inferred from input): `--only-symbols`, `--cst`, `--raw`, `--spans`, `--cardinalities` +### `debug` output flags + +- `--only-symbols` — Show only symbol table (requires query) +- `--cst` — Show query CST instead of AST +- `--raw` — Include trivia tokens (whitespace, comments) +- `--spans` — Show source spans +- `--cardinalities` — Show inferred cardinalities +- `--graph` — Show compiled transition graph +- `--graph-raw` — Show unoptimized graph (before epsilon elimination) +- `--types` — Show inferred types ```sh cargo run -p plotnik-cli -- debug -q '(identifier) @id' cargo run -p plotnik-cli -- debug -q '(identifier) @id' --only-symbols +cargo run -p plotnik-cli -- debug -q '(identifier) @id' --graph -l javascript +cargo run -p plotnik-cli -- debug -q '(identifier) @id' --types -l javascript cargo run -p plotnik-cli -- debug -s app.ts cargo run -p plotnik-cli -- debug -s app.ts --raw cargo run -p plotnik-cli -- debug -q '(function_declaration) @fn' -s app.ts -l typescript ``` +### `exec` output flags + +- `--pretty` — Pretty-print JSON output +- `--verbose-nodes` — Include line/column positions in nodes +- `--check` — Validate output against inferred types +- `--entry ` — Entry point name (definition to match from) + +```sh +cargo run -p plotnik-cli -- exec -q '(program (expression_statement (identifier) @name))' --source 'x' -l javascript +cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --pretty +cargo run -p plotnik-cli -- exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes +cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --check +cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --verbose-nodes --pretty +cargo run -p plotnik-cli -- exec -q 'A = (identifier) @id B = (string) @str' -s app.js --entry B +``` + +### `types` output flags + +- `--format ` — Output format: `typescript` or `ts` (default: typescript) +- `--root-type ` — Name for root type of anonymous expressions (default: Query) +- `--verbose-nodes` — Use verbose Node shape (matches `exec --verbose-nodes`) +- `--no-node-type` — Don't emit Node/Point type definitions +- `--no-export` — Don't add `export` keyword to types +- `-o/--output ` — Write output to file instead of stdout + +```sh +cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript +cargo run -p plotnik-cli -- types -q 'Func = (function_declaration name: (identifier) @name body: (statement_block) @body)' -l js +cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript --verbose-nodes +cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript --no-node-type +cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript -o types.d.ts +``` + # Coding rules - Avoid nesting logic: prefer early exit in functions (return) and loops (continue/break) diff --git a/Cargo.lock b/Cargo.lock index 0c17ef66..e8831c7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -339,6 +339,7 @@ dependencies = [ "clap", "plotnik-langs", "plotnik-lib", + "serde_json", "tree-sitter", ] @@ -400,6 +401,7 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tree-sitter", ] [[package]] diff --git a/crates/plotnik-cli/Cargo.toml b/crates/plotnik-cli/Cargo.toml index 0940d9af..4b9b5d75 100644 --- a/crates/plotnik-cli/Cargo.toml +++ b/crates/plotnik-cli/Cargo.toml @@ -69,4 +69,5 @@ yaml = ["plotnik-langs/yaml"] clap = { version = "4.5", features = ["derive"] } plotnik-langs = { version = "0.1.0", path = "../plotnik-langs", default-features = false } plotnik-lib = { version = "0.1.0", path = "../plotnik-lib" } +serde_json = "1.0" tree-sitter = "0.26" diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index a83fb67a..40663524 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -60,6 +60,95 @@ pub enum Command { /// List supported languages Langs, + + /// Execute a query against source code and output JSON + #[command(after_help = r#"EXAMPLES: + plotnik exec -q '(identifier) @id' -s app.js + plotnik exec -q '(identifier) @id' -s app.js --pretty + plotnik exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes + plotnik exec -q '(identifier) @id' -s app.js --check + plotnik exec --query-file query.plnk -s app.js --entry FunctionDef"#)] + Exec { + #[command(flatten)] + query: QueryArgs, + + #[command(flatten)] + source: SourceArgs, + + /// Language for source (required for inline text, inferred from extension otherwise) + #[arg(long, short = 'l', value_name = "LANG")] + lang: Option, + + #[command(flatten)] + output: ExecOutputArgs, + }, + + /// Generate type definitions from a query + #[command(after_help = r#"EXAMPLES: + plotnik types -q '(identifier) @id' -l javascript + plotnik types --query-file query.plnk -l typescript + plotnik types -q '(function_declaration) @fn' -l js --format ts + plotnik types -q '(identifier) @id' -l js --verbose-nodes + plotnik types -q '(identifier) @id' -l js -o types.d.ts + +NOTE: Use --verbose-nodes to match `exec --verbose-nodes` output shape."#)] + Types { + #[command(flatten)] + query: QueryArgs, + + /// Target language (required) + #[arg(long, short = 'l', value_name = "LANG")] + lang: Option, + + #[command(flatten)] + output: TypesOutputArgs, + }, +} + +#[derive(Args)] +pub struct ExecOutputArgs { + /// Pretty-print JSON output + #[arg(long)] + pub pretty: bool, + + /// Include verbose node information (line/column positions) + #[arg(long)] + pub verbose_nodes: bool, + + /// Validate output against inferred types + #[arg(long)] + pub check: bool, + + /// Entry point name (definition to match from) + #[arg(long, value_name = "NAME")] + pub entry: Option, +} + +#[derive(Args)] +pub struct TypesOutputArgs { + /// Output format (typescript, ts) + #[arg(long, default_value = "typescript", value_name = "FORMAT")] + pub format: String, + + /// Name for the root type (for anonymous expressions) + #[arg(long, default_value = "Query", value_name = "NAME")] + pub root_type: String, + + /// Use verbose node shape (matches exec --verbose-nodes) + #[arg(long)] + pub verbose_nodes: bool, + + /// Don't emit Node/Point type definitions + #[arg(long)] + pub no_node_type: bool, + + /// Don't export types + #[arg(long)] + pub no_export: bool, + + /// Write output to file + #[arg(short = 'o', long, value_name = "FILE")] + pub output: Option, } #[derive(Args)] diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 2c95154d..888b8625 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -1,4 +1,4 @@ -mod source; +pub mod source; use std::fs; use std::io::{self, Read}; @@ -86,7 +86,13 @@ pub fn run(args: DebugArgs) { if (args.graph || args.graph_raw || args.types) && let Some(q) = query.take() { - let (q, pre_opt_dump) = q.build_graph_with_pre_opt_dump(); + // Determine root kind for auto-wrapping + let root_kind = args.lang.as_ref().and_then(|lang_name| { + let lang = resolve_lang_for_link(&Some(lang_name.clone())); + lang.root().and_then(|root_id| lang.node_type_name(root_id)) + }); + + let (q, pre_opt_dump) = q.build_graph_with_pre_opt_dump(root_kind); let mut needs_separator = false; if args.graph_raw { if show_both_graphs { diff --git a/crates/plotnik-cli/src/commands/debug/source.rs b/crates/plotnik-cli/src/commands/debug/source.rs index c39eb92e..164fbd89 100644 --- a/crates/plotnik-cli/src/commands/debug/source.rs +++ b/crates/plotnik-cli/src/commands/debug/source.rs @@ -16,7 +16,10 @@ pub fn load_source(text: &Option, file: &Option) -> String { .expect("failed to read stdin"); return buf; } - return fs::read_to_string(path).expect("failed to read source file"); + return fs::read_to_string(path).unwrap_or_else(|_| { + eprintln!("error: file not found: {}", path.display()); + std::process::exit(1); + }); } unreachable!() } diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs new file mode 100644 index 00000000..fdd4fcc7 --- /dev/null +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -0,0 +1,211 @@ +use std::fs; +use std::io::{self, Read}; +use std::path::PathBuf; + +use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use plotnik_lib::Query; +use plotnik_lib::engine::interpreter::QueryInterpreter; +use plotnik_lib::engine::validate::validate as validate_result; +use plotnik_lib::engine::value::{ResolvedValue, VerboseResolvedValue}; +use plotnik_lib::ir::{NodeKindResolver, QueryEmitter}; + +use super::debug::source::resolve_lang; + +pub struct ExecArgs { + pub query_text: Option, + pub query_file: Option, + pub source_text: Option, + pub source_file: Option, + pub lang: Option, + pub pretty: bool, + pub verbose_nodes: bool, + pub check: bool, + pub entry: Option, +} + +struct LangResolver(Lang); + +impl NodeKindResolver for LangResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.0.resolve_named_node(name) + } + + fn resolve_field(&self, name: &str) -> Option { + self.0.resolve_field(name) + } +} + +pub fn run(args: ExecArgs) { + if let Err(msg) = validate(&args) { + eprintln!("error: {}", msg); + std::process::exit(1); + } + + let query_source = load_query(&args); + if query_source.trim().is_empty() { + eprintln!("error: query cannot be empty"); + std::process::exit(1); + } + let source_code = load_source(&args); + let lang = resolve_lang(&args.lang, &args.source_text, &args.source_file); + + // Parse and validate query + let mut query = Query::new(&query_source).exec().unwrap_or_else(|e| { + eprintln!("error: {}", e); + std::process::exit(1); + }); + + if !query.is_valid() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Link query against language + query.link(&lang); + if !query.is_valid() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Build transition graph and type info + let mut query = query.build_graph(); + if query.has_type_errors() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Auto-wrap definitions with root node if available + if let Some(root_id) = lang.root() + && let Some(root_kind) = lang.node_type_name(root_id) + { + query = query.wrap_with_root(root_kind); + } + + // Emit compiled query + let resolver = LangResolver(lang.clone()); + let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); + let compiled = emitter.emit().unwrap_or_else(|e| { + eprintln!("error: emit failed: {:?}", e); + std::process::exit(1); + }); + + // Parse source + let tree = lang.parse(&source_code); + let cursor = tree.walk(); + + // Find entry point + let entrypoint = match &args.entry { + Some(name) => compiled + .entrypoints() + .iter() + .find(|ep| compiled.string(ep.name_id()) == name) + .unwrap_or_else(|| { + let available: Vec<_> = compiled + .entrypoints() + .iter() + .map(|ep| compiled.string(ep.name_id())) + .collect(); + eprintln!( + "error: entry point '{}' not found. Available: {}", + name, + available.join(", ") + ); + std::process::exit(1); + }), + None => compiled.entrypoints().last().unwrap_or_else(|| { + eprintln!("error: no entry points in query"); + std::process::exit(1); + }), + }; + + // Run interpreter + let interpreter = QueryInterpreter::new(&compiled, cursor, &source_code); + let result = interpreter + .run_from(entrypoint.target()) + .unwrap_or_else(|e| { + eprintln!("error: {}", e); + std::process::exit(1); + }); + + // Type checking against inferred types + if args.check { + let expected_type = Some(entrypoint.result_type()); + if let Some(type_id) = expected_type + && let Err(e) = validate_result(&result, type_id, &compiled) + { + eprintln!("type error: {}", e); + std::process::exit(1); + } + } + + // Output JSON + let output = match (args.verbose_nodes, args.pretty) { + (true, true) => serde_json::to_string_pretty(&VerboseResolvedValue(&result, &compiled)), + (true, false) => serde_json::to_string(&VerboseResolvedValue(&result, &compiled)), + (false, true) => serde_json::to_string_pretty(&ResolvedValue(&result, &compiled)), + (false, false) => serde_json::to_string(&ResolvedValue(&result, &compiled)), + }; + + match output { + Ok(json) => println!("{}", json), + Err(e) => { + eprintln!("error: JSON serialization failed: {}", e); + std::process::exit(1); + } + } +} + +fn load_query(args: &ExecArgs) -> String { + if let Some(ref text) = args.query_text { + return text.clone(); + } + if let Some(ref path) = args.query_file { + if path.as_os_str() == "-" { + let mut buf = String::new(); + io::stdin() + .read_to_string(&mut buf) + .expect("failed to read stdin"); + return buf; + } + return fs::read_to_string(path).unwrap_or_else(|_| { + eprintln!("error: query file not found: {}", path.display()); + std::process::exit(1); + }); + } + unreachable!("validation ensures query input exists") +} + +fn load_source(args: &ExecArgs) -> String { + if let Some(ref text) = args.source_text { + return text.clone(); + } + if let Some(ref path) = args.source_file { + if path.as_os_str() == "-" { + panic!("cannot read both query and source from stdin"); + } + return fs::read_to_string(path).unwrap_or_else(|_| { + eprintln!("error: file not found: {}", path.display()); + std::process::exit(1); + }); + } + unreachable!("validation ensures source input exists") +} + +fn validate(args: &ExecArgs) -> Result<(), &'static str> { + let has_query = args.query_text.is_some() || args.query_file.is_some(); + let has_source = args.source_text.is_some() || args.source_file.is_some(); + + if !has_query { + return Err("query is required: use -q/--query or --query-file"); + } + + if !has_source { + return Err("source is required: use -s/--source-file or --source"); + } + + if args.source_text.is_some() && args.lang.is_none() { + return Err("--lang is required when using --source"); + } + + Ok(()) +} diff --git a/crates/plotnik-cli/src/commands/mod.rs b/crates/plotnik-cli/src/commands/mod.rs index 37b04dfb..f6f0efd5 100644 --- a/crates/plotnik-cli/src/commands/mod.rs +++ b/crates/plotnik-cli/src/commands/mod.rs @@ -1,3 +1,5 @@ pub mod debug; pub mod docs; +pub mod exec; pub mod langs; +pub mod types; diff --git a/crates/plotnik-cli/src/commands/types.rs b/crates/plotnik-cli/src/commands/types.rs new file mode 100644 index 00000000..39f543b7 --- /dev/null +++ b/crates/plotnik-cli/src/commands/types.rs @@ -0,0 +1,404 @@ +use std::fmt::Write; +use std::fs; +use std::io::{self, Read}; +use std::path::PathBuf; + +use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use plotnik_lib::Query; +use plotnik_lib::ir::{ + CompiledQuery, NodeKindResolver, QueryEmitter, STRING_NONE, TYPE_NODE, TYPE_STR, TYPE_VOID, + TypeId, TypeKind, +}; + +pub struct TypesArgs { + pub query_text: Option, + pub query_file: Option, + pub lang: Option, + pub format: String, + pub root_type: String, + pub verbose_nodes: bool, + pub no_node_type: bool, + pub export: bool, + pub output: Option, +} + +struct LangResolver(Lang); + +impl NodeKindResolver for LangResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.0.resolve_named_node(name) + } + + fn resolve_field(&self, name: &str) -> Option { + self.0.resolve_field(name) + } +} + +pub fn run(args: TypesArgs) { + if let Err(msg) = validate(&args) { + eprintln!("error: {}", msg); + std::process::exit(1); + } + + let query_source = load_query(&args); + if query_source.trim().is_empty() { + eprintln!("error: query cannot be empty"); + std::process::exit(1); + } + let lang = resolve_lang_required(&args.lang); + + // Parse and validate query + let mut query = Query::new(&query_source).exec().unwrap_or_else(|e| { + eprintln!("error: {}", e); + std::process::exit(1); + }); + + if !query.is_valid() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Link query against language + query.link(&lang); + if !query.is_valid() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Build transition graph and type info + let mut query = query.build_graph(); + if query.has_type_errors() { + eprint!("{}", query.diagnostics().render(&query_source)); + std::process::exit(1); + } + + // Auto-wrap definitions with root node if available + if let Some(root_id) = lang.root() + && let Some(root_kind) = lang.node_type_name(root_id) + { + query = query.wrap_with_root(root_kind); + } + + // Emit compiled query (IR) + let resolver = LangResolver(lang.clone()); + let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); + let compiled = emitter.emit().unwrap_or_else(|e| { + eprintln!("error: emit failed: {:?}", e); + std::process::exit(1); + }); + + // Generate TypeScript + let output = generate_typescript(&compiled, &args); + + // Write output + if let Some(path) = &args.output { + fs::write(path, &output).unwrap_or_else(|e| { + eprintln!("error: failed to write {}: {}", path.display(), e); + std::process::exit(1); + }); + } else { + print!("{}", output); + } +} + +fn generate_typescript(ir: &CompiledQuery, args: &TypesArgs) -> String { + let mut out = String::new(); + let export_prefix = if args.export { "export " } else { "" }; + + // Emit Node and Point types unless --no-node-type + if !args.no_node_type { + if args.verbose_nodes { + writeln!(out, "{}interface Point {{", export_prefix).unwrap(); + writeln!(out, " row: number;").unwrap(); + writeln!(out, " column: number;").unwrap(); + writeln!(out, "}}").unwrap(); + writeln!(out).unwrap(); + writeln!(out, "{}interface Node {{", export_prefix).unwrap(); + writeln!(out, " kind: string;").unwrap(); + writeln!(out, " text: string;").unwrap(); + writeln!(out, " start_byte: number;").unwrap(); + writeln!(out, " end_byte: number;").unwrap(); + writeln!(out, " start_point: Point;").unwrap(); + writeln!(out, " end_point: Point;").unwrap(); + writeln!(out, "}}").unwrap(); + } else { + writeln!(out, "{}interface Node {{", export_prefix).unwrap(); + writeln!(out, " kind: string;").unwrap(); + writeln!(out, " text: string;").unwrap(); + writeln!(out, " range: [number, number];").unwrap(); + writeln!(out, "}}").unwrap(); + } + } + + let emitter = TypeScriptEmitter::new(ir, export_prefix); + + // Emit composite types that are named and not inlinable + for (idx, type_def) in ir.type_defs().iter().enumerate() { + let type_id = idx as TypeId + 3; // TYPE_COMPOSITE_START + if !emitter.should_emit_as_interface(type_id) { + continue; + } + + if !out.is_empty() { + writeln!(out).unwrap(); + } + emitter.emit_type_def(&mut out, type_id, type_def); + } + + // Emit entrypoints as type aliases if they differ from their type name + for entry in ir.entrypoints() { + let raw_entry_name = ir.string(entry.name_id()); + // Replace anonymous entrypoint "_" with --root-type name + let entry_name = if raw_entry_name == "_" { + args.root_type.as_str() + } else { + raw_entry_name + }; + let type_id = entry.result_type(); + let type_name = emitter.get_type_name(type_id); + + // Skip if entrypoint name matches type name (redundant alias) + if type_name == entry_name { + continue; + } + + if !out.is_empty() { + writeln!(out).unwrap(); + } + writeln!( + out, + "{}type {} = {};", + export_prefix, + entry_name, + emitter.format_type(type_id) + ) + .unwrap(); + } + + out +} + +struct TypeScriptEmitter<'a> { + ir: &'a CompiledQuery, + export_prefix: &'a str, +} + +impl<'a> TypeScriptEmitter<'a> { + fn new(ir: &'a CompiledQuery, export_prefix: &'a str) -> Self { + Self { ir, export_prefix } + } + + /// Returns true if this type should be emitted as a standalone interface. + fn should_emit_as_interface(&self, type_id: TypeId) -> bool { + if type_id < 3 { + return false; // primitives + } + + let idx = (type_id - 3) as usize; + let Some(def) = self.ir.type_defs().get(idx) else { + return false; + }; + + // Wrapper types are always inlined + if def.is_wrapper() { + return false; + } + + // Named composites get their own interface + def.name != STRING_NONE + } + + /// Get the type name for a composite type, or generate one. + fn get_type_name(&self, type_id: TypeId) -> String { + match type_id { + TYPE_VOID => "null".to_string(), + TYPE_NODE => "Node".to_string(), + TYPE_STR => "string".to_string(), + _ => { + let idx = (type_id - 3) as usize; + if let Some(def) = self.ir.type_defs().get(idx) + && def.name != STRING_NONE + { + return self.ir.string(def.name).to_string(); + } + // Fallback for anonymous types + format!("T{}", type_id) + } + } + } + + /// Format a type reference (may be inline or named). + fn format_type(&self, type_id: TypeId) -> String { + match type_id { + TYPE_VOID => "null".to_string(), + TYPE_NODE => "Node".to_string(), + TYPE_STR => "string".to_string(), + _ => { + let idx = (type_id - 3) as usize; + let Some(def) = self.ir.type_defs().get(idx) else { + return format!("unknown /* T{} */", type_id); + }; + + // Wrapper types: inline + if let Some(inner) = def.inner_type() { + let inner_fmt = self.format_type(inner); + return match def.kind { + TypeKind::Optional => format!("{} | null", inner_fmt), + TypeKind::ArrayStar => format!("{}[]", self.wrap_if_union(&inner_fmt)), + TypeKind::ArrayPlus => { + format!("[{}, ...{}[]]", inner_fmt, self.wrap_if_union(&inner_fmt)) + } + _ => unreachable!(), + }; + } + + // Named composite: reference by name + if def.name != STRING_NONE { + return self.ir.string(def.name).to_string(); + } + + // Anonymous composite: inline + self.format_inline_composite(type_id, def.kind) + } + } + } + + /// Wrap type in parens if it contains a union (for array element types). + fn wrap_if_union(&self, ty: &str) -> String { + if ty.contains(" | ") { + format!("({})", ty) + } else { + ty.to_string() + } + } + + /// Format an anonymous composite type inline. + fn format_inline_composite(&self, type_id: TypeId, kind: TypeKind) -> String { + let idx = (type_id - 3) as usize; + let Some(def) = self.ir.type_defs().get(idx) else { + return "unknown".to_string(); + }; + + let Some(members_slice) = def.members_slice() else { + return "unknown".to_string(); + }; + + let members = self.ir.resolve_type_members(members_slice); + + match kind { + TypeKind::Record => { + let fields: Vec = members + .iter() + .map(|m| format!("{}: {}", self.ir.string(m.name), self.format_type(m.ty))) + .collect(); + format!("{{ {} }}", fields.join("; ")) + } + TypeKind::Enum => { + let variants: Vec = members + .iter() + .map(|m| { + let tag = self.ir.string(m.name); + let data = self.format_type(m.ty); + format!("{{ $tag: \"{}\"; $data: {} }}", tag, data) + }) + .collect(); + variants.join(" | ") + } + _ => "unknown".to_string(), + } + } + + /// Emit a type definition as an interface or type alias. + fn emit_type_def(&self, out: &mut String, type_id: TypeId, def: &plotnik_lib::ir::TypeDef) { + let name = if def.name != STRING_NONE { + self.ir.string(def.name).to_string() + } else { + format!("T{}", type_id) + }; + + let Some(members_slice) = def.members_slice() else { + return; + }; + + let members = self.ir.resolve_type_members(members_slice); + + match def.kind { + TypeKind::Record => { + writeln!(out, "{}interface {} {{", self.export_prefix, name).unwrap(); + for m in members { + writeln!( + out, + " {}: {};", + self.ir.string(m.name), + self.format_type(m.ty) + ) + .unwrap(); + } + writeln!(out, "}}").unwrap(); + } + TypeKind::Enum => { + let variants: Vec = members + .iter() + .map(|m| { + let tag = self.ir.string(m.name); + let data = self.format_type(m.ty); + format!("{{ $tag: \"{}\"; $data: {} }}", tag, data) + }) + .collect(); + writeln!( + out, + "{}type {} =\n | {};", + self.export_prefix, + name, + variants.join("\n | ") + ) + .unwrap(); + } + _ => {} + } + } +} + +fn load_query(args: &TypesArgs) -> String { + if let Some(ref text) = args.query_text { + return text.clone(); + } + if let Some(ref path) = args.query_file { + if path.as_os_str() == "-" { + let mut buf = String::new(); + io::stdin() + .read_to_string(&mut buf) + .expect("failed to read stdin"); + return buf; + } + return fs::read_to_string(path).expect("failed to read query file"); + } + unreachable!("validation ensures query input exists") +} + +fn resolve_lang_required(lang: &Option) -> Lang { + let name = lang.as_ref().expect("--lang is required"); + plotnik_langs::from_name(name).unwrap_or_else(|| { + eprintln!("error: unknown language: {}", name); + std::process::exit(1); + }) +} + +fn validate(args: &TypesArgs) -> Result<(), &'static str> { + let has_query = args.query_text.is_some() || args.query_file.is_some(); + + if !has_query { + return Err("query is required: use -q/--query or --query-file"); + } + + if args.lang.is_none() { + return Err("--lang is required for type generation"); + } + + let fmt = args.format.to_lowercase(); + if fmt != "typescript" && fmt != "ts" { + return Err("--format must be 'typescript' or 'ts'"); + } + + Ok(()) +} diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index e1579a29..18ec2e01 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -3,6 +3,8 @@ mod commands; use cli::{Cli, Command}; use commands::debug::DebugArgs; +use commands::exec::ExecArgs; +use commands::types::TypesArgs; fn main() { let cli = ::parse(); @@ -37,5 +39,40 @@ fn main() { Command::Langs => { commands::langs::run(); } + Command::Exec { + query, + source, + lang, + output, + } => { + commands::exec::run(ExecArgs { + query_text: query.query_text, + query_file: query.query_file, + source_text: source.source_text, + source_file: source.source_file, + lang, + entry: output.entry, + pretty: output.pretty, + verbose_nodes: output.verbose_nodes, + check: output.check, + }); + } + Command::Types { + query, + lang, + output, + } => { + commands::types::run(TypesArgs { + query_text: query.query_text, + query_file: query.query_file, + lang, + format: output.format, + root_type: output.root_type, + verbose_nodes: output.verbose_nodes, + no_node_type: output.no_node_type, + export: !output.no_export, + output: output.output, + }); + } } } diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index 99dd2988..e3ab0a97 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -15,10 +15,6 @@ use std::num::NonZeroU16; mod invariants; -// ============================================================================ -// Deserialization Layer -// ============================================================================ - /// Raw node definition from `node-types.json`. #[derive(Debug, Clone, serde::Deserialize)] pub struct RawNode { @@ -56,10 +52,6 @@ pub fn parse_node_types(json: &str) -> Result, serde_json::Error> { serde_json::from_str(json) } -// ============================================================================ -// Common Types -// ============================================================================ - /// Node type ID (tree-sitter uses u16). pub type NodeTypeId = u16; @@ -73,10 +65,6 @@ pub struct Cardinality { pub required: bool, } -// ============================================================================ -// NodeTypes Trait -// ============================================================================ - /// Trait for node type constraint lookups. /// /// Provides only what tree-sitter's `Language` API doesn't: @@ -156,10 +144,6 @@ impl NodeTypes for &T { } } -// ============================================================================ -// Static Analysis Layer (zero runtime init) -// ============================================================================ - /// Field info for static storage. #[derive(Debug, Clone, Copy)] pub struct StaticFieldInfo { @@ -325,10 +309,6 @@ impl NodeTypes for StaticNodeTypes { } } -// ============================================================================ -// Dynamic Analysis Layer (runtime construction) -// ============================================================================ - /// Information about a single field on a node type. #[derive(Debug, Clone)] pub struct FieldInfo { diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml index ec0d7965..d904fbba 100644 --- a/crates/plotnik-lib/Cargo.toml +++ b/crates/plotnik-lib/Cargo.toml @@ -20,10 +20,12 @@ indexmap = "2" rowan = "0.16.1" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.17" +tree-sitter = "0.26" plotnik-langs = { version = "0.1", path = "../plotnik-langs", optional = true } [features] default = ["plotnik-langs"] +unstable-child-type-validation = [] [dev-dependencies] insta = { version = "=1.44.3", features = ["yaml"] } diff --git a/crates/plotnik-lib/src/diagnostics/tests.rs b/crates/plotnik-lib/src/diagnostics/tests.rs index 0f921aeb..92f30d7b 100644 --- a/crates/plotnik-lib/src/diagnostics/tests.rs +++ b/crates/plotnik-lib/src/diagnostics/tests.rs @@ -340,8 +340,6 @@ fn diagnostic_kind_message_rendering() { ); } -// === Filtering/suppression tests === - #[test] fn filtered_no_suppression_disjoint_spans() { let mut diagnostics = Diagnostics::new(); diff --git a/crates/plotnik-lib/src/engine/effect_stream.rs b/crates/plotnik-lib/src/engine/effect_stream.rs new file mode 100644 index 00000000..7073da85 --- /dev/null +++ b/crates/plotnik-lib/src/engine/effect_stream.rs @@ -0,0 +1,146 @@ +//! Effect stream recorded during query execution. + +use crate::ir::EffectOp; +use serde::Serialize; +use serde::ser::SerializeStruct; +use tree_sitter::Node; + +/// A captured AST node with a reference to the source. +#[derive(Debug, Clone, Copy)] +pub struct CapturedNode<'tree> { + node: Node<'tree>, + source: &'tree str, +} + +impl<'tree> CapturedNode<'tree> { + /// Create from a tree-sitter node and source text. + pub fn new(node: Node<'tree>, source: &'tree str) -> Self { + Self { node, source } + } + + /// Returns the underlying tree-sitter node. + pub fn node(&self) -> Node<'tree> { + self.node + } + + /// Returns the source text of the node. + pub fn text(&self) -> &'tree str { + self.node + .utf8_text(self.source.as_bytes()) + .unwrap_or("") + } + + pub fn start_byte(&self) -> usize { + self.node.start_byte() + } + + pub fn end_byte(&self) -> usize { + self.node.end_byte() + } + + pub fn start_point(&self) -> (usize, usize) { + let p = self.node.start_position(); + (p.row, p.column) + } + + pub fn end_point(&self) -> (usize, usize) { + let p = self.node.end_position(); + (p.row, p.column) + } + + pub fn kind(&self) -> &'tree str { + self.node.kind() + } +} + +impl PartialEq for CapturedNode<'_> { + fn eq(&self, other: &Self) -> bool { + // Compare by node identity (same position in same tree) + self.node.id() == other.node.id() + && self.start_byte() == other.start_byte() + && self.end_byte() == other.end_byte() + } +} + +impl Eq for CapturedNode<'_> {} + +impl Serialize for CapturedNode<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_struct("CapturedNode", 3)?; + state.serialize_field("kind", self.kind())?; + state.serialize_field("text", self.text())?; + state.serialize_field("range", &[self.start_byte(), self.end_byte()])?; + state.end() + } +} + +/// Wrapper for verbose serialization of a captured node. +/// Includes full positional information (bytes + line/column). +pub struct VerboseNode<'a, 'tree>(pub &'a CapturedNode<'tree>); + +impl Serialize for VerboseNode<'_, '_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let node = self.0; + let mut state = serializer.serialize_struct("CapturedNode", 6)?; + state.serialize_field("kind", node.kind())?; + state.serialize_field("text", node.text())?; + state.serialize_field("start_byte", &node.start_byte())?; + state.serialize_field("end_byte", &node.end_byte())?; + state.serialize_field("start_point", &node.start_point())?; + state.serialize_field("end_point", &node.end_point())?; + state.end() + } +} + +/// A log of effects to be replayed by the materializer. +/// See ADR-0006 for details. +#[derive(Debug, Clone, Default)] +pub struct EffectStream<'tree> { + /// The sequence of operations to perform. + ops: Vec, + /// The sequence of nodes captured, one for each `CaptureNode` op. + nodes: Vec>, +} + +impl<'tree> EffectStream<'tree> { + pub fn new() -> Self { + Self::default() + } + + /// Appends an effect operation to the stream. + pub fn push_op(&mut self, op: EffectOp) { + self.ops.push(op); + } + + /// Appends a captured node to the stream. + pub fn push_node(&mut self, node: Node<'tree>, source: &'tree str) { + self.nodes.push(CapturedNode::new(node, source)); + } + + /// Appends a captured node directly. + pub fn push_captured_node(&mut self, node: CapturedNode<'tree>) { + self.nodes.push(node); + } + + /// Returns the operations. + pub fn ops(&self) -> &[EffectOp] { + &self.ops + } + + /// Returns the captured nodes. + pub fn nodes(&self) -> &[CapturedNode<'tree>] { + &self.nodes + } + + /// Truncate streams to watermarks (for backtracking). + pub fn truncate(&mut self, ops_len: usize, nodes_len: usize) { + self.ops.truncate(ops_len); + self.nodes.truncate(nodes_len); + } +} diff --git a/crates/plotnik-lib/src/engine/error.rs b/crates/plotnik-lib/src/engine/error.rs new file mode 100644 index 00000000..229f3369 --- /dev/null +++ b/crates/plotnik-lib/src/engine/error.rs @@ -0,0 +1,12 @@ +//! Errors that can occur during query execution. + +#[derive(Debug, Clone, thiserror::Error)] +pub enum RuntimeError { + /// Execution fuel exhausted (too many interpreter operations). + #[error("runtime execution limit exceeded")] + ExecFuelExhausted, + + /// Recursion fuel exhausted (too many nested definition calls). + #[error("runtime recursion limit exceeded")] + RecursionLimitExceeded, +} diff --git a/crates/plotnik-lib/src/engine/interpreter.rs b/crates/plotnik-lib/src/engine/interpreter.rs new file mode 100644 index 00000000..e7643f62 --- /dev/null +++ b/crates/plotnik-lib/src/engine/interpreter.rs @@ -0,0 +1,573 @@ +//! The core query interpreter. +//! +//! Executes a compiled query against a tree-sitter AST, producing an effect stream +//! that can be materialized into a structured value. +//! +//! See ADR-0006 for detailed execution semantics. + +use std::collections::HashSet; + +use tree_sitter::{Node, TreeCursor}; + +use crate::ir::{ + CompiledQuery, EffectOp, Matcher, Nav, NavKind, NodeFieldId, NodeTypeId, RefTransition, + TransitionId, +}; + +use super::effect_stream::EffectStream; +use super::error::RuntimeError; +use super::materializer::Materializer; +use super::value::Value; + +/// A saved execution state for backtracking. +#[derive(Debug, Clone)] +struct Checkpoint { + /// Tree-sitter descendant index for cursor restoration. + cursor_checkpoint: usize, + /// Number of ops in effect stream at save time. + effect_ops_watermark: usize, + /// Number of nodes in effect stream at save time. + effect_nodes_watermark: usize, + /// Current frame index at save time. + recursion_frame: Option, + /// Previous max_frame_watermark (for O(1) restore). + prev_max_watermark: Option, + /// Source transition for alternatives. + transition_id: TransitionId, + /// Index of next alternative to try. + next_alt: u32, +} + +/// Stack of checkpoints with O(1) watermark maintenance. +#[derive(Debug, Default)] +struct CheckpointStack { + points: Vec, + /// Highest frame index referenced by any checkpoint. + max_frame_watermark: Option, +} + +impl CheckpointStack { + fn new() -> Self { + Self::default() + } + + fn push(&mut self, mut point: Checkpoint) { + point.prev_max_watermark = self.max_frame_watermark; + if let Some(frame) = point.recursion_frame { + self.max_frame_watermark = Some(match self.max_frame_watermark { + Some(max) => max.max(frame), + None => frame, + }); + } + self.points.push(point); + } + + fn pop(&mut self) -> Option { + let point = self.points.pop()?; + self.max_frame_watermark = point.prev_max_watermark; + Some(point) + } +} + +/// A call frame for definition references. +#[derive(Debug, Clone)] +struct Frame { + /// Index of caller's frame (None if called from top level). + parent: Option, + /// Ref ID to verify Exit matches Enter. + ref_id: u16, + /// Transition that entered this call (to retrieve returns via successors()[1..]). + enter_transition: TransitionId, +} + +/// Append-only arena of call frames. +#[derive(Debug, Default)] +struct FrameArena { + frames: Vec, + /// Index of current frame (the "stack pointer"). + current: Option, +} + +impl FrameArena { + fn new() -> Self { + Self::default() + } + + /// Push a new frame, returns its index. + fn push(&mut self, parent: Option, ref_id: u16, enter_transition: TransitionId) -> u32 { + let idx = self.frames.len() as u32; + self.frames.push(Frame { + parent, + ref_id, + enter_transition, + }); + self.current = Some(idx); + idx + } + + /// Get current frame. + fn current_frame(&self) -> Option<&Frame> { + self.current.map(|idx| &self.frames[idx as usize]) + } + + /// Exit current frame (set current to parent). + fn exit(&mut self) -> Option<&Frame> { + let frame = self.current_frame()?; + let parent = frame.parent; + let idx = self.current?; + self.current = parent; + Some(&self.frames[idx as usize]) + } + + /// Prune frames above the high-water mark. + fn prune(&mut self, checkpoints: &CheckpointStack) { + let high_water = match (self.current, checkpoints.max_frame_watermark) { + (None, None) => return, + (Some(c), None) => c, + (None, Some(m)) => m, + (Some(c), Some(m)) => c.max(m), + }; + self.frames.truncate((high_water + 1) as usize); + } +} + +/// Default execution fuel (transitions). +const DEFAULT_EXEC_FUEL: u32 = 1_000_000; +/// Default recursion fuel (Enter operations). +const DEFAULT_RECURSION_FUEL: u32 = 1024; + +/// Query interpreter that executes a compiled query against an AST. +pub struct QueryInterpreter<'q, 'tree> { + query: &'q CompiledQuery, + cursor: TreeCursor<'tree>, + source: &'tree str, + checkpoints: CheckpointStack, + frames: FrameArena, + effects: EffectStream<'tree>, + /// Trivia node type IDs (for skip-trivia navigation). + trivia_kinds: HashSet, + /// Matched node slot (cleared at start of each transition). + matched_node: Option>, + /// Execution fuel remaining. + exec_fuel: u32, + /// Recursion fuel remaining. + recursion_fuel: u32, +} + +impl<'q, 'tree> QueryInterpreter<'q, 'tree> { + /// Creates a new interpreter. + /// + /// The cursor should be positioned at the tree root. + pub fn new(query: &'q CompiledQuery, cursor: TreeCursor<'tree>, source: &'tree str) -> Self { + let trivia_kinds: HashSet<_> = query.trivia_kinds().iter().copied().collect(); + Self { + query, + cursor, + source, + checkpoints: CheckpointStack::new(), + frames: FrameArena::new(), + effects: EffectStream::new(), + trivia_kinds, + matched_node: None, + exec_fuel: DEFAULT_EXEC_FUEL, + recursion_fuel: DEFAULT_RECURSION_FUEL, + } + } + + /// Set execution fuel limit. + pub fn with_exec_fuel(mut self, fuel: u32) -> Self { + self.exec_fuel = fuel; + self + } + + /// Set recursion fuel limit. + pub fn with_recursion_fuel(mut self, fuel: u32) -> Self { + self.recursion_fuel = fuel; + self + } + + /// Run the query and return the result. + pub fn run(self) -> Result, RuntimeError> { + // Get the entry transition from the last entrypoint (main definition) + let start_transition = self + .query + .entrypoints() + .last() + .map(|ep| ep.target()) + .unwrap_or(0); + + self.run_from(start_transition) + } + + /// Run the query from a specific transition and return the result. + pub fn run_from(mut self, start: TransitionId) -> Result, RuntimeError> { + match self.execute(start) { + Ok(true) => Ok(Materializer::materialize(&self.effects)), + Ok(false) => Ok(Value::Null), // No match + Err(e) => Err(e), + } + } + + /// Execute from a given transition, returns true if matched. + fn execute(&mut self, start: TransitionId) -> Result { + let mut current = start; + + loop { + // Check fuel + if self.exec_fuel == 0 { + return Err(RuntimeError::ExecFuelExhausted); + } + self.exec_fuel -= 1; + + // Clear matched_node slot at start of each transition + self.matched_node = None; + + let view = self.query.transition_view(current); + let nav = view.nav(); + let matcher = view.matcher(); + let ref_marker = view.ref_marker(); + let successors = view.successors(); + + // Step 1: Execute navigation + let nav_ok = self.execute_nav(nav); + if !nav_ok { + // Navigation failed, backtrack + if let Some(next) = self.backtrack()? { + current = next; + continue; + } + return Ok(false); + } + + // Step 2: Try matcher (with skip policy from nav) + let match_ok = self.execute_matcher(matcher, nav); + if !match_ok { + // Match failed, backtrack + if let Some(next) = self.backtrack()? { + current = next; + continue; + } + return Ok(false); + } + + // Step 3: Execute effects + for &effect in view.effects() { + self.execute_effect(effect); + } + + // Step 4: Process ref_marker + match ref_marker { + RefTransition::None => {} + RefTransition::Enter(ref_id) => { + if self.recursion_fuel == 0 { + return Err(RuntimeError::RecursionLimitExceeded); + } + self.recursion_fuel -= 1; + + // Push frame with returns = successors[1..] + self.frames.push(self.frames.current, ref_id, current); + + // Jump to definition entry = successors[0] + if successors.is_empty() { + panic!("Enter transition must have at least one successor"); + } + current = successors[0]; + continue; + } + RefTransition::Exit(ref_id) => { + // Verify ref_id matches + let frame = self.frames.current_frame().expect("Exit without frame"); + assert_eq!(frame.ref_id, ref_id, "Exit ref_id mismatch"); + + // Get returns from enter transition + let enter_trans = frame.enter_transition; + let enter_view = self.query.transition_view(enter_trans); + let returns = &enter_view.successors()[1..]; + + // Pop frame + self.frames.exit(); + + // Prune frames if possible + self.frames.prune(&self.checkpoints); + + // Continue with returns as successors + if returns.is_empty() { + // Definition matched, no returns = we're done with this path + // This shouldn't happen in well-formed graphs + if let Some(next) = self.backtrack()? { + current = next; + continue; + } + return Ok(true); + } + + // Save checkpoint for alternatives if multiple returns + if returns.len() > 1 { + self.save_checkpoint(enter_trans, 2); // Skip successors[0] and [1] + } + + current = returns[0]; + continue; + } + } + + // Step 5: Process successors + if successors.is_empty() { + // Terminal transition - match succeeded + return Ok(true); + } + + // Save checkpoint for alternatives + if successors.len() > 1 { + self.save_checkpoint(current, 1); + } + + current = successors[0]; + } + } + + /// Save a checkpoint for backtracking. + fn save_checkpoint(&mut self, transition_id: TransitionId, next_alt: u32) { + let checkpoint = Checkpoint { + cursor_checkpoint: self.cursor.descendant_index(), + effect_ops_watermark: self.effects.ops().len(), + effect_nodes_watermark: self.effects.nodes().len(), + recursion_frame: self.frames.current, + prev_max_watermark: None, // Set by CheckpointStack::push + transition_id, + next_alt, + }; + self.checkpoints.push(checkpoint); + } + + /// Backtrack to the next alternative. Returns the transition to try. + fn backtrack(&mut self) -> Result, RuntimeError> { + loop { + let Some(mut checkpoint) = self.checkpoints.pop() else { + return Ok(None); + }; + + // Restore cursor + self.cursor.goto_descendant(checkpoint.cursor_checkpoint); + + // Restore effects + self.effects.truncate( + checkpoint.effect_ops_watermark, + checkpoint.effect_nodes_watermark, + ); + + // Restore frame + self.frames.current = checkpoint.recursion_frame; + + // Get next alternative + let view = self.query.transition_view(checkpoint.transition_id); + let successors = view.successors(); + + if (checkpoint.next_alt as usize) < successors.len() { + let next = successors[checkpoint.next_alt as usize]; + checkpoint.next_alt += 1; + + // Re-save if more alternatives remain + if (checkpoint.next_alt as usize) < successors.len() { + self.checkpoints.push(checkpoint); + } + + return Ok(Some(next)); + } + // No more alternatives at this checkpoint, try next + } + } + + /// Execute navigation, returns true if successful. + fn execute_nav(&mut self, nav: Nav) -> bool { + match nav.kind { + NavKind::Stay => true, + + NavKind::Next => self.cursor.goto_next_sibling(), + + NavKind::NextSkipTrivia => { + while self.cursor.goto_next_sibling() { + if !self.is_trivia(self.cursor.node()) { + return true; + } + } + false + } + + NavKind::NextExact => self.cursor.goto_next_sibling(), + + NavKind::Down => self.cursor.goto_first_child(), + + NavKind::DownSkipTrivia => { + if !self.cursor.goto_first_child() { + return false; + } + while self.is_trivia(self.cursor.node()) { + if !self.cursor.goto_next_sibling() { + return false; + } + } + true + } + + NavKind::DownExact => self.cursor.goto_first_child(), + + NavKind::Up => { + for _ in 0..nav.level { + if !self.cursor.goto_parent() { + return false; + } + } + true + } + + NavKind::UpSkipTrivia => { + // Validate we're at last non-trivia child before ascending + let current_id = self.cursor.node().id(); + if let Some(parent) = self.cursor.node().parent() { + let child_count = parent.child_count() as u32; + let mut found_current = false; + for i in 0..child_count { + if let Some(child) = parent.child(i) { + if child.id() == current_id { + found_current = true; + continue; + } + if found_current && !self.is_trivia(child) { + return false; + } + } + } + } + self.cursor.goto_parent() + } + + NavKind::UpExact => { + // Validate we're at last child + let node = self.cursor.node(); + if let Some(parent) = node.parent() { + let child_count = parent.child_count(); + if child_count > 0 { + let last_child = parent.child((child_count - 1) as u32); + if last_child.map(|c| c.id()) != Some(node.id()) { + return false; + } + } + } + self.cursor.goto_parent() + } + } + } + + /// Execute matcher with skip policy, returns true if matched. + fn execute_matcher(&mut self, matcher: &Matcher, nav: Nav) -> bool { + match matcher { + Matcher::Epsilon => true, + + Matcher::Node { + kind, + field, + negated_fields, + } => { + let matched = self.try_match_node(*kind, *field, *negated_fields, true, nav); + if matched { + self.matched_node = Some(self.cursor.node()); + } + matched + } + + Matcher::Anonymous { + kind, + field, + negated_fields, + } => { + let matched = self.try_match_node(*kind, *field, *negated_fields, false, nav); + if matched { + self.matched_node = Some(self.cursor.node()); + } + matched + } + + Matcher::Wildcard => { + self.matched_node = Some(self.cursor.node()); + true + } + } + } + + /// Try to match a node with the given constraints. + fn try_match_node( + &mut self, + kind: NodeTypeId, + field: Option, + negated_fields: crate::ir::Slice, + named: bool, + nav: Nav, + ) -> bool { + // Determine skip policy + let can_skip = match nav.kind { + NavKind::Next | NavKind::Down => true, + NavKind::NextSkipTrivia | NavKind::DownSkipTrivia => false, // Already handled trivia + _ => false, + }; + + loop { + let node = self.cursor.node(); + + // Check named/anonymous + if named != node.is_named() { + if can_skip && self.cursor.goto_next_sibling() { + continue; + } + return false; + } + + // Check kind + if node.kind_id() != kind { + if can_skip && self.cursor.goto_next_sibling() { + continue; + } + return false; + } + + // Check field constraint + if let Some(field_id) = field { + let actual_field = self.cursor.field_id(); + if actual_field != Some(field_id) { + if can_skip && self.cursor.goto_next_sibling() { + continue; + } + return false; + } + } + + // Check negated fields + let neg_fields = self.query.resolve_negated_fields(negated_fields); + for &neg_field in neg_fields { + if node.child_by_field_id(neg_field.get()).is_some() { + if can_skip && self.cursor.goto_next_sibling() { + continue; + } + return false; + } + } + + return true; + } + } + + /// Execute an effect operation. + fn execute_effect(&mut self, effect: EffectOp) { + self.effects.push_op(effect); + + if matches!(effect, EffectOp::CaptureNode) { + let node = self.matched_node.expect("CaptureNode without matched node"); + self.effects.push_node(node, self.source); + } + } + + /// Check if a node is trivia. + fn is_trivia(&self, node: Node) -> bool { + self.trivia_kinds.contains(&node.kind_id()) + } +} diff --git a/crates/plotnik-lib/src/engine/interpreter_tests.rs b/crates/plotnik-lib/src/engine/interpreter_tests.rs new file mode 100644 index 00000000..6ce32a15 --- /dev/null +++ b/crates/plotnik-lib/src/engine/interpreter_tests.rs @@ -0,0 +1,113 @@ +use plotnik_langs::{Lang, NodeFieldId, NodeTypeId, javascript}; + +use crate::engine::interpreter::QueryInterpreter; +use crate::engine::value::Value; +use crate::ir::{NodeKindResolver, QueryEmitter}; +use crate::query::Query; + +struct LangResolver(Lang); + +impl NodeKindResolver for LangResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.0.resolve_named_node(name) + } + + fn resolve_field(&self, name: &str) -> Option { + self.0.resolve_field(name) + } +} + +fn run(query_src: &str, source: &str) -> String { + let lang = javascript(); + + // Parse, link, build graph + let mut query = Query::new(query_src).exec().expect("query parse failed"); + + if !query.is_valid() { + return format!("QUERY ERROR:\n{}", query.diagnostics().render(query_src)); + } + + query.link(&lang); + if !query.is_valid() { + return format!("LINK ERROR:\n{}", query.diagnostics().render(query_src)); + } + + let query = query.build_graph(); + if query.has_type_errors() { + return format!("TYPE ERROR:\n{}", query.diagnostics().render(query_src)); + } + + // Emit compiled query + let resolver = LangResolver(lang.clone()); + let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); + let compiled = match emitter.emit() { + Ok(c) => c, + Err(e) => return format!("EMIT ERROR: {:?}", e), + }; + + // Parse source + let tree = lang.parse(source); + let cursor = tree.walk(); + + // Run interpreter + let interpreter = QueryInterpreter::new(&compiled, cursor, source); + match interpreter.run() { + Ok(value) => format_value(&value), + Err(e) => format!("RUNTIME ERROR: {}", e), + } +} + +fn format_value(value: &Value) -> String { + serde_json::to_string_pretty(value).unwrap_or_else(|e| format!("JSON ERROR: {}", e)) +} + +#[test] +fn capture_identifier() { + // AST: (program (expression_statement (identifier "x"))) + let query = "(program (expression_statement (identifier) @name))"; + let src = "x"; + + let result = run(query, src); + + insta::assert_snapshot!(result, @r#" + { + "kind": "identifier", + "text": "x", + "range": [ + 0, + 1 + ] + } + "#); +} + +#[test] +fn capture_number() { + // AST: (program (expression_statement (number "42"))) + let query = "(program (expression_statement (number) @num))"; + let src = "42"; + + let result = run(query, src); + + insta::assert_snapshot!(result, @r#" + { + "kind": "number", + "text": "42", + "range": [ + 0, + 2 + ] + } + "#); +} + +#[test] +fn no_match_wrong_root() { + // Query expects function_declaration at root, but AST root is program + let query = "(function_declaration) @fn"; + let src = "function foo() {}"; + + let result = run(query, src); + + insta::assert_snapshot!(result, @"null"); +} diff --git a/crates/plotnik-lib/src/engine/materializer.rs b/crates/plotnik-lib/src/engine/materializer.rs new file mode 100644 index 00000000..6dd43cc7 --- /dev/null +++ b/crates/plotnik-lib/src/engine/materializer.rs @@ -0,0 +1,117 @@ +//! Replays an effect stream to materialize a `Value`. + +use super::effect_stream::{CapturedNode, EffectStream}; +use super::value::Value; +use crate::ir::{DataFieldId, EffectOp, VariantTagId}; +use std::collections::BTreeMap; + +/// A container being built on the materializer's value stack. +enum Container<'tree> { + Array(Vec>), + Object(BTreeMap>), + Variant(VariantTagId), +} + +pub struct Materializer<'a, 'tree> { + /// The current value being processed. + current: Option>, + /// A stack of containers (arrays, objects, variants) being built. + stack: Vec>, + /// An iterator over the captured nodes from the effect stream. + nodes: std::slice::Iter<'a, CapturedNode<'tree>>, +} + +impl<'a, 'tree> Materializer<'a, 'tree> { + /// Creates a new materializer for a given effect stream. + fn new(stream: &'a EffectStream<'tree>) -> Self { + Self { + current: None, + stack: Vec::new(), + nodes: stream.nodes().iter(), + } + } + + /// Consumes the materializer and returns the final value. + fn finish(mut self) -> Value<'tree> { + self.current.take().unwrap_or(Value::Null) + } + + /// Replays an effect stream to produce a final `Value`. + pub fn materialize(stream: &'a EffectStream<'tree>) -> Value<'tree> { + let mut materializer = Materializer::new(stream); + + for op in stream.ops() { + materializer.apply_op(*op); + } + + materializer.finish() + } + + /// Applies a single effect operation to the materializer's state. + fn apply_op(&mut self, op: EffectOp) { + match op { + EffectOp::CaptureNode => { + let node = *self.nodes.next().expect("mismatched node capture"); + self.current = Some(Value::Node(node)); + } + EffectOp::StartObject => { + self.stack.push(Container::Object(BTreeMap::new())); + } + EffectOp::EndObject => match self.stack.pop() { + Some(Container::Object(obj)) => self.current = Some(Value::Object(obj)), + _ => panic!("invalid EndObject operation"), + }, + EffectOp::Field(id) => { + let value = self.current.take().unwrap_or(Value::Null); + if let Some(Container::Object(map)) = self.stack.last_mut() { + map.insert(id, value); + } else { + panic!("invalid Field operation without object on stack"); + } + } + EffectOp::StartArray => { + self.stack.push(Container::Array(Vec::new())); + } + EffectOp::EndArray => match self.stack.pop() { + Some(Container::Array(arr)) => self.current = Some(Value::Array(arr)), + _ => panic!("invalid EndArray operation"), + }, + EffectOp::PushElement => { + let value = self.current.take().unwrap_or(Value::Null); + if let Some(Container::Array(arr)) = self.stack.last_mut() { + arr.push(value); + } else { + panic!("invalid PushElement operation without array on stack"); + } + } + EffectOp::ClearCurrent => { + self.current = None; + } + EffectOp::StartVariant(tag) => { + self.stack.push(Container::Variant(tag)); + } + EffectOp::EndVariant => { + let value = self.current.take().unwrap_or(Value::Null); + match self.stack.pop() { + Some(Container::Variant(tag)) => { + self.current = Some(Value::Variant { + tag, + value: Box::new(value), + }); + } + _ => panic!("invalid EndVariant operation"), + } + } + EffectOp::ToString => { + if let Some(Value::Node(node)) = self.current.take() { + self.current = Some(Value::String(node.text().to_string())); + } else { + panic!("invalid ToString operation without a node"); + } + } + } + } +} + +#[cfg(test)] +mod materializer_tests; diff --git a/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs b/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs new file mode 100644 index 00000000..11b6fd47 --- /dev/null +++ b/crates/plotnik-lib/src/engine/materializer/materializer_tests.rs @@ -0,0 +1,157 @@ +use plotnik_langs::javascript; + +use crate::engine::effect_stream::{CapturedNode, EffectStream}; +use crate::engine::materializer::Materializer; +use crate::engine::value::Value; +use crate::ir::EffectOp; + +fn capture_node<'tree>( + tree: &'tree tree_sitter::Tree, + source: &'tree str, + index: usize, +) -> CapturedNode<'tree> { + let mut cursor = tree.walk(); + cursor.goto_first_child(); + for _ in 0..index { + cursor.goto_next_sibling(); + } + CapturedNode::new(cursor.node(), source) +} + +#[test] +fn materialize_simple_object() { + let lang = javascript(); + let source = "a; b;"; + let tree = lang.parse(source); + + let node0 = capture_node(&tree, source, 0); + let node1 = capture_node(&tree, source, 1); + + let mut stream = EffectStream::new(); + stream.push_op(EffectOp::StartObject); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node0); + stream.push_op(EffectOp::Field(10)); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node1); + stream.push_op(EffectOp::Field(20)); + stream.push_op(EffectOp::EndObject); + + let value = Materializer::materialize(&stream); + + match value { + Value::Object(map) => { + assert_eq!(map.len(), 2); + assert!(map.contains_key(&10)); + assert!(map.contains_key(&20)); + } + _ => panic!("expected Object"), + } +} + +#[test] +fn materialize_simple_array() { + let lang = javascript(); + let source = "a; b;"; + let tree = lang.parse(source); + + let node0 = capture_node(&tree, source, 0); + let node1 = capture_node(&tree, source, 1); + + let mut stream = EffectStream::new(); + stream.push_op(EffectOp::StartArray); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node0); + stream.push_op(EffectOp::PushElement); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node1); + stream.push_op(EffectOp::PushElement); + stream.push_op(EffectOp::EndArray); + + let value = Materializer::materialize(&stream); + + match value { + Value::Array(arr) => { + assert_eq!(arr.len(), 2); + assert!(matches!(arr[0], Value::Node(_))); + assert!(matches!(arr[1], Value::Node(_))); + } + _ => panic!("expected Array"), + } +} + +#[test] +fn materialize_object_with_optional_field() { + let lang = javascript(); + let source = "a;"; + let tree = lang.parse(source); + + let node0 = capture_node(&tree, source, 0); + + let mut stream = EffectStream::new(); + stream.push_op(EffectOp::StartObject); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node0); + stream.push_op(EffectOp::Field(10)); + stream.push_op(EffectOp::ClearCurrent); + stream.push_op(EffectOp::Field(30)); + stream.push_op(EffectOp::EndObject); + + let value = Materializer::materialize(&stream); + + match value { + Value::Object(map) => { + assert_eq!(map.len(), 2); + assert!(matches!(map.get(&10), Some(Value::Node(_)))); + assert!(matches!(map.get(&30), Some(Value::Null))); + } + _ => panic!("expected Object"), + } +} + +#[test] +fn materialize_variant() { + let lang = javascript(); + let source = "a;"; + let tree = lang.parse(source); + + let node0 = capture_node(&tree, source, 0); + + let mut stream = EffectStream::new(); + stream.push_op(EffectOp::StartVariant(100)); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node0); + stream.push_op(EffectOp::EndVariant); + + let value = Materializer::materialize(&stream); + + match value { + Value::Variant { tag, value } => { + assert_eq!(tag, 100); + assert!(matches!(*value, Value::Node(_))); + } + _ => panic!("expected Variant"), + } +} + +#[test] +fn materialize_to_string() { + let lang = javascript(); + let source = "hello"; + let tree = lang.parse(source); + + // Get the identifier node (program -> expression_statement -> identifier) + let root = tree.root_node(); + let expr_stmt = root.child(0).unwrap(); + let ident = expr_stmt.child(0).unwrap(); + let node = CapturedNode::new(ident, source); + + let mut stream = EffectStream::new(); + stream.push_op(EffectOp::CaptureNode); + stream.push_captured_node(node); + stream.push_op(EffectOp::ToString); + + let value = Materializer::materialize(&stream); + + assert_eq!(value, Value::String("hello".to_string())); +} diff --git a/crates/plotnik-lib/src/engine/mod.rs b/crates/plotnik-lib/src/engine/mod.rs new file mode 100644 index 00000000..1f9bf0d0 --- /dev/null +++ b/crates/plotnik-lib/src/engine/mod.rs @@ -0,0 +1,13 @@ +//! Query execution engine. + +pub mod effect_stream; +pub mod error; +pub mod interpreter; +pub mod materializer; +pub mod validate; +pub mod value; + +#[cfg(test)] +mod interpreter_tests; +#[cfg(test)] +mod validate_tests; diff --git a/crates/plotnik-lib/src/engine/validate.rs b/crates/plotnik-lib/src/engine/validate.rs new file mode 100644 index 00000000..39e1044c --- /dev/null +++ b/crates/plotnik-lib/src/engine/validate.rs @@ -0,0 +1,324 @@ +//! Runtime validation of query results against type metadata. +//! +//! Validates that `Value` produced by the materializer matches the expected +//! type from the IR. A mismatch indicates an IR construction bug. + +use std::fmt; + +use crate::ir::{ + CompiledQuery, TYPE_COMPOSITE_START, TYPE_NODE, TYPE_STR, TYPE_VOID, TypeId, TypeKind, +}; + +use super::value::Value; + +/// Error returned when validation fails. +#[derive(Debug)] +pub struct TypeError { + pub expected: TypeDescription, + pub actual: TypeDescription, + pub path: Vec, +} + +impl fmt::Display for TypeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "type mismatch at ")?; + if self.path.is_empty() { + write!(f, "")?; + } else { + for (i, seg) in self.path.iter().enumerate() { + if i > 0 { + write!(f, ".")?; + } + match seg { + PathSegment::Field(name) => write!(f, "{}", name)?, + PathSegment::Index(i) => write!(f, "[{}]", i)?, + PathSegment::Variant(tag) => write!(f, "<{}>", tag)?, + } + } + } + write!(f, ": expected {}, got {}", self.expected, self.actual) + } +} + +/// Segment in the path to a type error. +#[derive(Debug, Clone)] +pub enum PathSegment { + Field(String), + Index(usize), + Variant(String), +} + +/// Human-readable type description for error messages. +#[derive(Debug, Clone)] +pub enum TypeDescription { + Void, + Node, + String, + Optional(Box), + Array(Box), + NonEmptyArray(Box), + Record(String), + Enum(String), + // Actual value descriptions + ActualNull, + ActualNode, + ActualString, + ActualArray(usize), + ActualObject, + ActualVariant(String), +} + +impl fmt::Display for TypeDescription { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TypeDescription::Void => write!(f, "void"), + TypeDescription::Node => write!(f, "Node"), + TypeDescription::String => write!(f, "string"), + TypeDescription::Optional(inner) => write!(f, "{}?", inner), + TypeDescription::Array(inner) => write!(f, "{}*", inner), + TypeDescription::NonEmptyArray(inner) => write!(f, "{}+", inner), + TypeDescription::Record(name) => write!(f, "struct {}", name), + TypeDescription::Enum(name) => write!(f, "enum {}", name), + TypeDescription::ActualNull => write!(f, "null"), + TypeDescription::ActualNode => write!(f, "Node"), + TypeDescription::ActualString => write!(f, "string"), + TypeDescription::ActualArray(len) => write!(f, "array[{}]", len), + TypeDescription::ActualObject => write!(f, "object"), + TypeDescription::ActualVariant(tag) => write!(f, "variant({})", tag), + } + } +} + +/// Validates a value against the expected type. +pub fn validate( + value: &Value<'_>, + expected: TypeId, + query: &CompiledQuery, +) -> Result<(), TypeError> { + let mut ctx = ValidationContext { + query, + path: Vec::new(), + }; + ctx.validate_value(value, expected) +} + +struct ValidationContext<'a> { + query: &'a CompiledQuery, + path: Vec, +} + +impl ValidationContext<'_> { + fn validate_value(&mut self, value: &Value<'_>, expected: TypeId) -> Result<(), TypeError> { + match expected { + TYPE_VOID => self.expect_null(value), + TYPE_NODE => self.expect_node(value), + TYPE_STR => self.expect_string(value), + id if id >= TYPE_COMPOSITE_START => self.validate_composite(value, id), + _ => Ok(()), // Unknown primitive, skip validation + } + } + + fn expect_null(&self, value: &Value<'_>) -> Result<(), TypeError> { + match value { + Value::Null => Ok(()), + _ => Err(self.type_error(TypeDescription::Void, self.describe_value(value))), + } + } + + fn expect_node(&self, value: &Value<'_>) -> Result<(), TypeError> { + match value { + Value::Node(_) => Ok(()), + _ => Err(self.type_error(TypeDescription::Node, self.describe_value(value))), + } + } + + fn expect_string(&self, value: &Value<'_>) -> Result<(), TypeError> { + match value { + Value::String(_) => Ok(()), + _ => Err(self.type_error(TypeDescription::String, self.describe_value(value))), + } + } + + fn validate_composite(&mut self, value: &Value<'_>, type_id: TypeId) -> Result<(), TypeError> { + let idx = (type_id - TYPE_COMPOSITE_START) as usize; + let Some(def) = self.query.type_defs().get(idx) else { + return Ok(()); // Unknown type, skip + }; + + match def.kind { + TypeKind::Optional => self.validate_optional(value, def.inner_type().unwrap()), + TypeKind::ArrayStar => self.validate_array(value, def.inner_type().unwrap(), false), + TypeKind::ArrayPlus => self.validate_array(value, def.inner_type().unwrap(), true), + TypeKind::Record => self.validate_record(value, type_id, def), + TypeKind::Enum => self.validate_enum(value, type_id, def), + } + } + + fn validate_optional(&mut self, value: &Value<'_>, inner: TypeId) -> Result<(), TypeError> { + match value { + Value::Null => Ok(()), + _ => self.validate_value(value, inner), + } + } + + fn validate_array( + &mut self, + value: &Value<'_>, + element: TypeId, + non_empty: bool, + ) -> Result<(), TypeError> { + let Value::Array(items) = value else { + let expected = if non_empty { + TypeDescription::NonEmptyArray(Box::new(self.describe_type(element))) + } else { + TypeDescription::Array(Box::new(self.describe_type(element))) + }; + return Err(self.type_error(expected, self.describe_value(value))); + }; + + if non_empty && items.is_empty() { + return Err(self.type_error( + TypeDescription::NonEmptyArray(Box::new(self.describe_type(element))), + TypeDescription::ActualArray(0), + )); + } + + for (i, item) in items.iter().enumerate() { + self.path.push(PathSegment::Index(i)); + self.validate_value(item, element)?; + self.path.pop(); + } + + Ok(()) + } + + fn validate_record( + &mut self, + value: &Value<'_>, + type_id: TypeId, + def: &crate::ir::TypeDef, + ) -> Result<(), TypeError> { + let Value::Object(fields) = value else { + return Err(self.type_error(self.describe_type(type_id), self.describe_value(value))); + }; + + let Some(members_slice) = def.members_slice() else { + return Ok(()); + }; + let members = self.query.resolve_type_members(members_slice); + + for member in members { + let field_name = self.query.string(member.name); + self.path.push(PathSegment::Field(field_name.to_string())); + + // Field ID in the object is the index, need to find it + if let Some(field_value) = fields.get(&member.name) { + self.validate_value(field_value, member.ty)?; + } + // Missing field is OK if it's optional (would be Null) + + self.path.pop(); + } + + Ok(()) + } + + fn validate_enum( + &mut self, + value: &Value<'_>, + type_id: TypeId, + def: &crate::ir::TypeDef, + ) -> Result<(), TypeError> { + let Value::Variant { tag, value: inner } = value else { + return Err(self.type_error(self.describe_type(type_id), self.describe_value(value))); + }; + + let Some(members_slice) = def.members_slice() else { + return Ok(()); + }; + let members = self.query.resolve_type_members(members_slice); + + // Find the variant by tag + let variant = members.iter().find(|m| m.name == *tag); + let Some(variant) = variant else { + // Unknown variant tag + let tag_name = self.query.string(*tag); + return Err(self.type_error( + self.describe_type(type_id), + TypeDescription::ActualVariant(tag_name.to_string()), + )); + }; + + let tag_name = self.query.string(variant.name); + self.path.push(PathSegment::Variant(tag_name.to_string())); + self.validate_value(inner, variant.ty)?; + self.path.pop(); + + Ok(()) + } + + fn describe_type(&self, type_id: TypeId) -> TypeDescription { + match type_id { + TYPE_VOID => TypeDescription::Void, + TYPE_NODE => TypeDescription::Node, + TYPE_STR => TypeDescription::String, + id if id >= TYPE_COMPOSITE_START => { + let idx = (id - TYPE_COMPOSITE_START) as usize; + if let Some(def) = self.query.type_defs().get(idx) { + match def.kind { + TypeKind::Optional => TypeDescription::Optional(Box::new( + self.describe_type(def.inner_type().unwrap()), + )), + TypeKind::ArrayStar => TypeDescription::Array(Box::new( + self.describe_type(def.inner_type().unwrap()), + )), + TypeKind::ArrayPlus => TypeDescription::NonEmptyArray(Box::new( + self.describe_type(def.inner_type().unwrap()), + )), + TypeKind::Record => { + let name = if def.name != crate::ir::STRING_NONE { + self.query.string(def.name).to_string() + } else { + format!("T{}", type_id) + }; + TypeDescription::Record(name) + } + TypeKind::Enum => { + let name = if def.name != crate::ir::STRING_NONE { + self.query.string(def.name).to_string() + } else { + format!("T{}", type_id) + }; + TypeDescription::Enum(name) + } + } + } else { + TypeDescription::Node + } + } + _ => TypeDescription::Node, + } + } + + fn describe_value(&self, value: &Value<'_>) -> TypeDescription { + match value { + Value::Null => TypeDescription::ActualNull, + Value::Node(_) => TypeDescription::ActualNode, + Value::String(_) => TypeDescription::ActualString, + Value::Array(items) => TypeDescription::ActualArray(items.len()), + Value::Object(_) => TypeDescription::ActualObject, + Value::Variant { tag, .. } => { + let tag_name = self.query.string(*tag); + TypeDescription::ActualVariant(tag_name.to_string()) + } + } + } + + fn type_error(&self, expected: TypeDescription, actual: TypeDescription) -> TypeError { + TypeError { + expected, + actual, + path: self.path.clone(), + } + } +} diff --git a/crates/plotnik-lib/src/engine/validate_tests.rs b/crates/plotnik-lib/src/engine/validate_tests.rs new file mode 100644 index 00000000..8a427a58 --- /dev/null +++ b/crates/plotnik-lib/src/engine/validate_tests.rs @@ -0,0 +1,110 @@ +//! End-to-end tests for runtime type validation. + +use plotnik_langs::{Lang, NodeFieldId, NodeTypeId, javascript}; + +use crate::engine::interpreter::QueryInterpreter; +use crate::engine::validate::validate; +use crate::ir::{NodeKindResolver, QueryEmitter}; +use crate::query::Query; + +struct LangResolver(Lang); + +impl NodeKindResolver for LangResolver { + fn resolve_kind(&self, name: &str) -> Option { + self.0.resolve_named_node(name) + } + + fn resolve_field(&self, name: &str) -> Option { + self.0.resolve_field(name) + } +} + +fn run_and_validate(query_src: &str, source: &str) -> String { + let lang = javascript(); + + let mut query = Query::new(query_src).exec().expect("query parse failed"); + assert!( + query.is_valid(), + "query invalid: {}", + query.diagnostics().render(query_src) + ); + + query.link(&lang); + assert!( + query.is_valid(), + "link failed: {}", + query.diagnostics().render(query_src) + ); + + let query = query.build_graph(); + assert!( + !query.has_type_errors(), + "type error: {}", + query.diagnostics().render(query_src) + ); + + let resolver = LangResolver(lang.clone()); + let emitter = QueryEmitter::new(query.graph(), query.type_info(), resolver); + let compiled = emitter.emit().expect("emit failed"); + + let tree = lang.parse(source); + let cursor = tree.walk(); + + let interpreter = QueryInterpreter::new(&compiled, cursor, source); + let result = interpreter.run().expect("runtime error"); + + let expected_type = compiled.entrypoints().first().unwrap().result_type(); + + match validate(&result, expected_type, &compiled) { + Ok(()) => "OK".to_string(), + Err(e) => format!("VALIDATION ERROR: {}", e), + } +} + +#[test] +fn validate_simple_capture() { + let result = run_and_validate("(program (expression_statement (identifier) @name))", "x"); + insta::assert_snapshot!(result, @"OK"); +} + +#[test] +fn validate_string_annotation() { + let result = run_and_validate( + "(program (expression_statement (identifier) @name :: string))", + "x", + ); + insta::assert_snapshot!(result, @"OK"); +} + +#[test] +fn validate_sequence_star() { + let result = run_and_validate( + "(program { (expression_statement (identifier) @id)* })", + "x; y; z", + ); + insta::assert_snapshot!(result, @"OK"); +} + +#[test] +fn validate_sequence_plus() { + let result = run_and_validate( + "(program { (expression_statement (identifier) @id)+ })", + "x; y", + ); + insta::assert_snapshot!(result, @"OK"); +} + +#[test] +fn validate_optional_present() { + let result = run_and_validate("(program (expression_statement (identifier)? @maybe))", "x"); + insta::assert_snapshot!(result, @"OK"); +} + +#[test] +fn validate_optional_absent() { + let result = run_and_validate( + "(program (expression_statement (number)? @maybe (identifier)))", + "x", + ); + insta::assert_snapshot!(result, @"OK"); +} diff --git a/crates/plotnik-lib/src/engine/value.rs b/crates/plotnik-lib/src/engine/value.rs new file mode 100644 index 00000000..a6288eda --- /dev/null +++ b/crates/plotnik-lib/src/engine/value.rs @@ -0,0 +1,145 @@ +//! Types for representing query results. + +use super::effect_stream::{CapturedNode, VerboseNode}; +use crate::ir::{CompiledQuery, DataFieldId, VariantTagId}; +use serde::Serialize; +use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct}; +use std::collections::BTreeMap; + +/// A structured value produced by a query. +#[derive(Debug, Clone, PartialEq, Serialize)] +#[serde(untagged)] +pub enum Value<'tree> { + /// Represents a missing optional value. Serializes to `null`. + Null, + /// An AST node capture. + Node(CapturedNode<'tree>), + /// A string, typically from a `:: string` conversion. + String(String), + /// A list of values, from a `*` or `+` capture. + Array(Vec>), + /// A map of field names to values, from a `{...}` capture. + Object(BTreeMap>), + /// A tagged union, from a `[...]` capture with labels. + Variant { + tag: VariantTagId, + value: Box>, + }, +} + +/// Wrapper for verbose serialization of a Value. +/// Nodes include full positional information (bytes + line/column). +pub struct VerboseValue<'a, 'tree>(pub &'a Value<'tree>); + +impl Serialize for VerboseValue<'_, '_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self.0 { + Value::Null => serializer.serialize_none(), + Value::Node(node) => VerboseNode(node).serialize(serializer), + Value::String(s) => serializer.serialize_str(s), + Value::Array(arr) => { + let mut seq = serializer.serialize_seq(Some(arr.len()))?; + for item in arr { + seq.serialize_element(&VerboseValue(item))?; + } + seq.end() + } + Value::Object(obj) => { + let mut map = serializer.serialize_map(Some(obj.len()))?; + for (k, v) in obj { + map.serialize_entry(&k, &VerboseValue(v))?; + } + map.end() + } + Value::Variant { tag, value } => { + let mut state = serializer.serialize_struct("Variant", 2)?; + state.serialize_field("$tag", tag)?; + state.serialize_field("$data", &VerboseValue(value))?; + state.end() + } + } + } +} + +/// Wrapper for serialization that resolves string IDs to actual strings. +/// Object field names and variant tags are resolved via CompiledQuery. +pub struct ResolvedValue<'a, 'tree>(pub &'a Value<'tree>, pub &'a CompiledQuery); + +impl Serialize for ResolvedValue<'_, '_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let ResolvedValue(value, query) = self; + match value { + Value::Null => serializer.serialize_none(), + Value::Node(node) => node.serialize(serializer), + Value::String(s) => serializer.serialize_str(s), + Value::Array(arr) => { + let mut seq = serializer.serialize_seq(Some(arr.len()))?; + for item in arr { + seq.serialize_element(&ResolvedValue(item, query))?; + } + seq.end() + } + Value::Object(obj) => { + let mut map = serializer.serialize_map(Some(obj.len()))?; + for (k, v) in obj { + let key = query.string(*k); + map.serialize_entry(key, &ResolvedValue(v, query))?; + } + map.end() + } + Value::Variant { tag, value } => { + let mut state = serializer.serialize_struct("Variant", 2)?; + let tag_str = query.string(*tag); + state.serialize_field("$tag", tag_str)?; + state.serialize_field("$data", &ResolvedValue(value, query))?; + state.end() + } + } + } +} + +/// Wrapper for verbose serialization with resolved string IDs. +/// Combines VerboseValue (full node positions) with ResolvedValue (string resolution). +pub struct VerboseResolvedValue<'a, 'tree>(pub &'a Value<'tree>, pub &'a CompiledQuery); + +impl Serialize for VerboseResolvedValue<'_, '_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let VerboseResolvedValue(value, query) = self; + match value { + Value::Null => serializer.serialize_none(), + Value::Node(node) => VerboseNode(node).serialize(serializer), + Value::String(s) => serializer.serialize_str(s), + Value::Array(arr) => { + let mut seq = serializer.serialize_seq(Some(arr.len()))?; + for item in arr { + seq.serialize_element(&VerboseResolvedValue(item, query))?; + } + seq.end() + } + Value::Object(obj) => { + let mut map = serializer.serialize_map(Some(obj.len()))?; + for (k, v) in obj { + let key = query.string(*k); + map.serialize_entry(key, &VerboseResolvedValue(v, query))?; + } + map.end() + } + Value::Variant { tag, value } => { + let mut state = serializer.serialize_struct("Variant", 2)?; + let tag_str = query.string(*tag); + state.serialize_field("$tag", tag_str)?; + state.serialize_field("$data", &VerboseResolvedValue(value, query))?; + state.end() + } + } + } +} diff --git a/crates/plotnik-lib/src/ir/compiled.rs b/crates/plotnik-lib/src/ir/compiled.rs index 4463898b..adfd710c 100644 --- a/crates/plotnik-lib/src/ir/compiled.rs +++ b/crates/plotnik-lib/src/ir/compiled.rs @@ -215,10 +215,6 @@ impl CompiledQuery { } } - // ───────────────────────────────────────────────────────────────────── - // Segment accessors - // ───────────────────────────────────────────────────────────────────── - /// Returns the transitions segment. #[inline] pub fn transitions(&self) -> &[Transition] { @@ -345,10 +341,6 @@ impl CompiledQuery { } } - // ───────────────────────────────────────────────────────────────────── - // High-level accessors - // ───────────────────────────────────────────────────────────────────── - /// Returns a transition by ID. #[inline] pub fn transition(&self, id: TransitionId) -> &Transition { @@ -624,10 +616,6 @@ pub struct CompiledQueryOffsets { pub trivia_kinds_offset: u32, } -// ───────────────────────────────────────────────────────────────────────────── -// View types -// ───────────────────────────────────────────────────────────────────────────── - /// A view of a transition with resolved slices. /// /// Hides offset arithmetic and inline/spilled distinction from callers. @@ -716,10 +704,6 @@ impl<'a> MatcherView<'a> { } } -// ───────────────────────────────────────────────────────────────────────────── -// Alignment helpers -// ───────────────────────────────────────────────────────────────────────────── - /// Aligns an offset up to the given alignment. #[inline] pub const fn align_up(offset: u32, align: u32) -> u32 { diff --git a/crates/plotnik-lib/src/ir/emit.rs b/crates/plotnik-lib/src/ir/emit.rs index 9c351cf6..f59986cb 100644 --- a/crates/plotnik-lib/src/ir/emit.rs +++ b/crates/plotnik-lib/src/ir/emit.rs @@ -220,10 +220,6 @@ impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> { self.emit_buffer(layout) } - // ───────────────────────────────────────────────────────────────────── - // Pass 1: Analysis - // ───────────────────────────────────────────────────────────────────── - fn analyze(&mut self) -> EmitResult<()> { // Pre-intern definition names for entrypoints for (name, _) in self.ctx.graph.definitions() { @@ -314,10 +310,6 @@ impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> { }) } - // ───────────────────────────────────────────────────────────────────── - // Pass 2: Layout - // ───────────────────────────────────────────────────────────────────── - fn compute_layout(&self) -> EmitResult { let transition_count = self.ctx.graph.len() as u32; let successor_count = self.ctx.spilled_successors.len() as u32; @@ -408,10 +400,6 @@ impl<'src, 'g, R: NodeKindResolver> QueryEmitter<'src, 'g, R> { }) } - // ───────────────────────────────────────────────────────────────────── - // Pass 3: Emission - // ───────────────────────────────────────────────────────────────────── - fn emit_buffer(self, layout: LayoutInfo) -> EmitResult { let mut buffer = CompiledQueryBuffer::allocate(layout.buffer_len); let base = buffer.as_mut_ptr(); diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index adad70f6..62d6c381 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,6 +17,7 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod diagnostics; +pub mod engine; pub mod ir; pub mod parser; pub mod query; diff --git a/crates/plotnik-lib/src/query/graph.rs b/crates/plotnik-lib/src/query/graph.rs index 8336db2a..fe649053 100644 --- a/crates/plotnik-lib/src/query/graph.rs +++ b/crates/plotnik-lib/src/query/graph.rs @@ -72,6 +72,21 @@ impl<'src> BuildGraph<'src> { self.add_node(BuildNode::epsilon()) } + /// Clone a node, creating a new node with the same matcher, effects, and ref_marker, + /// but with the specified nav and copying the successors list. + pub fn clone_node_with_nav(&mut self, node_id: NodeId, nav: Nav) -> NodeId { + let original = &self.nodes[node_id as usize]; + let cloned = BuildNode { + matcher: original.matcher.clone(), + effects: original.effects.clone(), + ref_marker: original.ref_marker.clone(), + successors: original.successors.clone(), + nav, + ref_name: original.ref_name, + }; + self.add_node(cloned) + } + pub fn add_matcher(&mut self, matcher: BuildMatcher<'src>) -> NodeId { self.add_node(BuildNode::with_matcher(matcher)) } @@ -116,10 +131,6 @@ impl<'src> BuildGraph<'src> { self.connect(fragment.exit, to); } - // ───────────────────────────────────────────────────────────────────── - // Fragment Combinators - // ───────────────────────────────────────────────────────────────────── - pub fn matcher_fragment(&mut self, matcher: BuildMatcher<'src>) -> Fragment { Fragment::single(self.add_matcher(matcher)) } @@ -162,10 +173,6 @@ impl<'src> BuildGraph<'src> { Fragment::new(entry, exit) } - // ───────────────────────────────────────────────────────────────────── - // Generic Loop/Optional Builders - // ───────────────────────────────────────────────────────────────────── - /// Generic loop combinator for * and + quantifiers. /// /// - `at_least_one`: true for + (one or more), false for * (zero or more) @@ -177,6 +184,7 @@ impl<'src> BuildGraph<'src> { at_least_one: bool, greedy: bool, mode: ArrayMode, + initial_nav: Nav, ) -> Fragment { let has_array = mode != ArrayMode::None; let has_qis = mode == ArrayMode::Qis; @@ -241,39 +249,74 @@ impl<'src> BuildGraph<'src> { (inner.entry, inner.exit) }; + // Set initial navigation on inner.entry (the actual matcher). + // In QIS mode, this is distinct from loop_body_entry (the object wrapper). + self.node_mut(inner.entry).set_nav(initial_nav); + + // For re-entry (subsequent iterations), clone inner.entry with Next nav. + // This creates a separate path for re-entry that can skip non-matching siblings. + // In QIS mode, we clone inner.entry (not obj_start) to avoid duplicating the wrapper. + let try_next = self.clone_node_with_nav(inner.entry, Nav::next()); + + // QIS object wrapper for try_next re-entry path + let (try_next_entry, try_next_exit) = if has_qis { + let os = self.add_epsilon(); + self.node_mut(os).add_effect(BuildEffect::StartObject { + for_alternation: false, + }); + let oe = self.add_epsilon(); + self.node_mut(oe).add_effect(BuildEffect::EndObject); + self.connect(os, try_next); + self.connect(try_next, oe); + (os, oe) + } else { + (try_next, try_next) + }; + // Wire up the graph based on at_least_one and greedy if at_least_one { // + pattern: must match at least once - // Entry → body → push/branch → (loop back or exit) + // Entry → loop_body_entry → body → push → re_entry → (try_next → body or exit) let entry_point = start.unwrap_or(loop_body_entry); let exit_point = end.or(exit).unwrap(); + // re_entry is a branch point (no nav) that chooses: try more or exit + let re_entry = self.add_epsilon(); + if let Some(s) = start { self.connect(s, loop_body_entry); } if let Some(p) = push { self.connect(loop_body_exit, p); - self.connect(p, branch); + // try_next also needs to connect to push after matching + self.connect(try_next_exit, p); + self.connect(p, re_entry); } else { - self.connect(loop_body_exit, branch); + self.connect(loop_body_exit, re_entry); + self.connect(try_next_exit, re_entry); } + // re_entry branches: try_next (Next nav) or exit + // If try_next's Next fails, backtrack finds re_entry checkpoint and tries exit if greedy { - self.connect(branch, loop_body_entry); - self.connect(branch, exit_point); + self.connect(re_entry, try_next_entry); + self.connect(re_entry, exit_point); } else { - self.connect(branch, exit_point); - self.connect(branch, loop_body_entry); + self.connect(re_entry, exit_point); + self.connect(re_entry, try_next_entry); } Fragment::new(entry_point, exit_point) } else { // * pattern: zero or more - // Entry → branch → (body → push → branch) or exit + // Entry → branch → (loop_body_entry → body → push → re_entry → try_next → body) or exit let entry_point = start.unwrap_or(branch); let exit_point = end.or(exit).unwrap(); + // re_entry is a branch point (no nav) that chooses: try more or exit + let re_entry = self.add_epsilon(); + if let Some(s) = start { self.connect(s, branch); } @@ -288,9 +331,22 @@ impl<'src> BuildGraph<'src> { if let Some(p) = push { self.connect(loop_body_exit, p); - self.connect(p, branch); + // try_next also needs to connect to push after matching + self.connect(try_next_exit, p); + self.connect(p, re_entry); } else { - self.connect(loop_body_exit, branch); + self.connect(loop_body_exit, re_entry); + self.connect(try_next_exit, re_entry); + } + + // re_entry branches: try_next (Next nav) or exit + // If try_next's Next fails, backtrack finds re_entry checkpoint and tries exit + if greedy { + self.connect(re_entry, try_next_entry); + self.connect(re_entry, exit_point); + } else { + self.connect(re_entry, exit_point); + self.connect(re_entry, try_next_entry); } Fragment::new(entry_point, exit_point) @@ -350,28 +406,24 @@ impl<'src> BuildGraph<'src> { Fragment::new(branch, exit) } - // ───────────────────────────────────────────────────────────────────── - // Simple Loop Combinators (no array collection) - // ───────────────────────────────────────────────────────────────────── - /// Zero or more (greedy): inner* - pub fn zero_or_more(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::None) + pub fn zero_or_more(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::None, nav) } /// Zero or more (non-greedy): inner*? - pub fn zero_or_more_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::None) + pub fn zero_or_more_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::None, nav) } /// One or more (greedy): inner+ - pub fn one_or_more(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::None) + pub fn one_or_more(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::None, nav) } /// One or more (non-greedy): inner+? - pub fn one_or_more_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::None) + pub fn one_or_more_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::None, nav) } /// Optional (greedy): inner? @@ -384,55 +436,47 @@ impl<'src> BuildGraph<'src> { self.build_optional(inner, false, false) } - // ───────────────────────────────────────────────────────────────────── - // Array-Collecting Loop Combinators - // ───────────────────────────────────────────────────────────────────── - /// Zero or more with array collection (greedy): inner* - pub fn zero_or_more_array(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::Simple) + pub fn zero_or_more_array(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::Simple, nav) } /// Zero or more with array collection (non-greedy): inner*? - pub fn zero_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::Simple) + pub fn zero_or_more_array_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::Simple, nav) } /// One or more with array collection (greedy): inner+ - pub fn one_or_more_array(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::Simple) + pub fn one_or_more_array(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::Simple, nav) } /// One or more with array collection (non-greedy): inner+? - pub fn one_or_more_array_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::Simple) + pub fn one_or_more_array_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::Simple, nav) } - // ───────────────────────────────────────────────────────────────────── - // QIS-Aware Array Combinators (wrap each iteration with object scope) - // ───────────────────────────────────────────────────────────────────── - /// Zero or more with QIS object wrapping (greedy): inner* /// /// Each iteration is wrapped in StartObject/EndObject to keep /// multiple captures coupled per-iteration. - pub fn zero_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, true, ArrayMode::Qis) + pub fn zero_or_more_array_qis(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, true, ArrayMode::Qis, nav) } /// Zero or more with QIS object wrapping (non-greedy): inner*? - pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, false, false, ArrayMode::Qis) + pub fn zero_or_more_array_qis_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, false, false, ArrayMode::Qis, nav) } /// One or more with QIS object wrapping (greedy): inner+ - pub fn one_or_more_array_qis(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, true, ArrayMode::Qis) + pub fn one_or_more_array_qis(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, true, ArrayMode::Qis, nav) } /// One or more with QIS object wrapping (non-greedy): inner+? - pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment) -> Fragment { - self.build_repetition(inner, true, false, ArrayMode::Qis) + pub fn one_or_more_array_qis_lazy(&mut self, inner: Fragment, nav: Nav) -> Fragment { + self.build_repetition(inner, true, false, ArrayMode::Qis, nav) } /// Optional with QIS object wrapping: inner? @@ -446,6 +490,56 @@ impl<'src> BuildGraph<'src> { pub fn optional_qis_lazy(&mut self, inner: Fragment) -> Fragment { self.build_optional(inner, false, true) } + + /// Wrap definitions that don't already match the root node kind. + /// + /// For each definition whose entry matcher doesn't match `root_kind`, + /// prepends a transition that matches the root and descends into children. + /// This allows queries like `(function_declaration)` to work when the + /// interpreter starts at tree root (e.g., `program`). + pub fn wrap_definitions_with_root(&mut self, root_kind: &'src str) { + let def_names: Vec<&'src str> = self.definitions.keys().copied().collect(); + + for name in def_names { + let entry = self.definitions[name]; + + // Check if entry already matches root (directly or first reachable matcher) + if self.entry_matches_root(entry, root_kind) { + continue; + } + + // Create wrapper: (root_kind) with Nav::stay + let wrapper = self.add_node(BuildNode::with_matcher(BuildMatcher::node(root_kind))); + + // Add epsilon node with Nav::down between wrapper and original entry + let down_nav = self.add_epsilon(); + self.node_mut(down_nav).set_nav(Nav::down()); + + // Connect wrapper → down_nav → original entry + self.connect(wrapper, down_nav); + self.connect(down_nav, entry); + + // Update definition to point to wrapper + self.definitions.insert(name, wrapper); + } + } + + /// Check if entry (or first reachable node matcher) already matches root kind. + fn entry_matches_root(&self, entry: NodeId, root_kind: &str) -> bool { + match &self.nodes[entry as usize].matcher { + BuildMatcher::Node { kind, .. } => *kind == root_kind, + BuildMatcher::Epsilon => { + // For epsilon entries, check first reachable node matchers + for &target in &self.nodes[entry as usize].successors { + if self.entry_matches_root(target, root_kind) { + return true; + } + } + false + } + _ => false, + } + } } impl Default for BuildGraph<'_> { diff --git a/crates/plotnik-lib/src/query/graph_build.rs b/crates/plotnik-lib/src/query/graph_build.rs index 8def0eef..3f675114 100644 --- a/crates/plotnik-lib/src/query/graph_build.rs +++ b/crates/plotnik-lib/src/query/graph_build.rs @@ -12,7 +12,7 @@ use crate::parser::{ }; use super::Query; -use super::graph::{BuildEffect, BuildMatcher, Fragment, NodeId, RefMarker}; +use super::graph::{BuildEffect, BuildMatcher, BuildNode, Fragment, NodeId, RefMarker}; /// Context for navigation determination. /// When `anchored` is true, `prev_anonymous` indicates whether the preceding @@ -97,8 +97,31 @@ impl<'a> Query<'a> { .map(|(name, body)| (*name, body.clone())) .collect(); for (name, body) in entries { + self.current_def_name = name; let fragment = self.construct_expr(&body, NavContext::Root); - self.graph.add_definition(name, fragment.entry); + + // Multi-capture definitions need struct wrapping at root + let entry = if self.multi_capture_defs.contains(name) { + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject { + for_alternation: false, + }); + self.graph.connect(start_id, fragment.entry); + + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + self.graph.connect(fragment.exit, end_id); + + start_id + } else { + fragment.entry + }; + + self.graph.add_definition(name, entry); } self.link_references(); @@ -178,6 +201,31 @@ impl<'a> Query<'a> { let exit_id = self.graph.add_epsilon(); self.graph.node_mut(exit_id).set_nav(exit_ctx.to_up_nav(1)); + + // Trailing anchor retry loop: when UpSkipTrivia fails, try next sibling + if exit_ctx.has_trailing_anchor && !child_fragments.is_empty() { + let last_frag = child_fragments.last().unwrap(); + let last_entry = self.graph.node(last_frag.entry); + let last_matcher = last_entry.matcher.clone(); + let last_effects = last_entry.effects.clone(); + + // Choice point: epsilon with 2 successors (won't be eliminated) + let choice_id = self.graph.add_epsilon(); + self.graph.connect(inner.exit, choice_id); + self.graph.connect(choice_id, exit_id); // First: try UpSkipTrivia + + // Retry node: Nav::next() + same matcher + same effects + let retry_id = self.graph.add_node(BuildNode::with_matcher(last_matcher)); + self.graph.node_mut(retry_id).set_nav(Nav::next()); + for effect in last_effects { + self.graph.node_mut(retry_id).add_effect(effect); + } + self.graph.connect(choice_id, retry_id); // Second: try next sibling + self.graph.connect(retry_id, choice_id); // Loop back to choice + + return Fragment::new(node_id, exit_id); + } + self.graph.connect(inner.exit, exit_id); Fragment::new(node_id, exit_id) @@ -444,65 +492,105 @@ impl<'a> Query<'a> { // Captured sequence/alternation creates object scope for nested fields. // Tagged alternations use variants instead (handled in construct_tagged_alt). - // Quantifiers only need wrapper if QIS (2+ captures) - otherwise the array is the direct value. + // Quantifiers never need outer wrapper - QIS handles per-element wrapping inside the array. let needs_object_wrapper = match &inner_expr { Expr::SeqExpr(_) | Expr::AltExpr(_) => true, - Expr::QuantifiedExpr(q) => self.qis_triggers.contains_key(q), + Expr::QuantifiedExpr(_) => false, _ => false, }; - let matchers = self.find_all_matchers(inner_frag.entry); - for matcher_id in matchers { - self.graph - .node_mut(matcher_id) - .add_effect(BuildEffect::CaptureNode); - - if has_to_string { + // Only add CaptureNode to inner matchers when capturing a node directly. + // Captured containers (seq/alt) capture structure, not individual nodes. + if !needs_object_wrapper { + let matchers = self.find_all_matchers(inner_frag.entry); + for matcher_id in matchers { self.graph .node_mut(matcher_id) - .add_effect(BuildEffect::ToString); + .add_effect(BuildEffect::CaptureNode); + + if has_to_string { + self.graph + .node_mut(matcher_id) + .add_effect(BuildEffect::ToString); + } } } - if let Some(name) = capture_name { - let span = capture_token - .as_ref() - .map(|t| t.text_range()) - .unwrap_or_default(); + let Some(name) = capture_name else { + return inner_frag; + }; - // Check if we're capturing an alternation (for enum vs struct distinction) + // Single-capture definitions unwrap: no Field effect, type is capture's type directly. + // Only the specific propagating capture should unwrap, not nested captures. + let is_single_capture = self.is_single_capture(self.current_def_name, name); + + if is_single_capture && needs_object_wrapper { + // Captured container at single-capture definition root + let inner_captures = self.collect_propagating_captures(&inner_expr); + if inner_captures.is_empty() { + // No inner captures → Void (per ADR-0009 Payload Rule). + // Return epsilon for matching only, discard inner effects. + return self.graph.epsilon_fragment(); + } + // Has inner captures → wrap with StartObject/EndObject but skip outer Field let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject { + for_alternation: is_alternation_capture, + }); + self.graph.connect(start_id, inner_frag.entry); - let (entry, exit) = if needs_object_wrapper { - // Wrap with StartObject/EndObject for composite captures - let start_id = self.graph.add_epsilon(); - self.graph - .node_mut(start_id) - .add_effect(BuildEffect::StartObject { - for_alternation: is_alternation_capture, - }); - self.graph.connect(start_id, inner_frag.entry); + let end_id = self.graph.add_epsilon(); + self.graph + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + self.graph.connect(inner_frag.exit, end_id); - let end_id = self.graph.add_epsilon(); - self.graph - .node_mut(end_id) - .add_effect(BuildEffect::EndObject); - self.graph.connect(inner_frag.exit, end_id); + return Fragment::new(start_id, end_id); + } - (start_id, end_id) - } else { - (inner_frag.entry, inner_frag.exit) - }; + if is_single_capture { + // Non-container single capture: unwrap directly + return inner_frag; + } + + let span = capture_token + .as_ref() + .map(|t| t.text_range()) + .unwrap_or_default(); - let field_id = self.graph.add_epsilon(); + // Check if we're capturing an alternation (for enum vs struct distinction) + let is_alternation_capture = matches!(&inner_expr, Expr::AltExpr(_)); + + let (entry, exit) = if needs_object_wrapper { + // Wrap with StartObject/EndObject for composite captures + let start_id = self.graph.add_epsilon(); + self.graph + .node_mut(start_id) + .add_effect(BuildEffect::StartObject { + for_alternation: is_alternation_capture, + }); + self.graph.connect(start_id, inner_frag.entry); + + let end_id = self.graph.add_epsilon(); self.graph - .node_mut(field_id) - .add_effect(BuildEffect::Field { name, span }); - self.graph.connect(exit, field_id); - Fragment::new(entry, field_id) + .node_mut(end_id) + .add_effect(BuildEffect::EndObject); + self.graph.connect(inner_frag.exit, end_id); + + (start_id, end_id) } else { - inner_frag - } + (inner_frag.entry, inner_frag.exit) + }; + + let field_id = self.graph.add_epsilon(); + self.graph + .node_mut(field_id) + .add_effect(BuildEffect::Field { name, span }); + self.graph.connect(exit, field_id); + Fragment::new(entry, field_id) } fn construct_quantifier(&mut self, quant: &QuantifiedExpr, ctx: NavContext) -> Fragment { @@ -513,18 +601,20 @@ impl<'a> Query<'a> { return self.construct_expr(&inner_expr, ctx); }; - let f = self.construct_expr(&inner_expr, ctx); + // Build inner with Stay nav; the repetition combinator handles initial/re-entry nav + let f = self.construct_expr(&inner_expr, NavContext::Root); + let nav = ctx.to_nav(); let qis = self.qis_triggers.contains_key(quant); match (op.kind(), qis) { - (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f), - (SyntaxKind::Star, true) => self.graph.zero_or_more_array_qis(f), - (SyntaxKind::StarQuestion, false) => self.graph.zero_or_more_array_lazy(f), - (SyntaxKind::StarQuestion, true) => self.graph.zero_or_more_array_qis_lazy(f), - (SyntaxKind::Plus, false) => self.graph.one_or_more_array(f), - (SyntaxKind::Plus, true) => self.graph.one_or_more_array_qis(f), - (SyntaxKind::PlusQuestion, false) => self.graph.one_or_more_array_lazy(f), - (SyntaxKind::PlusQuestion, true) => self.graph.one_or_more_array_qis_lazy(f), + (SyntaxKind::Star, false) => self.graph.zero_or_more_array(f, nav), + (SyntaxKind::Star, true) => self.graph.zero_or_more_array_qis(f, nav), + (SyntaxKind::StarQuestion, false) => self.graph.zero_or_more_array_lazy(f, nav), + (SyntaxKind::StarQuestion, true) => self.graph.zero_or_more_array_qis_lazy(f, nav), + (SyntaxKind::Plus, false) => self.graph.one_or_more_array(f, nav), + (SyntaxKind::Plus, true) => self.graph.one_or_more_array_qis(f, nav), + (SyntaxKind::PlusQuestion, false) => self.graph.one_or_more_array_lazy(f, nav), + (SyntaxKind::PlusQuestion, true) => self.graph.one_or_more_array_qis_lazy(f, nav), (SyntaxKind::Question, false) => self.graph.optional(f), (SyntaxKind::Question, true) => self.graph.optional_qis(f), (SyntaxKind::QuestionQuestion, false) => self.graph.optional_lazy(f), @@ -582,7 +672,7 @@ impl<'a> Query<'a> { if !node.is_epsilon() { result.push(node_id); - return; + // Continue through to find all matchers in loops (e.g., try_next in quantifiers) } for &succ in &node.successors { diff --git a/crates/plotnik-lib/src/query/graph_build_tests.rs b/crates/plotnik-lib/src/query/graph_build_tests.rs index 04145597..262fcec1 100644 --- a/crates/plotnik-lib/src/query/graph_build_tests.rs +++ b/crates/plotnik-lib/src/query/graph_build_tests.rs @@ -28,8 +28,7 @@ fn named_node_with_capture() { insta::assert_snapshot!(snapshot("Q = (identifier) @id"), @r" Q = (0) - (0) —(identifier)—[CaptureNode]→ (1) - (1) —𝜀—[Field(id)]→ (✓) + (0) —(identifier)—[CaptureNode]→ (✓) "); } @@ -58,13 +57,15 @@ fn sequence() { #[test] fn sequence_with_captures() { insta::assert_snapshot!(snapshot("Q = { (a) @x (b) @y }"), @r" - Q = (1) + Q = (0) - (0) —𝜀→ (1) + (0) —𝜀—[StartObject]→ (1) (1) —{→}—(a)—[CaptureNode]→ (2) (2) —𝜀—[Field(x)]→ (3) - (3) —{→}—(b)—[CaptureNode]→ (4) - (4) —𝜀—[Field(y)]→ (✓) + (3) —{→}—(b)—[CaptureNode]→ (6) + (4) —𝜀—[Field(y)]→ (6) + (5) —𝜀—[StartObject]→ (0) + (6) —𝜀—[Field(y), EndObject]→ (✓) "); } @@ -83,44 +84,50 @@ fn alternation_untagged() { #[test] fn alternation_tagged() { insta::assert_snapshot!(snapshot("Q = [ A: (a) @x B: (b) @y ]"), @r" - Q = (0) - - (0) —𝜀→ (3), (7) - (1) —𝜀→ (✓) - (2) —𝜀—[StartVariant(A)]→ (3) - (3) —(a)—[StartVariant(A), CaptureNode]→ (5) - (4) —𝜀—[Field(x)]→ (5) - (5) —𝜀—[Field(x), EndVariant]→ (1) - (6) —𝜀—[StartVariant(B)]→ (7) - (7) —(b)—[StartVariant(B), CaptureNode]→ (9) - (8) —𝜀—[Field(y)]→ (9) - (9) —𝜀—[Field(y), EndVariant]→ (1) + Q = (00) + + (00) —𝜀—[StartObject]→ (03), (07) + (01) —𝜀→ (11) + (02) —𝜀—[StartVariant(A)]→ (03) + (03) —(a)—[StartVariant(A), CaptureNode]→ (05) + (04) —𝜀—[Field(x)]→ (05) + (05) —𝜀—[Field(x), EndVariant]→ (11) + (06) —𝜀—[StartVariant(B)]→ (07) + (07) —(b)—[StartVariant(B), CaptureNode]→ (09) + (08) —𝜀—[Field(y)]→ (09) + (09) —𝜀—[Field(y), EndVariant]→ (11) + (10) —𝜀—[StartObject]→ (00) + (11) —𝜀—[EndObject]→ (✓) "); } #[test] fn quantifier_star() { insta::assert_snapshot!(snapshot("Q = (identifier)*"), @r" - Q = (1) + Q = (4) - (0) —(identifier)→ (3) + (0) —(identifier)→ (6) (1) —𝜀—[StartArray]→ (4) (2) —𝜀—[EndArray]→ (✓) - (3) —𝜀—[PushElement]→ (4) - (4) —𝜀→ (0), (2) + (3) —𝜀—[PushElement]→ (6) + (4) —𝜀—[StartArray]→ (0), (2) + (5) —{→}—(identifier)→ (6) + (6) —𝜀—[PushElement]→ (5), (2) "); } #[test] fn quantifier_plus() { insta::assert_snapshot!(snapshot("Q = (identifier)+"), @r" - Q = (1) + Q = (0) - (0) —(identifier)→ (4) + (0) —(identifier)—[StartArray]→ (6) (1) —𝜀—[StartArray]→ (0) (2) —𝜀—[EndArray]→ (✓) - (3) —𝜀—[PushElement]→ (4) - (4) —𝜀—[PushElement]→ (0), (2) + (3) —𝜀—[PushElement]→ (6) + (4) —𝜀→ (✓) + (5) —{→}—(identifier)→ (6) + (6) —𝜀—[PushElement]→ (5), (2) "); } @@ -186,8 +193,7 @@ fn to_string_annotation() { insta::assert_snapshot!(snapshot("Q = (identifier) @name ::string"), @r" Q = (0) - (0) —(identifier)—[CaptureNode, ToString]→ (1) - (1) —𝜀—[Field(name)]→ (✓) + (0) —(identifier)—[CaptureNode, ToString]→ (✓) "); } @@ -214,29 +220,25 @@ fn anchor_sibling() { "); } -// ───────────────────────────────────────────────────────────────────────────── -// Optimization tests -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn optimized_simple() { insta::assert_snapshot!(snapshot_optimized("Q = (identifier) @id"), @r" Q = (0) - (0) —(identifier)—[CaptureNode]→ (1) - (1) —𝜀—[Field(id)]→ (✓) + (0) —(identifier)—[CaptureNode]→ (✓) "); } #[test] fn optimized_sequence() { insta::assert_snapshot!(snapshot_optimized("Q = { (a) @x (b) @y }"), @r" - Q = (1) + Q = (0) + (0) —𝜀—[StartObject]→ (1) (1) —{→}—(a)—[CaptureNode]→ (2) (2) —𝜀—[Field(x)]→ (3) - (3) —{→}—(b)—[CaptureNode]→ (4) - (4) —𝜀—[Field(y)]→ (✓) + (3) —{→}—(b)—[CaptureNode]→ (6) + (6) —𝜀—[Field(y), EndObject]→ (✓) "); } @@ -265,3 +267,83 @@ fn symbol_table_reuse() { (4) —𝜀—→ (✓) "); } + +// ============================================================================ +// wrap_definitions_with_root +// ============================================================================ + +#[test] +fn wrap_with_root_simple() { + let query = Query::try_from("Q = (identifier)") + .unwrap() + .build_graph() + .wrap_with_root("program"); + + insta::assert_snapshot!(query.graph().dump(), @r" + Q = (1) + + (0) —{↘}—(identifier)→ (✓) + (1) —(program)→ (0) + (2) —{↘}—𝜀→ (0) + "); +} + +#[test] +fn wrap_with_root_already_matches() { + // Definition already starts with root - no wrapping needed + let query = Query::try_from("Q = (program (identifier))") + .unwrap() + .build_graph() + .wrap_with_root("program"); + + insta::assert_snapshot!(query.graph().dump(), @r" + Q = (0) + + (0) —(program)→ (1) + (1) —{↘}—(identifier)→ (2) + (2) —{↗¹}—𝜀→ (✓) + "); +} + +#[test] +fn wrap_with_root_multiple_definitions() { + let input = indoc! {r#" + Foo = (identifier) + Bar = (program (string)) + "#}; + let query = Query::try_from(input) + .unwrap() + .build_graph() + .wrap_with_root("program"); + + // Foo gets wrapped, Bar already matches root + insta::assert_snapshot!(query.graph().dump(), @r" + Foo = (4) + Bar = (1) + + (0) —{↘}—(identifier)→ (✓) + (1) —(program)→ (2) + (2) —{↘}—(string)→ (3) + (3) —{↗¹}—𝜀→ (✓) + (4) —(program)→ (0) + (5) —{↘}—𝜀→ (0) + "); +} + +#[test] +fn wrap_with_root_with_captures() { + let query = Query::try_from("Q = (function_declaration name: (identifier) @name)") + .unwrap() + .build_graph() + .wrap_with_root("program"); + + insta::assert_snapshot!(query.graph().dump(), @r" + Q = (3) + + (0) —{↘}—(function_declaration)→ (1) + (1) —{↘}—(identifier)@name—[CaptureNode]→ (2) + (2) —{↗¹}—𝜀→ (✓) + (3) —(program)→ (0) + (4) —{↘}—𝜀→ (0) + "); +} diff --git a/crates/plotnik-lib/src/query/graph_master_test.rs b/crates/plotnik-lib/src/query/graph_master_test.rs index f332ec21..7cce57e8 100644 --- a/crates/plotnik-lib/src/query/graph_master_test.rs +++ b/crates/plotnik-lib/src/query/graph_master_test.rs @@ -246,269 +246,301 @@ fn golden_master_comprehensive() { ═══════════════════════════════════════════════════════════════════════════════ SimpleCapture = (000) - StringCapture = (002) - MultiCapture = (004) + StringCapture = (001) + MultiCapture = (002) AnchorFirst = (010) - AnchorLast = (014) + AnchorLast = (013) AnchorSibling = (018) - DeepNest = (024) - StarQuant = (032) - PlusQuant = (040) - OptQuant = (048) - QisNode = (061) - QisSequence = (072) - NoQis = (081) - TaggedRoot = (085) - TaggedCaptured = (095) - TaggedMulti = (110) - UntaggedSymmetric = (124) - UntaggedAsymmetric = (130) - UntaggedCaptured = (136) - CapturedSeq = (145) - UncapturedSeq = (155) - NestedScopes = (166) - Identifier = (178) - RefSimple = (180) - RefCaptured = (182) - RefChain = (185) - CardinalityJoin = (187) - NestedQuant = (207) - Complex = (212) - WildcardCapture = (262) - StringLiteral = (264) - NoCaptures = (266) - EmptyBranch = (267) - - (000) —(identifier)—[CaptureNode]→ (001) - (001) —𝜀—[Field(name)]→ (✓) - (002) —(identifier)—[CaptureNode, ToString]→ (003) - (003) —𝜀—[Field(name)]→ (✓) - (004) —(function)→ (005) - (005) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (006) - (006) —𝜀—[Field(fn_name)]→ (007) - (007) —{→}—(block)@body—[CaptureNode]→ (008) - (008) —𝜀—[Field(fn_body)]→ (009) - (009) —{↗¹}—𝜀→ (✓) + DeepNest = (026) + StarQuant = (033) + PlusQuant = (042) + OptQuant = (051) + QisNode = (068) + QisSequence = (085) + NoQis = (097) + TaggedRoot = (100) + TaggedCaptured = (112) + TaggedMulti = (126) + UntaggedSymmetric = (142) + UntaggedAsymmetric = (150) + UntaggedCaptured = (158) + CapturedSeq = (166) + UncapturedSeq = (175) + NestedScopes = (188) + Identifier = (199) + RefSimple = (200) + RefCaptured = (202) + RefChain = (204) + CardinalityJoin = (206) + NestedQuant = (222) + Complex = (242) + WildcardCapture = (306) + StringLiteral = (307) + NoCaptures = (308) + EmptyBranch = (309) + + (000) —(identifier)—[CaptureNode]→ (✓) + (001) —(identifier)—[CaptureNode, ToString]→ (✓) + (002) —(function)—[StartObject]→ (003) + (003) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (004) + (004) —𝜀—[Field(fn_name)]→ (005) + (005) —{→}—(block)@body—[CaptureNode]→ (006) + (006) —𝜀—[Field(fn_body)]→ (009) + (009) —{↗¹}—𝜀—[EndObject]→ (✓) (010) —(parent)→ (011) (011) —{↘.}—(first_child)—[CaptureNode]→ (012) - (012) —𝜀—[Field(first)]→ (013) - (013) —{↗¹}—𝜀→ (✓) - (014) —(parent)→ (015) - (015) —{↘}—(last_child)—[CaptureNode]→ (016) - (016) —𝜀—[Field(last)]→ (017) - (017) —{↗·¹}—𝜀→ (✓) - (018) —(parent)→ (019) + (012) —{↗¹}—𝜀→ (✓) + (013) —(parent)→ (014) + (014) —{↘}—(last_child)—[CaptureNode]→ (016) + (015) —{↗·¹}—𝜀→ (✓) + (016) —𝜀→ (015), (017) + (017) —{→}—(last_child)—[CaptureNode]→ (016) + (018) —(parent)—[StartObject]→ (019) (019) —{↘}—(a)—[CaptureNode]→ (020) (020) —𝜀—[Field(left)]→ (021) (021) —{→·}—(b)—[CaptureNode]→ (022) - (022) —𝜀—[Field(right)]→ (023) - (023) —{↗¹}—𝜀→ (✓) - (024) —(a)→ (025) - (025) —{↘}—(b)→ (026) - (026) —{↘}—(c)→ (027) - (027) —{↘}—(d)—[CaptureNode]→ (028) - (028) —𝜀—[Field(deep)]→ (031) - (031) —{↗³}—𝜀→ (✓) - (032) —(container)→ (034) - (033) —{↘}—(item)—[CaptureNode]→ (036) - (034) —𝜀—[StartArray]→ (037) - (036) —𝜀—[PushElement]→ (037) - (037) —𝜀→ (033), (038) - (038) —𝜀—[EndArray, Field(items)]→ (039) - (039) —{↗¹}—𝜀→ (✓) - (040) —(container)→ (042) - (041) —{↘}—(item)—[CaptureNode]→ (045) - (042) —𝜀—[StartArray]→ (041) - (045) —𝜀—[PushElement]→ (041), (046) - (046) —𝜀—[EndArray, Field(items)]→ (047) - (047) —{↗¹}—𝜀→ (✓) - (048) —(container)→ (050) - (049) —{↘}—(item)—[CaptureNode]→ (053) - (050) —𝜀→ (049), (052) - (052) —𝜀—[ClearCurrent]→ (053) - (053) —𝜀—[Field(maybe_item)]→ (054) - (054) —{↗¹}—𝜀→ (✓) - (055) —(function)—[StartObject]→ (056) - (056) —{↘}—(identifier)@name—[CaptureNode]→ (057) - (057) —𝜀—[Field(name)]→ (058) - (058) —{→}—(block)@body—[CaptureNode]→ (059) - (059) —𝜀—[Field(body)]→ (065) - (061) —𝜀—[StartArray]→ (066) - (062) —𝜀—[EndArray]→ (✓) - (065) —{↗¹}—𝜀—[EndObject, PushElement]→ (066) - (066) —𝜀→ (055), (062) - (067) —𝜀—[StartObject]→ (068) - (068) —{→}—(key)—[CaptureNode]→ (069) - (069) —𝜀—[Field(key)]→ (070) - (070) —{→}—(value)—[CaptureNode]→ (076) - (072) —𝜀—[StartArray]→ (077) - (073) —𝜀—[EndArray]→ (✓) - (076) —𝜀—[Field(value), EndObject, PushElement]→ (077) - (077) —𝜀→ (067), (073) - (079) —{→}—(item)—[CaptureNode]→ (083) - (081) —𝜀—[StartArray]→ (084) - (082) —𝜀—[EndArray]→ (✓) - (083) —𝜀—[Field(item), PushElement]→ (084) - (084) —𝜀→ (079), (082) - (085) —𝜀→ (088), (092) - (086) —𝜀→ (✓) - (088) —(success)—[StartVariant(Ok), CaptureNode]→ (090) - (090) —𝜀—[Field(val), EndVariant]→ (086) - (092) —(error)—[StartVariant(Err), CaptureNode, ToString]→ (094) - (094) —𝜀—[Field(msg), EndVariant]→ (086) - (095) —(wrapper)→ (106) - (096) —{↘}—𝜀→ (099), (103) - (099) —(left_node)—[StartVariant(Left), CaptureNode, CaptureNode]→ (101) - (101) —𝜀—[Field(l), EndVariant]→ (108) - (103) —(right_node)—[StartVariant(Right), CaptureNode, CaptureNode]→ (105) - (105) —𝜀—[Field(r), EndVariant]→ (108) - (106) —𝜀—[StartObject]→ (096) - (108) —𝜀—[EndObject, Field(choice)]→ (109) - (109) —{↗¹}—𝜀→ (✓) - (110) —𝜀→ (113), (117) - (111) —𝜀→ (✓) - (113) —(node)—[StartVariant(Simple), CaptureNode]→ (115) - (115) —𝜀—[Field(val), EndVariant]→ (111) - (117) —(pair)—[StartVariant(Complex), StartObject]→ (118) - (118) —{↘}—(key)—[CaptureNode]→ (119) - (119) —𝜀—[Field(k)]→ (120) - (120) —{→}—(value)—[CaptureNode]→ (121) - (121) —𝜀—[Field(v)]→ (123) - (123) —{↗¹}—𝜀—[EndObject, EndVariant]→ (111) - (124) —𝜀→ (126), (128) - (125) —𝜀→ (✓) - (126) —(a)—[CaptureNode]→ (127) - (127) —𝜀—[Field(val)]→ (125) - (128) —(b)—[CaptureNode]→ (129) - (129) —𝜀—[Field(val)]→ (125) - (130) —𝜀→ (132), (134) - (131) —𝜀→ (✓) - (132) —(a)—[CaptureNode]→ (133) - (133) —𝜀—[Field(x)]→ (131) - (134) —(b)—[CaptureNode]→ (135) - (135) —𝜀—[Field(y)]→ (131) - (136) —𝜀—[StartObject]→ (138), (140) - (138) —(a)—[CaptureNode, CaptureNode]→ (139) - (139) —𝜀—[Field(x)]→ (144) - (140) —(b)—[CaptureNode, CaptureNode]→ (141) - (141) —𝜀—[Field(y)]→ (144) - (144) —𝜀—[EndObject, Field(data)]→ (✓) - (145) —(outer)→ (151) - (146) —{↘}—𝜀→ (147) - (147) —{→}—(inner)—[CaptureNode, CaptureNode]→ (148) - (148) —𝜀—[Field(x)]→ (149) - (149) —{→}—(inner2)—[CaptureNode]→ (153) - (151) —𝜀—[StartObject]→ (146) - (153) —𝜀—[Field(y), EndObject, Field(nested)]→ (154) - (154) —{↗¹}—𝜀→ (✓) - (155) —(outer)→ (156) - (156) —{↘}—𝜀→ (157) - (157) —{→}—(inner)—[CaptureNode]→ (158) - (158) —𝜀—[Field(x)]→ (159) - (159) —{→}—(inner2)—[CaptureNode]→ (160) - (160) —𝜀—[Field(y)]→ (161) - (161) —{↗¹}—𝜀→ (✓) - (163) —{→}—𝜀→ (164) - (164) —{→}—(a)—[CaptureNode, CaptureNode, CaptureNode]→ (172) - (166) —𝜀—[StartObject, StartObject]→ (163) - (169) —{→}—𝜀→ (170) - (170) —{→}—(b)—[CaptureNode, CaptureNode]→ (177) - (172) —𝜀—[Field(a), EndObject, Field(inner1), StartObject]→ (169) - (177) —𝜀—[Field(b), EndObject, Field(inner2), EndObject, Field(outer)]→ (✓) - (178) —(identifier)—[CaptureNode]→ (179) - (179) —𝜀—[Field(id)]→ (✓) - (180) ——𝜀→ (178), (181) - (181) —𝜀—→ (✓) - (182) ——𝜀→ (178), (183) - (183) —𝜀——[CaptureNode]→ (184) - (184) —𝜀—[Field(captured_id)]→ (✓) - (185) ——𝜀→ (180), (186) - (186) —𝜀—→ (✓) - (187) —𝜀→ (189), (191) - (188) —{↗¹}—𝜀→ (✓) - (189) —(single)—[CaptureNode]→ (190) - (190) —𝜀—[Field(item)]→ (188) - (191) —(multi)→ (193) - (192) —{↘}—(x)—[CaptureNode]→ (196) - (193) —𝜀—[StartArray]→ (192) - (196) —𝜀—[PushElement]→ (192), (197) - (197) —𝜀—[EndArray, Field(item)]→ (188) - (199) —(_)—[CaptureNode]→ (201) - (200) —{↘}—(item)—[CaptureNode]→ (203) - (201) —𝜀—[StartArray]→ (204) - (203) —𝜀—[PushElement]→ (204) - (204) —𝜀→ (200), (205) - (205) —𝜀—[EndArray, Field(inner)]→ (210) - (207) —𝜀—[StartArray]→ (199) - (210) —{↗¹}—𝜀—[PushElement]→ (199), (211) - (211) —𝜀—[EndArray, Field(outer)]→ (✓) - (212) —(module)→ (213) - (213) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (216) - (215) —{→·}—(import)—[CaptureNode]→ (218) - (216) —𝜀—[Field(mod_name), StartArray]→ (219) - (218) —𝜀—[PushElement]→ (219) - (219) —𝜀→ (215), (220) - (220) —𝜀—[EndArray, Field(imports)]→ (221) - (221) —{→}—(block)@body→ (251) - (222) —{↘}—𝜀→ (223) - (223) —{→}—𝜀→ (226), (244) - (226) —(function)—[StartVariant(Func), StartObject, CaptureNode]→ (227) - (227) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (228) - (228) —𝜀—[Field(fn_name)]→ (229) - (229) —{→}—(parameters)@params→ (233) - (230) —{↘}—𝜀→ (231) - (231) —{→}—(param)—[CaptureNode, CaptureNode]→ (235) - (233) —𝜀—[StartArray]→ (236) - (235) —𝜀—[Field(p), PushElement]→ (236) - (236) —𝜀→ (230), (237) - (237) —𝜀—[EndArray, Field(params)]→ (238) - (238) —{↗¹}—𝜀→ (239) - (239) —{→}—(block)@body—[CaptureNode]→ (240) - (240) —𝜀—[Field(fn_body)]→ (242) - (242) —{↗¹}—𝜀—[EndObject, EndVariant]→ (255) - (244) —(class)—[StartVariant(Class), StartObject, CaptureNode]→ (245) - (245) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (246) - (246) —𝜀—[Field(cls_name)]→ (247) - (247) —{→}—(class_body)@body—[CaptureNode]→ (248) - (248) —𝜀—[Field(cls_body)]→ (250) - (250) —{↗¹}—𝜀—[EndObject, EndVariant]→ (255) - (251) —𝜀—[StartObject, StartArray]→ (256) - (253) —𝜀—[StartObject]→ (222) - (255) —𝜀—[EndObject, PushElement]→ (256) - (256) —𝜀→ (253), (259) - (259) —𝜀—[EndArray, EndObject, Field(items)]→ (260) - (260) —{↗¹}—𝜀→ (261) - (261) —{↗·¹}—𝜀→ (✓) - (262) —(🞵)—[CaptureNode]→ (263) - (263) —𝜀—[Field(any)]→ (✓) - (264) —"+"—[CaptureNode]→ (265) - (265) —𝜀—[Field(op)]→ (✓) - (266) —(identifier)→ (✓) - (267) —𝜀→ (270), (274) - (268) —𝜀→ (✓) - (270) —(value)—[StartVariant(Some), CaptureNode]→ (272) - (272) —𝜀—[Field(val), EndVariant]→ (268) - (274) —(none_marker)—[StartVariant(None)]→ (275) - (275) —𝜀—[EndVariant]→ (268) + (022) —𝜀—[Field(right)]→ (025) + (025) —{↗¹}—𝜀—[EndObject]→ (✓) + (026) —(a)→ (027) + (027) —{↘}—(b)→ (028) + (028) —{↘}—(c)→ (029) + (029) —{↘}—(d)—[CaptureNode]→ (032) + (032) —{↗³}—𝜀→ (✓) + (033) —(container)→ (038) + (034) —{↘}—(item)—[CaptureNode]→ (040) + (036) —𝜀—[EndArray]→ (041) + (038) —𝜀—[StartArray]→ (034), (036) + (039) —{→}—(item)—[CaptureNode]→ (040) + (040) —𝜀—[PushElement]→ (039), (036) + (041) —{↗¹}—𝜀→ (✓) + (042) —(container)→ (044) + (043) —{↘}—(item)—[CaptureNode]→ (049) + (044) —𝜀—[StartArray]→ (043) + (045) —𝜀—[EndArray]→ (050) + (047) —𝜀→ (✓) + (048) —{→}—(item)—[CaptureNode]→ (049) + (049) —𝜀—[PushElement]→ (048), (045) + (050) —{↗¹}—𝜀→ (✓) + (051) —(container)→ (053) + (052) —(item)—[CaptureNode]→ (056) + (053) —𝜀→ (052), (055) + (055) —𝜀—[ClearCurrent]→ (056) + (056) —{↗¹}—𝜀→ (✓) + (057) —(function)—[StartObject]→ (058) + (058) —{↘}—(identifier)@name—[CaptureNode]→ (059) + (059) —𝜀—[Field(name)]→ (060) + (060) —{→}—(block)@body—[CaptureNode]→ (061) + (061) —𝜀—[Field(body)]→ (066) + (066) —{↗¹}—𝜀—[EndObject]→ (072) + (068) —𝜀—[StartObject, StartArray]→ (057), (074) + (069) —{→}—(function)→ (058), (071) + (070) —𝜀—[StartObject]→ (069) + (071) —𝜀—[EndObject]→ (072) + (072) —𝜀—[PushElement]→ (070), (074) + (074) —𝜀—[EndArray, EndObject]→ (✓) + (075) —𝜀—[StartObject]→ (076) + (076) —{→}—(key)—[CaptureNode]→ (077) + (077) —𝜀—[Field(key)]→ (078) + (078) —{→}—(value)—[CaptureNode]→ (083) + (083) —𝜀—[Field(value), EndObject]→ (089) + (085) —𝜀—[StartObject, StartArray]→ (075), (091) + (086) —{→}—𝜀→ (076), (088) + (087) —𝜀—[StartObject]→ (086) + (088) —𝜀—[EndObject]→ (089) + (089) —𝜀—[PushElement]→ (087), (091) + (091) —𝜀—[EndArray, EndObject]→ (✓) + (093) —{→}—(item)—[CaptureNode]→ (099) + (095) —𝜀—[EndArray]→ (✓) + (097) —𝜀—[StartArray]→ (093), (095) + (098) —{→}—𝜀→ (093), (099) + (099) —𝜀—[PushElement]→ (098), (095) + (100) —𝜀—[StartObject]→ (103), (107) + (103) —(success)—[StartVariant(Ok), CaptureNode]→ (105) + (105) —𝜀—[Field(val), EndVariant]→ (111) + (107) —(error)—[StartVariant(Err), CaptureNode, ToString]→ (109) + (109) —𝜀—[Field(msg), EndVariant]→ (111) + (111) —𝜀—[EndObject]→ (✓) + (112) —(wrapper)→ (123) + (113) —{↘}—𝜀→ (116), (120) + (116) —(left_node)—[StartVariant(Left), CaptureNode]→ (118) + (118) —𝜀—[Field(l), EndVariant]→ (124) + (120) —(right_node)—[StartVariant(Right), CaptureNode]→ (122) + (122) —𝜀—[Field(r), EndVariant]→ (124) + (123) —𝜀—[StartObject]→ (113) + (124) —𝜀—[EndObject]→ (125) + (125) —{↗¹}—𝜀→ (✓) + (126) —𝜀—[StartObject]→ (129), (133) + (129) —(node)—[StartVariant(Simple), CaptureNode]→ (131) + (131) —𝜀—[Field(val), EndVariant]→ (141) + (133) —(pair)—[StartVariant(Complex), StartObject]→ (134) + (134) —{↘}—(key)—[CaptureNode]→ (135) + (135) —𝜀—[Field(k)]→ (136) + (136) —{→}—(value)—[CaptureNode]→ (137) + (137) —𝜀—[Field(v)]→ (139) + (139) —{↗¹}—𝜀—[EndObject, EndVariant]→ (141) + (141) —𝜀—[EndObject]→ (✓) + (142) —𝜀—[StartObject]→ (144), (146) + (144) —(a)—[CaptureNode]→ (145) + (145) —𝜀—[Field(val)]→ (149) + (146) —(b)—[CaptureNode]→ (147) + (147) —𝜀—[Field(val)]→ (149) + (149) —𝜀—[EndObject]→ (✓) + (150) —𝜀—[StartObject]→ (152), (154) + (152) —(a)—[CaptureNode]→ (153) + (153) —𝜀—[Field(x)]→ (157) + (154) —(b)—[CaptureNode]→ (155) + (155) —𝜀—[Field(y)]→ (157) + (157) —𝜀—[EndObject]→ (✓) + (158) —𝜀—[StartObject]→ (160), (162) + (160) —(a)—[CaptureNode]→ (161) + (161) —𝜀—[Field(x)]→ (165) + (162) —(b)—[CaptureNode]→ (163) + (163) —𝜀—[Field(y)]→ (165) + (165) —𝜀—[EndObject]→ (✓) + (166) —(outer)→ (172) + (167) —{↘}—𝜀→ (168) + (168) —{→}—(inner)—[CaptureNode]→ (169) + (169) —𝜀—[Field(x)]→ (170) + (170) —{→}—(inner2)—[CaptureNode]→ (173) + (172) —𝜀—[StartObject]→ (167) + (173) —𝜀—[Field(y), EndObject]→ (174) + (174) —{↗¹}—𝜀→ (✓) + (175) —(outer)—[StartObject]→ (176) + (176) —{↘}—𝜀→ (177) + (177) —{→}—(inner)—[CaptureNode]→ (178) + (178) —𝜀—[Field(x)]→ (179) + (179) —{→}—(inner2)—[CaptureNode]→ (180) + (180) —𝜀—[Field(y)]→ (183) + (183) —{↗¹}—𝜀—[EndObject]→ (✓) + (185) —{→}—𝜀→ (186) + (186) —{→}—(a)—[CaptureNode]→ (194) + (188) —𝜀—[StartObject, StartObject]→ (185) + (191) —{→}—𝜀→ (192) + (192) —{→}—(b)—[CaptureNode]→ (198) + (194) —𝜀—[Field(a), EndObject, Field(inner1), StartObject]→ (191) + (198) —𝜀—[Field(b), EndObject, Field(inner2), EndObject]→ (✓) + (199) —(identifier)—[CaptureNode]→ (✓) + (200) ——𝜀→ (199), (201) + (201) —𝜀—→ (✓) + (202) ——𝜀→ (199), (203) + (203) —𝜀——[CaptureNode]→ (✓) + (204) ——𝜀→ (200), (205) + (205) —𝜀—→ (✓) + (206) —𝜀—[StartObject]→ (208), (210) + (208) —(single)—[CaptureNode]→ (209) + (209) —𝜀—[Field(item)]→ (221) + (210) —(multi)→ (212) + (211) —{↘}—(x)—[CaptureNode]→ (217) + (212) —𝜀—[StartArray]→ (211) + (215) —𝜀→ (✓) + (216) —{→}—(x)—[CaptureNode]→ (217) + (217) —𝜀—[PushElement]→ (216), (218) + (218) —𝜀—[EndArray, Field(item)]→ (219) + (219) —{↗¹}—𝜀→ (221) + (221) —𝜀—[EndObject]→ (✓) + (222) —(_)—[StartArray, StartObject, CaptureNode]→ (227) + (223) —{↘}—(item)—[CaptureNode, CaptureNode]→ (229) + (227) —𝜀—[StartArray]→ (223), (230) + (228) —{→}—(item)—[CaptureNode, CaptureNode]→ (229) + (229) —𝜀—[PushElement]→ (228), (230) + (230) —𝜀—[EndArray, Field(inner)]→ (235) + (233) —𝜀—[EndArray]→ (✓) + (235) —{↗¹}—𝜀—[EndObject]→ (241) + (237) —𝜀→ (✓) + (238) —{→}—(_)—[CaptureNode]→ (227), (240) + (239) —𝜀—[StartObject]→ (238) + (240) —𝜀—[EndObject]→ (241) + (241) —𝜀—[PushElement]→ (239), (233) + (242) —(module)—[StartObject]→ (243) + (243) —{↘}—(identifier)@name—[CaptureNode, ToString]→ (249) + (245) —{→·}—(import)—[CaptureNode]→ (251) + (249) —𝜀—[Field(mod_name), StartArray]→ (245), (252) + (250) —{→}—(import)—[CaptureNode]→ (251) + (251) —𝜀—[PushElement]→ (250), (252) + (252) —𝜀—[EndArray, Field(imports)]→ (253) + (253) —{→}—(block)@body→ (294) + (254) —{↘}—𝜀→ (255) + (255) —{→}—𝜀→ (258), (282) + (258) —(function)—[StartVariant(Func), StartObject, CaptureNode]→ (259) + (259) —{↘}—(identifier)@name—[CaptureNode, ToString, CaptureNode]→ (260) + (260) —𝜀—[Field(fn_name)]→ (261) + (261) —{→}—(parameters)@params—[CaptureNode]→ (270) + (262) —{↘}—𝜀→ (263) + (263) —{→}—(param)—[CaptureNode, CaptureNode, CaptureNode]→ (268) + (267) —𝜀—[StartObject]→ (262) + (268) —𝜀—[Field(p), EndObject]→ (274) + (270) —𝜀—[StartArray]→ (267), (275) + (271) —{→}—𝜀→ (263), (273) + (272) —𝜀—[StartObject]→ (271) + (273) —𝜀—[EndObject]→ (274) + (274) —𝜀—[PushElement]→ (272), (275) + (275) —𝜀—[EndArray, Field(params)]→ (276) + (276) —{↗¹}—𝜀→ (277) + (277) —{→}—(block)@body—[CaptureNode, CaptureNode]→ (278) + (278) —𝜀—[Field(fn_body)]→ (280) + (280) —{↗¹}—𝜀—[EndObject, EndVariant]→ (292) + (282) —(class)—[StartVariant(Class), StartObject, CaptureNode]→ (283) + (283) —{↘}—(identifier)@name—[CaptureNode, ToString, CaptureNode]→ (284) + (284) —𝜀—[Field(cls_name)]→ (285) + (285) —{→}—(class_body)@body—[CaptureNode, CaptureNode]→ (286) + (286) —𝜀—[Field(cls_body)]→ (288) + (288) —{↗¹}—𝜀—[EndObject, EndVariant]→ (292) + (291) —𝜀—[StartObject]→ (254) + (292) —𝜀—[EndObject]→ (298) + (294) —𝜀—[StartArray]→ (291), (299) + (295) —{→}—𝜀→ (255), (297) + (296) —𝜀—[StartObject]→ (295) + (297) —𝜀—[EndObject]→ (298) + (298) —𝜀—[PushElement]→ (296), (299) + (299) —𝜀—[EndArray, Field(items)]→ (300) + (300) —{↗¹}—𝜀→ (302) + (302) —𝜀→ (305), (303) + (303) —{→}—(block)@body→ (302) + (305) —{↗·¹}—𝜀—[EndObject]→ (✓) + (306) —(🞵)—[CaptureNode]→ (✓) + (307) —"+"—[CaptureNode]→ (✓) + (308) —(identifier)→ (✓) + (309) —𝜀→ (312), (315) + (310) —𝜀→ (✓) + (312) —(value)—[StartVariant(Some), CaptureNode]→ (313) + (313) —𝜀—[EndVariant]→ (310) + (315) —(none_marker)—[StartVariant(None)]→ (316) + (316) —𝜀—[EndVariant]→ (310) ═══════════════════════════════════════════════════════════════════════════════ TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ + Identifier = Node RefSimple = () + WildcardCapture = Node + UntaggedSymmetric = Node + UntaggedCaptured = UntaggedCapturedScope3 + TaggedCaptured = TaggedCapturedScope13 + StringLiteral = Node + StringCapture = str + StarQuant = [Node] + SimpleCapture = Node RefChain = () - QisSequence = T26 - QisNode = T28 + RefCaptured = Node + QisSequence = T16 + QisNode = T18 + PlusQuant = [Node]⁺ + OptQuant = Node? + NoQis = [Node] NoCaptures = () - - Identifier = { id: Node } - WildcardCapture = { any: Node } - UntaggedSymmetric = { val: Node } - UntaggedCapturedScope6 = { + NestedScopes = NestedScopesScope24 + NestedQuant = T27 + DeepNest = Node + CardinalityJoin = [Node]⁺ + CapturedSeq = CapturedSeqScope42 + AnchorLast = Node + AnchorFirst = Node + + UntaggedCapturedScope3 = { x: Node? y: Node? } - UntaggedCaptured = { data: UntaggedCapturedScope6 } UntaggedAsymmetric = { x: Node? y: Node? @@ -521,48 +553,36 @@ fn golden_master_comprehensive() { Ok => Node Err => str } - TaggedMultiScope15 = { + TaggedMultiScope11 = { k: Node v: Node } TaggedMulti = { Simple => Node - Complex => TaggedMultiScope15 + Complex => TaggedMultiScope11 } - TaggedCapturedScope17 = { + TaggedCapturedScope13 = { Left => Node Right => Node } - TaggedCaptured = { choice: TaggedCapturedScope17 } - StringLiteral = { op: Node } - StringCapture = { name: str } - StarQuant = { items: [Node] } - SimpleCapture = { name: Node } - RefCaptured = { captured_id: Identifier } - QisSequenceScope25 = { + QisSequenceScope15 = { key: Node value: Node } - T26 = [QisSequenceScope25] - QisNodeScope27 = { + T16 = [QisSequenceScope15] + QisNodeScope17 = { name: Node body: Node } - T28 = [QisNodeScope27] - PlusQuant = { items: [Node]⁺ } - OptQuant = { maybe_item: Node? } - NoQis = { item: [Node] } - NestedScopesScope35 = { a: Node } - NestedScopesScope36 = { b: Node } - NestedScopesScope37 = { - inner1: NestedScopesScope35 - inner2: NestedScopesScope36 - } - NestedScopes = { outer: NestedScopesScope37 } - NestedQuant = { - inner: [Node] - outer: [Node]⁺ + T18 = [QisNodeScope17] + NestedScopesScope22 = { a: Node } + NestedScopesScope23 = { b: Node } + NestedScopesScope24 = { + inner1: NestedScopesScope22 + inner2: NestedScopesScope23 } + NestedQuantScope25 = { inner: [Node] } + T27 = [NestedQuantScope25]⁺ MultiCapture = { fn_name: str fn_body: Node @@ -571,33 +591,30 @@ fn golden_master_comprehensive() { Some => Node None => () } - DeepNest = { deep: Node } - ComplexScope45 = { + ComplexScope30 = { p: Node } + T31 = [ComplexScope30] + T33 = T31? + ComplexScope32 = { fn_name: str? - p: [Node] - params: [Node] + params: T33 fn_body: Node? cls_name: str? cls_body: Node? } - T52 = [ComplexScope45] + T38 = [ComplexScope32] Complex = { mod_name: str imports: [Node] - items: T52 + items: T38 } - CardinalityJoin = { item: [Node]⁺ } - CapturedSeqScope57 = { + CapturedSeqScope42 = { x: Node y: Node } - CapturedSeq = { nested: CapturedSeqScope57 } AnchorSibling = { left: Node right: Node } - AnchorLast = { last: Node } - AnchorFirst = { first: Node } "#); } @@ -639,70 +656,71 @@ fn golden_navigation_patterns() { ═══════════════════════════════════════════════════════════════════════════════ NavStay = (00) - NavDown = (02) - NavDownAnchor = (06) - NavNext = (10) - NavNextAnchor = (16) - NavUp = (22) + NavDown = (01) + NavDownAnchor = (04) + NavNext = (07) + NavNextAnchor = (15) + NavUp = (23) NavUpAnchor = (28) - NavUpMulti = (32) + NavUpMulti = (33) NavMixed = (42) - (00) —(root)—[CaptureNode]→ (01) - (01) —𝜀—[Field(r)]→ (✓) - (02) —(parent)→ (03) - (03) —{↘}—(child)—[CaptureNode]→ (04) - (04) —𝜀—[Field(c)]→ (05) - (05) —{↗¹}—𝜀→ (✓) - (06) —(parent)→ (07) - (07) —{↘.}—(child)—[CaptureNode]→ (08) - (08) —𝜀—[Field(c)]→ (09) - (09) —{↗¹}—𝜀→ (✓) - (10) —(parent)→ (11) - (11) —{↘}—(a)—[CaptureNode]→ (12) - (12) —𝜀—[Field(a)]→ (13) - (13) —{→}—(b)—[CaptureNode]→ (14) - (14) —𝜀—[Field(b)]→ (15) - (15) —{↗¹}—𝜀→ (✓) - (16) —(parent)→ (17) - (17) —{↘}—(a)—[CaptureNode]→ (18) - (18) —𝜀—[Field(a)]→ (19) - (19) —{→·}—(b)—[CaptureNode]→ (20) - (20) —𝜀—[Field(b)]→ (21) - (21) —{↗¹}—𝜀→ (✓) - (22) —(a)→ (23) - (23) —{↘}—(b)→ (24) - (24) —{↘}—(c)—[CaptureNode]→ (25) - (25) —𝜀—[Field(c)]→ (27) + (00) —(root)—[CaptureNode]→ (✓) + (01) —(parent)→ (02) + (02) —{↘}—(child)—[CaptureNode]→ (03) + (03) —{↗¹}—𝜀→ (✓) + (04) —(parent)→ (05) + (05) —{↘.}—(child)—[CaptureNode]→ (06) + (06) —{↗¹}—𝜀→ (✓) + (07) —(parent)—[StartObject]→ (08) + (08) —{↘}—(a)—[CaptureNode]→ (09) + (09) —𝜀—[Field(a)]→ (10) + (10) —{→}—(b)—[CaptureNode]→ (11) + (11) —𝜀—[Field(b)]→ (14) + (14) —{↗¹}—𝜀—[EndObject]→ (✓) + (15) —(parent)—[StartObject]→ (16) + (16) —{↘}—(a)—[CaptureNode]→ (17) + (17) —𝜀—[Field(a)]→ (18) + (18) —{→·}—(b)—[CaptureNode]→ (19) + (19) —𝜀—[Field(b)]→ (22) + (22) —{↗¹}—𝜀—[EndObject]→ (✓) + (23) —(a)→ (24) + (24) —{↘}—(b)→ (25) + (25) —{↘}—(c)—[CaptureNode]→ (27) (27) —{↗²}—𝜀→ (✓) (28) —(parent)→ (29) - (29) —{↘}—(child)—[CaptureNode]→ (30) - (30) —𝜀—[Field(c)]→ (31) - (31) —{↗·¹}—𝜀→ (✓) - (32) —(a)→ (33) - (33) —{↘}—(b)→ (34) - (34) —{↘}—(c)→ (35) - (35) —{↘}—(d)→ (36) - (36) —{↘}—(e)—[CaptureNode]→ (37) - (37) —𝜀—[Field(e)]→ (41) + (29) —{↘}—(child)—[CaptureNode]→ (31) + (30) —{↗·¹}—𝜀→ (✓) + (31) —𝜀→ (30), (32) + (32) —{→}—(child)—[CaptureNode]→ (31) + (33) —(a)→ (34) + (34) —{↘}—(b)→ (35) + (35) —{↘}—(c)→ (36) + (36) —{↘}—(d)→ (37) + (37) —{↘}—(e)—[CaptureNode]→ (41) (41) —{↗⁴}—𝜀→ (✓) - (42) —(outer)→ (43) + (42) —(outer)—[StartObject]→ (43) (43) —{↘.}—(first)—[CaptureNode]→ (44) (44) —𝜀—[Field(f)]→ (45) (45) —{→}—(middle)—[CaptureNode]→ (46) (46) —𝜀—[Field(m)]→ (47) (47) —{→·}—(last)—[CaptureNode]→ (48) - (48) —𝜀—[Field(l)]→ (49) - (49) —{↗·¹}—𝜀→ (✓) + (48) —𝜀—[Field(l)]→ (50) + (50) —𝜀→ (53), (51) + (51) —{→}—(last)—[CaptureNode]→ (50) + (53) —{↗·¹}—𝜀—[EndObject]→ (✓) ═══════════════════════════════════════════════════════════════════════════════ TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ - NavUpMulti = { e: Node } - NavUpAnchor = { c: Node } - NavUp = { c: Node } - NavStay = { r: Node } + NavUpMulti = Node + NavUpAnchor = Node + NavUp = Node + NavStay = Node + NavDownAnchor = Node + NavDown = Node + NavNextAnchor = { a: Node b: Node @@ -716,8 +734,6 @@ fn golden_navigation_patterns() { m: Node l: Node } - NavDownAnchor = { c: Node } - NavDown = { c: Node } "); } @@ -760,95 +776,103 @@ fn golden_type_inference() { TRANSITION GRAPH ═══════════════════════════════════════════════════════════════════════════════ - FlatScope = (00) - BaseWithCapture = (08) - RefOpaque = (10) - RefCaptured = (12) - TaggedAtRoot = (15) - TaggedInline = (25) - CardMult = (45) - QisTwo = (54) - NoQisOne = (63) - MissingField = (67) - SyntheticNames = (85) - - (00) —(a)→ (01) - (01) —{↘}—(b)→ (02) - (02) —{↘}—(c)→ (03) - (03) —{↘}—(d)—[CaptureNode]→ (04) - (04) —𝜀—[Field(val)]→ (07) - (07) —{↗³}—𝜀→ (✓) - (08) —(identifier)—[CaptureNode]→ (09) - (09) —𝜀—[Field(name)]→ (✓) - (10) ——𝜀→ (08), (11) - (11) —𝜀—→ (✓) - (12) ——𝜀→ (08), (13) - (13) —𝜀——[CaptureNode]→ (14) - (14) —𝜀—[Field(result)]→ (✓) - (15) —𝜀→ (18), (22) - (16) —𝜀→ (✓) - (18) —(a)—[StartVariant(A), CaptureNode]→ (20) - (20) —𝜀—[Field(x), EndVariant]→ (16) - (22) —(b)—[StartVariant(B), CaptureNode]→ (24) - (24) —𝜀—[Field(y), EndVariant]→ (16) - (25) —(wrapper)→ (26) - (26) —{↘}—𝜀→ (29), (33) - (29) —(a)—[StartVariant(A), CaptureNode]→ (31) - (31) —𝜀—[Field(x), EndVariant]→ (36) - (33) —(b)—[StartVariant(B), CaptureNode]→ (35) - (35) —𝜀—[Field(y), EndVariant]→ (36) - (36) —{↗¹}—𝜀→ (✓) - (37) —(_)→ (39) - (38) —{↘}—(item)—[CaptureNode]→ (42) - (39) —𝜀—[StartArray]→ (38) - (42) —𝜀—[PushElement]→ (38), (43) - (43) —𝜀—[EndArray, Field(items)]→ (47) - (45) —𝜀—[StartArray]→ (48) - (46) —𝜀—[EndArray]→ (✓) - (47) —{↗¹}—𝜀—[PushElement]→ (48) - (48) —𝜀→ (37), (46) - (49) —𝜀—[StartObject]→ (50) - (50) —{→}—(a)—[CaptureNode]→ (51) - (51) —𝜀—[Field(x)]→ (52) - (52) —{→}—(b)—[CaptureNode]→ (58) - (54) —𝜀—[StartArray]→ (59) - (55) —𝜀—[EndArray]→ (✓) - (58) —𝜀—[Field(y), EndObject, PushElement]→ (59) - (59) —𝜀→ (49), (55) - (61) —{→}—(a)—[CaptureNode]→ (65) - (63) —𝜀—[StartArray]→ (66) - (64) —𝜀—[EndArray]→ (✓) - (65) —𝜀—[Field(x), PushElement]→ (66) - (66) —𝜀→ (61), (64) - (67) —𝜀→ (70), (80) - (68) —𝜀→ (✓) - (70) —(full)—[StartVariant(Full), StartObject]→ (71) - (71) —{↘}—(a)—[CaptureNode]→ (72) - (72) —𝜀—[Field(a)]→ (73) - (73) —{→}—(b)—[CaptureNode]→ (74) - (74) —𝜀—[Field(b)]→ (75) - (75) —{→}—(c)—[CaptureNode]→ (76) - (76) —𝜀—[Field(c)]→ (78) - (78) —{↗¹}—𝜀—[EndObject, EndVariant]→ (68) - (80) —(partial)—[StartVariant(Partial)]→ (81) - (81) —{↘}—(a)—[CaptureNode]→ (82) - (82) —𝜀—[Field(a)]→ (84) - (84) —{↗¹}—𝜀—[EndVariant]→ (68) - (85) —(foo)→ (89) - (86) —{↘}—𝜀→ (87) - (87) —{→}—(bar)—[CaptureNode, CaptureNode]→ (91) - (89) —𝜀—[StartObject]→ (86) - (91) —𝜀—[Field(bar), EndObject, Field(baz)]→ (92) - (92) —{↗¹}—𝜀→ (✓) + FlatScope = (000) + BaseWithCapture = (007) + RefOpaque = (008) + RefCaptured = (010) + TaggedAtRoot = (012) + TaggedInline = (024) + CardMult = (050) + QisTwo = (063) + NoQisOne = (075) + MissingField = (078) + SyntheticNames = (098) + + (000) —(a)→ (001) + (001) —{↘}—(b)→ (002) + (002) —{↘}—(c)→ (003) + (003) —{↘}—(d)—[CaptureNode]→ (006) + (006) —{↗³}—𝜀→ (✓) + (007) —(identifier)—[CaptureNode]→ (✓) + (008) ——𝜀→ (007), (009) + (009) —𝜀—→ (✓) + (010) ——𝜀→ (007), (011) + (011) —𝜀——[CaptureNode]→ (✓) + (012) —𝜀—[StartObject]→ (015), (019) + (015) —(a)—[StartVariant(A), CaptureNode]→ (017) + (017) —𝜀—[Field(x), EndVariant]→ (023) + (019) —(b)—[StartVariant(B), CaptureNode]→ (021) + (021) —𝜀—[Field(y), EndVariant]→ (023) + (023) —𝜀—[EndObject]→ (✓) + (024) —(wrapper)—[StartObject]→ (025) + (025) —{↘}—𝜀→ (028), (032) + (028) —(a)—[StartVariant(A), CaptureNode]→ (030) + (030) —𝜀—[Field(x), EndVariant]→ (037) + (032) —(b)—[StartVariant(B), CaptureNode]→ (034) + (034) —𝜀—[Field(y), EndVariant]→ (037) + (037) —{↗¹}—𝜀—[EndObject]→ (✓) + (038) —(_)→ (040) + (039) —{↘}—(item)—[CaptureNode]→ (045) + (040) —𝜀—[StartArray]→ (039) + (041) —𝜀—[EndArray]→ (046) + (043) —𝜀→ (✓) + (044) —{→}—(item)—[CaptureNode]→ (045) + (045) —𝜀—[PushElement]→ (044), (041) + (046) —{↗¹}—𝜀→ (052) + (048) —𝜀—[EndArray]→ (✓) + (050) —𝜀—[StartArray]→ (038), (048) + (051) —{→}—(_)→ (040), (052) + (052) —𝜀—[PushElement]→ (051), (048) + (053) —𝜀—[StartObject]→ (054) + (054) —{→}—(a)—[CaptureNode]→ (055) + (055) —𝜀—[Field(x)]→ (056) + (056) —{→}—(b)—[CaptureNode]→ (061) + (061) —𝜀—[Field(y), EndObject]→ (067) + (063) —𝜀—[StartObject, StartArray]→ (053), (069) + (064) —{→}—𝜀→ (054), (066) + (065) —𝜀—[StartObject]→ (064) + (066) —𝜀—[EndObject]→ (067) + (067) —𝜀—[PushElement]→ (065), (069) + (069) —𝜀—[EndArray, EndObject]→ (✓) + (071) —{→}—(a)—[CaptureNode]→ (077) + (073) —𝜀—[EndArray]→ (✓) + (075) —𝜀—[StartArray]→ (071), (073) + (076) —{→}—𝜀→ (071), (077) + (077) —𝜀—[PushElement]→ (076), (073) + (078) —𝜀—[StartObject]→ (081), (091) + (081) —(full)—[StartVariant(Full), StartObject]→ (082) + (082) —{↘}—(a)—[CaptureNode]→ (083) + (083) —𝜀—[Field(a)]→ (084) + (084) —{→}—(b)—[CaptureNode]→ (085) + (085) —𝜀—[Field(b)]→ (086) + (086) —{→}—(c)—[CaptureNode]→ (087) + (087) —𝜀—[Field(c)]→ (089) + (089) —{↗¹}—𝜀—[EndObject, EndVariant]→ (097) + (091) —(partial)—[StartVariant(Partial)]→ (092) + (092) —{↘}—(a)—[CaptureNode]→ (093) + (093) —𝜀—[Field(a)]→ (095) + (095) —{↗¹}—𝜀—[EndVariant]→ (097) + (097) —𝜀—[EndObject]→ (✓) + (098) —(foo)→ (102) + (099) —{↘}—𝜀→ (100) + (100) —{→}—(bar)—[CaptureNode]→ (103) + (102) —𝜀—[StartObject]→ (099) + (103) —𝜀—[Field(bar), EndObject]→ (104) + (104) —{↗¹}—𝜀→ (✓) ═══════════════════════════════════════════════════════════════════════════════ TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ + BaseWithCapture = Node + SyntheticNames = SyntheticNamesScope7 RefOpaque = () - QisTwo = T12 + RefCaptured = Node + QisTwo = T09 + NoQisOne = [Node] + FlatScope = Node + CardMult = [Node] - BaseWithCapture = { name: Node } TaggedInline = { x: Node? y: Node? @@ -857,26 +881,21 @@ fn golden_type_inference() { A => Node B => Node } - SyntheticNamesScope8 = { bar: Node } - SyntheticNames = { baz: SyntheticNamesScope8 } - RefCaptured = { result: BaseWithCapture } - QisTwoScope11 = { + SyntheticNamesScope7 = { bar: Node } + QisTwoScope8 = { x: Node y: Node } - T12 = [QisTwoScope11] - NoQisOne = { x: [Node] } - MissingFieldScope15 = { + T09 = [QisTwoScope8] + MissingFieldScope11 = { a: Node b: Node c: Node } MissingField = { - Full => MissingFieldScope15 + Full => MissingFieldScope11 Partial => Node } - FlatScope = { val: Node } - CardMult = { items: [Node] } "); } @@ -909,59 +928,57 @@ fn golden_effect_patterns() { ═══════════════════════════════════════════════════════════════════════════════ EffCapture = (00) - EffToString = (02) - EffArray = (04) - EffObject = (12) - EffVariant = (20) - EffClear = (33) - - (00) —(node)—[CaptureNode]→ (01) - (01) —𝜀—[Field(name)]→ (✓) - (02) —(node)—[CaptureNode, ToString]→ (03) - (03) —𝜀—[Field(name)]→ (✓) - (04) —(container)→ (06) - (05) —{↘}—(item)—[CaptureNode]→ (08) - (06) —𝜀—[StartArray]→ (09) - (08) —𝜀—[PushElement]→ (09) - (09) —𝜀→ (05), (10) - (10) —𝜀—[EndArray, Field(items)]→ (11) - (11) —{↗¹}—𝜀→ (✓) - (12) —𝜀—[StartObject]→ (13) - (13) —{→}—(a)—[CaptureNode, CaptureNode]→ (14) - (14) —𝜀—[Field(x)]→ (15) - (15) —{→}—(b)—[CaptureNode]→ (19) - (19) —𝜀—[Field(y), EndObject, Field(obj)]→ (✓) - (20) —𝜀—[StartObject]→ (23), (27) - (23) —(a)—[StartVariant(A), CaptureNode, CaptureNode]→ (25) - (25) —𝜀—[Field(x), EndVariant]→ (32) - (27) —(b)—[StartVariant(B), CaptureNode, CaptureNode]→ (29) - (29) —𝜀—[Field(y), EndVariant]→ (32) - (32) —𝜀—[EndObject, Field(choice)]→ (✓) - (33) —(container)→ (35) - (34) —{↘}—(item)—[CaptureNode]→ (38) - (35) —𝜀→ (34), (37) - (37) —𝜀—[ClearCurrent]→ (38) - (38) —𝜀—[Field(maybe)]→ (39) - (39) —{↗¹}—𝜀→ (✓) + EffToString = (01) + EffArray = (02) + EffObject = (11) + EffVariant = (18) + EffClear = (30) + + (00) —(node)—[CaptureNode]→ (✓) + (01) —(node)—[CaptureNode, ToString]→ (✓) + (02) —(container)→ (07) + (03) —{↘}—(item)—[CaptureNode]→ (09) + (05) —𝜀—[EndArray]→ (10) + (07) —𝜀—[StartArray]→ (03), (05) + (08) —{→}—(item)—[CaptureNode]→ (09) + (09) —𝜀—[PushElement]→ (08), (05) + (10) —{↗¹}—𝜀→ (✓) + (11) —𝜀—[StartObject]→ (12) + (12) —{→}—(a)—[CaptureNode]→ (13) + (13) —𝜀—[Field(x)]→ (14) + (14) —{→}—(b)—[CaptureNode]→ (17) + (17) —𝜀—[Field(y), EndObject]→ (✓) + (18) —𝜀—[StartObject]→ (21), (25) + (21) —(a)—[StartVariant(A), CaptureNode]→ (23) + (23) —𝜀—[Field(x), EndVariant]→ (29) + (25) —(b)—[StartVariant(B), CaptureNode]→ (27) + (27) —𝜀—[Field(y), EndVariant]→ (29) + (29) —𝜀—[EndObject]→ (✓) + (30) —(container)→ (32) + (31) —(item)—[CaptureNode]→ (35) + (32) —𝜀→ (31), (34) + (34) —𝜀—[ClearCurrent]→ (35) + (35) —{↗¹}—𝜀→ (✓) ═══════════════════════════════════════════════════════════════════════════════ TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ + EffVariant = EffVariantScope3 + EffToString = str + EffObject = EffObjectScope4 + EffClear = Node? + EffCapture = Node + EffArray = [Node] + EffVariantScope3 = { A => Node B => Node } - EffVariant = { choice: EffVariantScope3 } - EffToString = { name: str } - EffObjectScope6 = { + EffObjectScope4 = { x: Node y: Node } - EffObject = { obj: EffObjectScope6 } - EffClear = { maybe: Node? } - EffCapture = { name: Node } - EffArray = { items: [Node] } "); } @@ -996,73 +1013,81 @@ fn golden_quantifier_graphs() { TRANSITION GRAPH ═══════════════════════════════════════════════════════════════════════════════ - GreedyStar = (01) + GreedyStar = (04) GreedyPlus = (07) - Optional = (13) - LazyStar = (18) - LazyPlus = (24) - QuantSeq = (34) - NestedQuant = (48) - - (00) —(a)—[CaptureNode]→ (03) - (01) —𝜀—[StartArray]→ (04) - (03) —𝜀—[PushElement]→ (04) - (04) —𝜀→ (00), (05) - (05) —𝜀—[EndArray, Field(items)]→ (✓) - (06) —(a)—[CaptureNode]→ (10) - (07) —𝜀—[StartArray]→ (06) - (10) —𝜀—[PushElement]→ (06), (11) - (11) —𝜀—[EndArray, Field(items)]→ (✓) - (12) —(a)—[CaptureNode]→ (16) - (13) —𝜀→ (12), (15) - (15) —𝜀—[ClearCurrent]→ (16) - (16) —𝜀—[Field(maybe)]→ (✓) - (17) —(a)—[CaptureNode]→ (20) - (18) —𝜀—[StartArray]→ (21) - (20) —𝜀—[PushElement]→ (21) - (21) —𝜀→ (22), (17) - (22) —𝜀—[EndArray, Field(items)]→ (✓) - (23) —(a)—[CaptureNode]→ (27) - (24) —𝜀—[StartArray]→ (23) - (27) —𝜀—[PushElement]→ (28), (23) - (28) —𝜀—[EndArray, Field(items)]→ (✓) - (29) —𝜀—[StartObject]→ (30) + Optional = (15) + LazyStar = (22) + LazyPlus = (25) + QuantSeq = (42) + NestedQuant = (49) + + (00) —(a)—[CaptureNode]→ (06) + (02) —𝜀—[EndArray]→ (✓) + (04) —𝜀—[StartArray]→ (00), (02) + (05) —{→}—(a)—[CaptureNode]→ (06) + (06) —𝜀—[PushElement]→ (05), (02) + (07) —(a)—[StartArray, CaptureNode]→ (13) + (09) —𝜀—[EndArray]→ (✓) + (11) —𝜀→ (✓) + (12) —{→}—(a)—[CaptureNode]→ (13) + (13) —𝜀—[PushElement]→ (12), (09) + (14) —(a)—[CaptureNode]→ (16) + (15) —𝜀→ (14), (17) + (16) —𝜀→ (✓) + (17) —𝜀—[ClearCurrent]→ (16) + (18) —(a)—[CaptureNode]→ (24) + (20) —𝜀—[EndArray]→ (✓) + (22) —𝜀—[StartArray]→ (20), (18) + (23) —{→}—(a)—[CaptureNode]→ (24) + (24) —𝜀—[PushElement]→ (20), (23) + (25) —(a)—[StartArray, CaptureNode]→ (31) + (27) —𝜀—[EndArray]→ (✓) + (29) —𝜀→ (✓) (30) —{→}—(a)—[CaptureNode]→ (31) - (31) —𝜀—[Field(x)]→ (32) - (32) —{→}—(b)—[CaptureNode]→ (38) - (34) —𝜀—[StartArray]→ (39) - (35) —𝜀—[EndArray]→ (✓) - (38) —𝜀—[Field(y), EndObject, PushElement]→ (39) - (39) —𝜀→ (29), (35) - (40) —(outer)—[CaptureNode]→ (42) - (41) —{↘}—(inner)—[CaptureNode]→ (44) - (42) —𝜀—[StartArray]→ (45) - (44) —𝜀—[PushElement]→ (45) - (45) —𝜀→ (41), (46) - (46) —𝜀—[EndArray, Field(inners)]→ (51) - (48) —𝜀—[StartArray]→ (40) - (51) —{↗¹}—𝜀—[PushElement]→ (40), (52) - (52) —𝜀—[EndArray, Field(outers)]→ (✓) + (31) —𝜀—[PushElement]→ (27), (30) + (32) —𝜀—[StartObject]→ (33) + (33) —{→}—(a)—[CaptureNode]→ (34) + (34) —𝜀—[Field(x)]→ (35) + (35) —{→}—(b)—[CaptureNode]→ (40) + (40) —𝜀—[Field(y), EndObject]→ (46) + (42) —𝜀—[StartObject, StartArray]→ (32), (48) + (43) —{→}—𝜀→ (33), (45) + (44) —𝜀—[StartObject]→ (43) + (45) —𝜀—[EndObject]→ (46) + (46) —𝜀—[PushElement]→ (44), (48) + (48) —𝜀—[EndArray, EndObject]→ (✓) + (49) —(outer)—[StartArray, StartObject, CaptureNode]→ (54) + (50) —{↘}—(inner)—[CaptureNode, CaptureNode]→ (56) + (54) —𝜀—[StartArray]→ (50), (57) + (55) —{→}—(inner)—[CaptureNode, CaptureNode]→ (56) + (56) —𝜀—[PushElement]→ (55), (57) + (57) —𝜀—[EndArray, Field(inners)]→ (62) + (60) —𝜀—[EndArray]→ (✓) + (62) —{↗¹}—𝜀—[EndObject]→ (68) + (64) —𝜀→ (✓) + (65) —{→}—(outer)—[CaptureNode]→ (54), (67) + (66) —𝜀—[StartObject]→ (65) + (67) —𝜀—[EndObject]→ (68) + (68) —𝜀—[PushElement]→ (66), (60) ═══════════════════════════════════════════════════════════════════════════════ TYPE INFERENCE ═══════════════════════════════════════════════════════════════════════════════ QuantSeq = T04 + Optional = Node? + NestedQuant = T08 + LazyStar = [Node] + LazyPlus = [Node]⁺ + GreedyStar = [Node] + GreedyPlus = [Node]⁺ QuantSeqScope3 = { x: Node y: Node } T04 = [QuantSeqScope3] - Optional = { maybe: Node? } - NestedQuant = { - inners: [Node] - outers: [Node]⁺ - } - LazyStar = { items: [Node] } - LazyPlus = { items: [Node]⁺ } - GreedyStar = { items: [Node] } - GreedyPlus = { items: [Node]⁺ } + NestedQuantScope6 = { inners: [Node] } + T08 = [NestedQuantScope6]⁺ "); } diff --git a/crates/plotnik-lib/src/query/graph_optimize.rs b/crates/plotnik-lib/src/query/graph_optimize.rs index 952df895..87976fc4 100644 --- a/crates/plotnik-lib/src/query/graph_optimize.rs +++ b/crates/plotnik-lib/src/query/graph_optimize.rs @@ -134,6 +134,15 @@ fn is_eliminable_epsilon( return false; } + // Don't eliminate if node has nav and successor is a join point. + // Different paths may need different navigation (e.g., first iteration vs loop re-entry). + if !node.nav.is_stay() { + let succ_pred_count = predecessors.get(&successor_id).map_or(0, |p| p.len()); + if succ_pred_count > 1 { + return false; + } + } + if !node.effects.is_empty() && successor.ref_marker.is_some() { return false; } diff --git a/crates/plotnik-lib/src/query/graph_qis.rs b/crates/plotnik-lib/src/query/graph_qis.rs index a3be746e..84fea8b9 100644 --- a/crates/plotnik-lib/src/query/graph_qis.rs +++ b/crates/plotnik-lib/src/query/graph_qis.rs @@ -1,7 +1,7 @@ -//! Quantifier-Induced Scope (QIS) detection. +//! Capture scope detection: QIS and single-capture definitions. //! -//! QIS triggers when a quantified expression has ≥2 propagating captures. -//! This creates an implicit object scope so captures stay coupled per-iteration. +//! - QIS triggers when a quantified expression has ≥2 propagating captures. +//! - Single-capture definitions unwrap to their capture's type directly. //! //! See ADR-0009 for full specification. @@ -10,13 +10,25 @@ use crate::parser::{ast, token_src}; use super::{QisTrigger, Query}; impl<'a> Query<'a> { - /// Detect Quantifier-Induced Scope triggers. + /// Detect capture scopes: QIS triggers and single-capture definitions. /// - /// QIS triggers when a quantified expression has ≥2 propagating captures - /// (captures not absorbed by inner scopes like `{...} @x` or `[A: ...] @x`). - pub(super) fn detect_qis(&mut self) { - let bodies: Vec<_> = self.symbol_table.values().cloned().collect(); - for body in &bodies { + /// - QIS triggers when quantified expression has ≥2 propagating captures + /// - Single-capture definitions unwrap (no Field effect, type is capture's type) + pub(super) fn detect_capture_scopes(&mut self) { + let entries: Vec<_> = self + .symbol_table + .iter() + .map(|(n, b)| (*n, b.clone())) + .collect(); + for (name, body) in &entries { + // Detect single-capture and multi-capture definitions + let captures = self.collect_propagating_captures(body); + if captures.len() == 1 { + self.single_capture_defs.insert(*name, captures[0]); + } else if captures.len() >= 2 { + self.multi_capture_defs.insert(*name); + } + // Detect QIS within this definition self.detect_qis_in_expr(body); } } @@ -34,8 +46,20 @@ impl<'a> Query<'a> { } ast::Expr::CapturedExpr(c) => { // Captures on sequences/alternations absorb inner captures, - // but we still recurse to find nested quantifiers + // but we still recurse to find nested quantifiers. + // Special case: captured quantifier with ≥1 nested capture needs QIS + // to wrap each iteration with StartObject/EndObject for proper field scoping. if let Some(inner) = c.inner() { + // Check if this capture wraps a quantifier with nested captures + if let ast::Expr::QuantifiedExpr(q) = &inner + && let Some(quant_inner) = q.inner() + { + let captures = self.collect_propagating_captures(&quant_inner); + // Trigger QIS if there's at least 1 capture (not already covered by ≥2 rule) + if !captures.is_empty() && !self.qis_triggers.contains_key(q) { + self.qis_triggers.insert(q.clone(), QisTrigger { captures }); + } + } self.detect_qis_in_expr(&inner); } } @@ -48,7 +72,7 @@ impl<'a> Query<'a> { } /// Collect captures that propagate out of an expression (not absorbed by inner scopes). - fn collect_propagating_captures(&self, expr: &ast::Expr) -> Vec<&'a str> { + pub(super) fn collect_propagating_captures(&self, expr: &ast::Expr) -> Vec<&'a str> { let mut captures = Vec::new(); self.collect_propagating_captures_impl(expr, &mut captures); captures @@ -62,9 +86,9 @@ impl<'a> Query<'a> { out.push(name); } // Captured sequence/alternation absorbs inner captures. - // Need to look through quantifiers to find the actual container. + // Captured quantifiers with nested captures also absorb (they become QIS). if let Some(inner) = c.inner() - && !Self::is_scope_container(&inner) + && !self.is_scope_container(&inner) { self.collect_propagating_captures_impl(&inner, out); } @@ -83,14 +107,26 @@ impl<'a> Query<'a> { } } - /// Check if an expression is a scope container (seq/alt), looking through quantifiers. - fn is_scope_container(expr: &ast::Expr) -> bool { + /// Check if an expression is a scope container that absorbs inner captures. + /// - Sequences and alternations always absorb + /// - Quantifiers absorb if they have nested captures (will become QIS) + fn is_scope_container(&self, expr: &ast::Expr) -> bool { match expr { ast::Expr::SeqExpr(_) | ast::Expr::AltExpr(_) => true, - ast::Expr::QuantifiedExpr(q) => q - .inner() - .map(|i| Self::is_scope_container(&i)) - .unwrap_or(false), + ast::Expr::QuantifiedExpr(q) => { + if let Some(inner) = q.inner() { + // Quantifier with nested captures acts as scope container + // (will be treated as QIS, wrapping each element in an object) + let nested_captures = self.collect_propagating_captures(&inner); + if !nested_captures.is_empty() { + return true; + } + // Otherwise check if inner is a scope container + self.is_scope_container(&inner) + } else { + false + } + } _ => false, } } @@ -104,4 +140,18 @@ impl<'a> Query<'a> { pub fn qis_trigger(&self, q: &ast::QuantifiedExpr) -> Option<&QisTrigger<'a>> { self.qis_triggers.get(q) } + + /// Check if this capture is the single propagating capture for its definition. + /// Only that specific capture should unwrap (skip Field effect). + pub fn is_single_capture(&self, def_name: &str, capture_name: &str) -> bool { + self.single_capture_defs + .get(def_name) + .map(|c| *c == capture_name) + .unwrap_or(false) + } + + /// Check if definition has 2+ propagating captures (needs struct wrapping). + pub fn is_multi_capture_def(&self, name: &str) -> bool { + self.multi_capture_defs.contains(name) + } } diff --git a/crates/plotnik-lib/src/query/graph_qis_tests.rs b/crates/plotnik-lib/src/query/graph_qis_tests.rs index cb3bb29c..d2b7ce8c 100644 --- a/crates/plotnik-lib/src/query/graph_qis_tests.rs +++ b/crates/plotnik-lib/src/query/graph_qis_tests.rs @@ -186,21 +186,23 @@ fn qis_graph_has_object_effects() { let source = "Foo = { (a) @x (b) @y }*"; let (_query, pre_opt) = Query::try_from(source) .unwrap() - .build_graph_with_pre_opt_dump(); + .build_graph_with_pre_opt_dump(None); // QIS adds StartObj/EndObj around each iteration to keep captures coupled. - // Sequences themselves don't add object scope (captures propagate to parent). + // Multi-capture definitions also get wrapped in StartObj/EndObj at root. + // The loop has separate wrappers for initial entry and re-entry paths. let start_count = pre_opt.matches("StartObj").count(); let end_count = pre_opt.matches("EndObj").count(); + // 1 from multi-capture def wrapper + 1 for initial loop entry + 1 for re-entry = 3 assert_eq!( - start_count, 1, - "QIS graph should have 1 StartObj (from QIS loop):\n{}", + start_count, 3, + "QIS graph should have 3 StartObj (def wrapper + initial loop + re-entry):\n{}", pre_opt ); assert_eq!( - end_count, 1, - "QIS graph should have 1 EndObj (from QIS loop):\n{}", + end_count, 3, + "QIS graph should have 3 EndObj (def wrapper + initial loop + re-entry):\n{}", pre_opt ); } @@ -211,7 +213,7 @@ fn non_qis_graph_no_object_effects() { let source = "Foo = { (a) @x }*"; let (_query, pre_opt) = Query::try_from(source) .unwrap() - .build_graph_with_pre_opt_dump(); + .build_graph_with_pre_opt_dump(None); // Non-QIS quantifiers don't need object scope - captures propagate with array cardinality. // Sequences themselves don't add object scope either. diff --git a/crates/plotnik-lib/src/query/infer.rs b/crates/plotnik-lib/src/query/infer.rs index 6793517d..df0882e8 100644 --- a/crates/plotnik-lib/src/query/infer.rs +++ b/crates/plotnik-lib/src/query/infer.rs @@ -80,10 +80,6 @@ pub struct InferredMember<'src> { pub ty: TypeId, } -// ───────────────────────────────────────────────────────────────────────────── -// Cardinality -// ───────────────────────────────────────────────────────────────────────────── - #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] enum Cardinality { #[default] @@ -128,10 +124,6 @@ impl Cardinality { } } -// ───────────────────────────────────────────────────────────────────────────── -// Type shape for unification checking -// ───────────────────────────────────────────────────────────────────────────── - #[derive(Debug, Clone, PartialEq, Eq)] enum TypeShape { Primitive(TypeId), @@ -147,10 +139,6 @@ impl TypeShape { } } -// ───────────────────────────────────────────────────────────────────────────── -// Field tracking within a scope -// ───────────────────────────────────────────────────────────────────────────── - #[derive(Debug, Clone)] struct FieldInfo { base_type: TypeId, @@ -245,10 +233,6 @@ struct MergeError<'src> { spans: Vec, } -// ───────────────────────────────────────────────────────────────────────────── -// Inference result from expression -// ───────────────────────────────────────────────────────────────────────────── - /// What an expression produces when evaluated. #[derive(Debug, Clone)] struct ExprResult { @@ -301,10 +285,6 @@ impl ExprResult { } } -// ───────────────────────────────────────────────────────────────────────────── -// Inference context -// ───────────────────────────────────────────────────────────────────────────── - struct InferenceContext<'src> { source: &'src str, qis_triggers: HashSet, @@ -337,10 +317,6 @@ impl<'src> InferenceContext<'src> { id } - // ───────────────────────────────────────────────────────────────────────── - // Definition inference - // ───────────────────────────────────────────────────────────────────────── - fn infer_definition(&mut self, def_name: &'src str, body: &Expr) -> TypeId { self.current_def_name = def_name; @@ -359,21 +335,28 @@ impl<'src> InferenceContext<'src> { self.report_merge_errors(&merge_errors); - // Build result type from scope - if !scope.fields.is_empty() { - self.create_struct_type(def_name, &scope) - } else if result.is_meaningful { - // QIS or other expressions that produce a meaningful type without populating scope - result.base_type - } else { - TYPE_VOID + // Build result type from scope (Payload Rule from ADR-0009) + match scope.fields.len() { + 0 => { + if result.is_meaningful { + // QIS or other expressions that produce a meaningful type without populating scope + result.base_type + } else { + TYPE_VOID + } + } + 1 => { + // Single capture at definition root: unwrap to capture's type + let (_, info) = scope.fields.iter().next().unwrap(); + self.wrap_with_cardinality(info.base_type, info.cardinality) + } + _ => { + // Multiple captures: create struct + self.create_struct_type(def_name, &scope) + } } } - // ───────────────────────────────────────────────────────────────────────── - // Expression inference - // ───────────────────────────────────────────────────────────────────────── - fn infer_expr( &mut self, expr: &Expr, @@ -447,6 +430,10 @@ impl<'src> InferenceContext<'src> { for child in s.children() { self.infer_expr(&child, &mut nested_scope, Cardinality::One, errors); } + // Per ADR-0009 Payload Rule: 0 captures → Void + if nested_scope.is_empty() { + return TYPE_VOID; + } let type_name = self.generate_scope_name(); self.create_struct_type(type_name, &nested_scope) } @@ -459,6 +446,10 @@ impl<'src> InferenceContext<'src> { // Captured untagged alternation → Struct with merged fields let mut nested_scope = ScopeInfo::default(); self.infer_untagged_alternation(a, &mut nested_scope, Cardinality::One, errors); + // Per ADR-0009 Payload Rule: 0 captures → Void + if nested_scope.is_empty() { + return TYPE_VOID; + } let type_name = self.generate_scope_name(); self.create_struct_type(type_name, &nested_scope) } @@ -639,10 +630,6 @@ impl<'src> InferenceContext<'src> { ExprResult::node() } - // ───────────────────────────────────────────────────────────────────────── - // Helpers - // ───────────────────────────────────────────────────────────────────────── - fn quantifier_cardinality(&self, q: &ast::QuantifiedExpr) -> Cardinality { let Some(op) = q.operator() else { return Cardinality::One; @@ -793,10 +780,6 @@ impl<'src> InferenceContext<'src> { } } -// ───────────────────────────────────────────────────────────────────────────── -// Query integration -// ───────────────────────────────────────────────────────────────────────────── - impl<'a> Query<'a> { /// Run type inference on the query AST. pub(super) fn infer_types(&mut self) { diff --git a/crates/plotnik-lib/src/query/infer_tests.rs b/crates/plotnik-lib/src/query/infer_tests.rs index 97bad87e..291e4707 100644 --- a/crates/plotnik-lib/src/query/infer_tests.rs +++ b/crates/plotnik-lib/src/query/infer_tests.rs @@ -27,7 +27,7 @@ fn debug_star_quantifier_graph() { // See graph BEFORE optimization (what type inference actually sees) let (query, pre_opt_dump) = Query::try_from("Foo = ((item) @items)*") .expect("parse should succeed") - .build_graph_with_pre_opt_dump(); + .build_graph_with_pre_opt_dump(None); let mut out = String::new(); out.push_str("(pre-optimization)\n"); out.push_str(&pre_opt_dump); @@ -37,29 +37,30 @@ fn debug_star_quantifier_graph() { out.push_str(&query.type_info().dump()); insta::assert_snapshot!(out, @r" (pre-optimization) - Foo = (4) + Foo = (3) (0) —(_)→ (1) (1) —{↘}—(item)—[CaptureNode]→ (2) - (2) —𝜀—[Field(items)]→ (3) - (3) —{↗¹}—𝜀→ (6) - (4) —𝜀—[StartArray]→ (7) - (5) —𝜀—[EndArray]→ (✓) - (6) —𝜀—[PushElement]→ (7) - (7) —𝜀→ (0), (5) + (2) —{↗¹}—𝜀→ (5) + (3) —𝜀—[StartArray]→ (6) + (4) —𝜀—[EndArray]→ (✓) + (5) —𝜀—[PushElement]→ (8) + (6) —𝜀→ (0), (4) + (7) —{→}—(_)→ (1), (5) + (8) —𝜀→ (7), (4) (post-optimization) - Foo = (4) + Foo = (6) (0) —(_)→ (1) (1) —{↘}—(item)—[CaptureNode]→ (2) - (2) —𝜀—[Field(items)]→ (6) - (4) —𝜀—[StartArray]→ (7) - (5) —𝜀—[EndArray]→ (✓) - (6) —{↗¹}—𝜀—[PushElement]→ (7) - (7) —𝜀→ (0), (5) + (2) —{↗¹}—𝜀→ (8) + (4) —𝜀—[EndArray]→ (✓) + (6) —𝜀—[StartArray]→ (0), (4) + (7) —{→}—(_)→ (1), (8) + (8) —𝜀—[PushElement]→ (7), (4) - Foo = { items: [Node] } + Foo = [Node] "); } @@ -69,10 +70,9 @@ fn debug_graph_structure() { insta::assert_snapshot!(result, @r" Foo = (0) - (0) —(identifier)—[CaptureNode]→ (1) - (1) —𝜀—[Field(name)]→ (✓) + (0) —(identifier)—[CaptureNode]→ (✓) - Foo = { name: Node } + Foo = Node "); } @@ -94,36 +94,32 @@ fn debug_incompatible_types_graph() { insta::assert_snapshot!(out, @r" Foo = (0) - (0) —𝜀→ (2), (4) - (1) —𝜀→ (✓) + (0) —𝜀—[StartObject]→ (2), (4) (2) —(a)—[CaptureNode]→ (3) - (3) —𝜀—[Field(v)]→ (1) + (3) —𝜀—[Field(v)]→ (7) (4) —(b)—[CaptureNode, ToString]→ (5) - (5) —𝜀—[Field(v)]→ (1) + (5) —𝜀—[Field(v)]→ (7) + (7) —𝜀—[EndObject]→ (✓) - (dead nodes: 0) + (dead nodes: 2) - Foo = { v: Node } + Foo = Node Errors: field `v` in `Foo`: incompatible types [Node, String] "); } -// ───────────────────────────────────────────────────────────────────────────── -// Basic captures -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn single_node_capture() { let result = infer("Foo = (identifier) @name"); - insta::assert_snapshot!(result, @"Foo = { name: Node }"); + insta::assert_snapshot!(result, @"Foo = Node"); } #[test] fn string_capture() { let result = infer("Foo = (identifier) @name ::string"); - insta::assert_snapshot!(result, @"Foo = { name: str }"); + insta::assert_snapshot!(result, @"Foo = str"); } #[test] @@ -143,10 +139,6 @@ fn no_captures_void() { insta::assert_snapshot!(result, @"Foo = ()"); } -// ───────────────────────────────────────────────────────────────────────────── -// Captured sequences (composite types) -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn captured_sequence_creates_struct() { let input = indoc! {r#" @@ -155,11 +147,12 @@ fn captured_sequence_creates_struct() { let result = infer(input); insta::assert_snapshot!(result, @r" + Foo = FooScope3 + FooScope3 = { x: Node y: Node } - Foo = { z: FooScope3 } "); } @@ -171,12 +164,13 @@ fn nested_captured_sequence() { let result = infer(input); insta::assert_snapshot!(result, @r" + Foo = FooScope4 + FooScope3 = { b: Node } FooScope4 = { a: Node nested: FooScope3 } - Foo = { root: FooScope4 } "); } @@ -195,10 +189,6 @@ fn sequence_without_capture_propagates() { "); } -// ───────────────────────────────────────────────────────────────────────────── -// Alternations -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn untagged_alternation_symmetric() { let input = indoc! {r#" @@ -206,7 +196,7 @@ fn untagged_alternation_symmetric() { "#}; let result = infer(input); - insta::assert_snapshot!(result, @"Foo = { v: Node }"); + insta::assert_snapshot!(result, @"Foo = Node"); } #[test] @@ -247,11 +237,12 @@ fn tagged_alternation_captured_creates_enum() { let result = infer(input); insta::assert_snapshot!(result, @r" + Foo = FooScope3 + FooScope3 = { A => Node B => Node } - Foo = { choice: FooScope3 } "); } @@ -263,34 +254,31 @@ fn captured_untagged_alternation_creates_struct() { let result = infer(input); insta::assert_snapshot!(result, @r" + Foo = FooScope3 + FooScope3 = { x: Node? y: Node? } - Foo = { val: FooScope3 } "); } -// ───────────────────────────────────────────────────────────────────────────── -// Quantifiers -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn star_quantifier() { let result = infer("Foo = ((item) @items)*"); - insta::assert_snapshot!(result, @"Foo = { items: [Node] }"); + insta::assert_snapshot!(result, @"Foo = [Node]"); } #[test] fn plus_quantifier() { let result = infer("Foo = ((item) @items)+"); - insta::assert_snapshot!(result, @"Foo = { items: [Node]⁺ }"); + insta::assert_snapshot!(result, @"Foo = [Node]⁺"); } #[test] fn optional_quantifier() { let result = infer("Foo = ((item) @maybe)?"); - insta::assert_snapshot!(result, @"Foo = { maybe: Node? }"); + insta::assert_snapshot!(result, @"Foo = Node?"); } #[test] @@ -312,10 +300,6 @@ fn quantifier_on_sequence() { "); } -// ───────────────────────────────────────────────────────────────────────────── -// QIS: Additional cases from ADR-0009 -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn qis_single_capture_no_trigger() { // Single capture inside sequence - no QIS @@ -326,7 +310,7 @@ fn qis_single_capture_no_trigger() { "#}; let result = infer(input); - insta::assert_snapshot!(result, @"Single = { item: [Node] }"); + insta::assert_snapshot!(result, @"Single = [Node]"); } #[test] @@ -361,32 +345,32 @@ fn quantified_seq_with_inline_tagged_alt() { let result = infer_with_graph(input); insta::assert_snapshot!(result, @r" - Test = (11) + Test = (16) (00) —𝜀—[StartObject]→ (01) (01) —{→}—𝜀→ (04), (08) (04) —(a)—[StartVariant(A), CaptureNode, CaptureNode]→ (06) - (06) —𝜀—[Field(x), EndVariant]→ (15) + (06) —𝜀—[Field(x), EndVariant]→ (14) (08) —(b)—[StartVariant(B), CaptureNode, CaptureNode]→ (10) - (10) —𝜀—[Field(y), EndVariant]→ (15) - (11) —𝜀—[StartObject, StartArray]→ (16) - (15) —𝜀—[EndObject, PushElement]→ (16) - (16) —𝜀→ (00), (19) - (19) —𝜀—[EndArray, EndObject, Field(items)]→ (✓) + (10) —𝜀—[Field(y), EndVariant]→ (14) + (12) —𝜀—[EndArray]→ (✓) + (14) —𝜀—[EndObject]→ (20) + (16) —𝜀—[StartArray]→ (00), (12) + (17) —{→}—𝜀→ (01), (19) + (18) —𝜀—[StartObject]→ (17) + (19) —𝜀—[EndObject]→ (20) + (20) —𝜀—[PushElement]→ (18), (12) + + Test = T6 TestScope3 = { x: Node? y: Node? } T6 = [TestScope3] - Test = { items: T6 } "); } -// ───────────────────────────────────────────────────────────────────────────── -// Type compatibility -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn compatible_types_in_alternation() { let input = indoc! {r#" @@ -407,24 +391,20 @@ fn incompatible_types_in_alternation() { insta::assert_snapshot!(result, @r" Foo = (0) - (0) —𝜀→ (2), (4) - (1) —𝜀→ (✓) + (0) —𝜀—[StartObject]→ (2), (4) (2) —(a)—[CaptureNode]→ (3) - (3) —𝜀—[Field(v)]→ (1) + (3) —𝜀—[Field(v)]→ (7) (4) —(b)—[CaptureNode, ToString]→ (5) - (5) —𝜀—[Field(v)]→ (1) + (5) —𝜀—[Field(v)]→ (7) + (7) —𝜀—[EndObject]→ (✓) - Foo = { v: Node } + Foo = Node Errors: field `v` in `Foo`: incompatible types [Node, String] "); } -// ───────────────────────────────────────────────────────────────────────────── -// Multiple definitions -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn multiple_definitions() { let input = indoc! {r#" @@ -434,7 +414,8 @@ fn multiple_definitions() { let result = infer(input); insta::assert_snapshot!(result, @r" - Func = { name: Node } + Func = Node + Class = { name: Node body: Node @@ -442,10 +423,6 @@ fn multiple_definitions() { "); } -// ───────────────────────────────────────────────────────────────────────────── -// Edge cases -// ───────────────────────────────────────────────────────────────────────────── - #[test] fn deeply_nested_node() { let input = indoc! {r#" @@ -453,17 +430,17 @@ fn deeply_nested_node() { "#}; let result = infer(input); - insta::assert_snapshot!(result, @"Foo = { val: Node }"); + insta::assert_snapshot!(result, @"Foo = Node"); } #[test] fn wildcard_capture() { let result = infer("Foo = _ @any"); - insta::assert_snapshot!(result, @"Foo = { any: Node }"); + insta::assert_snapshot!(result, @"Foo = Node"); } #[test] fn string_literal_capture() { let result = infer(r#"Foo = "+" @op"#); - insta::assert_snapshot!(result, @"Foo = { op: Node }"); + insta::assert_snapshot!(result, @"Foo = Node"); } diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index c477dbfa..19fb2d5b 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -53,6 +53,7 @@ fn find_similar<'a>(name: &str, candidates: &[&'a str], max_distance: usize) -> } /// Check if `child` is a subtype of `supertype`, recursively handling nested supertypes. +#[allow(dead_code)] fn is_subtype_of(lang: &Lang, child: NodeTypeId, supertype: NodeTypeId) -> bool { let subtypes = lang.subtypes(supertype); for &subtype in subtypes { @@ -67,6 +68,7 @@ fn is_subtype_of(lang: &Lang, child: NodeTypeId, supertype: NodeTypeId) -> bool } /// Check if `child` is a valid non-field child of `parent`, expanding supertypes. +#[allow(dead_code)] fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) -> bool { let valid_types = lang.valid_child_types(parent); for &allowed in valid_types { @@ -81,6 +83,7 @@ fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) - } /// Check if `child` is a valid field value type, expanding supertypes. +#[allow(dead_code)] fn is_valid_field_type_expanded( lang: &Lang, parent: NodeTypeId, @@ -100,6 +103,7 @@ fn is_valid_field_type_expanded( } /// Format a list of items for display, truncating if too long. +#[allow(dead_code)] fn format_list(items: &[&str], max_items: usize) -> String { if items.is_empty() { return String::new(); @@ -124,6 +128,7 @@ fn format_list(items: &[&str], max_items: usize) -> String { } /// Context for validating child types. +#[allow(dead_code)] #[derive(Clone, Copy)] struct ValidationContext<'a> { /// The parent node type being validated against. @@ -136,6 +141,7 @@ struct ValidationContext<'a> { field: Option>, } +#[allow(dead_code)] #[derive(Clone, Copy)] struct FieldContext<'a> { name: &'a str, @@ -511,6 +517,7 @@ impl<'a> Query<'a> { } /// Validate non-field children. Called for direct children of a NamedNode that aren't fields. + #[cfg(feature = "unstable-child-type-validation")] fn validate_non_field_children( &mut self, expr: &Expr, @@ -564,7 +571,18 @@ impl<'a> Query<'a> { } } + #[cfg(not(feature = "unstable-child-type-validation"))] + fn validate_non_field_children( + &mut self, + _expr: &Expr, + _ctx: &ValidationContext<'a>, + _lang: &Lang, + _visited: &mut IndexSet, + ) { + } + /// Validate a terminal type (NamedNode or AnonymousNode) against the context. + #[cfg(feature = "unstable-child-type-validation")] fn validate_terminal_type( &mut self, expr: &Expr, @@ -625,7 +643,18 @@ impl<'a> Query<'a> { // Non-field children are validated by validate_non_field_children } + #[cfg(not(feature = "unstable-child-type-validation"))] + fn validate_terminal_type( + &mut self, + _expr: &Expr, + _ctx: &ValidationContext<'a>, + _lang: &Lang, + _visited: &mut IndexSet, + ) { + } + /// Collect all terminal types from an expression (traverses through Alt/Seq/Capture/Quantifier/Ref). + #[allow(dead_code)] fn collect_terminal_types( &self, expr: &Expr, @@ -636,6 +665,7 @@ impl<'a> Query<'a> { result } + #[allow(dead_code)] fn collect_terminal_types_impl( &self, expr: &Expr, @@ -690,6 +720,7 @@ impl<'a> Query<'a> { } /// Get type info for a terminal expression (NamedNode or AnonymousNode). + #[allow(dead_code)] fn get_terminal_type_info(&self, expr: &Expr) -> Option<(NodeTypeId, &'a str, TextRange)> { match expr { Expr::NamedNode(node) => { diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 2c8e6c68..203a6858 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -331,6 +331,7 @@ fn nested_field_validation() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_child_type_for_field() { let input = indoc! {r#" @@ -382,6 +383,7 @@ fn alternation_with_link_errors() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn sequence_with_link_errors() { let input = indoc! {r#" @@ -496,6 +498,7 @@ fn field_on_node_without_fields() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_child_type_no_children_allowed() { let input = indoc! {r#" @@ -517,6 +520,7 @@ fn invalid_child_type_no_children_allowed() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_child_type_wrong_type() { let input = indoc! {r#" @@ -566,6 +570,7 @@ fn valid_child_via_nested_supertype() { assert!(query.is_valid()); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_anonymous_child() { let input = indoc! {r#" @@ -589,6 +594,7 @@ fn invalid_anonymous_child() { "#); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_child_in_alternation() { let input = indoc! {r#" @@ -612,6 +618,7 @@ fn invalid_child_in_alternation() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn invalid_child_in_sequence() { let input = indoc! {r#" @@ -647,6 +654,7 @@ fn deeply_nested_sequences_valid() { assert!(query.is_valid()); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn deeply_nested_sequences_invalid() { let input = indoc! {r#" @@ -681,6 +689,7 @@ fn deeply_nested_alternations_in_field_valid() { assert!(query.is_valid()); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn deeply_nested_alternations_in_field_invalid() { let input = indoc! {r#" @@ -703,6 +712,7 @@ fn deeply_nested_alternations_in_field_invalid() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn deeply_nested_no_fields_allowed() { let input = indoc! {r#" @@ -723,6 +733,7 @@ fn deeply_nested_no_fields_allowed() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn mixed_nested_with_capture_and_quantifier() { let input = indoc! {r#" @@ -748,6 +759,7 @@ fn mixed_nested_with_capture_and_quantifier() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn field_with_captured_and_quantified_invalid_type() { let input = indoc! {r#" @@ -771,6 +783,7 @@ fn field_with_captured_and_quantified_invalid_type() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn multiple_invalid_types_in_alternation_field() { let input = indoc! {r#" @@ -803,6 +816,7 @@ fn multiple_invalid_types_in_alternation_field() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn multiple_invalid_types_in_sequence_child() { let input = indoc! {r#" @@ -835,6 +849,7 @@ fn multiple_invalid_types_in_sequence_child() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_followed_for_child_validation() { let input = indoc! {r#" @@ -863,6 +878,7 @@ fn ref_followed_for_child_validation() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_followed_for_field_validation() { let input = indoc! {r#" @@ -908,6 +924,7 @@ fn ref_followed_valid_case() { assert!(query.is_valid()); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_followed_recursive_with_invalid_type() { let input = indoc! {r#" @@ -959,6 +976,7 @@ fn ref_followed_recursive_valid() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_followed_mutual_recursion() { let input = indoc! {r#" @@ -1003,6 +1021,7 @@ fn ref_followed_mutual_recursion() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_followed_in_sequence() { let input = indoc! {r#" @@ -1035,6 +1054,7 @@ fn ref_followed_in_sequence() { "); } +#[cfg(feature = "unstable-child-type-validation")] #[test] fn ref_validated_in_multiple_contexts() { let input = indoc! {r#" diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 90d45f18..9f11e252 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -120,6 +120,12 @@ pub struct Query<'a> { type_info: TypeInferenceResult<'a>, /// QIS triggers: quantified expressions with ≥2 propagating captures. qis_triggers: HashMap>, + /// Definitions with exactly 1 propagating capture: def name → capture name. + single_capture_defs: HashMap<&'a str, &'a str>, + /// Definitions with 2+ propagating captures (need struct wrapping at root). + multi_capture_defs: HashSet<&'a str>, + /// Current definition name during graph construction. + current_def_name: &'a str, /// Counter for generating unique ref IDs during graph construction. next_ref_id: u32, } @@ -160,6 +166,9 @@ impl<'a> Query<'a> { dead_nodes: HashSet::new(), type_info: TypeInferenceResult::default(), qis_triggers: HashMap::new(), + single_capture_defs: HashMap::new(), + multi_capture_defs: HashSet::new(), + current_def_name: "", next_ref_id: 0, } } @@ -205,7 +214,7 @@ impl<'a> Query<'a> { if !self.is_valid() { return self; } - self.detect_qis(); + self.detect_capture_scopes(); self.construct_graph(); self.infer_types(); // Run before optimization to avoid merged effects self.optimize_graph(); @@ -213,12 +222,17 @@ impl<'a> Query<'a> { } /// Build graph and return dump of graph before optimization (for debugging). - pub fn build_graph_with_pre_opt_dump(mut self) -> (Self, String) { + /// + /// If `root_kind` is provided, definitions are wrapped before dumping. + pub fn build_graph_with_pre_opt_dump(mut self, root_kind: Option<&'a str>) -> (Self, String) { if !self.is_valid() { return (self, String::new()); } - self.detect_qis(); + self.detect_capture_scopes(); self.construct_graph(); + if let Some(root) = root_kind { + self.graph.wrap_definitions_with_root(root); + } let pre_opt_dump = self.graph.dump(); self.infer_types(); self.optimize_graph(); @@ -255,6 +269,20 @@ impl<'a> Query<'a> { &self.graph } + /// Wrap definitions that don't already match the root node kind. + /// + /// Call this after `build_graph()` to allow queries like `(function_declaration)` + /// to work when the interpreter starts at tree root (e.g., `program`). + /// + /// The `root_kind` should be the language's root node kind (e.g., "program" for JS). + pub fn wrap_with_root(mut self, root_kind: &'a str) -> Self { + self.graph.wrap_definitions_with_root(root_kind); + // Re-run type inference and optimization on wrapped graph + self.infer_types(); + self.optimize_graph(); + self + } + /// Access the set of dead nodes (eliminated by optimization). pub fn dead_nodes(&self) -> &HashSet { &self.dead_nodes diff --git a/docs/adr/ADR-0009-type-system.md b/docs/adr/ADR-0009-type-system.md index 521be2a3..57533763 100644 --- a/docs/adr/ADR-0009-type-system.md +++ b/docs/adr/ADR-0009-type-system.md @@ -5,91 +5,88 @@ ## Context -Type inference transforms a `BuildGraph` into `TypeDef`/`TypeMember` structures (ADR-0007). This ADR formalizes the inference rules, particularly the semantics of alternations. +Type inference transforms a query into typed structures. This ADR formalizes the inference rules with a unified conceptual model. ## Decision +### Core Principle + +The type system reduces to two orthogonal concepts: + +1. **Scope boundaries** — where captures land +2. **Payload rule** — what type a scope produces + +> Captures bubble up to the nearest scope boundary; each scope's type is determined by its capture count and scope kind. + ### Type Universe ``` -τ ::= Void -- definition with no captures (TypeId = 0) +τ ::= Void -- no captures (TypeId = 0) | Node -- AST node reference (TypeId = 1) | String -- extracted source text (TypeId = 2) - | Optional(τ) -- nullable wrapper + | Optional(τ) -- zero or one | ArrayStar(τ) -- zero or more | ArrayPlus(τ) -- one or more - | Struct(fields) -- struct with named fields + | Struct(fields) -- named fields | Enum(variants) -- tagged union ``` -### Cardinality - -Cardinality describes how many values a capture produces: - -| Cardinality | Notation | Wrapper | Semantics | -| ----------- | -------- | ----------- | ------------ | -| Required | `1` | none | exactly one | -| Optional | `?` | `Optional` | zero or one | -| Star | `*` | `ArrayStar` | zero or more | -| Plus | `+` | `ArrayPlus` | one or more | +### Captures -Cardinality propagates through nesting: +A capture `@name` creates a field that bubbles up to the nearest enclosing scope. -``` -outer * inner = result -────────────────────── - 1 * 1 = 1 - 1 * ? = ? - 1 * * = * - 1 * + = + - ? * 1 = ? - ? * ? = ? - ? * * = * - ? * + = * - * * 1 = * - * * ? = * - * * * = * - * * + = * - + * 1 = + - + * ? = * - + * * = * - + * + = + -``` +| Pattern | Field Type | +| --------------- | -------------------- | +| `(node) @x` | `Node` | +| `"literal" @x` | `Node` | +| `@x ::string` | `String` | +| `@x ::TypeName` | `TypeName` (nominal) | +| `{...} @x` | scope payload | +| `[...] @x` | scope payload | -### Scope Rules +### Scope Boundaries -A **scope** is a container that collects captures into fields. +**Golden rule**: `{}` and `[]` create a scope **only when captured**. Scopes are created by: -1. **Definition root**: inherits the scope type of its root expression (see below) -2. **Captured sequence**: `{...} @name` creates a nested Struct scope -3. **Captured tagged alternation**: `[A: ... B: ...] @name` creates an Enum; each variant has its own scope -4. **Captured untagged alternation**: `[...] @name` creates a Struct; captures from branches merge +1. **Definition root**: `Def = expr` — always a scope +2. **Captured sequence**: `{...} @name` — creates Struct scope +3. **Captured tagged alternation**: `[A: ... B: ...] @name` — creates Enum scope +4. **Captured untagged alternation**: `[...] @name` — creates Struct scope (merged fields) +5. **QIS** (Quantifier-Induced Scope): auto-created when quantifier has ≥2 captures +6. **Reference**: `(Def)` is opaque — blocks propagation entirely -**Definition root semantics**: A definition `Foo = expr` is equivalent to capturing the root expression with the definition name. Therefore: +**Uncaptured containers are transparent**: -- `Foo = [ A: ... B: ... ]` → `Foo` is an Enum (tagged alternation at root) -- `Foo = { ... }` or `Foo = (node ...)` → `Foo` is a Struct (captures propagate to root scope) -- `Foo = (node) @x` → `Foo` is a Struct with field `x` +- `{...}` without `@name` — captures pass through to outer scope +- `[...]` without `@name` — captures pass through (asymmetric ones become Optional) +- `[A: ... B: ...]` without `@name` — **tags ignored**, behaves like untagged -**Critical rule**: Tags only have effect when the alternation is captured. An _inline_ uncaptured tagged alternation behaves identically to an untagged one—captures propagate to parent scope. +### Payload Rule -### Flat Scoping Principle +| Captures | Payload Type | +| -------- | ----------------------- | +| 0 | `Void` | +| 1 | unwrap OR `Struct` | +| ≥2 | `Struct { field, ... }` | -Query nesting does NOT create data nesting. Intermediate structure is invisible: +**Unwrap applies to** (1 capture → capture's type directly): -```plotnik -Query = (a (b (c) @val)) -``` +- Definition roots +- Enum variants +- QIS element types -Result type: `Struct { val: Node }` — the `(a ...)` and `(b ...)` wrappers contribute nothing. +**Always Struct** (1 capture → `Struct { field }`): -Only explicit scope markers (`{...} @x`, `[...] @x` with tags) introduce nesting in the output type. +- Captured sequences `{...} @name` +- Captured untagged alternations `[...] @name` + +**Rationale**: Explicit `@name` on a container signals intent to preserve structure. Definition roots and enum variants unwrap because the container name (def name / variant tag) already provides context. ### Reference Opacity -References are opaque to captures: calling `(Foo)` does NOT inherit captures from `Foo`. +References are opaque barriers. Calling `(Foo)` does NOT inherit `Foo`'s captures. ```plotnik A = (identifier) @name @@ -99,472 +96,331 @@ C = (A) @node Types: -- `A { name: Node }` — has the capture -- `B {}` (Void) — calling A produces no fields in B -- `C { node: Node }` — captures the reference itself, not A's internals +- `A` → `Node` (1 capture, unwrapped) +- `B` → `Void` (0 captures — A's captures don't leak) +- `C` → `Node` (1 capture of type `A`, which is `Node`) -To access A's captures, you must either: +To access a definition's structure, capture it: `(Foo) @foo` yields a field of type `Foo`. -1. Inline A's pattern into B -2. Capture the reference: `(A) @a` yields `{ a: A }` where `a` has type `A` +### Flat Scoping Principle -This matches runtime semantics ([ADR-0006](ADR-0006-dynamic-query-execution.md)): Enter pushes a frame and jumps to the definition; Exit pops and returns. The caller only sees what it explicitly captures. +Query nesting does NOT create data nesting. Only scope boundaries matter: -### Type Inference for Captures +```plotnik +Query = (a (b (c) @val)) +``` -| Pattern | Inferred Type | -| ----------------------------- | -------------------- | -| `(node) @x` | `Node` | -| `"literal" @x` | `Node` | -| `@x ::string` | `String` | -| `@x ::TypeName` | `TypeName` (nominal) | -| `{...} @x` | synthetic Struct | -| `[A: ... B: ...] @x` (tagged) | Enum with variants | -| `[...] @x` (untagged) | merged Struct | +Result: `Node` — the `(a ...)` and `(b ...)` wrappers contribute nothing. Single capture at def root unwraps. -### Alternation Semantics +```plotnik +Query = (a (b (c) @x (d) @y)) +``` -This is the most complex part of type inference. The key insight: +Result: `Struct { x: Node, y: Node }` — two captures form a struct. -> **Tags only matter when the alternation is captured.** +### Cardinality -#### Case 1: Uncaptured Alternation (Tagged or Untagged) +Cardinality describes how many values a capture produces: -Captures propagate to the parent scope. Asymmetric captures become Optional. +| Cardinality | Notation | Wrapper | +| ----------- | -------- | ----------- | +| Required | `1` | none | +| Optional | `?` | `Optional` | +| Star | `*` | `ArrayStar` | +| Plus | `+` | `ArrayPlus` | -```plotnik -Foo = [ A: (a) @x B: (b) @y ] -``` +**Propagation through nesting** (outer × inner): -Despite tags, this is uncaptured. Behavior: +``` + 1 × 1 = 1 ? × 1 = ? * × 1 = * + × 1 = + + 1 × ? = ? ? × ? = ? * × ? = * + × ? = * + 1 × * = * ? × * = * * × * = * + × * = * + 1 × + = + ? × + = * * × + = * + × + = + +``` -- `@x` appears only in branch A → propagates as `Optional(Node)` -- `@y` appears only in branch B → propagates as `Optional(Node)` -- Result: `Foo { x: Optional(Node), y: Optional(Node) }` +**Join** (merging branches with same capture): -```plotnik -Bar = [ (a) @v (b) @v ] ``` + + + /|\ + * | + \| + ? + | + 1 +``` + +When join produces array (`*`/`+`) but branch has scalar (`1`/`?`), compiler inserts lifting coercion to wrap in singleton array. -Untagged, uncaptured. Both branches have `@v`: +### Alternation Semantics -- `@v` appears in all branches with type `Node` → propagates as `Node` -- Result: `Bar { v: Node }` +**Key insight**: Tags only matter when the alternation is captured. -#### Case 2: Captured Untagged Alternation +#### Uncaptured Alternation -Creates a Struct scope. Captures from branches merge into it. +Captures propagate to parent scope. Asymmetric captures become `Optional`. Tags are ignored. ```plotnik -Foo = [ (a) @x (b) @y ] @z +// Tagged but uncaptured — tags ignored +Foo = [ A: (a) @x B: (b) @y ] ``` -- `@z` creates a Struct scope -- `@x` and `@y` are asymmetric → both become Optional within `@z`'s scope -- Result: `Foo { z: FooZ }` where `FooZ { x: Optional(Node), y: Optional(Node) }` +- `@x` only in A → `Optional(Node)` +- `@y` only in B → `Optional(Node)` +- Result: `Struct { x: Optional(Node), y: Optional(Node) }` ```plotnik -Bar = [ (a) @v (b) @v ] @z +// Symmetric captures +Bar = [ (a) @v (b) @v ] ``` -- `@z` creates a Struct scope -- `@v` appears in all branches → required within `@z`'s scope -- Result: `Bar { z: BarZ }` where `BarZ { v: Node }` +- `@v` in all branches → `Node` (not Optional) +- Result: `Node` (1 capture at def root, unwraps) + +Diagnostic: warning for inline uncaptured tagged alternation (likely forgot `@name`). -#### Case 3: Captured Tagged Alternation +#### Captured Untagged Alternation -Creates an Enum. Each variant has its own independent scope, subject to **Single-Capture Variant Flattening** (see below). +Creates Struct scope. Branches merge. No unwrapping. ```plotnik -Foo = [ A: (a) @x B: (b) @y ] @z +Foo = [ (a) @x (b) @y ] @z ``` -- `@z` creates an Enum because tags are present AND alternation is captured -- Variant `A` has scope with `@x: Node` -- Variant `B` has scope with `@y: Node` -- Both variants have exactly 1 capture → flattened -- Result: `Foo { z: FooZ }` where `FooZ` is: - ``` - Enum FooZ { A(Node), B(Node) } - ``` - -#### Single-Capture Variant Flattening +- `@z` creates Struct scope +- Merge: `{ x: Optional(Node), y: Optional(Node) }` +- Result: `Struct { z: Struct { x: Optional(Node), y: Optional(Node) } }` -When a tagged alternation variant has exactly one capture, the wrapper struct is eliminated—the variant payload becomes the capture's type directly. - -| Branch Captures | Variant Payload | Rust Syntax | -| --------------- | --------------------- | ------------------ | -| 0 | Unit (Void) | `A` | -| 1 | Capture's type (flat) | `A(T)` | -| ≥2 | Struct (named fields) | `A { x: T, y: U }` | - -**Rationale**: The field name is redundant when it's the only capture—the variant tag already provides discrimination. This produces idiomatic types matching `Option`, `Result`. +```plotnik +Bar = [ (a) @v (b) @v ] @z +``` -**Formalization**: +- `@z` creates Struct scope +- Merge: `{ v: Node }` +- Always Struct (no unwrap): `Struct { v: Node }` +- Result: `Struct { z: Struct { v: Node } }` -``` -VariantPayload(branch) = - let captures = propagating_captures(branch) - match captures.len(): - 0 → Void - 1 → captures[0].type // flatten: discard field name - _ → Struct(captures) // preserve field names -``` +#### Captured Tagged Alternation -**Examples**: +Creates Enum scope. Each variant is independent, follows payload rule. ```plotnik -// Single capture per branch → flatten -Foo = [ A: (a) @x B: (b) @y ] @z -// → Enum FooZ { A(Node), B(Node) } - -// Mixed: one branch single, other multi → partial flatten -Bar = [ A: (a) @x B: (b) @y (c) @z ] @result -// → Enum BarResult { A(Node), B { y: Node, z: Node } } - -// Single capture with type annotation → flatten preserves type -Baz = [ Ok: (val) @v Err: (msg) @e ::string ] @result -// → Enum BazResult { Ok(Node), Err(String) } - -// Single capture of nested struct → flatten to that struct -Qux = [ A: { (x) @x (y) @y } @data B: (b) @b ] @choice -// → Enum QuxChoice { A(QuxChoiceData), B(Node) } -// → QuxChoiceData = { x: Node, y: Node } +Result = [ + Ok: (value) @val + Err: (error) @msg ::string +] @result ``` -### Unification Rules (1-Level Merge) +- Variant `Ok`: 1 capture → `Node` (unwrap) +- Variant `Err`: 1 capture → `String` (unwrap) +- Result: `Struct { result: Enum { Ok(Node), Err(String) } }` -When merging captures across untagged alternation branches, we apply **1-level merge semantics**. This balances flexibility with type safety: top-level fields merge with optionality, but nested struct mismatches are errors. +#### Tagged Alternation at Definition Root -**Design rationale**: Plotnik's purpose is typed extraction. Deep recursive merging would produce heavily-optional types (`{ a?: { b?: { c?: Node } } }`), forcing users back to defensive checking—undermining the library's value. Tagged+captured alternations exist when precise discrimination is needed. +Special case: tagged alternation directly at definition root makes the definition itself an Enum. -**Base type compatibility**: - -``` -unify(Node, Node) = Node -unify(String, String) = String -unify(Node, String) = ⊥ (error: incompatible primitives) -unify(Node, Struct) = ⊥ (error: primitive vs composite) -unify(String, Struct) = ⊥ (error: primitive vs composite) +```plotnik +Result = [ + Ok: (value) @val + Err: (error) @msg ::string +] ``` -**Struct merging** (1-level only): - -``` -unify(Struct(f₁), Struct(f₂)) = Struct(merged_fields) - where merged_fields: - - fields in both f₁ and f₂: unify types (must be compatible) - - fields only in f₁: become Optional - - fields only in f₂: become Optional -``` +- Result: `Enum Result { Ok(Node), Err(String) }` -Nested structs are compared by **structural identity**, not recursively merged. If a field has type `Struct` in both branches but the structs differ, it's an error. +No wrapper struct — the definition IS the enum. -**Cardinality interaction**: Cardinality join happens first, then type unification. If `T` and `T[]` appear at the same field, lift to array, then unify element types. +### Unification Rules (Branch Merge) -**Error reporting**: When unification fails, the compiler reports ALL incompatibilities across all branches, not just the first. This helps users fix multiple issues in one iteration. +When merging captures across untagged alternation branches: -**Examples**: +**1-level merge semantics**: Top-level fields merge with optionality; nested struct mismatches are errors. ``` // OK: top-level field merge Branch 1: { x: Node, y: Node } Branch 2: { x: Node, z: String } -Result: { x: Node, y?: Node, z?: String } +Result: { x: Node, y: Optional(Node), z: Optional(String) } // OK: nested structs identical Branch 1: { data: { a: Node }, extra: Node } Branch 2: { data: { a: Node } } -Result: { data: { a: Node }, extra?: Node } +Result: { data: { a: Node }, extra: Optional(Node) } -// ERROR: nested structs differ (no deep merge) +// ERROR: nested structs differ Branch 1: { data: { a: Node } } Branch 2: { data: { b: Node } } → Error: field `data` has incompatible struct types -// ERROR: primitive vs primitive mismatch +// ERROR: primitive mismatch Branch 1: { val: String } Branch 2: { val: Node } -→ Error: field `val` has incompatible types: `String` vs `Node` -``` - -### Cardinality Join (for merging) - -When the same capture appears in multiple branches with different cardinalities: - -``` - + - /|\ - * | (arrays collapse to *) - \| - ? - | - 1 +→ Error: field `val` has incompatible types ``` -| Left | Right | Join | -| ---- | ----- | ---- | -| 1 | 1 | 1 | -| 1 | ? | ? | -| 1 | \* | \* | -| 1 | + | + | -| ? | ? | ? | -| ? | \* | \* | -| ? | + | \* | -| \* | \* | \* | -| \* | + | \* | -| + | + | + | - -### Cardinality Lifting Coercion - -When cardinality join produces an array type (`*` or `+`) but a branch has scalar cardinality (`1` or `?`), the compiler inserts coercion effects to wrap the scalar in a singleton array. - -| Original | Lifted to | Effect transformation | -| -------- | ---------- | ------------------------------------------------------------------------------------------- | -| `1` | `*` or `+` | `CaptureNode` → `StartArray, CaptureNode, PushElement, EndArray` | -| `?` | `*` | absent → `StartArray, EndArray`; present → `StartArray, CaptureNode, PushElement, EndArray` | - -This ensures the materializer always receives homogeneous values matching the declared type. - -Example: - -```plotnik -Items = [ (single) @item (multi { (x)+ @item }) ] -``` - -Branch 1 has `@item: 1`, branch 2 has `@item: +`. Join is `+`. Branch 1's effects are lifted: - -``` -// Before lifting: -CaptureNode, Field("item") - -// After lifting: -StartArray, CaptureNode, PushElement, EndArray, Field("item") -``` +**Rationale**: Deep recursive merging produces heavily-optional types, defeating typed extraction's purpose. Use tagged alternations for precise discrimination. ### Quantifier-Induced Scope (QIS) -When a quantified expression contains multiple captures, they must stay coupled per-iteration. QIS creates an implicit scope to preserve this structural relationship. - -**Trigger**: Quantifier `Q ∈ {*, +, ?}` applied to expression `E`, where `E` has **≥2 propagating captures** (captures not absorbed by inner scopes). - -**Mechanism**: QIS creates an implicit scope around `E`. Captures propagate to this scope (not the parent), forming a struct element type. - -**Containers**: Any expression can trigger QIS: - -- Node: `(node ...)Q` -- Sequence: `{...}Q` -- Alternation: `[...]Q` - -**Naming**: - -| Context | Element Type Name | -| ---------------------------- | ----------------------------------- | -| At definition root | `{Def}Item` | -| Explicit capture `E Q @name` | `{Parent}{Name}` | -| Neither | **Error**: require explicit `@name` | - -**Result Type**: - -| Q | Result | -| --- | ------------------------ | -| `*` | `ArrayStar(ElementType)` | -| `+` | `ArrayPlus(ElementType)` | -| `?` | `Optional(ElementType)` | - -**Interior rules**: Standard type inference within the implicit scope: - -- Uncaptured alternations (tagged or not): asymmetric captures → Optional -- Captured tagged alternations: Enum with variant scopes - -**Non-trigger** (≤1 propagating capture): No QIS. Single capture propagates with cardinality multiplication `Q × innerCard`. - -**Examples**: +When a quantified expression has **≥2 propagating captures**, QIS auto-creates a scope to keep values paired per-iteration. ```plotnik -// Node as container - keeps name/body paired -Functions = (function_declaration +// 2 captures under quantifier → QIS triggers +Functions = (function name: (identifier) @name body: (block) @body )* -// → Functions = ArrayStar(FunctionsItem) -// → FunctionsItem = { name: Node, body: Node } - -// Alternation in quantified sequence -Foo = { [ (a) @x (b) @y ] }* -// → Foo = ArrayStar(FooItem) -// → FooItem = { x: Optional(Node), y: Optional(Node) } - -// Tagged but uncaptured (tags ignored, same result) -Bar = { [ A: (a) @x B: (b) @y ] }* -// → Bar = ArrayStar(BarItem) -// → BarItem = { x: Optional(Node), y: Optional(Node) } - -// Tagged AND captured (no QIS - single propagating capture) -Baz = { [ A: (a) @x B: (b) @y ] @choice }* -// → Baz = ArrayStar(BazChoice) -// → BazChoice = Enum { A: { x: Node }, B: { y: Node } } - -// Nested with explicit capture -Outer = (parent { [ (a) @x (b) @y ] }* @items) -// → Outer = { items: ArrayStar(OuterItems) } -// → OuterItems = { x: Optional(Node), y: Optional(Node) } - -// Single capture - no QIS, standard rules -Single = { (a) @item }* -// → Single = { item: ArrayStar(Node) } - -// Error: QIS triggered but no capture, not at root -Bad = (parent { [ (a) @x (b) @y ] }* (other) @z) -// → Error: quantified expression with multiple captures requires @name ``` -### Missing Field Rule +- QIS creates element scope with 2 captures → Struct (always, by payload rule) +- Result: `ArrayStar(FunctionsItem)` where `FunctionsItem { name: Node, body: Node }` +- Definition has 1 propagating capture (the array) → unwrap +- Final: `Functions` is `ArrayStar(FunctionsItem)` + +```plotnik +// 1 capture → no QIS, standard cardinality multiplication +Items = { (item) @item }* +``` + +- No QIS (only 1 capture) +- `@item` gets cardinality `*` +- Result: `Node` would be wrong... actually 1 capture at def root +- Wait, the capture is `ArrayStar(Node)`, so def root has 1 "field" +- Result: `ArrayStar(Node)` (unwrapped) -If a capture appears in some branches but not all, the field becomes `Optional` (or `*` if original was array). +**Naming**: -This is intentional: users can have common fields be required across all branches, while branch-specific fields become optional. +- At definition root: `{Def}Item` +- With explicit capture `E* @name`: `{Parent}{Name}` +- Neither (not at root, no capture): Error — require explicit `@name` ### Synthetic Naming Types without explicit `::Name` receive synthetic names: -| Context | Pattern | Example | -| -------------------- | ----------------- | ------------ | -| Definition root | `{DefName}` | `Func` | -| Captured sequence | `{Def}{Capture}` | `FuncParams` | -| Captured alternation | `{Def}{Capture}` | `FuncBody` | -| Enum variant payload | `{Enum}{Variant}` | `FuncBodyOk` | +| Context | Pattern | +| -------------------- | ----------------- | +| Definition root | `{DefName}` | +| Captured sequence | `{Def}{Capture}` | +| Captured alternation | `{Def}{Capture}` | +| Enum variant payload | `{Enum}{Variant}` | +| QIS element | `{Def}Item` | -Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`, ...). +Collision resolution: append numeric suffix (`Foo`, `Foo2`, `Foo3`). ### Error Conditions -| Condition | Severity | Recovery | Diagnostic Kind (future) | -| ------------------------------------ | -------- | ----------------------------- | ------------------------------ | -| Incompatible primitives in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | -| Primitive vs Struct in alt | Error | Use `TYPE_INVALID`, continue | `TypeMismatchInAlt` | -| Nested struct mismatch in alt | Error | Use `TYPE_INVALID`, continue | `StructMismatchInAlt` | -| Duplicate capture in same scope | Error | Keep first, ignore duplicates | `DuplicateCapture` | -| Empty definition (no captures) | Info | Type is `Void` (TypeId = 0) | (no diagnostic) | -| Inline uncaptured tagged alternation | Warning | Treat as untagged | `UnusedBranchLabels` | -| QIS without capture (not at root) | Error | Cannot infer element type | `MultiCaptureQuantifierNoName` | +| Condition | Severity | Recovery | +| --------------------------------- | -------- | -------------------------- | +| Incompatible types in alternation | Error | Use invalid type, continue | +| Nested struct mismatch | Error | Use invalid type, continue | +| Duplicate capture in same scope | Error | Keep first | +| Inline uncaptured tagged alt | Warning | Treat as untagged | +| QIS without capture (not at root) | Error | Cannot infer element type | -The last warning applies only to literal tagged alternations, not references. If `Foo = [ A: ... ]` is used as `(Foo)`, no warning—the user intentionally reuses a definition. But `(parent [ A: ... B: ... ])` inline without capture likely indicates a forgotten `@name`. +Error reporting is exhaustive: all incompatibilities across all branches are reported, not just the first. -**Exhaustive error reporting**: When type unification fails, the compiler explores all branches and reports all incompatibilities. Example diagnostic: +## Examples -``` -error: incompatible types in alternation branches - --> query.plot:3:5 - | - 3 | (a { (x) @val ::string }) @data - | ^^^ `String` here - 4 | (b { (x { (y) @inner }) @val }) @data - | ^^^ `Node` here - | - = note: capture `val` has incompatible types across branches - = help: use tagged alternation `[ A: ... B: ... ]` for precise discrimination +### Single Capture at Definition Root + +```plotnik +Name = (identifier) @name ``` -## Examples +- 1 capture at def root → unwrap +- Result: `Name` is `Node` -### Example 1: Captured Sequence +### Multiple Captures at Definition Root ```plotnik -Foo = (foo {(bar) @bar} @baz) +Binding = (variable_declaration + name: (identifier) @name + value: (expression) @value +) ``` -- `@bar` captures `(bar)` → `Node` -- `@baz` captures the sequence containing `@bar` → creates scope -- Types: - - `@bar: Node` - - `@baz: FooBaz { bar: Node }` - - `Foo: { baz: FooBaz }` +- 2 captures → Struct +- Result: `Binding { name: Node, value: Node }` -### Example 2: Uncaptured Sequence +### Captured vs Uncaptured Sequence ```plotnik -Foo = (foo {(bar) @bar}) +// Captured sequence — creates scope, always Struct +Foo = { (bar) @bar } @baz ``` -- `@bar` captures `(bar)` → `Node` -- Sequence `{...}` is NOT captured → `@bar` propagates to `Foo`'s scope -- Types: - - `Foo: { bar: Node }` - -### Example 3: Tagged Alternation at Definition Root +- `@bar` stays in `@baz`'s scope +- Captured sequence: always Struct +- Result: `Struct { baz: Struct { bar: Node } }` ```plotnik -Result = [ - Ok: (value) @val - Err: (error) @msg ::string -] +// Uncaptured sequence — transparent, captures pass through +Foo = { (bar) @bar } ``` -- Tagged alternation at definition root → `Result` is an Enum -- Each variant has exactly 1 capture → flattened (no wrapper structs) -- Types: - - `Result: Enum { Ok(Node), Err(String) }` +- `{...}` without `@name` is transparent +- `@bar` bubbles up to definition root +- 1 capture at def root → unwrap +- Result: `Foo` is `Node` -### Example 4: Tagged Alternation (Inline, Uncaptured) +### Enum at Definition Root ```plotnik -Foo = (parent [ - Ok: (value) @val - Err: (error) @msg ::string -]) +Boolean = [ + True: "true" + False: "false" +] ``` -- Tagged alternation is inline and uncaptured → tags ignored, behaves like untagged -- `@val` only in Ok branch → `Optional(Node)` -- `@msg` only in Err branch → `Optional(String)` -- Types: - - `Foo: { val: Optional(Node), msg: Optional(String) }` -- Diagnostic: warning `UnusedBranchLabels` (inline uncaptured tagged alternation) +- Tagged alt at root, 0 captures per variant → Void +- Result: `Enum Boolean { True, False }` -### Example 5: Cardinality in Alternation +### Mixed Variant Payloads ```plotnik -Items = [ (single) @item (multi { (x)+ @item }) ] +Expr = [ + Lit: (number) @value + Bin: (binary left: (_) @left right: (_) @right) +] ``` -- Branch 1: `@item` cardinality `1`, type `Node` -- Branch 2: `@item` cardinality `+`, type `Node` -- Join: cardinality `+` (both present, LUB of `1` and `+`) -- Types: - - `Items: { item: ArrayPlus(Node) }` +- `Lit`: 1 capture → unwrap → `Node` +- `Bin`: 2 captures → Struct +- Result: `Enum Expr { Lit(Node), Bin { left: Node, right: Node } }` -### Example 6: Nested Quantifier +### QIS with Multiple Captures ```plotnik -Funcs = (module { (function)* @fns }) +Module = (module { + (function + name: (identifier) @name + params: (parameters) @params + )* +}) ``` -- `@fns` has cardinality `*` from quantifier -- Sequence not captured → propagates to root -- Types: - - `Funcs: { fns: ArrayStar(Node) }` +- 2 captures under `*` → QIS triggers +- Element type: `ModuleItem { name: Node, params: Node }` +- Array propagates to def root (1 capture) → unwrap +- Result: `Module` is `ArrayStar(ModuleItem)` ## Consequences **Positive**: -- Explicit rules enable deterministic inference -- "Tags only matter when captured" is a simple mental model -- 1-level merge provides flexibility while preserving type safety -- Asymmetric fields becoming Optional is intuitive ("match any branch, get what's available") -- Definition root inherits type naturally—no wrapper structs for top-level enums -- Exhaustive error reporting helps users fix all issues in one iteration +- Golden rule ("only captured containers create scopes") is easy to remember +- Payload rule is uniform: 0→void, 1→unwrap, 2+→struct +- Exception for captured containers (always Struct) matches user intent +- "Tags only matter when captured" eliminates confusion **Negative**: -- LUB cardinality join can lose precision +- Field name loss on single-capture unwrap (mitigated by `::Type` annotation) - 1-level merge is less flexible than deep merge (intentional trade-off) **Alternatives Considered**: -- Error on uncaptured tagged alternations (rejected: too restrictive for incremental development) -- Definition root always Struct (rejected: forces wrapper types for enums, e.g., `struct Expr { val: ExprEnum }` instead of `enum Expr`) -- Deep recursive merge for nested structs (rejected: produces heavily-optional types that defeat the purpose of typed extraction; users who need flexibility at depth should use tagged+captured alternations for precision) -- Strict struct equality for merging (rejected: too restrictive for common patterns like `[ (a) @x (b) @y ]`) +- Always wrap in struct (rejected: verbose types like `{ val: Node }` instead of `Node`) +- Deep recursive merge (rejected: heavily-optional types defeat typed extraction) +- Error on uncaptured tagged alternations (rejected: too restrictive)