diff --git a/AGENTS.md b/AGENTS.md index 832078fe..0912a246 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,7 +7,7 @@ # Documentation -[docs/README.md](docs/README.md) | [Language Reference](docs/lang-reference.md) | [Type System](docs/type-system.md) | [Runtime Engine](docs/runtime-engine.md) | [Binary Format](docs/binary-format/01-overview.md) +[docs/README.md](docs/README.md) | [CLI Guide](docs/cli.md) | [Language Reference](docs/lang-reference.md) | [Type System](docs/type-system.md) | [Runtime Engine](docs/runtime-engine.md) | [Binary Format](docs/binary-format/01-overview.md) # Query Syntax Quick Reference @@ -187,38 +187,56 @@ docs/ Run: `cargo run -p plotnik-cli -- ` -| Command | Purpose | Status | -| ------- | ------------------------------- | ------- | -| `debug` | Inspect queries and source ASTs | Working | -| `types` | Generate TypeScript types | Working | -| `langs` | List supported languages | Working | -| `exec` | Execute query, output JSON | Not yet | +| Command | Purpose | Status | +| ------- | -------------------------- | ------- | +| `tree` | Explore tree-sitter AST | Working | +| `check` | Validate query | Working | +| `dump` | Show compiled bytecode | Working | +| `infer` | Generate TypeScript types | Working | +| `langs` | List supported languages | Working | +| `exec` | Execute query, output JSON | Not yet | -## debug +## tree -Inspect query AST/CST or parse source files with tree-sitter. +Explore a source file's tree-sitter AST. 
```sh -cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' -cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --only-symbols -cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --types -cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --bytecode -cargo run -p plotnik-cli -- debug -s app.ts -cargo run -p plotnik-cli -- debug -s app.ts --raw +cargo run -p plotnik-cli -- tree app.ts +cargo run -p plotnik-cli -- tree app.ts --raw +cargo run -p plotnik-cli -- tree app.ts --spans ``` -Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types`, `--bytecode` +## check -## types +Validate a query (silent on success, like `cargo check`). -Generate TypeScript type definitions from a query. Requires `-l/--lang` to validate node types against grammar. +```sh +cargo run -p plotnik-cli -- check query.ptk -l typescript +cargo run -p plotnik-cli -- check queries.ts/ # workspace with lang inference +cargo run -p plotnik-cli -- check -q '(identifier) @id' -l javascript +``` + +## dump + +Show compiled bytecode. + +```sh +cargo run -p plotnik-cli -- dump query.ptk # unlinked +cargo run -p plotnik-cli -- dump query.ptk -l typescript # linked +cargo run -p plotnik-cli -- dump -q '(identifier) @id' +``` + +## infer + +Generate TypeScript type definitions from a query. 
```sh -cargo run -p plotnik-cli -- types -q 'Test = (identifier) @id' -l javascript -cargo run -p plotnik-cli -- types --query-file query.ptk -l typescript -o types.d.ts +cargo run -p plotnik-cli -- infer query.ptk -l javascript +cargo run -p plotnik-cli -- infer queries.ts/ -o types.d.ts +cargo run -p plotnik-cli -- infer -q '(identifier) @id' -l typescript ``` -Options: `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` +Options: `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` ## langs diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index ee3a8062..edf6169a 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -30,97 +30,157 @@ pub struct Cli { #[derive(Subcommand)] pub enum Command { - /// Debug and inspect queries and source files + /// Explore a source file's tree-sitter AST #[command(after_help = r#"EXAMPLES: - plotnik debug -q 'Q = (identifier) @id' - plotnik debug -q 'Q = (identifier) @id' --only-symbols - plotnik debug -q 'Q = (identifier) @id' --bytecode - plotnik debug -s app.ts - plotnik debug -s app.ts --raw - plotnik debug -q 'Q = (function_declaration) @fn' -s app.ts -l typescript"#)] - Debug { - #[command(flatten)] - query: QueryArgs, + plotnik tree app.ts + plotnik tree app.ts --raw + plotnik tree app.ts --spans"#)] + Tree { + /// Source file to parse (use "-" for stdin) + #[arg(value_name = "SOURCE")] + source: PathBuf, + + /// Language (inferred from extension if not specified) + #[arg(short = 'l', long, value_name = "LANG")] + lang: Option, - #[command(flatten)] - source: SourceArgs, + /// Include anonymous nodes (literals, punctuation) + #[arg(long)] + raw: bool, - /// Language for source (required for inline text, inferred from extension otherwise) - #[arg(long, short = 'l', value_name = "LANG")] + /// Show source positions + #[arg(long)] + spans: bool, + }, + + /// Validate a query + #[command(after_help = r#"EXAMPLES: + plotnik check query.ptk + plotnik 
check query.ptk -l typescript + plotnik check queries.ts/ + plotnik check -q '(identifier) @id' -l javascript"#)] + Check { + /// Query file or workspace directory + #[arg(value_name = "QUERY")] + query_path: Option, + + /// Inline query text + #[arg(short = 'q', long = "query", value_name = "TEXT")] + query_text: Option, + + /// Language for grammar validation (inferred from workspace name if possible) + #[arg(short = 'l', long, value_name = "LANG")] lang: Option, + /// Treat warnings as errors + #[arg(long)] + strict: bool, + #[command(flatten)] output: OutputArgs, }, - /// List supported languages - Langs, - - /// Execute a query against source code and output JSON + /// Show compiled bytecode #[command(after_help = r#"EXAMPLES: - plotnik exec -q 'Q = (identifier) @id' -s app.js - plotnik exec -q 'Q = (identifier) @id' -s app.js --pretty - plotnik exec -q 'Q = (function_declaration) @fn' -s app.ts -l typescript --verbose-nodes - plotnik exec -q 'Q = (identifier) @id' -s app.js --check - plotnik exec --query-file query.ptk -s app.js --entry FunctionDef"#)] - Exec { - #[command(flatten)] - query: QueryArgs, - - #[command(flatten)] - source: SourceArgs, - - /// Language for source (required for inline text, inferred from extension otherwise) - #[arg(long, short = 'l', value_name = "LANG")] + plotnik dump query.ptk + plotnik dump query.ptk -l typescript + plotnik dump -q '(identifier) @id'"#)] + Dump { + /// Query file or workspace directory + #[arg(value_name = "QUERY")] + query_path: Option, + + /// Inline query text + #[arg(short = 'q', long = "query", value_name = "TEXT")] + query_text: Option, + + /// Language for linking (inferred from workspace name if possible) + #[arg(short = 'l', long, value_name = "LANG")] lang: Option, #[command(flatten)] - output: ExecOutputArgs, + output: OutputArgs, }, /// Generate type definitions from a query #[command(after_help = r#"EXAMPLES: - plotnik types -q 'Q = (identifier) @id' -l javascript - plotnik types --query-file 
query.ptk -l typescript - plotnik types -q 'Q = (function_declaration) @fn' -l js --format ts - plotnik types -q 'Q = (identifier) @id' -l js --verbose-nodes - plotnik types -q 'Q = (identifier) @id' -l js -o types.d.ts + plotnik infer query.ptk -l javascript + plotnik infer queries.ts/ -o types.d.ts + plotnik infer -q '(function_declaration) @fn' -l typescript + plotnik infer query.ptk -l js --verbose-nodes NOTE: Use --verbose-nodes to match `exec --verbose-nodes` output shape."#)] - Types { - #[command(flatten)] - query: QueryArgs, + Infer { + /// Query file or workspace directory + #[arg(value_name = "QUERY")] + query_path: Option, + + /// Inline query text + #[arg(short = 'q', long = "query", value_name = "TEXT")] + query_text: Option, - /// Target language (required) - #[arg(long, short = 'l', value_name = "LANG")] + /// Target language (required, or inferred from workspace name) + #[arg(short = 'l', long, value_name = "LANG")] lang: Option, #[command(flatten)] - output: TypesOutputArgs, + infer_output: InferOutputArgs, + + #[command(flatten)] + output: OutputArgs, }, -} -#[derive(Args)] -pub struct ExecOutputArgs { - /// Pretty-print JSON output - #[arg(long)] - pub pretty: bool, + /// Execute a query against source code and output JSON + #[command(after_help = r#"EXAMPLES: + plotnik exec query.ptk app.js + plotnik exec -q '(identifier) @id' -s app.js + plotnik exec query.ptk app.ts --pretty + plotnik exec query.ptk app.ts --verbose-nodes"#)] + Exec { + /// Query file or workspace directory + #[arg(value_name = "QUERY")] + query_path: Option, - /// Include verbose node information (line/column positions) - #[arg(long)] - pub verbose_nodes: bool, + /// Source file to execute against + #[arg(value_name = "SOURCE")] + source_path: Option, - /// Validate output against inferred types - #[arg(long)] - pub check: bool, + /// Inline query text + #[arg(short = 'q', long = "query", value_name = "TEXT")] + query_text: Option, - /// Entry point name (definition to match 
from) - #[arg(long, value_name = "NAME")] - pub entry: Option, + /// Source code as inline text + #[arg(long = "source", value_name = "TEXT")] + source_text: Option, + + /// Source file (alternative to positional) + #[arg(short = 's', long = "source-file", value_name = "FILE")] + source_file: Option, + + /// Language (inferred from source extension if not specified) + #[arg(short = 'l', long, value_name = "LANG")] + lang: Option, + + #[command(flatten)] + exec_output: ExecOutputArgs, + + #[command(flatten)] + output: OutputArgs, + }, + + /// List supported languages + Langs, +} + +#[derive(Args)] +pub struct OutputArgs { + /// Colorize output + #[arg(long, default_value = "auto", value_name = "WHEN")] + pub color: ColorChoice, } #[derive(Args)] -pub struct TypesOutputArgs { +pub struct InferOutputArgs { /// Output format (typescript, ts) #[arg(long, default_value = "typescript", value_name = "FORMAT")] pub format: String, @@ -143,68 +203,20 @@ pub struct TypesOutputArgs { } #[derive(Args)] -#[group(id = "query_input", multiple = false)] -pub struct QueryArgs { - /// Query as inline text - #[arg(short = 'q', long = "query", value_name = "QUERY")] - pub query_text: Option, - - /// Query from file (use "-" for stdin) - #[arg(long = "query-file", value_name = "FILE")] - pub query_file: Option, -} - -#[derive(Args)] -#[group(id = "source_input", multiple = false)] -pub struct SourceArgs { - /// Source code as inline text - #[arg(long = "source", value_name = "SOURCE")] - pub source_text: Option, - - /// Source code from file (use "-" for stdin) - #[arg(short = 's', long = "source-file", value_name = "FILE")] - pub source_file: Option, -} - -#[derive(Args)] -pub struct OutputArgs { - /// Colorize output (auto-detected by default) - #[arg(long, default_value = "auto", value_name = "WHEN")] - pub color: ColorChoice, - - /// Show only symbol table (instead of query AST) - #[arg(long = "only-symbols")] - pub symbols: bool, - - /// Show query CST instead of AST (no effect on 
source) - #[arg(long)] - pub cst: bool, - - /// Include trivia tokens (whitespace, comments) - #[arg(long)] - pub raw: bool, - - /// Show source spans - #[arg(long)] - pub spans: bool, - - /// Show inferred arities - #[arg(long)] - pub arities: bool, - - /// Show compiled graph +pub struct ExecOutputArgs { + /// Pretty-print JSON output #[arg(long)] - pub graph: bool, + pub pretty: bool, - /// Show unoptimized graph (before epsilon elimination) + /// Include verbose node information (line/column positions) #[arg(long)] - pub graph_raw: bool, + pub verbose_nodes: bool, - /// Show inferred types + /// Validate output against inferred types #[arg(long)] - pub types: bool, + pub check: bool, - /// Show bytecode dump - #[arg(long)] - pub bytecode: bool, + /// Entry point name (definition to match from) + #[arg(long, value_name = "NAME")] + pub entry: Option, } diff --git a/crates/plotnik-cli/src/commands/check.rs b/crates/plotnik-cli/src/commands/check.rs new file mode 100644 index 00000000..d4075e89 --- /dev/null +++ b/crates/plotnik-cli/src/commands/check.rs @@ -0,0 +1,83 @@ +use std::path::PathBuf; + +use plotnik_lib::QueryBuilder; + +use super::lang_resolver::{resolve_lang, resolve_lang_required, suggest_language}; +use super::query_loader::load_query_source; + +pub struct CheckArgs { + pub query_path: Option, + pub query_text: Option, + pub lang: Option, + pub strict: bool, + pub color: bool, +} + +pub fn run(args: CheckArgs) { + let source_map = match load_query_source( + args.query_path.as_deref(), + args.query_text.as_deref(), + ) { + Ok(map) => map, + Err(msg) => { + eprintln!("error: {}", msg); + std::process::exit(1); + } + }; + + if source_map.is_empty() { + eprintln!("error: query cannot be empty"); + std::process::exit(1); + } + + // Parse and analyze + let query = match QueryBuilder::new(source_map).parse() { + Ok(parsed) => parsed.analyze(), + Err(e) => { + eprintln!("error: {}", e); + std::process::exit(1); + } + }; + + // Resolve language: explicit 
flag takes precedence, then infer from workspace + let lang = match &args.lang { + Some(name) => Some(resolve_lang_required(name).unwrap_or_else(|msg| { + eprintln!("error: {}", msg); + if let Some(suggestion) = suggest_language(name) { + eprintln!(); + eprintln!("Did you mean '{}'?", suggestion); + } + eprintln!(); + eprintln!("Run 'plotnik langs' for the full list."); + std::process::exit(1); + })), + None => resolve_lang(None, args.query_path.as_deref()), + }; + + let (is_valid, diagnostics, source_map) = match lang { + Some(lang) => { + let linked = query.link(&lang); + let valid = if args.strict { + !linked.diagnostics().has_errors() && !linked.diagnostics().has_warnings() + } else { + linked.is_valid() + }; + (valid, linked.diagnostics(), linked.source_map().clone()) + } + None => { + let valid = if args.strict { + !query.diagnostics().has_errors() && !query.diagnostics().has_warnings() + } else { + query.is_valid() + }; + (valid, query.diagnostics(), query.source_map().clone()) + } + }; + + if !is_valid { + eprint!("{}", diagnostics.render_colored(&source_map, args.color)); + std::process::exit(1); + } + + // Silent on success (like cargo check) +} diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs deleted file mode 100644 index 841bc420..00000000 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ /dev/null @@ -1,173 +0,0 @@ -#![allow(dead_code)] -pub mod source; - -use std::fs; -use std::io::{self, Read}; - -use plotnik_lib::Query; - -use source::{dump_source, load_source, parse_tree, resolve_lang}; - -pub struct DebugArgs { - pub query_text: Option, - pub query_file: Option, - pub source_text: Option, - pub source_file: Option, - pub lang: Option, - pub symbols: bool, - pub raw: bool, - pub cst: bool, - pub spans: bool, - pub arities: bool, - pub graph: bool, - pub graph_raw: bool, - pub types: bool, - pub bytecode: bool, - pub color: bool, -} - -pub fn run(args: DebugArgs) { - let has_query_input = 
args.query_text.is_some() || args.query_file.is_some(); - let has_source_input = args.source_text.is_some() || args.source_file.is_some(); - - if let Err(msg) = validate(&args, has_query_input, has_source_input) { - eprintln!("error: {}", msg); - std::process::exit(1); - } - - let query_source = if has_query_input { - Some(load_query(&args)) - } else { - None - }; - - let query = query_source.as_ref().map(|src| { - Query::try_from(src.as_str()).unwrap_or_else(|e| { - eprintln!("error: {}", e); - std::process::exit(1); - }) - }); - - let show_query = has_query_input && !args.symbols && !args.graph && !args.types && !args.bytecode; - let show_source = has_source_input; - - if show_query && let Some(ref q) = query { - print!( - "{}", - q.printer() - .raw(args.cst || args.raw) - .with_trivia(args.raw) - .with_spans(args.spans) - .with_arities(args.arities) - .dump() - ); - } - - if args.symbols - && let Some(ref q) = query - { - print!( - "{}", - q.printer() - .only_symbols(true) - .with_arities(args.arities) - .dump() - ); - } - - if args.graph || args.graph_raw { - eprintln!("error: --graph and --graph-raw are not yet implemented"); - std::process::exit(1); - } - - if args.types - && let Some(ref q) = query - { - ensure_valid(q, args.color); - let bytecode = q.emit().expect("bytecode emission failed"); - let module = - plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); - let output = plotnik_lib::bytecode::emit::emit_typescript(&module); - print!("{}", output); - } - - if args.bytecode - && let Some(ref q) = query - { - ensure_valid(q, args.color); - let bytecode = q.emit().expect("bytecode emission failed"); - let module = - plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); - let output = plotnik_lib::bytecode::dump(&module); - print!("{}", output); - } - - if show_source { - if show_query || args.symbols { - println!(); - } - let resolved_lang = resolve_lang(&args.lang, &args.source_text, 
&args.source_file); - let source_code = load_source(&args.source_text, &args.source_file); - let tree = parse_tree(&source_code, resolved_lang); - print!("{}", dump_source(&tree, &source_code, args.raw)); - } - - if let Some(ref q) = query { - ensure_valid(q, args.color); - } -} - -/// Ensure query is valid, exiting with diagnostics if not. -fn ensure_valid(q: &Query, color: bool) { - if q.is_valid() { - return; - } - eprint!( - "{}", - q.diagnostics().render_colored(q.source_map(), color) - ); - std::process::exit(1); -} - -fn load_query(args: &DebugArgs) -> String { - if let Some(ref text) = args.query_text { - return text.clone(); - } - if let Some(ref path) = args.query_file { - if path.as_os_str() == "-" { - let mut buf = String::new(); - io::stdin() - .read_to_string(&mut buf) - .expect("failed to read stdin"); - return buf; - } - return fs::read_to_string(path).expect("failed to read query file"); - } - unreachable!() -} - -fn validate(args: &DebugArgs, has_query: bool, has_source: bool) -> Result<(), &'static str> { - if !has_query && !has_source { - return Err( - "specify at least one input: -q/--query, --query-file, -s/--source-file, or --source", - ); - } - - if args.symbols && !has_query { - return Err("--only-symbols requires -q/--query or --query-file"); - } - - if args.source_text.is_some() && args.lang.is_none() { - return Err("--lang is required when using --source"); - } - - Ok(()) -} - -fn resolve_lang_for_link(lang: &Option) -> plotnik_langs::Lang { - let name = lang.as_ref().expect("--lang required for --link"); - plotnik_langs::from_name(name).unwrap_or_else(|| { - eprintln!("error: unknown language: {}", name); - std::process::exit(1); - }) -} diff --git a/crates/plotnik-cli/src/commands/dump.rs b/crates/plotnik-cli/src/commands/dump.rs new file mode 100644 index 00000000..2e8d5784 --- /dev/null +++ b/crates/plotnik-cli/src/commands/dump.rs @@ -0,0 +1,84 @@ +use std::path::PathBuf; + +use plotnik_lib::QueryBuilder; +use 
plotnik_lib::bytecode::{Module, dump}; + +use super::lang_resolver::{resolve_lang, resolve_lang_required, suggest_language}; +use super::query_loader::load_query_source; + +pub struct DumpArgs { + pub query_path: Option, + pub query_text: Option, + pub lang: Option, + pub color: bool, +} + +pub fn run(args: DumpArgs) { + let source_map = match load_query_source( + args.query_path.as_deref(), + args.query_text.as_deref(), + ) { + Ok(map) => map, + Err(msg) => { + eprintln!("error: {}", msg); + std::process::exit(1); + } + }; + + if source_map.is_empty() { + eprintln!("error: query cannot be empty"); + std::process::exit(1); + } + + // Parse and analyze + let query = match QueryBuilder::new(source_map).parse() { + Ok(parsed) => parsed.analyze(), + Err(e) => { + eprintln!("error: {}", e); + std::process::exit(1); + } + }; + + // Resolve language (optional - enables linking) + let lang = if let Some(lang_name) = &args.lang { + match resolve_lang_required(lang_name) { + Ok(l) => Some(l), + Err(msg) => { + eprintln!("error: {}", msg); + if let Some(suggestion) = suggest_language(lang_name) { + eprintln!(); + eprintln!("Did you mean '{}'?", suggestion); + } + eprintln!(); + eprintln!("Run 'plotnik langs' for the full list."); + std::process::exit(1); + } + } + } else { + resolve_lang(None, args.query_path.as_deref()) + }; + + let bytecode = if let Some(lang) = lang { + let linked = query.link(&lang); + if !linked.is_valid() { + eprint!( + "{}", + linked.diagnostics().render_colored(linked.source_map(), args.color) + ); + std::process::exit(1); + } + linked.emit().expect("bytecode emission failed") + } else { + if !query.is_valid() { + eprint!( + "{}", + query.diagnostics().render_colored(query.source_map(), args.color) + ); + std::process::exit(1); + } + query.emit().expect("bytecode emission failed") + }; + + let module = Module::from_bytes(bytecode).expect("module loading failed"); + print!("{}", dump(&module)); +} diff --git a/crates/plotnik-cli/src/commands/exec.rs 
b/crates/plotnik-cli/src/commands/exec.rs index d99c8787..5ae02188 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -2,20 +2,23 @@ use std::fs; use std::io::{self, Read}; use std::path::PathBuf; -use plotnik_lib::{QueryBuilder, SourceMap}; +use plotnik_langs::Lang; +use plotnik_lib::QueryBuilder; -use super::debug::source::resolve_lang; +use super::lang_resolver::{resolve_lang_required, suggest_language}; +use super::query_loader::load_query_source; pub struct ExecArgs { + pub query_path: Option, pub query_text: Option, - pub query_file: Option, + pub source_path: Option, pub source_text: Option, - pub source_file: Option, pub lang: Option, pub pretty: bool, pub verbose_nodes: bool, pub check: bool, pub entry: Option, + pub color: bool, } pub fn run(args: ExecArgs) { @@ -24,85 +27,117 @@ pub fn run(args: ExecArgs) { std::process::exit(1); } - let query_source = load_query(&args); - if query_source.trim().is_empty() { + let source_map = match load_query_source( + args.query_path.as_deref(), + args.query_text.as_deref(), + ) { + Ok(map) => map, + Err(msg) => { + eprintln!("error: {}", msg); + std::process::exit(1); + } + }; + + if source_map.is_empty() { eprintln!("error: query cannot be empty"); std::process::exit(1); } + let _source_code = load_source(&args); - let lang = resolve_lang(&args.lang, &args.source_text, &args.source_file); + let lang = resolve_source_lang(&args); - // Parse query - let query_parsed = QueryBuilder::new(SourceMap::one_liner(&query_source)) - .parse() - .unwrap_or_else(|e| { + // Parse and analyze query + let query = match QueryBuilder::new(source_map).parse() { + Ok(parsed) => parsed.analyze().link(&lang), + Err(e) => { eprintln!("error: {}", e); std::process::exit(1); - }); - - // Analyze query - let query_analyzed = query_parsed.analyze(); + } + }; - // Link query against language - let linked = query_analyzed.link(&lang); - if !linked.is_valid() { - eprint!("{}", 
linked.diagnostics().render(linked.source_map())); + if !query.is_valid() { + eprint!( + "{}", + query.diagnostics().render_colored(query.source_map(), args.color) + ); std::process::exit(1); } let _ = (args.pretty, args.verbose_nodes, args.check, args.entry); - eprintln!("error: query execution not yet implemented"); - eprintln!("hint: use `plotnik types` to generate TypeScript types from queries"); - std::process::exit(1); + eprintln!("The 'exec' command is under development."); + eprintln!(); + eprintln!("For now, use 'plotnik infer' to generate TypeScript types."); + std::process::exit(0); } -fn load_query(args: &ExecArgs) -> String { - if let Some(ref text) = args.query_text { +fn load_source(args: &ExecArgs) -> String { + if let Some(ref text) = args.source_text { return text.clone(); } - if let Some(ref path) = args.query_file { + if let Some(ref path) = args.source_path { if path.as_os_str() == "-" { + // Check if query is also from stdin + if args.query_path.as_ref().map(|p| p.as_os_str() == "-").unwrap_or(false) { + eprintln!("error: query and source cannot both be from stdin"); + std::process::exit(1); + } let mut buf = String::new(); io::stdin() .read_to_string(&mut buf) .expect("failed to read stdin"); return buf; } - return fs::read_to_string(path).unwrap_or_else(|_| { - eprintln!("error: query file not found: {}", path.display()); + return fs::read_to_string(path).unwrap_or_else(|e| { + eprintln!("error: failed to read '{}': {}", path.display(), e); std::process::exit(1); }); } - unreachable!("validation ensures query input exists") + unreachable!("validation ensures source input exists") } -fn load_source(args: &ExecArgs) -> String { - if let Some(ref text) = args.source_text { - return text.clone(); - } - if let Some(ref path) = args.source_file { - if path.as_os_str() == "-" { - panic!("cannot read both query and source from stdin"); - } - return fs::read_to_string(path).unwrap_or_else(|_| { - eprintln!("error: file not found: {}", 
path.display()); +fn resolve_source_lang(args: &ExecArgs) -> Lang { + if let Some(ref name) = args.lang { + return resolve_lang_required(name).unwrap_or_else(|msg| { + eprintln!("error: {}", msg); + if let Some(suggestion) = suggest_language(name) { + eprintln!(); + eprintln!("Did you mean '{}'?", suggestion); + } + eprintln!(); + eprintln!("Run 'plotnik langs' for the full list."); std::process::exit(1); }); } - unreachable!("validation ensures source input exists") + + if let Some(ref path) = args.source_path + && path.as_os_str() != "-" + && let Some(ext) = path.extension().and_then(|e| e.to_str()) + { + if let Some(lang) = plotnik_langs::from_ext(ext) { + return lang; + } + eprintln!( + "error: cannot infer language from extension '.{}', use --lang", + ext + ); + std::process::exit(1); + } + + eprintln!("error: --lang is required (cannot infer from input)"); + std::process::exit(1); } fn validate(args: &ExecArgs) -> Result<(), &'static str> { - let has_query = args.query_text.is_some() || args.query_file.is_some(); - let has_source = args.source_text.is_some() || args.source_file.is_some(); + let has_query = args.query_text.is_some() || args.query_path.is_some(); + let has_source = args.source_text.is_some() || args.source_path.is_some(); if !has_query { - return Err("query is required: use -q/--query or --query-file"); + return Err("query is required: use positional argument or -q/--query"); } if !has_source { - return Err("source is required: use -s/--source-file or --source"); + return Err("source is required: use positional argument, -s/--source-file, or --source"); } if args.source_text.is_some() && args.lang.is_none() { diff --git a/crates/plotnik-cli/src/commands/infer.rs b/crates/plotnik-cli/src/commands/infer.rs new file mode 100644 index 00000000..8cfe02cc --- /dev/null +++ b/crates/plotnik-cli/src/commands/infer.rs @@ -0,0 +1,124 @@ +use std::fs; +use std::io::{self, Write}; +use std::path::PathBuf; + +use plotnik_lib::QueryBuilder; 
+use plotnik_lib::bytecode::Module; +use plotnik_lib::codegen::typescript; + +use super::lang_resolver::{resolve_lang, resolve_lang_required, suggest_language}; +use super::query_loader::load_query_source; + +pub struct InferArgs { + pub query_path: Option, + pub query_text: Option, + pub lang: Option, + pub format: String, + pub verbose_nodes: bool, + pub no_node_type: bool, + pub export: bool, + pub output: Option, + pub color: bool, +} + +pub fn run(args: InferArgs) { + // Validate format + let fmt = args.format.to_lowercase(); + if fmt != "typescript" && fmt != "ts" { + eprintln!("error: --format must be 'typescript' or 'ts'"); + std::process::exit(1); + } + + let source_map = match load_query_source( + args.query_path.as_deref(), + args.query_text.as_deref(), + ) { + Ok(map) => map, + Err(msg) => { + eprintln!("error: {}", msg); + std::process::exit(1); + } + }; + + if source_map.is_empty() { + eprintln!("error: query cannot be empty"); + std::process::exit(1); + } + + // Resolve language (required for infer) + let lang = args + .lang + .as_deref() + .map(|name| { + resolve_lang_required(name).unwrap_or_else(|msg| { + eprintln!("error: {}", msg); + if let Some(suggestion) = suggest_language(name) { + eprintln!(); + eprintln!("Did you mean '{}'?", suggestion); + } + eprintln!(); + eprintln!("Run 'plotnik langs' for the full list."); + std::process::exit(1); + }) + }) + .or_else(|| resolve_lang(None, args.query_path.as_deref())); + + let Some(lang) = lang else { + eprintln!("error: --lang is required for type generation"); + std::process::exit(1); + }; + + // Parse, analyze, and link + let query = match QueryBuilder::new(source_map).parse() { + Ok(parsed) => parsed.analyze().link(&lang), + Err(e) => { + eprintln!("error: {}", e); + std::process::exit(1); + } + }; + + if !query.is_valid() { + eprint!( + "{}", + query.diagnostics().render_colored(query.source_map(), args.color) + ); + std::process::exit(1); + } + + // Emit to bytecode + let bytecode = 
query.emit().expect("bytecode emission failed"); + let module = Module::from_bytes(bytecode).expect("module loading failed"); + + // Emit TypeScript types + let config = typescript::Config { + export: args.export, + emit_node_type: !args.no_node_type, + verbose_nodes: args.verbose_nodes, + }; + let output = typescript::emit_with_config(&module, config); + + // Write output + if let Some(ref path) = args.output { + fs::write(path, &output).unwrap_or_else(|e| { + eprintln!("error: failed to write '{}': {}", path.display(), e); + std::process::exit(1); + }); + // Success message + let type_count = count_types(&output); + eprintln!("Wrote {} types to {}", type_count, path.display()); + } else { + io::stdout().write_all(output.as_bytes()).unwrap(); + } +} + +fn count_types(output: &str) -> usize { + output + .lines() + .filter(|line| { + line.starts_with("export type ") + || line.starts_with("type ") + || line.starts_with("export interface ") + || line.starts_with("interface ") + }) + .count() +} diff --git a/crates/plotnik-cli/src/commands/lang_resolver.rs b/crates/plotnik-cli/src/commands/lang_resolver.rs new file mode 100644 index 00000000..59362eca --- /dev/null +++ b/crates/plotnik-cli/src/commands/lang_resolver.rs @@ -0,0 +1,70 @@ +use std::path::Path; + +use plotnik_langs::Lang; + +/// Resolve language from explicit flag or infer from workspace directory name. 
+/// +/// Directory inference: `queries.ts/` → typescript, `queries.javascript/` → javascript +pub fn resolve_lang(explicit: Option<&str>, query_path: Option<&Path>) -> Option { + // Explicit flag takes precedence + if let Some(name) = explicit { + return plotnik_langs::from_name(name); + } + + // Infer from directory name extension: "queries.ts" → "ts" + if let Some(path) = query_path + && path.is_dir() + && let Some(name) = path.file_name().and_then(|n| n.to_str()) + && let Some((_, ext)) = name.rsplit_once('.') + { + return plotnik_langs::from_ext(ext); + } + + None +} + +/// Resolve language, returning an error message if unknown. +pub fn resolve_lang_required(lang_name: &str) -> Result { + plotnik_langs::from_name(lang_name) + .ok_or_else(|| format!("unknown language: '{}'", lang_name)) +} + +/// Suggest similar language names for typos. +pub fn suggest_language(input: &str) -> Option { + let input_lower = input.to_lowercase(); + plotnik_langs::all() + .into_iter() + .filter(|lang| levenshtein(lang.name(), &input_lower) <= 2) + .min_by_key(|lang| levenshtein(lang.name(), &input_lower)) + .map(|lang| lang.name().to_string()) +} + +fn levenshtein(a: &str, b: &str) -> usize { + let a_chars: Vec = a.chars().collect(); + let b_chars: Vec = b.chars().collect(); + let m = a_chars.len(); + let n = b_chars.len(); + + if m == 0 { + return n; + } + if n == 0 { + return m; + } + + let mut prev = (0..=n).collect::>(); + let mut curr = vec![0; n + 1]; + + for i in 1..=m { + curr[0] = i; + for j in 1..=n { + let cost = usize::from(a_chars[i - 1] != b_chars[j - 1]); + curr[j] = (prev[j] + 1) + .min(curr[j - 1] + 1) + .min(prev[j - 1] + cost); + } + std::mem::swap(&mut prev, &mut curr); + } + + prev[n] +} diff --git a/crates/plotnik-cli/src/commands/mod.rs b/crates/plotnik-cli/src/commands/mod.rs index f5e59a46..7c199d0d 100644 --- a/crates/plotnik-cli/src/commands/mod.rs +++ b/crates/plotnik-cli/src/commands/mod.rs @@ -1,7 +1,11 @@ -pub mod debug; +pub mod check; +pub mod 
dump; pub mod exec; +pub mod infer; +pub mod lang_resolver; pub mod langs; -pub mod types; +pub mod query_loader; +pub mod tree; #[cfg(test)] mod langs_tests; diff --git a/crates/plotnik-cli/src/commands/query_loader.rs b/crates/plotnik-cli/src/commands/query_loader.rs new file mode 100644 index 00000000..10df616b --- /dev/null +++ b/crates/plotnik-cli/src/commands/query_loader.rs @@ -0,0 +1,79 @@ +use std::fs; +use std::io::{self, Read}; +use std::path::Path; + +use plotnik_lib::SourceMap; + +pub fn load_query_source( + query_path: Option<&Path>, + query_text: Option<&str>, +) -> Result { + if let Some(text) = query_text { + let mut map = SourceMap::new(); + map.add_one_liner(text); + return Ok(map); + } + + if let Some(path) = query_path { + if path.as_os_str() == "-" { + return load_stdin(); + } + if path.is_dir() { + return load_workspace(path); + } + return load_file(path); + } + + Err("query is required: use positional argument, -q/--query, or --query-file".to_string()) +} + +fn load_stdin() -> Result { + let mut buf = String::new(); + io::stdin() + .read_to_string(&mut buf) + .map_err(|e| format!("failed to read stdin: {}", e))?; + let mut map = SourceMap::new(); + map.add_stdin(&buf); + Ok(map) +} + +fn load_file(path: &Path) -> Result { + let content = fs::read_to_string(path) + .map_err(|e| format!("failed to read '{}': {}", path.display(), e))?; + let mut map = SourceMap::new(); + map.add_file(&path.to_string_lossy(), &content); + Ok(map) +} + +fn load_workspace(dir: &Path) -> Result { + let mut map = SourceMap::new(); + let mut entries: Vec<_> = fs::read_dir(dir) + .map_err(|e| format!("failed to read directory '{}': {}", dir.display(), e))? 
+ .filter_map(|e| e.ok()) + .filter(|e| { + e.path() + .extension() + .map(|ext| ext == "ptk") + .unwrap_or(false) + }) + .collect(); + + if entries.is_empty() { + return Err(format!( + "no .ptk files found in workspace '{}'", + dir.display() + )); + } + + // Sort for deterministic ordering + entries.sort_by_key(|e| e.path()); + + for entry in entries { + let path = entry.path(); + let content = fs::read_to_string(&path) + .map_err(|e| format!("failed to read '{}': {}", path.display(), e))?; + map.add_file(&path.to_string_lossy(), &content); + } + + Ok(map) +} diff --git a/crates/plotnik-cli/src/commands/debug/source.rs b/crates/plotnik-cli/src/commands/tree.rs similarity index 59% rename from crates/plotnik-cli/src/commands/debug/source.rs rename to crates/plotnik-cli/src/commands/tree.rs index 5adea924..c646dc90 100644 --- a/crates/plotnik-cli/src/commands/debug/source.rs +++ b/crates/plotnik-cli/src/commands/tree.rs @@ -1,35 +1,39 @@ use std::fs; use std::io::{self, Read}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use arborium_tree_sitter as tree_sitter; use plotnik_langs::Lang; -pub fn load_source(text: &Option, file: &Option) -> String { - if let Some(t) = text { - return t.clone(); - } - if let Some(path) = file { - if path.as_os_str() == "-" { - let mut buf = String::new(); - io::stdin() - .read_to_string(&mut buf) - .expect("failed to read stdin"); - return buf; - } - return fs::read_to_string(path).unwrap_or_else(|_| { - eprintln!("error: file not found: {}", path.display()); - std::process::exit(1); - }); +pub struct TreeArgs { + pub source_path: PathBuf, + pub lang: Option, + pub raw: bool, + pub spans: bool, +} + +pub fn run(args: TreeArgs) { + let lang = resolve_lang(&args.lang, &args.source_path); + let source = load_source(&args.source_path); + let tree = lang.parse(&source); + print!("{}", dump_tree(&tree, &source, args.raw, args.spans)); +} + +fn load_source(path: &PathBuf) -> String { + if path.as_os_str() == "-" { + let mut buf = 
String::new(); + io::stdin() + .read_to_string(&mut buf) + .expect("failed to read stdin"); + return buf; } - unreachable!() + fs::read_to_string(path).unwrap_or_else(|_| { + eprintln!("error: file not found: {}", path.display()); + std::process::exit(1); + }) } -pub fn resolve_lang( - lang: &Option, - _source_text: &Option, - source_file: &Option, -) -> Lang { +fn resolve_lang(lang: &Option, source_path: &Path) -> Lang { if let Some(name) = lang { return plotnik_langs::from_name(name).unwrap_or_else(|| { eprintln!("error: unknown language: {}", name); @@ -37,9 +41,8 @@ pub fn resolve_lang( }); } - if let Some(path) = source_file - && path.as_os_str() != "-" - && let Some(ext) = path.extension().and_then(|e| e.to_str()) + if source_path.as_os_str() != "-" + && let Some(ext) = source_path.extension().and_then(|e| e.to_str()) { return plotnik_langs::from_ext(ext).unwrap_or_else(|| { eprintln!( @@ -50,16 +53,12 @@ pub fn resolve_lang( }); } - eprintln!("error: --lang is required (cannot infer from input)"); + eprintln!("error: --lang is required (cannot infer from stdin)"); std::process::exit(1); } -pub fn parse_tree(source: &str, lang: Lang) -> tree_sitter::Tree { - lang.parse(source) -} - -pub fn dump_source(tree: &tree_sitter::Tree, source: &str, include_anonymous: bool) -> String { - format_node(tree.root_node(), source, 0, include_anonymous) + "\n" +fn dump_tree(tree: &tree_sitter::Tree, source: &str, raw: bool, spans: bool) -> String { + format_node(tree.root_node(), source, 0, raw, spans) + "\n" } fn format_node( @@ -67,8 +66,9 @@ fn format_node( source: &str, depth: usize, include_anonymous: bool, + show_spans: bool, ) -> String { - format_node_with_field(node, None, source, depth, include_anonymous) + format_node_with_field(node, None, source, depth, include_anonymous, show_spans) } fn format_node_with_field( @@ -77,6 +77,7 @@ fn format_node_with_field( source: &str, depth: usize, include_anonymous: bool, + show_spans: bool, ) -> String { if 
!include_anonymous && !node.is_named() { return String::new(); @@ -85,6 +86,13 @@ fn format_node_with_field( let indent = " ".repeat(depth); let kind = node.kind(); let field_prefix = field_name.map(|f| format!("{}: ", f)).unwrap_or_default(); + let span_suffix = if show_spans { + let start = node.start_position(); + let end = node.end_position(); + format!(" [{}:{}-{}:{}]", start.row, start.column, end.row, end.column) + } else { + String::new() + }; let children: Vec<_> = { let mut cursor = node.walk(); @@ -108,19 +116,26 @@ fn format_node_with_field( .utf8_text(source.as_bytes()) .unwrap_or(""); return if text == kind { - format!("{}{}(\"{}\")", indent, field_prefix, escape_string(kind)) + format!( + "{}{}(\"{}\"){}", + indent, + field_prefix, + escape_string(kind), + span_suffix + ) } else { format!( - "{}{}({} \"{}\")", + "{}{}({} \"{}\"){}", indent, field_prefix, kind, - escape_string(text) + escape_string(text), + span_suffix ) }; } - let mut out = format!("{}{}({}", indent, field_prefix, kind); + let mut out = format!("{}{}({}{}", indent, field_prefix, kind, span_suffix); for (child, child_field) in children { out.push('\n'); out.push_str(&format_node_with_field( @@ -129,6 +144,7 @@ fn format_node_with_field( source, depth + 1, include_anonymous, + show_spans, )); } out.push(')'); diff --git a/crates/plotnik-cli/src/commands/types.rs b/crates/plotnik-cli/src/commands/types.rs deleted file mode 100644 index 6c2ef68c..00000000 --- a/crates/plotnik-cli/src/commands/types.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::fs; -use std::io::{self, Read, Write}; -use std::path::PathBuf; - -use plotnik_langs::Lang; -use plotnik_lib::Query; -use plotnik_lib::bytecode::emit::{TsEmitConfig, emit_typescript_with_config}; - -pub struct TypesArgs { - pub query_text: Option, - pub query_file: Option, - pub lang: Option, - pub format: String, - pub verbose_nodes: bool, - pub no_node_type: bool, - pub export: bool, - pub output: Option, -} - -pub fn run(args: TypesArgs) { - if 
let Err(msg) = validate(&args) { - eprintln!("error: {}", msg); - std::process::exit(1); - } - - let query_source = load_query(&args); - if query_source.trim().is_empty() { - eprintln!("error: query cannot be empty"); - std::process::exit(1); - } - let lang = resolve_lang_required(&args.lang); - - // Parse and analyze query - let query = Query::try_from(query_source.as_str()) - .unwrap_or_else(|e| { - eprintln!("error: {}", e); - std::process::exit(1); - }) - .link(&lang); - - if !query.is_valid() { - eprint!("{}", query.diagnostics().render(query.source_map())); - std::process::exit(1); - } - - // Emit to bytecode first - let bytecode = query.emit().expect("bytecode emission failed"); - let module = - plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); - - // Emit TypeScript types from bytecode - let config = TsEmitConfig { - export: args.export, - emit_node_type: !args.no_node_type, - verbose_nodes: args.verbose_nodes, - }; - - let output = emit_typescript_with_config(&module, config); - - // Write output - if let Some(ref path) = args.output { - fs::write(path, &output).unwrap_or_else(|e| { - eprintln!("error: failed to write {}: {}", path.display(), e); - std::process::exit(1); - }); - } else { - io::stdout().write_all(output.as_bytes()).unwrap(); - } -} - -fn load_query(args: &TypesArgs) -> String { - if let Some(ref text) = args.query_text { - return text.clone(); - } - if let Some(ref path) = args.query_file { - if path.as_os_str() == "-" { - let mut buf = String::new(); - io::stdin() - .read_to_string(&mut buf) - .expect("failed to read stdin"); - return buf; - } - return fs::read_to_string(path).unwrap_or_else(|e| { - eprintln!("error: failed to read query file: {}", e); - std::process::exit(1); - }); - } - unreachable!("validation ensures query input exists") -} - -fn resolve_lang_required(lang: &Option) -> Lang { - let name = lang.as_ref().expect("--lang is required"); - plotnik_langs::from_name(name).unwrap_or_else(|| { 
- eprintln!("error: unknown language: {}", name); - std::process::exit(1); - }) -} - -fn validate(args: &TypesArgs) -> Result<(), &'static str> { - let has_query = args.query_text.is_some() || args.query_file.is_some(); - - if !has_query { - return Err("query is required: use -q/--query or --query-file"); - } - - if args.lang.is_none() { - return Err("--lang is required for type generation"); - } - - let fmt = args.format.to_lowercase(); - if fmt != "typescript" && fmt != "ts" { - return Err("--format must be 'typescript' or 'ts'"); - } - - Ok(()) -} diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index 9882c875..31ef8fe7 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -2,74 +2,103 @@ mod cli; mod commands; use cli::{Cli, Command}; -use commands::debug::DebugArgs; +use commands::check::CheckArgs; +use commands::dump::DumpArgs; use commands::exec::ExecArgs; -use commands::types::TypesArgs; +use commands::infer::InferArgs; +use commands::tree::TreeArgs; fn main() { let cli = ::parse(); match cli.command { - Command::Debug { - query, + Command::Tree { source, lang, + raw, + spans, + } => { + commands::tree::run(TreeArgs { + source_path: source, + lang, + raw, + spans, + }); + } + Command::Check { + query_path, + query_text, + lang, + strict, output, } => { - commands::debug::run(DebugArgs { - query_text: query.query_text, - query_file: query.query_file, - source_text: source.source_text, - source_file: source.source_file, + commands::check::run(CheckArgs { + query_path, + query_text, lang, - symbols: output.symbols, - raw: output.raw, - cst: output.cst, - spans: output.spans, - arities: output.arities, - graph: output.graph, - graph_raw: output.graph_raw, - types: output.types, - bytecode: output.bytecode, + strict, color: output.color.should_colorize(), }); } - Command::Langs => { - commands::langs::run(); + Command::Dump { + query_path, + query_text, + lang, + output, + } => { + 
commands::dump::run(DumpArgs { + query_path, + query_text, + lang, + color: output.color.should_colorize(), + }); } - Command::Exec { - query, - source, + Command::Infer { + query_path, + query_text, lang, + infer_output, output, } => { - commands::exec::run(ExecArgs { - query_text: query.query_text, - query_file: query.query_file, - source_text: source.source_text, - source_file: source.source_file, + commands::infer::run(InferArgs { + query_path, + query_text, lang, - entry: output.entry, - pretty: output.pretty, - verbose_nodes: output.verbose_nodes, - check: output.check, + format: infer_output.format, + verbose_nodes: infer_output.verbose_nodes, + no_node_type: infer_output.no_node_type, + export: !infer_output.no_export, + output: infer_output.output, + color: output.color.should_colorize(), }); } - Command::Types { - query, + Command::Exec { + query_path, + source_path, + query_text, + source_text, + source_file, lang, + exec_output, output, } => { - commands::types::run(TypesArgs { - query_text: query.query_text, - query_file: query.query_file, + // Merge source_path and source_file (positional takes precedence) + let resolved_source = source_path.or(source_file); + commands::exec::run(ExecArgs { + query_path, + query_text, + source_path: resolved_source, + source_text, lang, - format: output.format, - verbose_nodes: output.verbose_nodes, - no_node_type: output.no_node_type, - export: !output.no_export, - output: output.output, + pretty: exec_output.pretty, + verbose_nodes: exec_output.verbose_nodes, + check: exec_output.check, + entry: exec_output.entry, + color: output.color.should_colorize(), }); } + Command::Langs => { + commands::langs::run(); + } } } diff --git a/crates/plotnik-lib/src/bytecode/emit/mod.rs b/crates/plotnik-lib/src/bytecode/emit/mod.rs index 3699f14d..ec604807 100644 --- a/crates/plotnik-lib/src/bytecode/emit/mod.rs +++ b/crates/plotnik-lib/src/bytecode/emit/mod.rs @@ -1,10 +1,10 @@ //! Code generation from bytecode Module. //! -//! 
This module provides emitters for generating code from compiled bytecode. -//! Currently supports TypeScript, with Rust planned. +//! This module re-exports from [`crate::codegen`] for backwards compatibility. +//! New code should use [`crate::codegen::typescript`] directly. -mod typescript; - -pub use typescript::{ - EmitConfig as TsEmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config, +// Re-export from codegen module for backwards compatibility +pub use crate::codegen::typescript::{ + Config as TsEmitConfig, Emitter as TsEmitter, emit as emit_typescript, + emit_with_config as emit_typescript_with_config, }; diff --git a/crates/plotnik-lib/src/codegen/mod.rs b/crates/plotnik-lib/src/codegen/mod.rs new file mode 100644 index 00000000..cebcc363 --- /dev/null +++ b/crates/plotnik-lib/src/codegen/mod.rs @@ -0,0 +1,21 @@ +//! Code generation from bytecode Module. +//! +//! This module provides emitters for generating code from compiled bytecode. +//! Each target language has its own submodule with a `Config` struct and `emit()` function. +//! +//! # Example +//! +//! ```ignore +//! use plotnik_lib::codegen::typescript; +//! use plotnik_lib::bytecode::Module; +//! +//! let module = Module::from_bytes(bytecode)?; +//! let config = typescript::Config { +//! export: true, +//! emit_node_type: true, +//! verbose_nodes: false, +//! }; +//! let output = typescript::emit_with_config(&module, config); +//! ``` + +pub mod typescript; diff --git a/crates/plotnik-lib/src/codegen/typescript.rs b/crates/plotnik-lib/src/codegen/typescript.rs new file mode 100644 index 00000000..c8c228a8 --- /dev/null +++ b/crates/plotnik-lib/src/codegen/typescript.rs @@ -0,0 +1,783 @@ +//! TypeScript type emitter from bytecode Module. +//! +//! Converts compiled bytecode back to TypeScript declarations. +//! Used as a test oracle and for generating types from .ptkq files. 
+ +use std::collections::hash_map::Entry; +use std::collections::{BTreeSet, HashMap, HashSet}; + +use plotnik_core::utils::to_pascal_case; + +use crate::bytecode::{ + EntrypointsView, Module, QTypeId, StringsView, TypeDef, TypeKind, TypesView, +}; + +/// Configuration for TypeScript emission. +#[derive(Clone, Debug)] +pub struct Config { + /// Whether to export types + pub export: bool, + /// Whether to emit the Node type definition + pub emit_node_type: bool, + /// Use verbose node representation (with kind, text, etc.) + pub verbose_nodes: bool, +} + +impl Default for Config { + fn default() -> Self { + Self { + export: true, + emit_node_type: true, + verbose_nodes: false, + } + } +} + +/// TypeScript emitter from bytecode module. +pub struct Emitter<'a> { + types: TypesView<'a>, + strings: StringsView<'a>, + entrypoints: EntrypointsView<'a>, + config: Config, + + /// TypeId -> assigned name mapping + type_names: HashMap, + /// Names already used (for collision avoidance) + used_names: BTreeSet, + /// Track which builtin types are referenced + node_referenced: bool, + /// Track which types have been emitted + emitted: HashSet, + /// Types visited during builtin reference collection (cycle detection) + refs_visited: HashSet, + /// Output buffer + output: String, +} + +impl<'a> Emitter<'a> { + pub fn new(module: &'a Module, config: Config) -> Self { + Self { + types: module.types(), + strings: module.strings(), + entrypoints: module.entrypoints(), + config, + type_names: HashMap::new(), + used_names: BTreeSet::new(), + node_referenced: false, + emitted: HashSet::new(), + refs_visited: HashSet::new(), + output: String::new(), + } + } + + /// Emit TypeScript for all entrypoint types. 
+ pub fn emit(mut self) -> String { + self.prepare_emission(); + + // Collect all entrypoints and their result types + let mut primary_names: HashMap = HashMap::new(); + let mut aliases: Vec<(String, QTypeId)> = Vec::new(); + + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let name = self.strings.get(ep.name).to_string(); + let type_id = ep.result_type; + + match primary_names.entry(type_id) { + Entry::Vacant(e) => { + e.insert(name); + } + Entry::Occupied(_) => { + aliases.push((name, type_id)); + } + } + } + + // Collect all reachable types starting from entrypoints + let mut to_emit = HashSet::new(); + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + self.collect_reachable_types(ep.result_type, &mut to_emit); + } + + // Emit in topological order + for type_id in self.sort_topologically(to_emit) { + if let Some(def_name) = primary_names.get(&type_id) { + self.emit_type_definition(def_name, type_id); + } else { + self.emit_generated_or_custom(type_id); + } + } + + // Emit aliases + for (alias_name, type_id) in aliases { + if let Some(primary_name) = primary_names.get(&type_id) { + self.emit_type_alias(&alias_name, primary_name); + } + } + + self.output + } + + fn prepare_emission(&mut self) { + // Reserve entrypoint names to avoid collisions + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let name = self.strings.get(ep.name); + self.used_names.insert(to_pascal_case(name)); + } + + // Assign names to named types from TypeNames section + for i in 0..self.types.names_count() { + let type_name = self.types.get_name(i); + let name = self.strings.get(type_name.name); + self.type_names + .insert(type_name.type_id, to_pascal_case(name)); + } + + // Assign names to struct/enum types that need them but don't have names + self.assign_generated_names(); + + // Collect builtin references + self.collect_builtin_references(); + + // Emit Node interface if referenced + if 
self.config.emit_node_type && self.node_referenced { + self.emit_node_interface(); + } + } + + fn assign_generated_names(&mut self) { + // Collect naming contexts from entrypoints → fields + let mut contexts: HashMap = HashMap::new(); + + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let def_name = self.strings.get(ep.name); + self.collect_naming_contexts( + ep.result_type, + &NamingContext { + def_name: def_name.to_string(), + field_name: None, + }, + &mut contexts, + ); + } + + // Assign names to types that need them + for i in 0..self.types.defs_count() { + let type_id = QTypeId::from_custom_index(i); + if self.type_names.contains_key(&type_id) { + continue; + } + + let type_def = self.types.get_def(i); + if !self.needs_generated_name(&type_def) { + continue; + } + + let name = if let Some(ctx) = contexts.get(&type_id) { + self.generate_contextual_name(ctx) + } else { + self.generate_fallback_name(&type_def) + }; + self.type_names.insert(type_id, name); + } + } + + fn collect_naming_contexts( + &self, + type_id: QTypeId, + ctx: &NamingContext, + contexts: &mut HashMap, + ) { + if type_id.is_builtin() || contexts.contains_key(&type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + for member in self.types.members_of(&type_def) { + let field_name = self.strings.get(member.name); + // Unwrap Optional wrappers to get the actual type + let (inner_type, _) = self.unwrap_optional(member.type_id); + let field_ctx = NamingContext { + def_name: ctx.def_name.clone(), + field_name: Some(field_name.to_string()), + }; + self.collect_naming_contexts(inner_type, &field_ctx, contexts); + } + } + TypeKind::Enum => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + let inner = 
QTypeId(type_def.data); + self.collect_naming_contexts(inner, ctx, contexts); + } + TypeKind::Optional => { + let inner = QTypeId(type_def.data); + self.collect_naming_contexts(inner, ctx, contexts); + } + TypeKind::Alias => { + // Aliases don't need contexts + } + } + } + + fn collect_builtin_references(&mut self) { + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + self.collect_refs_recursive(ep.result_type); + } + } + + fn collect_refs_recursive(&mut self, type_id: QTypeId) { + if type_id == QTypeId::NODE { + self.node_referenced = true; + return; + } + if type_id == QTypeId::STRING || type_id == QTypeId::VOID { + return; + } + + // Cycle detection + if !self.refs_visited.insert(type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct | TypeKind::Enum => { + let member_types: Vec<_> = self + .types + .members_of(&type_def) + .map(|m| m.type_id) + .collect(); + for ty in member_types { + self.collect_refs_recursive(ty); + } + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore | TypeKind::Optional => { + self.collect_refs_recursive(QTypeId(type_def.data)); + } + TypeKind::Alias => { + // Alias to Node + self.node_referenced = true; + } + } + } + + fn sort_topologically(&self, types: HashSet) -> Vec { + let mut deps: HashMap> = HashMap::new(); + let mut rdeps: HashMap> = HashMap::new(); + + for &tid in &types { + deps.entry(tid).or_default(); + rdeps.entry(tid).or_default(); + } + + // Build dependency graph + for &tid in &types { + for dep in self.get_direct_deps(tid) { + if types.contains(&dep) && dep != tid { + deps.entry(tid).or_default().insert(dep); + rdeps.entry(dep).or_default().insert(tid); + } + } + } + + // Kahn's algorithm + let mut result = Vec::with_capacity(types.len()); + let mut queue: Vec = deps + .iter() + .filter(|(_, d)| d.is_empty()) + .map(|(&tid, _)| tid) + .collect(); + + 
queue.sort_by_key(|tid| tid.0); + + while let Some(tid) = queue.pop() { + result.push(tid); + if let Some(dependents) = rdeps.get(&tid) { + for &dependent in dependents { + if let Some(dep_set) = deps.get_mut(&dependent) { + dep_set.remove(&tid); + if dep_set.is_empty() { + queue.push(dependent); + queue.sort_by_key(|t| t.0); + } + } + } + } + } + + result + } + + fn collect_reachable_types(&self, type_id: QTypeId, out: &mut HashSet) { + if type_id.is_builtin() || out.contains(&type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + out.insert(type_id); + for member in self.types.members_of(&type_def) { + self.collect_reachable_types(member.type_id, out); + } + } + TypeKind::Enum => { + out.insert(type_id); + for member in self.types.members_of(&type_def) { + // For enum variants, recurse into payload fields but don't + // add the payload struct itself - it will be inlined. + self.collect_enum_variant_refs(member.type_id, out); + } + } + TypeKind::Alias => { + out.insert(type_id); + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + self.collect_reachable_types(QTypeId(type_def.data), out); + } + TypeKind::Optional => { + self.collect_reachable_types(QTypeId(type_def.data), out); + } + } + } + + /// Collect reachable types from enum variant payloads. + /// Recurses into struct fields but doesn't add the payload struct itself. + fn collect_enum_variant_refs(&self, type_id: QTypeId, out: &mut HashSet) { + if type_id.is_builtin() { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + // DON'T add the struct - it will be inlined as $data. + // But DO recurse into its fields to find named types. 
+ for member in self.types.members_of(&type_def) { + self.collect_reachable_types(member.type_id, out); + } + } + _ => { + // For non-struct payloads (shouldn't happen normally), + // fall back to regular collection. + self.collect_reachable_types(type_id, out); + } + } + } + + fn get_direct_deps(&self, type_id: QTypeId) -> Vec { + let Some(type_def) = self.types.get(type_id) else { + return vec![]; + }; + + let Some(kind) = type_def.type_kind() else { + return vec![]; + }; + + match kind { + TypeKind::Struct | TypeKind::Enum => self + .types + .members_of(&type_def) + .flat_map(|member| self.unwrap_for_deps(member.type_id)) + .collect(), + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + self.unwrap_for_deps(QTypeId(type_def.data)) + } + TypeKind::Optional => self.unwrap_for_deps(QTypeId(type_def.data)), + TypeKind::Alias => vec![], + } + } + + fn unwrap_for_deps(&self, type_id: QTypeId) -> Vec { + if type_id.is_builtin() { + return vec![]; + } + + let Some(type_def) = self.types.get(type_id) else { + return vec![]; + }; + + let Some(kind) = type_def.type_kind() else { + return vec![]; + }; + + match kind { + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore | TypeKind::Optional => { + self.unwrap_for_deps(QTypeId(type_def.data)) + } + TypeKind::Struct | TypeKind::Enum | TypeKind::Alias => vec![type_id], + } + } + + fn emit_generated_or_custom(&mut self, type_id: QTypeId) { + if self.emitted.contains(&type_id) || type_id.is_builtin() { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + // Check if this is an alias type (custom type annotation) + if type_def.is_alias() { + if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_custom_type_alias(&name); + self.emitted.insert(type_id); + } + return; + } + + // Check if we have a generated name + if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_generated_type_def(type_id, &name); + } + } + + fn emit_generated_type_def(&mut 
self, type_id: QTypeId, name: &str) { + self.emitted.insert(type_id); + let export = if self.config.export { "export " } else { "" }; + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => self.emit_interface(name, &type_def, export), + TypeKind::Enum => self.emit_tagged_union(name, &type_def, export), + _ => {} + } + } + + fn emit_type_definition(&mut self, name: &str, type_id: QTypeId) { + self.emitted.insert(type_id); + let export = if self.config.export { "export " } else { "" }; + let type_name = to_pascal_case(name); + + let Some(type_def) = self.types.get(type_id) else { + // Builtin type - emit as alias + let ts_type = self.type_to_ts(type_id); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => self.emit_interface(&type_name, &type_def, export), + TypeKind::Enum => self.emit_tagged_union(&type_name, &type_def, export), + _ => { + let ts_type = self.type_to_ts(type_id); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); + } + } + } + + fn emit_interface(&mut self, name: &str, type_def: &TypeDef, export: &str) { + self.output + .push_str(&format!("{}interface {} {{\n", export, name)); + + // Collect fields and sort by name + let mut fields: Vec<(String, QTypeId, bool)> = self + .types + .members_of(type_def) + .map(|member| { + let field_name = self.strings.get(member.name).to_string(); + let (inner_type, optional) = self.unwrap_optional(member.type_id); + (field_name, inner_type, optional) + }) + .collect(); + fields.sort_by(|a, b| a.0.cmp(&b.0)); + + for (field_name, field_type, optional) in fields { + let ts_type = self.type_to_ts(field_type); + let opt_marker = if optional { "?" 
} else { "" };
            self.output
                .push_str(&format!(" {}{}: {};\n", field_name, opt_marker, ts_type));
        }

        self.output.push_str("}\n\n");
    }

    /// Emits a tagged union: one interface per variant, then the union alias.
    ///
    /// Every member of `type_def` becomes an interface named `name` followed by
    /// the PascalCase variant name, carrying a `$tag` string literal and a
    /// `$data` payload. `export` is prepended verbatim to each declaration.
    fn emit_tagged_union(&mut self, name: &str, type_def: &TypeDef, export: &str) {
        let mut variant_types = Vec::new();

        for member in self.types.members_of(type_def) {
            let variant_name = self.strings.get(member.name);
            let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name));

            let data_str = self.inline_data_type(member.type_id);
            self.output.push_str(&format!(
                "{}interface {} {{\n $tag: \"{}\";\n $data: {};\n}}\n\n",
                export, variant_type_name, variant_name, data_str
            ));

            // Pushed last so the name can be moved instead of cloned.
            variant_types.push(variant_type_name);
        }

        let union = variant_types.join(" | ");
        self.output
            .push_str(&format!("{}type {} = {};\n\n", export, name, union));
    }

    /// Emits `type <name> = Node;`, prefixed with `export ` when configured.
    fn emit_custom_type_alias(&mut self, name: &str) {
        let export = if self.config.export { "export " } else { "" };
        self.output
            .push_str(&format!("{}type {} = Node;\n\n", export, name));
    }

    /// Emits `type <alias_name> = <target_name>;`, prefixed with `export `
    /// when configured.
    fn emit_type_alias(&mut self, alias_name: &str, target_name: &str) {
        let export = if self.config.export { "export " } else { "" };
        self.output.push_str(&format!(
            "{}type {} = {};\n\n",
            export, alias_name, target_name
        ));
    }

    /// Emits the `Node` interface declaration.
    ///
    /// With `config.verbose_nodes` set, the interface also carries
    /// `startPosition` and `endPosition`; otherwise only `kind` and `text`.
    fn emit_node_interface(&mut self) {
        let export = if self.config.export { "export " } else { "" };
        let declaration = if self.config.verbose_nodes {
            "interface Node {\n kind: string;\n text: string;\n startPosition: { row: number; column: number };\n endPosition: { row: number; column: number };\n}\n\n"
        } else {
            "interface Node {\n kind: string;\n text: string;\n}\n\n"
        };
        self.output.push_str(export);
        self.output.push_str(declaration);
    }

    /// Renders `type_id` as a TypeScript type expression.
    ///
    /// The `VOID`, `NODE` and `STRING` ids map to `void`, `Node` and `string`;
    /// every other id is resolved through [`Self::custom_type_to_ts`].
    fn type_to_ts(&self, type_id: QTypeId) -> String {
        match type_id {
            QTypeId::VOID => "void".to_string(),
            QTypeId::NODE => "Node".to_string(),
            QTypeId::STRING => "string".to_string(),
            _ => self.custom_type_to_ts(type_id),
        }
    }

    /// Renders a non-builtin type as a TypeScript type expression.
    ///
    /// Returns `unknown` when the id has no definition or the definition has
    /// no recognizable kind. Structs and enums use their registered name when
    /// one exists and are inlined otherwise; aliases fall back to `Node`.
    fn custom_type_to_ts(&self, type_id: QTypeId) -> String {
        // `A | B[]` parses as `A | (B[])` in TypeScript, so a union element
        // type has to be parenthesized before an array suffix is appended.
        fn array_element(ts_type: &str) -> String {
            if ts_type.contains(" | ") {
                format!("({})", ts_type)
            } else {
                ts_type.to_string()
            }
        }

        let Some(type_def) = self.types.get(type_id) else {
            return "unknown".to_string();
        };

        let Some(kind) = type_def.type_kind() else {
            return "unknown".to_string();
        };

        match kind {
            TypeKind::Struct | TypeKind::Enum => {
                if let Some(name) = self.type_names.get(&type_id) {
                    name.clone()
                } else {
                    self.inline_composite(type_id, &type_def, &kind)
                }
            }
            TypeKind::Alias => {
                if let Some(name) = self.type_names.get(&type_id) {
                    name.clone()
                } else {
                    "Node".to_string()
                }
            }
            TypeKind::ArrayZeroOrMore => {
                let elem_type = self.type_to_ts(QTypeId(type_def.data));
                format!("{}[]", array_element(&elem_type))
            }
            TypeKind::ArrayOneOrMore => {
                // Non-empty tuple form: the first slot takes the element type
                // as-is, only the rest-array part needs the parentheses.
                let elem_type = self.type_to_ts(QTypeId(type_def.data));
                format!("[{}, ...{}[]]", elem_type, array_element(&elem_type))
            }
            TypeKind::Optional => {
                let inner_type = self.type_to_ts(QTypeId(type_def.data));
                format!("{} | null", inner_type)
            }
        }
    }

    /// Renders an unnamed struct or enum inline; any other kind yields
    /// `unknown`.
    fn inline_composite(&self, _type_id: QTypeId, type_def: &TypeDef, kind: &TypeKind) -> String {
        match kind {
            TypeKind::Struct => self.inline_struct(type_def),
            TypeKind::Enum => self.inline_enum(type_def),
            _ => "unknown".to_string(),
        }
    }

    /// Renders a struct as an inline object type, e.g. `{ a: Node; b?: string }`.
    ///
    /// Fields are sorted by name. A member whose type is `Optional` is emitted
    /// with a `?` marker and its inner type. A struct with a zero member count
    /// renders as `{}`.
    fn inline_struct(&self, type_def: &TypeDef) -> String {
        if type_def.count == 0 {
            return "{}".to_string();
        }

        let mut fields: Vec<(String, QTypeId, bool)> = self
            .types
            .members_of(type_def)
            .map(|member| {
                let field_name = self.strings.get(member.name).to_string();
                let (inner_type, optional) = self.unwrap_optional(member.type_id);
                (field_name, inner_type, optional)
            })
            .collect();
        fields.sort_by(|a, b| a.0.cmp(&b.0));

        let field_strs: Vec<String> = fields
            .iter()
            .map(|(name, ty, opt)| {
                let ts_type = self.type_to_ts(*ty);
                let opt_marker = if *opt { "?" } else { "" };
                format!("{}{}: {}", name, opt_marker, ts_type)
            })
            .collect();

        format!("{{ {} }}", field_strs.join("; "))
    }

    /// Renders an enum as an inline union of `{ $tag: "..."; $data: ... }`
    /// object types joined with ` | `.
    fn inline_enum(&self, type_def: &TypeDef) -> String {
        let variant_strs: Vec<String> = self
            .types
            .members_of(type_def)
            .map(|member| {
                let name = self.strings.get(member.name);
                // NOTE(review): a void payload renders as `void` here, while
                // `emit_tagged_union` goes through `inline_data_type` and
                // renders `{}` — confirm whether the difference is intended.
                let data_type = self.type_to_ts(member.type_id);
                format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type)
            })
            .collect();

        variant_strs.join(" | ")
    }

    /// Renders the `$data` payload of a tagged-union variant.
    ///
    /// `VOID` becomes `{}`, a struct is always inlined, and anything else is
    /// rendered by [`Self::type_to_ts`].
    fn inline_data_type(&self, type_id: QTypeId) -> String {
        if type_id == QTypeId::VOID {
            return "{}".to_string();
        }

        let Some(type_def) = self.types.get(type_id) else {
            return self.type_to_ts(type_id);
        };

        let Some(kind) = type_def.type_kind() else {
            return self.type_to_ts(type_id);
        };

        if kind == TypeKind::Struct {
            self.inline_struct(&type_def)
        } else {
            self.type_to_ts(type_id)
        }
    }

    /// Unwrap Optional wrappers and return (inner_type, is_optional).
    ///
    /// Only one level is unwrapped. Builtin ids, ids without a definition and
    /// non-`Optional` kinds are returned unchanged with `false`.
    fn unwrap_optional(&self, type_id: QTypeId) -> (QTypeId, bool) {
        if type_id.is_builtin() {
            return (type_id, false);
        }
        let Some(type_def) = self.types.get(type_id) else {
            return (type_id, false);
        };
        if type_def.type_kind() != Some(TypeKind::Optional) {
            return (type_id, false);
        }
        (QTypeId(type_def.data), true)
    }

    /// Returns `true` for struct and enum definitions.
    fn needs_generated_name(&self, type_def: &TypeDef) -> bool {
        matches!(
            type_def.type_kind(),
            Some(TypeKind::Struct) | Some(TypeKind::Enum)
        )
    }

    /// Builds a unique name from the naming context: the PascalCase definition
    /// name, followed by the PascalCase field name when one is present.
    fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String {
        let base = if let Some(field) = &ctx.field_name {
            format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field))
        } else {
            to_pascal_case(&ctx.def_name)
        };
        self.unique_name(&base)
    }

    /// Builds a unique name from the type kind alone: `Struct`, `Enum`, or
    /// `Type` for anything else.
    fn generate_fallback_name(&mut self, type_def: &TypeDef) -> String {
        let base = match type_def.type_kind() {
            Some(TypeKind::Struct) => "Struct",
            Some(TypeKind::Enum) => "Enum",
            _ => "Type",
        };
        self.unique_name(base)
    }

    /// Reserves and returns a name not yet present in `used_names`.
    ///
    /// The PascalCase form of `base` is used when free; otherwise a numeric
    /// suffix starting at 2 is appended and incremented until the name is free.
    fn unique_name(&mut self, base: &str) -> String {
        let base = to_pascal_case(base);
        if self.used_names.insert(base.clone()) {
            return base;
        }

        let mut counter = 2;
        loop {
            let name = format!("{}{}", base, counter);
            if self.used_names.insert(name.clone()) {
                return name;
            }
            counter += 1;
        }
    }
}

/// Where a type was encountered; used to derive a generated type name.
#[derive(Clone, Debug)]
struct NamingContext {
    def_name: String,
    field_name: Option<String>,
}

/// Emit TypeScript from a bytecode module.
pub fn emit(module: &Module) -> String {
    Emitter::new(module, Config::default()).emit()
}

/// Emit TypeScript from a bytecode module with custom config.
pub fn emit_with_config(module: &Module, config: Config) -> String {
    Emitter::new(module, config).emit()
}
diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index cf156be4..91dfa05c 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -17,6 +17,7 @@ #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod bytecode; +pub mod codegen; pub mod diagnostics; pub mod parser; pub mod query;