diff --git a/AGENTS.md b/AGENTS.md index cc15da36..286196a5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -199,11 +199,12 @@ Inspect query AST/CST or parse source files with tree-sitter. cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --only-symbols cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --types +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --bytecode cargo run -p plotnik-cli -- debug -s app.ts cargo run -p plotnik-cli -- debug -s app.ts --raw ``` -Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types` +Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types`, `--bytecode` ## types diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index 553dda6c..ee3a8062 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -32,11 +32,12 @@ pub struct Cli { pub enum Command { /// Debug and inspect queries and source files #[command(after_help = r#"EXAMPLES: - plotnik debug -q '(identifier) @id' - plotnik debug -q '(identifier) @id' --only-symbols + plotnik debug -q 'Q = (identifier) @id' + plotnik debug -q 'Q = (identifier) @id' --only-symbols + plotnik debug -q 'Q = (identifier) @id' --bytecode plotnik debug -s app.ts plotnik debug -s app.ts --raw - plotnik debug -q '(function_declaration) @fn' -s app.ts -l typescript"#)] + plotnik debug -q 'Q = (function_declaration) @fn' -s app.ts -l typescript"#)] Debug { #[command(flatten)] query: QueryArgs, @@ -57,10 +58,10 @@ pub enum Command { /// Execute a query against source code and output JSON #[command(after_help = r#"EXAMPLES: - plotnik exec -q '(identifier) @id' -s app.js - plotnik exec -q '(identifier) @id' -s app.js --pretty - plotnik exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes - plotnik exec -q '(identifier) @id' -s app.js --check + plotnik exec -q 'Q = (identifier) @id' -s app.js + 
plotnik exec -q 'Q = (identifier) @id' -s app.js --pretty + plotnik exec -q 'Q = (function_declaration) @fn' -s app.ts -l typescript --verbose-nodes + plotnik exec -q 'Q = (identifier) @id' -s app.js --check plotnik exec --query-file query.ptk -s app.js --entry FunctionDef"#)] Exec { #[command(flatten)] @@ -79,11 +80,11 @@ pub enum Command { /// Generate type definitions from a query #[command(after_help = r#"EXAMPLES: - plotnik types -q '(identifier) @id' -l javascript + plotnik types -q 'Q = (identifier) @id' -l javascript plotnik types --query-file query.ptk -l typescript - plotnik types -q '(function_declaration) @fn' -l js --format ts - plotnik types -q '(identifier) @id' -l js --verbose-nodes - plotnik types -q '(identifier) @id' -l js -o types.d.ts + plotnik types -q 'Q = (function_declaration) @fn' -l js --format ts + plotnik types -q 'Q = (identifier) @id' -l js --verbose-nodes + plotnik types -q 'Q = (identifier) @id' -l js -o types.d.ts NOTE: Use --verbose-nodes to match `exec --verbose-nodes` output shape."#)] Types { @@ -202,4 +203,8 @@ pub struct OutputArgs { /// Show inferred types #[arg(long)] pub types: bool, + + /// Show bytecode dump + #[arg(long)] + pub bytecode: bool, } diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 02682339..841bc420 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -22,6 +22,7 @@ pub struct DebugArgs { pub graph: bool, pub graph_raw: bool, pub types: bool, + pub bytecode: bool, pub color: bool, } @@ -47,7 +48,7 @@ pub fn run(args: DebugArgs) { }) }); - let show_query = has_query_input && !args.symbols && !args.graph && !args.types; + let show_query = has_query_input && !args.symbols && !args.graph && !args.types && !args.bytecode; let show_source = has_source_input; if show_query && let Some(ref q) = query { @@ -82,6 +83,7 @@ pub fn run(args: DebugArgs) { if args.types && let Some(ref q) = query { + 
ensure_valid(q, args.color); let bytecode = q.emit().expect("bytecode emission failed"); let module = plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); @@ -89,6 +91,17 @@ pub fn run(args: DebugArgs) { print!("{}", output); } + if args.bytecode + && let Some(ref q) = query + { + ensure_valid(q, args.color); + let bytecode = q.emit().expect("bytecode emission failed"); + let module = + plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); + let output = plotnik_lib::bytecode::dump(&module); + print!("{}", output); + } + if show_source { if show_query || args.symbols { println!(); @@ -99,15 +112,21 @@ pub fn run(args: DebugArgs) { print!("{}", dump_source(&tree, &source_code, args.raw)); } - if let Some(ref q) = query - && !q.is_valid() - { - eprint!( - "{}", - q.diagnostics().render_colored(q.source_map(), args.color) - ); - std::process::exit(1); + if let Some(ref q) = query { + ensure_valid(q, args.color); + } +} + +/// Ensure query is valid, exiting with diagnostics if not. +fn ensure_valid(q: &Query, color: bool) { + if q.is_valid() { + return; } + eprint!( + "{}", + q.diagnostics().render_colored(q.source_map(), color) + ); + std::process::exit(1); } fn load_query(args: &DebugArgs) -> String { diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index 9ad8eb72..9882c875 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -30,6 +30,7 @@ fn main() { graph: output.graph, graph_raw: output.graph_raw, types: output.types, + bytecode: output.bytecode, color: output.color.should_colorize(), }); } diff --git a/crates/plotnik-lib/src/bytecode/dump_tests.rs b/crates/plotnik-lib/src/bytecode/dump_tests.rs new file mode 100644 index 00000000..1121b82b --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/dump_tests.rs @@ -0,0 +1,131 @@ +//! Tests for bytecode dump functionality. 
+ +use crate::Query; +use indoc::indoc; + +#[test] +fn dump_minimal() { + let input = "Test = (identifier) @id"; + + let res = Query::expect_valid_linked_bytecode(input); + + insta::assert_snapshot!(res, @r#" + [header] + linked = true + + [strings] + S00 "Beauty will save the world" + S01 "id" + S02 "Test" + S03 "identifier" + + [types.defs] + T00 = void + T01 = Node + T02 = str + T03 = Struct(M0, 1) ; { id } + + [types.members] + M0 = (S01, T01) ; id: Node + + [types.names] + N0 = (S02, T03) ; Test + + [entry] + Test = 01 :: T03 + + [code] + 00 𝜀 ◼ + + Test: + 01 𝜀 02 + 02 *↓ (identifier) 03 + 03 𝜀 [Node Set(M0)] ◼ + "#); +} + +#[test] +fn dump_multiple_entrypoints() { + let input = indoc! {r#" + Expression = [(identifier) @name (number) @value] + Root = (function_declaration name: (identifier) @name) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + // Verify key sections exist + assert!(res.contains("[header]")); + assert!(res.contains("[strings]")); + assert!(res.contains("[types.defs]")); + assert!(res.contains("[types.members]")); + assert!(res.contains("[types.names]")); + assert!(res.contains("[entry]")); + assert!(res.contains("[code]")); + + // Verify both entrypoints appear + assert!(res.contains("Expression")); + assert!(res.contains("Root")); + + // Verify code section has entrypoint labels + assert!(res.contains("Expression:")); + assert!(res.contains("Root:")); +} + +#[test] +fn dump_with_field_constraints() { + let input = indoc! 
{r#" + Test = (binary_expression + left: (_) @left + right: (_) @right) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have field references in code section + assert!(res.contains("left:")); + assert!(res.contains("right:")); +} + +#[test] +fn dump_with_quantifier() { + let input = "Test = (identifier)* @items"; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have array type + assert!(res.contains("Array") || res.contains("[]")); +} + +#[test] +fn dump_with_alternation() { + let input = "Test = [(identifier) @id (string) @str]"; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have code section with branching + assert!(res.contains("[code]")); +} + +#[test] +fn dump_comprehensive() { + // A query that exercises most features: + // - Multiple definitions (entrypoints) + // - Field constraints (node_fields) + // - Multiple node types (node_types) + // - Captures with types (type_defs, type_members) + // - Alternation (branching in code) + let input = indoc! 
{r#" + Ident = (identifier) @name :: string + Expression = [ + Literal: (number) @value + Variable: (identifier) @name + ] + Assignment = (assignment_expression + left: (identifier) @target + right: (Expression) @value) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + insta::assert_snapshot!(res); +} diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index e288c04a..9fdf7557 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -47,6 +47,8 @@ pub use module::{ pub use dump::dump; +#[cfg(test)] +mod dump_tests; #[cfg(test)] mod instructions_tests; #[cfg(test)] diff --git a/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap b/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap new file mode 100644 index 00000000..69ea7eae --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap @@ -0,0 +1,81 @@ +--- +source: crates/plotnik-lib/src/bytecode/dump_tests.rs +expression: res +--- +[header] +linked = true + +[strings] +S00 "Beauty will save the world" +S01 "name" +S02 "value" +S03 "Literal" +S04 "Variable" +S05 "target" +S06 "Ident" +S07 "Expression" +S08 "Assignment" +S09 "identifier" +S10 "number" +S11 "assignment_expression" +S12 "left" +S13 "right" + +[types.defs] +T00 = void +T01 = Node +T02 = str +T03 = Struct(M0, 1) ; { name } +T04 = Struct(M1, 1) ; { value } +T05 = Struct(M2, 1) ; { name } +T06 = Enum(M3, 2) ; Literal | Variable +T07 = Struct(M5, 2) ; { value, target } + +[types.members] +M0 = (S01, T02) ; name: str +M1 = (S02, T01) ; value: Node +M2 = (S01, T01) ; name: Node +M3 = (S03, T04) ; Literal: T04 +M4 = (S04, T05) ; Variable: T05 +M5 = (S02, T06) ; value: Expression +M6 = (S05, T01) ; target: Node + +[types.names] +N0 = (S06, T03) ; Ident +N1 = (S07, T06) ; Expression +N2 = (S08, T07) ; 
Assignment + +[entry] +Assignment = 08 :: T07 +Expression = 05 :: T06 +Ident = 01 :: T03 + +[code] + 00 𝜀 ◼ + +Ident: + 01 𝜀 02 + 02 *↓ (identifier) 03 + 03 𝜀 [Text Set(M0)] ◼ + +Expression: + 05 𝜀 06 + 06 𝜀 23, 30 + +Assignment: + 08 𝜀 09 + 09 *↓ (assignment_expression) 10 + 10 𝜀 left: _ 11 + 11 *↓ (identifier) 12 + 12 𝜀 [Node Set(M1)] 14 + 14 𝜀 right: _ ▶(Expression) + 15 𝜀 [Node Set(M0)] 17 + 17 *↑¹ ◼ + 18 𝜀 [EndE] ◼ + 20 𝜀 [Node] 18 + 22 *↓ (number) 20 + 23 𝜀 [E(M0)] 22 + 25 𝜀 [EndE] ◼ + 27 𝜀 [Node] 25 + 29 *↓ (identifier) 27 + 30 𝜀 [E(M1)] 29 diff --git a/crates/plotnik-lib/src/query/emit.rs b/crates/plotnik-lib/src/query/codegen.rs similarity index 85% rename from crates/plotnik-lib/src/query/emit.rs rename to crates/plotnik-lib/src/query/codegen.rs index ab2f1665..2c2cbde7 100644 --- a/crates/plotnik-lib/src/query/emit.rs +++ b/crates/plotnik-lib/src/query/codegen.rs @@ -4,15 +4,20 @@ use std::collections::{HashMap, HashSet}; +use indexmap::IndexMap; use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; +use crate::bytecode::ir::Label; +use crate::bytecode::layout::CacheAligned; use crate::bytecode::{ Entrypoint, FieldSymbol, Header, NodeSymbol, QTypeId, SECTION_ALIGN, StepId, StringId, TriviaEntry, TypeDef, TypeMember, TypeMetaHeader, TypeName, }; use crate::type_system::TypeKind; +use super::compile::Compiler; use super::query::LinkedQuery; +use super::symbol_table::SymbolTable; use super::type_check::{ FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeContext, TypeId, TypeShape, }; @@ -20,6 +25,8 @@ use super::type_check::{ /// Error during bytecode emission. #[derive(Clone, Debug)] pub enum EmitError { + /// Query has validation errors (must be valid before emitting). + InvalidQuery, /// Too many strings (exceeds u16 max). TooManyStrings(usize), /// Too many types (exceeds u16 max). 
@@ -28,29 +35,43 @@ pub enum EmitError { TooManyTypeMembers(usize), /// Too many entrypoints (exceeds u16 max). TooManyEntrypoints(usize), + /// Too many transitions (exceeds u16 max). + TooManyTransitions(usize), /// String not found in interner. StringNotFound(Symbol), + /// Compilation error. + Compile(super::compile::CompileError), } impl std::fmt::Display for EmitError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::InvalidQuery => write!(f, "query has validation errors"), Self::TooManyStrings(n) => write!(f, "too many strings: {n} (max 65534)"), Self::TooManyTypes(n) => write!(f, "too many types: {n} (max 65533)"), Self::TooManyTypeMembers(n) => write!(f, "too many type members: {n} (max 65535)"), Self::TooManyEntrypoints(n) => write!(f, "too many entrypoints: {n} (max 65535)"), + Self::TooManyTransitions(n) => write!(f, "too many transitions: {n} (max 65535)"), Self::StringNotFound(sym) => write!(f, "string not found for symbol: {sym:?}"), + Self::Compile(e) => write!(f, "compilation error: {e}"), } } } impl std::error::Error for EmitError {} +/// Easter egg string at index 0 (Dostoevsky, The Idiot). +/// StringId(0) is reserved and never referenced by instructions. +pub const EASTER_EGG: &str = "Beauty will save the world"; + /// Builds the string table, remapping query Symbols to bytecode StringIds. /// /// The bytecode format requires a subset of the query interner's strings. /// This builder collects only the strings that are actually used and assigns /// compact StringId indices. +/// +/// StringId(0) is reserved for an easter egg and is never referenced by +/// instructions. Actual strings start at index 1. #[derive(Debug)] pub struct StringTableBuilder { /// Map from query Symbol to bytecode StringId. 
@@ -63,11 +84,15 @@ pub struct StringTableBuilder { impl StringTableBuilder { pub fn new() -> Self { - Self { + let mut builder = Self { mapping: HashMap::new(), str_lookup: HashMap::new(), strings: Vec::new(), - } + }; + // Reserve index 0 for easter egg + builder.strings.push(EASTER_EGG.to_string()); + builder.str_lookup.insert(EASTER_EGG.to_string(), StringId(0)); + builder } /// Get or create a StringId for a Symbol. @@ -115,7 +140,8 @@ impl StringTableBuilder { /// Validate that the string count fits in u16. pub fn validate(&self) -> Result<(), EmitError> { - // Max count is 65534 because the table needs count+1 entries + // Max count is 65534 because the table needs count+1 entries. + // Index 0 is reserved for the easter egg, so we can have 65533 user strings. if self.strings.len() > 65534 { return Err(EmitError::TooManyStrings(self.strings.len())); } @@ -211,10 +237,10 @@ impl TypeTableBuilder { // Emit TypeDefs and TypeMembers - fill in the placeholders. for (slot_index, &type_id) in ordered_types.iter().enumerate() { - let type_kind = type_ctx + let type_shape = type_ctx .get_type(type_id) .expect("collected type must exist"); - self.emit_type_at_slot(slot_index, type_id, type_kind, type_ctx, interner, strings)?; + self.emit_type_at_slot(slot_index, type_id, type_shape, type_ctx, interner, strings)?; } // Collect TypeName entries for named definitions @@ -236,12 +262,12 @@ impl TypeTableBuilder { &mut self, slot_index: usize, _type_id: TypeId, - type_kind: &TypeShape, + type_shape: &TypeShape, type_ctx: &TypeContext, interner: &Interner, strings: &mut StringTableBuilder, ) -> Result<(), EmitError> { - match type_kind { + match type_shape { TypeShape::Void | TypeShape::Node | TypeShape::String => { // Builtins - should not reach here unreachable!("builtins should be handled separately") @@ -362,8 +388,8 @@ impl TypeTableBuilder { } // Handle Ref types by following the reference - if let Some(type_kind) = type_ctx.get_type(type_id) - && let 
TypeShape::Ref(def_id) = type_kind + if let Some(type_shape) = type_ctx.get_type(type_id) + && let TypeShape::Ref(def_id) = type_shape && let Some(def_type_id) = type_ctx.get_def_type(*def_id) { return self.resolve_type(def_type_id, type_ctx); @@ -486,12 +512,12 @@ fn collect_types_dfs( return; } - let Some(type_kind) = type_ctx.get_type(type_id) else { + let Some(type_shape) = type_ctx.get_type(type_id) else { return; }; // Resolve Ref types to their target - if let TypeShape::Ref(def_id) = type_kind { + if let TypeShape::Ref(def_id) = type_shape { if let Some(target_id) = type_ctx.get_def_type(*def_id) { collect_types_dfs(target_id, type_ctx, out, seen); } @@ -501,7 +527,7 @@ fn collect_types_dfs( seen.insert(type_id); // Collect children first (depth-first), then add self - match type_kind { + match type_shape { TypeShape::Struct(fields) => { for field_info in fields.values() { collect_types_dfs(field_info.type_id, type_ctx, out, seen); @@ -542,8 +568,12 @@ fn pad_to_section(buf: &mut Vec) { } /// Emit bytecode from type context only (no node validation). -pub fn emit(type_ctx: &TypeContext, interner: &Interner) -> Result, EmitError> { - emit_inner(type_ctx, interner, None, None) +pub fn emit( + type_ctx: &TypeContext, + interner: &Interner, + symbol_table: &SymbolTable, +) -> Result, EmitError> { + emit_inner(type_ctx, interner, symbol_table, None, None) } /// Emit bytecode from a LinkedQuery (includes node type/field validation info). 
@@ -551,6 +581,7 @@ pub fn emit_linked(query: &LinkedQuery) -> Result, EmitError> { emit_inner( query.type_context(), query.interner(), + &query.symbol_table, Some(query.node_type_ids()), Some(query.node_field_ids()), ) @@ -560,13 +591,28 @@ pub fn emit_linked(query: &LinkedQuery) -> Result, EmitError> { fn emit_inner( type_ctx: &TypeContext, interner: &Interner, - node_type_ids: Option<&HashMap>, - node_field_ids: Option<&HashMap>, + symbol_table: &SymbolTable, + node_type_ids: Option<&IndexMap>, + node_field_ids: Option<&IndexMap>, ) -> Result, EmitError> { + let is_linked = node_type_ids.is_some(); let mut strings = StringTableBuilder::new(); let mut types = TypeTableBuilder::new(); types.build(type_ctx, interner, &mut strings)?; + // Compile transitions (strings are interned here for unlinked mode) + let compile_result = Compiler::compile(interner, type_ctx, symbol_table, &mut strings, node_type_ids, node_field_ids) + .map_err(EmitError::Compile)?; + + // Layout with cache alignment + let entry_labels: Vec