diff --git a/AGENTS.md b/AGENTS.md index cc15da36..286196a5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -199,11 +199,12 @@ Inspect query AST/CST or parse source files with tree-sitter. cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --only-symbols cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --types +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --bytecode cargo run -p plotnik-cli -- debug -s app.ts cargo run -p plotnik-cli -- debug -s app.ts --raw ``` -Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types` +Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types`, `--bytecode` ## types diff --git a/crates/plotnik-cli/src/cli.rs b/crates/plotnik-cli/src/cli.rs index 553dda6c..ee3a8062 100644 --- a/crates/plotnik-cli/src/cli.rs +++ b/crates/plotnik-cli/src/cli.rs @@ -32,11 +32,12 @@ pub struct Cli { pub enum Command { /// Debug and inspect queries and source files #[command(after_help = r#"EXAMPLES: - plotnik debug -q '(identifier) @id' - plotnik debug -q '(identifier) @id' --only-symbols + plotnik debug -q 'Q = (identifier) @id' + plotnik debug -q 'Q = (identifier) @id' --only-symbols + plotnik debug -q 'Q = (identifier) @id' --bytecode plotnik debug -s app.ts plotnik debug -s app.ts --raw - plotnik debug -q '(function_declaration) @fn' -s app.ts -l typescript"#)] + plotnik debug -q 'Q = (function_declaration) @fn' -s app.ts -l typescript"#)] Debug { #[command(flatten)] query: QueryArgs, @@ -57,10 +58,10 @@ pub enum Command { /// Execute a query against source code and output JSON #[command(after_help = r#"EXAMPLES: - plotnik exec -q '(identifier) @id' -s app.js - plotnik exec -q '(identifier) @id' -s app.js --pretty - plotnik exec -q '(function_declaration) @fn' -s app.ts -l typescript --verbose-nodes - plotnik exec -q '(identifier) @id' -s app.js --check + plotnik exec -q 'Q = (identifier) @id' -s app.js + plotnik exec -q 'Q = (identifier) @id' -s app.js --pretty + plotnik exec -q 'Q = (function_declaration) @fn' -s app.ts -l typescript --verbose-nodes + plotnik exec -q 'Q = (identifier) @id' -s app.js --check plotnik exec --query-file query.ptk -s app.js --entry FunctionDef"#)] Exec { #[command(flatten)] @@ -79,11 +80,11 @@ pub enum Command { /// Generate type definitions from a query #[command(after_help = r#"EXAMPLES: - plotnik types -q '(identifier) @id' -l javascript + plotnik types -q 'Q = (identifier) @id' -l javascript plotnik types --query-file query.ptk -l typescript - plotnik types -q '(function_declaration) @fn' -l js --format ts - plotnik types -q '(identifier) @id' -l js --verbose-nodes - plotnik types -q '(identifier) @id' -l js -o types.d.ts + plotnik types -q 'Q = (function_declaration) @fn' -l js --format ts + plotnik types -q 'Q = (identifier) @id' -l js --verbose-nodes + plotnik types -q 'Q = (identifier) @id' -l js -o types.d.ts NOTE: Use --verbose-nodes to match `exec --verbose-nodes` output shape."#)] Types { @@ -202,4 +203,8 @@ pub struct OutputArgs { /// Show inferred types #[arg(long)] pub types: bool, + + /// Show bytecode dump + #[arg(long)] + pub bytecode: bool, } diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 02682339..841bc420 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -22,6 +22,7 @@ pub struct DebugArgs { pub graph: bool, pub graph_raw: bool, pub types: bool, + pub bytecode: bool, pub color: bool, } @@ -47,7 +48,7 @@ pub fn run(args: DebugArgs) { }) }); - let show_query = has_query_input && !args.symbols && !args.graph && !args.types; + let show_query = has_query_input && !args.symbols && !args.graph && !args.types && !args.bytecode; let show_source = has_source_input; if show_query && let Some(ref q) = query { @@ -82,6 +83,7 @@ pub fn run(args: DebugArgs) { if args.types && let Some(ref q) = query { + ensure_valid(q, args.color); let bytecode = q.emit().expect("bytecode emission failed"); let module = plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); @@ -89,6 +91,17 @@ pub fn run(args: DebugArgs) { print!("{}", output); } + if args.bytecode + && let Some(ref q) = query + { + ensure_valid(q, args.color); + let bytecode = q.emit().expect("bytecode emission failed"); + let module = + plotnik_lib::bytecode::Module::from_bytes(bytecode).expect("module loading failed"); + let output = plotnik_lib::bytecode::dump(&module); + print!("{}", output); + } + if show_source { if show_query || args.symbols { println!(); @@ -99,15 +112,21 @@ pub fn run(args: DebugArgs) { print!("{}", dump_source(&tree, &source_code, args.raw)); } - if let Some(ref q) = query - && !q.is_valid() - { - eprint!( - "{}", - q.diagnostics().render_colored(q.source_map(), args.color) - ); - std::process::exit(1); + if let Some(ref q) = query { + ensure_valid(q, args.color); + } +} + +/// Ensure query is valid, exiting with diagnostics if not. +fn ensure_valid(q: &Query, color: bool) { + if q.is_valid() { + return; } + eprint!( + "{}", + q.diagnostics().render_colored(q.source_map(), color) + ); + std::process::exit(1); } fn load_query(args: &DebugArgs) -> String { diff --git a/crates/plotnik-cli/src/main.rs b/crates/plotnik-cli/src/main.rs index 9ad8eb72..9882c875 100644 --- a/crates/plotnik-cli/src/main.rs +++ b/crates/plotnik-cli/src/main.rs @@ -30,6 +30,7 @@ fn main() { graph: output.graph, graph_raw: output.graph_raw, types: output.types, + bytecode: output.bytecode, color: output.color.should_colorize(), }); } diff --git a/crates/plotnik-lib/src/bytecode/dump_tests.rs b/crates/plotnik-lib/src/bytecode/dump_tests.rs new file mode 100644 index 00000000..1121b82b --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/dump_tests.rs @@ -0,0 +1,131 @@ +//! Tests for bytecode dump functionality. + +use crate::Query; +use indoc::indoc; + +#[test] +fn dump_minimal() { + let input = "Test = (identifier) @id"; + + let res = Query::expect_valid_linked_bytecode(input); + + insta::assert_snapshot!(res, @r#" + [header] + linked = true + + [strings] + S00 "Beauty will save the world" + S01 "id" + S02 "Test" + S03 "identifier" + + [types.defs] + T00 = void + T01 = Node + T02 = str + T03 = Struct(M0, 1) ; { id } + + [types.members] + M0 = (S01, T01) ; id: Node + + [types.names] + N0 = (S02, T03) ; Test + + [entry] + Test = 01 :: T03 + + [code] + 00 ๐œ€ โ—ผ + + Test: + 01 ๐œ€ 02 + 02 *โ†“ (identifier) 03 + 03 ๐œ€ [Node Set(M0)] โ—ผ + "#); +} + +#[test] +fn dump_multiple_entrypoints() { + let input = indoc! {r#" + Expression = [(identifier) @name (number) @value] + Root = (function_declaration name: (identifier) @name) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + // Verify key sections exist + assert!(res.contains("[header]")); + assert!(res.contains("[strings]")); + assert!(res.contains("[types.defs]")); + assert!(res.contains("[types.members]")); + assert!(res.contains("[types.names]")); + assert!(res.contains("[entry]")); + assert!(res.contains("[code]")); + + // Verify both entrypoints appear + assert!(res.contains("Expression")); + assert!(res.contains("Root")); + + // Verify code section has entrypoint labels + assert!(res.contains("Expression:")); + assert!(res.contains("Root:")); +} + +#[test] +fn dump_with_field_constraints() { + let input = indoc! {r#" + Test = (binary_expression + left: (_) @left + right: (_) @right) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have field references in code section + assert!(res.contains("left:")); + assert!(res.contains("right:")); +} + +#[test] +fn dump_with_quantifier() { + let input = "Test = (identifier)* @items"; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have array type + assert!(res.contains("Array") || res.contains("[]")); +} + +#[test] +fn dump_with_alternation() { + let input = "Test = [(identifier) @id (string) @str]"; + + let res = Query::expect_valid_linked_bytecode(input); + + // Should have code section with branching + assert!(res.contains("[code]")); +} + +#[test] +fn dump_comprehensive() { + // A query that exercises most features: + // - Multiple definitions (entrypoints) + // - Field constraints (node_fields) + // - Multiple node types (node_types) + // - Captures with types (type_defs, type_members) + // - Alternation (branching in code) + let input = indoc! {r#" + Ident = (identifier) @name :: string + Expression = [ + Literal: (number) @value + Variable: (identifier) @name + ] + Assignment = (assignment_expression + left: (identifier) @target + right: (Expression) @value) + "#}; + + let res = Query::expect_valid_linked_bytecode(input); + + insta::assert_snapshot!(res); +} diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index e288c04a..9fdf7557 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -47,6 +47,8 @@ pub use module::{ pub use dump::dump; +#[cfg(test)] +mod dump_tests; #[cfg(test)] mod instructions_tests; #[cfg(test)] diff --git a/crates/plotnik-lib/src/bytecode/module_tests.rs b/crates/plotnik-lib/src/bytecode/module_tests.rs index c4b8be1f..3d10eb67 100644 --- a/crates/plotnik-lib/src/bytecode/module_tests.rs +++ b/crates/plotnik-lib/src/bytecode/module_tests.rs @@ -1,321 +1,184 @@ //! Tests for the bytecode module. -use super::*; -use crate::bytecode::nav::Nav; -use crate::bytecode::{Header, MAGIC, Match, TypeMetaHeader, VERSION}; - -/// Build a minimal valid bytecode for testing. -fn build_test_bytecode() -> Vec { - // Layout (all sections 64-byte aligned): - // [0..64) Header - // [64..128) StringBlob + padding - // [128..192) StringTable + padding (needs 2 u32 entries: offset + sentinel) - // [192..256) NodeTypes + padding - // [256..320) NodeFields + padding - // [320..384) Trivia + padding - // [384..448) TypeMeta: TypeMetaHeader (8 bytes) + padding - // [448..512) TypeDefs sub-section (aligned) - // [512..576) TypeMembers sub-section (aligned, empty) - // [576..640) TypeNames sub-section (aligned, empty) - // [640..704) Entrypoints + padding - // [704..768) Transitions + padding - - let mut bytes = vec![0u8; 768]; - - // String blob: "Test" at offset 0 - let str_blob_offset = 64; - bytes[64] = b'T'; - bytes[65] = b'e'; - bytes[66] = b's'; - bytes[67] = b't'; - - // String table: sequential u32 offsets with sentinel - // Entry 0: offset 0 (start of "Test") - // Entry 1: offset 4 (sentinel = end of blob) - let str_table_offset = 128; - bytes[128..132].copy_from_slice(&0u32.to_le_bytes()); // offset of string 0 - bytes[132..136].copy_from_slice(&4u32.to_le_bytes()); // sentinel (end of blob) - - // Node types: one entry (id=42, name=StringId(0)) - let node_types_offset = 192; - bytes[192..194].copy_from_slice(&42u16.to_le_bytes()); - bytes[194..196].copy_from_slice(&0u16.to_le_bytes()); - - // Node fields: one entry (id=7, name=StringId(0)) - let node_fields_offset = 256; - bytes[256..258].copy_from_slice(&7u16.to_le_bytes()); - bytes[258..260].copy_from_slice(&0u16.to_le_bytes()); - - // Trivia: one entry (node_type=100) - let trivia_offset = 320; - bytes[320..322].copy_from_slice(&100u16.to_le_bytes()); - - // TypeMeta section - let type_meta_offset = 384; - - // TypeMetaHeader (8 bytes): type_defs_count=1, type_members_count=0, type_names_count=0 - let type_meta_header = TypeMetaHeader { - type_defs_count: 1, - type_members_count: 0, - type_names_count: 0, - _pad: 0, - }; - bytes[384..392].copy_from_slice(&type_meta_header.to_bytes()); - - // TypeDefs sub-section at aligned offset (448) - // One TypeDef (4 bytes): data=0, count=0, kind=3 (Struct) - bytes[448..450].copy_from_slice(&0u16.to_le_bytes()); // data (member index) - bytes[450] = 0; // count - bytes[451] = 3; // kind=Struct - - // TypeMembers sub-section at 512 (empty) - // TypeNames sub-section at 576 (empty) - - // Entrypoints: one entry (name=StringId(0), target=StepId(0), result_type=QTypeId(0)) - let entrypoints_offset = 640; - bytes[640..642].copy_from_slice(&0u16.to_le_bytes()); // name - bytes[642..644].copy_from_slice(&0u16.to_le_bytes()); // target - bytes[644..646].copy_from_slice(&0u16.to_le_bytes()); // result_type - bytes[646..648].copy_from_slice(&0u16.to_le_bytes()); // padding - - // Transitions: one Match8 instruction (accept state) - let transitions_offset = 704; - // type_id=0x00 (Match8, segment 0) - bytes[704] = 0x00; - // nav=Stay - bytes[705] = Nav::Stay.to_byte(); - // node_type=None (0) - bytes[706..708].copy_from_slice(&0u16.to_le_bytes()); - // node_field=None (0) - bytes[708..710].copy_from_slice(&0u16.to_le_bytes()); - // next=0 (accept) - bytes[710..712].copy_from_slice(&0u16.to_le_bytes()); - - // Build header - let header = Header { - magic: MAGIC, - version: VERSION, - checksum: 0, - total_size: 768, - str_blob_offset: str_blob_offset as u32, - str_table_offset: str_table_offset as u32, - node_types_offset: node_types_offset as u32, - node_fields_offset: node_fields_offset as u32, - trivia_offset: trivia_offset as u32, - type_meta_offset: type_meta_offset as u32, - entrypoints_offset: entrypoints_offset as u32, - transitions_offset: transitions_offset as u32, - str_table_count: 1, - node_types_count: 1, - node_fields_count: 1, - trivia_count: 1, - entrypoints_count: 1, - transitions_count: 1, - ..Default::default() - }; - - bytes[0..64].copy_from_slice(&header.to_bytes()); - bytes -} +use indoc::indoc; + +use crate::Query; +use crate::bytecode::{Module, ModuleError, StepId, StringId}; #[test] fn module_from_bytes_valid() { - let bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); assert!(module.header().validate_magic()); assert!(module.header().validate_version()); - assert_eq!(module.header().total_size, 768); } #[test] fn module_from_bytes_too_small() { - let bytes = vec![0u8; 32]; - let err = Module::from_bytes(bytes).unwrap_err(); + let input = "Test = (identifier) @id"; + + let bytes = Query::expect_valid_linked_bytes(input); + let truncated = bytes[..32].to_vec(); + + let err = Module::from_bytes(truncated).unwrap_err(); assert!(matches!(err, ModuleError::FileTooSmall(32))); } #[test] fn module_from_bytes_invalid_magic() { - let mut bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let mut bytes = Query::expect_valid_linked_bytes(input); bytes[0] = b'X'; // Corrupt magic + let err = Module::from_bytes(bytes).unwrap_err(); assert!(matches!(err, ModuleError::InvalidMagic)); } #[test] fn module_from_bytes_wrong_version() { - let mut bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let mut bytes = Query::expect_valid_linked_bytes(input); bytes[4..8].copy_from_slice(&999u32.to_le_bytes()); // Wrong version + let err = Module::from_bytes(bytes).unwrap_err(); assert!(matches!(err, ModuleError::UnsupportedVersion(999))); } #[test] fn module_from_bytes_size_mismatch() { - let mut bytes = build_test_bytecode(); - bytes[12..16].copy_from_slice(&1000u32.to_le_bytes()); // Wrong total_size + let input = "Test = (identifier) @id"; + + let mut bytes = Query::expect_valid_linked_bytes(input); + let actual_size = bytes.len() as u32; + bytes[12..16].copy_from_slice(&(actual_size + 100).to_le_bytes()); // Wrong total_size + let err = Module::from_bytes(bytes).unwrap_err(); assert!(matches!( err, ModuleError::SizeMismatch { - header: 1000, - actual: 768 - } + header: h, + actual: a + } if h == actual_size + 100 && a == actual_size as usize )); } -#[test] -fn module_decode_step() { - let bytes = build_test_bytecode(); - let module = Module::from_bytes(bytes).unwrap(); - - let instr = module.decode_step(StepId(0)); - match instr { - Instruction::Match(m) => { - assert_eq!(m.nav, Nav::Stay); - assert!(m.is_epsilon()); - assert!(m.is_terminal()); - } - _ => panic!("expected Match instruction"), - } -} - #[test] fn module_strings_view() { - let bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); let strings = module.strings(); - assert_eq!(strings.get(StringId(0)), "Test"); + // String 0 is the easter egg + assert_eq!(strings.get(StringId(0)), "Beauty will save the world"); + // Other strings include "id", "Test", "identifier" + assert!(module.header().str_table_count >= 3); } #[test] fn module_node_types_view() { - let bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); let node_types = module.node_types(); - assert_eq!(node_types.len(), 1); assert!(!node_types.is_empty()); - - let sym = node_types.get(0); - assert_eq!(sym.id, 42); - assert_eq!(sym.name, StringId(0)); + // Should have "identifier" node type + let has_identifier = (0..node_types.len()).any(|i| { + let sym = node_types.get(i); + module.strings().get(sym.name) == "identifier" + }); + assert!(has_identifier); } #[test] fn module_node_fields_view() { - let bytes = build_test_bytecode(); - let module = Module::from_bytes(bytes).unwrap(); - - let fields = module.node_fields(); - assert_eq!(fields.len(), 1); - - let sym = fields.get(0); - assert_eq!(sym.id, 7); - assert_eq!(sym.name, StringId(0)); -} + let input = "Test = (function_declaration name: (identifier) @name)"; -#[test] -fn module_trivia_view() { - let bytes = build_test_bytecode(); + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); - let trivia = module.trivia(); - assert_eq!(trivia.len(), 1); - assert!(trivia.contains(100)); - assert!(!trivia.contains(42)); + let fields = module.node_fields(); + assert!(!fields.is_empty()); + // Should have "name" field + let has_name = (0..fields.len()).any(|i| { + let sym = fields.get(i); + module.strings().get(sym.name) == "name" + }); + assert!(has_name); } #[test] fn module_types_view() { - let bytes = build_test_bytecode(); + let input = indoc! {r#" + Test = (function_declaration + name: (identifier) @name + body: (_) @body) + "#}; + + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); let types = module.types(); - assert_eq!(types.defs_count(), 1); - assert_eq!(types.members_count(), 0); - assert_eq!(types.names_count(), 0); - - let def = types.get_def(0); - assert_eq!(def.kind, 3); // Struct - assert_eq!(def.data, 0); // member index - assert_eq!(def.count, 0); // member count + // Should have custom types (struct with fields) + assert!(types.defs_count() >= 1); + assert!(types.members_count() >= 2); // name and body fields } #[test] fn module_entrypoints_view() { - let bytes = build_test_bytecode(); + let input = indoc! {r#" + Foo = (identifier) @id + Bar = (string) @str + "#}; + + let bytes = Query::expect_valid_linked_bytes(input); let module = Module::from_bytes(bytes).unwrap(); let entrypoints = module.entrypoints(); - assert_eq!(entrypoints.len(), 1); + assert_eq!(entrypoints.len(), 2); assert!(!entrypoints.is_empty()); - let ep = entrypoints.get(0); - assert_eq!(ep.name, StringId(0)); - assert_eq!(ep.target, StepId(0)); - + // Should be able to find by name let strings = module.strings(); - let found = entrypoints.find_by_name("Test", &strings); - assert!(found.is_some()); - assert_eq!(found.unwrap().target, StepId(0)); + let foo = entrypoints.find_by_name("Foo", &strings); + let bar = entrypoints.find_by_name("Bar", &strings); + assert!(foo.is_some()); + assert!(bar.is_some()); } #[test] -fn instruction_from_bytes_dispatch() { - // Test Match8 - let match8 = Match { - segment: 0, - nav: Nav::Down, - node_type: std::num::NonZeroU16::new(42), - node_field: None, - pre_effects: vec![], - neg_fields: vec![], - post_effects: vec![], - successors: vec![StepId(10)], - }; - let bytes = match8.to_bytes().unwrap(); - let instr = Instruction::from_bytes(&bytes); - assert!(matches!(instr, Instruction::Match(_))); - - // Test Call - let call = Call { - segment: 0, - next: StepId(5), - target: StepId(100), - ref_id: 1, - }; - let bytes = call.to_bytes(); - let instr = Instruction::from_bytes(&bytes); - assert!(matches!(instr, Instruction::Call(_))); - - // Test Return - let ret = Return { - segment: 0, - ref_id: 1, - }; - let bytes = ret.to_bytes(); - let instr = Instruction::from_bytes(&bytes); - assert!(matches!(instr, Instruction::Return(_))); -} +fn module_decode_step() { + let input = "Test = (identifier) @id"; -#[test] -fn byte_storage_deref() { - let data = vec![1, 2, 3, 4, 5]; - let storage = ByteStorage::from_vec(data.clone()); + let bytes = Query::expect_valid_linked_bytes(input); + let module = Module::from_bytes(bytes).unwrap(); - assert_eq!(&*storage, &data[..]); - assert_eq!(storage.len(), 5); - assert_eq!(storage[2], 3); + // Step 0 is always the accept state (epsilon terminal) + let instr = module.decode_step(StepId(0)); + match instr { + crate::bytecode::Instruction::Match(m) => { + assert!(m.is_epsilon()); + assert!(m.is_terminal()); + } + _ => panic!("expected Match instruction at step 0"), + } } #[test] fn module_from_path_mmap() { use std::io::Write; - let bytes = build_test_bytecode(); + let input = "Test = (identifier) @id"; + + let bytes = Query::expect_valid_linked_bytes(input); // Write to temp file let mut tmpfile = tempfile::NamedTempFile::new().unwrap(); @@ -326,13 +189,24 @@ fn module_from_path_mmap() { let module = Module::from_path(tmpfile.path()).unwrap(); assert!(module.header().validate_magic()); - assert_eq!(module.header().total_size, 768); // Verify we can decode instructions let instr = module.decode_step(StepId(0)); - assert!(matches!(instr, Instruction::Match(_))); + assert!(matches!(instr, crate::bytecode::Instruction::Match(_))); // Verify string lookup works through mmap let strings = module.strings(); - assert_eq!(strings.get(StringId(0)), "Test"); + assert_eq!(strings.get(StringId(0)), "Beauty will save the world"); +} + +#[test] +fn byte_storage_deref() { + use crate::bytecode::ByteStorage; + + let data = vec![1, 2, 3, 4, 5]; + let storage = ByteStorage::from_vec(data.clone()); + + assert_eq!(&*storage, &data[..]); + assert_eq!(storage.len(), 5); + assert_eq!(storage[2], 3); } diff --git a/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap b/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap new file mode 100644 index 00000000..69ea7eae --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/snapshots/plotnik_lib__bytecode__dump_tests__dump_comprehensive.snap @@ -0,0 +1,81 @@ +--- +source: crates/plotnik-lib/src/bytecode/dump_tests.rs +expression: res +--- +[header] +linked = true + +[strings] +S00 "Beauty will save the world" +S01 "name" +S02 "value" +S03 "Literal" +S04 "Variable" +S05 "target" +S06 "Ident" +S07 "Expression" +S08 "Assignment" +S09 "identifier" +S10 "number" +S11 "assignment_expression" +S12 "left" +S13 "right" + +[types.defs] +T00 = void +T01 = Node +T02 = str +T03 = Struct(M0, 1) ; { name } +T04 = Struct(M1, 1) ; { value } +T05 = Struct(M2, 1) ; { name } +T06 = Enum(M3, 2) ; Literal | Variable +T07 = Struct(M5, 2) ; { value, target } + +[types.members] +M0 = (S01, T02) ; name: str +M1 = (S02, T01) ; value: Node +M2 = (S01, T01) ; name: Node +M3 = (S03, T04) ; Literal: T04 +M4 = (S04, T05) ; Variable: T05 +M5 = (S02, T06) ; value: Expression +M6 = (S05, T01) ; target: Node + +[types.names] +N0 = (S06, T03) ; Ident +N1 = (S07, T06) ; Expression +N2 = (S08, T07) ; Assignment + +[entry] +Assignment = 08 :: T07 +Expression = 05 :: T06 +Ident = 01 :: T03 + +[code] + 00 ๐œ€ โ—ผ + +Ident: + 01 ๐œ€ 02 + 02 *โ†“ (identifier) 03 + 03 ๐œ€ [Text Set(M0)] โ—ผ + +Expression: + 05 ๐œ€ 06 + 06 ๐œ€ 23, 30 + +Assignment: + 08 ๐œ€ 09 + 09 *โ†“ (assignment_expression) 10 + 10 ๐œ€ left: _ 11 + 11 *โ†“ (identifier) 12 + 12 ๐œ€ [Node Set(M1)] 14 + 14 ๐œ€ right: _ โ–ถ(Expression) + 15 ๐œ€ [Node Set(M0)] 17 + 17 *โ†‘ยน โ—ผ + 18 ๐œ€ [EndE] โ—ผ + 20 ๐œ€ [Node] 18 + 22 *โ†“ (number) 20 + 23 ๐œ€ [E(M0)] 22 + 25 ๐œ€ [EndE] โ—ผ + 27 ๐œ€ [Node] 25 + 29 *โ†“ (identifier) 27 + 30 ๐œ€ [E(M1)] 29 diff --git a/crates/plotnik-lib/src/query/emit.rs b/crates/plotnik-lib/src/query/codegen.rs similarity index 85% rename from crates/plotnik-lib/src/query/emit.rs rename to crates/plotnik-lib/src/query/codegen.rs index ab2f1665..2c2cbde7 100644 --- a/crates/plotnik-lib/src/query/emit.rs +++ b/crates/plotnik-lib/src/query/codegen.rs @@ -4,15 +4,20 @@ use std::collections::{HashMap, HashSet}; +use indexmap::IndexMap; use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; +use crate::bytecode::ir::Label; +use crate::bytecode::layout::CacheAligned; use crate::bytecode::{ Entrypoint, FieldSymbol, Header, NodeSymbol, QTypeId, SECTION_ALIGN, StepId, StringId, TriviaEntry, TypeDef, TypeMember, TypeMetaHeader, TypeName, }; use crate::type_system::TypeKind; +use super::compile::Compiler; use super::query::LinkedQuery; +use super::symbol_table::SymbolTable; use super::type_check::{ FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeContext, TypeId, TypeShape, }; @@ -20,6 +25,8 @@ use super::type_check::{ /// Error during bytecode emission. #[derive(Clone, Debug)] pub enum EmitError { + /// Query has validation errors (must be valid before emitting). + InvalidQuery, /// Too many strings (exceeds u16 max). TooManyStrings(usize), /// Too many types (exceeds u16 max). @@ -28,29 +35,43 @@ pub enum EmitError { TooManyTypeMembers(usize), /// Too many entrypoints (exceeds u16 max). TooManyEntrypoints(usize), + /// Too many transitions (exceeds u16 max). + TooManyTransitions(usize), /// String not found in interner. StringNotFound(Symbol), + /// Compilation error. + Compile(super::compile::CompileError), } impl std::fmt::Display for EmitError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::InvalidQuery => write!(f, "query has validation errors"), Self::TooManyStrings(n) => write!(f, "too many strings: {n} (max 65534)"), Self::TooManyTypes(n) => write!(f, "too many types: {n} (max 65533)"), Self::TooManyTypeMembers(n) => write!(f, "too many type members: {n} (max 65535)"), Self::TooManyEntrypoints(n) => write!(f, "too many entrypoints: {n} (max 65535)"), + Self::TooManyTransitions(n) => write!(f, "too many transitions: {n} (max 65535)"), Self::StringNotFound(sym) => write!(f, "string not found for symbol: {sym:?}"), + Self::Compile(e) => write!(f, "compilation error: {e}"), } } } impl std::error::Error for EmitError {} +/// Easter egg string at index 0 (Dostoevsky, The Idiot). +/// StringId(0) is reserved and never referenced by instructions. +pub const EASTER_EGG: &str = "Beauty will save the world"; + /// Builds the string table, remapping query Symbols to bytecode StringIds. /// /// The bytecode format requires a subset of the query interner's strings. /// This builder collects only the strings that are actually used and assigns /// compact StringId indices. +/// +/// StringId(0) is reserved for an easter egg and is never referenced by +/// instructions. Actual strings start at index 1. #[derive(Debug)] pub struct StringTableBuilder { /// Map from query Symbol to bytecode StringId. @@ -63,11 +84,15 @@ pub struct StringTableBuilder { impl StringTableBuilder { pub fn new() -> Self { - Self { + let mut builder = Self { mapping: HashMap::new(), str_lookup: HashMap::new(), strings: Vec::new(), - } + }; + // Reserve index 0 for easter egg + builder.strings.push(EASTER_EGG.to_string()); + builder.str_lookup.insert(EASTER_EGG.to_string(), StringId(0)); + builder } /// Get or create a StringId for a Symbol. @@ -115,7 +140,8 @@ impl StringTableBuilder { /// Validate that the string count fits in u16. pub fn validate(&self) -> Result<(), EmitError> { - // Max count is 65534 because the table needs count+1 entries + // Max count is 65534 because the table needs count+1 entries. + // Index 0 is reserved for the easter egg, so we can have 65533 user strings. if self.strings.len() > 65534 { return Err(EmitError::TooManyStrings(self.strings.len())); } @@ -211,10 +237,10 @@ impl TypeTableBuilder { // Emit TypeDefs and TypeMembers - fill in the placeholders. for (slot_index, &type_id) in ordered_types.iter().enumerate() { - let type_kind = type_ctx + let type_shape = type_ctx .get_type(type_id) .expect("collected type must exist"); - self.emit_type_at_slot(slot_index, type_id, type_kind, type_ctx, interner, strings)?; + self.emit_type_at_slot(slot_index, type_id, type_shape, type_ctx, interner, strings)?; } // Collect TypeName entries for named definitions @@ -236,12 +262,12 @@ impl TypeTableBuilder { &mut self, slot_index: usize, _type_id: TypeId, - type_kind: &TypeShape, + type_shape: &TypeShape, type_ctx: &TypeContext, interner: &Interner, strings: &mut StringTableBuilder, ) -> Result<(), EmitError> { - match type_kind { + match type_shape { TypeShape::Void | TypeShape::Node | TypeShape::String => { // Builtins - should not reach here unreachable!("builtins should be handled separately") @@ -362,8 +388,8 @@ impl TypeTableBuilder { } // Handle Ref types by following the reference - if let Some(type_kind) = type_ctx.get_type(type_id) - && let TypeShape::Ref(def_id) = type_kind + if let Some(type_shape) = type_ctx.get_type(type_id) + && let TypeShape::Ref(def_id) = type_shape && let Some(def_type_id) = type_ctx.get_def_type(*def_id) { return self.resolve_type(def_type_id, type_ctx); @@ -486,12 +512,12 @@ fn collect_types_dfs( return; } - let Some(type_kind) = type_ctx.get_type(type_id) else { + let Some(type_shape) = type_ctx.get_type(type_id) else { return; }; // Resolve Ref types to their target - if let TypeShape::Ref(def_id) = type_kind { + if let TypeShape::Ref(def_id) = type_shape { if let Some(target_id) = type_ctx.get_def_type(*def_id) { collect_types_dfs(target_id, type_ctx, out, seen); } @@ -501,7 +527,7 @@ fn collect_types_dfs( seen.insert(type_id); // Collect children first (depth-first), then add self - match type_kind { + match type_shape { TypeShape::Struct(fields) => { for field_info in fields.values() { collect_types_dfs(field_info.type_id, type_ctx, out, seen); @@ -542,8 +568,12 @@ fn pad_to_section(buf: &mut Vec) { } /// Emit bytecode from type context only (no node validation). -pub fn emit(type_ctx: &TypeContext, interner: &Interner) -> Result, EmitError> { - emit_inner(type_ctx, interner, None, None) +pub fn emit( + type_ctx: &TypeContext, + interner: &Interner, + symbol_table: &SymbolTable, +) -> Result, EmitError> { + emit_inner(type_ctx, interner, symbol_table, None, None) } /// Emit bytecode from a LinkedQuery (includes node type/field validation info). @@ -551,6 +581,7 @@ pub fn emit_linked(query: &LinkedQuery) -> Result, EmitError> { emit_inner( query.type_context(), query.interner(), + &query.symbol_table, Some(query.node_type_ids()), Some(query.node_field_ids()), ) @@ -560,13 +591,28 @@ pub fn emit_linked(query: &LinkedQuery) -> Result, EmitError> { fn emit_inner( type_ctx: &TypeContext, interner: &Interner, - node_type_ids: Option<&HashMap>, - node_field_ids: Option<&HashMap>, + symbol_table: &SymbolTable, + node_type_ids: Option<&IndexMap>, + node_field_ids: Option<&IndexMap>, ) -> Result, EmitError> { + let is_linked = node_type_ids.is_some(); let mut strings = StringTableBuilder::new(); let mut types = TypeTableBuilder::new(); types.build(type_ctx, interner, &mut strings)?; + // Compile transitions (strings are interned here for unlinked mode) + let compile_result = Compiler::compile(interner, type_ctx, symbol_table, &mut strings, node_type_ids, node_field_ids) + .map_err(EmitError::Compile)?; + + // Layout with cache alignment + let entry_labels: Vec