From b6af964c663d64c13d644acb344f05724125d8b0 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Tue, 30 Dec 2025 13:11:25 -0300 Subject: [PATCH] feat: add bytecode emit infrastructure --- Cargo.lock | 20 + crates/plotnik-lib/Cargo.toml | 2 + crates/plotnik-lib/src/bytecode/emit/mod.rs | 10 + .../src/bytecode/emit/typescript.rs | 799 ++++++++++++++++++ crates/plotnik-lib/src/bytecode/mod.rs | 1 + crates/plotnik-lib/src/query/emit.rs | 724 ++++++++++++++++ crates/plotnik-lib/src/query/emit_tests.rs | 333 ++++++++ crates/plotnik-lib/src/query/mod.rs | 3 + crates/plotnik-lib/src/query/query.rs | 10 + 9 files changed, 1902 insertions(+) create mode 100644 crates/plotnik-lib/src/bytecode/emit/mod.rs create mode 100644 crates/plotnik-lib/src/bytecode/emit/typescript.rs create mode 100644 crates/plotnik-lib/src/query/emit.rs create mode 100644 crates/plotnik-lib/src/query/emit_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 98e53033..0a6b734a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,6 +1297,15 @@ version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "dlmalloc" version = "0.2.12" @@ -1474,6 +1483,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1625,10 +1643,12 @@ version = "0.1.1" dependencies = [ 
"annotate-snippets", "arborium-tree-sitter", + "crc32fast", "indexmap", "indoc", "insta", "logos", + "memmap2", "plotnik-core", "plotnik-langs", "rowan", diff --git a/crates/plotnik-lib/Cargo.toml b/crates/plotnik-lib/Cargo.toml index ab2365d8..fbaab02e 100644 --- a/crates/plotnik-lib/Cargo.toml +++ b/crates/plotnik-lib/Cargo.toml @@ -21,6 +21,8 @@ rowan = "0.16.1" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.17" arborium-tree-sitter = "2.3.2" +crc32fast = "1.4" +memmap2 = "0.9" plotnik-core = { version = "0.1", path = "../plotnik-core" } plotnik-langs = { version = "0.1", path = "../plotnik-langs", optional = true } diff --git a/crates/plotnik-lib/src/bytecode/emit/mod.rs b/crates/plotnik-lib/src/bytecode/emit/mod.rs new file mode 100644 index 00000000..3699f14d --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/emit/mod.rs @@ -0,0 +1,10 @@ +//! Code generation from bytecode Module. +//! +//! This module provides emitters for generating code from compiled bytecode. +//! Currently supports TypeScript, with Rust planned. + +mod typescript; + +pub use typescript::{ + EmitConfig as TsEmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config, +}; diff --git a/crates/plotnik-lib/src/bytecode/emit/typescript.rs b/crates/plotnik-lib/src/bytecode/emit/typescript.rs new file mode 100644 index 00000000..dc91a489 --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/emit/typescript.rs @@ -0,0 +1,799 @@ +//! TypeScript type emitter from bytecode Module. +//! +//! Converts compiled bytecode back to TypeScript declarations. +//! Used as a test oracle and for generating types from .ptkq files. + +use std::collections::hash_map::Entry; +use std::collections::{BTreeSet, HashMap, HashSet}; + +use crate::bytecode::module::{Module, StringsView, TypesView}; +use crate::bytecode::type_meta::{TypeDef, TypeKind}; +use crate::bytecode::{EntrypointsView, QTypeId}; + +/// Configuration for TypeScript emission. 
+#[derive(Clone, Debug)]
+pub struct EmitConfig {
+    /// Whether to export types
+    pub export: bool,
+    /// Whether to emit the Node type definition
+    pub emit_node_type: bool,
+    /// Use verbose node representation (with kind, text, etc.)
+    pub verbose_nodes: bool,
+}
+
+impl Default for EmitConfig {
+    /// Defaults: declarations are exported, the `Node` interface is emitted,
+    /// and the compact (non-verbose) node shape is used.
+    fn default() -> Self {
+        Self {
+            export: true,
+            emit_node_type: true,
+            verbose_nodes: false,
+        }
+    }
+}
+
+/// TypeScript emitter from bytecode module.
+///
+/// Holds read-only views into the module's type, string and entrypoint
+/// sections plus the mutable bookkeeping accumulated while emitting.
+pub struct TsEmitter<'a> {
+    types: TypesView<'a>,
+    strings: StringsView<'a>,
+    entrypoints: EntrypointsView<'a>,
+    config: EmitConfig,
+
+    /// TypeId -> assigned name mapping
+    type_names: HashMap<QTypeId, String>,
+    /// Names already used (for collision avoidance)
+    used_names: BTreeSet<String>,
+    /// Track which builtin types are referenced
+    node_referenced: bool,
+    /// Track which types have been emitted
+    emitted: HashSet<QTypeId>,
+    /// Types visited during builtin reference collection (cycle detection)
+    refs_visited: HashSet<QTypeId>,
+    /// Output buffer
+    output: String,
+}
+
+impl<'a> TsEmitter<'a> {
+    /// Create an emitter over `module` using `config`.
+    ///
+    /// All bookkeeping (assigned names, visited sets, output buffer) starts
+    /// empty; nothing is emitted until `emit` is called.
+    pub fn new(module: &'a Module, config: EmitConfig) -> Self {
+        Self {
+            types: module.types(),
+            strings: module.strings(),
+            entrypoints: module.entrypoints(),
+            config,
+            type_names: HashMap::new(),
+            used_names: BTreeSet::new(),
+            node_referenced: false,
+            emitted: HashSet::new(),
+            refs_visited: HashSet::new(),
+            output: String::new(),
+        }
+    }
+
+    /// Emit TypeScript for all entrypoint types.
+ pub fn emit(mut self) -> String { + self.prepare_emission(); + + // Collect all entrypoints and their result types + let mut primary_names: HashMap = HashMap::new(); + let mut aliases: Vec<(String, QTypeId)> = Vec::new(); + + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let name = self.strings.get(ep.name).to_string(); + let type_id = ep.result_type; + + match primary_names.entry(type_id) { + Entry::Vacant(e) => { + e.insert(name); + } + Entry::Occupied(_) => { + aliases.push((name, type_id)); + } + } + } + + // Collect all reachable types starting from entrypoints + let mut to_emit = HashSet::new(); + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + self.collect_reachable_types(ep.result_type, &mut to_emit); + } + + // Emit in topological order + for type_id in self.sort_topologically(to_emit) { + if let Some(def_name) = primary_names.get(&type_id) { + self.emit_type_definition(def_name, type_id); + } else { + self.emit_generated_or_custom(type_id); + } + } + + // Emit aliases + for (alias_name, type_id) in aliases { + if let Some(primary_name) = primary_names.get(&type_id) { + self.emit_type_alias(&alias_name, primary_name); + } + } + + self.output + } + + fn prepare_emission(&mut self) { + // Reserve entrypoint names to avoid collisions + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let name = self.strings.get(ep.name); + self.used_names.insert(to_pascal_case(name)); + } + + // Assign names to named types from TypeNames section + for i in 0..self.types.names_count() { + let type_name = self.types.get_name(i); + let name = self.strings.get(type_name.name); + self.type_names + .insert(type_name.type_id, to_pascal_case(name)); + } + + // Assign names to struct/enum types that need them but don't have names + self.assign_generated_names(); + + // Collect builtin references + self.collect_builtin_references(); + + // Emit Node interface if referenced + if 
self.config.emit_node_type && self.node_referenced { + self.emit_node_interface(); + } + } + + fn assign_generated_names(&mut self) { + // Collect naming contexts from entrypoints → fields + let mut contexts: HashMap = HashMap::new(); + + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + let def_name = self.strings.get(ep.name); + self.collect_naming_contexts( + ep.result_type, + &NamingContext { + def_name: def_name.to_string(), + field_name: None, + }, + &mut contexts, + ); + } + + // Assign names to types that need them + for i in 0..self.types.defs_count() { + let type_id = QTypeId::from_custom_index(i); + if self.type_names.contains_key(&type_id) { + continue; + } + + let type_def = self.types.get_def(i); + if !self.needs_generated_name(&type_def) { + continue; + } + + let name = if let Some(ctx) = contexts.get(&type_id) { + self.generate_contextual_name(ctx) + } else { + self.generate_fallback_name(&type_def) + }; + self.type_names.insert(type_id, name); + } + } + + fn collect_naming_contexts( + &self, + type_id: QTypeId, + ctx: &NamingContext, + contexts: &mut HashMap, + ) { + if type_id.is_builtin() || contexts.contains_key(&type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + for member in self.types.members_of(&type_def) { + let field_name = self.strings.get(member.name); + // Unwrap Optional wrappers to get the actual type + let (inner_type, _) = self.unwrap_optional(member.type_id); + let field_ctx = NamingContext { + def_name: ctx.def_name.clone(), + field_name: Some(field_name.to_string()), + }; + self.collect_naming_contexts(inner_type, &field_ctx, contexts); + } + } + TypeKind::Enum => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + let inner = 
QTypeId(type_def.data); + self.collect_naming_contexts(inner, ctx, contexts); + } + TypeKind::Optional => { + let inner = QTypeId(type_def.data); + self.collect_naming_contexts(inner, ctx, contexts); + } + TypeKind::Alias => { + // Aliases don't need contexts + } + } + } + + fn collect_builtin_references(&mut self) { + for i in 0..self.entrypoints.len() { + let ep = self.entrypoints.get(i); + self.collect_refs_recursive(ep.result_type); + } + } + + fn collect_refs_recursive(&mut self, type_id: QTypeId) { + if type_id == QTypeId::NODE { + self.node_referenced = true; + return; + } + if type_id == QTypeId::STRING || type_id == QTypeId::VOID { + return; + } + + // Cycle detection + if !self.refs_visited.insert(type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct | TypeKind::Enum => { + let member_types: Vec<_> = self + .types + .members_of(&type_def) + .map(|m| m.type_id) + .collect(); + for ty in member_types { + self.collect_refs_recursive(ty); + } + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore | TypeKind::Optional => { + self.collect_refs_recursive(QTypeId(type_def.data)); + } + TypeKind::Alias => { + // Alias to Node + self.node_referenced = true; + } + } + } + + fn sort_topologically(&self, types: HashSet) -> Vec { + let mut deps: HashMap> = HashMap::new(); + let mut rdeps: HashMap> = HashMap::new(); + + for &tid in &types { + deps.entry(tid).or_default(); + rdeps.entry(tid).or_default(); + } + + // Build dependency graph + for &tid in &types { + for dep in self.get_direct_deps(tid) { + if types.contains(&dep) && dep != tid { + deps.entry(tid).or_default().insert(dep); + rdeps.entry(dep).or_default().insert(tid); + } + } + } + + // Kahn's algorithm + let mut result = Vec::with_capacity(types.len()); + let mut queue: Vec = deps + .iter() + .filter(|(_, d)| d.is_empty()) + .map(|(&tid, _)| tid) + .collect(); + + 
queue.sort_by_key(|tid| tid.0); + + while let Some(tid) = queue.pop() { + result.push(tid); + if let Some(dependents) = rdeps.get(&tid) { + for &dependent in dependents { + if let Some(dep_set) = deps.get_mut(&dependent) { + dep_set.remove(&tid); + if dep_set.is_empty() { + queue.push(dependent); + queue.sort_by_key(|t| t.0); + } + } + } + } + } + + result + } + + fn collect_reachable_types(&self, type_id: QTypeId, out: &mut HashSet) { + if type_id.is_builtin() || out.contains(&type_id) { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + out.insert(type_id); + for member in self.types.members_of(&type_def) { + self.collect_reachable_types(member.type_id, out); + } + } + TypeKind::Enum => { + out.insert(type_id); + for member in self.types.members_of(&type_def) { + // For enum variants, recurse into payload fields but don't + // add the payload struct itself - it will be inlined. + self.collect_enum_variant_refs(member.type_id, out); + } + } + TypeKind::Alias => { + out.insert(type_id); + } + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + self.collect_reachable_types(QTypeId(type_def.data), out); + } + TypeKind::Optional => { + self.collect_reachable_types(QTypeId(type_def.data), out); + } + } + } + + /// Collect reachable types from enum variant payloads. + /// Recurses into struct fields but doesn't add the payload struct itself. + fn collect_enum_variant_refs(&self, type_id: QTypeId, out: &mut HashSet) { + if type_id.is_builtin() { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => { + // DON'T add the struct - it will be inlined as $data. + // But DO recurse into its fields to find named types. 
+ for member in self.types.members_of(&type_def) { + self.collect_reachable_types(member.type_id, out); + } + } + _ => { + // For non-struct payloads (shouldn't happen normally), + // fall back to regular collection. + self.collect_reachable_types(type_id, out); + } + } + } + + fn get_direct_deps(&self, type_id: QTypeId) -> Vec { + let Some(type_def) = self.types.get(type_id) else { + return vec![]; + }; + + let Some(kind) = type_def.type_kind() else { + return vec![]; + }; + + match kind { + TypeKind::Struct | TypeKind::Enum => self + .types + .members_of(&type_def) + .flat_map(|member| self.unwrap_for_deps(member.type_id)) + .collect(), + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore => { + self.unwrap_for_deps(QTypeId(type_def.data)) + } + TypeKind::Optional => self.unwrap_for_deps(QTypeId(type_def.data)), + TypeKind::Alias => vec![], + } + } + + fn unwrap_for_deps(&self, type_id: QTypeId) -> Vec { + if type_id.is_builtin() { + return vec![]; + } + + let Some(type_def) = self.types.get(type_id) else { + return vec![]; + }; + + let Some(kind) = type_def.type_kind() else { + return vec![]; + }; + + match kind { + TypeKind::ArrayZeroOrMore | TypeKind::ArrayOneOrMore | TypeKind::Optional => { + self.unwrap_for_deps(QTypeId(type_def.data)) + } + TypeKind::Struct | TypeKind::Enum | TypeKind::Alias => vec![type_id], + } + } + + fn emit_generated_or_custom(&mut self, type_id: QTypeId) { + if self.emitted.contains(&type_id) || type_id.is_builtin() { + return; + } + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + // Check if this is an alias type (custom type annotation) + if type_def.is_alias() { + if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_custom_type_alias(&name); + self.emitted.insert(type_id); + } + return; + } + + // Check if we have a generated name + if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_generated_type_def(type_id, &name); + } + } + + fn emit_generated_type_def(&mut 
self, type_id: QTypeId, name: &str) { + self.emitted.insert(type_id); + let export = if self.config.export { "export " } else { "" }; + + let Some(type_def) = self.types.get(type_id) else { + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => self.emit_interface(name, &type_def, export), + TypeKind::Enum => self.emit_tagged_union(name, &type_def, export), + _ => {} + } + } + + fn emit_type_definition(&mut self, name: &str, type_id: QTypeId) { + self.emitted.insert(type_id); + let export = if self.config.export { "export " } else { "" }; + let type_name = to_pascal_case(name); + + let Some(type_def) = self.types.get(type_id) else { + // Builtin type - emit as alias + let ts_type = self.type_to_ts(type_id); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); + return; + }; + + let Some(kind) = type_def.type_kind() else { + return; + }; + + match kind { + TypeKind::Struct => self.emit_interface(&type_name, &type_def, export), + TypeKind::Enum => self.emit_tagged_union(&type_name, &type_def, export), + _ => { + let ts_type = self.type_to_ts(type_id); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); + } + } + } + + fn emit_interface(&mut self, name: &str, type_def: &TypeDef, export: &str) { + self.output + .push_str(&format!("{}interface {} {{\n", export, name)); + + // Collect fields and sort by name + let mut fields: Vec<(String, QTypeId, bool)> = self + .types + .members_of(type_def) + .map(|member| { + let field_name = self.strings.get(member.name).to_string(); + let (inner_type, optional) = self.unwrap_optional(member.type_id); + (field_name, inner_type, optional) + }) + .collect(); + fields.sort_by(|a, b| a.0.cmp(&b.0)); + + for (field_name, field_type, optional) in fields { + let ts_type = self.type_to_ts(field_type); + let opt_marker = if optional { "?" 
} else { "" }; + self.output + .push_str(&format!(" {}{}: {};\n", field_name, opt_marker, ts_type)); + } + + self.output.push_str("}\n\n"); + } + + fn emit_tagged_union(&mut self, name: &str, type_def: &TypeDef, export: &str) { + let mut variant_types = Vec::new(); + + for member in self.types.members_of(type_def) { + let variant_name = self.strings.get(member.name); + let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); + variant_types.push(variant_type_name.clone()); + + let data_str = self.inline_data_type(member.type_id); + self.output.push_str(&format!( + "{}interface {} {{\n $tag: \"{}\";\n $data: {};\n}}\n\n", + export, variant_type_name, variant_name, data_str + )); + } + + let union = variant_types.join(" | "); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, name, union)); + } + + fn emit_custom_type_alias(&mut self, name: &str) { + let export = if self.config.export { "export " } else { "" }; + self.output + .push_str(&format!("{}type {} = Node;\n\n", export, name)); + } + + fn emit_type_alias(&mut self, alias_name: &str, target_name: &str) { + let export = if self.config.export { "export " } else { "" }; + self.output.push_str(&format!( + "{}type {} = {};\n\n", + export, alias_name, target_name + )); + } + + fn emit_node_interface(&mut self) { + let export = if self.config.export { "export " } else { "" }; + if self.config.verbose_nodes { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n startPosition: {{ row: number; column: number }};\n endPosition: {{ row: number; column: number }};\n}}\n\n", + export + )); + } else { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n}}\n\n", + export + )); + } + } + + fn type_to_ts(&self, type_id: QTypeId) -> String { + match type_id { + QTypeId::VOID => "void".to_string(), + QTypeId::NODE => "Node".to_string(), + QTypeId::STRING => "string".to_string(), + _ => self.custom_type_to_ts(type_id), + } 
+ } + + fn custom_type_to_ts(&self, type_id: QTypeId) -> String { + let Some(type_def) = self.types.get(type_id) else { + return "unknown".to_string(); + }; + + let Some(kind) = type_def.type_kind() else { + return "unknown".to_string(); + }; + + match kind { + TypeKind::Struct | TypeKind::Enum => { + if let Some(name) = self.type_names.get(&type_id) { + name.clone() + } else { + self.inline_composite(type_id, &type_def, &kind) + } + } + TypeKind::Alias => { + if let Some(name) = self.type_names.get(&type_id) { + name.clone() + } else { + "Node".to_string() + } + } + TypeKind::ArrayZeroOrMore => { + let elem_type = self.type_to_ts(QTypeId(type_def.data)); + format!("{}[]", elem_type) + } + TypeKind::ArrayOneOrMore => { + let elem_type = self.type_to_ts(QTypeId(type_def.data)); + format!("[{}, ...{}[]]", elem_type, elem_type) + } + TypeKind::Optional => { + let inner_type = self.type_to_ts(QTypeId(type_def.data)); + format!("{} | null", inner_type) + } + } + } + + fn inline_composite(&self, _type_id: QTypeId, type_def: &TypeDef, kind: &TypeKind) -> String { + match kind { + TypeKind::Struct => self.inline_struct(type_def), + TypeKind::Enum => self.inline_enum(type_def), + _ => "unknown".to_string(), + } + } + + fn inline_struct(&self, type_def: &TypeDef) -> String { + if type_def.count == 0 { + return "{}".to_string(); + } + + let mut fields: Vec<(String, QTypeId, bool)> = self + .types + .members_of(type_def) + .map(|member| { + let field_name = self.strings.get(member.name).to_string(); + let (inner_type, optional) = self.unwrap_optional(member.type_id); + (field_name, inner_type, optional) + }) + .collect(); + fields.sort_by(|a, b| a.0.cmp(&b.0)); + + let field_strs: Vec = fields + .iter() + .map(|(name, ty, opt)| { + let ts_type = self.type_to_ts(*ty); + let opt_marker = if *opt { "?" 
} else { "" }; + format!("{}{}: {}", name, opt_marker, ts_type) + }) + .collect(); + + format!("{{ {} }}", field_strs.join("; ")) + } + + fn inline_enum(&self, type_def: &TypeDef) -> String { + let variant_strs: Vec = self + .types + .members_of(type_def) + .map(|member| { + let name = self.strings.get(member.name); + let data_type = self.type_to_ts(member.type_id); + format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type) + }) + .collect(); + + variant_strs.join(" | ") + } + + fn inline_data_type(&self, type_id: QTypeId) -> String { + if type_id == QTypeId::VOID { + return "{}".to_string(); + } + + let Some(type_def) = self.types.get(type_id) else { + return self.type_to_ts(type_id); + }; + + let Some(kind) = type_def.type_kind() else { + return self.type_to_ts(type_id); + }; + + if kind == TypeKind::Struct { + self.inline_struct(&type_def) + } else { + self.type_to_ts(type_id) + } + } + + /// Unwrap Optional wrappers and return (inner_type, is_optional). + fn unwrap_optional(&self, type_id: QTypeId) -> (QTypeId, bool) { + if type_id.is_builtin() { + return (type_id, false); + } + let Some(type_def) = self.types.get(type_id) else { + return (type_id, false); + }; + if type_def.type_kind() == Some(TypeKind::Optional) { + (QTypeId(type_def.data), true) + } else { + (type_id, false) + } + } + + fn needs_generated_name(&self, type_def: &TypeDef) -> bool { + matches!( + type_def.type_kind(), + Some(TypeKind::Struct) | Some(TypeKind::Enum) + ) + } + + fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String { + let base = if let Some(field) = &ctx.field_name { + format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field)) + } else { + to_pascal_case(&ctx.def_name) + }; + self.unique_name(&base) + } + + fn generate_fallback_name(&mut self, type_def: &TypeDef) -> String { + let base = match type_def.type_kind() { + Some(TypeKind::Struct) => "Struct", + Some(TypeKind::Enum) => "Enum", + _ => "Type", + }; + self.unique_name(base) + } + + fn 
unique_name(&mut self, base: &str) -> String { + let base = to_pascal_case(base); + if self.used_names.insert(base.clone()) { + return base; + } + + let mut counter = 2; + loop { + let name = format!("{}{}", base, counter); + if self.used_names.insert(name.clone()) { + return name; + } + counter += 1; + } + } +} + +#[derive(Clone, Debug)] +struct NamingContext { + def_name: String, + field_name: Option, +} + +fn to_pascal_case(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut capitalize_next = true; + + for c in s.chars() { + if c == '_' || c == '-' || c == '.' { + capitalize_next = true; + } else if capitalize_next { + result.extend(c.to_uppercase()); + capitalize_next = false; + } else { + result.push(c); + } + } + result +} + +/// Emit TypeScript from a bytecode module. +pub fn emit_typescript(module: &Module) -> String { + TsEmitter::new(module, EmitConfig::default()).emit() +} + +/// Emit TypeScript from a bytecode module with custom config. +pub fn emit_typescript_with_config(module: &Module, config: EmitConfig) -> String { + TsEmitter::new(module, config).emit() +} diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index 6f947aec..9fd33cc9 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -4,6 +4,7 @@ mod constants; mod effects; +pub mod emit; mod entrypoint; mod header; mod ids; diff --git a/crates/plotnik-lib/src/query/emit.rs b/crates/plotnik-lib/src/query/emit.rs new file mode 100644 index 00000000..0daf25c8 --- /dev/null +++ b/crates/plotnik-lib/src/query/emit.rs @@ -0,0 +1,724 @@ +//! Bytecode emission from LinkedQuery. +//! +//! Converts the analyzed query representation into the binary bytecode format. 
+ +use std::collections::{HashMap, HashSet}; + +use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; + +use crate::bytecode::{ + Entrypoint, FieldSymbol, Header, NodeSymbol, QTypeId, SECTION_ALIGN, StepId, StringId, + TriviaEntry, TypeDef, TypeMember, TypeMetaHeader, TypeName, +}; +use crate::type_system::TypeKind; + +use super::query::LinkedQuery; +use super::type_check::{ + FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeContext, TypeId, TypeKind as InferredTypeKind, +}; + +/// Error during bytecode emission. +#[derive(Clone, Debug)] +pub enum EmitError { + /// Too many strings (exceeds u16 max). + TooManyStrings(usize), + /// Too many types (exceeds u16 max). + TooManyTypes(usize), + /// Too many type members (exceeds u16 max). + TooManyTypeMembers(usize), + /// Too many entrypoints (exceeds u16 max). + TooManyEntrypoints(usize), + /// String not found in interner. + StringNotFound(Symbol), +} + +impl std::fmt::Display for EmitError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::TooManyStrings(n) => write!(f, "too many strings: {n} (max 65534)"), + Self::TooManyTypes(n) => write!(f, "too many types: {n} (max 65533)"), + Self::TooManyTypeMembers(n) => write!(f, "too many type members: {n} (max 65535)"), + Self::TooManyEntrypoints(n) => write!(f, "too many entrypoints: {n} (max 65535)"), + Self::StringNotFound(sym) => write!(f, "string not found for symbol: {sym:?}"), + } + } +} + +impl std::error::Error for EmitError {} + +/// Builds the string table, remapping query Symbols to bytecode StringIds. +/// +/// The bytecode format requires a subset of the query interner's strings. +/// This builder collects only the strings that are actually used and assigns +/// compact StringId indices. +#[derive(Debug)] +pub struct StringTableBuilder { + /// Map from query Symbol to bytecode StringId. + mapping: HashMap, + /// Reverse lookup from string content to StringId (for intern_str). 
+ str_lookup: HashMap, + /// Ordered strings for the binary. + strings: Vec, +} + +impl StringTableBuilder { + pub fn new() -> Self { + Self { + mapping: HashMap::new(), + str_lookup: HashMap::new(), + strings: Vec::new(), + } + } + + /// Get or create a StringId for a Symbol. + pub fn get_or_intern( + &mut self, + sym: Symbol, + interner: &Interner, + ) -> Result { + if let Some(&id) = self.mapping.get(&sym) { + return Ok(id); + } + + let text = interner + .try_resolve(sym) + .ok_or(EmitError::StringNotFound(sym))?; + + let id = StringId(self.strings.len() as u16); + self.strings.push(text.to_string()); + self.str_lookup.insert(text.to_string(), id); + self.mapping.insert(sym, id); + Ok(id) + } + + /// Intern a string directly (for generated strings not in the query interner). + pub fn intern_str(&mut self, s: &str) -> StringId { + if let Some(&id) = self.str_lookup.get(s) { + return id; + } + + let id = StringId(self.strings.len() as u16); + self.strings.push(s.to_string()); + self.str_lookup.insert(s.to_string(), id); + id + } + + /// Number of interned strings. + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Whether the builder is empty. + pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } + + /// Validate that the string count fits in u16. + pub fn validate(&self) -> Result<(), EmitError> { + // Max count is 65534 because the table needs count+1 entries + if self.strings.len() > 65534 { + return Err(EmitError::TooManyStrings(self.strings.len())); + } + Ok(()) + } + + /// Emit the string blob and offset table. + /// + /// Returns (blob_bytes, table_bytes). 
+ pub fn emit(&self) -> (Vec, Vec) { + let mut blob = Vec::new(); + let mut offsets: Vec = Vec::with_capacity(self.strings.len() + 1); + + for s in &self.strings { + offsets.push(blob.len() as u32); + blob.extend_from_slice(s.as_bytes()); + } + offsets.push(blob.len() as u32); // sentinel + + // Convert offsets to bytes + let table_bytes: Vec = offsets.iter().flat_map(|o| o.to_le_bytes()).collect(); + + (blob, table_bytes) + } +} + +impl Default for StringTableBuilder { + fn default() -> Self { + Self::new() + } +} + +/// Builds the type metadata, remapping query TypeIds to bytecode QTypeIds. +#[derive(Debug)] +pub struct TypeTableBuilder { + /// Map from query TypeId to bytecode QTypeId. + mapping: HashMap, + /// Type definitions (4 bytes each). + type_defs: Vec, + /// Type members for structs/enums (4 bytes each). + type_members: Vec, + /// Type names for named types (4 bytes each). + type_names: Vec, + /// Cache for dynamically created Optional wrappers: base_type -> Optional(base_type) + optional_wrappers: HashMap, +} + +impl TypeTableBuilder { + pub fn new() -> Self { + Self { + mapping: HashMap::new(), + type_defs: Vec::new(), + type_members: Vec::new(), + type_names: Vec::new(), + optional_wrappers: HashMap::new(), + } + } + + /// Build type table from TypeContext. + /// + /// Types are collected in definition order, depth-first, to mirror query structure. 
+ pub fn build( + &mut self, + type_ctx: &TypeContext, + interner: &Interner, + strings: &mut StringTableBuilder, + ) -> Result<(), EmitError> { + // Pre-populate builtin mappings + self.mapping.insert(TYPE_VOID, QTypeId::VOID); + self.mapping.insert(TYPE_NODE, QTypeId::NODE); + self.mapping.insert(TYPE_STRING, QTypeId::STRING); + + // Collect types in definition order, depth-first to mirror query structure + let mut ordered_types: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + + for (_def_id, type_id) in type_ctx.iter_def_types() { + collect_types_dfs(type_id, type_ctx, &mut ordered_types, &mut seen); + } + + // Pre-assign QTypeIds and reserve slots for all collected types. + // This ensures that forward references (e.g., recursive types) can be resolved. + for (i, &type_id) in ordered_types.iter().enumerate() { + let bc_id = QTypeId::from_custom_index(i); + self.mapping.insert(type_id, bc_id); + // Push a placeholder that will be filled in during emit + self.type_defs.push(TypeDef { + data: 0, + count: 0, + kind: 0, // Placeholder + }); + } + + // Emit TypeDefs and TypeMembers - fill in the placeholders. + for (slot_index, &type_id) in ordered_types.iter().enumerate() { + let type_kind = type_ctx + .get_type(type_id) + .expect("collected type must exist"); + self.emit_type_at_slot(slot_index, type_id, type_kind, type_ctx, interner, strings)?; + } + + // Collect TypeName entries for named definitions + for (def_id, type_id) in type_ctx.iter_def_types() { + let name_sym = type_ctx.def_name_sym(def_id); + let name = strings.get_or_intern(name_sym, interner)?; + let bc_type_id = self.mapping.get(&type_id).copied().unwrap_or(QTypeId::VOID); + self.type_names.push(TypeName { + name, + type_id: bc_type_id, + }); + } + + Ok(()) + } + + /// Fill in a TypeDef at a pre-allocated slot. 
+ fn emit_type_at_slot( + &mut self, + slot_index: usize, + _type_id: TypeId, + type_kind: &InferredTypeKind, + type_ctx: &TypeContext, + interner: &Interner, + strings: &mut StringTableBuilder, + ) -> Result<(), EmitError> { + match type_kind { + InferredTypeKind::Void | InferredTypeKind::Node | InferredTypeKind::String => { + // Builtins - should not reach here + unreachable!("builtins should be handled separately") + } + + InferredTypeKind::Custom(sym) => { + // Custom type annotation: @x :: Identifier → type Identifier = Node + let bc_type_id = QTypeId::from_custom_index(slot_index); + + // Add TypeName entry for the custom type + let name = strings.get_or_intern(*sym, interner)?; + self.type_names.push(TypeName { + name, + type_id: bc_type_id, + }); + + self.type_defs[slot_index] = TypeDef { + data: QTypeId::NODE.0, // Custom types alias Node + count: 0, + kind: TypeKind::Alias as u8, + }; + Ok(()) + } + + InferredTypeKind::Optional(inner) => { + let inner_bc = self.resolve_type(*inner, type_ctx)?; + + self.type_defs[slot_index] = TypeDef { + data: inner_bc.0, + count: 0, + kind: TypeKind::Optional as u8, + }; + Ok(()) + } + + InferredTypeKind::Array { element, non_empty } => { + let element_bc = self.resolve_type(*element, type_ctx)?; + + let kind = if *non_empty { + TypeKind::ArrayOneOrMore + } else { + TypeKind::ArrayZeroOrMore + }; + self.type_defs[slot_index] = TypeDef { + data: element_bc.0, + count: 0, + kind: kind as u8, + }; + Ok(()) + } + + InferredTypeKind::Struct(fields) => { + // Resolve field types (this may create Optional wrappers at later indices) + let mut resolved_fields = Vec::with_capacity(fields.len()); + for (field_sym, field_info) in fields { + let field_name = strings.get_or_intern(*field_sym, interner)?; + let field_type = self.resolve_field_type(field_info, type_ctx)?; + resolved_fields.push((field_name, field_type)); + } + + // Now emit the members and update the placeholder + let member_start = self.type_members.len() as u16; + for 
(field_name, field_type) in resolved_fields { + self.type_members.push(TypeMember { + name: field_name, + type_id: field_type, + }); + } + + let member_count = fields.len() as u8; + self.type_defs[slot_index] = TypeDef { + data: member_start, + count: member_count, + kind: TypeKind::Struct as u8, + }; + Ok(()) + } + + InferredTypeKind::Enum(variants) => { + // Resolve variant types (this may create types at later indices) + let mut resolved_variants = Vec::with_capacity(variants.len()); + for (variant_sym, variant_type_id) in variants { + let variant_name = strings.get_or_intern(*variant_sym, interner)?; + let variant_type = self.resolve_type(*variant_type_id, type_ctx)?; + resolved_variants.push((variant_name, variant_type)); + } + + // Now emit the members and update the placeholder + let member_start = self.type_members.len() as u16; + for (variant_name, variant_type) in resolved_variants { + self.type_members.push(TypeMember { + name: variant_name, + type_id: variant_type, + }); + } + + let member_count = variants.len() as u8; + self.type_defs[slot_index] = TypeDef { + data: member_start, + count: member_count, + kind: TypeKind::Enum as u8, + }; + Ok(()) + } + + InferredTypeKind::Ref(_def_id) => { + // Ref types are not emitted - they resolve to their target + unreachable!("Ref types should not be collected for emission") + } + } + } + + /// Resolve a query TypeId to bytecode QTypeId. 
+ fn resolve_type(&self, type_id: TypeId, type_ctx: &TypeContext) -> Result { + // Check if already mapped + if let Some(&bc_id) = self.mapping.get(&type_id) { + return Ok(bc_id); + } + + // Handle Ref types by following the reference + if let Some(type_kind) = type_ctx.get_type(type_id) + && let InferredTypeKind::Ref(def_id) = type_kind + && let Some(def_type_id) = type_ctx.get_def_type(*def_id) + { + return self.resolve_type(def_type_id, type_ctx); + } + + // If not found, default to VOID (should not happen for well-formed types) + Ok(QTypeId::VOID) + } + + /// Resolve a field's type, handling optionality. + fn resolve_field_type( + &mut self, + field_info: &FieldInfo, + type_ctx: &TypeContext, + ) -> Result { + let base_type = self.resolve_type(field_info.type_id, type_ctx)?; + + // If the field is optional, wrap it in Optional + if field_info.optional { + self.get_or_create_optional(base_type) + } else { + Ok(base_type) + } + } + + /// Get or create an Optional wrapper for a base type. + fn get_or_create_optional(&mut self, base_type: QTypeId) -> Result { + // Check cache first + if let Some(&optional_id) = self.optional_wrappers.get(&base_type) { + return Ok(optional_id); + } + + // Create new Optional wrapper + let optional_id = QTypeId::from_custom_index(self.type_defs.len()); + + self.type_defs.push(TypeDef { + data: base_type.0, + count: 0, + kind: TypeKind::Optional as u8, + }); + + self.optional_wrappers.insert(base_type, optional_id); + Ok(optional_id) + } + + /// Validate that counts fit in u16. + pub fn validate(&self) -> Result<(), EmitError> { + // Max 65533 custom types (65535 - 3 builtins) + if self.type_defs.len() > 65533 { + return Err(EmitError::TooManyTypes(self.type_defs.len())); + } + if self.type_members.len() > 65535 { + return Err(EmitError::TooManyTypeMembers(self.type_members.len())); + } + Ok(()) + } + + /// Get the bytecode QTypeId for a query TypeId. 
+ pub fn get(&self, type_id: TypeId) -> Option { + self.mapping.get(&type_id).copied() + } + + /// Emit type definitions, members, and names as bytes. + /// + /// Returns (type_defs_bytes, type_members_bytes, type_names_bytes). + pub fn emit(&self) -> (Vec, Vec, Vec) { + let mut defs_bytes = Vec::with_capacity(self.type_defs.len() * 4); + for def in &self.type_defs { + defs_bytes.extend_from_slice(&def.data.to_le_bytes()); + defs_bytes.push(def.count); + defs_bytes.push(def.kind); + } + + let mut members_bytes = Vec::with_capacity(self.type_members.len() * 4); + for member in &self.type_members { + members_bytes.extend_from_slice(&member.name.0.to_le_bytes()); + members_bytes.extend_from_slice(&member.type_id.0.to_le_bytes()); + } + + let mut names_bytes = Vec::with_capacity(self.type_names.len() * 4); + for type_name in &self.type_names { + names_bytes.extend_from_slice(&type_name.name.0.to_le_bytes()); + names_bytes.extend_from_slice(&type_name.type_id.0.to_le_bytes()); + } + + (defs_bytes, members_bytes, names_bytes) + } + + /// Number of type definitions. + pub fn type_defs_count(&self) -> usize { + self.type_defs.len() + } + + /// Number of type members. + pub fn type_members_count(&self) -> usize { + self.type_members.len() + } + + /// Number of type names. + pub fn type_names_count(&self) -> usize { + self.type_names.len() + } +} + +impl Default for TypeTableBuilder { + fn default() -> Self { + Self::new() + } +} + +/// Collect types depth-first starting from a root type. 
+fn collect_types_dfs( + type_id: TypeId, + type_ctx: &TypeContext, + out: &mut Vec, + seen: &mut HashSet, +) { + // Skip builtins and already-seen types + if type_id.is_builtin() || seen.contains(&type_id) { + return; + } + + let Some(type_kind) = type_ctx.get_type(type_id) else { + return; + }; + + // Resolve Ref types to their target + if let InferredTypeKind::Ref(def_id) = type_kind { + if let Some(target_id) = type_ctx.get_def_type(*def_id) { + collect_types_dfs(target_id, type_ctx, out, seen); + } + return; + } + + seen.insert(type_id); + + // Collect children first (depth-first), then add self + match type_kind { + InferredTypeKind::Struct(fields) => { + for field_info in fields.values() { + collect_types_dfs(field_info.type_id, type_ctx, out, seen); + } + out.push(type_id); + } + InferredTypeKind::Enum(variants) => { + for &variant_type_id in variants.values() { + collect_types_dfs(variant_type_id, type_ctx, out, seen); + } + out.push(type_id); + } + InferredTypeKind::Array { element, .. } => { + // Collect element type first, then add the Array itself + collect_types_dfs(*element, type_ctx, out, seen); + out.push(type_id); + } + InferredTypeKind::Optional(inner) => { + // Collect inner type first, then add the Optional itself + collect_types_dfs(*inner, type_ctx, out, seen); + out.push(type_id); + } + InferredTypeKind::Custom(_) => { + // Custom types alias Node, no children to collect + out.push(type_id); + } + _ => {} + } +} + +/// Pad a buffer to the section alignment boundary. +fn pad_to_section(buf: &mut Vec) { + let rem = buf.len() % SECTION_ALIGN; + if rem != 0 { + let padding = SECTION_ALIGN - rem; + buf.resize(buf.len() + padding, 0); + } +} + +/// Emit bytecode from type context only (no node validation). +pub fn emit(type_ctx: &TypeContext, interner: &Interner) -> Result, EmitError> { + emit_inner(type_ctx, interner, None, None) +} + +/// Emit bytecode from a LinkedQuery (includes node type/field validation info). 
+pub fn emit_linked(query: &LinkedQuery) -> Result, EmitError> { + emit_inner( + query.type_context(), + query.interner(), + Some(query.node_type_ids()), + Some(query.node_field_ids()), + ) +} + +/// Shared bytecode emission logic. +fn emit_inner( + type_ctx: &TypeContext, + interner: &Interner, + node_type_ids: Option<&HashMap>, + node_field_ids: Option<&HashMap>, +) -> Result, EmitError> { + let mut strings = StringTableBuilder::new(); + let mut types = TypeTableBuilder::new(); + types.build(type_ctx, interner, &mut strings)?; + + // Collect node symbols (empty if not linked) + let mut node_symbols: Vec = Vec::new(); + if let Some(ids) = node_type_ids { + for (&sym, &node_id) in ids { + let name = strings.get_or_intern(sym, interner)?; + node_symbols.push(NodeSymbol { + id: node_id.get(), + name, + }); + } + } + + // Collect field symbols (empty if not linked) + let mut field_symbols: Vec = Vec::new(); + if let Some(ids) = node_field_ids { + for (&sym, &field_id) in ids { + let name = strings.get_or_intern(sym, interner)?; + field_symbols.push(FieldSymbol { + id: field_id.get(), + name, + }); + } + } + + // Collect entrypoints + let mut entrypoints: Vec = Vec::new(); + for (def_id, type_id) in type_ctx.iter_def_types() { + let name_sym = type_ctx.def_name_sym(def_id); + let name = strings.get_or_intern(name_sym, interner)?; + let result_type = types.get(type_id).unwrap_or(QTypeId::VOID); + entrypoints.push(Entrypoint { + name, + target: StepId::ACCEPT, + result_type, + _pad: 0, + }); + } + + // Validate counts + strings.validate()?; + types.validate()?; + if entrypoints.len() > 65535 { + return Err(EmitError::TooManyEntrypoints(entrypoints.len())); + } + + // Trivia (empty for now) + let trivia_entries: Vec = Vec::new(); + + // Emit all byte sections + let (str_blob, str_table) = strings.emit(); + let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit(); + + let node_types_bytes = emit_node_symbols(&node_symbols); + let node_fields_bytes = 
emit_field_symbols(&field_symbols); + let trivia_bytes = emit_trivia(&trivia_entries); + let entrypoints_bytes = emit_entrypoints(&entrypoints); + + // Build output with sections + let mut output = vec![0u8; 64]; // Reserve header space + + let str_blob_offset = emit_section(&mut output, &str_blob); + let str_table_offset = emit_section(&mut output, &str_table); + let node_types_offset = emit_section(&mut output, &node_types_bytes); + let node_fields_offset = emit_section(&mut output, &node_fields_bytes); + let trivia_offset = emit_section(&mut output, &trivia_bytes); + + // Type metadata section (header + 3 aligned sub-sections) + let type_meta_offset = emit_section( + &mut output, + &TypeMetaHeader { + type_defs_count: types.type_defs_count() as u16, + type_members_count: types.type_members_count() as u16, + type_names_count: types.type_names_count() as u16, + _pad: 0, + } + .to_bytes(), + ); + emit_section(&mut output, &type_defs_bytes); + emit_section(&mut output, &type_members_bytes); + emit_section(&mut output, &type_names_bytes); + + let entrypoints_offset = emit_section(&mut output, &entrypoints_bytes); + let transitions_offset = emit_section(&mut output, &[]); // Empty for now + + pad_to_section(&mut output); + let total_size = output.len() as u32; + + // Build and write header + let mut header = Header { + str_blob_offset, + str_table_offset, + node_types_offset, + node_fields_offset, + trivia_offset, + type_meta_offset, + entrypoints_offset, + transitions_offset, + str_table_count: strings.len() as u16, + node_types_count: node_symbols.len() as u16, + node_fields_count: field_symbols.len() as u16, + trivia_count: trivia_entries.len() as u16, + entrypoints_count: entrypoints.len() as u16, + transitions_count: 0, + total_size, + ..Default::default() + }; + header.checksum = crc32fast::hash(&output[64..]); + output[..64].copy_from_slice(&header.to_bytes()); + + Ok(output) +} + +fn emit_section(output: &mut Vec, data: &[u8]) -> u32 { + 
pad_to_section(output); + let offset = output.len() as u32; + output.extend_from_slice(data); + offset +} + +fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec { + let mut bytes = Vec::with_capacity(symbols.len() * 4); + for sym in symbols { + bytes.extend_from_slice(&sym.id.to_le_bytes()); + bytes.extend_from_slice(&sym.name.0.to_le_bytes()); + } + bytes +} + +fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec { + let mut bytes = Vec::with_capacity(symbols.len() * 4); + for sym in symbols { + bytes.extend_from_slice(&sym.id.to_le_bytes()); + bytes.extend_from_slice(&sym.name.0.to_le_bytes()); + } + bytes +} + +fn emit_trivia(entries: &[TriviaEntry]) -> Vec { + let mut bytes = Vec::with_capacity(entries.len() * 2); + for entry in entries { + bytes.extend_from_slice(&entry.node_type.to_le_bytes()); + } + bytes +} + +fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec { + let mut bytes = Vec::with_capacity(entrypoints.len() * 8); + for ep in entrypoints { + bytes.extend_from_slice(&ep.name.0.to_le_bytes()); + bytes.extend_from_slice(&ep.target.0.to_le_bytes()); + bytes.extend_from_slice(&ep.result_type.0.to_le_bytes()); + bytes.extend_from_slice(&ep._pad.to_le_bytes()); + } + bytes +} diff --git a/crates/plotnik-lib/src/query/emit_tests.rs b/crates/plotnik-lib/src/query/emit_tests.rs new file mode 100644 index 00000000..b166e35b --- /dev/null +++ b/crates/plotnik-lib/src/query/emit_tests.rs @@ -0,0 +1,333 @@ +//! Tests for bytecode emission. 
+ +use plotnik_langs::{Lang, from_name}; + +use crate::bytecode::{Header, MAGIC, Module, QTypeId, VERSION}; +use crate::query::QueryBuilder; +use crate::query::emit::{StringTableBuilder, TypeTableBuilder}; + +fn javascript() -> Lang { + from_name("javascript").expect("javascript lang") +} + +fn emit_query(src: &str) -> Vec { + QueryBuilder::one_liner(src) + .parse() + .expect("parse") + .analyze() + .link(&javascript()) + .emit() + .expect("emit") +} + +#[test] +fn emit_minimal_query() { + let bytes = emit_query("Test = (identifier) @id"); + + // Verify header + assert!(bytes.len() >= 64); + let header = Header::from_bytes(&bytes); + assert_eq!(header.magic, MAGIC); + assert_eq!(header.version, VERSION); + assert_eq!(header.total_size as usize, bytes.len()); + + // Should have 1 entrypoint + assert_eq!(header.entrypoints_count, 1); + + // Should have at least one string (the definition name "Test") + assert!(header.str_table_count >= 1); + + // Should have at least one node type ("identifier") + assert!(header.node_types_count >= 1); +} + +#[test] +fn emit_roundtrip_via_module() { + let bytes = emit_query("Test = (identifier) @id"); + + // Load the bytes as a Module + let module = Module::from_bytes(bytes).expect("load module"); + + // Verify we can read back the strings + let strings = module.strings(); + assert!(module.header().str_table_count >= 1); + + // Verify we can read back entrypoints + let entrypoints = module.entrypoints(); + assert_eq!(entrypoints.len(), 1); + + // Verify we can read the entrypoint name + let ep = entrypoints.get(0); + let name = strings.get(ep.name); + assert_eq!(name, "Test"); +} + +#[test] +fn emit_multiple_definitions() { + let bytes = emit_query( + r#" + Foo = (identifier) @id + Bar = (string) @str + "#, + ); + + let header = Header::from_bytes(&bytes); + + // Should have 2 entrypoints + assert_eq!(header.entrypoints_count, 2); + + // Entrypoints preserve definition order + let module = Module::from_bytes(bytes).expect("load 
module"); + let entrypoints = module.entrypoints(); + + let ep0 = entrypoints.get(0); + let ep1 = entrypoints.get(1); + + let name0 = module.strings().get(ep0.name); + let name1 = module.strings().get(ep1.name); + + assert_eq!(name0, "Foo"); // Foo defined first + assert_eq!(name1, "Bar"); +} + +#[test] +fn emit_with_field_constraint() { + let bytes = emit_query("Test = (function_declaration name: (identifier) @name)"); + + let header = Header::from_bytes(&bytes); + + // Should have at least one field ("name") + assert!(header.node_fields_count >= 1); + + let module = Module::from_bytes(bytes).expect("load module"); + let fields = module.node_fields(); + + // Find the "name" field + let has_name_field = (0..fields.len()).any(|i| { + let f = fields.get(i); + module.strings().get(f.name) == "name" + }); + assert!(has_name_field, "should have 'name' field"); +} + +#[test] +fn emit_with_struct_type() { + // This should produce a struct type with two fields + let bytes = + emit_query("Test = (function_declaration name: (identifier) @name body: (_) @body)"); + + // Load the module to check type metadata + let module = Module::from_bytes(bytes).expect("load module"); + let types = module.types(); + + // Should have type definitions for the struct + // The struct has 2 fields, so we expect type members + assert!(types.defs_count() >= 1 || types.members_count() >= 2); +} + +#[test] +fn string_table_builder_deduplicates() { + use plotnik_core::Interner; + + let mut interner = Interner::new(); + let sym1 = interner.intern("foo"); + let sym2 = interner.intern("bar"); + let sym3 = interner.intern("foo"); // Same as sym1 + + let mut builder = StringTableBuilder::new(); + let id1 = builder.get_or_intern(sym1, &interner).expect("id1"); + let id2 = builder.get_or_intern(sym2, &interner).expect("id2"); + let id3 = builder.get_or_intern(sym3, &interner).expect("id3"); + + assert_eq!(id1, id3); // Same symbol -> same StringId + assert_ne!(id1, id2); // Different symbols -> different 
StringIds + assert_eq!(builder.len(), 2); // Only 2 unique strings +} + +#[test] +fn string_table_builder_intern_str() { + let mut builder = StringTableBuilder::new(); + + let id1 = builder.intern_str("hello"); + let id2 = builder.intern_str("world"); + let id3 = builder.intern_str("hello"); // Duplicate + + assert_eq!(id1, id3); + assert_ne!(id1, id2); + assert_eq!(builder.len(), 2); +} + +#[test] +fn type_table_builder_builtins() { + use crate::query::type_check::{TYPE_NODE, TYPE_STRING, TYPE_VOID}; + + let mut builder = TypeTableBuilder::new(); + + // Build with empty context + let type_ctx = crate::query::type_check::TypeContext::new(); + let interner = plotnik_core::Interner::new(); + let mut strings = StringTableBuilder::new(); + + builder + .build(&type_ctx, &interner, &mut strings) + .expect("build"); + + // Builtins should be mapped + assert_eq!(builder.get(TYPE_VOID), Some(QTypeId::VOID)); + assert_eq!(builder.get(TYPE_NODE), Some(QTypeId::NODE)); + assert_eq!(builder.get(TYPE_STRING), Some(QTypeId::STRING)); +} + +#[test] +fn emit_checksum_is_valid() { + let bytes = emit_query("Test = (identifier) @id"); + + let header = Header::from_bytes(&bytes); + + // Verify checksum + let computed = crc32fast::hash(&bytes[64..]); + assert_eq!(header.checksum, computed, "checksum mismatch"); +} + +#[test] +fn emit_sections_are_aligned() { + let bytes = emit_query("Test = (identifier) @id"); + + let header = Header::from_bytes(&bytes); + + // All section offsets should be 64-byte aligned + assert_eq!(header.str_blob_offset % 64, 0, "str_blob not aligned"); + assert_eq!(header.str_table_offset % 64, 0, "str_table not aligned"); + assert_eq!(header.node_types_offset % 64, 0, "node_types not aligned"); + assert_eq!(header.node_fields_offset % 64, 0, "node_fields not aligned"); + assert_eq!(header.trivia_offset % 64, 0, "trivia not aligned"); + assert_eq!(header.type_meta_offset % 64, 0, "type_meta not aligned"); + assert_eq!(header.entrypoints_offset % 64, 0, 
"entrypoints not aligned"); + assert_eq!(header.transitions_offset % 64, 0, "transitions not aligned"); +} + +#[test] +fn debug_recursive_quantified() { + use crate::SourceMap; + use crate::bytecode::QTypeId; + + let src = "Item = (item (Item)* @children)"; + let source_map = SourceMap::one_liner(src); + let query = crate::query::QueryBuilder::new(source_map) + .parse() + .unwrap() + .analyze(); + + eprintln!("=== TypeContext ==="); + for (id, kind) in query.type_context().iter_types() { + eprintln!("TypeId {:?}: {:?}", id, kind); + } + + for (def_id, type_id) in query.type_context().iter_def_types() { + let name_sym = query.type_context().def_name_sym(def_id); + let name = query.interner().resolve(name_sym); + eprintln!("DefId {:?}: {} -> TypeId {:?}", def_id, name, type_id); + } + + let bytecode = query.emit().expect("emit"); + let module = Module::from_bytes(bytecode).expect("load"); + + eprintln!("\n=== Bytecode ==="); + eprintln!("TypeDefs count: {}", module.types().defs_count()); + for i in 0..module.types().defs_count() { + let def = module.types().get_def(i); + let type_id = QTypeId::from_custom_index(i); + eprintln!( + " TypeDef[{}] (id={:?}): kind={}, data={}, count={}", + i, type_id, def.kind, def.data, def.count + ); + } + + eprintln!("\nEntrypoints: {}", module.entrypoints().len()); + for i in 0..module.entrypoints().len() { + let ep = module.entrypoints().get(i); + let name = module.strings().get(ep.name); + eprintln!(" {}: result_type = {:?}", name, ep.result_type); + } + + eprintln!("\n=== TypeScript Output ==="); + let ts = crate::bytecode::emit::emit_typescript(&module); + eprintln!("{}", ts); +} + +#[test] +fn debug_untagged_alt() { + use crate::SourceMap; + use crate::bytecode::QTypeId; + + let src = "Q = [(a) @a (b) @b]"; + let source_map = SourceMap::one_liner(src); + let query = crate::query::QueryBuilder::new(source_map) + .parse() + .unwrap() + .analyze(); + + // Check type context + eprintln!("=== TypeContext ==="); + for (def_id, type_id) 
in query.type_context().iter_def_types() { + let name_sym = query.type_context().def_name_sym(def_id); + let name = query.interner().resolve(name_sym); + eprintln!("DefId {:?}: {} -> TypeId {:?}", def_id, name, type_id); + + if let Some(tk) = query.type_context().get_type(type_id) { + eprintln!(" TypeKind: {:?}", tk); + } + } + + // Emit bytecode + let bytecode = query.emit().expect("emit"); + let module = Module::from_bytes(bytecode).expect("load"); + + eprintln!("\n=== Bytecode ==="); + eprintln!("Entrypoints: {}", module.entrypoints().len()); + for i in 0..module.entrypoints().len() { + let ep = module.entrypoints().get(i); + let name = module.strings().get(ep.name); + eprintln!(" {}: result_type = {:?}", name, ep.result_type); + + if let Some(def) = module.types().get(ep.result_type) { + eprintln!( + " TypeDef: kind={}, data={}, count={}", + def.kind, def.data, def.count + ); + } else if ep.result_type.is_builtin() { + eprintln!(" Builtin type: {:?}", ep.result_type); + } + } + + eprintln!("\nTypeDefs count: {}", module.types().defs_count()); + for i in 0..module.types().defs_count() { + let def = module.types().get_def(i); + let type_id = QTypeId::from_custom_index(i); + eprintln!( + " TypeDef[{}] (id={:?}): kind={}, data={}, count={}", + i, type_id, def.kind, def.data, def.count + ); + } + + eprintln!("\nTypeMembers count: {}", module.types().members_count()); + for i in 0..module.types().members_count() { + let m = module.types().get_member(i); + let name = module.strings().get(m.name); + eprintln!( + " TypeMember[{}]: name={}, type_id={:?}", + i, name, m.type_id + ); + } + + eprintln!("\nTypeNames count: {}", module.types().names_count()); + for i in 0..module.types().names_count() { + let tn = module.types().get_name(i); + let name = module.strings().get(tn.name); + eprintln!(" TypeName[{}]: name={}, type_id={:?}", i, name, tn.type_id); + } + + eprintln!("\n=== TypeScript Output ==="); + let ts = crate::bytecode::emit::emit_typescript(&module); + 
eprintln!("{}", ts); +} diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 6eb9976e..8c707ec7 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -10,6 +10,7 @@ pub use symbol_table::SymbolTable; pub mod alt_kinds; mod dependencies; +pub mod emit; pub mod link; #[allow(clippy::module_inception)] pub mod query; @@ -22,6 +23,8 @@ mod alt_kinds_tests; #[cfg(test)] mod dependencies_tests; #[cfg(all(test, feature = "plotnik-langs"))] +mod emit_tests; +#[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] mod printer_tests; diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 5d72e6b1..88a792f1 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -200,6 +200,11 @@ impl QueryAnalyzed { &self.interner } + /// Emit bytecode (types only, no node validation). + pub fn emit(&self) -> Result, super::emit::EmitError> { + super::emit::emit(&self.type_context, &self.interner) + } + pub fn link(mut self, lang: &Lang) -> LinkedQuery { let mut output = link::LinkOutput::default(); @@ -263,6 +268,11 @@ impl LinkedQuery { pub fn node_field_ids(&self) -> &HashMap { &self.node_field_ids } + + /// Emit bytecode (includes node type/field validation info). + pub fn emit(&self) -> Result, super::emit::EmitError> { + super::emit::emit_linked(self) + } } impl Deref for LinkedQuery {