From 7769305a3218ef59a67640bc0e842e7cd0c8f48d Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 3 Jan 2026 09:03:42 -0300 Subject: [PATCH] refactor: use NonZeroU16 for StepId/StringId, colorize dump output --- crates/plotnik-cli/src/commands/dump.rs | 4 +- crates/plotnik-lib/src/bytecode/dump.rs | 266 ++++++------------ crates/plotnik-lib/src/bytecode/entrypoint.rs | 2 +- crates/plotnik-lib/src/bytecode/ids.rs | 51 +++- .../plotnik-lib/src/bytecode/instructions.rs | 31 +- .../src/bytecode/instructions_tests.rs | 20 +- crates/plotnik-lib/src/bytecode/ir.rs | 29 +- crates/plotnik-lib/src/bytecode/module.rs | 41 +-- .../plotnik-lib/src/bytecode/module_tests.rs | 24 +- crates/plotnik-lib/src/compile/expressions.rs | 51 +++- crates/plotnik-lib/src/emit/codegen_tests.rs | 10 + crates/plotnik-lib/src/emit/layout.rs | 10 +- crates/plotnik-lib/src/emit/layout_tests.rs | 43 +-- crates/plotnik-lib/src/emit/mod.rs | 32 ++- ..._codegen_tests__alternations_captured.snap | 6 +- ...n_tests__alternations_captured_tagged.snap | 14 +- ...gen_tests__alternations_in_quantifier.snap | 52 ++-- ...__codegen_tests__alternations_labeled.snap | 14 +- ...ts__alternations_no_internal_captures.snap | 12 +- ...en_tests__alternations_null_injection.snap | 20 +- ...codegen_tests__alternations_unlabeled.snap | 20 +- ...degen_tests__anchors_between_siblings.snap | 10 +- ...t__codegen_tests__anchors_first_child.snap | 8 +- ...it__codegen_tests__anchors_last_child.snap | 8 +- ...mit__codegen_tests__anchors_no_anchor.snap | 10 +- ...codegen_tests__anchors_with_anonymous.snap | 10 +- ...__emit__codegen_tests__captures_basic.snap | 4 +- ...codegen_tests__captures_deeply_nested.snap | 25 +- ...mit__codegen_tests__captures_multiple.snap | 14 +- ...__codegen_tests__captures_nested_flat.snap | 17 +- ...sts__captures_optional_wrapper_struct.snap | 26 +- ..._codegen_tests__captures_struct_scope.snap | 14 +- ...egen_tests__captures_with_type_custom.snap | 4 +- ...egen_tests__captures_with_type_string.snap | 4 +- ...odegen_tests__captures_wrapper_struct.snap | 36 +-- ...tests__comprehensive_multi_definition.snap | 70 ++--- ...__codegen_tests__definitions_multiple.snap | 6 +- ..._codegen_tests__definitions_reference.snap | 24 +- ...it__codegen_tests__definitions_single.snap | 4 +- ...it__codegen_tests__fields_alternation.snap | 51 ++++ ..._emit__codegen_tests__fields_multiple.snap | 14 +- ...__emit__codegen_tests__fields_negated.snap | 10 +- ...b__emit__codegen_tests__fields_single.snap | 8 +- ..._emit__codegen_tests__nodes_anonymous.snap | 8 +- ...lib__emit__codegen_tests__nodes_error.snap | 4 +- ...b__emit__codegen_tests__nodes_missing.snap | 4 +- ...lib__emit__codegen_tests__nodes_named.snap | 4 +- ...it__codegen_tests__nodes_wildcard_any.snap | 8 +- ...__codegen_tests__nodes_wildcard_named.snap | 8 +- ...__codegen_tests__optional_first_child.snap | 24 +- ...odegen_tests__optional_null_injection.snap | 12 +- ..._tests__quantifiers_first_child_array.snap | 28 +- ...__codegen_tests__quantifiers_optional.snap | 12 +- ...tests__quantifiers_optional_nongreedy.snap | 12 +- ...emit__codegen_tests__quantifiers_plus.snap | 10 +- ...gen_tests__quantifiers_plus_nongreedy.snap | 10 +- ..._tests__quantifiers_repeat_navigation.snap | 20 +- ...emit__codegen_tests__quantifiers_star.snap | 12 +- ...gen_tests__quantifiers_star_nongreedy.snap | 12 +- ...degen_tests__quantifiers_struct_array.snap | 36 +-- ...emit__codegen_tests__recursion_simple.snap | 26 +- ...sts__recursion_with_structured_result.snap | 66 +++-- ..._emit__codegen_tests__sequences_basic.snap | 10 +- ...odegen_tests__sequences_in_quantifier.snap | 24 +- ...emit__codegen_tests__sequences_nested.snap | 14 +- ...odegen_tests__sequences_with_captures.snap | 14 +- crates/plotnik-lib/src/query/query_tests.rs | 4 +- 67 files changed, 797 insertions(+), 714 deletions(-) create mode 100644 crates/plotnik-lib/src/emit/snapshots/plotnik_lib__emit__codegen_tests__fields_alternation.snap diff --git a/crates/plotnik-cli/src/commands/dump.rs b/crates/plotnik-cli/src/commands/dump.rs index 2e8d5784..55d65c7d 100644 --- a/crates/plotnik-cli/src/commands/dump.rs +++ b/crates/plotnik-cli/src/commands/dump.rs @@ -2,6 +2,7 @@ use std::path::PathBuf; use plotnik_lib::QueryBuilder; use plotnik_lib::bytecode::{Module, dump}; +use plotnik_lib::Colors; use super::lang_resolver::{resolve_lang, resolve_lang_required, suggest_language}; use super::query_loader::load_query_source; @@ -80,5 +81,6 @@ pub fn run(args: DumpArgs) { }; let module = Module::from_bytes(bytecode).expect("module loading failed"); - print!("{}", dump(&module)); + let colors = Colors::new(args.color); + print!("{}", dump(&module, colors)); } diff --git a/crates/plotnik-lib/src/bytecode/dump.rs b/crates/plotnik-lib/src/bytecode/dump.rs index debc7627..50ee3185 100644 --- a/crates/plotnik-lib/src/bytecode/dump.rs +++ b/crates/plotnik-lib/src/bytecode/dump.rs @@ -1,23 +1,24 @@ //! Human-readable bytecode dump for debugging and documentation. //! -//! See `docs/wip/bytecode.md` for the output format specification. +//! See `docs/binary-format/07-dump-format.md` for the output format specification. use std::collections::BTreeMap; use std::fmt::Write as _; -use super::effects::EffectOpcode; -use super::ids::{QTypeId, StepId, StringId}; +use crate::colors::Colors; + +use super::format::{format_effect, nav_symbol_epsilon, width_for_count, LineBuilder, Symbol}; +use super::ids::{QTypeId, StepId}; use super::module::{Instruction, Module}; -use super::nav::Nav; use super::type_meta::TypeKind; use super::{Call, Match, Return}; /// Generate a human-readable dump of the bytecode module. -pub fn dump(module: &Module) -> String { +pub fn dump(module: &Module, colors: Colors) -> String { let mut out = String::new(); - let ctx = DumpContext::new(module); + let ctx = DumpContext::new(module, colors); - dump_header(&mut out, module); + dump_header(&mut out, module, &ctx); dump_strings(&mut out, module, &ctx); dump_types_defs(&mut out, module, &ctx); dump_types_members(&mut out, module, &ctx); @@ -28,21 +29,14 @@ pub fn dump(module: &Module) -> String { out } -fn dump_header(out: &mut String, module: &Module) { +fn dump_header(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let header = module.header(); - out.push_str("[flags]\n"); + writeln!(out, "{}[flags]{}", c.blue, c.reset).unwrap(); writeln!(out, "linked = {}", header.is_linked()).unwrap(); out.push('\n'); } -/// Calculate the minimum width needed to display numbers up to `count - 1`. -fn width_for_count(count: usize) -> usize { - if count <= 1 { - 1 - } else { - ((count - 1) as f64).log10().floor() as usize + 1 - } -} /// Context for dump formatting, precomputes lookups for O(1) access. struct DumpContext { @@ -66,10 +60,12 @@ struct DumpContext { name_width: usize, /// Width for step indices. step_width: usize, + /// Color palette. + colors: Colors, } impl DumpContext { - fn new(module: &Module) -> Self { + fn new(module: &Module, colors: Colors) -> Self { let header = module.header(); let is_linked = header.is_linked(); let strings = module.strings(); @@ -81,7 +77,7 @@ impl DumpContext { for i in 0..entrypoints.len() { let ep = entrypoints.get(i); let name = strings.get(ep.name).to_string(); - step_labels.insert(ep.target.0, name); + step_labels.insert(ep.target.get(), name); } let mut node_type_names = BTreeMap::new(); @@ -99,7 +95,7 @@ impl DumpContext { // Collect all strings for unlinked mode lookups let str_count = header.str_table_count as usize; let all_strings: Vec = (0..str_count) - .map(|i| strings.get(StringId(i as u16)).to_string()) + .map(|i| strings.get_by_index(i).to_string()) .collect(); // Compute widths for index formatting @@ -122,11 +118,12 @@ impl DumpContext { member_width, name_width, step_width, + colors, } } fn label_for(&self, step: StepId) -> Option<&str> { - self.step_labels.get(&step.0).map(|s| s.as_str()) + self.step_labels.get(&step.get()).map(|s| s.as_str()) } /// Get the name for a node type ID. @@ -157,25 +154,27 @@ impl DumpContext { } fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let strings = module.strings(); let count = module.header().str_table_count as usize; let w = ctx.str_width; - out.push_str("[strings]\n"); + writeln!(out, "{}[strings]{}", c.blue, c.reset).unwrap(); for i in 0..count { - let s = strings.get(StringId(i as u16)); - writeln!(out, "S{i:0w$} {s:?}").unwrap(); + let s = strings.get_by_index(i); + writeln!(out, "S{i:0w$} {}{s:?}{}", c.green, c.reset).unwrap(); } out.push('\n'); } fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let types = module.types(); let strings = module.strings(); let tw = ctx.type_width; let mw = ctx.member_width; - out.push_str("[type_defs]\n"); + writeln!(out, "{}[type_defs]{}", c.blue, c.reset).unwrap(); // All types are now in type_defs, including builtins for i in 0..types.defs_count() { @@ -197,7 +196,7 @@ fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) { TypeKind::Alias => format!("Alias(T{:0tw$})", def.data), }; - // Generate comment for non-primitives + // Generate comment for non-primitives (comments are dim) let comment = match kind { TypeKind::Void | TypeKind::Node | TypeKind::String => String::new(), TypeKind::Struct => { @@ -205,26 +204,26 @@ fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) { .members_of(&def) .map(|m| strings.get(m.name).to_string()) .collect(); - format!(" ; {{ {} }}", fields.join(", ")) + format!("{} ; {{ {} }}{}", c.dim, fields.join(", "), c.reset) } TypeKind::Enum => { let variants: Vec<_> = types .members_of(&def) .map(|m| strings.get(m.name).to_string()) .collect(); - format!(" ; {}", variants.join(" | ")) + format!("{} ; {}{}", c.dim, variants.join(" | "), c.reset) } TypeKind::Optional => { let inner_name = format_type_name(QTypeId(def.data), module, ctx); - format!(" ; {}?", inner_name) + format!("{} ; {}?{}", c.dim, inner_name, c.reset) } TypeKind::ArrayZeroOrMore => { let inner_name = format_type_name(QTypeId(def.data), module, ctx); - format!(" ; {}*", inner_name) + format!("{} ; {}*{}", c.dim, inner_name, c.reset) } TypeKind::ArrayOneOrMore => { let inner_name = format_type_name(QTypeId(def.data), module, ctx); - format!(" ; {}+", inner_name) + format!("{} ; {}+{}", c.dim, inner_name, c.reset) } TypeKind::Alias => String::new(), }; @@ -235,21 +234,25 @@ fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) { } fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let types = module.types(); let strings = module.strings(); let mw = ctx.member_width; let sw = ctx.str_width; let tw = ctx.type_width; - out.push_str("[type_members]\n"); + writeln!(out, "{}[type_members]{}", c.blue, c.reset).unwrap(); for i in 0..types.members_count() { let member = types.get_member(i); let name = strings.get(member.name); let type_name = format_type_name(member.type_id, module, ctx); writeln!( out, - "M{i:0mw$}: S{:0sw$} → T{:0tw$} ; {name}: {type_name}", - member.name.0, member.type_id.0 + "M{i:0mw$}: S{:0sw$} → T{:0tw$} {}; {name}: {type_name}{}", + member.name.0, + member.type_id.0, + c.dim, + c.reset ) .unwrap(); } @@ -257,20 +260,25 @@ fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) { } fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let types = module.types(); let strings = module.strings(); let nw = ctx.name_width; let sw = ctx.str_width; let tw = ctx.type_width; - out.push_str("[type_names]\n"); + writeln!(out, "{}[type_names]{}", c.blue, c.reset).unwrap(); for i in 0..types.names_count() { let entry = types.get_name(i); let name = strings.get(entry.name); writeln!( out, - "N{i:0nw$}: S{:0sw$} → T{:0tw$} ; {name}", - entry.name.0, entry.type_id.0 + "N{i:0nw$}: S{:0sw$} → T{:0tw$} {}; {}{name}{}", + entry.name.0, + entry.type_id.0, + c.dim, + c.blue, + c.reset ) .unwrap(); } @@ -304,12 +312,13 @@ fn format_type_name(type_id: QTypeId, module: &Module, ctx: &DumpContext) -> Str } fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let strings = module.strings(); let entrypoints = module.entrypoints(); let stw = ctx.step_width; let tw = ctx.type_width; - out.push_str("[entrypoints]\n"); + writeln!(out, "{}[entrypoints]{}", c.blue, c.reset).unwrap(); // Collect and sort by name for display let mut entries: Vec<_> = (0..entrypoints.len()) @@ -327,7 +336,9 @@ fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) { for (name, target, type_id) in entries { writeln!( out, - "{name:width$} = {:0stw$} :: T{type_id:0tw$}", + "{}{name:width$}{} = {:0stw$} :: T{type_id:0tw$}", + c.blue, + c.reset, target, width = max_len ) @@ -337,20 +348,21 @@ fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) { } fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { + let c = &ctx.colors; let header = module.header(); let transitions_count = header.transitions_count as usize; let step_width = ctx.step_width; - out.push_str("[transitions]\n"); + writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap(); let mut step = 0u16; while (step as usize) < transitions_count { - // Check if this step has a label - if let Some(label) = ctx.label_for(StepId(step)) { - writeln!(out, "\n{label}:").unwrap(); + // Check if this step has a label (using raw u16) + if let Some(label) = ctx.step_labels.get(&step) { + writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap(); } - let instr = module.decode_step(StepId(step)); + let instr = module.decode_step_alloc(step); let line = format_instruction(step, &instr, module, ctx, step_width); out.push_str(&line); out.push('\n'); @@ -361,12 +373,6 @@ fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) { } } -/// Pad a base string to a target column width, then append a suffix. -/// Ensures at least 2 spaces between base and suffix. -fn pad_to_column(base: String, col: usize, suffix: &str) -> String { - let padding = col.saturating_sub(base.chars().count()).max(2); - format!("{base}{:padding$}{suffix}", "") -} fn instruction_step_count(instr: &Instruction) -> u16 { match instr { @@ -398,35 +404,6 @@ fn instruction_step_count(instr: &Instruction) -> u16 { } } -// ============================================================================= -// Instruction Line Format -// ============================================================================= -// -// Each instruction line follows this column layout: -// -//