diff --git a/crates/plotnik-bytecode/src/bytecode/dump.rs b/crates/plotnik-bytecode/src/bytecode/dump.rs index ecb0111..d694b94 100644 --- a/crates/plotnik-bytecode/src/bytecode/dump.rs +++ b/crates/plotnik-bytecode/src/bytecode/dump.rs @@ -23,6 +23,7 @@ pub fn dump(module: &Module, colors: Colors) -> String { let ctx = DumpContext::new(module, colors); dump_strings(&mut out, module, &ctx); + dump_regexes(&mut out, module, &ctx); dump_types_defs(&mut out, module, &ctx); dump_types_members(&mut out, module, &ctx); dump_types_names(&mut out, module, &ctx); @@ -143,6 +144,27 @@ fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) { out.push('\n'); } +fn dump_regexes(out: &mut String, module: &Module, ctx: &DumpContext) { + let count = module.header().regex_table_count as usize; + // Index 0 of the regex table is reserved, so a count of 0 or 1 means no real patterns: emit no [regex] section + if count <= 1 { + return; + } + + let c = &ctx.colors; + let regexes = module.regexes(); + let w = width_for_count(count); + + writeln!(out, "{}[regex]{}", c.blue, c.reset).unwrap(); + // Start at 1 to skip the reserved index 0; each pattern's text is looked up in the string table by its string id + for i in 1..count { + let string_id = regexes.get_string_id(i); + let pattern = &ctx.all_strings[string_id.get() as usize]; + writeln!(out, "R{i:0w$} {}/{pattern}/{}", c.green, c.reset).unwrap(); + } + out.push('\n'); +} + fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) { let c = &ctx.colors; let types = module.types(); diff --git a/docs/binary-format/07-dump-format.md b/docs/binary-format/07-dump-format.md index fc749f9..9353660 100644 --- a/docs/binary-format/07-dump-format.md +++ b/docs/binary-format/07-dump-format.md @@ -81,6 +81,22 @@ Value: - **`Value`**: The compiled query definition. Step 08 branches to try `Num` (step 11) or `Str` (step 16). - **`...`**: Padding slots (multi-step instructions occupy consecutive step IDs). 
+### Regex Section + +When the query contains regex predicates (`=~` or `!~`), a `[regex]` section appears after `[strings]`: + +``` +[regex] +R1 /pattern/ +R2 /another.*/ +``` + +Format: `R<index> /<pattern>/` + +- Index 0 is reserved, so regex IDs start at 1 +- Patterns are displayed from the string table for readability +- In transitions, predicates reference patterns inline: `(identifier) =~ /foo/` + ## Files - `crates/plotnik-lib/src/bytecode/dump.rs` — Dump formatting logic @@ -184,6 +200,7 @@ Effects in `[pre]` execute before match attempt; effects in `[post]` execute aft | Prefix | Section | Description | | ------ | ------------ | ----------- | | S## | strings | StringId | +| R## | regex | RegexId | | T## | type_defs | TypeId | | M## | type_members | MemberIndex | | N## | type_names | NameIndex |