Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions crates/plotnik-lib/src/bytecode/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@ pub const SECTION_ALIGN: usize = 64;
/// Step size in bytes (all instructions are 8-byte aligned).
pub const STEP_SIZE: usize = 8;

/// Sentinel value for "any named node" wildcard `(_)`.
///
/// When `node_type` equals this value, the VM checks `node.is_named()`
/// instead of comparing type IDs. This distinguishes `(_)` (any named)
/// from `_` (any node including anonymous).
pub const NAMED_WILDCARD: u16 = 0xFFFF;

/// Maximum payload slots for Match instructions.
///
/// Match64 (the largest variant) supports up to 28 u16 slots for
Expand Down
61 changes: 41 additions & 20 deletions crates/plotnik-lib/src/bytecode/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ use std::fmt::Write as _;

use crate::colors::Colors;

use super::NAMED_WILDCARD;
use super::format::{LineBuilder, Symbol, format_effect, nav_symbol_epsilon, width_for_count};
use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
use super::ids::TypeId;
use super::instructions::StepId;
use super::ir::NodeTypeIR;
use super::module::{Instruction, Module};
use super::type_meta::{TypeData, TypeKind};
use super::{Call, Match, Return, Trampoline};
Expand Down Expand Up @@ -446,7 +446,7 @@ fn format_match(
step_width: usize,
) -> String {
let builder = LineBuilder::new(step_width);
let symbol = nav_symbol_epsilon(m.nav, m.is_epsilon());
let symbol = nav_symbol(m.nav);
let prefix = format!(" {:0sw$} {} ", step, symbol.format(), sw = step_width);

let content = format_match_content(m, ctx);
Expand All @@ -464,17 +464,20 @@ fn format_match_content(m: &Match, ctx: &DumpContext) -> String {
parts.push(format!("[{}]", pre.join(" ")));
}

for field_id in m.neg_fields() {
let name = ctx
.node_field_name(field_id)
.map(String::from)
.unwrap_or_else(|| format!("field#{field_id}"));
parts.push(format!("-{name}"));
}
// Skip neg_fields and node pattern for epsilon (no node interaction)
if !m.is_epsilon() {
for field_id in m.neg_fields() {
let name = ctx
.node_field_name(field_id)
.map(String::from)
.unwrap_or_else(|| format!("field#{field_id}"));
parts.push(format!("-{name}"));
}

let node_part = format_node_pattern(m, ctx);
if !node_part.is_empty() {
parts.push(node_part);
let node_part = format_node_pattern(m, ctx);
if !node_part.is_empty() {
parts.push(node_part);
}
}

let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
Expand All @@ -485,7 +488,7 @@ fn format_match_content(m: &Match, ctx: &DumpContext) -> String {
parts.join(" ")
}

/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)`
/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
let mut result = String::new();

Expand All @@ -498,11 +501,17 @@ fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
result.push_str(": ");
}

if let Some(type_id) = m.node_type {
if type_id.get() == NAMED_WILDCARD {
match m.node_type {
NodeTypeIR::Any => {
// Any node wildcard: `_`
result.push('_');
}
NodeTypeIR::Named(None) => {
// Named wildcard: any named node
result.push_str("(_)");
} else {
}
NodeTypeIR::Named(Some(type_id)) => {
// Specific named node type
let name = ctx
.node_type_name(type_id.get())
.map(String::from)
Expand All @@ -511,8 +520,20 @@ fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
result.push_str(&name);
result.push(')');
}
} else if m.node_field.is_some() {
result.push('_');
NodeTypeIR::Anonymous(None) => {
// Anonymous wildcard: any anonymous node (future syntax)
result.push_str("\"_\"");
}
NodeTypeIR::Anonymous(Some(type_id)) => {
// Specific anonymous node (literal token)
let name = ctx
.node_type_name(type_id.get())
.map(String::from)
.unwrap_or_else(|| format!("anon#{}", type_id.get()));
result.push('"');
result.push_str(&name);
result.push('"');
}
}

result
Expand All @@ -538,7 +559,7 @@ fn format_call(
) -> String {
let c = &ctx.colors;
let builder = LineBuilder::new(step_width);
let symbol = nav_symbol_epsilon(call.nav, false);
let symbol = nav_symbol(call.nav());
let prefix = format!(" {:0sw$} {} ", step, symbol.format(), sw = step_width);

// Format field constraint if present
Expand Down
17 changes: 2 additions & 15 deletions crates/plotnik-lib/src/bytecode/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ impl Symbol {
///
/// | Nav | Symbol | Notes |
/// | --------------- | ------- | ----------------------------------- |
/// | Epsilon | ε | Pure control flow, no cursor check |
/// | Stay | (blank) | No movement, 5 spaces |
/// | Stay (epsilon) | ε | Only when no type/field constraints |
/// | StayExact | ! | Stay at position, exact match only |
/// | Down | ▽ | First child, skip any |
/// | DownSkip | !▽ | First child, skip trivia |
Expand All @@ -86,6 +86,7 @@ impl Symbol {
/// | UpExact(n) | !!△ⁿ | Ascend n, must be last child |
pub fn nav_symbol(nav: Nav) -> Symbol {
match nav {
Nav::Epsilon => Symbol::EPSILON,
Nav::Stay => Symbol::EMPTY,
Nav::StayExact => Symbol::new(" ", "!", " "),
Nav::Down => Symbol::new(" ", "▽", " "),
Expand All @@ -100,20 +101,6 @@ pub fn nav_symbol(nav: Nav) -> Symbol {
}
}

/// Choose the display symbol for a navigation step, with an epsilon override.
///
/// The caller decides whether the step is an epsilon transition and passes
/// that in as `is_epsilon`; this function does not inspect `nav` to work it
/// out. When the flag is set the result is always `Symbol::EPSILON`;
/// otherwise the symbol comes from `nav_symbol(nav)`.
pub fn nav_symbol_epsilon(nav: Nav, is_epsilon: bool) -> Symbol {
    // Non-epsilon steps are rendered exactly like plain navigation.
    if !is_epsilon {
        return nav_symbol(nav);
    }

    Symbol::EPSILON
}

/// Trace sub-line symbols.
pub mod trace {
use super::Symbol;
Expand Down
64 changes: 44 additions & 20 deletions crates/plotnik-lib/src/bytecode/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::num::NonZeroU16;

use super::constants::{SECTION_ALIGN, STEP_SIZE};
use super::effects::EffectOp;
use super::ir::NodeTypeIR;
use super::nav::Nav;

/// Step address in bytecode (raw u16).
Expand Down Expand Up @@ -129,12 +130,12 @@ impl Opcode {
#[derive(Clone, Copy, Debug)]
pub struct Match<'a> {
bytes: &'a [u8],
/// Segment index (0-15, currently only 0 is used).
/// Segment index (0-3, currently only 0 is used).
pub segment: u8,
/// Navigation command.
/// Navigation command. `Epsilon` means no cursor movement or node check.
pub nav: Nav,
/// Node type constraint (None = wildcard).
pub node_type: Option<NonZeroU16>,
/// Node type constraint (Any = wildcard, Named/Anonymous for specific checks).
pub node_type: NodeTypeIR,
/// Field constraint (None = wildcard).
pub node_field: Option<NonZeroU16>,
/// Whether this is Match8 (no payload) or extended.
Expand All @@ -153,18 +154,23 @@ impl<'a> Match<'a> {
///
/// The slice must start at the instruction and contain at least
/// the full instruction size (determined by opcode).
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
#[inline]
pub fn from_bytes(bytes: &'a [u8]) -> Self {
debug_assert!(bytes.len() >= 8, "Match instruction too short");

let type_id_byte = bytes[0];
let segment = type_id_byte >> 4;
debug_assert!(segment == 0, "non-zero segment not yet supported");
// Header byte: segment(2) | node_kind(2) | opcode(4)
let segment = (type_id_byte >> 6) & 0x3;
let node_kind = (type_id_byte >> 4) & 0x3;
let opcode = Opcode::from_u8(type_id_byte & 0xF);
debug_assert!(segment == 0, "non-zero segment not yet supported");
debug_assert!(opcode.is_match(), "expected Match opcode");

let nav = Nav::from_byte(bytes[1]);
let node_type = NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]]));
let node_type_val = u16::from_le_bytes([bytes[2], bytes[3]]);
let node_type = NodeTypeIR::from_bytes(node_kind, node_type_val);
let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]]));

let (is_match8, match8_next, pre_count, neg_count, post_count, succ_count) =
Expand Down Expand Up @@ -207,7 +213,7 @@ impl<'a> Match<'a> {
/// Check if this is an epsilon transition (no node interaction).
///
/// A step is epsilon exactly when its navigation command is `Nav::Epsilon`.
/// The node type and field constraints are not consulted.
#[inline]
pub fn is_epsilon(&self) -> bool {
    self.nav == Nav::Epsilon
}

/// Number of successors.
Expand Down Expand Up @@ -282,7 +288,7 @@ impl<'a> Match<'a> {
/// Call instruction for invoking definitions (recursion).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Call {
/// Segment index (0-15).
/// Segment index (0-3).
pub(crate) segment: u8,
/// Navigation to apply before jumping to target.
pub(crate) nav: Nav,
Expand All @@ -307,14 +313,17 @@ impl Call {
}

/// Decode from 8-byte bytecode.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
/// For Call, node_kind bits are ignored (always 0).
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
let type_id_byte = bytes[0];
let segment = type_id_byte >> 4;
let segment = (type_id_byte >> 6) & 0x3;
let opcode = Opcode::from_u8(type_id_byte & 0xF);
assert!(
segment == 0,
"non-zero segment not yet supported: {segment}"
);
let opcode = Opcode::from_u8(type_id_byte & 0xF);
assert_eq!(opcode, Opcode::Call, "expected Call opcode");

Self {
Expand All @@ -327,9 +336,12 @@ impl Call {
}

/// Encode to 8-byte bytecode.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
pub fn to_bytes(&self) -> [u8; 8] {
let mut bytes = [0u8; 8];
bytes[0] = (self.segment << 4) | (Opcode::Call as u8);
// node_kind = 0 for Call
bytes[0] = (self.segment << 6) | (Opcode::Call as u8);
bytes[1] = self.nav.to_byte();
bytes[2..4].copy_from_slice(&self.node_field.map_or(0, |v| v.get()).to_le_bytes());
bytes[4..6].copy_from_slice(&self.next.get().to_le_bytes());
Expand All @@ -354,7 +366,7 @@ impl Call {
/// Return instruction for returning from definitions.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Return {
/// Segment index (0-3): the header byte layout
/// `segment(2) | node_kind(2) | opcode(4)` reserves two bits for it.
pub(crate) segment: u8,
}

Expand All @@ -365,23 +377,29 @@ impl Return {
}

/// Decode a `Return` from its 8-byte encoding.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`.
/// For Return, the node_kind bits are ignored (always 0).
///
/// # Panics
///
/// Panics if the segment bits are non-zero (not yet supported) or if the
/// low nibble does not decode to `Opcode::Return`.
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
    let header = bytes[0];
    // Segment occupies the top two bits; the opcode is the low nibble.
    let segment = (header >> 6) & 0x3;
    let opcode = Opcode::from_u8(header & 0xF);
    assert!(
        segment == 0,
        "non-zero segment not yet supported: {segment}"
    );
    assert_eq!(opcode, Opcode::Return, "expected Return opcode");

    Self { segment }
}

/// Encode to 8-byte bytecode.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`.
/// Only the header byte is written; the remaining seven bytes stay zero.
pub fn to_bytes(&self) -> [u8; 8] {
    let mut bytes = [0u8; 8];
    // node_kind = 0 for Return, so bits 4-5 of the header are left clear.
    bytes[0] = (self.segment << 6) | (Opcode::Return as u8);
    // bytes[1..8] are reserved/padding
    bytes
}
Expand All @@ -400,7 +418,7 @@ impl Default for Return {
/// the entry preamble: `Obj → Trampoline → EndObj → Accept`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Trampoline {
/// Segment index (0-15).
/// Segment index (0-3).
pub(crate) segment: u8,
/// Return address (where to continue after entrypoint returns).
pub(crate) next: StepId,
Expand All @@ -413,14 +431,17 @@ impl Trampoline {
}

/// Decode from 8-byte bytecode.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
/// For Trampoline, node_kind bits are ignored (always 0).
pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
let type_id_byte = bytes[0];
let segment = type_id_byte >> 4;
let segment = (type_id_byte >> 6) & 0x3;
let opcode = Opcode::from_u8(type_id_byte & 0xF);
assert!(
segment == 0,
"non-zero segment not yet supported: {segment}"
);
let opcode = Opcode::from_u8(type_id_byte & 0xF);
assert_eq!(opcode, Opcode::Trampoline, "expected Trampoline opcode");

Self {
Expand All @@ -430,9 +451,12 @@ impl Trampoline {
}

/// Encode to 8-byte bytecode.
///
/// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
pub fn to_bytes(&self) -> [u8; 8] {
let mut bytes = [0u8; 8];
bytes[0] = (self.segment << 4) | (Opcode::Trampoline as u8);
// node_kind = 0 for Trampoline
bytes[0] = (self.segment << 6) | (Opcode::Trampoline as u8);
// bytes[1] is padding
bytes[2..4].copy_from_slice(&self.next.get().to_le_bytes());
// bytes[4..8] are reserved/padding
Expand Down
Loading