From ffd317c49dd3c561aa885f5705cdff19f0158469 Mon Sep 17 00:00:00 2001
From: Sergei Zharinov
Date: Mon, 29 Dec 2025 15:01:35 -0300
Subject: [PATCH] feat: add bytecode navigation and sections

Add section primitives:
- nav: VM navigation commands (Stay, Next, Down, Up variants)
- sections: Slice, NodeSymbol, FieldSymbol, TriviaEntry
---
 crates/plotnik-lib/src/bytecode/mod.rs      |   6 +
 crates/plotnik-lib/src/bytecode/nav.rs      | 135 ++++++++++++++++++++
 crates/plotnik-lib/src/bytecode/sections.rs |  61 +++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 crates/plotnik-lib/src/bytecode/nav.rs
 create mode 100644 crates/plotnik-lib/src/bytecode/sections.rs

diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs
index 73bc06cc..ccec40ad 100644
--- a/crates/plotnik-lib/src/bytecode/mod.rs
+++ b/crates/plotnik-lib/src/bytecode/mod.rs
@@ -5,6 +5,8 @@
 mod constants;
 mod header;
 mod ids;
+mod nav;
+mod sections;
 
 pub use constants::{
     MAGIC, SECTION_ALIGN, STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING,
@@ -14,3 +16,7 @@ pub use constants::{
 pub use ids::{QTypeId, StepId, StringId};
 
 pub use header::Header;
+
+pub use nav::Nav;
+
+pub use sections::{FieldSymbol, NodeSymbol, Slice, TriviaEntry};
diff --git a/crates/plotnik-lib/src/bytecode/nav.rs b/crates/plotnik-lib/src/bytecode/nav.rs
new file mode 100644
index 00000000..1ef102a7
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/nav.rs
@@ -0,0 +1,135 @@
+//! Navigation command encoding for bytecode instructions.
+//!
+//! Navigation determines how the VM moves through the tree-sitter AST.
+
+/// Navigation command for VM execution.
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
+pub enum Nav {
+    #[default]
+    Stay,
+    Next,
+    NextSkip,
+    NextExact,
+    Down,
+    DownSkip,
+    DownExact,
+    Up(u8),
+    UpSkipTrivia(u8),
+    UpExact(u8),
+}
+
+impl Nav {
+    /// Decode from bytecode byte.
+    ///
+    /// Byte layout:
+    /// - Bits 7-6: Mode (00=Standard, 01=Up, 10=UpSkipTrivia, 11=UpExact)
+    /// - Bits 5-0: Payload (enum value for Standard, level count for Up variants)
+    pub fn from_byte(b: u8) -> Self {
+        let mode = b >> 6;
+        let payload = b & 0x3F;
+
+        match mode {
+            0b00 => match payload {
+                0 => Self::Stay,
+                1 => Self::Next,
+                2 => Self::NextSkip,
+                3 => Self::NextExact,
+                4 => Self::Down,
+                5 => Self::DownSkip,
+                6 => Self::DownExact,
+                _ => panic!("invalid nav standard: {payload}"),
+            },
+            0b01 => {
+                assert!(payload >= 1, "invalid nav up level: {payload}");
+                Self::Up(payload)
+            }
+            0b10 => {
+                assert!(payload >= 1, "invalid nav up_skip_trivia level: {payload}");
+                Self::UpSkipTrivia(payload)
+            }
+            0b11 => {
+                assert!(payload >= 1, "invalid nav up_exact level: {payload}");
+                Self::UpExact(payload)
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    /// Encode to bytecode byte.
+    pub fn to_byte(self) -> u8 {
+        match self {
+            Self::Stay => 0,
+            Self::Next => 1,
+            Self::NextSkip => 2,
+            Self::NextExact => 3,
+            Self::Down => 4,
+            Self::DownSkip => 5,
+            Self::DownExact => 6,
+            Self::Up(n) => {
+                debug_assert!((1..=63).contains(&n));
+                0b01_000000 | n
+            }
+            Self::UpSkipTrivia(n) => {
+                debug_assert!((1..=63).contains(&n));
+                0b10_000000 | n
+            }
+            Self::UpExact(n) => {
+                debug_assert!((1..=63).contains(&n));
+                0b11_000000 | n
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn nav_standard_roundtrip() {
+        for nav in [
+            Nav::Stay,
+            Nav::Next,
+            Nav::NextSkip,
+            Nav::NextExact,
+            Nav::Down,
+            Nav::DownSkip,
+            Nav::DownExact,
+        ] {
+            assert_eq!(Nav::from_byte(nav.to_byte()), nav);
+        }
+    }
+
+    #[test]
+    fn nav_up_roundtrip() {
+        let nav = Nav::Up(5);
+        assert_eq!(Nav::from_byte(nav.to_byte()), nav);
+
+        let nav = Nav::UpSkipTrivia(10);
+        assert_eq!(Nav::from_byte(nav.to_byte()), nav);
+
+        let nav = Nav::UpExact(63);
+        assert_eq!(Nav::from_byte(nav.to_byte()), nav);
+    }
+
+    #[test]
+    fn nav_byte_encoding() {
+        assert_eq!(Nav::Stay.to_byte(), 0b00_000000);
+        assert_eq!(Nav::Down.to_byte(), 0b00_000100);
+        assert_eq!(Nav::Up(5).to_byte(), 0b01_000101);
+        assert_eq!(Nav::UpSkipTrivia(3).to_byte(), 0b10_000011);
+        assert_eq!(Nav::UpExact(1).to_byte(), 0b11_000001);
+    }
+
+    #[test]
+    #[should_panic(expected = "invalid nav standard")]
+    fn nav_invalid_standard_panics() {
+        Nav::from_byte(0b00_111111);
+    }
+
+    #[test]
+    #[should_panic(expected = "invalid nav up level")]
+    fn nav_invalid_up_zero_panics() {
+        Nav::from_byte(0b01_000000);
+    }
+}
diff --git a/crates/plotnik-lib/src/bytecode/sections.rs b/crates/plotnik-lib/src/bytecode/sections.rs
new file mode 100644
index 00000000..3a1beaf5
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/sections.rs
@@ -0,0 +1,61 @@
+//! Binary format section primitives.
+
+use super::ids::StringId;
+
+/// Range into an array: [ptr..ptr+len).
+///
+/// Dual-use depending on context:
+/// - For `TypeDef` wrappers (Optional, Array*): `ptr` is inner `QTypeId`, `len` is 0.
+/// - For `TypeDef` composites (Struct, Enum): `ptr` is index into TypeMember array, `len` is count.
+#[derive(Clone, Copy, Debug, Default)]
+#[repr(C)]
+pub struct Slice {
+    pub ptr: u16,
+    pub len: u16,
+}
+
+impl Slice {
+    #[inline]
+    pub fn range(self) -> std::ops::Range<usize> {
+        let start = self.ptr as usize;
+        start..start + self.len as usize
+    }
+}
+
+/// Maps tree-sitter NodeTypeId to its string name.
+#[derive(Clone, Copy, Debug)]
+#[repr(C)]
+pub struct NodeSymbol {
+    /// Tree-sitter node type ID
+    pub id: u16,
+    /// StringId for the node kind name
+    pub name: StringId,
+}
+
+/// Maps tree-sitter NodeFieldId to its string name.
+#[derive(Clone, Copy, Debug)]
+#[repr(C)]
+pub struct FieldSymbol {
+    /// Tree-sitter field ID
+    pub id: u16,
+    /// StringId for the field name
+    pub name: StringId,
+}
+
+/// A node type ID that counts as trivia (whitespace, comments).
+#[derive(Clone, Copy, Debug)]
+#[repr(C)]
+pub struct TriviaEntry {
+    pub node_type: u16,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn slice_range() {
+        let slice = Slice { ptr: 5, len: 3 };
+        assert_eq!(slice.range(), 5..8);
+    }
+}