From 8003b598d56eda0db2b92bda736e79b88e206f98 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 08:32:49 -0300 Subject: [PATCH 1/2] feat: Basic types for query IR --- crates/plotnik-lib/src/ir/effect.rs | 45 +++++ crates/plotnik-lib/src/ir/effect_tests.rs | 22 +++ crates/plotnik-lib/src/ir/entrypoint.rs | 56 ++++++ crates/plotnik-lib/src/ir/ids.rs | 37 ++++ crates/plotnik-lib/src/ir/matcher.rs | 89 ++++++++++ crates/plotnik-lib/src/ir/matcher_tests.rs | 27 +++ crates/plotnik-lib/src/ir/mod.rs | 67 +++++++ crates/plotnik-lib/src/ir/nav.rs | 168 ++++++++++++++++++ crates/plotnik-lib/src/ir/ref_transition.rs | 53 ++++++ .../src/ir/ref_transition_tests.rs | 26 +++ crates/plotnik-lib/src/ir/slice.rs | 96 ++++++++++ crates/plotnik-lib/src/ir/slice_tests.rs | 48 +++++ crates/plotnik-lib/src/ir/string_ref.rs | 31 ++++ crates/plotnik-lib/src/ir/string_ref_tests.rs | 14 ++ crates/plotnik-lib/src/ir/transition.rs | 78 ++++++++ crates/plotnik-lib/src/ir/type_metadata.rs | 124 +++++++++++++ crates/plotnik-lib/src/lib.rs | 1 + docs/adr/ADR-0005-transition-graph-format.md | 8 +- docs/adr/ADR-0007-type-metadata-format.md | 2 +- 19 files changed, 988 insertions(+), 4 deletions(-) create mode 100644 crates/plotnik-lib/src/ir/effect.rs create mode 100644 crates/plotnik-lib/src/ir/effect_tests.rs create mode 100644 crates/plotnik-lib/src/ir/entrypoint.rs create mode 100644 crates/plotnik-lib/src/ir/ids.rs create mode 100644 crates/plotnik-lib/src/ir/matcher.rs create mode 100644 crates/plotnik-lib/src/ir/matcher_tests.rs create mode 100644 crates/plotnik-lib/src/ir/mod.rs create mode 100644 crates/plotnik-lib/src/ir/nav.rs create mode 100644 crates/plotnik-lib/src/ir/ref_transition.rs create mode 100644 crates/plotnik-lib/src/ir/ref_transition_tests.rs create mode 100644 crates/plotnik-lib/src/ir/slice.rs create mode 100644 crates/plotnik-lib/src/ir/slice_tests.rs create mode 100644 crates/plotnik-lib/src/ir/string_ref.rs create mode 100644 
crates/plotnik-lib/src/ir/string_ref_tests.rs create mode 100644 crates/plotnik-lib/src/ir/transition.rs create mode 100644 crates/plotnik-lib/src/ir/type_metadata.rs diff --git a/crates/plotnik-lib/src/ir/effect.rs b/crates/plotnik-lib/src/ir/effect.rs new file mode 100644 index 00000000..dd6b6565 --- /dev/null +++ b/crates/plotnik-lib/src/ir/effect.rs @@ -0,0 +1,45 @@ +//! Effect operations for the query IR. +//! +//! Effects are recorded during transition execution and replayed +//! during materialization to construct the output value. + +use super::ids::{DataFieldId, VariantTagId}; + +/// Effect operation in the IR effect stream. +/// +/// Effects are executed sequentially after a successful match. +/// They manipulate a value stack to construct structured output. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C, u16)] +pub enum EffectOp { + /// Store matched node as current value. + /// Only valid on transitions with Node/Anonymous/Wildcard matcher. + CaptureNode, + + /// Push empty array onto stack. + StartArray, + + /// Move current value into top array. + PushElement, + + /// Pop array from stack into current. + EndArray, + + /// Push empty object onto stack. + StartObject, + + /// Pop object from stack into current. + EndObject, + + /// Move current value into top object at field. + Field(DataFieldId), + + /// Push variant container with tag onto stack. + StartVariant(VariantTagId), + + /// Pop variant, wrap current, set as current. + EndVariant, + + /// Replace current Node with its source text. 
+ ToString, +} diff --git a/crates/plotnik-lib/src/ir/effect_tests.rs b/crates/plotnik-lib/src/ir/effect_tests.rs new file mode 100644 index 00000000..9a1a355a --- /dev/null +++ b/crates/plotnik-lib/src/ir/effect_tests.rs @@ -0,0 +1,22 @@ +use super::*; + +#[test] +fn effect_op_size_and_align() { + assert_eq!(size_of::(), 4); + assert_eq!(align_of::(), 2); +} + +#[test] +fn effect_op_variants() { + // Ensure all variants exist and are constructible + let _ = EffectOp::CaptureNode; + let _ = EffectOp::StartArray; + let _ = EffectOp::PushElement; + let _ = EffectOp::EndArray; + let _ = EffectOp::StartObject; + let _ = EffectOp::EndObject; + let _ = EffectOp::Field(0); + let _ = EffectOp::StartVariant(0); + let _ = EffectOp::EndVariant; + let _ = EffectOp::ToString; +} diff --git a/crates/plotnik-lib/src/ir/entrypoint.rs b/crates/plotnik-lib/src/ir/entrypoint.rs new file mode 100644 index 00000000..e5ef5936 --- /dev/null +++ b/crates/plotnik-lib/src/ir/entrypoint.rs @@ -0,0 +1,56 @@ +//! Named entrypoints for multi-definition queries. +//! +//! Entrypoints provide named exports for definitions. The default entrypoint +//! is always Transition 0; this table enables accessing other definitions by name. + +use super::ids::{StringId, TransitionId, TypeId}; + +/// Named entrypoint into the query graph. +/// +/// Layout: 12 bytes, align 4. +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct Entrypoint { + /// String ID for the entrypoint name. + name_id: StringId, + _pad: u16, + /// Target transition (definition entry point). + target: TransitionId, + /// Result type of this definition (see ADR-0007). + result_type: TypeId, + _pad2: u16, +} + +const _: () = assert!(size_of::() == 12); +const _: () = assert!(align_of::() == 4); + +impl Entrypoint { + /// Creates a new entrypoint. 
+ pub const fn new(name_id: StringId, target: TransitionId, result_type: TypeId) -> Self { + Self { + name_id, + _pad: 0, + target, + result_type, + _pad2: 0, + } + } + + /// Returns the string ID of the entrypoint name. + #[inline] + pub const fn name_id(&self) -> StringId { + self.name_id + } + + /// Returns the target transition ID. + #[inline] + pub const fn target(&self) -> TransitionId { + self.target + } + + /// Returns the result type ID. + #[inline] + pub const fn result_type(&self) -> TypeId { + self.result_type + } +} diff --git a/crates/plotnik-lib/src/ir/ids.rs b/crates/plotnik-lib/src/ir/ids.rs new file mode 100644 index 00000000..dcb88265 --- /dev/null +++ b/crates/plotnik-lib/src/ir/ids.rs @@ -0,0 +1,37 @@ +//! ID types for the compiled query IR. +//! +//! These are lightweight aliases for indices and identifiers used throughout +//! the IR. They document intent at no runtime cost; plain `type` aliases are interchangeable with their underlying type. + +use std::num::NonZeroU16; + +/// Index into the transitions segment. +pub type TransitionId = u32; + +/// Node type ID from tree-sitter. Do not change the underlying type. +pub type NodeTypeId = u16; + +/// Node field ID from tree-sitter. Uses `NonZeroU16` so `Option<NodeFieldId>` +/// is the same size as `NodeFieldId` (niche optimization with 0 = None). +pub type NodeFieldId = NonZeroU16; + +/// Index into the string_refs segment. +pub type StringId = u16; + +/// Field name in effects (alias of `StringId`; documents intent). +pub type DataFieldId = StringId; + +/// Variant tag in effects (alias of `StringId`; documents intent). +pub type VariantTagId = StringId; + +/// Index for definition references (Enter/Exit). +pub type RefId = u16; + +/// Index into type_defs segment (with reserved primitives 0-2).
+pub type TypeId = u16; + +// TypeId reserved constants +pub const TYPE_VOID: TypeId = 0; +pub const TYPE_NODE: TypeId = 1; +pub const TYPE_STR: TypeId = 2; +pub const TYPE_INVALID: TypeId = 0xFFFF; diff --git a/crates/plotnik-lib/src/ir/matcher.rs b/crates/plotnik-lib/src/ir/matcher.rs new file mode 100644 index 00000000..0a968895 --- /dev/null +++ b/crates/plotnik-lib/src/ir/matcher.rs @@ -0,0 +1,89 @@ +//! Node matchers for transition graph. +//! +//! Matchers are purely for node matching - navigation is handled by `Nav`. + +use super::{NodeFieldId, NodeTypeId, Slice}; + +/// Discriminant for matcher variants. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MatcherKind { + Epsilon, + Node, + Anonymous, + Wildcard, +} + +/// Matcher determines what node satisfies a transition. +/// +/// Navigation (descend/ascend) is handled by `Nav`, not matchers. +#[repr(C, u32)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Matcher { + /// Matches without consuming input. Used for control flow transitions. + Epsilon, + + /// Matches a named node by kind, optionally constrained by field. + Node { + kind: NodeTypeId, + field: Option, + negated_fields: Slice, + }, + + /// Matches an anonymous node by kind, optionally constrained by field. + Anonymous { + kind: NodeTypeId, + field: Option, + negated_fields: Slice, + }, + + /// Matches any node (named or anonymous). + Wildcard, +} + +impl Matcher { + /// Returns true if this matcher consumes a node. + #[inline] + pub fn consumes_node(&self) -> bool { + !matches!(self, Matcher::Epsilon) + } + + /// Returns the discriminant kind. + #[inline] + pub fn kind(&self) -> MatcherKind { + match self { + Matcher::Epsilon => MatcherKind::Epsilon, + Matcher::Node { .. } => MatcherKind::Node, + Matcher::Anonymous { .. } => MatcherKind::Anonymous, + Matcher::Wildcard => MatcherKind::Wildcard, + } + } + + /// Returns the node type ID for Node/Anonymous variants, `None` otherwise. 
+ #[inline] + pub fn node_kind(&self) -> Option { + match self { + Matcher::Node { kind, .. } | Matcher::Anonymous { kind, .. } => Some(*kind), + _ => None, + } + } + + /// Returns the field constraint, if any. + #[inline] + pub fn field(&self) -> Option { + match self { + Matcher::Node { field, .. } | Matcher::Anonymous { field, .. } => *field, + _ => None, + } + } + + /// Returns the negated fields slice. Empty for Epsilon/Wildcard. + #[inline] + pub fn negated_fields(&self) -> Slice { + match self { + Matcher::Node { negated_fields, .. } | Matcher::Anonymous { negated_fields, .. } => { + *negated_fields + } + _ => Slice::empty(), + } + } +} diff --git a/crates/plotnik-lib/src/ir/matcher_tests.rs b/crates/plotnik-lib/src/ir/matcher_tests.rs new file mode 100644 index 00000000..74410678 --- /dev/null +++ b/crates/plotnik-lib/src/ir/matcher_tests.rs @@ -0,0 +1,27 @@ +use super::*; + +#[test] +fn matcher_size_and_alignment() { + assert_eq!(size_of::(), 16); + assert_eq!(align_of::(), 4); +} + +#[test] +fn consumes_node() { + assert!(!Matcher::Epsilon.consumes_node()); + assert!(Matcher::Wildcard.consumes_node()); + + let node_matcher = Matcher::Node { + kind: 42, + field: None, + negated_fields: Slice::empty(), + }; + assert!(node_matcher.consumes_node()); + + let anon_matcher = Matcher::Anonymous { + kind: 1, + field: None, + negated_fields: Slice::empty(), + }; + assert!(anon_matcher.consumes_node()); +} diff --git a/crates/plotnik-lib/src/ir/mod.rs b/crates/plotnik-lib/src/ir/mod.rs new file mode 100644 index 00000000..df4de243 --- /dev/null +++ b/crates/plotnik-lib/src/ir/mod.rs @@ -0,0 +1,67 @@ +//! Intermediate Representation (IR) for compiled queries. +//! +//! This module contains the in-memory representation of compiled queries +//! as defined in ADR-0004 through ADR-0008. The IR is designed for: +//! - Cache-efficient execution (64-byte aligned transitions) +//! - Zero-copy access patterns +//! - WASM compatibility +//! +//! 
Note: This module contains only type definitions. Query execution +//! lives elsewhere. + +mod effect; +mod entrypoint; +mod ids; +mod matcher; +mod nav; +mod ref_transition; +mod slice; +mod string_ref; +mod transition; +mod type_metadata; + +#[cfg(test)] +mod effect_tests; +#[cfg(test)] +mod matcher_tests; +#[cfg(test)] +mod ref_transition_tests; +#[cfg(test)] +mod slice_tests; +#[cfg(test)] +mod string_ref_tests; + +// Re-export ID types +pub use ids::{ + DataFieldId, NodeFieldId, NodeTypeId, RefId, StringId, TransitionId, TypeId, VariantTagId, +}; + +// Re-export TypeId constants +pub use ids::{TYPE_INVALID, TYPE_NODE, TYPE_STR, TYPE_VOID}; + +// Re-export Slice +pub use slice::Slice; + +// Re-export navigation +pub use nav::{Nav, NavKind}; + +// Re-export matcher +pub use matcher::{Matcher, MatcherKind}; + +// Re-export effects +pub use effect::EffectOp; + +// Re-export ref transition +pub use ref_transition::RefTransition; + +// Re-export transition +pub use transition::{MAX_INLINE_SUCCESSORS, Transition}; + +// Re-export type metadata +pub use type_metadata::{TYPE_COMPOSITE_START, TypeDef, TypeKind, TypeMember}; + +// Re-export string ref +pub use string_ref::StringRef; + +// Re-export entrypoint +pub use entrypoint::Entrypoint; diff --git a/crates/plotnik-lib/src/ir/nav.rs b/crates/plotnik-lib/src/ir/nav.rs new file mode 100644 index 00000000..5630a426 --- /dev/null +++ b/crates/plotnik-lib/src/ir/nav.rs @@ -0,0 +1,168 @@ +//! Tree navigation instructions for query execution. +//! +//! Navigation decisions are resolved at graph construction time, not runtime. +//! Each transition carries its own `Nav` instruction. + +/// Navigation instruction determining cursor movement and skip policy. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct Nav { + pub kind: NavKind, + /// Ascent level count for `Up*` variants, ignored otherwise. 
+ pub level: u8, +} + +impl Nav { + pub const fn stay() -> Self { + Self { + kind: NavKind::Stay, + level: 0, + } + } + + pub const fn next() -> Self { + Self { + kind: NavKind::Next, + level: 0, + } + } + + pub const fn next_skip_trivia() -> Self { + Self { + kind: NavKind::NextSkipTrivia, + level: 0, + } + } + + pub const fn next_exact() -> Self { + Self { + kind: NavKind::NextExact, + level: 0, + } + } + + pub const fn down() -> Self { + Self { + kind: NavKind::Down, + level: 0, + } + } + + pub const fn down_skip_trivia() -> Self { + Self { + kind: NavKind::DownSkipTrivia, + level: 0, + } + } + + pub const fn down_exact() -> Self { + Self { + kind: NavKind::DownExact, + level: 0, + } + } + + pub const fn up(level: u8) -> Self { + Self { + kind: NavKind::Up, + level, + } + } + + pub const fn up_skip_trivia(level: u8) -> Self { + Self { + kind: NavKind::UpSkipTrivia, + level, + } + } + + pub const fn up_exact(level: u8) -> Self { + Self { + kind: NavKind::UpExact, + level, + } + } + + /// Returns true if this is a Stay navigation (no movement). + #[inline] + pub const fn is_stay(&self) -> bool { + matches!(self.kind, NavKind::Stay) + } + + /// Returns true if this is a horizontal sibling traversal (Next*). + #[inline] + pub const fn is_next(&self) -> bool { + matches!( + self.kind, + NavKind::Next | NavKind::NextSkipTrivia | NavKind::NextExact + ) + } + + /// Returns true if this descends into children (Down*). + #[inline] + pub const fn is_down(&self) -> bool { + matches!( + self.kind, + NavKind::Down | NavKind::DownSkipTrivia | NavKind::DownExact + ) + } + + /// Returns true if this ascends to parent(s) (Up*). + #[inline] + pub const fn is_up(&self) -> bool { + matches!( + self.kind, + NavKind::Up | NavKind::UpSkipTrivia | NavKind::UpExact + ) + } + + /// Returns true if this navigation skips only trivia nodes. 
+ #[inline] + pub const fn is_skip_trivia(&self) -> bool { + matches!( + self.kind, + NavKind::NextSkipTrivia | NavKind::DownSkipTrivia | NavKind::UpSkipTrivia + ) + } + + /// Returns true if this navigation requires exact position (no skipping). + #[inline] + pub const fn is_exact(&self) -> bool { + matches!( + self.kind, + NavKind::NextExact | NavKind::DownExact | NavKind::UpExact + ) + } +} + +/// Navigation kind determining movement direction and skip policy. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum NavKind { + /// No movement. Used only for first transition when cursor is at root. + Stay = 0, + + // Sibling traversal (horizontal) + /// Skip any nodes to find match. + Next = 1, + /// Skip trivia only, fail if non-trivia skipped. + NextSkipTrivia = 2, + /// No skipping, current sibling must match. + NextExact = 3, + + // Enter children (descend) + /// Skip any among children. + Down = 4, + /// Skip trivia only among children. + DownSkipTrivia = 5, + /// First child must match, no skip. + DownExact = 6, + + // Exit children (ascend) + /// Ascend `level` levels, no constraint. + Up = 7, + /// Validate last non-trivia, ascend `level` levels. + UpSkipTrivia = 8, + /// Validate last child, ascend `level` levels. + UpExact = 9, +} diff --git a/crates/plotnik-lib/src/ir/ref_transition.rs b/crates/plotnik-lib/src/ir/ref_transition.rs new file mode 100644 index 00000000..23937860 --- /dev/null +++ b/crates/plotnik-lib/src/ir/ref_transition.rs @@ -0,0 +1,53 @@ +//! Definition call/return markers for recursive transition network. +//! +//! See ADR-0005 for semantics of Enter/Exit transitions. + +use super::RefId; + +/// Marks a transition as entering or exiting a definition reference. +/// +/// A transition can hold at most one `RefTransition`. Sequences like +/// `Enter(A) → Enter(B)` require epsilon chains. +/// +/// Layout: 1-byte discriminant + 1-byte padding + 2-byte RefId = 4 bytes, align 2. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C, u8)] +pub enum RefTransition { + /// No definition boundary crossing. + None, + + /// Push call frame with return transitions. + /// + /// For `Enter(ref_id)` transitions, successors have special structure: + /// - `successors()[0]`: definition entry point (where to jump) + /// - `successors()[1..]`: return transitions (stored in call frame) + Enter(RefId), + + /// Pop frame, continue with stored return transitions. + /// + /// Successors are ignored—returns come from the call frame pushed at `Enter`. + Exit(RefId), +} + +impl RefTransition { + /// Returns `true` if this is `None`. + #[inline] + pub fn is_none(self) -> bool { + matches!(self, Self::None) + } + + /// Returns the ref ID if this is `Enter` or `Exit`. + #[inline] + pub fn ref_id(self) -> Option { + match self { + Self::None => None, + Self::Enter(id) | Self::Exit(id) => Some(id), + } + } +} + +impl Default for RefTransition { + fn default() -> Self { + Self::None + } +} diff --git a/crates/plotnik-lib/src/ir/ref_transition_tests.rs b/crates/plotnik-lib/src/ir/ref_transition_tests.rs new file mode 100644 index 00000000..cbb3a438 --- /dev/null +++ b/crates/plotnik-lib/src/ir/ref_transition_tests.rs @@ -0,0 +1,26 @@ +use super::*; + +#[test] +fn size_and_alignment() { + assert_eq!(size_of::(), 4); + assert_eq!(align_of::(), 2); +} + +#[test] +fn none_is_default() { + assert_eq!(RefTransition::default(), RefTransition::None); +} + +#[test] +fn is_none() { + assert!(RefTransition::None.is_none()); + assert!(!RefTransition::Enter(1).is_none()); + assert!(!RefTransition::Exit(1).is_none()); +} + +#[test] +fn ref_id_extraction() { + assert_eq!(RefTransition::None.ref_id(), None); + assert_eq!(RefTransition::Enter(42).ref_id(), Some(42)); + assert_eq!(RefTransition::Exit(123).ref_id(), Some(123)); +} diff --git a/crates/plotnik-lib/src/ir/slice.rs b/crates/plotnik-lib/src/ir/slice.rs new file mode 100644 index 00000000..c3abbfb2 --- /dev/null +++ 
b/crates/plotnik-lib/src/ir/slice.rs @@ -0,0 +1,96 @@ +//! Relative range within a segment. +//! +//! `start_index` is an **element index**, not a byte offset. This naming +//! distinguishes it from byte offsets like `StringRef.offset`. +//! +//! This struct is 6 bytes to fit the Transition layout requirements. +//! Type safety is provided by a zero-sized `PhantomData` marker field, which adds no bytes. + +use std::marker::PhantomData; + +/// Relative range within a compiled query segment. +/// +/// Used for variable-length data (successors, effects, negated fields, type members). +/// The slice references elements by index into the corresponding segment array. +/// +/// Layout: 6 bytes (4 + 2), no padding due to `repr(C, packed)`. +/// Alignment is 1 due to packing, so reads may be unaligned on some platforms. +#[repr(C, packed)] +#[derive(Clone, Copy)] +pub struct Slice { + /// Element index into the segment array (NOT byte offset). + start_index: u32, + /// Number of elements. 65k elements per slice is sufficient. + len: u16, + _phantom: PhantomData T>, +} + +// Compile-time size verification +const _: () = assert!(size_of::>() == 6); + +impl Slice { + /// Creates a new slice. + #[inline] + pub const fn new(start_index: u32, len: u16) -> Self { + Self { + start_index, + len, + _phantom: PhantomData, + } + } + + /// Creates an empty slice. + #[inline] + pub const fn empty() -> Self { + Self::new(0, 0) + } + + /// Returns the start index (element index, not byte offset). + #[inline] + pub fn start_index(&self) -> u32 { + // Packed struct - field may be unaligned, so copy out + self.start_index + } + + /// Returns the number of elements. + #[inline] + pub fn len(&self) -> u16 { + self.len + } + + /// Returns true if the slice is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Creates a slice encoding an inner type ID (for wrapper TypeDef). + /// The `start_index` stores the TypeId as u32, `len` is 0.
+ #[inline] + pub const fn from_inner_type(type_id: u16) -> Self { + Self::new(type_id as u32, 0) + } +} + +impl Default for Slice { + fn default() -> Self { + Self::empty() + } +} + +impl PartialEq for Slice { + fn eq(&self, other: &Self) -> bool { + self.start_index() == other.start_index() && self.len() == other.len() + } +} + +impl Eq for Slice {} + +impl std::fmt::Debug for Slice { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Slice") + .field("start_index", &self.start_index()) + .field("len", &self.len()) + .finish() + } +} diff --git a/crates/plotnik-lib/src/ir/slice_tests.rs b/crates/plotnik-lib/src/ir/slice_tests.rs new file mode 100644 index 00000000..dee8ae26 --- /dev/null +++ b/crates/plotnik-lib/src/ir/slice_tests.rs @@ -0,0 +1,48 @@ +use super::*; + +#[test] +fn empty_slice() { + let slice: Slice = Slice::empty(); + + assert!(slice.is_empty()); + assert_eq!(slice.start_index(), 0); + assert_eq!(slice.len(), 0); +} + +#[test] +fn new_slice() { + let slice: Slice = Slice::new(42, 10); + + assert!(!slice.is_empty()); + assert_eq!(slice.start_index(), 42); + assert_eq!(slice.len(), 10); +} + +#[test] +fn default_is_empty() { + let slice: Slice = Slice::default(); + assert!(slice.is_empty()); +} + +#[test] +fn from_inner_type() { + let slice: Slice<()> = Slice::from_inner_type(0x1234); + + assert_eq!(slice.start_index(), 0x1234); + assert_eq!(slice.len(), 0); +} + +#[test] +fn equality() { + let a: Slice = Slice::new(10, 5); + let b: Slice = Slice::new(10, 5); + let c: Slice = Slice::new(10, 6); + + assert_eq!(a, b); + assert_ne!(a, c); +} + +#[test] +fn size_is_6_bytes() { + assert_eq!(std::mem::size_of::>(), 6); +} diff --git a/crates/plotnik-lib/src/ir/string_ref.rs b/crates/plotnik-lib/src/ir/string_ref.rs new file mode 100644 index 00000000..143d94a8 --- /dev/null +++ b/crates/plotnik-lib/src/ir/string_ref.rs @@ -0,0 +1,31 @@ +//! String pool entry reference. +//! +//! 
Strings are stored in a single contiguous byte pool. `StringRef` points +//! into that pool via byte offset (not element index). + +/// Reference to a string in the string pool. +/// +/// Layout: 8 bytes, align 4. +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct StringRef { + /// Byte offset into string_bytes segment. + pub offset: u32, + /// Length of the string in bytes. + pub len: u16, + _pad: u16, +} + +impl StringRef { + pub const fn new(offset: u32, len: u16) -> Self { + Self { + offset, + len, + _pad: 0, + } + } +} + +// Compile-time size verification +const _: () = assert!(size_of::() == 8); +const _: () = assert!(align_of::() == 4); diff --git a/crates/plotnik-lib/src/ir/string_ref_tests.rs b/crates/plotnik-lib/src/ir/string_ref_tests.rs new file mode 100644 index 00000000..74e7d12f --- /dev/null +++ b/crates/plotnik-lib/src/ir/string_ref_tests.rs @@ -0,0 +1,14 @@ +use super::*; + +#[test] +fn string_ref_new() { + let r = StringRef::new(42, 10); + assert_eq!(r.offset, 42); + assert_eq!(r.len, 10); +} + +#[test] +fn string_ref_layout() { + assert_eq!(size_of::(), 8); + assert_eq!(align_of::(), 4); +} diff --git a/crates/plotnik-lib/src/ir/transition.rs b/crates/plotnik-lib/src/ir/transition.rs new file mode 100644 index 00000000..a452d239 --- /dev/null +++ b/crates/plotnik-lib/src/ir/transition.rs @@ -0,0 +1,78 @@ +//! Transition struct - the fundamental unit of the query IR. +//! +//! Each transition is 64 bytes and cache-line aligned to ensure no transition +//! straddles cache lines. Transitions carry all semantics: matching, effects, +//! and successors. States are implicit junction points. + +use super::{EffectOp, Matcher, Nav, RefTransition, Slice, TransitionId}; + +/// Maximum number of inline successors before spilling to external segment. +pub const MAX_INLINE_SUCCESSORS: usize = 8; + +/// A single transition in the query graph. 
+/// +/// Transitions use SSO (small-size optimization) for successors: +/// - 0-8 successors: stored inline in `successor_data` +/// - 9+ successors: `successor_data[0]` is index into successors segment +#[repr(C, align(64))] +#[derive(Clone, Copy)] +pub struct Transition { + // --- 32 bytes metadata --- + /// What this transition matches (node kind, wildcard, epsilon). + pub matcher: Matcher, // 16 bytes + + /// Reference call/return marker for recursive definitions. + pub ref_marker: RefTransition, // 4 bytes + + /// Number of successor transitions. + pub successor_count: u32, // 4 bytes + + /// Effects to execute on successful match. + /// When empty: start_index=0, len=0. + pub effects: Slice, // 6 bytes + + /// Navigation instruction (descend/ascend/sibling traversal). + pub nav: Nav, // 2 bytes + + // --- 32 bytes control flow --- + /// Successor storage (inline or spilled index). + /// + /// - If `successor_count <= 8`: contains `TransitionId` values directly + /// - If `successor_count > 8`: `successor_data[0]` is index into successors segment + pub successor_data: [u32; MAX_INLINE_SUCCESSORS], // 32 bytes +} + +impl Transition { + /// Returns `true` if successors are stored inline. + #[inline] + pub fn has_inline_successors(&self) -> bool { + self.successor_count as usize <= MAX_INLINE_SUCCESSORS + } + + /// Returns inline successors if they fit, `None` if spilled. + #[inline] + pub fn inline_successors(&self) -> Option<&[TransitionId]> { + if self.has_inline_successors() { + Some(&self.successor_data[..self.successor_count as usize]) + } else { + None + } + } + + /// Returns the index into the successors segment where the spilled successors start. + /// Panics in debug builds (via `debug_assert!`) if successors are inline.
+ #[inline] + pub fn spilled_successors_index(&self) -> u32 { + debug_assert!( + !self.has_inline_successors(), + "successors are inline, not spilled" + ); + self.successor_data[0] + } +} + +// Compile-time size/alignment verification +const _: () = { + assert!(core::mem::size_of::() == 64); + assert!(core::mem::align_of::() == 64); +}; diff --git a/crates/plotnik-lib/src/ir/type_metadata.rs b/crates/plotnik-lib/src/ir/type_metadata.rs new file mode 100644 index 00000000..46fda12a --- /dev/null +++ b/crates/plotnik-lib/src/ir/type_metadata.rs @@ -0,0 +1,124 @@ +//! Type metadata for code generation and validation. +//! +//! Type metadata is descriptive, not prescriptive—it describes what +//! transitions produce, not how they execute. + +use super::Slice; +use super::ids::{StringId, TypeId}; + +/// First composite type ID (after primitives 0-2). +pub const TYPE_COMPOSITE_START: TypeId = 3; + +/// Type definition in the compiled query. +/// +/// The `members` field has dual semantics based on `kind`: +/// - Wrappers (Optional/ArrayStar/ArrayPlus): `members.start_index` is inner TypeId +/// - Composites (Record/Enum): `members` is slice into type_members segment +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct TypeDef { + pub kind: TypeKind, + _pad: u8, + /// Synthetic or explicit type name. `0xFFFF` for unnamed wrappers. + pub name: StringId, + /// See struct-level docs for dual semantics. + pub members: Slice, + _pad2: u16, +} + +// Size is 12 bytes: kind(1) + pad(1) + name(2) + members(6) + pad2(2) +// Alignment is 2 due to packed Slice having align 1 +const _: () = assert!(size_of::() == 12); + +impl TypeDef { + /// Create a wrapper type (Optional, ArrayStar, ArrayPlus). 
+ pub fn wrapper(kind: TypeKind, inner: TypeId) -> Self { + debug_assert!(matches!( + kind, + TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus + )); + Self { + kind, + _pad: 0, + name: 0xFFFF, + members: Slice::from_inner_type(inner), + _pad2: 0, + } + } + + /// Create a composite type (Record, Enum). + pub fn composite(kind: TypeKind, name: StringId, members: Slice) -> Self { + debug_assert!(matches!(kind, TypeKind::Record | TypeKind::Enum)); + Self { + kind, + _pad: 0, + name, + members, + _pad2: 0, + } + } + + /// For wrapper types, returns the inner type ID. + pub fn inner_type(&self) -> Option { + match self.kind { + TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus => { + Some(self.members.start_index() as TypeId) + } + TypeKind::Record | TypeKind::Enum => None, + } + } + + /// For composite types, returns the members slice. + pub fn members_slice(&self) -> Option> { + match self.kind { + TypeKind::Record | TypeKind::Enum => Some(self.members), + TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus => None, + } + } + + pub fn is_wrapper(&self) -> bool { + matches!( + self.kind, + TypeKind::Optional | TypeKind::ArrayStar | TypeKind::ArrayPlus + ) + } + + pub fn is_composite(&self) -> bool { + matches!(self.kind, TypeKind::Record | TypeKind::Enum) + } +} + +/// Discriminant for type definitions. +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TypeKind { + /// `T?` — nullable wrapper + Optional = 0, + /// `T*` — zero or more elements + ArrayStar = 1, + /// `T+` — one or more elements (non-empty) + ArrayPlus = 2, + /// Struct with named fields + Record = 3, + /// Tagged union (discriminated) + Enum = 4, +} + +/// Member of a Record (field) or Enum (variant). +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct TypeMember { + /// Field name or variant tag. + pub name: StringId, + /// Field type or variant payload. `TYPE_VOID` for unit variants. 
+ pub ty: TypeId, +} + +const _: () = assert!(size_of::() == 4); +const _: () = assert!(align_of::() == 2); + +impl TypeMember { + pub fn new(name: StringId, ty: TypeId) -> Self { + Self { name, ty } + } +} diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs index 86797bb6..418441dd 100644 --- a/crates/plotnik-lib/src/lib.rs +++ b/crates/plotnik-lib/src/lib.rs @@ -18,6 +18,7 @@ pub mod diagnostics; pub mod infer; +pub mod ir; pub mod parser; pub mod query; diff --git a/docs/adr/ADR-0005-transition-graph-format.md b/docs/adr/ADR-0005-transition-graph-format.md index b3cee6ec..e6ea9513 100644 --- a/docs/adr/ADR-0005-transition-graph-format.md +++ b/docs/adr/ADR-0005-transition-graph-format.md @@ -25,15 +25,17 @@ type RefId = u16; Relative range within a segment: ```rust -#[repr(C)] +#[repr(C, packed)] struct Slice { start_index: u32, // element index into segment array (NOT byte offset) len: u16, // 65k elements per slice is sufficient - _phantom: PhantomData, + _phantom: PhantomData T>, } -// 6 bytes, align 4 +// 6 bytes, align 1 (packed to avoid padding) ``` +**Note**: `repr(C, packed)` is required to achieve 6 bytes. Standard `repr(C)` would pad to 8 bytes for alignment. The packed repr means field access may be unaligned on some platforms—accessors should copy values out rather than returning references. + `start_index` is an **element index**, not a byte offset. This naming distinguishes it from byte offsets like `StringRef.offset` and `CompiledQuery.*_offset`. The distinction matters for typed array access. 
### Transition diff --git a/docs/adr/ADR-0007-type-metadata-format.md b/docs/adr/ADR-0007-type-metadata-format.md index 17fb57fb..0f0e6c28 100644 --- a/docs/adr/ADR-0007-type-metadata-format.md +++ b/docs/adr/ADR-0007-type-metadata-format.md @@ -56,7 +56,7 @@ struct TypeDef { members: Slice, // 6 - see interpretation below _pad2: u16, // 2 } -// 12 bytes, align 4 +// 12 bytes, align 2 (due to packed Slice having align 1) ``` The `members` field has dual semantics based on `kind`: From 12772d2884745ec7ee03a5510fe40704705354d3 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 13 Dec 2025 08:39:13 -0300 Subject: [PATCH 2/2] Fix clippy --- crates/plotnik-lib/src/ir/ref_transition.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/plotnik-lib/src/ir/ref_transition.rs b/crates/plotnik-lib/src/ir/ref_transition.rs index 23937860..1316f144 100644 --- a/crates/plotnik-lib/src/ir/ref_transition.rs +++ b/crates/plotnik-lib/src/ir/ref_transition.rs @@ -10,10 +10,11 @@ use super::RefId; /// `Enter(A) → Enter(B)` require epsilon chains. /// /// Layout: 1-byte discriminant + 1-byte padding + 2-byte RefId = 4 bytes, align 2. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] #[repr(C, u8)] pub enum RefTransition { /// No definition boundary crossing. + #[default] None, /// Push call frame with return transitions. @@ -45,9 +46,3 @@ impl RefTransition { } } } - -impl Default for RefTransition { - fn default() -> Self { - Self::None - } -}