/// Operation selector of an [`EffectOp`].
///
/// Each discriminant is the numeric value stored in the opcode bits of the
/// encoded 16-bit word.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(u8)]
pub enum EffectOpcode {
    Node = 0,
    A = 1,
    Push = 2,
    EndA = 3,
    S = 4,
    EndS = 5,
    Set = 6,
    E = 7,
    EndE = 8,
    Text = 9,
    Clear = 10,
    Null = 11,
}

impl EffectOpcode {
    /// Maps a raw opcode value back to its variant.
    ///
    /// # Panics
    ///
    /// Panics with `invalid effect opcode: {v}` when `v` is not one of the
    /// defined discriminants (0 through 11).
    fn from_u8(v: u8) -> Self {
        // Ordered by discriminant, so the raw value doubles as the index.
        const BY_DISCRIMINANT: [EffectOpcode; 12] = [
            EffectOpcode::Node,
            EffectOpcode::A,
            EffectOpcode::Push,
            EffectOpcode::EndA,
            EffectOpcode::S,
            EffectOpcode::EndS,
            EffectOpcode::Set,
            EffectOpcode::E,
            EffectOpcode::EndE,
            EffectOpcode::Text,
            EffectOpcode::Clear,
            EffectOpcode::Null,
        ];

        match BY_DISCRIMINANT.get(usize::from(v)) {
            Some(&opcode) => opcode,
            None => panic!("invalid effect opcode: {v}"),
        }
    }
}

/// A single effect instruction: an opcode plus a small integer payload.
///
/// Encoded as one little-endian `u16` with the opcode in the upper 6 bits and
/// the payload in the lower 10 bits.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct EffectOp {
    pub opcode: EffectOpcode,
    pub payload: usize,
}

impl EffectOp {
    /// Width of the payload field; the opcode sits directly above it.
    const PAYLOAD_BITS: u32 = 10;
    /// Mask selecting the payload field of the encoded word.
    const PAYLOAD_MASK: u16 = 0x3FF;

    /// Decodes an effect from its two-byte little-endian encoding.
    ///
    /// # Panics
    ///
    /// Panics if the opcode bits do not name a defined [`EffectOpcode`].
    pub fn from_bytes(bytes: [u8; 2]) -> Self {
        let word = u16::from_le_bytes(bytes);
        Self {
            opcode: EffectOpcode::from_u8((word >> Self::PAYLOAD_BITS) as u8),
            payload: usize::from(word & Self::PAYLOAD_MASK),
        }
    }

    /// Encodes this effect into its two-byte little-endian form.
    ///
    /// # Panics
    ///
    /// Panics if `payload` does not fit in 10 bits (greater than 1023).
    pub fn to_bytes(self) -> [u8; 2] {
        assert!(
            self.payload <= usize::from(Self::PAYLOAD_MASK),
            "effect payload exceeds 10-bit limit: {}",
            self.payload
        );
        let opcode_bits = (self.opcode as u16) << Self::PAYLOAD_BITS;
        let payload_bits = (self.payload as u16) & Self::PAYLOAD_MASK;
        (opcode_bits | payload_bits).to_le_bytes()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `op` and immediately decodes the resulting bytes.
    fn reencode(op: EffectOp) -> EffectOp {
        EffectOp::from_bytes(op.to_bytes())
    }

    #[test]
    fn roundtrip_with_payload() {
        let decoded = reencode(EffectOp {
            opcode: EffectOpcode::Set,
            payload: 42,
        });
        assert_eq!(decoded.opcode, EffectOpcode::Set);
        assert_eq!(decoded.payload, 42);
    }

    #[test]
    fn roundtrip_no_payload() {
        let decoded = reencode(EffectOp {
            opcode: EffectOpcode::Node,
            payload: 0,
        });
        assert_eq!(decoded.opcode, EffectOpcode::Node);
        assert_eq!(decoded.payload, 0);
    }

    #[test]
    fn max_payload() {
        let decoded = reencode(EffectOp {
            opcode: EffectOpcode::E,
            payload: 1023,
        });
        assert_eq!(decoded.payload, 1023);
    }

    #[test]
    #[should_panic(expected = "invalid effect opcode")]
    fn invalid_opcode_panics() {
        // 0xFFFF places 63 in the opcode bits, which no variant uses.
        EffectOp::from_bytes([0xFF, 0xFF]);
    }
}
assert_eq!(decoded.payload, 0); + } + + #[test] + fn max_payload() { + let op = EffectOp { + opcode: EffectOpcode::E, + payload: 1023, + }; + let bytes = op.to_bytes(); + let decoded = EffectOp::from_bytes(bytes); + assert_eq!(decoded.payload, 1023); + } + + #[test] + #[should_panic(expected = "invalid effect opcode")] + fn invalid_opcode_panics() { + let bytes = [0xFF, 0xFF]; // opcode would be 63, which is invalid + EffectOp::from_bytes(bytes); + } +} diff --git a/crates/plotnik-lib/src/bytecode/entrypoint.rs b/crates/plotnik-lib/src/bytecode/entrypoint.rs new file mode 100644 index 00000000..3550cf21 --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/entrypoint.rs @@ -0,0 +1,28 @@ +//! Entrypoint section types. + +use super::{QTypeId, StepId, StringId}; + +/// Named query definition entry point (8 bytes). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C)] +pub struct Entrypoint { + /// Definition name. + pub name: StringId, + /// Starting instruction (StepId). + pub target: StepId, + /// Result type. + pub result_type: QTypeId, + pub(crate) _pad: u16, +} + +const _: () = assert!(std::mem::size_of::() == 8); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn entrypoint_size() { + assert_eq!(std::mem::size_of::(), 8); + } +} diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs index ccec40ad..9ddadd7b 100644 --- a/crates/plotnik-lib/src/bytecode/mod.rs +++ b/crates/plotnik-lib/src/bytecode/mod.rs @@ -3,10 +3,13 @@ //! Implements the binary format specified in `docs/binary-format/`. 
mod constants; +mod effects; +mod entrypoint; mod header; mod ids; mod nav; mod sections; +mod type_meta; pub use constants::{ MAGIC, SECTION_ALIGN, STEP_ACCEPT, STEP_SIZE, TYPE_CUSTOM_START, TYPE_NODE, TYPE_STRING, @@ -20,3 +23,9 @@ pub use header::Header; pub use nav::Nav; pub use sections::{FieldSymbol, NodeSymbol, Slice, TriviaEntry}; + +pub use effects::{EffectOp, EffectOpcode}; + +pub use entrypoint::Entrypoint; + +pub use type_meta::{TypeDef, TypeKind, TypeMember, TypeMetaHeader, TypeName}; diff --git a/crates/plotnik-lib/src/bytecode/type_meta.rs b/crates/plotnik-lib/src/bytecode/type_meta.rs new file mode 100644 index 00000000..7c5ac9e3 --- /dev/null +++ b/crates/plotnik-lib/src/bytecode/type_meta.rs @@ -0,0 +1,202 @@ +//! Type metadata definitions for bytecode format. + +use super::{QTypeId, StringId}; + +// Re-export the shared TypeKind +pub use crate::type_system::TypeKind; + +/// Convenience aliases for bytecode-specific naming (ArrayStar/ArrayPlus). +impl TypeKind { + /// Alias for `ArrayZeroOrMore` (T*). + pub const ARRAY_STAR: Self = Self::ArrayZeroOrMore; + /// Alias for `ArrayOneOrMore` (T+). + pub const ARRAY_PLUS: Self = Self::ArrayOneOrMore; +} + +/// TypeMeta section header (8 bytes). +/// +/// Contains counts for the three sub-sections. Located at `type_meta_offset`. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[repr(C)] +pub struct TypeMetaHeader { + /// Number of TypeDef entries. + pub type_defs_count: u16, + /// Number of TypeMember entries. + pub type_members_count: u16, + /// Number of TypeName entries. + pub type_names_count: u16, + /// Padding for alignment. + pub(crate) _pad: u16, +} + +const _: () = assert!(std::mem::size_of::() == 8); + +impl TypeMetaHeader { + /// Decode from 8 bytes. 
/// TypeMeta section header (8 bytes).
///
/// Contains counts for the three sub-sections. Located at `type_meta_offset`.
/// All fields are serialized as little-endian `u16` values in declaration order.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C)]
pub struct TypeMetaHeader {
    /// Number of TypeDef entries.
    pub type_defs_count: u16,
    /// Number of TypeMember entries.
    pub type_members_count: u16,
    /// Number of TypeName entries.
    pub type_names_count: u16,
    /// Padding for alignment.
    pub(crate) _pad: u16,
}

// Compile-time guard: the in-memory size must match the 8-byte wire size.
const _: () = assert!(std::mem::size_of::<TypeMetaHeader>() == 8);

impl TypeMetaHeader {
    /// Decode from 8 bytes.
    ///
    /// Only the first 8 bytes of `bytes` are read; anything after them is ignored.
    ///
    /// # Panics
    ///
    /// Panics with `TypeMetaHeader too short` if `bytes` holds fewer than 8 bytes.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        assert!(bytes.len() >= 8, "TypeMetaHeader too short");
        Self {
            type_defs_count: u16::from_le_bytes([bytes[0], bytes[1]]),
            type_members_count: u16::from_le_bytes([bytes[2], bytes[3]]),
            type_names_count: u16::from_le_bytes([bytes[4], bytes[5]]),
            // The padding word is carried through as-is so encode/decode round-trips.
            _pad: u16::from_le_bytes([bytes[6], bytes[7]]),
        }
    }

    /// Encode to 8 bytes.
    ///
    /// Produces the exact inverse of [`TypeMetaHeader::from_bytes`].
    pub fn to_bytes(&self) -> [u8; 8] {
        let mut bytes = [0u8; 8];
        bytes[0..2].copy_from_slice(&self.type_defs_count.to_le_bytes());
        bytes[2..4].copy_from_slice(&self.type_members_count.to_le_bytes());
        bytes[4..6].copy_from_slice(&self.type_names_count.to_le_bytes());
        bytes[6..8].copy_from_slice(&self._pad.to_le_bytes());
        bytes
    }
}
+ #[inline] + pub fn alias_target(&self) -> Option { + TypeKind::from_u8(self.kind) + .filter(|k| k.is_alias()) + .map(|_| QTypeId(self.data)) + } + + /// For Struct/Enum types, get the member index. + #[inline] + pub fn member_index(&self) -> Option { + TypeKind::from_u8(self.kind) + .filter(|k| k.is_composite()) + .map(|_| self.data) + } + + /// For Struct/Enum types, get the member count. + #[inline] + pub fn member_count(&self) -> Option { + TypeKind::from_u8(self.kind) + .filter(|k| k.is_composite()) + .map(|_| self.count) + } +} + +/// Maps a name to a type (4 bytes). +/// +/// Only named types (definitions, aliases) have entries here. +/// Entries are sorted lexicographically by name for binary search. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C)] +pub struct TypeName { + /// StringId of the type name. + pub name: StringId, + /// TypeId this name refers to. + pub type_id: QTypeId, +} + +const _: () = assert!(std::mem::size_of::() == 4); + +/// Field or variant entry (4 bytes). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C)] +pub struct TypeMember { + /// Field/variant name. + pub name: StringId, + /// Type of this field/variant. 
+ pub type_id: QTypeId, +} + +const _: () = assert!(std::mem::size_of::() == 4); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn type_meta_header_size() { + assert_eq!(std::mem::size_of::(), 8); + } + + #[test] + fn type_meta_header_roundtrip() { + let header = TypeMetaHeader { + type_defs_count: 42, + type_members_count: 100, + type_names_count: 5, + ..Default::default() + }; + let bytes = header.to_bytes(); + let decoded = TypeMetaHeader::from_bytes(&bytes); + assert_eq!(decoded, header); + } + + #[test] + fn type_def_size() { + assert_eq!(std::mem::size_of::(), 4); + } + + #[test] + fn type_member_size() { + assert_eq!(std::mem::size_of::(), 4); + } + + #[test] + fn type_name_size() { + assert_eq!(std::mem::size_of::(), 4); + } + + #[test] + fn type_kind_is_wrapper() { + assert!(TypeKind::Optional.is_wrapper()); + assert!(TypeKind::ArrayZeroOrMore.is_wrapper()); + assert!(TypeKind::ArrayOneOrMore.is_wrapper()); + assert!(!TypeKind::Struct.is_wrapper()); + assert!(!TypeKind::Enum.is_wrapper()); + } + + #[test] + fn type_kind_aliases() { + // Test bytecode-friendly aliases + assert_eq!(TypeKind::ARRAY_STAR, TypeKind::ArrayZeroOrMore); + assert_eq!(TypeKind::ARRAY_PLUS, TypeKind::ArrayOneOrMore); + } +} diff --git a/docs/binary-format/06-transitions.md b/docs/binary-format/06-transitions.md index 92034a1a..09a22b01 100644 --- a/docs/binary-format/06-transitions.md +++ b/docs/binary-format/06-transitions.md @@ -86,20 +86,20 @@ EffectOp (u16) - **Opcode**: 6 bits (0-63), currently 12 defined. - **Payload**: 10 bits (0-1023), member/variant index. 
-| Opcode | Name | Payload | -| :----- | :------------- | :--------------------- | -| 0 | `CaptureNode` | - | -| 1 | `StartArray` | - | -| 2 | `PushElement` | - | -| 3 | `EndArray` | - | -| 4 | `StartObject` | - | -| 5 | `EndObject` | - | -| 6 | `SetField` | Member index (0-1023) | -| 7 | `StartVariant` | Variant index (0-1023) | -| 8 | `EndVariant` | - | -| 9 | `ToString` | - | -| 10 | `ClearCurrent` | - | -| 11 | `PushNull` | - | +| Opcode | Name | Payload | +| :----- | :----- | :--------------------- | +| 0 | `Node` | - | +| 1 | `A` | - | +| 2 | `Push` | - | +| 3 | `EndA` | - | +| 4 | `S` | - | +| 5 | `EndS` | - | +| 6 | `Set` | Member index (0-1023) | +| 7 | `E` | Variant index (0-1023) | +| 8 | `EndE` | - | +| 9 | `Text` | - | +| 10 | `Clear`| - | +| 11 | `Null` | - | **Opcode Ranges** (future extensibility): @@ -204,8 +204,8 @@ The compiler selects the smallest step size that fits the payload. If the total **Pre vs Post Effects**: -- `pre_effects`: Execute before match attempt. Used for scope openers (`StartObject`, `StartArray`, `StartVariant`) that must run regardless of which branch succeeds. -- `post_effects`: Execute after successful match. Used for capture/assignment ops (`CaptureNode`, `SetField`, `EndObject`, etc.) that depend on `matched_node`. +- `pre_effects`: Execute before match attempt. Used for scope openers (`S`, `A`, `E`) that must run regardless of which branch succeeds. +- `post_effects`: Execute after successful match. Used for capture/assignment ops (`Node`, `Set`, `EndS`, etc.) that depend on `matched_node`. ### 4.3. Epsilon Transitions @@ -326,12 +326,12 @@ Branch.successors = [exit_path, match_path] // try exit first ``` Entry ─ε→ Branch ─ε→ Match ─ε→ Exit │ - └─ε→ [PushNull] ─ε→ Exit + └─ε→ [Null] ─ε→ Exit Branch.successors = [match_path, skip_path] ``` -`PushNull` emits explicit null when the optional pattern doesn't match. +`Null` emits explicit null when the optional pattern doesn't match. 
### Non-Greedy `??` @@ -343,14 +343,14 @@ Branch.successors = [skip_path, match_path] // try skip first Untagged alternations `[ A B ]` compile to branching with null injection for type consistency. -When a capture appears in some branches but not others, the compiler injects `PushNull` into branches missing that capture: +When a capture appears in some branches but not others, the compiler injects `Null` into branches missing that capture: ``` Query: [ (a) @x (b) ] Type: { x?: Node } -Branch 1 (a): [CaptureNode, SetField(x)] → Exit -Branch 2 (b): [PushNull, SetField(x)] → Exit +Branch 1 (a): [Node, Set(x)] → Exit +Branch 2 (b): [Null, Set(x)] → Exit ``` This ensures the output object always has all fields defined, matching the type system's merged struct model. diff --git a/docs/runtime-engine.md b/docs/runtime-engine.md index b5860595..1380d798 100644 --- a/docs/runtime-engine.md +++ b/docs/runtime-engine.md @@ -155,17 +155,17 @@ struct EffectStream<'a> { } ``` -| Effect | Action | -| ------------------- | --------------------------------------- | -| CaptureNode | Push `matched_node` | -| Start/EndObject | Object boundaries | -| SetField(id) | Assign to field | -| Start/EndArray | Array boundaries | -| PushElement | Append to array | -| Start/EndVariant(t) | Tagged union boundaries | -| ToString | Node → source text | -| ClearCurrent | Reset current value | -| PushNull | Null placeholder (optional/alternation) | +| Effect | Action | +| -------- | --------------------------------------- | +| Node | Push `matched_node` | +| S/EndS | Object boundaries | +| Set(id) | Assign to field | +| A/EndA | Array boundaries | +| Push | Append to array | +| E/EndE | Tagged union boundaries | +| Text | Node → source text | +| Clear | Reset current value | +| Null | Null placeholder (optional/alternation) | ### Materialization