Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/sync-langs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Sync arborium languages
on:
push:
branches:
- 'renovate/arborium-crates'
- "renovate/arborium-crates"

permissions:
contents: write
Expand Down
55 changes: 29 additions & 26 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@

## Core Constructs

| Syntax | Meaning |
| ------------------- | ------------------------------ |
| `(node_kind)` | Named node |
| `"text"` / `'text'` | Anonymous node (literal token) |
| `(_)` | Any named node |
| `_` | Any node |
| `@name` | Capture (snake_case only) |
| `@x :: T` | Type annotation |
| `@x :: string` | Extract node text |
| `field: pattern` | Field constraint |
| `-field` | Negated field (assert absent) |
| `?` `*` `+` | Quantifiers (0-1, 0+, 1+) |
| `??` `*?` `+?` | Non-greedy variants |
| `.` | Anchor (adjacency, see below) |
| `{...}` | Sequence (siblings in order) |
| `[...]` | Alternation (first match wins) |
| `Name = ...` | Named definition (entrypoint) |
| `(Name)` | Use named expression |
| `(node == "x")` | String predicate (== != ^= $= *=) |
| `(node =~ /x/)` | Regex predicate (=~ !~) |
| Syntax | Meaning |
| ------------------- | ---------------------------------- |
| `(node_kind)` | Named node |
| `"text"` / `'text'` | Anonymous node (literal token) |
| `(_)` | Any named node |
| `_` | Any node |
| `@name` | Capture (snake_case only) |
| `@x :: T` | Type annotation |
| `@x :: string` | Extract node text |
| `field: pattern` | Field constraint |
| `-field` | Negated field (assert absent) |
| `?` `*` `+` | Quantifiers (0-1, 0+, 1+) |
| `??` `*?` `+?` | Non-greedy variants |
| `.` | Anchor (adjacency, see below) |
| `{...}` | Sequence (siblings in order) |
| `[...]` | Alternation (first match wins) |
| `Name = ...` | Named definition (entrypoint) |
| `(Name)` | Use named expression |
| `(node == "x")` | String predicate (== != ^= $= \*=) |
| `(node =~ /x/)` | Regex predicate (=~ !~) |

## Data Model Rules

Expand Down Expand Up @@ -148,23 +148,26 @@ Tree-sitter: `((a) (b))` — Plotnik: `{(a) (b)}`. The #1 syntax error.

```
crates/
plotnik-bytecode/ # Binary format definitions
src/
bytecode/ # Instruction set, modules, linking
type_system/ # Shared type primitives
plotnik-cli/ # CLI tool
src/commands/ # Subcommands (ast, check, dump, exec, infer, trace, langs)
plotnik-core/ # Node type database (NodeTypes, StaticNodeTypes) and string interning (Interner, Symbol)
plotnik-lib/ # Plotnik as library
plotnik-compiler/ # Compilation pipeline
src/
analyze/ # Semantic analysis (symbol_table, dependencies, type_check, validation)
bytecode/ # Binary format definitions
compile/ # Thompson NFA construction (AST → IR)
diagnostics/ # User-friendly error reporting
emit/ # Bytecode emission (IR → binary)
engine/ # Runtime VM (execution, backtracking, effects)
parser/ # Syntactic parsing (lexer, grammar, AST)
query/ # Query facade (Query, QueryBuilder, SourceMap)
type_system/ # Shared type primitives
typegen/ # Type declaration extraction (bytecode → .d.ts)
plotnik-core/ # Node type database (NodeTypes, StaticNodeTypes) and string interning (Interner, Symbol)
plotnik-langs/ # Tree-sitter language bindings
plotnik-macros/ # Proc macros
plotnik-lib/ # Facade crate re-exporting bytecode, compiler, vm
plotnik-vm/ # Runtime VM
src/engine/ # Execution, backtracking, effects
docs/
binary-format/ # Bytecode format specification
lang-reference.md # Language specification
Expand Down
5 changes: 4 additions & 1 deletion crates/plotnik-bytecode/src/bytecode/aligned_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ impl std::fmt::Debug for AlignedVec {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("AlignedVec")
.field("len", &self.len)
.field("aligned", &(self.blocks.as_ptr() as usize).is_multiple_of(ALIGN))
.field(
"aligned",
&(self.blocks.as_ptr() as usize).is_multiple_of(ALIGN),
)
.finish()
}
}
2 changes: 1 addition & 1 deletion crates/plotnik-bytecode/src/bytecode/aligned_vec_tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::aligned_vec::{AlignedVec, ALIGN};
use super::aligned_vec::{ALIGN, AlignedVec};

fn is_aligned(ptr: *const u8) -> bool {
(ptr as usize).is_multiple_of(ALIGN)
Expand Down
5 changes: 2 additions & 3 deletions crates/plotnik-bytecode/src/bytecode/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
use std::collections::BTreeMap;
use std::fmt::Write as _;

use plotnik_core::Colors;
use crate::predicate_op::PredicateOp;
use plotnik_core::Colors;

use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
use super::ids::TypeId;
use super::instructions::StepId;
use super::module::{Instruction, Module};
use super::node_type_ir::NodeTypeIR;
use super::nav::Nav;
use super::node_type_ir::NodeTypeIR;
use super::type_meta::{TypeData, TypeKind};
use super::{Call, Match, Return, Trampoline};

Expand All @@ -32,7 +32,6 @@ pub fn dump(module: &Module, colors: Colors) -> String {
out
}


/// Context for dump formatting, precomputes lookups for O(1) access.
struct DumpContext {
/// Maps step ID to entrypoint name for labeling.
Expand Down
36 changes: 18 additions & 18 deletions crates/plotnik-bytecode/src/bytecode/header_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ fn compute_offsets_empty() {

// New order: blobs first, then tables
// All sections 64-byte aligned. With 0 counts, each table still has 1 sentinel entry (4 bytes)
assert_eq!(offsets.str_blob, 64); // after header
assert_eq!(offsets.regex_blob, 64); // 64 + align(0) = 64
assert_eq!(offsets.str_table, 64); // 64 + align(0) = 64
assert_eq!(offsets.regex_table, 128); // 64 + align(4) = 128
assert_eq!(offsets.node_types, 192); // 128 + align(4) = 192
assert_eq!(offsets.node_fields, 192); // 192 + align(0) = 192
assert_eq!(offsets.str_blob, 64); // after header
assert_eq!(offsets.regex_blob, 64); // 64 + align(0) = 64
assert_eq!(offsets.str_table, 64); // 64 + align(0) = 64
assert_eq!(offsets.regex_table, 128); // 64 + align(4) = 128
assert_eq!(offsets.node_types, 192); // 128 + align(4) = 192
assert_eq!(offsets.node_fields, 192); // 192 + align(0) = 192
assert_eq!(offsets.trivia, 192);
assert_eq!(offsets.type_defs, 192);
assert_eq!(offsets.type_members, 192);
Expand Down Expand Up @@ -84,16 +84,16 @@ fn compute_offsets_with_data() {
let offsets = h.compute_offsets();

// New order: blobs first, then tables. All offsets 64-byte aligned.
assert_eq!(offsets.str_blob, 64); // header end
assert_eq!(offsets.regex_blob, 192); // 64 + 100 = 164 → 192
assert_eq!(offsets.str_table, 320); // 192 + 128 = 320 (aligned)
assert_eq!(offsets.regex_table, 384); // 320 + 24 = 344 → 384
assert_eq!(offsets.node_types, 448); // 384 + 12 = 396 → 448
assert_eq!(offsets.node_fields, 512); // 448 + 40 = 488 → 512
assert_eq!(offsets.trivia, 576); // 512 + 20 = 532 → 576
assert_eq!(offsets.type_defs, 640); // 576 + 6 = 582 → 640
assert_eq!(offsets.type_members, 704); // 640 + 32 = 672 → 704
assert_eq!(offsets.type_names, 768); // 704 + 48 = 752 → 768
assert_eq!(offsets.entrypoints, 832); // 768 + 16 = 784 → 832
assert_eq!(offsets.transitions, 896); // 832 + 16 = 848 → 896
assert_eq!(offsets.str_blob, 64); // header end
assert_eq!(offsets.regex_blob, 192); // 64 + 100 = 164 → 192
assert_eq!(offsets.str_table, 320); // 192 + 128 = 320 (aligned)
assert_eq!(offsets.regex_table, 384); // 320 + 24 = 344 → 384
assert_eq!(offsets.node_types, 448); // 384 + 12 = 396 → 448
assert_eq!(offsets.node_fields, 512); // 448 + 40 = 488 → 512
assert_eq!(offsets.trivia, 576); // 512 + 20 = 532 → 576
assert_eq!(offsets.type_defs, 640); // 576 + 6 = 582 → 640
assert_eq!(offsets.type_members, 704); // 640 + 32 = 672 → 704
assert_eq!(offsets.type_names, 768); // 704 + 48 = 752 → 768
assert_eq!(offsets.entrypoints, 832); // 768 + 16 = 784 → 832
assert_eq!(offsets.transitions, 896); // 832 + 16 = 848 → 896
}
5 changes: 4 additions & 1 deletion crates/plotnik-bytecode/src/bytecode/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,10 @@ impl Module {
}

/// Load a module from owned bytes (copies into aligned storage).
#[deprecated(since = "0.1.0", note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying")]
#[deprecated(
since = "0.1.0",
note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying"
)]
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
Self::load(&bytes)
}
Expand Down
2 changes: 1 addition & 1 deletion crates/plotnik-bytecode/src/bytecode/module_tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Tests for the bytecode module.

use super::module::{ByteStorage, ModuleError};
use super::AlignedVec;
use super::module::{ByteStorage, ModuleError};

#[test]
fn byte_storage_copy_from_slice() {
Expand Down
13 changes: 6 additions & 7 deletions crates/plotnik-bytecode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@ pub mod type_system;
// Re-export commonly used items at crate root
pub use bytecode::{
AlignedVec, ByteStorage, Call, EffectOp, EffectOpcode, Entrypoint, EntrypointsView,
FieldSymbol, Header, Instruction, LineBuilder, MAGIC, MAX_MATCH_PAYLOAD_SLOTS,
MAX_PRE_EFFECTS, Match, Module, ModuleError, Nav, NodeSymbol, NodeTypeIR, Opcode, RegexView,
Return, SECTION_ALIGN, STEP_SIZE, SectionOffsets, Slice, StepAddr, StepId, StringId,
StringsView, Symbol, SymbolsView, Trampoline, TriviaEntry, TriviaView, TypeData, TypeDef,
TypeId, TypeKind, TypeMember, TypeName, TypesView, VERSION, align_to_section, cols, dump,
format_effect, nav_symbol, select_match_opcode, superscript, trace, truncate_text,
width_for_count,
FieldSymbol, Header, Instruction, LineBuilder, MAGIC, MAX_MATCH_PAYLOAD_SLOTS, MAX_PRE_EFFECTS,
Match, Module, ModuleError, Nav, NodeSymbol, NodeTypeIR, Opcode, RegexView, Return,
SECTION_ALIGN, STEP_SIZE, SectionOffsets, Slice, StepAddr, StepId, StringId, StringsView,
Symbol, SymbolsView, Trampoline, TriviaEntry, TriviaView, TypeData, TypeDef, TypeId, TypeKind,
TypeMember, TypeName, TypesView, VERSION, align_to_section, cols, dump, format_effect,
nav_symbol, select_match_opcode, superscript, trace, truncate_text, width_for_count,
};
pub use dfa::deserialize_dfa;
pub use predicate_op::PredicateOp;
Expand Down
4 changes: 3 additions & 1 deletion crates/plotnik-cli/src/commands/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ pub fn run(args: ExecArgs) {
color: args.color,
});

let vm = VM::builder(&source_code, &tree).trivia_types(trivia_types).build();
let vm = VM::builder(&source_code, &tree)
.trivia_types(trivia_types)
.build();
let effects = match vm.execute(&module, 0, &entrypoint) {
Ok(effects) => effects,
Err(RuntimeError::NoMatch) => {
Expand Down
30 changes: 18 additions & 12 deletions crates/plotnik-compiler/src/analyze/validation/predicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,15 @@
//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.)
//! - Named captures (`(?P<name>...)`)

use regex_syntax::ast::{self, visit, Ast, GroupKind, Visitor as RegexVisitor};
use regex_syntax::ast::{self, Ast, GroupKind, Visitor as RegexVisitor, visit};
use rowan::TextRange;

use crate::SourceId;
use crate::analyze::visitor::{Visitor, walk_named_node};
use crate::diagnostics::{DiagnosticKind, Diagnostics};
use crate::parser::{NamedNode, Root};

pub fn validate_predicates(
source_id: SourceId,
source: &str,
ast: &Root,
diag: &mut Diagnostics,
) {
pub fn validate_predicates(source_id: SourceId, source: &str, ast: &Root, diag: &mut Diagnostics) {
let mut validator = PredicateValidator {
diag,
source_id,
Expand Down Expand Up @@ -69,13 +64,16 @@ impl PredicateValidator<'_, '_> {
let span = self.map_regex_span(e.span(), regex_range);
let report = match e.kind() {
ast::ErrorKind::UnsupportedBackreference => {
self.diag.report(self.source_id, DiagnosticKind::RegexBackreference, span)
self.diag
.report(self.source_id, DiagnosticKind::RegexBackreference, span)
}
ast::ErrorKind::UnsupportedLookAround => {
// Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!`
use rowan::TextSize;
let adjusted = TextRange::new(span.start() + TextSize::from(1u32), span.end());
self.diag.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
let adjusted =
TextRange::new(span.start() + TextSize::from(1u32), span.end());
self.diag
.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
}
_ => self
.diag
Expand Down Expand Up @@ -128,8 +126,16 @@ impl RegexVisitor for NamedCaptureDetector {
&& let GroupKind::CaptureName { name, .. } = &group.kind
{
// Span for `?P<name>` (skip opening paren, include closing `>`)
let start = ast::Position::new(group.span.start.offset + 1, group.span.start.line, group.span.start.column + 1);
let end = ast::Position::new(name.span.end.offset + 1, name.span.end.line, name.span.end.column + 1);
let start = ast::Position::new(
group.span.start.offset + 1,
group.span.start.line,
group.span.start.column + 1,
);
let end = ast::Position::new(
name.span.end.offset + 1,
name.span.end.line,
name.span.end.column + 1,
);
self.named_captures.push(ast::Span::new(start, end));
}
Ok(())
Expand Down
7 changes: 4 additions & 3 deletions crates/plotnik-compiler/src/bytecode/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
use std::collections::BTreeMap;
use std::num::NonZeroU16;

use crate::analyze::type_check::TypeId;
use plotnik_bytecode::{
Call, EffectOp, EffectOpcode, Nav, Opcode, PredicateOp, Return, StepAddr, StepId, Trampoline,
select_match_opcode,
};
use crate::analyze::type_check::TypeId;

/// Node type constraint for Match instructions.
///
Expand Down Expand Up @@ -643,8 +643,9 @@ impl MatchIR {

let value_ref = match &pred.value {
PredicateValueIR::String(string_id) => string_id.get(),
PredicateValueIR::Regex(string_id) => lookup_regex(*string_id)
.expect("regex predicate must be interned"),
PredicateValueIR::Regex(string_id) => {
lookup_regex(*string_id).expect("regex predicate must be interned")
}
};
bytes[offset..offset + 2].copy_from_slice(&value_ref.to_le_bytes());
offset += 2;
Expand Down
16 changes: 8 additions & 8 deletions crates/plotnik-compiler/src/compile/capture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use std::collections::HashSet;

use crate::analyze::type_check::{TypeContext, TypeId, TypeShape};
use crate::bytecode::EffectIR;
use plotnik_bytecode::EffectOpcode;
use crate::parser::ast::{self, Expr};
use plotnik_bytecode::EffectOpcode;

use super::Compiler;
use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier, is_truly_empty_scope};
Expand Down Expand Up @@ -143,12 +143,12 @@ impl Compiler<'_> {
if !inner_creates_scope(&ei) {
return false;
}
let Some(info) = self.type_ctx.get_term_info(&ei) else {
let Some(info) = self.ctx.type_ctx.get_term_info(&ei) else {
return false;
};
info.flow
.type_id()
.and_then(|id| self.type_ctx.get_type(id))
.and_then(|id| self.ctx.type_ctx.get_type(id))
.is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
});

Expand Down Expand Up @@ -195,7 +195,7 @@ impl Compiler<'_> {
}

// Check the actual inferred type, not syntax
let Some(info) = self.type_ctx.get_term_info(&inner) else {
let Some(info) = self.ctx.type_ctx.get_term_info(&inner) else {
return true;
};

Expand All @@ -204,7 +204,7 @@ impl Compiler<'_> {
!info
.flow
.type_id()
.and_then(|id| self.type_ctx.get_type(id))
.and_then(|id| self.ctx.type_ctx.get_type(id))
.is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
}

Expand Down Expand Up @@ -235,9 +235,9 @@ impl Compiler<'_> {
/// In this case, we skip emitting Node/Text effects in captures.
fn ref_returns_structured(&self, r: &ast::Ref) -> bool {
r.name()
.and_then(|name| self.type_ctx.get_def_id(self.interner, name.text()))
.and_then(|def_id| self.type_ctx.get_def_type(def_id))
.and_then(|def_type| self.type_ctx.get_type(def_type))
.and_then(|name| self.ctx.type_ctx.get_def_id(self.ctx.interner, name.text()))
.and_then(|def_id| self.ctx.type_ctx.get_def_type(def_id))
.and_then(|def_type| self.ctx.type_ctx.get_type(def_type))
.is_some_and(|shape| {
matches!(
shape,
Expand Down
2 changes: 1 addition & 1 deletion crates/plotnik-compiler/src/compile/capture_tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::capture::CaptureEffects;
use plotnik_bytecode::EffectOpcode;
use crate::bytecode::{EffectIR, MemberRef};
use plotnik_bytecode::EffectOpcode;

#[test]
fn nest_scope_preserves_outer_and_nests_inner() {
Expand Down
Loading