diff --git a/AGENTS.md b/AGENTS.md index 29c0847a..cc15da36 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -82,6 +82,15 @@ The `.` anchor adapts to what it's anchoring: Rule: anchor is as strict as its strictest operand. +**Placement**: Boundary anchors require parent node context: + +``` +(parent . (first)) ; ✓ valid +(parent (last) .) ; ✓ valid +{(a) . (b)} ; ✓ interior anchor OK +{. (a)} ; ✗ boundary without parent +``` + ## Anti-patterns ``` @@ -93,6 +102,9 @@ Rule: anchor is as strict as its strictest operand. ; WRONG: predicates (unsupported) (id) @x (#eq? @x "foo") + +; WRONG: boundary anchors without parent node +{. (a)} ; use (parent {. (a)}) ``` ## Type System Rules diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index 86cd74f9..0b8101a3 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -63,6 +63,7 @@ pub enum DiagnosticKind { RecursionNoEscape, DirectRecursion, FieldSequenceValue, + AnchorWithoutContext, // Type inference errors IncompatibleTypes, @@ -147,6 +148,9 @@ impl DiagnosticKind { Self::DirectRecursion => { Some("recursive references must consume input before recursing") } + Self::AnchorWithoutContext => { + Some("wrap in a named node: `(parent . 
(child))`") + } _ => None, } } @@ -201,6 +205,7 @@ impl DiagnosticKind { Self::RecursionNoEscape => "infinite recursion: no escape path", Self::DirectRecursion => "infinite recursion: cycle consumes no input", Self::FieldSequenceValue => "field cannot match a sequence", + Self::AnchorWithoutContext => "boundary anchor requires parent node context", // Type inference Self::IncompatibleTypes => "incompatible types", diff --git a/crates/plotnik-lib/src/parser/tests/grammar/anchors_tests.rs b/crates/plotnik-lib/src/parser/tests/grammar/anchors_tests.rs index cddcc1cc..3b69378f 100644 --- a/crates/plotnik-lib/src/parser/tests/grammar/anchors_tests.rs +++ b/crates/plotnik-lib/src/parser/tests/grammar/anchors_tests.rs @@ -159,8 +159,9 @@ fn anchor_multiple_adjacent() { #[test] fn anchor_in_sequence() { + // Boundary anchors in sequences require parent node context let input = indoc! {r#" - Q = {. (first) (second) .} + Q = (parent {. (first) (second) .}) "#}; let res = Query::expect_valid_cst(input); @@ -170,20 +171,24 @@ fn anchor_in_sequence() { Def Id "Q" Equals "=" - Seq - BraceOpen "{" - Anchor - Dot "." - Tree - ParenOpen "(" - Id "first" - ParenClose ")" - Tree - ParenOpen "(" - Id "second" - ParenClose ")" - Anchor - Dot "." - BraceClose "}" + Tree + ParenOpen "(" + Id "parent" + Seq + BraceOpen "{" + Anchor + Dot "." + Tree + ParenOpen "(" + Id "first" + ParenClose ")" + Tree + ParenOpen "(" + Id "second" + ParenClose ")" + Anchor + Dot "." + BraceClose "}" + ParenClose ")" "#); } diff --git a/crates/plotnik-lib/src/parser/tests/grammar/sequences_tests.rs b/crates/plotnik-lib/src/parser/tests/grammar/sequences_tests.rs index b1dfcdd8..1e4d3434 100644 --- a/crates/plotnik-lib/src/parser/tests/grammar/sequences_tests.rs +++ b/crates/plotnik-lib/src/parser/tests/grammar/sequences_tests.rs @@ -272,8 +272,9 @@ fn sequence_comma_separated_expression() { #[test] fn sequence_with_anchor() { + // Boundary anchors require parent node context let input = indoc! {r#" - Q = {. 
(first) (second) .} + Q = (parent {. (first) (second) .}) "#}; let res = Query::expect_valid_cst(input); @@ -283,20 +284,24 @@ fn sequence_with_anchor() { Def Id "Q" Equals "=" - Seq - BraceOpen "{" - Anchor - Dot "." - Tree - ParenOpen "(" - Id "first" - ParenClose ")" - Tree - ParenOpen "(" - Id "second" - ParenClose ")" - Anchor - Dot "." - BraceClose "}" + Tree + ParenOpen "(" + Id "parent" + Seq + BraceOpen "{" + Anchor + Dot "." + Tree + ParenOpen "(" + Id "first" + ParenClose ")" + Tree + ParenOpen "(" + Id "second" + ParenClose ")" + Anchor + Dot "." + BraceClose "}" + ParenClose ")" "#); } diff --git a/crates/plotnik-lib/src/query/anchors.rs b/crates/plotnik-lib/src/query/anchors.rs new file mode 100644 index 00000000..4bcc1ec3 --- /dev/null +++ b/crates/plotnik-lib/src/query/anchors.rs @@ -0,0 +1,73 @@ +//! Semantic validation for anchor placement. +//! +//! Anchors require context to be meaningful: +//! - **Boundary anchors** (at start/end of sequence) need parent named node context +//! - **Interior anchors** (between items) are always valid +//! +//! This validation ensures anchors are placed where they can be meaningfully compiled. 
+
+use super::visitor::{Visitor, walk_named_node, walk_seq_expr};
+use crate::SourceId;
+use crate::diagnostics::{DiagnosticKind, Diagnostics};
+use crate::parser::ast::{NamedNode, Root, SeqExpr, SeqItem};
+
+/// Reports every boundary anchor in `ast` that no named node encloses.
+pub fn validate_anchors(source_id: SourceId, ast: &Root, diag: &mut Diagnostics) {
+    let mut visitor = AnchorValidator {
+        diag,
+        source_id,
+        in_named_node: false,
+    };
+    visitor.visit(ast);
+}
+
+/// Visitor that remembers whether traversal is inside a named node.
+struct AnchorValidator<'a> {
+    diag: &'a mut Diagnostics,
+    source_id: SourceId,
+    /// Set while walking the children of a named node, at any depth.
+    in_named_node: bool,
+}
+
+impl Visitor for AnchorValidator<'_> {
+    fn visit_named_node(&mut self, node: &NamedNode) {
+        // A named node supplies first/last-child context for everything beneath it.
+        let prev = std::mem::replace(&mut self.in_named_node, true);
+        walk_named_node(self, node);
+        self.in_named_node = prev;
+    }
+
+    fn visit_seq_expr(&mut self, seq: &SeqExpr) {
+        self.check_items(seq.items());
+        walk_seq_expr(self, seq);
+    }
+}
+
+impl AnchorValidator<'_> {
+    /// Reports first/last-position anchors in `items` unless a named node encloses them.
+    fn check_items(&mut self, items: impl Iterator<Item = SeqItem>) {
+        if self.in_named_node {
+            return;
+        }
+
+        // Peek ahead to spot the last item without buffering the whole sequence.
+        let mut items = items.peekable();
+        let mut is_first = true;
+        while let Some(item) = items.next() {
+            let is_boundary = is_first || items.peek().is_none();
+            is_first = false;
+            if !is_boundary {
+                continue;
+            }
+            if let SeqItem::Anchor(anchor) = item {
+                self.diag
+                    .report(
+                        self.source_id,
+                        DiagnosticKind::AnchorWithoutContext,
+                        anchor.text_range(),
+                    )
+                    .emit();
+            }
+        }
+    }
+}
diff --git a/crates/plotnik-lib/src/query/anchors_tests.rs b/crates/plotnik-lib/src/query/anchors_tests.rs
new file mode 100644
index 00000000..9fb025b9
--- /dev/null
+++ b/crates/plotnik-lib/src/query/anchors_tests.rs
@@ -0,0 +1,171 @@
+use crate::Query;
+
+#[test]
+fn interior_anchor_always_valid() {
+    let input = "Q = {(a) . 
(b)}"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + Seq + NamedNode a + . + NamedNode b + "); +} + +#[test] +fn anchor_inside_named_node_first() { + let input = "Q = (parent . (first))"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + NamedNode parent + . + NamedNode first + "); +} + +#[test] +fn anchor_inside_named_node_last() { + let input = "Q = (parent (last) .)"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + NamedNode parent + NamedNode last + . + "); +} + +#[test] +fn anchor_inside_named_node_both() { + let input = "Q = (parent . (first) (second) .)"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + NamedNode parent + . + NamedNode first + NamedNode second + . + "); +} + +#[test] +fn anchor_in_seq_inside_named_node() { + let input = "Q = (parent {. (first)})"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + NamedNode parent + Seq + . + NamedNode first + "); +} + +#[test] +fn boundary_anchor_at_seq_start_without_context() { + let input = "Q = {. (a)}"; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: boundary anchor requires parent node context + | + 1 | Q = {. (a)} + | ^ + | + help: wrap in a named node: `(parent . (child))` + "); +} + +#[test] +fn boundary_anchor_at_seq_end_without_context() { + let input = "Q = {(a) .}"; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: boundary anchor requires parent node context + | + 1 | Q = {(a) .} + | ^ + | + help: wrap in a named node: `(parent . (child))` + "); +} + +#[test] +fn multiple_boundary_anchors_without_context() { + let input = "Q = {. 
(a) .}"; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: boundary anchor requires parent node context + | + 1 | Q = {. (a) .} + | ^ + | + help: wrap in a named node: `(parent . (child))` + + error: boundary anchor requires parent node context + | + 1 | Q = {. (a) .} + | ^ + | + help: wrap in a named node: `(parent . (child))` + "); +} + +#[test] +fn interior_anchor_with_alternation() { + let input = "Q = {(a) . [(b) (c)]}"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + Seq + NamedNode a + . + Alt + Branch + NamedNode b + Branch + NamedNode c + "); +} + +#[test] +fn nested_named_node_provides_context() { + let input = "Q = (outer (inner . (first)))"; + + let res = Query::expect_valid_ast(input); + + insta::assert_snapshot!(res, @r" + Root + Def Q + NamedNode outer + NamedNode inner + . + NamedNode first + "); +} diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 8c707ec7..2bb96c16 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -9,6 +9,7 @@ pub use source_map::{SourceId, SourceMap}; pub use symbol_table::SymbolTable; pub mod alt_kinds; +pub mod anchors; mod dependencies; pub mod emit; pub mod link; @@ -21,6 +22,8 @@ pub mod visitor; #[cfg(test)] mod alt_kinds_tests; #[cfg(test)] +mod anchors_tests; +#[cfg(test)] mod dependencies_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod emit_tests; diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 88a792f1..6d83e0c2 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -10,6 +10,7 @@ use plotnik_langs::Lang; use crate::Diagnostics; use crate::parser::{ParseResult, Parser, Root, SyntaxNode, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; +use crate::query::anchors::validate_anchors; use crate::query::dependencies; use 
crate::query::link; use crate::query::source_map::{SourceId, SourceMap}; @@ -75,6 +76,7 @@ impl QueryBuilder { let res = parser.parse()?; validate_alt_kinds(source.id, &res.ast, &mut diag); + validate_anchors(source.id, &res.ast, &mut diag); total_fuel_consumed = total_fuel_consumed.saturating_add(res.fuel_consumed); ast.insert(source.id, res.ast); }