From 8e56c4f1ed887e1c19d24154ff237baac4db5293 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 15 Jan 2026 13:59:23 -0300 Subject: [PATCH] fix: Reject multi-element sequences without internal captures --- .../src/analyze/type_check/infer.rs | 53 +++++++++++++++---- .../analyze/type_check/type_check_tests.rs | 33 ++++++++++++ .../src/diagnostics/message.rs | 8 +++ .../plotnik-compiler/src/emit/emit_tests.rs | 3 +- ...__emit_tests__sequences_in_quantifier.snap | 51 +++++++++++------- .../parser/tests/grammar/sequences_tests.rs | 10 +++- .../plotnik-compiler/src/query/query_tests.rs | 47 ++++++++++++++++ 7 files changed, 172 insertions(+), 33 deletions(-) diff --git a/crates/plotnik-compiler/src/analyze/type_check/infer.rs b/crates/plotnik-compiler/src/analyze/type_check/infer.rs index 3abd47da..0e3c11ce 100644 --- a/crates/plotnik-compiler/src/analyze/type_check/infer.rs +++ b/crates/plotnik-compiler/src/analyze/type_check/infer.rs @@ -581,9 +581,9 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let flow = match quantifier { QuantifierKind::Optional => self.make_flow_optional(inner_info.flow), QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { - if !is_row_capture { - self.check_strict_dimensionality(quant, &inner_info); - } + // Always check multi-element sequences (row capture doesn't help) + // Only skip internal capture check when is_row_capture + self.check_strict_dimensionality(quant, &inner_info, is_row_capture); self.make_flow_array(inner_info.flow, &inner, quantifier.is_non_empty()) } }; @@ -681,8 +681,46 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } /// Check strict dimensionality rule for * and + quantifiers. - /// Captures inside a quantifier are forbidden unless marked as a row capture. - fn check_strict_dimensionality(&mut self, quant: &QuantifiedExpr, inner_info: &TermInfo) { + /// + /// Two checks: + /// 1. Multi-element patterns (Arity::Many) without captures can't be scalar arrays + /// (applies regardless of is_row_capture - row capture doesn't help here) + /// 2. Internal captures require a row capture on the quantifier + /// (skipped when is_row_capture=true) + fn check_strict_dimensionality( + &mut self, + quant: &QuantifiedExpr, + inner_info: &TermInfo, + is_row_capture: bool, + ) { + let op = quant + .operator() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "*".to_string()); + + // Check 1: Multi-element patterns without captures can't be scalar arrays + // This check applies even with row capture - you can't meaningfully capture + // multiple nodes per iteration as a scalar + if inner_info.arity == Arity::Many && inner_info.flow.is_void() { + self.diag + .report( + self.source_id, + DiagnosticKind::MultiElementScalarCapture, + quant.text_range(), + ) + .message(format!( + "sequence with `{}` matches multiple nodes but has no internal captures", + op + )) + .emit(); + return; + } + + // Check 2: Internal captures require row capture (skip if already a row capture) + if is_row_capture { + return; + } + let TypeFlow::Bubble(type_id) = &inner_info.flow else { return; }; @@ -695,11 +733,6 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { return; } - let op = quant - .operator() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "*".to_string()); - let capture_names: Vec<_> = fields .keys() .map(|s| format!("`@{}`", self.interner.resolve(*s))) diff --git a/crates/plotnik-compiler/src/analyze/type_check/type_check_tests.rs b/crates/plotnik-compiler/src/analyze/type_check/type_check_tests.rs index 8f1aa063..42c3e8fc 100644 --- a/crates/plotnik-compiler/src/analyze/type_check/type_check_tests.rs +++ b/crates/plotnik-compiler/src/analyze/type_check/type_check_tests.rs @@ -664,6 +664,39 @@ fn error_named_node_with_capture_quantified() { "); } +#[test] +fn error_multi_element_sequence_no_captures() { + let input = "Bad = {(identifier) (number)}* @items"; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: sequence with `*` matches multiple nodes but has no internal captures + | + 1 | Bad = {(identifier) (number)}* @items + | ^^^^^^^^^^^^^^^^^^^^^^^^ + | + help: add internal captures: `{(a) @a (b) @b}* @items` + "); +} + +#[test] +fn error_multi_element_alternation_branch() { + // Alternation where one branch is multi-element + let input = "Bad = [(identifier) {(number) (string)}]* @items"; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: sequence with `*` matches multiple nodes but has no internal captures + | + 1 | Bad = [(identifier) {(number) (string)}]* @items + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + help: add internal captures: `{(a) @a (b) @b}* @items` + "); +} + #[test] fn recursive_type_with_alternation() { let input = indoc! {r#" diff --git a/crates/plotnik-compiler/src/diagnostics/message.rs b/crates/plotnik-compiler/src/diagnostics/message.rs index 0796a4db..94289447 100644 --- a/crates/plotnik-compiler/src/diagnostics/message.rs +++ b/crates/plotnik-compiler/src/diagnostics/message.rs @@ -77,6 +77,7 @@ pub enum DiagnosticKind { MultiCaptureQuantifierNoName, UnusedBranchLabels, StrictDimensionalityViolation, + MultiElementScalarCapture, UncapturedOutputWithCaptures, AmbiguousUncapturedOutputs, DuplicateCaptureInScope, @@ -181,6 +182,9 @@ impl DiagnosticKind { Self::AmbiguousUncapturedOutputs => { Some("capture each expression explicitly: `(X) @x (Y) @y`") } + Self::MultiElementScalarCapture => { + Some("add internal captures: `{(a) @a (b) @b}* @items`") + } _ => None, } } @@ -253,6 +257,9 @@ impl DiagnosticKind { Self::StrictDimensionalityViolation => { "quantifier with captures requires a struct capture" } + Self::MultiElementScalarCapture => { + "cannot capture multi-element pattern as scalar array" + } Self::UncapturedOutputWithCaptures => { "output-producing expression requires capture when siblings have captures" } @@ -301,6 +308,7 @@ impl DiagnosticKind { // Type inference errors with context Self::StrictDimensionalityViolation => "{}".to_string(), + Self::MultiElementScalarCapture => "{}".to_string(), Self::DuplicateCaptureInScope => { "capture `@{}` already defined in this scope".to_string() } diff --git a/crates/plotnik-compiler/src/emit/emit_tests.rs b/crates/plotnik-compiler/src/emit/emit_tests.rs index dc172e14..8eee93c7 100644 --- a/crates/plotnik-compiler/src/emit/emit_tests.rs +++ b/crates/plotnik-compiler/src/emit/emit_tests.rs @@ -279,8 +279,9 @@ fn sequences_nested() { #[test] fn sequences_in_quantifier() { + // Sequence with internal captures - valid for struct array snap!(indoc! {r#" - Test = (array {(identifier) (number)}* @items) + Test = (array {(identifier) @id (number) @num}* @items) "#}); } diff --git a/crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__sequences_in_quantifier.snap b/crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__sequences_in_quantifier.snap index eebd4627..f0bf8312 100644 --- a/crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__sequences_in_quantifier.snap +++ b/crates/plotnik-compiler/src/emit/snapshots/plotnik_compiler__emit__emit_tests__sequences_in_quantifier.snap @@ -1,29 +1,38 @@ --- source: crates/plotnik-compiler/src/emit/emit_tests.rs --- -Test = (array {(identifier) (number)}* @items) +Test = (array {(identifier) @id (number) @num}* @items) --- [strings] S0 "Beauty will save the world" -S1 "items" -S2 "Test" -S3 "array" -S4 "identifier" -S5 "number" +S1 "id" +S2 "num" +S3 "items" +S4 "Test" +S5 "array" +S6 "identifier" +S7 "number" [type_defs] T0 = -T1 = ArrayStar(T0) ; * -T2 = Struct M0:1 ; { items } +T1 = Struct M0:2 ; { id, num } +T2 = ArrayStar(T1) ; T1* +T3 = Struct M2:1 ; { items } +T4 = Struct M3:1 ; { id } +T5 = Struct M4:1 ; { num } [type_members] -M0: S1 → T1 ; items: T1 +M0: S1 → T0 ; id: +M1: S2 → T0 ; num: +M2: S3 → T2 ; items: T2 +M3: S1 → T0 ; id: +M4: S2 → T0 ; num: [type_names] -N0: S2 → T2 ; Test +N0: S4 → T3 ; Test [entrypoints] -Test = 06 :: T2 +Test = 06 :: T3 [transitions] _ObjWrap: @@ -35,13 +44,15 @@ _ObjWrap: Test: 06 ! (array) 08 07 ... - 08 ε [Arr] 18, 11 + 08 ε [Arr] 22, 12 10 ▶ - 11 ε [EndArr Set(M0)] 10 - 13 ▷ (number) [Node Push] 22, 24 - 15 ! (identifier) 13 - 16 ▷ _ 15, 16, 24 - 18 ▽ _ 15, 16, 24 - 20 ▷ _ 15, 20, 24 - 22 ▷ _ 15, 20, 24 - 24 △ [EndArr Set(M0)] _ 10 + 11 ε 28 + 12 ε [EndArr Set(M2)] 10 + 14 ε [EndObj Push] 26, 11 + 16 ▷ (number) [Node Set(M1)] 14 + 18 ! [Obj] (identifier) [Node Set(M0)] 16 + 20 ▷ _ 18, 20, 28 + 22 ▽ _ 18, 20, 28 + 24 ▷ _ 18, 24, 28 + 26 ▷ _ 18, 24, 28 + 28 △ [EndArr Set(M2)] _ 10 diff --git a/crates/plotnik-compiler/src/parser/tests/grammar/sequences_tests.rs b/crates/plotnik-compiler/src/parser/tests/grammar/sequences_tests.rs index 329bbb57..4ee35727 100644 --- a/crates/plotnik-compiler/src/parser/tests/grammar/sequences_tests.rs +++ b/crates/plotnik-compiler/src/parser/tests/grammar/sequences_tests.rs @@ -177,11 +177,14 @@ fn sequence_with_captures() { #[test] fn sequence_with_quantifier() { + // Note: This tests parser behavior only. The pattern `{(a) (b)}+` is + // semantically invalid (multi-element sequence without captures), but + // we're only testing that the parser produces the correct CST. let input = indoc! {r#" Q = {(a) (b)}+ "#}; - let res = Query::expect_valid_cst(input); + let res = Query::parse_cst(input); insta::assert_snapshot!(res, @r#" Root @@ -306,11 +309,14 @@ fn sequence_with_alternation() { #[test] fn sequence_comma_separated_expression() { + // Note: This tests parser behavior only. The inner pattern `{"," (number)}*` + // is semantically invalid (multi-element sequence without captures), but + // we're only testing that the parser produces the correct CST. let input = indoc! {r#" Q = {(number) {"," (number)}*} "#}; - let res = Query::expect_valid_cst(input); + let res = Query::parse_cst(input); insta::assert_snapshot!(res, @r#" Root diff --git a/crates/plotnik-compiler/src/query/query_tests.rs b/crates/plotnik-compiler/src/query/query_tests.rs index 13f4d44a..ca667075 100644 --- a/crates/plotnik-compiler/src/query/query_tests.rs +++ b/crates/plotnik-compiler/src/query/query_tests.rs @@ -36,6 +36,47 @@ impl QueryAnalyzed { query } + /// Parse and validate syntax only (no semantic analysis). + /// Use this for pure parser/grammar tests. + #[track_caller] + fn parse_syntax_only(src: &str) -> Self { + use crate::diagnostics::DiagnosticKind::*; + let source_map = SourceMap::one_liner(src); + let query = QueryBuilder::new(source_map).parse().unwrap().analyze(); + // Only check for parse errors (not semantic errors) + let diag = query.diagnostics(); + let has_parse_error = diag.raw().iter().any(|d| { + matches!( + d.kind, + UnclosedTree + | UnclosedSequence + | UnclosedAlternation + | UnclosedRegex + | ExpectedExpression + | ExpectedTypeName + | ExpectedCaptureName + | ExpectedFieldName + | ExpectedSubtype + | ExpectedPredicateValue + | EmptyTree + | EmptyAnonymousNode + | EmptySequence + | EmptyAlternation + | BareIdentifier + | InvalidSeparator + | UnexpectedToken + ) + }); + + if has_parse_error { + panic!( + "Expected valid syntax, got parse error:\n{}", + query.dump_diagnostics() + ); + } + query + } + #[track_caller] pub fn expect(src: &str) -> Self { let source_map = SourceMap::one_liner(src); @@ -52,6 +93,12 @@ impl QueryAnalyzed { Self::parse_and_validate(src).dump_cst() } + /// Parse-only CST dump (for pure parser tests, no semantic validation). + #[track_caller] + pub fn parse_cst(src: &str) -> String { + Self::parse_syntax_only(src).dump_cst() + } + #[track_caller] pub fn expect_valid_cst_full(src: &str) -> String { Self::parse_and_validate(src).dump_cst_full()