From 75718bc1242973e6cfbabf638b05b3f36acf3fe9 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 5 Jan 2026 20:03:37 -0300 Subject: [PATCH 1/2] fix: empty captured sequences produce empty struct instead of null --- .../src/analyze/type_check/infer.rs | 22 +++++++++++++-- crates/plotnik-lib/src/compile/capture.rs | 28 ++++++++++++------- crates/plotnik-lib/src/compile/expressions.rs | 9 ++++-- crates/plotnik-lib/src/compile/navigation.rs | 3 ++ crates/plotnik-lib/src/engine/engine_tests.rs | 20 +++++++++++++ crates/plotnik-lib/src/engine/materializer.rs | 19 +++++++------ ...s__regression_empty_captured_sequence.snap | 18 ++++++++++++ ...sion_optional_empty_captured_sequence.snap | 18 ++++++++++++ crates/plotnik-lib/src/parser/ast.rs | 11 ++++++++ crates/plotnik-lib/src/parser/mod.rs | 3 +- 10 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_empty_captured_sequence.snap create mode 100644 crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_optional_empty_captured_sequence.snap diff --git a/crates/plotnik-lib/src/analyze/type_check/infer.rs b/crates/plotnik-lib/src/analyze/type_check/infer.rs index 683424da..0b159d79 100644 --- a/crates/plotnik-lib/src/analyze/type_check/infer.rs +++ b/crates/plotnik-lib/src/analyze/type_check/infer.rs @@ -22,7 +22,7 @@ use crate::analyze::visitor::{Visitor, walk_alt_expr, walk_def, walk_named_node, use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::ast::{ AltExpr, AltKind, AnonymousNode, CapturedExpr, Def, Expr, FieldExpr, NamedNode, QuantifiedExpr, - Ref, Root, SeqExpr, + Ref, Root, SeqExpr, is_truly_empty_scope, }; use crate::parser::cst::SyntaxKind; use crate::query::source_map::SourceId; @@ -458,8 +458,24 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { ) -> TypeId { match &inner_info.flow { TypeFlow::Void => { - let base_type = self.get_recursive_ref_type(inner).unwrap_or(TYPE_NODE); - self.annotation_to_alias(annotation, base_type) + // Truly empty sequences/alternations produce empty struct. + // E.g., `{ } @x` has type `{ x: {} }`. + // Non-empty sequences with void flow (e.g., suppressed captures) + // still produce Node for the capture. + if is_truly_empty_scope(inner) { + let empty_struct = self.ctx.intern_struct(BTreeMap::new()); + match annotation { + Some(AnnotationKind::String) => TYPE_STRING, + Some(AnnotationKind::TypeName(name)) => { + self.ctx.set_type_name(empty_struct, name); + empty_struct + } + None => empty_struct, + } + } else { + let base_type = self.get_recursive_ref_type(inner).unwrap_or(TYPE_NODE); + self.annotation_to_alias(annotation, base_type) + } } TypeFlow::Scalar(type_id) => { // For array types with annotation, replace the element type diff --git a/crates/plotnik-lib/src/compile/capture.rs b/crates/plotnik-lib/src/compile/capture.rs index 6c0ec8d9..61d92d00 100644 --- a/crates/plotnik-lib/src/compile/capture.rs +++ b/crates/plotnik-lib/src/compile/capture.rs @@ -11,7 +11,7 @@ use crate::bytecode::ir::EffectIR; use crate::parser::ast::{self, Expr}; use super::Compiler; -use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier}; +use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier, is_truly_empty_scope}; /// Capture effects to attach to the innermost match instruction. /// @@ -42,21 +42,29 @@ impl Compiler<'_> { let is_array = is_star_or_plus_quantifier(inner); // Check if inner is a scope-creating expression (SeqExpr/AltExpr) that produces - // a structured type (Struct/Enum). Named nodes with bubble captures don't count - - // they still need Node because we're capturing the matched node, not the struct. + // a structured type (Struct/Enum) or truly empty struct. Named nodes with bubble + // captures don't count - they still need Node because we're capturing the matched + // node, not the struct. // // For FieldExpr, look through to the value. The parser treats `field: expr @cap` as // `(field: expr) @cap` so that quantifiers work on fields (e.g., `decorator: (x)*` // for repeating fields). This means captures wrap the FieldExpr, but the value // determines whether it produces a structured type. See `parse_expr_no_suffix`. let creates_structured_scope = inner.and_then(unwrap_field_value).is_some_and(|ei| { - inner_creates_scope(&ei) - && self - .type_ctx - .get_term_info(&ei) - .and_then(|info| info.flow.type_id()) - .and_then(|id| self.type_ctx.get_type(id)) - .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_))) + // Truly empty scopes (like `{ }`) produce empty struct + if is_truly_empty_scope(&ei) { + return true; + } + if !inner_creates_scope(&ei) { + return false; + } + let Some(info) = self.type_ctx.get_term_info(&ei) else { + return false; + }; + info.flow + .type_id() + .and_then(|id| self.type_ctx.get_type(id)) + .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_))) }); if !is_structured_ref && !creates_structured_scope && !is_array { diff --git a/crates/plotnik-lib/src/compile/expressions.rs b/crates/plotnik-lib/src/compile/expressions.rs index 113c0d14..a7f5b1d4 100644 --- a/crates/plotnik-lib/src/compile/expressions.rs +++ b/crates/plotnik-lib/src/compile/expressions.rs @@ -18,7 +18,8 @@ use crate::parser::ast::{self, Expr}; use super::Compiler; use super::capture::CaptureEffects; use super::navigation::{ - check_trailing_anchor, inner_creates_scope, is_skippable_quantifier, is_star_or_plus_quantifier, + check_trailing_anchor, inner_creates_scope, is_skippable_quantifier, + is_star_or_plus_quantifier, is_truly_empty_scope, }; impl Compiler<'_> { @@ -429,7 +430,11 @@ impl Compiler<'_> { }; // Struct scope: Obj → inner → EndObj+capture → exit - if inner_is_bubble { + // Also handle truly empty scopes (e.g., `{ } @x` produces empty struct) + let inner_is_truly_empty_scope = is_truly_empty_scope(&inner); + let needs_struct_scope = inner_is_bubble || inner_is_truly_empty_scope; + + if needs_struct_scope { return if inner_creates_scope { // Sequence/alternation: capture effects after EndObj (value is the struct) self.compile_struct_scope( diff --git a/crates/plotnik-lib/src/compile/navigation.rs b/crates/plotnik-lib/src/compile/navigation.rs index 8b6d4b24..30d2675c 100644 --- a/crates/plotnik-lib/src/compile/navigation.rs +++ b/crates/plotnik-lib/src/compile/navigation.rs @@ -6,6 +6,9 @@ use crate::bytecode::Nav; use crate::parser::ast::{Expr, SeqItem}; +// Re-export from parser for compile module consumers +pub use crate::parser::is_truly_empty_scope; + /// Check if an expression is anonymous (string literal or wildcard). pub fn expr_is_anonymous(expr: Option<&Expr>) -> bool { matches!(expr, Some(Expr::AnonymousNode(_))) diff --git a/crates/plotnik-lib/src/engine/engine_tests.rs b/crates/plotnik-lib/src/engine/engine_tests.rs index 432c3c20..3c76373b 100644 --- a/crates/plotnik-lib/src/engine/engine_tests.rs +++ b/crates/plotnik-lib/src/engine/engine_tests.rs @@ -547,3 +547,23 @@ fn wildcard_named_skips_anonymous() { fn wildcard_bare_matches_anonymous() { snap!("Q = (program (return_statement _ @x))", "return 42"); } + +/// Empty captured sequence should produce empty struct, not null or panic. +/// Regression test for: type inference treating `{ }` as Node instead of empty struct. +#[test] +fn regression_empty_captured_sequence() { + snap!( + "Q = (program (expression_statement (identifier) @id { } @empty))", + "x" + ); +} + +/// Optional empty captured sequence should produce empty struct when matched. +/// Regression test for: `{ }? @maybe` producing null instead of `{}`. +#[test] +fn regression_optional_empty_captured_sequence() { + snap!( + "Q = (program (expression_statement (identifier) @id { }? @maybe))", + "x" + ); +} diff --git a/crates/plotnik-lib/src/engine/materializer.rs b/crates/plotnik-lib/src/engine/materializer.rs index ad5c5e95..7031aef7 100644 --- a/crates/plotnik-lib/src/engine/materializer.rs +++ b/crates/plotnik-lib/src/engine/materializer.rs @@ -145,18 +145,21 @@ impl<'t> Materializer<'t> for ValueMaterializer<'_> { } RuntimeEffect::EndObj => { if let Some(Builder::Object(fields)) = stack.pop() { - // Preserve pending if this object is empty and pending has a value. - // This allows the preamble's Obj/EndObj to work correctly for - // entrypoints that return non-struct values (enums, scalars). - // For void-returning queries (no pending, empty object), return null. if !fields.is_empty() { + // Non-empty object: always produce the object value pending = Some(Value::Object(fields)); } else if pending.is_none() { - // Empty object with no pending = void result → null - // (This is the preamble's empty wrapper for void-returning queries) - pending = None; + // Empty object with no pending value: + // - If nested (stack.len() > 1): produce empty object {} + // This handles captured empty sequences like `{ } @x` + // Note: stack always has at least the result_builder, so we check > 1 + // - If at root (stack.len() <= 1): void result → null + if stack.len() > 1 { + pending = Some(Value::Object(vec![])); + } + // else: pending stays None (void result) } - // else: non-empty pending, keep it (passthrough for enums, etc.) + // else: pending has a value, keep it (passthrough for enums, suppressive, etc.) } } RuntimeEffect::Enum(idx) => { diff --git a/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_empty_captured_sequence.snap b/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_empty_captured_sequence.snap new file mode 100644 index 00000000..c37a3acb --- /dev/null +++ b/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_empty_captured_sequence.snap @@ -0,0 +1,18 @@ +--- +source: crates/plotnik-lib/src/engine/engine_tests.rs +--- +Q = (program (expression_statement (identifier) @id { } @empty)) +--- +x +--- +{ + "id": { + "kind": "identifier", + "text": "x", + "span": [ + 0, + 1 + ] + }, + "empty": {} +} diff --git a/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_optional_empty_captured_sequence.snap b/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_optional_empty_captured_sequence.snap new file mode 100644 index 00000000..00d44c6a --- /dev/null +++ b/crates/plotnik-lib/src/engine/snapshots/plotnik_lib__engine__engine_tests__regression_optional_empty_captured_sequence.snap @@ -0,0 +1,18 @@ +--- +source: crates/plotnik-lib/src/engine/engine_tests.rs +--- +Q = (program (expression_statement (identifier) @id { }? @maybe)) +--- +x +--- +{ + "id": { + "kind": "identifier", + "text": "x", + "span": [ + 0, + 1 + ] + }, + "maybe": {} +} diff --git a/crates/plotnik-lib/src/parser/ast.rs b/crates/plotnik-lib/src/parser/ast.rs index 0dcb3e4c..4e66aa43 100644 --- a/crates/plotnik-lib/src/parser/ast.rs +++ b/crates/plotnik-lib/src/parser/ast.rs @@ -469,3 +469,14 @@ impl NegatedField { .find(|t| t.kind() == SyntaxKind::Id) } } + +/// Checks if expression is a truly empty scope (sequence/alternation with no children). +/// Used to distinguish `{ } @x` (empty struct) from `{(expr) @_} @x` (Node capture). +pub fn is_truly_empty_scope(inner: &Expr) -> bool { + match inner { + Expr::SeqExpr(seq) => seq.children().next().is_none(), + Expr::AltExpr(alt) => alt.branches().next().is_none(), + Expr::QuantifiedExpr(q) => q.inner().is_some_and(|i| is_truly_empty_scope(&i)), + _ => false, + } +} diff --git a/crates/plotnik-lib/src/parser/mod.rs b/crates/plotnik-lib/src/parser/mod.rs index 0d0f44fb..da31c266 100644 --- a/crates/plotnik-lib/src/parser/mod.rs +++ b/crates/plotnik-lib/src/parser/mod.rs @@ -42,7 +42,8 @@ pub use cst::{SyntaxKind, SyntaxNode, SyntaxToken}; pub use ast::{ AltExpr, AltKind, Anchor, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, Type, token_src, + NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, SeqItem, Type, is_truly_empty_scope, + token_src, }; pub use core::{ParseResult, Parser}; From 9a4835eada3a34dfb7d31aebbb2a99dbade6f008 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 5 Jan 2026 21:17:57 -0300 Subject: [PATCH 2/2] fix: use inferred types instead of syntax checks in capture compilation (#273) --- crates/plotnik-lib/src/compile/capture.rs | 73 ++++++++++++------- ...odegen_tests__sequences_in_quantifier.snap | 2 +- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/crates/plotnik-lib/src/compile/capture.rs b/crates/plotnik-lib/src/compile/capture.rs index 61d92d00..507df877 100644 --- a/crates/plotnik-lib/src/compile/capture.rs +++ b/crates/plotnik-lib/src/compile/capture.rs @@ -92,17 +92,35 @@ impl Compiler<'_> { /// Check if a quantifier body needs Node effect before Push. /// - /// For scalar array elements (simple named nodes, not structs/enums/refs), - /// we need [Node, Push] to capture the matched node value. - /// For structured elements, EndObj/EndEnum/Call already provides the value. + /// For scalar array elements (Node/String types), we need [Node/Text, Push] + /// to capture the matched node value. + /// For structured elements (Struct/Enum), EndObj/EndEnum provides the value. + /// For refs returning structured types, Call provides the value. pub(super) fn quantifier_needs_node_for_push(&self, expr: &Expr) -> bool { - if let Expr::QuantifiedExpr(quant) = expr - && let Some(body) = quant.inner() - { - !inner_creates_scope(&body) && !self.is_ref_returning_structured(&body) - } else { - true + let Expr::QuantifiedExpr(quant) = expr else { + return true; + }; + let Some(inner) = quant.inner() else { + return true; + }; + + // Refs returning structured types don't need Node + if self.is_ref_returning_structured(&inner) { + return false; } + + // Check the actual inferred type, not syntax + let Some(info) = self.type_ctx.get_term_info(&inner) else { + return true; + }; + + // If type is Struct or Enum, EndObj/EndEnum produces the value + // Otherwise (Node, String, Void, etc.), we need Node effect + !info + .flow + .type_id() + .and_then(|id| self.type_ctx.get_type(id)) + .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_))) } /// Check if expr is (or wraps) a ref returning a structured type. @@ -174,24 +192,27 @@ fn unwrap_field_value(expr: &Expr) -> Option { /// Check if inner needs struct wrapper for array iterations. /// -/// Returns true when inner is a scope-creating expression (sequence/alternation) -/// that produces an untagged struct (not an enum). Enums use Enum/EndEnum instead. +/// Returns true when the inner expression produces a Struct type (bubbling fields). +/// This includes: +/// - Sequences/alternations with captures: `{(a) @x (b) @y}*` +/// - Named nodes with bubble captures: `(node (child) @x)*` +/// +/// Enums use Enum/EndEnum instead (handled separately). pub fn check_needs_struct_wrapper(inner: &Expr, type_ctx: &TypeContext) -> bool { - let inner_info = type_ctx.get_term_info(inner); - let inner_creates_scope = inner_creates_scope(inner); - let inner_is_untagged_bubble = inner_info.as_ref().is_some_and(|info| { - if !info.flow.is_bubble() { - return false; - } - let Some(type_id) = info.flow.type_id() else { - return false; - }; - let Some(shape) = type_ctx.get_type(type_id) else { - return false; - }; - matches!(shape, TypeShape::Struct(_)) - }); - inner_is_untagged_bubble && inner_creates_scope + let Some(info) = type_ctx.get_term_info(inner) else { + return false; + }; + + // Must be a bubble (fields flow to parent scope) + if !info.flow.is_bubble() { + return false; + } + + // Check the actual type - if it's a Struct, we need Obj/EndObj wrapper + info.flow + .type_id() + .and_then(|id| type_ctx.get_type(id)) + .is_some_and(|shape| matches!(shape, TypeShape::Struct(_))) } /// Get row type ID for array element scoping. diff --git a/crates/plotnik-lib/src/emit/snapshots/plotnik_lib__emit__codegen_tests__sequences_in_quantifier.snap b/crates/plotnik-lib/src/emit/snapshots/plotnik_lib__emit__codegen_tests__sequences_in_quantifier.snap index bbe93f2c..46b818f7 100644 --- a/crates/plotnik-lib/src/emit/snapshots/plotnik_lib__emit__codegen_tests__sequences_in_quantifier.snap +++ b/crates/plotnik-lib/src/emit/snapshots/plotnik_lib__emit__codegen_tests__sequences_in_quantifier.snap @@ -42,7 +42,7 @@ Test: 10 ε 28, 20 12 ε [EndArr Set(M0)] 14 14 △ 19 - 15 ▷ (b) [Push] 17 + 15 ▷ (b) [Node Push] 17 17 ε 34, 12 19 ▶ 20 ε [EndArr Set(M0)] 19