From 05cbb53e9a4264a8fb5da6acde8919adb809d9d0 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 15:10:20 -0300 Subject: [PATCH 1/5] feat: Non-field children validation --- crates/plotnik-lib/src/diagnostics/message.rs | 3 + crates/plotnik-lib/src/query/link.rs | 97 +++++++++++ crates/plotnik-lib/src/query/link_tests.rs | 150 +++++++++++++++++- 3 files changed, 244 insertions(+), 6 deletions(-) diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index e48a4b55..dfad769a 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -65,6 +65,7 @@ pub enum DiagnosticKind { UnknownField, FieldNotOnNodeType, InvalidFieldChildType, + InvalidChildType, // Often consequences of earlier errors UnnamedDefNotLast, @@ -168,6 +169,7 @@ impl DiagnosticKind { Self::UnknownField => "unknown field", Self::FieldNotOnNodeType => "field not valid on this node type", Self::InvalidFieldChildType => "node type not valid for this field", + Self::InvalidChildType => "node type not valid as child", // Structural Self::UnnamedDefNotLast => "only the last definition can be unnamed", @@ -194,6 +196,7 @@ impl DiagnosticKind { Self::UnknownField => "`{}` is not a valid field".to_string(), Self::FieldNotOnNodeType => "field `{}` is not valid on this node type".to_string(), Self::InvalidFieldChildType => "node type `{}` is not valid for this field".to_string(), + Self::InvalidChildType => "`{}` cannot be a child of this node".to_string(), // Recursion with cycle path Self::RecursionNoEscape => "infinite recursion: {}".to_string(), diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index 69bd71b7..de59aa55 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -50,6 +50,34 @@ fn find_similar<'a>(name: &str, candidates: &[&'a str], max_distance: usize) -> .map(|(c, _)| c) } +/// Check if `child` is a subtype of `supertype`, recursively handling nested supertypes. +fn is_subtype_of(lang: &Lang, child: NodeTypeId, supertype: NodeTypeId) -> bool { + let subtypes = lang.subtypes(supertype); + for &subtype in subtypes { + if subtype == child { + return true; + } + if lang.is_supertype(subtype) && is_subtype_of(lang, child, subtype) { + return true; + } + } + false +} + +/// Check if `child` is a valid non-field child of `parent`, expanding supertypes. +fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) -> bool { + let valid_types = lang.valid_child_types(parent); + for &allowed in valid_types { + if allowed == child { + return true; + } + if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { + return true; + } + } + false +} + /// Format a list of items for display, truncating if too long. fn format_list(items: &[&str], max_items: usize) -> String { if items.is_empty() { @@ -286,6 +314,7 @@ impl<'a> Query<'a> { Expr::NamedNode(node) => { let current_type_id = self.get_node_type_id(node); for child in node.children() { + self.validate_non_field_child(&child, current_type_id, lang); self.validate_expr_structure(&child, current_type_id, lang); } for child in node.as_cst().children() { @@ -302,11 +331,13 @@ impl<'a> Query<'a> { Expr::AltExpr(alt) => { for branch in alt.branches() { let Some(body) = branch.body() else { continue }; + self.validate_non_field_child(&body, parent_type_id, lang); self.validate_expr_structure(&body, parent_type_id, lang); } } Expr::SeqExpr(seq) => { for child in seq.children() { + self.validate_non_field_child(&child, parent_type_id, lang); self.validate_expr_structure(&child, parent_type_id, lang); } } @@ -376,6 +407,72 @@ impl<'a> Query<'a> { builder.emit(); } + fn validate_non_field_child( + &mut self, + expr: &Expr, + parent_type_id: Option, + lang: &Lang, + ) { + // Field expressions are validated separately by validate_field + if matches!(expr, Expr::FieldExpr(_)) { + return; + } + + let Some(parent_id) = parent_type_id else { + return; + }; + + let Some(child_id) = self.get_expr_type_id(expr) else { + return; + }; + + // Check if parent allows any non-field children + let valid_types = lang.valid_child_types(parent_id); + if valid_types.is_empty() { + // Parent has no non-field children defined - all children must be via fields + let child_name = self.get_expr_type_name(expr).unwrap_or("(unknown)"); + let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); + self.link_diagnostics + .report(DiagnosticKind::InvalidChildType, expr.text_range()) + .message(child_name) + .hint(format!( + "`{}` only accepts children via fields", + parent_name + )) + .emit(); + return; + } + + if is_valid_child_expanded(lang, parent_id, child_id) { + return; + } + + let child_name = self.get_expr_type_name(expr).unwrap_or("(unknown)"); + let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); + + // Collect valid type names, expanding supertypes for display + let mut valid_names: Vec<&str> = Vec::new(); + for &type_id in valid_types { + if let Some(name) = lang.node_type_name(type_id) { + valid_names.push(name); + } + } + + let mut builder = self + .link_diagnostics + .report(DiagnosticKind::InvalidChildType, expr.text_range()) + .message(child_name); + + if !valid_names.is_empty() { + builder = builder.hint(format!( + "valid children for `{}`: {}", + parent_name, + format_list(&valid_names, 5) + )); + } + builder.emit(); + } + fn validate_negated_field( &mut self, neg: &ast::NegatedField, diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 158cf3e5..dccf5544 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -391,14 +391,21 @@ fn sequence_with_link_errors() { | ^^^^^^^^^ | help: did you mean `identifier`? + + error: `statement_block` cannot be a child of this node + | + 3 | (statement_block)} @body) @fn + | ^^^^^^^^^^^^^^^^^ + | + help: `function_declaration` only accepts children via fields "); } #[test] fn quantified_expr_validation() { let input = indoc! {r#" - (function_declaration - (identifier)+ @names) @fn + (statement_block + (function_declaration)+ @fns) @block "#}; let mut query = Query::try_from(input).unwrap(); @@ -408,11 +415,11 @@ fn quantified_expr_validation() { insta::assert_snapshot!(query.dump_ast(), @r" Root Def - CapturedExpr @fn - NamedNode function_declaration - CapturedExpr @names + CapturedExpr @block + NamedNode statement_block + CapturedExpr @fns QuantifiedExpr + - NamedNode identifier + NamedNode function_declaration "); } @@ -475,3 +482,134 @@ fn field_on_node_without_fields() { help: `identifier` has no fields "); } + +#[test] +fn invalid_child_type_no_children_allowed() { + let input = indoc! {r#" + (function_declaration + (class_declaration)) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `class_declaration` cannot be a child of this node + | + 2 | (class_declaration)) @fn + | ^^^^^^^^^^^^^^^^^^^ + | + help: `function_declaration` only accepts children via fields + "); +} + +#[test] +fn invalid_child_type_wrong_type() { + let input = indoc! {r#" + (statement_block + (identifier)) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 2 | (identifier)) @block + | ^^^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + "); +} + +#[test] +fn valid_child_via_supertype() { + let input = indoc! {r#" + (statement_block + (function_declaration)) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn valid_child_via_nested_supertype() { + let input = indoc! {r#" + (program + (function_declaration)) @prog + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn invalid_anonymous_child() { + let input = indoc! {r#" + (statement_block + "function") @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r#" + error: `function` cannot be a child of this node + | + 2 | "function") @block + | ^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + "#); +} + +#[test] +fn invalid_child_in_alternation() { + let input = indoc! {r#" + (statement_block + [(function_declaration) (identifier)]) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 2 | [(function_declaration) (identifier)]) @block + | ^^^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + "); +} + +#[test] +fn invalid_child_in_sequence() { + let input = indoc! {r#" + (statement_block + {(function_declaration) (identifier)}) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 2 | {(function_declaration) (identifier)}) @block + | ^^^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + "); +} From 4948b107d459e272545c86db4fb768428bf2bd09 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 15:13:57 -0300 Subject: [PATCH 2/5] Improve diagnostic range reporting for type errors --- crates/plotnik-lib/src/query/link.rs | 29 +++++++++++++++++++--- crates/plotnik-lib/src/query/link_tests.rs | 14 +++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index de59aa55..b2459429 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -392,9 +392,10 @@ impl<'a> Query<'a> { .filter_map(|&id| lang.node_type_name(id)) .collect(); + let range = get_expr_type_range(&value).unwrap_or(value.text_range()); let mut builder = self .link_diagnostics - .report(DiagnosticKind::InvalidFieldChildType, value.text_range()) + .report(DiagnosticKind::InvalidFieldChildType, range) .message(child_name); if !valid_names.is_empty() { @@ -432,8 +433,9 @@ impl<'a> Query<'a> { // Parent has no non-field children defined - all children must be via fields let child_name = self.get_expr_type_name(expr).unwrap_or("(unknown)"); let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); + let range = get_expr_type_range(expr).unwrap_or(expr.text_range()); self.link_diagnostics - .report(DiagnosticKind::InvalidChildType, expr.text_range()) + .report(DiagnosticKind::InvalidChildType, range) .message(child_name) .hint(format!( "`{}` only accepts children via fields", @@ -458,9 +460,10 @@ impl<'a> Query<'a> { } } + let range = get_expr_type_range(expr).unwrap_or(expr.text_range()); let mut builder = self .link_diagnostics - .report(DiagnosticKind::InvalidChildType, expr.text_range()) + .report(DiagnosticKind::InvalidChildType, range) .message(child_name); if !valid_names.is_empty() { @@ -583,6 +586,26 @@ impl<'a> Query<'a> { } } +fn get_expr_type_range(expr: &Expr) -> Option { + match expr { + Expr::NamedNode(node) => { + if node.is_any() { + return None; + } + Some(node.node_type()?.text_range()) + } + Expr::AnonymousNode(anon) => { + if anon.is_any() { + return None; + } + Some(anon.value()?.text_range()) + } + Expr::CapturedExpr(cap) => get_expr_type_range(&cap.inner()?), + Expr::QuantifiedExpr(q) => get_expr_type_range(&q.inner()?), + _ => None, + } +} + fn text_range_to_usize(range: rowan::TextRange) -> std::ops::Range { let start: usize = range.start().into(); let end: usize = range.end().into(); diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index dccf5544..a401598f 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -338,7 +338,7 @@ fn invalid_child_type_for_field() { error: node type `statement_block` is not valid for this field | 2 | name: (statement_block) @name) @fn - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ | help: valid types for `name`: `identifier` "); @@ -395,7 +395,7 @@ fn sequence_with_link_errors() { error: `statement_block` cannot be a child of this node | 3 | (statement_block)} @body) @fn - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ | help: `function_declaration` only accepts children via fields "); @@ -498,7 +498,7 @@ fn invalid_child_type_no_children_allowed() { error: `class_declaration` cannot be a child of this node | 2 | (class_declaration)) @fn - | ^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^ | help: `function_declaration` only accepts children via fields "); @@ -519,7 +519,7 @@ fn invalid_child_type_wrong_type() { error: `identifier` cannot be a child of this node | 2 | (identifier)) @block - | ^^^^^^^^^^^^ + | ^^^^^^^^^^ | help: valid children for `statement_block`: `statement` "); @@ -566,7 +566,7 @@ fn invalid_anonymous_child() { error: `function` cannot be a child of this node | 2 | "function") @block - | ^^^^^^^^^^ + | ^^^^^^^^ | help: valid children for `statement_block`: `statement` "#); @@ -587,7 +587,7 @@ fn invalid_child_in_alternation() { error: `identifier` cannot be a child of this node | 2 | [(function_declaration) (identifier)]) @block - | ^^^^^^^^^^^^ + | ^^^^^^^^^^ | help: valid children for `statement_block`: `statement` "); @@ -608,7 +608,7 @@ fn invalid_child_in_sequence() { error: `identifier` cannot be a child of this node | 2 | {(function_declaration) (identifier)}) @block - | ^^^^^^^^^^^^ + | ^^^^^^^^^^ | help: valid children for `statement_block`: `statement` "); From d9b602f70797d657f92f7d2c0bbe63d3d34becd7 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 15:25:27 -0300 Subject: [PATCH 3/5] Refactor query validation to improve type checking and error reporting --- crates/plotnik-lib/src/query/link.rs | 477 +++++++++++++-------- crates/plotnik-lib/src/query/link_tests.rs | 235 +++++++++- 2 files changed, 527 insertions(+), 185 deletions(-) diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index b2459429..fc7b3bda 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -5,7 +5,8 @@ //! 2. Collect and resolve all field names (FieldExpr, NegatedField) //! 3. Validate structural constraints (field on node type, child type for field) -use plotnik_langs::{Lang, NodeTypeId}; +use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; +use rowan::TextRange; use crate::diagnostics::DiagnosticKind; use crate::parser::ast::{self, Expr, NamedNode}; @@ -78,6 +79,25 @@ fn is_valid_child_expanded(lang: &Lang, parent: NodeTypeId, child: NodeTypeId) - false } +/// Check if `child` is a valid field value type, expanding supertypes. +fn is_valid_field_type_expanded( + lang: &Lang, + parent: NodeTypeId, + field: NodeFieldId, + child: NodeTypeId, +) -> bool { + if lang.is_valid_field_type(parent, field, child) { + return true; + } + let valid_types = lang.valid_field_types(parent, field); + for &allowed in valid_types { + if lang.is_supertype(allowed) && is_subtype_of(lang, child, allowed) { + return true; + } + } + false +} + /// Format a list of items for display, truncating if too long. fn format_list(items: &[&str], max_items: usize) -> String { if items.is_empty() { @@ -102,6 +122,26 @@ fn format_list(items: &[&str], max_items: usize) -> String { } } +/// Context for validating child types. +#[derive(Clone)] +struct ValidationContext<'a> { + /// The parent node type being validated against. + parent_id: NodeTypeId, + /// The parent node's name for error messages. + parent_name: &'a str, + /// The parent node type token range for related_to. + parent_range: TextRange, + /// If validating a field value, the field info. + field: Option>, +} + +#[derive(Clone)] +struct FieldContext<'a> { + name: &'a str, + id: NodeFieldId, + range: TextRange, +} + impl<'a> Query<'a> { /// Link query against a language grammar. /// @@ -307,56 +347,107 @@ impl<'a> Query<'a> { fn validate_expr_structure( &mut self, expr: &Expr, - parent_type_id: Option, + ctx: Option>, lang: &Lang, ) { match expr { Expr::NamedNode(node) => { - let current_type_id = self.get_node_type_id(node); + // Validate this node against the context (if any) + if let Some(ref ctx) = ctx { + self.validate_terminal_type(expr, ctx, lang); + } + + // Set up context for children + let child_ctx = self.make_node_context(node, lang); + for child in node.children() { - self.validate_non_field_child(&child, current_type_id, lang); - self.validate_expr_structure(&child, current_type_id, lang); + match &child { + Expr::FieldExpr(f) => { + // Fields get special handling + self.validate_field_expr(f, child_ctx.as_ref(), lang); + } + _ => { + // Non-field children: validate as non-field children + if let Some(ref ctx) = child_ctx { + self.validate_non_field_children(&child, ctx, lang); + } + self.validate_expr_structure(&child, child_ctx.clone(), lang); + } + } } - for child in node.as_cst().children() { - if let Some(neg) = ast::NegatedField::cast(child) { - self.validate_negated_field(&neg, current_type_id, lang); + + // Handle negated fields + if let Some(ref ctx) = child_ctx { + for child in node.as_cst().children() { + if let Some(neg) = ast::NegatedField::cast(child) { + self.validate_negated_field(&neg, ctx, lang); + } } } } + Expr::AnonymousNode(_) => { + // Validate this anonymous node against the context (if any) + if let Some(ref ctx) = ctx { + self.validate_terminal_type(expr, ctx, lang); + } + } Expr::FieldExpr(f) => { - self.validate_field(f, parent_type_id, lang); - let Some(value) = f.value() else { return }; - self.validate_expr_structure(&value, parent_type_id, lang); + // Should be handled by parent NamedNode, but handle gracefully + self.validate_field_expr(f, ctx.as_ref(), lang); } Expr::AltExpr(alt) => { for branch in alt.branches() { let Some(body) = branch.body() else { continue }; - self.validate_non_field_child(&body, parent_type_id, lang); - self.validate_expr_structure(&body, parent_type_id, lang); + self.validate_expr_structure(&body, ctx.clone(), lang); } } Expr::SeqExpr(seq) => { for child in seq.children() { - self.validate_non_field_child(&child, parent_type_id, lang); - self.validate_expr_structure(&child, parent_type_id, lang); + self.validate_expr_structure(&child, ctx.clone(), lang); } } Expr::CapturedExpr(cap) => { let Some(inner) = cap.inner() else { return }; - self.validate_expr_structure(&inner, parent_type_id, lang); + self.validate_expr_structure(&inner, ctx, lang); } Expr::QuantifiedExpr(q) => { let Some(inner) = q.inner() else { return }; - self.validate_expr_structure(&inner, parent_type_id, lang); + self.validate_expr_structure(&inner, ctx, lang); + } + Expr::Ref(_) => { + // References are validated elsewhere (symbol_table pass) } - Expr::AnonymousNode(_) | Expr::Ref(_) => {} } } - fn validate_field( + /// Create validation context for a named node's children. + fn make_node_context(&self, node: &NamedNode, lang: &Lang) -> Option> { + if node.is_any() { + return None; + } + let type_token = node.node_type()?; + if matches!( + type_token.kind(), + SyntaxKind::KwError | SyntaxKind::KwMissing + ) { + return None; + } + let type_name = type_token.text(); + let parent_id = self.node_type_ids.get(type_name).copied().flatten()?; + let parent_name = lang.node_type_name(parent_id)?; + Some(ValidationContext { + parent_id, + parent_name, + parent_range: type_token.text_range(), + field: None, + }) + } + + /// Validate a field expression. + fn validate_field_expr( &mut self, field: &ast::FieldExpr, - parent_type_id: Option, + ctx: Option<&ValidationContext<'a>>, lang: &Lang, ) { let Some(name_token) = field.name() else { @@ -368,118 +459,217 @@ impl<'a> Query<'a> { return; }; - let Some(parent_id) = parent_type_id else { + let Some(ctx) = ctx else { return; }; - if !lang.has_field(parent_id, field_id) { - self.emit_field_not_on_node(name_token.text_range(), field_name, parent_id, lang); + + // Check field exists on parent + if !lang.has_field(ctx.parent_id, field_id) { + self.emit_field_not_on_node( + name_token.text_range(), + field_name, + ctx.parent_id, + ctx.parent_range, + lang, + ); return; } let Some(value) = field.value() else { return; }; - let Some(child_id) = self.get_expr_type_id(&value) else { - return; - }; - if lang.is_valid_field_type(parent_id, field_id, child_id) { - return; - } - let child_name = self.get_expr_type_name(&value).unwrap_or("(unknown)"); - let valid_types = lang.valid_field_types(parent_id, field_id); - let valid_names: Vec<&str> = valid_types - .iter() - .filter_map(|&id| lang.node_type_name(id)) - .collect(); - let range = get_expr_type_range(&value).unwrap_or(value.text_range()); - let mut builder = self - .link_diagnostics - .report(DiagnosticKind::InvalidFieldChildType, range) - .message(child_name); + // Create field context for validating the value + let field_ctx = ValidationContext { + parent_id: ctx.parent_id, + parent_name: ctx.parent_name, + parent_range: ctx.parent_range, + field: Some(FieldContext { + name: &self.source[text_range_to_usize(name_token.text_range())], + id: field_id, + range: name_token.text_range(), + }), + }; - if !valid_names.is_empty() { - builder = builder.hint(format!( - "valid types for `{}`: {}", - field_name, - format_list(&valid_names, 5) - )); - } - builder.emit(); + // Validate field value - this will traverse through alt/seq/quantifier/capture + // and validate each terminal type against the field requirements + self.validate_expr_structure(&value, Some(field_ctx), lang); } - fn validate_non_field_child( + /// Validate non-field children. Called for direct children of a NamedNode that aren't fields. + fn validate_non_field_children( &mut self, expr: &Expr, - parent_type_id: Option, + ctx: &ValidationContext<'a>, lang: &Lang, ) { - // Field expressions are validated separately by validate_field - if matches!(expr, Expr::FieldExpr(_)) { - return; + // Collect all terminal types from this expression + let terminals = self.collect_terminal_types(expr); + + // Check if parent allows any non-field children + let valid_types = lang.valid_child_types(ctx.parent_id); + let parent_only_fields = valid_types.is_empty(); + + for (child_id, child_name, child_range) in terminals { + if parent_only_fields { + self.link_diagnostics + .report(DiagnosticKind::InvalidChildType, child_range) + .message(child_name) + .related_to( + format!("`{}` only accepts children via fields", ctx.parent_name), + ctx.parent_range, + ) + .emit(); + continue; + } + + if is_valid_child_expanded(lang, ctx.parent_id, child_id) { + continue; + } + + let valid_names: Vec<&str> = valid_types + .iter() + .filter_map(|&id| lang.node_type_name(id)) + .collect(); + + let mut builder = self + .link_diagnostics + .report(DiagnosticKind::InvalidChildType, child_range) + .message(child_name) + .related_to(format!("inside `{}`", ctx.parent_name), ctx.parent_range); + + if !valid_names.is_empty() { + builder = builder.hint(format!( + "valid children for `{}`: {}", + ctx.parent_name, + format_list(&valid_names, 5) + )); + } + builder.emit(); } + } - let Some(parent_id) = parent_type_id else { + /// Validate a terminal type (NamedNode or AnonymousNode) against the context. + fn validate_terminal_type(&mut self, expr: &Expr, ctx: &ValidationContext<'a>, lang: &Lang) { + let Some((child_id, child_name, child_range)) = self.get_terminal_type_info(expr) else { return; }; - let Some(child_id) = self.get_expr_type_id(expr) else { - return; - }; + if let Some(ref field) = ctx.field { + // Validating a field value + if is_valid_field_type_expanded(lang, ctx.parent_id, field.id, child_id) { + return; + } - // Check if parent allows any non-field children - let valid_types = lang.valid_child_types(parent_id); - if valid_types.is_empty() { - // Parent has no non-field children defined - all children must be via fields - let child_name = self.get_expr_type_name(expr).unwrap_or("(unknown)"); - let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); - let range = get_expr_type_range(expr).unwrap_or(expr.text_range()); - self.link_diagnostics - .report(DiagnosticKind::InvalidChildType, range) + let valid_types = lang.valid_field_types(ctx.parent_id, field.id); + let valid_names: Vec<&str> = valid_types + .iter() + .filter_map(|&id| lang.node_type_name(id)) + .collect(); + + let mut builder = self + .link_diagnostics + .report(DiagnosticKind::InvalidFieldChildType, child_range) .message(child_name) - .hint(format!( - "`{}` only accepts children via fields", - parent_name - )) - .emit(); - return; - } + .related_to( + format!("field `{}` on `{}`", field.name, ctx.parent_name), + field.range, + ); - if is_valid_child_expanded(lang, parent_id, child_id) { - return; + if !valid_names.is_empty() { + builder = builder.hint(format!( + "valid types for `{}`: {}", + field.name, + format_list(&valid_names, 5) + )); + } + builder.emit(); } + // Non-field children are validated by validate_non_field_children + } - let child_name = self.get_expr_type_name(expr).unwrap_or("(unknown)"); - let parent_name = lang.node_type_name(parent_id).unwrap_or("(unknown)"); + /// Collect all terminal types from an expression (traverses through Alt/Seq/Capture/Quantifier). + fn collect_terminal_types(&self, expr: &Expr) -> Vec<(NodeTypeId, &'a str, TextRange)> { + let mut result = Vec::new(); + self.collect_terminal_types_impl(expr, &mut result); + result + } - // Collect valid type names, expanding supertypes for display - let mut valid_names: Vec<&str> = Vec::new(); - for &type_id in valid_types { - if let Some(name) = lang.node_type_name(type_id) { - valid_names.push(name); + fn collect_terminal_types_impl( + &self, + expr: &Expr, + result: &mut Vec<(NodeTypeId, &'a str, TextRange)>, + ) { + match expr { + Expr::NamedNode(_) | Expr::AnonymousNode(_) => { + if let Some(info) = self.get_terminal_type_info(expr) { + result.push(info); + } + } + Expr::AltExpr(alt) => { + for branch in alt.branches() { + if let Some(body) = branch.body() { + self.collect_terminal_types_impl(&body, result); + } + } + } + Expr::SeqExpr(seq) => { + for child in seq.children() { + self.collect_terminal_types_impl(&child, result); + } + } + Expr::CapturedExpr(cap) => { + if let Some(inner) = cap.inner() { + self.collect_terminal_types_impl(&inner, result); + } + } + Expr::QuantifiedExpr(q) => { + if let Some(inner) = q.inner() { + self.collect_terminal_types_impl(&inner, result); + } + } + Expr::FieldExpr(_) | Expr::Ref(_) => { + // Fields are handled separately, refs don't have concrete types } } + } - let range = get_expr_type_range(expr).unwrap_or(expr.text_range()); - let mut builder = self - .link_diagnostics - .report(DiagnosticKind::InvalidChildType, range) - .message(child_name); - - if !valid_names.is_empty() { - builder = builder.hint(format!( - "valid children for `{}`: {}", - parent_name, - format_list(&valid_names, 5) - )); + /// Get type info for a terminal expression (NamedNode or AnonymousNode). + fn get_terminal_type_info(&self, expr: &Expr) -> Option<(NodeTypeId, &'a str, TextRange)> { + match expr { + Expr::NamedNode(node) => { + if node.is_any() { + return None; + } + let type_token = node.node_type()?; + if matches!( + type_token.kind(), + SyntaxKind::KwError | SyntaxKind::KwMissing + ) { + return None; + } + let type_name = type_token.text(); + let type_id = self.node_type_ids.get(type_name).copied().flatten()?; + let name = &self.source[text_range_to_usize(type_token.text_range())]; + Some((type_id, name, type_token.text_range())) + } + Expr::AnonymousNode(anon) => { + if anon.is_any() { + return None; + } + let value_token = anon.value()?; + let value = &self.source[text_range_to_usize(value_token.text_range())]; + let type_id = self.node_type_ids.get(value).copied().flatten()?; + Some((type_id, value, value_token.text_range())) + } + _ => None, } - builder.emit(); } fn validate_negated_field( &mut self, neg: &ast::NegatedField, - parent_type_id: Option, + ctx: &ValidationContext<'a>, lang: &Lang, ) { let Some(name_token) = neg.name() else { @@ -491,20 +681,24 @@ impl<'a> Query<'a> { return; }; - let Some(parent_id) = parent_type_id else { - return; - }; - if lang.has_field(parent_id, field_id) { + if lang.has_field(ctx.parent_id, field_id) { return; } - self.emit_field_not_on_node(name_token.text_range(), field_name, parent_id, lang); + self.emit_field_not_on_node( + name_token.text_range(), + field_name, + ctx.parent_id, + ctx.parent_range, + lang, + ); } fn emit_field_not_on_node( &mut self, - range: rowan::TextRange, + range: TextRange, field_name: &str, parent_id: NodeTypeId, + parent_range: TextRange, lang: &Lang, ) { let valid_fields = lang.fields_for_node_type(parent_id); @@ -513,7 +707,8 @@ impl<'a> Query<'a> { let mut builder = self .link_diagnostics .report(DiagnosticKind::FieldNotOnNodeType, range) - .message(field_name); + .message(field_name) + .related_to(format!("on `{}`", parent_name), parent_range); if valid_fields.is_empty() { builder = builder.hint(format!("`{}` has no fields", parent_name)); @@ -530,83 +725,9 @@ impl<'a> Query<'a> { } builder.emit(); } - - fn get_node_type_id(&self, node: &NamedNode) -> Option { - if node.is_any() { - return None; - } - let type_token = node.node_type()?; - if matches!( - type_token.kind(), - SyntaxKind::KwError | SyntaxKind::KwMissing - ) { - return None; - } - let type_name = type_token.text(); - self.node_type_ids.get(type_name).copied().flatten() - } - - fn get_expr_type_id(&self, expr: &Expr) -> Option { - match expr { - Expr::NamedNode(node) => self.get_node_type_id(node), - Expr::AnonymousNode(anon) => { - if anon.is_any() { - return None; - } - let value_token = anon.value()?; - let value = &self.source[text_range_to_usize(value_token.text_range())]; - self.node_type_ids.get(value).copied().flatten() - } - Expr::CapturedExpr(cap) => self.get_expr_type_id(&cap.inner()?), - Expr::QuantifiedExpr(q) => self.get_expr_type_id(&q.inner()?), - _ => None, - } - } - - fn get_expr_type_name(&self, expr: &Expr) -> Option<&'a str> { - match expr { - Expr::NamedNode(node) => { - if node.is_any() { - return None; - } - let type_token = node.node_type()?; - Some(&self.source[text_range_to_usize(type_token.text_range())]) - } - Expr::AnonymousNode(anon) => { - if anon.is_any() { - return None; - } - let value_token = anon.value()?; - Some(&self.source[text_range_to_usize(value_token.text_range())]) - } - Expr::CapturedExpr(cap) => self.get_expr_type_name(&cap.inner()?), - Expr::QuantifiedExpr(q) => self.get_expr_type_name(&q.inner()?), - _ => None, - } - } -} - -fn get_expr_type_range(expr: &Expr) -> Option { - match expr { - Expr::NamedNode(node) => { - if node.is_any() { - return None; - } - Some(node.node_type()?.text_range()) - } - Expr::AnonymousNode(anon) => { - if anon.is_any() { - return None; - } - Some(anon.value()?.text_range()) - } - Expr::CapturedExpr(cap) => get_expr_type_range(&cap.inner()?), - Expr::QuantifiedExpr(q) => get_expr_type_range(&q.inner()?), - _ => None, - } } -fn text_range_to_usize(range: rowan::TextRange) -> std::ops::Range { +fn text_range_to_usize(range: TextRange) -> std::ops::Range { let start: usize = range.start().into(); let end: usize = range.end().into(); start..end diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index a401598f..12735ce0 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -115,6 +115,8 @@ fn field_not_on_node_type() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: field `condition` is not valid on this node type | + 1 | (function_declaration + | -------------------- on `function_declaration` 2 | condition: (identifier) @name) @fn | ^^^^^^^^^ | @@ -136,6 +138,8 @@ fn field_not_on_node_type_with_suggestion() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: field `parameter` is not valid on this node type | + 1 | (function_declaration + | -------------------- on `function_declaration` 2 | parameter: (formal_parameters) @params) @fn | ^^^^^^^^^ | @@ -178,7 +182,9 @@ fn negated_field_not_on_node_type() { error: field `condition` is not valid on this node type | 1 | (function_declaration !condition) @fn - | ^^^^^^^^^ + | -------------------- ^^^^^^^^^ + | | + | on `function_declaration` | help: valid fields for `function_declaration`: `body`, `name`, `parameters` "); @@ -198,7 +204,9 @@ fn negated_field_not_on_node_type_with_suggestion() { error: field `parameter` is not valid on this node type | 1 | (function_declaration !parameter) @fn - | ^^^^^^^^^ + | -------------------- ^^^^^^^^^ + | | + | on `function_declaration` | help: did you mean `parameters`? help: valid fields for `function_declaration`: `body`, `name`, `parameters`, `return_type`, `type_parameters` @@ -338,7 +346,9 @@ fn invalid_child_type_for_field() { error: node type `statement_block` is not valid for this field | 2 | name: (statement_block) @name) @fn - | ^^^^^^^^^^^^^^^ + | ---- ^^^^^^^^^^^^^^^ + | | + | field `name` on `function_declaration` | help: valid types for `name`: `identifier` "); @@ -394,10 +404,11 @@ fn sequence_with_link_errors() { error: `statement_block` cannot be a child of this node | + 1 | (function_declaration + | -------------------- `function_declaration` only accepts children via fields + 2 | {(identifer) 3 | (statement_block)} @body) @fn | ^^^^^^^^^^^^^^^ - | - help: `function_declaration` only accepts children via fields "); } @@ -476,6 +487,8 @@ fn field_on_node_without_fields() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: field `name` is not valid on this node type | + 1 | (identifier + | ---------- on `identifier` 2 | name: (identifier) @inner) @id | ^^^^ | @@ -497,10 +510,10 @@ fn invalid_child_type_no_children_allowed() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: `class_declaration` cannot be a child of this node | + 1 | (function_declaration + | -------------------- `function_declaration` only accepts children via fields 2 | (class_declaration)) @fn | ^^^^^^^^^^^^^^^^^ - | - help: `function_declaration` only accepts children via fields "); } @@ -518,6 +531,8 @@ fn invalid_child_type_wrong_type() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: `identifier` cannot be a child of this node | + 1 | (statement_block + | --------------- inside `statement_block` 2 | (identifier)) @block | ^^^^^^^^^^ | @@ -565,6 +580,8 @@ fn invalid_anonymous_child() { insta::assert_snapshot!(query.dump_diagnostics(), @r#" error: `function` cannot be a child of this node | + 1 | (statement_block + | --------------- inside `statement_block` 2 | "function") @block | ^^^^^^^^ | @@ -586,6 +603,8 @@ fn invalid_child_in_alternation() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: `identifier` cannot be a child of this node | + 1 | (statement_block + | --------------- inside `statement_block` 2 | [(function_declaration) (identifier)]) @block | ^^^^^^^^^^ | @@ -607,9 +626,211 @@ fn invalid_child_in_sequence() { insta::assert_snapshot!(query.dump_diagnostics(), @r" error: `identifier` cannot be a child of this node | + 1 | (statement_block + | --------------- inside `statement_block` 2 | {(function_declaration) (identifier)}) @block | ^^^^^^^^^^ | help: valid children for `statement_block`: `statement` "); } + +#[test] +fn deeply_nested_sequences_valid() { + let input = indoc! {r#" + (statement_block {{{(function_declaration)}}}) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn deeply_nested_sequences_invalid() { + let input = indoc! {r#" + (statement_block {{{(identifier)}}}) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 1 | (statement_block {{{(identifier)}}}) @block + | --------------- ^^^^^^^^^^ + | | + | inside `statement_block` + | + help: valid children for `statement_block`: `statement` + "); +} + +#[test] +fn deeply_nested_alternations_in_field_valid() { + let input = indoc! {r#" + (function_declaration name: [[[(identifier)]]]) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn deeply_nested_alternations_in_field_invalid() { + let input = indoc! {r#" + (function_declaration name: [[[(statement_block)]]]) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `statement_block` is not valid for this field + | + 1 | (function_declaration name: [[[(statement_block)]]]) @fn + | ---- ^^^^^^^^^^^^^^^ + | | + | field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn deeply_nested_no_fields_allowed() { + let input = indoc! {r#" + (function_declaration {{{(class_declaration)}}}) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `class_declaration` cannot be a child of this node + | + 1 | (function_declaration {{{(class_declaration)}}}) @fn + | -------------------- ^^^^^^^^^^^^^^^^^ + | | + | `function_declaration` only accepts children via fields + "); +} + +#[test] +fn mixed_nested_with_capture_and_quantifier() { + let input = indoc! {r#" + (statement_block + {[(function_declaration)+ @fns + (identifier) @id]*}) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 1 | (statement_block + | --------------- inside `statement_block` + 2 | {[(function_declaration)+ @fns + 3 | (identifier) @id]*}) @block + | ^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + "); +} + +#[test] +fn field_with_captured_and_quantified_invalid_type() { + let input = indoc! {r#" + (function_declaration + name: (statement_block)? @name) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `statement_block` is not valid for this field + | + 2 | name: (statement_block)? @name) @fn + | ---- ^^^^^^^^^^^^^^^ + | | + | field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn multiple_invalid_types_in_alternation_field() { + let input = indoc! {r#" + (function_declaration + name: [(statement_block) (class_declaration)]) @fn + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `statement_block` is not valid for this field + | + 2 | name: [(statement_block) (class_declaration)]) @fn + | ---- ^^^^^^^^^^^^^^^ + | | + | field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + + error: node type `class_declaration` is not valid for this field + | + 2 | name: [(statement_block) (class_declaration)]) @fn + | ---- ^^^^^^^^^^^^^^^^^ + | | + | field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn multiple_invalid_types_in_sequence_child() { + let input = indoc! {r#" + (statement_block + {(identifier) (number)}) @block + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 1 | (statement_block + | --------------- inside `statement_block` + 2 | {(identifier) (number)}) @block + | ^^^^^^^^^^ + | + help: valid children for `statement_block`: `statement` + + error: `number` cannot be a child of this node + | + 1 | (statement_block + | --------------- inside `statement_block` + 2 | {(identifier) (number)}) @block + | ^^^^^^ + | + help: valid children for `statement_block`: `statement` + "); +} From 927060cd634bb4ee2e864b250e91f9b4f19f7e32 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 15:33:55 -0300 Subject: [PATCH 4/5] Follow refs during query validation --- crates/plotnik-lib/src/query/link.rs | 101 +++++++++--- crates/plotnik-lib/src/query/link_tests.rs | 175 +++++++++++++++++++++ 2 files changed, 250 insertions(+), 26 deletions(-) diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index fc7b3bda..97d2e6d6 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -5,6 +5,7 @@ //! 2. Collect and resolve all field names (FieldExpr, NegatedField) //! 3. Validate structural constraints (field on node type, child type for field) +use indexmap::IndexSet; use plotnik_langs::{Lang, NodeFieldId, NodeTypeId}; use rowan::TextRange; @@ -340,7 +341,8 @@ impl<'a> Query<'a> { let defs: Vec<_> = self.ast.defs().collect(); for def in defs { let Some(body) = def.body() else { continue }; - self.validate_expr_structure(&body, None, lang); + let mut visited = IndexSet::new(); + self.validate_expr_structure(&body, None, lang, &mut visited); } } @@ -349,12 +351,13 @@ impl<'a> Query<'a> { expr: &Expr, ctx: Option>, lang: &Lang, + visited: &mut IndexSet, ) { match expr { Expr::NamedNode(node) => { // Validate this node against the context (if any) if let Some(ref ctx) = ctx { - self.validate_terminal_type(expr, ctx, lang); + self.validate_terminal_type(expr, ctx, lang, visited); } // Set up context for children @@ -364,14 +367,14 @@ impl<'a> Query<'a> { match &child { Expr::FieldExpr(f) => { // Fields get special handling - self.validate_field_expr(f, child_ctx.as_ref(), lang); + self.validate_field_expr(f, child_ctx.as_ref(), lang, visited); } _ => { // Non-field children: validate as non-field children if let Some(ref ctx) = child_ctx { - self.validate_non_field_children(&child, ctx, lang); + self.validate_non_field_children(&child, ctx, lang, visited); } - self.validate_expr_structure(&child, child_ctx.clone(), lang); + self.validate_expr_structure(&child, child_ctx.clone(), lang, visited); } } } @@ -388,34 +391,42 @@ impl<'a> Query<'a> { Expr::AnonymousNode(_) => { // Validate this anonymous node against the context (if any) if let Some(ref ctx) = ctx { - self.validate_terminal_type(expr, ctx, lang); + self.validate_terminal_type(expr, ctx, lang, visited); } } Expr::FieldExpr(f) => { // Should be handled by parent NamedNode, but handle gracefully - self.validate_field_expr(f, ctx.as_ref(), lang); + self.validate_field_expr(f, ctx.as_ref(), lang, visited); } Expr::AltExpr(alt) => { for branch in alt.branches() { let Some(body) = branch.body() else { continue }; - self.validate_expr_structure(&body, ctx.clone(), lang); + self.validate_expr_structure(&body, ctx.clone(), lang, visited); } } Expr::SeqExpr(seq) => { for child in seq.children() { - self.validate_expr_structure(&child, ctx.clone(), lang); + self.validate_expr_structure(&child, ctx.clone(), lang, visited); } } Expr::CapturedExpr(cap) => { let Some(inner) = cap.inner() else { return }; - self.validate_expr_structure(&inner, ctx, lang); + self.validate_expr_structure(&inner, ctx, lang, visited); } Expr::QuantifiedExpr(q) => { let Some(inner) = q.inner() else { return }; - self.validate_expr_structure(&inner, ctx, lang); + self.validate_expr_structure(&inner, ctx, lang, visited); } - Expr::Ref(_) => { - // References are validated elsewhere (symbol_table pass) + Expr::Ref(r) => { + let Some(name_token) = r.name() else { return }; + let name = name_token.text(); + if !visited.insert(name.to_string()) { + return; + } + let Some(body) = self.symbol_table.get(name).cloned() else { + return; + }; + self.validate_expr_structure(&body, ctx, lang, visited); } } } @@ -449,6 +460,7 @@ impl<'a> Query<'a> { field: &ast::FieldExpr, ctx: Option<&ValidationContext<'a>>, lang: &Lang, + visited: &mut IndexSet, ) { let Some(name_token) = field.name() else { return; @@ -493,7 +505,7 @@ impl<'a> Query<'a> { // Validate field value - this will traverse through alt/seq/quantifier/capture // and validate each terminal type against the field requirements - self.validate_expr_structure(&value, Some(field_ctx), lang); + self.validate_expr_structure(&value, Some(field_ctx), lang, visited); } /// Validate non-field children. Called for direct children of a NamedNode that aren't fields. @@ -502,9 +514,10 @@ impl<'a> Query<'a> { expr: &Expr, ctx: &ValidationContext<'a>, lang: &Lang, + visited: &mut IndexSet, ) { - // Collect all terminal types from this expression - let terminals = self.collect_terminal_types(expr); + // Collect all terminal types from this expression (follows refs) + let terminals = self.collect_terminal_types(expr, visited); // Check if parent allows any non-field children let valid_types = lang.valid_child_types(ctx.parent_id); @@ -550,7 +563,27 @@ impl<'a> Query<'a> { } /// Validate a terminal type (NamedNode or AnonymousNode) against the context. - fn validate_terminal_type(&mut self, expr: &Expr, ctx: &ValidationContext<'a>, lang: &Lang) { + fn validate_terminal_type( + &mut self, + expr: &Expr, + ctx: &ValidationContext<'a>, + lang: &Lang, + visited: &mut IndexSet, + ) { + // Handle refs by following them + if let Expr::Ref(r) = expr { + let Some(name_token) = r.name() else { return }; + let name = name_token.text(); + if !visited.insert(name.to_string()) { + return; + } + let Some(body) = self.symbol_table.get(name).cloned() else { + return; + }; + self.validate_terminal_type(&body, ctx, lang, visited); + return; + } + let Some((child_id, child_name, child_range)) = self.get_terminal_type_info(expr) else { return; }; @@ -588,10 +621,14 @@ impl<'a> Query<'a> { // Non-field children are validated by validate_non_field_children } - /// Collect all terminal types from an expression (traverses through Alt/Seq/Capture/Quantifier). - fn collect_terminal_types(&self, expr: &Expr) -> Vec<(NodeTypeId, &'a str, TextRange)> { + /// Collect all terminal types from an expression (traverses through Alt/Seq/Capture/Quantifier/Ref). + fn collect_terminal_types( + &self, + expr: &Expr, + visited: &mut IndexSet, + ) -> Vec<(NodeTypeId, &'a str, TextRange)> { let mut result = Vec::new(); - self.collect_terminal_types_impl(expr, &mut result); + self.collect_terminal_types_impl(expr, &mut result, visited); result } @@ -599,6 +636,7 @@ impl<'a> Query<'a> { &self, expr: &Expr, result: &mut Vec<(NodeTypeId, &'a str, TextRange)>, + visited: &mut IndexSet, ) { match expr { Expr::NamedNode(_) | Expr::AnonymousNode(_) => { @@ -609,27 +647,38 @@ impl<'a> Query<'a> { Expr::AltExpr(alt) => { for branch in alt.branches() { if let Some(body) = branch.body() { - self.collect_terminal_types_impl(&body, result); + self.collect_terminal_types_impl(&body, result, visited); } } } Expr::SeqExpr(seq) => { for child in seq.children() { - self.collect_terminal_types_impl(&child, result); + self.collect_terminal_types_impl(&child, result, visited); } } Expr::CapturedExpr(cap) => { if let Some(inner) = cap.inner() { - self.collect_terminal_types_impl(&inner, result); + self.collect_terminal_types_impl(&inner, result, visited); } } Expr::QuantifiedExpr(q) => { if let Some(inner) = q.inner() { - self.collect_terminal_types_impl(&inner, result); + self.collect_terminal_types_impl(&inner, result, visited); } } - Expr::FieldExpr(_) | Expr::Ref(_) => { - // Fields are handled separately, refs don't have concrete types + Expr::Ref(r) => { + let Some(name_token) = r.name() else { return }; + let name = name_token.text(); + if !visited.insert(name.to_string()) { + return; + } + let Some(body) = self.symbol_table.get(name) else { + return; + }; + self.collect_terminal_types_impl(body, result, visited); + } + Expr::FieldExpr(_) => { + // Fields are handled separately } } } diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 12735ce0..68c9930b 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -834,3 +834,178 @@ fn multiple_invalid_types_in_sequence_child() { help: valid children for `statement_block`: `statement` "); } + +#[test] +fn ref_followed_for_child_validation() { + let input = indoc! {r#" + Foo = [(identifier) (string)] + (function_declaration (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `identifier` cannot be a child of this node + | + 1 | Foo = [(identifier) (string)] + | ^^^^^^^^^^ + 2 | (function_declaration (Foo)) + | -------------------- `function_declaration` only accepts children via fields + + error: `string` cannot be a child of this node + | + 1 | Foo = [(identifier) (string)] + | ^^^^^^ + 2 | (function_declaration (Foo)) + | -------------------- `function_declaration` only accepts children via fields + "); +} + +#[test] +fn ref_followed_for_field_validation() { + let input = indoc! {r#" + Foo = [(number) (string)] + (function_declaration name: (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `number` is not valid for this field + | + 1 | Foo = [(number) (string)] + | ^^^^^^ + 2 | (function_declaration name: (Foo)) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + + error: node type `string` is not valid for this field + | + 1 | Foo = [(number) (string)] + | ^^^^^^ + 2 | (function_declaration name: (Foo)) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn ref_followed_valid_case() { + let input = indoc! {r#" + Foo = (identifier) + (function_declaration name: (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn ref_followed_recursive_with_invalid_type() { + let input = indoc! {r#" + Foo = [(number) (Foo)] + (function_declaration name: (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `number` is not valid for this field + | + 1 | Foo = [(number) (Foo)] + | ^^^^^^ + 2 | (function_declaration name: (Foo)) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn ref_followed_recursive_valid() { + let input = indoc! {r#" + Foo = [(identifier) (Foo)] + (function_declaration name: (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(query.is_valid()); +} + +#[test] +fn ref_followed_mutual_recursion() { + let input = indoc! {r#" + Foo = [(number) (Bar)] + Bar = [(string) (Foo)] + (function_declaration name: (Foo)) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: node type `number` is not valid for this field + | + 1 | Foo = [(number) (Bar)] + | ^^^^^^ + 2 | Bar = [(string) (Foo)] + 3 | (function_declaration name: (Foo)) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + + error: node type `string` is not valid for this field + | + 2 | Bar = [(string) (Foo)] + | ^^^^^^ + 3 | (function_declaration name: (Foo)) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + "); +} + +#[test] +fn ref_followed_in_sequence() { + let input = indoc! {r#" + Foo = (number) + (statement_block {(Foo) (string)}) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: `number` cannot be a child of this node + | + 1 | Foo = (number) + | ^^^^^^ + 2 | (statement_block {(Foo) (string)}) + | --------------- inside `statement_block` + | + help: valid children for `statement_block`: `statement` + + error: `string` cannot be a child of this node + | + 2 | (statement_block {(Foo) (string)}) + | --------------- ^^^^^^ + | | + | inside `statement_block` + | + help: valid children for `statement_block`: `statement` + "); +} From 0cc7e525e712b7dbec61bd1a470d10a5cd34a1e4 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 15:50:27 -0300 Subject: [PATCH 5/5] Improve recursive references and validation cycle detection --- crates/plotnik-lib/src/query/link.rs | 28 ++++++++++------- crates/plotnik-lib/src/query/link_tests.rs | 36 ++++++++++++++++++++++ crates/plotnik-lib/src/query/shapes.rs | 3 ++ 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index 97d2e6d6..c477dbfa 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -17,8 +17,8 @@ use super::Query; /// Simple edit distance for fuzzy matching (Levenshtein). fn edit_distance(a: &str, b: &str) -> usize { - let a_len = a.len(); - let b_len = b.len(); + let a_len = a.chars().count(); + let b_len = b.chars().count(); if a_len == 0 { return b_len; @@ -124,7 +124,7 @@ fn format_list(items: &[&str], max_items: usize) -> String { } /// Context for validating child types. -#[derive(Clone)] +#[derive(Clone, Copy)] struct ValidationContext<'a> { /// The parent node type being validated against. parent_id: NodeTypeId, @@ -136,7 +136,7 @@ struct ValidationContext<'a> { field: Option>, } -#[derive(Clone)] +#[derive(Clone, Copy)] struct FieldContext<'a> { name: &'a str, id: NodeFieldId, @@ -371,19 +371,19 @@ impl<'a> Query<'a> { } _ => { // Non-field children: validate as non-field children - if let Some(ref ctx) = child_ctx { - self.validate_non_field_children(&child, ctx, lang, visited); + if let Some(ctx) = child_ctx { + self.validate_non_field_children(&child, &ctx, lang, visited); } - self.validate_expr_structure(&child, child_ctx.clone(), lang, visited); + self.validate_expr_structure(&child, child_ctx, lang, visited); } } } // Handle negated fields - if let Some(ref ctx) = child_ctx { + if let Some(ctx) = child_ctx { for child in node.as_cst().children() { if let Some(neg) = ast::NegatedField::cast(child) { - self.validate_negated_field(&neg, ctx, lang); + self.validate_negated_field(&neg, &ctx, lang); } } } @@ -401,12 +401,12 @@ impl<'a> Query<'a> { Expr::AltExpr(alt) => { for branch in alt.branches() { let Some(body) = branch.body() else { continue }; - self.validate_expr_structure(&body, ctx.clone(), lang, visited); + self.validate_expr_structure(&body, ctx, lang, visited); } } Expr::SeqExpr(seq) => { for child in seq.children() { - self.validate_expr_structure(&child, ctx.clone(), lang, visited); + self.validate_expr_structure(&child, ctx, lang, visited); } } Expr::CapturedExpr(cap) => { @@ -424,9 +424,11 @@ impl<'a> Query<'a> { return; } let Some(body) = self.symbol_table.get(name).cloned() else { + visited.swap_remove(name); return; }; self.validate_expr_structure(&body, ctx, lang, visited); + visited.swap_remove(name); } } } @@ -578,9 +580,11 @@ impl<'a> Query<'a> { return; } let Some(body) = self.symbol_table.get(name).cloned() else { + visited.swap_remove(name); return; }; self.validate_terminal_type(&body, ctx, lang, visited); + visited.swap_remove(name); return; } @@ -673,9 +677,11 @@ impl<'a> Query<'a> { return; } let Some(body) = self.symbol_table.get(name) else { + visited.swap_remove(name); return; }; self.collect_terminal_types_impl(body, result, visited); + visited.swap_remove(name); } Expr::FieldExpr(_) => { // Fields are handled separately diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 68c9930b..a0d2271b 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -1009,3 +1009,39 @@ fn ref_followed_in_sequence() { help: valid children for `statement_block`: `statement` "); } + +#[test] +fn ref_validated_in_multiple_contexts() { + let input = indoc! {r#" + Foo = (number) + (function_declaration + name: (Foo) + body: (statement_block (Foo))) + "#}; + + let mut query = Query::try_from(input).unwrap(); + query.link(&plotnik_langs::javascript()); + + assert!(!query.is_valid()); + insta::assert_snapshot!(query.dump_diagnostics_raw(), @r" + error: node type `number` is not valid for this field + | + 1 | Foo = (number) + | ^^^^^^ + 2 | (function_declaration + 3 | name: (Foo) + | ---- field `name` on `function_declaration` + | + help: valid types for `name`: `identifier` + + error: `number` cannot be a child of this node + | + 1 | Foo = (number) + | ^^^^^^ + ... + 4 | body: (statement_block (Foo))) + | --------------- inside `statement_block` + | + help: valid children for `statement_block`: `statement` + "); +} diff --git a/crates/plotnik-lib/src/query/shapes.rs b/crates/plotnik-lib/src/query/shapes.rs index 8467a91a..efcf55aa 100644 --- a/crates/plotnik-lib/src/query/shapes.rs +++ b/crates/plotnik-lib/src/query/shapes.rs @@ -39,6 +39,9 @@ impl Query<'_> { if let Some(&c) = self.shape_cardinality_table.get(expr) { return c; } + // Insert sentinel to break cycles (e.g., `Foo = (Foo)`) + self.shape_cardinality_table + .insert(expr.clone(), ShapeCardinality::Invalid); let c = self.compute_single(expr); self.shape_cardinality_table.insert(expr.clone(), c); c