From db686489cca8e42081a235cfa713b7e5c8364f83 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Sat, 6 Dec 2025 23:17:11 -0300 Subject: [PATCH] refactor: Extract AST helpers --- crates/plotnik-lib/src/parser/ast.rs | 47 ++++++++ crates/plotnik-lib/src/parser/mod.rs | 2 +- crates/plotnik-lib/src/query/alt_kinds.rs | 39 ++----- crates/plotnik-lib/src/query/recursion.rs | 110 +++++-------------- crates/plotnik-lib/src/query/shapes.rs | 77 ++++++------- crates/plotnik-lib/src/query/symbol_table.rs | 51 ++------- 6 files changed, 129 insertions(+), 197 deletions(-) diff --git a/crates/plotnik-lib/src/parser/ast.rs b/crates/plotnik-lib/src/parser/ast.rs index 0a378354..420aa78b 100644 --- a/crates/plotnik-lib/src/parser/ast.rs +++ b/crates/plotnik-lib/src/parser/ast.rs @@ -2,10 +2,27 @@ //! //! Each struct wraps a `SyntaxNode` and provides typed accessors. //! Cast is infallible for correct `SyntaxKind` - validation happens elsewhere. +//! +//! ## String Lifetime Limitation +//! +//! `SyntaxToken::text()` returns `&str` tied to the token's lifetime, not to the +//! source `&'src str`. This is a rowan design: tokens store interned strings, not +//! spans into the original source. +//! +//! When building data structures that need source-lifetime strings (e.g., +//! `SymbolTable<'src>`), use [`token_src`] instead of `token.text()`. use super::cst::{SyntaxKind, SyntaxNode, SyntaxToken}; use rowan::TextRange; +/// Extracts token text with source lifetime. +/// +/// Use this instead of `token.text()` when you need `&'src str`. +pub fn token_src<'src>(token: &SyntaxToken, source: &'src str) -> &'src str { + let range = token.text_range(); + &source[range.start().into()..range.end().into()] +} + macro_rules! ast_node { ($name:ident, $kind:ident) => { #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -52,6 +69,21 @@ macro_rules! define_expr { }; } +impl Expr { + /// Returns direct child expressions. + pub fn children(&self) -> Vec { + match self { + Expr::NamedNode(n) => n.children().collect(), + Expr::SeqExpr(s) => s.children().collect(), + Expr::CapturedExpr(c) => c.inner().into_iter().collect(), + Expr::QuantifiedExpr(q) => q.inner().into_iter().collect(), + Expr::FieldExpr(f) => f.value().into_iter().collect(), + Expr::AltExpr(a) => a.branches().filter_map(|b| b.body()).collect(), + Expr::Ref(_) | Expr::AnonymousNode(_) => vec![], + } + } +} + ast_node!(Root, Root); ast_node!(Def, Def); ast_node!(NamedNode, Tree); @@ -283,6 +315,21 @@ impl QuantifiedExpr { ) }) } + + /// Returns true if quantifier allows zero matches (?, *, ??, *?). + pub fn is_optional(&self) -> bool { + self.operator() + .map(|op| { + matches!( + op.kind(), + SyntaxKind::Question + | SyntaxKind::Star + | SyntaxKind::QuestionQuestion + | SyntaxKind::StarQuestion + ) + }) + .unwrap_or(false) + } } impl FieldExpr { diff --git a/crates/plotnik-lib/src/parser/mod.rs b/crates/plotnik-lib/src/parser/mod.rs index 254bd9cc..4e3ff52e 100644 --- a/crates/plotnik-lib/src/parser/mod.rs +++ b/crates/plotnik-lib/src/parser/mod.rs @@ -42,7 +42,7 @@ pub use cst::{SyntaxKind, SyntaxNode, SyntaxToken}; pub use ast::{ AltExpr, AltKind, Anchor, AnonymousNode, Branch, CapturedExpr, Def, Expr, FieldExpr, NamedNode, - NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, Type, + NegatedField, QuantifiedExpr, Ref, Root, SeqExpr, Type, token_src, }; pub use core::{ParseResult, Parser}; diff --git a/crates/plotnik-lib/src/query/alt_kinds.rs b/crates/plotnik-lib/src/query/alt_kinds.rs index 3c6a002c..e7f0067a 100644 --- a/crates/plotnik-lib/src/query/alt_kinds.rs +++ b/crates/plotnik-lib/src/query/alt_kinds.rs @@ -24,38 +24,13 @@ impl Query<'_> { } fn validate_alt_expr(&mut self, expr: &Expr) { - match expr { - Expr::AltExpr(alt) => { - self.check_mixed_alternation(alt); - for branch in alt.branches() { - let Some(body) = branch.body() else { continue }; - self.validate_alt_expr(&body); - } - assert_alt_no_bare_exprs(alt); - } - Expr::NamedNode(node) => { - for child in node.children() { - self.validate_alt_expr(&child); - } - } - Expr::SeqExpr(seq) => { - for child in seq.children() { - self.validate_alt_expr(&child); - } - } - Expr::CapturedExpr(cap) => { - let Some(inner) = cap.inner() else { return }; - self.validate_alt_expr(&inner); - } - Expr::QuantifiedExpr(q) => { - let Some(inner) = q.inner() else { return }; - self.validate_alt_expr(&inner); - } - Expr::FieldExpr(f) => { - let Some(value) = f.value() else { return }; - self.validate_alt_expr(&value); - } - Expr::Ref(_) | Expr::AnonymousNode(_) => {} + if let Expr::AltExpr(alt) = expr { + self.check_mixed_alternation(alt); + assert_alt_no_bare_exprs(alt); + } + + for child in expr.children() { + self.validate_alt_expr(&child); } } diff --git a/crates/plotnik-lib/src/query/recursion.rs b/crates/plotnik-lib/src/query/recursion.rs index e23267e8..4fd49625 100644 --- a/crates/plotnik-lib/src/query/recursion.rs +++ b/crates/plotnik-lib/src/query/recursion.rs @@ -8,7 +8,7 @@ use rowan::TextRange; use super::Query; use crate::diagnostics::DiagnosticKind; -use crate::parser::{Def, Expr, SyntaxKind}; +use crate::parser::{Def, Expr}; impl Query<'_> { pub(super) fn validate_recursion(&mut self) { @@ -233,32 +233,19 @@ fn expr_has_escape(expr: &Expr, scc: &IndexSet<&str>) -> bool { let children: Vec<_> = node.children().collect(); children.is_empty() || children.iter().all(|c| expr_has_escape(c, scc)) } - Expr::AltExpr(alt) => { - alt.branches().any(|b| { - b.body() - .map(|body| expr_has_escape(&body, scc)) - .unwrap_or(true) - }) || alt.exprs().any(|e| expr_has_escape(&e, scc)) - } - Expr::SeqExpr(seq) => seq.children().all(|c| expr_has_escape(&c, scc)), - Expr::QuantifiedExpr(q) => match q.operator().map(|op| op.kind()) { - Some( - SyntaxKind::Question - | SyntaxKind::Star - | SyntaxKind::QuestionQuestion - | SyntaxKind::StarQuestion, - ) => true, - Some(SyntaxKind::Plus | SyntaxKind::PlusQuestion) => q - .inner() + Expr::AltExpr(_) => expr.children().iter().any(|c| expr_has_escape(c, scc)), + Expr::SeqExpr(_) => expr.children().iter().all(|c| expr_has_escape(c, scc)), + Expr::QuantifiedExpr(q) => { + if q.is_optional() { + return true; + } + q.inner() .map(|inner| expr_has_escape(&inner, scc)) - .unwrap_or(true), - _ => true, - }, - Expr::CapturedExpr(cap) => cap - .inner() - .map(|inner| expr_has_escape(&inner, scc)) - .unwrap_or(true), - Expr::FieldExpr(f) => f.value().map(|v| expr_has_escape(&v, scc)).unwrap_or(true), + .unwrap_or(true) + } + Expr::CapturedExpr(_) | Expr::FieldExpr(_) => { + expr.children().iter().all(|c| expr_has_escape(c, scc)) + } Expr::AnonymousNode(_) => true, } } @@ -270,65 +257,26 @@ fn collect_refs(expr: &Expr) -> IndexSet { } fn collect_refs_into(expr: &Expr, refs: &mut IndexSet) { - match expr { - Expr::Ref(r) => { - let Some(name_token) = r.name() else { return }; - refs.insert(name_token.text().to_string()); - } - Expr::NamedNode(node) => { - for child in node.children() { - collect_refs_into(&child, refs); - } - } - Expr::AltExpr(alt) => { - for branch in alt.branches() { - let Some(body) = branch.body() else { continue }; - collect_refs_into(&body, refs); - } - } - Expr::SeqExpr(seq) => { - for child in seq.children() { - collect_refs_into(&child, refs); - } - } - Expr::CapturedExpr(cap) => { - let Some(inner) = cap.inner() else { return }; - collect_refs_into(&inner, refs); - } - Expr::QuantifiedExpr(q) => { - let Some(inner) = q.inner() else { return }; - collect_refs_into(&inner, refs); - } - Expr::FieldExpr(f) => { - let Some(value) = f.value() else { return }; - collect_refs_into(&value, refs); - } - Expr::AnonymousNode(_) => {} + if let Expr::Ref(r) = expr + && let Some(name_token) = r.name() + { + refs.insert(name_token.text().to_string()); + } + + for child in expr.children() { + collect_refs_into(&child, refs); } } fn find_ref_in_expr(expr: &Expr, target: &str) -> Option { - match expr { - Expr::Ref(r) => { - let name_token = r.name()?; - if name_token.text() == target { - return Some(name_token.text_range()); - } - None + if let Expr::Ref(r) = expr { + let name_token = r.name()?; + if name_token.text() == target { + return Some(name_token.text_range()); } - Expr::NamedNode(node) => node - .children() - .find_map(|child| find_ref_in_expr(&child, target)), - Expr::AltExpr(alt) => alt - .branches() - .find_map(|b| b.body().and_then(|body| find_ref_in_expr(&body, target))) - .or_else(|| alt.exprs().find_map(|e| find_ref_in_expr(&e, target))), - Expr::SeqExpr(seq) => seq.children().find_map(|c| find_ref_in_expr(&c, target)), - Expr::CapturedExpr(cap) => cap - .inner() - .and_then(|inner| find_ref_in_expr(&inner, target)), - Expr::QuantifiedExpr(q) => q.inner().and_then(|inner| find_ref_in_expr(&inner, target)), - Expr::FieldExpr(f) => f.value().and_then(|v| find_ref_in_expr(&v, target)), - Expr::AnonymousNode(_) => None, } + + expr.children() + .iter() + .find_map(|child| find_ref_in_expr(child, target)) } diff --git a/crates/plotnik-lib/src/query/shapes.rs b/crates/plotnik-lib/src/query/shapes.rs index efcf55aa..ee4a1501 100644 --- a/crates/plotnik-lib/src/query/shapes.rs +++ b/crates/plotnik-lib/src/query/shapes.rs @@ -10,7 +10,7 @@ use super::Query; use super::invariants::ensure_ref_has_name; use crate::diagnostics::DiagnosticKind; -use crate::parser::{Expr, FieldExpr, Ref, SeqExpr, SyntaxNode}; +use crate::parser::{Expr, Ref, SeqExpr}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ShapeCardinality { @@ -21,17 +21,22 @@ pub enum ShapeCardinality { impl Query<'_> { pub(super) fn infer_shapes(&mut self) { - self.compute_all_cardinalities(self.ast.as_cst().clone()); - self.validate_shapes(self.ast.as_cst().clone()); - } + let bodies: Vec<_> = self.ast.defs().filter_map(|d| d.body()).collect(); + + for body in &bodies { + self.compute_all_cardinalities(body); + } - fn compute_all_cardinalities(&mut self, node: SyntaxNode) { - if let Some(expr) = Expr::cast(node.clone()) { - self.get_or_compute(&expr); + for body in &bodies { + self.validate_shapes(body); } + } + + fn compute_all_cardinalities(&mut self, expr: &Expr) { + self.get_or_compute(expr); - for child in node.children() { - self.compute_all_cardinalities(child); + for child in expr.children() { + self.compute_all_cardinalities(&child); } } @@ -94,41 +99,31 @@ impl Query<'_> { self.get_or_compute(&body) } - fn validate_shapes(&mut self, node: SyntaxNode) { - let Some(field) = FieldExpr::cast(node.clone()) else { - for child in node.children() { - self.validate_shapes(child); + fn validate_shapes(&mut self, expr: &Expr) { + if let Expr::FieldExpr(field) = expr + && let Some(value) = field.value() + { + let card = self + .shape_cardinality_table + .get(&value) + .copied() + .unwrap_or(ShapeCardinality::One); + + if card == ShapeCardinality::Many { + let field_name = field + .name() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "field".to_string()); + + self.shapes_diagnostics + .report(DiagnosticKind::FieldSequenceValue, value.text_range()) + .message(field_name) + .emit(); } - return; - }; - - let Some(value) = field.value() else { - for child in node.children() { - self.validate_shapes(child); - } - return; - }; - - let card = self - .shape_cardinality_table - .get(&value) - .copied() - .unwrap_or(ShapeCardinality::One); - - if card == ShapeCardinality::Many { - let field_name = field - .name() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "field".to_string()); - - self.shapes_diagnostics - .report(DiagnosticKind::FieldSequenceValue, value.text_range()) - .message(field_name) - .emit(); } - for child in node.children() { - self.validate_shapes(child); + for child in expr.children() { + self.validate_shapes(&child); } } } diff --git a/crates/plotnik-lib/src/query/symbol_table.rs b/crates/plotnik-lib/src/query/symbol_table.rs index 73ebb029..4066d62d 100644 --- a/crates/plotnik-lib/src/query/symbol_table.rs +++ b/crates/plotnik-lib/src/query/symbol_table.rs @@ -7,7 +7,7 @@ use indexmap::IndexMap; use crate::diagnostics::DiagnosticKind; -use crate::parser::{Expr, Ref, ast}; +use crate::parser::{Expr, Ref, ast, token_src}; use super::Query; @@ -21,12 +21,11 @@ impl<'a> Query<'a> { continue; }; - let range = name_token.text_range(); - let name = &self.source[range.start().into()..range.end().into()]; + let name = token_src(&name_token, self.source); if self.symbol_table.contains_key(name) { self.resolve_diagnostics - .report(DiagnosticKind::DuplicateDefinition, range) + .report(DiagnosticKind::DuplicateDefinition, name_token.text_range()) .message(name) .emit(); continue; @@ -53,44 +52,12 @@ impl<'a> Query<'a> { } fn collect_reference_diagnostics(&mut self, expr: &Expr) { - match expr { - Expr::Ref(r) => { - self.check_ref_diagnostic(r); - } - Expr::NamedNode(node) => { - for child in node.children() { - self.collect_reference_diagnostics(&child); - } - } - Expr::AltExpr(alt) => { - for branch in alt.branches() { - let Some(body) = branch.body() else { continue }; - self.collect_reference_diagnostics(&body); - } - // Parser wraps all alt children in Branch nodes - assert!( - alt.exprs().next().is_none(), - "symbol_table: unexpected bare Expr in Alt (parser should wrap in Branch)" - ); - } - Expr::SeqExpr(seq) => { - for child in seq.children() { - self.collect_reference_diagnostics(&child); - } - } - Expr::CapturedExpr(cap) => { - let Some(inner) = cap.inner() else { return }; - self.collect_reference_diagnostics(&inner); - } - Expr::QuantifiedExpr(q) => { - let Some(inner) = q.inner() else { return }; - self.collect_reference_diagnostics(&inner); - } - Expr::FieldExpr(f) => { - let Some(value) = f.value() else { return }; - self.collect_reference_diagnostics(&value); - } - Expr::AnonymousNode(_) => {} + if let Expr::Ref(r) = expr { + self.check_ref_diagnostic(r); + } + + for child in expr.children() { + self.collect_reference_diagnostics(&child); } }