From fdd40bc12c2a67bfea4829b0bf854d2e8c4bb2e8 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 21:42:58 -0300 Subject: [PATCH 01/10] feat: Static analyzer for node types --- Cargo.lock | 37 + Cargo.toml | 2 +- .../plotnik-cli/src/commands/debug/source.rs | 6 +- crates/plotnik-cli/src/commands/langs.rs | 16 +- crates/plotnik-core/Cargo.toml | 11 + crates/plotnik-core/src/lib.rs | 743 ++++++++++++++++++ crates/plotnik-langs/Cargo.toml | 53 +- crates/plotnik-langs/src/lib.rs | 473 ++++++++++- crates/plotnik-macros/Cargo.toml | 58 +- crates/plotnik-macros/src/lib.rs | 251 +++++- 10 files changed, 1564 insertions(+), 86 deletions(-) create mode 100644 crates/plotnik-core/Cargo.toml create mode 100644 crates/plotnik-core/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 855a829c..a4610751 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -348,12 +348,21 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "plotnik-core" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "plotnik-langs" version = "0.1.0" dependencies = [ "cargo_metadata", "paste", + "plotnik-core", "plotnik-macros", "tree-sitter", "tree-sitter-bash", @@ -402,9 +411,37 @@ dependencies = [ name = "plotnik-macros" version = "0.1.0" dependencies = [ + "plotnik-core", "proc-macro2", "quote", + "serde_json", "syn", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-nix", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-swift", + "tree-sitter-typescript", + "tree-sitter-yaml", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e73f5346..fe776c4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,4 +2,4 @@ resolver = "2" -members = ["crates/plotnik-cli", "crates/plotnik-lib", "crates/plotnik-langs", "crates/plotnik-macros"] \ No newline at end of file +members = ["crates/plotnik-cli", "crates/plotnik-lib", "crates/plotnik-langs", "crates/plotnik-macros", "crates/plotnik-core"] diff --git a/crates/plotnik-cli/src/commands/debug/source.rs b/crates/plotnik-cli/src/commands/debug/source.rs index aae76034..6fc8bfac 100644 --- a/crates/plotnik-cli/src/commands/debug/source.rs +++ b/crates/plotnik-cli/src/commands/debug/source.rs @@ -25,7 +25,7 @@ pub fn resolve_lang( lang: &Option, _source_text: &Option, source_file: &Option, -) -> &'static Lang { +) -> &'static dyn Lang { if let Some(name) = lang { return plotnik_langs::from_name(name).unwrap_or_else(|| { eprintln!("error: unknown language: {}", name); @@ -50,10 +50,10 @@ pub fn resolve_lang( std::process::exit(1); } -pub fn parse_tree(source: &str, lang: &Lang) -> tree_sitter::Tree { +pub fn parse_tree(source: &str, lang: &dyn Lang) -> tree_sitter::Tree { let mut parser = tree_sitter::Parser::new(); parser - .set_language(&lang.ts_lang) + .set_language(lang.get_inner()) .expect("failed to set language"); parser.parse(source, None).expect("failed to parse source") } diff --git a/crates/plotnik-cli/src/commands/langs.rs b/crates/plotnik-cli/src/commands/langs.rs index daef71cc..4b8da80e 100644 --- a/crates/plotnik-cli/src/commands/langs.rs +++ b/crates/plotnik-cli/src/commands/langs.rs @@ -2,15 +2,17 @@ pub fn run() { let langs = plotnik_langs::all(); println!("Supported languages ({}):", langs.len()); for lang in langs { - println!(" {}", lang.name); + println!(" {}", lang.name()); } } #[cfg(test)] mod tests { - fn smoke_test(lang: &plotnik_langs::Lang, source: &str, expected_root: &str) { + use plotnik_langs::Lang; + + fn smoke_test(lang: &dyn Lang, source: &str, expected_root: &str) { let mut parser = tree_sitter::Parser::new(); - parser.set_language(&lang.ts_lang).unwrap(); + parser.set_language(lang.get_inner()).unwrap(); let tree = parser.parse(source, None).unwrap(); let root = tree.root_node(); assert_eq!(root.kind(), expected_root); @@ -204,9 +206,9 @@ mod tests { #[test] #[cfg(feature = "javascript")] fn lang_from_name() { - assert_eq!(plotnik_langs::from_name("js").unwrap().name, "javascript"); + assert_eq!(plotnik_langs::from_name("js").unwrap().name(), "javascript"); assert_eq!( - plotnik_langs::from_name("JavaScript").unwrap().name, + plotnik_langs::from_name("JavaScript").unwrap().name(), "javascript" ); assert!(plotnik_langs::from_name("unknown").is_none()); @@ -215,7 +217,7 @@ mod tests { #[test] #[cfg(feature = "javascript")] fn lang_from_extension() { - assert_eq!(plotnik_langs::from_ext("js").unwrap().name, "javascript"); - assert_eq!(plotnik_langs::from_ext("mjs").unwrap().name, "javascript"); + assert_eq!(plotnik_langs::from_ext("js").unwrap().name(), "javascript"); + assert_eq!(plotnik_langs::from_ext("mjs").unwrap().name(), "javascript"); } } diff --git a/crates/plotnik-core/Cargo.toml b/crates/plotnik-core/Cargo.toml new file mode 100644 index 00000000..43cf6ca5 --- /dev/null +++ b/crates/plotnik-core/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "plotnik-core" +version = "0.1.0" +edition = "2024" +license = "MIT" +description = "Core data structures for Plotnik" +repository = "https://github.com/plotnik-lang/plotnik" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" \ No newline at end of file diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs new file mode 100644 index 00000000..fa8dfb7e --- /dev/null +++ b/crates/plotnik-core/src/lib.rs @@ -0,0 +1,743 @@ +//! Core data structures for Plotnik node type information. +//! +//! Two layers: +//! - **Deserialization layer**: 1:1 mapping to `node-types.json` +//! - **Analysis layer**: ID-indexed structures for efficient lookups +//! +//! Two implementations: +//! - **Dynamic** (`DynamicNodeTypes`): HashMap-based, for runtime construction +//! - **Static** (`StaticNodeTypes`): Array-based, zero runtime init + +use std::collections::HashMap; +use std::num::NonZeroU16; + +// ============================================================================ +// Deserialization Layer +// ============================================================================ + +/// Raw node definition from `node-types.json`. +#[derive(Debug, Clone, serde::Deserialize)] +pub struct RawNode { + #[serde(rename = "type")] + pub type_name: String, + pub named: bool, + #[serde(default)] + pub root: bool, + #[serde(default)] + pub extra: bool, + #[serde(default)] + pub fields: HashMap, + pub children: Option, + pub subtypes: Option>, +} + +/// Cardinality constraints for a field or children slot. +#[derive(Debug, Clone, serde::Deserialize)] +pub struct RawCardinality { + pub multiple: bool, + pub required: bool, + pub types: Vec, +} + +/// Reference to a node type. +#[derive(Debug, Clone, serde::Deserialize)] +pub struct RawTypeRef { + #[serde(rename = "type")] + pub type_name: String, + pub named: bool, +} + +/// Parse `node-types.json` content into raw nodes. +pub fn parse_node_types(json: &str) -> Result, serde_json::Error> { + serde_json::from_str(json) +} + +// ============================================================================ +// Common Types +// ============================================================================ + +/// Node type ID (tree-sitter uses u16). +pub type NodeTypeId = u16; + +/// Field ID (tree-sitter uses NonZeroU16). +pub type NodeFieldId = NonZeroU16; + +/// Cardinality info for a field or children slot. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Cardinality { + pub multiple: bool, + pub required: bool, +} + +// ============================================================================ +// NodeTypes Trait +// ============================================================================ + +/// Trait for node type constraint lookups. +/// +/// Provides only what tree-sitter's `Language` API doesn't: +/// - Root node identification +/// - Extra nodes (comments, whitespace) +/// - Field constraints per node type +/// - Children constraints per node type +/// +/// For name↔ID resolution and supertype info, use `Language` directly. +pub trait NodeTypes { + fn root(&self) -> Option; + fn is_extra(&self, id: NodeTypeId) -> bool; + + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool; + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option; + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId]; + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool; + + fn children_cardinality(&self, node: NodeTypeId) -> Option; + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId]; + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; +} + +// ============================================================================ +// Static Analysis Layer (zero runtime init) +// ============================================================================ + +/// Field info for static storage. +#[derive(Debug, Clone, Copy)] +pub struct StaticFieldInfo { + pub cardinality: Cardinality, + pub valid_types: &'static [NodeTypeId], +} + +/// Children info for static storage. +#[derive(Debug, Clone, Copy)] +pub struct StaticChildrenInfo { + pub cardinality: Cardinality, + pub valid_types: &'static [NodeTypeId], +} + +/// Complete node type information for static storage. +/// +/// Note: supertype/subtype info is NOT stored here - use `Language::node_kind_is_supertype()` +/// and `Language::subtypes_for_supertype()` from tree-sitter instead. +#[derive(Debug, Clone, Copy)] +pub struct StaticNodeTypeInfo { + pub name: &'static str, + pub named: bool, + /// Sorted slice of (field_id, field_info) pairs for binary search. + pub fields: &'static [(NodeFieldId, StaticFieldInfo)], + pub children: Option, +} + +/// Compiled node type database with static storage. +/// +/// All data is statically allocated - no runtime initialization needed. +/// Node lookups use binary search on sorted arrays. +#[derive(Debug, Clone, Copy)] +pub struct StaticNodeTypes { + /// Sorted slice of (node_id, node_info) pairs. + nodes: &'static [(NodeTypeId, StaticNodeTypeInfo)], + /// Slice of extra node type IDs. + extras: &'static [NodeTypeId], + root: Option, +} + +impl StaticNodeTypes { + pub const fn new( + nodes: &'static [(NodeTypeId, StaticNodeTypeInfo)], + extras: &'static [NodeTypeId], + root: Option, + ) -> Self { + Self { + nodes, + extras, + root, + } + } + + /// Get info for a node type by ID (binary search). + pub fn get(&self, id: NodeTypeId) -> Option<&'static StaticNodeTypeInfo> { + self.nodes + .binary_search_by_key(&id, |(node_id, _)| *node_id) + .ok() + .map(|idx| &self.nodes[idx].1) + } + + /// Check if node type exists. + pub fn contains(&self, id: NodeTypeId) -> bool { + self.nodes + .binary_search_by_key(&id, |(node_id, _)| *node_id) + .is_ok() + } + + /// Get field info for a node type (binary search for node, then field). + pub fn field( + &self, + node_id: NodeTypeId, + field_id: NodeFieldId, + ) -> Option<&'static StaticFieldInfo> { + let info = self.get(node_id)?; + info.fields + .binary_search_by_key(&field_id, |(fid, _)| *fid) + .ok() + .map(|idx| &info.fields[idx].1) + } + + /// Get children info for a node type. + pub fn children(&self, node_id: NodeTypeId) -> Option { + self.get(node_id)?.children + } + + /// Get all extra node type IDs. + pub fn extras(&self) -> &'static [NodeTypeId] { + self.extras + } + + pub fn len(&self) -> usize { + self.nodes.len() + } + + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.nodes.iter().map(|(id, info)| (*id, info)) + } +} + +impl NodeTypes for StaticNodeTypes { + fn root(&self) -> Option { + self.root + } + + fn is_extra(&self, id: NodeTypeId) -> bool { + self.extras.contains(&id) + } + + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { + self.get(node).is_some_and(|info| { + info.fields + .binary_search_by_key(&field, |(fid, _)| *fid) + .is_ok() + }) + } + + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { + self.field(node, field).map(|f| f.cardinality) + } + + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { + self.field(node, field) + .map(|f| f.valid_types) + .unwrap_or(&[]) + } + + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { + self.valid_field_types(node, field).contains(&child) + } + + fn children_cardinality(&self, node: NodeTypeId) -> Option { + self.children(node).map(|c| c.cardinality) + } + + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { + self.children(node).map(|c| c.valid_types).unwrap_or(&[]) + } + + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { + self.valid_child_types(node).contains(&child) + } +} + +// ============================================================================ +// Dynamic Analysis Layer (runtime construction) +// ============================================================================ + +/// Information about a single field on a node type. +#[derive(Debug, Clone)] +pub struct FieldInfo { + pub cardinality: Cardinality, + pub valid_types: Vec, +} + +/// Information about a node type's children (non-field children). +#[derive(Debug, Clone)] +pub struct ChildrenInfo { + pub cardinality: Cardinality, + pub valid_types: Vec, +} + +/// Complete node type information. +/// +/// Note: supertype/subtype info is NOT stored here - use tree-sitter's Language API. +#[derive(Debug, Clone)] +pub struct NodeTypeInfo { + pub name: String, + pub named: bool, + pub fields: HashMap, + pub children: Option, +} + +/// Compiled node type database for a language (dynamic/heap-allocated). +/// +/// Use this for runtime construction or as reference implementation. +/// For zero-init static data, use `StaticNodeTypes`. +#[derive(Debug, Clone)] +pub struct DynamicNodeTypes { + nodes: HashMap, + extras: Vec, + root: Option, +} + +impl DynamicNodeTypes { + pub fn from_raw( + nodes: HashMap, + extras: Vec, + root: Option, + ) -> Self { + Self { + nodes, + extras, + root, + } + } + + /// Build from raw nodes and ID resolution functions. + pub fn build(raw_nodes: &[RawNode], node_id_for_name: F, field_id_for_name: G) -> Self + where + F: Fn(&str, bool) -> Option, + G: Fn(&str) -> Option, + { + let mut nodes = HashMap::new(); + let mut extras = Vec::new(); + let mut root = None; + + for raw in raw_nodes { + let Some(node_id) = node_id_for_name(&raw.type_name, raw.named) else { + continue; + }; + + if raw.root { + root = Some(node_id); + } + + if raw.extra { + extras.push(node_id); + } + + let mut fields = HashMap::new(); + for (field_name, raw_card) in &raw.fields { + let Some(field_id) = field_id_for_name(field_name) else { + continue; + }; + + let valid_types = raw_card + .types + .iter() + .filter_map(|t| node_id_for_name(&t.type_name, t.named)) + .collect(); + + fields.insert( + field_id, + FieldInfo { + cardinality: Cardinality { + multiple: raw_card.multiple, + required: raw_card.required, + }, + valid_types, + }, + ); + } + + let children = raw.children.as_ref().map(|raw_card| { + let valid_types = raw_card + .types + .iter() + .filter_map(|t| node_id_for_name(&t.type_name, t.named)) + .collect(); + + ChildrenInfo { + cardinality: Cardinality { + multiple: raw_card.multiple, + required: raw_card.required, + }, + valid_types, + } + }); + + nodes.insert( + node_id, + NodeTypeInfo { + name: raw.type_name.clone(), + named: raw.named, + fields, + children, + }, + ); + } + + Self { + nodes, + extras, + root, + } + } + + pub fn get(&self, id: NodeTypeId) -> Option<&NodeTypeInfo> { + self.nodes.get(&id) + } + + pub fn contains(&self, id: NodeTypeId) -> bool { + self.nodes.contains_key(&id) + } + + pub fn field(&self, node_id: NodeTypeId, field_id: NodeFieldId) -> Option<&FieldInfo> { + self.nodes.get(&node_id)?.fields.get(&field_id) + } + + pub fn children(&self, node_id: NodeTypeId) -> Option<&ChildrenInfo> { + self.nodes.get(&node_id)?.children.as_ref() + } + + pub fn extras(&self) -> &[NodeTypeId] { + &self.extras + } + + pub fn len(&self) -> usize { + self.nodes.len() + } + + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.nodes.iter().map(|(&id, info)| (id, info)) + } + + /// Get sorted vec of all node IDs (for conversion to static). + pub fn sorted_node_ids(&self) -> Vec { + let mut ids: Vec<_> = self.nodes.keys().copied().collect(); + ids.sort_unstable(); + ids + } + + /// Get sorted vec of extra IDs (for conversion to static). + pub fn sorted_extras(&self) -> Vec { + let mut ids = self.extras.clone(); + ids.sort_unstable(); + ids + } +} + +impl NodeTypes for DynamicNodeTypes { + fn root(&self) -> Option { + self.root + } + + fn is_extra(&self, id: NodeTypeId) -> bool { + self.extras.contains(&id) + } + + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { + self.nodes + .get(&node) + .is_some_and(|n| n.fields.contains_key(&field)) + } + + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { + self.field(node, field).map(|f| f.cardinality) + } + + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { + self.field(node, field) + .map(|f| f.valid_types.as_slice()) + .unwrap_or(&[]) + } + + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { + self.valid_field_types(node, field).contains(&child) + } + + fn children_cardinality(&self, node: NodeTypeId) -> Option { + self.children(node).map(|c| c.cardinality) + } + + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { + self.children(node) + .map(|c| c.valid_types.as_slice()) + .unwrap_or(&[]) + } + + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { + self.valid_child_types(node).contains(&child) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_JSON: &str = r#"[ + { + "type": "expression", + "named": true, + "subtypes": [ + {"type": "identifier", "named": true}, + {"type": "number", "named": true} + ] + }, + { + "type": "function_declaration", + "named": true, + "fields": { + "name": { + "multiple": false, + "required": true, + "types": [{"type": "identifier", "named": true}] + }, + "body": { + "multiple": false, + "required": true, + "types": [{"type": "block", "named": true}] + } + } + }, + { + "type": "program", + "named": true, + "root": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [{"type": "statement", "named": true}] + } + }, + { + "type": "comment", + "named": true, + "extra": true + }, + { + "type": "identifier", + "named": true + }, + { + "type": "+", + "named": false + } + ]"#; + + #[test] + fn parse_raw_nodes() { + let nodes = parse_node_types(SAMPLE_JSON).unwrap(); + assert_eq!(nodes.len(), 6); + + let expr = nodes.iter().find(|n| n.type_name == "expression").unwrap(); + assert!(expr.named); + assert!(expr.subtypes.is_some()); + assert_eq!(expr.subtypes.as_ref().unwrap().len(), 2); + + let func = nodes + .iter() + .find(|n| n.type_name == "function_declaration") + .unwrap(); + assert!(func.fields.contains_key("name")); + assert!(func.fields.contains_key("body")); + + let plus = nodes.iter().find(|n| n.type_name == "+").unwrap(); + assert!(!plus.named); + } + + #[test] + fn build_dynamic_node_types() { + let raw = parse_node_types(SAMPLE_JSON).unwrap(); + + let node_ids: HashMap<(&str, bool), NodeTypeId> = [ + (("expression", true), 1), + (("function_declaration", true), 2), + (("program", true), 3), + (("comment", true), 4), + (("identifier", true), 5), + (("+", false), 6), + (("block", true), 7), + (("statement", true), 8), + (("number", true), 9), + ] + .into_iter() + .collect(); + + let field_ids: HashMap<&str, NodeFieldId> = [ + ("name", NonZeroU16::new(1).unwrap()), + ("body", NonZeroU16::new(2).unwrap()), + ] + .into_iter() + .collect(); + + let node_types = DynamicNodeTypes::build( + &raw, + |name, named| node_ids.get(&(name, named)).copied(), + |name| field_ids.get(name).copied(), + ); + + assert_eq!(node_types.len(), 6); + + // Test via trait + assert_eq!(node_types.root(), Some(3)); + assert!(node_types.is_extra(4)); + assert!(!node_types.is_extra(5)); + assert!(node_types.has_field(2, NonZeroU16::new(1).unwrap())); + assert!(node_types.has_field(2, NonZeroU16::new(2).unwrap())); + assert!(!node_types.has_field(2, NonZeroU16::new(99).unwrap())); + assert!(node_types.is_valid_field_type(2, NonZeroU16::new(1).unwrap(), 5)); + assert!(!node_types.is_valid_field_type(2, NonZeroU16::new(1).unwrap(), 7)); + } + + // Static tests using manually constructed data + static TEST_VALID_TYPES_ID: [NodeTypeId; 1] = [5]; // identifier + static TEST_VALID_TYPES_BLOCK: [NodeTypeId; 1] = [7]; // block + static TEST_CHILDREN_TYPES: [NodeTypeId; 1] = [8]; // statement + + static TEST_FIELDS: [(NodeFieldId, StaticFieldInfo); 2] = [ + ( + NonZeroU16::new(1).unwrap(), + StaticFieldInfo { + cardinality: Cardinality { + multiple: false, + required: true, + }, + valid_types: &TEST_VALID_TYPES_ID, + }, + ), + ( + NonZeroU16::new(2).unwrap(), + StaticFieldInfo { + cardinality: Cardinality { + multiple: false, + required: true, + }, + valid_types: &TEST_VALID_TYPES_BLOCK, + }, + ), + ]; + + static TEST_NODES: [(NodeTypeId, StaticNodeTypeInfo); 4] = [ + ( + 1, + StaticNodeTypeInfo { + name: "expression", + named: true, + fields: &[], + children: None, + }, + ), + ( + 2, + StaticNodeTypeInfo { + name: "function_declaration", + named: true, + fields: &TEST_FIELDS, + children: None, + }, + ), + ( + 3, + StaticNodeTypeInfo { + name: "program", + named: true, + fields: &[], + children: Some(StaticChildrenInfo { + cardinality: Cardinality { + multiple: true, + required: false, + }, + valid_types: &TEST_CHILDREN_TYPES, + }), + }, + ), + ( + 4, + StaticNodeTypeInfo { + name: "comment", + named: true, + fields: &[], + children: None, + }, + ), + ]; + + static TEST_EXTRAS: [NodeTypeId; 1] = [4]; + + static TEST_STATIC_NODE_TYPES: StaticNodeTypes = + StaticNodeTypes::new(&TEST_NODES, &TEST_EXTRAS, Some(3)); + + #[test] + fn static_node_types_get() { + let info = TEST_STATIC_NODE_TYPES.get(2).unwrap(); + assert_eq!(info.name, "function_declaration"); + assert!(info.named); + + assert!(TEST_STATIC_NODE_TYPES.get(99).is_none()); + } + + #[test] + fn static_node_types_contains() { + assert!(TEST_STATIC_NODE_TYPES.contains(1)); + assert!(TEST_STATIC_NODE_TYPES.contains(2)); + assert!(!TEST_STATIC_NODE_TYPES.contains(99)); + } + + #[test] + fn static_node_types_trait() { + // Test via trait methods + assert_eq!(TEST_STATIC_NODE_TYPES.root(), Some(3)); + assert!(TEST_STATIC_NODE_TYPES.is_extra(4)); + assert!(!TEST_STATIC_NODE_TYPES.is_extra(1)); + + assert!(TEST_STATIC_NODE_TYPES.has_field(2, NonZeroU16::new(1).unwrap())); + assert!(TEST_STATIC_NODE_TYPES.has_field(2, NonZeroU16::new(2).unwrap())); + assert!(!TEST_STATIC_NODE_TYPES.has_field(2, NonZeroU16::new(99).unwrap())); + assert!(!TEST_STATIC_NODE_TYPES.has_field(1, NonZeroU16::new(1).unwrap())); + + assert!(TEST_STATIC_NODE_TYPES.is_valid_field_type(2, NonZeroU16::new(1).unwrap(), 5)); + assert!(!TEST_STATIC_NODE_TYPES.is_valid_field_type(2, NonZeroU16::new(1).unwrap(), 7)); + assert!(TEST_STATIC_NODE_TYPES.is_valid_field_type(2, NonZeroU16::new(2).unwrap(), 7)); + + let field_types = TEST_STATIC_NODE_TYPES.valid_field_types(2, NonZeroU16::new(1).unwrap()); + assert_eq!(field_types, &[5]); + + let card = TEST_STATIC_NODE_TYPES + .field_cardinality(2, NonZeroU16::new(1).unwrap()) + .unwrap(); + assert!(!card.multiple); + assert!(card.required); + } + + #[test] + fn static_node_types_children() { + let card = TEST_STATIC_NODE_TYPES.children_cardinality(3).unwrap(); + assert!(card.multiple); + assert!(!card.required); + + let child_types = TEST_STATIC_NODE_TYPES.valid_child_types(3); + assert_eq!(child_types, &[8]); + + assert!(TEST_STATIC_NODE_TYPES.is_valid_child_type(3, 8)); + assert!(!TEST_STATIC_NODE_TYPES.is_valid_child_type(3, 5)); + + assert!(TEST_STATIC_NODE_TYPES.children_cardinality(1).is_none()); + assert!(TEST_STATIC_NODE_TYPES.valid_child_types(1).is_empty()); + } + + #[test] + fn static_node_types_len() { + assert_eq!(TEST_STATIC_NODE_TYPES.len(), 4); + assert!(!TEST_STATIC_NODE_TYPES.is_empty()); + } + + #[test] + fn static_node_types_iter() { + let ids: Vec<_> = TEST_STATIC_NODE_TYPES.iter().map(|(id, _)| id).collect(); + assert_eq!(ids, vec![1, 2, 3, 4]); + } +} diff --git a/crates/plotnik-langs/Cargo.toml b/crates/plotnik-langs/Cargo.toml index 00a6be7e..4cdd2258 100644 --- a/crates/plotnik-langs/Cargo.toml +++ b/crates/plotnik-langs/Cargo.toml @@ -38,34 +38,35 @@ default = [ "typescript", "yaml", ] -bash = ["dep:tree-sitter-bash"] -c = ["dep:tree-sitter-c"] -cpp = ["dep:tree-sitter-cpp"] -csharp = ["dep:tree-sitter-c-sharp"] -css = ["dep:tree-sitter-css"] -elixir = ["dep:tree-sitter-elixir"] -go = ["dep:tree-sitter-go"] -haskell = ["dep:tree-sitter-haskell"] -hcl = ["dep:tree-sitter-hcl"] -html = ["dep:tree-sitter-html"] -java = ["dep:tree-sitter-java"] -javascript = ["dep:tree-sitter-javascript"] -json = ["dep:tree-sitter-json"] -kotlin = ["dep:tree-sitter-kotlin"] -lua = ["dep:tree-sitter-lua"] -nix = ["dep:tree-sitter-nix"] -php = ["dep:tree-sitter-php"] -python = ["dep:tree-sitter-python"] -ruby = ["dep:tree-sitter-ruby"] -rust = ["dep:tree-sitter-rust"] -scala = ["dep:tree-sitter-scala"] -solidity = ["dep:tree-sitter-solidity"] -swift = ["dep:tree-sitter-swift"] -typescript = ["dep:tree-sitter-typescript"] -yaml = ["dep:tree-sitter-yaml"] +bash = ["dep:tree-sitter-bash", "plotnik-macros/bash"] +c = ["dep:tree-sitter-c", "plotnik-macros/c"] +cpp = ["dep:tree-sitter-cpp", "plotnik-macros/cpp"] +csharp = ["dep:tree-sitter-c-sharp", "plotnik-macros/csharp"] +css = ["dep:tree-sitter-css", "plotnik-macros/css"] +elixir = ["dep:tree-sitter-elixir", "plotnik-macros/elixir"] +go = ["dep:tree-sitter-go", "plotnik-macros/go"] +haskell = ["dep:tree-sitter-haskell", "plotnik-macros/haskell"] +hcl = ["dep:tree-sitter-hcl", "plotnik-macros/hcl"] +html = ["dep:tree-sitter-html", "plotnik-macros/html"] +java = ["dep:tree-sitter-java", "plotnik-macros/java"] +javascript = ["dep:tree-sitter-javascript", "plotnik-macros/javascript"] +json = ["dep:tree-sitter-json", "plotnik-macros/json"] +kotlin = ["dep:tree-sitter-kotlin", "plotnik-macros/kotlin"] +lua = ["dep:tree-sitter-lua", "plotnik-macros/lua"] +nix = ["dep:tree-sitter-nix", "plotnik-macros/nix"] +php = ["dep:tree-sitter-php", "plotnik-macros/php"] +python = ["dep:tree-sitter-python", "plotnik-macros/python"] +ruby = ["dep:tree-sitter-ruby", "plotnik-macros/ruby"] +rust = ["dep:tree-sitter-rust", "plotnik-macros/rust"] +scala = ["dep:tree-sitter-scala", "plotnik-macros/scala"] +solidity = ["dep:tree-sitter-solidity", "plotnik-macros/solidity"] +swift = ["dep:tree-sitter-swift", "plotnik-macros/swift"] +typescript = ["dep:tree-sitter-typescript", "plotnik-macros/typescript"] +yaml = ["dep:tree-sitter-yaml", "plotnik-macros/yaml"] [dependencies] paste = "1.0" +plotnik-core = { version = "0.1.0", path = "../plotnik-core" } plotnik-macros = { version = "0.1.0", path = "../plotnik-macros" } tree-sitter = "0.25" tree-sitter-bash = { version = "0.25.0", optional = true } @@ -97,4 +98,4 @@ tree-sitter-yaml = { version = "0.7.0", optional = true } [build-dependencies] cargo_metadata = "0.23" -[dev-dependencies] +[dev-dependencies] \ No newline at end of file diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index 167e6a2d..a096ee1e 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -1,11 +1,219 @@ use std::sync::LazyLock; + use tree_sitter::Language; -#[derive(Debug, Clone)] -pub struct Lang { +pub use plotnik_core::{Cardinality, NodeFieldId, NodeTypeId, NodeTypes, StaticNodeTypes}; + +/// Trait providing a unified facade for tree-sitter's Language API +/// combined with our node type constraints. +/// +/// Methods that return Option types handle resolution failures gracefully. +pub trait Lang: Send + Sync { + fn name(&self) -> &str; + + /// Raw tree-sitter Language. You probably don't need this. + fn get_inner(&self) -> &Language; + + // ═══════════════════════════════════════════════════════════════════════ + // Resolution [Language API] + // ═══════════════════════════════════════════════════════════════════════ + + fn resolve_node(&self, kind: &str, named: bool) -> Option; + fn resolve_field(&self, name: &str) -> Option; + + // ═══════════════════════════════════════════════════════════════════════ + // Supertype info [Language API] + // ═══════════════════════════════════════════════════════════════════════ + + fn is_supertype(&self, id: Option) -> bool; + fn subtypes(&self, supertype: Option) -> &[u16]; + + // ═══════════════════════════════════════════════════════════════════════ + // Root & Extras [node_types] + // ═══════════════════════════════════════════════════════════════════════ + + fn root(&self) -> Option; + fn is_extra(&self, id: Option) -> bool; + + // ═══════════════════════════════════════════════════════════════════════ + // Field constraints [node_types] + // ═══════════════════════════════════════════════════════════════════════ + + fn has_field(&self, node: Option, field: Option) -> bool; + fn field_cardinality( + &self, + node: Option, + field: Option, + ) -> Option; + fn valid_field_types( + &self, + node: Option, + field: Option, + ) -> &'static [u16]; + fn is_valid_field_type( + &self, + node: Option, + field: Option, + child: Option, + ) -> bool; + + // ═══════════════════════════════════════════════════════════════════════ + // Children constraints [node_types] + // ═══════════════════════════════════════════════════════════════════════ + + fn children_cardinality(&self, node: Option) -> Option; + fn valid_child_types(&self, node: Option) -> &'static [u16]; + fn is_valid_child_type(&self, node: Option, child: Option) -> bool; +} + +/// Static implementation of `Lang` with compile-time generated node types. +#[derive(Debug)] +pub struct StaticLang { pub name: &'static str, - pub ts_lang: Language, - pub node_types_size: usize, + inner: Language, + node_types: &'static StaticNodeTypes, +} + +impl StaticLang { + pub const fn new( + name: &'static str, + inner: Language, + node_types: &'static StaticNodeTypes, + ) -> Self { + Self { + name, + inner, + node_types, + } + } + + pub fn node_types(&self) -> &'static StaticNodeTypes { + self.node_types + } +} + +impl Lang for StaticLang { + fn name(&self) -> &str { + self.name + } + + fn get_inner(&self) -> &Language { + &self.inner + } + + fn resolve_node(&self, kind: &str, named: bool) -> Option { + let id = self.inner.id_for_node_kind(kind, named); + + // FIX: Disambiguate tree-sitter's ID 0 (could be "end" node or "not found") + // + // Tree-sitter's id_for_node_kind has odd semantics: + // - Returns 0 for "not found" + // - BUT: ID 0 is also a valid ID for the anonymous "end" sentinel node + // + // This creates an ambiguity for anonymous nodes: + // - id_for_node_kind("end", false) -> 0 (valid) + // - id_for_node_kind("fake", false) -> 0 (not found) + // + // For named nodes, 0 is unambiguous since no named node has ID 0. + // For anonymous nodes, we must verify via reverse lookup. + if id == 0 { + if named { + // Named node with ID 0 = definitely not found + None + } else { + // Anonymous node with ID 0 = could be "end" or not found + // Check via reverse lookup + if self.inner.node_kind_for_id(0) == Some(kind) { + Some(0) // It's the "end" node + } else { + None // Not found + } + } + } else { + Some(id) + } + } + + fn resolve_field(&self, name: &str) -> Option { + self.inner.field_id_for_name(name) + } + + fn is_supertype(&self, id: Option) -> bool { + let Some(raw) = id else { return false }; + self.inner.node_kind_is_supertype(raw) + } + + fn subtypes(&self, supertype: Option) -> &[u16] { + let Some(raw) = supertype else { + return &[]; + }; + self.inner.subtypes_for_supertype(raw) + } + + fn root(&self) -> Option { + self.node_types.root() + } + + fn is_extra(&self, id: Option) -> bool { + let Some(id) = id else { return false }; + self.node_types.is_extra(id) + } + + fn has_field(&self, node: Option, field: Option) -> bool { + let (Some(n), Some(f)) = (node, field) else { + return false; + }; + self.node_types.has_field(n, f) + } + + fn field_cardinality( + &self, + node: Option, + field: Option, + ) -> Option { + let (n, f) = (node?, field?); + self.node_types.field_cardinality(n, f) + } + + fn valid_field_types( + &self, + node: Option, + field: Option, + ) -> &'static [u16] { + let (Some(n), Some(f)) = (node, field) else { + return &[]; + }; + self.node_types.valid_field_types(n, f) + } + + fn is_valid_field_type( + &self, + node: Option, + field: Option, + child: Option, + ) -> bool { + let (Some(n), Some(f), Some(c)) = (node, field, child) else { + return false; + }; + self.node_types.is_valid_field_type(n, f, c) + } + + fn children_cardinality(&self, node: Option) -> Option { + let n = node?; + self.node_types.children_cardinality(n) + } + + fn valid_child_types(&self, node: Option) -> &'static [u16] { + let Some(n) = node else { return &[] }; + self.node_types.valid_child_types(n) + } + + fn is_valid_child_type(&self, node: Option, child: Option) -> bool { + let (Some(n), Some(c)) = (node, child) else { + return false; + }; + self.node_types.is_valid_child_type(n, c) + } } macro_rules! define_langs { @@ -21,28 +229,30 @@ macro_rules! define_langs { } ),* $(,)? ) => { - // Generate node_types_size constants via proc macro + // Generate NodeTypes statics via proc macro $( #[cfg(feature = $feature)] - plotnik_macros::generate_node_types_size!($node_types_key); + plotnik_macros::generate_node_types!($node_types_key); )* - // Generate lazy accessor functions + // Generate static Lang definitions with LazyLock $( #[cfg(feature = $feature)] - pub fn $fn_name() -> &'static Lang { + pub fn $fn_name() -> &'static dyn Lang { paste::paste! { - static LANG: LazyLock = LazyLock::new(|| Lang { - name: $name, - ts_lang: $ts_lang.into(), - node_types_size: [<$node_types_key:upper _NODE_TYPES_SIZE>], + static LANG: LazyLock = LazyLock::new(|| { + StaticLang::new( + $name, + $ts_lang.into(), + &[<$node_types_key:upper _NODE_TYPES>], + ) }); } - &LANG + &*LANG } )* - pub fn from_name(s: &str) -> Option<&'static Lang> { + pub fn from_name(s: &str) -> Option<&'static dyn Lang> { match s.to_ascii_lowercase().as_str() { $( #[cfg(feature = $feature)] @@ -52,7 +262,7 @@ macro_rules! define_langs { } } - pub fn from_ext(ext: &str) -> Option<&'static Lang> { + pub fn from_ext(ext: &str) -> Option<&'static dyn Lang> { match ext.to_ascii_lowercase().as_str() { $( #[cfg(feature = $feature)] @@ -62,7 +272,7 @@ macro_rules! define_langs { } } - pub fn all() -> Vec<&'static Lang> { + pub fn all() -> Vec<&'static dyn Lang> { vec![ $( #[cfg(feature = $feature)] @@ -291,42 +501,33 @@ mod tests { #[test] #[cfg(feature = "javascript")] fn lang_from_name() { - assert_eq!(from_name("js").unwrap().name, "javascript"); - assert_eq!(from_name("JavaScript").unwrap().name, "javascript"); + assert_eq!(from_name("js").unwrap().name(), "javascript"); + assert_eq!(from_name("JavaScript").unwrap().name(), "javascript"); assert!(from_name("unknown").is_none()); } #[test] #[cfg(feature = "go")] fn lang_from_name_golang() { - assert_eq!(from_name("go").unwrap().name, "go"); - assert_eq!(from_name("golang").unwrap().name, "go"); - assert_eq!(from_name("GOLANG").unwrap().name, "go"); + assert_eq!(from_name("go").unwrap().name(), "go"); + assert_eq!(from_name("golang").unwrap().name(), "go"); + assert_eq!(from_name("GOLANG").unwrap().name(), "go"); } #[test] #[cfg(feature = "javascript")] fn lang_from_extension() { - assert_eq!(from_ext("js").unwrap().name, "javascript"); - assert_eq!(from_ext("mjs").unwrap().name, "javascript"); + assert_eq!(from_ext("js").unwrap().name(), "javascript"); + assert_eq!(from_ext("mjs").unwrap().name(), "javascript"); } #[test] #[cfg(feature = "typescript")] fn typescript_and_tsx() { - assert_eq!(typescript().name, "typescript"); - assert_eq!(tsx().name, "tsx"); - assert_eq!(from_ext("ts").unwrap().name, "typescript"); - assert_eq!(from_ext("tsx").unwrap().name, "tsx"); - } - - #[test] - #[cfg(feature = "javascript")] - fn node_types_size_matches_runtime() { - let runtime = std::fs::read_to_string(env!("PLOTNIK_NODE_TYPES_JAVASCRIPT")) - .unwrap() - .len(); - assert_eq!(javascript().node_types_size, runtime); + assert_eq!(typescript().name(), "typescript"); + assert_eq!(tsx().name(), "tsx"); + assert_eq!(from_ext("ts").unwrap().name(), "typescript"); + assert_eq!(from_ext("tsx").unwrap().name(), "tsx"); } #[test] @@ -334,7 +535,205 @@ mod tests { let langs = all(); assert!(!langs.is_empty()); for lang in &langs { - assert!(!lang.name.is_empty()); + assert!(!lang.name().is_empty()); + } + } + + #[test] + #[cfg(feature = "javascript")] + fn resolve_node_and_field() { + let lang = javascript(); + + let func_id = lang.resolve_node("function_declaration", true); + assert!(func_id.is_some()); + + let unknown = lang.resolve_node("nonexistent_node_type", true); + assert!(unknown.is_none()); + + let name_field = lang.resolve_field("name"); + assert!(name_field.is_some()); + + let unknown_field = lang.resolve_field("nonexistent_field"); + assert!(unknown_field.is_none()); + } + + #[test] + #[cfg(feature = "javascript")] + fn supertype_via_lang_trait() { + let lang = javascript(); + + let expr_id = lang.resolve_node("expression", true); + assert!(lang.is_supertype(expr_id)); + + let subtypes = lang.subtypes(expr_id); + assert!(!subtypes.is_empty()); + + let func_id = lang.resolve_node("function_declaration", true); + assert!(!lang.is_supertype(func_id)); + } + + #[test] + #[cfg(feature = "javascript")] + fn field_validation_via_trait() { + let lang = javascript(); + + let func_id = lang.resolve_node("function_declaration", true); + let name_field = lang.resolve_field("name"); + let body_field = lang.resolve_field("body"); + + assert!(lang.has_field(func_id, name_field)); + assert!(lang.has_field(func_id, body_field)); + + let identifier_id = lang.resolve_node("identifier", true); + assert!(lang.is_valid_field_type(func_id, name_field, identifier_id)); + + let statement_block_id = lang.resolve_node("statement_block", true); + assert!(lang.is_valid_field_type(func_id, body_field, statement_block_id)); + } + + #[test] + #[cfg(feature = "javascript")] + fn root_via_trait() { + let lang = javascript(); + let root_id = lang.root(); + assert!(root_id.is_some()); + + let program_id = lang.resolve_node("program", true); + assert_eq!(root_id, program_id); + } + + #[test] + #[cfg(feature = "javascript")] + fn unresolved_returns_sensible_defaults() { + let lang = javascript(); + + let unresolved_node: Option = None; + let unresolved_field: Option = None; + + assert!(!lang.is_supertype(unresolved_node)); + assert!(!lang.is_extra(unresolved_node)); + assert!(!lang.has_field(unresolved_node, unresolved_field)); + assert!(lang.subtypes(unresolved_node).is_empty()); + assert!( + lang.valid_field_types(unresolved_node, unresolved_field) + .is_empty() + ); + assert!(lang.valid_child_types(unresolved_node).is_empty()); + assert!(!lang.is_valid_field_type(unresolved_node, unresolved_field, unresolved_node)); + assert!(!lang.is_valid_child_type(unresolved_node, unresolved_node)); + } + + #[test] + #[cfg(feature = "rust")] + fn rust_lang_works() { + let lang = rust(); + let func_id = lang.resolve_node("function_item", true); + assert!(func_id.is_some()); + } + + /// Demonstrates tree-sitter's odd ID semantics and how our wrapper fixes them. + /// + /// Tree-sitter's `id_for_node_kind` returns 0 for both: + /// 1. The valid "end" sentinel node (anonymous, ID 0) + /// 2. Any non-existent node + /// + /// This test shows: + /// - The ambiguity in the raw tree-sitter API + /// - How our wrapper resolves it correctly + #[test] + #[cfg(feature = "javascript")] + fn tree_sitter_id_zero_ambiguity() { + let lang = javascript(); + let raw_lang = lang.get_inner(); + + // === Part 1: Understanding the problem === + + // ID 0 is the "end" sentinel node (anonymous) + assert_eq!(raw_lang.node_kind_for_id(0), Some("end")); + assert!(!raw_lang.node_kind_is_named(0)); + + // Tree-sitter returns 0 for BOTH valid "end" and non-existent nodes + let end_id = raw_lang.id_for_node_kind("end", false); + let fake_id = raw_lang.id_for_node_kind("totally_fake_node", false); + assert_eq!(end_id, 0, "Valid 'end' node returns 0"); + assert_eq!(fake_id, 0, "Non-existent node also returns 0!"); + + // This ambiguity doesn't exist for named nodes (0 always = not found) + let fake_named = raw_lang.id_for_node_kind("fake_named", true); + assert_eq!(fake_named, 0, "Non-existent named node returns 0"); + // And no named node has ID 0 + assert!(!raw_lang.node_kind_is_named(0)); + + // === Part 2: Our wrapper's solution === + + // For named nodes: 0 unambiguously means "not found" + assert!(lang.resolve_node("fake_named", true).is_none()); + + // For anonymous nodes: we disambiguate via reverse lookup + let end_resolved = lang.resolve_node("end", false); + let fake_resolved = lang.resolve_node("totally_fake_node", false); + + assert!(end_resolved.is_some(), "Valid 'end' node should resolve"); + assert_eq!(end_resolved, Some(0), "'end' should have ID 0"); + + assert!( + fake_resolved.is_none(), + "Non-existent node should be Unresolved" + ); + + // === Part 3: Field IDs don't have this problem === + + // Tree-sitter uses Option for fields - clean API! + let name_field_id = raw_lang.field_id_for_name("name"); + assert!(name_field_id.is_some(), "Field 'name' should exist"); + assert!(name_field_id.unwrap().get() > 0, "Field IDs start at 1"); + assert_eq!(raw_lang.field_id_for_name("fake_field"), None); + + // Our wrapper preserves this cleanliness + assert!(lang.resolve_field("name").is_some()); + assert!(lang.resolve_field("fake_field").is_none()); + } + + /// Additional test showing the tree-sitter oddities in detail + #[test] + #[cfg(feature = "javascript")] + fn tree_sitter_api_roundtrip_quirks() { + let lang = javascript(); + let raw_lang = lang.get_inner(); + + // Some nodes appear at multiple IDs! + // This happens when the same node type is used in different contexts + let mut id_to_names = std::collections::HashMap::>::new(); + + for id in 0..raw_lang.node_kind_count() as u16 { + if let Some(name) = raw_lang.node_kind_for_id(id) { + let is_named = raw_lang.node_kind_is_named(id); + id_to_names.entry(id).or_default().push((name, is_named)); + + // The roundtrip might NOT preserve the ID! + let resolved_id = raw_lang.id_for_node_kind(name, is_named); + + // For example, "identifier" might be at both ID 1 and ID 46, + // but id_for_node_kind("identifier", true) returns only one of them + if resolved_id != id && name != "ERROR" { + // This is normal - tree-sitter returns the first matching ID + // when multiple IDs have the same (name, is_named) combination + } + } + } + + // Verify our assumptions about ID 0 + assert_eq!(id_to_names[&0], vec![("end", false)]); + + // Field IDs are cleaner - they start at 1 (NonZeroU16) + assert!(raw_lang.field_name_for_id(0).is_none()); + + for fid in 1..=raw_lang.field_count() as u16 { + if let Some(name) = raw_lang.field_name_for_id(fid) { + // Field roundtrip is reliable + let resolved = raw_lang.field_id_for_name(name); + assert_eq!(resolved, std::num::NonZeroU16::new(fid)); + } } } } diff --git a/crates/plotnik-macros/Cargo.toml b/crates/plotnik-macros/Cargo.toml index c9073c0f..6e0c83c5 100644 --- a/crates/plotnik-macros/Cargo.toml +++ b/crates/plotnik-macros/Cargo.toml @@ -9,7 +9,63 @@ repository = "https://github.com/plotnik-lang/plotnik" [lib] proc-macro = true +[features] +default = [] +bash = ["dep:tree-sitter-bash"] +c = ["dep:tree-sitter-c"] +cpp = ["dep:tree-sitter-cpp"] +csharp = ["dep:tree-sitter-c-sharp"] +css = ["dep:tree-sitter-css"] +elixir = ["dep:tree-sitter-elixir"] +go = ["dep:tree-sitter-go"] +haskell = ["dep:tree-sitter-haskell"] +hcl = ["dep:tree-sitter-hcl"] +html = ["dep:tree-sitter-html"] +java = ["dep:tree-sitter-java"] +javascript = ["dep:tree-sitter-javascript"] +json = ["dep:tree-sitter-json"] +kotlin = ["dep:tree-sitter-kotlin"] +lua = ["dep:tree-sitter-lua"] +nix = ["dep:tree-sitter-nix"] +php = ["dep:tree-sitter-php"] +python = ["dep:tree-sitter-python"] +ruby = ["dep:tree-sitter-ruby"] +rust = ["dep:tree-sitter-rust"] +scala = ["dep:tree-sitter-scala"] +solidity = ["dep:tree-sitter-solidity"] +swift = ["dep:tree-sitter-swift"] +typescript = ["dep:tree-sitter-typescript"] +yaml = ["dep:tree-sitter-yaml"] + [dependencies] proc-macro2 = "1" quote = "1" -syn = "2" \ No newline at end of file +syn = "2" +plotnik-core = { version = "0.1.0", path = "../plotnik-core" } +serde_json = "1" +tree-sitter = "0.25" +tree-sitter-bash = { version = "0.25.0", optional = true } +tree-sitter-c = { version = "0.24.0", optional = true } +tree-sitter-cpp = { version = "0.23.0", optional = true } +tree-sitter-c-sharp = { version = "0.23.0", optional = true } +tree-sitter-css = { version = "0.25.0", optional = true } +tree-sitter-elixir = { version = "0.3.0", optional = true } +tree-sitter-go = { version = "0.25.0", optional = true } +tree-sitter-haskell = { version = "0.23.0", optional = true } +tree-sitter-hcl = { version = "1.1.0", optional = true } +tree-sitter-html = { version = "0.23.0", optional = true } +tree-sitter-java = { version = "0.23.0", optional = true } +tree-sitter-javascript = { version = "0.25.0", optional = true } +tree-sitter-json = { version = "0.24.0", optional = true } +tree-sitter-kotlin = { version = "0.4.0", optional = true, package = "tree-sitter-kotlin-sg" } +tree-sitter-lua = { version = "0.2.0", optional = true } +tree-sitter-nix = { version = "0.3.0", optional = true } +tree-sitter-php = { version = "0.24.0", optional = true } +tree-sitter-python = { version = "0.25.0", optional = true } +tree-sitter-ruby = { version = "0.23.0", optional = true } +tree-sitter-rust = { version = "0.24.0", optional = true } +tree-sitter-scala = { version = "0.24.0", optional = true } +tree-sitter-solidity = { version = "1.2.11", optional = true } +tree-sitter-swift = { version = "0.7.0", optional = true } +tree-sitter-typescript = { version = "0.23.2", optional = true } +tree-sitter-yaml = { version = "0.7.0", optional = true } \ No newline at end of file diff --git a/crates/plotnik-macros/src/lib.rs b/crates/plotnik-macros/src/lib.rs index db35dd63..a226dcf2 100644 --- a/crates/plotnik-macros/src/lib.rs +++ b/crates/plotnik-macros/src/lib.rs @@ -1,30 +1,259 @@ use proc_macro::TokenStream; +use proc_macro2::Span; use quote::quote; use syn::{LitStr, parse_macro_input}; +use tree_sitter::Language; +use plotnik_core::NodeTypes; + +/// Generate a StaticNodeTypes constant for a language. +/// +/// Usage: `generate_node_types!("javascript")` +/// +/// This reads the node-types.json at compile time and uses the tree-sitter +/// Language to resolve node/field names to IDs, producing efficient lookup tables. +/// The output is fully statically allocated - no runtime initialization needed. #[proc_macro] -pub fn generate_node_types_size(input: TokenStream) -> TokenStream { - let lang = parse_macro_input!(input as LitStr).value(); - let env_var = format!("PLOTNIK_NODE_TYPES_{}", lang.to_uppercase()); +pub fn generate_node_types(input: TokenStream) -> TokenStream { + let lang_key = parse_macro_input!(input as LitStr).value(); + + let env_var = format!("PLOTNIK_NODE_TYPES_{}", lang_key.to_uppercase()); - let path = std::env::var(&env_var).unwrap_or_else(|_| { + let json_path = std::env::var(&env_var).unwrap_or_else(|_| { panic!( "Environment variable {} not set. Is build.rs configured correctly?", env_var ) }); - let size = std::fs::read_to_string(&path) - .unwrap_or_else(|e| panic!("Failed to read {}: {}", path, e)) - .len(); + let json_content = std::fs::read_to_string(&json_path) + .unwrap_or_else(|e| panic!("Failed to read {}: {}", json_path, e)); + + let raw_nodes: Vec = serde_json::from_str(&json_content) + .unwrap_or_else(|e| panic!("Failed to parse {}: {}", json_path, e)); + + let ts_lang = get_language_for_key(&lang_key); let const_name = syn::Ident::new( - &format!("{}_NODE_TYPES_SIZE", lang.to_uppercase()), - proc_macro2::Span::call_site(), + &format!("{}_NODE_TYPES", lang_key.to_uppercase()), + Span::call_site(), ); + let generated = generate_static_node_types_code(&raw_nodes, &ts_lang, &lang_key, &const_name); + + generated.into() +} + +fn get_language_for_key(key: &str) -> Language { + match key.to_lowercase().as_str() { + #[cfg(feature = "bash")] + "bash" => tree_sitter_bash::LANGUAGE.into(), + #[cfg(feature = "c")] + "c" => tree_sitter_c::LANGUAGE.into(), + #[cfg(feature = "cpp")] + "cpp" => tree_sitter_cpp::LANGUAGE.into(), + #[cfg(feature = "csharp")] + "csharp" => tree_sitter_c_sharp::LANGUAGE.into(), + #[cfg(feature = "css")] + "css" => tree_sitter_css::LANGUAGE.into(), + #[cfg(feature = "elixir")] + "elixir" => tree_sitter_elixir::LANGUAGE.into(), + #[cfg(feature = "go")] + "go" => tree_sitter_go::LANGUAGE.into(), + #[cfg(feature = "haskell")] + "haskell" => tree_sitter_haskell::LANGUAGE.into(), + #[cfg(feature = "hcl")] + "hcl" => tree_sitter_hcl::LANGUAGE.into(), + #[cfg(feature = "html")] + "html" => tree_sitter_html::LANGUAGE.into(), + #[cfg(feature = "java")] + "java" => tree_sitter_java::LANGUAGE.into(), + #[cfg(feature = "javascript")] + "javascript" => tree_sitter_javascript::LANGUAGE.into(), + #[cfg(feature = "json")] + "json" => tree_sitter_json::LANGUAGE.into(), + #[cfg(feature = "kotlin")] + "kotlin" => tree_sitter_kotlin::LANGUAGE.into(), + #[cfg(feature = "lua")] + "lua" => tree_sitter_lua::LANGUAGE.into(), + #[cfg(feature = "nix")] + "nix" => tree_sitter_nix::LANGUAGE.into(), + #[cfg(feature = "php")] + "php" => tree_sitter_php::LANGUAGE_PHP.into(), + #[cfg(feature = "python")] + "python" => tree_sitter_python::LANGUAGE.into(), + #[cfg(feature = "ruby")] + "ruby" => tree_sitter_ruby::LANGUAGE.into(), + #[cfg(feature = "rust")] + "rust" => tree_sitter_rust::LANGUAGE.into(), + #[cfg(feature = "scala")] + "scala" => tree_sitter_scala::LANGUAGE.into(), + #[cfg(feature = "solidity")] + "solidity" => tree_sitter_solidity::LANGUAGE.into(), + #[cfg(feature = "swift")] + "swift" => tree_sitter_swift::LANGUAGE.into(), + #[cfg(feature = "typescript")] + "typescript" => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + #[cfg(feature = "typescript")] + "typescript_tsx" => tree_sitter_typescript::LANGUAGE_TSX.into(), + #[cfg(feature = "yaml")] + "yaml" => tree_sitter_yaml::LANGUAGE.into(), + _ => panic!("Unknown or disabled language key: {}", key), + } +} + +fn generate_static_node_types_code( + raw_nodes: &[plotnik_core::RawNode], + ts_lang: &Language, + lang_key: &str, + const_name: &syn::Ident, +) -> proc_macro2::TokenStream { + let node_types = plotnik_core::DynamicNodeTypes::build( + raw_nodes, + |name, named| { + let id = ts_lang.id_for_node_kind(name, named); + if id == 0 && named { None } else { Some(id) } + }, + |name| ts_lang.field_id_for_name(name), + ); + + let prefix = lang_key.to_uppercase(); + + let mut static_defs = Vec::new(); + let mut node_entries = Vec::new(); + + let extras = node_types.sorted_extras(); + let root = node_types.root(); + + // Process nodes in sorted order (for binary search on node lookup) + let sorted_node_ids = node_types.sorted_node_ids(); + + for node_id in &sorted_node_ids { + let info = node_types.get(*node_id).unwrap(); + + let mut field_array_defs = Vec::new(); + let mut field_entries = Vec::new(); + + // Sort fields by field_id (for binary search on field lookup) + let mut sorted_fields: Vec<_> = info.fields.iter().collect(); + sorted_fields.sort_by_key(|(fid, _)| *fid); + + for (field_id, field_info) in &sorted_fields { + let valid_types = field_info.valid_types.to_vec(); + + let valid_types_name = syn::Ident::new( + &format!("{}_N{}_F{}_TYPES", prefix, node_id, field_id), + Span::call_site(), + ); + + let multiple = field_info.cardinality.multiple; + let required = field_info.cardinality.required; + let types_len = valid_types.len(); + + field_array_defs.push(quote! { + static #valid_types_name: [u16; #types_len] = [#(#valid_types),*]; + }); + + let field_id_raw = field_id.get(); + field_entries.push(quote! { + (std::num::NonZeroU16::new(#field_id_raw).unwrap(), plotnik_core::StaticFieldInfo { + cardinality: plotnik_core::Cardinality { + multiple: #multiple, + required: #required, + }, + valid_types: &#valid_types_name, + }) + }); + } + + let fields_array_name = syn::Ident::new( + &format!("{}_N{}_FIELDS", prefix, node_id), + Span::call_site(), + ); + let fields_len = sorted_fields.len(); + + static_defs.extend(field_array_defs); + + if !sorted_fields.is_empty() { + static_defs.push(quote! { + static #fields_array_name: [(std::num::NonZeroU16, plotnik_core::StaticFieldInfo); #fields_len] = [ + #(#field_entries),* + ]; + }); + } + + let children_code = if let Some(children) = &info.children { + let valid_types = children.valid_types.to_vec(); + + let children_types_name = syn::Ident::new( + &format!("{}_N{}_CHILDREN_TYPES", prefix, node_id), + Span::call_site(), + ); + let types_len = valid_types.len(); + + static_defs.push(quote! { + static #children_types_name: [u16; #types_len] = [#(#valid_types),*]; + }); + + let multiple = children.cardinality.multiple; + let required = children.cardinality.required; + + quote! { + Some(plotnik_core::StaticChildrenInfo { + cardinality: plotnik_core::Cardinality { + multiple: #multiple, + required: #required, + }, + valid_types: &#children_types_name, + }) + } + } else { + quote! { None } + }; + + let name = &info.name; + let named = info.named; + + let fields_ref = if sorted_fields.is_empty() { + quote! { &[] } + } else { + quote! { &#fields_array_name } + }; + + node_entries.push(quote! { + (#node_id, plotnik_core::StaticNodeTypeInfo { + name: #name, + named: #named, + fields: #fields_ref, + children: #children_code, + }) + }); + } + + let nodes_array_name = syn::Ident::new(&format!("{}_NODES", prefix), Span::call_site()); + let nodes_len = sorted_node_ids.len(); + + let extras_array_name = syn::Ident::new(&format!("{}_EXTRAS", prefix), Span::call_site()); + let extras_len = extras.len(); + + let root_code = match root { + Some(id) => quote! { Some(#id) }, + None => quote! { None }, + }; + quote! { - pub const #const_name: usize = #size; + #(#static_defs)* + + static #nodes_array_name: [(u16, plotnik_core::StaticNodeTypeInfo); #nodes_len] = [ + #(#node_entries),* + ]; + + static #extras_array_name: [u16; #extras_len] = [#(#extras),*]; + + pub static #const_name: plotnik_core::StaticNodeTypes = plotnik_core::StaticNodeTypes::new( + &#nodes_array_name, + &#extras_array_name, + #root_code, + ); } - .into() } From 1f89bb750f9638b9a4e4631dea789ca35f606787 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 21:51:10 -0300 Subject: [PATCH 02/10] Update lib.rs --- crates/plotnik-langs/src/lib.rs | 138 +++++++++----------------------- 1 file changed, 39 insertions(+), 99 deletions(-) diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index a096ee1e..26bbb225 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -6,8 +6,6 @@ pub use plotnik_core::{Cardinality, NodeFieldId, NodeTypeId, NodeTypes, StaticNo /// Trait providing a unified facade for tree-sitter's Language API /// combined with our node type constraints. -/// -/// Methods that return Option types handle resolution failures gracefully. pub trait Lang: Send + Sync { fn name(&self) -> &str; @@ -25,45 +23,32 @@ pub trait Lang: Send + Sync { // Supertype info [Language API] // ═══════════════════════════════════════════════════════════════════════ - fn is_supertype(&self, id: Option) -> bool; - fn subtypes(&self, supertype: Option) -> &[u16]; + fn is_supertype(&self, id: NodeTypeId) -> bool; + fn subtypes(&self, supertype: NodeTypeId) -> &[u16]; // ═══════════════════════════════════════════════════════════════════════ // Root & Extras [node_types] // ═══════════════════════════════════════════════════════════════════════ fn root(&self) -> Option; - fn is_extra(&self, id: Option) -> bool; + fn is_extra(&self, id: NodeTypeId) -> bool; // ═══════════════════════════════════════════════════════════════════════ // Field constraints [node_types] // ═══════════════════════════════════════════════════════════════════════ - fn has_field(&self, node: Option, field: Option) -> bool; - fn field_cardinality( - &self, - node: Option, - field: Option, - ) -> Option; - fn valid_field_types( - &self, - node: Option, - field: Option, - ) -> &'static [u16]; - fn is_valid_field_type( - &self, - node: Option, - field: Option, - child: Option, - ) -> bool; + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool; + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option; + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &'static [u16]; + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool; // ═══════════════════════════════════════════════════════════════════════ // Children constraints [node_types] // ═══════════════════════════════════════════════════════════════════════ - fn children_cardinality(&self, node: Option) -> Option; - fn valid_child_types(&self, node: Option) -> &'static [u16]; - fn is_valid_child_type(&self, node: Option, child: Option) -> bool; + fn children_cardinality(&self, node: NodeTypeId) -> Option; + fn valid_child_types(&self, node: NodeTypeId) -> &'static [u16]; + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; } /// Static implementation of `Lang` with compile-time generated node types. @@ -138,81 +123,48 @@ impl Lang for StaticLang { self.inner.field_id_for_name(name) } - fn is_supertype(&self, id: Option) -> bool { - let Some(raw) = id else { return false }; - self.inner.node_kind_is_supertype(raw) + fn is_supertype(&self, id: NodeTypeId) -> bool { + self.inner.node_kind_is_supertype(id) } - fn subtypes(&self, supertype: Option) -> &[u16] { - let Some(raw) = supertype else { - return &[]; - }; - self.inner.subtypes_for_supertype(raw) + fn subtypes(&self, supertype: NodeTypeId) -> &[u16] { + self.inner.subtypes_for_supertype(supertype) } fn root(&self) -> Option { self.node_types.root() } - fn is_extra(&self, id: Option) -> bool { - let Some(id) = id else { return false }; + fn is_extra(&self, id: NodeTypeId) -> bool { self.node_types.is_extra(id) } - fn has_field(&self, node: Option, field: Option) -> bool { - let (Some(n), Some(f)) = (node, field) else { - return false; - }; - self.node_types.has_field(n, f) + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { + self.node_types.has_field(node, field) } - fn field_cardinality( - &self, - node: Option, - field: Option, - ) -> Option { - let (n, f) = (node?, field?); - self.node_types.field_cardinality(n, f) + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { + self.node_types.field_cardinality(node, field) } - fn valid_field_types( - &self, - node: Option, - field: Option, - ) -> &'static [u16] { - let (Some(n), Some(f)) = (node, field) else { - return &[]; - }; - self.node_types.valid_field_types(n, f) + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &'static [u16] { + self.node_types.valid_field_types(node, field) } - fn is_valid_field_type( - &self, - node: Option, - field: Option, - child: Option, - ) -> bool { - let (Some(n), Some(f), Some(c)) = (node, field, child) else { - return false; - }; - self.node_types.is_valid_field_type(n, f, c) + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { + self.node_types.is_valid_field_type(node, field, child) } - fn children_cardinality(&self, node: Option) -> Option { - let n = node?; - self.node_types.children_cardinality(n) + fn children_cardinality(&self, node: NodeTypeId) -> Option { + self.node_types.children_cardinality(node) } - fn valid_child_types(&self, node: Option) -> &'static [u16] { - let Some(n) = node else { return &[] }; - self.node_types.valid_child_types(n) + fn valid_child_types(&self, node: NodeTypeId) -> &'static [u16] { + self.node_types.valid_child_types(node) } - fn is_valid_child_type(&self, node: Option, child: Option) -> bool { - let (Some(n), Some(c)) = (node, child) else { - return false; - }; - self.node_types.is_valid_child_type(n, c) + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { + self.node_types.is_valid_child_type(node, child) } } @@ -562,13 +514,13 @@ mod tests { fn supertype_via_lang_trait() { let lang = javascript(); - let expr_id = lang.resolve_node("expression", true); + let expr_id = lang.resolve_node("expression", true).unwrap(); assert!(lang.is_supertype(expr_id)); let subtypes = lang.subtypes(expr_id); assert!(!subtypes.is_empty()); - let func_id = lang.resolve_node("function_declaration", true); + let func_id = lang.resolve_node("function_declaration", true).unwrap(); assert!(!lang.is_supertype(func_id)); } @@ -577,17 +529,17 @@ mod tests { fn field_validation_via_trait() { let lang = javascript(); - let func_id = lang.resolve_node("function_declaration", true); - let name_field = lang.resolve_field("name"); - let body_field = lang.resolve_field("body"); + let func_id = lang.resolve_node("function_declaration", true).unwrap(); + let name_field = lang.resolve_field("name").unwrap(); + let body_field = lang.resolve_field("body").unwrap(); assert!(lang.has_field(func_id, name_field)); assert!(lang.has_field(func_id, body_field)); - let identifier_id = lang.resolve_node("identifier", true); + let identifier_id = lang.resolve_node("identifier", true).unwrap(); assert!(lang.is_valid_field_type(func_id, name_field, identifier_id)); - let statement_block_id = lang.resolve_node("statement_block", true); + let statement_block_id = lang.resolve_node("statement_block", true).unwrap(); assert!(lang.is_valid_field_type(func_id, body_field, statement_block_id)); } @@ -604,23 +556,11 @@ mod tests { #[test] #[cfg(feature = "javascript")] - fn unresolved_returns_sensible_defaults() { + fn unresolved_returns_none() { let lang = javascript(); - let unresolved_node: Option = None; - let unresolved_field: Option = None; - - assert!(!lang.is_supertype(unresolved_node)); - assert!(!lang.is_extra(unresolved_node)); - assert!(!lang.has_field(unresolved_node, unresolved_field)); - assert!(lang.subtypes(unresolved_node).is_empty()); - assert!( - lang.valid_field_types(unresolved_node, unresolved_field) - .is_empty() - ); - assert!(lang.valid_child_types(unresolved_node).is_empty()); - assert!(!lang.is_valid_field_type(unresolved_node, unresolved_field, unresolved_node)); - assert!(!lang.is_valid_child_type(unresolved_node, unresolved_node)); + assert!(lang.resolve_node("nonexistent_node_type", true).is_none()); + assert!(lang.resolve_field("nonexistent_field").is_none()); } #[test] From 7cfebf5c4d7beecfb1e77284215136556dadeb5e Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 22:29:05 -0300 Subject: [PATCH 03/10] Refactor Lang trait to support both static and dynamic languages --- .../plotnik-cli/src/commands/debug/source.rs | 6 +- crates/plotnik-cli/src/commands/langs.rs | 4 +- crates/plotnik-core/src/lib.rs | 30 ++ crates/plotnik-langs/src/builtin.rs | 281 ++++++++++++++ crates/plotnik-langs/src/dynamic.rs | 7 + crates/plotnik-langs/src/lib.rs | 363 +++--------------- 6 files changed, 366 insertions(+), 325 deletions(-) create mode 100644 crates/plotnik-langs/src/builtin.rs create mode 100644 crates/plotnik-langs/src/dynamic.rs diff --git a/crates/plotnik-cli/src/commands/debug/source.rs b/crates/plotnik-cli/src/commands/debug/source.rs index 6fc8bfac..00182671 100644 --- a/crates/plotnik-cli/src/commands/debug/source.rs +++ b/crates/plotnik-cli/src/commands/debug/source.rs @@ -25,7 +25,7 @@ pub fn resolve_lang( lang: &Option, _source_text: &Option, source_file: &Option, -) -> &'static dyn Lang { +) -> Lang { if let Some(name) = lang { return plotnik_langs::from_name(name).unwrap_or_else(|| { eprintln!("error: unknown language: {}", name); @@ -50,10 +50,10 @@ pub fn resolve_lang( std::process::exit(1); } -pub fn parse_tree(source: &str, lang: &dyn Lang) -> tree_sitter::Tree { +pub fn parse_tree(source: &str, lang: Lang) -> tree_sitter::Tree { let mut parser = tree_sitter::Parser::new(); parser - .set_language(lang.get_inner()) + .set_language(lang.inner()) .expect("failed to set language"); parser.parse(source, None).expect("failed to parse source") } diff --git a/crates/plotnik-cli/src/commands/langs.rs b/crates/plotnik-cli/src/commands/langs.rs index 4b8da80e..eeeeff7c 100644 --- a/crates/plotnik-cli/src/commands/langs.rs +++ b/crates/plotnik-cli/src/commands/langs.rs @@ -10,9 +10,9 @@ pub fn run() { mod tests { use plotnik_langs::Lang; - fn smoke_test(lang: &dyn Lang, source: &str, expected_root: &str) { + fn smoke_test(lang: Lang, source: &str, expected_root: &str) { let mut parser = tree_sitter::Parser::new(); - parser.set_language(lang.get_inner()).unwrap(); + parser.set_language(lang.inner()).unwrap(); let tree = parser.parse(source, None).unwrap(); let root = tree.root_node(); assert_eq!(root.kind(), expected_root); diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index fa8dfb7e..f4046e80 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -96,6 +96,36 @@ pub trait NodeTypes { fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; } +impl NodeTypes for &T { + fn root(&self) -> Option { + (*self).root() + } + fn is_extra(&self, id: NodeTypeId) -> bool { + (*self).is_extra(id) + } + fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { + (*self).has_field(node, field) + } + fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { + (*self).field_cardinality(node, field) + } + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { + (*self).valid_field_types(node, field) + } + fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { + (*self).is_valid_field_type(node, field, child) + } + fn children_cardinality(&self, node: NodeTypeId) -> Option { + (*self).children_cardinality(node) + } + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { + (*self).valid_child_types(node) + } + fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { + (*self).is_valid_child_type(node, child) + } +} + // ============================================================================ // Static Analysis Layer (zero runtime init) // ============================================================================ diff --git a/crates/plotnik-langs/src/builtin.rs b/crates/plotnik-langs/src/builtin.rs new file mode 100644 index 00000000..d5baae50 --- /dev/null +++ b/crates/plotnik-langs/src/builtin.rs @@ -0,0 +1,281 @@ +use std::sync::{Arc, LazyLock}; + +use crate::{Lang, LangInner}; + +macro_rules! define_langs { + ( + $( + $fn_name:ident => { + feature: $feature:literal, + name: $name:literal, + ts_lang: $ts_lang:expr, + node_types_key: $node_types_key:literal, + names: [$($alias:literal),* $(,)?], + extensions: [$($ext:literal),* $(,)?] $(,)? + } + ),* $(,)? + ) => { + // Generate NodeTypes statics via proc macro + $( + #[cfg(feature = $feature)] + plotnik_macros::generate_node_types!($node_types_key); + )* + + // Generate static Lang definitions with LazyLock + $( + #[cfg(feature = $feature)] + pub fn $fn_name() -> Lang { + paste::paste! { + static LANG: LazyLock = LazyLock::new(|| { + Arc::new(LangInner::new_static( + $name, + $ts_lang.into(), + &[<$node_types_key:upper _NODE_TYPES>], + )) + }); + } + Arc::clone(&LANG) + } + )* + + pub fn from_name(s: &str) -> Option { + match s.to_ascii_lowercase().as_str() { + $( + #[cfg(feature = $feature)] + $($alias)|* => Some($fn_name()), + )* + _ => None, + } + } + + pub fn from_ext(ext: &str) -> Option { + match ext.to_ascii_lowercase().as_str() { + $( + #[cfg(feature = $feature)] + $($ext)|* => Some($fn_name()), + )* + _ => None, + } + } + + pub fn all() -> Vec { + vec![ + $( + #[cfg(feature = $feature)] + $fn_name(), + )* + ] + } + }; +} + +define_langs! { + bash => { + feature: "bash", + name: "bash", + ts_lang: tree_sitter_bash::LANGUAGE, + node_types_key: "bash", + names: ["bash", "sh", "shell"], + extensions: ["sh", "bash", "zsh"], + }, + c => { + feature: "c", + name: "c", + ts_lang: tree_sitter_c::LANGUAGE, + node_types_key: "c", + names: ["c"], + extensions: ["c", "h"], + }, + cpp => { + feature: "cpp", + name: "cpp", + ts_lang: tree_sitter_cpp::LANGUAGE, + node_types_key: "cpp", + names: ["cpp", "c++", "cxx", "cc"], + extensions: ["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"], + }, + csharp => { + feature: "csharp", + name: "c_sharp", + ts_lang: tree_sitter_c_sharp::LANGUAGE, + node_types_key: "csharp", + names: ["csharp", "c#", "cs", "c_sharp"], + extensions: ["cs"], + }, + css => { + feature: "css", + name: "css", + ts_lang: tree_sitter_css::LANGUAGE, + node_types_key: "css", + names: ["css"], + extensions: ["css"], + }, + elixir => { + feature: "elixir", + name: "elixir", + ts_lang: tree_sitter_elixir::LANGUAGE, + node_types_key: "elixir", + names: ["elixir", "ex"], + extensions: ["ex", "exs"], + }, + go => { + feature: "go", + name: "go", + ts_lang: tree_sitter_go::LANGUAGE, + node_types_key: "go", + names: ["go", "golang"], + extensions: ["go"], + }, + haskell => { + feature: "haskell", + name: "haskell", + ts_lang: tree_sitter_haskell::LANGUAGE, + node_types_key: "haskell", + names: ["haskell", "hs"], + extensions: ["hs", "lhs"], + }, + hcl => { + feature: "hcl", + name: "hcl", + ts_lang: tree_sitter_hcl::LANGUAGE, + node_types_key: "hcl", + names: ["hcl", "terraform", "tf"], + extensions: ["hcl", "tf", "tfvars"], + }, + html => { + feature: "html", + name: "html", + ts_lang: tree_sitter_html::LANGUAGE, + node_types_key: "html", + names: ["html", "htm"], + extensions: ["html", "htm"], + }, + java => { + feature: "java", + name: "java", + ts_lang: tree_sitter_java::LANGUAGE, + node_types_key: "java", + names: ["java"], + extensions: ["java"], + }, + javascript => { + feature: "javascript", + name: "javascript", + ts_lang: tree_sitter_javascript::LANGUAGE, + node_types_key: "javascript", + names: ["javascript", "js", "jsx", "ecmascript", "es"], + extensions: ["js", "mjs", "cjs", "jsx"], + }, + json => { + feature: "json", + name: "json", + ts_lang: tree_sitter_json::LANGUAGE, + node_types_key: "json", + names: ["json"], + extensions: ["json"], + }, + kotlin => { + feature: "kotlin", + name: "kotlin", + ts_lang: tree_sitter_kotlin::LANGUAGE, + node_types_key: "kotlin", + names: ["kotlin", "kt"], + extensions: ["kt", "kts"], + }, + lua => { + feature: "lua", + name: "lua", + ts_lang: tree_sitter_lua::LANGUAGE, + node_types_key: "lua", + names: ["lua"], + extensions: ["lua"], + }, + nix => { + feature: "nix", + name: "nix", + ts_lang: tree_sitter_nix::LANGUAGE, + node_types_key: "nix", + names: ["nix"], + extensions: ["nix"], + }, + php => { + feature: "php", + name: "php", + ts_lang: tree_sitter_php::LANGUAGE_PHP, + node_types_key: "php", + names: ["php"], + extensions: ["php"], + }, + python => { + feature: "python", + name: "python", + ts_lang: tree_sitter_python::LANGUAGE, + node_types_key: "python", + names: ["python", "py"], + extensions: ["py", "pyi", "pyw"], + }, + ruby => { + feature: "ruby", + name: "ruby", + ts_lang: tree_sitter_ruby::LANGUAGE, + node_types_key: "ruby", + names: ["ruby", "rb"], + extensions: ["rb", "rake", "gemspec"], + }, + rust => { + feature: "rust", + name: "rust", + ts_lang: tree_sitter_rust::LANGUAGE, + node_types_key: "rust", + names: ["rust", "rs"], + extensions: ["rs"], + }, + scala => { + feature: "scala", + name: "scala", + ts_lang: tree_sitter_scala::LANGUAGE, + node_types_key: "scala", + names: ["scala"], + extensions: ["scala", "sc"], + }, + solidity => { + feature: "solidity", + name: "solidity", + ts_lang: tree_sitter_solidity::LANGUAGE, + node_types_key: "solidity", + names: ["solidity", "sol"], + extensions: ["sol"], + }, + swift => { + feature: "swift", + name: "swift", + ts_lang: tree_sitter_swift::LANGUAGE, + node_types_key: "swift", + names: ["swift"], + extensions: ["swift"], + }, + typescript => { + feature: "typescript", + name: "typescript", + ts_lang: tree_sitter_typescript::LANGUAGE_TYPESCRIPT, + node_types_key: "typescript", + names: ["typescript", "ts"], + extensions: ["ts", "mts", "cts"], + }, + tsx => { + feature: "typescript", + name: "tsx", + ts_lang: tree_sitter_typescript::LANGUAGE_TSX, + node_types_key: "typescript_tsx", + names: ["tsx"], + extensions: ["tsx"], + }, + yaml => { + feature: "yaml", + name: "yaml", + ts_lang: tree_sitter_yaml::LANGUAGE, + node_types_key: "yaml", + names: ["yaml", "yml"], + extensions: ["yaml", "yml"], + }, +} diff --git a/crates/plotnik-langs/src/dynamic.rs b/crates/plotnik-langs/src/dynamic.rs new file mode 100644 index 00000000..d5e879a3 --- /dev/null +++ b/crates/plotnik-langs/src/dynamic.rs @@ -0,0 +1,7 @@ +//! Dynamic language loading (runtime). +//! +//! Load tree-sitter languages and their node types at runtime from: +//! - Shared libraries (.so/.dylib/.dll) +//! - node-types.json files +//! +//! Not yet implemented. diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index 26bbb225..56909d9c 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -1,16 +1,24 @@ -use std::sync::LazyLock; +use std::sync::Arc; use tree_sitter::Language; pub use plotnik_core::{Cardinality, NodeFieldId, NodeTypeId, NodeTypes, StaticNodeTypes}; +pub mod builtin; +pub mod dynamic; + +pub use builtin::*; + +/// User-facing language type. Works with any language (static or dynamic). +pub type Lang = Arc; + /// Trait providing a unified facade for tree-sitter's Language API /// combined with our node type constraints. -pub trait Lang: Send + Sync { +pub trait LangImpl: Send + Sync { fn name(&self) -> &str; /// Raw tree-sitter Language. You probably don't need this. - fn get_inner(&self) -> &Language; + fn inner(&self) -> &Language; // ═══════════════════════════════════════════════════════════════════════ // Resolution [Language API] @@ -39,7 +47,7 @@ pub trait Lang: Send + Sync { fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool; fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option; - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &'static [u16]; + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId]; fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool; // ═══════════════════════════════════════════════════════════════════════ @@ -47,27 +55,26 @@ pub trait Lang: Send + Sync { // ═══════════════════════════════════════════════════════════════════════ fn children_cardinality(&self, node: NodeTypeId) -> Option; - fn valid_child_types(&self, node: NodeTypeId) -> &'static [u16]; + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId]; fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; } -/// Static implementation of `Lang` with compile-time generated node types. +/// Generic language implementation parameterized by node types. +/// +/// This struct provides a single implementation of `LangImpl` that works with +/// any `NodeTypes` implementation (static or dynamic). #[derive(Debug)] -pub struct StaticLang { - pub name: &'static str, - inner: Language, - node_types: &'static StaticNodeTypes, +pub struct LangInner { + name: String, + ts_lang: Language, + node_types: N, } -impl StaticLang { - pub const fn new( - name: &'static str, - inner: Language, - node_types: &'static StaticNodeTypes, - ) -> Self { +impl LangInner<&'static StaticNodeTypes> { + pub fn new_static(name: &str, ts_lang: Language, node_types: &'static StaticNodeTypes) -> Self { Self { - name, - inner, + name: name.to_owned(), + ts_lang, node_types, } } @@ -77,17 +84,17 @@ impl StaticLang { } } -impl Lang for StaticLang { +impl LangImpl for LangInner { fn name(&self) -> &str { - self.name + &self.name } - fn get_inner(&self) -> &Language { - &self.inner + fn inner(&self) -> &Language { + &self.ts_lang } fn resolve_node(&self, kind: &str, named: bool) -> Option { - let id = self.inner.id_for_node_kind(kind, named); + let id = self.ts_lang.id_for_node_kind(kind, named); // FIX: Disambiguate tree-sitter's ID 0 (could be "end" node or "not found") // @@ -103,32 +110,26 @@ impl Lang for StaticLang { // For anonymous nodes, we must verify via reverse lookup. if id == 0 { if named { - // Named node with ID 0 = definitely not found - None - } else { - // Anonymous node with ID 0 = could be "end" or not found - // Check via reverse lookup - if self.inner.node_kind_for_id(0) == Some(kind) { - Some(0) // It's the "end" node - } else { - None // Not found - } + return None; + } + if self.ts_lang.node_kind_for_id(0) == Some(kind) { + return Some(0); } - } else { - Some(id) + return None; } + Some(id) } fn resolve_field(&self, name: &str) -> Option { - self.inner.field_id_for_name(name) + self.ts_lang.field_id_for_name(name) } fn is_supertype(&self, id: NodeTypeId) -> bool { - self.inner.node_kind_is_supertype(id) + self.ts_lang.node_kind_is_supertype(id) } fn subtypes(&self, supertype: NodeTypeId) -> &[u16] { - self.inner.subtypes_for_supertype(supertype) + self.ts_lang.subtypes_for_supertype(supertype) } fn root(&self) -> Option { @@ -147,7 +148,7 @@ impl Lang for StaticLang { self.node_types.field_cardinality(node, field) } - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &'static [u16] { + fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { self.node_types.valid_field_types(node, field) } @@ -159,7 +160,7 @@ impl Lang for StaticLang { self.node_types.children_cardinality(node) } - fn valid_child_types(&self, node: NodeTypeId) -> &'static [u16] { + fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { self.node_types.valid_child_types(node) } @@ -168,284 +169,6 @@ impl Lang for StaticLang { } } -macro_rules! define_langs { - ( - $( - $fn_name:ident => { - feature: $feature:literal, - name: $name:literal, - ts_lang: $ts_lang:expr, - node_types_key: $node_types_key:literal, - names: [$($alias:literal),* $(,)?], - extensions: [$($ext:literal),* $(,)?] $(,)? - } - ),* $(,)? - ) => { - // Generate NodeTypes statics via proc macro - $( - #[cfg(feature = $feature)] - plotnik_macros::generate_node_types!($node_types_key); - )* - - // Generate static Lang definitions with LazyLock - $( - #[cfg(feature = $feature)] - pub fn $fn_name() -> &'static dyn Lang { - paste::paste! { - static LANG: LazyLock = LazyLock::new(|| { - StaticLang::new( - $name, - $ts_lang.into(), - &[<$node_types_key:upper _NODE_TYPES>], - ) - }); - } - &*LANG - } - )* - - pub fn from_name(s: &str) -> Option<&'static dyn Lang> { - match s.to_ascii_lowercase().as_str() { - $( - #[cfg(feature = $feature)] - $($alias)|* => Some($fn_name()), - )* - _ => None, - } - } - - pub fn from_ext(ext: &str) -> Option<&'static dyn Lang> { - match ext.to_ascii_lowercase().as_str() { - $( - #[cfg(feature = $feature)] - $($ext)|* => Some($fn_name()), - )* - _ => None, - } - } - - pub fn all() -> Vec<&'static dyn Lang> { - vec![ - $( - #[cfg(feature = $feature)] - $fn_name(), - )* - ] - } - }; -} - -define_langs! { - bash => { - feature: "bash", - name: "bash", - ts_lang: tree_sitter_bash::LANGUAGE, - node_types_key: "bash", - names: ["bash", "sh", "shell"], - extensions: ["sh", "bash", "zsh"], - }, - c => { - feature: "c", - name: "c", - ts_lang: tree_sitter_c::LANGUAGE, - node_types_key: "c", - names: ["c"], - extensions: ["c", "h"], - }, - cpp => { - feature: "cpp", - name: "cpp", - ts_lang: tree_sitter_cpp::LANGUAGE, - node_types_key: "cpp", - names: ["cpp", "c++", "cxx", "cc"], - extensions: ["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"], - }, - csharp => { - feature: "csharp", - name: "c_sharp", - ts_lang: tree_sitter_c_sharp::LANGUAGE, - node_types_key: "csharp", - names: ["csharp", "c#", "cs", "c_sharp"], - extensions: ["cs"], - }, - css => { - feature: "css", - name: "css", - ts_lang: tree_sitter_css::LANGUAGE, - node_types_key: "css", - names: ["css"], - extensions: ["css"], - }, - elixir => { - feature: "elixir", - name: "elixir", - ts_lang: tree_sitter_elixir::LANGUAGE, - node_types_key: "elixir", - names: ["elixir", "ex"], - extensions: ["ex", "exs"], - }, - go => { - feature: "go", - name: "go", - ts_lang: tree_sitter_go::LANGUAGE, - node_types_key: "go", - names: ["go", "golang"], - extensions: ["go"], - }, - haskell => { - feature: "haskell", - name: "haskell", - ts_lang: tree_sitter_haskell::LANGUAGE, - node_types_key: "haskell", - names: ["haskell", "hs"], - extensions: ["hs", "lhs"], - }, - hcl => { - feature: "hcl", - name: "hcl", - ts_lang: tree_sitter_hcl::LANGUAGE, - node_types_key: "hcl", - names: ["hcl", "terraform", "tf"], - extensions: ["hcl", "tf", "tfvars"], - }, - html => { - feature: "html", - name: "html", - ts_lang: tree_sitter_html::LANGUAGE, - node_types_key: "html", - names: ["html", "htm"], - extensions: ["html", "htm"], - }, - java => { - feature: "java", - name: "java", - ts_lang: tree_sitter_java::LANGUAGE, - node_types_key: "java", - names: ["java"], - extensions: ["java"], - }, - javascript => { - feature: "javascript", - name: "javascript", - ts_lang: tree_sitter_javascript::LANGUAGE, - node_types_key: "javascript", - names: ["javascript", "js", "jsx", "ecmascript", "es"], - extensions: ["js", "mjs", "cjs", "jsx"], - }, - json => { - feature: "json", - name: "json", - ts_lang: tree_sitter_json::LANGUAGE, - node_types_key: "json", - names: ["json"], - extensions: ["json"], - }, - kotlin => { - feature: "kotlin", - name: "kotlin", - ts_lang: tree_sitter_kotlin::LANGUAGE, - node_types_key: "kotlin", - names: ["kotlin", "kt"], - extensions: ["kt", "kts"], - }, - lua => { - feature: "lua", - name: "lua", - ts_lang: tree_sitter_lua::LANGUAGE, - node_types_key: "lua", - names: ["lua"], - extensions: ["lua"], - }, - nix => { - feature: "nix", - name: "nix", - ts_lang: tree_sitter_nix::LANGUAGE, - node_types_key: "nix", - names: ["nix"], - extensions: ["nix"], - }, - php => { - feature: "php", - name: "php", - ts_lang: tree_sitter_php::LANGUAGE_PHP, - node_types_key: "php", - names: ["php"], - extensions: ["php"], - }, - python => { - feature: "python", - name: "python", - ts_lang: tree_sitter_python::LANGUAGE, - node_types_key: "python", - names: ["python", "py"], - extensions: ["py", "pyi", "pyw"], - }, - ruby => { - feature: "ruby", - name: "ruby", - ts_lang: tree_sitter_ruby::LANGUAGE, - node_types_key: "ruby", - names: ["ruby", "rb"], - extensions: ["rb", "rake", "gemspec"], - }, - rust => { - feature: "rust", - name: "rust", - ts_lang: tree_sitter_rust::LANGUAGE, - node_types_key: "rust", - names: ["rust", "rs"], - extensions: ["rs"], - }, - scala => { - feature: "scala", - name: "scala", - ts_lang: tree_sitter_scala::LANGUAGE, - node_types_key: "scala", - names: ["scala"], - extensions: ["scala", "sc"], - }, - solidity => { - feature: "solidity", - name: "solidity", - ts_lang: tree_sitter_solidity::LANGUAGE, - node_types_key: "solidity", - names: ["solidity", "sol"], - extensions: ["sol"], - }, - swift => { - feature: "swift", - name: "swift", - ts_lang: tree_sitter_swift::LANGUAGE, - node_types_key: "swift", - names: ["swift"], - extensions: ["swift"], - }, - typescript => { - feature: "typescript", - name: "typescript", - ts_lang: tree_sitter_typescript::LANGUAGE_TYPESCRIPT, - node_types_key: "typescript", - names: ["typescript", "ts"], - extensions: ["ts", "mts", "cts"], - }, - tsx => { - feature: "typescript", - name: "tsx", - ts_lang: tree_sitter_typescript::LANGUAGE_TSX, - node_types_key: "typescript_tsx", - names: ["tsx"], - extensions: ["tsx"], - }, - yaml => { - feature: "yaml", - name: "yaml", - ts_lang: tree_sitter_yaml::LANGUAGE, - node_types_key: "yaml", - names: ["yaml", "yml"], - extensions: ["yaml", "yml"], - }, -} - #[cfg(test)] mod tests { use super::*; @@ -584,7 +307,7 @@ mod tests { #[cfg(feature = "javascript")] fn tree_sitter_id_zero_ambiguity() { let lang = javascript(); - let raw_lang = lang.get_inner(); + let raw_lang = lang.inner(); // === Part 1: Understanding the problem === @@ -639,7 +362,7 @@ mod tests { #[cfg(feature = "javascript")] fn tree_sitter_api_roundtrip_quirks() { let lang = javascript(); - let raw_lang = lang.get_inner(); + let raw_lang = lang.inner(); // Some nodes appear at multiple IDs! // This happens when the same node type is used in different contexts From 9db368a16dce7f7ac10fffbb46e827428aef9acf Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 22:33:09 -0300 Subject: [PATCH 04/10] Add parse method to Lang trait and simplify parsing --- .../plotnik-cli/src/commands/debug/source.rs | 6 +- crates/plotnik-cli/src/commands/langs.rs | 4 +- crates/plotnik-langs/src/lib.rs | 90 +++---------------- 3 files changed, 12 insertions(+), 88 deletions(-) diff --git a/crates/plotnik-cli/src/commands/debug/source.rs b/crates/plotnik-cli/src/commands/debug/source.rs index 00182671..c39eb92e 100644 --- a/crates/plotnik-cli/src/commands/debug/source.rs +++ b/crates/plotnik-cli/src/commands/debug/source.rs @@ -51,11 +51,7 @@ pub fn resolve_lang( } pub fn parse_tree(source: &str, lang: Lang) -> tree_sitter::Tree { - let mut parser = tree_sitter::Parser::new(); - parser - .set_language(lang.inner()) - .expect("failed to set language"); - parser.parse(source, None).expect("failed to parse source") + lang.parse(source) } pub fn dump_source(tree: &tree_sitter::Tree, source: &str, include_anonymous: bool) -> String { diff --git a/crates/plotnik-cli/src/commands/langs.rs b/crates/plotnik-cli/src/commands/langs.rs index eeeeff7c..060c5df3 100644 --- a/crates/plotnik-cli/src/commands/langs.rs +++ b/crates/plotnik-cli/src/commands/langs.rs @@ -11,9 +11,7 @@ mod tests { use plotnik_langs::Lang; fn smoke_test(lang: Lang, source: &str, expected_root: &str) { - let mut parser = tree_sitter::Parser::new(); - parser.set_language(lang.inner()).unwrap(); - let tree = parser.parse(source, None).unwrap(); + let tree = lang.parse(source); let root = tree.root_node(); assert_eq!(root.kind(), expected_root); assert!(!root.has_error()); diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index 56909d9c..ef78b540 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -17,8 +17,8 @@ pub type Lang = Arc; pub trait LangImpl: Send + Sync { fn name(&self) -> &str; - /// Raw tree-sitter Language. You probably don't need this. - fn inner(&self) -> &Language; + /// Parse source code into a tree-sitter tree. + fn parse(&self, source: &str) -> tree_sitter::Tree; // ═══════════════════════════════════════════════════════════════════════ // Resolution [Language API] @@ -89,8 +89,12 @@ impl LangImpl for LangInner { &self.name } - fn inner(&self) -> &Language { - &self.ts_lang + fn parse(&self, source: &str) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&self.ts_lang) + .expect("failed to set language"); + parser.parse(source, None).expect("failed to parse source") } fn resolve_node(&self, kind: &str, named: bool) -> Option { @@ -300,34 +304,11 @@ mod tests { /// 1. The valid "end" sentinel node (anonymous, ID 0) /// 2. Any non-existent node /// - /// This test shows: - /// - The ambiguity in the raw tree-sitter API - /// - How our wrapper resolves it correctly + /// Our wrapper resolves this correctly. #[test] #[cfg(feature = "javascript")] fn tree_sitter_id_zero_ambiguity() { let lang = javascript(); - let raw_lang = lang.inner(); - - // === Part 1: Understanding the problem === - - // ID 0 is the "end" sentinel node (anonymous) - assert_eq!(raw_lang.node_kind_for_id(0), Some("end")); - assert!(!raw_lang.node_kind_is_named(0)); - - // Tree-sitter returns 0 for BOTH valid "end" and non-existent nodes - let end_id = raw_lang.id_for_node_kind("end", false); - let fake_id = raw_lang.id_for_node_kind("totally_fake_node", false); - assert_eq!(end_id, 0, "Valid 'end' node returns 0"); - assert_eq!(fake_id, 0, "Non-existent node also returns 0!"); - - // This ambiguity doesn't exist for named nodes (0 always = not found) - let fake_named = raw_lang.id_for_node_kind("fake_named", true); - assert_eq!(fake_named, 0, "Non-existent named node returns 0"); - // And no named node has ID 0 - assert!(!raw_lang.node_kind_is_named(0)); - - // === Part 2: Our wrapper's solution === // For named nodes: 0 unambiguously means "not found" assert!(lang.resolve_node("fake_named", true).is_none()); @@ -344,59 +325,8 @@ mod tests { "Non-existent node should be Unresolved" ); - // === Part 3: Field IDs don't have this problem === - - // Tree-sitter uses Option for fields - clean API! - let name_field_id = raw_lang.field_id_for_name("name"); - assert!(name_field_id.is_some(), "Field 'name' should exist"); - assert!(name_field_id.unwrap().get() > 0, "Field IDs start at 1"); - assert_eq!(raw_lang.field_id_for_name("fake_field"), None); - - // Our wrapper preserves this cleanliness + // Our wrapper preserves field cleanliness assert!(lang.resolve_field("name").is_some()); assert!(lang.resolve_field("fake_field").is_none()); } - - /// Additional test showing the tree-sitter oddities in detail - #[test] - #[cfg(feature = "javascript")] - fn tree_sitter_api_roundtrip_quirks() { - let lang = javascript(); - let raw_lang = lang.inner(); - - // Some nodes appear at multiple IDs! - // This happens when the same node type is used in different contexts - let mut id_to_names = std::collections::HashMap::>::new(); - - for id in 0..raw_lang.node_kind_count() as u16 { - if let Some(name) = raw_lang.node_kind_for_id(id) { - let is_named = raw_lang.node_kind_is_named(id); - id_to_names.entry(id).or_default().push((name, is_named)); - - // The roundtrip might NOT preserve the ID! - let resolved_id = raw_lang.id_for_node_kind(name, is_named); - - // For example, "identifier" might be at both ID 1 and ID 46, - // but id_for_node_kind("identifier", true) returns only one of them - if resolved_id != id && name != "ERROR" { - // This is normal - tree-sitter returns the first matching ID - // when multiple IDs have the same (name, is_named) combination - } - } - } - - // Verify our assumptions about ID 0 - assert_eq!(id_to_names[&0], vec![("end", false)]); - - // Field IDs are cleaner - they start at 1 (NonZeroU16) - assert!(raw_lang.field_name_for_id(0).is_none()); - - for fid in 1..=raw_lang.field_count() as u16 { - if let Some(name) = raw_lang.field_name_for_id(fid) { - // Field roundtrip is reliable - let resolved = raw_lang.field_id_for_name(name); - assert_eq!(resolved, std::num::NonZeroU16::new(fid)); - } - } - } } From 6fb91849e0a4f92933c8e9107740344663d7efdd Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 22:39:29 -0300 Subject: [PATCH 05/10] Rename function parameters to improve readability and semantics --- crates/plotnik-core/src/lib.rs | 192 +++++++++++++++++++++----------- crates/plotnik-langs/src/lib.rs | 83 +++++++++----- 2 files changed, 181 insertions(+), 94 deletions(-) diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index f4046e80..9e509236 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -84,45 +84,71 @@ pub struct Cardinality { /// For name↔ID resolution and supertype info, use `Language` directly. pub trait NodeTypes { fn root(&self) -> Option; - fn is_extra(&self, id: NodeTypeId) -> bool; + fn is_extra(&self, node_type_id: NodeTypeId) -> bool; - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool; - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option; - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId]; - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool; - - fn children_cardinality(&self, node: NodeTypeId) -> Option; - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId]; - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool; + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option; + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId]; + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool; + + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option; + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId]; + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool; } impl NodeTypes for &T { fn root(&self) -> Option { (*self).root() } - fn is_extra(&self, id: NodeTypeId) -> bool { - (*self).is_extra(id) + fn is_extra(&self, node_type_id: NodeTypeId) -> bool { + (*self).is_extra(node_type_id) } - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { - (*self).has_field(node, field) + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool { + (*self).has_field(node_type_id, node_field_id) } - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { - (*self).field_cardinality(node, field) + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option { + (*self).field_cardinality(node_type_id, node_field_id) } - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { - (*self).valid_field_types(node, field) + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId] { + (*self).valid_field_types(node_type_id, node_field_id) } - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { - (*self).is_valid_field_type(node, field, child) + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool { + (*self).is_valid_field_type(node_type_id, node_field_id, child) } - fn children_cardinality(&self, node: NodeTypeId) -> Option { - (*self).children_cardinality(node) + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option { + (*self).children_cardinality(node_type_id) } - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { - (*self).valid_child_types(node) + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId] { + (*self).valid_child_types(node_type_id) } - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { - (*self).is_valid_child_type(node, child) + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool { + (*self).is_valid_child_type(node_type_id, child) } } @@ -184,17 +210,17 @@ impl StaticNodeTypes { } /// Get info for a node type by ID (binary search). - pub fn get(&self, id: NodeTypeId) -> Option<&'static StaticNodeTypeInfo> { + pub fn get(&self, node_type_id: NodeTypeId) -> Option<&'static StaticNodeTypeInfo> { self.nodes - .binary_search_by_key(&id, |(node_id, _)| *node_id) + .binary_search_by_key(&node_type_id, |(node_id, _)| *node_id) .ok() .map(|idx| &self.nodes[idx].1) } /// Check if node type exists. - pub fn contains(&self, id: NodeTypeId) -> bool { + pub fn contains(&self, node_type_id: NodeTypeId) -> bool { self.nodes - .binary_search_by_key(&id, |(node_id, _)| *node_id) + .binary_search_by_key(&node_type_id, |(node_id, _)| *node_id) .is_ok() } @@ -239,42 +265,59 @@ impl NodeTypes for StaticNodeTypes { self.root } - fn is_extra(&self, id: NodeTypeId) -> bool { - self.extras.contains(&id) + fn is_extra(&self, node_type_id: NodeTypeId) -> bool { + self.extras.contains(&node_type_id) } - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { - self.get(node).is_some_and(|info| { + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool { + self.get(node_type_id).is_some_and(|info| { info.fields - .binary_search_by_key(&field, |(fid, _)| *fid) + .binary_search_by_key(&node_field_id, |(fid, _)| *fid) .is_ok() }) } - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { - self.field(node, field).map(|f| f.cardinality) + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option { + self.field(node_type_id, node_field_id) + .map(|f| f.cardinality) } - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { - self.field(node, field) + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId] { + self.field(node_type_id, node_field_id) .map(|f| f.valid_types) .unwrap_or(&[]) } - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { - self.valid_field_types(node, field).contains(&child) + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool { + self.valid_field_types(node_type_id, node_field_id) + .contains(&child) } - fn children_cardinality(&self, node: NodeTypeId) -> Option { - self.children(node).map(|c| c.cardinality) + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option { + self.children(node_type_id).map(|c| c.cardinality) } - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { - self.children(node).map(|c| c.valid_types).unwrap_or(&[]) + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId] { + self.children(node_type_id) + .map(|c| c.valid_types) + .unwrap_or(&[]) } - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { - self.valid_child_types(node).contains(&child) + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool { + self.valid_child_types(node_type_id).contains(&child) } } @@ -412,12 +455,12 @@ impl DynamicNodeTypes { } } - pub fn get(&self, id: NodeTypeId) -> Option<&NodeTypeInfo> { - self.nodes.get(&id) + pub fn get(&self, node_type_id: NodeTypeId) -> Option<&NodeTypeInfo> { + self.nodes.get(&node_type_id) } - pub fn contains(&self, id: NodeTypeId) -> bool { - self.nodes.contains_key(&id) + pub fn contains(&self, node_type_id: NodeTypeId) -> bool { + self.nodes.contains_key(&node_type_id) } pub fn field(&self, node_id: NodeTypeId, field_id: NodeFieldId) -> Option<&FieldInfo> { @@ -464,42 +507,57 @@ impl NodeTypes for DynamicNodeTypes { self.root } - fn is_extra(&self, id: NodeTypeId) -> bool { - self.extras.contains(&id) + fn is_extra(&self, node_type_id: NodeTypeId) -> bool { + self.extras.contains(&node_type_id) } - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool { self.nodes - .get(&node) - .is_some_and(|n| n.fields.contains_key(&field)) + .get(&node_type_id) + .is_some_and(|n| n.fields.contains_key(&node_field_id)) } - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { - self.field(node, field).map(|f| f.cardinality) + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option { + self.field(node_type_id, node_field_id) + .map(|f| f.cardinality) } - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { - self.field(node, field) + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId] { + self.field(node_type_id, node_field_id) .map(|f| f.valid_types.as_slice()) .unwrap_or(&[]) } - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { - self.valid_field_types(node, field).contains(&child) + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool { + self.valid_field_types(node_type_id, node_field_id) + .contains(&child) } - fn children_cardinality(&self, node: NodeTypeId) -> Option { - self.children(node).map(|c| c.cardinality) + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option { + self.children(node_type_id).map(|c| c.cardinality) } - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { - self.children(node) + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId] { + self.children(node_type_id) .map(|c| c.valid_types.as_slice()) .unwrap_or(&[]) } - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { - self.valid_child_types(node).contains(&child) + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool { + self.valid_child_types(node_type_id).contains(&child) } } diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index ef78b540..a920d63b 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -31,7 +31,7 @@ pub trait LangImpl: Send + Sync { // Supertype info [Language API] // ═══════════════════════════════════════════════════════════════════════ - fn is_supertype(&self, id: NodeTypeId) -> bool; + fn is_supertype(&self, node_type_id: NodeTypeId) -> bool; fn subtypes(&self, supertype: NodeTypeId) -> &[u16]; // ═══════════════════════════════════════════════════════════════════════ @@ -39,24 +39,37 @@ pub trait LangImpl: Send + Sync { // ═══════════════════════════════════════════════════════════════════════ fn root(&self) -> Option; - fn is_extra(&self, id: NodeTypeId) -> bool; + fn is_extra(&self, node_type_id: NodeTypeId) -> bool; // ═══════════════════════════════════════════════════════════════════════ // Field constraints [node_types] // ═══════════════════════════════════════════════════════════════════════ - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool; - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option; - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId]; - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool; + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool; + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option; + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId]; + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool; // ═══════════════════════════════════════════════════════════════════════ // Children constraints [node_types] // ═══════════════════════════════════════════════════════════════════════ - fn children_cardinality(&self, node: NodeTypeId) -> Option; - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId]; - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool; + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option; + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId]; + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool; } /// Generic language implementation parameterized by node types. @@ -128,8 +141,8 @@ impl LangImpl for LangInner { self.ts_lang.field_id_for_name(name) } - fn is_supertype(&self, id: NodeTypeId) -> bool { - self.ts_lang.node_kind_is_supertype(id) + fn is_supertype(&self, node_type_id: NodeTypeId) -> bool { + self.ts_lang.node_kind_is_supertype(node_type_id) } fn subtypes(&self, supertype: NodeTypeId) -> &[u16] { @@ -140,36 +153,52 @@ impl LangImpl for LangInner { self.node_types.root() } - fn is_extra(&self, id: NodeTypeId) -> bool { - self.node_types.is_extra(id) + fn is_extra(&self, node_type_id: NodeTypeId) -> bool { + self.node_types.is_extra(node_type_id) } - fn has_field(&self, node: NodeTypeId, field: NodeFieldId) -> bool { - self.node_types.has_field(node, field) + fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool { + self.node_types.has_field(node_type_id, node_field_id) } - fn field_cardinality(&self, node: NodeTypeId, field: NodeFieldId) -> Option { - self.node_types.field_cardinality(node, field) + fn field_cardinality( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> Option { + self.node_types + .field_cardinality(node_type_id, node_field_id) } - fn valid_field_types(&self, node: NodeTypeId, field: NodeFieldId) -> &[NodeTypeId] { - self.node_types.valid_field_types(node, field) + fn valid_field_types( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + ) -> &[NodeTypeId] { + self.node_types + .valid_field_types(node_type_id, node_field_id) } - fn is_valid_field_type(&self, node: NodeTypeId, field: NodeFieldId, child: NodeTypeId) -> bool { - self.node_types.is_valid_field_type(node, field, child) + fn is_valid_field_type( + &self, + node_type_id: NodeTypeId, + node_field_id: NodeFieldId, + child: NodeTypeId, + ) -> bool { + self.node_types + .is_valid_field_type(node_type_id, node_field_id, child) } - fn children_cardinality(&self, node: NodeTypeId) -> Option { - self.node_types.children_cardinality(node) + fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option { + self.node_types.children_cardinality(node_type_id) } - fn valid_child_types(&self, node: NodeTypeId) -> &[NodeTypeId] { - self.node_types.valid_child_types(node) + fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId] { + self.node_types.valid_child_types(node_type_id) } - fn is_valid_child_type(&self, node: NodeTypeId, child: NodeTypeId) -> bool { - self.node_types.is_valid_child_type(node, child) + fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool { + self.node_types.is_valid_child_type(node_type_id, child) } } From b997c2fc4da266179d9175b7dea8dd1b0ad47af7 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 23:09:56 -0300 Subject: [PATCH 06/10] Add ensure_node method to StaticNodeTypes and DynamicNodeTypes to enforce invariants --- crates/plotnik-core/Cargo.toml | 3 +++ crates/plotnik-core/src/invariants.rs | 27 +++++++++++++++++++++++++++ crates/plotnik-core/src/lib.rs | 18 ++++++++++-------- 3 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 crates/plotnik-core/src/invariants.rs diff --git a/crates/plotnik-core/Cargo.toml b/crates/plotnik-core/Cargo.toml index 43cf6ca5..beed0052 100644 --- a/crates/plotnik-core/Cargo.toml +++ b/crates/plotnik-core/Cargo.toml @@ -6,6 +6,9 @@ license = "MIT" description = "Core data structures for Plotnik" repository = "https://github.com/plotnik-lang/plotnik" +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage_nightly)'] } + [dependencies] serde = { version = "1", features = ["derive"] } serde_json = "1" \ No newline at end of file diff --git a/crates/plotnik-core/src/invariants.rs b/crates/plotnik-core/src/invariants.rs new file mode 100644 index 00000000..2f420cd1 --- /dev/null +++ b/crates/plotnik-core/src/invariants.rs @@ -0,0 +1,27 @@ +//! Invariant checks excluded from coverage reports. + +#![cfg_attr(coverage_nightly, coverage(off))] + +use crate::{DynamicNodeTypes, NodeTypeId, NodeTypeInfo, StaticNodeTypeInfo, StaticNodeTypes}; + +impl StaticNodeTypes { + pub(crate) fn ensure_node(&self, node_type_id: NodeTypeId) -> &'static StaticNodeTypeInfo { + self.get(node_type_id).unwrap_or_else(|| { + panic!( + "NodeTypes: node_type_id {node_type_id} not found \ + (Lang must verify Language ↔ NodeTypes correspondence)" + ) + }) + } +} + +impl DynamicNodeTypes { + pub(crate) fn ensure_node(&self, node_type_id: NodeTypeId) -> &NodeTypeInfo { + self.get(node_type_id).unwrap_or_else(|| { + panic!( + "NodeTypes: node_type_id {node_type_id} not found \ + (Lang must verify Language ↔ NodeTypes correspondence)" + ) + }) + } +} diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index 9e509236..45e11b15 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -11,6 +11,8 @@ use std::collections::HashMap; use std::num::NonZeroU16; +mod invariants; + // ============================================================================ // Deserialization Layer // ============================================================================ @@ -227,10 +229,10 @@ impl StaticNodeTypes { /// Get field info for a node type (binary search for node, then field). pub fn field( &self, - node_id: NodeTypeId, + node_type_id: NodeTypeId, field_id: NodeFieldId, ) -> Option<&'static StaticFieldInfo> { - let info = self.get(node_id)?; + let info = self.ensure_node(node_type_id); info.fields .binary_search_by_key(&field_id, |(fid, _)| *fid) .ok() @@ -238,8 +240,8 @@ impl StaticNodeTypes { } /// Get children info for a node type. - pub fn children(&self, node_id: NodeTypeId) -> Option { - self.get(node_id)?.children + pub fn children(&self, node_type_id: NodeTypeId) -> Option { + self.ensure_node(node_type_id).children } /// Get all extra node type IDs. @@ -463,12 +465,12 @@ impl DynamicNodeTypes { self.nodes.contains_key(&node_type_id) } - pub fn field(&self, node_id: NodeTypeId, field_id: NodeFieldId) -> Option<&FieldInfo> { - self.nodes.get(&node_id)?.fields.get(&field_id) + pub fn field(&self, node_type_id: NodeTypeId, field_id: NodeFieldId) -> Option<&FieldInfo> { + self.ensure_node(node_type_id).fields.get(&field_id) } - pub fn children(&self, node_id: NodeTypeId) -> Option<&ChildrenInfo> { - self.nodes.get(&node_id)?.children.as_ref() + pub fn children(&self, node_type_id: NodeTypeId) -> Option<&ChildrenInfo> { + self.ensure_node(node_type_id).children.as_ref() } pub fn extras(&self) -> &[NodeTypeId] { From 090040bb5bab68b0a1fb725812cdfffa71fb511e Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 23:28:11 -0300 Subject: [PATCH 07/10] Tidy --- crates/plotnik-langs/src/dynamic.rs | 8 +---- crates/plotnik-langs/src/lib.rs | 49 +++-------------------------- 2 files changed, 6 insertions(+), 51 deletions(-) diff --git a/crates/plotnik-langs/src/dynamic.rs b/crates/plotnik-langs/src/dynamic.rs index d5e879a3..9c58295b 100644 --- a/crates/plotnik-langs/src/dynamic.rs +++ b/crates/plotnik-langs/src/dynamic.rs @@ -1,7 +1 @@ -//! Dynamic language loading (runtime). -//! -//! Load tree-sitter languages and their node types at runtime from: -//! - Shared libraries (.so/.dylib/.dll) -//! - node-types.json files -//! -//! Not yet implemented. +//! Dynamic language loading (runtime). Not yet implemented. diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index a920d63b..369c4e05 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -20,31 +20,15 @@ pub trait LangImpl: Send + Sync { /// Parse source code into a tree-sitter tree. fn parse(&self, source: &str) -> tree_sitter::Tree; - // ═══════════════════════════════════════════════════════════════════════ - // Resolution [Language API] - // ═══════════════════════════════════════════════════════════════════════ - fn resolve_node(&self, kind: &str, named: bool) -> Option; fn resolve_field(&self, name: &str) -> Option; - // ═══════════════════════════════════════════════════════════════════════ - // Supertype info [Language API] - // ═══════════════════════════════════════════════════════════════════════ - fn is_supertype(&self, node_type_id: NodeTypeId) -> bool; fn subtypes(&self, supertype: NodeTypeId) -> &[u16]; - // ═══════════════════════════════════════════════════════════════════════ - // Root & Extras [node_types] - // ═══════════════════════════════════════════════════════════════════════ - fn root(&self) -> Option; fn is_extra(&self, node_type_id: NodeTypeId) -> bool; - // ═══════════════════════════════════════════════════════════════════════ - // Field constraints [node_types] - // ═══════════════════════════════════════════════════════════════════════ - fn has_field(&self, node_type_id: NodeTypeId, node_field_id: NodeFieldId) -> bool; fn field_cardinality( &self, @@ -63,10 +47,6 @@ pub trait LangImpl: Send + Sync { child: NodeTypeId, ) -> bool; - // ═══════════════════════════════════════════════════════════════════════ - // Children constraints [node_types] - // ═══════════════════════════════════════════════════════════════════════ - fn children_cardinality(&self, node_type_id: NodeTypeId) -> Option; fn valid_child_types(&self, node_type_id: NodeTypeId) -> &[NodeTypeId]; fn is_valid_child_type(&self, node_type_id: NodeTypeId, child: NodeTypeId) -> bool; @@ -113,18 +93,9 @@ impl LangImpl for LangInner { fn resolve_node(&self, kind: &str, named: bool) -> Option { let id = self.ts_lang.id_for_node_kind(kind, named); - // FIX: Disambiguate tree-sitter's ID 0 (could be "end" node or "not found") - // - // Tree-sitter's id_for_node_kind has odd semantics: - // - Returns 0 for "not found" - // - BUT: ID 0 is also a valid ID for the anonymous "end" sentinel node - // - // This creates an ambiguity for anonymous nodes: - // - id_for_node_kind("end", false) -> 0 (valid) - // - id_for_node_kind("fake", false) -> 0 (not found) - // - // For named nodes, 0 is unambiguous since no named node has ID 0. - // For anonymous nodes, we must verify via reverse lookup. + // Tree-sitter returns 0 for both "not found" AND the valid anonymous "end" node. + // For named nodes, 0 always means "not found". For anonymous, we disambiguate + // via reverse lookup. if id == 0 { if named { return None; @@ -327,16 +298,9 @@ mod tests { assert!(func_id.is_some()); } - /// Demonstrates tree-sitter's odd ID semantics and how our wrapper fixes them. - /// - /// Tree-sitter's `id_for_node_kind` returns 0 for both: - /// 1. The valid "end" sentinel node (anonymous, ID 0) - /// 2. Any non-existent node - /// - /// Our wrapper resolves this correctly. #[test] #[cfg(feature = "javascript")] - fn tree_sitter_id_zero_ambiguity() { + fn tree_sitter_id_zero_disambiguation() { let lang = javascript(); // For named nodes: 0 unambiguously means "not found" @@ -349,10 +313,7 @@ mod tests { assert!(end_resolved.is_some(), "Valid 'end' node should resolve"); assert_eq!(end_resolved, Some(0), "'end' should have ID 0"); - assert!( - fake_resolved.is_none(), - "Non-existent node should be Unresolved" - ); + assert!(fake_resolved.is_none(), "Non-existent node should be None"); // Our wrapper preserves field cleanliness assert!(lang.resolve_field("name").is_some()); From 7ce18b08c809948d3c2207841754054bb8d2c6ce Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 23:30:32 -0300 Subject: [PATCH 08/10] Refactor --- crates/plotnik-langs/src/lib.rs | 47 ++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index 369c4e05..d665efd5 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -20,7 +20,8 @@ pub trait LangImpl: Send + Sync { /// Parse source code into a tree-sitter tree. fn parse(&self, source: &str) -> tree_sitter::Tree; - fn resolve_node(&self, kind: &str, named: bool) -> Option; + fn resolve_named_node(&self, kind: &str) -> Option; + fn resolve_anonymous_node(&self, kind: &str) -> Option; fn resolve_field(&self, name: &str) -> Option; fn is_supertype(&self, node_type_id: NodeTypeId) -> bool; @@ -90,16 +91,20 @@ impl LangImpl for LangInner { parser.parse(source, None).expect("failed to parse source") } - fn resolve_node(&self, kind: &str, named: bool) -> Option { - let id = self.ts_lang.id_for_node_kind(kind, named); + fn resolve_named_node(&self, kind: &str) -> Option { + let id = self.ts_lang.id_for_node_kind(kind, true); + // For named nodes, 0 always means "not found" + if id == 0 { + return None; + } + Some(id) + } + fn resolve_anonymous_node(&self, kind: &str) -> Option { + let id = self.ts_lang.id_for_node_kind(kind, false); // Tree-sitter returns 0 for both "not found" AND the valid anonymous "end" node. - // For named nodes, 0 always means "not found". For anonymous, we disambiguate - // via reverse lookup. + // We disambiguate via reverse lookup. if id == 0 { - if named { - return None; - } if self.ts_lang.node_kind_for_id(0) == Some(kind) { return Some(0); } @@ -223,10 +228,10 @@ mod tests { fn resolve_node_and_field() { let lang = javascript(); - let func_id = lang.resolve_node("function_declaration", true); + let func_id = lang.resolve_named_node("function_declaration"); assert!(func_id.is_some()); - let unknown = lang.resolve_node("nonexistent_node_type", true); + let unknown = lang.resolve_named_node("nonexistent_node_type"); assert!(unknown.is_none()); let name_field = lang.resolve_field("name"); @@ -241,13 +246,13 @@ mod tests { fn supertype_via_lang_trait() { let lang = javascript(); - let expr_id = lang.resolve_node("expression", true).unwrap(); + let expr_id = lang.resolve_named_node("expression").unwrap(); assert!(lang.is_supertype(expr_id)); let subtypes = lang.subtypes(expr_id); assert!(!subtypes.is_empty()); - let func_id = lang.resolve_node("function_declaration", true).unwrap(); + let func_id = lang.resolve_named_node("function_declaration").unwrap(); assert!(!lang.is_supertype(func_id)); } @@ -256,17 +261,17 @@ mod tests { fn field_validation_via_trait() { let lang = javascript(); - let func_id = lang.resolve_node("function_declaration", true).unwrap(); + let func_id = lang.resolve_named_node("function_declaration").unwrap(); let name_field = lang.resolve_field("name").unwrap(); let body_field = lang.resolve_field("body").unwrap(); assert!(lang.has_field(func_id, name_field)); assert!(lang.has_field(func_id, body_field)); - let identifier_id = lang.resolve_node("identifier", true).unwrap(); + let identifier_id = lang.resolve_named_node("identifier").unwrap(); assert!(lang.is_valid_field_type(func_id, name_field, identifier_id)); - let statement_block_id = lang.resolve_node("statement_block", true).unwrap(); + let statement_block_id = lang.resolve_named_node("statement_block").unwrap(); assert!(lang.is_valid_field_type(func_id, body_field, statement_block_id)); } @@ -277,7 +282,7 @@ mod tests { let root_id = lang.root(); assert!(root_id.is_some()); - let program_id = lang.resolve_node("program", true); + let program_id = lang.resolve_named_node("program"); assert_eq!(root_id, program_id); } @@ -286,7 +291,7 @@ mod tests { fn unresolved_returns_none() { let lang = javascript(); - assert!(lang.resolve_node("nonexistent_node_type", true).is_none()); + assert!(lang.resolve_named_node("nonexistent_node_type").is_none()); assert!(lang.resolve_field("nonexistent_field").is_none()); } @@ -294,7 +299,7 @@ mod tests { #[cfg(feature = "rust")] fn rust_lang_works() { let lang = rust(); - let func_id = lang.resolve_node("function_item", true); + let func_id = lang.resolve_named_node("function_item"); assert!(func_id.is_some()); } @@ -304,11 +309,11 @@ mod tests { let lang = javascript(); // For named nodes: 0 unambiguously means "not found" - assert!(lang.resolve_node("fake_named", true).is_none()); + assert!(lang.resolve_named_node("fake_named").is_none()); // For anonymous nodes: we disambiguate via reverse lookup - let end_resolved = lang.resolve_node("end", false); - let fake_resolved = lang.resolve_node("totally_fake_node", false); + let end_resolved = lang.resolve_anonymous_node("end"); + let fake_resolved = lang.resolve_anonymous_node("totally_fake_node"); assert!(end_resolved.is_some(), "Valid 'end' node should resolve"); assert_eq!(end_resolved, Some(0), "'end' should have ID 0"); From d64e80d0c73c883aaa0f21ef50e59cd331592396 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 23:35:13 -0300 Subject: [PATCH 09/10] Final polish --- crates/plotnik-langs/src/lib.rs | 14 +-- crates/plotnik-macros/src/lib.rs | 187 ++++++++++++++++++------------- 2 files changed, 113 insertions(+), 88 deletions(-) diff --git a/crates/plotnik-langs/src/lib.rs b/crates/plotnik-langs/src/lib.rs index d665efd5..7d68d99f 100644 --- a/crates/plotnik-langs/src/lib.rs +++ b/crates/plotnik-langs/src/lib.rs @@ -94,23 +94,17 @@ impl LangImpl for LangInner { fn resolve_named_node(&self, kind: &str) -> Option { let id = self.ts_lang.id_for_node_kind(kind, true); // For named nodes, 0 always means "not found" - if id == 0 { - return None; - } - Some(id) + (id != 0).then_some(id) } fn resolve_anonymous_node(&self, kind: &str) -> Option { let id = self.ts_lang.id_for_node_kind(kind, false); // Tree-sitter returns 0 for both "not found" AND the valid anonymous "end" node. // We disambiguate via reverse lookup. - if id == 0 { - if self.ts_lang.node_kind_for_id(0) == Some(kind) { - return Some(0); - } - return None; + if id != 0 { + return Some(id); } - Some(id) + (self.ts_lang.node_kind_for_id(0) == Some(kind)).then_some(0) } fn resolve_field(&self, name: &str) -> Option { diff --git a/crates/plotnik-macros/src/lib.rs b/crates/plotnik-macros/src/lib.rs index a226dcf2..594a1759 100644 --- a/crates/plotnik-macros/src/lib.rs +++ b/crates/plotnik-macros/src/lib.rs @@ -102,6 +102,99 @@ fn get_language_for_key(key: &str) -> Language { } } +struct FieldCodeGen { + array_defs: Vec, + entries: Vec, +} + +fn generate_field_code( + prefix: &str, + node_id: u16, + field_id: &std::num::NonZeroU16, + field_info: &plotnik_core::FieldInfo, +) -> (proc_macro2::TokenStream, proc_macro2::TokenStream) { + let valid_types = field_info.valid_types.to_vec(); + let valid_types_name = syn::Ident::new( + &format!("{}_N{}_F{}_TYPES", prefix, node_id, field_id), + Span::call_site(), + ); + + let multiple = field_info.cardinality.multiple; + let required = field_info.cardinality.required; + let types_len = valid_types.len(); + + let array_def = quote! { + static #valid_types_name: [u16; #types_len] = [#(#valid_types),*]; + }; + + let field_id_raw = field_id.get(); + let entry = quote! { + (std::num::NonZeroU16::new(#field_id_raw).unwrap(), plotnik_core::StaticFieldInfo { + cardinality: plotnik_core::Cardinality { + multiple: #multiple, + required: #required, + }, + valid_types: &#valid_types_name, + }) + }; + + (array_def, entry) +} + +fn generate_fields_for_node( + prefix: &str, + node_id: u16, + fields: &std::collections::HashMap, +) -> FieldCodeGen { + let mut sorted_fields: Vec<_> = fields.iter().collect(); + sorted_fields.sort_by_key(|(fid, _)| *fid); + + let mut array_defs = Vec::new(); + let mut entries = Vec::new(); + + for (field_id, field_info) in sorted_fields { + let (array_def, entry) = generate_field_code(prefix, node_id, field_id, field_info); + array_defs.push(array_def); + entries.push(entry); + } + + FieldCodeGen { + array_defs, + entries, + } +} + +fn generate_children_code( + prefix: &str, + node_id: u16, + children: &plotnik_core::ChildrenInfo, + static_defs: &mut Vec, +) -> proc_macro2::TokenStream { + let valid_types = children.valid_types.to_vec(); + let children_types_name = syn::Ident::new( + &format!("{}_N{}_CHILDREN_TYPES", prefix, node_id), + Span::call_site(), + ); + let types_len = valid_types.len(); + + static_defs.push(quote! { + static #children_types_name: [u16; #types_len] = [#(#valid_types),*]; + }); + + let multiple = children.cardinality.multiple; + let required = children.cardinality.required; + + quote! { + Some(plotnik_core::StaticChildrenInfo { + cardinality: plotnik_core::Cardinality { + multiple: #multiple, + required: #required, + }, + valid_types: &#children_types_name, + }) + } +} + fn generate_static_node_types_code( raw_nodes: &[plotnik_core::RawNode], ts_lang: &Language, @@ -118,108 +211,46 @@ fn generate_static_node_types_code( ); let prefix = lang_key.to_uppercase(); - let mut static_defs = Vec::new(); let mut node_entries = Vec::new(); let extras = node_types.sorted_extras(); let root = node_types.root(); - - // Process nodes in sorted order (for binary search on node lookup) let sorted_node_ids = node_types.sorted_node_ids(); - for node_id in &sorted_node_ids { - let info = node_types.get(*node_id).unwrap(); - - let mut field_array_defs = Vec::new(); - let mut field_entries = Vec::new(); - - // Sort fields by field_id (for binary search on field lookup) - let mut sorted_fields: Vec<_> = info.fields.iter().collect(); - sorted_fields.sort_by_key(|(fid, _)| *fid); + for &node_id in &sorted_node_ids { + let info = node_types.get(node_id).unwrap(); - for (field_id, field_info) in &sorted_fields { - let valid_types = field_info.valid_types.to_vec(); + let field_gen = generate_fields_for_node(&prefix, node_id, &info.fields); + static_defs.extend(field_gen.array_defs); - let valid_types_name = syn::Ident::new( - &format!("{}_N{}_F{}_TYPES", prefix, node_id, field_id), + let fields_ref = if field_gen.entries.is_empty() { + quote! { &[] } + } else { + let fields_array_name = syn::Ident::new( + &format!("{}_N{}_FIELDS", prefix, node_id), Span::call_site(), ); + let fields_len = field_gen.entries.len(); + let field_entries = &field_gen.entries; - let multiple = field_info.cardinality.multiple; - let required = field_info.cardinality.required; - let types_len = valid_types.len(); - - field_array_defs.push(quote! { - static #valid_types_name: [u16; #types_len] = [#(#valid_types),*]; - }); - - let field_id_raw = field_id.get(); - field_entries.push(quote! { - (std::num::NonZeroU16::new(#field_id_raw).unwrap(), plotnik_core::StaticFieldInfo { - cardinality: plotnik_core::Cardinality { - multiple: #multiple, - required: #required, - }, - valid_types: &#valid_types_name, - }) - }); - } - - let fields_array_name = syn::Ident::new( - &format!("{}_N{}_FIELDS", prefix, node_id), - Span::call_site(), - ); - let fields_len = sorted_fields.len(); - - static_defs.extend(field_array_defs); - - if !sorted_fields.is_empty() { static_defs.push(quote! { static #fields_array_name: [(std::num::NonZeroU16, plotnik_core::StaticFieldInfo); #fields_len] = [ #(#field_entries),* ]; }); - } - let children_code = if let Some(children) = &info.children { - let valid_types = children.valid_types.to_vec(); - - let children_types_name = syn::Ident::new( - &format!("{}_N{}_CHILDREN_TYPES", prefix, node_id), - Span::call_site(), - ); - let types_len = valid_types.len(); - - static_defs.push(quote! { - static #children_types_name: [u16; #types_len] = [#(#valid_types),*]; - }); + quote! { &#fields_array_name } + }; - let multiple = children.cardinality.multiple; - let required = children.cardinality.required; - - quote! { - Some(plotnik_core::StaticChildrenInfo { - cardinality: plotnik_core::Cardinality { - multiple: #multiple, - required: #required, - }, - valid_types: &#children_types_name, - }) - } - } else { - quote! { None } + let children_code = match &info.children { + Some(children) => generate_children_code(&prefix, node_id, children, &mut static_defs), + None => quote! { None }, }; let name = &info.name; let named = info.named; - let fields_ref = if sorted_fields.is_empty() { - quote! { &[] } - } else { - quote! { &#fields_array_name } - }; - node_entries.push(quote! { (#node_id, plotnik_core::StaticNodeTypeInfo { name: #name, From 68b3ba882e3aa05e84a0e0acecc8d1b440f78a6c Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Thu, 4 Dec 2025 23:39:05 -0300 Subject: [PATCH 10/10] Fix coverage --- crates/plotnik-core/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index 45e11b15..99dd2988 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -1,3 +1,5 @@ +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] + //! Core data structures for Plotnik node type information. //! //! Two layers: