From a8b76607df8ee855ffee157dd13157481a8ab26b Mon Sep 17 00:00:00 2001 From: SmartMonkey Date: Wed, 10 Dec 2025 13:12:28 +0100 Subject: [PATCH] Implement rules --- src/materializer.rs | 12 +-- src/phenolint.rs | 4 +- src/rules/curies/curie_format_rule.rs | 7 +- .../disease_consistency_rule.rs | 56 ++++++----- src/rules/resources.rs | 49 ++++++---- src/rules/rule_registry.rs | 7 +- src/rules/traits.rs | 26 +++-- src/tree/btree_node_repository.rs | 2 +- src/tree/mod.rs | 3 +- src/tree/node_repository.rs | 98 ------------------- src/tree/querying/mod.rs | 2 +- src/tree/querying/queries.rs | 3 +- src/tree/querying/traits.rs | 11 ++- src/tree/scopes.rs | 11 +-- src/tree/traits.rs | 2 +- tests/test_custom_rule.rs | 7 +- 16 files changed, 106 insertions(+), 194 deletions(-) delete mode 100644 src/tree/node_repository.rs diff --git a/src/materializer.rs b/src/materializer.rs index 89d735b..1828bb6 100644 --- a/src/materializer.rs +++ b/src/materializer.rs @@ -1,7 +1,6 @@ use crate::parsing::traits::ParsableNode; use crate::tree::node::{DynamicNode, MaterializedNode}; -use crate::tree::node_repository::NodeRepository; -use crate::tree::traits::LocatableNode; +use crate::tree::traits::{LocatableNode, NodeRepository}; use log::error; use phenopackets::schema::v2::core::{ Diagnosis, Disease, OntologyClass, PhenotypicFeature, Resource, VitalStatus, @@ -11,7 +10,7 @@ use phenopackets::schema::v2::{Cohort, Phenopacket}; pub(crate) struct NodeMaterializer; impl NodeMaterializer { - pub fn materialize_nodes(&mut self, dyn_node: &DynamicNode, repo: &mut NodeRepository) { + pub fn materialize_nodes(&mut self, dyn_node: &DynamicNode, repo: &mut impl NodeRepository) { if let Some(cohort) = Cohort::parse(dyn_node) { Self::push_to_repo(cohort, dyn_node, repo); } else if let Some(oc) = OntologyClass::parse(dyn_node) { @@ -33,12 +32,13 @@ impl NodeMaterializer { }; } - fn push_to_repo( + fn push_to_repo( materialized: NodeType, dyn_node: &DynamicNode, - board: &mut NodeRepository, + board: &mut impl NodeRepository, ) { let node = MaterializedNode::from_dynamic(materialized, dyn_node); - board.insert(node); + // TODO: Error throwing + board.insert(node).expect("Unable to insert node"); } } diff --git a/src/phenolint.rs b/src/phenolint.rs index 1a374d3..cf1d1f2 100644 --- a/src/phenolint.rs +++ b/src/phenolint.rs @@ -14,13 +14,13 @@ use crate::schema_validation::validator::PhenopacketSchemaValidator; use crate::traits::Lint; use crate::tree::abstract_pheno_tree::AbstractTreeTraversal; use crate::tree::node::DynamicNode; -use crate::tree::node_repository::NodeRepository; use crate::tree::pointer::Pointer; use log::{error, warn}; use phenopackets::schema::v2::Phenopacket; use prost::Message; use serde_json::Value; +use crate::tree::btree_node_repository::BTreeNodeRepository; use std::fs; use std::path::PathBuf; @@ -71,7 +71,7 @@ impl Lint for Phenolint { let root_node = DynamicNode::new(&values, &spans, Pointer::at_root()); let apt = AbstractTreeTraversal::new(values, spans); - let mut node_repo: NodeRepository = NodeRepository::new(); + let mut node_repo = BTreeNodeRepository::new(); for node in apt.traverse() { self.node_materializer diff --git a/src/rules/curies/curie_format_rule.rs b/src/rules/curies/curie_format_rule.rs index a5e3a3c..41bd08d 100644 --- a/src/rules/curies/curie_format_rule.rs +++ b/src/rules/curies/curie_format_rule.rs @@ -10,7 +10,8 @@ use crate::report::traits::{CompileReport, RegisterableReport, ReportFromContext use crate::rules::rule_registration::RuleRegistration; use crate::rules::traits::RuleMetaData; use crate::rules::traits::{LintRule, RuleCheck, RuleFromContext}; -use crate::tree::node_repository::List; +use crate::tree::querying::presentation::Flattened; +use crate::tree::querying::queries::convenience::All; use crate::tree::traits::{LocatableNode, Node}; use phenolint_macros::{register_report, register_rule}; use phenopackets::schema::v2::core::OntologyClass; @@ -38,9 +39,9 @@ impl RuleFromContext for CurieFormatRule { } impl RuleCheck for CurieFormatRule { - type Data<'a> = List<'a, OntologyClass>; + type Query = All; - fn check(&self, data: Self::Data<'_>) -> Vec { + fn check(&self, data: Flattened) -> Vec { let mut violations = vec![]; for node in data.0.iter() { diff --git a/src/rules/interpretation/disease_consistency_rule.rs b/src/rules/interpretation/disease_consistency_rule.rs index cd2eafe..5a6a763 100644 --- a/src/rules/interpretation/disease_consistency_rule.rs +++ b/src/rules/interpretation/disease_consistency_rule.rs @@ -14,8 +14,9 @@ use crate::report::traits::{CompileReport, RegisterableReport, ReportFromContext use crate::rules::rule_registration::RuleRegistration; use crate::rules::traits::RuleMetaData; use crate::rules::traits::{LintRule, RuleCheck, RuleFromContext}; -use crate::tree::node_repository::List; use crate::tree::pointer::Pointer; +use crate::tree::querying::presentation::Grouped; +use crate::tree::querying::queries::convenience::GroupedIndividuals; use crate::tree::traits::{LocatableNode, Node}; use phenolint_macros::{register_patch, register_report, register_rule}; use phenopackets::schema::v2::core::{Diagnosis, Disease, OntologyClass}; @@ -38,34 +39,37 @@ impl RuleFromContext for DiseaseConsistencyRule { } impl RuleCheck for DiseaseConsistencyRule { - type Data<'a> = (List<'a, Diagnosis>, List<'a, Disease>); + type Query = (GroupedIndividuals, GroupedIndividuals); - fn check(&self, data: Self::Data<'_>) -> Vec { + fn check(&self, data: (Grouped, Grouped)) -> Vec { let mut violations = vec![]; - let disease_terms: Vec<(&str, &str)> = data - .1 - .iter() - .filter_map(|disease| { - disease - .inner - .term - .as_ref() - .map(|oc| (oc.id.as_str(), oc.label.as_str())) - }) - .collect(); - - for diagnosis in data.0.iter() { - if let Some(oc) = &diagnosis.inner.disease - && !disease_terms.contains(&(oc.id.as_str(), oc.label.as_str())) - { - violations.push(LintViolation::new( - ViolationSeverity::Warning, - LintRule::rule_id(self), - NonEmptyVec::with_single_entry( - diagnosis.pointer().clone().down("disease").clone(), - ), - )) + let (diagnosis_groups, diseases_group) = data; + + for (diagnosis, diseases) in diagnosis_groups.0.iter().zip(diseases_group.0) { + let disease_terms: Vec<(&str, &str)> = diseases + .iter() + .filter_map(|disease| { + disease + .inner + .term + .as_ref() + .map(|oc| (oc.id.as_str(), oc.label.as_str())) + }) + .collect(); + + for diagnosis in diagnosis.iter() { + if let Some(oc) = &diagnosis.inner.disease + && !disease_terms.contains(&(oc.id.as_str(), oc.label.as_str())) + { + violations.push(LintViolation::new( + ViolationSeverity::Warning, + LintRule::rule_id(self), + NonEmptyVec::with_single_entry( + diagnosis.pointer().clone().down("disease").clone(), + ), + )) + } } } diff --git a/src/rules/resources.rs b/src/rules/resources.rs index 315498a..a7a3d46 100644 --- a/src/rules/resources.rs +++ b/src/rules/resources.rs @@ -8,8 +8,9 @@ use crate::report::traits::RuleReport; use crate::report::traits::{CompileReport, RegisterableReport, ReportFromContext}; use crate::rules::rule_registration::RuleRegistration; use crate::rules::traits::{LintRule, RuleCheck, RuleFromContext, RuleMetaData}; -use crate::tree::node_repository::List; use crate::tree::pointer::Pointer; +use crate::tree::querying::presentation::Grouped; +use crate::tree::querying::queries::convenience::GroupedIndividuals; use crate::tree::traits::{LocatableNode, Node}; use phenolint_macros::{register_report, register_rule}; use phenopackets::schema::v2::core::{OntologyClass, Resource}; @@ -35,28 +36,34 @@ impl RuleFromContext for CuriesHaveResourcesRule { } impl RuleCheck for CuriesHaveResourcesRule { - type Data<'a> = (List<'a, OntologyClass>, List<'a, Resource>); - - fn check(&self, data: Self::Data<'_>) -> Vec { - let known_prefixes: HashSet<_> = data - .1 - .iter() - .map(|r| r.inner.namespace_prefix.as_str()) - .collect(); + type Query = ( + GroupedIndividuals, + GroupedIndividuals, + ); + fn check(&self, data: (Grouped, Grouped)) -> Vec { + let (ontology_classes, resources) = data; let mut violations = vec![]; - for node in data.0.iter() { - if let Some(prefix) = find_prefix(node.inner.id.as_str()) - && !known_prefixes.contains(prefix) - { - violations.push(LintViolation::new( - ViolationSeverity::Error, - LintRule::rule_id(self), - node.pointer().clone().into(), // <- warns about the ontology class itself - )); + for (o, r) in ontology_classes.0.iter().zip(resources.0) { + let known_prefixes: HashSet<_> = r + .iter() + .map(|r| r.inner.namespace_prefix.as_str()) + .collect(); + + for node in o.iter() { + if let Some(prefix) = find_prefix(node.inner.id.as_str()) + && !known_prefixes.contains(prefix) + { + violations.push(LintViolation::new( + ViolationSeverity::Error, + LintRule::rule_id(self), + node.pointer().clone().into(), // <- warns about the ontology class itself + )); + } } } + violations } } @@ -66,8 +73,8 @@ mod test_curies_have_resources { use crate::rules::resources::CuriesHaveResourcesRule; use crate::rules::traits::{RuleCheck, RuleMetaData}; use crate::tree::node::MaterializedNode; - use crate::tree::node_repository::List; use crate::tree::pointer::Pointer; + use crate::tree::querying::presentation::Grouped; use phenopackets::schema::v2::core::OntologyClass; #[test] @@ -82,8 +89,8 @@ mod test_curies_have_resources { Default::default(), Pointer::new("/phenotypicFeatures/0/type"), )]; - let resources = []; - let data = (List(&ocs), List(&resources)); + let resources = vec![vec![]]; + let data = (Grouped(vec![ocs.to_vec()]), Grouped(resources)); let violations = rule.check(data); diff --git a/src/rules/rule_registry.rs b/src/rules/rule_registry.rs index 9bd3534..aa1296f 100644 --- a/src/rules/rule_registry.rs +++ b/src/rules/rule_registry.rs @@ -80,7 +80,8 @@ mod tests { use crate::rules::traits::RuleCheck; use crate::rules::traits::RuleFromContext; use crate::rules::traits::RuleMetaData; - use crate::tree::node_repository::List; + use crate::tree::querying::presentation::Flattened; + use crate::tree::querying::queries::convenience::All; use phenolint_macros::register_rule; use phenopackets::schema::v2::core::OntologyClass; use rstest::rstest; @@ -100,9 +101,9 @@ mod tests { } } impl RuleCheck for TestRule { - type Data<'a> = List<'a, OntologyClass>; + type Query = All; - fn check(&self, _: Self::Data<'_>) -> Vec { + fn check(&self, _: Flattened) -> Vec { todo!() } } diff --git a/src/rules/traits.rs b/src/rules/traits.rs index 97173cb..656729b 100644 --- a/src/rules/traits.rs +++ b/src/rules/traits.rs @@ -1,15 +1,17 @@ use crate::LinterContext; use crate::diagnostics::LintViolation; use crate::error::FromContextError; -use crate::tree::node_repository::NodeRepository; +use crate::tree::btree_node_repository::BTreeNodeRepository; +use crate::tree::querying::traits::QueryStrategy; -pub trait LintRule: RuleFromContext + Send + Sync { +pub trait LintRule: Send + Sync { fn rule_id(&self) -> &str; - fn check_erased(&self, board: &NodeRepository) -> Vec; + // Needs to be concrete type, because NodeRepository trait is not dyn compatible :( + fn check_erased(&self, board: &BTreeNodeRepository) -> Vec; } -pub trait RuleMetaData: Send + Sync { +pub trait RuleMetaData { fn rule_id(&self) -> &str; } @@ -20,28 +22,22 @@ pub trait RuleFromContext { } pub trait RuleCheck: Send + Sync + 'static { - type Data<'a>: LintData<'a> + ?Sized; - fn check(&self, data: Self::Data<'_>) -> Vec; + type Query: QueryStrategy; + fn check(&self, data: ::Output) -> Vec; } impl LintRule for T where T: RuleCheck + RuleFromContext + RuleMetaData, - for<'a> ::Data<'a>: Sized, + for<'a> ::Query: Sized, { fn rule_id(&self) -> &str { self.rule_id() } - fn check_erased(&self, board: &NodeRepository) -> Vec { - let data = ::Data::fetch(board); + fn check_erased(&self, board: &BTreeNodeRepository) -> Vec { + let data = ::Query::query(board); self.check(data) } } - -pub trait LintData<'a> { - fn fetch(board: &'a NodeRepository) -> Self - where - Self: Sized; -} diff --git a/src/tree/btree_node_repository.rs b/src/tree/btree_node_repository.rs index 575a9d3..dcf31a2 100644 --- a/src/tree/btree_node_repository.rs +++ b/src/tree/btree_node_repository.rs @@ -15,7 +15,7 @@ struct NodeEntry { inner: Box, } -pub(crate) struct BTreeNodeRepository { +pub struct BTreeNodeRepository { node_store: BTreeMap, span_store: BTreeMap>, scope_mappings: ScopeMappings, diff --git a/src/tree/mod.rs b/src/tree/mod.rs index fe93e4c..7b8dd6f 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -2,9 +2,8 @@ pub(crate) mod abstract_pheno_tree; pub(crate) mod btree_node_repository; mod error; pub mod node; -pub mod node_repository; pub mod pointer; -mod querying; +pub mod querying; mod scopes; pub mod traits; pub(crate) mod utils; diff --git a/src/tree/node_repository.rs b/src/tree/node_repository.rs deleted file mode 100644 index d1eeac2..0000000 --- a/src/tree/node_repository.rs +++ /dev/null @@ -1,98 +0,0 @@ -use crate::rules::traits::LintData; -use crate::tree::node::MaterializedNode; -use crate::tree::pointer::Pointer; -use crate::tree::traits::LocatableNode; - -use std::any::{Any, TypeId}; -use std::collections::HashMap; -use std::ops::Deref; - -#[derive(Default)] -pub struct NodeRepository { - board: HashMap>, -} - -impl NodeRepository { - pub fn new() -> NodeRepository { - NodeRepository { - board: HashMap::new(), - } - } - - fn get_raw(&self) -> &[MaterializedNode] { - self.board - .get(&TypeId::of::()) - .and_then(|b| b.downcast_ref::>>()) - .map(|v| v.as_slice()) - .unwrap_or(&[]) - } - - pub fn insert(&mut self, node: MaterializedNode) { - self.board - .entry(TypeId::of::()) - .or_insert_with(|| Box::new(Vec::>::new())) - .downcast_mut::>>() - .unwrap() - .push(node); - } - - pub fn node_by_pointer(&self, ptr: &Pointer) -> Option<&MaterializedNode> { - for nodes in self.board.values() { - let casted_node = nodes - .downcast_ref::>>() - .expect("Should be downcastable"); - - for node in casted_node.iter() { - if node.pointer() == ptr { - return Some(node); - } - } - } - None - } -} - -pub struct Single<'a, T: 'static>(pub Option<&'a MaterializedNode>); - -impl<'a, T> LintData<'a> for Single<'a, T> { - fn fetch(board: &'a NodeRepository) -> Self { - Single(board.get_raw::().first()) - } -} - -pub struct List<'a, T: 'static>(pub &'a [MaterializedNode]); - -impl<'a, T> Deref for List<'a, T> { - type Target = &'a [MaterializedNode]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl<'a, T> LintData<'a> for List<'a, T> { - fn fetch(board: &'a NodeRepository) -> Self { - List(board.get_raw()) - } -} - -impl<'a, A, B> LintData<'a> for (A, B) -where - A: LintData<'a>, - B: LintData<'a>, -{ - fn fetch(board: &'a NodeRepository) -> Self { - (A::fetch(board), B::fetch(board)) - } -} - -impl<'a, A, B, C> LintData<'a> for (A, B, C) -where - A: LintData<'a>, - B: LintData<'a>, - C: LintData<'a>, -{ - fn fetch(board: &'a NodeRepository) -> Self { - (A::fetch(board), B::fetch(board), C::fetch(board)) - } -} diff --git a/src/tree/querying/mod.rs b/src/tree/querying/mod.rs index 436e089..6bf3045 100644 --- a/src/tree/querying/mod.rs +++ b/src/tree/querying/mod.rs @@ -1,3 +1,3 @@ pub mod presentation; pub mod queries; -mod traits; +pub mod traits; diff --git a/src/tree/querying/queries.rs b/src/tree/querying/queries.rs index 79ad3db..b7e91b0 100644 --- a/src/tree/querying/queries.rs +++ b/src/tree/querying/queries.rs @@ -1,9 +1,8 @@ use crate::tree::node::MaterializedNode; -use crate::tree::scopes::ScopeDefinition; use crate::tree::traits::NodeRepository; -use crate::tree::querying::traits::{QueryPresentation, QueryStrategy}; +use crate::tree::querying::traits::{QueryPresentation, QueryStrategy, ScopeDefinition}; use std::marker::PhantomData; #[derive(Debug)] diff --git a/src/tree/querying/traits.rs b/src/tree/querying/traits.rs index 76b6deb..3bfbc14 100644 --- a/src/tree/querying/traits.rs +++ b/src/tree/querying/traits.rs @@ -1,6 +1,7 @@ +use crate::tree::scopes::ScopeLayer; use crate::tree::traits::NodeRepository; -pub(crate) trait QueryStrategy { +pub trait QueryStrategy { type Output; #[allow(unused)] fn query(node_repo: &impl NodeRepository) -> Self::Output; @@ -44,3 +45,11 @@ pub(crate) trait QueryPresentation { where Self: Sized; } + +pub trait ScopeDefinition { + fn layer() -> ScopeLayer; + + fn partitioning_fields() -> &'static [&'static str] { + &[] + } +} diff --git a/src/tree/scopes.rs b/src/tree/scopes.rs index e5d80c0..663215d 100644 --- a/src/tree/scopes.rs +++ b/src/tree/scopes.rs @@ -1,23 +1,16 @@ use crate::tree::pointer::Pointer; +use crate::tree::querying::traits::ScopeDefinition; use phenopackets::schema::v2::{Cohort, Family, Phenopacket}; use std::any::TypeId; use std::cell::Cell; use std::collections::HashMap; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ScopeLayer { +pub enum ScopeLayer { Individual = 0, Aggregated = 1, } -pub(crate) trait ScopeDefinition { - fn layer() -> ScopeLayer; - - fn partitioning_fields() -> &'static [&'static str] { - &[] - } -} - impl ScopeDefinition for Phenopacket { fn layer() -> ScopeLayer { ScopeLayer::Individual diff --git a/src/tree/traits.rs b/src/tree/traits.rs index 1a3fc7e..8d9e203 100644 --- a/src/tree/traits.rs +++ b/src/tree/traits.rs @@ -20,7 +20,7 @@ pub trait RetrievableNode { fn value_at(&self, ptr: &Pointer) -> Option>; } -pub(crate) trait NodeRepository { +pub trait NodeRepository { fn insert( &mut self, node: MaterializedNode, diff --git a/tests/test_custom_rule.rs b/tests/test_custom_rule.rs index 297f386..d7fc003 100644 --- a/tests/test_custom_rule.rs +++ b/tests/test_custom_rule.rs @@ -18,8 +18,9 @@ use phenolint::report::specs::{LabelSpecs, ReportSpecs}; use phenolint::report::traits::{CompileReport, RegisterableReport, ReportFromContext, RuleReport}; use phenolint::rules::traits::LintRule; use phenolint::rules::traits::{RuleCheck, RuleFromContext}; -use phenolint::tree::node_repository::List; use phenolint::tree::pointer::Pointer; +use phenolint::tree::querying::presentation::Flattened; +use phenolint::tree::querying::queries::convenience::All; use phenolint::tree::traits::Node; use phenolint_macros::{register_patch, register_report, register_rule}; use phenopackets::schema::v2::Phenopacket; @@ -43,9 +44,9 @@ impl RuleFromContext for CustomRule { } impl RuleCheck for CustomRule { - type Data<'a> = List<'a, OntologyClass>; + type Query = All; - fn check(&self, _: Self::Data<'_>) -> Vec { + fn check(&self, _: Flattened) -> Vec { vec![LintViolation::new( ViolationSeverity::Info, LintRule::rule_id(self),