From e49d1845a030c03ec57d6570d55572ec20820d82 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 9 Jul 2025 11:30:12 +0200 Subject: [PATCH 01/26] more general strategies --- nemo-cli/src/main.rs | 8 +-- nemo-python/src/lib.rs | 6 +- nemo-wasm/src/lib.rs | 6 +- nemo/src/api.rs | 6 +- nemo/src/execution.rs | 11 +-- nemo/src/execution/execution_engine.rs | 40 +++++------ nemo/src/execution/rule_execution.rs | 2 +- .../execution/selection_strategy/strategy.rs | 71 +++++++++++++++++-- 8 files changed, 102 insertions(+), 48 deletions(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index a45178f56..0b586723f 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -36,7 +36,7 @@ use nemo::{ datavalues::AnyDataValue, error::Error, execution::{ - execution_parameters::ExecutionParameters, DefaultExecutionEngine, ExecutionEngine, + execution_parameters::ExecutionParameters, DefaultExecutionStrategy, ExecutionEngine, }, io::{resource_providers::ResourceProviders, ImportManager}, meta::timing::{TimedCode, TimedDisplay}, @@ -148,7 +148,7 @@ fn print_timing_details() { } /// Prints detailed memory information. -fn print_memory_details(engine: &DefaultExecutionEngine) { +fn print_memory_details(engine: &ExecutionEngine) { println!("\nMemory report:\n\n{}", engine.memory_usage()); } @@ -167,7 +167,7 @@ fn parse_trace_facts(cli: &CliApp) -> Result, Error> { } /// Deal with tracing -fn handle_tracing(cli: &CliApp, engine: &mut DefaultExecutionEngine) -> Result<(), CliError> { +fn handle_tracing(cli: &CliApp, engine: &mut ExecutionEngine) -> Result<(), CliError> { let tracing_facts = parse_trace_facts(cli)?; if !tracing_facts.is_empty() { log::info!("Starting tracing of {} facts...", tracing_facts.len()); @@ -256,7 +256,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> { TimedCode::instance().sub("Reasoning").start(); log::info!("Reasoning ... "); - engine.execute()?; + engine.execute::()?; log::info!("Reasoning done"); TimedCode::instance().sub("Reasoning").stop(); diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index c4caf6040..fed1a331f 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -5,7 +5,7 @@ use nemo::{ chase_model::ChaseAtom, datavalues::{AnyDataValue, DataValue}, error::Error, - execution::{tracing::trace::ExecutionTraceTree, ExecutionEngine}, + execution::{tracing::trace::ExecutionTraceTree, DefaultExecutionStrategy, ExecutionEngine}, io::{resource_providers::ResourceProviders, ExportManager, ImportManager}, meta::timing::TimedCode, rule_model::{ @@ -331,7 +331,7 @@ impl NemoResults { #[pyclass(unsendable)] struct NemoEngine { - engine: nemo::execution::DefaultExecutionEngine, + engine: nemo::execution::ExecutionEngine, } #[pyclass] @@ -412,7 +412,7 @@ impl NemoEngine { TimedCode::instance().start(); TimedCode::instance().sub("Reasoning").start(); - self.engine.execute().py_res()?; + self.engine.execute::().py_res()?; TimedCode::instance().sub("Reasoning").stop(); TimedCode::instance().stop(); diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index bca8f1b97..3701d6e03 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -12,7 +12,7 @@ use nemo::{ error::ReadingError, execution::{ tracing::trace::{ExecutionTrace, ExecutionTraceTree, TraceFactHandle}, - ExecutionEngine, + DefaultExecutionStrategy, ExecutionEngine, }, io::{ formats::FileFormatMeta, @@ -221,7 +221,7 @@ impl ResourceProvider for BlobResourceProvider { #[wasm_bindgen] pub struct NemoEngine { - engine: nemo::execution::DefaultExecutionEngine, + engine: nemo::execution::ExecutionEngine, } #[cfg(feature = "web_sys_unstable_apis")] @@ -306,7 +306,7 @@ impl NemoEngine { #[wasm_bindgen] pub fn reason(&mut self) -> Result<(), NemoError> { self.engine - .execute() + .execute::() .map_err(WasmOrInternalNemoError::Nemo) .map_err(NemoError) } diff --git a/nemo/src/api.rs b/nemo/src/api.rs index cd715a11d..3251437c3 100644 --- a/nemo/src/api.rs +++ b/nemo/src/api.rs @@ -26,7 +26,7 @@ use std::{fs::read_to_string, path::PathBuf}; use crate::{ error::{report::ProgramReport, Error, ReadingError}, execution::{ - execution_parameters::ExecutionParameters, DefaultExecutionEngine, ExecutionEngine, + execution_parameters::ExecutionParameters, DefaultExecutionStrategy, ExecutionEngine, }, rule_file::RuleFile, rule_model::{ @@ -40,7 +40,7 @@ use crate::{ use nemo_physical::resource::Resource; /// Reasoning Engine exposed by the API -pub type Engine = DefaultExecutionEngine; +pub type Engine = ExecutionEngine; /// Load the given `file` and load the program from the file. /// @@ -113,7 +113,7 @@ pub fn validate(input: String, label: String) -> ProgramReport { /// parsed rules, all relative paths are resolved with the current /// working directory pub fn reason(engine: &mut Engine) -> Result<(), Error> { - engine.execute() + engine.execute::() } /// Get a [Vec] of all output predicates that are computed by the engine. diff --git a/nemo/src/execution.rs b/nemo/src/execution.rs index bebb59c99..8d1df65e2 100644 --- a/nemo/src/execution.rs +++ b/nemo/src/execution.rs @@ -3,6 +3,8 @@ pub mod execution_engine; pub use execution_engine::ExecutionEngine; +use crate::execution::selection_strategy::strategy::SingleStepStrategy; + use self::selection_strategy::{ dependency_graph::graph_positive::GraphConstructorPositive, strategy_graph::StrategyDependencyGraph, strategy_round_robin::StrategyRoundRobin, @@ -16,9 +18,8 @@ pub mod selection_strategy; pub mod tracing; /// The default strategy that will be used for reasoning -pub type DefaultExecutionStrategy = StrategyStratifiedNegation< - StrategyDependencyGraph, +pub type DefaultExecutionStrategy = SingleStepStrategy< + StrategyStratifiedNegation< + StrategyDependencyGraph, + >, >; - -/// Shorthand for an execution engine using the default strategy -pub type DefaultExecutionEngine = ExecutionEngine; diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 2ea688086..02c58a27e 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -20,7 +20,10 @@ use crate::{ translation::ProgramChaseTranslation, }, error::{report::ProgramReport, warned::Warned, Error}, - execution::{planning::plan_tracing::TracingStrategy, tracing::trace::TraceDerivation}, + execution::{ + planning::plan_tracing::TracingStrategy, selection_strategy::strategy::ExecutionStep, + tracing::trace::TraceDerivation, + }, io::{formats::Export, import_manager::ImportManager}, rule_file::RuleFile, rule_model::{ @@ -39,7 +42,7 @@ use crate::{ use super::{ execution_parameters::ExecutionParameters, rule_execution::RuleExecution, - selection_strategy::strategy::RuleSelectionStrategy, + selection_strategy::strategy::ExecutionStrategy, tracing::{ error::TracingError, trace::{ExecutionTrace, TraceFactHandle, TraceRuleApplication, TraceStatus}, @@ -67,7 +70,7 @@ impl RuleInfo { /// Object which handles the evaluation of the program. #[derive(Debug)] -pub struct ExecutionEngine { +pub struct ExecutionEngine { /// Logical program nemo_program: Program, @@ -76,9 +79,6 @@ pub struct ExecutionEngine { /// Auxillary information for `program` analysis: ProgramAnalysis, - /// The picked selection strategy for rules - rule_strategy: RuleSelectionStrategy, - /// Management of tables that represent predicates table_manager: TableManager, @@ -95,7 +95,7 @@ pub struct ExecutionEngine { current_step: usize, } -impl ExecutionEngine { +impl ExecutionEngine { /// Initialize a [ExecutionEngine] by parsing and translating /// the contents of the given file. pub fn from_file( @@ -132,16 +132,10 @@ impl ExecutionEngine { .iter() .for_each(|_| rule_infos.push(RuleInfo::new())); - let rule_strategy = Strategy::new( - chase_program.rules().iter().collect(), - analysis.rule_analysis.iter().collect(), - )?; - Ok(Self { nemo_program: program, program: chase_program, analysis, - rule_strategy, table_manager, predicate_fragmentation: HashMap::new(), predicate_last_union: HashMap::new(), @@ -261,22 +255,20 @@ impl ExecutionEngine { } /// Executes the program. - pub fn execute(&mut self) -> Result<(), Error> { + pub fn execute(&mut self) -> Result<(), Error> { TimedCode::instance().sub("Reasoning/Rules").start(); TimedCode::instance().sub("Reasoning/Execution").start(); - let rule_execution: Vec = self - .program - .rules() - .iter() - .zip(self.analysis.rule_analysis.iter()) - .map(|(r, a)| RuleExecution::initialize(r, a)) - .collect(); - let mut new_derivations: Option = None; - while let Some(index) = self.rule_strategy.next_rule(new_derivations) { - let updated_predicates = self.step(index, &rule_execution[index])?; + let mut rule_strategy = Strategy::new( + self.program.rules().iter().collect(), + self.analysis.rule_analysis.iter().collect(), + )?; + + while let Some(step) = rule_strategy.next_step(new_derivations) { + let ExecutionStep::ExecuteRule { execution, index } = step; + let updated_predicates = self.step(index, execution)?; new_derivations = Some(!updated_predicates.is_empty()); self.defrag(updated_predicates)?; diff --git a/nemo/src/execution/rule_execution.rs b/nemo/src/execution/rule_execution.rs index 634d0da87..ec27d713b 100644 --- a/nemo/src/execution/rule_execution.rs +++ b/nemo/src/execution/rule_execution.rs @@ -27,7 +27,7 @@ pub(crate) type VariableTranslation = OperationTableGenerator; /// Object responsible for executing a "normal" rule. #[derive(Debug)] -pub(crate) struct RuleExecution { +pub struct RuleExecution { /// Translation of variables into markers used for creating execution plans variable_translation: VariableTranslation, /// List of variable orders which might be considered for this rule diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index b7767246d..58e18862a 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -2,7 +2,10 @@ use thiserror::Error; -use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; +use crate::{ + chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}, + execution::rule_execution::RuleExecution, +}; /// Errors that can occur while creating a strategy. #[derive(Error, Debug, Copy, Clone)] @@ -14,17 +17,75 @@ pub enum SelectionStrategyError { /// Trait that defines a strategy for rule execution, /// namely the order in which the rules are applied in. -pub trait RuleSelectionStrategy: std::fmt::Debug { +pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { /// Create a new [RuleSelectionStrategy] object. fn new( rules: Vec<&ChaseRule>, rule_analyses: Vec<&RuleAnalysis>, - ) -> Result - where - Self: Sized; + ) -> Result; /// Return the index of the next rule that should be executed. /// Returns `None` if there are no more rules to be applied /// and the execution should therefore stop. fn next_rule(&mut self, new_derivations: Option) -> Option; } + +/// Strategy for executing a set of rules, +/// which might involve different types of [`ExecutionStep`]s +pub trait ExecutionStrategy: std::fmt::Debug + Sized { + /// Create a new [ExecutionStrategy] object. + fn new( + rules: Vec<&ChaseRule>, + rule_analyses: Vec<&RuleAnalysis>, + ) -> Result; + + /// Return the next step that should be executed. + /// Returns `None` if there are no more rules to be applied + /// and the execution should therefore stop. + fn next_step(&mut self, new_derivations: Option) -> Option>; +} + +/// Step that can be taken in the execution of a set of rules. +#[derive(Copy, Clone, Debug)] +pub enum ExecutionStep<'a> { + /// Execute a single rule via the trie-join + ExecuteRule { + /// Index of the rule that shall be executed + index: usize, + /// Strategy for the rule execution + execution: &'a RuleExecution, + }, +} + +/// A strategy executing one rule at a time +#[derive(Debug)] +pub struct SingleStepStrategy { + inner: T, + rule_execution: Box<[RuleExecution]>, +} + +impl ExecutionStrategy for SingleStepStrategy { + fn new( + rules: Vec<&ChaseRule>, + rule_analyses: Vec<&RuleAnalysis>, + ) -> Result { + let rule_execution = rules + .iter() + .zip(&rule_analyses) + .map(|(r, a)| RuleExecution::initialize(r, a)) + .collect(); + + Ok(SingleStepStrategy { + inner: T::new(rules, rule_analyses)?, + rule_execution, + }) + } + + fn next_step(&mut self, new_derivations: Option) -> Option> { + let index = self.inner.next_rule(new_derivations)?; + Some(ExecutionStep::ExecuteRule { + execution: &self.rule_execution[index], + index, + }) + } +} From 4d512fed8d44c5cfebdf1b5b35b23d6df3ebcf6d Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Tue, 5 Aug 2025 18:39:00 +0200 Subject: [PATCH 02/26] add saturator --- nemo/src/saturator.rs | 750 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 750 insertions(+) create mode 100644 nemo/src/saturator.rs diff --git a/nemo/src/saturator.rs b/nemo/src/saturator.rs new file mode 100644 index 000000000..1ca348b4f --- /dev/null +++ b/nemo/src/saturator.rs @@ -0,0 +1,750 @@ +#![feature(btree_cursors)] +#![feature(hash_set_entry)] + +use nemo::rule_model::components::{ + atom::Atom, + literal::Literal, + rule::Rule, + term::{ + primitive::{ + ground, + variable::{universal::UniversalVariable, Variable}, + Primitive, + }, + Term, + }, + IterableVariables, ProgramComponent, +}; + +use nemo_physical::{datatypes::StorageValueT, dictionary::DvDict, management::database::Dict}; +use rayon::{ + iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, + ThreadPoolBuilder, +}; +use std::{ + collections::{btree_set, BTreeSet, HashMap, HashSet, VecDeque}, + i32, + iter::repeat_n, + ops::{Bound, Index}, + sync::{mpsc, Arc}, + thread::current, +}; + +type VariableIdx = u16; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum BodyTerm { + Constant(StorageValueT), + Variable(VariableIdx), + Ignore, +} + +#[derive(Debug, Default, Clone)] +struct SaturationSubstitution(Vec>); + +impl Index for SaturationSubstitution { + type Output = Option; + + fn index(&self, index: VariableIdx) -> &Self::Output { + if self.0.len() <= usize::from(index) { + &None + } else { + &self.0[usize::from(index)] + } + } +} + +impl SaturationSubstitution { + fn insert(&mut self, var: VariableIdx, value: StorageValueT) -> Option { + if self.0.len() <= usize::from(var) { + self.0.resize_with(usize::from(var + 1), || None); + self.0[usize::from(var)] = Some(value); + None + } else { + let prev = self.0[usize::from(var)]; + self.0[usize::from(var)] = Some(value); + prev + } + } + + fn bind(&self, terms: &[BodyTerm]) -> Row { + terms + .iter() + .map(|term| match term { + BodyTerm::Constant(constant) => RowElement::Value(*constant), + BodyTerm::Variable(var) => self[*var] + .map(RowElement::Value) + .unwrap_or(RowElement::Bottom), + BodyTerm::Ignore => RowElement::Bottom, + }) + .collect() + } + + fn update(&mut self, terms: &[BodyTerm], row: &[RowElement]) { + for (term, value) in terms.iter().zip(row) { + let BodyTerm::Variable(var) = term else { + continue; + }; + + self.insert(*var, value.value()); + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct SaturationAtom { + predicate: Arc, + terms: Box<[BodyTerm]>, +} + +// impl Clone for SaturationAtom { +// fn clone(&self) -> Self { +// todo!() +// } +// } + +impl SaturationAtom { + fn match_fact(&self, fact: &SaturationFact) -> Option { + if fact.predicate != self.predicate { + return None; + } + + let mut res = SaturationSubstitution::default(); + debug_assert_eq!(self.terms.len(), fact.values.len()); + + for (term, value) in self.terms.iter().zip(&fact.values) { + match term { + BodyTerm::Constant(constant) => { + if value != constant { + return None; + } + } + BodyTerm::Variable(idx) => { + if let Some(prev) = res.insert(*idx, *value) { + if prev != *value { + return None; + } + } + } + BodyTerm::Ignore => {} + } + } + + Some(res) + } + + fn variables(&self) -> impl Iterator + use<'_> { + self.terms.iter().flat_map(|term| match term { + BodyTerm::Variable(var) => Some(*var), + _ => None, + }) + } +} + +type JoinOrder = Arc<[JoinOp]>; + +enum JoinOp { + Join(SaturationAtom), + Filter(SaturationAtom), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum HeadTerm { + Existential(VariableIdx), + Universal(VariableIdx), + Constant(StorageValueT), +} + +struct HeadAtom { + predicate: Arc, + terms: Box<[HeadTerm]>, +} + +enum Head { + Datalog(Box<[SaturationAtom]>), + Existential(Box<[HeadTerm]>), +} + +struct SaturationRule { + body_atoms: Box<[SaturationAtom]>, + join_orders: Box<[JoinOrder]>, + head: Head, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +struct SaturationFact { + predicate: Arc, + values: Box<[StorageValueT]>, +} + +impl SaturationRule { + fn trigger<'a, 'b>( + &'a self, + fact: &'b SaturationFact, + ) -> impl Iterator + use<'a, 'b> { + self.body_atoms + .iter() + .zip(&self.join_orders) + .filter_map(|(atom, order)| Some((atom.match_fact(fact)?, order.clone()))) + } +} + +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum RowElement { + Value(StorageValueT), + Bottom, + Top, +} + +type Row = Box<[RowElement]>; + +impl PartialOrd for RowElement { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for RowElement { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (RowElement::Bottom, RowElement::Bottom) => std::cmp::Ordering::Equal, + (RowElement::Top, RowElement::Top) => std::cmp::Ordering::Equal, + (RowElement::Value(a), RowElement::Value(b)) => a.cmp(b), + + (_, RowElement::Bottom) => std::cmp::Ordering::Greater, + (_, RowElement::Top) => std::cmp::Ordering::Less, + (RowElement::Bottom, _) => std::cmp::Ordering::Less, + (RowElement::Top, _) => std::cmp::Ordering::Greater, + } + } +} + +enum MatchResult { + Matches, + InBounds, + OutOfBounds, +} + +impl RowElement { + fn value(self) -> StorageValueT { + match self { + RowElement::Value(inner) => inner, + RowElement::Top | RowElement::Bottom => panic!("called value() on RowElement::Ghost"), + } + } +} + +fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { + let mut index = 0; + + while index < pattern.len() { + let RowElement::Value(value) = pattern[index] else { + break; + }; + + match value.cmp(&row[index].value()) { + std::cmp::Ordering::Less => return MatchResult::OutOfBounds, + std::cmp::Ordering::Equal => {} + std::cmp::Ordering::Greater => panic!("pattern must always be a lower bound"), + } + + index += 1; + } + + // only here if pattern[index] == Ghost || index >= pattern.len() + index += 1; + + while index < pattern.len() { + let RowElement::Value(value) = pattern[index] else { + index += 1; + continue; + }; + + if value != row[index].value() { + return MatchResult::InBounds; + } + + index += 1; + } + + MatchResult::Matches +} + +struct RowIterator<'a> { + lower_cursor: btree_set::Cursor<'a, Row>, + upper_cursor: btree_set::Cursor<'a, Row>, + pattern: Row, +} + +impl<'a> Iterator for RowIterator<'a> { + type Item = &'a [RowElement]; + + fn next(&mut self) -> Option { + while let Some(row) = self.lower_cursor.next() { + if Some(row) == self.upper_cursor.peek_next() { + return None; + } + + match match_rows(&self.pattern, row) { + MatchResult::Matches => return Some(row), + MatchResult::InBounds => continue, + MatchResult::OutOfBounds => unreachable!("this should have been caught early"), + } + } + + None + } +} + +trait GhostBound { + fn invert_bound(&self) -> Self; +} + +impl GhostBound for Row { + fn invert_bound(&self) -> Self { + self.iter() + .map(|elem| match elem { + RowElement::Bottom => RowElement::Top, + RowElement::Top => RowElement::Bottom, + value => *value, + }) + .collect() + } +} + +fn find_all_matches<'a>(pattern: Row, table: &'a BTreeSet) -> RowIterator<'a> { + let lower_cursor = table.lower_bound(Bound::Excluded(&pattern)); + let upper_cursor = table.upper_bound(Bound::Excluded(&pattern.invert_bound())); + RowIterator { + lower_cursor, + upper_cursor, + pattern, + } +} + +fn join<'a, 'b, 'c>( + subst: &'a SaturationSubstitution, + terms: &'b [BodyTerm], + table: &'c BTreeSet, +) -> impl Iterator + use<'a, 'b, 'c> { + find_all_matches(subst.bind(terms), table).map(|row| { + let mut subst = subst.clone(); + subst.update(terms, row); + subst + }) +} + +#[test] +fn find_all_matches_works() { + macro_rules! table { + [ $([ $($v:expr),* ],)* ] => { + BTreeSet::from([ $( Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), )* ]) + }; + } + + let table: BTreeSet = table![ + [0, 0, 0, 1, 0], + [0, 1, 0, 0, 0], + [0, 1, 0, 1, 2], + [0, 1, 1, 0, 0], + [0, 1, 2, 1, 2], + [1, 0, 0, 0, 0], + [1, 1, 0, 1, 2], + [2, 1, 0, 0, 0], + ]; + + let pattern1: Row = Box::from([ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Bottom, + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Bottom, + ]); + + let matches: Vec<_> = find_all_matches(pattern1, &table).collect(); + let expected: Vec<&[RowElement]> = vec![ + &[ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + ], + &[ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(2)), + ], + ]; + + assert_eq!(matches, expected); + + let pattern = Box::from([ + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Bottom, + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Bottom, + ]); + + let mut iter = find_all_matches(pattern, &table); + let expected: &[RowElement] = &[ + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + ]; + assert_eq!( + iter.lower_cursor.peek_next().map(|row| { + let row: &[RowElement] = row; + row + }), + Some(expected) + ); + assert_eq!(iter.next(), Some(expected)); + assert_eq!(iter.next(), None); +} + +fn bench_rules(n: usize) -> Vec { + let one = BodyTerm::Constant(StorageValueT::Int64(1)); + let zero = BodyTerm::Constant(StorageValueT::Int64(0)); + + let rules: Vec<_> = (0..n) + .map(|i| { + let head = (0..VariableIdx::try_from(i).unwrap()) + .map(BodyTerm::Variable) + .chain(Some(one)) + .chain(repeat_n(zero, n - i - 1)); + + let head = SaturationAtom { + predicate: Arc::from("p"), + terms: head.collect(), + }; + + let body = (0..VariableIdx::try_from(i).unwrap()) + .map(BodyTerm::Variable) + .chain(Some(zero)) + .chain(repeat_n(one, n - i - 1)); + + let body = SaturationAtom { + predicate: Arc::from("p"), + terms: body.collect(), + }; + + SaturationRule { + body_atoms: Box::from([body]), + join_orders: Box::from([Arc::from([])]), + head: Head::Datalog(Box::from([head])), + } + }) + .collect(); + + rules +} + +trait Set { + type Item: ?Sized; + + fn contains(&self, it: &Self::Item) -> bool; +} + +impl Set for BTreeSet { + type Item = [RowElement]; + + fn contains(&self, it: &Self::Item) -> bool { + self.contains(it) + } +} + +struct LoopJoin<'a, 'b> { + rows: RowIterator<'a>, + terms: &'b [BodyTerm], +} + +struct Filter<'a, 'b> { + table: &'a dyn Set, + terms: &'b [BodyTerm], +} + +enum JoinStep<'a, 'b> { + LoopJoin(LoopJoin<'a, 'b>), + Filter(Filter<'a, 'b>), +} + +enum Cases { + A(A), + B(B), +} + +impl Iterator for Cases +where + A: Iterator, + B: Iterator, +{ + type Item = A::Item; + + fn next(&mut self) -> Option { + match self { + Cases::A(a) => a.next(), + Cases::B(b) => b.next(), + } + } +} + +fn join_step<'a, 'b, 'c>( + step: JoinStep<'a, 'b>, + subst: &'c SaturationSubstitution, +) -> impl Iterator + use<'a, 'b, 'c> { + match step { + JoinStep::LoopJoin(LoopJoin { rows, terms }) => Cases::A(rows.map(|row| { + let mut subst = subst.clone(); + subst.update(terms, row); + subst + })), + JoinStep::Filter(Filter { table, terms }) => { + let row = subst.bind(terms); + match table.contains(&row) { + true => Cases::B(Some(subst.clone()).into_iter()), + false => Cases::B(None.into_iter()), + } + } + } +} + +struct JoinPlan<'a, 'b> { + inputs: Vec, + steps: Vec>, +} + +struct Variables(HashMap); + +impl Variables { + fn get(&mut self, var: String) -> u16 { + let len = self.0.len().try_into().unwrap(); + *self.0.entry(var).or_insert(len) + } +} + +fn convert_term(term: Term, dict: &mut Dict, vars: &mut Variables) -> Result { + match term.try_into_ground(&Default::default()) { + Ok(ground) => { + let value = ground.value().to_storage_value_t_dict(dict); + Ok(BodyTerm::Constant(value)) + } + Err(term) => { + let Term::Primitive(Primitive::Variable(var)) = term else { + return Err(()); + }; + + let Variable::Universal(var) = var else { + return Err(()); + }; + + match var.name() { + Some(name) => Ok(BodyTerm::Variable(vars.get(name))), + None => Ok(BodyTerm::Ignore), + } + } + } +} + +struct Interner(HashSet>); + +impl Interner { + fn create(&mut self, input: &str) -> Arc { + self.0.get_or_insert_with(input, |s| Arc::from(s)).clone() + } +} + +fn convert_atom(atom: &Atom, dict: &mut Dict, vars: &mut Variables) -> Result { + let predicate = Arc::from(atom.predicate().name()); + + let terms: Box<[BodyTerm]> = atom + .arguments() + .map(|term| convert_term(term.clone(), dict, vars)) + .collect::>()?; + + Ok(SaturationAtom { predicate, terms }) +} + +fn convert_literal( + lit: &Literal, + dict: &mut Dict, + vars: &mut Variables, +) -> Result { + match lit { + Literal::Positive(atom) => convert_atom(atom, dict, vars), + Literal::Negative(_) => Err(()), + Literal::Operation(_) => Err(()), + } +} + +fn filter_index(variables: &HashSet, atom: &SaturationAtom) -> (i32, i32) { + let mut other_variables = 0; + let mut overlapping_variables = 0; + + for var in atom.variables() { + if variables.contains(&var) { + overlapping_variables += 1; + } else { + other_variables += 1; + } + } + + (other_variables, overlapping_variables) +} + +fn compute_join_order( + mut variables: HashSet, + body: &[SaturationAtom], + mask: &mut [bool], +) -> JoinOrder { + let mut operations = Vec::new(); + + loop { + let mut index = None; + let mut min_new_variables = i32::MAX; + let mut max_overlapping = 0; + + for (current_index, atom) in body + .iter() + .enumerate() + .zip(&mut *mask) + .filter_map(|(atom, flag)| flag.then_some(atom)) + { + let (other, overlap) = filter_index(&variables, atom); + + if other < min_new_variables + || (other == min_new_variables && max_overlapping < overlap) + { + min_new_variables = other; + max_overlapping = overlap; + index = Some(current_index); + } + } + + let Some(index) = index else { + break JoinOrder::from(operations); + }; + + mask[index] = false; + if min_new_variables == 0 { + operations.push(JoinOp::Filter(body[index].clone())); + } else { + operations.push(JoinOp::Join(body[index].clone())); + variables.extend(body[index].variables()); + } + } +} + +fn convert(rule: Rule, dict: &mut Dict) -> Result { + let mut vars = Variables(HashMap::new()); + + let body: Box<[SaturationAtom]> = rule + .body() + .iter() + .map(|lit| convert_literal(lit, dict, &mut vars)) + .collect::>()?; + + let join_orders: Box<[JoinOrder]> = body + .iter() + .enumerate() + .map(|(idx, atom)| { + let variables: HashSet<_> = atom.variables().collect(); + let mut mask = vec![true; body.len()]; + mask[idx] = false; + + compute_join_order(variables, &body, &mut mask) + }) + .collect(); + + let head = if rule.variables().any(Variable::is_existential) { + todo!() + } else { + Head::Datalog( + rule.head() + .iter() + .map(|atom| convert_atom(atom, dict, &mut vars)) + .collect::>()?, + ) + }; + + Ok(SaturationRule { + body_atoms: body, + join_orders, + head, + }) +} + +fn run() { + let n = 20; + let mut todo = VecDeque::from([( + SaturationFact { + predicate: Arc::from("p"), + values: repeat_n(StorageValueT::Int64(0), n).collect(), + }, + repeat_n(RowElement::Value(StorageValueT::Int64(0)), n).collect::(), + )]); + + let rules = bench_rules(n); + + let mut closure = HashMap::, BTreeSet>::new(); + + loop { + if todo.is_empty() { + break; + } + + let mut ops = Vec::new(); + + while let Some((fact, tuple)) = todo.pop_front() { + if !closure + .entry(fact.predicate.clone()) + .or_default() + .insert(tuple) + { + continue; + } + + ops.extend(rules.iter().enumerate().flat_map(|(index, rule)| { + rule.trigger(&fact) + .map(move |(substitution, join_order)| (index, substitution, join_order)) + })) + } + + // join phase + todo = ops + .into_par_iter() + .map(|(rule_index, substitution, join_order)| { + let rule = &rules[rule_index]; + + // let iter = join_order + // .iter() + // .fold(JoinNode::Leaf(substitution), |input, &index| { + // JoinNode::LoopJoin { + // table: closure.get(&rule.body_atoms[index].predicate).unwrap(), + // terms: &rule.body_atoms[index].terms, + // input: Box::new(input), + // } + // }); + + let tuple = substitution.bind(&rule.head.terms); + + let fact = SaturationFact { + predicate: rule.head.predicate.clone(), + values: tuple.iter().cloned().map(RowElement::value).collect(), + }; + + (fact, tuple) + }) + .collect(); + } +} + +fn main() { + let tp = ThreadPoolBuilder::new().build().unwrap(); + tp.install(|| run()) +} From ccad5ba0adf4b99323d9cdb582ff3352ccfaf519 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Tue, 5 Aug 2025 19:40:19 +0200 Subject: [PATCH 03/26] saturation_model --- nemo-physical/src/datatypes.rs | 2 +- nemo-physical/src/datatypes/storage_value.rs | 2 +- nemo-physical/src/datavalues/any_datavalue.rs | 2 +- nemo/src/execution.rs | 1 + nemo/src/execution/saturation.rs | 3 + .../execution/saturation/saturation_model.rs | 256 ++++++++++++++++++ nemo/src/saturator.rs | 205 -------------- 7 files changed, 263 insertions(+), 208 deletions(-) create mode 100644 nemo/src/execution/saturation.rs create mode 100644 nemo/src/execution/saturation/saturation_model.rs diff --git a/nemo-physical/src/datatypes.rs b/nemo-physical/src/datatypes.rs index d4fd326bd..52486dd3f 100644 --- a/nemo-physical/src/datatypes.rs +++ b/nemo-physical/src/datatypes.rs @@ -5,7 +5,7 @@ pub(crate) mod storage_type_name; pub(crate) use storage_type_name::StorageTypeName; /// Module for defining [StorageValueT] pub(crate) mod storage_value; -pub(crate) use storage_value::StorageValueT; +pub use storage_value::StorageValueT; /// Module for defining [Double] pub mod double; pub use double::Double; diff --git a/nemo-physical/src/datatypes/storage_value.rs b/nemo-physical/src/datatypes/storage_value.rs index 4bb907f65..24786a7d9 100644 --- a/nemo-physical/src/datatypes/storage_value.rs +++ b/nemo-physical/src/datatypes/storage_value.rs @@ -12,7 +12,7 @@ use super::{double::Double, float::Float, into_datavalue::IntoDataValue, Storage /// Ord and PartialOrd assume U32 < U64 < I64 < Float < Double. /// More information at #[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)] -pub(crate) enum StorageValueT { +pub enum StorageValueT { /// A value of type [StorageTypeName::Id32]. Such values always refer to an entry in a /// dictionary, rather than to the literal numerical integer value. Id32(u32), diff --git a/nemo-physical/src/datavalues/any_datavalue.rs b/nemo-physical/src/datavalues/any_datavalue.rs index 1a9d8c97b..a8a7ecddf 100644 --- a/nemo-physical/src/datavalues/any_datavalue.rs +++ b/nemo-physical/src/datavalues/any_datavalue.rs @@ -515,7 +515,7 @@ impl AnyDataValue { /// The correct process in this case is to use the dictionary to create any null value on which this /// method will later be called. It is not possible to newly create a dictionary id for an arbitrary /// null value (in such a way that the same ID will be returned if an equal null value is converted). - pub(crate) fn to_storage_value_t_dict(&self, dictionary: &mut Dict) -> StorageValueT { + pub fn to_storage_value_t_dict(&self, dictionary: &mut Dict) -> StorageValueT { match self.value_domain() { ValueDomain::Tuple | ValueDomain::Map diff --git a/nemo/src/execution.rs b/nemo/src/execution.rs index 8d1df65e2..0404dc96f 100644 --- a/nemo/src/execution.rs +++ b/nemo/src/execution.rs @@ -14,6 +14,7 @@ use self::selection_strategy::{ pub mod execution_parameters; pub mod planning; pub mod rule_execution; +pub mod saturation; pub mod selection_strategy; pub mod tracing; diff --git a/nemo/src/execution/saturation.rs b/nemo/src/execution/saturation.rs new file mode 100644 index 000000000..6ed3d149a --- /dev/null +++ b/nemo/src/execution/saturation.rs @@ -0,0 +1,3 @@ +//! Saturate a set of rules. + +pub mod saturation_model; diff --git a/nemo/src/execution/saturation/saturation_model.rs b/nemo/src/execution/saturation/saturation_model.rs new file mode 100644 index 000000000..e51376bc6 --- /dev/null +++ b/nemo/src/execution/saturation/saturation_model.rs @@ -0,0 +1,256 @@ +//! Model of rules supported by the saturation algorithm + +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, + sync::Arc, +}; + +use nemo_physical::{datatypes::StorageValueT, dictionary::DvDict, management::database::Dict}; + +use crate::rule_model::components::{ + atom::Atom, + literal::Literal, + rule::Rule, + term::{ + primitive::{ground::GroundTerm, variable::Variable, Primitive}, + Term, + }, + IterableVariables, +}; + +type VariableIdx = u16; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum BodyTerm { + Constant(StorageValueT), + Variable(VariableIdx), + Ignore, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct SaturationAtom { + predicate: Arc, + terms: Box<[BodyTerm]>, +} + +impl SaturationAtom { + fn variables(&self) -> impl Iterator + use<'_> { + self.terms.iter().flat_map(|term| match term { + BodyTerm::Variable(var) => Some(*var), + _ => None, + }) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum HeadTerm { + Existential(VariableIdx), + Universal(VariableIdx), + Constant(StorageValueT), +} + +struct HeadAtom { + predicate: Arc, + terms: Box<[HeadTerm]>, +} + +enum Head { + Datalog(Box<[SaturationAtom]>), + Existential(Box<[HeadTerm]>), +} + +type JoinOrder = Arc<[JoinOp]>; + +enum JoinOp { + Join(SaturationAtom), + Filter(SaturationAtom), +} + +struct SaturationRule { + body_atoms: Box<[SaturationAtom]>, + join_orders: Box<[JoinOrder]>, + head: Head, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +struct SaturationFact { + predicate: Arc, + values: Box<[StorageValueT]>, +} + +struct Variables(HashMap); + +impl Variables { + fn get(&mut self, var: Cow) -> u16 { + match self.0.get(var.as_ref()) { + Some(index) => *index, + None => { + let index = self + .0 + .len() + .try_into() + .expect("number of variables must be smaller than u16::MAX"); + + self.0.insert(var.to_string(), index); + index + } + } + } +} + +fn convert_term(term: Term, dict: &mut Dict, vars: &mut Variables) -> Result { + match GroundTerm::try_from(term) { + Ok(ground) => { + let value = ground.value().to_storage_value_t_dict(dict); + Ok(BodyTerm::Constant(value)) + } + Err(term) => { + let Term::Primitive(Primitive::Variable(var)) = term else { + return Err(()); + }; + + let Variable::Universal(var) = var else { + return Err(()); + }; + + match var.name() { + Some(name) => Ok(BodyTerm::Variable(vars.get(Cow::Borrowed(name)))), + None => Ok(BodyTerm::Ignore), + } + } + } +} + +struct Interner(HashSet>); + +impl Interner { + fn create(&mut self, input: &str) -> Arc { + if let Some(res) = self.0.get(input) { + return res.clone(); + } else { + self.0.insert(Arc::from(input)); + self.0.get(input).unwrap().clone() + } + } +} + +fn convert_atom(atom: &Atom, dict: &mut Dict, vars: &mut Variables) -> Result { + let predicate = Arc::from(atom.predicate().name()); + + let terms: Box<[BodyTerm]> = atom + .terms() + .map(|term| convert_term(term.clone(), dict, vars)) + .collect::>()?; + + Ok(SaturationAtom { predicate, terms }) +} + +fn convert_literal( + lit: &Literal, + dict: &mut Dict, + vars: &mut Variables, +) -> Result { + match lit { + Literal::Positive(atom) => convert_atom(atom, dict, vars), + Literal::Negative(_) => Err(()), + Literal::Operation(_) => Err(()), + } +} + +fn filter_index(variables: &HashSet, atom: &SaturationAtom) -> (i32, i32) { + let mut other_variables = 0; + let mut overlapping_variables = 0; + + for var in atom.variables() { + if variables.contains(&var) { + overlapping_variables += 1; + } else { + other_variables += 1; + } + } + + (other_variables, overlapping_variables) +} + +fn compute_join_order( + mut variables: HashSet, + body: &[SaturationAtom], + mask: &mut [bool], +) -> JoinOrder { + let mut operations = Vec::new(); + + loop { + let mut index = None; + let mut min_new_variables = i32::MAX; + let mut max_overlapping = 0; + + for (current_index, atom) in body + .iter() + .enumerate() + .zip(&mut *mask) + .filter_map(|(atom, flag)| flag.then_some(atom)) + { + let (other, overlap) = filter_index(&variables, atom); + + if other < min_new_variables + || (other == min_new_variables && max_overlapping < overlap) + { + min_new_variables = other; + max_overlapping = overlap; + index = Some(current_index); + } + } + + let Some(index) = index else { + break JoinOrder::from(operations); + }; + + mask[index] = false; + if min_new_variables == 0 { + operations.push(JoinOp::Filter(body[index].clone())); + } else { + operations.push(JoinOp::Join(body[index].clone())); + variables.extend(body[index].variables()); + } + } +} + +fn convert(rule: Rule, dict: &mut Dict) -> Result { + let mut vars = Variables(HashMap::new()); + + let body: Box<[SaturationAtom]> = rule + .body() + .iter() + .map(|lit| convert_literal(lit, dict, &mut vars)) + .collect::>()?; + + let join_orders: Box<[JoinOrder]> = body + .iter() + .enumerate() + .map(|(idx, atom)| { + let variables: HashSet<_> = atom.variables().collect(); + let mut mask = vec![true; body.len()]; + mask[idx] = false; + + compute_join_order(variables, &body, &mut mask) + }) + .collect(); + + let head = if rule.variables().any(Variable::is_existential) { + todo!() + } else { + Head::Datalog( + rule.head() + .iter() + .map(|atom| convert_atom(atom, dict, &mut vars)) + .collect::>()?, + ) + }; + + Ok(SaturationRule { + body_atoms: body, + join_orders, + head, + }) +} diff --git a/nemo/src/saturator.rs b/nemo/src/saturator.rs index 1ca348b4f..991503e16 100644 --- a/nemo/src/saturator.rs +++ b/nemo/src/saturator.rs @@ -30,15 +30,6 @@ use std::{ thread::current, }; -type VariableIdx = u16; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum BodyTerm { - Constant(StorageValueT), - Variable(VariableIdx), - Ignore, -} - #[derive(Debug, Default, Clone)] struct SaturationSubstitution(Vec>); @@ -141,42 +132,6 @@ impl SaturationAtom { } } -type JoinOrder = Arc<[JoinOp]>; - -enum JoinOp { - Join(SaturationAtom), - Filter(SaturationAtom), -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -enum HeadTerm { - Existential(VariableIdx), - Universal(VariableIdx), - Constant(StorageValueT), -} - -struct HeadAtom { - predicate: Arc, - terms: Box<[HeadTerm]>, -} - -enum Head { - Datalog(Box<[SaturationAtom]>), - Existential(Box<[HeadTerm]>), -} - -struct SaturationRule { - body_atoms: Box<[SaturationAtom]>, - join_orders: Box<[JoinOrder]>, - head: Head, -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct SaturationFact { - predicate: Arc, - values: Box<[StorageValueT]>, -} - impl SaturationRule { fn trigger<'a, 'b>( &'a self, @@ -519,166 +474,6 @@ struct JoinPlan<'a, 'b> { steps: Vec>, } -struct Variables(HashMap); - -impl Variables { - fn get(&mut self, var: String) -> u16 { - let len = self.0.len().try_into().unwrap(); - *self.0.entry(var).or_insert(len) - } -} - -fn convert_term(term: Term, dict: &mut Dict, vars: &mut Variables) -> Result { - match term.try_into_ground(&Default::default()) { - Ok(ground) => { - let value = ground.value().to_storage_value_t_dict(dict); - Ok(BodyTerm::Constant(value)) - } - Err(term) => { - let Term::Primitive(Primitive::Variable(var)) = term else { - return Err(()); - }; - - let Variable::Universal(var) = var else { - return Err(()); - }; - - match var.name() { - Some(name) => Ok(BodyTerm::Variable(vars.get(name))), - None => Ok(BodyTerm::Ignore), - } - } - } -} - -struct Interner(HashSet>); - -impl Interner { - fn create(&mut self, input: &str) -> Arc { - self.0.get_or_insert_with(input, |s| Arc::from(s)).clone() - } -} - -fn convert_atom(atom: &Atom, dict: &mut Dict, vars: &mut Variables) -> Result { - let predicate = Arc::from(atom.predicate().name()); - - let terms: Box<[BodyTerm]> = atom - .arguments() - .map(|term| convert_term(term.clone(), dict, vars)) - .collect::>()?; - - Ok(SaturationAtom { predicate, terms }) -} - -fn convert_literal( - lit: &Literal, - dict: &mut Dict, - vars: &mut Variables, -) -> Result { - match lit { - Literal::Positive(atom) => convert_atom(atom, dict, vars), - Literal::Negative(_) => Err(()), - Literal::Operation(_) => Err(()), - } -} - -fn filter_index(variables: &HashSet, atom: &SaturationAtom) -> (i32, i32) { - let mut other_variables = 0; - let mut overlapping_variables = 0; - - for var in atom.variables() { - if variables.contains(&var) { - overlapping_variables += 1; - } else { - other_variables += 1; - } - } - - (other_variables, overlapping_variables) -} - -fn compute_join_order( - mut variables: HashSet, - body: &[SaturationAtom], - mask: &mut [bool], -) -> JoinOrder { - let mut operations = Vec::new(); - - loop { - let mut index = None; - let mut min_new_variables = i32::MAX; - let mut max_overlapping = 0; - - for (current_index, atom) in body - .iter() - .enumerate() - .zip(&mut *mask) - .filter_map(|(atom, flag)| flag.then_some(atom)) - { - let (other, overlap) = filter_index(&variables, atom); - - if other < min_new_variables - || (other == min_new_variables && max_overlapping < overlap) - { - min_new_variables = other; - max_overlapping = overlap; - index = Some(current_index); - } - } - - let Some(index) = index else { - break JoinOrder::from(operations); - }; - - mask[index] = false; - if min_new_variables == 0 { - operations.push(JoinOp::Filter(body[index].clone())); - } else { - operations.push(JoinOp::Join(body[index].clone())); - variables.extend(body[index].variables()); - } - } -} - -fn convert(rule: Rule, dict: &mut Dict) -> Result { - let mut vars = Variables(HashMap::new()); - - let body: Box<[SaturationAtom]> = rule - .body() - .iter() - .map(|lit| convert_literal(lit, dict, &mut vars)) - .collect::>()?; - - let join_orders: Box<[JoinOrder]> = body - .iter() - .enumerate() - .map(|(idx, atom)| { - let variables: HashSet<_> = atom.variables().collect(); - let mut mask = vec![true; body.len()]; - mask[idx] = false; - - compute_join_order(variables, &body, &mut mask) - }) - .collect(); - - let head = if rule.variables().any(Variable::is_existential) { - todo!() - } else { - Head::Datalog( - rule.head() - .iter() - .map(|atom| convert_atom(atom, dict, &mut vars)) - .collect::>()?, - ) - }; - - Ok(SaturationRule { - body_atoms: body, - join_orders, - head, - }) -} - fn run() { let n = 20; let mut todo = VecDeque::from([( From 6974593d253762f6313380ab3230723621c9aabb Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Tue, 5 Aug 2025 23:06:35 +0200 Subject: [PATCH 04/26] trigger and join --- nemo/src/execution/saturation.rs | 3 +- nemo/src/execution/saturation/execution.rs | 279 +++++++++++++++++ nemo/src/execution/saturation/model.rs | 281 ++++++++++++++++++ .../execution/saturation/saturation_model.rs | 256 ---------------- nemo/src/lib.rs | 1 + nemo/src/saturator.rs | 259 ---------------- 6 files changed, 563 insertions(+), 516 deletions(-) create mode 100644 nemo/src/execution/saturation/execution.rs create mode 100644 nemo/src/execution/saturation/model.rs delete mode 100644 nemo/src/execution/saturation/saturation_model.rs diff --git a/nemo/src/execution/saturation.rs b/nemo/src/execution/saturation.rs index 6ed3d149a..48bab25bd 100644 --- a/nemo/src/execution/saturation.rs +++ b/nemo/src/execution/saturation.rs @@ -1,3 +1,4 @@ //! Saturate a set of rules. -pub mod saturation_model; +pub mod execution; +pub mod model; diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs new file mode 100644 index 000000000..36dbff434 --- /dev/null +++ b/nemo/src/execution/saturation/execution.rs @@ -0,0 +1,279 @@ +use std::{ + collections::{btree_set, BTreeSet}, + ops::{Bound, Index}, +}; + +use nemo_physical::datatypes::StorageValueT; + +use crate::execution::planning::operations::join; + +use super::model::{ + BodyTerm, JoinOrder, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, +}; + +#[derive(Debug, Default, Clone)] +struct SaturationSubstitution(Vec>); + +impl Index for SaturationSubstitution { + type Output = Option; + + fn index(&self, index: VariableIdx) -> &Self::Output { + if self.0.len() <= usize::from(index) { + &None + } else { + &self.0[usize::from(index)] + } + } +} + +impl SaturationSubstitution { + fn insert(&mut self, var: VariableIdx, value: StorageValueT) -> Option { + if self.0.len() <= usize::from(var) { + self.0.resize_with(usize::from(var + 1), || None); + self.0[usize::from(var)] = Some(value); + None + } else { + let prev = self.0[usize::from(var)]; + self.0[usize::from(var)] = Some(value); + prev + } + } + + fn bind(&self, terms: &[BodyTerm]) -> Row { + terms + .iter() + .map(|term| match term { + BodyTerm::Constant(constant) => RowElement::Value(*constant), + BodyTerm::Variable(var) => self[*var] + .map(RowElement::Value) + .unwrap_or(RowElement::Bottom), + BodyTerm::Ignore => RowElement::Bottom, + }) + .collect() + } + + fn update(&mut self, terms: &[BodyTerm], row: &[RowElement]) { + for (term, value) in terms.iter().zip(row) { + let BodyTerm::Variable(var) = term else { + continue; + }; + + self.insert(*var, value.value()); + } + } +} + +impl SaturationAtom { + fn match_fact(&self, fact: &SaturationFact) -> Option { + if fact.predicate != self.predicate { + return None; + } + + let mut res = SaturationSubstitution::default(); + debug_assert_eq!(self.terms.len(), fact.values.len()); + + for (term, value) in self.terms.iter().zip(fact.values.iter()) { + match term { + BodyTerm::Constant(constant) => { + if value != constant { + return None; + } + } + BodyTerm::Variable(idx) => { + if let Some(prev) = res.insert(*idx, *value) { + if prev != *value { + return None; + } + } + } + BodyTerm::Ignore => {} + } + } + + Some(res) + } +} + +struct Triggers<'a, 'b> { + rule: &'a mut SaturationRule, + fact: &'b SaturationFact, + index: usize, +} + +impl Iterator for Triggers<'_, '_> { + type Item = (SaturationSubstitution, JoinOrder); + + fn next(&mut self) -> Option { + while self.index < self.rule.body_atoms.len() { + let Some(substitution) = self.rule.body_atoms[self.index].match_fact(self.fact) else { + self.index += 1; + continue; + }; + + let join_order = self.rule.join_order(self.index); + self.index += 1; + + return Some((substitution, join_order)); + } + + None + } +} + +impl SaturationRule { + fn trigger<'a, 'b>( + &'a mut self, + fact: &'b SaturationFact, + ) -> impl Iterator + use<'a, 'b> { + Triggers { + rule: self, + fact, + index: 0, + } + } +} + +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum RowElement { + Value(StorageValueT), + Bottom, + Top, +} + +type Row = Box<[RowElement]>; + +impl PartialOrd for RowElement { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for RowElement { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (RowElement::Bottom, RowElement::Bottom) => std::cmp::Ordering::Equal, + (RowElement::Top, RowElement::Top) => std::cmp::Ordering::Equal, + (RowElement::Value(a), RowElement::Value(b)) => a.cmp(b), + + (_, RowElement::Bottom) => std::cmp::Ordering::Greater, + (_, RowElement::Top) => std::cmp::Ordering::Less, + (RowElement::Bottom, _) => std::cmp::Ordering::Less, + (RowElement::Top, _) => std::cmp::Ordering::Greater, + } + } +} + +enum MatchResult { + Matches, + InBounds, + OutOfBounds, +} + +impl RowElement { + fn value(self) -> StorageValueT { + match self { + RowElement::Value(inner) => inner, + RowElement::Top | RowElement::Bottom => panic!("called value() on RowElement::Ghost"), + } + } +} + +fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { + let mut index = 0; + + while index < pattern.len() { + let RowElement::Value(value) = pattern[index] else { + break; + }; + + match value.cmp(&row[index].value()) { + std::cmp::Ordering::Less => return MatchResult::OutOfBounds, + std::cmp::Ordering::Equal => {} + std::cmp::Ordering::Greater => panic!("pattern must always be a lower bound"), + } + + index += 1; + } + + // only here if pattern[index] == Ghost || index >= pattern.len() + index += 1; + + while index < pattern.len() { + let RowElement::Value(value) = pattern[index] else { + index += 1; + continue; + }; + + if value != row[index].value() { + return MatchResult::InBounds; + } + + index += 1; + } + + MatchResult::Matches +} + +struct RowIterator<'a> { + lower_cursor: btree_set::Cursor<'a, Row>, + upper_cursor: btree_set::Cursor<'a, Row>, + pattern: Row, +} + +impl<'a> Iterator for RowIterator<'a> { + type Item = &'a [RowElement]; + + fn next(&mut self) -> Option { + while let Some(row) = self.lower_cursor.next() { + if Some(row) == self.upper_cursor.peek_next() { + return None; + } + + match match_rows(&self.pattern, row) { + MatchResult::Matches => return Some(row), + MatchResult::InBounds => continue, + MatchResult::OutOfBounds => unreachable!("this should have been caught early"), + } + } + + None + } +} + +trait GhostBound { + fn invert_bound(&self) -> Self; +} + +impl GhostBound for Row { + fn invert_bound(&self) -> Self { + self.iter() + .map(|elem| match elem { + RowElement::Bottom => RowElement::Top, + RowElement::Top => RowElement::Bottom, + value => *value, + }) + .collect() + } +} + +fn find_all_matches<'a>(pattern: Row, table: &'a BTreeSet) -> RowIterator<'a> { + let lower_cursor = table.lower_bound(Bound::Excluded(&pattern)); + let upper_cursor = table.upper_bound(Bound::Excluded(&pattern.invert_bound())); + RowIterator { + lower_cursor, + upper_cursor, + pattern, + } +} + +fn join<'a, 'b, 'c>( + subst: &'a SaturationSubstitution, + terms: &'b [BodyTerm], + table: &'c BTreeSet, +) -> impl Iterator + use<'a, 'b, 'c> { + find_all_matches(subst.bind(terms), table).map(|row| { + let mut subst = subst.clone(); + subst.update(terms, row); + subst + }) +} diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs new file mode 100644 index 000000000..f07428de9 --- /dev/null +++ b/nemo/src/execution/saturation/model.rs @@ -0,0 +1,281 @@ +//! Model of rules supported by the saturation algorithm + +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, + iter::repeat, + sync::Arc, +}; + +use nemo_physical::{datatypes::StorageValueT, management::database::Dict}; + +use crate::rule_model::components::{ + atom::Atom, + literal::Literal, + rule::Rule, + term::{ + primitive::{ground::GroundTerm, variable::Variable, Primitive}, + Term, + }, + IterableVariables, +}; + +pub(crate) type VariableIdx = u16; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +/// Terms supported in the body of [`SaturationRule`]s +pub(crate) enum BodyTerm { + Constant(StorageValueT), + Variable(VariableIdx), + Ignore, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +/// Atoms whose arguments are represented by [`BodyTerm`] +pub(crate) struct SaturationAtom { + pub(super) predicate: Arc, + pub(super) terms: Box<[BodyTerm]>, +} + +impl SaturationAtom { + /// Iterate over the variables in a [`SaturationAtom`] + pub(super) fn variables(&self) -> impl Iterator + use<'_> { + self.terms.iter().flat_map(|term| match term { + BodyTerm::Variable(var) => Some(*var), + _ => None, + }) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum HeadTerm { + Existential(VariableIdx), + Universal(VariableIdx), + Constant(StorageValueT), +} + +struct HeadAtom { + predicate: Arc, + terms: Box<[HeadTerm]>, +} + +enum Head { + Datalog(Box<[SaturationAtom]>), +} + +pub(crate) type JoinOrder = Arc<[JoinOp]>; + +pub(crate) enum JoinOp { + Join(SaturationAtom), + Filter(SaturationAtom), +} + +pub(crate) struct SaturationRule { + pub(super) body_atoms: Arc<[SaturationAtom]>, + join_orders: Box<[Option]>, + pub(super) head: Head, +} + +impl SaturationRule { + pub(super) fn join_order(&mut self, index: usize) -> JoinOrder { + if let Some(order) = &self.join_orders[index] { + order.clone() + } else { + let atom = &self.body_atoms[index]; + let variables: HashSet<_> = atom.variables().collect(); + let mut mask = vec![true; self.body_atoms.len()]; + mask[index] = false; + + let order = compute_join_order(variables, &self.body_atoms, &mut mask); + + self.join_orders[index] = Some(order.clone()); + order + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct SaturationFact { + pub(super) predicate: Arc, + pub(super) values: Arc<[StorageValueT]>, +} + +struct Variables(HashMap); + +impl Variables { + fn get(&mut self, var: Cow) -> u16 { + match self.0.get(var.as_ref()) { + Some(index) => *index, + None => { + let index = self + .0 + .len() + .try_into() + .expect("number of variables must be smaller than u16::MAX"); + + self.0.insert(var.to_string(), index); + index + } + } + } +} + +impl SaturationRuleTranslation<'_> { + fn convert_term(&mut self, term: &Term) -> Result { + match GroundTerm::try_from(term.clone()) { + Ok(ground) => { + let value = ground.value().to_storage_value_t_dict(self.dict); + Ok(BodyTerm::Constant(value)) + } + Err(term) => { + let Term::Primitive(Primitive::Variable(var)) = term else { + return Err(()); + }; + + let Variable::Universal(var) = var else { + return Err(()); + }; + + match var.name() { + Some(name) => Ok(BodyTerm::Variable(self.variables.get(Cow::Borrowed(name)))), + None => Ok(BodyTerm::Ignore), + } + } + } + } + + fn convert_atom(&mut self, atom: &Atom) -> Result { + let predicate = Arc::from(atom.predicate().name()); + + let terms: Box<[BodyTerm]> = atom + .terms() + .map(|term| self.convert_term(term)) + .collect::>()?; + + Ok(SaturationAtom { predicate, terms }) + } + + fn convert_literal(&mut self, lit: &Literal) -> Result { + match lit { + Literal::Positive(atom) => self.convert_atom(atom), + Literal::Negative(_) => Err(()), + Literal::Operation(_) => Err(()), + } + } + + fn convert(&mut self, rule: Rule) -> Result { + let body: Arc<[SaturationAtom]> = rule + .body() + .iter() + .map(|lit| self.convert_literal(lit)) + .collect::>()?; + + let join_orders: Box<[_]> = repeat(None).take(body.len()).collect(); + + let head = if rule.variables().any(Variable::is_existential) { + // existential variable are not supported yet + return Err(()); + } else { + Head::Datalog( + rule.head() + .iter() + .map(|atom| self.convert_atom(atom)) + .collect::>()?, + ) + }; + + Ok(SaturationRule { + body_atoms: body, + join_orders, + head, + }) + } +} + +struct SaturationRuleTranslation<'a> { + variables: Variables, + interner: Interner, + dict: &'a mut Dict, +} + +impl<'a> SaturationRuleTranslation<'a> { + /// Create at [`SaturationRuleTranslation`] referring to a [`Dict`] + fn new(dict: &'a mut Dict) -> Self { + Self { + variables: Variables(HashMap::new()), + interner: Interner(HashSet::new()), + dict, + } + } +} + +struct Interner(HashSet>); + +impl Interner { + fn create(&mut self, input: &str) -> Arc { + if let Some(res) = self.0.get(input) { + return res.clone(); + } else { + self.0.insert(Arc::from(input)); + self.0.get(input).unwrap().clone() + } + } +} + +fn filter_index(variables: &HashSet, atom: &SaturationAtom) -> (i32, i32) { + let mut other_variables = 0; + let mut overlapping_variables = 0; + + for var in atom.variables() { + if variables.contains(&var) { + overlapping_variables += 1; + } else { + other_variables += 1; + } + } + + (other_variables, overlapping_variables) +} + +fn compute_join_order( + mut variables: HashSet, + body: &[SaturationAtom], + mask: &mut [bool], +) -> JoinOrder { + let mut operations = Vec::new(); + + loop { + let mut index = None; + let mut min_new_variables = i32::MAX; + let mut max_overlapping = 0; + + for (current_index, atom) in body + .iter() + .enumerate() + .zip(&mut *mask) + .filter_map(|(atom, flag)| flag.then_some(atom)) + { + let (other, overlap) = filter_index(&variables, atom); + + if other < min_new_variables + || (other == min_new_variables && max_overlapping < overlap) + { + min_new_variables = other; + max_overlapping = overlap; + index = Some(current_index); + } + } + + let Some(index) = index else { + break JoinOrder::from(operations); + }; + + mask[index] = false; + if min_new_variables == 0 { + operations.push(JoinOp::Filter(body[index].clone())); + } else { + operations.push(JoinOp::Join(body[index].clone())); + variables.extend(body[index].variables()); + } + } +} diff --git a/nemo/src/execution/saturation/saturation_model.rs b/nemo/src/execution/saturation/saturation_model.rs deleted file mode 100644 index e51376bc6..000000000 --- a/nemo/src/execution/saturation/saturation_model.rs +++ /dev/null @@ -1,256 +0,0 @@ -//! Model of rules supported by the saturation algorithm - -use std::{ - borrow::Cow, - collections::{HashMap, HashSet}, - sync::Arc, -}; - -use nemo_physical::{datatypes::StorageValueT, dictionary::DvDict, management::database::Dict}; - -use crate::rule_model::components::{ - atom::Atom, - literal::Literal, - rule::Rule, - term::{ - primitive::{ground::GroundTerm, variable::Variable, Primitive}, - Term, - }, - IterableVariables, -}; - -type VariableIdx = u16; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum BodyTerm { - Constant(StorageValueT), - Variable(VariableIdx), - Ignore, -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -struct SaturationAtom { - predicate: Arc, - terms: Box<[BodyTerm]>, -} - -impl SaturationAtom { - fn variables(&self) -> impl Iterator + use<'_> { - self.terms.iter().flat_map(|term| match term { - BodyTerm::Variable(var) => Some(*var), - _ => None, - }) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -enum HeadTerm { - Existential(VariableIdx), - Universal(VariableIdx), - Constant(StorageValueT), -} - -struct HeadAtom { - predicate: Arc, - terms: Box<[HeadTerm]>, -} - -enum Head { - Datalog(Box<[SaturationAtom]>), - Existential(Box<[HeadTerm]>), -} - -type JoinOrder = Arc<[JoinOp]>; - -enum JoinOp { - Join(SaturationAtom), - Filter(SaturationAtom), -} - -struct SaturationRule { - body_atoms: Box<[SaturationAtom]>, - join_orders: Box<[JoinOrder]>, - head: Head, -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct SaturationFact { - predicate: Arc, - values: Box<[StorageValueT]>, -} - -struct Variables(HashMap); - -impl Variables { - fn get(&mut self, var: Cow) -> u16 { - match self.0.get(var.as_ref()) { - Some(index) => *index, - None => { - let index = self - .0 - .len() - .try_into() - .expect("number of variables must be smaller than u16::MAX"); - - self.0.insert(var.to_string(), index); - index - } - } - } -} - -fn convert_term(term: Term, dict: &mut Dict, vars: &mut Variables) -> Result { - match GroundTerm::try_from(term) { - Ok(ground) => { - let value = ground.value().to_storage_value_t_dict(dict); - Ok(BodyTerm::Constant(value)) - } - Err(term) => { - let Term::Primitive(Primitive::Variable(var)) = term else { - return Err(()); - }; - - let Variable::Universal(var) = var else { - return Err(()); - }; - - match var.name() { - Some(name) => Ok(BodyTerm::Variable(vars.get(Cow::Borrowed(name)))), - None => Ok(BodyTerm::Ignore), - } - } - } -} - -struct Interner(HashSet>); - -impl Interner { - fn create(&mut self, input: &str) -> Arc { - if let Some(res) = self.0.get(input) { - return res.clone(); - } else { - self.0.insert(Arc::from(input)); - self.0.get(input).unwrap().clone() - } - } -} - -fn convert_atom(atom: &Atom, dict: &mut Dict, vars: &mut Variables) -> Result { - let predicate = Arc::from(atom.predicate().name()); - - let terms: Box<[BodyTerm]> = atom - .terms() - .map(|term| convert_term(term.clone(), dict, vars)) - .collect::>()?; - - Ok(SaturationAtom { predicate, terms }) -} - -fn convert_literal( - lit: &Literal, - dict: &mut Dict, - vars: &mut Variables, -) -> Result { - match lit { - Literal::Positive(atom) => convert_atom(atom, dict, vars), - Literal::Negative(_) => Err(()), - Literal::Operation(_) => Err(()), - } -} - -fn filter_index(variables: &HashSet, atom: &SaturationAtom) -> (i32, i32) { - let mut other_variables = 0; - let mut overlapping_variables = 0; - - for var in atom.variables() { - if variables.contains(&var) { - overlapping_variables += 1; - } else { - other_variables += 1; - } - } - - (other_variables, overlapping_variables) -} - -fn compute_join_order( - mut variables: HashSet, - body: &[SaturationAtom], - mask: &mut [bool], -) -> JoinOrder { - let mut operations = Vec::new(); - - loop { - let mut index = None; - let mut min_new_variables = i32::MAX; - let mut max_overlapping = 0; - - for (current_index, atom) in body - .iter() - .enumerate() - .zip(&mut *mask) - .filter_map(|(atom, flag)| flag.then_some(atom)) - { - let (other, overlap) = filter_index(&variables, atom); - - if other < min_new_variables - || (other == min_new_variables && max_overlapping < overlap) - { - min_new_variables = other; - max_overlapping = overlap; - index = Some(current_index); - } - } - - let Some(index) = index else { - break JoinOrder::from(operations); - }; - - mask[index] = false; - if min_new_variables == 0 { - operations.push(JoinOp::Filter(body[index].clone())); - } else { - operations.push(JoinOp::Join(body[index].clone())); - variables.extend(body[index].variables()); - } - } -} - -fn convert(rule: Rule, dict: &mut Dict) -> Result { - let mut vars = Variables(HashMap::new()); - - let body: Box<[SaturationAtom]> = rule - .body() - .iter() - .map(|lit| convert_literal(lit, dict, &mut vars)) - .collect::>()?; - - let join_orders: Box<[JoinOrder]> = body - .iter() - .enumerate() - .map(|(idx, atom)| { - let variables: HashSet<_> = atom.variables().collect(); - let mut mask = vec![true; body.len()]; - mask[idx] = false; - - compute_join_order(variables, &body, &mut mask) - }) - .collect(); - - let head = if rule.variables().any(Variable::is_existential) { - todo!() - } else { - Head::Datalog( - rule.head() - .iter() - .map(|atom| convert_atom(atom, dict, &mut vars)) - .collect::>()?, - ) - }; - - Ok(SaturationRule { - body_atoms: body, - join_orders, - head, - }) -} diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index e3f3bfe58..3575fadab 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -20,6 +20,7 @@ #![feature(str_from_raw_parts)] #![feature(path_add_extension)] #![feature(associated_type_defaults)] +#![feature(btree_cursors)] /// The crate for underlying physical operations. pub extern crate nemo_physical; diff --git a/nemo/src/saturator.rs b/nemo/src/saturator.rs index 991503e16..1aa5c343e 100644 --- a/nemo/src/saturator.rs +++ b/nemo/src/saturator.rs @@ -30,265 +30,6 @@ use std::{ thread::current, }; -#[derive(Debug, Default, Clone)] -struct SaturationSubstitution(Vec>); - -impl Index for SaturationSubstitution { - type Output = Option; - - fn index(&self, index: VariableIdx) -> &Self::Output { - if self.0.len() <= usize::from(index) { - &None - } else { - &self.0[usize::from(index)] - } - } -} - -impl SaturationSubstitution { - fn insert(&mut self, var: VariableIdx, value: StorageValueT) -> Option { - if self.0.len() <= usize::from(var) { - self.0.resize_with(usize::from(var + 1), || None); - self.0[usize::from(var)] = Some(value); - None - } else { - let prev = self.0[usize::from(var)]; - self.0[usize::from(var)] = Some(value); - prev - } - } - - fn bind(&self, terms: &[BodyTerm]) -> Row { - terms - .iter() - .map(|term| match term { - BodyTerm::Constant(constant) => RowElement::Value(*constant), - BodyTerm::Variable(var) => self[*var] - .map(RowElement::Value) - .unwrap_or(RowElement::Bottom), - BodyTerm::Ignore => RowElement::Bottom, - }) - .collect() - } - - fn update(&mut self, terms: &[BodyTerm], row: &[RowElement]) { - for (term, value) in terms.iter().zip(row) { - let BodyTerm::Variable(var) = term else { - continue; - }; - - self.insert(*var, value.value()); - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -struct SaturationAtom { - predicate: Arc, - terms: Box<[BodyTerm]>, -} - -// impl Clone for SaturationAtom { -// fn clone(&self) -> Self { -// todo!() -// } -// } - -impl SaturationAtom { - fn match_fact(&self, fact: &SaturationFact) -> Option { - if fact.predicate != self.predicate { - return None; - } - - let mut res = SaturationSubstitution::default(); - debug_assert_eq!(self.terms.len(), fact.values.len()); - - for (term, value) in self.terms.iter().zip(&fact.values) { - match term { - BodyTerm::Constant(constant) => { - if value != constant { - return None; - } - } - BodyTerm::Variable(idx) => { - if let Some(prev) = res.insert(*idx, *value) { - if prev != *value { - return None; - } - } - } - BodyTerm::Ignore => {} - } - } - - Some(res) - } - - fn variables(&self) -> impl Iterator + use<'_> { - self.terms.iter().flat_map(|term| match term { - BodyTerm::Variable(var) => Some(*var), - _ => None, - }) - } -} - -impl SaturationRule { - fn trigger<'a, 'b>( - &'a self, - fact: &'b SaturationFact, - ) -> impl Iterator + use<'a, 'b> { - self.body_atoms - .iter() - .zip(&self.join_orders) - .filter_map(|(atom, order)| Some((atom.match_fact(fact)?, order.clone()))) - } -} - -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum RowElement { - Value(StorageValueT), - Bottom, - Top, -} - -type Row = Box<[RowElement]>; - -impl PartialOrd for RowElement { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for RowElement { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - match (self, other) { - (RowElement::Bottom, RowElement::Bottom) => std::cmp::Ordering::Equal, - (RowElement::Top, RowElement::Top) => std::cmp::Ordering::Equal, - (RowElement::Value(a), RowElement::Value(b)) => a.cmp(b), - - (_, RowElement::Bottom) => std::cmp::Ordering::Greater, - (_, RowElement::Top) => std::cmp::Ordering::Less, - (RowElement::Bottom, _) => std::cmp::Ordering::Less, - (RowElement::Top, _) => std::cmp::Ordering::Greater, - } - } -} - -enum MatchResult { - Matches, - InBounds, - OutOfBounds, -} - -impl RowElement { - fn value(self) -> StorageValueT { - match self { - RowElement::Value(inner) => inner, - RowElement::Top | RowElement::Bottom => panic!("called value() on RowElement::Ghost"), - } - } -} - -fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { - let mut index = 0; - - while index < pattern.len() { - let RowElement::Value(value) = pattern[index] else { - break; - }; - - match value.cmp(&row[index].value()) { - std::cmp::Ordering::Less => return MatchResult::OutOfBounds, - std::cmp::Ordering::Equal => {} - std::cmp::Ordering::Greater => panic!("pattern must always be a lower bound"), - } - - index += 1; - } - - // only here if pattern[index] == Ghost || index >= pattern.len() - index += 1; - - while index < pattern.len() { - let RowElement::Value(value) = pattern[index] else { - index += 1; - continue; - }; - - if value != row[index].value() { - return MatchResult::InBounds; - } - - index += 1; - } - - MatchResult::Matches -} - -struct RowIterator<'a> { - lower_cursor: btree_set::Cursor<'a, Row>, - upper_cursor: btree_set::Cursor<'a, Row>, - pattern: Row, -} - -impl<'a> Iterator for RowIterator<'a> { - type Item = &'a [RowElement]; - - fn next(&mut self) -> Option { - while let Some(row) = self.lower_cursor.next() { - if Some(row) == self.upper_cursor.peek_next() { - return None; - } - - match match_rows(&self.pattern, row) { - MatchResult::Matches => return Some(row), - MatchResult::InBounds => continue, - MatchResult::OutOfBounds => unreachable!("this should have been caught early"), - } - } - - None - } -} - -trait GhostBound { - fn invert_bound(&self) -> Self; -} - -impl GhostBound for Row { - fn invert_bound(&self) -> Self { - self.iter() - .map(|elem| match elem { - RowElement::Bottom => RowElement::Top, - RowElement::Top => RowElement::Bottom, - value => *value, - }) - .collect() - } -} - -fn find_all_matches<'a>(pattern: Row, table: &'a BTreeSet) -> RowIterator<'a> { - let lower_cursor = table.lower_bound(Bound::Excluded(&pattern)); - let upper_cursor = table.upper_bound(Bound::Excluded(&pattern.invert_bound())); - RowIterator { - lower_cursor, - upper_cursor, - pattern, - } -} - -fn join<'a, 'b, 'c>( - subst: &'a SaturationSubstitution, - terms: &'b [BodyTerm], - table: &'c BTreeSet, -) -> impl Iterator + use<'a, 'b, 'c> { - find_all_matches(subst.bind(terms), table).map(|row| { - let mut subst = subst.clone(); - subst.update(terms, row); - subst - }) -} - #[test] fn find_all_matches_works() { macro_rules! table { From 7ae4e383ab144b551dbaffc816f8ca90ba7cbd9b Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 20 Aug 2025 14:50:17 +0200 Subject: [PATCH 05/26] saturation with nested iterators --- nemo/src/execution/saturation/execution.rs | 213 +++++++++++++++++++-- nemo/src/execution/saturation/model.rs | 3 +- 2 files changed, 199 insertions(+), 17 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 36dbff434..166e47acb 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,11 +1,15 @@ use std::{ - collections::{btree_set, BTreeSet}, + collections::{btree_set, BTreeSet, HashMap}, ops::{Bound, Index}, + sync::Arc, }; use nemo_physical::datatypes::StorageValueT; -use crate::execution::planning::operations::join; +use crate::{ + execution::saturation::model::{Head, JoinOp}, + rule_model::substitution::Substitution, +}; use super::model::{ BodyTerm, JoinOrder, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, @@ -101,7 +105,7 @@ struct Triggers<'a, 'b> { } impl Iterator for Triggers<'_, '_> { - type Item = (SaturationSubstitution, JoinOrder); + type Item = ExecutionTree; fn next(&mut self) -> Option { while self.index < self.rule.body_atoms.len() { @@ -110,10 +114,16 @@ impl Iterator for Triggers<'_, '_> { continue; }; - let join_order = self.rule.join_order(self.index); + let ops = self.rule.join_order(self.index); + let index = ops.len(); + self.index += 1; - return Some((substitution, join_order)); + return Some(ExecutionTree { + init: substitution, + ops, + index, + }); } None @@ -124,7 +134,7 @@ impl SaturationRule { fn trigger<'a, 'b>( &'a mut self, fact: &'b SaturationFact, - ) -> impl Iterator + use<'a, 'b> { + ) -> impl Iterator + use<'a, 'b> { Triggers { rule: self, fact, @@ -266,14 +276,185 @@ fn find_all_matches<'a>(pattern: Row, table: &'a BTreeSet) -> RowIterator<' } } -fn join<'a, 'b, 'c>( - subst: &'a SaturationSubstitution, - terms: &'b [BodyTerm], - table: &'c BTreeSet, -) -> impl Iterator + use<'a, 'b, 'c> { - find_all_matches(subst.bind(terms), table).map(|row| { - let mut subst = subst.clone(); - subst.update(terms, row); - subst - }) +struct RowMatcher<'a> { + substitution: SaturationSubstitution, + atom: SaturationAtom, + cursor: RowIterator<'a>, +} + +impl Iterator for RowMatcher<'_> { + type Item = SaturationSubstitution; + + fn next(&mut self) -> Option { + let row = self.cursor.next()?; + let mut subst = self.substitution.clone(); + subst.update(&self.atom.terms, row); + Some(subst) + } +} + +fn join<'a>( + substitution: SaturationSubstitution, + atom: SaturationAtom, + table: &'a BTreeSet, +) -> RowMatcher<'a> { + let cursor = find_all_matches(substitution.bind(&atom.terms), table); + + RowMatcher { + substitution, + atom, + cursor, + } +} +struct ExecutionTree { + init: SaturationSubstitution, + ops: Arc<[JoinOp]>, + index: usize, +} + +enum JoinIter<'a> { + Done, + NoOp(SaturationSubstitution), + Join { + inner: Box>, + atom: SaturationAtom, + table: &'a BTreeSet, + current: Option>, + }, +} + +type DataBase = HashMap, BTreeSet>; + +impl ExecutionTree { + fn pop(&mut self) -> Option<&JoinOp> { + if self.index > 0 { + self.index -= 1; + Some(&self.ops[self.index]) + } else { + None + } + } + + fn execute<'a>(mut self, tables: &'a DataBase) -> JoinIter<'a> { + let Some(op) = self.pop() else { + return JoinIter::NoOp(self.init); + }; + + match op { + JoinOp::Join(atom) => { + let table = tables.get(&atom.predicate).unwrap(); + let atom = atom.clone(); + let inner = Box::new(self.execute(&tables)); + + JoinIter::Join { + inner, + atom, + table, + current: None, + } + } + // todo: more efficient implementation? + JoinOp::Filter(atom) => { + let table = tables.get(&atom.predicate).unwrap(); + let atom = atom.clone(); + let inner = Box::new(self.execute(&tables)); + + JoinIter::Join { + inner, + atom, + table, + current: None, + } + } + } + } +} + +impl Iterator for JoinIter<'_> { + type Item = SaturationSubstitution; + + fn next(&mut self) -> Option { + match self { + JoinIter::NoOp(saturation_substitution) => { + let res = saturation_substitution.clone(); + *self = Self::Done; + Some(res) + } + JoinIter::Join { + inner, + atom, + table, + current, + } => loop { + if let Some(current) = current { + if let Some(next) = current.next() { + return Some(next); + } + } + + let substitution = inner.next()?; + *current = Some(join(substitution, atom.clone(), table)); + }, + JoinIter::Done => None, + } + } +} + +fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { + let values = row + .iter() + .map(|element| match element { + RowElement::Value(value) => Some(*value), + _ => None, + }) + .collect::>() + .unwrap(); + + SaturationFact { predicate, values } +} + +fn saturate(db: &mut DataBase, mut rules: Vec) { + let mut todo = Vec::new(); + + for (predicate, table) in db.iter() { + for row in table.iter() { + todo.push(fact_from_row(row, predicate.clone())); + } + } + + while !todo.is_empty() { + let mut matches = Vec::new(); + + for (rule_index, rule) in rules.iter_mut().enumerate() { + for fact in &todo { + for trigger in rule.trigger(&fact) { + matches.extend(trigger.execute(&db).map(|row| (row, rule_index))); + } + } + } + + todo.clear(); + + for (substitution, rule_index) in matches { + let rule = &rules[rule_index]; + + match &rule.head { + Head::Datalog(atoms) => { + for atom in atoms { + let row = substitution.bind(&atom.terms); + let table = db.entry(atom.predicate.clone()).or_default(); + + let mut cursor = table.lower_bound_mut(Bound::Included(&row)); + + if cursor.peek_next() != Some(&row) { + let fact = fact_from_row(&row, atom.predicate.clone()); + + cursor.insert_after(row).unwrap(); + todo.push(fact); + } + } + } + } + } + } } diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index f07428de9..294fecbde 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -59,12 +59,13 @@ struct HeadAtom { terms: Box<[HeadTerm]>, } -enum Head { +pub(crate) enum Head { Datalog(Box<[SaturationAtom]>), } pub(crate) type JoinOrder = Arc<[JoinOp]>; +#[derive(Clone)] pub(crate) enum JoinOp { Join(SaturationAtom), Filter(SaturationAtom), From 1780c1ebec1bf1e4fddbb069fa163d5226da0c14 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 20 Aug 2025 15:09:11 +0200 Subject: [PATCH 06/26] basic tests --- nemo/src/execution/saturation/execution.rs | 101 +++++++++++++++++++++ nemo/src/execution/saturation/model.rs | 40 +++++++- 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 166e47acb..072bd1617 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,5 +1,6 @@ use std::{ collections::{btree_set, BTreeSet, HashMap}, + iter::repeat_n, ops::{Bound, Index}, sync::Arc, }; @@ -458,3 +459,103 @@ fn saturate(db: &mut DataBase, mut rules: Vec) { } } } + +mod test { + use std::{ + collections::{BTreeSet, HashMap}, + iter::repeat_n, + }; + + use nemo_physical::datatypes::StorageValueT; + + use crate::execution::saturation::{ + execution::{find_all_matches, saturate, Row, RowElement}, + model::bench_rules, + }; + + #[test] + fn find_all_matches_works() { + macro_rules! table { + [ $([ $($v:expr),* ],)* ] => { + BTreeSet::from([ $( Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), )* ]) + }; + } + + let table: BTreeSet = table![ + [0, 0, 0, 1, 0], + [0, 1, 0, 0, 0], + [0, 1, 0, 1, 2], + [0, 1, 1, 0, 0], + [0, 1, 2, 1, 2], + [1, 0, 0, 0, 0], + [1, 1, 0, 1, 2], + [2, 1, 0, 0, 0], + ]; + + let pattern1: Row = Box::from([ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Bottom, + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Bottom, + ]); + + let matches: Vec<_> = find_all_matches(pattern1, &table).collect(); + let expected: Vec<&[RowElement]> = vec![ + &[ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + ], + &[ + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(2)), + ], + ]; + + assert_eq!(matches, expected); + + let pattern = Box::from([ + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Bottom, + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Bottom, + ]); + + let mut iter = find_all_matches(pattern, &table); + let expected: &[RowElement] = &[ + RowElement::Value(StorageValueT::Id32(1)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + RowElement::Value(StorageValueT::Id32(0)), + ]; + assert_eq!( + iter.lower_cursor.peek_next().map(|row| { + let row: &[RowElement] = row; + row + }), + Some(expected) + ); + assert_eq!(iter.next(), Some(expected)); + assert_eq!(iter.next(), None); + } + + #[test] + fn saturate_bench_rules() { + let (rules, predicate) = bench_rules(5); + let row: Row = repeat_n(RowElement::Value(StorageValueT::Int64(0)), 5).collect(); + + let mut db = HashMap::from([(predicate.clone(), BTreeSet::from([row]))]); + + saturate(&mut db, rules); + + assert_eq!(db.get(&predicate).unwrap().len(), 32); + } +} diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index 294fecbde..add5e3c13 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -3,7 +3,7 @@ use std::{ borrow::Cow, collections::{HashMap, HashSet}, - iter::repeat, + iter::{repeat, repeat_n}, sync::Arc, }; @@ -280,3 +280,41 @@ fn compute_join_order( } } } + +pub(super) fn bench_rules(n: usize) -> (Vec, Arc) { + let one = BodyTerm::Constant(StorageValueT::Int64(1)); + let zero = BodyTerm::Constant(StorageValueT::Int64(0)); + let predicate: Arc = Arc::from("p"); + + let rules: Vec<_> = (0..n) + .map(|i| { + let head = (0..VariableIdx::try_from(i).unwrap()) + .map(BodyTerm::Variable) + .chain(Some(one)) + .chain(repeat_n(zero, n - i - 1)); + + let head = SaturationAtom { + predicate: predicate.clone(), + terms: head.collect(), + }; + + let body = (0..VariableIdx::try_from(i).unwrap()) + .map(BodyTerm::Variable) + .chain(Some(zero)) + .chain(repeat_n(one, n - i - 1)); + + let body = SaturationAtom { + predicate: predicate.clone(), + terms: body.collect(), + }; + + SaturationRule { + body_atoms: Arc::from([body]), + join_orders: Box::from([None]), + head: Head::Datalog(Box::from([head])), + } + }) + .collect(); + + (rules, predicate) +} From eb696993094d215f5c42936071ba38c41e2a0160 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 20 Aug 2025 16:47:08 +0200 Subject: [PATCH 07/26] generalize dependency graph generation --- nemo/src/execution/execution_engine.rs | 5 +- .../dependency_graph/graph_constructor.rs | 23 +- .../dependency_graph/graph_positive.rs | 17 +- .../execution/selection_strategy/strategy.rs | 12 +- .../selection_strategy/strategy_graph.rs | 13 +- nemo/src/saturator.rs | 286 ------------------ 6 files changed, 42 insertions(+), 314 deletions(-) delete mode 100644 nemo/src/saturator.rs diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 02c58a27e..456b0e2c0 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -261,10 +261,7 @@ impl ExecutionEngine { let mut new_derivations: Option = None; - let mut rule_strategy = Strategy::new( - self.program.rules().iter().collect(), - self.analysis.rule_analysis.iter().collect(), - )?; + let mut rule_strategy = Strategy::new(self.program.rules(), &self.analysis.rule_analysis)?; while let Some(step) = rule_strategy.next_step(new_derivations) { let ExecutionStep::ExecuteRule { execution, index } = step; diff --git a/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs b/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs index 7b7abcd42..678eb9bec 100644 --- a/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs +++ b/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs @@ -2,14 +2,31 @@ use petgraph::{adj::NodeIndex, Directed, Graph}; -use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; +use crate::{ + chase_model::analysis::program_analysis::RuleAnalysis, rule_model::components::tag::Tag, +}; /// Graph that represents a prioritization between rules. pub type DependencyGraph = Graph, (), Directed>; +pub trait PositivePredicateAnalysis { + fn positive_body_predicates(&self) -> impl Iterator; + fn head_predicates(&self) -> impl Iterator; +} + +impl PositivePredicateAnalysis for &RuleAnalysis { + fn positive_body_predicates(&self) -> impl Iterator { + self.positive_body_predicates.iter() + } + + fn head_predicates(&self) -> impl Iterator { + self.head_predicates.iter() + } +} + /// Defines the trait for constructors of depedency graphs. -pub trait DependencyGraphConstructor: std::fmt::Debug { +pub trait DependencyGraphConstructor: std::fmt::Debug { /// Given a list of rules and some additional information, /// construct the dependency graph. - fn build_graph(rules: Vec<&ChaseRule>, rule_analyses: Vec<&RuleAnalysis>) -> DependencyGraph; + fn build_graph(rule_analyses: &[T]) -> DependencyGraph; } diff --git a/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs b/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs index 21f5f7ea5..51f5934aa 100644 --- a/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs +++ b/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs @@ -1,10 +1,10 @@ //! Module for defining a graph that checks //! when the application of a rule might lead to the application of another. -use std::collections::HashMap; +use std::{collections::HashMap, fmt::Debug}; use crate::{ - chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}, + execution::selection_strategy::dependency_graph::graph_constructor::PositivePredicateAnalysis, rule_model::components::tag::Tag, }; @@ -15,16 +15,17 @@ use super::graph_constructor::{DependencyGraph, DependencyGraphConstructor}; #[derive(Debug, Copy, Clone)] pub struct GraphConstructorPositive {} -impl DependencyGraphConstructor for GraphConstructorPositive { - fn build_graph(rules: Vec<&ChaseRule>, rule_analyses: Vec<&RuleAnalysis>) -> DependencyGraph { - debug_assert!(rules.len() == rule_analyses.len()); - let rule_count = rules.len(); +impl DependencyGraphConstructor + for GraphConstructorPositive +{ + fn build_graph(rule_analyses: &[T]) -> DependencyGraph { + let rule_count = rule_analyses.len(); let mut predicate_to_rules_body = HashMap::>::new(); let mut predicate_to_rules_head = HashMap::>::new(); for (rule_index, rule_analysis) in rule_analyses.iter().enumerate() { - for body_predicate in &rule_analysis.positive_body_predicates { + for body_predicate in rule_analysis.positive_body_predicates() { let indices = predicate_to_rules_body .entry(body_predicate.clone()) .or_default(); @@ -32,7 +33,7 @@ impl DependencyGraphConstructor for GraphConstructorPositive { indices.push(rule_index); } - for head_predicate in &rule_analysis.head_predicates { + for head_predicate in rule_analysis.head_predicates() { let indices = predicate_to_rules_head .entry(head_predicate.clone()) .or_default(); diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index 58e18862a..ece1f9754 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -35,8 +35,8 @@ pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { pub trait ExecutionStrategy: std::fmt::Debug + Sized { /// Create a new [ExecutionStrategy] object. fn new( - rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, + rules: &[ChaseRule], + rule_analyses: &[RuleAnalysis], ) -> Result; /// Return the next step that should be executed. @@ -66,17 +66,17 @@ pub struct SingleStepStrategy { impl ExecutionStrategy for SingleStepStrategy { fn new( - rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, + rules: &[ChaseRule], + rule_analyses: &[RuleAnalysis], ) -> Result { let rule_execution = rules .iter() - .zip(&rule_analyses) + .zip(rule_analyses) .map(|(r, a)| RuleExecution::initialize(r, a)) .collect(); Ok(SingleStepStrategy { - inner: T::new(rules, rule_analyses)?, + inner: T::new(rules.iter().collect(), rule_analyses.iter().collect())?, rule_execution, }) } diff --git a/nemo/src/execution/selection_strategy/strategy_graph.rs b/nemo/src/execution/selection_strategy/strategy_graph.rs index 144187fd9..cef9cddf1 100644 --- a/nemo/src/execution/selection_strategy/strategy_graph.rs +++ b/nemo/src/execution/selection_strategy/strategy_graph.rs @@ -11,10 +11,7 @@ use super::{ /// Defines a rule execution strategy which respects certain dependencies between rules #[derive(Debug)] -pub struct StrategyDependencyGraph< - GraphConstructor: DependencyGraphConstructor, - SubStrategy: RuleSelectionStrategy, -> { +pub struct StrategyDependencyGraph { _constructor: PhantomData, ordered_sccs: Vec>, @@ -23,14 +20,16 @@ pub struct StrategyDependencyGraph< current_scc_index: usize, } -impl - RuleSelectionStrategy for StrategyDependencyGraph +impl< + GraphConstructor: for<'a> DependencyGraphConstructor<&'a RuleAnalysis>, + SubStrategy: RuleSelectionStrategy, + > RuleSelectionStrategy for StrategyDependencyGraph { fn new( rules: Vec<&ChaseRule>, rule_analyses: Vec<&RuleAnalysis>, ) -> Result { - let dependency_graph = GraphConstructor::build_graph(rules.clone(), rule_analyses.clone()); + let dependency_graph = GraphConstructor::build_graph(&rule_analyses); let graph_scc = petgraph::algo::condensation(dependency_graph, true); let scc_sorted = petgraph::algo::toposort(&graph_scc, None) .expect("The input graph is assured to be acyclic"); diff --git a/nemo/src/saturator.rs b/nemo/src/saturator.rs deleted file mode 100644 index 1aa5c343e..000000000 --- a/nemo/src/saturator.rs +++ /dev/null @@ -1,286 +0,0 @@ -#![feature(btree_cursors)] -#![feature(hash_set_entry)] - -use nemo::rule_model::components::{ - atom::Atom, - literal::Literal, - rule::Rule, - term::{ - primitive::{ - ground, - variable::{universal::UniversalVariable, Variable}, - Primitive, - }, - Term, - }, - IterableVariables, ProgramComponent, -}; - -use nemo_physical::{datatypes::StorageValueT, dictionary::DvDict, management::database::Dict}; -use rayon::{ - iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, - ThreadPoolBuilder, -}; -use std::{ - collections::{btree_set, BTreeSet, HashMap, HashSet, VecDeque}, - i32, - iter::repeat_n, - ops::{Bound, Index}, - sync::{mpsc, Arc}, - thread::current, -}; - -#[test] -fn find_all_matches_works() { - macro_rules! table { - [ $([ $($v:expr),* ],)* ] => { - BTreeSet::from([ $( Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), )* ]) - }; - } - - let table: BTreeSet = table![ - [0, 0, 0, 1, 0], - [0, 1, 0, 0, 0], - [0, 1, 0, 1, 2], - [0, 1, 1, 0, 0], - [0, 1, 2, 1, 2], - [1, 0, 0, 0, 0], - [1, 1, 0, 1, 2], - [2, 1, 0, 0, 0], - ]; - - let pattern1: Row = Box::from([ - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Bottom, - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Bottom, - ]); - - let matches: Vec<_> = find_all_matches(pattern1, &table).collect(); - let expected: Vec<&[RowElement]> = vec![ - &[ - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Value(StorageValueT::Id32(0)), - ], - &[ - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Value(StorageValueT::Id32(2)), - ], - ]; - - assert_eq!(matches, expected); - - let pattern = Box::from([ - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Bottom, - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Bottom, - ]); - - let mut iter = find_all_matches(pattern, &table); - let expected: &[RowElement] = &[ - RowElement::Value(StorageValueT::Id32(1)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - RowElement::Value(StorageValueT::Id32(0)), - ]; - assert_eq!( - iter.lower_cursor.peek_next().map(|row| { - let row: &[RowElement] = row; - row - }), - Some(expected) - ); - assert_eq!(iter.next(), Some(expected)); - assert_eq!(iter.next(), None); -} - -fn bench_rules(n: usize) -> Vec { - let one = BodyTerm::Constant(StorageValueT::Int64(1)); - let zero = BodyTerm::Constant(StorageValueT::Int64(0)); - - let rules: Vec<_> = (0..n) - .map(|i| { - let head = (0..VariableIdx::try_from(i).unwrap()) - .map(BodyTerm::Variable) - .chain(Some(one)) - .chain(repeat_n(zero, n - i - 1)); - - let head = SaturationAtom { - predicate: Arc::from("p"), - terms: head.collect(), - }; - - let body = (0..VariableIdx::try_from(i).unwrap()) - .map(BodyTerm::Variable) - .chain(Some(zero)) - .chain(repeat_n(one, n - i - 1)); - - let body = SaturationAtom { - predicate: Arc::from("p"), - terms: body.collect(), - }; - - SaturationRule { - body_atoms: Box::from([body]), - join_orders: Box::from([Arc::from([])]), - head: Head::Datalog(Box::from([head])), - } - }) - .collect(); - - rules -} - -trait Set { - type Item: ?Sized; - - fn contains(&self, it: &Self::Item) -> bool; -} - -impl Set for BTreeSet { - type Item = [RowElement]; - - fn contains(&self, it: &Self::Item) -> bool { - self.contains(it) - } -} - -struct LoopJoin<'a, 'b> { - rows: RowIterator<'a>, - terms: &'b [BodyTerm], -} - -struct Filter<'a, 'b> { - table: &'a dyn Set, - terms: &'b [BodyTerm], -} - -enum JoinStep<'a, 'b> { - LoopJoin(LoopJoin<'a, 'b>), - Filter(Filter<'a, 'b>), -} - -enum Cases { - A(A), - B(B), -} - -impl Iterator for Cases -where - A: Iterator, - B: Iterator, -{ - type Item = A::Item; - - fn next(&mut self) -> Option { - match self { - Cases::A(a) => a.next(), - Cases::B(b) => b.next(), - } - } -} - -fn join_step<'a, 'b, 'c>( - step: JoinStep<'a, 'b>, - subst: &'c SaturationSubstitution, -) -> impl Iterator + use<'a, 'b, 'c> { - match step { - JoinStep::LoopJoin(LoopJoin { rows, terms }) => Cases::A(rows.map(|row| { - let mut subst = subst.clone(); - subst.update(terms, row); - subst - })), - JoinStep::Filter(Filter { table, terms }) => { - let row = subst.bind(terms); - match table.contains(&row) { - true => Cases::B(Some(subst.clone()).into_iter()), - false => Cases::B(None.into_iter()), - } - } - } -} - -struct JoinPlan<'a, 'b> { - inputs: Vec, - steps: Vec>, -} - -fn run() { - let n = 20; - let mut todo = VecDeque::from([( - SaturationFact { - predicate: Arc::from("p"), - values: repeat_n(StorageValueT::Int64(0), n).collect(), - }, - repeat_n(RowElement::Value(StorageValueT::Int64(0)), n).collect::(), - )]); - - let rules = bench_rules(n); - - let mut closure = HashMap::, BTreeSet>::new(); - - loop { - if todo.is_empty() { - break; - } - - let mut ops = Vec::new(); - - while let Some((fact, tuple)) = todo.pop_front() { - if !closure - .entry(fact.predicate.clone()) - .or_default() - .insert(tuple) - { - continue; - } - - ops.extend(rules.iter().enumerate().flat_map(|(index, rule)| { - rule.trigger(&fact) - .map(move |(substitution, join_order)| (index, substitution, join_order)) - })) - } - - // join phase - todo = ops - .into_par_iter() - .map(|(rule_index, substitution, join_order)| { - let rule = &rules[rule_index]; - - // let iter = join_order - // .iter() - // .fold(JoinNode::Leaf(substitution), |input, &index| { - // JoinNode::LoopJoin { - // table: closure.get(&rule.body_atoms[index].predicate).unwrap(), - // terms: &rule.body_atoms[index].terms, - // input: Box::new(input), - // } - // }); - - let tuple = substitution.bind(&rule.head.terms); - - let fact = SaturationFact { - predicate: rule.head.predicate.clone(), - values: tuple.iter().cloned().map(RowElement::value).collect(), - }; - - (fact, tuple) - }) - .collect(); - } -} - -fn main() { - let tp = ThreadPoolBuilder::new().build().unwrap(); - tp.install(|| run()) -} From 3f5aa19f3714d83186d157cd65eab0f0d97bb126 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 20 Aug 2025 16:48:19 +0200 Subject: [PATCH 08/26] wrap database expected by saturation --- nemo/src/execution/saturation/execution.rs | 52 ++++++++++++++-------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 072bd1617..833598381 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,19 +1,15 @@ +//! Executing a set of saturation rules + use std::{ collections::{btree_set, BTreeSet, HashMap}, - iter::repeat_n, ops::{Bound, Index}, sync::Arc, }; use nemo_physical::datatypes::StorageValueT; -use crate::{ - execution::saturation::model::{Head, JoinOp}, - rule_model::substitution::Substitution, -}; - use super::model::{ - BodyTerm, JoinOrder, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, + BodyTerm, Head, JoinOp, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, }; #[derive(Debug, Default, Clone)] @@ -324,7 +320,8 @@ enum JoinIter<'a> { }, } -type DataBase = HashMap, BTreeSet>; +#[derive(Debug)] +pub(crate) struct DataBase(HashMap, BTreeSet>); impl ExecutionTree { fn pop(&mut self) -> Option<&JoinOp> { @@ -343,7 +340,7 @@ impl ExecutionTree { match op { JoinOp::Join(atom) => { - let table = tables.get(&atom.predicate).unwrap(); + let table = tables.0.get(&atom.predicate).unwrap(); let atom = atom.clone(); let inner = Box::new(self.execute(&tables)); @@ -356,7 +353,7 @@ impl ExecutionTree { } // todo: more efficient implementation? JoinOp::Filter(atom) => { - let table = tables.get(&atom.predicate).unwrap(); + let table = tables.0.get(&atom.predicate).unwrap(); let atom = atom.clone(); let inner = Box::new(self.execute(&tables)); @@ -414,10 +411,10 @@ fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { SaturationFact { predicate, values } } -fn saturate(db: &mut DataBase, mut rules: Vec) { +pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let mut todo = Vec::new(); - for (predicate, table) in db.iter() { + for (predicate, table) in db.0.iter() { for row in table.iter() { todo.push(fact_from_row(row, predicate.clone())); } @@ -443,7 +440,7 @@ fn saturate(db: &mut DataBase, mut rules: Vec) { Head::Datalog(atoms) => { for atom in atoms { let row = substitution.bind(&atom.terms); - let table = db.entry(atom.predicate.clone()).or_default(); + let table = db.0.entry(atom.predicate.clone()).or_default(); let mut cursor = table.lower_bound_mut(Bound::Included(&row)); @@ -460,6 +457,25 @@ fn saturate(db: &mut DataBase, mut rules: Vec) { } } +impl DataBase { + pub fn new() -> Self { + Self(Default::default()) + } + + pub fn add_table( + &mut self, + predicate: Arc, + table: impl Iterator>, + ) { + let table = table + .map(|row| row.into_iter().map(RowElement::Value).collect()) + .collect(); + + self.0.insert(predicate, table); + } +} + +#[cfg(test)] mod test { use std::{ collections::{BTreeSet, HashMap}, @@ -469,7 +485,7 @@ mod test { use nemo_physical::datatypes::StorageValueT; use crate::execution::saturation::{ - execution::{find_all_matches, saturate, Row, RowElement}, + execution::{find_all_matches, saturate, DataBase, Row, RowElement}, model::bench_rules, }; @@ -549,13 +565,13 @@ mod test { #[test] fn saturate_bench_rules() { - let (rules, predicate) = bench_rules(5); + let (mut rules, predicate) = bench_rules(5); let row: Row = repeat_n(RowElement::Value(StorageValueT::Int64(0)), 5).collect(); - let mut db = HashMap::from([(predicate.clone(), BTreeSet::from([row]))]); + let mut db = DataBase(HashMap::from([(predicate.clone(), BTreeSet::from([row]))])); - saturate(&mut db, rules); + saturate(&mut db, &mut rules); - assert_eq!(db.get(&predicate).unwrap().len(), 32); + assert_eq!(db.0.get(&predicate).unwrap().len(), 32); } } From 740c4f56f28692258fb76ac92b470118eabb029b Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 20 Aug 2025 16:48:50 +0200 Subject: [PATCH 09/26] expose iterator over StorageValueT --- nemo-physical/src/management/database.rs | 22 +++++++++++++++++++++- nemo-physical/src/tabular/trie.rs | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/nemo-physical/src/management/database.rs b/nemo-physical/src/management/database.rs index 8a7e2729c..d59b00c85 100644 --- a/nemo-physical/src/management/database.rs +++ b/nemo-physical/src/management/database.rs @@ -18,6 +18,7 @@ use std::{ use crate::{ datasources::table_providers::TableProvider, + datatypes::StorageValueT, datavalues::AnyDataValue, error::Error, management::{bytesized::ByteSized, database::execution_series::ExecutionTreeNode}, @@ -132,6 +133,26 @@ impl DatabaseInstance { self.reference_manager.count_rows_in_memory(id) } + /// Provide an iterator over the rows of the table with the given [PermanentTableId] + /// which directly yields rows of [`StorageValueT`], without translation through a + /// dictionary. + /// + /// # Panics + /// Panics if the given id does not exist. + pub fn table_raw_row_iterator( + &mut self, + id: PermanentTableId, + ) -> Result> + '_, Error> { + // Make sure trie is loaded + let storage_id = self + .reference_manager + .trie_id(&self.dictionary, id, ColumnOrder::default()) + .unwrap_or_else(|err| panic!("No table with the id {id} exists: {err}")); + let trie = self.reference_manager.trie(storage_id); + + Ok(trie.row_iterator()) + } + /// Provide an iterator over the rows of the table with the given [PermanentTableId]. /// /// # Panics @@ -140,7 +161,6 @@ impl DatabaseInstance { &mut self, id: PermanentTableId, ) -> Result> + '_, Error> { - // Make sure trie is loaded let storage_id = self .reference_manager .trie_id(&self.dictionary, id, ColumnOrder::default()) diff --git a/nemo-physical/src/tabular/trie.rs b/nemo-physical/src/tabular/trie.rs index ff2679ff1..17e19a9e8 100644 --- a/nemo-physical/src/tabular/trie.rs +++ b/nemo-physical/src/tabular/trie.rs @@ -92,7 +92,7 @@ impl Trie { } /// Return a row based iterator over this trie. - pub(crate) fn row_iterator(&self) -> impl Iterator> + '_ { + pub fn row_iterator(&self) -> impl Iterator> + '_ { RowScan::new(self.partial_iterator(), 0) } From 9645533d4dbaf0842e6260117673b1d27e70a069 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 3 Sep 2025 09:38:17 +0200 Subject: [PATCH 10/26] simplify matching loop --- nemo/src/execution/saturation/execution.rs | 34 +++++++++++++--------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 833598381..903dedf2f 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -412,28 +412,25 @@ fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { } pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { - let mut todo = Vec::new(); + let mut matches = Vec::new(); for (predicate, table) in db.0.iter() { - for row in table.iter() { - todo.push(fact_from_row(row, predicate.clone())); - } - } - - while !todo.is_empty() { - let mut matches = Vec::new(); - for (rule_index, rule) in rules.iter_mut().enumerate() { - for fact in &todo { + for row in table.iter() { + let fact = fact_from_row(row, predicate.clone()); + for trigger in rule.trigger(&fact) { matches.extend(trigger.execute(&db).map(|row| (row, rule_index))); } } } + } + let mut todo = Vec::new(); + while !matches.is_empty() { todo.clear(); - for (substitution, rule_index) in matches { + for (substitution, rule_index) in matches.drain(..) { let rule = &rules[rule_index]; match &rule.head { @@ -454,6 +451,14 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { } } } + + for fact in &todo { + for (rule_index, rule) in rules.iter_mut().enumerate() { + for trigger in rule.trigger(&fact) { + matches.extend(trigger.execute(&db).map(|row| (row, rule_index))); + } + } + } } } @@ -565,13 +570,14 @@ mod test { #[test] fn saturate_bench_rules() { - let (mut rules, predicate) = bench_rules(5); - let row: Row = repeat_n(RowElement::Value(StorageValueT::Int64(0)), 5).collect(); + let n = 10; + let (mut rules, predicate) = bench_rules(n); + let row: Row = repeat_n(RowElement::Value(StorageValueT::Int64(0)), n).collect(); let mut db = DataBase(HashMap::from([(predicate.clone(), BTreeSet::from([row]))])); saturate(&mut db, &mut rules); - assert_eq!(db.0.get(&predicate).unwrap().len(), 32); + assert_eq!(db.0.get(&predicate).unwrap().len(), 2_usize.pow(n as u32)); } } From 2ec0ca712fa4026f5420f316f9c231f70b9dfb76 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 10:43:05 +0200 Subject: [PATCH 11/26] add age flag to saturation --- nemo/src/execution/saturation/execution.rs | 67 ++++++++++++++-------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 903dedf2f..cf8e0575f 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,7 +1,7 @@ //! Executing a set of saturation rules use std::{ - collections::{btree_set, BTreeSet, HashMap}, + collections::{btree_map, btree_set, BTreeMap, BTreeSet, HashMap}, ops::{Bound, Index}, sync::Arc, }; @@ -222,8 +222,8 @@ fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { } struct RowIterator<'a> { - lower_cursor: btree_set::Cursor<'a, Row>, - upper_cursor: btree_set::Cursor<'a, Row>, + lower_cursor: btree_map::Cursor<'a, Row, Age>, + upper_cursor: btree_map::Cursor<'a, Row, Age>, pattern: Row, } @@ -231,9 +231,11 @@ impl<'a> Iterator for RowIterator<'a> { type Item = &'a [RowElement]; fn next(&mut self) -> Option { - while let Some(row) = self.lower_cursor.next() { - if Some(row) == self.upper_cursor.peek_next() { - return None; + while let Some((row, _)) = self.lower_cursor.next() { + if let Some((other_row, _)) = self.upper_cursor.peek_next() { + if other_row == row { + return None; + } } match match_rows(&self.pattern, row) { @@ -263,7 +265,7 @@ impl GhostBound for Row { } } -fn find_all_matches<'a>(pattern: Row, table: &'a BTreeSet) -> RowIterator<'a> { +fn find_all_matches<'a>(pattern: Row, table: &'a BTreeMap) -> RowIterator<'a> { let lower_cursor = table.lower_bound(Bound::Excluded(&pattern)); let upper_cursor = table.upper_bound(Bound::Excluded(&pattern.invert_bound())); RowIterator { @@ -293,7 +295,7 @@ impl Iterator for RowMatcher<'_> { fn join<'a>( substitution: SaturationSubstitution, atom: SaturationAtom, - table: &'a BTreeSet, + table: &'a BTreeMap, ) -> RowMatcher<'a> { let cursor = find_all_matches(substitution.bind(&atom.terms), table); @@ -315,13 +317,19 @@ enum JoinIter<'a> { Join { inner: Box>, atom: SaturationAtom, - table: &'a BTreeSet, + table: &'a BTreeMap, current: Option>, }, } #[derive(Debug)] -pub(crate) struct DataBase(HashMap, BTreeSet>); +enum Age { + old, + new, +} + +#[derive(Debug, Default)] +pub(crate) struct DataBase(HashMap, BTreeMap>); impl ExecutionTree { fn pop(&mut self) -> Option<&JoinOp> { @@ -416,7 +424,7 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { for (predicate, table) in db.0.iter() { for (rule_index, rule) in rules.iter_mut().enumerate() { - for row in table.iter() { + for (row, _) in table.iter() { let fact = fact_from_row(row, predicate.clone()); for trigger in rule.trigger(&fact) { @@ -441,12 +449,16 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let mut cursor = table.lower_bound_mut(Bound::Included(&row)); - if cursor.peek_next() != Some(&row) { - let fact = fact_from_row(&row, atom.predicate.clone()); - - cursor.insert_after(row).unwrap(); - todo.push(fact); + if let Some((other_row, _)) = cursor.peek_next() { + if other_row == &row { + continue; + } } + + let fact = fact_from_row(&row, atom.predicate.clone()); + + cursor.insert_after(row, Age::new).unwrap(); + todo.push(fact); } } } @@ -473,7 +485,7 @@ impl DataBase { table: impl Iterator>, ) { let table = table - .map(|row| row.into_iter().map(RowElement::Value).collect()) + .map(|row| (row.into_iter().map(RowElement::Value).collect(), Age::old)) .collect(); self.0.insert(predicate, table); @@ -483,12 +495,14 @@ impl DataBase { #[cfg(test)] mod test { use std::{ - collections::{BTreeSet, HashMap}, + collections::{BTreeMap, BTreeSet, HashMap}, iter::repeat_n, }; use nemo_physical::datatypes::StorageValueT; + use super::Age; + use crate::execution::saturation::{ execution::{find_all_matches, saturate, DataBase, Row, RowElement}, model::bench_rules, @@ -497,12 +511,12 @@ mod test { #[test] fn find_all_matches_works() { macro_rules! table { - [ $([ $($v:expr),* ],)* ] => { - BTreeSet::from([ $( Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), )* ]) - }; - } + [ $([ $($v:expr),* ],)* ] => { + BTreeMap::from([ $( (Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), Age::old), )* ]) + }; + } - let table: BTreeSet = table![ + let table: BTreeMap = table![ [0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 1, 0, 1, 2], @@ -558,7 +572,7 @@ mod test { RowElement::Value(StorageValueT::Id32(0)), ]; assert_eq!( - iter.lower_cursor.peek_next().map(|row| { + iter.lower_cursor.peek_next().map(|(row, _)| { let row: &[RowElement] = row; row }), @@ -574,7 +588,10 @@ mod test { let (mut rules, predicate) = bench_rules(n); let row: Row = repeat_n(RowElement::Value(StorageValueT::Int64(0)), n).collect(); - let mut db = DataBase(HashMap::from([(predicate.clone(), BTreeSet::from([row]))])); + let mut db = DataBase(HashMap::from([( + predicate.clone(), + BTreeMap::from([(row, Age::old)]), + )])); saturate(&mut db, &mut rules); From c8a09ff88ad6f1f799265fc697300a1658fc8f2b Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 15:41:45 +0200 Subject: [PATCH 12/26] integration --- nemo-physical/src/tabular.rs | 2 +- nemo-physical/src/tabular/buffer.rs | 2 + .../src/tabular/buffer/sorted_tuple_buffer.rs | 2 +- .../src/tabular/buffer/tuple_buffer.rs | 10 +- nemo-physical/src/tabular/trie.rs | 2 +- nemo/src/execution.rs | 8 +- nemo/src/execution/execution_engine.rs | 135 +++++++++++++++-- nemo/src/execution/saturation/execution.rs | 141 +++++++++++++++--- nemo/src/execution/saturation/model.rs | 13 +- .../execution/selection_strategy/strategy.rs | 65 +------- .../selection_strategy/strategy_graph.rs | 23 ++- .../selection_strategy/strategy_random.rs | 7 +- .../strategy_round_robin.rs | 7 +- .../strategy_stratified_negation.rs | 20 +-- nemo/src/table_manager.rs | 14 +- 15 files changed, 307 insertions(+), 144 deletions(-) diff --git a/nemo-physical/src/tabular.rs b/nemo-physical/src/tabular.rs index 308d261c7..c83d6181f 100644 --- a/nemo-physical/src/tabular.rs +++ b/nemo-physical/src/tabular.rs @@ -4,5 +4,5 @@ pub mod operations; pub mod trie; pub(crate) mod triescan; -pub(crate) mod buffer; +pub mod buffer; pub(crate) mod rowscan; diff --git a/nemo-physical/src/tabular/buffer.rs b/nemo-physical/src/tabular/buffer.rs index 94630e059..59747d484 100644 --- a/nemo-physical/src/tabular/buffer.rs +++ b/nemo-physical/src/tabular/buffer.rs @@ -2,3 +2,5 @@ pub(crate) mod sorted_tuple_buffer; pub(crate) mod tuple_buffer; + +pub use tuple_buffer::TupleBuffer; diff --git a/nemo-physical/src/tabular/buffer/sorted_tuple_buffer.rs b/nemo-physical/src/tabular/buffer/sorted_tuple_buffer.rs index 8e1f4611e..c9eb17524 100644 --- a/nemo-physical/src/tabular/buffer/sorted_tuple_buffer.rs +++ b/nemo-physical/src/tabular/buffer/sorted_tuple_buffer.rs @@ -8,7 +8,7 @@ use super::tuple_buffer::TupleBuffer; /// Read-only wrapper for [TupleBuffer] which allows the retrieval of its tuples in a sorted manner #[derive(Debug)] -pub(crate) struct SortedTupleBuffer { +pub struct SortedTupleBuffer { /// Underlying [TupleBuffer] containing the actual values tuple_buffer: TupleBuffer, /// We imagine the tuple of the `tuple_buffer` to be arranged one after another in the order of its subtables. diff --git a/nemo-physical/src/tabular/buffer/tuple_buffer.rs b/nemo-physical/src/tabular/buffer/tuple_buffer.rs index 55f29ca59..5b36a02c0 100644 --- a/nemo-physical/src/tabular/buffer/tuple_buffer.rs +++ b/nemo-physical/src/tabular/buffer/tuple_buffer.rs @@ -226,7 +226,7 @@ pub(super) struct TypedTableRecord { /// Represents a row-based table containing values of arbitrary data types #[derive(Debug)] -pub(crate) struct TupleBuffer { +pub struct TupleBuffer { /// Conceptionally, one may imagine the table represented by the [TupleBuffer] /// to be split into several subtables that only contain rows with certain fixed types. /// E.g. one subtable might contain tuples of type ([StorageTypeName::Id32], [StorageTypeName::Int64]) @@ -250,7 +250,7 @@ pub(crate) struct TupleBuffer { impl TupleBuffer { /// Create a new [TupleBuffer]. - pub(crate) fn new(column_number: usize) -> Self { + pub fn new(column_number: usize) -> Self { Self { typed_subtables: Vec::new(), table_lookup: TypedTableLookup::new(), @@ -289,7 +289,7 @@ impl TupleBuffer { /// Provide the next value for the current tuple. Values are added in in order. /// When the value for the last column was provided, the tuple is committed to the buffer. /// Alternatively, a partially built tuple can be abandonded by calling `drop_current_tuple`. - pub(crate) fn add_tuple_value(&mut self, value: StorageValueT) { + pub fn add_tuple_value(&mut self, value: StorageValueT) { self.current_tuple_types[self.current_tuple_index] = value.get_type(); self.current_tuple[self.current_tuple_index] = value; self.current_tuple_index += 1; @@ -307,7 +307,7 @@ impl TupleBuffer { } /// Finish writing to the [TupleBuffer] and return a [SortedTupleBuffer]. - pub(crate) fn finalize(self) -> SortedTupleBuffer { + pub fn finalize(self) -> SortedTupleBuffer { SortedTupleBuffer::new(self) } @@ -318,7 +318,7 @@ impl TupleBuffer { } /// Returns the number of rows in the [TupleBuffer] - pub(crate) fn size(&self) -> usize { + pub fn size(&self) -> usize { self.typed_subtables .iter() .map(|record| record.current_length) diff --git a/nemo-physical/src/tabular/trie.rs b/nemo-physical/src/tabular/trie.rs index 17e19a9e8..4461336f7 100644 --- a/nemo-physical/src/tabular/trie.rs +++ b/nemo-physical/src/tabular/trie.rs @@ -146,7 +146,7 @@ impl Trie { } /// Create a new [Trie] from a [SortedTupleBuffer]. - pub(crate) fn from_tuple_buffer(buffer: SortedTupleBuffer) -> Self { + pub fn from_tuple_buffer(buffer: SortedTupleBuffer) -> Self { let mut intervalcolumn_builders = (0..buffer.column_number()) .map(|_| IntervalColumnTBuilderMatrix::::default()) .collect::>(); diff --git a/nemo/src/execution.rs b/nemo/src/execution.rs index 0404dc96f..6345b30be 100644 --- a/nemo/src/execution.rs +++ b/nemo/src/execution.rs @@ -3,8 +3,6 @@ pub mod execution_engine; pub use execution_engine::ExecutionEngine; -use crate::execution::selection_strategy::strategy::SingleStepStrategy; - use self::selection_strategy::{ dependency_graph::graph_positive::GraphConstructorPositive, strategy_graph::StrategyDependencyGraph, strategy_round_robin::StrategyRoundRobin, @@ -19,8 +17,6 @@ pub mod selection_strategy; pub mod tracing; /// The default strategy that will be used for reasoning -pub type DefaultExecutionStrategy = SingleStepStrategy< - StrategyStratifiedNegation< - StrategyDependencyGraph, - >, +pub type DefaultExecutionStrategy = StrategyStratifiedNegation< + StrategyDependencyGraph, >; diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 456b0e2c0..cf8100183 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -1,12 +1,20 @@ //! Functionality which handles the execution of a program -use std::collections::{hash_map::Entry, HashMap}; +use std::{ + collections::{hash_map::Entry, HashMap}, + ops::Deref, + sync::Arc, +}; use nemo_physical::{ datavalues::AnyDataValue, dictionary::DvDict, - management::database::sources::{SimpleTable, TableSource}, + management::{ + database::sources::{SimpleTable, TableSource}, + execution_plan::ColumnOrder, + }, meta::timing::TimedCode, + tabular::{buffer::TupleBuffer, trie::Trie}, }; use crate::{ @@ -21,7 +29,11 @@ use crate::{ }, error::{report::ProgramReport, warned::Warned, Error}, execution::{ - planning::plan_tracing::TracingStrategy, selection_strategy::strategy::ExecutionStep, + planning::plan_tracing::TracingStrategy, + saturation::{ + execution::{saturate, DataBase}, + model::{SaturationRule, SaturationRuleTranslation}, + }, tracing::trace::TraceDerivation, }, io::{formats::Export, import_manager::ImportManager}, @@ -33,7 +45,7 @@ use crate::{ term::primitive::{ground::GroundTerm, variable::Variable, Primitive}, }, pipeline::transformations::default::TransformationDefault, - programs::{handle::ProgramHandle, program::Program}, + programs::{handle::ProgramHandle, program::Program, ProgramRead}, substitution::Substitution, }, table_manager::{MemoryUsage, SubtableExecutionPlan, TableManager}, @@ -42,7 +54,7 @@ use crate::{ use super::{ execution_parameters::ExecutionParameters, rule_execution::RuleExecution, - selection_strategy::strategy::ExecutionStrategy, + selection_strategy::strategy::MetaStrategy, tracing::{ error::TracingError, trace::{ExecutionTrace, TraceFactHandle, TraceRuleApplication, TraceStatus}, @@ -254,21 +266,120 @@ impl ExecutionEngine { Ok(()) } + fn fill_saturation_rules( + &mut self, + scc: &[usize], + store: &mut HashMap, Option>>, + ) { + if store.contains_key(scc) { + return; + } + + let saturation_rules: Option> = { + let mut dict = self.table_manager.dictionary_mut(); + let mut translation = SaturationRuleTranslation::new(&mut dict); + scc.iter() + .map(|index| translation.convert(self.nemo_program.rule(*index)).ok()) + .collect() + }; + + store.insert(Box::from(scc), saturation_rules); + } + + fn saturation_step(&mut self, rules: &mut [SaturationRule]) -> Result { + let mut db: DataBase = Default::default(); + let mut new_facts = false; + + let predicates: Vec = self.table_manager.known_predicates().cloned().collect(); + for predicate in &predicates { + let Some(table_id) = self.table_manager.combine_predicate(&predicate)? else { + continue; + }; + + db.add_table( + Arc::from(predicate.name()), + self.table_manager.table_raw_row_iterator(table_id)?, + ); + } + + log::trace!("{db:?}"); + + saturate(&mut db, rules); + + for predicate in &predicates { + let mut buffer = TupleBuffer::new(self.predicate_arity(predicate).unwrap()); + + for value in db.new_facts(predicate.name()) { + buffer.add_tuple_value(value); + } + + if buffer.size() == 0 { + continue; + } + + log::debug!("derived {} facts for {predicate}", buffer.size()); + + let trie = Trie::from_tuple_buffer(buffer.finalize()); + self.table_manager.add_table( + predicate.clone(), + self.current_step, + ColumnOrder::default(), + trie, + ); + + new_facts = true; + } + + self.current_step += 1; + Ok(new_facts) + } + /// Executes the program. - pub fn execute(&mut self) -> Result<(), Error> { + pub fn execute(&mut self) -> Result<(), Error> { TimedCode::instance().sub("Reasoning/Rules").start(); TimedCode::instance().sub("Reasoning/Execution").start(); let mut new_derivations: Option = None; - let mut rule_strategy = Strategy::new(self.program.rules(), &self.analysis.rule_analysis)?; + let mut saturation_rules: HashMap, Option>> = + Default::default(); + + let executions: Vec<_> = self + .program + .rules() + .iter() + .zip(&self.analysis.rule_analysis) + .map(|(rule, analysis)| RuleExecution::initialize(rule, analysis)) + .collect(); + let mut rule_strategy = Strategy::new(self.analysis.rule_analysis.iter().collect())?; + + let mut last_scc: Option> = None; - while let Some(step) = rule_strategy.next_step(new_derivations) { - let ExecutionStep::ExecuteRule { execution, index } = step; - let updated_predicates = self.step(index, execution)?; - new_derivations = Some(!updated_predicates.is_empty()); + while let Some(index) = rule_strategy.next_rule(new_derivations) { + let scc = rule_strategy.current_scc(); + if let Some(last) = &last_scc { + let last: &[usize] = &last; + if scc == last { + log::debug!("skipping application of {index}"); + new_derivations = Some(false); + continue; + } + } - self.defrag(updated_predicates)?; + self.fill_saturation_rules(scc, &mut saturation_rules); + + if let Some(Some(rules)) = saturation_rules.get_mut(scc) { + log::info!("<<< {0}: APPLYING SCC {scc:?} >>>", self.current_step); + + new_derivations = Some(self.saturation_step(rules)?); + last_scc = Some(Box::from(scc)); + } else { + let updated_predicates = self.step(index, &executions[index])?; + last_scc = None; + new_derivations = Some(!updated_predicates.is_empty()); + + self.defrag(updated_predicates)?; + } } TimedCode::instance().sub("Reasoning/Rules").stop(); diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index cf8e0575f..9a887898a 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,7 +1,7 @@ //! Executing a set of saturation rules use std::{ - collections::{btree_map, btree_set, BTreeMap, BTreeSet, HashMap}, + collections::{btree_map, BTreeMap, HashMap}, ops::{Bound, Index}, sync::Arc, }; @@ -221,6 +221,7 @@ fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { MatchResult::Matches } +#[derive(Debug)] struct RowIterator<'a> { lower_cursor: btree_map::Cursor<'a, Row, Age>, upper_cursor: btree_map::Cursor<'a, Row, Age>, @@ -241,7 +242,12 @@ impl<'a> Iterator for RowIterator<'a> { match match_rows(&self.pattern, row) { MatchResult::Matches => return Some(row), MatchResult::InBounds => continue, - MatchResult::OutOfBounds => unreachable!("this should have been caught early"), + MatchResult::OutOfBounds => { + log::trace!("OutOfBounds {row:?}, {:?}", self.pattern); + log::trace!("upper cursor next {:?}", self.upper_cursor.peek_next()); + log::trace!("upper cursor prev {:?}", self.upper_cursor.peek_prev()); + unreachable!("this should have been caught early") + } } } @@ -266,8 +272,8 @@ impl GhostBound for Row { } fn find_all_matches<'a>(pattern: Row, table: &'a BTreeMap) -> RowIterator<'a> { - let lower_cursor = table.lower_bound(Bound::Excluded(&pattern)); - let upper_cursor = table.upper_bound(Bound::Excluded(&pattern.invert_bound())); + let lower_cursor = table.lower_bound(Bound::Included(&pattern)); + let upper_cursor = table.upper_bound(Bound::Included(&pattern.invert_bound())); RowIterator { lower_cursor, upper_cursor, @@ -275,6 +281,7 @@ fn find_all_matches<'a>(pattern: Row, table: &'a BTreeMap) -> RowItera } } +#[derive(Debug)] struct RowMatcher<'a> { substitution: SaturationSubstitution, atom: SaturationAtom, @@ -305,12 +312,15 @@ fn join<'a>( cursor, } } + +#[derive(Debug)] struct ExecutionTree { init: SaturationSubstitution, ops: Arc<[JoinOp]>, index: usize, } +#[derive(Debug)] enum JoinIter<'a> { Done, NoOp(SaturationSubstitution), @@ -322,10 +332,10 @@ enum JoinIter<'a> { }, } -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] enum Age { - old, - new, + Old, + New, } #[derive(Debug, Default)] @@ -348,7 +358,10 @@ impl ExecutionTree { match op { JoinOp::Join(atom) => { - let table = tables.0.get(&atom.predicate).unwrap(); + let Some(table) = tables.0.get(&atom.predicate) else { + return JoinIter::Done; + }; + let atom = atom.clone(); let inner = Box::new(self.execute(&tables)); @@ -361,7 +374,10 @@ impl ExecutionTree { } // todo: more efficient implementation? JoinOp::Filter(atom) => { - let table = tables.0.get(&atom.predicate).unwrap(); + let Some(table) = tables.0.get(&atom.predicate) else { + return JoinIter::Done; + }; + let atom = atom.clone(); let inner = Box::new(self.execute(&tables)); @@ -457,7 +473,7 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let fact = fact_from_row(&row, atom.predicate.clone()); - cursor.insert_after(row, Age::new).unwrap(); + cursor.insert_after(row, Age::New).unwrap(); todo.push(fact); } } @@ -475,28 +491,40 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { } impl DataBase { - pub fn new() -> Self { - Self(Default::default()) - } - pub fn add_table( &mut self, predicate: Arc, table: impl Iterator>, ) { let table = table - .map(|row| (row.into_iter().map(RowElement::Value).collect(), Age::old)) + .map(|row| (row.into_iter().map(RowElement::Value).collect(), Age::Old)) .collect(); self.0.insert(predicate, table); } + + pub fn new_facts(&self, predicate: &str) -> impl Iterator + use<'_> { + self.0.get(predicate).into_iter().flat_map(|table| { + table.iter().flat_map(|(row, age)| { + (matches!(age, Age::New)) + .then_some(row.iter().map(|v| match v { + RowElement::Value(storage_value_t) => *storage_value_t, + RowElement::Bottom => unreachable!("sentinel elements are never written"), + RowElement::Top => unreachable!("sentinel elements are never written"), + })) + .into_iter() + .flatten() + }) + }) + } } #[cfg(test)] mod test { use std::{ - collections::{BTreeMap, BTreeSet, HashMap}, + collections::{BTreeMap, HashMap}, iter::repeat_n, + sync::Arc, }; use nemo_physical::datatypes::StorageValueT; @@ -505,17 +533,17 @@ mod test { use crate::execution::saturation::{ execution::{find_all_matches, saturate, DataBase, Row, RowElement}, - model::bench_rules, + model::{bench_rules, BodyTerm, Head, SaturationAtom, SaturationRule}, }; + macro_rules! table { + [ $([ $($v:expr),* ],)* ] => { + BTreeMap::from([ $( (Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), Age::Old), )* ]) + }; + } + #[test] fn find_all_matches_works() { - macro_rules! table { - [ $([ $($v:expr),* ],)* ] => { - BTreeMap::from([ $( (Box::from([ $(RowElement::Value(StorageValueT::Id32($v))),* ]), Age::old), )* ]) - }; - } - let table: BTreeMap = table![ [0, 0, 0, 1, 0], [0, 1, 0, 0, 0], @@ -590,11 +618,76 @@ mod test { let mut db = DataBase(HashMap::from([( predicate.clone(), - BTreeMap::from([(row, Age::old)]), + BTreeMap::from([(row, Age::Old)]), )])); saturate(&mut db, &mut rules); assert_eq!(db.0.get(&predicate).unwrap().len(), 2_usize.pow(n as u32)); + + let new_len = db.new_facts(&predicate).count() / n; + assert_eq!(new_len, 2_usize.pow(n as u32) - 1); + } + + #[test] + fn saturate_multi_join() { + let p1: Arc = Arc::from("p1"); + let p2: Arc = Arc::from("p2"); + let p3: Arc = Arc::from("p3"); + let p4: Arc = Arc::from("p4"); + + let x = BodyTerm::Variable(0); + let y = BodyTerm::Variable(1); + let z = BodyTerm::Variable(2); + + let head = Head::Datalog(Box::from([SaturationAtom { + predicate: p1.clone(), + terms: Box::from([x.clone(), y.clone(), z.clone()]), + }])); + + let p2_atom = SaturationAtom { + predicate: p2.clone(), + terms: Box::from([x.clone(), y.clone()]), + }; + + let p2_table: BTreeMap = table![[0, 0],]; + + let p3_atom = SaturationAtom { + predicate: p3.clone(), + terms: Box::from([x.clone(), y.clone()]), + }; + + let p3_table: BTreeMap = table![[0, 0],]; + + let p4_atom = SaturationAtom { + predicate: p4.clone(), + terms: Box::from([x.clone(), y.clone(), z.clone()]), + }; + + let p4_table: BTreeMap = table![[0, 0, 0], [0, 0, 1],]; + + let mut db = HashMap::new(); + db.insert(p2.clone(), p2_table); + db.insert(p3.clone(), p3_table); + db.insert(p4.clone(), p4_table.clone()); + + let rule = SaturationRule { + body_atoms: Arc::new([p2_atom, p3_atom, p4_atom]), + join_orders: Box::from([None, None, None]), + head, + }; + + let mut db = DataBase(db); + let mut rules = vec![rule]; + + saturate(&mut db, &mut rules); + + assert_eq!( + db.0.get(&p1) + .unwrap_or(&BTreeMap::new()) + .keys() + .collect::>(), + p4_table.keys().collect::>() + ); } } diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index add5e3c13..8b470b7c3 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -54,26 +54,29 @@ enum HeadTerm { Constant(StorageValueT), } +#[derive(Debug)] struct HeadAtom { predicate: Arc, terms: Box<[HeadTerm]>, } +#[derive(Debug, Clone)] pub(crate) enum Head { Datalog(Box<[SaturationAtom]>), } pub(crate) type JoinOrder = Arc<[JoinOp]>; -#[derive(Clone)] +#[derive(Debug, Clone)] pub(crate) enum JoinOp { Join(SaturationAtom), Filter(SaturationAtom), } +#[derive(Debug, Clone)] pub(crate) struct SaturationRule { pub(super) body_atoms: Arc<[SaturationAtom]>, - join_orders: Box<[Option]>, + pub(super) join_orders: Box<[Option]>, pub(super) head: Head, } @@ -164,7 +167,7 @@ impl SaturationRuleTranslation<'_> { } } - fn convert(&mut self, rule: Rule) -> Result { + pub(crate) fn convert(&mut self, rule: &Rule) -> Result { let body: Arc<[SaturationAtom]> = rule .body() .iter() @@ -193,7 +196,7 @@ impl SaturationRuleTranslation<'_> { } } -struct SaturationRuleTranslation<'a> { +pub(crate) struct SaturationRuleTranslation<'a> { variables: Variables, interner: Interner, dict: &'a mut Dict, @@ -201,7 +204,7 @@ struct SaturationRuleTranslation<'a> { impl<'a> SaturationRuleTranslation<'a> { /// Create at [`SaturationRuleTranslation`] referring to a [`Dict`] - fn new(dict: &'a mut Dict) -> Self { + pub(crate) fn new(dict: &'a mut Dict) -> Self { Self { variables: Variables(HashMap::new()), interner: Interner(HashSet::new()), diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index ece1f9754..35a549a78 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -19,10 +19,7 @@ pub enum SelectionStrategyError { /// namely the order in which the rules are applied in. pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { /// Create a new [RuleSelectionStrategy] object. - fn new( - rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, - ) -> Result; + fn new(rule_analyses: Vec<&RuleAnalysis>) -> Result; /// Return the index of the next rule that should be executed. /// Returns `None` if there are no more rules to be applied @@ -30,62 +27,6 @@ pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { fn next_rule(&mut self, new_derivations: Option) -> Option; } -/// Strategy for executing a set of rules, -/// which might involve different types of [`ExecutionStep`]s -pub trait ExecutionStrategy: std::fmt::Debug + Sized { - /// Create a new [ExecutionStrategy] object. - fn new( - rules: &[ChaseRule], - rule_analyses: &[RuleAnalysis], - ) -> Result; - - /// Return the next step that should be executed. - /// Returns `None` if there are no more rules to be applied - /// and the execution should therefore stop. - fn next_step(&mut self, new_derivations: Option) -> Option>; -} - -/// Step that can be taken in the execution of a set of rules. -#[derive(Copy, Clone, Debug)] -pub enum ExecutionStep<'a> { - /// Execute a single rule via the trie-join - ExecuteRule { - /// Index of the rule that shall be executed - index: usize, - /// Strategy for the rule execution - execution: &'a RuleExecution, - }, -} - -/// A strategy executing one rule at a time -#[derive(Debug)] -pub struct SingleStepStrategy { - inner: T, - rule_execution: Box<[RuleExecution]>, -} - -impl ExecutionStrategy for SingleStepStrategy { - fn new( - rules: &[ChaseRule], - rule_analyses: &[RuleAnalysis], - ) -> Result { - let rule_execution = rules - .iter() - .zip(rule_analyses) - .map(|(r, a)| RuleExecution::initialize(r, a)) - .collect(); - - Ok(SingleStepStrategy { - inner: T::new(rules.iter().collect(), rule_analyses.iter().collect())?, - rule_execution, - }) - } - - fn next_step(&mut self, new_derivations: Option) -> Option> { - let index = self.inner.next_rule(new_derivations)?; - Some(ExecutionStep::ExecuteRule { - execution: &self.rule_execution[index], - index, - }) - } +pub trait MetaStrategy: RuleSelectionStrategy { + fn current_scc(&self) -> &[usize]; } diff --git a/nemo/src/execution/selection_strategy/strategy_graph.rs b/nemo/src/execution/selection_strategy/strategy_graph.rs index cef9cddf1..e6699581e 100644 --- a/nemo/src/execution/selection_strategy/strategy_graph.rs +++ b/nemo/src/execution/selection_strategy/strategy_graph.rs @@ -2,7 +2,10 @@ use std::marker::PhantomData; -use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; +use crate::{ + chase_model::analysis::program_analysis::RuleAnalysis, + execution::selection_strategy::strategy::MetaStrategy, +}; use super::{ dependency_graph::graph_constructor::DependencyGraphConstructor, @@ -25,10 +28,7 @@ impl< SubStrategy: RuleSelectionStrategy, > RuleSelectionStrategy for StrategyDependencyGraph { - fn new( - rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, - ) -> Result { + fn new(rule_analyses: Vec<&RuleAnalysis>) -> Result { let dependency_graph = GraphConstructor::build_graph(&rule_analyses); let graph_scc = petgraph::algo::condensation(dependency_graph, true); let scc_sorted = petgraph::algo::toposort(&graph_scc, None) @@ -40,12 +40,11 @@ impl< for scc in scc_sorted { let scc_rule_indices = graph_scc[scc].clone(); - let sub_rules: Vec<&ChaseRule> = scc_rule_indices.iter().map(|&i| rules[i]).collect(); let sub_analyses: Vec<&RuleAnalysis> = scc_rule_indices.iter().map(|&i| rule_analyses[i]).collect(); ordered_sccs.push(scc_rule_indices); - substrategies.push(SubStrategy::new(sub_rules, sub_analyses)?); + substrategies.push(SubStrategy::new(sub_analyses)?); } Ok(Self { @@ -71,3 +70,13 @@ impl< None } } + +impl< + GraphConstructor: for<'a> DependencyGraphConstructor<&'a RuleAnalysis>, + SubStrategy: RuleSelectionStrategy, + > MetaStrategy for StrategyDependencyGraph +{ + fn current_scc(&self) -> &[usize] { + &self.ordered_sccs[self.current_scc_index] + } +} diff --git a/nemo/src/execution/selection_strategy/strategy_random.rs b/nemo/src/execution/selection_strategy/strategy_random.rs index 9668a0a56..a5d597d6d 100644 --- a/nemo/src/execution/selection_strategy/strategy_random.rs +++ b/nemo/src/execution/selection_strategy/strategy_random.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use rand::Rng; -use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; +use crate::chase_model::analysis::program_analysis::RuleAnalysis; use super::strategy::{RuleSelectionStrategy, SelectionStrategyError}; @@ -20,10 +20,7 @@ pub struct StrategyRandom { impl RuleSelectionStrategy for StrategyRandom { /// Create new [StrategyRandom]. - fn new( - _rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, - ) -> Result { + fn new(rule_analyses: Vec<&RuleAnalysis>) -> Result { Ok(Self { rule_count: rule_analyses.len(), no_derivations: HashSet::new(), diff --git a/nemo/src/execution/selection_strategy/strategy_round_robin.rs b/nemo/src/execution/selection_strategy/strategy_round_robin.rs index 6a72e341a..972594414 100644 --- a/nemo/src/execution/selection_strategy/strategy_round_robin.rs +++ b/nemo/src/execution/selection_strategy/strategy_round_robin.rs @@ -1,6 +1,6 @@ //! Defines the execution strategy by which each rule is applied in the order it appears. -use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; +use crate::chase_model::analysis::program_analysis::RuleAnalysis; use super::strategy::{RuleSelectionStrategy, SelectionStrategyError}; @@ -21,10 +21,7 @@ pub struct StrategyRoundRobin { impl RuleSelectionStrategy for StrategyRoundRobin { /// Create new [StrategyRoundRobin]. - fn new( - _rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, - ) -> Result { + fn new(rule_analyses: Vec<&RuleAnalysis>) -> Result { let self_recursive = rule_analyses.iter().map(|a| a.is_recursive).collect(); Ok(Self { diff --git a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs index acc26dd76..2c133acd5 100644 --- a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs +++ b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs @@ -5,8 +5,8 @@ use std::collections::HashMap; use petgraph::Directed; use crate::{ - chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}, - rule_model::components::tag::Tag, + chase_model::analysis::program_analysis::RuleAnalysis, + execution::selection_strategy::strategy::MetaStrategy, rule_model::components::tag::Tag, util::labeled_graph::LabeledGraph, }; @@ -22,7 +22,7 @@ type NegationGraph = LabeledGraph; /// Defines a strategy where rule are divided into different strata /// which are executed in succession. -/// Entering a new statum implies that the table for every negated atom +/// Entering a new stratum implies that the table for every negated atom /// will not get any new elements. #[derive(Debug)] pub struct StrategyStratifiedNegation { @@ -107,10 +107,7 @@ impl RuleSelectionStrategy for StrategyStratifiedNegation { /// Create new [StrategyStratifiedNegation]. - fn new( - rules: Vec<&ChaseRule>, - rule_analyses: Vec<&RuleAnalysis>, - ) -> Result { + fn new(rule_analyses: Vec<&RuleAnalysis>) -> Result { let graph = Self::build_graph(&rule_analyses); if let Some(mut strata) = graph.stratify(&[EdgeLabel::Negative]) { @@ -119,11 +116,10 @@ impl RuleSelectionStrategy for stratum in &mut strata { stratum.sort(); - let sub_rules: Vec<&ChaseRule> = stratum.iter().map(|&i| rules[i]).collect(); let sub_analyses: Vec<&RuleAnalysis> = stratum.iter().map(|&i| rule_analyses[i]).collect(); - substrategies.push(SubStrategy::new(sub_rules, sub_analyses)?); + substrategies.push(SubStrategy::new(sub_analyses)?); } for stratum in &mut strata { @@ -159,3 +155,9 @@ impl RuleSelectionStrategy None } } + +impl MetaStrategy for StrategyStratifiedNegation { + fn current_scc(&self) -> &[usize] { + self.substrategies[self.current_stratum].current_scc() + } +} diff --git a/nemo/src/table_manager.rs b/nemo/src/table_manager.rs index d8ce60847..d18d7ecf3 100644 --- a/nemo/src/table_manager.rs +++ b/nemo/src/table_manager.rs @@ -3,6 +3,7 @@ use crate::{error::Error, rule_model::components::tag::Tag}; use nemo_physical::{ + datatypes::StorageValueT, datavalues::any_datavalue::AnyDataValue, management::{ bytesized::ByteSized, @@ -374,6 +375,13 @@ impl TableManager { Ok(self.database.table_row_iterator(id)?) } + pub(crate) fn table_raw_row_iterator( + &mut self, + id: PermanentTableId, + ) -> Result> + '_, Error> { + Ok(self.database.table_raw_row_iterator(id)?) + } + /// Combine all subtables of a predicate into one table /// and return the [PermanentTableId] of that new table. pub(crate) fn combine_predicate( @@ -473,7 +481,7 @@ impl TableManager { /// Add a [Trie] as a subtable of a predicate. /// Predicate must be registered before calling this function. #[allow(dead_code)] - fn add_table(&mut self, predicate: Tag, step: usize, order: ColumnOrder, trie: Trie) { + pub fn add_table(&mut self, predicate: Tag, step: usize, order: ColumnOrder, trie: Trie) { let name = self.generate_table_name(&predicate, &order, step); let table_id = self.database.register_add_trie(&name, order, trie); @@ -653,6 +661,10 @@ impl TableManager { self.database .execute_first_match(subtable_plan.execution_plan) } + + pub fn known_predicates(&self) -> impl Iterator { + self.predicate_subtables.keys() + } } #[cfg(test)] From 92781fb98e144180dd23ff314034af1b974d6662 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 16:18:42 +0200 Subject: [PATCH 13/26] fix missindexing of rules --- nemo/src/execution/execution_engine.rs | 7 +++---- nemo/src/execution/selection_strategy/strategy.rs | 2 +- nemo/src/execution/selection_strategy/strategy_graph.rs | 7 +++++-- .../selection_strategy/strategy_stratified_negation.rs | 9 +++++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index cf8100183..900b6b9b4 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -358,17 +358,16 @@ impl ExecutionEngine { while let Some(index) = rule_strategy.next_rule(new_derivations) { let scc = rule_strategy.current_scc(); if let Some(last) = &last_scc { - let last: &[usize] = &last; - if scc == last { + if &scc == last { log::debug!("skipping application of {index}"); new_derivations = Some(false); continue; } } - self.fill_saturation_rules(scc, &mut saturation_rules); + self.fill_saturation_rules(&scc, &mut saturation_rules); - if let Some(Some(rules)) = saturation_rules.get_mut(scc) { + if let Some(Some(rules)) = saturation_rules.get_mut(&scc) { log::info!("<<< {0}: APPLYING SCC {scc:?} >>>", self.current_step); new_derivations = Some(self.saturation_step(rules)?); diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index 35a549a78..812067539 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -28,5 +28,5 @@ pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { } pub trait MetaStrategy: RuleSelectionStrategy { - fn current_scc(&self) -> &[usize]; + fn current_scc(&self) -> Box<[usize]>; } diff --git a/nemo/src/execution/selection_strategy/strategy_graph.rs b/nemo/src/execution/selection_strategy/strategy_graph.rs index e6699581e..a9306d642 100644 --- a/nemo/src/execution/selection_strategy/strategy_graph.rs +++ b/nemo/src/execution/selection_strategy/strategy_graph.rs @@ -76,7 +76,10 @@ impl< SubStrategy: RuleSelectionStrategy, > MetaStrategy for StrategyDependencyGraph { - fn current_scc(&self) -> &[usize] { - &self.ordered_sccs[self.current_scc_index] + fn current_scc(&self) -> Box<[usize]> { + self.ordered_sccs[self.current_scc_index] + .iter() + .cloned() + .collect() } } diff --git a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs index 2c133acd5..380dda59e 100644 --- a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs +++ b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs @@ -157,7 +157,12 @@ impl RuleSelectionStrategy } impl MetaStrategy for StrategyStratifiedNegation { - fn current_scc(&self) -> &[usize] { - self.substrategies[self.current_stratum].current_scc() + fn current_scc(&self) -> Box<[usize]> { + let inner = self.substrategies[self.current_stratum].current_scc(); + + inner + .iter() + .map(|i| self.ordered_strata[self.current_stratum][*i]) + .collect() } } From 4cf22caa3b51148efbdd8c3db3e13aa01539cddd Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 16:26:42 +0200 Subject: [PATCH 14/26] disallow atoms with multiple occurences of the same term --- nemo/src/execution/saturation/model.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index 8b470b7c3..5b0522856 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -156,6 +156,18 @@ impl SaturationRuleTranslation<'_> { .map(|term| self.convert_term(term)) .collect::>()?; + for (i, t) in terms.iter().enumerate() { + if let BodyTerm::Variable(v) = t { + if terms[i + 1..] + .iter() + .find(|other| *other == &BodyTerm::Variable(*v)) + .is_some() + { + return Err(()); + } + } + } + Ok(SaturationAtom { predicate, terms }) } From 14aaee8b3966115e0ff1ceaa24d0431933980028 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 16:38:04 +0200 Subject: [PATCH 15/26] actually use interner --- nemo/src/execution/saturation/model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index 5b0522856..5e1840f27 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -149,7 +149,7 @@ impl SaturationRuleTranslation<'_> { } fn convert_atom(&mut self, atom: &Atom) -> Result { - let predicate = Arc::from(atom.predicate().name()); + let predicate = self.interner.create(atom.predicate().name()); let terms: Box<[BodyTerm]> = atom .terms() From 38347e21f8342d8edbd600ee9b8d1067377f00f5 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 16:41:33 +0200 Subject: [PATCH 16/26] fix warnings --- nemo/src/execution/execution_engine.rs | 3 +-- nemo/src/execution/saturation/model.rs | 18 ++++-------------- .../execution/selection_strategy/strategy.rs | 7 +++---- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 900b6b9b4..67f74122a 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -2,7 +2,6 @@ use std::{ collections::{hash_map::Entry, HashMap}, - ops::Deref, sync::Arc, }; @@ -45,7 +44,7 @@ use crate::{ term::primitive::{ground::GroundTerm, variable::Variable, Primitive}, }, pipeline::transformations::default::TransformationDefault, - programs::{handle::ProgramHandle, program::Program, ProgramRead}, + programs::{handle::ProgramHandle, program::Program}, substitution::Substitution, }, table_manager::{MemoryUsage, SubtableExecutionPlan, TableManager}, diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index 5e1840f27..ae2d92b39 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -3,7 +3,7 @@ use std::{ borrow::Cow, collections::{HashMap, HashSet}, - iter::{repeat, repeat_n}, + iter::repeat, sync::Arc, }; @@ -47,19 +47,6 @@ impl SaturationAtom { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -enum HeadTerm { - Existential(VariableIdx), - Universal(VariableIdx), - Constant(StorageValueT), -} - -#[derive(Debug)] -struct HeadAtom { - predicate: Arc, - terms: Box<[HeadTerm]>, -} - #[derive(Debug, Clone)] pub(crate) enum Head { Datalog(Box<[SaturationAtom]>), @@ -296,7 +283,10 @@ fn compute_join_order( } } +#[cfg(test)] pub(super) fn bench_rules(n: usize) -> (Vec, Arc) { + use std::iter::repeat_n; + let one = BodyTerm::Constant(StorageValueT::Int64(1)); let zero = BodyTerm::Constant(StorageValueT::Int64(0)); let predicate: Arc = Arc::from("p"); diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index 812067539..c691b36a6 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -2,10 +2,7 @@ use thiserror::Error; -use crate::{ - chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}, - execution::rule_execution::RuleExecution, -}; +use crate::chase_model::analysis::program_analysis::RuleAnalysis; /// Errors that can occur while creating a strategy. #[derive(Error, Debug, Copy, Clone)] @@ -27,6 +24,8 @@ pub trait RuleSelectionStrategy: std::fmt::Debug + Sized { fn next_rule(&mut self, new_derivations: Option) -> Option; } +/// A [`RuleSelectionStrategy`] which is aware of the SCCs of a program pub trait MetaStrategy: RuleSelectionStrategy { + /// Get the SCC which the currently selected rule belongs to fn current_scc(&self) -> Box<[usize]>; } From 7869800b7b18f29a6db8c306de18a38146f2f693 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 10 Sep 2025 16:45:37 +0200 Subject: [PATCH 17/26] clippy --- nemo/src/execution/execution_engine.rs | 4 ++-- nemo/src/execution/saturation/execution.rs | 10 +++++----- nemo/src/execution/saturation/model.rs | 8 +++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 67f74122a..f7a3b8a4e 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -291,7 +291,7 @@ impl ExecutionEngine { let predicates: Vec = self.table_manager.known_predicates().cloned().collect(); for predicate in &predicates { - let Some(table_id) = self.table_manager.combine_predicate(&predicate)? else { + let Some(table_id) = self.table_manager.combine_predicate(predicate)? else { continue; }; @@ -370,7 +370,7 @@ impl ExecutionEngine { log::info!("<<< {0}: APPLYING SCC {scc:?} >>>", self.current_step); new_derivations = Some(self.saturation_step(rules)?); - last_scc = Some(Box::from(scc)); + last_scc = Some(scc); } else { let updated_predicates = self.step(index, &executions[index])?; last_scc = None; diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 9a887898a..667983025 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -363,7 +363,7 @@ impl ExecutionTree { }; let atom = atom.clone(); - let inner = Box::new(self.execute(&tables)); + let inner = Box::new(self.execute(tables)); JoinIter::Join { inner, @@ -379,7 +379,7 @@ impl ExecutionTree { }; let atom = atom.clone(); - let inner = Box::new(self.execute(&tables)); + let inner = Box::new(self.execute(tables)); JoinIter::Join { inner, @@ -444,7 +444,7 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let fact = fact_from_row(row, predicate.clone()); for trigger in rule.trigger(&fact) { - matches.extend(trigger.execute(&db).map(|row| (row, rule_index))); + matches.extend(trigger.execute(db).map(|row| (row, rule_index))); } } } @@ -482,8 +482,8 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { for fact in &todo { for (rule_index, rule) in rules.iter_mut().enumerate() { - for trigger in rule.trigger(&fact) { - matches.extend(trigger.execute(&db).map(|row| (row, rule_index))); + for trigger in rule.trigger(fact) { + matches.extend(trigger.execute(db).map(|row| (row, rule_index))); } } } diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index ae2d92b39..a6c76598b 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -3,7 +3,6 @@ use std::{ borrow::Cow, collections::{HashMap, HashSet}, - iter::repeat, sync::Arc, }; @@ -147,8 +146,7 @@ impl SaturationRuleTranslation<'_> { if let BodyTerm::Variable(v) = t { if terms[i + 1..] .iter() - .find(|other| *other == &BodyTerm::Variable(*v)) - .is_some() + .any(|other| other == &BodyTerm::Variable(*v)) { return Err(()); } @@ -173,7 +171,7 @@ impl SaturationRuleTranslation<'_> { .map(|lit| self.convert_literal(lit)) .collect::>()?; - let join_orders: Box<[_]> = repeat(None).take(body.len()).collect(); + let join_orders: Box<[_]> = std::iter::repeat_n(None, body.len()).collect(); let head = if rule.variables().any(Variable::is_existential) { // existential variable are not supported yet @@ -217,7 +215,7 @@ struct Interner(HashSet>); impl Interner { fn create(&mut self, input: &str) -> Arc { if let Some(res) = self.0.get(input) { - return res.clone(); + res.clone() } else { self.0.insert(Arc::from(input)); self.0.get(input).unwrap().clone() From 38ca69844b9e9df3bfe2d4aba767590441b1b538 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Fri, 12 Sep 2025 18:27:33 +0200 Subject: [PATCH 18/26] add timing, saturate single rules --- nemo/src/execution/execution_engine.rs | 87 ++++++++++++++++++++------ 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index f7a3b8a4e..04a73cc32 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -1,5 +1,6 @@ //! Functionality which handles the execution of a program +use core::slice; use std::{ collections::{hash_map::Entry, HashMap}, sync::Arc, @@ -215,6 +216,7 @@ impl ExecutionEngine { } fn step(&mut self, rule_index: usize, execution: &RuleExecution) -> Result, Error> { + TimedCode::instance().sub("Reasoning/Rules").start(); let timing_string = format!("Reasoning/Rules/Rule {rule_index}"); TimedCode::instance().sub(&timing_string).start(); @@ -233,6 +235,7 @@ impl ExecutionEngine { log::info!("Rule duration: {} ms", rule_duration.as_millis()); self.current_step += 1; + TimedCode::instance().sub("Reasoning/Rules").stop(); Ok(updated_predicates) } @@ -268,30 +271,57 @@ impl ExecutionEngine { fn fill_saturation_rules( &mut self, scc: &[usize], - store: &mut HashMap, Option>>, + store: &mut HashMap< + Box<[usize]>, + Result, HashMap>, + >, ) { if store.contains_key(scc) { return; } - let saturation_rules: Option> = { + let saturation_rules: Vec> = { let mut dict = self.table_manager.dictionary_mut(); let mut translation = SaturationRuleTranslation::new(&mut dict); scc.iter() - .map(|index| translation.convert(self.nemo_program.rule(*index)).ok()) + .map(|index| { + translation + .convert(self.nemo_program.rule(*index)) + .inspect_err(|reason| { + log::debug!("rule {index} does not support saturation ({reason})") + }) + .ok() + }) .collect() }; + let saturation_rules = if saturation_rules.iter().all(Option::is_some) { + Ok(saturation_rules.into_iter().map(Option::unwrap).collect()) + } else { + Err(saturation_rules + .into_iter() + .zip(scc) + .flat_map(|(r, i)| Some(*i).zip(r)) + .collect()) + }; + store.insert(Box::from(scc), saturation_rules); } - fn saturation_step(&mut self, rules: &mut [SaturationRule]) -> Result { + fn saturation_step( + &mut self, + rules: &mut [SaturationRule], + scc: &[usize], + ) -> Result { + TimedCode::instance().sub("Reasoning/Saturation").start(); + let mut db: DataBase = Default::default(); let mut new_facts = false; let predicates: Vec = self.table_manager.known_predicates().cloned().collect(); for predicate in &predicates { - let Some(table_id) = self.table_manager.combine_predicate(predicate)? else { + // rules.iter().flat_map(|r| r.input_predicates()) { + let Some(table_id) = self.table_manager.combine_predicate(&predicate)? else { continue; }; @@ -303,8 +333,17 @@ impl ExecutionEngine { log::trace!("{db:?}"); + let timing_string = format!("Reasoning/Saturation/Saturate/{scc:?}"); + + TimedCode::instance().sub(&timing_string).start(); saturate(&mut db, rules); + let duration = TimedCode::instance().sub(&timing_string).stop(); + log::info!("Saturation took {}ms", duration.as_millis()); + + let timing_string = format!("Reasoning/Saturation/Save Tables"); + + TimedCode::instance().sub(&timing_string).start(); for predicate in &predicates { let mut buffer = TupleBuffer::new(self.predicate_arity(predicate).unwrap()); @@ -328,20 +367,24 @@ impl ExecutionEngine { new_facts = true; } + let duration = TimedCode::instance().sub(&timing_string).stop(); + log::info!("Saved saturation results: {}ms", duration.as_millis()); self.current_step += 1; + TimedCode::instance().sub("Reasoning/Saturation").stop(); Ok(new_facts) } /// Executes the program. pub fn execute(&mut self) -> Result<(), Error> { - TimedCode::instance().sub("Reasoning/Rules").start(); TimedCode::instance().sub("Reasoning/Execution").start(); let mut new_derivations: Option = None; - let mut saturation_rules: HashMap, Option>> = - Default::default(); + let mut saturation_rules: HashMap< + Box<[usize]>, + Result, HashMap>, + > = Default::default(); let executions: Vec<_> = self .program @@ -366,21 +409,29 @@ impl ExecutionEngine { self.fill_saturation_rules(&scc, &mut saturation_rules); - if let Some(Some(rules)) = saturation_rules.get_mut(&scc) { - log::info!("<<< {0}: APPLYING SCC {scc:?} >>>", self.current_step); + match saturation_rules.get_mut(&scc).unwrap() { + Ok(rules) => { + log::info!("<<< {0}: APPLYING SCC {scc:?} >>>", self.current_step); - new_derivations = Some(self.saturation_step(rules)?); - last_scc = Some(scc); - } else { - let updated_predicates = self.step(index, &executions[index])?; - last_scc = None; - new_derivations = Some(!updated_predicates.is_empty()); + new_derivations = Some(self.saturation_step(rules, &scc)?); + last_scc = Some(scc); + } + Err(rules) => { + if let Some(rule) = rules.get_mut(&index) { + new_derivations = + Some(self.saturation_step(slice::from_mut(rule), &[index])?); + last_scc = None; + } else { + let updated_predicates = self.step(index, &executions[index])?; + last_scc = None; + new_derivations = Some(!updated_predicates.is_empty()); - self.defrag(updated_predicates)?; + self.defrag(updated_predicates)?; + } + } } } - TimedCode::instance().sub("Reasoning/Rules").stop(); TimedCode::instance().sub("Reasoning/Execution").stop(); Ok(()) } From 49b49727398c56abe67229213611933c2f361b22 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Fri, 12 Sep 2025 18:29:10 +0200 Subject: [PATCH 19/26] add support for single equality constraints also makes the update step in saturate less wasteful --- nemo/src/execution/saturation/execution.rs | 80 ++++++++++++++---- nemo/src/execution/saturation/model.rs | 97 +++++++++++++++------- 2 files changed, 131 insertions(+), 46 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 667983025..c2c80ae1a 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -1,12 +1,13 @@ //! Executing a set of saturation rules +use core::panic; use std::{ collections::{btree_map, BTreeMap, HashMap}, ops::{Bound, Index}, sync::Arc, }; -use nemo_physical::datatypes::StorageValueT; +use nemo_physical::{datatypes::StorageValueT, meta::timing::TimedCode}; use super::model::{ BodyTerm, Head, JoinOp, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, @@ -53,14 +54,25 @@ impl SaturationSubstitution { .collect() } - fn update(&mut self, terms: &[BodyTerm], row: &[RowElement]) { + #[must_use] + fn update(&mut self, terms: &[BodyTerm], row: &[RowElement]) -> bool { for (term, value) in terms.iter().zip(row) { let BodyTerm::Variable(var) = term else { continue; }; - self.insert(*var, value.value()); + if let Some(prev) = self.insert(*var, value.value()) { + if prev != value.value() { + return false; + } + } } + + true + } + + fn satisfies(&self, equality: (VariableIdx, VariableIdx)) -> bool { + self.0[equality.0 as usize].unwrap() == self.0[equality.1 as usize].unwrap() } } @@ -91,6 +103,12 @@ impl SaturationAtom { } } + if let Some(equality) = self.equality { + if !res.satisfies(equality) { + return None; + } + } + Some(res) } } @@ -292,10 +310,19 @@ impl Iterator for RowMatcher<'_> { type Item = SaturationSubstitution; fn next(&mut self) -> Option { - let row = self.cursor.next()?; - let mut subst = self.substitution.clone(); - subst.update(&self.atom.terms, row); - Some(subst) + loop { + let row = self.cursor.next()?; + let mut subst = self.substitution.clone(); + if subst.update(&self.atom.terms, row) { + if let Some(equality) = &self.atom.equality { + if !subst.satisfies(*equality) { + continue; + } + } + + return Some(subst); + } + } } } @@ -430,7 +457,7 @@ fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { _ => None, }) .collect::>() - .unwrap(); + .expect(format!("{row:?}").as_str()); SaturationFact { predicate, values } } @@ -438,18 +465,32 @@ fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let mut matches = Vec::new(); - for (predicate, table) in db.0.iter() { - for (rule_index, rule) in rules.iter_mut().enumerate() { - for (row, _) in table.iter() { - let fact = fact_from_row(row, predicate.clone()); + #[cfg(not(test))] + TimedCode::instance() + .sub("Reasoning/Saturation/update") + .start(); - for trigger in rule.trigger(&fact) { - matches.extend(trigger.execute(db).map(|row| (row, rule_index))); - } + for (rule_index, rule) in rules.iter_mut().enumerate() { + let predicate = rule.body_atoms[0].predicate.clone(); + + for (row, _) in db.0.get(&predicate).iter().flat_map(|table| table.iter()) { + let fact = fact_from_row(row, predicate.clone()); + + for trigger in rule.trigger(&fact) { + matches.extend(trigger.execute(db).map(|row| (row, rule_index))); } } } + #[cfg(not(test))] + TimedCode::instance() + .sub("Reasoning/Saturation/update") + .stop(); + #[cfg(not(test))] + TimedCode::instance() + .sub("Reasoning/Saturation/loop") + .start(); + let mut todo = Vec::new(); while !matches.is_empty() { todo.clear(); @@ -488,6 +529,11 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { } } } + + #[cfg(not(test))] + TimedCode::instance() + .sub("Reasoning/Saturation/loop") + .stop(); } impl DataBase { @@ -643,11 +689,13 @@ mod test { let head = Head::Datalog(Box::from([SaturationAtom { predicate: p1.clone(), terms: Box::from([x.clone(), y.clone(), z.clone()]), + equality: Default::default(), }])); let p2_atom = SaturationAtom { predicate: p2.clone(), terms: Box::from([x.clone(), y.clone()]), + equality: Default::default(), }; let p2_table: BTreeMap = table![[0, 0],]; @@ -655,6 +703,7 @@ mod test { let p3_atom = SaturationAtom { predicate: p3.clone(), terms: Box::from([x.clone(), y.clone()]), + equality: Default::default(), }; let p3_table: BTreeMap = table![[0, 0],]; @@ -662,6 +711,7 @@ mod test { let p4_atom = SaturationAtom { predicate: p4.clone(), terms: Box::from([x.clone(), y.clone(), z.clone()]), + equality: Default::default(), }; let p4_table: BTreeMap = table![[0, 0, 0], [0, 0, 1],]; diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index a6c76598b..75cf1526d 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -34,6 +34,7 @@ pub(crate) enum BodyTerm { pub(crate) struct SaturationAtom { pub(super) predicate: Arc, pub(super) terms: Box<[BodyTerm]>, + pub(super) equality: Option<(VariableIdx, VariableIdx)>, } impl SaturationAtom { @@ -82,6 +83,10 @@ impl SaturationRule { order } } + + pub(crate) fn input_predicates(&self) -> impl Iterator> + use<'_> { + self.body_atoms.iter().map(|a| a.predicate.clone()) + } } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -90,28 +95,30 @@ pub(crate) struct SaturationFact { pub(super) values: Arc<[StorageValueT]>, } -struct Variables(HashMap); +#[derive(Default)] +struct Variables(HashMap, u16); impl Variables { fn get(&mut self, var: Cow) -> u16 { match self.0.get(var.as_ref()) { Some(index) => *index, None => { - let index = self - .0 - .len() - .try_into() - .expect("number of variables must be smaller than u16::MAX"); - + let index = self.add_fresh(); self.0.insert(var.to_string(), index); index } } } + + fn add_fresh(&mut self) -> u16 { + let index = self.1; + self.1 += 1; + index + } } impl SaturationRuleTranslation<'_> { - fn convert_term(&mut self, term: &Term) -> Result { + fn convert_term(&mut self, term: &Term) -> Result { match GroundTerm::try_from(term.clone()) { Ok(ground) => { let value = ground.value().to_storage_value_t_dict(self.dict); @@ -119,11 +126,11 @@ impl SaturationRuleTranslation<'_> { } Err(term) => { let Term::Primitive(Primitive::Variable(var)) = term else { - return Err(()); + return Err("not a ground-term or variable"); }; let Variable::Universal(var) = var else { - return Err(()); + return Err("existential"); }; match var.name() { @@ -134,54 +141,80 @@ impl SaturationRuleTranslation<'_> { } } - fn convert_atom(&mut self, atom: &Atom) -> Result { + fn convert_body_atom(&mut self, atom: &Atom) -> Result { let predicate = self.interner.create(atom.predicate().name()); - let terms: Box<[BodyTerm]> = atom + let mut terms: Box<[BodyTerm]> = atom .terms() .map(|term| self.convert_term(term)) - .collect::>()?; + .collect::>()?; - for (i, t) in terms.iter().enumerate() { - if let BodyTerm::Variable(v) = t { - if terms[i + 1..] - .iter() - .any(|other| other == &BodyTerm::Variable(*v)) - { - return Err(()); + let mut equality = None; + + for i in 0..terms.len() { + let (left, right) = terms.split_at_mut(i + 1); + if let BodyTerm::Variable(v) = &mut left[i] { + if right.iter().any(|other| other == &BodyTerm::Variable(*v)) { + if equality.is_some() { + return Err("only supports a single equality"); + } + + let orig = *v; + *v = self.variables.add_fresh(); + + equality = Some((orig, *v)); } } } - Ok(SaturationAtom { predicate, terms }) + Ok(SaturationAtom { + predicate, + terms, + equality, + }) + } + + fn convert_head_atom(&mut self, atom: &Atom) -> Result { + let predicate = self.interner.create(atom.predicate().name()); + + let terms: Box<[BodyTerm]> = atom + .terms() + .map(|term| self.convert_term(term)) + .collect::>()?; + + Ok(SaturationAtom { + predicate, + terms, + equality: Default::default(), + }) } - fn convert_literal(&mut self, lit: &Literal) -> Result { + fn convert_literal(&mut self, lit: &Literal) -> Result { match lit { - Literal::Positive(atom) => self.convert_atom(atom), - Literal::Negative(_) => Err(()), - Literal::Operation(_) => Err(()), + Literal::Positive(atom) => self.convert_body_atom(atom), + Literal::Negative(_) => Err("negation"), + Literal::Operation(_) => Err("unsupported operation"), } } - pub(crate) fn convert(&mut self, rule: &Rule) -> Result { + pub(crate) fn convert(&mut self, rule: &Rule) -> Result { let body: Arc<[SaturationAtom]> = rule .body() .iter() .map(|lit| self.convert_literal(lit)) - .collect::>()?; + .collect::>()?; let join_orders: Box<[_]> = std::iter::repeat_n(None, body.len()).collect(); let head = if rule.variables().any(Variable::is_existential) { // existential variable are not supported yet - return Err(()); + return Err("existential"); } else { Head::Datalog( rule.head() .iter() - .map(|atom| self.convert_atom(atom)) - .collect::>()?, + .map(|atom| self.convert_head_atom(atom)) + .collect::>()?, ) }; @@ -203,7 +236,7 @@ impl<'a> SaturationRuleTranslation<'a> { /// Create at [`SaturationRuleTranslation`] referring to a [`Dict`] pub(crate) fn new(dict: &'a mut Dict) -> Self { Self { - variables: Variables(HashMap::new()), + variables: Variables::default(), interner: Interner(HashSet::new()), dict, } @@ -299,6 +332,7 @@ pub(super) fn bench_rules(n: usize) -> (Vec, Arc) { let head = SaturationAtom { predicate: predicate.clone(), terms: head.collect(), + equality: Default::default(), }; let body = (0..VariableIdx::try_from(i).unwrap()) @@ -309,6 +343,7 @@ pub(super) fn bench_rules(n: usize) -> (Vec, Arc) { let body = SaturationAtom { predicate: predicate.clone(), terms: body.collect(), + equality: Default::default(), }; SaturationRule { From 9aa437620f869f4a12481194a2d6e4fc94e175d2 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Fri, 12 Sep 2025 18:35:27 +0200 Subject: [PATCH 20/26] cleanup --- nemo/src/execution/execution_engine.rs | 1 - nemo/src/execution/saturation/execution.rs | 4 +++- nemo/src/execution/saturation/model.rs | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 04a73cc32..903b23640 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -320,7 +320,6 @@ impl ExecutionEngine { let predicates: Vec = self.table_manager.known_predicates().cloned().collect(); for predicate in &predicates { - // rules.iter().flat_map(|r| r.input_predicates()) { let Some(table_id) = self.table_manager.combine_predicate(&predicate)? else { continue; }; diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index c2c80ae1a..aed297c0f 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -7,7 +7,9 @@ use std::{ sync::Arc, }; -use nemo_physical::{datatypes::StorageValueT, meta::timing::TimedCode}; +use nemo_physical::datatypes::StorageValueT; +#[cfg(not(test))] +use nemo_physical::meta::timing::TimedCode; use super::model::{ BodyTerm, Head, JoinOp, SaturationAtom, SaturationFact, SaturationRule, VariableIdx, diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index 75cf1526d..bf7e5f6df 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -84,6 +84,7 @@ impl SaturationRule { } } + #[allow(unused)] pub(crate) fn input_predicates(&self) -> impl Iterator> + use<'_> { self.body_atoms.iter().map(|a| a.predicate.clone()) } From bf4b219c33107d90efdef65dcc5341f7f27fd577 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Fri, 12 Sep 2025 18:42:49 +0200 Subject: [PATCH 21/26] clippy (I) --- nemo/src/execution/execution_engine.rs | 4 ++-- nemo/src/execution/saturation/execution.rs | 2 +- nemo/src/lib.rs | 1 - nemo/src/rule_model/origin.rs | 7 ++----- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 903b23640..2b1ce59b6 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -320,7 +320,7 @@ impl ExecutionEngine { let predicates: Vec = self.table_manager.known_predicates().cloned().collect(); for predicate in &predicates { - let Some(table_id) = self.table_manager.combine_predicate(&predicate)? else { + let Some(table_id) = self.table_manager.combine_predicate(predicate)? else { continue; }; @@ -340,7 +340,7 @@ impl ExecutionEngine { log::info!("Saturation took {}ms", duration.as_millis()); - let timing_string = format!("Reasoning/Saturation/Save Tables"); + let timing_string = "Reasoning/Saturation/Save Tables".to_string(); TimedCode::instance().sub(&timing_string).start(); for predicate in &predicates { diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index aed297c0f..b17bb0a31 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -459,7 +459,7 @@ fn fact_from_row(row: &Row, predicate: Arc) -> SaturationFact { _ => None, }) .collect::>() - .expect(format!("{row:?}").as_str()); + .unwrap_or_else(|| panic!("{row:?}")); SaturationFact { predicate, values } } diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 3575fadab..3222d3fb6 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -18,7 +18,6 @@ #![feature(assert_matches)] #![feature(iter_intersperse)] #![feature(str_from_raw_parts)] -#![feature(path_add_extension)] #![feature(associated_type_defaults)] #![feature(btree_cursors)] diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs index eb8ed2989..6671495ac 100644 --- a/nemo/src/rule_model/origin.rs +++ b/nemo/src/rule_model/origin.rs @@ -9,8 +9,10 @@ use super::{ /// Origin of a [super::components::ProgramComponent] #[derive(Debug, Clone)] +#[derive(Default)] pub enum Origin { /// Component has no special origin + #[default] Created, /// Component was created by parsing a file File { @@ -37,11 +39,6 @@ pub enum Origin { }, } -impl Default for Origin { - fn default() -> Self { - Self::Created - } -} impl Origin { /// Create an Oriign pointing to character range represented From 6b8cdf6dc988fab492769075316308a12dc58b97 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Sun, 14 Sep 2025 16:24:10 +0200 Subject: [PATCH 22/26] clippy (II) --- nemo-physical/src/meta/timing.rs | 9 +-- nemo/src/execution/execution_engine.rs | 87 +++++++++++++------------- 2 files changed, 44 insertions(+), 52 deletions(-) diff --git a/nemo-physical/src/meta/timing.rs b/nemo-physical/src/meta/timing.rs index a1e685e61..187c2745e 100644 --- a/nemo-physical/src/meta/timing.rs +++ b/nemo-physical/src/meta/timing.rs @@ -83,9 +83,10 @@ impl fmt::Debug for TimedCodeInfo { } /// How to sort the elements of a [TimedCode] object -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Default)] pub enum TimedSorting { /// The order the code got called in + #[default] Default, /// Alphabetical by the title of the block Alphabetical, @@ -93,12 +94,6 @@ pub enum TimedSorting { LongestThreadTime, } -impl Default for TimedSorting { - fn default() -> Self { - Self::Default - } -} - /// How to display a layer of a [TimedCode] object #[derive(Debug, Default, Copy, Clone)] pub struct TimedDisplay { diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 2b1ce59b6..e5cc61acb 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -268,46 +268,6 @@ impl ExecutionEngine { Ok(()) } - fn fill_saturation_rules( - &mut self, - scc: &[usize], - store: &mut HashMap< - Box<[usize]>, - Result, HashMap>, - >, - ) { - if store.contains_key(scc) { - return; - } - - let saturation_rules: Vec> = { - let mut dict = self.table_manager.dictionary_mut(); - let mut translation = SaturationRuleTranslation::new(&mut dict); - scc.iter() - .map(|index| { - translation - .convert(self.nemo_program.rule(*index)) - .inspect_err(|reason| { - log::debug!("rule {index} does not support saturation ({reason})") - }) - .ok() - }) - .collect() - }; - - let saturation_rules = if saturation_rules.iter().all(Option::is_some) { - Ok(saturation_rules.into_iter().map(Option::unwrap).collect()) - } else { - Err(saturation_rules - .into_iter() - .zip(scc) - .flat_map(|(r, i)| Some(*i).zip(r)) - .collect()) - }; - - store.insert(Box::from(scc), saturation_rules); - } - fn saturation_step( &mut self, rules: &mut [SaturationRule], @@ -380,10 +340,47 @@ impl ExecutionEngine { let mut new_derivations: Option = None; - let mut saturation_rules: HashMap< - Box<[usize]>, - Result, HashMap>, - > = Default::default(); + fn fill_saturation_rules( + this: &mut ExecutionEngine, + scc: &[usize], + store: &mut HashMap< + Box<[usize]>, + Result, HashMap>, + >, + ) { + if store.contains_key(scc) { + return; + } + + let saturation_rules: Vec> = { + let mut dict = this.table_manager.dictionary_mut(); + let mut translation = SaturationRuleTranslation::new(&mut dict); + scc.iter() + .map(|index| { + translation + .convert(this.nemo_program.rule(*index)) + .inspect_err(|reason| { + log::debug!("rule {index} does not support saturation ({reason})") + }) + .ok() + }) + .collect() + }; + + let saturation_rules = if saturation_rules.iter().all(Option::is_some) { + Ok(saturation_rules.into_iter().map(Option::unwrap).collect()) + } else { + Err(saturation_rules + .into_iter() + .zip(scc) + .flat_map(|(r, i)| Some(*i).zip(r)) + .collect()) + }; + + store.insert(Box::from(scc), saturation_rules); + } + + let mut saturation_rules = Default::default(); let executions: Vec<_> = self .program @@ -406,7 +403,7 @@ impl ExecutionEngine { } } - self.fill_saturation_rules(&scc, &mut saturation_rules); + fill_saturation_rules(self, &scc, &mut saturation_rules); match saturation_rules.get_mut(&scc).unwrap() { Ok(rules) => { From df9eaa42c6fe45f2362d322d304e3388e6ffa9c5 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Mon, 15 Sep 2025 09:17:54 +0200 Subject: [PATCH 23/26] cargo fmt --- nemo/src/rule_model/origin.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs index 6671495ac..75a7ef5da 100644 --- a/nemo/src/rule_model/origin.rs +++ b/nemo/src/rule_model/origin.rs @@ -8,8 +8,7 @@ use super::{ }; /// Origin of a [super::components::ProgramComponent] -#[derive(Debug, Clone)] -#[derive(Default)] +#[derive(Debug, Clone, Default)] pub enum Origin { /// Component has no special origin #[default] @@ -39,7 +38,6 @@ pub enum Origin { }, } - impl Origin { /// Create an Oriign pointing to character range represented /// by the given ast node. From aa17a694ac756436a2f03888c8dc67af9de3c460 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Mon, 22 Sep 2025 18:04:40 +0200 Subject: [PATCH 24/26] fix warnings --- nemo/src/execution/execution_engine.rs | 11 ++---- .../execution_engine/tracing/node_query.rs | 1 - .../execution_engine/tracing/simple.rs | 1 - .../execution_engine/tracing/tree_query.rs | 1 - nemo/src/execution/saturation/execution.rs | 35 ++++++++----------- nemo/src/execution/saturation/model.rs | 5 ++- 6 files changed, 19 insertions(+), 35 deletions(-) diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 45968e817..b8f0f4cfb 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -21,14 +21,10 @@ use crate::{ translation::ProgramChaseTranslation, }, error::{Error, report::ProgramReport, warned::Warned}, - execution::{ - planning::plan_tracing::TracingStrategy, - saturation::{ + execution::saturation::{ execution::{DataBase, saturate}, model::{SaturationRule, SaturationRuleTranslation}, }, - tracing::trace::TraceDerivation, - }, io::{formats::Export, import_manager::ImportManager}, rule_file::RuleFile, rule_model::{ @@ -401,13 +397,12 @@ impl ExecutionEngine { while let Some(index) = rule_strategy.next_rule(new_derivations) { let scc = rule_strategy.current_scc(); - if let Some(last) = &last_scc { - if &scc == last { + if let Some(last) = &last_scc + && &scc == last { log::debug!("skipping application of {index}"); new_derivations = Some(false); continue; } - } fill_saturation_rules(self, &scc, &mut saturation_rules); diff --git a/nemo/src/execution/execution_engine/tracing/node_query.rs b/nemo/src/execution/execution_engine/tracing/node_query.rs index 227d38927..e81da4e01 100644 --- a/nemo/src/execution/execution_engine/tracing/node_query.rs +++ b/nemo/src/execution/execution_engine/tracing/node_query.rs @@ -22,7 +22,6 @@ use crate::{ valid_tables_plan, variable_translation, }, }, - selection_strategy::strategy::RuleSelectionStrategy, tracing::{ node_query::{ TableEntriesForTreeNodesQuery, TableEntriesForTreeNodesQueryInner, diff --git a/nemo/src/execution/execution_engine/tracing/simple.rs b/nemo/src/execution/execution_engine/tracing/simple.rs index 6df9d4012..233b9c91d 100644 --- a/nemo/src/execution/execution_engine/tracing/simple.rs +++ b/nemo/src/execution/execution_engine/tracing/simple.rs @@ -13,7 +13,6 @@ use crate::{ execution::{ ExecutionEngine, planning::plan_tracing::TracingStrategy, - selection_strategy::strategy::RuleSelectionStrategy, tracing::{ error::TracingError, trace::{ diff --git a/nemo/src/execution/execution_engine/tracing/tree_query.rs b/nemo/src/execution/execution_engine/tracing/tree_query.rs index eda284a40..b08315c9b 100644 --- a/nemo/src/execution/execution_engine/tracing/tree_query.rs +++ b/nemo/src/execution/execution_engine/tracing/tree_query.rs @@ -15,7 +15,6 @@ use crate::{ execution::{ ExecutionEngine, planning::plan_tracing::TracingStrategy, - selection_strategy::strategy::RuleSelectionStrategy, tracing::{ error::TracingError, shared::{PaginationResponse, Rule as TraceRule, TableEntryQuery, TableEntryResponse}, diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index b17bb0a31..fcab60415 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -63,11 +63,10 @@ impl SaturationSubstitution { continue; }; - if let Some(prev) = self.insert(*var, value.value()) { - if prev != value.value() { + if let Some(prev) = self.insert(*var, value.value()) + && prev != value.value() { return false; } - } } true @@ -95,21 +94,19 @@ impl SaturationAtom { } } BodyTerm::Variable(idx) => { - if let Some(prev) = res.insert(*idx, *value) { - if prev != *value { + if let Some(prev) = res.insert(*idx, *value) + && prev != *value { return None; } - } } BodyTerm::Ignore => {} } } - if let Some(equality) = self.equality { - if !res.satisfies(equality) { + if let Some(equality) = self.equality + && !res.satisfies(equality) { return None; } - } Some(res) } @@ -253,11 +250,10 @@ impl<'a> Iterator for RowIterator<'a> { fn next(&mut self) -> Option { while let Some((row, _)) = self.lower_cursor.next() { - if let Some((other_row, _)) = self.upper_cursor.peek_next() { - if other_row == row { + if let Some((other_row, _)) = self.upper_cursor.peek_next() + && other_row == row { return None; } - } match match_rows(&self.pattern, row) { MatchResult::Matches => return Some(row), @@ -316,11 +312,10 @@ impl Iterator for RowMatcher<'_> { let row = self.cursor.next()?; let mut subst = self.substitution.clone(); if subst.update(&self.atom.terms, row) { - if let Some(equality) = &self.atom.equality { - if !subst.satisfies(*equality) { + if let Some(equality) = &self.atom.equality + && !subst.satisfies(*equality) { continue; } - } return Some(subst); } @@ -437,11 +432,10 @@ impl Iterator for JoinIter<'_> { table, current, } => loop { - if let Some(current) = current { - if let Some(next) = current.next() { + if let Some(current) = current + && let Some(next) = current.next() { return Some(next); } - } let substitution = inner.next()?; *current = Some(join(substitution, atom.clone(), table)); @@ -508,11 +502,10 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let mut cursor = table.lower_bound_mut(Bound::Included(&row)); - if let Some((other_row, _)) = cursor.peek_next() { - if other_row == &row { + if let Some((other_row, _)) = cursor.peek_next() + && other_row == &row { continue; } - } let fact = fact_from_row(&row, atom.predicate.clone()); diff --git a/nemo/src/execution/saturation/model.rs b/nemo/src/execution/saturation/model.rs index bf7e5f6df..3c9b6b846 100644 --- a/nemo/src/execution/saturation/model.rs +++ b/nemo/src/execution/saturation/model.rs @@ -154,8 +154,8 @@ impl SaturationRuleTranslation<'_> { for i in 0..terms.len() { let (left, right) = terms.split_at_mut(i + 1); - if let BodyTerm::Variable(v) = &mut left[i] { - if right.iter().any(|other| other == &BodyTerm::Variable(*v)) { + if let BodyTerm::Variable(v) = &mut left[i] + && right.iter().any(|other| other == &BodyTerm::Variable(*v)) { if equality.is_some() { return Err("only supports a single equality"); } @@ -165,7 +165,6 @@ impl SaturationRuleTranslation<'_> { equality = Some((orig, *v)); } - } } Ok(SaturationAtom { From 94969eaec380942cff334c2fb2436d113da68f17 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Tue, 23 Sep 2025 09:16:24 +0200 Subject: [PATCH 25/26] fix pathological performance --- nemo/src/execution/saturation/execution.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index fcab60415..8ce1a991e 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -467,7 +467,10 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { .start(); for (rule_index, rule) in rules.iter_mut().enumerate() { - let predicate = rule.body_atoms[0].predicate.clone(); + let predicate = rule + .input_predicates() + .min_by_key(|p| db.0.get(p).map(|t| t.len()).unwrap_or(0)) + .unwrap(); for (row, _) in db.0.get(&predicate).iter().flat_map(|table| table.iter()) { let fact = fact_from_row(row, predicate.clone()); From 8fa1e37e073c6a2ab64c5010727b015b55403ea7 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Tue, 23 Sep 2025 09:17:30 +0200 Subject: [PATCH 26/26] prepare for long-lived tables --- nemo/src/execution/saturation/execution.rs | 122 ++++++++++++++------- 1 file changed, 82 insertions(+), 40 deletions(-) diff --git a/nemo/src/execution/saturation/execution.rs b/nemo/src/execution/saturation/execution.rs index 8ce1a991e..818d1c961 100644 --- a/nemo/src/execution/saturation/execution.rs +++ b/nemo/src/execution/saturation/execution.rs @@ -2,8 +2,8 @@ use core::panic; use std::{ - collections::{btree_map, BTreeMap, HashMap}, - ops::{Bound, Index}, + collections::{BTreeMap, HashMap, btree_map}, + ops::{Bound, Deref, DerefMut, Index}, sync::Arc, }; @@ -64,9 +64,10 @@ impl SaturationSubstitution { }; if let Some(prev) = self.insert(*var, value.value()) - && prev != value.value() { - return false; - } + && prev != value.value() + { + return false; + } } true @@ -95,18 +96,20 @@ impl SaturationAtom { } BodyTerm::Variable(idx) => { if let Some(prev) = res.insert(*idx, *value) - && prev != *value { - return None; - } + && prev != *value + { + return None; + } } BodyTerm::Ignore => {} } } if let Some(equality) = self.equality - && !res.satisfies(equality) { - return None; - } + && !res.satisfies(equality) + { + return None; + } Some(res) } @@ -241,7 +244,7 @@ fn match_rows(pattern: &[RowElement], row: &[RowElement]) -> MatchResult { #[derive(Debug)] struct RowIterator<'a> { lower_cursor: btree_map::Cursor<'a, Row, Age>, - upper_cursor: btree_map::Cursor<'a, Row, Age>, + upper_cursor_next: Option<&'a Row>, pattern: Row, } @@ -250,18 +253,18 @@ impl<'a> Iterator for RowIterator<'a> { fn next(&mut self) -> Option { while let Some((row, _)) = self.lower_cursor.next() { - if let Some((other_row, _)) = self.upper_cursor.peek_next() - && other_row == row { - return None; - } + if let Some(other_row) = self.upper_cursor_next + && other_row == row + { + return None; + } match match_rows(&self.pattern, row) { MatchResult::Matches => return Some(row), MatchResult::InBounds => continue, MatchResult::OutOfBounds => { log::trace!("OutOfBounds {row:?}, {:?}", self.pattern); - log::trace!("upper cursor next {:?}", self.upper_cursor.peek_next()); - log::trace!("upper cursor prev {:?}", self.upper_cursor.peek_prev()); + log::trace!("upper cursor next {:?}", self.upper_cursor_next); unreachable!("this should have been caught early") } } @@ -290,9 +293,10 @@ impl GhostBound for Row { fn find_all_matches<'a>(pattern: Row, table: &'a BTreeMap) -> RowIterator<'a> { let lower_cursor = table.lower_bound(Bound::Included(&pattern)); let upper_cursor = table.upper_bound(Bound::Included(&pattern.invert_bound())); + let upper_cursor_next = upper_cursor.peek_next().map(|(r, _)| r); RowIterator { lower_cursor, - upper_cursor, + upper_cursor_next, pattern, } } @@ -313,9 +317,10 @@ impl Iterator for RowMatcher<'_> { let mut subst = self.substitution.clone(); if subst.update(&self.atom.terms, row) { if let Some(equality) = &self.atom.equality - && !subst.satisfies(*equality) { - continue; - } + && !subst.satisfies(*equality) + { + continue; + } return Some(subst); } @@ -362,8 +367,41 @@ enum Age { New, } +#[derive(Debug)] +enum Singular<'a, T> { + #[allow(unused)] + Ref(&'a mut T), + Owned(T), +} + +impl Default for Singular<'_, T> { + fn default() -> Self { + Self::Owned(T::default()) + } +} + +impl<'a, T> Deref for Singular<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + match self { + Singular::Ref(r) => &*r, + Singular::Owned(o) => o, + } + } +} + +impl<'a, T> DerefMut for Singular<'a, T> { + fn deref_mut(&mut self) -> &mut T { + match self { + Singular::Ref(r) => r, + Singular::Owned(o) => o, + } + } +} + #[derive(Debug, Default)] -pub(crate) struct DataBase(HashMap, BTreeMap>); +pub(crate) struct DataBase<'a>(HashMap, Singular<'a, BTreeMap>>); impl ExecutionTree { fn pop(&mut self) -> Option<&JoinOp> { @@ -433,9 +471,10 @@ impl Iterator for JoinIter<'_> { current, } => loop { if let Some(current) = current - && let Some(next) = current.next() { - return Some(next); - } + && let Some(next) = current.next() + { + return Some(next); + } let substitution = inner.next()?; *current = Some(join(substitution, atom.clone(), table)); @@ -506,9 +545,10 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { let mut cursor = table.lower_bound_mut(Bound::Included(&row)); if let Some((other_row, _)) = cursor.peek_next() - && other_row == &row { - continue; - } + && other_row == &row + { + continue; + } let fact = fact_from_row(&row, atom.predicate.clone()); @@ -534,15 +574,17 @@ pub(crate) fn saturate(db: &mut DataBase, rules: &mut [SaturationRule]) { .stop(); } -impl DataBase { +impl DataBase<'_> { pub fn add_table( &mut self, predicate: Arc, table: impl Iterator>, ) { - let table = table - .map(|row| (row.into_iter().map(RowElement::Value).collect(), Age::Old)) - .collect(); + let table = Singular::Owned( + table + .map(|row| (row.into_iter().map(RowElement::Value).collect(), Age::Old)) + .collect(), + ); self.0.insert(predicate, table); } @@ -576,8 +618,8 @@ mod test { use super::Age; use crate::execution::saturation::{ - execution::{find_all_matches, saturate, DataBase, Row, RowElement}, - model::{bench_rules, BodyTerm, Head, SaturationAtom, SaturationRule}, + execution::{DataBase, Row, RowElement, Singular, find_all_matches, saturate}, + model::{BodyTerm, Head, SaturationAtom, SaturationRule, bench_rules}, }; macro_rules! table { @@ -662,7 +704,7 @@ mod test { let mut db = DataBase(HashMap::from([( predicate.clone(), - BTreeMap::from([(row, Age::Old)]), + Singular::Owned(BTreeMap::from([(row, Age::Old)])), )])); saturate(&mut db, &mut rules); @@ -715,9 +757,9 @@ mod test { let p4_table: BTreeMap = table![[0, 0, 0], [0, 0, 1],]; let mut db = HashMap::new(); - db.insert(p2.clone(), p2_table); - db.insert(p3.clone(), p3_table); - db.insert(p4.clone(), p4_table.clone()); + db.insert(p2.clone(), Singular::Owned(p2_table)); + db.insert(p3.clone(), Singular::Owned(p3_table)); + db.insert(p4.clone(), Singular::Owned(p4_table.clone())); let rule = SaturationRule { body_atoms: Arc::new([p2_atom, p3_atom, p4_atom]), @@ -732,7 +774,7 @@ mod test { assert_eq!( db.0.get(&p1) - .unwrap_or(&BTreeMap::new()) + .unwrap_or(&Default::default()) .keys() .collect::>(), p4_table.keys().collect::>()