Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rs/tests/consensus/dual_workload_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ async fn make_xnet_call(
fn main() -> Result<()> {
SystemTestGroup::new()
.with_setup(setup)
.without_assert_no_critical_errors()
.add_test(systest!(test))
.remove_metrics_to_check("critical_errors")
.execute_from_args()?;
Ok(())
}
2 changes: 1 addition & 1 deletion rs/tests/consensus/max_xnet_payload_size_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ async fn stable_grow(unican: &UniversalCanister<'_>, num_pages: u32) {
fn main() -> Result<()> {
SystemTestGroup::new()
.with_setup(setup)
.without_assert_no_critical_errors()
.add_test(systest!(test))
.remove_metrics_to_check("critical_errors")
.execute_from_args()?;
Ok(())
}
3 changes: 3 additions & 0 deletions rs/tests/consensus/replica_determinism_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ fn main() -> Result<()> {
.with_setup(setup)
.without_assert_no_replica_restarts()
.add_test(systest!(test))
// One of the nodes has a corrupted state and proposes a CUP share which will be invalidated
// by the peers (and vice versa), so it's expected that the metric is increased.
.remove_metrics_to_check("consensus_invalidated_artifacts")
.execute_from_args()?;
Ok(())
}
3 changes: 3 additions & 0 deletions rs/tests/consensus/request_auth_malicious_replica_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,9 @@ fn main() -> Result<()> {
SystemTestGroup::new()
.with_setup(setup)
.add_test(systest!(test))
// One of the nodes produces blocks with invalid ingress messages which should be invaldated
// by the peers, so it's expected that the metric is increased.
.remove_metrics_to_check("consensus_invalidated_artifacts")
.execute_from_args()?;

Ok(())
Expand Down
3 changes: 3 additions & 0 deletions rs/tests/consensus/safety_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ fn main() -> Result<()> {
SystemTestGroup::new()
.with_setup(setup)
.add_test(systest!(test))
// There is a malicious node which proposes invalid blocks, so it's expected that the metric
// is increased.
.remove_metrics_to_check("consensus_invalidated_artifacts")
.execute_from_args()?;
Ok(())
}
35 changes: 24 additions & 11 deletions rs/tests/driver/src/driver/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub const MAX_RUNTIME_THREADS: usize = 16;
const REPORT_TASK_NAME: &str = "report";
const SETUP_TASK_NAME: &str = "setup";
const TEARDOWN_TASK_NAME: &str = "teardown";
const ASSERT_NO_CRITICAL_ERRORS_TASK_NAME: &str = "assert_no_critical_errors";
const ASSERT_NO_METRICS_ERRORS_TASK_NAME: &str = "assert_no_metrics_errors";
const ASSERT_NO_REPLICA_RESTARTS_TASK_NAME: &str = "assert_no_replica_restarts";
const LIFETIME_GUARD_TASK_PREFIX: &str = "lifetime_guard_";

Expand Down Expand Up @@ -424,12 +424,12 @@ impl SystemTestSubGroup {
pub struct SystemTestGroup {
setup: Option<Box<dyn PotSetupFn>>,
teardown: Option<Box<dyn PotSetupFn>>,
assert_no_critical_errors: bool,
assert_no_replica_restarts: bool,
tests: Vec<SystemTestSubGroup>,
timeout_per_test: Option<Duration>,
overall_timeout: Option<Duration>,
with_farm: bool,
metrics_to_check: Vec<String>,
}

impl Default for SystemTestGroup {
Expand Down Expand Up @@ -464,12 +464,15 @@ impl SystemTestGroup {
Self {
setup: Default::default(),
teardown: Default::default(),
assert_no_critical_errors: true,
assert_no_replica_restarts: true,
tests: Default::default(),
timeout_per_test: None,
overall_timeout: None,
with_farm: true,
metrics_to_check: vec![
String::from("critical_errors"),
String::from("consensus_invalidated_artifacts"),
],
}
}

Expand Down Expand Up @@ -497,8 +500,17 @@ impl SystemTestGroup {
self
}

pub fn without_assert_no_critical_errors(mut self) -> Self {
self.assert_no_critical_errors = false;
/// If the provided metric will have a non-zero value for any of the nodes, the test will
/// fail.
pub fn add_metrics_to_check(mut self, metric_name: impl ToString) -> Self {
self.metrics_to_check.push(metric_name.to_string());
self
}

pub fn remove_metrics_to_check(mut self, metric_name: impl ToString) -> Self {
let metric_name_to_remove = metric_name.to_string();
self.metrics_to_check
.retain(|metric_name| *metric_name != metric_name_to_remove);
self
}

Expand Down Expand Up @@ -699,10 +711,11 @@ impl SystemTestGroup {
false,
);

let assert_no_critical_errors_fn: Option<(String, Box<dyn PotSetupFn>)> = if self
.assert_no_critical_errors
let assert_no_metric_errors_fn: Option<(String, Box<dyn PotSetupFn>)> = if !self
.metrics_to_check
.is_empty()
{
let teardown_fn = |env: TestEnv| {
let teardown_fn = move |env: TestEnv| {
let topology = match env.safe_topology_snapshot() {
Ok(topology) => topology,
Err(e) => {
Expand All @@ -718,11 +731,11 @@ impl SystemTestGroup {
.flat_map(|subnet| subnet.nodes())
.collect();
for node in nodes {
node.assert_no_critical_errors();
node.assert_no_metrics_errors(self.metrics_to_check.clone());
}
};
Some((
ASSERT_NO_CRITICAL_ERRORS_TASK_NAME.to_string(),
ASSERT_NO_METRICS_ERRORS_TASK_NAME.to_string(),
Box::new(teardown_fn),
))
} else {
Expand Down Expand Up @@ -763,7 +776,7 @@ impl SystemTestGroup {
.teardown
.into_iter()
.map(|teardown| (TEARDOWN_TASK_NAME.to_string(), teardown))
.chain(assert_no_critical_errors_fn)
.chain(assert_no_metric_errors_fn)
.chain(assert_no_replica_restarts_fn)
.map(|(teardown_name, teardown_fn)| {
let logger = logger.clone();
Expand Down
17 changes: 8 additions & 9 deletions rs/tests/driver/src/driver/test_env_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1178,13 +1178,10 @@ impl IcNodeSnapshot {
))
}

pub fn assert_no_critical_errors(&self) {
pub fn assert_no_metrics_errors(&self, metrics_to_check: Vec<String>) {
block_on(async {
let replica_metric_name_prefix = "critical_errors";
let replica_metrics_fetcher = MetricsFetcher::new(
std::iter::once(self.clone()),
vec![replica_metric_name_prefix.to_string()],
);
let replica_metrics_fetcher =
MetricsFetcher::new(std::iter::once(self.clone()), metrics_to_check);
let replica_metrics_result = replica_metrics_fetcher.fetch::<u64>().await;
let replica_metrics = match replica_metrics_result {
Ok(replica_metrics) => replica_metrics,
Expand All @@ -1198,14 +1195,16 @@ impl IcNodeSnapshot {
};
assert!(
!replica_metrics.is_empty(),
"No critical error counters were found in replica metrics for node {}",
"No error counters were found in replica metrics for node {}",
self.node_id
);
for (name, value) in replica_metrics {
assert_eq!(
value[0], 0,
"Critical error {} raised by node {}. Create `SystemTestGroup` using `without_assert_no_critical_errors` if critical errors are expected in your test",
name, self.node_id
"The metric `{name}` on node {} has non-zero value. \
If the metric is allowed to be non-zero in the test, \
create `SystemTestGroup` with `remove_metrics_to_check(\"{name}\")",
self.node_id,
);
}
});
Expand Down
Loading