From 12d4234f7a587af42007a28aa0ce486fd2ebd5ee Mon Sep 17 00:00:00 2001 From: Kamil Popielarz Date: Thu, 12 Feb 2026 12:27:40 +0000 Subject: [PATCH 1/5] test --- .../consensus/adding_nodes_to_subnet_test.rs | 1 + .../catch_up_loop_prevention_test.rs | 1 + rs/tests/consensus/catch_up_possible_test.rs | 1 + rs/tests/consensus/consensus_performance.rs | 1 + rs/tests/consensus/dual_workload_test.rs | 1 + .../max_ingress_payload_size_test.rs | 1 + .../consensus/max_xnet_payload_size_test.rs | 1 + .../consensus/node_graceful_leaving_test.rs | 1 + .../upgrade/unassigned_node_upgrade_test.rs | 1 + .../upgrade/upgrade_app_subnet_test.rs | 1 + .../upgrade_downgrade_app_subnet_test.rs | 1 + .../upgrade_downgrade_nns_subnet_test.rs | 1 + .../upgrade/upgrade_with_alternative_urls.rs | 1 + rs/tests/driver/src/driver/group.rs | 28 ++++++++++++++----- rs/tests/driver/src/driver/test_env_api.rs | 23 ++++++++------- 15 files changed, 47 insertions(+), 17 deletions(-) diff --git a/rs/tests/consensus/adding_nodes_to_subnet_test.rs b/rs/tests/consensus/adding_nodes_to_subnet_test.rs index 8a948168a730..f11871acfadc 100644 --- a/rs/tests/consensus/adding_nodes_to_subnet_test.rs +++ b/rs/tests/consensus/adding_nodes_to_subnet_test.rs @@ -235,5 +235,6 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(adding_new_nodes_to_subnet_test)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args() } diff --git a/rs/tests/consensus/catch_up_loop_prevention_test.rs b/rs/tests/consensus/catch_up_loop_prevention_test.rs index c933d12d06da..b40e487831d3 100644 --- a/rs/tests/consensus/catch_up_loop_prevention_test.rs +++ b/rs/tests/consensus/catch_up_loop_prevention_test.rs @@ -71,6 +71,7 @@ fn main() -> Result<()> { .add_test(systest!(test_catch_up_possible)) .with_timeout_per_test(TIMEOUT) .with_overall_timeout(TIMEOUT) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/catch_up_possible_test.rs b/rs/tests/consensus/catch_up_possible_test.rs index d7b040bcc4a5..d4d6d48b5f14 100644 --- a/rs/tests/consensus/catch_up_possible_test.rs +++ b/rs/tests/consensus/catch_up_possible_test.rs @@ -71,6 +71,7 @@ fn main() -> Result<()> { .add_test(systest!(test_catch_up_possible)) .with_timeout_per_test(TIMEOUT) .with_overall_timeout(TIMEOUT) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/consensus_performance.rs b/rs/tests/consensus/consensus_performance.rs index f158b32fa796..9634d087d574 100644 --- a/rs/tests/consensus/consensus_performance.rs +++ b/rs/tests/consensus/consensus_performance.rs @@ -228,6 +228,7 @@ fn main() -> Result<()> { .add_test(systest!(test_small_messages)) .add_test(systest!(test_few_large_messages)) .add_test(systest!(test_large_messages)) + .add_metrics_to_check("consensus_invalidated_artifacts") .with_teardown(teardown) .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/dual_workload_test.rs b/rs/tests/consensus/dual_workload_test.rs index 89466eda2b76..f457fe9eabd1 100644 --- a/rs/tests/consensus/dual_workload_test.rs +++ b/rs/tests/consensus/dual_workload_test.rs @@ -224,6 +224,7 @@ fn main() -> Result<()> { .with_setup(setup) .without_assert_no_critical_errors() .add_test(systest!(test)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/max_ingress_payload_size_test.rs b/rs/tests/consensus/max_ingress_payload_size_test.rs index c6af4995b90b..e6e7d5c5b7c4 100644 --- a/rs/tests/consensus/max_ingress_payload_size_test.rs +++ b/rs/tests/consensus/max_ingress_payload_size_test.rs @@ -255,6 +255,7 @@ fn main() -> Result<()> { .add_test(systest!(test_nns_subnet_query_exceeds_limits)) .add_test(systest!(test_nns_subnet_update_within_limits)) .add_test(systest!(test_nns_subnet_update_exceeds_limits)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/max_xnet_payload_size_test.rs b/rs/tests/consensus/max_xnet_payload_size_test.rs index eb696e52e9ff..624696af3b74 100644 --- a/rs/tests/consensus/max_xnet_payload_size_test.rs +++ b/rs/tests/consensus/max_xnet_payload_size_test.rs @@ -60,6 +60,7 @@ fn setup(env: TestEnv) { .with_max_ingress_message_size(INGRESS_MAX_SIZE as u64) .with_max_block_payload_size(XNET_MAX_SIZE as u64), ) + .with_metrics_to_check("consensus_invalidated_artifacts") .setup_and_start(&env) .expect("failed to setup IC under test"); } diff --git a/rs/tests/consensus/node_graceful_leaving_test.rs b/rs/tests/consensus/node_graceful_leaving_test.rs index a09e00b2ac43..049f38de7608 100644 --- a/rs/tests/consensus/node_graceful_leaving_test.rs +++ b/rs/tests/consensus/node_graceful_leaving_test.rs @@ -122,6 +122,7 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs b/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs index 8b11fb78bae9..c63421b80822 100644 --- a/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs +++ b/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs @@ -176,6 +176,7 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs index 831d9b4ce1ce..179e719dfaf9 100644 --- a/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs @@ -81,6 +81,7 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_app_subnet)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs index 91dec37eb862..b122c6bae44a 100644 --- a/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs @@ -173,6 +173,7 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_downgrade_app_subnet)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs index 0b01d5fae099..a91350518d6a 100644 --- a/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs @@ -69,6 +69,7 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_downgrade_nns_subnet)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs b/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs index 557c9f080140..3420c837721c 100644 --- a/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs +++ b/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs @@ -109,6 +109,7 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) + .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/driver/src/driver/group.rs b/rs/tests/driver/src/driver/group.rs index a922c1555598..8ce4e9970e4d 100644 --- a/rs/tests/driver/src/driver/group.rs +++ b/rs/tests/driver/src/driver/group.rs @@ -52,7 +52,7 @@ pub const MAX_RUNTIME_THREADS: usize = 16; const REPORT_TASK_NAME: &str = "report"; const SETUP_TASK_NAME: &str = "setup"; const TEARDOWN_TASK_NAME: &str = "teardown"; -const ASSERT_NO_CRITICAL_ERRORS_TASK_NAME: &str = "assert_no_critical_errors"; +const ASSERT_NO_METRICS_ERRORS_TASK_NAME: &str = "assert_no_metrics_errors"; const ASSERT_NO_REPLICA_RESTARTS_TASK_NAME: &str = "assert_no_replica_restarts"; const LIFETIME_GUARD_TASK_PREFIX: &str = "lifetime_guard_"; @@ -430,6 +430,7 @@ pub struct SystemTestGroup { timeout_per_test: Option, overall_timeout: Option, with_farm: bool, + metrics_to_check: Vec, } impl Default for SystemTestGroup { @@ -470,6 +471,7 @@ impl SystemTestGroup { timeout_per_test: None, overall_timeout: None, with_farm: true, + metrics_to_check: Vec::new(), } } @@ -502,6 +504,13 @@ impl SystemTestGroup { self } + /// If the provided metric will have a non-zero value for any of the nodes, the test will + /// fail. + pub fn add_metrics_to_check(mut self, metric_name: impl ToString) -> Self { + self.metrics_to_check.push(metric_name.to_string()); + self + } + pub fn without_assert_no_replica_restarts(mut self) -> Self { self.assert_no_replica_restarts = false; self @@ -594,7 +603,7 @@ impl SystemTestGroup { self } - fn make_plan(self, rh: &Handle, group_ctx: GroupContext) -> Result>> { + fn make_plan(mut self, rh: &Handle, group_ctx: GroupContext) -> Result>> { let logger = group_ctx.logger(); debug!(logger, "SystemTestGroup.make_plan"); let start_time = Utc::now(); @@ -699,10 +708,15 @@ impl SystemTestGroup { false, ); - let assert_no_critical_errors_fn: Option<(String, Box)> = if self - .assert_no_critical_errors + if self.assert_no_critical_errors { + self.metrics_to_check.push(String::from("critical_errors")); + } + + let assert_no_critical_errors_fn: Option<(String, Box)> = if !self + .metrics_to_check + .is_empty() { - let teardown_fn = |env: TestEnv| { + let teardown_fn = move |env: TestEnv| { let topology = match env.safe_topology_snapshot() { Ok(topology) => topology, Err(e) => { @@ -718,11 +732,11 @@ impl SystemTestGroup { .flat_map(|subnet| subnet.nodes()) .collect(); for node in nodes { - node.assert_no_critical_errors(); + node.assert_no_metrics_errors(self.metrics_to_check.clone()); } }; Some(( - ASSERT_NO_CRITICAL_ERRORS_TASK_NAME.to_string(), + ASSERT_NO_METRICS_ERRORS_TASK_NAME.to_string(), Box::new(teardown_fn), )) } else { diff --git a/rs/tests/driver/src/driver/test_env_api.rs b/rs/tests/driver/src/driver/test_env_api.rs index 3cd643b07e2c..a4d1eccc3019 100644 --- a/rs/tests/driver/src/driver/test_env_api.rs +++ b/rs/tests/driver/src/driver/test_env_api.rs @@ -1178,13 +1178,10 @@ impl IcNodeSnapshot { )) } - pub fn assert_no_critical_errors(&self) { + pub fn assert_no_metrics_errors(&self, metrics_to_check: Vec) { block_on(async { - let replica_metric_name_prefix = "critical_errors"; - let replica_metrics_fetcher = MetricsFetcher::new( - std::iter::once(self.clone()), - vec![replica_metric_name_prefix.to_string()], - ); + let replica_metrics_fetcher = + MetricsFetcher::new(std::iter::once(self.clone()), dbg!(metrics_to_check)); let replica_metrics_result = replica_metrics_fetcher.fetch::().await; let replica_metrics = match replica_metrics_result { Ok(replica_metrics) => replica_metrics, @@ -1197,15 +1194,21 @@ impl IcNodeSnapshot { } }; assert!( - !replica_metrics.is_empty(), - "No critical error counters were found in replica metrics for node {}", + replica_metrics.is_empty(), + "No error counters were found in replica metrics for node {}", self.node_id ); for (name, value) in replica_metrics { + let maybe_extra_msg = if name == "critical_errors" { + "Create `SystemTestGroup` using `without_assert_no_critical_errors` \ + if critical errors are expected in your test" + } else { + "" + }; assert_eq!( value[0], 0, - "Critical error {} raised by node {}. Create `SystemTestGroup` using `without_assert_no_critical_errors` if critical errors are expected in your test", - name, self.node_id + "The metrics {} on node {} has non-zero value. {}", + name, self.node_id, maybe_extra_msg, ); } }); From c1db73aaf96a641c0a0de3cc6939df3375fe4042 Mon Sep 17 00:00:00 2001 From: Kamil Popielarz Date: Thu, 12 Feb 2026 13:56:17 +0000 Subject: [PATCH 2/5] make it default --- .../consensus/adding_nodes_to_subnet_test.rs | 1 - .../catch_up_loop_prevention_test.rs | 1 - rs/tests/consensus/catch_up_possible_test.rs | 1 - rs/tests/consensus/consensus_performance.rs | 1 - rs/tests/consensus/dual_workload_test.rs | 3 +- .../max_ingress_payload_size_test.rs | 1 - .../consensus/max_xnet_payload_size_test.rs | 3 +- .../consensus/node_graceful_leaving_test.rs | 1 - .../upgrade/unassigned_node_upgrade_test.rs | 1 - .../upgrade/upgrade_app_subnet_test.rs | 1 - .../upgrade_downgrade_app_subnet_test.rs | 1 - .../upgrade_downgrade_nns_subnet_test.rs | 1 - .../upgrade/upgrade_with_alternative_urls.rs | 1 - rs/tests/driver/src/driver/group.rs | 29 +++++++++---------- rs/tests/driver/src/driver/test_env_api.rs | 14 ++++----- 15 files changed, 21 insertions(+), 39 deletions(-) diff --git a/rs/tests/consensus/adding_nodes_to_subnet_test.rs b/rs/tests/consensus/adding_nodes_to_subnet_test.rs index f11871acfadc..8a948168a730 100644 --- a/rs/tests/consensus/adding_nodes_to_subnet_test.rs +++ b/rs/tests/consensus/adding_nodes_to_subnet_test.rs @@ -235,6 +235,5 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(adding_new_nodes_to_subnet_test)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args() } diff --git a/rs/tests/consensus/catch_up_loop_prevention_test.rs b/rs/tests/consensus/catch_up_loop_prevention_test.rs index b40e487831d3..c933d12d06da 100644 --- a/rs/tests/consensus/catch_up_loop_prevention_test.rs +++ b/rs/tests/consensus/catch_up_loop_prevention_test.rs @@ -71,7 +71,6 @@ fn main() -> Result<()> { .add_test(systest!(test_catch_up_possible)) .with_timeout_per_test(TIMEOUT) .with_overall_timeout(TIMEOUT) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/catch_up_possible_test.rs b/rs/tests/consensus/catch_up_possible_test.rs index d4d6d48b5f14..d7b040bcc4a5 100644 --- a/rs/tests/consensus/catch_up_possible_test.rs +++ b/rs/tests/consensus/catch_up_possible_test.rs @@ -71,7 +71,6 @@ fn main() -> Result<()> { .add_test(systest!(test_catch_up_possible)) .with_timeout_per_test(TIMEOUT) .with_overall_timeout(TIMEOUT) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/consensus_performance.rs b/rs/tests/consensus/consensus_performance.rs index 9634d087d574..f158b32fa796 100644 --- a/rs/tests/consensus/consensus_performance.rs +++ b/rs/tests/consensus/consensus_performance.rs @@ -228,7 +228,6 @@ fn main() -> Result<()> { .add_test(systest!(test_small_messages)) .add_test(systest!(test_few_large_messages)) .add_test(systest!(test_large_messages)) - .add_metrics_to_check("consensus_invalidated_artifacts") .with_teardown(teardown) .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/dual_workload_test.rs b/rs/tests/consensus/dual_workload_test.rs index f457fe9eabd1..a39497633687 100644 --- a/rs/tests/consensus/dual_workload_test.rs +++ b/rs/tests/consensus/dual_workload_test.rs @@ -222,9 +222,8 @@ async fn make_xnet_call( fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) - .without_assert_no_critical_errors() .add_test(systest!(test)) - .add_metrics_to_check("consensus_invalidated_artifacts") + .remove_metrics_to_check("critical_errors") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/max_ingress_payload_size_test.rs b/rs/tests/consensus/max_ingress_payload_size_test.rs index e6e7d5c5b7c4..c6af4995b90b 100644 --- a/rs/tests/consensus/max_ingress_payload_size_test.rs +++ b/rs/tests/consensus/max_ingress_payload_size_test.rs @@ -255,7 +255,6 @@ fn main() -> Result<()> { .add_test(systest!(test_nns_subnet_query_exceeds_limits)) .add_test(systest!(test_nns_subnet_update_within_limits)) .add_test(systest!(test_nns_subnet_update_exceeds_limits)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/max_xnet_payload_size_test.rs b/rs/tests/consensus/max_xnet_payload_size_test.rs index 624696af3b74..1e87d95196d9 100644 --- a/rs/tests/consensus/max_xnet_payload_size_test.rs +++ b/rs/tests/consensus/max_xnet_payload_size_test.rs @@ -60,7 +60,6 @@ fn setup(env: TestEnv) { .with_max_ingress_message_size(INGRESS_MAX_SIZE as u64) .with_max_block_payload_size(XNET_MAX_SIZE as u64), ) - .with_metrics_to_check("consensus_invalidated_artifacts") .setup_and_start(&env) .expect("failed to setup IC under test"); } @@ -176,8 +175,8 @@ async fn stable_grow(unican: &UniversalCanister<'_>, num_pages: u32) { fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) - .without_assert_no_critical_errors() .add_test(systest!(test)) + .remove_metrics_to_check("critical_errors") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/node_graceful_leaving_test.rs b/rs/tests/consensus/node_graceful_leaving_test.rs index 049f38de7608..a09e00b2ac43 100644 --- a/rs/tests/consensus/node_graceful_leaving_test.rs +++ b/rs/tests/consensus/node_graceful_leaving_test.rs @@ -122,7 +122,6 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs b/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs index c63421b80822..8b11fb78bae9 100644 --- a/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs +++ b/rs/tests/consensus/upgrade/unassigned_node_upgrade_test.rs @@ -176,7 +176,6 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs index 179e719dfaf9..831d9b4ce1ce 100644 --- a/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_app_subnet_test.rs @@ -81,7 +81,6 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_app_subnet)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs index b122c6bae44a..91dec37eb862 100644 --- a/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_downgrade_app_subnet_test.rs @@ -173,7 +173,6 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_downgrade_app_subnet)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs b/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs index a91350518d6a..0b01d5fae099 100644 --- a/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs +++ b/rs/tests/consensus/upgrade/upgrade_downgrade_nns_subnet_test.rs @@ -69,7 +69,6 @@ fn main() -> Result<()> { .with_timeout_per_test(UP_DOWNGRADE_PER_TEST_TIMEOUT) .with_setup(setup) .add_test(systest!(upgrade_downgrade_nns_subnet)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs b/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs index 3420c837721c..557c9f080140 100644 --- a/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs +++ b/rs/tests/consensus/upgrade/upgrade_with_alternative_urls.rs @@ -109,7 +109,6 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) - .add_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/driver/src/driver/group.rs b/rs/tests/driver/src/driver/group.rs index 8ce4e9970e4d..82fd193e212e 100644 --- a/rs/tests/driver/src/driver/group.rs +++ b/rs/tests/driver/src/driver/group.rs @@ -424,7 +424,6 @@ impl SystemTestSubGroup { pub struct SystemTestGroup { setup: Option>, teardown: Option>, - assert_no_critical_errors: bool, assert_no_replica_restarts: bool, tests: Vec, timeout_per_test: Option, @@ -465,13 +464,15 @@ impl SystemTestGroup { Self { setup: Default::default(), teardown: Default::default(), - assert_no_critical_errors: true, assert_no_replica_restarts: true, tests: Default::default(), timeout_per_test: None, overall_timeout: None, with_farm: true, - metrics_to_check: Vec::new(), + metrics_to_check: vec![ + String::from("critical_errors"), + String::from("consensus_invalidated_artifacts"), + ], } } @@ -499,11 +500,6 @@ impl SystemTestGroup { self } - pub fn without_assert_no_critical_errors(mut self) -> Self { - self.assert_no_critical_errors = false; - self - } - /// If the provided metric will have a non-zero value for any of the nodes, the test will /// fail. pub fn add_metrics_to_check(mut self, metric_name: impl ToString) -> Self { @@ -511,6 +507,13 @@ impl SystemTestGroup { self } + pub fn remove_metrics_to_check(mut self, metric_name: impl ToString) -> Self { + let metric_name_to_remove = metric_name.to_string(); + self.metrics_to_check + .retain(|metric_name| *metric_name != metric_name_to_remove); + self + } + pub fn without_assert_no_replica_restarts(mut self) -> Self { self.assert_no_replica_restarts = false; self @@ -603,7 +606,7 @@ impl SystemTestGroup { self } - fn make_plan(mut self, rh: &Handle, group_ctx: GroupContext) -> Result>> { + fn make_plan(self, rh: &Handle, group_ctx: GroupContext) -> Result>> { let logger = group_ctx.logger(); debug!(logger, "SystemTestGroup.make_plan"); let start_time = Utc::now(); @@ -708,11 +711,7 @@ impl SystemTestGroup { false, ); - if self.assert_no_critical_errors { - self.metrics_to_check.push(String::from("critical_errors")); - } - - let assert_no_critical_errors_fn: Option<(String, Box)> = if !self + let assert_no_metric_errors_fn: Option<(String, Box)> = if !self .metrics_to_check .is_empty() { @@ -777,7 +776,7 @@ impl SystemTestGroup { .teardown .into_iter() .map(|teardown| (TEARDOWN_TASK_NAME.to_string(), teardown)) - .chain(assert_no_critical_errors_fn) + .chain(assert_no_metric_errors_fn) .chain(assert_no_replica_restarts_fn) .map(|(teardown_name, teardown_fn)| { let logger = logger.clone(); diff --git a/rs/tests/driver/src/driver/test_env_api.rs b/rs/tests/driver/src/driver/test_env_api.rs index a4d1eccc3019..fc1f815b4c24 100644 --- a/rs/tests/driver/src/driver/test_env_api.rs +++ b/rs/tests/driver/src/driver/test_env_api.rs @@ -1194,21 +1194,17 @@ impl IcNodeSnapshot { } }; assert!( - replica_metrics.is_empty(), + !replica_metrics.is_empty(), "No error counters were found in replica metrics for node {}", self.node_id ); for (name, value) in replica_metrics { - let maybe_extra_msg = if name == "critical_errors" { - "Create `SystemTestGroup` using `without_assert_no_critical_errors` \ - if critical errors are expected in your test" - } else { - "" - }; assert_eq!( value[0], 0, - "The metrics {} on node {} has non-zero value. {}", - name, self.node_id, maybe_extra_msg, + "The metric `{name}` on node {} has non-zero value. \ + If the metric is allowed to be non-zero in the test, \ + create `SystemTestGroup` with `remove_metrics_to_check(\"{name}\")", + self.node_id, ); } }); From fa48b5c942bee38e460d8d94440f4b5f34fa1b59 Mon Sep 17 00:00:00 2001 From: Kamil Popielarz Date: Thu, 12 Feb 2026 14:18:12 +0000 Subject: [PATCH 3/5] remove dbg! --- rs/tests/driver/src/driver/test_env_api.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/tests/driver/src/driver/test_env_api.rs b/rs/tests/driver/src/driver/test_env_api.rs index fc1f815b4c24..f059101a1053 100644 --- a/rs/tests/driver/src/driver/test_env_api.rs +++ b/rs/tests/driver/src/driver/test_env_api.rs @@ -1181,7 +1181,7 @@ impl IcNodeSnapshot { pub fn assert_no_metrics_errors(&self, metrics_to_check: Vec) { block_on(async { let replica_metrics_fetcher = - MetricsFetcher::new(std::iter::once(self.clone()), dbg!(metrics_to_check)); + MetricsFetcher::new(std::iter::once(self.clone()), metrics_to_check); let replica_metrics_result = replica_metrics_fetcher.fetch::().await; let replica_metrics = match replica_metrics_result { Ok(replica_metrics) => replica_metrics, From 2d0918766d5d630672d0c6d249227323eb7e7bdd Mon Sep 17 00:00:00 2001 From: Kamil Popielarz Date: Thu, 12 Feb 2026 15:58:03 +0000 Subject: [PATCH 4/5] disable checking the metric for some tests with malicious nodes --- rs/tests/consensus/replica_determinism_test.rs | 3 +++ rs/tests/consensus/request_auth_malicious_replica_test.rs | 3 +++ rs/tests/consensus/safety_test.rs | 3 +++ 3 files changed, 9 insertions(+) diff --git a/rs/tests/consensus/replica_determinism_test.rs b/rs/tests/consensus/replica_determinism_test.rs index 283652c6b877..e2959a01e6c8 100644 --- a/rs/tests/consensus/replica_determinism_test.rs +++ b/rs/tests/consensus/replica_determinism_test.rs @@ -158,6 +158,9 @@ fn main() -> Result<()> { .with_setup(setup) .without_assert_no_replica_restarts() .add_test(systest!(test)) + // One of the nodes has corrupted state and proposes a CUP share which will be invalidated + // by the peers (in vice versa), so it's expected that the metric is increased. + .remove_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } diff --git a/rs/tests/consensus/request_auth_malicious_replica_test.rs b/rs/tests/consensus/request_auth_malicious_replica_test.rs index f3e94f25fe56..6a8e6aab81f7 100644 --- a/rs/tests/consensus/request_auth_malicious_replica_test.rs +++ b/rs/tests/consensus/request_auth_malicious_replica_test.rs @@ -554,6 +554,9 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) + // One of the nodes produces blocks with invalid ingress messages which should be invaldated + // by the peers, so it's expected that the metric is increased. + .remove_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) diff --git a/rs/tests/consensus/safety_test.rs b/rs/tests/consensus/safety_test.rs index f35f6b410cb7..905fb26da1a3 100644 --- a/rs/tests/consensus/safety_test.rs +++ b/rs/tests/consensus/safety_test.rs @@ -58,6 +58,9 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(test)) + // There is a malicious node which proposes invalid blocks, so it's expected that the metric + // is increased. + .remove_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(()) } From d903568996b606296ae4fedb0c48a5248ab21d2c Mon Sep 17 00:00:00 2001 From: Kamil Popielarz Date: Thu, 12 Feb 2026 15:59:08 +0000 Subject: [PATCH 5/5] typo --- rs/tests/consensus/replica_determinism_test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rs/tests/consensus/replica_determinism_test.rs b/rs/tests/consensus/replica_determinism_test.rs index e2959a01e6c8..05f23cb15f94 100644 --- a/rs/tests/consensus/replica_determinism_test.rs +++ b/rs/tests/consensus/replica_determinism_test.rs @@ -158,8 +158,8 @@ fn main() -> Result<()> { .with_setup(setup) .without_assert_no_replica_restarts() .add_test(systest!(test)) - // One of the nodes has corrupted state and proposes a CUP share which will be invalidated - // by the peers (in vice versa), so it's expected that the metric is increased. + // One of the nodes has a corrupted state and proposes a CUP share which will be invalidated + // by the peers (and vice versa), so it's expected that the metric is increased. .remove_metrics_to_check("consensus_invalidated_artifacts") .execute_from_args()?; Ok(())