Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions tokio/src/runtime/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ pub struct Builder {
/// Configures the task poll count histogram
pub(super) metrics_poll_count_histogram: HistogramBuilder,

pub(super) metrics_schedule_latency_histogram_enable: bool,

pub(super) metrics_schedule_latency_histogram: HistogramBuilder,

#[cfg(tokio_unstable)]
pub(super) unhandled_panic: UnhandledPanic,

Expand Down Expand Up @@ -323,6 +327,10 @@ impl Builder {

metrics_poll_count_histogram: HistogramBuilder::default(),

metrics_schedule_latency_histogram_enable: false,

metrics_schedule_latency_histogram: HistogramBuilder::default(),

disable_lifo_slot: false,

timer_flavor: TimerFlavor::Traditional,
Expand Down Expand Up @@ -1556,6 +1564,124 @@ impl Builder {
self.metrics_poll_count_histogram.legacy_mut(|b|b.num_buckets = buckets);
self
}

/// Enables tracking the distribution of task schedule latencies. Task
/// schedule latency is the time between when a task is scheduled for
/// execution and when it is polled.
///
/// Task schedule latencies are not instrumented by default as doing
/// so requires calling [`Instant::now()`] when a task is scheduled and again when it is polled, which
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The phrasing 'twice per task poll' is a bit misleading. This feature calls Instant::now() once when a task is scheduled, and once when it begins to be polled. For a task that is polled multiple times after being scheduled once, this is not 'twice per poll'. A more accurate description would be clearer for users trying to understand the performance overhead.

Suggested change
/// so requires calling [`Instant::now()`] twice per task poll, which
/// so requires calling [`Instant::now()`] when a task is scheduled and when it is polled, which

Copy link
Copy Markdown
Owner Author

@martin-augment martin-augment Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:documentation; feedback: The Gemini AI reviewer is correct! The proposed wording of the docstring suggests that Instant::now() is called twice on each poll. This is not correct and it should be fixed to explain that the first call to Instant::now() is when the task is scheduled.

/// could add measurable overhead. Use the [`Handle::metrics()`] to
/// access the metrics data.
///
/// By default, a linear histogram with 10 buckets each 100 microseconds wide will be used.
/// This has an extremely low memory footprint, but may not provide enough granularity. For
/// better granularity with low memory usage, use [`metrics_schedule_latency_histogram_configuration()`]
/// to select [`LogHistogram`] instead.
///
/// # Examples
///
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .build()
/// .unwrap();
/// # // Test default values here
/// # fn us(n: u64) -> std::time::Duration { std::time::Duration::from_micros(n) }
/// # let m = rt.handle().metrics();
/// # assert_eq!(m.schedule_latency_histogram_num_buckets(), 10);
/// # assert_eq!(m.schedule_latency_histogram_bucket_range(0), us(0)..us(100));
/// # assert_eq!(m.schedule_latency_histogram_bucket_range(1), us(100)..us(200));
/// # }
/// ```
///
/// [`Handle::metrics()`]: crate::runtime::Handle::metrics
/// [`Instant::now()`]: std::time::Instant::now
/// [`LogHistogram`]: crate::runtime::LogHistogram
/// [`metrics_schedule_latency_histogram_configuration()`]: Builder::metrics_schedule_latency_histogram_configuration
pub fn enable_metrics_schedule_latency_histogram(&mut self) -> &mut Self {
    // Only flips the enable flag; the histogram shape itself is chosen via
    // `metrics_schedule_latency_histogram_configuration` (defaults to a
    // 10-bucket linear histogram, per the doc comment above).
    self.metrics_schedule_latency_histogram_enable = true;
    self
}

/// Configure the histogram for tracking task schedule latencies.
///
/// By default, a linear histogram with 10 buckets each 100 microseconds wide will be used.
/// This has an extremely low memory footprint, but may not provide enough granularity. For
/// better granularity with low memory usage, use [`LogHistogram`] instead.
///
/// # Examples
/// Configure a [`LogHistogram`] with [default configuration]:
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
/// use tokio::runtime::{HistogramConfiguration, LogHistogram};
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::log(LogHistogram::default())
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// Configure a linear histogram with 100 buckets, each 10μs wide
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
/// use std::time::Duration;
/// use tokio::runtime::HistogramConfiguration;
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::linear(Duration::from_micros(10), 100)
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// Configure a [`LogHistogram`] with the following settings:
/// - Measure times from 100ns to 120s
/// - Max error of 0.1
/// - No more than 1024 buckets
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use std::time::Duration;
/// use tokio::runtime;
/// use tokio::runtime::{HistogramConfiguration, LogHistogram};
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::log(LogHistogram::builder()
/// .max_value(Duration::from_secs(120))
/// .min_value(Duration::from_nanos(100))
/// .max_error(0.1)
/// .max_buckets(1024)
/// .expect("configuration uses 488 buckets")
/// )
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// [`LogHistogram`]: crate::runtime::LogHistogram
pub fn metrics_schedule_latency_histogram_configuration(&mut self, configuration: HistogramConfiguration) -> &mut Self {
    // Overrides the default histogram shape. This has no observable effect
    // unless the histogram is also enabled via
    // `enable_metrics_schedule_latency_histogram`, since the builder only
    // hands a histogram to workers when the enable flag is set.
    self.metrics_schedule_latency_histogram.histogram_type = configuration.inner;
    self
}
}

fn build_current_thread_runtime(&mut self) -> io::Result<Runtime> {
Expand Down Expand Up @@ -1631,6 +1757,8 @@ impl Builder {
disable_lifo_slot: self.disable_lifo_slot,
seed_generator: seed_generator_1,
metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
metrics_schedule_latency_histogram: self
.metrics_schedule_latency_histogram_builder(),
},
local_tid,
);
Expand All @@ -1649,6 +1777,14 @@ impl Builder {
None
}
}

/// Returns the schedule-latency histogram configuration to install on each
/// worker, or `None` when schedule-latency tracking was not enabled on this
/// builder (in which case workers skip the instrumentation entirely).
fn metrics_schedule_latency_histogram_builder(&self) -> Option<HistogramBuilder> {
    self.metrics_schedule_latency_histogram_enable
        .then(|| self.metrics_schedule_latency_histogram.clone())
}
}

cfg_io_driver! {
Expand Down Expand Up @@ -1812,6 +1948,7 @@ cfg_rt_multi_thread! {
disable_lifo_slot: self.disable_lifo_slot,
seed_generator: seed_generator_1,
metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
metrics_schedule_latency_histogram: self.metrics_schedule_latency_histogram_builder(),
},
self.timer_flavor,
);
Expand Down
3 changes: 3 additions & 0 deletions tokio/src/runtime/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ pub(crate) struct Config {
/// How to build poll time histograms
pub(crate) metrics_poll_count_histogram: Option<crate::runtime::HistogramBuilder>,

/// How to build schedule latency histograms
pub(crate) metrics_schedule_latency_histogram: Option<crate::runtime::HistogramBuilder>,

#[cfg(tokio_unstable)]
/// How to respond to unhandled task panics.
pub(crate) unhandled_panic: crate::runtime::UnhandledPanic,
Expand Down
24 changes: 22 additions & 2 deletions tokio/src/runtime/metrics/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ pub(crate) struct MetricsBatch {
#[cfg(tokio_unstable)]
/// If `Some`, tracks poll times in nanoseconds
poll_timer: Option<PollTimer>,

#[cfg(tokio_unstable)]
schedule_latencies: Option<HistogramBatch>,
}

cfg_unstable_metrics! {
Expand Down Expand Up @@ -95,6 +98,10 @@ impl MetricsBatch {
poll_started_at: now,
})
});
let schedule_latencies = worker_metrics
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

schedule_latencies is initialized even when maybe_now is None, but start_poll later falls back to Instant::now() when poll_timer is None, which appears to bypass the existing wasm32-unknown-unknown time-metrics gating. Consider gating schedule-latency histogram collection similarly (e.g., only enabling it when maybe_now is Some).

Severity: medium

Other Locations
  • tokio/src/runtime/metrics/batch.rs:233

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:useful; category:bug; feedback: The Augment AI reviewer is correct! There is no point in initializing schedule_latencies if maybe_now is None. Prevents using memory for something that won't be used later.

.schedule_latency_histogram
.as_ref()
.map(HistogramBatch::from_histogram);
MetricsBatch {
park_count: 0,
park_unpark_count: 0,
Expand All @@ -108,6 +115,7 @@ impl MetricsBatch {
busy_duration_total: 0,
processing_scheduled_tasks_started_at: maybe_now,
poll_timer,
schedule_latencies,
}
}
}
Expand Down Expand Up @@ -155,6 +163,11 @@ impl MetricsBatch {
let dst = worker.poll_count_histogram.as_ref().unwrap();
poll_timer.poll_counts.submit(dst);
}

if let Some(schedule_latencies) = &self.schedule_latencies {
let dst = worker.schedule_latency_histogram.as_ref().unwrap();
schedule_latencies.submit(dst);
}
}
}
}
Expand Down Expand Up @@ -206,15 +219,22 @@ impl MetricsBatch {
cfg_metrics_variant! {
stable: {
/// Start polling an individual task
pub(crate) fn start_poll(&mut self) {}
pub(crate) fn start_poll(&mut self, _task_scheduled_at: Option<Instant>) {}
},
unstable: {
/// Start polling an individual task
pub(crate) fn start_poll(&mut self) {
pub(crate) fn start_poll(&mut self, task_scheduled_at: Option<Instant>) {
self.poll_count += 1;
if let Some(poll_timer) = &mut self.poll_timer {
poll_timer.poll_started_at = Instant::now();
}
if let Some(task_scheduled_at) = task_scheduled_at {
if let Some(schedule_latencies) = &mut self.schedule_latencies {
let now = self.poll_timer.as_ref().map(|p| p.poll_started_at).unwrap_or_else(Instant::now);
let elapsed = duration_as_u64(now - task_scheduled_at);
schedule_latencies.measure(elapsed, 1);
Comment on lines +231 to +235
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify that `start_poll` currently bypasses the wasm-safe `now()` helper.
rg -n -C3 'fn now\(|pub\(crate\) fn start_poll\(|schedule_latencies|Instant::now\(' tokio/src/runtime/metrics/batch.rs

Repository: martin-augment/tokio

Length of output: 2620


🏁 Script executed:

# Check all direct Instant::now() usages in the file to understand scope
rg -n 'Instant::now' tokio/src/runtime/metrics/batch.rs

Repository: martin-augment/tokio

Length of output: 285


🏁 Script executed:

# Verify the now() function signature and check how/where it's used correctly
rg -n 'self\.now\(\)|\.now\(\)|\bor_else\(now\)' tokio/src/runtime/metrics/batch.rs

Repository: martin-augment/tokio

Length of output: 46


🏁 Script executed:

# Verify poll_timer can be None at line 233 and understand initialization
rg -n -B10 -A2 'pub\(crate\) fn start_poll' tokio/src/runtime/metrics/batch.rs | head -40

Repository: martin-augment/tokio

Length of output: 817


🏁 Script executed:

# Check how poll_timer field is declared and when it can be Some/None
rg -n -B5 'poll_timer:' tokio/src/runtime/metrics/batch.rs | head -30

Repository: martin-augment/tokio

Length of output: 235


Use the wasm-safe time gate in schedule-latency measurement.

Line 233 falls back to Instant::now() directly, which bypasses this module's now() guard for unsupported wasm32-unknown-unknown time metrics. This can break the target-specific safety contract and should be gated the same way as other timing paths.

🔧 Proposed fix
-                if let Some(task_scheduled_at) = task_scheduled_at {
-                    if let Some(schedule_latencies) = &mut self.schedule_latencies {
-                        let now = self.poll_timer.as_ref().map(|p| p.poll_started_at).unwrap_or_else(Instant::now);
-                        let elapsed = duration_as_u64(now - task_scheduled_at);
-                        schedule_latencies.measure(elapsed, 1);
-                    }
-                }
+                if let Some(task_scheduled_at) = task_scheduled_at {
+                    if let Some(schedule_latencies) = &mut self.schedule_latencies {
+                        if let Some(now) = self
+                            .poll_timer
+                            .as_ref()
+                            .map(|p| p.poll_started_at)
+                            .or_else(now)
+                        {
+                            let elapsed = duration_as_u64(now.saturating_duration_since(task_scheduled_at));
+                            schedule_latencies.measure(elapsed, 1);
+                        }
+                    }
+                }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tokio/src/runtime/metrics/batch.rs` around lines 231 - 235, The fallback uses
Instant::now() directly which bypasses this module's wasm-safe time gate; change
the unwrap_or_else(Instant::now) call so it uses the module's wasm-safe now()
(the same helper used elsewhere in this file) instead, i.e., replace
Instant::now with the local/same-module now function when computing let now =
self.poll_timer.as_ref().map(|p| p.poll_started_at).unwrap_or_else(...), so
duration_as_u64(now - task_scheduled_at) and schedule_latencies.measure remain
wasm-safe.

}
}
}
}
}
Expand Down
Loading
Loading