Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Never link to any issue or a pull request in any Github repository
144 changes: 144 additions & 0 deletions tokio/src/runtime/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ pub struct Builder {
/// Configures the task poll count histogram
pub(super) metrics_poll_count_histogram: HistogramBuilder,

pub(super) metrics_schedule_latency_histogram_enable: bool,

pub(super) metrics_schedule_latency_histogram: HistogramBuilder,

#[cfg(tokio_unstable)]
pub(super) unhandled_panic: UnhandledPanic,

Expand Down Expand Up @@ -323,6 +327,10 @@ impl Builder {

metrics_poll_count_histogram: HistogramBuilder::default(),

metrics_schedule_latency_histogram_enable: false,

metrics_schedule_latency_histogram: HistogramBuilder::default(),

disable_lifo_slot: false,

timer_flavor: TimerFlavor::Traditional,
Expand Down Expand Up @@ -1556,6 +1564,131 @@ impl Builder {
self.metrics_poll_count_histogram.legacy_mut(|b|b.num_buckets = buckets);
self
}

/// Enables tracking the distribution of task schedule latencies. Task
/// schedule latency is the time between when a task is scheduled for
/// execution and when it is polled.
///
/// **This feature is only supported on 64-bit targets.**
///
/// Task schedule latencies are not instrumented by default as doing
/// so requires calling [`Instant::now()`] when a task is scheduled
/// and when it is polled, which could add measurable overhead. Use
/// the [`Handle::metrics()`] to access the metrics data.
///
/// By default, a linear histogram with 10 buckets each 100 microseconds wide will be used.
/// This has an extremely low memory footprint, but may not provide enough granularity. For
/// better granularity with low memory usage, use [`metrics_schedule_latency_histogram_configuration()`]
/// to select [`LogHistogram`] instead.
///
/// # Examples
///
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .build()
/// .unwrap();
/// # // Test default values here
/// # fn us(n: u64) -> std::time::Duration { std::time::Duration::from_micros(n) }
/// # let m = rt.handle().metrics();
/// # assert_eq!(m.schedule_latency_histogram_num_buckets(), 10);
/// # assert_eq!(m.schedule_latency_histogram_bucket_range(0), us(0)..us(100));
/// # assert_eq!(m.schedule_latency_histogram_bucket_range(1), us(100)..us(200));
/// # }
/// ```
///
/// [`Handle::metrics()`]: crate::runtime::Handle::metrics
/// [`Instant::now()`]: std::time::Instant::now
/// [`LogHistogram`]: crate::runtime::LogHistogram
/// [`metrics_schedule_latency_histogram_configuration()`]: Builder::metrics_schedule_latency_histogram_configuration
pub fn enable_metrics_schedule_latency_histogram(&mut self) -> &mut Self {
    // Only flips the enable flag; the histogram keeps its default shape
    // (linear, 10 buckets of 100µs each — see the doc comment above) unless
    // overridden via `metrics_schedule_latency_histogram_configuration()`.
    self.metrics_schedule_latency_histogram_enable = true;
    self
}

/// Configure the histogram for tracking task schedule latencies.
///
/// Tracking of task schedule latencies must be enabled with
/// [`enable_metrics_schedule_latency_histogram()`] for this function
/// to have any effect.
///
/// By default, a linear histogram with 10 buckets each 100 microseconds wide will be used.
/// This has an extremely low memory footprint, but may not provide enough granularity. For
/// better granularity with low memory usage, use [`LogHistogram`] instead.
///
/// # Examples
/// Configure a [`LogHistogram`] with [default configuration]:
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
/// use tokio::runtime::{HistogramConfiguration, LogHistogram};
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::log(LogHistogram::default())
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// Configure a linear histogram with 100 buckets, each 10μs wide
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use tokio::runtime;
/// use std::time::Duration;
/// use tokio::runtime::HistogramConfiguration;
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::linear(Duration::from_micros(10), 100)
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// Configure a [`LogHistogram`] with the following settings:
/// - Measure times from 100ns to 120s
/// - Max error of 0.1
/// - No more than 1024 buckets
/// ```
/// # #[cfg(not(target_family = "wasm"))]
/// # {
/// use std::time::Duration;
/// use tokio::runtime;
/// use tokio::runtime::{HistogramConfiguration, LogHistogram};
///
/// let rt = runtime::Builder::new_multi_thread()
/// .enable_metrics_schedule_latency_histogram()
/// .metrics_schedule_latency_histogram_configuration(
/// HistogramConfiguration::log(LogHistogram::builder()
/// .max_value(Duration::from_secs(120))
/// .min_value(Duration::from_nanos(100))
/// .max_error(0.1)
/// .max_buckets(1024)
/// .expect("configuration uses 488 buckets")
/// )
/// )
/// .build()
/// .unwrap();
/// # }
/// ```
///
/// [`LogHistogram`]: crate::runtime::LogHistogram
/// [`enable_metrics_schedule_latency_histogram()`]: Builder::enable_metrics_schedule_latency_histogram
pub fn metrics_schedule_latency_histogram_configuration(
    &mut self,
    configuration: HistogramConfiguration,
) -> &mut Self {
    // Replace only the histogram shape; whether schedule-latency tracking
    // runs at all is controlled separately by
    // `enable_metrics_schedule_latency_histogram()`.
    let histogram_type = configuration.inner;
    self.metrics_schedule_latency_histogram.histogram_type = histogram_type;
    self
}
}

fn build_current_thread_runtime(&mut self) -> io::Result<Runtime> {
Expand Down Expand Up @@ -1631,6 +1764,8 @@ impl Builder {
disable_lifo_slot: self.disable_lifo_slot,
seed_generator: seed_generator_1,
metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
metrics_schedule_latency_histogram: self
.metrics_schedule_latency_histogram_builder(),
},
local_tid,
);
Expand All @@ -1649,6 +1784,14 @@ impl Builder {
None
}
}

/// Returns the histogram configuration for the schedule-latency metric,
/// or `None` when the metric should not be collected.
///
/// The schedule-latency instrumentation is only compiled on 64-bit
/// targets (it is gated behind `#[cfg(target_pointer_width = "64")]`).
/// On other targets the histogram would never receive any samples, so we
/// report the metric as disabled there — even if the user called
/// `enable_metrics_schedule_latency_histogram()` — rather than exposing
/// an enabled-but-permanently-empty histogram.
fn metrics_schedule_latency_histogram_builder(&self) -> Option<HistogramBuilder> {
    // `cfg!` (rather than `#[cfg]`) keeps both branches type-checked on
    // every target while still resolving to a constant at compile time.
    if cfg!(target_pointer_width = "64") && self.metrics_schedule_latency_histogram_enable {
        Some(self.metrics_schedule_latency_histogram.clone())
    } else {
        None
    }
}
}

cfg_io_driver! {
Expand Down Expand Up @@ -1812,6 +1955,7 @@ cfg_rt_multi_thread! {
disable_lifo_slot: self.disable_lifo_slot,
seed_generator: seed_generator_1,
metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(),
metrics_schedule_latency_histogram: self.metrics_schedule_latency_histogram_builder(),
},
self.timer_flavor,
);
Expand Down
3 changes: 3 additions & 0 deletions tokio/src/runtime/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ pub(crate) struct Config {
/// How to build poll time histograms
pub(crate) metrics_poll_count_histogram: Option<crate::runtime::HistogramBuilder>,

/// How to build schedule latency histograms
pub(crate) metrics_schedule_latency_histogram: Option<crate::runtime::HistogramBuilder>,

#[cfg(tokio_unstable)]
/// How to respond to unhandled task panics.
pub(crate) unhandled_panic: crate::runtime::UnhandledPanic,
Expand Down
36 changes: 34 additions & 2 deletions tokio/src/runtime/metrics/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ pub(crate) struct MetricsBatch {
#[cfg(tokio_unstable)]
/// If `Some`, tracks poll times in nanoseconds
poll_timer: Option<PollTimer>,

#[cfg(tokio_unstable)]
schedule_latencies: Option<HistogramBatch>,
}

cfg_unstable_metrics! {
Expand Down Expand Up @@ -95,6 +98,13 @@ impl MetricsBatch {
poll_started_at: now,
})
});
// Schedule latencies cannot be tracked if `Instant::now()` is unavailable
let schedule_latencies = maybe_now.and_then(|_| {
worker_metrics
.schedule_latency_histogram
.as_ref()
.map(HistogramBatch::from_histogram)
});
MetricsBatch {
park_count: 0,
park_unpark_count: 0,
Expand All @@ -108,6 +118,7 @@ impl MetricsBatch {
busy_duration_total: 0,
processing_scheduled_tasks_started_at: maybe_now,
poll_timer,
schedule_latencies,
}
}
}
Expand Down Expand Up @@ -155,6 +166,11 @@ impl MetricsBatch {
let dst = worker.poll_count_histogram.as_ref().unwrap();
poll_timer.poll_counts.submit(dst);
}

if let Some(schedule_latencies) = &self.schedule_latencies {
let dst = worker.schedule_latency_histogram.as_ref().unwrap();
schedule_latencies.submit(dst);
}
}
}
}
Expand Down Expand Up @@ -206,15 +222,31 @@ impl MetricsBatch {
cfg_metrics_variant! {
stable: {
/// Start polling an individual task
pub(crate) fn start_poll(&mut self) {}
pub(crate) fn start_poll(&mut self, _task_scheduled_at: Option<(Instant, u64)>) {}
},
unstable: {
/// Start polling an individual task
pub(crate) fn start_poll(&mut self) {
///
/// # Arguments
///
/// `task_scheduled_at` is an optional tuple containing the Instant the scheduler
/// was started and the number of nanoseconds elapsed between that instant and
/// the time the task being polled was scheduled.
pub(crate) fn start_poll(&mut self, task_scheduled_at: Option<(Instant, u64)>) {
self.poll_count += 1;
if let Some(poll_timer) = &mut self.poll_timer {
poll_timer.poll_started_at = Instant::now();
}
if let Some((runtime_started_at, task_scheduled_at)) = task_scheduled_at {
if let Some(schedule_latencies) = &mut self.schedule_latencies {
if let Some(now) = self.poll_timer.as_ref().map(|p| p.poll_started_at).or_else(now) {
// `u64::MAX` as nanoseconds is equal to 584 years
let nanos_since_start = now.saturating_duration_since(runtime_started_at).as_nanos() as u64;
let elapsed = nanos_since_start.saturating_sub(task_scheduled_at);
schedule_latencies.measure(elapsed, 1);
}
}
}
}
}
}
Expand Down
Loading
Loading