-
Notifications
You must be signed in to change notification settings - Fork 0
7986: metrics: add task schedule latency metric #84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,9 @@ pub(crate) struct MetricsBatch { | |
| #[cfg(tokio_unstable)] | ||
| /// If `Some`, tracks poll times in nanoseconds | ||
| poll_timer: Option<PollTimer>, | ||
|
|
||
| #[cfg(tokio_unstable)] | ||
| schedule_latencies: Option<HistogramBatch>, | ||
| } | ||
|
|
||
| cfg_unstable_metrics! { | ||
|
|
@@ -95,6 +98,10 @@ impl MetricsBatch { | |
| poll_started_at: now, | ||
| }) | ||
| }); | ||
| let schedule_latencies = worker_metrics | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Severity: medium Other Locations
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:useful; category:bug; feedback: The Augment AI reviewer is correct! There is no point in initializing |
||
| .schedule_latency_histogram | ||
| .as_ref() | ||
| .map(HistogramBatch::from_histogram); | ||
| MetricsBatch { | ||
| park_count: 0, | ||
| park_unpark_count: 0, | ||
|
|
@@ -108,6 +115,7 @@ impl MetricsBatch { | |
| busy_duration_total: 0, | ||
| processing_scheduled_tasks_started_at: maybe_now, | ||
| poll_timer, | ||
| schedule_latencies, | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -155,6 +163,11 @@ impl MetricsBatch { | |
| let dst = worker.poll_count_histogram.as_ref().unwrap(); | ||
| poll_timer.poll_counts.submit(dst); | ||
| } | ||
|
|
||
| if let Some(schedule_latencies) = &self.schedule_latencies { | ||
| let dst = worker.schedule_latency_histogram.as_ref().unwrap(); | ||
| schedule_latencies.submit(dst); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -206,15 +219,22 @@ impl MetricsBatch { | |
| cfg_metrics_variant! { | ||
| stable: { | ||
| /// Start polling an individual task | ||
| pub(crate) fn start_poll(&mut self) {} | ||
| pub(crate) fn start_poll(&mut self, _task_scheduled_at: Option<Instant>) {} | ||
| }, | ||
| unstable: { | ||
| /// Start polling an individual task | ||
| pub(crate) fn start_poll(&mut self) { | ||
| pub(crate) fn start_poll(&mut self, task_scheduled_at: Option<Instant>) { | ||
| self.poll_count += 1; | ||
| if let Some(poll_timer) = &mut self.poll_timer { | ||
| poll_timer.poll_started_at = Instant::now(); | ||
| } | ||
| if let Some(task_scheduled_at) = task_scheduled_at { | ||
| if let Some(schedule_latencies) = &mut self.schedule_latencies { | ||
| let now = self.poll_timer.as_ref().map(|p| p.poll_started_at).unwrap_or_else(Instant::now); | ||
| let elapsed = duration_as_u64(now - task_scheduled_at); | ||
| schedule_latencies.measure(elapsed, 1); | ||
|
Comment on lines
+231
to
+235
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Verify that `start_poll` currently bypasses the wasm-safe `now()` helper.
rg -n -C3 'fn now\(|pub\(crate\) fn start_poll\(|schedule_latencies|Instant::now\(' tokio/src/runtime/metrics/batch.rsRepository: martin-augment/tokio Length of output: 2620 🏁 Script executed: # Check all direct Instant::now() usages in the file to understand scope
rg -n 'Instant::now' tokio/src/runtime/metrics/batch.rsRepository: martin-augment/tokio Length of output: 285 🏁 Script executed: # Verify the now() function signature and check how/where it's used correctly
rg -n 'self\.now\(\)|\.now\(\)|\bor_else\(now\)' tokio/src/runtime/metrics/batch.rsRepository: martin-augment/tokio Length of output: 46 🏁 Script executed: # Verify poll_timer can be None at line 233 and understand initialization
rg -n -B10 -A2 'pub\(crate\) fn start_poll' tokio/src/runtime/metrics/batch.rs | head -40Repository: martin-augment/tokio Length of output: 817 🏁 Script executed: # Check how poll_timer field is declared and when it can be Some/None
rg -n -B5 'poll_timer:' tokio/src/runtime/metrics/batch.rs | head -30Repository: martin-augment/tokio Length of output: 235 Use the wasm-safe time gate in schedule-latency measurement. Line 233 falls back to 🔧 Proposed fix- if let Some(task_scheduled_at) = task_scheduled_at {
- if let Some(schedule_latencies) = &mut self.schedule_latencies {
- let now = self.poll_timer.as_ref().map(|p| p.poll_started_at).unwrap_or_else(Instant::now);
- let elapsed = duration_as_u64(now - task_scheduled_at);
- schedule_latencies.measure(elapsed, 1);
- }
- }
+ if let Some(task_scheduled_at) = task_scheduled_at {
+ if let Some(schedule_latencies) = &mut self.schedule_latencies {
+ if let Some(now) = self
+ .poll_timer
+ .as_ref()
+ .map(|p| p.poll_started_at)
+ .or_else(now)
+ {
+ let elapsed = duration_as_u64(now.saturating_duration_since(task_scheduled_at));
+ schedule_latencies.measure(elapsed, 1);
+ }
+ }
+ }🤖 Prompt for AI Agents |
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The phrasing 'twice per task poll' is a bit misleading. This feature calls
Instant::now()once when a task is scheduled, and once when it begins to be polled. For a task that is polled multiple times after being scheduled once, this is not 'twice per poll'. A more accurate description would be clearer for users trying to understand the performance overhead.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
value:good-to-have; category:documentation; feedback: The Gemini AI reviewer is correct! The proposed wording of the docstring suggests that Instant::now() is called twice on each poll. This is not correct and it should be fixed to explain that the first call to Instant::now() is when the task is scheduled.