From 779ae5154220188ec2540da3fa6281bc0988ab2a Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Wed, 17 Dec 2025 11:37:02 +0100 Subject: [PATCH 1/2] Add more extensive runtime metrics I've noticed that K8s and apiserver contain several metrics that etcd does not have. This PR adds the missing metrics to the default Prometheus registry. Signed-off-by: Thomas Jungblut --- server/etcdserver/metrics.go | 5 +++++ tests/e2e/metrics_test.go | 2 ++ 2 files changed, 7 insertions(+) diff --git a/server/etcdserver/metrics.go b/server/etcdserver/metrics.go index 7176d30adbc0..52b3f582e22e 100644 --- a/server/etcdserver/metrics.go +++ b/server/etcdserver/metrics.go @@ -19,6 +19,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "go.uber.org/zap" "go.etcd.io/etcd/api/v3/version" @@ -162,6 +163,10 @@ var ( ) func init() { + // register more extensive go runtime metrics, for that we need to unregister the default metrics first + prometheus.Unregister(collectors.NewGoCollector()) + prometheus.MustRegister(collectors.NewGoCollector(collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll))) + prometheus.MustRegister(hasLeader) prometheus.MustRegister(isLeader) prometheus.MustRegister(leaderChanges) diff --git a/tests/e2e/metrics_test.go b/tests/e2e/metrics_test.go index c76de746df6c..aa6489c9a795 100644 --- a/tests/e2e/metrics_test.go +++ b/tests/e2e/metrics_test.go @@ -221,6 +221,7 @@ func TestNoMetricsMissing(t *testing.T) { "etcd_snap_db_save_total_duration_seconds", "etcd_snap_fsync_duration_seconds", "go_gc_duration_seconds", + "go_gc_heap_allocs_by_size_bytes", "go_gc_gogc_percent", "go_gc_gomemlimit_bytes", "go_goroutines", @@ -248,6 +249,7 @@ func TestNoMetricsMissing(t *testing.T) { "go_memstats_stack_sys_bytes", "go_memstats_sys_bytes", "go_sched_gomaxprocs_threads", + "go_sched_pauses_stopping_gc_seconds", "go_threads", "grpc_server_handled_total", "grpc_server_msg_received_total", 
From 99db8578857b49d110ce39512060db7d7e1bfcb9 Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Wed, 17 Dec 2025 13:37:54 +0100 Subject: [PATCH 2/2] Instrument watchstream send loop This PR adds a few more timing metrics to the watch send loop. We're currently trying to track down some sort of contention affecting all gRPC endpoints; we see a hard QPS ceiling when the apiserver restarts with watch cache inits. There's currently no metric to help us pinpoint where the time is going to create such a ceiling, so this PR adds some more metrics here. Somewhat related to #15402. Signed-off-by: Thomas Jungblut --- server/etcdserver/api/v3rpc/metrics.go | 49 ++++++++++++++++++++++++++ server/etcdserver/api/v3rpc/watch.go | 12 ++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/server/etcdserver/api/v3rpc/metrics.go b/server/etcdserver/api/v3rpc/metrics.go index d79506e07321..9b99af09e960 100644 --- a/server/etcdserver/api/v3rpc/metrics.go +++ b/server/etcdserver/api/v3rpc/metrics.go @@ -52,6 +52,51 @@ var ( }, []string{"type", "client_api_version"}, ) + + watchSendLoopWatchStreamTime = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "server", + Name: "watch_send_loop_watch_stream_time_seconds", + Help: "The total duration in seconds of running through the send loop watch stream response all events.", + }, + ) + + watchSendLoopWatchStreamTimePerEvent = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "server", + Name: "watch_send_loop_watch_stream_time_per_event_seconds", + Help: "The average duration in seconds of running through the send loop watch stream response, per event.", + // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2 + // highest bucket start of 0.0001 sec * 2^15 == 3.2768 sec + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16), + }, + ) + + watchSendLoopControlStreamTime = prometheus.NewHistogram( + 
prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "server", + Name: "watch_send_loop_control_stream_time_seconds", + Help: "The total duration in seconds of running through the send loop control stream response.", + // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2 + // highest bucket start of 0.0001 sec * 2^15 == 3.2768 sec + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16), + }, + ) + + watchSendLoopProgressTime = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "server", + Name: "watch_send_loop_progress_time_seconds", + Help: "The total duration in seconds of running through the progress loop control stream response.", + // lowest bucket start of upper bound 0.0001 sec (0.1 ms) with factor 2 + // highest bucket start of 0.0001 sec * 2^15 == 3.2768 sec + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16), + }, + ) ) func init() { @@ -59,4 +104,8 @@ func init() { prometheus.MustRegister(receivedBytes) prometheus.MustRegister(streamFailures) prometheus.MustRegister(clientRequests) + prometheus.MustRegister(watchSendLoopWatchStreamTime) + prometheus.MustRegister(watchSendLoopWatchStreamTimePerEvent) + prometheus.MustRegister(watchSendLoopControlStreamTime) + prometheus.MustRegister(watchSendLoopProgressTime) } diff --git a/server/etcdserver/api/v3rpc/watch.go b/server/etcdserver/api/v3rpc/watch.go index d4a5bc35141e..31151d6dfb3a 100644 --- a/server/etcdserver/api/v3rpc/watch.go +++ b/server/etcdserver/api/v3rpc/watch.go @@ -405,6 +405,7 @@ func (sws *serverWatchStream) sendLoop() { return } + start := time.Now() // TODO: evs is []mvccpb.Event type // either return []*mvccpb.Event from the mvcc package // or define protocol buffer with []mvccpb.Event. 
@@ -475,11 +476,15 @@ func (sws *serverWatchStream) sendLoop() { } sws.mu.Unlock() + totalDur := time.Since(start) + watchSendLoopWatchStreamTime.Observe(totalDur.Seconds()) + watchSendLoopWatchStreamTimePerEvent.Observe(totalDur.Seconds() / float64(len(evs))) + case c, ok := <-sws.ctrlStream: if !ok { return } - + start := time.Now() if err := sws.gRPCStream.Send(c); err != nil { if isClientCtxErr(sws.gRPCStream.Context().Err(), err) { sws.lg.Debug("failed to send watch control response to gRPC stream", zap.Error(err)) @@ -517,7 +522,11 @@ func (sws *serverWatchStream) sendLoop() { delete(pending, wid) } + watchSendLoopControlStreamTime.Observe(time.Since(start).Seconds()) + case <-progressTicker.C: + start := time.Now() + sws.mu.Lock() for id, ok := range sws.progress { if ok { @@ -526,6 +535,7 @@ func (sws *serverWatchStream) sendLoop() { sws.progress[id] = true } sws.mu.Unlock() + watchSendLoopProgressTime.Observe(time.Since(start).Seconds()) case <-sws.closec: return