From eca261ab2f0f0e6874dfb175bfcfb6e4722ca396 Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Thu, 29 Jan 2026 12:29:24 +0000 Subject: [PATCH] remove specific beacon node id metric and move to version metric --- app/app.go | 2 +- app/metrics.go | 11 +-- app/monitoringapi.go | 59 ++++++------- docs/metrics.md | 192 +++++++++++++++++++++---------------------- 4 files changed, 128 insertions(+), 136 deletions(-) diff --git a/app/app.go b/app/app.go index fb81ab7b7..7b2566208 100644 --- a/app/app.go +++ b/app/app.go @@ -328,7 +328,7 @@ func Run(ctx context.Context, conf Config) (err error) { consensusDebugger := consensus.NewDebugger() - wireMonitoringAPI(ctx, life, conf.MonitoringAddr, conf.DebugAddr, p2pNode, eth2Cl, peerIDs, + wireMonitoringAPI(ctx, life, conf.MonitoringAddr, conf.DebugAddr, p2pNode, eth2Cl, conf.BeaconNodeAddrs, peerIDs, promRegistry, consensusDebugger, pubkeys, seenPubkeys, vapiCalls, len(lock.Validators)) err = wireCoreWorkflow(ctx, life, conf, lock, nodeIdx, p2pNode, p2pKey, eth2Cl, subEth2Cl, diff --git a/app/metrics.go b/app/metrics.go index dc3999d5f..878c15e5f 100644 --- a/app/metrics.go +++ b/app/metrics.go @@ -86,15 +86,8 @@ var ( Namespace: "app", Subsystem: "beacon_node", Name: "version", - Help: "Constant gauge with label set to the node version of the upstream beacon node", - }, []string{"version"}) - - beaconNodePeerIDGauge = promauto.NewResetGaugeVec(prometheus.GaugeOpts{ - Namespace: "app", - Subsystem: "beacon_node", - Name: "peer_id", - Help: "Constant gauge with label set to the peer_id of the upstream beacon node", - }, []string{"peer_id"}) + Help: "Constant gauge with labels set to the version and beacon_id of the upstream beacon node", + }, []string{"version", "beacon_id"}) thresholdGauge = promauto.NewGauge(prometheus.GaugeOpts{ Namespace: "cluster", diff --git a/app/monitoringapi.go b/app/monitoringapi.go index 079e38e0e..e22a7d788 100644 --- a/app/monitoringapi.go +++ b/app/monitoringapi.go @@ -23,6 +23,7 @@ import ( 
"github.com/obolnetwork/charon/app/health" "github.com/obolnetwork/charon/app/lifecycle" "github.com/obolnetwork/charon/app/log" + "github.com/obolnetwork/charon/app/z" "github.com/obolnetwork/charon/cluster" "github.com/obolnetwork/charon/core" ) @@ -45,12 +46,12 @@ var ( // wireMonitoringAPI constructs the monitoring API and registers it with the life cycle manager. // It serves prometheus metrics, pprof profiling and the runtime enr. func wireMonitoringAPI(ctx context.Context, life *lifecycle.Manager, promAddr, debugAddr string, - p2pNode host.Host, eth2Cl eth2wrap.Client, + p2pNode host.Host, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, peerIDs []peer.ID, registry *prometheus.Registry, consensusDebugger http.Handler, pubkeys []core.PubKey, seenPubkeys <-chan core.PubKey, vapiCalls <-chan struct{}, numValidators int, ) { - beaconNodeVersionMetric(ctx, eth2Cl, clockwork.NewRealClock()) + beaconNodeVersionMetric(ctx, eth2Cl, beaconNodeAddrs, clockwork.NewRealClock()) mux := http.NewServeMux() @@ -268,35 +269,37 @@ func beaconNodeSyncing(ctx context.Context, eth2Cl eth2client.NodeSyncingProvide } // beaconNodeVersionMetric sets the beacon node version gauge. -func beaconNodeVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, clock clockwork.Clock) { - nodeVersionTicker := clock.NewTicker(10 * time.Minute) - - setNodeVersion := func() { - eth2Resp, err := eth2Cl.NodeVersion(ctx, &eth2api.NodeVersionOpts{}) - if err != nil { - log.Error(ctx, "Failed to fetch beacon node version. 
Check beacon node connectivity and API availability", err) - return - } - - version := eth2Resp.Data +func beaconNodeVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, clk clockwork.Clock) { + nodeVersionTicker := clk.NewTicker(10 * time.Minute) + setNodeVersionAndID := func() { beaconNodeVersionGauge.Reset() - beaconNodeVersionGauge.WithLabelValues(version).Set(1) - eth2wrap.CheckBeaconNodeVersion(ctx, version) - } + // Query each beacon node individually + for _, addr := range beaconNodeAddrs { + // Get a client scoped to this specific beacon node + scopedClient := eth2Cl.ClientForAddress(addr) - setNodePeerID := func() { - response, err := eth2Cl.NodeIdentity(ctx, &eth2api.NodeIdentityOpts{}) - if err != nil { - log.Error(ctx, "Failed to fetch beacon node identity. Check beacon node connectivity and API availability", err) - return - } + versionResp, err := scopedClient.NodeVersion(ctx, &eth2api.NodeVersionOpts{}) + if err != nil { + log.Warn(ctx, "Failed to fetch beacon node version", err, + z.Str("beacon_node_address", addr)) + continue + } - peerID := response.Data.PeerID + response, err := scopedClient.NodeIdentity(ctx, &eth2api.NodeIdentityOpts{}) + if err != nil { + log.Warn(ctx, "Failed to fetch beacon node identity", err, + z.Str("beacon_node_address", addr)) + continue + } + + version := versionResp.Data + beaconID := response.Data.PeerID + beaconNodeVersionGauge.WithLabelValues(version, beaconID).Set(1) - beaconNodePeerIDGauge.Reset() - beaconNodePeerIDGauge.WithLabelValues(peerID).Set(1) + eth2wrap.CheckBeaconNodeVersion(ctx, version) + } } go func() { @@ -306,11 +309,9 @@ func beaconNodeVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, clock for { select { case <-onStartup: - setNodeVersion() - setNodePeerID() + setNodeVersionAndID() case <-nodeVersionTicker.Chan(): - setNodeVersion() - setNodePeerID() + setNodeVersionAndID() case <-ctx.Done(): return } diff --git a/docs/metrics.md b/docs/metrics.md index 
0bdc4c4a9..3e043d9bc 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -3,7 +3,6 @@ This document contains all the prometheus metrics exposed by a charon node. All metrics contain the following labels, so they are omitted from the table below: - - `cluster_hash`: The cluster lock hash uniquely identifying the cluster. - `cluster_name`: The cluster lock name. - `cluster_network`: The cluster network name; goerli, mainnet, etc. @@ -12,99 +11,98 @@ All metrics contain the following labels, so they are omitted from the table bel The `cluster_*` labels uniquely identify a specific node`s metrics which is required when storing metrics from multiple nodes or clusters in one Prometheus instance. -| Name | Type | Help | Labels | -| ------------------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------- | -| `app_beacon_node_peer_id` | Gauge | Constant gauge with label set to the peer_id of the upstream beacon node | `peer_id` | -| `app_beacon_node_peers` | Gauge | Gauge set to the peer count of the upstream beacon node | | -| `app_beacon_node_sse_block` | Histogram | Block imported into fork choice delay, supplied by beacon node`s SSE endpoint. Values between 0s and 4s for Ethereum mainnet are considered safe | `addr` | -| `app_beacon_node_sse_block_gossip` | Histogram | Block reception via gossip delay, supplied by beacon node`s SSE endpoint. Values between 0s and 4s for Ethereum mainnet are considered safe | `addr` | -| `app_beacon_node_sse_block_processing_time` | Histogram | Time in seconds between block gossip and head events, indicating block processing time. Lower values indicate better CPU/disk/RAM performance. 
| `addr` | -| `app_beacon_node_sse_chain_reorg_depth` | Histogram | Chain reorg depth, supplied by beacon node`s SSE endpoint | `addr` | -| `app_beacon_node_sse_head_delay` | Histogram | Delay in seconds between slot start and head update, supplied by beacon node`s SSE endpoint. Values between 8s and 12s for Ethereum mainnet are considered safe. | `addr` | -| `app_beacon_node_sse_head_slot` | Gauge | Current beacon node head slot, supplied by beacon node`s SSE endpoint | `addr` | -| `app_beacon_node_version` | Gauge | Constant gauge with label set to the node version of the upstream beacon node | `version` | -| `app_cache_hits_total` | Counter | Total number of times the cache was used | `endpoint` | -| `app_cache_invalidated_reorg_total` | Counter | Total number of times the cache was invalidated due to a chain reorg | `endpoint` | -| `app_cache_misses_total` | Counter | Total number of times the cache was missed | `endpoint` | -| `app_eth2_errors_total` | Counter | Total number of errors returned by eth2 beacon node requests | `endpoint` | -| `app_eth2_latency_seconds` | Histogram | Latency in seconds for eth2 beacon node requests | `endpoint` | -| `app_eth2_requests_total` | Counter | Total number of requests sent to eth2 beacon node | `endpoint` | -| `app_eth2_using_fallback` | Gauge | Indicates if client is using fallback (1) or primary (0) beacon node | | -| `app_feature_flags` | Gauge | Constant gauge with custom enabled feature flags | `feature_flags` | -| `app_git_commit` | Gauge | Constant gauge with label set to current git commit hash | `git_hash` | -| `app_health_checks` | Gauge | Application health checks by name and severity. Set to 1 for failing, 0 for ok. | `severity, name` | -| `app_health_metrics_high_cardinality` | Gauge | Metrics with high cardinality by name. 
| `name` | -| `app_log_error_total` | Counter | Total count of logged errors by topic | `topic` | -| `app_log_warn_total` | Counter | Total count of logged warnings by topic | `topic` | -| `app_monitoring_readyz` | Gauge | Set to 1 if the node is operational and monitoring api `/readyz` endpoint is returning 200s. Else `/readyz` is returning 500s and this metric is either set to 2 if the beacon node is down, or3 if the beacon node is syncing, or4 if quorum peers are not connected. | | -| `app_peer_name` | Gauge | Constant gauge with label set to the name of the cluster peer | `peer_name` | -| `app_peerinfo_builder_api_enabled` | Gauge | Set to 1 if builder API is enabled on this peer, else 0 if disabled. | `peer` | -| `app_peerinfo_clock_offset_seconds` | Gauge | Peer clock offset in seconds | `peer` | -| `app_peerinfo_git_commit` | Gauge | Constant gauge with git_hash label set to peer`s git commit hash. | `peer, git_hash` | -| `app_peerinfo_index` | Gauge | Constant gauge set to the peer index in the cluster definition | `peer` | -| `app_peerinfo_nickname` | Gauge | Constant gauge with nickname label set to peer`s charon nickname. | `peer, peer_nickname` | -| `app_peerinfo_start_time_secs` | Gauge | Constant gauge set to the peer start time of the binary in unix seconds | `peer` | -| `app_peerinfo_version` | Gauge | Constant gauge with version label set to peer`s charon version. | `peer, version` | -| `app_peerinfo_version_support` | Gauge | Set to 1 if the peer`s version is supported by (compatible with) the current version, else 0 if unsupported. 
| `peer` | -| `app_start_time_secs` | Gauge | Gauge set to the app start time of the binary in unix seconds | | -| `app_validator_stack_params` | Gauge | Parameters for each component of the validator stack in which this Charon instance is deployed into | `component, cli_parameters` | -| `app_version` | Gauge | Constant gauge with label set to current app version | `version` | -| `cluster_network` | Gauge | Constant gauge with label set to the current network (chain) | `network` | -| `cluster_operators` | Gauge | Number of operators in the cluster lock | | -| `cluster_threshold` | Gauge | Aggregation threshold in the cluster lock | | -| `cluster_validators` | Gauge | Number of validators in the cluster lock | | -| `core_bcast_broadcast_delay_seconds` | Histogram | Duty broadcast delay since the expected duty submission in seconds by type | `duty` | -| `core_bcast_broadcast_total` | Counter | The total count of successfully broadcast duties by type | `duty` | -| `core_consensus_decided_leader_index` | Gauge | Index of the decided leader by protocol and duty | `protocol, duty` | -| `core_consensus_decided_rounds` | Gauge | Number of decided rounds by protocol, duty, and timer | `protocol, duty, timer` | -| `core_consensus_duration_seconds` | Histogram | Duration of the consensus process by protocol, duty, and timer | `protocol, duty, timer` | -| `core_consensus_error_total` | Counter | Total count of consensus errors by protocol | `protocol` | -| `core_consensus_timeout_total` | Counter | Total count of consensus timeouts by protocol, duty, and timer | `protocol, duty, timer` | -| `core_fetcher_proposal_blinded` | Gauge | Whether the fetched proposal was blinded (1) or local (2) | | -| `core_parsigdb_exit_total` | Counter | Total number of partially signed voluntary exits per public key | `pubkey` | -| `core_parsigdb_store` | Histogram | Latency of partial signatures received since earliest expected time, per duty, per peer index | `duty, peer_idx` | -| 
`core_scheduler_current_epoch` | Gauge | The current epoch | | -| `core_scheduler_current_slot` | Gauge | The current slot | | -| `core_scheduler_duty_total` | Counter | The total count of duties scheduled by type | `duty` | -| `core_scheduler_skipped_slots_total` | Counter | Total number times slots were skipped | | -| `core_scheduler_submit_registration_errors_total` | Counter | The total count of failed submit registration requests | | -| `core_scheduler_submit_registration_total` | Counter | The total number of submit registration requests | | -| `core_scheduler_validator_balance_gwei` | Gauge | Total balance of a validator by public key | `pubkey_full, pubkey` | -| `core_scheduler_validator_status` | Gauge | Gauge with validator pubkey and status as labels, value=1 is current status, value=0 is previous. | `pubkey_full, pubkey, status` | -| `core_scheduler_validators_active` | Gauge | Number of active validators | | -| `core_tracker_attestation_expect_total` | Counter | Total number of expected attestations for the slot (counts individual attestations, not duties) | | -| `core_tracker_attestation_success_total` | Counter | Total number of successful attestations for the slot (counts individual attestations, not duties) | | -| `core_tracker_expect_duties_total` | Counter | Total number of expected duties (failed + success) by type | `duty` | -| `core_tracker_failed_duties_total` | Counter | Total number of failed duties by type | `duty` | -| `core_tracker_failed_duty_reasons_total` | Counter | Total number of failed duties by type and reason code | `duty, reason` | -| `core_tracker_inclusion_delay` | Gauge | Cluster`s average attestation inclusion delay in slots. Available only when attestation_inclusion feature flag is enabled. 
| | -| `core_tracker_inclusion_missed_total` | Counter | Total number of broadcast duties never included in any block by type | `duty` | -| `core_tracker_inconsistent_parsigs_total` | Counter | Total number of duties that contained inconsistent partial signed data by duty type | `duty` | -| `core_tracker_participation` | Gauge | Set to 1 if peer participated successfully for the given duty or else 0 | `duty, peer` | -| `core_tracker_participation_expected_total` | Counter | Total number of expected participations (fail + success) by peer and duty type | `duty, peer` | -| `core_tracker_participation_missed_total` | Counter | Total number of missed participations by peer and duty type | `duty, peer` | -| `core_tracker_participation_success_total` | Counter | Total number of successful participations by peer and duty type | `duty, peer` | -| `core_tracker_participation_total` | Counter | Total number of successful participations by peer and duty type | `duty, peer` | -| `core_tracker_success_duties_total` | Counter | Total number of successful duties by type | `duty` | -| `core_tracker_unexpected_events_total` | Counter | Total number of unexpected events by peer | `peer` | -| `core_validatorapi_proxy_request_latency_seconds` | Histogram | The validatorapi proxy request latencies in seconds by path | `path` | -| `core_validatorapi_request_error_total` | Counter | The total number of validatorapi request errors | `endpoint, status_code` | -| `core_validatorapi_request_latency_seconds` | Histogram | The validatorapi request latencies in seconds by endpoint | `endpoint` | -| `core_validatorapi_request_total` | Counter | The total number of requests per content-type and endpoint | `endpoint, content_type` | -| `core_validatorapi_vc_user_agent` | Gauge | Gauge with label set to user agent string of requests made by VC | `user_agent` | -| `p2p_peer_connection_total` | Counter | Total number of libp2p connections per peer. 
| `peer` | -| `p2p_peer_connection_types` | Gauge | Current number of libp2p connections by peer, type (`direct` or `relay`), and protocol (`tcp`, `quic`). Note that peers may have multiple connections. | `peer, type, protocol` | -| `p2p_peer_network_receive_bytes_total` | Counter | Total number of network bytes received from the peer by protocol and transport. Transport is based on first active connection (accurate in steady state). | `peer, protocol, transport` | -| `p2p_peer_network_sent_bytes_total` | Counter | Total number of network bytes sent to the peer by protocol and transport. Transport is based on first active connection (accurate in steady state). | `peer, protocol, transport` | -| `p2p_peer_streams` | Gauge | Current number of libp2p streams by peer, direction (`inbound` or `outbound` or `unknown`), protocol and transport. | `peer, direction, protocol, transport` | -| `p2p_ping_error_total` | Counter | Total number of ping errors per peer | `peer` | -| `p2p_ping_latency_secs` | Histogram | Ping latencies in seconds per peer | `peer` | -| `p2p_ping_success` | Gauge | Whether the last ping was successful (1) or not (0). Can be used as proxy for connected peers | `peer` | -| `p2p_reachability_status` | Gauge | Current libp2p reachability status of this node as detected by autonat: unknown(0), public(1) or private(2). | | -| `p2p_relay_connection_types` | Gauge | Current number of libp2p connections by relay, type (`direct` or `relay`), and protocol (`tcp`, `quic`). Note that peers may have multiple connections. 
| `peer, type, protocol` | -| `p2p_relay_connections` | Gauge | Connected relays by name | `peer` | -| `relay_p2p_active_connections` | Gauge | Current number of active connections by peer and cluster | `peer, peer_cluster` | -| `relay_p2p_connection_total` | Counter | Total number of new connections by peer and cluster | `peer, peer_cluster` | -| `relay_p2p_network_receive_bytes_total` | Counter | Total number of network bytes received from the peer and cluster | `peer, peer_cluster` | -| `relay_p2p_network_sent_bytes_total` | Counter | Total number of network bytes sent to the peer and cluster | `peer, peer_cluster` | -| `relay_p2p_ping_latency` | Histogram | Ping latency by peer and cluster | `peer, peer_cluster` | +| Name | Type | Help | Labels | +|---|---|---|---| +| `app_beacon_node_peers` | Gauge | Gauge set to the peer count of the upstream beacon node | | +| `app_beacon_node_sse_block` | Histogram | Block imported into fork choice delay, supplied by beacon node`s SSE endpoint. Values between 0s and 4s for Ethereum mainnet are considered safe | `addr` | +| `app_beacon_node_sse_block_gossip` | Histogram | Block reception via gossip delay, supplied by beacon node`s SSE endpoint. Values between 0s and 4s for Ethereum mainnet are considered safe | `addr` | +| `app_beacon_node_sse_block_processing_time` | Histogram | Time in seconds between block gossip and head events, indicating block processing time. Lower values indicate better CPU/disk/RAM performance. | `addr` | +| `app_beacon_node_sse_chain_reorg_depth` | Histogram | Chain reorg depth, supplied by beacon node`s SSE endpoint | `addr` | +| `app_beacon_node_sse_head_delay` | Histogram | Delay in seconds between slot start and head update, supplied by beacon node`s SSE endpoint. Values between 8s and 12s for Ethereum mainnet are considered safe. 
| `addr` | +| `app_beacon_node_sse_head_slot` | Gauge | Current beacon node head slot, supplied by beacon node`s SSE endpoint | `addr` | +| `app_beacon_node_version` | Gauge | Constant gauge with labels set to the version and beacon_id of the upstream beacon node | `version, beacon_id` | +| `app_cache_hits_total` | Counter | Total number of times the cache was used | `endpoint` | +| `app_cache_invalidated_reorg_total` | Counter | Total number of times the cache was invalidated due to a chain reorg | `endpoint` | +| `app_cache_misses_total` | Counter | Total number of times the cache was missed | `endpoint` | +| `app_eth2_errors_total` | Counter | Total number of errors returned by eth2 beacon node requests | `endpoint` | +| `app_eth2_latency_seconds` | Histogram | Latency in seconds for eth2 beacon node requests | `endpoint` | +| `app_eth2_requests_total` | Counter | Total number of requests sent to eth2 beacon node | `endpoint` | +| `app_eth2_using_fallback` | Gauge | Indicates if client is using fallback (1) or primary (0) beacon node | | +| `app_feature_flags` | Gauge | Constant gauge with custom enabled feature flags | `feature_flags` | +| `app_git_commit` | Gauge | Constant gauge with label set to current git commit hash | `git_hash` | +| `app_health_checks` | Gauge | Application health checks by name and severity. Set to 1 for failing, 0 for ok. | `severity, name` | +| `app_health_metrics_high_cardinality` | Gauge | Metrics with high cardinality by name. | `name` | +| `app_log_error_total` | Counter | Total count of logged errors by topic | `topic` | +| `app_log_warn_total` | Counter | Total count of logged warnings by topic | `topic` | +| `app_monitoring_readyz` | Gauge | Set to 1 if the node is operational and monitoring api `/readyz` endpoint is returning 200s. Else `/readyz` is returning 500s and this metric is either set to 2 if the beacon node is down, or3 if the beacon node is syncing, or4 if quorum peers are not connected. 
| | +| `app_peer_name` | Gauge | Constant gauge with label set to the name of the cluster peer | `peer_name` | +| `app_peerinfo_builder_api_enabled` | Gauge | Set to 1 if builder API is enabled on this peer, else 0 if disabled. | `peer` | +| `app_peerinfo_clock_offset_seconds` | Gauge | Peer clock offset in seconds | `peer` | +| `app_peerinfo_git_commit` | Gauge | Constant gauge with git_hash label set to peer`s git commit hash. | `peer, git_hash` | +| `app_peerinfo_index` | Gauge | Constant gauge set to the peer index in the cluster definition | `peer` | +| `app_peerinfo_nickname` | Gauge | Constant gauge with nickname label set to peer`s charon nickname. | `peer, peer_nickname` | +| `app_peerinfo_start_time_secs` | Gauge | Constant gauge set to the peer start time of the binary in unix seconds | `peer` | +| `app_peerinfo_version` | Gauge | Constant gauge with version label set to peer`s charon version. | `peer, version` | +| `app_peerinfo_version_support` | Gauge | Set to 1 if the peer`s version is supported by (compatible with) the current version, else 0 if unsupported. 
| `peer` | +| `app_start_time_secs` | Gauge | Gauge set to the app start time of the binary in unix seconds | | +| `app_validator_stack_params` | Gauge | Parameters for each component of the validator stack in which this Charon instance is deployed into | `component, cli_parameters` | +| `app_version` | Gauge | Constant gauge with label set to current app version | `version` | +| `cluster_network` | Gauge | Constant gauge with label set to the current network (chain) | `network` | +| `cluster_operators` | Gauge | Number of operators in the cluster lock | | +| `cluster_threshold` | Gauge | Aggregation threshold in the cluster lock | | +| `cluster_validators` | Gauge | Number of validators in the cluster lock | | +| `core_bcast_broadcast_delay_seconds` | Histogram | Duty broadcast delay since the expected duty submission in seconds by type | `duty` | +| `core_bcast_broadcast_total` | Counter | The total count of successfully broadcast duties by type | `duty` | +| `core_consensus_decided_leader_index` | Gauge | Index of the decided leader by protocol and duty | `protocol, duty` | +| `core_consensus_decided_rounds` | Gauge | Number of decided rounds by protocol, duty, and timer | `protocol, duty, timer` | +| `core_consensus_duration_seconds` | Histogram | Duration of the consensus process by protocol, duty, and timer | `protocol, duty, timer` | +| `core_consensus_error_total` | Counter | Total count of consensus errors by protocol | `protocol` | +| `core_consensus_timeout_total` | Counter | Total count of consensus timeouts by protocol, duty, and timer | `protocol, duty, timer` | +| `core_fetcher_proposal_blinded` | Gauge | Whether the fetched proposal was blinded (1) or local (2) | | +| `core_parsigdb_exit_total` | Counter | Total number of partially signed voluntary exits per public key | `pubkey` | +| `core_parsigdb_store` | Histogram | Latency of partial signatures received since earliest expected time, per duty, per peer index | `duty, peer_idx` | +| 
`core_scheduler_current_epoch` | Gauge | The current epoch | | +| `core_scheduler_current_slot` | Gauge | The current slot | | +| `core_scheduler_duty_total` | Counter | The total count of duties scheduled by type | `duty` | +| `core_scheduler_skipped_slots_total` | Counter | Total number times slots were skipped | | +| `core_scheduler_submit_registration_errors_total` | Counter | The total count of failed submit registration requests | | +| `core_scheduler_submit_registration_total` | Counter | The total number of submit registration requests | | +| `core_scheduler_validator_balance_gwei` | Gauge | Total balance of a validator by public key | `pubkey_full, pubkey` | +| `core_scheduler_validator_status` | Gauge | Gauge with validator pubkey and status as labels, value=1 is current status, value=0 is previous. | `pubkey_full, pubkey, status` | +| `core_scheduler_validators_active` | Gauge | Number of active validators | | +| `core_tracker_attestation_expect_total` | Counter | Total number of expected attestations for the slot (counts individual attestations, not duties) | | +| `core_tracker_attestation_success_total` | Counter | Total number of successful attestations for the slot (counts individual attestations, not duties) | | +| `core_tracker_expect_duties_total` | Counter | Total number of expected duties (failed + success) by type | `duty` | +| `core_tracker_failed_duties_total` | Counter | Total number of failed duties by type | `duty` | +| `core_tracker_failed_duty_reasons_total` | Counter | Total number of failed duties by type and reason code | `duty, reason` | +| `core_tracker_inclusion_delay` | Gauge | Cluster`s average attestation inclusion delay in slots. Available only when attestation_inclusion feature flag is enabled. 
| | +| `core_tracker_inclusion_missed_total` | Counter | Total number of broadcast duties never included in any block by type | `duty` | +| `core_tracker_inconsistent_parsigs_total` | Counter | Total number of duties that contained inconsistent partial signed data by duty type | `duty` | +| `core_tracker_participation` | Gauge | Set to 1 if peer participated successfully for the given duty or else 0 | `duty, peer` | +| `core_tracker_participation_expected_total` | Counter | Total number of expected participations (fail + success) by peer and duty type | `duty, peer` | +| `core_tracker_participation_missed_total` | Counter | Total number of missed participations by peer and duty type | `duty, peer` | +| `core_tracker_participation_success_total` | Counter | Total number of successful participations by peer and duty type | `duty, peer` | +| `core_tracker_participation_total` | Counter | Total number of successful participations by peer and duty type | `duty, peer` | +| `core_tracker_success_duties_total` | Counter | Total number of successful duties by type | `duty` | +| `core_tracker_unexpected_events_total` | Counter | Total number of unexpected events by peer | `peer` | +| `core_validatorapi_proxy_request_latency_seconds` | Histogram | The validatorapi proxy request latencies in seconds by path | `path` | +| `core_validatorapi_request_error_total` | Counter | The total number of validatorapi request errors | `endpoint, status_code` | +| `core_validatorapi_request_latency_seconds` | Histogram | The validatorapi request latencies in seconds by endpoint | `endpoint` | +| `core_validatorapi_request_total` | Counter | The total number of requests per content-type and endpoint | `endpoint, content_type` | +| `core_validatorapi_vc_user_agent` | Gauge | Gauge with label set to user agent string of requests made by VC | `user_agent` | +| `p2p_peer_connection_total` | Counter | Total number of libp2p connections per peer. 
| `peer` | +| `p2p_peer_connection_types` | Gauge | Current number of libp2p connections by peer, type (`direct` or `relay`), and protocol (`tcp`, `quic`). Note that peers may have multiple connections. | `peer, type, protocol` | +| `p2p_peer_network_receive_bytes_total` | Counter | Total number of network bytes received from the peer by protocol and transport. Transport is based on first active connection (accurate in steady state). | `peer, protocol, transport` | +| `p2p_peer_network_sent_bytes_total` | Counter | Total number of network bytes sent to the peer by protocol and transport. Transport is based on first active connection (accurate in steady state). | `peer, protocol, transport` | +| `p2p_peer_streams` | Gauge | Current number of libp2p streams by peer, direction (`inbound` or `outbound` or `unknown`), protocol and transport. | `peer, direction, protocol, transport` | +| `p2p_ping_error_total` | Counter | Total number of ping errors per peer | `peer` | +| `p2p_ping_latency_secs` | Histogram | Ping latencies in seconds per peer | `peer` | +| `p2p_ping_success` | Gauge | Whether the last ping was successful (1) or not (0). Can be used as proxy for connected peers | `peer` | +| `p2p_reachability_status` | Gauge | Current libp2p reachability status of this node as detected by autonat: unknown(0), public(1) or private(2). | | +| `p2p_relay_connection_types` | Gauge | Current number of libp2p connections by relay, type (`direct` or `relay`), and protocol (`tcp`, `quic`). Note that peers may have multiple connections. 
| `peer, type, protocol` | +| `p2p_relay_connections` | Gauge | Connected relays by name | `peer` | +| `relay_p2p_active_connections` | Gauge | Current number of active connections by peer and cluster | `peer, peer_cluster` | +| `relay_p2p_connection_total` | Counter | Total number of new connections by peer and cluster | `peer, peer_cluster` | +| `relay_p2p_network_receive_bytes_total` | Counter | Total number of network bytes received from the peer and cluster | `peer, peer_cluster` | +| `relay_p2p_network_sent_bytes_total` | Counter | Total number of network bytes sent to the peer and cluster | `peer, peer_cluster` | +| `relay_p2p_ping_latency` | Histogram | Ping latency by peer and cluster | `peer, peer_cluster` |