diff --git a/src/disco/events/gen_events.py b/src/disco/events/gen_events.py index 98fe93cb163..6ac602d7c02 100644 --- a/src/disco/events/gen_events.py +++ b/src/disco/events/gen_events.py @@ -70,6 +70,7 @@ class Field: description: str variants: Optional[Dict[str, Variant]] = None fields: Optional[Dict[str, "Field"]] = None + shared_name: Optional[str] = None @dataclass class Schema: @@ -78,13 +79,15 @@ class Schema: description: str fields: Dict[str, Field] -def parse_field(f: dict) -> Field: - return self._TO_PROTO.value[self.name] - -def parse_field(f: dict) -> Field: +def parse_field(f: dict, shared_types: Dict[str, dict]) -> Field: + if f["type"].startswith("ref:"): + field = parse_field(shared_types[f["type"][4:]], shared_types) + field.shared_name = f["type"][4:] + return field + fields = None if f["type"] == "Flatten": - fields = {k: parse_field(v) for k, v in f["fields"].items()} + fields = {k: parse_field(v, shared_types) for k, v in f["fields"].items()} return Field( chtype=ClickHouseType.from_str(f["type"]), @@ -93,62 +96,75 @@ def parse_field(f: dict) -> Field: fields=fields ) -def parse_schema(path: Path) -> Schema: +def parse_schema(path: Path, shared_types: Dict[str, dict]) -> Schema: data = json.loads(path.read_text()) - fields = {k: parse_field(v) for k, v in data["fields"].items()} + fields = {k: parse_field(v, shared_types) for k, v in data["fields"].items()} return Schema(data["name"], data["id"], data["description"], fields) -def collect_nested_messages(schema_name: str, fields: Dict[str, Field], prefix: str = "") -> List[tuple]: +def collect_nested_messages(fields: Dict[str, Field], prefix: str = "") -> List[tuple]: msgs = [] for name, f in fields.items(): if f.chtype == ClickHouseType.Flatten: - prefix = f"{prefix}{to_pascal_case(name)}" - msgs.append((f"{to_pascal_case(schema_name)}{prefix}", f.fields, f.description)) - msgs += collect_nested_messages(schema_name, f.fields, prefix) + new_prefix = f.shared_name or 
f"{prefix}{to_pascal_case(name)}" + msgs.append((new_prefix, f.fields, f.description)) + msgs += collect_nested_messages(f.fields, new_prefix) return msgs -def generate_message_fields(schema_name: str, fields: Dict[str, Field], prefix: str = "") -> List[str]: +def generate_message_fields(fields: Dict[str, Field], prefix: str = "") -> List[str]: lines = [] for i, (name, f) in enumerate(fields.items(), 1): if f.chtype == ClickHouseType.Flatten or f.variants: - proto_type = f"{to_pascal_case(schema_name)}{prefix}{to_pascal_case(name)}" + proto_type = f.shared_name or f"{prefix}{to_pascal_case(name)}" else: proto_type = f.chtype.to_protobuf_type() lines += [f" // {f.description}", f" {proto_type} {name} = {i};"] return lines -def generate_enums(schema_name: str, fields: Dict[str, Field], prefix: str = "") -> List[str]: +def generate_enums(fields: Dict[str, Field], prefix: str, generated: set) -> List[str]: lines = [] for name, f in fields.items(): if f.variants: - enum = f"{to_pascal_case(schema_name)}{prefix}{to_pascal_case(name)}" + enum = f.shared_name or f"{prefix}{to_pascal_case(name)}" + if enum in generated: + continue + + generated.add(enum) ep = to_screaming_snake_case(enum) lines += [f"// {f.description}", f"enum {enum} {{", f" {ep}_UNSPECIFIED = 0;"] for i, (vn, v) in enumerate(f.variants.items(), 1): lines.append(f" {ep}_{to_screaming_snake_case(vn)} = {i}; // {v.description}") lines += ["}", ""] if f.chtype == ClickHouseType.Flatten: - lines += generate_enums(schema_name, f.fields, f"{prefix}{to_pascal_case(name)}") + nested_prefix = f.shared_name or f"{prefix}{to_pascal_case(name)}" + lines += generate_enums(f.fields, nested_prefix, generated) return lines def generate_protobuf(schemas: List[Schema]) -> str: lines = ['syntax = "proto3";', "", "package events.v1;", ""] + generated_enums = set() for s in schemas: - lines += generate_enums(s.name, s.fields) + schema_prefix = to_pascal_case(s.name) + lines += generate_enums(s.fields, 
schema_prefix, generated_enums) + generated_msgs = set() for s in schemas: - for msg, flds, desc in reversed(collect_nested_messages(s.name, s.fields)): - prefix = msg[len(to_pascal_case(s.name)):] - lines += [f"// {desc}", f"message {msg} {{"] + generate_message_fields(s.name, flds, prefix) + ["}", ""] + schema_prefix = to_pascal_case(s.name) + for msg, flds, desc in reversed(collect_nested_messages(s.fields, schema_prefix)): + if msg in generated_msgs: + continue + + generated_msgs.add(msg) + lines += [f"// {desc}", f"message {msg} {{"] + generate_message_fields(flds, msg) + ["}", ""] for s in schemas: - lines += [f"// {s.description}", f"message {to_pascal_case(s.name)} {{"] + generate_message_fields(s.name, s.fields) + ["}", ""] + schema_prefix = to_pascal_case(s.name) + lines += [f"// {s.description}", f"message {schema_prefix} {{"] + generate_message_fields(s.fields, schema_prefix) + ["}", ""] lines += ["// Combined event type", "message Event {", " oneof event {"] for s in schemas: @@ -171,7 +187,7 @@ def check_breaking_changes(schema_dir: Path) -> None: check=True ) - print("✓ No breaking changes detected") + print("No breaking changes detected") def main() -> None: parser = argparse.ArgumentParser(description="Generate protobuf from JSON schemas") @@ -181,10 +197,12 @@ def main() -> None: schema_dir = Path(__file__).parent / "schema" proto_path = schema_dir / "events.proto" - schemas = sorted([parse_schema(f) for f in schema_dir.glob("*.json")], key=lambda s: s.id) + shared_types = json.loads((schema_dir / "shared.json").read_text()) + schema_files = [f for f in schema_dir.glob("*.json") if f.name != "shared.json"] + schemas = sorted([parse_schema(f, shared_types) for f in schema_files], key=lambda s: s.id) proto_path.write_text(generate_protobuf(schemas)) - print(f"✓ Protobuf generated successfully for {len(schemas)} schemas") + print(f"Protobuf generated successfully from {len(schemas)} schemas") if not args.skip_check: check_breaking_changes(schema_dir) 
diff --git a/src/disco/events/schema/events.proto b/src/disco/events/schema/events.proto index 4bd9fd2687c..c487d782281 100644 --- a/src/disco/events/schema/events.proto +++ b/src/disco/events/schema/events.proto @@ -20,6 +20,1303 @@ enum ShredProtocol { SHRED_PROTOCOL_LEADER = 3; // Leader } +// Reason for taking this sample +enum MetricMetaSampleReason { + METRIC_META_SAMPLE_REASON_UNSPECIFIED = 0; + METRIC_META_SAMPLE_REASON_PERIODIC = 1; // Periodic sampling at regular intervals + METRIC_META_SAMPLE_REASON_LEADER = 2; // Sampled because this validator was leader in the slot +} + +// CPU time spent in each CPU regime +message MetricTileCpuDurationNanos { + // Wait (task was runnable but not scheduled) + uint64 cpu_duration_nanos_wait = 1; + // Idle (task was not runnable) + uint64 cpu_duration_nanos_idle = 2; + // User (task was scheduled and executing in user mode) + uint64 cpu_duration_nanos_user = 3; + // System (task was scheduled and executing in kernel mode) + uint64 cpu_duration_nanos_system = 4; +} + +// Mutually exclusive and exhaustive duration of time the tile spent in each of the regimes +message MetricTileRegimeDurationNanos { + // Caught up + Housekeeping + uint64 regime_duration_nanos_caught_up_housekeeping = 1; + // Processing + Housekeeping + uint64 regime_duration_nanos_processing_housekeeping = 2; + // Backpressure + Housekeeping + uint64 regime_duration_nanos_backpressure_housekeeping = 3; + // Caught up + Prefrag + uint64 regime_duration_nanos_caught_up_prefrag = 4; + // Processing + Prefrag + uint64 regime_duration_nanos_processing_prefrag = 5; + // Backpressure + Prefrag + uint64 regime_duration_nanos_backpressure_prefrag = 6; + // Caught up + Postfrag + uint64 regime_duration_nanos_caught_up_postfrag = 7; + // Processing + Postfrag + uint64 regime_duration_nanos_processing_postfrag = 8; +} + +// Common tile metrics shared by all tiles +message MetricTile { + // The process ID of the tile + uint64 pid = 1; + // The thread ID of the tile. 
Always the same as the Pid in production, but might be different in development + uint64 tid = 2; + // The number of involuntary context switches + uint64 context_switch_involuntary_count = 3; + // The number of voluntary context switches + uint64 context_switch_voluntary_count = 4; + // The number of major page faults + uint64 page_fault_major_count = 5; + // The number of minor page faults + uint64 page_fault_minor_count = 6; + // The current status of the tile. 0 is booting, 1 is running. 2 is shutdown + uint64 status = 7; + // The last UNIX timestamp in nanoseconds that the tile heartbeated + uint64 heartbeat = 8; + // Whether the tile is currently backpressured or not, either 1 or 0 + uint64 in_backpressure = 9; + // Number of times the tile has had to wait for one of more consumers to catch up to resume publishing + uint64 backpressure_count = 10; + // Mutually exclusive and exhaustive duration of time the tile spent in each of the regimes + MetricTileRegimeDurationNanos regime_duration_nanos = 11; + // CPU time spent in each CPU regime + MetricTileCpuDurationNanos cpu_duration_nanos = 12; +} + +// Metadata about this metrics sample +message MetricMeta { + // The kind_id of this tile instance within its type (e.g., 0, 1, 2 for multiple tiles of same type) + uint64 kind_id = 1; + // Unique identifier correlating samples taken at the same time across tiles + uint64 sample_id = 2; + // Reason for taking this sample + MetricMetaSampleReason sample_reason = 3; + // The slot number for which this sample was taken, if applicable + uint64 sample_slot = 4; +} + +// Number of QUIC packets that retransmitted +message MetricsQuicPktRetransmissions { + // Initial + uint64 pkt_retransmissions_initial = 1; + // Early data + uint64 pkt_retransmissions_early = 2; + // Handshake + uint64 pkt_retransmissions_handshake = 3; + // App data + uint64 pkt_retransmissions_app = 4; +} + +// Number of packets that failed decryption due to missing key +message MetricsQuicPktNoKey { + // 
Initial + uint64 pkt_no_key_initial = 1; + // Early data + uint64 pkt_no_key_early = 2; + // Handshake + uint64 pkt_no_key_handshake = 3; + // App data + uint64 pkt_no_key_app = 4; +} + +// Number of packets that failed decryption +message MetricsQuicPktCryptoFailed { + // Initial + uint64 pkt_crypto_failed_initial = 1; + // Early data + uint64 pkt_crypto_failed_early = 2; + // Handshake + uint64 pkt_crypto_failed_handshake = 3; + // App data + uint64 pkt_crypto_failed_app = 4; +} + +// ACK events +message MetricsQuicAckTx { + // Non-ACK-eliciting packet + uint64 ack_tx_noop = 1; + // New ACK range + uint64 ack_tx_new = 2; + // Merged into existing ACK range + uint64 ack_tx_merged = 3; + // Out of buffers + uint64 ack_tx_drop = 4; + // ACK suppressed by handler + uint64 ack_tx_cancel = 5; +} + +// Number of QUIC frames received +message MetricsQuicReceivedFrames { + // Unknown frame type + uint64 received_frames_unknown = 1; + // ACK frame + uint64 received_frames_ack = 2; + // RESET_STREAM frame + uint64 received_frames_reset_stream = 3; + // STOP_SENDING frame + uint64 received_frames_stop_sending = 4; + // CRYPTO frame + uint64 received_frames_crypto = 5; + // NEW_TOKEN frame + uint64 received_frames_new_token = 6; + // STREAM frame + uint64 received_frames_stream = 7; + // MAX_DATA frame + uint64 received_frames_max_data = 8; + // MAX_STREAM_DATA frame + uint64 received_frames_max_stream_data = 9; + // MAX_STREAMS frame + uint64 received_frames_max_streams = 10; + // DATA_BLOCKED frame + uint64 received_frames_data_blocked = 11; + // STREAM_DATA_BLOCKED frame + uint64 received_frames_stream_data_blocked = 12; + // STREAMS_BLOCKED(bidi) frame + uint64 received_frames_streams_blocked = 13; + // NEW_CONN_ID frame + uint64 received_frames_new_conn_id = 14; + // RETIRE_CONN_ID frame + uint64 received_frames_retire_conn_id = 15; + // PATH_CHALLENGE frame + uint64 received_frames_path_challenge = 16; + // PATH_RESPONSE frame + uint64 received_frames_path_response = 
17; + // CONN_CLOSE(transport) frame + uint64 received_frames_conn_close_quic = 18; + // CONN_CLOSE(app) frame + uint64 received_frames_conn_close_app = 19; + // HANDSHAKE_DONE frame + uint64 received_frames_handshake_done = 20; + // PING frame + uint64 received_frames_ping = 21; + // PADDING frame + uint64 received_frames_padding = 22; +} + +// Number of Initial packets grouped by token length +message MetricsQuicInitialTokenLen { + // No token + uint64 initial_token_len_zero = 1; + // fd_quic retry token length + uint64 initial_token_len_fd_quic_len = 2; + // Invalid token length + uint64 initial_token_len_invalid_len = 3; +} + +// Results of attempts to acquire QUIC frame metadata +message MetricsQuicFrameTxAlloc { + // Success + uint64 frame_tx_alloc_success = 1; + // PktMetaPoolEmpty + uint64 frame_tx_alloc_fail_empty_pool = 2; + // ConnMaxedInflightFrames + uint64 frame_tx_alloc_fail_conn_max = 3; +} + +// Number of packets with an unknown connection ID +message MetricsQuicPktNoConn { + // Initial + uint64 pkt_no_conn_initial = 1; + // Retry + uint64 pkt_no_conn_retry = 2; + // Handshake + uint64 pkt_no_conn_handshake = 3; + // 1-RTT + uint64 pkt_no_conn_one_rtt = 4; +} + +// The number of QUIC connections in each state +message MetricsQuicConnectionsState { + // Freed + uint64 connections_state_invalid = 1; + // Handshaking peer + uint64 connections_state_handshake = 2; + // Handshake complete, confirming with peer + uint64 connections_state_handshake_complete = 3; + // Active connection + uint64 connections_state_active = 4; + // Peer requested close + uint64 connections_state_peer_close = 5; + // Connection terminating due to error + uint64 connections_state_abort = 6; + // Connection is closing + uint64 connections_state_close_pending = 7; + // Connection about to be freed + uint64 connections_state_dead = 8; +} + +// Count of txns received via TPU +message MetricsQuicTxnsReceived { + // TPU/UDP + uint64 txns_received_udp = 1; + // TPU/QUIC unfragmented + 
uint64 txns_received_quic_fast = 2; + // TPU/QUIC fragmented + uint64 txns_received_quic_frag = 3; +} + +// Number of gRPC errors encountered +message MetricsBundleErrors { + // Protobuf decode/encode error + uint64 errors_protobuf = 1; + // Transport error + uint64 errors_transport = 2; + // I/O timeout + uint64 errors_timeout = 3; + // Bundle dropped due to missing fee info + uint64 errors_no_fee_info = 4; + // OpenSSL alloc fail + uint64 errors_ssl_alloc = 5; +} + +// Count of transaction results through verify tile +message MetricsVerifyTransactionResult { + // Transaction verified successfully + uint64 transaction_result_success = 1; + // Peer transaction in the bundle failed + uint64 transaction_result_bundle_peer_failure = 2; + // Transaction failed to parse + uint64 transaction_result_parse_failure = 3; + // Transaction failed deduplication + uint64 transaction_result_dedup_failure = 4; + // Transaction failed signature verification + uint64 transaction_result_verify_failure = 5; +} + +// Count of transaction results through dedup tile +message MetricsDedupTransactionResult { + // Transaction deduplicated successfully + uint64 transaction_result_success = 1; + // Peer transaction in the bundle failed + uint64 transaction_result_bundle_peer_failure = 2; + // Transaction failed deduplication + uint64 transaction_result_dedup_failure = 3; +} + +// Result of considering whether bundle cranks are needed +message MetricsPackBundleCrankStatus { + // On-chain state in the correct state + uint64 bundle_crank_status_not_needed = 1; + // Inserted an initializer bundle to update the on-chain state + uint64 bundle_crank_status_inserted = 2; + // Tried to insert an initializer bundle to update the on-chain state, but creation failed + uint64 bundle_crank_status_creation_failed = 3; + // Tried to insert an initializer bundle to update the on-chain state, but insertion failed + uint64 bundle_crank_status_insertion_failed = 4; +} + +// Result of trying to consider a 
transaction for scheduling +message MetricsPackTransactionSchedule { + // Pack included the transaction in the microblock + uint64 transaction_schedule_taken = 1; + // Pack skipped the transaction because it would have exceeded the block CU limit + uint64 transaction_schedule_cu_limit = 2; + // Pack skipped the transaction because of account conflicts using the fast bitvector check + uint64 transaction_schedule_fast_path = 3; + // Pack skipped the transaction because it would have exceeded the block data size limit + uint64 transaction_schedule_byte_limit = 4; + // Pack skipped the transaction because it would have caused a writable account to exceed the per-account block write cost limit + uint64 transaction_schedule_write_cost = 5; + // Pack skipped the transaction because of account conflicts using the full slow check + uint64 transaction_schedule_slow_path = 6; + // Pack skipped the transaction it previously exceeded the per-account block write cost limit too many times + uint64 transaction_schedule_defer_skip = 7; +} + +// The total number of pending transactions in pack's pool that are available to be scheduled +message MetricsPackAvailableTransactions { + // All transactions in any treap + uint64 available_transactions_all = 1; + // Non-votes in the main treap + uint64 available_transactions_regular = 2; + // Simple votes + uint64 available_transactions_votes = 3; + // Non-votes that write to a hotly-contended account + uint64 available_transactions_conflicting = 4; + // Transactions that are part of a bundle + uint64 available_transactions_bundles = 5; +} + +// Time in nanos spent in each state +message MetricsPackMetricTiming { + // Pack had no transactions available, and wasn't leader + uint64 metric_timing_no_txn_no_bank_no_leader_no_microblock = 1; + // Pack had transactions available, but wasn't leader or had hit a limit + uint64 metric_timing_txn_no_bank_no_leader_no_microblock = 2; + // Pack had no transactions available, had banks but wasn't leader 
+ uint64 metric_timing_no_txn_bank_no_leader_no_microblock = 3; + // Pack had transactions available, had banks but wasn't leader + uint64 metric_timing_txn_bank_no_leader_no_microblock = 4; + // Pack had no transactions available, and was leader but had no available banks + uint64 metric_timing_no_txn_no_bank_leader_no_microblock = 5; + // Pack had transactions available, was leader, but had no available banks + uint64 metric_timing_txn_no_bank_leader_no_microblock = 6; + // Pack had available banks but no transactions + uint64 metric_timing_no_txn_bank_leader_no_microblock = 7; + // Pack had banks and transactions available but couldn't schedule anything non-conflicting + uint64 metric_timing_txn_bank_leader_no_microblock = 8; + // Pack scheduled a non-empty microblock while not leader + uint64 metric_timing_no_txn_no_bank_no_leader_microblock = 9; + // Pack scheduled a non-empty microblock while not leader + uint64 metric_timing_txn_no_bank_no_leader_microblock = 10; + // Pack scheduled a non-empty microblock while not leader + uint64 metric_timing_no_txn_bank_no_leader_microblock = 11; + // Pack scheduled a non-empty microblock while not leader + uint64 metric_timing_txn_bank_no_leader_microblock = 12; + // Pack scheduled a non-empty microblock but all banks were busy + uint64 metric_timing_no_txn_no_bank_leader_microblock = 13; + // Pack scheduled a non-empty microblock but all banks were busy + uint64 metric_timing_txn_no_bank_leader_microblock = 14; + // Pack scheduled a non-empty microblock and now has no transactions + uint64 metric_timing_no_txn_bank_leader_microblock = 15; + // Pack scheduled a non-empty microblock + uint64 metric_timing_txn_bank_leader_microblock = 16; +} + +// Result of inserting a transaction into the pack object +message MetricsPackTransactionInserted { + // Bundle with two conflicting durable nonce transactions + uint64 transaction_inserted_nonce_conflict = 1; + // Transaction uses an account on the bundle blacklist + uint64 
transaction_inserted_bundle_blacklist = 2; + // Transaction is an invalid durable nonce transaction + uint64 transaction_inserted_invalid_nonce = 3; + // Transaction tries to write to a sysvar + uint64 transaction_inserted_write_sysvar = 4; + // Estimating compute cost and/or fee failed + uint64 transaction_inserted_estimation_fail = 5; + // Transaction included an account address twice + uint64 transaction_inserted_duplicate_account = 6; + // Transaction tried to load too many accounts + uint64 transaction_inserted_too_many_accounts = 7; + // Transaction requests too many CUs + uint64 transaction_inserted_too_large = 8; + // Transaction already expired + uint64 transaction_inserted_expired = 9; + // Transaction loaded accounts from a lookup table + uint64 transaction_inserted_addr_lut = 10; + // Fee payer's balance below transaction fee + uint64 transaction_inserted_unaffordable = 11; + // Pack aware of transaction with same signature + uint64 transaction_inserted_duplicate = 12; + // Transaction's fee was too low given its compute unit requirement and another competing transactions that uses the same durable nonce + uint64 transaction_inserted_nonce_priority = 13; + // Transaction's fee was too low given its compute unit requirement and other competing transactions + uint64 transaction_inserted_priority = 14; + // Transaction that was not a simple vote added to pending transactions + uint64 transaction_inserted_nonvote_add = 15; + // Simple vote transaction was added to pending transactions + uint64 transaction_inserted_vote_add = 16; + // Transaction that was not a simple vote replaced a lower priority transaction + uint64 transaction_inserted_nonvote_replace = 17; + // Simple vote transaction replaced a lower priority transaction + uint64 transaction_inserted_vote_replace = 18; + // Durable nonce transaction added to pending transactions + uint64 transaction_inserted_nonce_nonvote_add = 19; + // Unused because durable nonce transactions can't be simple votes + 
uint64 transaction_inserted_unused = 20; + // Durable nonce transaction replaced a lower priority transaction, likely one that uses the same durable nonce + uint64 transaction_inserted_nonce_nonvote_replace = 21; +} + +// The result of processing a shred from the network +message MetricsShredShredProcessed { + // Shred was for a slot for which we don't know the leader + uint64 shred_processed_bad_slot = 1; + // Shred parsing failed + uint64 shred_processed_parse_failed = 2; + // Shred was invalid for one of many reasons + uint64 shred_processed_rejected = 3; + // Shred was ignored because we had already received or reconstructed it + uint64 shred_processed_ignored = 4; + // Shred accepted to an incomplete FEC set + uint64 shred_processed_okay = 5; + // Shred accepted and resulted in a valid, complete FEC set + uint64 shred_processed_completes = 6; +} + +// Total wire bytes of CRDS sent out in pull response messages +message MetricsGossipCrdsTxPullResponseBytes { + // Contact Info V1 + uint64 crds_tx_pull_response_bytes_contact_info_v1 = 1; + // Vote + uint64 crds_tx_pull_response_bytes_vote = 2; + // Lowest Slot + uint64 crds_tx_pull_response_bytes_lowest_slot = 3; + // Snapshot Hashes + uint64 crds_tx_pull_response_bytes_snapshot_hashes = 4; + // Accounts Hashes + uint64 crds_tx_pull_response_bytes_accounts_hashes = 5; + // Epoch Slots + uint64 crds_tx_pull_response_bytes_epoch_slots = 6; + // Version V1 + uint64 crds_tx_pull_response_bytes_version_v1 = 7; + // Version V2 + uint64 crds_tx_pull_response_bytes_version_v2 = 8; + // Node Instance + uint64 crds_tx_pull_response_bytes_node_instance = 9; + // Duplicate Shred + uint64 crds_tx_pull_response_bytes_duplicate_shred = 10; + // Incremental Snapshot Hashes + uint64 crds_tx_pull_response_bytes_incremental_snapshot_hashes = 11; + // Contact Info V2 + uint64 crds_tx_pull_response_bytes_contact_info_v2 = 12; + // Restart Last Voted Fork Slots + uint64 crds_tx_pull_response_bytes_restart_last_voted_fork_slots = 13; + 
// Restart Heaviest Fork + uint64 crds_tx_pull_response_bytes_restart_heaviest_fork = 14; +} + +// Number of CRDS values sent in pull response messages +message MetricsGossipCrdsTxPullResponseCount { + // Contact Info V1 + uint64 crds_tx_pull_response_count_contact_info_v1 = 1; + // Vote + uint64 crds_tx_pull_response_count_vote = 2; + // Lowest Slot + uint64 crds_tx_pull_response_count_lowest_slot = 3; + // Snapshot Hashes + uint64 crds_tx_pull_response_count_snapshot_hashes = 4; + // Accounts Hashes + uint64 crds_tx_pull_response_count_accounts_hashes = 5; + // Epoch Slots + uint64 crds_tx_pull_response_count_epoch_slots = 6; + // Version V1 + uint64 crds_tx_pull_response_count_version_v1 = 7; + // Version V2 + uint64 crds_tx_pull_response_count_version_v2 = 8; + // Node Instance + uint64 crds_tx_pull_response_count_node_instance = 9; + // Duplicate Shred + uint64 crds_tx_pull_response_count_duplicate_shred = 10; + // Incremental Snapshot Hashes + uint64 crds_tx_pull_response_count_incremental_snapshot_hashes = 11; + // Contact Info V2 + uint64 crds_tx_pull_response_count_contact_info_v2 = 12; + // Restart Last Voted Fork Slots + uint64 crds_tx_pull_response_count_restart_last_voted_fork_slots = 13; + // Restart Heaviest Fork + uint64 crds_tx_pull_response_count_restart_heaviest_fork = 14; +} + +// Total wire bytes of CRDS sent out in push messages +message MetricsGossipCrdsTxPushBytes { + // Contact Info V1 + uint64 crds_tx_push_bytes_contact_info_v1 = 1; + // Vote + uint64 crds_tx_push_bytes_vote = 2; + // Lowest Slot + uint64 crds_tx_push_bytes_lowest_slot = 3; + // Snapshot Hashes + uint64 crds_tx_push_bytes_snapshot_hashes = 4; + // Accounts Hashes + uint64 crds_tx_push_bytes_accounts_hashes = 5; + // Epoch Slots + uint64 crds_tx_push_bytes_epoch_slots = 6; + // Version V1 + uint64 crds_tx_push_bytes_version_v1 = 7; + // Version V2 + uint64 crds_tx_push_bytes_version_v2 = 8; + // Node Instance + uint64 crds_tx_push_bytes_node_instance = 9; + // Duplicate 
Shred + uint64 crds_tx_push_bytes_duplicate_shred = 10; + // Incremental Snapshot Hashes + uint64 crds_tx_push_bytes_incremental_snapshot_hashes = 11; + // Contact Info V2 + uint64 crds_tx_push_bytes_contact_info_v2 = 12; + // Restart Last Voted Fork Slots + uint64 crds_tx_push_bytes_restart_last_voted_fork_slots = 13; + // Restart Heaviest Fork + uint64 crds_tx_push_bytes_restart_heaviest_fork = 14; +} + +// Number of CRDS values sent in push messages +message MetricsGossipCrdsTxPushCount { + // Contact Info V1 + uint64 crds_tx_push_count_contact_info_v1 = 1; + // Vote + uint64 crds_tx_push_count_vote = 2; + // Lowest Slot + uint64 crds_tx_push_count_lowest_slot = 3; + // Snapshot Hashes + uint64 crds_tx_push_count_snapshot_hashes = 4; + // Accounts Hashes + uint64 crds_tx_push_count_accounts_hashes = 5; + // Epoch Slots + uint64 crds_tx_push_count_epoch_slots = 6; + // Version V1 + uint64 crds_tx_push_count_version_v1 = 7; + // Version V2 + uint64 crds_tx_push_count_version_v2 = 8; + // Node Instance + uint64 crds_tx_push_count_node_instance = 9; + // Duplicate Shred + uint64 crds_tx_push_count_duplicate_shred = 10; + // Incremental Snapshot Hashes + uint64 crds_tx_push_count_incremental_snapshot_hashes = 11; + // Contact Info V2 + uint64 crds_tx_push_count_contact_info_v2 = 12; + // Restart Last Voted Fork Slots + uint64 crds_tx_push_count_restart_last_voted_fork_slots = 13; + // Restart Heaviest Fork + uint64 crds_tx_push_count_restart_heaviest_fork = 14; +} + +// Total wire bytes sent in gossip messages +message MetricsGossipMessageTxBytes { + // Pull Request + uint64 message_tx_bytes_pull_request = 1; + // Pull Response + uint64 message_tx_bytes_pull_response = 2; + // Push + uint64 message_tx_bytes_push = 3; + // Prune + uint64 message_tx_bytes_prune = 4; + // Ping + uint64 message_tx_bytes_ping = 5; + // Pong + uint64 message_tx_bytes_pong = 6; +} + +// Number of gossip messages sent +message MetricsGossipMessageTxCount { + // Pull Request + uint64 
message_tx_count_pull_request = 1; + // Pull Response + uint64 message_tx_count_pull_response = 2; + // Push + uint64 message_tx_count_push = 3; + // Prune + uint64 message_tx_count_prune = 4; + // Ping + uint64 message_tx_count_ping = 5; + // Pong + uint64 message_tx_count_pong = 6; +} + +// Outcome of incoming CRDS messages +message MetricsGossipCrdsRxCount { + // Pull Response (upserted) + uint64 crds_rx_count_upserted_pull_response = 1; + // Push (upserted) + uint64 crds_rx_count_upserted_push = 2; + // Pull Response (newer entry already present in table) + uint64 crds_rx_count_dropped_pull_response_stale = 3; + // Pull Response (outside expiry window and no contact info entry) + uint64 crds_rx_count_dropped_pull_response_wallclock = 4; + // Pull Response (duplicate) + uint64 crds_rx_count_dropped_pull_response_duplicate = 5; + // Push (newer entry already present in table) + uint64 crds_rx_count_dropped_push_stale = 6; + // Push (duplicate) + uint64 crds_rx_count_dropped_push_duplicate = 7; +} + +// The number of entries in the data store +message MetricsGossipCrdsCount { + // Contact Info V1 + uint64 crds_count_contact_info_v1 = 1; + // Vote + uint64 crds_count_vote = 2; + // Lowest Slot + uint64 crds_count_lowest_slot = 3; + // Snapshot Hashes + uint64 crds_count_snapshot_hashes = 4; + // Accounts Hashes + uint64 crds_count_accounts_hashes = 5; + // Epoch Slots + uint64 crds_count_epoch_slots = 6; + // Version V1 + uint64 crds_count_version_v1 = 7; + // Version V2 + uint64 crds_count_version_v2 = 8; + // Node Instance + uint64 crds_count_node_instance = 9; + // Duplicate Shred + uint64 crds_count_duplicate_shred = 10; + // Incremental Snapshot Hashes + uint64 crds_count_incremental_snapshot_hashes = 11; + // Contact Info V2 + uint64 crds_count_contact_info_v2 = 12; + // Restart Last Voted Fork Slots + uint64 crds_count_restart_last_voted_fork_slots = 13; + // Restart Heaviest Fork + uint64 crds_count_restart_heaviest_fork = 14; +} + +// The result of 
processing a pong response to a ping +message MetricsGossipPingTrackerPongResult { + // Pong ignored as node is staked + uint64 ping_tracker_pong_result_staked = 1; + // Pong ignored as node is an entrypoint + uint64 ping_tracker_pong_result_entrypoint = 2; + // Pong ignored as node has no outstanding ping + uint64 ping_tracker_pong_result_untracked = 3; + // Pong ignored as responding node doesn't match pinged address + uint64 ping_tracker_pong_result_address = 4; + // Pong ignored as token was not valid + uint64 ping_tracker_pong_result_token = 5; + // Pong accepted + uint64 ping_tracker_pong_result_success = 6; +} + +// The number of peers being tracked for ping/pong +message MetricsGossipPingTrackerCount { + // Peer is known but has not been pinged yet + uint64 ping_tracker_count_unpinged = 1; + // Peer was pinged but has not yet responded, or responded with an invalid pong + uint64 ping_tracker_count_invalid = 2; + // Peer was pinged and is currently valid + uint64 ping_tracker_count_valid = 3; + // Peer was pinged and is currently valid, and is being refreshed with a new ping + uint64 ping_tracker_count_valid_refreshing = 4; +} + +// Number of IPv4 routes +message MetricsNetlnkRouteCount { + // Local + uint64 route_count_local = 1; + // Main + uint64 route_count_main = 2; +} + +// Number of netlink live updates processed +message MetricsNetlnkUpdates { + // Link + uint64 updates_link = 1; + // Neighbor Table Entry + uint64 updates_neigh = 2; + // IPv4 Route Table Entry + uint64 updates_ipv4_route = 3; +} + +// Number of sendmmsg syscalls dispatched +message MetricsSockSyscallsSendmmsg { + // No error + uint64 syscalls_sendmmsg_no_error = 1; + // ENOBUFS, EAGAIN error + uint64 syscalls_sendmmsg_slow = 2; + // EPERM error (blocked by netfilter) + uint64 syscalls_sendmmsg_perm = 3; + // ENETUNREACH, EHOSTUNREACH error + uint64 syscalls_sendmmsg_unreach = 4; + // ENONET, ENETDOWN, EHOSTDOWN error + uint64 syscalls_sendmmsg_down = 5; + // Unrecognized error code + 
uint64 syscalls_sendmmsg_other = 6; +} + +// What types of client messages are we sending +message MetricsRepairSentPktTypes { + // Need Window + uint64 sent_pkt_types_needed_window = 1; + // Need Highest Window + uint64 sent_pkt_types_needed_highest_window = 2; + // Need Orphans + uint64 sent_pkt_types_needed_orphan = 3; + // Pong + uint64 sent_pkt_types_pong = 4; +} + +// Total count of ACK frames transmitted +message MetricsSendAckTx { + // Non-ACK-eliciting packet + uint64 ack_tx_noop = 1; + // New ACK range + uint64 ack_tx_new = 2; + // Merged into existing ACK range + uint64 ack_tx_merged = 3; + // Out of buffers + uint64 ack_tx_drop = 4; + // ACK suppressed by handler + uint64 ack_tx_cancel = 5; +} + +// Results of attempts to acquire QUIC frame metadata +message MetricsSendFrameTxAlloc { + // Success + uint64 frame_tx_alloc_success = 1; + // PktMetaPoolEmpty + uint64 frame_tx_alloc_fail_empty_pool = 2; + // ConnMaxedInflightFrames + uint64 frame_tx_alloc_fail_conn_max = 3; +} + +// Total count of QUIC frames received +message MetricsSendReceivedFrames { + // Unknown frame type + uint64 received_frames_unknown = 1; + // ACK frame + uint64 received_frames_ack = 2; + // RESET_STREAM frame + uint64 received_frames_reset_stream = 3; + // STOP_SENDING frame + uint64 received_frames_stop_sending = 4; + // CRYPTO frame + uint64 received_frames_crypto = 5; + // NEW_TOKEN frame + uint64 received_frames_new_token = 6; + // STREAM frame + uint64 received_frames_stream = 7; + // MAX_DATA frame + uint64 received_frames_max_data = 8; + // MAX_STREAM_DATA frame + uint64 received_frames_max_stream_data = 9; + // MAX_STREAMS frame + uint64 received_frames_max_streams = 10; + // DATA_BLOCKED frame + uint64 received_frames_data_blocked = 11; + // STREAM_DATA_BLOCKED frame + uint64 received_frames_stream_data_blocked = 12; + // STREAMS_BLOCKED(bidi) frame + uint64 received_frames_streams_blocked = 13; + // NEW_CONN_ID frame + uint64 received_frames_new_conn_id = 14; + // 
RETIRE_CONN_ID frame + uint64 received_frames_retire_conn_id = 15; + // PATH_CHALLENGE frame + uint64 received_frames_path_challenge = 16; + // PATH_RESPONSE frame + uint64 received_frames_path_response = 17; + // CONN_CLOSE(transport) frame + uint64 received_frames_conn_close_quic = 18; + // CONN_CLOSE(app) frame + uint64 received_frames_conn_close_app = 19; + // HANDSHAKE_DONE frame + uint64 received_frames_handshake_done = 20; + // PING frame + uint64 received_frames_ping = 21; + // PADDING frame + uint64 received_frames_padding = 22; +} + +// Total count of QUIC packet retransmissions +message MetricsSendPktRetransmissions { + // Initial + uint64 pkt_retransmissions_initial = 1; + // Early data + uint64 pkt_retransmissions_early = 2; + // Handshake + uint64 pkt_retransmissions_handshake = 3; + // App data + uint64 pkt_retransmissions_app = 4; +} + +// Total count of packets with no connection +message MetricsSendPktNoConn { + // Initial + uint64 pkt_no_conn_initial = 1; + // Retry + uint64 pkt_no_conn_retry = 2; + // Handshake + uint64 pkt_no_conn_handshake = 3; + // 1-RTT + uint64 pkt_no_conn_one_rtt = 4; +} + +// Total count of packets with no key +message MetricsSendPktNoKey { + // Initial + uint64 pkt_no_key_initial = 1; + // Early data + uint64 pkt_no_key_early = 2; + // Handshake + uint64 pkt_no_key_handshake = 3; + // App data + uint64 pkt_no_key_app = 4; +} + +// Total count of packets with crypto failures +message MetricsSendPktCryptoFailed { + // Initial + uint64 pkt_crypto_failed_initial = 1; + // Early data + uint64 pkt_crypto_failed_early = 2; + // Handshake + uint64 pkt_crypto_failed_handshake = 3; + // App data + uint64 pkt_crypto_failed_app = 4; +} + +// Number of QUIC connections in each state +message MetricsSendConnectionsState { + // Freed + uint64 connections_state_invalid = 1; + // Handshaking peer + uint64 connections_state_handshake = 2; + // Handshake complete, confirming with peer + uint64 connections_state_handshake_complete = 3; + // 
Active connection + uint64 connections_state_active = 4; + // Peer requested close + uint64 connections_state_peer_close = 5; + // Connection terminating due to error + uint64 connections_state_abort = 6; + // Connection is closing + uint64 connections_state_close_pending = 7; + // Connection about to be freed + uint64 connections_state_dead = 8; +} + +// Total number of times QUIC connection closed +message MetricsSendQuicConnFinal { + // QUIC Vote port + uint64 quic_conn_final_quic_vote = 1; + // QUIC TPU port + uint64 quic_conn_final_quic_tpu = 2; +} + +// Total number of times we completed a handshake +message MetricsSendHandshakeComplete { + // QUIC Vote port + uint64 handshake_complete_quic_vote = 1; + // QUIC TPU port + uint64 handshake_complete_quic_tpu = 2; +} + +// Total count of results from trying to ensure a connection for a leader for QUIC TPU port +message MetricsSendEnsureConnResultQuicTpu { + // No QUIC connection + uint64 ensure_conn_result_quic_tpu_no_leader = 1; + // No contact info + uint64 ensure_conn_result_quic_tpu_no_ci = 2; + // Initiated connection + uint64 ensure_conn_result_quic_tpu_new_connection = 3; + // Connection failed + uint64 ensure_conn_result_quic_tpu_conn_failed = 4; + // Connection exists + uint64 ensure_conn_result_quic_tpu_connected = 5; + // Connection cooldown + uint64 ensure_conn_result_quic_tpu_cooldown = 6; +} + +// Total count of results from trying to ensure a connection for a leader for QUIC Vote port +message MetricsSendEnsureConnResultQuicVote { + // No QUIC connection + uint64 ensure_conn_result_quic_vote_no_leader = 1; + // No contact info + uint64 ensure_conn_result_quic_vote_no_ci = 2; + // Initiated connection + uint64 ensure_conn_result_quic_vote_new_connection = 3; + // Connection failed + uint64 ensure_conn_result_quic_vote_conn_failed = 4; + // Connection exists + uint64 ensure_conn_result_quic_vote_connected = 5; + // Connection cooldown + uint64 ensure_conn_result_quic_vote_cooldown = 6; +} + +// Total 
count of results from trying to send via UDP TPU port +message MetricsSendSendResultUdpTpu { + // Success + uint64 send_result_udp_tpu_success = 1; + // No contact info + uint64 send_result_udp_tpu_no_ci = 2; + // No QUIC connection + uint64 send_result_udp_tpu_no_conn = 3; + // No QUIC stream + uint64 send_result_udp_tpu_no_stream = 4; +} + +// Total count of results from trying to send via UDP Vote port +message MetricsSendSendResultUdpVote { + // Success + uint64 send_result_udp_vote_success = 1; + // No contact info + uint64 send_result_udp_vote_no_ci = 2; + // No QUIC connection + uint64 send_result_udp_vote_no_conn = 3; + // No QUIC stream + uint64 send_result_udp_vote_no_stream = 4; +} + +// Total count of results from trying to send via QUIC TPU port +message MetricsSendSendResultQuicTpu { + // Success + uint64 send_result_quic_tpu_success = 1; + // No contact info + uint64 send_result_quic_tpu_no_ci = 2; + // No QUIC connection + uint64 send_result_quic_tpu_no_conn = 3; + // No QUIC stream + uint64 send_result_quic_tpu_no_stream = 4; +} + +// Total count of results from trying to send via QUIC Vote port +message MetricsSendSendResultQuicVote { + // Success + uint64 send_result_quic_vote_success = 1; + // No contact info + uint64 send_result_quic_vote_no_ci = 2; + // No QUIC connection + uint64 send_result_quic_vote_no_conn = 3; + // No QUIC stream + uint64 send_result_quic_vote_no_stream = 4; +} + +// Total number of contact infos received and handled for UDP TPU port +message MetricsSendNewContactInfoUdpTpu { + // Skipped (unroutable) + uint64 new_contact_info_udp_tpu_unroutable = 1; + // Initialized + uint64 new_contact_info_udp_tpu_initialized = 2; + // Contact info changed + uint64 new_contact_info_udp_tpu_changed = 3; + // Contact info unchanged + uint64 new_contact_info_udp_tpu_no_change = 4; +} + +// Total number of contact infos received and handled for UDP Vote port +message MetricsSendNewContactInfoUdpVote { + // Skipped (unroutable) + uint64 
new_contact_info_udp_vote_unroutable = 1; + // Initialized + uint64 new_contact_info_udp_vote_initialized = 2; + // Contact info changed + uint64 new_contact_info_udp_vote_changed = 3; + // Contact info unchanged + uint64 new_contact_info_udp_vote_no_change = 4; +} + +// Total number of contact infos received and handled for QUIC TPU port +message MetricsSendNewContactInfoQuicTpu { + // Skipped (unroutable) + uint64 new_contact_info_quic_tpu_unroutable = 1; + // Initialized + uint64 new_contact_info_quic_tpu_initialized = 2; + // Contact info changed + uint64 new_contact_info_quic_tpu_changed = 3; + // Contact info unchanged + uint64 new_contact_info_quic_tpu_no_change = 4; +} + +// Total number of contact infos received and handled for QUIC Vote port +message MetricsSendNewContactInfoQuicVote { + // Skipped (unroutable) + uint64 new_contact_info_quic_vote_unroutable = 1; + // Initialized + uint64 new_contact_info_quic_vote_initialized = 2; + // Contact info changed + uint64 new_contact_info_quic_vote_changed = 3; + // Contact info unchanged + uint64 new_contact_info_quic_vote_no_change = 4; +} + +// Total wire bytes of CRDS values processed +message MetricsGossvfCrdsRxBytes { + // Pull Response (success) + uint64 crds_rx_bytes_success_pull_response = 1; + // Push (success) + uint64 crds_rx_bytes_success_push = 2; + // Pull Response (duplicate) + uint64 crds_rx_bytes_dropped_pull_response_duplicate = 3; + // Pull Response (signature) + uint64 crds_rx_bytes_dropped_pull_response_signature = 4; + // Pull Response (origin no contact info) + uint64 crds_rx_bytes_dropped_pull_response_origin_no_contact_info = 5; + // Pull Response (origin shred version) + uint64 crds_rx_bytes_dropped_pull_response_origin_shred_version = 6; + // Pull Response (inactive) + uint64 crds_rx_bytes_dropped_pull_response_inactive = 7; + // Push (signature) + uint64 crds_rx_bytes_dropped_push_signature = 8; + // Push (origin no contact info) + uint64 
crds_rx_bytes_dropped_push_origin_no_contact_info = 9; + // Push (origin shred version) + uint64 crds_rx_bytes_dropped_push_origin_shred_version = 10; + // Push (inactive) + uint64 crds_rx_bytes_dropped_push_inactive = 11; + // Push (wallclock) + uint64 crds_rx_bytes_dropped_push_wallclock = 12; +} + +// Number of CRDS values processed +message MetricsGossvfCrdsRxCount { + // Pull Response (success) + uint64 crds_rx_count_success_pull_response = 1; + // Push (success) + uint64 crds_rx_count_success_push = 2; + // Pull Response (duplicate) + uint64 crds_rx_count_dropped_pull_response_duplicate = 3; + // Pull Response (signature) + uint64 crds_rx_count_dropped_pull_response_signature = 4; + // Pull Response (origin no contact info) + uint64 crds_rx_count_dropped_pull_response_origin_no_contact_info = 5; + // Pull Response (origin shred version) + uint64 crds_rx_count_dropped_pull_response_origin_shred_version = 6; + // Pull Response (inactive) + uint64 crds_rx_count_dropped_pull_response_inactive = 7; + // Push (signature) + uint64 crds_rx_count_dropped_push_signature = 8; + // Push (origin no contact info) + uint64 crds_rx_count_dropped_push_origin_no_contact_info = 9; + // Push (origin shred version) + uint64 crds_rx_count_dropped_push_origin_shred_version = 10; + // Push (inactive) + uint64 crds_rx_count_dropped_push_inactive = 11; + // Push (wallclock) + uint64 crds_rx_count_dropped_push_wallclock = 12; +} + +// Total wire bytes of gossip messages processed +message MetricsGossvfMessageRxBytes { + // Pull Request (success) + uint64 message_rx_bytes_success_pull_request = 1; + // Pull Response (success) + uint64 message_rx_bytes_success_pull_response = 2; + // Push (success) + uint64 message_rx_bytes_success_push = 3; + // Prune (success) + uint64 message_rx_bytes_success_prune = 4; + // Ping (success) + uint64 message_rx_bytes_success_ping = 5; + // Pong (success) + uint64 message_rx_bytes_success_pong = 6; + // Unparseable + uint64 
message_rx_bytes_dropped_unparseable = 7; + // Pull Request (not contact info) + uint64 message_rx_bytes_dropped_pull_request_not_contact_info = 8; + // Pull Request (loopback) + uint64 message_rx_bytes_dropped_pull_request_loopback = 9; + // Pull Request (inactive) + uint64 message_rx_bytes_dropped_pull_request_inactive = 10; + // Pull Request (wallclock) + uint64 message_rx_bytes_dropped_pull_request_wallclock = 11; + // Pull Request (signature) + uint64 message_rx_bytes_dropped_pull_request_signature = 12; + // Pull Request (shred version) + uint64 message_rx_bytes_dropped_pull_request_shred_version = 13; + // Prune (destination) + uint64 message_rx_bytes_dropped_prune_destination = 14; + // Prune (wallclock) + uint64 message_rx_bytes_dropped_prune_wallclock = 15; + // Prune (signature) + uint64 message_rx_bytes_dropped_prune_signature = 16; + // Push (no valid crds) + uint64 message_rx_bytes_dropped_push_no_valid_crds = 17; + // Pull Response (no valid crds) + uint64 message_rx_bytes_dropped_pull_response_no_valid_crds = 18; + // Ping (signature) + uint64 message_rx_bytes_dropped_ping_signature = 19; + // Pong (signature) + uint64 message_rx_bytes_dropped_pong_signature = 20; +} + +// Number of gossip messages processed +message MetricsGossvfMessageRxCount { + // Pull Request (success) + uint64 message_rx_count_success_pull_request = 1; + // Pull Response (success) + uint64 message_rx_count_success_pull_response = 2; + // Push (success) + uint64 message_rx_count_success_push = 3; + // Prune (success) + uint64 message_rx_count_success_prune = 4; + // Ping (success) + uint64 message_rx_count_success_ping = 5; + // Pong (success) + uint64 message_rx_count_success_pong = 6; + // Unparseable + uint64 message_rx_count_dropped_unparseable = 7; + // Pull Request (not contact info) + uint64 message_rx_count_dropped_pull_request_not_contact_info = 8; + // Pull Request (loopback) + uint64 message_rx_count_dropped_pull_request_loopback = 9; + // Pull Request (inactive) + 
uint64 message_rx_count_dropped_pull_request_inactive = 10; + // Pull Request (wallclock) + uint64 message_rx_count_dropped_pull_request_wallclock = 11; + // Pull Request (signature) + uint64 message_rx_count_dropped_pull_request_signature = 12; + // Pull Request (shred version) + uint64 message_rx_count_dropped_pull_request_shred_version = 13; + // Prune (destination) + uint64 message_rx_count_dropped_prune_destination = 14; + // Prune (wallclock) + uint64 message_rx_count_dropped_prune_wallclock = 15; + // Prune (signature) + uint64 message_rx_count_dropped_prune_signature = 16; + // Push (no valid crds) + uint64 message_rx_count_dropped_push_no_valid_crds = 17; + // Pull Response (no valid crds) + uint64 message_rx_count_dropped_pull_response_no_valid_crds = 18; + // Ping (signature) + uint64 message_rx_count_dropped_ping_signature = 19; + // Pong (signature) + uint64 message_rx_count_dropped_pong_signature = 20; +} + +// Whether a transaction landed in the block or not +message MetricsBankfTransactionLanded { + // Transaction landed + uint64 transaction_landed_landed_success = 1; + // Transaction landed, but was fees only and did not execute + uint64 transaction_landed_landed_fees_only = 2; + // Transaction landed, but failed to execute + uint64 transaction_landed_landed_failed = 3; + // Transaction did not land + uint64 transaction_landed_unlanded = 4; +} + +// Result of loading and executing a transaction +message MetricsBankfTransactionResult { + // Transaction executed successfully + uint64 transaction_result_success = 1; + // An error occurred while processing an instruction + uint64 transaction_result_instructon_error = 2; + // The transaction fee payer address was not found + uint64 transaction_result_account_not_found = 3; + // A program account referenced by the transaction was not found + uint64 transaction_result_program_account_not_found = 4; + // The transaction fee payer did not have balance to pay the fee + uint64 
transaction_result_insufficient_funds_for_fee = 5; + // The transaction fee payer account is not owned by the system program, or has data that is not a nonce + uint64 transaction_result_invalid_account_for_fee = 6; + // The transaction has already been processed in a recent block + uint64 transaction_result_already_processed = 7; + // The transaction references a blockhash that is not recent, or advances a nonce with the wrong value + uint64 transaction_result_blockhash_not_found = 8; + // A program account referenced by the transaction was no executable. TODO: No longer needed with SIMD-0162 + uint64 transaction_result_invalid_program_for_execution = 9; + // The transaction references an ALUT account that does not exist or is inactive + uint64 transaction_result_address_lookup_table_not_found = 10; + // The transaction references an ALUT account that is not owned by the ALUT program account + uint64 transaction_result_invalid_address_lookup_table_owner = 11; + // The transaction references an ALUT account that contains data which is not a valid ALUT + uint64 transaction_result_invalid_address_lookup_table_data = 12; + // The transaction references an account offset from the ALUT which does not exist + uint64 transaction_result_invalid_address_lookup_table_index = 13; + // The total account data size of the loaded accounts exceeds the consensus limit + uint64 transaction_result_max_loaded_accounts_data_size_exceeded = 14; + // A compute budget program instruction was invoked more than once + uint64 transaction_result_duplicate_instruction = 15; + // The compute budget program was invoked and set the loaded accounts data size to zero + uint64 transaction_result_invalid_loaded_accounts_data_size_limit = 16; + // The transaction references a nonce account that is already advanced + uint64 transaction_result_nonce_already_advanced = 17; + // The transaction is a nonce transaction but the advance instruction was not valid or failed + uint64 
transaction_result_nonce_advance_failed = 18; + // The transaction is a nonce transaction but the blockhash is not the correct one + uint64 transaction_result_nonce_wrong_blockhash = 19; + // The transaction conflicts with another transaction in the microblock. TODO: No longer possible with smart dispatcher + uint64 transaction_result_account_in_use = 20; + // The transaction references the same account twice + uint64 transaction_result_account_loaded_twice = 21; + // The transaction had an invalid signature + uint64 transaction_result_signature_failure = 22; + // The transaction references too many accounts. TODO: No longer possible with smart dispatcher + uint64 transaction_result_too_many_account_locks = 23; + // The transaction would leave an account with a lower balance than the rent-exempt minimum + uint64 transaction_result_insufficient_funds_for_rent = 24; + // The total referenced account lamports before and after the transaction was unbalanced + uint64 transaction_result_unbalanced_transaction = 25; + // The transaction was part of a bundle and an earlier transaction in the bundle failed + uint64 transaction_result_bundle_peer = 26; +} + +// Count of address lookup tables resolved +message MetricsResolfLutResolved { + // The transaction referenced an index in a LUT that didn't exist + uint64 lut_resolved_invalid_lookup_index = 1; + // The account referenced as a LUT hasn't been initialized + uint64 lut_resolved_account_uninitialized = 2; + // The account referenced as a LUT couldn't be parsed + uint64 lut_resolved_invalid_account_data = 3; + // The account referenced as a LUT wasn't owned by the ALUT program ID + uint64 lut_resolved_invalid_account_owner = 4; + // The account referenced as a LUT couldn't be found + uint64 lut_resolved_account_not_found = 5; + // Resolved successfully + uint64 lut_resolved_success = 6; +} + +// Count of operations that happened on the transaction stash +message MetricsResolfStashOperation { + // A transaction with an 
unknown blockhash was added to the stash + uint64 stash_operation_inserted = 1; + // A transaction with an unknown blockhash was dropped because the stash was full + uint64 stash_operation_overrun = 2; + // A transaction with an unknown blockhash was published as the blockhash became known + uint64 stash_operation_published = 3; + // A transaction with an unknown blockhash was removed from the stash without publishing, due to a bad LUT resolved failure, or no bank. These errors are double counted with the respective metrics for those categories + uint64 stash_operation_removed = 4; +} + +// Transaction account change event counters +message MetricsExecTxnAccountChanges { + // Account did not exist before and still does not + uint64 txn_account_changes_unchanged_nonexist = 1; + // Account created + uint64 txn_account_changes_created = 2; + // Account deleted + uint64 txn_account_changes_delete = 3; + // Account modified + uint64 txn_account_changes_modify = 4; + // Account unchanged + uint64 txn_account_changes_unchanged = 5; +} + +// Mutually exclusive and exhaustive duration of time spent in virtual machine execution regimes +message MetricsExecVmRegime { + // VM setup + uint64 vm_regime_setup = 1; + // VM commit + uint64 vm_regime_commit = 2; + // VM setup (CPI) + uint64 vm_regime_setup_cpi = 3; + // VM commit (CPI) + uint64 vm_regime_commit_cpi = 4; + // VM interpreter execution + uint64 vm_regime_interpreter = 5; +} + +// Mutually exclusive and exhaustive duration of time spent in transaction execution regimes +message MetricsExecTxnRegime { + // Transaction setup + uint64 txn_regime_setup = 1; + // Transaction execution (includes VM setup/execution) + uint64 txn_regime_exec = 2; + // Transaction result commit + uint64 txn_regime_commit = 3; +} + +// Number of blocks written to bstream +message MetricsAccdbBlocks { + // Record + uint64 blocks_pair = 1; + // Record deletion + uint64 blocks_dead = 2; + // Partition/divider + uint64 blocks_part = 3; +} + +// 
Number of requests processed +message MetricsAccdbRequests { + // Acquire record + uint64 requests_acquire = 1; + // Release record + uint64 requests_release = 2; + // Erase record + uint64 requests_erase = 3; +} + +// Current bstream sequence number +message MetricsAccdbBstreamSeq { + // Blocks between ancient and past have been written and forgotten (no read, no write) + uint64 bstream_seq_ancient = 1; + // Blocks between past and present have been written (read only) + uint64 bstream_seq_past = 2; + // Blocks between present and future are being written (write only) + uint64 bstream_seq_present = 3; + // Blocks between future and ancient have not been written (no read, no write) + uint64 bstream_seq_future = 4; +} + // The validator received a verified, deduplicated transaction message Txn { // The source IP address of the transaction sender @@ -52,10 +1349,1076 @@ message Shred { bytes payload = 4; } +// Metrics snapshot for the net tile +message MetricsNet { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Packet receive count + uint64 rx_pkt_cnt = 3; + // Total number of bytes received (including Ethernet header) + uint64 rx_bytes_total = 4; + // Number of incoming packets dropped due to being too small + uint64 rx_undersz_cnt = 5; + // Number of incoming packets dropped due to fill ring being full + uint64 rx_fill_blocked_cnt = 6; + // Number of incoming packets dropped due to backpressure + uint64 rx_backpressure_cnt = 7; + // Number of receive buffers currently busy + uint64 rx_busy_cnt = 8; + // Number of receive buffers currently idle + uint64 rx_idle_cnt = 9; + // Number of packet transmit jobs submitted + uint64 tx_submit_cnt = 10; + // Number of packet transmit jobs marked as completed by the kernel + uint64 tx_complete_cnt = 11; + // Total number of bytes transmitted (including Ethernet header) + uint64 tx_bytes_total = 12; + // Number of packet transmit jobs 
dropped due to route failure + uint64 tx_route_fail_cnt = 13; + // Number of packet transmit jobs dropped due to unresolved neighbor + uint64 tx_neighbor_fail_cnt = 14; + // Number of packet transmit jobs dropped due to XDP TX ring full or missing completions + uint64 tx_full_fail_cnt = 15; + // Number of transmit buffers currently busy + uint64 tx_busy_cnt = 16; + // Number of transmit buffers currently idle + uint64 tx_idle_cnt = 17; + // Number of XSK sendto syscalls dispatched + uint64 xsk_tx_wakeup_cnt = 18; + // Number of XSK recvmsg syscalls dispatched + uint64 xsk_rx_wakeup_cnt = 19; + // Dropped for other reasons (xdp_statistics_v0.rx_dropped) + uint64 xdp_rx_dropped_other = 20; + // Dropped due to invalid descriptor (xdp_statistics_v0.rx_invalid_descs) + uint64 xdp_rx_invalid_descs = 21; + // Dropped due to invalid descriptor (xdp_statistics_v0.tx_invalid_descs) + uint64 xdp_tx_invalid_descs = 22; + // Dropped due to rx ring being full (xdp_statistics_v1.rx_ring_full) + uint64 xdp_rx_ring_full = 23; + // Failed to retrieve item from fill ring (xdp_statistics_v1.rx_fill_ring_empty_descs) + uint64 xdp_rx_fill_ring_empty_descs = 24; + // Failed to retrieve item from tx ring (xdp_statistics_v1.tx_ring_empty_descs) + uint64 xdp_tx_ring_empty_descs = 25; + // Number of valid GRE packets received + uint64 rx_gre_cnt = 26; + // Number of invalid GRE packets received + uint64 rx_gre_invalid_cnt = 27; + // Number of received but ignored GRE packets + uint64 rx_gre_ignored_cnt = 28; + // Number of GRE packet transmit jobs submitted + uint64 tx_gre_cnt = 29; + // Number of GRE packets transmit jobs dropped due to route failure + uint64 tx_gre_route_fail_cnt = 30; +} + +// Metrics snapshot for the quic tile +message MetricsQuic { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Count of txns overrun before reassembled (too small txn_reassembly_count) + uint64 txns_overrun = 3; + // 
Count of fragmented txn receive ops started + uint64 txn_reasms_started = 4; + // Number of fragmented txn receive ops currently active + uint64 txn_reasms_active = 5; + // Count of txn frags received + uint64 frags_ok = 6; + // Count of txn frags dropped due to data gap + uint64 frags_gap = 7; + // Count of txn frags dropped due to dup (stream already completed) + uint64 frags_dup = 8; + // Count of txns received via TPU + MetricsQuicTxnsReceived txns_received = 9; + // Count of txns abandoned because a conn was lost + uint64 txns_abandoned = 10; + // Count of txns received via QUIC dropped because they were too small + uint64 txn_undersz = 11; + // Count of txns received via QUIC dropped because they were too large + uint64 txn_oversz = 12; + // Count of packets received on the non-QUIC port that were too small to be a valid IP packet + uint64 legacy_txn_undersz = 13; + // Count of packets received on the non-QUIC port that were too large to be a valid transaction + uint64 legacy_txn_oversz = 14; + // Number of IP packets received + uint64 received_packets = 15; + // Total bytes received (including IP, UDP, QUIC headers) + uint64 received_bytes = 16; + // Number of IP packets sent + uint64 sent_packets = 17; + // Total bytes sent (including IP, UDP, QUIC headers) + uint64 sent_bytes = 18; + // The number of currently allocated QUIC connections + uint64 connections_alloc = 19; + // The number of QUIC connections in each state + MetricsQuicConnectionsState connections_state = 20; + // The total number of connections that have been created + uint64 connections_created = 21; + // Number of connections gracefully closed + uint64 connections_closed = 22; + // Number of connections aborted + uint64 connections_aborted = 23; + // Number of connections timed out + uint64 connections_timed_out = 24; + // Number of connections established with retry + uint64 connections_retried = 25; + // Number of connections that failed to create due to lack of slots + uint64 
connection_error_no_slots = 26; + // Number of connections that failed during retry (e.g. invalid token) + uint64 connection_error_retry_fail = 27; + // Number of packets with an unknown connection ID + MetricsQuicPktNoConn pkt_no_conn = 28; + // Results of attempts to acquire QUIC frame metadata + MetricsQuicFrameTxAlloc frame_tx_alloc = 29; + // Number of Initial packets grouped by token length + MetricsQuicInitialTokenLen initial_token_len = 30; + // Number of handshake flows created + uint64 handshakes_created = 31; + // Number of handshakes dropped due to alloc fail + uint64 handshake_error_alloc_fail = 32; + // Number of handshakes dropped due to eviction + uint64 handshake_evicted = 33; + // Number of stream RX events + uint64 stream_received_events = 34; + // Total stream payload bytes received + uint64 stream_received_bytes = 35; + // Number of QUIC frames received + MetricsQuicReceivedFrames received_frames = 36; + // ACK events + MetricsQuicAckTx ack_tx = 37; + // Number of QUIC frames failed to parse + uint64 frame_fail_parse = 38; + // Number of packets that failed decryption + MetricsQuicPktCryptoFailed pkt_crypto_failed = 39; + // Number of packets that failed decryption due to missing key + MetricsQuicPktNoKey pkt_no_key = 40; + // Number of packets dropped due to weird IP or UDP header + uint64 pkt_net_header_invalid = 41; + // Number of packets dropped due to weird QUIC header + uint64 pkt_quic_header_invalid = 42; + // Number of QUIC packets dropped due to being too small + uint64 pkt_undersz = 43; + // Number of QUIC packets dropped due to being too large + uint64 pkt_oversz = 44; + // Number of QUIC version negotiation packets received + uint64 pkt_verneg = 45; + // Number of QUIC Retry packets sent + uint64 retry_sent = 46; + // Number of QUIC packets that retransmitted + MetricsQuicPktRetransmissions pkt_retransmissions = 47; +} + +// Metrics snapshot for the bundle tile +message MetricsBundle { + // Metadata about this metrics sample + 
MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Total count of transactions received, including transactions within bundles + uint64 transaction_received = 3; + // Total count of packets received + uint64 packet_received = 4; + // Total count of bytes from received grpc protobuf payloads + uint64 proto_received_bytes = 5; + // Total count of bundles received + uint64 bundle_received = 6; + // Number of gRPC errors encountered + MetricsBundleErrors errors = 7; + // Workspace heap size + uint64 heap_size = 8; + // Approx free space in workspace + uint64 heap_free_bytes = 9; + // Number of ShredStream heartbeats successfully sent + uint64 shredstream_heartbeats = 10; + // Number of HTTP/2 PINGs acknowledged by server + uint64 keepalives = 11; + // 1 if connected to the bundle server, 0 if not + uint64 connected = 12; + // Latest RTT sample at scrape time (nanoseconds) + uint64 rtt_sample = 13; + // RTT moving average (nanoseconds) + uint64 rtt_smoothed = 14; + // RTT variance (nanoseconds) + uint64 rtt_var = 15; +} + +// Metrics snapshot for the verify tile +message MetricsVerify { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Count of transaction results through verify tile + MetricsVerifyTransactionResult transaction_result = 3; + // Count of simple vote transactions received over gossip instead of via the normal TPU path + uint64 gossiped_votes_received = 4; +} + +// Metrics snapshot for the dedup tile +message MetricsDedup { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Count of transaction results through dedup tile + MetricsDedupTransactionResult transaction_result = 3; + // Count of simple vote transactions received over gossip instead of via the normal TPU path + uint64 gossiped_votes_received = 4; +} + +// Metrics snapshot for the pack 
tile +message MetricsPack { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Count of transactions received via the normal TPU path + uint64 normal_transaction_received = 3; + // Result of inserting a transaction into the pack object + MetricsPackTransactionInserted transaction_inserted = 4; + // Time in nanos spent in each state + MetricsPackMetricTiming metric_timing = 5; + // Transactions dropped from the extra transaction storage because it was full + uint64 transaction_dropped_from_extra = 6; + // Transactions inserted into the extra transaction storage because pack's primary storage was full + uint64 transaction_inserted_to_extra = 7; + // Transactions pulled from the extra transaction storage and inserted into pack's primary storage + uint64 transaction_inserted_from_extra = 8; + // Transactions deleted from pack because their TTL expired + uint64 transaction_expired = 9; + // Transactions dropped from pack because they were requested to be deleted + uint64 transaction_deleted = 10; + // Transactions dropped from pack because they were already executed (in either the replay or leader pipeline) + uint64 transaction_already_executed = 11; + // Transactions dropped from pack because they were part of a partial bundle + uint64 transaction_dropped_partial_bundle = 12; + // The total number of pending transactions in pack's pool that are available to be scheduled + MetricsPackAvailableTransactions available_transactions = 13; + // The maximum number of pending transactions that pack can consider. 
This value is fixed at Firedancer startup but is a useful reference for AvailableTransactions + uint64 pending_transactions_heap_size = 14; + // A lower bound on the smallest non-vote transaction (in cost units) that is immediately available for scheduling + uint64 smallest_pending_transaction = 15; + // The number of times pack did not pack a microblock because the limit on microblocks/block had been reached + uint64 microblock_per_block_limit = 16; + // The number of times pack did not pack a microblock because it reached the data per block limit at the start of trying to schedule a microblock + uint64 data_per_block_limit = 17; + // Result of trying to consider a transaction for scheduling + MetricsPackTransactionSchedule transaction_schedule = 18; + // Result of considering whether bundle cranks are needed + MetricsPackBundleCrankStatus bundle_crank_status = 19; + // The number of cost units consumed in the current block, or 0 if pack is not currently packing a block + uint64 cus_consumed_in_block = 20; + // Count of attempts to delete a transaction that wasn't found + uint64 delete_missed = 21; + // Count of attempts to delete a transaction that was found and deleted + uint64 delete_hit = 22; +} + +// Metrics snapshot for the poh tile +message MetricsPoh { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; +} + +// Metrics snapshot for the shred tile +message MetricsShred { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // The number of microblocks that were abandoned because we switched slots without finishing the current slot + uint64 microblocks_abandoned = 3; + // The number of times a block was created with unknown parent block_id + uint64 invalid_block_id = 4; + // The result of processing a shred from the network + MetricsShredShredProcessed shred_processed = 5; + // The number of FEC sets 
that were spilled because they didn't complete in time and we needed space + uint64 fec_set_spilled = 6; + // The number of shreds that were rejected before any resources were allocated for the FEC set + uint64 shred_rejected_initial = 7; + // The number of shreds that were rejected because they're not chained merkle shreds + uint64 shred_rejected_unchained = 8; + // The number of FEC sets that were rejected for reasons that cause the whole FEC set to become invalid + uint64 fec_rejected_fatal = 9; + // The number of times we received a FEC force complete message + uint64 force_complete_request = 10; + // The number of times we failed to force complete a FEC set on request + uint64 force_complete_failure = 11; + // The number of times we successfully forced completed a FEC set on request + uint64 force_complete_success = 12; + // The number of times we received a repair shred + uint64 shred_repair_rcv = 13; + // The number bytes received from network packets with repair shreds. Bytes include network headers + uint64 shred_repair_rcv_bytes = 14; + // The number of times we received a turbine shred + uint64 shred_turbine_rcv = 15; + // The number bytes received from network packets with turbine shreds. 
Bytes include network headers + uint64 shred_turbine_rcv_bytes = 16; +} + +// Metrics snapshot for the sign tile +message MetricsSign { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; +} + +// Metrics snapshot for the metric tile +message MetricsMetric { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Timestamp when validator was started (nanoseconds since epoch) + uint64 boot_timestamp_nanos = 3; + // The number of active http connections to the Prometheus endpoint + uint64 connection_count = 4; + // The total number of bytes written to all responses on the Prometheus endpoint + uint64 bytes_written = 5; + // The total number of bytes read from all requests to the Prometheus endpoint + uint64 bytes_read = 6; +} + +// Metrics snapshot for the diag tile +message MetricsDiag { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; +} + +// Metrics snapshot for the gui tile +message MetricsGui { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // The number of active http connections to the GUI service, excluding connections that have been upgraded to a WebSocket connection + uint64 connection_count = 3; + // The number of active websocket connections to the GUI service + uint64 websocket_connection_count = 4; + // The total number of websocket frames sent to all connections to the GUI service + uint64 websocket_frames_sent = 5; + // The total number of websocket frames received from all connections to the GUI service + uint64 websocket_frames_received = 6; + // The total number of bytes written to all connections to the GUI service + uint64 bytes_written = 7; + // The total number of bytes read from all connections to the GUI service + uint64 
bytes_read = 8; +} + +// Metrics snapshot for the replay tile +message MetricsReplay { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // The slot at which our node has most recently rooted + uint64 root_slot = 3; + // The distance in slots between our current root and the current reset slot + uint64 root_distance = 4; + // The slot at which we are currently leader, or 0 if none + uint64 leader_slot = 5; + // The slot at which we are next leader, or 0 if none. If we are currently leader, this is the same as the current leader slot + uint64 next_leader_slot = 6; + // The slot at which we last reset the replay stage, or 0 if unknown + uint64 reset_slot = 7; + // The maximum number of banks we can have alive + uint64 max_live_banks = 8; + // The number of banks we currently have alive + uint64 live_banks = 9; + // The number of free FEC sets in the reassembly queue + uint64 reasm_free = 10; + // Slot of the latest FEC set in the reassembly queue that can be replayed + uint64 reasm_latest_slot = 11; + // FEC set index of the latest FEC set in the reassembly queue that can be replayed + uint64 reasm_latest_fec_idx = 12; + // Count of slots replayed successfully + uint64 slots_total = 13; + // Count of transactions processed overall on the current fork + uint64 transactions_total = 14; + // Times where sched is full and a FEC set can't be processed + uint64 sched_full = 15; + // Times where reasm is empty and a FEC set can't be processed + uint64 reasm_empty = 16; + // Times where replay is blocked by the PoH tile not sending an end of leader message + uint64 leader_bid_wait = 17; + // Times where banks are full and a FEC set can't be processed + uint64 banks_full = 18; + // Times where the storage root is behind the consensus root and can't be advanced + uint64 storage_root_behind = 19; + // Number of program cache entries rooted + uint64 progcache_rooted = 20; + // Number of program 
cache entries garbage collected while rooting + uint64 progcache_gc_root = 21; + // Number of account database records created + uint64 accdb_created = 22; + // Number of account database records reverted + uint64 accdb_reverted = 23; + // Number of account database entries rooted + uint64 accdb_rooted = 24; + // Number of account database entries garbage collected + uint64 accdb_gc_root = 25; + // Number of account database entries reclaimed (deletion rooted) + uint64 accdb_reclaimed = 26; +} + +// Metrics snapshot for the gossip tile +message MetricsGossip { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // The peer tracking capacity of the ping tracker + uint64 ping_tracker_capacity = 3; + // The number of peers being tracked for ping/pong + MetricsGossipPingTrackerCount ping_tracker_count = 4; + // The result of processing a pong response to a ping + MetricsGossipPingTrackerPongResult ping_tracker_pong_result = 5; + // The number of peers evicted from the ping tracker to make space for new peers + uint64 ping_tracker_evicted_count = 6; + // The total number of peers ever tracked for ping/pong + uint64 ping_tracked_count = 7; + // The number of times a tracked peer was removed from tracking as it became staked + uint64 ping_tracker_stake_changed_count = 8; + // The number of times a tracked peer was removed from tracking as it's gossip address changed + uint64 ping_tracker_address_changed_count = 9; + // The capacity of the data store + uint64 crds_capacity = 10; + // The number of entries in the data store + MetricsGossipCrdsCount crds_count = 11; + // The number of entries expired from the data store due to age + uint64 crds_expired_count = 12; + // The number of entries evicted from the data store to make space for new entries + uint64 crds_evicted_count = 13; + // The capacity for storing peers in the data store + uint64 crds_peer_capacity = 14; + // The number of staked 
peers in the data store + uint64 crds_peer_staked_count = 15; + // The number of unstaked peers in the data store + uint64 crds_peer_unstaked_count = 16; + // The total visible stake in the data store, in lamports + uint64 crds_peer_total_stake = 17; + // The number of peers evicted from the data store to make space for new peers + uint64 crds_peer_evicted_count = 18; + // The capacity of the list of purged data store entries + uint64 crds_purged_capacity = 19; + // The number of purged data store entries + uint64 crds_purged_count = 20; + // The number of purged entries expired from the purged list due to age + uint64 crds_purged_expired_count = 21; + // The number of purged entries evicted from the data store to make space for new entries + uint64 crds_purged_evicted_count = 22; + // The number of unrecognized socket tags seen in Contact Infos + uint64 contact_info_unrecognized_socket_tags = 23; + // The number of IPv6 addresses seen in Contact Infos + uint64 contact_info_ipv6 = 24; + // Outcome of incoming CRDS messages + MetricsGossipCrdsRxCount crds_rx_count = 25; + // Number of gossip messages sent + MetricsGossipMessageTxCount message_tx_count = 26; + // Total wire bytes sent in gossip messages + MetricsGossipMessageTxBytes message_tx_bytes = 27; + // Number of CRDS values sent in push messages + MetricsGossipCrdsTxPushCount crds_tx_push_count = 28; + // Total wire bytes of CRDS sent out in push messages + MetricsGossipCrdsTxPushBytes crds_tx_push_bytes = 29; + // Number of CRDS values sent in pull response messages + MetricsGossipCrdsTxPullResponseCount crds_tx_pull_response_count = 30; + // Total wire bytes of CRDS sent out in pull response messages + MetricsGossipCrdsTxPullResponseBytes crds_tx_pull_response_bytes = 31; +} + +// Metrics snapshot for the netlnk tile +message MetricsNetlnk { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Number of netlink drop events 
caught + uint64 drop_events = 3; + // Number of full link table syncs done + uint64 link_full_syncs = 4; + // Number of full route table syncs done + uint64 route_full_syncs = 5; + // Number of netlink live updates processed + MetricsNetlnkUpdates updates = 6; + // Number of network interfaces + uint64 interface_count = 7; + // Number of IPv4 routes + MetricsNetlnkRouteCount route_count = 8; + // Number of neighbor solicit requests sent to kernel + uint64 neigh_probe_sent = 9; + // Number of neighbor solicit requests that failed to send (kernel too slow) + uint64 neigh_probe_fails = 10; + // Number of neighbor solicit that exceeded the per-host rate limit + uint64 neigh_probe_rate_limit_host = 11; + // Number of neighbor solicit that exceeded the global rate limit + uint64 neigh_probe_rate_limit_global = 12; +} + +// Metrics snapshot for the sock tile +message MetricsSock { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Number of sendmmsg syscalls dispatched + MetricsSockSyscallsSendmmsg syscalls_sendmmsg = 3; + // Number of recvmsg syscalls dispatched + uint64 syscalls_recvmmsg = 4; + // Number of packets received + uint64 rx_pkt_cnt = 5; + // Number of packets sent + uint64 tx_pkt_cnt = 6; + // Number of packets failed to send + uint64 tx_drop_cnt = 7; + // Total number of bytes transmitted (including Ethernet header) + uint64 tx_bytes_total = 8; + // Total number of bytes received (including Ethernet header) + uint64 rx_bytes_total = 9; +} + +// Metrics snapshot for the repair tile +message MetricsRepair { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // How many network packets we have sent, including reqs, pings, pongs, etc + uint64 total_pkt_count = 3; + // What types of client messages are we sending + MetricsRepairSentPktTypes sent_pkt_types = 4; + // Until which slots have we fully 
repaired + uint64 repaired_slots = 5; + // Our view of the current cluster slot, max slot received + uint64 current_slot = 6; + // How many peers have we requested + uint64 request_peers = 7; + // How many times no sign tiles were available to send request + uint64 sign_tile_unavail = 8; + // How many times we pass eager repair threshold + uint64 eager_repair_aggresses = 9; + // How many times we re-request a shred from the inflights queue + uint64 rerequest_queue = 10; + // How many times we received a malformed ping + uint64 malformed_ping = 11; +} + +// Metrics snapshot for the send tile +message MetricsSend { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Total number of times we received contact info for an unstaked node + uint64 unstaked_ci = 3; + // Total number of times we removed contact info + uint64 ci_removed = 4; + // Total number of contact infos received and handled for QUIC Vote port + MetricsSendNewContactInfoQuicVote new_contact_info_quic_vote = 5; + // Total number of contact infos received and handled for QUIC TPU port + MetricsSendNewContactInfoQuicTpu new_contact_info_quic_tpu = 6; + // Total number of contact infos received and handled for UDP Vote port + MetricsSendNewContactInfoUdpVote new_contact_info_udp_vote = 7; + // Total number of contact infos received and handled for UDP TPU port + MetricsSendNewContactInfoUdpTpu new_contact_info_udp_tpu = 8; + // Total count of results from trying to send via QUIC Vote port + MetricsSendSendResultQuicVote send_result_quic_vote = 9; + // Total count of results from trying to send via QUIC TPU port + MetricsSendSendResultQuicTpu send_result_quic_tpu = 10; + // Total count of results from trying to send via UDP Vote port + MetricsSendSendResultUdpVote send_result_udp_vote = 11; + // Total count of results from trying to send via UDP TPU port + MetricsSendSendResultUdpTpu send_result_udp_tpu = 12; + // Total count 
of results from trying to ensure a connection for a leader for QUIC Vote port + MetricsSendEnsureConnResultQuicVote ensure_conn_result_quic_vote = 13; + // Total count of results from trying to ensure a connection for a leader for QUIC TPU port + MetricsSendEnsureConnResultQuicTpu ensure_conn_result_quic_tpu = 14; + // Total number of times we completed a handshake + MetricsSendHandshakeComplete handshake_complete = 15; + // Total number of times QUIC connection closed + MetricsSendQuicConnFinal quic_conn_final = 16; + // Total count of QUIC packets received + uint64 received_packets = 17; + // Total bytes received via QUIC + uint64 received_bytes = 18; + // Total count of QUIC packets sent + uint64 sent_packets = 19; + // Total bytes sent via QUIC + uint64 sent_bytes = 20; + // Total count of QUIC retry packets sent + uint64 retry_sent = 21; + // Number of currently allocated QUIC connections + uint64 connections_alloc = 22; + // Number of QUIC connections in each state + MetricsSendConnectionsState connections_state = 23; + // Total count of QUIC connections created + uint64 connections_created = 24; + // Total count of QUIC connections closed + uint64 connections_closed = 25; + // Total count of QUIC connections aborted + uint64 connections_aborted = 26; + // Total count of QUIC connections timed out + uint64 connections_timed_out = 27; + // Total count of QUIC connections retried + uint64 connections_retried = 28; + // Total count of connection errors due to no slots + uint64 connection_error_no_slots = 29; + // Total count of connection retry failures + uint64 connection_error_retry_fail = 30; + // Total count of packets with crypto failures + MetricsSendPktCryptoFailed pkt_crypto_failed = 31; + // Total count of packets with no key + MetricsSendPktNoKey pkt_no_key = 32; + // Total count of packets with no connection + MetricsSendPktNoConn pkt_no_conn = 33; + // Total count of packet TX allocation failures + uint64 pkt_tx_alloc_fail = 34; + // Total count of 
packets with invalid network headers + uint64 pkt_net_header_invalid = 35; + // Total count of packets with invalid QUIC headers + uint64 pkt_quic_header_invalid = 36; + // Total count of undersized packets + uint64 pkt_undersz = 37; + // Total count of oversized packets + uint64 pkt_oversz = 38; + // Total count of version negotiation packets + uint64 pkt_verneg = 39; + // Total count of QUIC packet retransmissions + MetricsSendPktRetransmissions pkt_retransmissions = 40; + // Total count of QUIC handshakes created + uint64 handshakes_created = 41; + // Total count of handshake allocation failures + uint64 handshake_error_alloc_fail = 42; + // Total count of handshakes evicted + uint64 handshake_evicted = 43; + // Total count of stream events received + uint64 stream_received_events = 44; + // Total bytes received via streams + uint64 stream_received_bytes = 45; + // Total count of QUIC frames received + MetricsSendReceivedFrames received_frames = 46; + // Total count of frame parse failures + uint64 frame_fail_parse = 47; + // Results of attempts to acquire QUIC frame metadata + MetricsSendFrameTxAlloc frame_tx_alloc = 48; + // Total count of ACK frames transmitted + MetricsSendAckTx ack_tx = 49; +} + +// Metrics snapshot for the snapct tile +message MetricsSnapct { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile + uint64 state = 3; + // Number of times we aborted and retried full snapshot download because the peer was too slow + uint64 full_num_retries = 4; + // Number of times we aborted and retried incremental snapshot download because the peer was too slow + uint64 incremental_num_retries = 5; + // Number of bytes read so far from the full snapshot. Might decrease if snapshot load is aborted and restarted + uint64 full_bytes_read = 6; + // Number of bytes written so far from the full snapshot. 
Might decrease if snapshot load is aborted and restarted + uint64 full_bytes_written = 7; + // Total size of the full snapshot file. Might change if snapshot load is aborted and restarted + uint64 full_bytes_total = 8; + // Number of times we retried the full snapshot download because the peer was too slow + uint64 full_download_retries = 9; + // Number of bytes read so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted + uint64 incremental_bytes_read = 10; + // Number of bytes written so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted + uint64 incremental_bytes_written = 11; + // Total size of the incremental snapshot file. Might change if snapshot load is aborted and restarted + uint64 incremental_bytes_total = 12; + // Number of times we retried the incremental snapshot download because the peer was too slow + uint64 incremental_download_retries = 13; + // The predicted slot from which replay starts after snapshot loading finishes. Might change if snapshot load is aborted and restarted + uint64 predicted_slot = 14; + // Number of fresh gossip peers seen when collecting gossip peers. + uint64 gossip_fresh_count = 15; + // Number of total gossip peers seen when collecting gossip peers. + uint64 gossip_total_count = 16; + // Number of SSL allocation errors encountered. + uint64 ssl_alloc_errors = 17; +} + +// Metrics snapshot for the snapld tile +message MetricsSnapld { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of SSL allocation errors encountered. 
+ uint64 ssl_alloc_errors = 4; +} + +// Metrics snapshot for the snapdc tile +message MetricsSnapdc { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of bytes read so far from the compressed full snapshot file. Might decrease if snapshot load is aborted and restarted + uint64 full_compressed_bytes_read = 4; + // Number of bytes decompressed so far from the full snapshot. Might decrease if snapshot load is aborted and restarted + uint64 full_decompressed_bytes_written = 5; + // Number of bytes read so far from the compressed incremental snapshot file. Might decrease if snapshot load is aborted and restarted + uint64 incremental_compressed_bytes_read = 6; + // Number of bytes decompressed so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted + uint64 incremental_decompressed_bytes_written = 7; +} + +// Metrics snapshot for the snapin tile +message MetricsSnapin { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of bytes read so far from the full snapshot. Might decrease if snapshot load is aborted and restarted + uint64 full_bytes_read = 4; + // Number of bytes read so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted + uint64 incremental_bytes_read = 5; + // Number of accounts seen during snapshot loading. Includes duplicates. Resets if snapshot load restarts + uint64 accounts_loaded = 6; + // Number of previously inserted accounts replaced by a later duplicate. 
Resets if snapshot load restarts + uint64 accounts_replaced = 7; + // Number of stale duplicate accounts dropped because a previously inserted account was newer. Resets if snapshot load restarts + uint64 accounts_ignored = 8; +} + +// Metrics snapshot for the ipecho tile +message MetricsIpecho { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // The current shred version used by the validator + uint64 current_shred_version = 3; + // The number of active connections to the ipecho service + uint64 connection_count = 4; + // The number of connections to the ipecho service that have been made and closed normally + uint64 connections_closed_ok = 5; + // The number of connections to the ipecho service that have been made and closed abnormally + uint64 connections_closed_error = 6; + // The total number of bytes read from all connections to the ipecho service + uint64 bytes_read = 7; + // The total number of bytes written to all connections to the ipecho service + uint64 bytes_written = 8; +} + +// Metrics snapshot for the gossvf tile +message MetricsGossvf { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Number of gossip messages processed + MetricsGossvfMessageRxCount message_rx_count = 3; + // Total wire bytes of gossip messages processed + MetricsGossvfMessageRxBytes message_rx_bytes = 4; + // Number of CRDS values processed + MetricsGossvfCrdsRxCount crds_rx_count = 5; + // Total wire bytes of CRDS values processed + MetricsGossvfCrdsRxBytes crds_rx_bytes = 6; +} + +// Metrics snapshot for the bankf tile +message MetricsBankf { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Result of loading and executing a transaction + MetricsBankfTransactionResult transaction_result = 3; + // Whether a transaction landed 
in the block or not + MetricsBankfTransactionLanded transaction_landed = 4; +} + +// Metrics snapshot for the resolf tile +message MetricsResolf { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Count of transactions dropped because the bank was not available + uint64 no_bank_drop = 3; + // Count of operations that happened on the transaction stash + MetricsResolfStashOperation stash_operation = 4; + // Count of address lookup tables resolved + MetricsResolfLutResolved lut_resolved = 5; + // Count of transactions that failed to resolve because the blockhash was expired + uint64 blockhash_expired = 6; + // Count of transactions that failed to resolve because a peer transaction in the bundle failed + uint64 transaction_bundle_peer_failure = 7; +} + +// Metrics snapshot for the exec tile +message MetricsExec { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Number of program cache misses + uint64 progcache_misses = 3; + // Number of program cache hits + uint64 progcache_hits = 4; + // Number of program cache insertions + uint64 progcache_fills = 5; + // Total number of bytes inserted into program cache + uint64 progcache_fill_tot_sz = 6; + // Number of program cache load fails (tombstones inserted) + uint64 progcache_fill_fails = 7; + // Number of time two tiles raced to insert the same cache entry + uint64 progcache_dup_inserts = 8; + // Number of program cache invalidations + uint64 progcache_invalidations = 9; + // Number of account database records created + uint64 accdb_created = 10; + // Mutually exclusive and exhaustive duration of time spent in transaction execution regimes + MetricsExecTxnRegime txn_regime = 11; + // Mutually exclusive and exhaustive duration of time spent in virtual machine execution regimes + MetricsExecVmRegime vm_regime = 12; + // Transaction account change event 
counters + MetricsExecTxnAccountChanges txn_account_changes = 13; +} + +// Metrics snapshot for the snapwr tile +message MetricsSnapwr { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 4=SHUTDOWN + uint64 state = 3; + // Number of bytes written so far to the vinyl snapshot file. Might decrease if snapshot creation is aborted and restarted + uint64 vinyl_bytes_written = 4; +} + +// Metrics snapshot for the snapwh tile +message MetricsSnapwh { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 4=SHUTDOWN + uint64 state = 3; +} + +// Metrics snapshot for the snapla tile +message MetricsSnapla { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 full_accounts_hashed = 4; + // Number of accounts hashed for the incremental snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 incremental_accounts_hashed = 5; +} + +// Metrics snapshot for the snapls tile +message MetricsSnapls { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 full_accounts_hashed = 4; + // Number of accounts hashed for the incremental snapshot during snapshot loading. 
Might decrease if snapshot load is aborted and restarted + uint64 incremental_accounts_hashed = 5; +} + +// Metrics snapshot for the tower tile +message MetricsTower { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Number of times we dropped a vote txn because it was invalid (malformed, bad signature, etc.) + uint64 vote_txn_invalid = 3; + // Number of times we ignored all or part of a vote txn because we didn't recognize a slot (eg. our replay was behind) + uint64 vote_txn_ignored = 4; + // Number of times a vote txn mismatched our own block id + uint64 vote_txn_mismatch = 5; + // Rollback to an ancestor of our prev vote (can't vote) + uint64 ancestor_rollback = 6; + // Duplicate sibling got confirmed (can't vote) + uint64 sibling_confirmed = 7; + // Same fork as prev vote (can vote) + uint64 same_fork = 8; + // Prev vote was on a different fork, but we are allowed to switch (can vote) + uint64 switch_pass = 9; + // Prev vote was on a different fork, and we are not allowed to switch (can't vote) + uint64 switch_fail = 10; + // Locked out (can't vote) + uint64 lockout_fail = 11; + // Did not pass threshold check (can't vote) + uint64 threshold_fail = 12; + // Prev leader block did not propagate (can't vote) + uint64 propagated_fail = 13; + // Current Tower root slot + uint64 root_slot = 14; + // Current Tower vote slot + uint64 vote_slot = 15; + // Current Tower reset slot + uint64 reset_slot = 16; + // Number of times we ignored a slot likely due to minority fork publish + uint64 slot_ignored = 17; + // Number of hard forks we've seen (block ids with multiple candidate bank hashes) + uint64 hard_forks_seen = 18; + // Number of hard forks (candidate bank hashes) we've pruned + uint64 hard_forks_pruned = 19; + // Currently active hard forks + uint64 hard_forks_active = 20; + // Max number of candidate bank hashes for a given block id + uint64 hard_forks_max_width = 21; +} + +// 
Metrics snapshot for the accdb tile +message MetricsAccdb { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // Current bstream sequence number + MetricsAccdbBstreamSeq bstream_seq = 3; + // Number of request batches processed + uint64 request_batches = 4; + // Number of requests processed + MetricsAccdbRequests requests = 5; + // Number of blocks written to bstream + MetricsAccdbBlocks blocks = 6; + // + uint64 garbage_bytes = 7; + // Total number of record bytes that were garbage collected + uint64 cum_gc_bytes = 8; + // Total number of cache hits + uint64 cache_hits = 9; +} + +// Metrics snapshot for the snapwm tile +message MetricsSnapwm { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of accounts seen during snapshot loading. Includes duplicates. Resets if snapshot load restarts + uint64 accounts_loaded = 4; + // Number of previously inserted accounts replaced by a later duplicate. Resets if snapshot load restarts + uint64 accounts_replaced = 5; + // Number of stale duplicate accounts dropped because a previously inserted account was newer. Resets if snapshot load restarts + uint64 accounts_ignored = 6; + // Current number of accounts in index. Resets if snapshot load restarts + uint64 accounts_active = 7; +} + +// Metrics snapshot for the snaplh tile +message MetricsSnaplh { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of accounts hashed for the full snapshot during snapshot loading. 
Might decrease if snapshot load is aborted and restarted + uint64 full_accounts_hashed = 4; + // Number of accounts hashed for the incremental snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 incremental_accounts_hashed = 5; +} + +// Metrics snapshot for the snaplv tile +message MetricsSnaplv { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; + // State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN + uint64 state = 3; + // Number of duplicate accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 full_duplicate_accounts_hashed = 4; + // Number of duplicate accounts hashed for the incremental snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted + uint64 incremental_duplicate_accounts_hashed = 5; +} + +// Metrics snapshot for the genesi tile +message MetricsGenesi { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; +} + +// Metrics snapshot for the rpc tile +message MetricsRpc { + // Metadata about this metrics sample + MetricMeta meta = 1; + // Common tile metrics shared by all tiles + MetricTile tile = 2; +} + // Combined event type message Event { oneof event { Txn txn = 1; Shred shred = 2; + MetricsNet metrics_net = 1000; + MetricsQuic metrics_quic = 1001; + MetricsBundle metrics_bundle = 1002; + MetricsVerify metrics_verify = 1003; + MetricsDedup metrics_dedup = 1004; + MetricsPack metrics_pack = 1006; + MetricsPoh metrics_poh = 1008; + MetricsShred metrics_shred = 1009; + MetricsSign metrics_sign = 1011; + MetricsMetric metrics_metric = 1012; + MetricsDiag metrics_diag = 1013; + MetricsGui metrics_gui = 1016; + MetricsReplay metrics_replay = 1017; + MetricsGossip metrics_gossip = 1018; + MetricsNetlnk metrics_netlnk = 
1019; + MetricsSock metrics_sock = 1020; + MetricsRepair metrics_repair = 1021; + MetricsSend metrics_send = 1022; + MetricsSnapct metrics_snapct = 1023; + MetricsSnapld metrics_snapld = 1024; + MetricsSnapdc metrics_snapdc = 1025; + MetricsSnapin metrics_snapin = 1026; + MetricsIpecho metrics_ipecho = 1027; + MetricsGossvf metrics_gossvf = 1028; + MetricsBankf metrics_bankf = 1029; + MetricsResolf metrics_resolf = 1030; + MetricsExec metrics_exec = 1032; + MetricsSnapwr metrics_snapwr = 1033; + MetricsSnapwh metrics_snapwh = 1035; + MetricsSnapla metrics_snapla = 1036; + MetricsSnapls metrics_snapls = 1037; + MetricsTower metrics_tower = 1038; + MetricsAccdb metrics_accdb = 1039; + MetricsSnapwm metrics_snapwm = 1040; + MetricsSnaplh metrics_snaplh = 1041; + MetricsSnaplv metrics_snaplv = 1042; + MetricsGenesi metrics_genesi = 1043; + MetricsRpc metrics_rpc = 1044; } } diff --git a/src/disco/events/schema/metrics_accdb.json b/src/disco/events/schema/metrics_accdb.json new file mode 100644 index 00000000000..76614049e68 --- /dev/null +++ b/src/disco/events/schema/metrics_accdb.json @@ -0,0 +1,41 @@ +{ + "name": "metrics_accdb", + "id": 1039, + "description": "Metrics snapshot for the accdb tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "bstream_seq": { + "type": "Flatten", + "description": "Current bstream sequence number", + "fields": { + "bstream_seq_ancient": { "type": "UInt64", "description": "Blocks between ancient and past have been written and forgotten (no read, no write)" }, + "bstream_seq_past": { "type": "UInt64", "description": "Blocks between past and present have been written (read only)" }, + "bstream_seq_present": { "type": "UInt64", "description": "Blocks between present and future are being written (write only)" }, + "bstream_seq_future": { "type": "UInt64", "description": "Blocks between future and ancient have not been written (no read, no write)" } + } + }, + "request_batches": { "type": 
"UInt64", "description": "Number of request batches processed" }, + "requests": { + "type": "Flatten", + "description": "Number of requests processed", + "fields": { + "requests_acquire": { "type": "UInt64", "description": "Acquire record" }, + "requests_release": { "type": "UInt64", "description": "Release record" }, + "requests_erase": { "type": "UInt64", "description": "Erase record" } + } + }, + "blocks": { + "type": "Flatten", + "description": "Number of blocks written to bstream", + "fields": { + "blocks_pair": { "type": "UInt64", "description": "Record" }, + "blocks_dead": { "type": "UInt64", "description": "Record deletion" }, + "blocks_part": { "type": "UInt64", "description": "Partition/divider" } + } + }, + "garbage_bytes": { "type": "UInt64", "description": "" }, + "cum_gc_bytes": { "type": "UInt64", "description": "Total number of record bytes that were garbage collected" }, + "cache_hits": { "type": "UInt64", "description": "Total number of cache hits" } + } +} diff --git a/src/disco/events/schema/metrics_bankf.json b/src/disco/events/schema/metrics_bankf.json new file mode 100644 index 00000000000..e35cf2d2933 --- /dev/null +++ b/src/disco/events/schema/metrics_bankf.json @@ -0,0 +1,51 @@ +{ + "name": "metrics_bankf", + "id": 1029, + "description": "Metrics snapshot for the bankf tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "transaction_result": { + "type": "Flatten", + "description": "Result of loading and executing a transaction", + "fields": { + "transaction_result_success": { "type": "UInt64", "description": "Transaction executed successfully" }, + "transaction_result_instructon_error": { "type": "UInt64", "description": "An error occurred while processing an instruction" }, + "transaction_result_account_not_found": { "type": "UInt64", "description": "The transaction fee payer address was not found" }, + "transaction_result_program_account_not_found": { "type": "UInt64", "description": "A 
 program account referenced by the transaction was not found" }, + "transaction_result_insufficient_funds_for_fee": { "type": "UInt64", "description": "The transaction fee payer did not have balance to pay the fee" }, + "transaction_result_invalid_account_for_fee": { "type": "UInt64", "description": "The transaction fee payer account is not owned by the system program, or has data that is not a nonce" }, + "transaction_result_already_processed": { "type": "UInt64", "description": "The transaction has already been processed in a recent block" }, + "transaction_result_blockhash_not_found": { "type": "UInt64", "description": "The transaction references a blockhash that is not recent, or advances a nonce with the wrong value" }, + "transaction_result_invalid_program_for_execution": { "type": "UInt64", "description": "A program account referenced by the transaction was not executable. TODO: No longer needed with SIMD-0162" }, + "transaction_result_address_lookup_table_not_found": { "type": "UInt64", "description": "The transaction references an ALUT account that does not exist or is inactive" }, + "transaction_result_invalid_address_lookup_table_owner": { "type": "UInt64", "description": "The transaction references an ALUT account that is not owned by the ALUT program account" }, + "transaction_result_invalid_address_lookup_table_data": { "type": "UInt64", "description": "The transaction references an ALUT account that contains data which is not a valid ALUT" }, + "transaction_result_invalid_address_lookup_table_index": { "type": "UInt64", "description": "The transaction references an account offset from the ALUT which does not exist" }, + "transaction_result_max_loaded_accounts_data_size_exceeded": { "type": "UInt64", "description": "The total account data size of the loaded accounts exceeds the consensus limit" }, + "transaction_result_duplicate_instruction": { "type": "UInt64", "description": "A compute budget program instruction was invoked more than once" }, + 
"transaction_result_invalid_loaded_accounts_data_size_limit": { "type": "UInt64", "description": "The compute budget program was invoked and set the loaded accounts data size to zero" }, + "transaction_result_nonce_already_advanced": { "type": "UInt64", "description": "The transaction references a nonce account that is already advanced" }, + "transaction_result_nonce_advance_failed": { "type": "UInt64", "description": "The transaction is a nonce transaction but the advance instruction was not valid or failed" }, + "transaction_result_nonce_wrong_blockhash": { "type": "UInt64", "description": "The transaction is a nonce transaction but the blockhash is not the correct one" }, + "transaction_result_account_in_use": { "type": "UInt64", "description": "The transaction conflicts with another transaction in the microblock. TODO: No longer possible with smart dispatcher" }, + "transaction_result_account_loaded_twice": { "type": "UInt64", "description": "The transaction references the same account twice" }, + "transaction_result_signature_failure": { "type": "UInt64", "description": "The transaction had an invalid signature" }, + "transaction_result_too_many_account_locks": { "type": "UInt64", "description": "The transaction references too many accounts. 
TODO: No longer possible with smart dispatcher" }, + "transaction_result_insufficient_funds_for_rent": { "type": "UInt64", "description": "The transaction would leave an account with a lower balance than the rent-exempt minimum" }, + "transaction_result_unbalanced_transaction": { "type": "UInt64", "description": "The total referenced account lamports before and after the transaction was unbalanced" }, + "transaction_result_bundle_peer": { "type": "UInt64", "description": "The transaction was part of a bundle and an earlier transaction in the bundle failed" } + } + }, + "transaction_landed": { + "type": "Flatten", + "description": "Whether a transaction landed in the block or not", + "fields": { + "transaction_landed_landed_success": { "type": "UInt64", "description": "Transaction landed" }, + "transaction_landed_landed_fees_only": { "type": "UInt64", "description": "Transaction landed, but was fees only and did not execute" }, + "transaction_landed_landed_failed": { "type": "UInt64", "description": "Transaction landed, but failed to execute" }, + "transaction_landed_unlanded": { "type": "UInt64", "description": "Transaction did not land" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_bundle.json b/src/disco/events/schema/metrics_bundle.json new file mode 100644 index 00000000000..961965831b9 --- /dev/null +++ b/src/disco/events/schema/metrics_bundle.json @@ -0,0 +1,32 @@ +{ + "name": "metrics_bundle", + "id": 1002, + "description": "Metrics snapshot for the bundle tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "transaction_received": { "type": "UInt64", "description": "Total count of transactions received, including transactions within bundles" }, + "packet_received": { "type": "UInt64", "description": "Total count of packets received" }, + "proto_received_bytes": { "type": "UInt64", "description": "Total count of bytes from received grpc protobuf payloads" }, + "bundle_received": { "type": 
"UInt64", "description": "Total count of bundles received" }, + "errors": { + "type": "Flatten", + "description": "Number of gRPC errors encountered", + "fields": { + "errors_protobuf": { "type": "UInt64", "description": "Protobuf decode/encode error" }, + "errors_transport": { "type": "UInt64", "description": "Transport error" }, + "errors_timeout": { "type": "UInt64", "description": "I/O timeout" }, + "errors_no_fee_info": { "type": "UInt64", "description": "Bundle dropped due to missing fee info" }, + "errors_ssl_alloc": { "type": "UInt64", "description": "OpenSSL alloc fail" } + } + }, + "heap_size": { "type": "UInt64", "description": "Workspace heap size" }, + "heap_free_bytes": { "type": "UInt64", "description": "Approx free space in workspace" }, + "shredstream_heartbeats": { "type": "UInt64", "description": "Number of ShredStream heartbeats successfully sent" }, + "keepalives": { "type": "UInt64", "description": "Number of HTTP/2 PINGs acknowledged by server" }, + "connected": { "type": "UInt64", "description": "1 if connected to the bundle server, 0 if not" }, + "rtt_sample": { "type": "UInt64", "description": "Latest RTT sample at scrape time (nanoseconds)" }, + "rtt_smoothed": { "type": "UInt64", "description": "RTT moving average (nanoseconds)" }, + "rtt_var": { "type": "UInt64", "description": "RTT variance (nanoseconds)" } + } +} diff --git a/src/disco/events/schema/metrics_dedup.json b/src/disco/events/schema/metrics_dedup.json new file mode 100644 index 00000000000..6c5a6e7b885 --- /dev/null +++ b/src/disco/events/schema/metrics_dedup.json @@ -0,0 +1,19 @@ +{ + "name": "metrics_dedup", + "id": 1004, + "description": "Metrics snapshot for the dedup tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "transaction_result": { + "type": "Flatten", + "description": "Count of transaction results through dedup tile", + "fields": { + "transaction_result_success": { "type": "UInt64", "description": 
"Transaction deduplicated successfully" }, + "transaction_result_bundle_peer_failure": { "type": "UInt64", "description": "Peer transaction in the bundle failed" }, + "transaction_result_dedup_failure": { "type": "UInt64", "description": "Transaction failed deduplication" } + } + }, + "gossiped_votes_received": { "type": "UInt64", "description": "Count of simple vote transactions received over gossip instead of via the normal TPU path" } + } +} diff --git a/src/disco/events/schema/metrics_diag.json b/src/disco/events/schema/metrics_diag.json new file mode 100644 index 00000000000..4a27a33e001 --- /dev/null +++ b/src/disco/events/schema/metrics_diag.json @@ -0,0 +1,9 @@ +{ + "name": "metrics_diag", + "id": 1013, + "description": "Metrics snapshot for the diag tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" } + } +} diff --git a/src/disco/events/schema/metrics_exec.json b/src/disco/events/schema/metrics_exec.json new file mode 100644 index 00000000000..35633ded748 --- /dev/null +++ b/src/disco/events/schema/metrics_exec.json @@ -0,0 +1,48 @@ +{ + "name": "metrics_exec", + "id": 1032, + "description": "Metrics snapshot for the exec tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "progcache_misses": { "type": "UInt64", "description": "Number of program cache misses" }, + "progcache_hits": { "type": "UInt64", "description": "Number of program cache hits" }, + "progcache_fills": { "type": "UInt64", "description": "Number of program cache insertions" }, + "progcache_fill_tot_sz": { "type": "UInt64", "description": "Total number of bytes inserted into program cache" }, + "progcache_fill_fails": { "type": "UInt64", "description": "Number of program cache load fails (tombstones inserted)" }, + "progcache_dup_inserts": { "type": "UInt64", "description": "Number of time two tiles raced to insert the same cache entry" }, + "progcache_invalidations": { "type": "UInt64", 
"description": "Number of program cache invalidations" }, + "accdb_created": { "type": "UInt64", "description": "Number of account database records created" }, + "txn_regime": { + "type": "Flatten", + "description": "Mutually exclusive and exhaustive duration of time spent in transaction execution regimes", + "fields": { + "txn_regime_setup": { "type": "UInt64", "description": "Transaction setup" }, + "txn_regime_exec": { "type": "UInt64", "description": "Transaction execution (includes VM setup/execution)" }, + "txn_regime_commit": { "type": "UInt64", "description": "Transaction result commit" } + } + }, + "vm_regime": { + "type": "Flatten", + "description": "Mutually exclusive and exhaustive duration of time spent in virtual machine execution regimes", + "fields": { + "vm_regime_setup": { "type": "UInt64", "description": "VM setup" }, + "vm_regime_commit": { "type": "UInt64", "description": "VM commit" }, + "vm_regime_setup_cpi": { "type": "UInt64", "description": "VM setup (CPI)" }, + "vm_regime_commit_cpi": { "type": "UInt64", "description": "VM commit (CPI)" }, + "vm_regime_interpreter": { "type": "UInt64", "description": "VM interpreter execution" } + } + }, + "txn_account_changes": { + "type": "Flatten", + "description": "Transaction account change event counters", + "fields": { + "txn_account_changes_unchanged_nonexist": { "type": "UInt64", "description": "Account did not exist before and still does not" }, + "txn_account_changes_created": { "type": "UInt64", "description": "Account created" }, + "txn_account_changes_delete": { "type": "UInt64", "description": "Account deleted" }, + "txn_account_changes_modify": { "type": "UInt64", "description": "Account modified" }, + "txn_account_changes_unchanged": { "type": "UInt64", "description": "Account unchanged" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_genesi.json b/src/disco/events/schema/metrics_genesi.json new file mode 100644 index 00000000000..27d2f1a17a1 --- /dev/null +++ 
b/src/disco/events/schema/metrics_genesi.json @@ -0,0 +1,9 @@ +{ + "name": "metrics_genesi", + "id": 1043, + "description": "Metrics snapshot for the genesi tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" } + } +} diff --git a/src/disco/events/schema/metrics_gossip.json b/src/disco/events/schema/metrics_gossip.json new file mode 100644 index 00000000000..aac00b15bf1 --- /dev/null +++ b/src/disco/events/schema/metrics_gossip.json @@ -0,0 +1,187 @@ +{ + "name": "metrics_gossip", + "id": 1018, + "description": "Metrics snapshot for the gossip tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "ping_tracker_capacity": { "type": "UInt64", "description": "The peer tracking capacity of the ping tracker" }, + "ping_tracker_count": { + "type": "Flatten", + "description": "The number of peers being tracked for ping/pong", + "fields": { + "ping_tracker_count_unpinged": { "type": "UInt64", "description": "Peer is known but has not been pinged yet" }, + "ping_tracker_count_invalid": { "type": "UInt64", "description": "Peer was pinged but has not yet responded, or responded with an invalid pong" }, + "ping_tracker_count_valid": { "type": "UInt64", "description": "Peer was pinged and is currently valid" }, + "ping_tracker_count_valid_refreshing": { "type": "UInt64", "description": "Peer was pinged and is currently valid, and is being refreshed with a new ping" } + } + }, + "ping_tracker_pong_result": { + "type": "Flatten", + "description": "The result of processing a pong response to a ping", + "fields": { + "ping_tracker_pong_result_staked": { "type": "UInt64", "description": "Pong ignored as node is staked" }, + "ping_tracker_pong_result_entrypoint": { "type": "UInt64", "description": "Pong ignored as node is an entrypoint" }, + "ping_tracker_pong_result_untracked": { "type": "UInt64", "description": "Pong ignored as node has no outstanding ping" }, + 
 "ping_tracker_pong_result_address": { "type": "UInt64", "description": "Pong ignored as responding node doesn't match pinged address" }, + "ping_tracker_pong_result_token": { "type": "UInt64", "description": "Pong ignored as token was not valid" }, + "ping_tracker_pong_result_success": { "type": "UInt64", "description": "Pong accepted" } + } + }, + "ping_tracker_evicted_count": { "type": "UInt64", "description": "The number of peers evicted from the ping tracker to make space for new peers" }, + "ping_tracked_count": { "type": "UInt64", "description": "The total number of peers ever tracked for ping/pong" }, + "ping_tracker_stake_changed_count": { "type": "UInt64", "description": "The number of times a tracked peer was removed from tracking as it became staked" }, + "ping_tracker_address_changed_count": { "type": "UInt64", "description": "The number of times a tracked peer was removed from tracking as its gossip address changed" }, + "crds_capacity": { "type": "UInt64", "description": "The capacity of the data store" }, + "crds_count": { + "type": "Flatten", + "description": "The number of entries in the data store", + "fields": { + "crds_count_contact_info_v1": { "type": "UInt64", "description": "Contact Info V1" }, + "crds_count_vote": { "type": "UInt64", "description": "Vote" }, + "crds_count_lowest_slot": { "type": "UInt64", "description": "Lowest Slot" }, + "crds_count_snapshot_hashes": { "type": "UInt64", "description": "Snapshot Hashes" }, + "crds_count_accounts_hashes": { "type": "UInt64", "description": "Accounts Hashes" }, + "crds_count_epoch_slots": { "type": "UInt64", "description": "Epoch Slots" }, + "crds_count_version_v1": { "type": "UInt64", "description": "Version V1" }, + "crds_count_version_v2": { "type": "UInt64", "description": "Version V2" }, + "crds_count_node_instance": { "type": "UInt64", "description": "Node Instance" }, + "crds_count_duplicate_shred": { "type": "UInt64", "description": "Duplicate Shred" }, + 
"crds_count_incremental_snapshot_hashes": { "type": "UInt64", "description": "Incremental Snapshot Hashes" }, + "crds_count_contact_info_v2": { "type": "UInt64", "description": "Contact Info V2" }, + "crds_count_restart_last_voted_fork_slots": { "type": "UInt64", "description": "Restart Last Voted Fork Slots" }, + "crds_count_restart_heaviest_fork": { "type": "UInt64", "description": "Restart Heaviest Fork" } + } + }, + "crds_expired_count": { "type": "UInt64", "description": "The number of entries expired from the data store due to age" }, + "crds_evicted_count": { "type": "UInt64", "description": "The number of entries evicted from the data store to make space for new entries" }, + "crds_peer_capacity": { "type": "UInt64", "description": "The capacity for storing peers in the data store" }, + "crds_peer_staked_count": { "type": "UInt64", "description": "The number of staked peers in the data store" }, + "crds_peer_unstaked_count": { "type": "UInt64", "description": "The number of unstaked peers in the data store" }, + "crds_peer_total_stake": { "type": "UInt64", "description": "The total visible stake in the data store, in lamports" }, + "crds_peer_evicted_count": { "type": "UInt64", "description": "The number of peers evicted from the data store to make space for new peers" }, + "crds_purged_capacity": { "type": "UInt64", "description": "The capacity of the list of purged data store entries" }, + "crds_purged_count": { "type": "UInt64", "description": "The number of purged data store entries" }, + "crds_purged_expired_count": { "type": "UInt64", "description": "The number of purged entries expired from the purged list due to age" }, + "crds_purged_evicted_count": { "type": "UInt64", "description": "The number of purged entries evicted from the data store to make space for new entries" }, + "contact_info_unrecognized_socket_tags": { "type": "UInt64", "description": "The number of unrecognized socket tags seen in Contact Infos" }, + "contact_info_ipv6": { "type": 
"UInt64", "description": "The number of IPv6 addresses seen in Contact Infos" }, + "crds_rx_count": { + "type": "Flatten", + "description": "Outcome of incoming CRDS messages", + "fields": { + "crds_rx_count_upserted_pull_response": { "type": "UInt64", "description": "Pull Response (upserted)" }, + "crds_rx_count_upserted_push": { "type": "UInt64", "description": "Push (upserted)" }, + "crds_rx_count_dropped_pull_response_stale": { "type": "UInt64", "description": "Pull Response (newer entry already present in table)" }, + "crds_rx_count_dropped_pull_response_wallclock": { "type": "UInt64", "description": "Pull Response (outside expiry window and no contact info entry)" }, + "crds_rx_count_dropped_pull_response_duplicate": { "type": "UInt64", "description": "Pull Response (duplicate)" }, + "crds_rx_count_dropped_push_stale": { "type": "UInt64", "description": "Push (newer entry already present in table)" }, + "crds_rx_count_dropped_push_duplicate": { "type": "UInt64", "description": "Push (duplicate)" } + } + }, + "message_tx_count": { + "type": "Flatten", + "description": "Number of gossip messages sent", + "fields": { + "message_tx_count_pull_request": { "type": "UInt64", "description": "Pull Request" }, + "message_tx_count_pull_response": { "type": "UInt64", "description": "Pull Response" }, + "message_tx_count_push": { "type": "UInt64", "description": "Push" }, + "message_tx_count_prune": { "type": "UInt64", "description": "Prune" }, + "message_tx_count_ping": { "type": "UInt64", "description": "Ping" }, + "message_tx_count_pong": { "type": "UInt64", "description": "Pong" } + } + }, + "message_tx_bytes": { + "type": "Flatten", + "description": "Total wire bytes sent in gossip messages", + "fields": { + "message_tx_bytes_pull_request": { "type": "UInt64", "description": "Pull Request" }, + "message_tx_bytes_pull_response": { "type": "UInt64", "description": "Pull Response" }, + "message_tx_bytes_push": { "type": "UInt64", "description": "Push" }, + 
"message_tx_bytes_prune": { "type": "UInt64", "description": "Prune" }, + "message_tx_bytes_ping": { "type": "UInt64", "description": "Ping" }, + "message_tx_bytes_pong": { "type": "UInt64", "description": "Pong" } + } + }, + "crds_tx_push_count": { + "type": "Flatten", + "description": "Number of CRDS values sent in push messages", + "fields": { + "crds_tx_push_count_contact_info_v1": { "type": "UInt64", "description": "Contact Info V1" }, + "crds_tx_push_count_vote": { "type": "UInt64", "description": "Vote" }, + "crds_tx_push_count_lowest_slot": { "type": "UInt64", "description": "Lowest Slot" }, + "crds_tx_push_count_snapshot_hashes": { "type": "UInt64", "description": "Snapshot Hashes" }, + "crds_tx_push_count_accounts_hashes": { "type": "UInt64", "description": "Accounts Hashes" }, + "crds_tx_push_count_epoch_slots": { "type": "UInt64", "description": "Epoch Slots" }, + "crds_tx_push_count_version_v1": { "type": "UInt64", "description": "Version V1" }, + "crds_tx_push_count_version_v2": { "type": "UInt64", "description": "Version V2" }, + "crds_tx_push_count_node_instance": { "type": "UInt64", "description": "Node Instance" }, + "crds_tx_push_count_duplicate_shred": { "type": "UInt64", "description": "Duplicate Shred" }, + "crds_tx_push_count_incremental_snapshot_hashes": { "type": "UInt64", "description": "Incremental Snapshot Hashes" }, + "crds_tx_push_count_contact_info_v2": { "type": "UInt64", "description": "Contact Info V2" }, + "crds_tx_push_count_restart_last_voted_fork_slots": { "type": "UInt64", "description": "Restart Last Voted Fork Slots" }, + "crds_tx_push_count_restart_heaviest_fork": { "type": "UInt64", "description": "Restart Heaviest Fork" } + } + }, + "crds_tx_push_bytes": { + "type": "Flatten", + "description": "Total wire bytes of CRDS sent out in push messages", + "fields": { + "crds_tx_push_bytes_contact_info_v1": { "type": "UInt64", "description": "Contact Info V1" }, + "crds_tx_push_bytes_vote": { "type": "UInt64", "description": 
"Vote" }, + "crds_tx_push_bytes_lowest_slot": { "type": "UInt64", "description": "Lowest Slot" }, + "crds_tx_push_bytes_snapshot_hashes": { "type": "UInt64", "description": "Snapshot Hashes" }, + "crds_tx_push_bytes_accounts_hashes": { "type": "UInt64", "description": "Accounts Hashes" }, + "crds_tx_push_bytes_epoch_slots": { "type": "UInt64", "description": "Epoch Slots" }, + "crds_tx_push_bytes_version_v1": { "type": "UInt64", "description": "Version V1" }, + "crds_tx_push_bytes_version_v2": { "type": "UInt64", "description": "Version V2" }, + "crds_tx_push_bytes_node_instance": { "type": "UInt64", "description": "Node Instance" }, + "crds_tx_push_bytes_duplicate_shred": { "type": "UInt64", "description": "Duplicate Shred" }, + "crds_tx_push_bytes_incremental_snapshot_hashes": { "type": "UInt64", "description": "Incremental Snapshot Hashes" }, + "crds_tx_push_bytes_contact_info_v2": { "type": "UInt64", "description": "Contact Info V2" }, + "crds_tx_push_bytes_restart_last_voted_fork_slots": { "type": "UInt64", "description": "Restart Last Voted Fork Slots" }, + "crds_tx_push_bytes_restart_heaviest_fork": { "type": "UInt64", "description": "Restart Heaviest Fork" } + } + }, + "crds_tx_pull_response_count": { + "type": "Flatten", + "description": "Number of CRDS values sent in pull response messages", + "fields": { + "crds_tx_pull_response_count_contact_info_v1": { "type": "UInt64", "description": "Contact Info V1" }, + "crds_tx_pull_response_count_vote": { "type": "UInt64", "description": "Vote" }, + "crds_tx_pull_response_count_lowest_slot": { "type": "UInt64", "description": "Lowest Slot" }, + "crds_tx_pull_response_count_snapshot_hashes": { "type": "UInt64", "description": "Snapshot Hashes" }, + "crds_tx_pull_response_count_accounts_hashes": { "type": "UInt64", "description": "Accounts Hashes" }, + "crds_tx_pull_response_count_epoch_slots": { "type": "UInt64", "description": "Epoch Slots" }, + "crds_tx_pull_response_count_version_v1": { "type": "UInt64", 
"description": "Version V1" }, + "crds_tx_pull_response_count_version_v2": { "type": "UInt64", "description": "Version V2" }, + "crds_tx_pull_response_count_node_instance": { "type": "UInt64", "description": "Node Instance" }, + "crds_tx_pull_response_count_duplicate_shred": { "type": "UInt64", "description": "Duplicate Shred" }, + "crds_tx_pull_response_count_incremental_snapshot_hashes": { "type": "UInt64", "description": "Incremental Snapshot Hashes" }, + "crds_tx_pull_response_count_contact_info_v2": { "type": "UInt64", "description": "Contact Info V2" }, + "crds_tx_pull_response_count_restart_last_voted_fork_slots": { "type": "UInt64", "description": "Restart Last Voted Fork Slots" }, + "crds_tx_pull_response_count_restart_heaviest_fork": { "type": "UInt64", "description": "Restart Heaviest Fork" } + } + }, + "crds_tx_pull_response_bytes": { + "type": "Flatten", + "description": "Total wire bytes of CRDS sent out in pull response messages", + "fields": { + "crds_tx_pull_response_bytes_contact_info_v1": { "type": "UInt64", "description": "Contact Info V1" }, + "crds_tx_pull_response_bytes_vote": { "type": "UInt64", "description": "Vote" }, + "crds_tx_pull_response_bytes_lowest_slot": { "type": "UInt64", "description": "Lowest Slot" }, + "crds_tx_pull_response_bytes_snapshot_hashes": { "type": "UInt64", "description": "Snapshot Hashes" }, + "crds_tx_pull_response_bytes_accounts_hashes": { "type": "UInt64", "description": "Accounts Hashes" }, + "crds_tx_pull_response_bytes_epoch_slots": { "type": "UInt64", "description": "Epoch Slots" }, + "crds_tx_pull_response_bytes_version_v1": { "type": "UInt64", "description": "Version V1" }, + "crds_tx_pull_response_bytes_version_v2": { "type": "UInt64", "description": "Version V2" }, + "crds_tx_pull_response_bytes_node_instance": { "type": "UInt64", "description": "Node Instance" }, + "crds_tx_pull_response_bytes_duplicate_shred": { "type": "UInt64", "description": "Duplicate Shred" }, + 
"crds_tx_pull_response_bytes_incremental_snapshot_hashes": { "type": "UInt64", "description": "Incremental Snapshot Hashes" }, + "crds_tx_pull_response_bytes_contact_info_v2": { "type": "UInt64", "description": "Contact Info V2" }, + "crds_tx_pull_response_bytes_restart_last_voted_fork_slots": { "type": "UInt64", "description": "Restart Last Voted Fork Slots" }, + "crds_tx_pull_response_bytes_restart_heaviest_fork": { "type": "UInt64", "description": "Restart Heaviest Fork" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_gossvf.json b/src/disco/events/schema/metrics_gossvf.json new file mode 100644 index 00000000000..7597a363f0e --- /dev/null +++ b/src/disco/events/schema/metrics_gossvf.json @@ -0,0 +1,97 @@ +{ + "name": "metrics_gossvf", + "id": 1028, + "description": "Metrics snapshot for the gossvf tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "message_rx_count": { + "type": "Flatten", + "description": "Number of gossip messages processed", + "fields": { + "message_rx_count_success_pull_request": { "type": "UInt64", "description": "Pull Request (success)" }, + "message_rx_count_success_pull_response": { "type": "UInt64", "description": "Pull Response (success)" }, + "message_rx_count_success_push": { "type": "UInt64", "description": "Push (success)" }, + "message_rx_count_success_prune": { "type": "UInt64", "description": "Prune (success)" }, + "message_rx_count_success_ping": { "type": "UInt64", "description": "Ping (success)" }, + "message_rx_count_success_pong": { "type": "UInt64", "description": "Pong (success)" }, + "message_rx_count_dropped_unparseable": { "type": "UInt64", "description": "Unparseable" }, + "message_rx_count_dropped_pull_request_not_contact_info": { "type": "UInt64", "description": "Pull Request (not contact info)" }, + "message_rx_count_dropped_pull_request_loopback": { "type": "UInt64", "description": "Pull Request (loopback)" }, + 
"message_rx_count_dropped_pull_request_inactive": { "type": "UInt64", "description": "Pull Request (inactive)" }, + "message_rx_count_dropped_pull_request_wallclock": { "type": "UInt64", "description": "Pull Request (wallclock)" }, + "message_rx_count_dropped_pull_request_signature": { "type": "UInt64", "description": "Pull Request (signature)" }, + "message_rx_count_dropped_pull_request_shred_version": { "type": "UInt64", "description": "Pull Request (shred version)" }, + "message_rx_count_dropped_prune_destination": { "type": "UInt64", "description": "Prune (destination)" }, + "message_rx_count_dropped_prune_wallclock": { "type": "UInt64", "description": "Prune (wallclock)" }, + "message_rx_count_dropped_prune_signature": { "type": "UInt64", "description": "Prune (signature)" }, + "message_rx_count_dropped_push_no_valid_crds": { "type": "UInt64", "description": "Push (no valid crds)" }, + "message_rx_count_dropped_pull_response_no_valid_crds": { "type": "UInt64", "description": "Pull Response (no valid crds)" }, + "message_rx_count_dropped_ping_signature": { "type": "UInt64", "description": "Ping (signature)" }, + "message_rx_count_dropped_pong_signature": { "type": "UInt64", "description": "Pong (signature)" } + } + }, + "message_rx_bytes": { + "type": "Flatten", + "description": "Total wire bytes of gossip messages processed", + "fields": { + "message_rx_bytes_success_pull_request": { "type": "UInt64", "description": "Pull Request (success)" }, + "message_rx_bytes_success_pull_response": { "type": "UInt64", "description": "Pull Response (success)" }, + "message_rx_bytes_success_push": { "type": "UInt64", "description": "Push (success)" }, + "message_rx_bytes_success_prune": { "type": "UInt64", "description": "Prune (success)" }, + "message_rx_bytes_success_ping": { "type": "UInt64", "description": "Ping (success)" }, + "message_rx_bytes_success_pong": { "type": "UInt64", "description": "Pong (success)" }, + "message_rx_bytes_dropped_unparseable": { "type": 
"UInt64", "description": "Unparseable" }, + "message_rx_bytes_dropped_pull_request_not_contact_info": { "type": "UInt64", "description": "Pull Request (not contact info)" }, + "message_rx_bytes_dropped_pull_request_loopback": { "type": "UInt64", "description": "Pull Request (loopback)" }, + "message_rx_bytes_dropped_pull_request_inactive": { "type": "UInt64", "description": "Pull Request (inactive)" }, + "message_rx_bytes_dropped_pull_request_wallclock": { "type": "UInt64", "description": "Pull Request (wallclock)" }, + "message_rx_bytes_dropped_pull_request_signature": { "type": "UInt64", "description": "Pull Request (signature)" }, + "message_rx_bytes_dropped_pull_request_shred_version": { "type": "UInt64", "description": "Pull Request (shred version)" }, + "message_rx_bytes_dropped_prune_destination": { "type": "UInt64", "description": "Prune (destination)" }, + "message_rx_bytes_dropped_prune_wallclock": { "type": "UInt64", "description": "Prune (wallclock)" }, + "message_rx_bytes_dropped_prune_signature": { "type": "UInt64", "description": "Prune (signature)" }, + "message_rx_bytes_dropped_push_no_valid_crds": { "type": "UInt64", "description": "Push (no valid crds)" }, + "message_rx_bytes_dropped_pull_response_no_valid_crds": { "type": "UInt64", "description": "Pull Response (no valid crds)" }, + "message_rx_bytes_dropped_ping_signature": { "type": "UInt64", "description": "Ping (signature)" }, + "message_rx_bytes_dropped_pong_signature": { "type": "UInt64", "description": "Pong (signature)" } + } + }, + "crds_rx_count": { + "type": "Flatten", + "description": "Number of CRDS values processed", + "fields": { + "crds_rx_count_success_pull_response": { "type": "UInt64", "description": "Pull Response (success)" }, + "crds_rx_count_success_push": { "type": "UInt64", "description": "Push (success)" }, + "crds_rx_count_dropped_pull_response_duplicate": { "type": "UInt64", "description": "Pull Response (duplicate)" }, + 
"crds_rx_count_dropped_pull_response_signature": { "type": "UInt64", "description": "Pull Response (signature)" }, + "crds_rx_count_dropped_pull_response_origin_no_contact_info": { "type": "UInt64", "description": "Pull Response (origin no contact info)" }, + "crds_rx_count_dropped_pull_response_origin_shred_version": { "type": "UInt64", "description": "Pull Response (origin shred version)" }, + "crds_rx_count_dropped_pull_response_inactive": { "type": "UInt64", "description": "Pull Response (inactive)" }, + "crds_rx_count_dropped_push_signature": { "type": "UInt64", "description": "Push (signature)" }, + "crds_rx_count_dropped_push_origin_no_contact_info": { "type": "UInt64", "description": "Push (origin no contact info)" }, + "crds_rx_count_dropped_push_origin_shred_version": { "type": "UInt64", "description": "Push (origin shred version)" }, + "crds_rx_count_dropped_push_inactive": { "type": "UInt64", "description": "Push (inactive)" }, + "crds_rx_count_dropped_push_wallclock": { "type": "UInt64", "description": "Push (wallclock)" } + } + }, + "crds_rx_bytes": { + "type": "Flatten", + "description": "Total wire bytes of CRDS values processed", + "fields": { + "crds_rx_bytes_success_pull_response": { "type": "UInt64", "description": "Pull Response (success)" }, + "crds_rx_bytes_success_push": { "type": "UInt64", "description": "Push (success)" }, + "crds_rx_bytes_dropped_pull_response_duplicate": { "type": "UInt64", "description": "Pull Response (duplicate)" }, + "crds_rx_bytes_dropped_pull_response_signature": { "type": "UInt64", "description": "Pull Response (signature)" }, + "crds_rx_bytes_dropped_pull_response_origin_no_contact_info": { "type": "UInt64", "description": "Pull Response (origin no contact info)" }, + "crds_rx_bytes_dropped_pull_response_origin_shred_version": { "type": "UInt64", "description": "Pull Response (origin shred version)" }, + "crds_rx_bytes_dropped_pull_response_inactive": { "type": "UInt64", "description": "Pull Response (inactive)" 
}, + "crds_rx_bytes_dropped_push_signature": { "type": "UInt64", "description": "Push (signature)" }, + "crds_rx_bytes_dropped_push_origin_no_contact_info": { "type": "UInt64", "description": "Push (origin no contact info)" }, + "crds_rx_bytes_dropped_push_origin_shred_version": { "type": "UInt64", "description": "Push (origin shred version)" }, + "crds_rx_bytes_dropped_push_inactive": { "type": "UInt64", "description": "Push (inactive)" }, + "crds_rx_bytes_dropped_push_wallclock": { "type": "UInt64", "description": "Push (wallclock)" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_gui.json b/src/disco/events/schema/metrics_gui.json new file mode 100644 index 00000000000..1a934c28e91 --- /dev/null +++ b/src/disco/events/schema/metrics_gui.json @@ -0,0 +1,15 @@ +{ + "name": "metrics_gui", + "id": 1016, + "description": "Metrics snapshot for the gui tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "connection_count": { "type": "UInt64", "description": "The number of active http connections to the GUI service, excluding connections that have been upgraded to a WebSocket connection" }, + "websocket_connection_count": { "type": "UInt64", "description": "The number of active websocket connections to the GUI service" }, + "websocket_frames_sent": { "type": "UInt64", "description": "The total number of websocket frames sent to all connections to the GUI service" }, + "websocket_frames_received": { "type": "UInt64", "description": "The total number of websocket frames received from all connections to the GUI service" }, + "bytes_written": { "type": "UInt64", "description": "The total number of bytes written to all connections to the GUI service" }, + "bytes_read": { "type": "UInt64", "description": "The total number of bytes read from all connections to the GUI service" } + } +} diff --git a/src/disco/events/schema/metrics_ipecho.json b/src/disco/events/schema/metrics_ipecho.json new file mode 100644 index 
00000000000..53bb392d7d6 --- /dev/null +++ b/src/disco/events/schema/metrics_ipecho.json @@ -0,0 +1,15 @@ +{ + "name": "metrics_ipecho", + "id": 1027, + "description": "Metrics snapshot for the ipecho tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "current_shred_version": { "type": "UInt64", "description": "The current shred version used by the validator" }, + "connection_count": { "type": "UInt64", "description": "The number of active connections to the ipecho service" }, + "connections_closed_ok": { "type": "UInt64", "description": "The number of connections to the ipecho service that have been made and closed normally" }, + "connections_closed_error": { "type": "UInt64", "description": "The number of connections to the ipecho service that have been made and closed abnormally" }, + "bytes_read": { "type": "UInt64", "description": "The total number of bytes read from all connections to the ipecho service" }, + "bytes_written": { "type": "UInt64", "description": "The total number of bytes written to all connections to the ipecho service" } + } +} diff --git a/src/disco/events/schema/metrics_metric.json b/src/disco/events/schema/metrics_metric.json new file mode 100644 index 00000000000..0b714d5680c --- /dev/null +++ b/src/disco/events/schema/metrics_metric.json @@ -0,0 +1,13 @@ +{ + "name": "metrics_metric", + "id": 1012, + "description": "Metrics snapshot for the metric tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "boot_timestamp_nanos": { "type": "UInt64", "description": "Timestamp when validator was started (nanoseconds since epoch)" }, + "connection_count": { "type": "UInt64", "description": "The number of active http connections to the Prometheus endpoint" }, + "bytes_written": { "type": "UInt64", "description": "The total number of bytes written to all responses on the Prometheus endpoint" }, + "bytes_read": { "type": "UInt64", "description": "The 
total number of bytes read from all requests to the Prometheus endpoint" } + } +} diff --git a/src/disco/events/schema/metrics_net.json b/src/disco/events/schema/metrics_net.json new file mode 100644 index 00000000000..8e5c2efcaab --- /dev/null +++ b/src/disco/events/schema/metrics_net.json @@ -0,0 +1,37 @@ +{ + "name": "metrics_net", + "id": 1000, + "description": "Metrics snapshot for the net tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "rx_pkt_cnt": { "type": "UInt64", "description": "Packet receive count" }, + "rx_bytes_total": { "type": "UInt64", "description": "Total number of bytes received (including Ethernet header)" }, + "rx_undersz_cnt": { "type": "UInt64", "description": "Number of incoming packets dropped due to being too small" }, + "rx_fill_blocked_cnt": { "type": "UInt64", "description": "Number of incoming packets dropped due to fill ring being full" }, + "rx_backpressure_cnt": { "type": "UInt64", "description": "Number of incoming packets dropped due to backpressure" }, + "rx_busy_cnt": { "type": "UInt64", "description": "Number of receive buffers currently busy" }, + "rx_idle_cnt": { "type": "UInt64", "description": "Number of receive buffers currently idle" }, + "tx_submit_cnt": { "type": "UInt64", "description": "Number of packet transmit jobs submitted" }, + "tx_complete_cnt": { "type": "UInt64", "description": "Number of packet transmit jobs marked as completed by the kernel" }, + "tx_bytes_total": { "type": "UInt64", "description": "Total number of bytes transmitted (including Ethernet header)" }, + "tx_route_fail_cnt": { "type": "UInt64", "description": "Number of packet transmit jobs dropped due to route failure" }, + "tx_neighbor_fail_cnt": { "type": "UInt64", "description": "Number of packet transmit jobs dropped due to unresolved neighbor" }, + "tx_full_fail_cnt": { "type": "UInt64", "description": "Number of packet transmit jobs dropped due to XDP TX ring full or missing 
completions" }, + "tx_busy_cnt": { "type": "UInt64", "description": "Number of transmit buffers currently busy" }, + "tx_idle_cnt": { "type": "UInt64", "description": "Number of transmit buffers currently idle" }, + "xsk_tx_wakeup_cnt": { "type": "UInt64", "description": "Number of XSK sendto syscalls dispatched" }, + "xsk_rx_wakeup_cnt": { "type": "UInt64", "description": "Number of XSK recvmsg syscalls dispatched" }, + "xdp_rx_dropped_other": { "type": "UInt64", "description": "Dropped for other reasons (xdp_statistics_v0.rx_dropped)" }, + "xdp_rx_invalid_descs": { "type": "UInt64", "description": "Dropped due to invalid descriptor (xdp_statistics_v0.rx_invalid_descs)" }, + "xdp_tx_invalid_descs": { "type": "UInt64", "description": "Dropped due to invalid descriptor (xdp_statistics_v0.tx_invalid_descs)" }, + "xdp_rx_ring_full": { "type": "UInt64", "description": "Dropped due to rx ring being full (xdp_statistics_v1.rx_ring_full)" }, + "xdp_rx_fill_ring_empty_descs": { "type": "UInt64", "description": "Failed to retrieve item from fill ring (xdp_statistics_v1.rx_fill_ring_empty_descs)" }, + "xdp_tx_ring_empty_descs": { "type": "UInt64", "description": "Failed to retrieve item from tx ring (xdp_statistics_v1.tx_ring_empty_descs)" }, + "rx_gre_cnt": { "type": "UInt64", "description": "Number of valid GRE packets received" }, + "rx_gre_invalid_cnt": { "type": "UInt64", "description": "Number of invalid GRE packets received" }, + "rx_gre_ignored_cnt": { "type": "UInt64", "description": "Number of received but ignored GRE packets" }, + "tx_gre_cnt": { "type": "UInt64", "description": "Number of GRE packet transmit jobs submitted" }, + "tx_gre_route_fail_cnt": { "type": "UInt64", "description": "Number of GRE packets transmit jobs dropped due to route failure" } + } +} diff --git a/src/disco/events/schema/metrics_netlnk.json b/src/disco/events/schema/metrics_netlnk.json new file mode 100644 index 00000000000..be78708519b --- /dev/null +++ 
b/src/disco/events/schema/metrics_netlnk.json @@ -0,0 +1,34 @@ +{ + "name": "metrics_netlnk", + "id": 1019, + "description": "Metrics snapshot for the netlnk tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "drop_events": { "type": "UInt64", "description": "Number of netlink drop events caught" }, + "link_full_syncs": { "type": "UInt64", "description": "Number of full link table syncs done" }, + "route_full_syncs": { "type": "UInt64", "description": "Number of full route table syncs done" }, + "updates": { + "type": "Flatten", + "description": "Number of netlink live updates processed", + "fields": { + "updates_link": { "type": "UInt64", "description": "Link" }, + "updates_neigh": { "type": "UInt64", "description": "Neighbor Table Entry" }, + "updates_ipv4_route": { "type": "UInt64", "description": "IPv4 Route Table Entry" } + } + }, + "interface_count": { "type": "UInt64", "description": "Number of network interfaces" }, + "route_count": { + "type": "Flatten", + "description": "Number of IPv4 routes", + "fields": { + "route_count_local": { "type": "UInt64", "description": "Local" }, + "route_count_main": { "type": "UInt64", "description": "Main" } + } + }, + "neigh_probe_sent": { "type": "UInt64", "description": "Number of neighbor solicit requests sent to kernel" }, + "neigh_probe_fails": { "type": "UInt64", "description": "Number of neighbor solicit requests that failed to send (kernel too slow)" }, + "neigh_probe_rate_limit_host": { "type": "UInt64", "description": "Number of neighbor solicit requests that exceeded the per-host rate limit" }, + "neigh_probe_rate_limit_global": { "type": "UInt64", "description": "Number of neighbor solicit requests that exceeded the global rate limit" } + } +} diff --git a/src/disco/events/schema/metrics_pack.json b/src/disco/events/schema/metrics_pack.json new file mode 100644 index 00000000000..2fd8111f965 --- /dev/null +++ b/src/disco/events/schema/metrics_pack.json @@ -0,0 +1,107 @@ +{ + 
"name": "metrics_pack", + "id": 1006, + "description": "Metrics snapshot for the pack tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "normal_transaction_received": { "type": "UInt64", "description": "Count of transactions received via the normal TPU path" }, + "transaction_inserted": { + "type": "Flatten", + "description": "Result of inserting a transaction into the pack object", + "fields": { + "transaction_inserted_nonce_conflict": { "type": "UInt64", "description": "Bundle with two conflicting durable nonce transactions" }, + "transaction_inserted_bundle_blacklist": { "type": "UInt64", "description": "Transaction uses an account on the bundle blacklist" }, + "transaction_inserted_invalid_nonce": { "type": "UInt64", "description": "Transaction is an invalid durable nonce transaction" }, + "transaction_inserted_write_sysvar": { "type": "UInt64", "description": "Transaction tries to write to a sysvar" }, + "transaction_inserted_estimation_fail": { "type": "UInt64", "description": "Estimating compute cost and/or fee failed" }, + "transaction_inserted_duplicate_account": { "type": "UInt64", "description": "Transaction included an account address twice" }, + "transaction_inserted_too_many_accounts": { "type": "UInt64", "description": "Transaction tried to load too many accounts" }, + "transaction_inserted_too_large": { "type": "UInt64", "description": "Transaction requests too many CUs" }, + "transaction_inserted_expired": { "type": "UInt64", "description": "Transaction already expired" }, + "transaction_inserted_addr_lut": { "type": "UInt64", "description": "Transaction loaded accounts from a lookup table" }, + "transaction_inserted_unaffordable": { "type": "UInt64", "description": "Fee payer's balance below transaction fee" }, + "transaction_inserted_duplicate": { "type": "UInt64", "description": "Pack aware of transaction with same signature" }, + "transaction_inserted_nonce_priority": { "type": "UInt64", 
"description": "Transaction's fee was too low given its compute unit requirement and another competing transactions that uses the same durable nonce" }, + "transaction_inserted_priority": { "type": "UInt64", "description": "Transaction's fee was too low given its compute unit requirement and other competing transactions" }, + "transaction_inserted_nonvote_add": { "type": "UInt64", "description": "Transaction that was not a simple vote added to pending transactions" }, + "transaction_inserted_vote_add": { "type": "UInt64", "description": "Simple vote transaction was added to pending transactions" }, + "transaction_inserted_nonvote_replace": { "type": "UInt64", "description": "Transaction that was not a simple vote replaced a lower priority transaction" }, + "transaction_inserted_vote_replace": { "type": "UInt64", "description": "Simple vote transaction replaced a lower priority transaction" }, + "transaction_inserted_nonce_nonvote_add": { "type": "UInt64", "description": "Durable nonce transaction added to pending transactions" }, + "transaction_inserted_unused": { "type": "UInt64", "description": "Unused because durable nonce transactions can't be simple votes" }, + "transaction_inserted_nonce_nonvote_replace": { "type": "UInt64", "description": "Durable nonce transaction replaced a lower priority transaction, likely one that uses the same durable nonce" } + } + }, + "metric_timing": { + "type": "Flatten", + "description": "Time in nanos spent in each state", + "fields": { + "metric_timing_no_txn_no_bank_no_leader_no_microblock": { "type": "UInt64", "description": "Pack had no transactions available, and wasn't leader" }, + "metric_timing_txn_no_bank_no_leader_no_microblock": { "type": "UInt64", "description": "Pack had transactions available, but wasn't leader or had hit a limit" }, + "metric_timing_no_txn_bank_no_leader_no_microblock": { "type": "UInt64", "description": "Pack had no transactions available, had banks but wasn't leader" }, + 
"metric_timing_txn_bank_no_leader_no_microblock": { "type": "UInt64", "description": "Pack had transactions available, had banks but wasn't leader" }, + "metric_timing_no_txn_no_bank_leader_no_microblock": { "type": "UInt64", "description": "Pack had no transactions available, and was leader but had no available banks" }, + "metric_timing_txn_no_bank_leader_no_microblock": { "type": "UInt64", "description": "Pack had transactions available, was leader, but had no available banks" }, + "metric_timing_no_txn_bank_leader_no_microblock": { "type": "UInt64", "description": "Pack had available banks but no transactions" }, + "metric_timing_txn_bank_leader_no_microblock": { "type": "UInt64", "description": "Pack had banks and transactions available but couldn't schedule anything non-conflicting" }, + "metric_timing_no_txn_no_bank_no_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock while not leader" }, + "metric_timing_txn_no_bank_no_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock while not leader" }, + "metric_timing_no_txn_bank_no_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock while not leader" }, + "metric_timing_txn_bank_no_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock while not leader" }, + "metric_timing_no_txn_no_bank_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock but all banks were busy" }, + "metric_timing_txn_no_bank_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock but all banks were busy" }, + "metric_timing_no_txn_bank_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock and now has no transactions" }, + "metric_timing_txn_bank_leader_microblock": { "type": "UInt64", "description": "Pack scheduled a non-empty microblock" } + } + }, + 
"transaction_dropped_from_extra": { "type": "UInt64", "description": "Transactions dropped from the extra transaction storage because it was full" }, + "transaction_inserted_to_extra": { "type": "UInt64", "description": "Transactions inserted into the extra transaction storage because pack's primary storage was full" }, + "transaction_inserted_from_extra": { "type": "UInt64", "description": "Transactions pulled from the extra transaction storage and inserted into pack's primary storage" }, + "transaction_expired": { "type": "UInt64", "description": "Transactions deleted from pack because their TTL expired" }, + "transaction_deleted": { "type": "UInt64", "description": "Transactions dropped from pack because they were requested to be deleted" }, + "transaction_already_executed": { "type": "UInt64", "description": "Transactions dropped from pack because they were already executed (in either the replay or leader pipeline)" }, + "transaction_dropped_partial_bundle": { "type": "UInt64", "description": "Transactions dropped from pack because they were part of a partial bundle" }, + "available_transactions": { + "type": "Flatten", + "description": "The total number of pending transactions in pack's pool that are available to be scheduled", + "fields": { + "available_transactions_all": { "type": "UInt64", "description": "All transactions in any treap" }, + "available_transactions_regular": { "type": "UInt64", "description": "Non-votes in the main treap" }, + "available_transactions_votes": { "type": "UInt64", "description": "Simple votes" }, + "available_transactions_conflicting": { "type": "UInt64", "description": "Non-votes that write to a hotly-contended account" }, + "available_transactions_bundles": { "type": "UInt64", "description": "Transactions that are part of a bundle" } + } + }, + "pending_transactions_heap_size": { "type": "UInt64", "description": "The maximum number of pending transactions that pack can consider. 
This value is fixed at Firedancer startup but is a useful reference for AvailableTransactions" }, + "smallest_pending_transaction": { "type": "UInt64", "description": "A lower bound on the smallest non-vote transaction (in cost units) that is immediately available for scheduling" }, + "microblock_per_block_limit": { "type": "UInt64", "description": "The number of times pack did not pack a microblock because the limit on microblocks/block had been reached" }, + "data_per_block_limit": { "type": "UInt64", "description": "The number of times pack did not pack a microblock because it reached the data per block limit at the start of trying to schedule a microblock" }, + "transaction_schedule": { + "type": "Flatten", + "description": "Result of trying to consider a transaction for scheduling", + "fields": { + "transaction_schedule_taken": { "type": "UInt64", "description": "Pack included the transaction in the microblock" }, + "transaction_schedule_cu_limit": { "type": "UInt64", "description": "Pack skipped the transaction because it would have exceeded the block CU limit" }, + "transaction_schedule_fast_path": { "type": "UInt64", "description": "Pack skipped the transaction because of account conflicts using the fast bitvector check" }, + "transaction_schedule_byte_limit": { "type": "UInt64", "description": "Pack skipped the transaction because it would have exceeded the block data size limit" }, + "transaction_schedule_write_cost": { "type": "UInt64", "description": "Pack skipped the transaction because it would have caused a writable account to exceed the per-account block write cost limit" }, + "transaction_schedule_slow_path": { "type": "UInt64", "description": "Pack skipped the transaction because of account conflicts using the full slow check" }, + "transaction_schedule_defer_skip": { "type": "UInt64", "description": "Pack skipped the transaction because it previously exceeded the per-account block write cost limit too many times" } + } + }, + "bundle_crank_status": { + 
"type": "Flatten", + "description": "Result of considering whether bundle cranks are needed", + "fields": { + "bundle_crank_status_not_needed": { "type": "UInt64", "description": "On-chain state in the correct state" }, + "bundle_crank_status_inserted": { "type": "UInt64", "description": "Inserted an initializer bundle to update the on-chain state" }, + "bundle_crank_status_creation_failed": { "type": "UInt64", "description": "Tried to insert an initializer bundle to update the on-chain state, but creation failed" }, + "bundle_crank_status_insertion_failed": { "type": "UInt64", "description": "Tried to insert an initializer bundle to update the on-chain state, but insertion failed" } + } + }, + "cus_consumed_in_block": { "type": "UInt64", "description": "The number of cost units consumed in the current block, or 0 if pack is not currently packing a block" }, + "delete_missed": { "type": "UInt64", "description": "Count of attempts to delete a transaction that wasn't found" }, + "delete_hit": { "type": "UInt64", "description": "Count of attempts to delete a transaction that was found and deleted" } + } +} diff --git a/src/disco/events/schema/metrics_poh.json b/src/disco/events/schema/metrics_poh.json new file mode 100644 index 00000000000..13129f7a694 --- /dev/null +++ b/src/disco/events/schema/metrics_poh.json @@ -0,0 +1,9 @@ +{ + "name": "metrics_poh", + "id": 1008, + "description": "Metrics snapshot for the poh tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" } + } +} diff --git a/src/disco/events/schema/metrics_quic.json b/src/disco/events/schema/metrics_quic.json new file mode 100644 index 00000000000..4ded81365e8 --- /dev/null +++ b/src/disco/events/schema/metrics_quic.json @@ -0,0 +1,164 @@ +{ + "name": "metrics_quic", + "id": 1001, + "description": "Metrics snapshot for the quic tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "txns_overrun": { "type": 
"UInt64", "description": "Count of txns overrun before reassembled (too small txn_reassembly_count)" }, + "txn_reasms_started": { "type": "UInt64", "description": "Count of fragmented txn receive ops started" }, + "txn_reasms_active": { "type": "UInt64", "description": "Number of fragmented txn receive ops currently active" }, + "frags_ok": { "type": "UInt64", "description": "Count of txn frags received" }, + "frags_gap": { "type": "UInt64", "description": "Count of txn frags dropped due to data gap" }, + "frags_dup": { "type": "UInt64", "description": "Count of txn frags dropped due to dup (stream already completed)" }, + "txns_received": { + "type": "Flatten", + "description": "Count of txns received via TPU", + "fields": { + "txns_received_udp": { "type": "UInt64", "description": "TPU/UDP" }, + "txns_received_quic_fast": { "type": "UInt64", "description": "TPU/QUIC unfragmented" }, + "txns_received_quic_frag": { "type": "UInt64", "description": "TPU/QUIC fragmented" } + } + }, + "txns_abandoned": { "type": "UInt64", "description": "Count of txns abandoned because a conn was lost" }, + "txn_undersz": { "type": "UInt64", "description": "Count of txns received via QUIC dropped because they were too small" }, + "txn_oversz": { "type": "UInt64", "description": "Count of txns received via QUIC dropped because they were too large" }, + "legacy_txn_undersz": { "type": "UInt64", "description": "Count of packets received on the non-QUIC port that were too small to be a valid IP packet" }, + "legacy_txn_oversz": { "type": "UInt64", "description": "Count of packets received on the non-QUIC port that were too large to be a valid transaction" }, + "received_packets": { "type": "UInt64", "description": "Number of IP packets received" }, + "received_bytes": { "type": "UInt64", "description": "Total bytes received (including IP, UDP, QUIC headers)" }, + "sent_packets": { "type": "UInt64", "description": "Number of IP packets sent" }, + "sent_bytes": { "type": "UInt64", 
"description": "Total bytes sent (including IP, UDP, QUIC headers)" }, + "connections_alloc": { "type": "UInt64", "description": "The number of currently allocated QUIC connections" }, + "connections_state": { + "type": "Flatten", + "description": "The number of QUIC connections in each state", + "fields": { + "connections_state_invalid": { "type": "UInt64", "description": "Freed" }, + "connections_state_handshake": { "type": "UInt64", "description": "Handshaking peer" }, + "connections_state_handshake_complete": { "type": "UInt64", "description": "Handshake complete, confirming with peer" }, + "connections_state_active": { "type": "UInt64", "description": "Active connection" }, + "connections_state_peer_close": { "type": "UInt64", "description": "Peer requested close" }, + "connections_state_abort": { "type": "UInt64", "description": "Connection terminating due to error" }, + "connections_state_close_pending": { "type": "UInt64", "description": "Connection is closing" }, + "connections_state_dead": { "type": "UInt64", "description": "Connection about to be freed" } + } + }, + "connections_created": { "type": "UInt64", "description": "The total number of connections that have been created" }, + "connections_closed": { "type": "UInt64", "description": "Number of connections gracefully closed" }, + "connections_aborted": { "type": "UInt64", "description": "Number of connections aborted" }, + "connections_timed_out": { "type": "UInt64", "description": "Number of connections timed out" }, + "connections_retried": { "type": "UInt64", "description": "Number of connections established with retry" }, + "connection_error_no_slots": { "type": "UInt64", "description": "Number of connections that failed to create due to lack of slots" }, + "connection_error_retry_fail": { "type": "UInt64", "description": "Number of connections that failed during retry (e.g. 
invalid token)" }, + "pkt_no_conn": { + "type": "Flatten", + "description": "Number of packets with an unknown connection ID", + "fields": { + "pkt_no_conn_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_no_conn_retry": { "type": "UInt64", "description": "Retry" }, + "pkt_no_conn_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_no_conn_one_rtt": { "type": "UInt64", "description": "1-RTT" } + } + }, + "frame_tx_alloc": { + "type": "Flatten", + "description": "Results of attempts to acquire QUIC frame metadata", + "fields": { + "frame_tx_alloc_success": { "type": "UInt64", "description": "Success" }, + "frame_tx_alloc_fail_empty_pool": { "type": "UInt64", "description": "PktMetaPoolEmpty" }, + "frame_tx_alloc_fail_conn_max": { "type": "UInt64", "description": "ConnMaxedInflightFrames" } + } + }, + "initial_token_len": { + "type": "Flatten", + "description": "Number of Initial packets grouped by token length", + "fields": { + "initial_token_len_zero": { "type": "UInt64", "description": "No token" }, + "initial_token_len_fd_quic_len": { "type": "UInt64", "description": "fd_quic retry token length" }, + "initial_token_len_invalid_len": { "type": "UInt64", "description": "Invalid token length" } + } + }, + "handshakes_created": { "type": "UInt64", "description": "Number of handshake flows created" }, + "handshake_error_alloc_fail": { "type": "UInt64", "description": "Number of handshakes dropped due to alloc fail" }, + "handshake_evicted": { "type": "UInt64", "description": "Number of handshakes dropped due to eviction" }, + "stream_received_events": { "type": "UInt64", "description": "Number of stream RX events" }, + "stream_received_bytes": { "type": "UInt64", "description": "Total stream payload bytes received" }, + "received_frames": { + "type": "Flatten", + "description": "Number of QUIC frames received", + "fields": { + "received_frames_unknown": { "type": "UInt64", "description": "Unknown frame type" }, + "received_frames_ack": 
{ "type": "UInt64", "description": "ACK frame" }, + "received_frames_reset_stream": { "type": "UInt64", "description": "RESET_STREAM frame" }, + "received_frames_stop_sending": { "type": "UInt64", "description": "STOP_SENDING frame" }, + "received_frames_crypto": { "type": "UInt64", "description": "CRYPTO frame" }, + "received_frames_new_token": { "type": "UInt64", "description": "NEW_TOKEN frame" }, + "received_frames_stream": { "type": "UInt64", "description": "STREAM frame" }, + "received_frames_max_data": { "type": "UInt64", "description": "MAX_DATA frame" }, + "received_frames_max_stream_data": { "type": "UInt64", "description": "MAX_STREAM_DATA frame" }, + "received_frames_max_streams": { "type": "UInt64", "description": "MAX_STREAMS frame" }, + "received_frames_data_blocked": { "type": "UInt64", "description": "DATA_BLOCKED frame" }, + "received_frames_stream_data_blocked": { "type": "UInt64", "description": "STREAM_DATA_BLOCKED frame" }, + "received_frames_streams_blocked": { "type": "UInt64", "description": "STREAMS_BLOCKED(bidi) frame" }, + "received_frames_new_conn_id": { "type": "UInt64", "description": "NEW_CONN_ID frame" }, + "received_frames_retire_conn_id": { "type": "UInt64", "description": "RETIRE_CONN_ID frame" }, + "received_frames_path_challenge": { "type": "UInt64", "description": "PATH_CHALLENGE frame" }, + "received_frames_path_response": { "type": "UInt64", "description": "PATH_RESPONSE frame" }, + "received_frames_conn_close_quic": { "type": "UInt64", "description": "CONN_CLOSE(transport) frame" }, + "received_frames_conn_close_app": { "type": "UInt64", "description": "CONN_CLOSE(app) frame" }, + "received_frames_handshake_done": { "type": "UInt64", "description": "HANDSHAKE_DONE frame" }, + "received_frames_ping": { "type": "UInt64", "description": "PING frame" }, + "received_frames_padding": { "type": "UInt64", "description": "PADDING frame" } + } + }, + "ack_tx": { + "type": "Flatten", + "description": "ACK events", + "fields": { + 
"ack_tx_noop": { "type": "UInt64", "description": "Non-ACK-eliciting packet" }, + "ack_tx_new": { "type": "UInt64", "description": "New ACK range" }, + "ack_tx_merged": { "type": "UInt64", "description": "Merged into existing ACK range" }, + "ack_tx_drop": { "type": "UInt64", "description": "Out of buffers" }, + "ack_tx_cancel": { "type": "UInt64", "description": "ACK suppressed by handler" } + } + }, + "frame_fail_parse": { "type": "UInt64", "description": "Number of QUIC frames failed to parse" }, + "pkt_crypto_failed": { + "type": "Flatten", + "description": "Number of packets that failed decryption", + "fields": { + "pkt_crypto_failed_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_crypto_failed_early": { "type": "UInt64", "description": "Early data" }, + "pkt_crypto_failed_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_crypto_failed_app": { "type": "UInt64", "description": "App data" } + } + }, + "pkt_no_key": { + "type": "Flatten", + "description": "Number of packets that failed decryption due to missing key", + "fields": { + "pkt_no_key_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_no_key_early": { "type": "UInt64", "description": "Early data" }, + "pkt_no_key_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_no_key_app": { "type": "UInt64", "description": "App data" } + } + }, + "pkt_net_header_invalid": { "type": "UInt64", "description": "Number of packets dropped due to weird IP or UDP header" }, + "pkt_quic_header_invalid": { "type": "UInt64", "description": "Number of packets dropped due to weird QUIC header" }, + "pkt_undersz": { "type": "UInt64", "description": "Number of QUIC packets dropped due to being too small" }, + "pkt_oversz": { "type": "UInt64", "description": "Number of QUIC packets dropped due to being too large" }, + "pkt_verneg": { "type": "UInt64", "description": "Number of QUIC version negotiation packets received" }, + "retry_sent": { "type": "UInt64", 
"description": "Number of QUIC Retry packets sent" }, + "pkt_retransmissions": { + "type": "Flatten", + "description": "Number of QUIC packets that retransmitted", + "fields": { + "pkt_retransmissions_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_retransmissions_early": { "type": "UInt64", "description": "Early data" }, + "pkt_retransmissions_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_retransmissions_app": { "type": "UInt64", "description": "App data" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_repair.json b/src/disco/events/schema/metrics_repair.json new file mode 100644 index 00000000000..1536c1d41c0 --- /dev/null +++ b/src/disco/events/schema/metrics_repair.json @@ -0,0 +1,27 @@ +{ + "name": "metrics_repair", + "id": 1021, + "description": "Metrics snapshot for the repair tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "total_pkt_count": { "type": "UInt64", "description": "How many network packets we have sent, including reqs, pings, pongs, etc" }, + "sent_pkt_types": { + "type": "Flatten", + "description": "What types of client messages are we sending", + "fields": { + "sent_pkt_types_needed_window": { "type": "UInt64", "description": "Need Window" }, + "sent_pkt_types_needed_highest_window": { "type": "UInt64", "description": "Need Highest Window" }, + "sent_pkt_types_needed_orphan": { "type": "UInt64", "description": "Need Orphans" }, + "sent_pkt_types_pong": { "type": "UInt64", "description": "Pong" } + } + }, + "repaired_slots": { "type": "UInt64", "description": "Until which slots have we fully repaired" }, + "current_slot": { "type": "UInt64", "description": "Our view of the current cluster slot, max slot received" }, + "request_peers": { "type": "UInt64", "description": "How many peers have we requested" }, + "sign_tile_unavail": { "type": "UInt64", "description": "How many times no sign tiles were available to send request" }, + 
"eager_repair_aggresses": { "type": "UInt64", "description": "How many times we pass eager repair threshold" }, + "rerequest_queue": { "type": "UInt64", "description": "How many times we re-request a shred from the inflights queue" }, + "malformed_ping": { "type": "UInt64", "description": "How many times we received a malformed ping" } + } +} diff --git a/src/disco/events/schema/metrics_replay.json b/src/disco/events/schema/metrics_replay.json new file mode 100644 index 00000000000..22500a716ed --- /dev/null +++ b/src/disco/events/schema/metrics_replay.json @@ -0,0 +1,33 @@ +{ + "name": "metrics_replay", + "id": 1017, + "description": "Metrics snapshot for the replay tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "root_slot": { "type": "UInt64", "description": "The slot at which our node has most recently rooted" }, + "root_distance": { "type": "UInt64", "description": "The distance in slots between our current root and the current reset slot" }, + "leader_slot": { "type": "UInt64", "description": "The slot at which we are currently leader, or 0 if none" }, + "next_leader_slot": { "type": "UInt64", "description": "The slot at which we are next leader, or 0 if none. 
If we are currently leader, this is the same as the current leader slot" }, + "reset_slot": { "type": "UInt64", "description": "The slot at which we last reset the replay stage, or 0 if unknown" }, + "max_live_banks": { "type": "UInt64", "description": "The maximum number of banks we can have alive" }, + "live_banks": { "type": "UInt64", "description": "The number of banks we currently have alive" }, + "reasm_free": { "type": "UInt64", "description": "The number of free FEC sets in the reassembly queue" }, + "reasm_latest_slot": { "type": "UInt64", "description": "Slot of the latest FEC set in the reassembly queue that can be replayed" }, + "reasm_latest_fec_idx": { "type": "UInt64", "description": "FEC set index of the latest FEC set in the reassembly queue that can be replayed" }, + "slots_total": { "type": "UInt64", "description": "Count of slots replayed successfully" }, + "transactions_total": { "type": "UInt64", "description": "Count of transactions processed overall on the current fork" }, + "sched_full": { "type": "UInt64", "description": "Times where sched is full and a FEC set can't be processed" }, + "reasm_empty": { "type": "UInt64", "description": "Times where reasm is empty and a FEC set can't be processed" }, + "leader_bid_wait": { "type": "UInt64", "description": "Times where replay is blocked by the PoH tile not sending an end of leader message" }, + "banks_full": { "type": "UInt64", "description": "Times where banks are full and a FEC set can't be processed" }, + "storage_root_behind": { "type": "UInt64", "description": "Times where the storage root is behind the consensus root and can't be advanced" }, + "progcache_rooted": { "type": "UInt64", "description": "Number of program cache entries rooted" }, + "progcache_gc_root": { "type": "UInt64", "description": "Number of program cache entries garbage collected while rooting" }, + "accdb_created": { "type": "UInt64", "description": "Number of account database records created" }, + "accdb_reverted": 
{ "type": "UInt64", "description": "Number of account database records reverted" }, + "accdb_rooted": { "type": "UInt64", "description": "Number of account database entries rooted" }, + "accdb_gc_root": { "type": "UInt64", "description": "Number of account database entries garbage collected" }, + "accdb_reclaimed": { "type": "UInt64", "description": "Number of account database entries reclaimed (deletion rooted)" } + } +} diff --git a/src/disco/events/schema/metrics_resolf.json b/src/disco/events/schema/metrics_resolf.json new file mode 100644 index 00000000000..3927ec2df04 --- /dev/null +++ b/src/disco/events/schema/metrics_resolf.json @@ -0,0 +1,34 @@ +{ + "name": "metrics_resolf", + "id": 1030, + "description": "Metrics snapshot for the resolf tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "no_bank_drop": { "type": "UInt64", "description": "Count of transactions dropped because the bank was not available" }, + "stash_operation": { + "type": "Flatten", + "description": "Count of operations that happened on the transaction stash", + "fields": { + "stash_operation_inserted": { "type": "UInt64", "description": "A transaction with an unknown blockhash was added to the stash" }, + "stash_operation_overrun": { "type": "UInt64", "description": "A transaction with an unknown blockhash was dropped because the stash was full" }, + "stash_operation_published": { "type": "UInt64", "description": "A transaction with an unknown blockhash was published as the blockhash became known" }, + "stash_operation_removed": { "type": "UInt64", "description": "A transaction with an unknown blockhash was removed from the stash without publishing, due to a bad LUT resolved failure, or no bank. 
These errors are double counted with the respective metrics for those categories" } + } + }, + "lut_resolved": { + "type": "Flatten", + "description": "Count of address lookup tables resolved", + "fields": { + "lut_resolved_invalid_lookup_index": { "type": "UInt64", "description": "The transaction referenced an index in a LUT that didn't exist" }, + "lut_resolved_account_uninitialized": { "type": "UInt64", "description": "The account referenced as a LUT hasn't been initialized" }, + "lut_resolved_invalid_account_data": { "type": "UInt64", "description": "The account referenced as a LUT couldn't be parsed" }, + "lut_resolved_invalid_account_owner": { "type": "UInt64", "description": "The account referenced as a LUT wasn't owned by the ALUT program ID" }, + "lut_resolved_account_not_found": { "type": "UInt64", "description": "The account referenced as a LUT couldn't be found" }, + "lut_resolved_success": { "type": "UInt64", "description": "Resolved successfully" } + } + }, + "blockhash_expired": { "type": "UInt64", "description": "Count of transactions that failed to resolve because the blockhash was expired" }, + "transaction_bundle_peer_failure": { "type": "UInt64", "description": "Count of transactions that failed to resolve because a peer transaction in the bundle failed" } + } +} diff --git a/src/disco/events/schema/metrics_rpc.json b/src/disco/events/schema/metrics_rpc.json new file mode 100644 index 00000000000..afdd36a4dab --- /dev/null +++ b/src/disco/events/schema/metrics_rpc.json @@ -0,0 +1,9 @@ +{ + "name": "metrics_rpc", + "id": 1044, + "description": "Metrics snapshot for the rpc tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" } + } +} diff --git a/src/disco/events/schema/metrics_send.json b/src/disco/events/schema/metrics_send.json new file mode 100644 index 00000000000..cb7136a4736 --- /dev/null +++ b/src/disco/events/schema/metrics_send.json @@ -0,0 +1,258 @@ +{ + "name": "metrics_send", + "id": 1022, 
+ "description": "Metrics snapshot for the send tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "unstaked_ci": { "type": "UInt64", "description": "Total number of times we received contact info for an unstaked node" }, + "ci_removed": { "type": "UInt64", "description": "Total number of times we removed contact info" }, + "new_contact_info_quic_vote": { + "type": "Flatten", + "description": "Total number of contact infos received and handled for QUIC Vote port", + "fields": { + "new_contact_info_quic_vote_unroutable": { "type": "UInt64", "description": "Skipped (unroutable)" }, + "new_contact_info_quic_vote_initialized": { "type": "UInt64", "description": "Initialized" }, + "new_contact_info_quic_vote_changed": { "type": "UInt64", "description": "Contact info changed" }, + "new_contact_info_quic_vote_no_change": { "type": "UInt64", "description": "Contact info unchanged" } + } + }, + "new_contact_info_quic_tpu": { + "type": "Flatten", + "description": "Total number of contact infos received and handled for QUIC TPU port", + "fields": { + "new_contact_info_quic_tpu_unroutable": { "type": "UInt64", "description": "Skipped (unroutable)" }, + "new_contact_info_quic_tpu_initialized": { "type": "UInt64", "description": "Initialized" }, + "new_contact_info_quic_tpu_changed": { "type": "UInt64", "description": "Contact info changed" }, + "new_contact_info_quic_tpu_no_change": { "type": "UInt64", "description": "Contact info unchanged" } + } + }, + "new_contact_info_udp_vote": { + "type": "Flatten", + "description": "Total number of contact infos received and handled for UDP Vote port", + "fields": { + "new_contact_info_udp_vote_unroutable": { "type": "UInt64", "description": "Skipped (unroutable)" }, + "new_contact_info_udp_vote_initialized": { "type": "UInt64", "description": "Initialized" }, + "new_contact_info_udp_vote_changed": { "type": "UInt64", "description": "Contact info changed" }, + 
"new_contact_info_udp_vote_no_change": { "type": "UInt64", "description": "Contact info unchanged" } + } + }, + "new_contact_info_udp_tpu": { + "type": "Flatten", + "description": "Total number of contact infos received and handled for UDP TPU port", + "fields": { + "new_contact_info_udp_tpu_unroutable": { "type": "UInt64", "description": "Skipped (unroutable)" }, + "new_contact_info_udp_tpu_initialized": { "type": "UInt64", "description": "Initialized" }, + "new_contact_info_udp_tpu_changed": { "type": "UInt64", "description": "Contact info changed" }, + "new_contact_info_udp_tpu_no_change": { "type": "UInt64", "description": "Contact info unchanged" } + } + }, + "send_result_quic_vote": { + "type": "Flatten", + "description": "Total count of results from trying to send via QUIC Vote port", + "fields": { + "send_result_quic_vote_success": { "type": "UInt64", "description": "Success" }, + "send_result_quic_vote_no_ci": { "type": "UInt64", "description": "No contact info" }, + "send_result_quic_vote_no_conn": { "type": "UInt64", "description": "No QUIC connection" }, + "send_result_quic_vote_no_stream": { "type": "UInt64", "description": "No QUIC stream" } + } + }, + "send_result_quic_tpu": { + "type": "Flatten", + "description": "Total count of results from trying to send via QUIC TPU port", + "fields": { + "send_result_quic_tpu_success": { "type": "UInt64", "description": "Success" }, + "send_result_quic_tpu_no_ci": { "type": "UInt64", "description": "No contact info" }, + "send_result_quic_tpu_no_conn": { "type": "UInt64", "description": "No QUIC connection" }, + "send_result_quic_tpu_no_stream": { "type": "UInt64", "description": "No QUIC stream" } + } + }, + "send_result_udp_vote": { + "type": "Flatten", + "description": "Total count of results from trying to send via UDP Vote port", + "fields": { + "send_result_udp_vote_success": { "type": "UInt64", "description": "Success" }, + "send_result_udp_vote_no_ci": { "type": "UInt64", "description": "No contact info" 
}, + "send_result_udp_vote_no_conn": { "type": "UInt64", "description": "No QUIC connection" }, + "send_result_udp_vote_no_stream": { "type": "UInt64", "description": "No QUIC stream" } + } + }, + "send_result_udp_tpu": { + "type": "Flatten", + "description": "Total count of results from trying to send via UDP TPU port", + "fields": { + "send_result_udp_tpu_success": { "type": "UInt64", "description": "Success" }, + "send_result_udp_tpu_no_ci": { "type": "UInt64", "description": "No contact info" }, + "send_result_udp_tpu_no_conn": { "type": "UInt64", "description": "No QUIC connection" }, + "send_result_udp_tpu_no_stream": { "type": "UInt64", "description": "No QUIC stream" } + } + }, + "ensure_conn_result_quic_vote": { + "type": "Flatten", + "description": "Total count of results from trying to ensure a connection for a leader for QUIC Vote port", + "fields": { + "ensure_conn_result_quic_vote_no_leader": { "type": "UInt64", "description": "No QUIC connection" }, + "ensure_conn_result_quic_vote_no_ci": { "type": "UInt64", "description": "No contact info" }, + "ensure_conn_result_quic_vote_new_connection": { "type": "UInt64", "description": "Initiated connection" }, + "ensure_conn_result_quic_vote_conn_failed": { "type": "UInt64", "description": "Connection failed" }, + "ensure_conn_result_quic_vote_connected": { "type": "UInt64", "description": "Connection exists" }, + "ensure_conn_result_quic_vote_cooldown": { "type": "UInt64", "description": "Connection cooldown" } + } + }, + "ensure_conn_result_quic_tpu": { + "type": "Flatten", + "description": "Total count of results from trying to ensure a connection for a leader for QUIC TPU port", + "fields": { + "ensure_conn_result_quic_tpu_no_leader": { "type": "UInt64", "description": "No QUIC connection" }, + "ensure_conn_result_quic_tpu_no_ci": { "type": "UInt64", "description": "No contact info" }, + "ensure_conn_result_quic_tpu_new_connection": { "type": "UInt64", "description": "Initiated connection" }, + 
"ensure_conn_result_quic_tpu_conn_failed": { "type": "UInt64", "description": "Connection failed" }, + "ensure_conn_result_quic_tpu_connected": { "type": "UInt64", "description": "Connection exists" }, + "ensure_conn_result_quic_tpu_cooldown": { "type": "UInt64", "description": "Connection cooldown" } + } + }, + "handshake_complete": { + "type": "Flatten", + "description": "Total number of times we completed a handshake", + "fields": { + "handshake_complete_quic_vote": { "type": "UInt64", "description": "QUIC Vote port" }, + "handshake_complete_quic_tpu": { "type": "UInt64", "description": "QUIC TPU port" } + } + }, + "quic_conn_final": { + "type": "Flatten", + "description": "Total number of times QUIC connection closed", + "fields": { + "quic_conn_final_quic_vote": { "type": "UInt64", "description": "QUIC Vote port" }, + "quic_conn_final_quic_tpu": { "type": "UInt64", "description": "QUIC TPU port" } + } + }, + "received_packets": { "type": "UInt64", "description": "Total count of QUIC packets received" }, + "received_bytes": { "type": "UInt64", "description": "Total bytes received via QUIC" }, + "sent_packets": { "type": "UInt64", "description": "Total count of QUIC packets sent" }, + "sent_bytes": { "type": "UInt64", "description": "Total bytes sent via QUIC" }, + "retry_sent": { "type": "UInt64", "description": "Total count of QUIC retry packets sent" }, + "connections_alloc": { "type": "UInt64", "description": "Number of currently allocated QUIC connections" }, + "connections_state": { + "type": "Flatten", + "description": "Number of QUIC connections in each state", + "fields": { + "connections_state_invalid": { "type": "UInt64", "description": "Freed" }, + "connections_state_handshake": { "type": "UInt64", "description": "Handshaking peer" }, + "connections_state_handshake_complete": { "type": "UInt64", "description": "Handshake complete, confirming with peer" }, + "connections_state_active": { "type": "UInt64", "description": "Active connection" }, + 
"connections_state_peer_close": { "type": "UInt64", "description": "Peer requested close" }, + "connections_state_abort": { "type": "UInt64", "description": "Connection terminating due to error" }, + "connections_state_close_pending": { "type": "UInt64", "description": "Connection is closing" }, + "connections_state_dead": { "type": "UInt64", "description": "Connection about to be freed" } + } + }, + "connections_created": { "type": "UInt64", "description": "Total count of QUIC connections created" }, + "connections_closed": { "type": "UInt64", "description": "Total count of QUIC connections closed" }, + "connections_aborted": { "type": "UInt64", "description": "Total count of QUIC connections aborted" }, + "connections_timed_out": { "type": "UInt64", "description": "Total count of QUIC connections timed out" }, + "connections_retried": { "type": "UInt64", "description": "Total count of QUIC connections retried" }, + "connection_error_no_slots": { "type": "UInt64", "description": "Total count of connection errors due to no slots" }, + "connection_error_retry_fail": { "type": "UInt64", "description": "Total count of connection retry failures" }, + "pkt_crypto_failed": { + "type": "Flatten", + "description": "Total count of packets with crypto failures", + "fields": { + "pkt_crypto_failed_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_crypto_failed_early": { "type": "UInt64", "description": "Early data" }, + "pkt_crypto_failed_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_crypto_failed_app": { "type": "UInt64", "description": "App data" } + } + }, + "pkt_no_key": { + "type": "Flatten", + "description": "Total count of packets with no key", + "fields": { + "pkt_no_key_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_no_key_early": { "type": "UInt64", "description": "Early data" }, + "pkt_no_key_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_no_key_app": { "type": "UInt64", "description": 
"App data" } + } + }, + "pkt_no_conn": { + "type": "Flatten", + "description": "Total count of packets with no connection", + "fields": { + "pkt_no_conn_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_no_conn_retry": { "type": "UInt64", "description": "Retry" }, + "pkt_no_conn_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_no_conn_one_rtt": { "type": "UInt64", "description": "1-RTT" } + } + }, + "pkt_tx_alloc_fail": { "type": "UInt64", "description": "Total count of packet TX allocation failures" }, + "pkt_net_header_invalid": { "type": "UInt64", "description": "Total count of packets with invalid network headers" }, + "pkt_quic_header_invalid": { "type": "UInt64", "description": "Total count of packets with invalid QUIC headers" }, + "pkt_undersz": { "type": "UInt64", "description": "Total count of undersized packets" }, + "pkt_oversz": { "type": "UInt64", "description": "Total count of oversized packets" }, + "pkt_verneg": { "type": "UInt64", "description": "Total count of version negotiation packets" }, + "pkt_retransmissions": { + "type": "Flatten", + "description": "Total count of QUIC packet retransmissions", + "fields": { + "pkt_retransmissions_initial": { "type": "UInt64", "description": "Initial" }, + "pkt_retransmissions_early": { "type": "UInt64", "description": "Early data" }, + "pkt_retransmissions_handshake": { "type": "UInt64", "description": "Handshake" }, + "pkt_retransmissions_app": { "type": "UInt64", "description": "App data" } + } + }, + "handshakes_created": { "type": "UInt64", "description": "Total count of QUIC handshakes created" }, + "handshake_error_alloc_fail": { "type": "UInt64", "description": "Total count of handshake allocation failures" }, + "handshake_evicted": { "type": "UInt64", "description": "Total count of handshakes evicted" }, + "stream_received_events": { "type": "UInt64", "description": "Total count of stream events received" }, + "stream_received_bytes": { "type": "UInt64", 
"description": "Total bytes received via streams" }, + "received_frames": { + "type": "Flatten", + "description": "Total count of QUIC frames received", + "fields": { + "received_frames_unknown": { "type": "UInt64", "description": "Unknown frame type" }, + "received_frames_ack": { "type": "UInt64", "description": "ACK frame" }, + "received_frames_reset_stream": { "type": "UInt64", "description": "RESET_STREAM frame" }, + "received_frames_stop_sending": { "type": "UInt64", "description": "STOP_SENDING frame" }, + "received_frames_crypto": { "type": "UInt64", "description": "CRYPTO frame" }, + "received_frames_new_token": { "type": "UInt64", "description": "NEW_TOKEN frame" }, + "received_frames_stream": { "type": "UInt64", "description": "STREAM frame" }, + "received_frames_max_data": { "type": "UInt64", "description": "MAX_DATA frame" }, + "received_frames_max_stream_data": { "type": "UInt64", "description": "MAX_STREAM_DATA frame" }, + "received_frames_max_streams": { "type": "UInt64", "description": "MAX_STREAMS frame" }, + "received_frames_data_blocked": { "type": "UInt64", "description": "DATA_BLOCKED frame" }, + "received_frames_stream_data_blocked": { "type": "UInt64", "description": "STREAM_DATA_BLOCKED frame" }, + "received_frames_streams_blocked": { "type": "UInt64", "description": "STREAMS_BLOCKED(bidi) frame" }, + "received_frames_new_conn_id": { "type": "UInt64", "description": "NEW_CONN_ID frame" }, + "received_frames_retire_conn_id": { "type": "UInt64", "description": "RETIRE_CONN_ID frame" }, + "received_frames_path_challenge": { "type": "UInt64", "description": "PATH_CHALLENGE frame" }, + "received_frames_path_response": { "type": "UInt64", "description": "PATH_RESPONSE frame" }, + "received_frames_conn_close_quic": { "type": "UInt64", "description": "CONN_CLOSE(transport) frame" }, + "received_frames_conn_close_app": { "type": "UInt64", "description": "CONN_CLOSE(app) frame" }, + "received_frames_handshake_done": { "type": "UInt64", "description": 
"HANDSHAKE_DONE frame" }, + "received_frames_ping": { "type": "UInt64", "description": "PING frame" }, + "received_frames_padding": { "type": "UInt64", "description": "PADDING frame" } + } + }, + "frame_fail_parse": { "type": "UInt64", "description": "Total count of frame parse failures" }, + "frame_tx_alloc": { + "type": "Flatten", + "description": "Results of attempts to acquire QUIC frame metadata", + "fields": { + "frame_tx_alloc_success": { "type": "UInt64", "description": "Success" }, + "frame_tx_alloc_fail_empty_pool": { "type": "UInt64", "description": "PktMetaPoolEmpty" }, + "frame_tx_alloc_fail_conn_max": { "type": "UInt64", "description": "ConnMaxedInflightFrames" } + } + }, + "ack_tx": { + "type": "Flatten", + "description": "Total count of ACK frames transmitted", + "fields": { + "ack_tx_noop": { "type": "UInt64", "description": "Non-ACK-eliciting packet" }, + "ack_tx_new": { "type": "UInt64", "description": "New ACK range" }, + "ack_tx_merged": { "type": "UInt64", "description": "Merged into existing ACK range" }, + "ack_tx_drop": { "type": "UInt64", "description": "Out of buffers" }, + "ack_tx_cancel": { "type": "UInt64", "description": "ACK suppressed by handler" } + } + } + } +} diff --git a/src/disco/events/schema/metrics_shred.json b/src/disco/events/schema/metrics_shred.json new file mode 100644 index 00000000000..bc591665b40 --- /dev/null +++ b/src/disco/events/schema/metrics_shred.json @@ -0,0 +1,34 @@ +{ + "name": "metrics_shred", + "id": 1009, + "description": "Metrics snapshot for the shred tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "microblocks_abandoned": { "type": "UInt64", "description": "The number of microblocks that were abandoned because we switched slots without finishing the current slot" }, + "invalid_block_id": { "type": "UInt64", "description": "The number of times a block was created with unknown parent block_id" }, + "shred_processed": { + "type": "Flatten", + 
"description": "The result of processing a shred from the network", + "fields": { + "shred_processed_bad_slot": { "type": "UInt64", "description": "Shred was for a slot for which we don't know the leader" }, + "shred_processed_parse_failed": { "type": "UInt64", "description": "Shred parsing failed" }, + "shred_processed_rejected": { "type": "UInt64", "description": "Shred was invalid for one of many reasons" }, + "shred_processed_ignored": { "type": "UInt64", "description": "Shred was ignored because we had already received or reconstructed it" }, + "shred_processed_okay": { "type": "UInt64", "description": "Shred accepted to an incomplete FEC set" }, + "shred_processed_completes": { "type": "UInt64", "description": "Shred accepted and resulted in a valid, complete FEC set" } + } + }, + "fec_set_spilled": { "type": "UInt64", "description": "The number of FEC sets that were spilled because they didn't complete in time and we needed space" }, + "shred_rejected_initial": { "type": "UInt64", "description": "The number of shreds that were rejected before any resources were allocated for the FEC set" }, + "shred_rejected_unchained": { "type": "UInt64", "description": "The number of shreds that were rejected because they're not chained merkle shreds" }, + "fec_rejected_fatal": { "type": "UInt64", "description": "The number of FEC sets that were rejected for reasons that cause the whole FEC set to become invalid" }, + "force_complete_request": { "type": "UInt64", "description": "The number of times we received a FEC force complete message" }, + "force_complete_failure": { "type": "UInt64", "description": "The number of times we failed to force complete a FEC set on request" }, + "force_complete_success": { "type": "UInt64", "description": "The number of times we successfully forced completed a FEC set on request" }, + "shred_repair_rcv": { "type": "UInt64", "description": "The number of times we received a repair shred" }, + "shred_repair_rcv_bytes": { "type": "UInt64", 
"description": "The number bytes received from network packets with repair shreds. Bytes include network headers" }, + "shred_turbine_rcv": { "type": "UInt64", "description": "The number of times we received a turbine shred" }, + "shred_turbine_rcv_bytes": { "type": "UInt64", "description": "The number bytes received from network packets with turbine shreds. Bytes include network headers" } + } +} diff --git a/src/disco/events/schema/metrics_sign.json b/src/disco/events/schema/metrics_sign.json new file mode 100644 index 00000000000..f786f73b211 --- /dev/null +++ b/src/disco/events/schema/metrics_sign.json @@ -0,0 +1,9 @@ +{ + "name": "metrics_sign", + "id": 1011, + "description": "Metrics snapshot for the sign tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" } + } +} diff --git a/src/disco/events/schema/metrics_snapct.json b/src/disco/events/schema/metrics_snapct.json new file mode 100644 index 00000000000..9ee70c8c4aa --- /dev/null +++ b/src/disco/events/schema/metrics_snapct.json @@ -0,0 +1,24 @@ +{ + "name": "metrics_snapct", + "id": 1023, + "description": "Metrics snapshot for the snapct tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile" }, + "full_num_retries": { "type": "UInt64", "description": "Number of times we aborted and retried full snapshot download because the peer was too slow" }, + "incremental_num_retries": { "type": "UInt64", "description": "Number of times we aborted and retried incremental snapshot download because the peer was too slow" }, + "full_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the full snapshot. Might decrease if snapshot load is aborted and restarted" }, + "full_bytes_written": { "type": "UInt64", "description": "Number of bytes written so far from the full snapshot. 
Might decrease if snapshot load is aborted and restarted" }, + "full_bytes_total": { "type": "UInt64", "description": "Total size of the full snapshot file. Might change if snapshot load is aborted and restarted" }, + "full_download_retries": { "type": "UInt64", "description": "Number of times we retried the full snapshot download because the peer was too slow" }, + "incremental_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted" }, + "incremental_bytes_written": { "type": "UInt64", "description": "Number of bytes written so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted" }, + "incremental_bytes_total": { "type": "UInt64", "description": "Total size of the incremental snapshot file. Might change if snapshot load is aborted and restarted" }, + "incremental_download_retries": { "type": "UInt64", "description": "Number of times we retried the incremental snapshot download because the peer was too slow" }, + "predicted_slot": { "type": "UInt64", "description": "The predicted slot from which replay starts after snapshot loading finishes. Might change if snapshot load is aborted and restarted" }, + "gossip_fresh_count": { "type": "UInt64", "description": "Number of fresh gossip peers seen when collecting gossip peers. " }, + "gossip_total_count": { "type": "UInt64", "description": "Number of total gossip peers seen when collecting gossip peers. " }, + "ssl_alloc_errors": { "type": "UInt64", "description": "Number of SSL allocation errors encountered. 
" } + } +} diff --git a/src/disco/events/schema/metrics_snapdc.json b/src/disco/events/schema/metrics_snapdc.json new file mode 100644 index 00000000000..340c7e9313f --- /dev/null +++ b/src/disco/events/schema/metrics_snapdc.json @@ -0,0 +1,14 @@ +{ + "name": "metrics_snapdc", + "id": 1025, + "description": "Metrics snapshot for the snapdc tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_compressed_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the compressed full snapshot file. Might decrease if snapshot load is aborted and restarted" }, + "full_decompressed_bytes_written": { "type": "UInt64", "description": "Number of bytes decompressed so far from the full snapshot. Might decrease if snapshot load is aborted and restarted" }, + "incremental_compressed_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the compressed incremental snapshot file. Might decrease if snapshot load is aborted and restarted" }, + "incremental_decompressed_bytes_written": { "type": "UInt64", "description": "Number of bytes decompressed so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_snapin.json b/src/disco/events/schema/metrics_snapin.json new file mode 100644 index 00000000000..4b852567ac2 --- /dev/null +++ b/src/disco/events/schema/metrics_snapin.json @@ -0,0 +1,15 @@ +{ + "name": "metrics_snapin", + "id": 1026, + "description": "Metrics snapshot for the snapin tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 
0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the full snapshot. Might decrease if snapshot load is aborted and restarted" }, + "incremental_bytes_read": { "type": "UInt64", "description": "Number of bytes read so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted" }, + "accounts_loaded": { "type": "UInt64", "description": "Number of accounts seen during snapshot loading. Includes duplicates. Resets if snapshot load restarts" }, + "accounts_replaced": { "type": "UInt64", "description": "Number of previously inserted accounts replaced by a later duplicate. Resets if snapshot load restarts" }, + "accounts_ignored": { "type": "UInt64", "description": "Number of stale duplicate accounts dropped because a previously inserted account was newer. Resets if snapshot load restarts" } + } +} diff --git a/src/disco/events/schema/metrics_snapla.json b/src/disco/events/schema/metrics_snapla.json new file mode 100644 index 00000000000..4d283f502e2 --- /dev/null +++ b/src/disco/events/schema/metrics_snapla.json @@ -0,0 +1,12 @@ +{ + "name": "metrics_snapla", + "id": 1036, + "description": "Metrics snapshot for the snapla tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted" }, + "incremental_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the incremental snapshot during snapshot loading. 
Might decrease if snapshot load is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_snapld.json b/src/disco/events/schema/metrics_snapld.json new file mode 100644 index 00000000000..0e7c02ccd0b --- /dev/null +++ b/src/disco/events/schema/metrics_snapld.json @@ -0,0 +1,11 @@ +{ + "name": "metrics_snapld", + "id": 1024, + "description": "Metrics snapshot for the snapld tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "ssl_alloc_errors": { "type": "UInt64", "description": "Number of SSL allocation errors encountered. " } + } +} diff --git a/src/disco/events/schema/metrics_snaplh.json b/src/disco/events/schema/metrics_snaplh.json new file mode 100644 index 00000000000..9928e087d42 --- /dev/null +++ b/src/disco/events/schema/metrics_snaplh.json @@ -0,0 +1,12 @@ +{ + "name": "metrics_snaplh", + "id": 1041, + "description": "Metrics snapshot for the snaplh tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted" }, + "incremental_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the incremental snapshot during snapshot loading. 
Might decrease if snapshot load is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_snapls.json b/src/disco/events/schema/metrics_snapls.json new file mode 100644 index 00000000000..6dba560c904 --- /dev/null +++ b/src/disco/events/schema/metrics_snapls.json @@ -0,0 +1,12 @@ +{ + "name": "metrics_snapls", + "id": 1037, + "description": "Metrics snapshot for the snapls tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted" }, + "incremental_accounts_hashed": { "type": "UInt64", "description": "Number of accounts hashed for the incremental snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_snaplv.json b/src/disco/events/schema/metrics_snaplv.json new file mode 100644 index 00000000000..7f4c3d965b1 --- /dev/null +++ b/src/disco/events/schema/metrics_snaplv.json @@ -0,0 +1,12 @@ +{ + "name": "metrics_snaplv", + "id": 1042, + "description": "Metrics snapshot for the snaplv tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "full_duplicate_accounts_hashed": { "type": "UInt64", "description": "Number of duplicate accounts hashed for the full snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted" }, + "incremental_duplicate_accounts_hashed": { "type": "UInt64", "description": "Number of duplicate accounts hashed for the incremental snapshot during snapshot loading. 
Might decrease if snapshot load is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_snapwh.json b/src/disco/events/schema/metrics_snapwh.json new file mode 100644 index 00000000000..f435d848e60 --- /dev/null +++ b/src/disco/events/schema/metrics_snapwh.json @@ -0,0 +1,10 @@ +{ + "name": "metrics_snapwh", + "id": 1035, + "description": "Metrics snapshot for the snapwh tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 4=SHUTDOWN" } + } +} diff --git a/src/disco/events/schema/metrics_snapwm.json b/src/disco/events/schema/metrics_snapwm.json new file mode 100644 index 00000000000..04b45cd3a6c --- /dev/null +++ b/src/disco/events/schema/metrics_snapwm.json @@ -0,0 +1,14 @@ +{ + "name": "metrics_snapwm", + "id": 1040, + "description": "Metrics snapshot for the snapwm tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 2=FINISHING, 3=ERROR, 4=SHUTDOWN" }, + "accounts_loaded": { "type": "UInt64", "description": "Number of accounts seen during snapshot loading. Includes duplicates. Resets if snapshot load restarts" }, + "accounts_replaced": { "type": "UInt64", "description": "Number of previously inserted accounts replaced by a later duplicate. Resets if snapshot load restarts" }, + "accounts_ignored": { "type": "UInt64", "description": "Number of stale duplicate accounts dropped because a previously inserted account was newer. Resets if snapshot load restarts" }, + "accounts_active": { "type": "UInt64", "description": "Current number of accounts in index. 
Resets if snapshot load restarts" } + } +} diff --git a/src/disco/events/schema/metrics_snapwr.json b/src/disco/events/schema/metrics_snapwr.json new file mode 100644 index 00000000000..846781c21e5 --- /dev/null +++ b/src/disco/events/schema/metrics_snapwr.json @@ -0,0 +1,11 @@ +{ + "name": "metrics_snapwr", + "id": 1033, + "description": "Metrics snapshot for the snapwr tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "state": { "type": "UInt64", "description": "State of the tile. 0=IDLE, 1=PROCESSING, 4=SHUTDOWN" }, + "vinyl_bytes_written": { "type": "UInt64", "description": "Number of bytes written so far to the vinyl snapshot file. Might decrease if snapshot creation is aborted and restarted" } + } +} diff --git a/src/disco/events/schema/metrics_sock.json b/src/disco/events/schema/metrics_sock.json new file mode 100644 index 00000000000..92321d6ea4f --- /dev/null +++ b/src/disco/events/schema/metrics_sock.json @@ -0,0 +1,27 @@ +{ + "name": "metrics_sock", + "id": 1020, + "description": "Metrics snapshot for the sock tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "syscalls_sendmmsg": { + "type": "Flatten", + "description": "Number of sendmmsg syscalls dispatched", + "fields": { + "syscalls_sendmmsg_no_error": { "type": "UInt64", "description": "No error" }, + "syscalls_sendmmsg_slow": { "type": "UInt64", "description": "ENOBUFS, EAGAIN error" }, + "syscalls_sendmmsg_perm": { "type": "UInt64", "description": "EPERM error (blocked by netfilter)" }, + "syscalls_sendmmsg_unreach": { "type": "UInt64", "description": "ENETUNREACH, EHOSTUNREACH error" }, + "syscalls_sendmmsg_down": { "type": "UInt64", "description": "ENONET, ENETDOWN, EHOSTDOWN error" }, + "syscalls_sendmmsg_other": { "type": "UInt64", "description": "Unrecognized error code" } + } + }, + "syscalls_recvmmsg": { "type": "UInt64", "description": "Number of recvmmsg syscalls dispatched" }, + 
"rx_pkt_cnt": { "type": "UInt64", "description": "Number of packets received" }, + "tx_pkt_cnt": { "type": "UInt64", "description": "Number of packets sent" }, + "tx_drop_cnt": { "type": "UInt64", "description": "Number of packets failed to send" }, + "tx_bytes_total": { "type": "UInt64", "description": "Total number of bytes transmitted (including Ethernet header)" }, + "rx_bytes_total": { "type": "UInt64", "description": "Total number of bytes received (including Ethernet header)" } + } +} diff --git a/src/disco/events/schema/metrics_tower.json b/src/disco/events/schema/metrics_tower.json new file mode 100644 index 00000000000..ef6ca21007b --- /dev/null +++ b/src/disco/events/schema/metrics_tower.json @@ -0,0 +1,28 @@ +{ + "name": "metrics_tower", + "id": 1038, + "description": "Metrics snapshot for the tower tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "vote_txn_invalid": { "type": "UInt64", "description": "Number of times we dropped a vote txn because it was invalid (malformed, bad signature, etc.)" }, + "vote_txn_ignored": { "type": "UInt64", "description": "Number of times we ignored all or part of a vote txn because we didn't recognize a slot (eg. 
our replay was behind)" }, + "vote_txn_mismatch": { "type": "UInt64", "description": "Number of times a vote txn mismatched our own block id" }, + "ancestor_rollback": { "type": "UInt64", "description": "Rollback to an ancestor of our prev vote (can't vote)" }, + "sibling_confirmed": { "type": "UInt64", "description": "Duplicate sibling got confirmed (can't vote)" }, + "same_fork": { "type": "UInt64", "description": "Same fork as prev vote (can vote)" }, + "switch_pass": { "type": "UInt64", "description": "Prev vote was on a different fork, but we are allowed to switch (can vote)" }, + "switch_fail": { "type": "UInt64", "description": "Prev vote was on a different fork, and we are not allowed to switch (can't vote)" }, + "lockout_fail": { "type": "UInt64", "description": "Locked out (can't vote)" }, + "threshold_fail": { "type": "UInt64", "description": "Did not pass threshold check (can't vote)" }, + "propagated_fail": { "type": "UInt64", "description": "Prev leader block did not propagate (can't vote)" }, + "root_slot": { "type": "UInt64", "description": "Current Tower root slot" }, + "vote_slot": { "type": "UInt64", "description": "Current Tower vote slot" }, + "reset_slot": { "type": "UInt64", "description": "Current Tower reset slot" }, + "slot_ignored": { "type": "UInt64", "description": "Number of times we ignored a slot likely due to minority fork publish" }, + "hard_forks_seen": { "type": "UInt64", "description": "Number of hard forks we've seen (block ids with multiple candidate bank hashes)" }, + "hard_forks_pruned": { "type": "UInt64", "description": "Number of hard forks (candidate bank hashes) we've pruned" }, + "hard_forks_active": { "type": "UInt64", "description": "Currently active hard forks" }, + "hard_forks_max_width": { "type": "UInt64", "description": "Max number of candidate bank hashes for a given block id" } + } +} diff --git a/src/disco/events/schema/metrics_verify.json b/src/disco/events/schema/metrics_verify.json new file mode 100644 
index 00000000000..064e7a8c52e --- /dev/null +++ b/src/disco/events/schema/metrics_verify.json @@ -0,0 +1,21 @@ +{ + "name": "metrics_verify", + "id": 1003, + "description": "Metrics snapshot for the verify tile", + "fields": { + "meta": { "type": "ref:MetricMeta" }, + "tile": { "type": "ref:MetricTile" }, + "transaction_result": { + "type": "Flatten", + "description": "Count of transaction results through verify tile", + "fields": { + "transaction_result_success": { "type": "UInt64", "description": "Transaction verified successfully" }, + "transaction_result_bundle_peer_failure": { "type": "UInt64", "description": "Peer transaction in the bundle failed" }, + "transaction_result_parse_failure": { "type": "UInt64", "description": "Transaction failed to parse" }, + "transaction_result_dedup_failure": { "type": "UInt64", "description": "Transaction failed deduplication" }, + "transaction_result_verify_failure": { "type": "UInt64", "description": "Transaction failed signature verification" } + } + }, + "gossiped_votes_received": { "type": "UInt64", "description": "Count of simple vote transactions received over gossip instead of via the normal TPU path" } + } +} diff --git a/src/disco/events/schema/shared.json b/src/disco/events/schema/shared.json new file mode 100644 index 00000000000..be812683100 --- /dev/null +++ b/src/disco/events/schema/shared.json @@ -0,0 +1,59 @@ +{ + "MetricMeta": { + "type": "Flatten", + "description": "Metadata about this metrics sample", + "fields": { + "kind_id": { "type": "UInt64", "description": "The kind_id of this tile instance within its type (e.g., 0, 1, 2 for multiple tiles of same type)" }, + "sample_id": { "type": "UInt64", "description": "Unique identifier correlating samples taken at the same time across tiles" }, + "sample_reason": { + "type": "LowCardinality(String)", + "description": "Reason for taking this sample", + "variants": { + "Periodic": { "description": "Periodic sampling at regular intervals" }, + "Leader": { 
"description": "Sampled because this validator was leader in the slot" } + } + }, + "sample_slot": { "type": "UInt64", "description": "The slot number for which this sample was taken, if applicable" } + } + }, + "MetricTile": { + "type": "Flatten", + "description": "Common tile metrics shared by all tiles", + "fields": { + "pid": { "type": "UInt64", "description": "The process ID of the tile" }, + "tid": { "type": "UInt64", "description": "The thread ID of the tile. Always the same as the Pid in production, but might be different in development" }, + "context_switch_involuntary_count": { "type": "UInt64", "description": "The number of involuntary context switches" }, + "context_switch_voluntary_count": { "type": "UInt64", "description": "The number of voluntary context switches" }, + "page_fault_major_count": { "type": "UInt64", "description": "The number of major page faults" }, + "page_fault_minor_count": { "type": "UInt64", "description": "The number of minor page faults" }, + "status": { "type": "UInt64", "description": "The current status of the tile. 0 is booting, 1 is running. 
2 is shutdown" }, + "heartbeat": { "type": "UInt64", "description": "The last UNIX timestamp in nanoseconds that the tile heartbeated" }, + "in_backpressure": { "type": "UInt64", "description": "Whether the tile is currently backpressured or not, either 1 or 0" }, + "backpressure_count": { "type": "UInt64", "description": "Number of times the tile has had to wait for one of more consumers to catch up to resume publishing" }, + "regime_duration_nanos": { + "type": "Flatten", + "description": "Mutually exclusive and exhaustive duration of time the tile spent in each of the regimes", + "fields": { + "regime_duration_nanos_caught_up_housekeeping": { "type": "UInt64", "description": "Caught up + Housekeeping" }, + "regime_duration_nanos_processing_housekeeping": { "type": "UInt64", "description": "Processing + Housekeeping" }, + "regime_duration_nanos_backpressure_housekeeping": { "type": "UInt64", "description": "Backpressure + Housekeeping" }, + "regime_duration_nanos_caught_up_prefrag": { "type": "UInt64", "description": "Caught up + Prefrag" }, + "regime_duration_nanos_processing_prefrag": { "type": "UInt64", "description": "Processing + Prefrag" }, + "regime_duration_nanos_backpressure_prefrag": { "type": "UInt64", "description": "Backpressure + Prefrag" }, + "regime_duration_nanos_caught_up_postfrag": { "type": "UInt64", "description": "Caught up + Postfrag" }, + "regime_duration_nanos_processing_postfrag": { "type": "UInt64", "description": "Processing + Postfrag" } + } + }, + "cpu_duration_nanos": { + "type": "Flatten", + "description": "CPU time spent in each CPU regime", + "fields": { + "cpu_duration_nanos_wait": { "type": "UInt64", "description": "Wait (task was runnable but not scheduled)" }, + "cpu_duration_nanos_idle": { "type": "UInt64", "description": "Idle (task was not runnable)" }, + "cpu_duration_nanos_user": { "type": "UInt64", "description": "User (task was scheduled and executing in user mode)" }, + "cpu_duration_nanos_system": { "type": "UInt64", 
"description": "System (task was scheduled and executing in kernel mode)" } + } + } + } + } +} diff --git a/src/disco/metrics/gen_metrics.py b/src/disco/metrics/gen_metrics.py index 0c9e889983d..b8a1618f254 100644 --- a/src/disco/metrics/gen_metrics.py +++ b/src/disco/metrics/gen_metrics.py @@ -1,6 +1,7 @@ from generate.types import * from generate.write_codegen import write_codegen from generate.write_docs import write_docs +from generate.write_schemas import write_schemas from pathlib import Path def main(): @@ -9,6 +10,7 @@ def main(): write_codegen(metrics) write_docs(metrics) + write_schemas(metrics) if __name__ == '__main__': main() diff --git a/src/disco/metrics/generate/write_schemas.py b/src/disco/metrics/generate/write_schemas.py new file mode 100644 index 00000000000..5e07a2db45b --- /dev/null +++ b/src/disco/metrics/generate/write_schemas.py @@ -0,0 +1,131 @@ +from .types import * +import json +import re +import sys +from typing import Dict, Union +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "events")) +from gen_events import Field, Variant, ClickHouseType, main as gen_events_main + +def to_snake_case(name: str) -> str: + """Convert PascalCase/camelCase to snake_case.""" + return re.sub(r'(? 
def ch_type_name(field: Field) -> str:
    """Return the ClickHouse type name used in schema JSON for *field*."""
    if field.chtype == ClickHouseType.LowCardinalityString:
        return "LowCardinality(String)"
    return field.chtype.name

def _json_str(s: str) -> str:
    """Serialize *s* as a JSON string literal.

    Descriptions and names are interpolated into hand-formatted JSON text;
    going through json.dumps escapes embedded quotes/backslashes so the
    output stays valid JSON. For plain ASCII text (all current metric
    descriptions) the output is byte-identical to '"' + s + '"'.
    """
    return json.dumps(s, ensure_ascii=False)

def format_fields_json(fields: Dict[str, Union[Field, str]], indent: int = 8) -> List[str]:
    """Render a ``fields`` mapping as pretty-printed JSON lines.

    A plain string value is emitted as a raw type reference (e.g.
    "ref:MetricMeta"); a Field value is expanded, recursing into Flatten
    sub-fields and enumerating variants. Field names are right-padded so
    the value objects line up in a column, matching the checked-in schema
    files.
    """
    lines = []
    pad = " " * indent
    # Column used to align the value objects after each field name.
    max_name_len = max(len(name) for name in fields) if fields else 0
    field_items = list(fields.items())

    for i, (name, field) in enumerate(field_items):
        padding = " " * (max_name_len - len(name))
        # Trailing comma on all but the last entry (strict JSON).
        comma = "," if i < len(field_items) - 1 else ""

        if isinstance(field, str):
            # Raw shared-type reference such as "ref:MetricMeta".
            lines.append(f'{pad}{_json_str(name)}:{padding} {{ "type": {_json_str(field)} }}{comma}')
            continue

        type_name = ch_type_name(field)

        if field.chtype == ClickHouseType.Flatten and field.fields:
            lines.append(f'{pad}{_json_str(name)}:{padding} {{')
            lines.append(f'{pad}    "type": {_json_str(type_name)},')
            lines.append(f'{pad}    "description": {_json_str(field.description)},')
            lines.append(f'{pad}    "fields": {{')
            lines.extend(format_fields_json(field.fields, indent + 8))
            lines.append(f'{pad}    }}')
            lines.append(f'{pad}}}{comma}')
        elif field.variants:
            lines.append(f'{pad}{_json_str(name)}:{padding} {{')
            lines.append(f'{pad}    "type": {_json_str(type_name)},')
            lines.append(f'{pad}    "description": {_json_str(field.description)},')
            lines.append(f'{pad}    "variants": {{')
            variant_items = list(field.variants.items())
            for j, (vname, vdata) in enumerate(variant_items):
                vcomma = "," if j < len(variant_items) - 1 else ""
                lines.append(f'{pad}        {_json_str(vname)}: {{ "description": {_json_str(vdata.description)} }}{vcomma}')
            lines.append(f'{pad}    }}')
            lines.append(f'{pad}}}{comma}')
        else:
            lines.append(f'{pad}{_json_str(name)}:{padding} {{ "type": {_json_str(type_name)}, "description": {_json_str(field.description)} }}{comma}')

    return lines

def format_schema_json(schema: dict) -> str:
    """Render a full schema dict as canonical pretty-printed JSON text."""
    lines = ["{"]
    lines.append(f'    "name": {_json_str(schema["name"])},')
    lines.append(f'    "id": {schema["id"]},')
    lines.append(f'    "description": {_json_str(schema["description"])},')
    lines.append('    "fields": {')
    lines.extend(format_fields_json(schema["fields"], indent=8))
    lines.append("    }")
    lines.append("}")
    return "\n".join(lines)

def metric_enum_to_schema_flatten(metric: Metric) -> Field:
    """Convert an enum metric into a Flatten field with one UInt64 per enum value."""
    nested_fields = {}
    for enum_val in metric.enum.values:
        # Nested field names are prefixed with the metric name, matching
        # the flattened column naming in the generated schemas.
        field_name = f"{to_snake_case(metric.name)}_{to_snake_case(enum_val.name)}"
        nested_fields[field_name] = Field(
            chtype=ClickHouseType.UInt64,
            description=enum_val.label
        )

    return Field(
        chtype=ClickHouseType.Flatten,
        description=metric.description,
        fields=nested_fields
    )

def generate_event_schemas(metrics: Metrics, schema_dir: Path) -> int:
    """Write one metrics_<tile>.json event schema per telemetry tile.

    Histogram metrics are skipped (no scalar snapshot representation);
    enum metrics are flattened into one UInt64 counter per variant.
    Returns the number of schema files written.
    """
    count = 0
    for tile, tile_metrics in metrics.tiles.items():
        if tile in metrics.tiles_no_telemetry:
            continue

        count += 1
        tile_name = tile.name.lower()

        fields: Dict[str, Union[Field, str]] = {}
        # Every tile schema starts with the shared metadata blocks.
        fields["meta"] = "ref:MetricMeta"
        fields["tile"] = "ref:MetricTile"

        for metric in tile_metrics:
            if isinstance(metric, HistogramMetric):
                continue

            if isinstance(metric, (CounterEnumMetric, GaugeEnumMetric)):
                fields[to_snake_case(metric.name)] = metric_enum_to_schema_flatten(metric)
            else:
                fields[to_snake_case(metric.name)] = Field(
                    chtype=ClickHouseType.UInt64,
                    description=metric.description
                )

        # Event ids for tile metric snapshots live in the 1000+ range,
        # keyed by the tile's numeric id.
        event_id = 1000 + tile.value
        schema = {
            "name": f"metrics_{tile_name}",
            "id": event_id,
            "description": f"Metrics snapshot for the {tile_name} tile",
            "fields": fields
        }

        schema_path = schema_dir / f"metrics_{tile_name}.json"
        schema_path.write_text(format_schema_json(schema) + "\n")

    return count

def write_schemas(metrics: Metrics):
    """Regenerate the event schema JSON files and re-run the events generator.

    Schemas are written into src/disco/events/schema/ relative to this
    module, then gen_events' main() is invoked so downstream codegen
    picks up the freshly written schemas.
    """
    schema_dir = Path(__file__).parent.parent.parent / "events" / "schema"
    count = generate_event_schemas(metrics, schema_dir)

    print(f"Wrote {count} metric schemas to {schema_dir}")

    gen_events_main()