From 1e9cbf9756be4bdefbc8688d48fcfce2849f7fce Mon Sep 17 00:00:00 2001 From: Douglas Wightman Date: Wed, 7 Jan 2026 10:12:43 -0700 Subject: [PATCH] Fix for monitoring nvlink fields, update cuda versions --- Dockerfile | 10 +- docker-bake.hcl | 8 +- pkg/dcgm/const_fields.go | 1310 +++++++++++++++++++------------------- pkg/dcgm/fields.go | 5 +- 4 files changed, 667 insertions(+), 666 deletions(-) diff --git a/Dockerfile b/Dockerfile index f273646..d76d07c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,15 +3,15 @@ # point to an image that exists, see here for list: https://hub.docker.com/r/nvidia/cuda/tags # CUDA_VERSION -ARG CUDA_VERSION=12.5.1 -# cuda image supports these images rockylinux9, rockylinux8, ubi9, ubi8, ubuntu24.04, ubuntu22.04, ubuntu20.04 +ARG CUDA_VERSION=13.1.0 +# cuda image supports these images rockylinux9, rockylinux8, ubi9, ubi8, ubuntu24.04, ubuntu22.04 # Note: Testing has only been done with the ubuntu variants. ARG DISTRO_FLAVOR=ubuntu24.04 # Use build arguments to select our base image or just stick with the defaults above. FROM nvidia/cuda:$CUDA_VERSION-base-$DISTRO_FLAVOR AS base -ARG DCGM_VERSION=4.2.3-2 -ARG GO_VERSION=1.24.4 +ARG DCGM_VERSION=4.4.2-1 +ARG GO_VERSION=1.25.5 ENV DEBIAN_FRONTEND=noninteractive SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -22,8 +22,6 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] # hadolint ignore=DL3015,DL3008 RUN apt-get update && apt-get install -y --no-install-recommends \ gnupg2 curl ca-certificates && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb | apt-get install -y --no-install-recommends && \ - curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/nvidia-machine-learning-repo-ubuntu2004_1.0.0-1_amd64.deb | apt-get install -y --no-install-recommends && \ curl -fsSL https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | tar -C /usr/local -xz && \ apt-get purge --autoremove -y curl && \ apt-get install -y datacenter-gpu-manager-4-dev=1:${DCGM_VERSION} && \ diff --git a/docker-bake.hcl b/docker-bake.hcl index f93e646..537c466 100644 --- a/docker-bake.hcl +++ b/docker-bake.hcl @@ -3,10 +3,10 @@ target "default" { tags = ["go-dcgm:${distro}-go${go}-cuda${cuda}-dcgm${dcgm}"] platforms = ["linux/amd64"] matrix = { - go = ["1.24.4"] - distro = ["ubuntu24.04", "ubuntu22.04", "ubuntu20.04"] - cuda = ["12.9.1", "12.5.1"] - dcgm = ["4.2.3-2"] + go = ["1.25.5"] + distro = ["ubuntu24.04", "ubuntu22.04"] + cuda = ["12.9.1", "13.1.0"] + dcgm = ["4.4.2-1"] } args = { GO_VERSION = go diff --git a/pkg/dcgm/const_fields.go b/pkg/dcgm/const_fields.go index ebd7182..3a91fbb 100644 --- a/pkg/dcgm/const_fields.go +++ b/pkg/dcgm/const_fields.go @@ -1257,669 +1257,669 @@ const ( // dcgmFields maps field names to their IDs var dcgmFields = map[string]Short{ - "DCGM_FI_UNKNOWN": 0, - "DCGM_FI_DRIVER_VERSION": 1, - "DCGM_FI_NVML_VERSION": 2, - "DCGM_FI_PROCESS_NAME": 3, - "DCGM_FI_DEV_COUNT": 4, - "DCGM_FI_CUDA_DRIVER_VERSION": 5, - "DCGM_FI_BIND_UNBIND_EVENT": 6, - "DCGM_FI_DEV_NAME": 50, - "DCGM_FI_DEV_BRAND": 51, - "DCGM_FI_DEV_NVML_INDEX": 52, - "DCGM_FI_DEV_SERIAL": 53, - "DCGM_FI_DEV_UUID": 54, - "DCGM_FI_DEV_MINOR_NUMBER": 55, - "DCGM_FI_DEV_OEM_INFOROM_VER": 56, - "DCGM_FI_DEV_PCI_BUSID": 57, - "DCGM_FI_DEV_PCI_COMBINED_ID": 58, - "DCGM_FI_DEV_PCI_SUBSYS_ID": 59, - "DCGM_FI_GPU_TOPOLOGY_PCI": 60, - "DCGM_FI_GPU_TOPOLOGY_NVLINK": 61, - "DCGM_FI_GPU_TOPOLOGY_AFFINITY": 62, - "DCGM_FI_DEV_CUDA_COMPUTE_CAPABILITY": 63, - "DCGM_FI_DEV_P2P_NVLINK_STATUS": 64, - "DCGM_FI_DEV_COMPUTE_MODE": 65, - "DCGM_FI_DEV_PERSISTENCE_MODE": 66, - "DCGM_FI_DEV_MIG_MODE": 67, - "DCGM_FI_DEV_CUDA_VISIBLE_DEVICES_STR": 68, - "DCGM_FI_DEV_MIG_MAX_SLICES": 69, - "DCGM_FI_DEV_CPU_AFFINITY_0": 70, - "DCGM_FI_DEV_CPU_AFFINITY_1": 71, - "DCGM_FI_DEV_CPU_AFFINITY_2": 72, - "DCGM_FI_DEV_CPU_AFFINITY_3": 73, - "DCGM_FI_DEV_CC_MODE": 74, - "DCGM_FI_DEV_MIG_ATTRIBUTES": 75, - "DCGM_FI_DEV_MIG_GI_INFO": 76, - "DCGM_FI_DEV_MIG_CI_INFO": 77, - "DCGM_FI_DEV_ECC_INFOROM_VER": 80, - "DCGM_FI_DEV_POWER_INFOROM_VER": 81, - "DCGM_FI_DEV_INFOROM_IMAGE_VER": 82, - "DCGM_FI_DEV_INFOROM_CONFIG_CHECK": 83, - "DCGM_FI_DEV_INFOROM_CONFIG_VALID": 84, - "DCGM_FI_DEV_VBIOS_VERSION": 85, - "DCGM_FI_DEV_MEM_AFFINITY_0": 86, - "DCGM_FI_DEV_MEM_AFFINITY_1": 87, - "DCGM_FI_DEV_MEM_AFFINITY_2": 88, - "DCGM_FI_DEV_MEM_AFFINITY_3": 89, - "DCGM_FI_DEV_BAR1_TOTAL": 90, - "DCGM_FI_SYNC_BOOST": 91, - "DCGM_FI_DEV_BAR1_USED": 92, - "DCGM_FI_DEV_BAR1_FREE": 93, - "DCGM_FI_DEV_GPM_SUPPORT": 94, - "DCGM_FI_DEV_SM_CLOCK": 100, - "DCGM_FI_DEV_MEM_CLOCK": 101, - "DCGM_FI_DEV_VIDEO_CLOCK": 102, - "DCGM_FI_DEV_APP_SM_CLOCK": 110, - "DCGM_FI_DEV_APP_MEM_CLOCK": 111, - "DCGM_FI_DEV_CLOCKS_EVENT_REASONS": 112, - "DCGM_FI_DEV_MAX_SM_CLOCK": 113, - "DCGM_FI_DEV_MAX_MEM_CLOCK": 114, - "DCGM_FI_DEV_MAX_VIDEO_CLOCK": 115, - "DCGM_FI_DEV_AUTOBOOST": 120, - "DCGM_FI_DEV_SUPPORTED_CLOCKS": 130, - "DCGM_FI_DEV_MEMORY_TEMP": 140, - "DCGM_FI_DEV_GPU_TEMP": 150, - "DCGM_FI_DEV_MEM_MAX_OP_TEMP": 151, - "DCGM_FI_DEV_GPU_MAX_OP_TEMP": 152, - "DCGM_FI_DEV_GPU_TEMP_LIMIT": 153, - "DCGM_FI_DEV_POWER_USAGE": 155, - "DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION": 156, - "DCGM_FI_DEV_POWER_USAGE_INSTANT": 157, - "DCGM_FI_DEV_SLOWDOWN_TEMP": 158, - "DCGM_FI_DEV_SHUTDOWN_TEMP": 159, - "DCGM_FI_DEV_POWER_MGMT_LIMIT": 160, - "DCGM_FI_DEV_POWER_MGMT_LIMIT_MIN": 161, - "DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX": 162, - "DCGM_FI_DEV_POWER_MGMT_LIMIT_DEF": 163, - "DCGM_FI_DEV_ENFORCED_POWER_LIMIT": 164, - "DCGM_FI_DEV_REQUESTED_POWER_PROFILE_MASK": 165, - "DCGM_FI_DEV_ENFORCED_POWER_PROFILE_MASK": 166, - "DCGM_FI_DEV_VALID_POWER_PROFILE_MASK": 167, - "DCGM_FI_DEV_FABRIC_MANAGER_STATUS": 170, - "DCGM_FI_DEV_FABRIC_MANAGER_ERROR_CODE": 171, - "DCGM_FI_DEV_FABRIC_CLUSTER_UUID": 172, - "DCGM_FI_DEV_FABRIC_CLIQUE_ID": 173, - "DCGM_FI_DEV_FABRIC_HEALTH_MASK": 174, - "DCGM_FI_DEV_PSTATE": 190, - "DCGM_FI_DEV_FAN_SPEED": 191, - "DCGM_FI_DEV_PCIE_TX_THROUGHPUT": 200, - "DCGM_FI_DEV_PCIE_RX_THROUGHPUT": 201, - "DCGM_FI_DEV_PCIE_REPLAY_COUNTER": 202, - "DCGM_FI_DEV_GPU_UTIL": 203, - "DCGM_FI_DEV_MEM_COPY_UTIL": 204, - "DCGM_FI_DEV_ACCOUNTING_DATA": 205, - "DCGM_FI_DEV_ENC_UTIL": 206, - "DCGM_FI_DEV_DEC_UTIL": 207, - "DCGM_FI_DEV_XID_ERRORS": 230, - "DCGM_FI_DEV_PCIE_MAX_LINK_GEN": 235, - "DCGM_FI_DEV_PCIE_MAX_LINK_WIDTH": 236, - "DCGM_FI_DEV_PCIE_LINK_GEN": 237, - "DCGM_FI_DEV_PCIE_LINK_WIDTH": 238, - "DCGM_FI_DEV_POWER_VIOLATION": 240, - "DCGM_FI_DEV_THERMAL_VIOLATION": 241, - "DCGM_FI_DEV_SYNC_BOOST_VIOLATION": 242, - "DCGM_FI_DEV_BOARD_LIMIT_VIOLATION": 243, - "DCGM_FI_DEV_LOW_UTIL_VIOLATION": 244, - "DCGM_FI_DEV_RELIABILITY_VIOLATION": 245, - "DCGM_FI_DEV_TOTAL_APP_CLOCKS_VIOLATION": 246, - "DCGM_FI_DEV_TOTAL_BASE_CLOCKS_VIOLATION": 247, - "DCGM_FI_DEV_FB_TOTAL": 250, - "DCGM_FI_DEV_FB_FREE": 251, - "DCGM_FI_DEV_FB_USED": 252, - "DCGM_FI_DEV_FB_RESERVED": 253, - "DCGM_FI_DEV_FB_USED_PERCENT": 254, - "DCGM_FI_DEV_C2C_LINK_COUNT": 285, - "DCGM_FI_DEV_C2C_LINK_STATUS": 286, - "DCGM_FI_DEV_C2C_MAX_BANDWIDTH": 287, - "DCGM_FI_DEV_ECC_CURRENT": 300, - "DCGM_FI_DEV_ECC_PENDING": 301, - "DCGM_FI_DEV_ECC_SBE_VOL_TOTAL": 310, - "DCGM_FI_DEV_ECC_DBE_VOL_TOTAL": 311, - "DCGM_FI_DEV_ECC_SBE_AGG_TOTAL": 312, - "DCGM_FI_DEV_ECC_DBE_AGG_TOTAL": 313, - "DCGM_FI_DEV_ECC_SBE_VOL_L1": 314, - "DCGM_FI_DEV_ECC_DBE_VOL_L1": 315, - "DCGM_FI_DEV_ECC_SBE_VOL_L2": 316, - "DCGM_FI_DEV_ECC_DBE_VOL_L2": 317, - "DCGM_FI_DEV_ECC_SBE_VOL_DEV": 318, - "DCGM_FI_DEV_ECC_DBE_VOL_DEV": 319, - "DCGM_FI_DEV_ECC_SBE_VOL_REG": 320, - "DCGM_FI_DEV_ECC_DBE_VOL_REG": 321, - "DCGM_FI_DEV_ECC_SBE_VOL_TEX": 322, - "DCGM_FI_DEV_ECC_DBE_VOL_TEX": 323, - "DCGM_FI_DEV_ECC_SBE_AGG_L1": 324, - "DCGM_FI_DEV_ECC_DBE_AGG_L1": 325, - "DCGM_FI_DEV_ECC_SBE_AGG_L2": 326, - "DCGM_FI_DEV_ECC_DBE_AGG_L2": 327, - "DCGM_FI_DEV_ECC_SBE_AGG_DEV": 328, - "DCGM_FI_DEV_ECC_DBE_AGG_DEV": 329, - "DCGM_FI_DEV_ECC_SBE_AGG_REG": 330, - "DCGM_FI_DEV_ECC_DBE_AGG_REG": 331, - "DCGM_FI_DEV_ECC_SBE_AGG_TEX": 332, - "DCGM_FI_DEV_ECC_DBE_AGG_TEX": 333, - "DCGM_FI_DEV_ECC_SBE_VOL_SHM": 334, - "DCGM_FI_DEV_ECC_DBE_VOL_SHM": 335, - "DCGM_FI_DEV_ECC_SBE_VOL_CBU": 336, - "DCGM_FI_DEV_ECC_DBE_VOL_CBU": 337, - "DCGM_FI_DEV_ECC_SBE_AGG_SHM": 338, - "DCGM_FI_DEV_ECC_DBE_AGG_SHM": 339, - "DCGM_FI_DEV_ECC_SBE_AGG_CBU": 340, - "DCGM_FI_DEV_ECC_DBE_AGG_CBU": 341, - "DCGM_FI_DEV_ECC_SBE_VOL_SRM": 342, - "DCGM_FI_DEV_ECC_DBE_VOL_SRM": 343, - "DCGM_FI_DEV_ECC_SBE_AGG_SRM": 344, - "DCGM_FI_DEV_ECC_DBE_AGG_SRM": 345, - "DCGM_FI_DEV_THRESHOLD_SRM": 346, - "DCGM_FI_DEV_DIAG_MEMORY_RESULT": 350, - "DCGM_FI_DEV_DIAG_DIAGNOSTIC_RESULT": 351, - "DCGM_FI_DEV_DIAG_PCIE_RESULT": 352, - "DCGM_FI_DEV_DIAG_TARGETED_STRESS_RESULT": 353, - "DCGM_FI_DEV_DIAG_TARGETED_POWER_RESULT": 354, - "DCGM_FI_DEV_DIAG_MEMORY_BANDWIDTH_RESULT": 355, - "DCGM_FI_DEV_DIAG_MEMTEST_RESULT": 356, - "DCGM_FI_DEV_DIAG_PULSE_TEST_RESULT": 357, - "DCGM_FI_DEV_DIAG_EUD_RESULT": 358, - "DCGM_FI_DEV_DIAG_CPU_EUD_RESULT": 359, - "DCGM_FI_DEV_DIAG_SOFTWARE_RESULT": 360, - "DCGM_FI_DEV_DIAG_NVBANDWIDTH_RESULT": 361, - "DCGM_FI_DEV_DIAG_STATUS": 362, - "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_MAX": 385, - "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_HIGH": 386, - "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_PARTIAL": 387, - "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_LOW": 388, - "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_NONE": 389, - "DCGM_FI_DEV_RETIRED_SBE": 390, - "DCGM_FI_DEV_RETIRED_DBE": 391, - "DCGM_FI_DEV_RETIRED_PENDING": 392, - "DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS": 393, - "DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS": 394, - "DCGM_FI_DEV_ROW_REMAP_FAILURE": 395, - "DCGM_FI_DEV_ROW_REMAP_PENDING": 396, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0": 400, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1": 401, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2": 402, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3": 403, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4": 404, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5": 405, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L12": 406, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L13": 407, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L14": 408, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL": 409, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0": 410, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1": 411, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2": 412, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3": 413, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4": 414, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5": 415, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L12": 416, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L13": 417, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L14": 418, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL": 419, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0": 420, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1": 421, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2": 422, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3": 423, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4": 424, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5": 425, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L12": 426, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L13": 427, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L14": 428, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL": 429, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0": 430, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1": 431, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2": 432, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3": 433, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4": 434, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5": 435, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L12": 436, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L13": 437, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L14": 438, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL": 439, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L0": 440, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L1": 441, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L2": 442, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L3": 443, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L4": 444, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L5": 445, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L12": 446, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L13": 447, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L14": 448, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL": 449, - "DCGM_FI_DEV_GPU_NVLINK_ERRORS": 450, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6": 451, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7": 452, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8": 453, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L9": 454, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10": 455, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11": 456, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6": 457, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7": 458, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8": 459, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L9": 460, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10": 461, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11": 462, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6": 463, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7": 464, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8": 465, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L9": 466, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10": 467, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11": 468, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6": 469, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7": 470, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8": 471, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L9": 472, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L10": 473, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L11": 474, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L6": 475, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L7": 476, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L8": 477, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L9": 478, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L10": 479, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L11": 480, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L15": 481, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L16": 482, - "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L17": 483, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L15": 484, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L16": 485, - "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L17": 486, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L15": 487, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L16": 488, - "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L17": 489, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L15": 491, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L16": 492, - "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L17": 493, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L15": 494, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L16": 495, - "DCGM_FI_DEV_NVLINK_BANDWIDTH_L17": 496, - "DCGM_FI_DEV_NVLINK_ERROR_DL_CRC": 497, - "DCGM_FI_DEV_NVLINK_ERROR_DL_RECOVERY": 498, - "DCGM_FI_DEV_NVLINK_ERROR_DL_REPLAY": 499, - "DCGM_FI_DEV_VIRTUAL_MODE": 500, - "DCGM_FI_DEV_SUPPORTED_TYPE_INFO": 501, - "DCGM_FI_DEV_CREATABLE_VGPU_TYPE_IDS": 502, - "DCGM_FI_DEV_VGPU_INSTANCE_IDS": 503, - "DCGM_FI_DEV_VGPU_UTILIZATIONS": 504, - "DCGM_FI_DEV_VGPU_PER_PROCESS_UTILIZATION": 505, - "DCGM_FI_DEV_ENC_STATS": 506, - "DCGM_FI_DEV_FBC_STATS": 507, - "DCGM_FI_DEV_FBC_SESSIONS_INFO": 508, - "DCGM_FI_DEV_SUPPORTED_VGPU_TYPE_IDS": 509, - "DCGM_FI_DEV_VGPU_TYPE_INFO": 510, - "DCGM_FI_DEV_VGPU_TYPE_NAME": 511, - "DCGM_FI_DEV_VGPU_TYPE_CLASS": 512, - "DCGM_FI_DEV_VGPU_TYPE_LICENSE": 513, - "DCGM_FI_DEV_VGPU_VM_ID": 520, - "DCGM_FI_FIRST_VGPU_FIELD_ID": 520, - "DCGM_FI_DEV_VGPU_VM_NAME": 521, - "DCGM_FI_DEV_VGPU_TYPE": 522, - "DCGM_FI_DEV_VGPU_UUID": 523, - "DCGM_FI_DEV_VGPU_DRIVER_VERSION": 524, - "DCGM_FI_DEV_VGPU_MEMORY_USAGE": 525, - "DCGM_FI_DEV_VGPU_LICENSE_STATUS": 526, - "DCGM_FI_DEV_VGPU_FRAME_RATE_LIMIT": 527, - "DCGM_FI_DEV_VGPU_ENC_STATS": 528, - "DCGM_FI_DEV_VGPU_ENC_SESSIONS_INFO": 529, - "DCGM_FI_DEV_VGPU_FBC_STATS": 530, - "DCGM_FI_DEV_VGPU_FBC_SESSIONS_INFO": 531, - "DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE": 532, - "DCGM_FI_DEV_VGPU_PCI_ID": 533, - "DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID": 534, - "DCGM_FI_LAST_VGPU_FIELD_ID": 570, - "DCGM_FI_DEV_PLATFORM_INFINIBAND_GUID": 571, - "DCGM_FI_DEV_PLATFORM_CHASSIS_SERIAL_NUMBER": 572, - "DCGM_FI_DEV_PLATFORM_CHASSIS_SLOT_NUMBER": 573, - "DCGM_FI_DEV_PLATFORM_TRAY_INDEX": 574, - "DCGM_FI_DEV_PLATFORM_HOST_ID": 575, - "DCGM_FI_DEV_PLATFORM_PEER_TYPE": 576, - "DCGM_FI_DEV_PLATFORM_MODULE_ID": 577, - "DCGM_FI_DEV_NVLINK_PPRM_OPER_RECOVERY": 580, - "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TIME_SINCE_LAST": 581, - "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TIME_BETWEEN_LAST_TWO": 582, - "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TOTAL_SUCCESSFUL_EVENTS": 583, - "DCGM_FI_DEV_NVLINK_PPCNT_PHYSICAL_SUCCESSFUL_RECOVERY_EVENTS": 584, - "DCGM_FI_DEV_NVLINK_PPCNT_PHYSICAL_LINK_DOWN_COUNTER": 585, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_CODES": 586, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_CODE_ERR": 587, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_UNCORRECTABLE_CODE": 588, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_CODES": 589, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_RETRY_CODES": 590, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_RETRY_EVENTS": 591, - "DCGM_FI_DEV_NVLINK_PPCNT_PLR_SYNC_EVENTS": 592, - "DCGM_FI_INTERNAL_FIELDS_0_START": 600, - "DCGM_FI_INTERNAL_FIELDS_0_END": 699, - "DCGM_FI_FIRST_NVSWITCH_FIELD_ID": 700, - "DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT": 701, - "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ": 702, - "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV": 703, - "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD": 704, - "DCGM_FI_DEV_NVSWITCH_POWER_VDD": 705, - "DCGM_FI_DEV_NVSWITCH_POWER_DVDD": 706, - "DCGM_FI_DEV_NVSWITCH_POWER_HVDD": 707, - "DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX": 780, - "DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX": 781, - "DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS": 782, - "DCGM_FI_DEV_NVSWITCH_LINK_NON_FATAL_ERRORS": 783, - "DCGM_FI_DEV_NVSWITCH_LINK_REPLAY_ERRORS": 784, - "DCGM_FI_DEV_NVSWITCH_LINK_RECOVERY_ERRORS": 785, - "DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS": 786, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS": 787, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS": 788, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC0": 789, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC1": 790, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC2": 791, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC3": 792, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC0": 793, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC1": 794, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC2": 795, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC3": 796, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC0": 797, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC1": 798, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC2": 799, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC3": 800, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC0": 801, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC1": 802, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC2": 803, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC3": 804, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC0": 805, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC1": 806, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC2": 807, - "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC3": 808, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE0": 809, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE1": 810, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE2": 811, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE3": 812, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE0": 813, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE1": 814, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE2": 815, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE3": 816, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE4": 817, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE5": 818, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE6": 819, - "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE7": 820, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE4": 821, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE5": 822, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE6": 823, - "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE7": 824, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L0": 825, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L1": 826, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L2": 827, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L3": 828, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L4": 829, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L5": 830, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L6": 831, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L7": 832, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L8": 833, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L9": 834, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L10": 835, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L11": 836, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L12": 837, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L13": 838, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L14": 839, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L15": 840, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L16": 841, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L17": 842, - "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_TOTAL": 843, - "DCGM_FI_DEV_NVSWITCH_FATAL_ERRORS": 856, - "DCGM_FI_DEV_NVSWITCH_NON_FATAL_ERRORS": 857, - "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT": 858, - "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_LIMIT_SLOWDOWN": 859, - "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_LIMIT_SHUTDOWN": 860, - "DCGM_FI_DEV_NVSWITCH_THROUGHPUT_TX": 861, - "DCGM_FI_DEV_NVSWITCH_THROUGHPUT_RX": 862, - "DCGM_FI_DEV_NVSWITCH_PHYS_ID": 863, - "DCGM_FI_DEV_NVSWITCH_RESET_REQUIRED": 864, - "DCGM_FI_DEV_NVSWITCH_LINK_ID": 865, - "DCGM_FI_DEV_NVSWITCH_PCIE_DOMAIN": 866, - "DCGM_FI_DEV_NVSWITCH_PCIE_BUS": 867, - "DCGM_FI_DEV_NVSWITCH_PCIE_DEVICE": 868, - "DCGM_FI_DEV_NVSWITCH_PCIE_FUNCTION": 869, - "DCGM_FI_DEV_NVSWITCH_LINK_STATUS": 870, - "DCGM_FI_DEV_NVSWITCH_LINK_TYPE": 871, - "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_DOMAIN": 872, - "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_BUS": 873, - "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_DEVICE": 874, - "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_FUNCTION": 875, - "DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID": 876, - "DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID": 877, - "DCGM_FI_DEV_NVSWITCH_DEVICE_UUID": 878, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L0": 879, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L1": 880, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L2": 881, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L3": 882, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L4": 883, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L5": 884, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L6": 885, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L7": 886, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L8": 887, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L9": 888, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L10": 889, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L11": 890, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L12": 891, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L13": 892, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L14": 893, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L15": 894, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L16": 895, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L17": 896, - "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_TOTAL": 897, - "DCGM_FI_LAST_NVSWITCH_FIELD_ID": 899, - "DCGM_FI_PROF_GR_ENGINE_ACTIVE": 1001, - "DCGM_FI_PROF_SM_ACTIVE": 1002, - "DCGM_FI_PROF_SM_OCCUPANCY": 1003, - "DCGM_FI_PROF_PIPE_TENSOR_ACTIVE": 1004, - "DCGM_FI_PROF_DRAM_ACTIVE": 1005, - "DCGM_FI_PROF_PIPE_FP64_ACTIVE": 1006, - "DCGM_FI_PROF_PIPE_FP32_ACTIVE": 1007, - "DCGM_FI_PROF_PIPE_FP16_ACTIVE": 1008, - "DCGM_FI_PROF_PCIE_TX_BYTES": 1009, - "DCGM_FI_PROF_PCIE_RX_BYTES": 1010, - "DCGM_FI_PROF_NVLINK_TX_BYTES": 1011, - "DCGM_FI_PROF_NVLINK_RX_BYTES": 1012, - "DCGM_FI_PROF_PIPE_TENSOR_IMMA_ACTIVE": 1013, - "DCGM_FI_PROF_PIPE_TENSOR_HMMA_ACTIVE": 1014, - "DCGM_FI_PROF_PIPE_TENSOR_DFMA_ACTIVE": 1015, - "DCGM_FI_PROF_PIPE_INT_ACTIVE": 1016, - "DCGM_FI_PROF_NVDEC0_ACTIVE": 1017, - "DCGM_FI_PROF_NVDEC1_ACTIVE": 1018, - "DCGM_FI_PROF_NVDEC2_ACTIVE": 1019, - "DCGM_FI_PROF_NVDEC3_ACTIVE": 1020, - "DCGM_FI_PROF_NVDEC4_ACTIVE": 1021, - "DCGM_FI_PROF_NVDEC5_ACTIVE": 1022, - "DCGM_FI_PROF_NVDEC6_ACTIVE": 1023, - "DCGM_FI_PROF_NVDEC7_ACTIVE": 1024, - "DCGM_FI_PROF_NVJPG0_ACTIVE": 1025, - "DCGM_FI_PROF_NVJPG1_ACTIVE": 1026, - "DCGM_FI_PROF_NVJPG2_ACTIVE": 1027, - "DCGM_FI_PROF_NVJPG3_ACTIVE": 1028, - "DCGM_FI_PROF_NVJPG4_ACTIVE": 1029, - "DCGM_FI_PROF_NVJPG5_ACTIVE": 1030, - "DCGM_FI_PROF_NVJPG6_ACTIVE": 1031, - "DCGM_FI_PROF_NVJPG7_ACTIVE": 1032, - "DCGM_FI_PROF_NVOFA0_ACTIVE": 1033, - "DCGM_FI_PROF_NVOFA1_ACTIVE": 1034, - "DCGM_FI_PROF_NVLINK_L0_TX_BYTES": 1040, - "DCGM_FI_PROF_NVLINK_L0_RX_BYTES": 1041, - "DCGM_FI_PROF_NVLINK_L1_TX_BYTES": 1042, - "DCGM_FI_PROF_NVLINK_L1_RX_BYTES": 1043, - "DCGM_FI_PROF_NVLINK_L2_TX_BYTES": 1044, - "DCGM_FI_PROF_NVLINK_L2_RX_BYTES": 1045, - "DCGM_FI_PROF_NVLINK_L3_TX_BYTES": 1046, - "DCGM_FI_PROF_NVLINK_L3_RX_BYTES": 1047, - "DCGM_FI_PROF_NVLINK_L4_TX_BYTES": 1048, - "DCGM_FI_PROF_NVLINK_L4_RX_BYTES": 1049, - "DCGM_FI_PROF_NVLINK_L5_TX_BYTES": 1050, - "DCGM_FI_PROF_NVLINK_L5_RX_BYTES": 1051, - "DCGM_FI_PROF_NVLINK_L6_TX_BYTES": 1052, - "DCGM_FI_PROF_NVLINK_L6_RX_BYTES": 1053, - "DCGM_FI_PROF_NVLINK_L7_TX_BYTES": 1054, - "DCGM_FI_PROF_NVLINK_L7_RX_BYTES": 1055, - "DCGM_FI_PROF_NVLINK_L8_TX_BYTES": 1056, - "DCGM_FI_PROF_NVLINK_L8_RX_BYTES": 1057, - "DCGM_FI_PROF_NVLINK_L9_TX_BYTES": 1058, - "DCGM_FI_PROF_NVLINK_L9_RX_BYTES": 1059, - "DCGM_FI_PROF_NVLINK_L10_TX_BYTES": 1060, - "DCGM_FI_PROF_NVLINK_L10_RX_BYTES": 1061, - "DCGM_FI_PROF_NVLINK_L11_TX_BYTES": 1062, - "DCGM_FI_PROF_NVLINK_L11_RX_BYTES": 1063, - "DCGM_FI_PROF_NVLINK_L12_TX_BYTES": 1064, - "DCGM_FI_PROF_NVLINK_L12_RX_BYTES": 1065, - "DCGM_FI_PROF_NVLINK_L13_TX_BYTES": 1066, - "DCGM_FI_PROF_NVLINK_L13_RX_BYTES": 1067, - "DCGM_FI_PROF_NVLINK_L14_TX_BYTES": 1068, - "DCGM_FI_PROF_NVLINK_L14_RX_BYTES": 1069, - "DCGM_FI_PROF_NVLINK_L15_TX_BYTES": 1070, - "DCGM_FI_PROF_NVLINK_L15_RX_BYTES": 1071, - "DCGM_FI_PROF_NVLINK_L16_TX_BYTES": 1072, - "DCGM_FI_PROF_NVLINK_L16_RX_BYTES": 1073, - "DCGM_FI_PROF_NVLINK_L17_TX_BYTES": 1074, - "DCGM_FI_PROF_NVLINK_L17_RX_BYTES": 1075, - "DCGM_FI_PROF_C2C_TX_ALL_BYTES": 1076, - "DCGM_FI_PROF_C2C_TX_DATA_BYTES": 1077, - "DCGM_FI_PROF_C2C_RX_ALL_BYTES": 1078, - "DCGM_FI_PROF_C2C_RX_DATA_BYTES": 1079, - "DCGM_FI_PROF_HOSTMEM_CACHE_HIT": 1080, - "DCGM_FI_PROF_HOSTMEM_CACHE_MISS": 1081, - "DCGM_FI_PROF_PEERMEM_CACHE_HIT": 1082, - "DCGM_FI_PROF_PEERMEM_CACHE_MISS": 1083, - "DCGM_FI_DEV_CPU_UTIL_TOTAL": 1100, - "DCGM_FI_DEV_CPU_UTIL_USER": 1101, - "DCGM_FI_DEV_CPU_UTIL_NICE": 1102, - "DCGM_FI_DEV_CPU_UTIL_SYS": 1103, - "DCGM_FI_DEV_CPU_UTIL_IRQ": 1104, - "DCGM_FI_DEV_CPU_TEMP_CURRENT": 1110, - "DCGM_FI_DEV_CPU_TEMP_WARNING": 1111, - "DCGM_FI_DEV_CPU_TEMP_CRITICAL": 1112, - "DCGM_FI_DEV_CPU_CLOCK_CURRENT": 1120, - "DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT": 1130, - "DCGM_FI_DEV_CPU_POWER_LIMIT": 1131, - "DCGM_FI_DEV_SYSIO_POWER_UTIL_CURRENT": 1132, - "DCGM_FI_DEV_MODULE_POWER_UTIL_CURRENT": 1133, - "DCGM_FI_DEV_CPU_VENDOR": 1140, - "DCGM_FI_DEV_CPU_MODEL": 1141, - "DCGM_FI_DEV_NVLINK_COUNT_TX_PACKETS": 1200, - "DCGM_FI_DEV_NVLINK_COUNT_TX_BYTES": 1201, - "DCGM_FI_DEV_NVLINK_COUNT_RX_PACKETS": 1202, - "DCGM_FI_DEV_NVLINK_COUNT_RX_BYTES": 1203, - "DCGM_FI_DEV_NVLINK_COUNT_RX_MALFORMED_PACKET_ERRORS": 1204, - "DCGM_FI_DEV_NVLINK_COUNT_RX_BUFFER_OVERRUN_ERRORS": 1205, - "DCGM_FI_DEV_NVLINK_COUNT_RX_ERRORS": 1206, - "DCGM_FI_DEV_NVLINK_COUNT_RX_REMOTE_ERRORS": 1207, - "DCGM_FI_DEV_NVLINK_COUNT_RX_GENERAL_ERRORS": 1208, - "DCGM_FI_DEV_NVLINK_COUNT_LOCAL_LINK_INTEGRITY_ERRORS": 1209, - "DCGM_FI_DEV_NVLINK_COUNT_TX_DISCARDS": 1210, - "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_SUCCESSFUL_EVENTS": 1211, - "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_FAILED_EVENTS": 1212, - "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_EVENTS": 1213, - "DCGM_FI_DEV_NVLINK_COUNT_RX_SYMBOL_ERRORS": 1214, - "DCGM_FI_DEV_NVLINK_COUNT_SYMBOL_BER": 1215, - "DCGM_FI_DEV_NVLINK_COUNT_SYMBOL_BER_FLOAT": 1216, - "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_BER": 1217, - "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_BER_FLOAT": 1218, - "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_ERRORS": 1219, - "DCGM_FI_DEV_FIRST_CONNECTX_FIELD_ID": 1300, - "DCGM_FI_DEV_CONNECTX_HEALTH": 1300, - "DCGM_FI_DEV_CONNECTX_ACTIVE_PCIE_LINK_WIDTH": 1301, - "DCGM_FI_DEV_CONNECTX_ACTIVE_PCIE_LINK_SPEED": 1302, - "DCGM_FI_DEV_CONNECTX_EXPECT_PCIE_LINK_WIDTH": 1303, - "DCGM_FI_DEV_CONNECTX_EXPECT_PCIE_LINK_SPEED": 1304, - "DCGM_FI_DEV_CONNECTX_CORRECTABLE_ERR_STATUS": 1305, - "DCGM_FI_DEV_CONNECTX_CORRECTABLE_ERR_MASK": 1306, - "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_STATUS": 1307, - "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_MASK": 1308, - "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_SEVERITY": 1309, - "DCGM_FI_DEV_CONNECTX_DEVICE_TEMPERATURE": 1310, - "DCGM_FI_DEV_LAST_CONNECTX_FIELD_ID": 1399, - "DCGM_FI_DEV_C2C_LINK_ERROR_INTR": 1400, - "DCGM_FI_DEV_C2C_LINK_ERROR_REPLAY": 1401, - "DCGM_FI_DEV_C2C_LINK_ERROR_REPLAY_B2B": 1402, - "DCGM_FI_DEV_C2C_LINK_POWER_STATE": 1403, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_0": 1404, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_1": 1405, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_2": 1406, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_3": 1407, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_4": 1408, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_5": 1409, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_6": 1410, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_7": 1411, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_8": 1412, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_9": 1413, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_10": 1414, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_11": 1415, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_12": 1416, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13": 1417, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14": 1418, - "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15": 1419, - "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP_NS": 1420, - "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST_NS": 1421, - "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN_NS": 1422, - "DCGM_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN_NS": 1423, - "DCGM_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN_NS": 1424, - "DCGM_FI_DEV_PWR_SMOOTHING_ENABLED": 1425, - "DCGM_FI_DEV_PWR_SMOOTHING_PRIV_LVL": 1426, - "DCGM_FI_DEV_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED": 1427, - "DCGM_FI_DEV_PWR_SMOOTHING_APPLIED_TMP_CEIL": 1428, - "DCGM_FI_DEV_PWR_SMOOTHING_APPLIED_TMP_FLOOR": 1429, - "DCGM_FI_DEV_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING": 1430, - "DCGM_FI_DEV_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING": 1431, + "DCGM_FI_UNKNOWN": 0, + "DCGM_FI_DRIVER_VERSION": 1, + "DCGM_FI_NVML_VERSION": 2, + "DCGM_FI_PROCESS_NAME": 3, + "DCGM_FI_DEV_COUNT": 4, + "DCGM_FI_CUDA_DRIVER_VERSION": 5, + "DCGM_FI_BIND_UNBIND_EVENT": 6, + "DCGM_FI_DEV_NAME": 50, + "DCGM_FI_DEV_BRAND": 51, + "DCGM_FI_DEV_NVML_INDEX": 52, + "DCGM_FI_DEV_SERIAL": 53, + "DCGM_FI_DEV_UUID": 54, + "DCGM_FI_DEV_MINOR_NUMBER": 55, + "DCGM_FI_DEV_OEM_INFOROM_VER": 56, + "DCGM_FI_DEV_PCI_BUSID": 57, + "DCGM_FI_DEV_PCI_COMBINED_ID": 58, + "DCGM_FI_DEV_PCI_SUBSYS_ID": 59, + "DCGM_FI_GPU_TOPOLOGY_PCI": 60, + "DCGM_FI_GPU_TOPOLOGY_NVLINK": 61, + "DCGM_FI_GPU_TOPOLOGY_AFFINITY": 62, + "DCGM_FI_DEV_CUDA_COMPUTE_CAPABILITY": 63, + "DCGM_FI_DEV_P2P_NVLINK_STATUS": 64, + "DCGM_FI_DEV_COMPUTE_MODE": 65, + "DCGM_FI_DEV_PERSISTENCE_MODE": 66, + "DCGM_FI_DEV_MIG_MODE": 67, + "DCGM_FI_DEV_CUDA_VISIBLE_DEVICES_STR": 68, + "DCGM_FI_DEV_MIG_MAX_SLICES": 69, + "DCGM_FI_DEV_CPU_AFFINITY_0": 70, + "DCGM_FI_DEV_CPU_AFFINITY_1": 71, + "DCGM_FI_DEV_CPU_AFFINITY_2": 72, + "DCGM_FI_DEV_CPU_AFFINITY_3": 73, + "DCGM_FI_DEV_CC_MODE": 74, + "DCGM_FI_DEV_MIG_ATTRIBUTES": 75, + "DCGM_FI_DEV_MIG_GI_INFO": 76, + "DCGM_FI_DEV_MIG_CI_INFO": 77, + "DCGM_FI_DEV_ECC_INFOROM_VER": 80, + "DCGM_FI_DEV_POWER_INFOROM_VER": 81, + "DCGM_FI_DEV_INFOROM_IMAGE_VER": 82, + "DCGM_FI_DEV_INFOROM_CONFIG_CHECK": 83, + "DCGM_FI_DEV_INFOROM_CONFIG_VALID": 84, + "DCGM_FI_DEV_VBIOS_VERSION": 85, + "DCGM_FI_DEV_MEM_AFFINITY_0": 86, + "DCGM_FI_DEV_MEM_AFFINITY_1": 87, + "DCGM_FI_DEV_MEM_AFFINITY_2": 88, + "DCGM_FI_DEV_MEM_AFFINITY_3": 89, + "DCGM_FI_DEV_BAR1_TOTAL": 90, + "DCGM_FI_SYNC_BOOST": 91, + "DCGM_FI_DEV_BAR1_USED": 92, + "DCGM_FI_DEV_BAR1_FREE": 93, + "DCGM_FI_DEV_GPM_SUPPORT": 94, + "DCGM_FI_DEV_SM_CLOCK": 100, + "DCGM_FI_DEV_MEM_CLOCK": 101, + "DCGM_FI_DEV_VIDEO_CLOCK": 102, + "DCGM_FI_DEV_APP_SM_CLOCK": 110, + "DCGM_FI_DEV_APP_MEM_CLOCK": 111, + "DCGM_FI_DEV_CLOCKS_EVENT_REASONS": 112, + "DCGM_FI_DEV_MAX_SM_CLOCK": 113, + "DCGM_FI_DEV_MAX_MEM_CLOCK": 114, + "DCGM_FI_DEV_MAX_VIDEO_CLOCK": 115, + "DCGM_FI_DEV_AUTOBOOST": 120, + "DCGM_FI_DEV_SUPPORTED_CLOCKS": 130, + "DCGM_FI_DEV_MEMORY_TEMP": 140, + "DCGM_FI_DEV_GPU_TEMP": 150, + "DCGM_FI_DEV_MEM_MAX_OP_TEMP": 151, + "DCGM_FI_DEV_GPU_MAX_OP_TEMP": 152, + "DCGM_FI_DEV_GPU_TEMP_LIMIT": 153, + "DCGM_FI_DEV_POWER_USAGE": 155, + "DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION": 156, + "DCGM_FI_DEV_POWER_USAGE_INSTANT": 157, + "DCGM_FI_DEV_SLOWDOWN_TEMP": 158, + "DCGM_FI_DEV_SHUTDOWN_TEMP": 159, + "DCGM_FI_DEV_POWER_MGMT_LIMIT": 160, + "DCGM_FI_DEV_POWER_MGMT_LIMIT_MIN": 161, + "DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX": 162, + "DCGM_FI_DEV_POWER_MGMT_LIMIT_DEF": 163, + "DCGM_FI_DEV_ENFORCED_POWER_LIMIT": 164, + "DCGM_FI_DEV_REQUESTED_POWER_PROFILE_MASK": 165, + "DCGM_FI_DEV_ENFORCED_POWER_PROFILE_MASK": 166, + "DCGM_FI_DEV_VALID_POWER_PROFILE_MASK": 167, + "DCGM_FI_DEV_FABRIC_MANAGER_STATUS": 170, + "DCGM_FI_DEV_FABRIC_MANAGER_ERROR_CODE": 171, + "DCGM_FI_DEV_FABRIC_CLUSTER_UUID": 172, + "DCGM_FI_DEV_FABRIC_CLIQUE_ID": 173, + "DCGM_FI_DEV_FABRIC_HEALTH_MASK": 174, + "DCGM_FI_DEV_PSTATE": 190, + "DCGM_FI_DEV_FAN_SPEED": 191, + "DCGM_FI_DEV_PCIE_TX_THROUGHPUT": 200, + "DCGM_FI_DEV_PCIE_RX_THROUGHPUT": 201, + "DCGM_FI_DEV_PCIE_REPLAY_COUNTER": 202, + "DCGM_FI_DEV_GPU_UTIL": 203, + "DCGM_FI_DEV_MEM_COPY_UTIL": 204, + "DCGM_FI_DEV_ACCOUNTING_DATA": 205, + "DCGM_FI_DEV_ENC_UTIL": 206, + "DCGM_FI_DEV_DEC_UTIL": 207, + "DCGM_FI_DEV_XID_ERRORS": 230, + "DCGM_FI_DEV_PCIE_MAX_LINK_GEN": 235, + "DCGM_FI_DEV_PCIE_MAX_LINK_WIDTH": 236, + "DCGM_FI_DEV_PCIE_LINK_GEN": 237, + "DCGM_FI_DEV_PCIE_LINK_WIDTH": 238, + "DCGM_FI_DEV_POWER_VIOLATION": 240, + "DCGM_FI_DEV_THERMAL_VIOLATION": 241, + "DCGM_FI_DEV_SYNC_BOOST_VIOLATION": 242, + "DCGM_FI_DEV_BOARD_LIMIT_VIOLATION": 243, + "DCGM_FI_DEV_LOW_UTIL_VIOLATION": 244, + "DCGM_FI_DEV_RELIABILITY_VIOLATION": 245, + "DCGM_FI_DEV_TOTAL_APP_CLOCKS_VIOLATION": 246, + "DCGM_FI_DEV_TOTAL_BASE_CLOCKS_VIOLATION": 247, + "DCGM_FI_DEV_FB_TOTAL": 250, + "DCGM_FI_DEV_FB_FREE": 251, + "DCGM_FI_DEV_FB_USED": 252, + "DCGM_FI_DEV_FB_RESERVED": 253, + "DCGM_FI_DEV_FB_USED_PERCENT": 254, + "DCGM_FI_DEV_C2C_LINK_COUNT": 285, + "DCGM_FI_DEV_C2C_LINK_STATUS": 286, + "DCGM_FI_DEV_C2C_MAX_BANDWIDTH": 287, + "DCGM_FI_DEV_ECC_CURRENT": 300, + "DCGM_FI_DEV_ECC_PENDING": 301, + "DCGM_FI_DEV_ECC_SBE_VOL_TOTAL": 310, + "DCGM_FI_DEV_ECC_DBE_VOL_TOTAL": 311, + "DCGM_FI_DEV_ECC_SBE_AGG_TOTAL": 312, + "DCGM_FI_DEV_ECC_DBE_AGG_TOTAL": 313, + "DCGM_FI_DEV_ECC_SBE_VOL_L1": 314, + "DCGM_FI_DEV_ECC_DBE_VOL_L1": 315, + "DCGM_FI_DEV_ECC_SBE_VOL_L2": 316, + "DCGM_FI_DEV_ECC_DBE_VOL_L2": 317, + "DCGM_FI_DEV_ECC_SBE_VOL_DEV": 318, + "DCGM_FI_DEV_ECC_DBE_VOL_DEV": 319, + "DCGM_FI_DEV_ECC_SBE_VOL_REG": 320, + "DCGM_FI_DEV_ECC_DBE_VOL_REG": 321, + "DCGM_FI_DEV_ECC_SBE_VOL_TEX": 322, + "DCGM_FI_DEV_ECC_DBE_VOL_TEX": 323, + "DCGM_FI_DEV_ECC_SBE_AGG_L1": 324, + "DCGM_FI_DEV_ECC_DBE_AGG_L1": 325, + "DCGM_FI_DEV_ECC_SBE_AGG_L2": 326, + "DCGM_FI_DEV_ECC_DBE_AGG_L2": 327, + "DCGM_FI_DEV_ECC_SBE_AGG_DEV": 328, + "DCGM_FI_DEV_ECC_DBE_AGG_DEV": 329, + "DCGM_FI_DEV_ECC_SBE_AGG_REG": 330, + "DCGM_FI_DEV_ECC_DBE_AGG_REG": 331, + "DCGM_FI_DEV_ECC_SBE_AGG_TEX": 332, + "DCGM_FI_DEV_ECC_DBE_AGG_TEX": 333, + "DCGM_FI_DEV_ECC_SBE_VOL_SHM": 334, + "DCGM_FI_DEV_ECC_DBE_VOL_SHM": 335, + "DCGM_FI_DEV_ECC_SBE_VOL_CBU": 336, + "DCGM_FI_DEV_ECC_DBE_VOL_CBU": 337, + "DCGM_FI_DEV_ECC_SBE_AGG_SHM": 338, + "DCGM_FI_DEV_ECC_DBE_AGG_SHM": 339, + "DCGM_FI_DEV_ECC_SBE_AGG_CBU": 340, + "DCGM_FI_DEV_ECC_DBE_AGG_CBU": 341, + "DCGM_FI_DEV_ECC_SBE_VOL_SRM": 342, + "DCGM_FI_DEV_ECC_DBE_VOL_SRM": 343, + "DCGM_FI_DEV_ECC_SBE_AGG_SRM": 344, + "DCGM_FI_DEV_ECC_DBE_AGG_SRM": 345, + "DCGM_FI_DEV_THRESHOLD_SRM": 346, + "DCGM_FI_DEV_DIAG_MEMORY_RESULT": 350, + "DCGM_FI_DEV_DIAG_DIAGNOSTIC_RESULT": 351, + "DCGM_FI_DEV_DIAG_PCIE_RESULT": 352, + "DCGM_FI_DEV_DIAG_TARGETED_STRESS_RESULT": 353, + "DCGM_FI_DEV_DIAG_TARGETED_POWER_RESULT": 354, + "DCGM_FI_DEV_DIAG_MEMORY_BANDWIDTH_RESULT": 355, + "DCGM_FI_DEV_DIAG_MEMTEST_RESULT": 356, + "DCGM_FI_DEV_DIAG_PULSE_TEST_RESULT": 357, + "DCGM_FI_DEV_DIAG_EUD_RESULT": 358, + "DCGM_FI_DEV_DIAG_CPU_EUD_RESULT": 359, + "DCGM_FI_DEV_DIAG_SOFTWARE_RESULT": 360, + "DCGM_FI_DEV_DIAG_NVBANDWIDTH_RESULT": 361, + "DCGM_FI_DEV_DIAG_STATUS": 362, + "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_MAX": 385, + "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_HIGH": 386, + "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_PARTIAL": 387, + "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_LOW": 388, + "DCGM_FI_DEV_BANKS_REMAP_ROWS_AVAIL_NONE": 389, + "DCGM_FI_DEV_RETIRED_SBE": 390, + "DCGM_FI_DEV_RETIRED_DBE": 391, + "DCGM_FI_DEV_RETIRED_PENDING": 392, + "DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS": 393, + "DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS": 394, + "DCGM_FI_DEV_ROW_REMAP_FAILURE": 395, + "DCGM_FI_DEV_ROW_REMAP_PENDING": 396, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0": 400, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1": 401, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2": 402, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3": 403, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4": 404, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5": 405, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L12": 406, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L13": 407, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L14": 408, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL": 409, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0": 410, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1": 411, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2": 412, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3": 413, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4": 414, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5": 415, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L12": 416, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L13": 417, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L14": 418, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL": 419, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0": 420, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1": 421, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2": 422, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3": 423, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4": 424, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5": 425, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L12": 426, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L13": 427, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L14": 428, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL": 429, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0": 430, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1": 431, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2": 432, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3": 433, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4": 434, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5": 435, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L12": 436, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L13": 437, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L14": 438, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL": 439, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L0": 440, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L1": 441, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L2": 442, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L3": 443, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L4": 444, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L5": 445, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L12": 446, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L13": 447, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L14": 448, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL": 449, + "DCGM_FI_DEV_GPU_NVLINK_ERRORS": 450, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6": 451, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7": 452, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8": 453, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L9": 454, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10": 455, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11": 456, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6": 457, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7": 458, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8": 459, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L9": 460, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10": 461, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11": 462, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6": 463, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7": 464, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8": 465, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L9": 466, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10": 467, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11": 468, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6": 469, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7": 470, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8": 471, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L9": 472, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L10": 473, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L11": 474, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L6": 475, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L7": 476, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L8": 477, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L9": 478, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L10": 479, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L11": 480, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L15": 481, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L16": 482, + "DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L17": 483, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L15": 484, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L16": 485, + "DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L17": 486, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L15": 487, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L16": 488, + "DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L17": 489, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L15": 491, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L16": 492, + "DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L17": 493, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L15": 494, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L16": 495, + "DCGM_FI_DEV_NVLINK_BANDWIDTH_L17": 496, + "DCGM_FI_DEV_NVLINK_ERROR_DL_CRC": 497, + "DCGM_FI_DEV_NVLINK_ERROR_DL_RECOVERY": 498, + "DCGM_FI_DEV_NVLINK_ERROR_DL_REPLAY": 499, + "DCGM_FI_DEV_VIRTUAL_MODE": 500, + "DCGM_FI_DEV_SUPPORTED_TYPE_INFO": 501, + "DCGM_FI_DEV_CREATABLE_VGPU_TYPE_IDS": 502, + "DCGM_FI_DEV_VGPU_INSTANCE_IDS": 503, + "DCGM_FI_DEV_VGPU_UTILIZATIONS": 504, + "DCGM_FI_DEV_VGPU_PER_PROCESS_UTILIZATION": 505, + "DCGM_FI_DEV_ENC_STATS": 506, + "DCGM_FI_DEV_FBC_STATS": 507, + "DCGM_FI_DEV_FBC_SESSIONS_INFO": 508, + "DCGM_FI_DEV_SUPPORTED_VGPU_TYPE_IDS": 509, + "DCGM_FI_DEV_VGPU_TYPE_INFO": 510, + "DCGM_FI_DEV_VGPU_TYPE_NAME": 511, + "DCGM_FI_DEV_VGPU_TYPE_CLASS": 512, + "DCGM_FI_DEV_VGPU_TYPE_LICENSE": 513, + "DCGM_FI_DEV_VGPU_VM_ID": 520, + "DCGM_FI_FIRST_VGPU_FIELD_ID": 520, + "DCGM_FI_DEV_VGPU_VM_NAME": 521, + "DCGM_FI_DEV_VGPU_TYPE": 522, + "DCGM_FI_DEV_VGPU_UUID": 523, + "DCGM_FI_DEV_VGPU_DRIVER_VERSION": 524, + "DCGM_FI_DEV_VGPU_MEMORY_USAGE": 525, + "DCGM_FI_DEV_VGPU_LICENSE_STATUS": 526, + "DCGM_FI_DEV_VGPU_FRAME_RATE_LIMIT": 527, + "DCGM_FI_DEV_VGPU_ENC_STATS": 528, + "DCGM_FI_DEV_VGPU_ENC_SESSIONS_INFO": 529, + "DCGM_FI_DEV_VGPU_FBC_STATS": 530, + "DCGM_FI_DEV_VGPU_FBC_SESSIONS_INFO": 531, + "DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE": 532, + "DCGM_FI_DEV_VGPU_PCI_ID": 533, + "DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID": 534, + "DCGM_FI_LAST_VGPU_FIELD_ID": 570, + "DCGM_FI_DEV_PLATFORM_INFINIBAND_GUID": 571, + "DCGM_FI_DEV_PLATFORM_CHASSIS_SERIAL_NUMBER": 572, + "DCGM_FI_DEV_PLATFORM_CHASSIS_SLOT_NUMBER": 573, + "DCGM_FI_DEV_PLATFORM_TRAY_INDEX": 574, + "DCGM_FI_DEV_PLATFORM_HOST_ID": 575, + "DCGM_FI_DEV_PLATFORM_PEER_TYPE": 576, + "DCGM_FI_DEV_PLATFORM_MODULE_ID": 577, + "DCGM_FI_DEV_NVLINK_PPRM_OPER_RECOVERY": 580, + "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TIME_SINCE_LAST": 581, + "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TIME_BETWEEN_LAST_TWO": 582, + "DCGM_FI_DEV_NVLINK_PPCNT_RECOVERY_TOTAL_SUCCESSFUL_EVENTS": 583, + "DCGM_FI_DEV_NVLINK_PPCNT_PHYSICAL_SUCCESSFUL_RECOVERY_EVENTS": 584, + "DCGM_FI_DEV_NVLINK_PPCNT_PHYSICAL_LINK_DOWN_COUNTER": 585, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_CODES": 586, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_CODE_ERR": 587, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_RCV_UNCORRECTABLE_CODE": 588, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_CODES": 589, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_RETRY_CODES": 590, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_XMIT_RETRY_EVENTS": 591, + "DCGM_FI_DEV_NVLINK_PPCNT_PLR_SYNC_EVENTS": 592, + "DCGM_FI_INTERNAL_FIELDS_0_START": 600, + "DCGM_FI_INTERNAL_FIELDS_0_END": 699, + "DCGM_FI_FIRST_NVSWITCH_FIELD_ID": 700, + "DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT": 701, + "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ": 702, + "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV": 703, + "DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD": 704, + "DCGM_FI_DEV_NVSWITCH_POWER_VDD": 705, + "DCGM_FI_DEV_NVSWITCH_POWER_DVDD": 706, + "DCGM_FI_DEV_NVSWITCH_POWER_HVDD": 707, + "DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX": 780, + "DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX": 781, + "DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS": 782, + "DCGM_FI_DEV_NVSWITCH_LINK_NON_FATAL_ERRORS": 783, + "DCGM_FI_DEV_NVSWITCH_LINK_REPLAY_ERRORS": 784, + "DCGM_FI_DEV_NVSWITCH_LINK_RECOVERY_ERRORS": 785, + "DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS": 786, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS": 787, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS": 788, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC0": 789, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC1": 790, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC2": 791, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_LOW_VC3": 792, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC0": 793, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC1": 794, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC2": 795, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_MEDIUM_VC3": 796, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC0": 797, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC1": 798, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC2": 799, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_HIGH_VC3": 800, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC0": 801, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC1": 802, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC2": 803, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_PANIC_VC3": 804, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC0": 805, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC1": 806, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC2": 807, + "DCGM_FI_DEV_NVSWITCH_LINK_LATENCY_COUNT_VC3": 808, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE0": 809, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE1": 810, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE2": 811, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE3": 812, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE0": 813, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE1": 814, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE2": 815, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE3": 816, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE4": 817, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE5": 818, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE6": 819, + "DCGM_FI_DEV_NVSWITCH_LINK_CRC_ERRORS_LANE7": 820, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE4": 821, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE5": 822, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE6": 823, + "DCGM_FI_DEV_NVSWITCH_LINK_ECC_ERRORS_LANE7": 824, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L0": 825, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L1": 826, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L2": 827, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L3": 828, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L4": 829, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L5": 830, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L6": 831, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L7": 832, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L8": 833, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L9": 834, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L10": 835, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L11": 836, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L12": 837, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L13": 838, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L14": 839, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L15": 840, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L16": 841, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_L17": 842, + "DCGM_FI_DEV_NVLINK_TX_BANDWIDTH_TOTAL": 843, + "DCGM_FI_DEV_NVSWITCH_FATAL_ERRORS": 856, + "DCGM_FI_DEV_NVSWITCH_NON_FATAL_ERRORS": 857, + "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT": 858, + "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_LIMIT_SLOWDOWN": 859, + "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_LIMIT_SHUTDOWN": 860, + "DCGM_FI_DEV_NVSWITCH_THROUGHPUT_TX": 861, + "DCGM_FI_DEV_NVSWITCH_THROUGHPUT_RX": 862, + "DCGM_FI_DEV_NVSWITCH_PHYS_ID": 863, + "DCGM_FI_DEV_NVSWITCH_RESET_REQUIRED": 864, + "DCGM_FI_DEV_NVSWITCH_LINK_ID": 865, + "DCGM_FI_DEV_NVSWITCH_PCIE_DOMAIN": 866, + "DCGM_FI_DEV_NVSWITCH_PCIE_BUS": 867, + "DCGM_FI_DEV_NVSWITCH_PCIE_DEVICE": 868, + "DCGM_FI_DEV_NVSWITCH_PCIE_FUNCTION": 869, + "DCGM_FI_DEV_NVSWITCH_LINK_STATUS": 870, + "DCGM_FI_DEV_NVSWITCH_LINK_TYPE": 871, + "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_DOMAIN": 872, + "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_BUS": 873, + "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_DEVICE": 874, + "DCGM_FI_DEV_NVSWITCH_LINK_REMOTE_PCIE_FUNCTION": 875, + "DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID": 876, + "DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID": 877, + "DCGM_FI_DEV_NVSWITCH_DEVICE_UUID": 878, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L0": 879, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L1": 880, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L2": 881, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L3": 882, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L4": 883, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L5": 884, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L6": 885, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L7": 886, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L8": 887, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L9": 888, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L10": 889, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L11": 890, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L12": 891, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L13": 892, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L14": 893, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L15": 894, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L16": 895, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_L17": 896, + "DCGM_FI_DEV_NVLINK_RX_BANDWIDTH_TOTAL": 897, + "DCGM_FI_LAST_NVSWITCH_FIELD_ID": 899, + "DCGM_FI_PROF_GR_ENGINE_ACTIVE": 1001, + "DCGM_FI_PROF_SM_ACTIVE": 1002, + "DCGM_FI_PROF_SM_OCCUPANCY": 1003, + "DCGM_FI_PROF_PIPE_TENSOR_ACTIVE": 1004, + "DCGM_FI_PROF_DRAM_ACTIVE": 1005, + "DCGM_FI_PROF_PIPE_FP64_ACTIVE": 1006, + "DCGM_FI_PROF_PIPE_FP32_ACTIVE": 1007, + "DCGM_FI_PROF_PIPE_FP16_ACTIVE": 1008, + "DCGM_FI_PROF_PCIE_TX_BYTES": 1009, + "DCGM_FI_PROF_PCIE_RX_BYTES": 1010, + "DCGM_FI_PROF_NVLINK_TX_BYTES": 1011, + "DCGM_FI_PROF_NVLINK_RX_BYTES": 1012, + "DCGM_FI_PROF_PIPE_TENSOR_IMMA_ACTIVE": 1013, + "DCGM_FI_PROF_PIPE_TENSOR_HMMA_ACTIVE": 1014, + "DCGM_FI_PROF_PIPE_TENSOR_DFMA_ACTIVE": 1015, + "DCGM_FI_PROF_PIPE_INT_ACTIVE": 1016, + "DCGM_FI_PROF_NVDEC0_ACTIVE": 1017, + "DCGM_FI_PROF_NVDEC1_ACTIVE": 1018, + "DCGM_FI_PROF_NVDEC2_ACTIVE": 1019, + "DCGM_FI_PROF_NVDEC3_ACTIVE": 1020, + "DCGM_FI_PROF_NVDEC4_ACTIVE": 1021, + "DCGM_FI_PROF_NVDEC5_ACTIVE": 1022, + "DCGM_FI_PROF_NVDEC6_ACTIVE": 1023, + "DCGM_FI_PROF_NVDEC7_ACTIVE": 1024, + "DCGM_FI_PROF_NVJPG0_ACTIVE": 1025, + "DCGM_FI_PROF_NVJPG1_ACTIVE": 1026, + "DCGM_FI_PROF_NVJPG2_ACTIVE": 1027, + "DCGM_FI_PROF_NVJPG3_ACTIVE": 1028, + "DCGM_FI_PROF_NVJPG4_ACTIVE": 1029, + "DCGM_FI_PROF_NVJPG5_ACTIVE": 1030, + "DCGM_FI_PROF_NVJPG6_ACTIVE": 1031, + "DCGM_FI_PROF_NVJPG7_ACTIVE": 1032, + "DCGM_FI_PROF_NVOFA0_ACTIVE": 1033, + "DCGM_FI_PROF_NVOFA1_ACTIVE": 1034, + "DCGM_FI_PROF_NVLINK_L0_TX_BYTES": 1040, + "DCGM_FI_PROF_NVLINK_L0_RX_BYTES": 1041, + "DCGM_FI_PROF_NVLINK_L1_TX_BYTES": 1042, + "DCGM_FI_PROF_NVLINK_L1_RX_BYTES": 1043, + "DCGM_FI_PROF_NVLINK_L2_TX_BYTES": 1044, + "DCGM_FI_PROF_NVLINK_L2_RX_BYTES": 1045, + "DCGM_FI_PROF_NVLINK_L3_TX_BYTES": 1046, + "DCGM_FI_PROF_NVLINK_L3_RX_BYTES": 1047, + "DCGM_FI_PROF_NVLINK_L4_TX_BYTES": 1048, + "DCGM_FI_PROF_NVLINK_L4_RX_BYTES": 1049, + "DCGM_FI_PROF_NVLINK_L5_TX_BYTES": 1050, + "DCGM_FI_PROF_NVLINK_L5_RX_BYTES": 1051, + "DCGM_FI_PROF_NVLINK_L6_TX_BYTES": 1052, + "DCGM_FI_PROF_NVLINK_L6_RX_BYTES": 1053, + "DCGM_FI_PROF_NVLINK_L7_TX_BYTES": 1054, + "DCGM_FI_PROF_NVLINK_L7_RX_BYTES": 1055, + "DCGM_FI_PROF_NVLINK_L8_TX_BYTES": 1056, + "DCGM_FI_PROF_NVLINK_L8_RX_BYTES": 1057, + "DCGM_FI_PROF_NVLINK_L9_TX_BYTES": 1058, + "DCGM_FI_PROF_NVLINK_L9_RX_BYTES": 1059, + "DCGM_FI_PROF_NVLINK_L10_TX_BYTES": 1060, + "DCGM_FI_PROF_NVLINK_L10_RX_BYTES": 1061, + "DCGM_FI_PROF_NVLINK_L11_TX_BYTES": 1062, + "DCGM_FI_PROF_NVLINK_L11_RX_BYTES": 1063, + "DCGM_FI_PROF_NVLINK_L12_TX_BYTES": 1064, + "DCGM_FI_PROF_NVLINK_L12_RX_BYTES": 1065, + "DCGM_FI_PROF_NVLINK_L13_TX_BYTES": 1066, + "DCGM_FI_PROF_NVLINK_L13_RX_BYTES": 1067, + "DCGM_FI_PROF_NVLINK_L14_TX_BYTES": 1068, + "DCGM_FI_PROF_NVLINK_L14_RX_BYTES": 1069, + "DCGM_FI_PROF_NVLINK_L15_TX_BYTES": 1070, + "DCGM_FI_PROF_NVLINK_L15_RX_BYTES": 1071, + "DCGM_FI_PROF_NVLINK_L16_TX_BYTES": 1072, + "DCGM_FI_PROF_NVLINK_L16_RX_BYTES": 1073, + "DCGM_FI_PROF_NVLINK_L17_TX_BYTES": 1074, + "DCGM_FI_PROF_NVLINK_L17_RX_BYTES": 1075, + "DCGM_FI_PROF_C2C_TX_ALL_BYTES": 1076, + "DCGM_FI_PROF_C2C_TX_DATA_BYTES": 1077, + "DCGM_FI_PROF_C2C_RX_ALL_BYTES": 1078, + "DCGM_FI_PROF_C2C_RX_DATA_BYTES": 1079, + "DCGM_FI_PROF_HOSTMEM_CACHE_HIT": 1080, + "DCGM_FI_PROF_HOSTMEM_CACHE_MISS": 1081, + "DCGM_FI_PROF_PEERMEM_CACHE_HIT": 1082, + "DCGM_FI_PROF_PEERMEM_CACHE_MISS": 1083, + "DCGM_FI_DEV_CPU_UTIL_TOTAL": 1100, + "DCGM_FI_DEV_CPU_UTIL_USER": 1101, + "DCGM_FI_DEV_CPU_UTIL_NICE": 1102, + "DCGM_FI_DEV_CPU_UTIL_SYS": 1103, + "DCGM_FI_DEV_CPU_UTIL_IRQ": 1104, + "DCGM_FI_DEV_CPU_TEMP_CURRENT": 1110, + "DCGM_FI_DEV_CPU_TEMP_WARNING": 1111, + "DCGM_FI_DEV_CPU_TEMP_CRITICAL": 1112, + "DCGM_FI_DEV_CPU_CLOCK_CURRENT": 1120, + "DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT": 1130, + "DCGM_FI_DEV_CPU_POWER_LIMIT": 1131, + "DCGM_FI_DEV_SYSIO_POWER_UTIL_CURRENT": 1132, + "DCGM_FI_DEV_MODULE_POWER_UTIL_CURRENT": 1133, + "DCGM_FI_DEV_CPU_VENDOR": 1140, + "DCGM_FI_DEV_CPU_MODEL": 1141, + "DCGM_FI_DEV_NVLINK_COUNT_TX_PACKETS": 1200, + "DCGM_FI_DEV_NVLINK_COUNT_TX_BYTES": 1201, + "DCGM_FI_DEV_NVLINK_COUNT_RX_PACKETS": 1202, + "DCGM_FI_DEV_NVLINK_COUNT_RX_BYTES": 1203, + "DCGM_FI_DEV_NVLINK_COUNT_RX_MALFORMED_PACKET_ERRORS": 1204, + "DCGM_FI_DEV_NVLINK_COUNT_RX_BUFFER_OVERRUN_ERRORS": 1205, + "DCGM_FI_DEV_NVLINK_COUNT_RX_ERRORS": 1206, + "DCGM_FI_DEV_NVLINK_COUNT_RX_REMOTE_ERRORS": 1207, + "DCGM_FI_DEV_NVLINK_COUNT_RX_GENERAL_ERRORS": 1208, + "DCGM_FI_DEV_NVLINK_COUNT_LOCAL_LINK_INTEGRITY_ERRORS": 1209, + "DCGM_FI_DEV_NVLINK_COUNT_TX_DISCARDS": 1210, + "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_SUCCESSFUL_EVENTS": 1211, + "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_FAILED_EVENTS": 1212, + "DCGM_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_EVENTS": 1213, + "DCGM_FI_DEV_NVLINK_COUNT_RX_SYMBOL_ERRORS": 1214, + "DCGM_FI_DEV_NVLINK_COUNT_SYMBOL_BER": 1215, + "DCGM_FI_DEV_NVLINK_COUNT_SYMBOL_BER_FLOAT": 1216, + "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_BER": 1217, + "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_BER_FLOAT": 1218, + "DCGM_FI_DEV_NVLINK_COUNT_EFFECTIVE_ERRORS": 1219, + "DCGM_FI_DEV_FIRST_CONNECTX_FIELD_ID": 1300, + "DCGM_FI_DEV_CONNECTX_HEALTH": 1300, + "DCGM_FI_DEV_CONNECTX_ACTIVE_PCIE_LINK_WIDTH": 1301, + "DCGM_FI_DEV_CONNECTX_ACTIVE_PCIE_LINK_SPEED": 1302, + "DCGM_FI_DEV_CONNECTX_EXPECT_PCIE_LINK_WIDTH": 1303, + "DCGM_FI_DEV_CONNECTX_EXPECT_PCIE_LINK_SPEED": 1304, + "DCGM_FI_DEV_CONNECTX_CORRECTABLE_ERR_STATUS": 1305, + "DCGM_FI_DEV_CONNECTX_CORRECTABLE_ERR_MASK": 1306, + "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_STATUS": 1307, + "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_MASK": 1308, + "DCGM_FI_DEV_CONNECTX_UNCORRECTABLE_ERR_SEVERITY": 1309, + "DCGM_FI_DEV_CONNECTX_DEVICE_TEMPERATURE": 1310, + "DCGM_FI_DEV_LAST_CONNECTX_FIELD_ID": 1399, + "DCGM_FI_DEV_C2C_LINK_ERROR_INTR": 1400, + "DCGM_FI_DEV_C2C_LINK_ERROR_REPLAY": 1401, + "DCGM_FI_DEV_C2C_LINK_ERROR_REPLAY_B2B": 1402, + "DCGM_FI_DEV_C2C_LINK_POWER_STATE": 1403, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_0": 1404, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_1": 1405, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_2": 1406, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_3": 1407, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_4": 1408, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_5": 1409, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_6": 1410, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_7": 1411, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_8": 1412, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_9": 1413, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_10": 1414, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_11": 1415, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_12": 1416, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13": 1417, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14": 1418, + "DCGM_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15": 1419, + "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP_NS": 1420, + "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST_NS": 1421, + "DCGM_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN_NS": 1422, + "DCGM_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN_NS": 1423, + "DCGM_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN_NS": 1424, + "DCGM_FI_DEV_PWR_SMOOTHING_ENABLED": 1425, + "DCGM_FI_DEV_PWR_SMOOTHING_PRIV_LVL": 1426, + "DCGM_FI_DEV_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED": 1427, + "DCGM_FI_DEV_PWR_SMOOTHING_APPLIED_TMP_CEIL": 1428, + "DCGM_FI_DEV_PWR_SMOOTHING_APPLIED_TMP_FLOOR": 1429, + "DCGM_FI_DEV_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING": 1430, + "DCGM_FI_DEV_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING": 1431, "DCGM_FI_DEV_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING": 1432, - "DCGM_FI_DEV_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES": 1433, - "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR": 1434, - "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE": 1435, - "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE": 1436, - "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL": 1437, - "DCGM_FI_DEV_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE": 1438, - "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR": 1439, - "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE": 1440, - "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE": 1441, - "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL": 1442, - "DCGM_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS": 1501, - "DCGM_FI_IMEX_DOMAIN_STATUS": 1502, - "DCGM_FI_IMEX_DAEMON_STATUS": 1503, + "DCGM_FI_DEV_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES": 1433, + "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR": 1434, + "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE": 1435, + "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE": 1436, + "DCGM_FI_DEV_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL": 1437, + "DCGM_FI_DEV_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE": 1438, + "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR": 1439, + "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE": 1440, + "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE": 1441, + "DCGM_FI_DEV_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL": 1442, + "DCGM_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS": 1501, + "DCGM_FI_IMEX_DOMAIN_STATUS": 1502, + "DCGM_FI_IMEX_DAEMON_STATUS": 1503, } // legacyDCGMFields maps legacy field names to their IDs var legacyDCGMFields = map[string]Short{ - "dcgm_board_limit_violation": 243, - "dcgm_dec_utilization": 207, - "dcgm_ecc_dbe_aggregate_total": 313, - "dcgm_ecc_dbe_volatile_total": 311, - "dcgm_ecc_sbe_aggregate_total": 312, - "dcgm_ecc_sbe_volatile_total": 310, - "dcgm_enc_utilization": 206, - "dcgm_fb_free": 251, - "dcgm_fb_used": 252, - "dcgm_fi_prof_dram_active": 1005, - "dcgm_fi_prof_gr_engine_active": 1001, - "dcgm_fi_prof_pcie_rx_bytes": 1010, - "dcgm_fi_prof_pcie_tx_bytes": 1009, - "dcgm_fi_prof_pipe_tensor_active": 1004, - "dcgm_fi_prof_sm_active": 1002, - "dcgm_fi_prof_sm_occupancy": 1003, - "dcgm_gpu_temp": 150, - "dcgm_gpu_utilization": 203, - "dcgm_low_util_violation": 244, - "dcgm_mem_copy_utilization": 204, - "dcgm_memory_clock": 101, - "dcgm_memory_temp": 140, - "dcgm_nvlink_bandwidth_total": 449, + "dcgm_board_limit_violation": 243, + "dcgm_dec_utilization": 207, + "dcgm_ecc_dbe_aggregate_total": 313, + "dcgm_ecc_dbe_volatile_total": 311, + "dcgm_ecc_sbe_aggregate_total": 312, + "dcgm_ecc_sbe_volatile_total": 310, + "dcgm_enc_utilization": 206, + "dcgm_fb_free": 251, + "dcgm_fb_used": 252, + "dcgm_fi_prof_dram_active": 1005, + "dcgm_fi_prof_gr_engine_active": 1001, + "dcgm_fi_prof_pcie_rx_bytes": 1010, + "dcgm_fi_prof_pcie_tx_bytes": 1009, + "dcgm_fi_prof_pipe_tensor_active": 1004, + "dcgm_fi_prof_sm_active": 1002, + "dcgm_fi_prof_sm_occupancy": 1003, + "dcgm_gpu_temp": 150, + "dcgm_gpu_utilization": 203, + "dcgm_low_util_violation": 244, + "dcgm_mem_copy_utilization": 204, + "dcgm_memory_clock": 101, + "dcgm_memory_temp": 140, + "dcgm_nvlink_bandwidth_total": 449, "dcgm_nvlink_data_crc_error_count_total": 419, "dcgm_nvlink_flit_crc_error_count_total": 409, "dcgm_nvlink_recovery_error_count_total": 439, - "dcgm_nvlink_replay_error_count_total": 429, - "dcgm_pcie_replay_counter": 202, - "dcgm_pcie_rx_throughput": 201, - "dcgm_pcie_tx_throughput": 200, - "dcgm_power_usage": 155, - "dcgm_power_violation": 240, - "dcgm_reliability_violation": 245, - "dcgm_retired_pages_dbe": 391, - "dcgm_retired_pages_pending": 392, - "dcgm_retired_pages_sbe": 390, - "dcgm_sm_clock": 100, - "dcgm_sync_boost_violation": 242, - "dcgm_thermal_violation": 241, - "dcgm_total_energy_consumption": 156, - "dcgm_xid_errors": 230, + "dcgm_nvlink_replay_error_count_total": 429, + "dcgm_pcie_replay_counter": 202, + "dcgm_pcie_rx_throughput": 201, + "dcgm_pcie_tx_throughput": 200, + "dcgm_power_usage": 155, + "dcgm_power_violation": 240, + "dcgm_reliability_violation": 245, + "dcgm_retired_pages_dbe": 391, + "dcgm_retired_pages_pending": 392, + "dcgm_retired_pages_sbe": 390, + "dcgm_sm_clock": 100, + "dcgm_sync_boost_violation": 242, + "dcgm_thermal_violation": 241, + "dcgm_total_energy_consumption": 156, + "dcgm_xid_errors": 230, } // GetFieldID returns the DCGM field ID for a given field name and whether it was found diff --git a/pkg/dcgm/fields.go b/pkg/dcgm/fields.go index 14b7bf8..0126c63 100644 --- a/pkg/dcgm/fields.go +++ b/pkg/dcgm/fields.go @@ -219,7 +219,10 @@ func GetLatestValuesForFields(gpu uint, fields []Short) ([]FieldValue_v1, error) // fields is a slice of field IDs to retrieve. // Returns a slice of field values and any error encountered. func LinkGetLatestValues(index uint, parentType Field_Entity_Group, parentId uint, fields []Short) ([]FieldValue_v1, error) { - slice := []byte{uint8(parentType), uint8(index), uint8(parentId), 0} + slice := make([]byte, 4) + slice[0] = uint8(parentType) + binary.LittleEndian.PutUint16(slice[1:3], uint16(index)) + slice[3] = uint8(parentId) entityId := binary.LittleEndian.Uint32(slice) return EntityGetLatestValues(FE_LINK, uint(entityId), fields) }