diff --git a/docs/conformance/cncf/evidence/ai-service-metrics.md b/docs/conformance/cncf/evidence/ai-service-metrics.md deleted file mode 100644 index 768ed0a69..000000000 --- a/docs/conformance/cncf/evidence/ai-service-metrics.md +++ /dev/null @@ -1,224 +0,0 @@ -# AI Service Metrics (Prometheus Discovery) - -**Kubernetes Version:** v1.35 -**Platform:** linux/amd64 -**Validated on:** EKS / p5.48xlarge / NVIDIA H100 80GB HBM3 - ---- - -Demonstrates that Prometheus discovers and collects metrics from AI workloads -that expose them in Prometheus exposition format, using PodMonitor and -ServiceMonitor CRDs for automatic target discovery across both inference and -training workloads. - -## Inference: Dynamo Platform (PodMonitor) - -**Cluster:** `aicr-cuj2` (EKS, inference) -**Generated:** 2026-03-25 10:18:30 UTC - -The Dynamo operator auto-creates PodMonitors for worker and frontend pods. -The Dynamo vLLM runtime exposes both Dynamo-specific and embedded vLLM metrics -on port 9090 (`system` port) in Prometheus format. 
- -### Dynamo Workload Pods - -**Dynamo workload pods** -``` -$ kubectl get pods -n dynamo-workload -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -vllm-agg-0-frontend-qqrff 1/1 Running 0 3m29s 10.0.159.241 ip-10-0-184-187.ec2.internal -vllm-agg-0-vllmdecodeworker-95ths 1/1 Running 0 3m29s 10.0.214.229 ip-10-0-180-136.ec2.internal -``` - -### Worker Metrics Endpoint - -**Worker metrics (sampled after 10 inference requests)** -``` -dynamo_component_request_bytes_total{dynamo_component="backend",dynamo_endpoint="generate",model="Qwen/Qwen3-0.6B"} 11230 -dynamo_component_request_duration_seconds_sum{dynamo_component="backend",dynamo_endpoint="generate",model="Qwen/Qwen3-0.6B"} 0.984 -dynamo_component_request_duration_seconds_count{dynamo_component="backend",dynamo_endpoint="generate",model="Qwen/Qwen3-0.6B"} 10 -dynamo_component_requests_total{dynamo_component="backend",dynamo_endpoint="generate",model="Qwen/Qwen3-0.6B"} 10 -dynamo_component_response_bytes_total{dynamo_component="backend",dynamo_endpoint="generate",model="Qwen/Qwen3-0.6B"} 31826 -dynamo_component_uptime_seconds 223.250 -vllm:engine_sleep_state{engine="0",model_name="Qwen/Qwen3-0.6B",sleep_state="awake"} 1.0 -vllm:prefix_cache_queries_total{engine="0",model_name="Qwen/Qwen3-0.6B"} 50.0 -``` - -### PodMonitors (Auto-Created by Dynamo Operator) - -**Dynamo PodMonitors** -``` -$ kubectl get podmonitors -n dynamo-system -NAME AGE -dynamo-frontend 11d -dynamo-planner 11d -dynamo-worker 11d -``` - -**Worker PodMonitor spec** -``` -$ kubectl get podmonitor dynamo-worker -n dynamo-system -o yaml -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: dynamo-worker - namespace: dynamo-system -spec: - namespaceSelector: - any: true - podMetricsEndpoints: - - interval: 5s - path: /metrics - port: system - selector: - matchLabels: - nvidia.com/dynamo-component-type: worker - nvidia.com/metrics-enabled: "true" -``` - -### Prometheus Target Discovery - -**Prometheus 
scrape targets (active)** -``` -{ - "job": "dynamo-system/dynamo-frontend", - "endpoint": "http://10.0.159.241:8000/metrics", - "health": "up", - "lastScrape": "2026-03-25T10:19:21.101766071Z" -} -{ - "job": "dynamo-system/dynamo-worker", - "endpoint": "http://10.0.214.229:9090/metrics", - "health": "up", - "lastScrape": "2026-03-25T10:19:22.70334816Z" -} -``` - -### Dynamo Metrics in Prometheus - -**Dynamo metrics queried from Prometheus (after 10 inference requests)** -``` -dynamo_component_requests_total{endpoint="generate"} = 10 -dynamo_component_request_bytes_total{endpoint="generate"} = 11230 -dynamo_component_response_bytes_total{endpoint="generate"} = 31826 -dynamo_component_request_duration_seconds_count{endpoint="generate"} = 10 -dynamo_component_request_duration_seconds_sum{endpoint="generate"} = 0.984 -dynamo_component_uptime_seconds = 223.250 -dynamo_frontend_input_sequence_tokens_sum = 50 -dynamo_frontend_input_sequence_tokens_count = 10 -dynamo_frontend_inter_token_latency_seconds_sum = 0.866 -dynamo_frontend_inter_token_latency_seconds_count = 490 -dynamo_frontend_model_context_length = 40960 -dynamo_frontend_model_total_kv_blocks = 37710 -``` - -**Result: PASS** — Prometheus discovers Dynamo inference workloads (frontend + worker) via operator-managed PodMonitors and actively scrapes their Prometheus-format metrics endpoints. Application-level AI inference metrics (request count, request duration, inter-token latency, token throughput, KV cache utilization) are collected and queryable. - ---- - -## Training: PyTorch Workload (ServiceMonitor) - -**Cluster:** `aicr-cuj1` (EKS, training) -**Generated:** 2026-03-25 11:03:00 UTC - -A PyTorch training workload runs a GPU training loop and exposes training-level -metrics (step count, loss, throughput, GPU memory) on port 8080 in Prometheus -format, discovered via ServiceMonitor. 
- -### Training Workload Pod - -**Training pod** -``` -$ kubectl get pods -n trainer-metrics-test -o wide -NAME READY STATUS RESTARTS AGE -pytorch-training-job 1/1 Running 0 2m -``` - -### Training Metrics Endpoint - -**Training metrics (after 100 training steps)** -``` -# HELP training_step_total Total training steps completed -# TYPE training_step_total counter -training_step_total 100 -# HELP training_loss Current training loss -# TYPE training_loss gauge -training_loss 1.334257 -# HELP training_throughput_samples_per_sec Training throughput -# TYPE training_throughput_samples_per_sec gauge -training_throughput_samples_per_sec 549228.55 -# HELP training_gpu_memory_used_bytes GPU memory used -# TYPE training_gpu_memory_used_bytes gauge -training_gpu_memory_used_bytes 79213568 -# HELP training_gpu_memory_total_bytes GPU memory total -# TYPE training_gpu_memory_total_bytes gauge -training_gpu_memory_total_bytes 85017624576 -``` - -### ServiceMonitor - -**Training ServiceMonitor** -``` -$ kubectl get servicemonitor pytorch-training -n trainer-metrics-test -o yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - release: kube-prometheus-stack - name: pytorch-training - namespace: trainer-metrics-test -spec: - endpoints: - - interval: 15s - path: /metrics - port: metrics - selector: - matchLabels: - app: pytorch-training -``` - -### Prometheus Target Discovery - -**Prometheus scrape target (active)** -``` -{ - "job": "pytorch-training-metrics", - "endpoint": "http://10.0.212.201:8080/metrics", - "health": "up", - "lastScrape": "2026-03-25T11:03:49.310258779Z" -} -``` - -### Training Metrics in Prometheus - -**Training metrics queried from Prometheus** -``` -training_step_total = 100 -training_loss = 1.334257 -training_throughput_samples_per_sec = 549228.55 -training_gpu_memory_used_bytes = 79213568 -training_gpu_memory_total_bytes = 85017624576 -``` - -**Result: PASS** — Prometheus discovers the PyTorch training workload via 
ServiceMonitor and actively scrapes its Prometheus-format metrics endpoint. Training-level metrics (step count, loss, throughput, GPU memory) are collected and queryable. - ---- - -## Summary - -| Workload | Discovery | Metrics Port | Metrics Type | Result | -|----------|-----------|-------------|--------------|--------| -| **Dynamo vLLM** (inference) | PodMonitor (auto-created) | 9090 (HTTP) | `dynamo_component_*`, `dynamo_frontend_*`, `vllm:*` | **PASS** | -| **PyTorch training** (training) | ServiceMonitor | 8080 (HTTP) | `training_step_total`, `training_loss`, `training_throughput_*`, `training_gpu_memory_*` | **PASS** | - -## Cleanup - -**Delete inference workload** -``` -$ kubectl delete ns dynamo-workload -``` - -**Delete training workload** -``` -$ kubectl delete ns trainer-metrics-test -``` diff --git a/docs/conformance/cncf/evidence/robust-operator.md b/docs/conformance/cncf/evidence/robust-operator.md deleted file mode 100644 index 917222560..000000000 --- a/docs/conformance/cncf/evidence/robust-operator.md +++ /dev/null @@ -1,184 +0,0 @@ -# Robust AI Operator - -**Kubernetes Version:** v1.35 -**Platform:** linux/amd64 -**Validated on:** Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 - ---- - -Demonstrates CNCF AI Conformance requirement that at least one complex AI operator -with a CRD can be installed and functions reliably, including operator pods running, -webhooks operational, and custom resources reconciled. 
- -## Summary - -Two operators validated across inference and training intents: - -| Operator | Intent | CRDs | Webhooks | CR Reconciled | Result | -|----------|--------|------|----------|---------------|--------| -| **Dynamo Platform** | Inference | 6 CRDs | 4 validating webhooks | DynamoGraphDeployment → PodCliques | **PASS** | -| **Kubeflow Trainer** | Training | 3 CRDs | 3 validating webhooks | TrainJob → distributed training pods | **PASS** | - ---- - -## Inference: Dynamo Platform - -**Generated:** 2026-03-10 03:41:48 UTC - -### Dynamo Operator Health - -**Dynamo operator deployments** -``` -$ kubectl get deploy -n dynamo-system -NAME READY UP-TO-DATE AVAILABLE AGE -dynamo-platform-dynamo-operator-controller-manager 1/1 1 1 13m -grove-operator 1/1 1 1 13m -``` - -**Dynamo operator pods** -``` -$ kubectl get pods -n dynamo-system -NAME READY STATUS RESTARTS AGE -dynamo-platform-dynamo-operator-controller-manager-59f6dc6gs7tt 2/2 Running 0 13m -dynamo-platform-dynamo-operator-webhook-ca-inject-1-6t95h 0/1 Completed 0 13m -dynamo-platform-dynamo-operator-webhook-cert-gen-1-bnqwh 0/1 Completed 0 13m -grove-operator-7c69b46ddf-mxgtz 1/1 Running 1 (13m ago) 13m -``` - -### Custom Resource Definitions - -**Dynamo CRDs** -``` -dynamocomponentdeployments.nvidia.com 2026-03-10T03:20:42Z -dynamographdeploymentrequests.nvidia.com 2026-03-10T03:20:42Z -dynamographdeployments.nvidia.com 2026-03-10T03:20:42Z -dynamographdeploymentscalingadapters.nvidia.com 2026-03-10T03:20:42Z -dynamomodels.nvidia.com 2026-03-10T03:20:42Z -dynamoworkermetadatas.nvidia.com 2026-03-10T03:20:42Z -``` - -### Webhooks - -**Validating webhooks** -``` -$ kubectl get validatingwebhookconfigurations -l app.kubernetes.io/instance=dynamo-platform -NAME WEBHOOKS AGE -dynamo-platform-dynamo-operator-validating 4 13m -``` - -### Custom Resource Reconciliation - -A `DynamoGraphDeployment` defines an inference serving graph. The operator reconciles -it into workload pods managed via PodCliques. 
- -**DynamoGraphDeployments** -``` -$ kubectl get dynamographdeployments -A -NAMESPACE NAME AGE -dynamo-workload vllm-agg 5m33s -``` - -**Workload Pods Created by Operator** -``` -$ kubectl get pods -n dynamo-workload -l nvidia.com/dynamo-graph-deployment-name -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -vllm-agg-0-frontend-kkmpd 1/1 Running 0 5m35s 10.0.222.55 system-node-2 -vllm-agg-0-vllmdecodeworker-s65j5 1/1 Running 0 5m35s 10.0.235.180 gpu-node-1 -``` - -**PodCliques** -``` -$ kubectl get podcliques -n dynamo-workload -NAME AGE -vllm-agg-0-frontend 5m36s -vllm-agg-0-vllmdecodeworker 5m36s -``` - -### Webhook Rejection Test - -Submit an invalid DynamoGraphDeployment to verify the validating webhook -actively rejects malformed resources. - -**Invalid CR rejection** -``` -Error from server (Forbidden): error when creating "STDIN": admission webhook "vdynamographdeployment.kb.io" denied the request: spec.services must have at least one service -``` - -Webhook correctly rejected the invalid resource. - -**Result: PASS** — Dynamo operator running, webhooks operational (rejection verified), CRDs registered, DynamoGraphDeployment reconciled with 2 healthy workload pod(s). 
- ---- - -## Training: Kubeflow Trainer - -**Generated:** 2026-03-16 21:48:55 UTC - -### Kubeflow Trainer Health - -**Kubeflow Trainer deployments** -``` -$ kubectl get deploy -n kubeflow -NAME READY UP-TO-DATE AVAILABLE AGE -jobset-controller 1/1 1 1 13m -kubeflow-trainer-controller-manager 1/1 1 1 13m -``` - -**Kubeflow Trainer pods** -``` -$ kubectl get pods -n kubeflow -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -jobset-controller-75f94fdfb7-r7lqd 1/1 Running 1 (13m ago) 13m 10.100.1.52 system-node-1 -kubeflow-trainer-controller-manager-677b98f74f-8dvgj 1/1 Running 1 (13m ago) 13m 10.100.5.60 system-node-2 -pytorch-mnist-node-0-0-9wkj5 0/1 Completed 0 12m 10.100.2.169 gpu-node-1 -``` - -### Custom Resource Definitions - -**Kubeflow Trainer CRDs** -``` -clustertrainingruntimes.trainer.kubeflow.org 2026-03-16T20:45:34Z -trainingruntimes.trainer.kubeflow.org 2026-03-16T20:45:36Z -trainjobs.trainer.kubeflow.org 2026-03-16T20:45:36Z -``` - -### Webhooks - -**Validating webhooks** -``` -$ kubectl get validatingwebhookconfigurations validator.trainer.kubeflow.org -NAME WEBHOOKS AGE -validator.trainer.kubeflow.org 3 13m -``` - -**Webhook endpoint verification** -``` -NAME ENDPOINTS AGE -jobset-metrics-service 10.100.1.52:8443 13m -jobset-webhook-service 10.100.1.52:9443 13m -kubeflow-trainer-controller-manager 10.100.5.60:8080,10.100.5.60:9443 13m -pytorch-mnist 10.100.2.169 12m -``` - -### ClusterTrainingRuntimes - -**ClusterTrainingRuntimes** -``` -$ kubectl get clustertrainingruntimes -NAME AGE -torch-distributed 13m -``` - -### Webhook Rejection Test - -Submit an invalid TrainJob (referencing a non-existent runtime) to verify the -validating webhook actively rejects malformed resources. 
- -**Invalid TrainJob rejection** -``` -Error from server (Forbidden): error when creating "STDIN": admission webhook "validator.trainjob.trainer.kubeflow.org" denied the request: spec.RuntimeRef: Invalid value: {"name":"nonexistent-runtime","apiGroup":"trainer.kubeflow.org","kind":"ClusterTrainingRuntime"}: ClusterTrainingRuntime.trainer.kubeflow.org "nonexistent-runtime" not found: specified clusterTrainingRuntime must be created before the TrainJob is created -``` - -Webhook correctly rejected the invalid resource. - -**Result: PASS** — Kubeflow Trainer running, webhooks operational (rejection verified), 3 CRDs registered. diff --git a/docs/conformance/cncf/index.md b/docs/conformance/cncf/index.md index bb20c9980..bee8027e2 100644 --- a/docs/conformance/cncf/index.md +++ b/docs/conformance/cncf/index.md @@ -1,43 +1,43 @@ -# CNCF AI Conformance Evidence +# CNCF AI Conformance ## Overview This directory contains evidence for [CNCF Kubernetes AI Conformance](https://github.com/cncf/k8s-ai-conformance) -certification. The evidence demonstrates that a cluster configured with a specific -recipe meets the Must-have requirements for Kubernetes v1.35. +certification. Each submission certifies a specific product on a specific Kubernetes +distribution, with evidence collected using AICR as the validation tooling. -> **Note:** It is the **cluster configured by a recipe** that is conformant, not the -> tool itself. The recipe determines which components are deployed and how they are -> configured. Different recipes may produce clusters with different conformance profiles. +> **Note:** It is the **product deployed on a Kubernetes platform** that is conformant. +> AICR serves as the deployment and validation tooling (similar to sonobuoy for K8s +> conformance), while the certified product is the AI inference/training platform. 
-**Kubernetes:** v1.35 -**Product:** Kubernetes clusters with NVIDIA AI Cluster Runtime (AICR) +## Submissions -AICR deploys the runtime components that make a Kubernetes cluster AI conformant. -All conformance requirements are platform-agnostic except cluster autoscaling, -which relies on the underlying platform's node group scaling mechanism. +| Version | Product | Platform | Status | Evidence | +|---------|---------|----------|--------|----------| +| v1.35 | [NVIDIA NIM](https://developer.nvidia.com/nim) | EKS | 9/9 PASS | [v1.35/nim-eks/](v1.35/nim-eks/) | ## Directory Structure ``` docs/conformance/cncf/ -├── README.md -├── submission/ -│ ├── PRODUCT.yaml -│ └── README.md -└── evidence/ - ├── index.md - ├── dra-support.md - ├── gang-scheduling.md - ├── secure-accelerator-access.md - ├── accelerator-metrics.md - ├── ai-service-metrics.md - ├── inference-gateway.md - ├── robust-operator.md - ├── pod-autoscaling.md - └── cluster-autoscaling.md - -pkg/evidence/scripts/ # Evidence collection script + test manifests +├── index.md # This file +└── v1.35/ # Kubernetes version + └── nim-eks/ # Product + platform (mirrors CNCF repo) + ├── PRODUCT.yaml # CNCF submission metadata + ├── README.md # Submission overview + results table + └── evidence/ # Behavioral evidence files + ├── index.md + ├── dra-support.md + ├── gang-scheduling.md + ├── secure-accelerator-access.md + ├── accelerator-metrics.md + ├── ai-service-metrics.md + ├── inference-gateway.md + ├── robust-operator.md + ├── pod-autoscaling.md + └── cluster-autoscaling.md + +pkg/evidence/scripts/ # Evidence collection script + test manifests ├── collect-evidence.sh └── manifests/ ├── dra-gpu-test.yaml @@ -82,9 +82,9 @@ Alternatively, run the evidence collection script directly: ``` > **Note:** The `--cncf-submission` flag deploys GPU workloads and takes ~5-10 -> minutes. The evidence collection script uses polling with early exit on both -> success and failure, minimizing wait times. 
The HPA test uses CUDA N-Body -> Simulation to stress GPUs and verifies scale-up. +> minutes. The evidence collection script automatically detects the AI workload +> type (NIM inference, Dynamo inference, or Kubeflow training) and collects +> appropriate metrics and operator evidence. ### Two Modes @@ -101,21 +101,3 @@ Alternatively, run the evidence collection script directly: | **Gateway** | Condition verification (Accepted, Programmed) | Same | | **Webhook test** | Rejection test with invalid CR | Same | | **Cluster autoscaling** | Cloud node group validation | Cloud-provider autoscaler API | - -## Evidence - -See [evidence/index.md](evidence/index.md) for a summary of all collected evidence and results. - -## Feature Areas - -| # | Feature | Requirement | Evidence File | -|---|---------|-------------|---------------| -| 1 | DRA Support | `dra_support` | [evidence/dra-support.md](evidence/dra-support.md) | -| 2 | Gang Scheduling | `gang_scheduling` | [evidence/gang-scheduling.md](evidence/gang-scheduling.md) | -| 3 | Secure Accelerator Access | `secure_accelerator_access` | [evidence/secure-accelerator-access.md](evidence/secure-accelerator-access.md) | -| 4 | Accelerator Metrics | `accelerator_metrics` | [evidence/accelerator-metrics.md](evidence/accelerator-metrics.md) | -| 5 | AI Service Metrics | `ai_service_metrics` | [evidence/ai-service-metrics.md](evidence/ai-service-metrics.md) | -| 6 | Inference API Gateway | `ai_inference` | [evidence/inference-gateway.md](evidence/inference-gateway.md) | -| 7 | Robust AI Operator | `robust_controller` | [evidence/robust-operator.md](evidence/robust-operator.md) | -| 8 | Pod Autoscaling | `pod_autoscaling` | [evidence/pod-autoscaling.md](evidence/pod-autoscaling.md) | -| 9 | Cluster Autoscaling | `cluster_autoscaling` | [evidence/cluster-autoscaling.md](evidence/cluster-autoscaling.md) | diff --git a/docs/conformance/cncf/submission/README.md b/docs/conformance/cncf/submission/README.md deleted file mode 100644 index 
3da12ef75..000000000 --- a/docs/conformance/cncf/submission/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# NVIDIA AI Cluster Runtime - -[NVIDIA AI Cluster Runtime (AICR)](https://github.com/NVIDIA/aicr) generates validated, GPU-accelerated Kubernetes configurations and deploys runtime components that satisfy all CNCF AI Conformance requirements for accelerator management, scheduling, observability, security, and inference networking. - -## Conformance Submission - -- [PRODUCT.yaml](PRODUCT.yaml) - -## Evidence - -Evidence was collected on Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 GPUs using AICR-deployed runtime components. - -| # | Requirement | Feature | Result | Evidence | -|---|-------------|---------|--------|----------| -| 1 | `dra_support` | Dynamic Resource Allocation | PASS | [dra-support.md](../evidence/dra-support.md) | -| 2 | `gang_scheduling` | Gang Scheduling (KAI Scheduler) | PASS | [gang-scheduling.md](../evidence/gang-scheduling.md) | -| 3 | `secure_accelerator_access` | Secure Accelerator Access | PASS | [secure-accelerator-access.md](../evidence/secure-accelerator-access.md) | -| 4 | `accelerator_metrics` | Accelerator Metrics (DCGM Exporter) | PASS | [accelerator-metrics.md](../evidence/accelerator-metrics.md) | -| 5 | `ai_service_metrics` | AI Service Metrics (Prometheus ServiceMonitor) | PASS | [ai-service-metrics.md](../evidence/ai-service-metrics.md) | -| 6 | `ai_inference` | Inference API Gateway (kgateway) | PASS | [inference-gateway.md](../evidence/inference-gateway.md) | -| 7 | `robust_controller` | Robust AI Operator (Dynamo + Kubeflow Trainer) | PASS | [robust-operator.md](../evidence/robust-operator.md) | -| 8 | `pod_autoscaling` | Pod Autoscaling (HPA + GPU Metrics) | PASS | [pod-autoscaling.md](../evidence/pod-autoscaling.md) | -| 9 | `cluster_autoscaling` | Cluster Autoscaling | PASS | [cluster-autoscaling.md](../evidence/cluster-autoscaling.md) | - -All 9 MUST conformance requirement IDs across 9 evidence files are 
**Implemented**. 3 SHOULD requirements (`driver_runtime_management`, `gpu_sharing`, `virtualized_accelerator`) are also Implemented. diff --git a/docs/conformance/cncf/submission/PRODUCT.yaml b/docs/conformance/cncf/v1.35/nim-eks/PRODUCT.yaml similarity index 83% rename from docs/conformance/cncf/submission/PRODUCT.yaml rename to docs/conformance/cncf/v1.35/nim-eks/PRODUCT.yaml index 49888769b..16af204d0 100644 --- a/docs/conformance/cncf/submission/PRODUCT.yaml +++ b/docs/conformance/cncf/v1.35/nim-eks/PRODUCT.yaml @@ -14,23 +14,24 @@ metadata: kubernetesVersion: v1.35 - platformName: "NVIDIA AI Cluster Runtime" - platformVersion: "0.8.0" + platformName: "NVIDIA NIM on EKS" + platformVersion: "1.8.3" vendorName: "NVIDIA" - websiteUrl: "https://github.com/NVIDIA/aicr" - repoUrl: "https://github.com/NVIDIA/aicr" - documentationUrl: "https://github.com/NVIDIA/aicr/blob/main/README.md" + websiteUrl: "https://developer.nvidia.com/nim" + repoUrl: "https://github.com/NVIDIA/k8s-nim-operator" + documentationUrl: "https://docs.nvidia.com/nim/large-language-models/latest/deploy-helm.html" productLogoUrl: "https://raw.githubusercontent.com/cncf/landscape/master/hosted_logos/nvidia-member.svg" description: >- - NVIDIA AI Cluster Runtime (AICR) generates validated, GPU-accelerated - Kubernetes configurations and deploys runtime components that satisfy all - CNCF AI Conformance requirements. + NVIDIA NIM on EKS is a Kubernetes-based AI inference platform that deploys + and manages NVIDIA NIM microservices on Amazon EKS with GPU scheduling, + autoscaling, and Gateway API integration. Configured and validated using + NVIDIA AI Cluster Runtime (AICR). contactEmailAddress: "aicr-maintainers@nvidia.com" - # AICR is not a Kubernetes distribution — it deploys AI runtime components on - # existing conformant platforms. We reference EKS's k8s-conformance entry - # because evidence was collected on a conformant EKS cluster. AICR is - # validated on multiple conformant platforms. 
- # Also validated on GKE: https://github.com/cncf/k8s-conformance/tree/master/v1.35/gke + # NVIDIA NIM on EKS is not a Kubernetes distribution — it is an AI inference + # platform deployed on top of conformant Amazon EKS. Per CNCF AI Conformance + # guidelines, we reference the underlying Kubernetes distribution's conformance + # entry to establish that the base platform is already K8s conformant. + # This submission certifies the AI capabilities layered on top of EKS. k8sConformanceUrl: "https://github.com/cncf/k8s-conformance/tree/master/v1.35/eks" spec: @@ -40,7 +41,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/dra-support.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/dra-support.md" notes: >- DRA API (resource.k8s.io/v1) is enabled with DeviceClass, ResourceClaim, ResourceClaimTemplate, and ResourceSlice resources available. The NVIDIA @@ -58,7 +59,7 @@ spec: level: SHOULD status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/dra-support.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/dra-support.md" notes: >- GPU Operator manages the full driver and runtime lifecycle: driver installation, container toolkit configuration, device plugin, and DRA @@ -115,7 +116,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/inference-gateway.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/inference-gateway.md" notes: >- kgateway controller is deployed with full Gateway API CRD support (GatewayClass, Gateway, HTTPRoute, GRPCRoute, ReferenceGrant). 
Inference @@ -134,7 +135,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/gang-scheduling.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/gang-scheduling.md" notes: >- KAI Scheduler is deployed with operator, scheduler, admission controller, pod-grouper, and queue-controller components. PodGroup CRD @@ -150,12 +151,11 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/cluster-autoscaling.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/cluster-autoscaling.md" notes: >- Demonstrated on EKS with a GPU Auto Scaling Group (p5.48xlarge, 8x H100 - per node) tagged for Cluster Autoscaler discovery, and on GKE with the - built-in cluster autoscaler managing a3-megagpu-8g node pools. Both - platforms support scaling GPU nodes based on pending pod demand. + per node) tagged for Cluster Autoscaler discovery. The platform supports + scaling GPU nodes based on pending pod demand. - id: pod_autoscaling description: >- If the platform supports the HorizontalPodAutoscaler, it must function @@ -164,7 +164,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/pod-autoscaling.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/pod-autoscaling.md" notes: >- Prometheus adapter exposes GPU custom metrics (gpu_utilization, gpu_memory_used, gpu_power_usage) via the Kubernetes custom metrics API. 
@@ -189,7 +189,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/accelerator-metrics.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/accelerator-metrics.md" notes: >- DCGM Exporter runs on GPU nodes exposing metrics at :9400/metrics in Prometheus format. Per-GPU metrics include utilization, memory usage, @@ -205,13 +205,14 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/accelerator-metrics.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/ai-service-metrics.md" notes: >- - Prometheus and Grafana are deployed as the monitoring stack. Prometheus - discovers and scrapes workloads exposing metrics in Prometheus - exposition format via ServiceMonitors. The prometheus-adapter bridges - these metrics into the Kubernetes custom metrics API for consumption by - HPA and other controllers. + NVIDIA NIM inference microservice exposes Prometheus-format metrics at + /v1/metrics including token throughput (prompt_tokens_total, + generation_tokens_total), request latency (time_to_first_token_seconds, + time_per_output_token_seconds), and model request counts. Prometheus + and prometheus-adapter are deployed for metrics collection and bridging + to the Kubernetes custom metrics API. security: - id: secure_accelerator_access description: >- @@ -222,7 +223,7 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/secure-accelerator-access.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/secure-accelerator-access.md" notes: >- GPU Operator manages all GPU lifecycle components (driver, device-plugin, DCGM, toolkit, validator, MIG manager). 
8x H100 GPUs are individually @@ -240,11 +241,9 @@ spec: level: MUST status: "Implemented" evidence: - - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/evidence/robust-operator.md" + - "https://github.com/NVIDIA/aicr/blob/main/docs/conformance/cncf/v1.35/nim-eks/evidence/robust-operator.md" notes: >- - Two operators validated: (1) NVIDIA Dynamo for inference — 6 CRDs, - 4 validating webhooks, DynamoGraphDeployment reconciled into running - workload pods; (2) Kubeflow Trainer for training — 3 CRDs, 3 validating - webhooks, TrainJob reconciled into distributed training pods. Both - operators verified via webhook rejection tests (invalid CRs correctly - denied). + NVIDIA NIM Operator validated: 4 CRDs (NIMService, NIMCache, NIMPipeline, + NIMBuild), admission controller with webhook rejection test (invalid + NIMService correctly denied), NIMService CR reconciled into running + inference pod serving Llama 3.2 1B on H100 GPU. diff --git a/docs/conformance/cncf/v1.35/nim-eks/README.md b/docs/conformance/cncf/v1.35/nim-eks/README.md new file mode 100644 index 000000000..b275e6f6e --- /dev/null +++ b/docs/conformance/cncf/v1.35/nim-eks/README.md @@ -0,0 +1,25 @@ +# NVIDIA NIM on EKS + +[NVIDIA NIM](https://developer.nvidia.com/nim) on EKS is a Kubernetes-based AI inference platform that deploys and manages NVIDIA NIM microservices on Amazon EKS with GPU scheduling, autoscaling, and Gateway API integration. NIM microservice lifecycle is managed by the [NIM Operator](https://github.com/NVIDIA/k8s-nim-operator). The platform is configured and validated using [NVIDIA AI Cluster Runtime (AICR)](https://github.com/NVIDIA/aicr). + +## Conformance Submission + +- [PRODUCT.yaml](PRODUCT.yaml) + +## Evidence + +Evidence was collected on an EKS v1.35 cluster with NVIDIA H100 80GB HBM3 GPUs running NIM inference workloads, validated by AICR. 
+ +| # | Requirement | Feature | Result | Evidence | +|---|-------------|---------|--------|----------| +| 1 | `dra_support` | Dynamic Resource Allocation | PASS | [dra-support.md](evidence/dra-support.md) | +| 2 | `gang_scheduling` | Gang Scheduling (KAI Scheduler) | PASS | [gang-scheduling.md](evidence/gang-scheduling.md) | +| 3 | `secure_accelerator_access` | Secure Accelerator Access | PASS | [secure-accelerator-access.md](evidence/secure-accelerator-access.md) | +| 4 | `accelerator_metrics` | Accelerator Metrics (DCGM Exporter) | PASS | [accelerator-metrics.md](evidence/accelerator-metrics.md) | +| 5 | `ai_service_metrics` | AI Service Metrics (NIM Inference) | PASS | [ai-service-metrics.md](evidence/ai-service-metrics.md) | +| 6 | `ai_inference` | Inference API Gateway (kgateway) | PASS | [inference-gateway.md](evidence/inference-gateway.md) | +| 7 | `robust_controller` | Robust AI Operator (NIM Operator) | PASS | [robust-operator.md](evidence/robust-operator.md) | +| 8 | `pod_autoscaling` | Pod Autoscaling (HPA + GPU Metrics) | PASS | [pod-autoscaling.md](evidence/pod-autoscaling.md) | +| 9 | `cluster_autoscaling` | Cluster Autoscaling | PASS | [cluster-autoscaling.md](evidence/cluster-autoscaling.md) | + +All 9 MUST conformance requirement IDs across 9 evidence files are **Implemented**. 3 SHOULD requirements (`driver_runtime_management`, `gpu_sharing`, `virtualized_accelerator`) are also Implemented. 
diff --git a/docs/conformance/cncf/evidence/accelerator-metrics.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/accelerator-metrics.md similarity index 59% rename from docs/conformance/cncf/evidence/accelerator-metrics.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/accelerator-metrics.md index 278ad1329..b98f8844d 100644 --- a/docs/conformance/cncf/evidence/accelerator-metrics.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/accelerator-metrics.md @@ -1,18 +1,14 @@ -# Accelerator & AI Service Metrics +# Accelerator Metrics (DCGM Exporter) +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:15:23 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 -**Generated:** 2026-03-10 03:41:11 UTC --- -Demonstrates two CNCF AI Conformance observability requirements: - -1. **accelerator_metrics** — Fine-grained GPU performance metrics (utilization, memory, - temperature, power) exposed via standardized Prometheus endpoint -2. **ai_service_metrics** — Monitoring system that discovers and collects metrics from - workloads exposing Prometheus exposition format +Demonstrates that the DCGM exporter exposes per-GPU metrics (utilization, memory, +temperature, power) in Prometheus format via a standardized metrics endpoint. 
## Monitoring Stack Health @@ -22,14 +18,14 @@ Demonstrates two CNCF AI Conformance observability requirements: ``` $ kubectl get pods -n monitoring -l app.kubernetes.io/name=prometheus NAME READY STATUS RESTARTS AGE -prometheus-kube-prometheus-prometheus-0 2/2 Running 0 18m +prometheus-kube-prometheus-prometheus-0 2/2 Running 0 64m ``` **Prometheus service** ``` $ kubectl get svc kube-prometheus-prometheus -n monitoring -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -kube-prometheus-prometheus ClusterIP 172.20.135.224 9090/TCP,8080/TCP 18m +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +kube-prometheus-prometheus ClusterIP 172.20.72.172 9090/TCP,8080/TCP 64m ``` ### Prometheus Adapter (Custom Metrics API) @@ -38,14 +34,14 @@ kube-prometheus-prometheus ClusterIP 172.20.135.224 9090/TCP ``` $ kubectl get pods -n monitoring -l app.kubernetes.io/name=prometheus-adapter NAME READY STATUS RESTARTS AGE -prometheus-adapter-78b8b8d75c-fh4cf 1/1 Running 0 17m +prometheus-adapter-78b8b8d75c-wv9h2 1/1 Running 0 64m ``` **Prometheus adapter service** ``` $ kubectl get svc prometheus-adapter -n monitoring -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -prometheus-adapter ClusterIP 172.20.178.141 443/TCP 17m +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +prometheus-adapter ClusterIP 172.20.38.130 443/TCP 64m ``` ### Grafana @@ -54,7 +50,7 @@ prometheus-adapter ClusterIP 172.20.178.141 443/TCP 17m ``` $ kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana NAME READY STATUS RESTARTS AGE -grafana-56fbffd7d7-r2htr 3/3 Running 0 18m +grafana-56fbffd7d7-8rnr6 3/3 Running 0 64m ``` ## Accelerator Metrics (DCGM Exporter) @@ -68,15 +64,15 @@ temperature, power draw, and more in Prometheus exposition format. 
``` $ kubectl get pods -n gpu-operator -l app=nvidia-dcgm-exporter -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -nvidia-dcgm-exporter-g2fjs 1/1 Running 0 15m 10.0.247.52 gpu-node-2 -nvidia-dcgm-exporter-wqqqn 1/1 Running 0 15m 10.0.172.246 gpu-node-1 +nvidia-dcgm-exporter-2xrln 1/1 Running 0 62m 10.0.187.45 ip-10-0-180-136.ec2.internal +nvidia-dcgm-exporter-sscnw 1/1 Running 0 62m 10.0.147.205 ip-10-0-251-220.ec2.internal ``` **DCGM exporter service** ``` $ kubectl get svc -n gpu-operator -l app=nvidia-dcgm-exporter NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -nvidia-dcgm-exporter ClusterIP 172.20.181.11 9400/TCP 15m +nvidia-dcgm-exporter ClusterIP 172.20.93.244 9400/TCP 62m ``` ### DCGM Metrics Endpoint @@ -85,36 +81,36 @@ Query DCGM exporter directly to show raw GPU metrics in Prometheus format. **Key GPU metrics from DCGM exporter (sampled)** ``` -DCGM_FI_DEV_GPU_TEMP{gpu="0",UUID="GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08",container="main",namespace="dynamo-workload",pod="vllm-agg-0-vllmdecodeworker-s65j5",pod_uid=""} 30 -DCGM_FI_DEV_GPU_TEMP{gpu="1",UUID="GPU-bc5610b9-79c8-fedd-8899-07539c7f868a",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 29 -DCGM_FI_DEV_GPU_TEMP{gpu="2",UUID="GPU-fbc2c554-4d37-8938-0032-f923bad0f716",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 26 -DCGM_FI_DEV_GPU_TEMP{gpu="3",UUID="GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 29 
-DCGM_FI_DEV_GPU_TEMP{gpu="4",UUID="GPU-82e45d1b-1618-559f-144c-eab51545030b",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 28 -DCGM_FI_DEV_GPU_TEMP{gpu="5",UUID="GPU-39e28159-8c62-ee71-64db-b748edd61e15",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 26 -DCGM_FI_DEV_GPU_TEMP{gpu="6",UUID="GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 28 -DCGM_FI_DEV_GPU_TEMP{gpu="7",UUID="GPU-04d228d3-3b5a-3534-f5cf-969706647d56",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 26 -DCGM_FI_DEV_POWER_USAGE{gpu="0",UUID="GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08",container="main",namespace="dynamo-workload",pod="vllm-agg-0-vllmdecodeworker-s65j5",pod_uid=""} 113.611000 -DCGM_FI_DEV_POWER_USAGE{gpu="1",UUID="GPU-bc5610b9-79c8-fedd-8899-07539c7f868a",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 68.347000 -DCGM_FI_DEV_POWER_USAGE{gpu="2",UUID="GPU-fbc2c554-4d37-8938-0032-f923bad0f716",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 65.709000 -DCGM_FI_DEV_POWER_USAGE{gpu="3",UUID="GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.316000 
-DCGM_FI_DEV_POWER_USAGE{gpu="4",UUID="GPU-82e45d1b-1618-559f-144c-eab51545030b",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 68.717000 -DCGM_FI_DEV_POWER_USAGE{gpu="5",UUID="GPU-39e28159-8c62-ee71-64db-b748edd61e15",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 65.742000 -DCGM_FI_DEV_POWER_USAGE{gpu="6",UUID="GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.328000 -DCGM_FI_DEV_POWER_USAGE{gpu="7",UUID="GPU-04d228d3-3b5a-3534-f5cf-969706647d56",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 66.997000 -DCGM_FI_DEV_GPU_UTIL{gpu="0",UUID="GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08",container="main",namespace="dynamo-workload",pod="vllm-agg-0-vllmdecodeworker-s65j5",pod_uid=""} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="1",UUID="GPU-bc5610b9-79c8-fedd-8899-07539c7f868a",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="2",UUID="GPU-fbc2c554-4d37-8938-0032-f923bad0f716",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="3",UUID="GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 
-DCGM_FI_DEV_GPU_UTIL{gpu="4",UUID="GPU-82e45d1b-1618-559f-144c-eab51545030b",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="5",UUID="GPU-39e28159-8c62-ee71-64db-b748edd61e15",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="6",UUID="GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_GPU_UTIL{gpu="7",UUID="GPU-04d228d3-3b5a-3534-f5cf-969706647d56",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_MEM_COPY_UTIL{gpu="0",UUID="GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08",container="main",namespace="dynamo-workload",pod="vllm-agg-0-vllmdecodeworker-s65j5",pod_uid=""} 0 -DCGM_FI_DEV_MEM_COPY_UTIL{gpu="1",UUID="GPU-bc5610b9-79c8-fedd-8899-07539c7f868a",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_MEM_COPY_UTIL{gpu="2",UUID="GPU-fbc2c554-4d37-8938-0032-f923bad0f716",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_MEM_COPY_UTIL{gpu="3",UUID="GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 
-DCGM_FI_DEV_MEM_COPY_UTIL{gpu="4",UUID="GPU-82e45d1b-1618-559f-144c-eab51545030b",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 -DCGM_FI_DEV_MEM_COPY_UTIL{gpu="5",UUID="GPU-39e28159-8c62-ee71-64db-b748edd61e15",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="gpu-node-1",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_TEMP{gpu="0",UUID="GPU-15704b32-f531-14ce-0530-1ac21e4b68e6",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 31 +DCGM_FI_DEV_GPU_TEMP{gpu="1",UUID="GPU-edc718f8-e593-6468-b9f9-563d508366ed",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 33 +DCGM_FI_DEV_GPU_TEMP{gpu="2",UUID="GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 31 +DCGM_FI_DEV_GPU_TEMP{gpu="3",UUID="GPU-3a325419-de5f-778f-cf4e-fe7290362ac5",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 34 +DCGM_FI_DEV_GPU_TEMP{gpu="4",UUID="GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 34 +DCGM_FI_DEV_GPU_TEMP{gpu="5",UUID="GPU-3cab564d-1f63-674b-a831-024600bf985c",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 32 
+DCGM_FI_DEV_GPU_TEMP{gpu="6",UUID="GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08",container="llama-3-2-1b-ctr",namespace="nim-workload",pod="llama-3-2-1b-7577f87fc7-dhb97",pod_uid=""} 37 +DCGM_FI_DEV_GPU_TEMP{gpu="7",UUID="GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 31 +DCGM_FI_DEV_POWER_USAGE{gpu="0",UUID="GPU-15704b32-f531-14ce-0530-1ac21e4b68e6",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.692000 +DCGM_FI_DEV_POWER_USAGE{gpu="1",UUID="GPU-edc718f8-e593-6468-b9f9-563d508366ed",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.219000 +DCGM_FI_DEV_POWER_USAGE{gpu="2",UUID="GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.899000 +DCGM_FI_DEV_POWER_USAGE{gpu="3",UUID="GPU-3a325419-de5f-778f-cf4e-fe7290362ac5",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 66.711000 +DCGM_FI_DEV_POWER_USAGE{gpu="4",UUID="GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.875000 +DCGM_FI_DEV_POWER_USAGE{gpu="5",UUID="GPU-3cab564d-1f63-674b-a831-024600bf985c",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB 
HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 67.664000 +DCGM_FI_DEV_POWER_USAGE{gpu="6",UUID="GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08",container="llama-3-2-1b-ctr",namespace="nim-workload",pod="llama-3-2-1b-7577f87fc7-dhb97",pod_uid=""} 112.670000 +DCGM_FI_DEV_POWER_USAGE{gpu="7",UUID="GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 65.061000 +DCGM_FI_DEV_GPU_UTIL{gpu="0",UUID="GPU-15704b32-f531-14ce-0530-1ac21e4b68e6",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="1",UUID="GPU-edc718f8-e593-6468-b9f9-563d508366ed",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="2",UUID="GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="3",UUID="GPU-3a325419-de5f-778f-cf4e-fe7290362ac5",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="4",UUID="GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 
+DCGM_FI_DEV_GPU_UTIL{gpu="5",UUID="GPU-3cab564d-1f63-674b-a831-024600bf985c",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="6",UUID="GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7",pci_bus_id="00000000:B9:00.0",device="nvidia6",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08",container="llama-3-2-1b-ctr",namespace="nim-workload",pod="llama-3-2-1b-7577f87fc7-dhb97",pod_uid=""} 0 +DCGM_FI_DEV_GPU_UTIL{gpu="7",UUID="GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206",pci_bus_id="00000000:CA:00.0",device="nvidia7",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="0",UUID="GPU-15704b32-f531-14ce-0530-1ac21e4b68e6",pci_bus_id="00000000:53:00.0",device="nvidia0",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="1",UUID="GPU-edc718f8-e593-6468-b9f9-563d508366ed",pci_bus_id="00000000:64:00.0",device="nvidia1",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="2",UUID="GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2",pci_bus_id="00000000:75:00.0",device="nvidia2",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="3",UUID="GPU-3a325419-de5f-778f-cf4e-fe7290362ac5",pci_bus_id="00000000:86:00.0",device="nvidia3",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="4",UUID="GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12",pci_bus_id="00000000:97:00.0",device="nvidia4",modelName="NVIDIA H100 80GB 
HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 +DCGM_FI_DEV_MEM_COPY_UTIL{gpu="5",UUID="GPU-3cab564d-1f63-674b-a831-024600bf985c",pci_bus_id="00000000:A8:00.0",device="nvidia5",modelName="NVIDIA H100 80GB HBM3",Hostname="ip-10-0-180-136.ec2.internal",DCGM_FI_DRIVER_VERSION="580.105.08"} 0 ``` ### Prometheus Querying GPU Metrics @@ -131,368 +127,368 @@ Query Prometheus to verify it is actively scraping and storing DCGM metrics. { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-bc5610b9-79c8-fedd-8899-07539c7f868a", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-15704b32-f531-14ce-0530-1ac21e4b68e6", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia1", + "device": "nvidia0", "endpoint": "gpu-metrics", - "gpu": "1", - "instance": "10.0.172.246:9400", + "gpu": "0", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:53:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-fbc2c554-4d37-8938-0032-f923bad0f716", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-edc718f8-e593-6468-b9f9-563d508366ed", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia2", + "device": "nvidia1", "endpoint": "gpu-metrics", - "gpu": "2", - "instance": "10.0.172.246:9400", + "gpu": "1", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:64:00.0", + "pod": 
"nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia3", + "device": "nvidia2", "endpoint": "gpu-metrics", - "gpu": "3", - "instance": "10.0.172.246:9400", + "gpu": "2", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:75:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-82e45d1b-1618-559f-144c-eab51545030b", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3a325419-de5f-778f-cf4e-fe7290362ac5", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia4", + "device": "nvidia3", "endpoint": "gpu-metrics", - "gpu": "4", - "instance": "10.0.172.246:9400", + "gpu": "3", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:86:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-39e28159-8c62-ee71-64db-b748edd61e15", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12", 
"__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia5", + "device": "nvidia4", "endpoint": "gpu-metrics", - "gpu": "5", - "instance": "10.0.172.246:9400", + "gpu": "4", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:97:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3cab564d-1f63-674b-a831-024600bf985c", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", - "device": "nvidia6", + "device": "nvidia5", "endpoint": "gpu-metrics", - "gpu": "6", - "instance": "10.0.172.246:9400", + "gpu": "5", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:A8:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-04d228d3-3b5a-3534-f5cf-969706647d56", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": 
"nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-92da0328-2f33-b563-d577-9d2b9f21f280", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-3f048793-8751-030e-5870-ebbd2b10cef2", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia0", "endpoint": "gpu-metrics", "gpu": "0", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:53:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-184dab49-47ce-eeec-2239-3e03fbd4c002", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-cc644abe-17e4-7cb7-500d-ed8c09aea2fb", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-dbabb552-a092-0ca9-0580-8d4fe378eb02", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-8d0b1081-9549-2b14-7e01-b4a725873c21", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": 
"gpu-metrics", "gpu": "2", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-5342927e-e180-84f1-55ba-257f1cbd3ba4", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-38bbfee9-dc95-ffb5-4034-f9a6c82a45bb", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-95085215-739e-e7c6-4011-8dbe004af8c3", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-24087b69-8889-6b23-feeb-2905664fbcbf", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": 
"GPU-a7b658ad-f23e-cea9-2523-569d521700bf", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-d2f75162-e86d-0da0-0af4-3fa0b80038cd", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia5", "endpoint": "gpu-metrics", "gpu": "5", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-1e9a0e94-769a-b1e6-36f7-9296e286ef90", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-b00fe5f9-5832-19d6-0276-28d8630f0f4b", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia6", "endpoint": "gpu-metrics", "gpu": "6", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-16b2cd36-9dbe-3ee7-0810-07b330e36e04", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-530bd4b0-238b-f0c2-b496-63595812bca8", "__name__": "DCGM_FI_DEV_GPU_UTIL", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", 
+ "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7", "__name__": "DCGM_FI_DEV_GPU_UTIL", - "container": "main", - "device": "nvidia0", + "container": "llama-3-2-1b-ctr", + "device": "nvidia6", "endpoint": "gpu-metrics", - "gpu": "0", - "instance": "10.0.172.246:9400", + "gpu": "6", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "dynamo-workload", - "pci_bus_id": "00000000:53:00.0", - "pod": "vllm-agg-0-vllmdecodeworker-s65j5", + "namespace": "nim-workload", + "pci_bus_id": "00000000:B9:00.0", + "pod": "llama-3-2-1b-7577f87fc7-dhb97", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.184, + 1775085339.885, "0" ] } @@ -511,369 +507,369 @@ Query Prometheus to verify it is actively scraping and storing DCGM metrics. 
{ "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-bc5610b9-79c8-fedd-8899-07539c7f868a", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-15704b32-f531-14ce-0530-1ac21e4b68e6", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia1", + "device": "nvidia0", "endpoint": "gpu-metrics", - "gpu": "1", - "instance": "10.0.172.246:9400", + "gpu": "0", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:53:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-fbc2c554-4d37-8938-0032-f923bad0f716", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-edc718f8-e593-6468-b9f9-563d508366ed", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia2", + "device": "nvidia1", "endpoint": "gpu-metrics", - "gpu": "2", - "instance": "10.0.172.246:9400", + "gpu": "1", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:64:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia3", + "device": "nvidia2", 
"endpoint": "gpu-metrics", - "gpu": "3", - "instance": "10.0.172.246:9400", + "gpu": "2", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:75:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-82e45d1b-1618-559f-144c-eab51545030b", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3a325419-de5f-778f-cf4e-fe7290362ac5", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia4", + "device": "nvidia3", "endpoint": "gpu-metrics", - "gpu": "4", - "instance": "10.0.172.246:9400", + "gpu": "3", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:86:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-39e28159-8c62-ee71-64db-b748edd61e15", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia5", + "device": "nvidia4", "endpoint": "gpu-metrics", - "gpu": "5", - "instance": "10.0.172.246:9400", + "gpu": "4", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:97:00.0", + "pod": 
"nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3cab564d-1f63-674b-a831-024600bf985c", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", - "device": "nvidia6", + "device": "nvidia5", "endpoint": "gpu-metrics", - "gpu": "6", - "instance": "10.0.172.246:9400", + "gpu": "5", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", - "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pci_bus_id": "00000000:A8:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-04d228d3-3b5a-3534-f5cf-969706647d56", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-92da0328-2f33-b563-d577-9d2b9f21f280", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-3f048793-8751-030e-5870-ebbd2b10cef2", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": 
"nvidia0", "endpoint": "gpu-metrics", "gpu": "0", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:53:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-184dab49-47ce-eeec-2239-3e03fbd4c002", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-cc644abe-17e4-7cb7-500d-ed8c09aea2fb", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-dbabb552-a092-0ca9-0580-8d4fe378eb02", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-8d0b1081-9549-2b14-7e01-b4a725873c21", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": "gpu-metrics", "gpu": "2", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": 
"GPU-5342927e-e180-84f1-55ba-257f1cbd3ba4", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-38bbfee9-dc95-ffb5-4034-f9a6c82a45bb", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-95085215-739e-e7c6-4011-8dbe004af8c3", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-24087b69-8889-6b23-feeb-2905664fbcbf", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-a7b658ad-f23e-cea9-2523-569d521700bf", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-d2f75162-e86d-0da0-0af4-3fa0b80038cd", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia5", "endpoint": "gpu-metrics", "gpu": "5", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + 
"pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-1e9a0e94-769a-b1e6-36f7-9296e286ef90", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-b00fe5f9-5832-19d6-0276-28d8630f0f4b", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia6", "endpoint": "gpu-metrics", "gpu": "6", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-16b2cd36-9dbe-3ee7-0810-07b330e36e04", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-530bd4b0-238b-f0c2-b496-63595812bca8", "__name__": "DCGM_FI_DEV_FB_USED", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, + 1775085340.205, "0" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7", "__name__": "DCGM_FI_DEV_FB_USED", - "container": "main", - "device": "nvidia0", + "container": "llama-3-2-1b-ctr", + "device": "nvidia6", "endpoint": 
"gpu-metrics", - "gpu": "0", - "instance": "10.0.172.246:9400", + "gpu": "6", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "dynamo-workload", - "pci_bus_id": "00000000:53:00.0", - "pod": "vllm-agg-0-vllmdecodeworker-s65j5", + "namespace": "nim-workload", + "pci_bus_id": "00000000:B9:00.0", + "pod": "llama-3-2-1b-7577f87fc7-dhb97", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.444, - "74166" + 1775085340.205, + "75050" ] } ] @@ -891,369 +887,369 @@ Query Prometheus to verify it is actively scraping and storing DCGM metrics. { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-bc5610b9-79c8-fedd-8899-07539c7f868a", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-15704b32-f531-14ce-0530-1ac21e4b68e6", + "__name__": "DCGM_FI_DEV_GPU_TEMP", + "container": "nvidia-dcgm-exporter", + "device": "nvidia0", + "endpoint": "gpu-metrics", + "gpu": "0", + "instance": "10.0.187.45:9400", + "job": "nvidia-dcgm-exporter", + "modelName": "NVIDIA H100 80GB HBM3", + "namespace": "gpu-operator", + "pci_bus_id": "00000000:53:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", + "service": "nvidia-dcgm-exporter" + }, + "value": [ + 1775085340.554, + "31" + ] + }, + { + "metric": { + "DCGM_FI_DRIVER_VERSION": "580.105.08", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-edc718f8-e593-6468-b9f9-563d508366ed", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "29" + 1775085340.554, + "33" ] }, { "metric": { 
"DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-fbc2c554-4d37-8938-0032-f923bad0f716", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": "gpu-metrics", "gpu": "2", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "26" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3a325419-de5f-778f-cf4e-fe7290362ac5", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "29" + 1775085340.554, + "34" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-82e45d1b-1618-559f-144c-eab51545030b", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", 
"namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "28" + 1775085340.554, + "34" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-39e28159-8c62-ee71-64db-b748edd61e15", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3cab564d-1f63-674b-a831-024600bf985c", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia5", "endpoint": "gpu-metrics", "gpu": "5", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "26" + 1775085340.554, + "32" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365", - "__name__": "DCGM_FI_DEV_GPU_TEMP", - "container": "nvidia-dcgm-exporter", - "device": "nvidia6", - "endpoint": "gpu-metrics", - "gpu": "6", - "instance": "10.0.172.246:9400", - "job": "nvidia-dcgm-exporter", - "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "gpu-operator", - "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", - "service": "nvidia-dcgm-exporter" - }, - "value": [ - 1773114089.702, - "28" - ] - }, - { - "metric": { - "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-04d228d3-3b5a-3534-f5cf-969706647d56", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.172.246:9400", + "instance": 
"10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "26" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-92da0328-2f33-b563-d577-9d2b9f21f280", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-3f048793-8751-030e-5870-ebbd2b10cef2", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia0", "endpoint": "gpu-metrics", "gpu": "0", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:53:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "27" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-184dab49-47ce-eeec-2239-3e03fbd4c002", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-cc644abe-17e4-7cb7-500d-ed8c09aea2fb", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "29" + 1775085340.554, + "33" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-dbabb552-a092-0ca9-0580-8d4fe378eb02", + "Hostname": 
"ip-10-0-251-220.ec2.internal", + "UUID": "GPU-8d0b1081-9549-2b14-7e01-b4a725873c21", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": "gpu-metrics", "gpu": "2", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "28" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-5342927e-e180-84f1-55ba-257f1cbd3ba4", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-38bbfee9-dc95-ffb5-4034-f9a6c82a45bb", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "29" + 1775085340.554, + "32" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-95085215-739e-e7c6-4011-8dbe004af8c3", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-24087b69-8889-6b23-feeb-2905664fbcbf", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", 
"service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "29" + 1775085340.554, + "33" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-a7b658ad-f23e-cea9-2523-569d521700bf", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-d2f75162-e86d-0da0-0af4-3fa0b80038cd", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia5", "endpoint": "gpu-metrics", "gpu": "5", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "27" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-1e9a0e94-769a-b1e6-36f7-9296e286ef90", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-b00fe5f9-5832-19d6-0276-28d8630f0f4b", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia6", "endpoint": "gpu-metrics", "gpu": "6", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "30" + 1775085340.554, + "32" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-16b2cd36-9dbe-3ee7-0810-07b330e36e04", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-530bd4b0-238b-f0c2-b496-63595812bca8", "__name__": "DCGM_FI_DEV_GPU_TEMP", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": 
"10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "27" + 1775085340.554, + "31" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7", "__name__": "DCGM_FI_DEV_GPU_TEMP", - "container": "main", - "device": "nvidia0", + "container": "llama-3-2-1b-ctr", + "device": "nvidia6", "endpoint": "gpu-metrics", - "gpu": "0", - "instance": "10.0.172.246:9400", + "gpu": "6", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "dynamo-workload", - "pci_bus_id": "00000000:53:00.0", - "pod": "vllm-agg-0-vllmdecodeworker-s65j5", + "namespace": "nim-workload", + "pci_bus_id": "00000000:B9:00.0", + "pod": "llama-3-2-1b-7577f87fc7-dhb97", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.702, - "30" + 1775085340.554, + "37" ] } ] @@ -1271,369 +1267,369 @@ Query Prometheus to verify it is actively scraping and storing DCGM metrics. 
{ "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-bc5610b9-79c8-fedd-8899-07539c7f868a", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-15704b32-f531-14ce-0530-1ac21e4b68e6", + "__name__": "DCGM_FI_DEV_POWER_USAGE", + "container": "nvidia-dcgm-exporter", + "device": "nvidia0", + "endpoint": "gpu-metrics", + "gpu": "0", + "instance": "10.0.187.45:9400", + "job": "nvidia-dcgm-exporter", + "modelName": "NVIDIA H100 80GB HBM3", + "namespace": "gpu-operator", + "pci_bus_id": "00000000:53:00.0", + "pod": "nvidia-dcgm-exporter-2xrln", + "service": "nvidia-dcgm-exporter" + }, + "value": [ + 1775085340.891, + "67.692" + ] + }, + { + "metric": { + "DCGM_FI_DRIVER_VERSION": "580.105.08", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-edc718f8-e593-6468-b9f9-563d508366ed", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "68.347" + 1775085340.891, + "67.219" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-fbc2c554-4d37-8938-0032-f923bad0f716", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": "gpu-metrics", "gpu": "2", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": 
"nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "65.709" + 1775085340.891, + "67.899" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3a325419-de5f-778f-cf4e-fe7290362ac5", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "67.316" + 1775085340.891, + "66.711" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-82e45d1b-1618-559f-144c-eab51545030b", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "68.717" + 1775085340.891, + "67.875" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-39e28159-8c62-ee71-64db-b748edd61e15", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-3cab564d-1f63-674b-a831-024600bf985c", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia5", 
"endpoint": "gpu-metrics", "gpu": "5", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", - "service": "nvidia-dcgm-exporter" - }, - "value": [ - 1773114089.943, - "65.742" - ] - }, - { - "metric": { - "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365", - "__name__": "DCGM_FI_DEV_POWER_USAGE", - "container": "nvidia-dcgm-exporter", - "device": "nvidia6", - "endpoint": "gpu-metrics", - "gpu": "6", - "instance": "10.0.172.246:9400", - "job": "nvidia-dcgm-exporter", - "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "gpu-operator", - "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "67.328" + 1775085340.891, + "67.664" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - "UUID": "GPU-04d228d3-3b5a-3534-f5cf-969706647d56", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.172.246:9400", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-wqqqn", + "pod": "nvidia-dcgm-exporter-2xrln", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "66.997" + 1775085340.891, + "65.061" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-92da0328-2f33-b563-d577-9d2b9f21f280", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": 
"GPU-3f048793-8751-030e-5870-ebbd2b10cef2", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia0", "endpoint": "gpu-metrics", "gpu": "0", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:53:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "69.339" + 1775085340.891, + "68.284" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-184dab49-47ce-eeec-2239-3e03fbd4c002", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-cc644abe-17e4-7cb7-500d-ed8c09aea2fb", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia1", "endpoint": "gpu-metrics", "gpu": "1", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:64:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "68.754" + 1775085340.891, + "70.963" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-dbabb552-a092-0ca9-0580-8d4fe378eb02", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-8d0b1081-9549-2b14-7e01-b4a725873c21", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia2", "endpoint": "gpu-metrics", "gpu": "2", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:75:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": 
"nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "68.61" + 1775085340.891, + "67.535" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-5342927e-e180-84f1-55ba-257f1cbd3ba4", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-38bbfee9-dc95-ffb5-4034-f9a6c82a45bb", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia3", "endpoint": "gpu-metrics", "gpu": "3", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:86:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "66.499" + 1775085340.891, + "68.419" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-95085215-739e-e7c6-4011-8dbe004af8c3", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-24087b69-8889-6b23-feeb-2905664fbcbf", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia4", "endpoint": "gpu-metrics", "gpu": "4", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:97:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "67.645" + 1775085340.891, + "69.498" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-a7b658ad-f23e-cea9-2523-569d521700bf", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-d2f75162-e86d-0da0-0af4-3fa0b80038cd", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia5", "endpoint": "gpu-metrics", "gpu": "5", - 
"instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:A8:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "66.68" + 1775085340.891, + "69.66" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-1e9a0e94-769a-b1e6-36f7-9296e286ef90", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-b00fe5f9-5832-19d6-0276-28d8630f0f4b", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia6", "endpoint": "gpu-metrics", "gpu": "6", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:B9:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "68.395" + 1775085340.891, + "66.98" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-2", - "UUID": "GPU-16b2cd36-9dbe-3ee7-0810-07b330e36e04", + "Hostname": "ip-10-0-251-220.ec2.internal", + "UUID": "GPU-530bd4b0-238b-f0c2-b496-63595812bca8", "__name__": "DCGM_FI_DEV_POWER_USAGE", "container": "nvidia-dcgm-exporter", "device": "nvidia7", "endpoint": "gpu-metrics", "gpu": "7", - "instance": "10.0.247.52:9400", + "instance": "10.0.147.205:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", "namespace": "gpu-operator", "pci_bus_id": "00000000:CA:00.0", - "pod": "nvidia-dcgm-exporter-g2fjs", + "pod": "nvidia-dcgm-exporter-sscnw", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "69.523" + 1775085340.891, + "68.367" ] }, { "metric": { "DCGM_FI_DRIVER_VERSION": "580.105.08", - "Hostname": "gpu-node-1", - 
"UUID": "GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005", + "Hostname": "ip-10-0-180-136.ec2.internal", + "UUID": "GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7", "__name__": "DCGM_FI_DEV_POWER_USAGE", - "container": "main", - "device": "nvidia0", + "container": "llama-3-2-1b-ctr", + "device": "nvidia6", "endpoint": "gpu-metrics", - "gpu": "0", - "instance": "10.0.172.246:9400", + "gpu": "6", + "instance": "10.0.187.45:9400", "job": "nvidia-dcgm-exporter", "modelName": "NVIDIA H100 80GB HBM3", - "namespace": "dynamo-workload", - "pci_bus_id": "00000000:53:00.0", - "pod": "vllm-agg-0-vllmdecodeworker-s65j5", + "namespace": "nim-workload", + "pci_bus_id": "00000000:B9:00.0", + "pod": "llama-3-2-1b-7577f87fc7-dhb97", "service": "nvidia-dcgm-exporter" }, "value": [ - 1773114089.943, - "113.611" + 1775085340.891, + "112.67" ] } ] @@ -1641,20 +1637,4 @@ Query Prometheus to verify it is actively scraping and storing DCGM metrics. } ``` -## AI Service Metrics (Custom Metrics API) - -Prometheus adapter exposes custom metrics via the Kubernetes custom metrics API, -enabling HPA and other consumers to act on workload-specific metrics. - -**Custom metrics API available resources** -``` -$ kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 | python3 -c "..." # extract resource names -namespaces/gpu_utilization -pods/gpu_utilization -namespaces/gpu_memory_used -pods/gpu_memory_used -namespaces/gpu_power_usage -pods/gpu_power_usage -``` - -**Result: PASS** — DCGM exporter provides per-GPU metrics (utilization, memory, temperature, power). Prometheus actively scrapes and stores metrics. Custom metrics API available via prometheus-adapter. +**Result: PASS** — DCGM exporter provides per-GPU metrics (utilization, memory, temperature, power). Prometheus actively scrapes and stores metrics. 
diff --git a/docs/conformance/cncf/v1.35/nim-eks/evidence/ai-service-metrics.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/ai-service-metrics.md new file mode 100644 index 000000000..855926886 --- /dev/null +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/ai-service-metrics.md @@ -0,0 +1,114 @@ +# AI Service Metrics (NIM Inference) + +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:15:43 UTC +**Kubernetes Version:** v1.35 +**Platform:** linux/amd64 + +--- + +Demonstrates that NVIDIA NIM inference microservices expose Prometheus-format +metrics that can be discovered and collected by the monitoring stack. + +## NIM Inference Workload + +**NIMService** +``` +$ kubectl get nimservice -n nim-workload +NAME STATUS AGE +llama-3-2-1b Ready 58m +``` + +**NIM workload pods** +``` +$ kubectl get pods -n nim-workload -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +llama-3-2-1b-7577f87fc7-dhb97 1/1 Running 0 58m 10.0.158.63 ip-10-0-180-136.ec2.internal +``` + +**NIM models endpoint** +``` +Model: meta/llama-3.2-1b-instruct +``` + +**NIM inference metrics endpoint (sampled after generating inference traffic)** +``` +num_requests_waiting{model_name="meta/llama-3.2-1b-instruct"} 1.0 +num_request_max{model_name="meta/llama-3.2-1b-instruct"} 2048.0 +prompt_tokens_total{model_name="meta/llama-3.2-1b-instruct"} 603.0 +generation_tokens_total{model_name="meta/llama-3.2-1b-instruct"} 997.0 +time_to_first_token_seconds_count{model_name="meta/llama-3.2-1b-instruct"} 34.0 +time_to_first_token_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} 3.781902551651001 +time_per_output_token_seconds_count{model_name="meta/llama-3.2-1b-instruct"} 963.0 +time_per_output_token_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} 1.705470085144043 +e2e_request_latency_seconds_count{model_name="meta/llama-3.2-1b-instruct"} 34.0 +e2e_request_latency_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} 5.490677356719971 
+request_prompt_tokens_count{model_name="meta/llama-3.2-1b-instruct"} 34.0 +request_prompt_tokens_sum{model_name="meta/llama-3.2-1b-instruct"} 603.0 +request_generation_tokens_count{model_name="meta/llama-3.2-1b-instruct"} 34.0 +request_generation_tokens_sum{model_name="meta/llama-3.2-1b-instruct"} 997.0 +request_success_total{model_name="meta/llama-3.2-1b-instruct"} 34.0 +``` + +## Prometheus Metrics Discovery + +A ServiceMonitor is created to enable Prometheus auto-discovery of NIM inference +metrics. NIM exposes metrics at `/v1/metrics` in Prometheus exposition format. + +**NIM ServiceMonitor** +``` +$ kubectl get servicemonitor nim-inference -n monitoring -o yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"monitoring.coreos.com/v1","kind":"ServiceMonitor","metadata":{"annotations":{},"labels":{"release":"kube-prometheus"},"name":"nim-inference","namespace":"monitoring"},"spec":{"endpoints":[{"interval":"15s","path":"/v1/metrics","port":"api"}],"namespaceSelector":{"matchNames":["nim-workload"]},"selector":{"matchLabels":{"app.kubernetes.io/managed-by":"k8s-nim-operator"}}}} + creationTimestamp: "2026-04-01T23:16:15Z" + generation: 1 + labels: + release: kube-prometheus + name: nim-inference + namespace: monitoring + resourceVersion: "102073064" + uid: e29b3536-c76d-410c-a236-a3ac5d745822 +spec: + endpoints: + - interval: 15s + path: /v1/metrics + port: api + namespaceSelector: + matchNames: + - nim-workload + selector: + matchLabels: + app.kubernetes.io/managed-by: k8s-nim-operator +``` + +**Prometheus scrape targets (active)** +``` +{ + "job": "llama-3-2-1b", + "endpoint": "http://10.0.158.63:8000/v1/metrics", + "health": "up", + "lastScrape": "2026-04-01T23:18:42.378844773Z" +} +``` + +**NIM metrics queried from Prometheus** +``` +prompt_tokens_total{model_name="meta/llama-3.2-1b-instruct"} = 603 
+generation_tokens_total{model_name="meta/llama-3.2-1b-instruct"} = 997 +time_to_first_token_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} = 3.781902551651001 +time_per_output_token_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} = 1.705470085144043 +e2e_request_latency_seconds_sum{model_name="meta/llama-3.2-1b-instruct"} = 5.490677356719971 +``` + +**Result: PASS** — Prometheus discovers NIM inference workloads via ServiceMonitor and actively scrapes application-level AI inference metrics (token throughput, request latency, time-to-first-token) from the /v1/metrics endpoint. + +## Cleanup + +**Delete workload namespace** +``` +$ kubectl delete ns nim-workload +``` diff --git a/docs/conformance/cncf/evidence/cluster-autoscaling.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/cluster-autoscaling.md similarity index 54% rename from docs/conformance/cncf/evidence/cluster-autoscaling.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/cluster-autoscaling.md index 4f71c4b8f..a00bc7d74 100644 --- a/docs/conformance/cncf/evidence/cluster-autoscaling.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/cluster-autoscaling.md @@ -1,49 +1,48 @@ # Cluster Autoscaling +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:20:45 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** EKS (p5.48xlarge, 8x H100) and GKE (a3-megagpu-8g, 8x H100) --- Demonstrates CNCF AI Conformance requirement that the platform has GPU-aware -cluster autoscaling infrastructure configured, capable of scaling GPU node -groups based on workload demand. +cluster autoscaling infrastructure configured, with Auto Scaling Groups capable +of scaling GPU node groups based on workload demand. 
## Summary -| Platform | Autoscaler | GPU Instances | Nodes | Result | -|----------|-----------|---------------|-------|--------| -| **EKS** | AWS Auto Scaling Group | p5.48xlarge (8x H100) | 2 | **PASS** | -| **GKE** | GKE built-in cluster autoscaler | a3-megagpu-8g (8x H100) | 2 | **PASS** | +1. **GPU Node Group (ASG)** — EKS Auto Scaling Group configured with GPU instances +2. **Capacity Reservation** — Dedicated GPU capacity available for scale-up +3. **Scalable Configuration** — ASG min/max configurable for demand-based scaling +4. **Kubernetes Integration** — ASG nodes auto-join the EKS cluster with GPU labels +5. **Autoscaler Compatibility** — Cluster Autoscaler supported via ASG tag discovery --- -## EKS: Auto Scaling Groups - -**Generated:** 2026-03-10 03:44:07 UTC +## GPU Node Auto Scaling Group The cluster uses an AWS Auto Scaling Group (ASG) for GPU nodes, which can scale -up/down based on workload demand. The ASG is configured with p5.48xlarge instances -(8x NVIDIA H100 80GB HBM3 each) backed by a capacity reservation. +up/down based on workload demand. 
-### EKS Cluster Details +## EKS Cluster Details - **Region:** us-east-1 - **Cluster:** aws-us-east-1-aicr-cuj2 - **GPU Node Group:** gpu-worker -### GPU Nodes +## GPU Nodes **GPU nodes** ``` $ kubectl get nodes -l nvidia.com/gpu.present=true -o custom-columns=NAME:.metadata.name,INSTANCE-TYPE:.metadata.labels.node\.kubernetes\.io/instance-type,GPUS:.metadata.labels.nvidia\.com/gpu\.count,PRODUCT:.metadata.labels.nvidia\.com/gpu\.product,NODE-GROUP:.metadata.labels.nodeGroup,ZONE:.metadata.labels.topology\.kubernetes\.io/zone NAME INSTANCE-TYPE GPUS PRODUCT NODE-GROUP ZONE -ip-10-0-171-111.ec2.internal p5.48xlarge 8 NVIDIA-H100-80GB-HBM3 gpu-worker us-east-1e -ip-10-0-206-2.ec2.internal p5.48xlarge 8 NVIDIA-H100-80GB-HBM3 gpu-worker us-east-1e +ip-10-0-180-136.ec2.internal p5.48xlarge 8 NVIDIA-H100-80GB-HBM3 gpu-worker us-east-1e +ip-10-0-251-220.ec2.internal p5.48xlarge 8 NVIDIA-H100-80GB-HBM3 gpu-worker us-east-1e ``` -### Auto Scaling Group (AWS) +## Auto Scaling Group (AWS) **GPU ASG details** ``` @@ -65,7 +64,7 @@ $ aws autoscaling describe-auto-scaling-groups --region us-east-1 --auto-scaling **GPU launch template** ``` -$ aws ec2 describe-launch-template-versions --region us-east-1 --launch-template-id lt-038186420dd139467 --versions $Latest --query LaunchTemplateVersions[0].LaunchTemplateData.{InstanceType:InstanceType,ImageId:ImageId} --output table +$ aws ec2 describe-launch-template-versions --region us-east-1 --launch-template-id lt-043af36be99f4f76b --versions $Latest --query LaunchTemplateVersions[0].LaunchTemplateData.{InstanceType:InstanceType,ImageId:ImageId} --output table ------------------------------------------- | DescribeLaunchTemplateVersions | +------------------------+----------------+ @@ -91,7 +90,7 @@ $ aws autoscaling describe-tags --region us-east-1 --filters Name=auto-scaling-g +--------------------------------------+------------------------+ ``` -### Capacity Reservation +## Capacity Reservation **GPU capacity reservation** ``` @@ 
-100,7 +99,7 @@ $ aws ec2 describe-capacity-reservations --region us-east-1 --query CapacityRese | DescribeCapacityReservations | +------------+------------------------+ | AZ | us-east-1e | -| Available | 2 | +| Available | 1 | | ID | cr-0cbe491320188dfa6 | | State | active | | Total | 10 | @@ -108,85 +107,4 @@ $ aws ec2 describe-capacity-reservations --region us-east-1 --query CapacityRese +------------+------------------------+ ``` -**Result: PASS** — EKS cluster with GPU nodes managed by Auto Scaling Group, ASG configuration verified via AWS API. - ---- - -## GKE: Built-in Cluster Autoscaler - -**Generated:** 2026-03-16 21:50:46 UTC - -GKE includes a built-in cluster autoscaler that manages node pool scaling based -on workload demand. The autoscaler is configured per node pool. - -### GKE Cluster Details - -- **Project:** eidosx -- **Zone:** us-central1-c - -### GPU Nodes - -**GPU nodes** -``` -$ kubectl get nodes -l nvidia.com/gpu.present=true -o custom-columns=NAME:.metadata.name,INSTANCE-TYPE:.metadata.labels.node\.kubernetes\.io/instance-type,GPUS:.status.capacity.nvidia\.com/gpu,ACCELERATOR:.metadata.labels.cloud\.google\.com/gke-accelerator,NODE-POOL:.metadata.labels.cloud\.google\.com/gke-nodepool -NAME INSTANCE-TYPE GPUS ACCELERATOR NODE-POOL -gke-aicr-demo2-aicr-demo2-gpu-worker-8de6040c-h2d0 a3-megagpu-8g 8 nvidia-h100-mega-80gb aicr-demo2-gpu-worker -gke-aicr-demo2-aicr-demo2-gpu-worker-8de6040c-t81x a3-megagpu-8g 8 nvidia-h100-mega-80gb aicr-demo2-gpu-worker -``` - -### GKE Cluster Autoscaler Status - -**Cluster Autoscaler Status** -``` -autoscalerStatus: Running -clusterWide: - health: - lastProbeTime: "2026-03-16T21:50:43Z" - lastTransitionTime: "2026-03-12T21:28:08Z" - nodeCounts: - registered: - ready: 6 - total: 6 - status: Healthy - scaleDown: - status: NoCandidates - scaleUp: - status: NoActivity -nodeGroups: -- health: - cloudProviderTarget: 1 - maxSize: 1 - minSize: 1 - status: Healthy - name: 
.../gke-aicr-demo2-aicr-demo2-cpu-worker-cd95cf64-grp -- health: - cloudProviderTarget: 2 - maxSize: 2 - minSize: 2 - status: Healthy - name: .../gke-aicr-demo2-aicr-demo2-gpu-worker-8de6040c-grp -- health: - cloudProviderTarget: 1 - maxSize: 3 - minSize: 1 - status: Healthy - name: .../gke-aicr-demo2-aicr-demo2-system-f5af1da6-grp -- health: - cloudProviderTarget: 1 - maxSize: 3 - minSize: 1 - status: Healthy - name: .../gke-aicr-demo2-aicr-demo2-system-358b1ae8-grp -- health: - cloudProviderTarget: 1 - maxSize: 3 - minSize: 1 - status: Healthy - name: .../gke-aicr-demo2-aicr-demo2-system-b313be0b-grp -``` - -**Result: PASS** — GKE cluster with 2 GPU nodes and built-in cluster autoscaler active, all node groups healthy. - ---- - -Evidence is configuration-level; a live scale event is not triggered to avoid disrupting the cluster. +**Result: PASS** — EKS cluster with GPU nodes managed by Auto Scaling Group, ASG configuration verified via AWS API. Evidence is configuration-level; a live scale event is not triggered to avoid disrupting the cluster. 
diff --git a/docs/conformance/cncf/evidence/dra-support.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/dra-support.md similarity index 70% rename from docs/conformance/cncf/evidence/dra-support.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/dra-support.md index 38993b745..1d5b9f724 100644 --- a/docs/conformance/cncf/evidence/dra-support.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/dra-support.md @@ -1,9 +1,9 @@ # DRA Support (Dynamic Resource Allocation) +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:13:30 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 -**Generated:** 2026-03-10 03:39:16 UTC --- @@ -29,11 +29,11 @@ resourceslices resource.k8s.io/v1 false Resource ``` $ kubectl get deviceclass NAME AGE -compute-domain-daemon.nvidia.com 10m -compute-domain-default-channel.nvidia.com 10m -gpu.nvidia.com 10m -mig.nvidia.com 10m -vfio.gpu.nvidia.com 10m +compute-domain-daemon.nvidia.com 58m +compute-domain-default-channel.nvidia.com 58m +gpu.nvidia.com 58m +mig.nvidia.com 58m +vfio.gpu.nvidia.com 58m ``` ## DRA Driver Health @@ -41,10 +41,10 @@ vfio.gpu.nvidia.com 10m **DRA driver pods** ``` $ kubectl get pods -n nvidia-dra-driver -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -nvidia-dra-driver-gpu-controller-68966c79bb-zj7lf 1/1 Running 0 10m 10.0.4.122 system-node-1 -nvidia-dra-driver-gpu-kubelet-plugin-4kfhk 2/2 Running 0 9m54s 10.0.143.178 gpu-node-1 -nvidia-dra-driver-gpu-kubelet-plugin-grg2l 2/2 Running 0 9m54s 10.0.216.98 gpu-node-2 +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +nvidia-dra-driver-gpu-controller-68966c79bb-xvh7f 1/1 Running 0 58m 10.0.7.228 ip-10-0-6-154.ec2.internal +nvidia-dra-driver-gpu-kubelet-plugin-px7p8 2/2 Running 0 58m 10.0.136.3 ip-10-0-251-220.ec2.internal +nvidia-dra-driver-gpu-kubelet-plugin-smkl9 2/2 Running 0 58m 10.0.136.235 
ip-10-0-180-136.ec2.internal ``` ## Device Advertisement (ResourceSlices) @@ -53,10 +53,10 @@ nvidia-dra-driver-gpu-kubelet-plugin-grg2l 2/2 Running 0 ``` $ kubectl get resourceslices NAME NODE DRIVER POOL AGE -gpu-node-1-compute-domain.nvidia.com-q9xqc gpu-node-1 compute-domain.nvidia.com gpu-node-1 10m -gpu-node-1-gpu.nvidia.com-7cbz2 gpu-node-1 gpu.nvidia.com gpu-node-1 10m -gpu-node-2-compute-domain.nvidia.com-2n2cq gpu-node-2 compute-domain.nvidia.com gpu-node-2 10m -gpu-node-2-gpu.nvidia.com-79gvw gpu-node-2 gpu.nvidia.com gpu-node-2 10m +ip-10-0-180-136.ec2.internal-compute-domain.nvidia.com-kfxd7 ip-10-0-180-136.ec2.internal compute-domain.nvidia.com ip-10-0-180-136.ec2.internal 58m +ip-10-0-180-136.ec2.internal-gpu.nvidia.com-8w29z ip-10-0-180-136.ec2.internal gpu.nvidia.com ip-10-0-180-136.ec2.internal 58m +ip-10-0-251-220.ec2.internal-compute-domain.nvidia.com-btqsj ip-10-0-251-220.ec2.internal compute-domain.nvidia.com ip-10-0-251-220.ec2.internal 58m +ip-10-0-251-220.ec2.internal-gpu.nvidia.com-qwdqr ip-10-0-251-220.ec2.internal gpu.nvidia.com ip-10-0-251-220.ec2.internal 58m ``` ## GPU Allocation Test @@ -140,7 +140,7 @@ pod/dra-gpu-test created ``` $ kubectl get resourceclaim -n dra-test -o wide NAME STATE AGE -gpu-claim pending 11s +gpu-claim pending 10s ``` > **Note:** ResourceClaim shows `pending` because the DRA controller deallocates the claim after pod completion. The pod logs below confirm the GPU was successfully allocated and visible during execution. 
@@ -148,8 +148,8 @@ gpu-claim pending 11s **Pod status** ``` $ kubectl get pod dra-gpu-test -n dra-test -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -dra-gpu-test 0/1 Completed 0 13s 10.0.177.19 gpu-node-2 +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +dra-gpu-test 0/1 Completed 0 12s 10.0.142.150 ip-10-0-251-220.ec2.internal ``` **Pod logs** @@ -158,7 +158,7 @@ $ kubectl logs dra-gpu-test -n dra-test /dev/nvidia-modeset /dev/nvidia-uvm /dev/nvidia-uvm-tools -/dev/nvidia2 +/dev/nvidia7 /dev/nvidiactl DRA GPU allocation successful ``` diff --git a/docs/conformance/cncf/evidence/gang-scheduling.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/gang-scheduling.md similarity index 82% rename from docs/conformance/cncf/evidence/gang-scheduling.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/gang-scheduling.md index 53a00fa9e..f1e8888e9 100644 --- a/docs/conformance/cncf/evidence/gang-scheduling.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/gang-scheduling.md @@ -1,7 +1,7 @@ # Gang Scheduling (KAI Scheduler) **Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` -**Generated:** 2026-03-20 20:09:13 UTC +**Generated:** 2026-04-01 23:14:07 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 @@ -16,26 +16,26 @@ scheduler with PodGroups. 
Both pods in the group must be scheduled together or n ``` $ kubectl get deploy -n kai-scheduler NAME READY UP-TO-DATE AVAILABLE AGE -admission 1/1 1 1 20m -binder 1/1 1 1 20m -kai-operator 1/1 1 1 20m -kai-scheduler-default 1/1 1 1 6d22h -pod-grouper 1/1 1 1 20m -podgroup-controller 1/1 1 1 20m -queue-controller 1/1 1 1 20m +admission 1/1 1 1 59m +binder 1/1 1 1 59m +kai-operator 1/1 1 1 59m +kai-scheduler-default 1/1 1 1 59m +pod-grouper 1/1 1 1 59m +podgroup-controller 1/1 1 1 59m +queue-controller 1/1 1 1 59m ``` **KAI scheduler pods** ``` $ kubectl get pods -n kai-scheduler NAME READY STATUS RESTARTS AGE -admission-6d48656c78-vsf22 1/1 Running 0 20m -binder-8cfb98496-79hwx 1/1 Running 0 20m -kai-operator-558c46545b-tth97 1/1 Running 0 20m -kai-scheduler-default-7945d65d9c-5w4bb 1/1 Running 0 20m -pod-grouper-7bd4c7488c-wlfds 1/1 Running 0 20m -podgroup-controller-798798fb5f-mjht6 1/1 Running 0 20m -queue-controller-5b45bb74c9-b75vg 1/1 Running 0 20m +admission-6d48656c78-wshnq 1/1 Running 0 59m +binder-8cfb98496-sdg2h 1/1 Running 0 59m +kai-operator-558c46545b-qz2rx 1/1 Running 0 59m +kai-scheduler-default-57bdcb878c-fpkl2 1/1 Running 0 59m +pod-grouper-7bd4c7488c-mpbsh 1/1 Running 0 59m +podgroup-controller-798798fb5f-pjwkm 1/1 Running 0 59m +queue-controller-5b45bb74c9-knjc9 1/1 Running 0 59m ``` ## PodGroup CRD @@ -44,7 +44,7 @@ queue-controller-5b45bb74c9-b75vg 1/1 Running 0 20m ``` $ kubectl get crd podgroups.scheduling.run.ai NAME CREATED AT -podgroups.scheduling.run.ai 2026-03-10T20:53:06Z +podgroups.scheduling.run.ai 2026-04-01T22:13:48Z ``` ## Gang Scheduling Test @@ -195,23 +195,23 @@ pod/gang-worker-1 created ``` $ kubectl get podgroups -n gang-scheduling-test -o wide NAME AGE -gang-test-group 12s -pg-gang-worker-0-0f1259e1-c344-4964-a1fb-b1ae14e25859 10s -pg-gang-worker-1-af882f6e-316a-49b2-95f6-189b1a20b5c3 10s +gang-test-group 13s +pg-gang-worker-0-bb3f5b6f-080d-4cf3-8625-8be214e2032b 11s +pg-gang-worker-1-f9c72e1a-f7e9-427f-8127-42bb50491402 11s 
``` **Pod status** ``` $ kubectl get pods -n gang-scheduling-test -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -gang-worker-0 0/1 Completed 0 13s 10.0.214.229 ip-10-0-180-136.ec2.internal -gang-worker-1 0/1 Completed 0 13s 10.0.238.183 ip-10-0-180-136.ec2.internal +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +gang-worker-0 0/1 Completed 0 13s 10.0.190.56 ip-10-0-180-136.ec2.internal +gang-worker-1 0/1 Completed 0 13s 10.0.153.74 ip-10-0-180-136.ec2.internal ``` **gang-worker-0 logs** ``` $ kubectl logs gang-worker-0 -n gang-scheduling-test -Fri Mar 20 20:09:24 2026 +Wed Apr 1 23:14:19 2026 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 580.105.08 Driver Version: 580.105.08 CUDA Version: 13.0 | +-----------------------------------------+------------------------+----------------------+ @@ -219,8 +219,8 @@ Fri Mar 20 20:09:24 2026 | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+========================+======================| -| 0 NVIDIA H100 80GB HBM3 On | 00000000:86:00.0 Off | 0 | -| N/A 32C P0 66W / 700W | 0MiB / 81559MiB | 0% Default | +| 0 NVIDIA H100 80GB HBM3 On | 00000000:53:00.0 Off | 0 | +| N/A 31C P0 67W / 700W | 0MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ @@ -237,7 +237,7 @@ Gang worker 0 completed successfully **gang-worker-1 logs** ``` $ kubectl logs gang-worker-1 -n gang-scheduling-test -Fri Mar 20 20:09:24 2026 +Wed Apr 1 23:14:19 2026 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 580.105.08 Driver Version: 580.105.08 CUDA Version: 13.0 | +-----------------------------------------+------------------------+----------------------+ @@ -245,7 +245,7 @@ Fri Mar 20 20:09:24 2026 | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+========================+======================| -| 0 NVIDIA H100 80GB HBM3 On | 00000000:97:00.0 Off | 0 | +| 0 NVIDIA H100 80GB HBM3 On | 00000000:64:00.0 Off | 0 | | N/A 33C P0 67W / 700W | 0MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ diff --git a/docs/conformance/cncf/evidence/index.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/index.md similarity index 54% rename from docs/conformance/cncf/evidence/index.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/index.md index 8334ae517..782a73bff 100644 --- a/docs/conformance/cncf/evidence/index.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/index.md @@ -2,12 +2,13 @@ **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Product:** Kubernetes clusters with NVIDIA AI Cluster Runtime (AICR) +**Product:** [NVIDIA NIM](https://developer.nvidia.com/nim) on EKS — A Kubernetes-based AI inference platform that deploys and manages NVIDIA NIM microservices on Amazon EKS with GPU scheduling, autoscaling, and Gateway API integration. +**Validation Tooling:** NVIDIA AI Cluster Runtime (AICR) -AICR deploys the runtime components (GPU Operator, KAI Scheduler, DCGM Exporter, -kgateway, Kubeflow Trainer, Dynamo, etc.) that make a Kubernetes cluster AI conformant. -Evidence was collected on AICR-enabled Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 accelerators. -Cluster autoscaling evidence covers the underlying platform's node group scaling mechanism. +AICR deploys the runtime components (GPU Operator, NIM Operator, KAI Scheduler, +DCGM Exporter, kgateway, etc.) and validates that the platform meets CNCF AI +Conformance requirements. Evidence was collected on an EKS v1.35 cluster with +NVIDIA H100 80GB HBM3 accelerators running NIM inference workloads. 
## Results @@ -17,8 +18,8 @@ Cluster autoscaling evidence covers the underlying platform's node group scaling | 2 | `gang_scheduling` | Gang Scheduling (KAI Scheduler) | PASS | [gang-scheduling.md](gang-scheduling.md) | | 3 | `secure_accelerator_access` | Secure Accelerator Access | PASS | [secure-accelerator-access.md](secure-accelerator-access.md) | | 4 | `accelerator_metrics` | Accelerator Metrics (DCGM Exporter) | PASS | [accelerator-metrics.md](accelerator-metrics.md) | -| 5 | `ai_service_metrics` | AI Service Metrics (Prometheus ServiceMonitor) | PASS | [ai-service-metrics.md](ai-service-metrics.md) | +| 5 | `ai_service_metrics` | AI Service Metrics (NIM Inference) | PASS | [ai-service-metrics.md](ai-service-metrics.md) | | 6 | `ai_inference` | Inference API Gateway (kgateway) | PASS | [inference-gateway.md](inference-gateway.md) | -| 7 | `robust_controller` | Robust AI Operator (Dynamo + Kubeflow Trainer) | PASS | [robust-operator.md](robust-operator.md) | +| 7 | `robust_controller` | Robust AI Operator (NIM Operator) | PASS | [robust-operator.md](robust-operator.md) | | 8 | `pod_autoscaling` | Pod Autoscaling (HPA + GPU metrics) | PASS | [pod-autoscaling.md](pod-autoscaling.md) | | 9 | `cluster_autoscaling` | Cluster Autoscaling | PASS | [cluster-autoscaling.md](cluster-autoscaling.md) | diff --git a/docs/conformance/cncf/evidence/inference-gateway.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/inference-gateway.md similarity index 67% rename from docs/conformance/cncf/evidence/inference-gateway.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/inference-gateway.md index 2c3ddd992..26e910b36 100644 --- a/docs/conformance/cncf/evidence/inference-gateway.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/inference-gateway.md @@ -1,9 +1,9 @@ # Inference API Gateway (kgateway) +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:18:52 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** 
Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 -**Generated:** 2026-03-10 03:49:45 UTC --- @@ -15,7 +15,7 @@ with an implementation for advanced traffic management for inference services. 1. **kgateway controller** — Running in `kgateway-system` 2. **inference-gateway deployment** — Running (the inference extension controller) 3. **Gateway API CRDs** — All present (GatewayClass, Gateway, HTTPRoute, GRPCRoute, ReferenceGrant) -4. **Active Gateway** — `inference-gateway` with class `kgateway`, programmed with a load balancer address +4. **Active Gateway** — `inference-gateway` with class `kgateway`, programmed with an AWS ELB address 5. **Inference Extension CRDs** — InferencePool, InferenceModelRewrite, InferenceObjective installed 6. **Result: PASS** @@ -27,16 +27,16 @@ with an implementation for advanced traffic management for inference services. ``` $ kubectl get deploy -n kgateway-system NAME READY UP-TO-DATE AVAILABLE AGE -inference-gateway 1/1 1 1 28m -kgateway 1/1 1 1 28m +inference-gateway 1/1 1 1 69m +kgateway 1/1 1 1 69m ``` **kgateway pods** ``` $ kubectl get pods -n kgateway-system NAME READY STATUS RESTARTS AGE -inference-gateway-6f55d54bd8-gj9t8 1/1 Running 0 28m -kgateway-7d6dfdc5dc-s6lwc 1/1 Running 0 28m +inference-gateway-6f55d54bd8-rxt9g 1/1 Running 0 69m +kgateway-7d6dfdc5dc-5wtw2 1/1 Running 0 69m ``` ## GatewayClass @@ -45,8 +45,8 @@ kgateway-7d6dfdc5dc-s6lwc 1/1 Running 0 28m ``` $ kubectl get gatewayclass NAME CONTROLLER ACCEPTED AGE -kgateway kgateway.dev/kgateway True 28m -kgateway-waypoint kgateway.dev/kgateway True 28m +kgateway kgateway.dev/kgateway True 69m +kgateway-waypoint kgateway.dev/kgateway True 69m ``` ## Gateway API CRDs @@ -54,11 +54,11 @@ kgateway-waypoint kgateway.dev/kgateway True 28m **Gateway API CRDs** ``` $ kubectl get crds | grep gateway.networking.k8s.io -gatewayclasses.gateway.networking.k8s.io 2026-03-10T03:21:04Z -gateways.gateway.networking.k8s.io 2026-03-10T03:21:05Z -grpcroutes.gateway.networking.k8s.io 
2026-03-10T03:21:05Z -httproutes.gateway.networking.k8s.io 2026-03-10T03:21:06Z -referencegrants.gateway.networking.k8s.io 2026-03-10T03:21:06Z +gatewayclasses.gateway.networking.k8s.io 2026-04-01T22:09:22Z +gateways.gateway.networking.k8s.io 2026-04-01T22:09:22Z +grpcroutes.gateway.networking.k8s.io 2026-04-01T22:09:23Z +httproutes.gateway.networking.k8s.io 2026-04-01T22:09:23Z +referencegrants.gateway.networking.k8s.io 2026-04-01T22:09:24Z ``` ## Active Gateway @@ -66,8 +66,8 @@ referencegrants.gateway.networking.k8s.io 2026-03-10T03:21:06Z **Gateways** ``` $ kubectl get gateways -A -NAMESPACE NAME CLASS ADDRESS PROGRAMMED AGE -kgateway-system inference-gateway kgateway True 28m +NAMESPACE NAME CLASS ADDRESS PROGRAMMED AGE +kgateway-system inference-gateway kgateway .elb.amazonaws.com True 69m ``` **Gateway details** @@ -82,12 +82,12 @@ metadata: helm.sh/hook-weight: "10" kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"gateway.networking.k8s.io/v1","kind":"Gateway","metadata":{"annotations":{"helm.sh/hook":"post-install,post-upgrade","helm.sh/hook-delete-policy":"before-hook-creation","helm.sh/hook-weight":"10"},"name":"inference-gateway","namespace":"kgateway-system"},"spec":{"gatewayClassName":"kgateway","infrastructure":{"parametersRef":{"group":"gateway.kgateway.dev","kind":"GatewayParameters","name":"system-proxy"}},"listeners":[{"allowedRoutes":{"namespaces":{"from":"All"}},"name":"http","port":80,"protocol":"HTTP"}]}} - creationTimestamp: "2026-03-10T03:21:34Z" + creationTimestamp: "2026-04-01T22:09:39Z" generation: 1 name: inference-gateway namespace: kgateway-system - resourceVersion: "1158803" - uid: 4dac636a-d90d-431c-9397-4baf2c81a150 + resourceVersion: "101860353" + uid: 1b8b3a2a-dd47-4ac0-b18b-b5da8c25cff6 spec: gatewayClassName: kgateway infrastructure: @@ -105,15 +105,15 @@ spec: status: addresses: - type: Hostname - value: + value: .elb.amazonaws.com conditions: - - lastTransitionTime: "2026-03-10T03:21:40Z" + - 
lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: Accepted status: "True" type: Accepted - - lastTransitionTime: "2026-03-10T03:21:40Z" + - lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: Programmed @@ -122,25 +122,25 @@ status: listeners: - attachedRoutes: 0 conditions: - - lastTransitionTime: "2026-03-10T03:21:40Z" + - lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: Accepted status: "True" type: Accepted - - lastTransitionTime: "2026-03-10T03:21:40Z" + - lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: NoConflicts status: "False" type: Conflicted - - lastTransitionTime: "2026-03-10T03:21:40Z" + - lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: ResolvedRefs status: "True" type: ResolvedRefs - - lastTransitionTime: "2026-03-10T03:21:40Z" + - lastTransitionTime: "2026-04-01T22:09:45Z" message: "" observedGeneration: 1 reason: Programmed @@ -173,11 +173,11 @@ Programmed: True (Programmed) **Inference extension CRDs installed** ``` $ kubectl get crds | grep inference -inferencemodelrewrites.inference.networking.x-k8s.io 2026-03-10T03:21:06Z -inferenceobjectives.inference.networking.x-k8s.io 2026-03-10T03:21:06Z -inferencepoolimports.inference.networking.x-k8s.io 2026-03-10T03:21:07Z -inferencepools.inference.networking.k8s.io 2026-03-10T03:21:07Z -inferencepools.inference.networking.x-k8s.io 2026-03-10T03:21:07Z +inferencemodelrewrites.inference.networking.x-k8s.io 2026-04-01T22:09:24Z +inferenceobjectives.inference.networking.x-k8s.io 2026-04-01T22:09:24Z +inferencepoolimports.inference.networking.x-k8s.io 2026-04-01T22:09:24Z +inferencepools.inference.networking.k8s.io 2026-04-01T22:09:24Z +inferencepools.inference.networking.x-k8s.io 2026-04-01T22:09:25Z ``` **Result: PASS** — kgateway controller running, GatewayClass Accepted, Gateway Programmed, inference CRDs installed. 
diff --git a/docs/conformance/cncf/evidence/pod-autoscaling.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/pod-autoscaling.md similarity index 84% rename from docs/conformance/cncf/evidence/pod-autoscaling.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/pod-autoscaling.md index f78b1d97a..74994f5ba 100644 --- a/docs/conformance/cncf/evidence/pod-autoscaling.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/pod-autoscaling.md @@ -1,9 +1,9 @@ # Pod Autoscaling (HPA with GPU Metrics) +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:19:27 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 -**Generated:** 2026-03-10 03:42:06 UTC --- @@ -27,14 +27,14 @@ utilizing accelerators, including the ability to scale based on custom GPU metri ``` $ kubectl get pods -n monitoring -l app.kubernetes.io/name=prometheus-adapter NAME READY STATUS RESTARTS AGE -prometheus-adapter-78b8b8d75c-fh4cf 1/1 Running 0 18m +prometheus-adapter-78b8b8d75c-wv9h2 1/1 Running 0 68m ``` **Prometheus adapter service** ``` $ kubectl get svc prometheus-adapter -n monitoring -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -prometheus-adapter ClusterIP 172.20.178.141 443/TCP 18m +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +prometheus-adapter ClusterIP 172.20.38.130 443/TCP 68m ``` ## Custom Metrics API @@ -42,12 +42,12 @@ prometheus-adapter ClusterIP 172.20.178.141 443/TCP 18m **Available custom metrics** ``` $ kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 | python3 -c "..." 
# extract resource names -namespaces/gpu_memory_used namespaces/gpu_power_usage pods/gpu_power_usage pods/gpu_utilization namespaces/gpu_utilization pods/gpu_memory_used +namespaces/gpu_memory_used ``` ## GPU Stress Test Deployment @@ -166,8 +166,8 @@ horizontalpodautoscaler.autoscaling/gpu-workload-hpa created **GPU workload pod** ``` $ kubectl get pods -n hpa-test -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -gpu-workload-86c75dcd97-2wk4f 1/1 Running 0 3s 10.0.254.75 gpu-node-2 +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +gpu-workload-86c75dcd97-qbc7g 1/1 Running 0 4s 10.0.222.136 ip-10-0-251-220.ec2.internal ``` ## HPA Status @@ -176,7 +176,7 @@ gpu-workload-86c75dcd97-2wk4f 1/1 Running 0 3s 10.0.254.75 ``` $ kubectl get hpa -n hpa-test NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE -gpu-workload-hpa Deployment/gpu-workload 100/50 1 2 2 90s +gpu-workload-hpa Deployment/gpu-workload 100/50 1 2 2 49s ``` **HPA details** @@ -186,10 +186,10 @@ Name: gpu-workload-hpa Namespace: hpa-test Labels: Annotations: -CreationTimestamp: Mon, 09 Mar 2026 20:42:14 -0700 +CreationTimestamp: Wed, 01 Apr 2026 16:19:34 -0700 Reference: Deployment/gpu-workload Metrics: ( current / target ) - "gpu_utilization" on pods: 50 / 50 + "gpu_utilization" on pods: 100 / 50 Min replicas: 1 Max replicas: 2 Behavior: @@ -214,18 +214,18 @@ Conditions: Events: Type Reason Age From Message ---- ------ ---- ---- ------- - Warning FailedGetPodsMetric 76s horizontal-pod-autoscaler unable to get metric gpu_utilization: no metrics returned from custom metrics API - Warning FailedComputeMetricsReplicas 76s horizontal-pod-autoscaler invalid metrics (1 invalid out of 1), first error is: failed to get pods metric value: unable to get metric gpu_utilization: no metrics returned from custom metrics API - Normal SuccessfulRescale 31s horizontal-pod-autoscaler New size: 2; reason: pods metric gpu_utilization above target + Warning 
FailedGetPodsMetric 35s horizontal-pod-autoscaler unable to get metric gpu_utilization: no metrics returned from custom metrics API + Warning FailedComputeMetricsReplicas 35s horizontal-pod-autoscaler invalid metrics (1 invalid out of 1), first error is: failed to get pods metric value: unable to get metric gpu_utilization: no metrics returned from custom metrics API + Normal SuccessfulRescale 20s horizontal-pod-autoscaler New size: 2; reason: pods metric gpu_utilization above target ``` ## GPU Utilization Evidence **GPU utilization (nvidia-smi)** ``` -$ kubectl exec -n hpa-test gpu-workload-86c75dcd97-2wk4f -- nvidia-smi --query-gpu=utilization.gpu,utilization.memory,power.draw --format=csv +$ kubectl exec -n hpa-test gpu-workload-86c75dcd97-qbc7g -- nvidia-smi --query-gpu=utilization.gpu,utilization.memory,power.draw --format=csv utilization.gpu [%], utilization.memory [%], power.draw [W] -100 %, 0 %, 290.28 W +100 %, 0 %, 297.05 W ``` ## Pods After Scale-Up @@ -233,9 +233,9 @@ utilization.gpu [%], utilization.memory [%], power.draw [W] **Pods after scale-up** ``` $ kubectl get pods -n hpa-test -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -gpu-workload-86c75dcd97-2wk4f 1/1 Running 0 96s 10.0.254.75 gpu-node-2 -gpu-workload-86c75dcd97-4gbn8 1/1 Running 0 36s 10.0.219.76 gpu-node-2 +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +gpu-workload-86c75dcd97-qbc7g 1/1 Running 0 55s 10.0.222.136 ip-10-0-251-220.ec2.internal +gpu-workload-86c75dcd97-zvnlg 1/1 Running 0 25s 10.0.228.202 ip-10-0-251-220.ec2.internal ``` **Result: PASS** — HPA successfully read gpu_utilization metric and scaled replicas when utilization exceeded target threshold. 
diff --git a/docs/conformance/cncf/v1.35/nim-eks/evidence/robust-operator.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/robust-operator.md new file mode 100644 index 000000000..eb9cb5e7c --- /dev/null +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/robust-operator.md @@ -0,0 +1,179 @@ +# Robust AI Operator (NIM Operator) + +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:19:10 UTC +**Kubernetes Version:** v1.35 +**Platform:** linux/amd64 + +--- + +Demonstrates CNCF AI Conformance requirement that at least one complex AI operator +with a CRD can be installed and functions reliably, including operator pods running, +webhooks operational, and custom resources reconciled. + +## Summary + +1. **NIM Operator** — Controller manager running in `nvidia-nim` +2. **Custom Resource Definitions** — NIMService, NIMCache, NIMPipeline, NIMBuild CRDs registered +3. **Admission Controller** — Validating/mutating webhooks configured and active +4. **Custom Resource Reconciled** — `NIMService` reconciled into running inference pod(s) +5. 
**Result: PASS** + +--- + +## NIM Operator Health + +**NIM operator deployment** +``` +$ kubectl get deploy -n nvidia-nim +NAME READY UP-TO-DATE AVAILABLE AGE +k8s-nim-operator 1/1 1 1 65m +``` + +**NIM operator pods** +``` +$ kubectl get pods -n nvidia-nim +NAME READY STATUS RESTARTS AGE +k8s-nim-operator-64fb4b7cc6-5ktwg 1/1 Running 0 65m +``` + +## Custom Resource Definitions + +**NIM CRDs** +``` +nemocustomizers.apps.nvidia.com 2026-04-01T22:13:10Z +nemodatastores.apps.nvidia.com 2026-04-01T22:13:11Z +nemoentitystores.apps.nvidia.com 2026-04-01T22:13:12Z +nemoevaluators.apps.nvidia.com 2026-04-01T22:13:13Z +nemoguardrails.apps.nvidia.com 2026-04-01T22:13:13Z +nimbuilds.apps.nvidia.com 2026-04-01T22:13:14Z +nimcaches.apps.nvidia.com 2026-04-01T22:13:14Z +nimpipelines.apps.nvidia.com 2026-04-01T22:13:15Z +nimservices.apps.nvidia.com 2026-04-01T22:13:16Z +``` + +## Webhooks + +**NIM Operator webhooks** +``` +validatingwebhookconfiguration.admissionregistration.k8s.io/k8s-nim-operator-validating-webhook-configuration 2 65m +``` + +## Custom Resource Reconciliation + +A `NIMService` defines an inference microservice. The operator reconciles it into +a Deployment with GPU resources, a Service, and health monitoring. 
+ +**NIMServices** +``` +$ kubectl get nimservices -A +NAMESPACE NAME STATUS AGE +nim-workload llama-3-2-1b Ready 61m +``` + +**NIMService details** +``` +$ kubectl get nimservice llama-3-2-1b -n nim-workload -o yaml +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMService +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"apps.nvidia.com/v1alpha1","kind":"NIMService","metadata":{"annotations":{},"name":"llama-3-2-1b","namespace":"nim-workload"},"spec":{"authSecret":"ngc-api-secret","expose":{"service":{"port":8000,"type":"ClusterIP"}},"image":{"pullPolicy":"IfNotPresent","pullSecrets":["ngc-pull-secret"],"repository":"nvcr.io/nim/meta/llama-3.2-1b-instruct","tag":"1.8.3"},"replicas":1,"resources":{"limits":{"nvidia.com/gpu":"1"},"requests":{"nvidia.com/gpu":"1"}},"storage":{"pvc":{"name":"nim-model-store"}},"tolerations":[{"effect":"NoSchedule","key":"dedicated","operator":"Equal","value":"worker-workload"},{"effect":"NoExecute","key":"dedicated","operator":"Equal","value":"worker-workload"}]}} + creationTimestamp: "2026-04-01T22:17:39Z" + finalizers: + - finalizer.nimservice.apps.nvidia.com + generation: 2 + name: llama-3-2-1b + namespace: nim-workload + resourceVersion: "101880642" + uid: 27ab2169-5913-4c98-a39d-635ce99af343 +spec: + authSecret: ngc-api-secret + expose: + ingress: + spec: {} + router: {} + service: + port: 8000 + type: ClusterIP + image: + pullPolicy: IfNotPresent + pullSecrets: + - ngc-pull-secret + repository: nvcr.io/nim/meta/llama-3.2-1b-instruct + tag: 1.8.3 + inferencePlatform: standalone + livenessProbe: {} + metrics: + serviceMonitor: {} + readinessProbe: {} + replicas: 1 + resources: + limits: + nvidia.com/gpu: "1" + requests: + nvidia.com/gpu: "1" + scale: + hpa: + maxReplicas: 0 + minReplicas: 1 + startupProbe: {} + storage: + nimCache: {} + pvc: + name: nim-model-store + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: worker-workload + - effect: NoExecute + 
key: dedicated + operator: Equal + value: worker-workload +status: + conditions: + - lastTransitionTime: "2026-04-01T22:19:34Z" + message: | + deployment "llama-3-2-1b" successfully rolled out + reason: Ready + status: "True" + type: Ready + - lastTransitionTime: "2026-04-01T22:17:39Z" + message: "" + reason: Ready + status: "False" + type: Failed + model: + clusterEndpoint: 172.20.99.16:8000 + externalEndpoint: "" + name: meta/llama-3.2-1b-instruct + state: Ready +``` + +### Workload Pods Created by Operator + +**NIM workload pods** +``` +$ kubectl get pods -n nim-workload -l app.kubernetes.io/managed-by=k8s-nim-operator -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +llama-3-2-1b-7577f87fc7-dhb97 1/1 Running 0 61m 10.0.158.63 ip-10-0-180-136.ec2.internal +``` + +## Webhook Rejection Test + +Submit an invalid NIMService to verify the admission controller actively +rejects malformed resources. + +**Invalid CR rejection** +``` +The NIMService "webhook-test-invalid" is invalid: +* spec.authSecret: Required value +* spec.image: Required value +* : Invalid value: null: some validation rules were not checked because the object was invalid; correct the existing errors to complete validation +``` + +Webhook correctly rejected the invalid resource. + +**Result: PASS** — NIM operator running, webhooks operational (rejection verified), 9 CRDs registered, NIMService reconciled with 1 healthy inference pod(s). 
diff --git a/docs/conformance/cncf/evidence/secure-accelerator-access.md b/docs/conformance/cncf/v1.35/nim-eks/evidence/secure-accelerator-access.md similarity index 66% rename from docs/conformance/cncf/evidence/secure-accelerator-access.md rename to docs/conformance/cncf/v1.35/nim-eks/evidence/secure-accelerator-access.md index 093ceffdb..235d0e38b 100644 --- a/docs/conformance/cncf/evidence/secure-accelerator-access.md +++ b/docs/conformance/cncf/v1.35/nim-eks/evidence/secure-accelerator-access.md @@ -1,9 +1,9 @@ # Secure Accelerator Access +**Cluster:** `EKS / p5.48xlarge / NVIDIA-H100-80GB-HBM3` +**Generated:** 2026-04-01 23:14:45 UTC **Kubernetes Version:** v1.35 **Platform:** linux/amd64 -**Validated on:** Kubernetes v1.35 clusters with NVIDIA H100 80GB HBM3 -**Generated:** 2026-03-10 03:40:33 UTC --- @@ -19,7 +19,7 @@ access control, and auditability of accelerator usage. ``` $ kubectl get clusterpolicy -o wide NAME STATUS AGE -cluster-policy ready 2026-03-10T03:25:45Z +cluster-policy ready 2026-04-01T22:12:51Z ``` ### GPU Operator Pods @@ -28,30 +28,30 @@ cluster-policy ready 2026-03-10T03:25:45Z ``` $ kubectl get pods -n gpu-operator -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -gpu-feature-discovery-6rcxf 1/1 Running 0 14m 10.0.224.30 gpu-node-2 -gpu-feature-discovery-8jhh7 1/1 Running 0 14m 10.0.224.179 gpu-node-1 -gpu-operator-6bf99d6478-r55t5 1/1 Running 0 14m 10.0.6.44 system-node-1 -node-feature-discovery-gc-5495c9b5c9-5jhtb 1/1 Running 0 14m 10.0.4.105 system-node-1 -node-feature-discovery-master-6f876b9c85-97zcw 1/1 Running 0 14m 10.0.6.62 system-node-1 -node-feature-discovery-worker-7z8fm 1/1 Running 0 14m 10.0.230.31 system-node-2 -node-feature-discovery-worker-9s5tc 1/1 Running 0 14m 10.0.154.69 gpu-node-1 -node-feature-discovery-worker-vb62k 1/1 Running 0 14m 10.0.189.91 gpu-node-2 -nvidia-container-toolkit-daemonset-c49gs 1/1 Running 0 14m 10.0.201.217 gpu-node-1 -nvidia-container-toolkit-daemonset-lr895 1/1 
Running 0 14m 10.0.182.110 gpu-node-2 -nvidia-cuda-validator-9866n 0/1 Completed 0 12m 10.0.247.169 gpu-node-2 -nvidia-cuda-validator-f42hd 0/1 Completed 0 12m 10.0.143.223 gpu-node-1 -nvidia-dcgm-4bq8l 1/1 Running 0 14m 10.0.145.214 gpu-node-1 -nvidia-dcgm-exporter-g2fjs 1/1 Running 0 14m 10.0.247.52 gpu-node-2 -nvidia-dcgm-exporter-wqqqn 1/1 Running 0 14m 10.0.172.246 gpu-node-1 -nvidia-dcgm-xjsqq 1/1 Running 0 14m 10.0.159.246 gpu-node-2 -nvidia-device-plugin-daemonset-5884b 1/1 Running 0 14m 10.0.255.120 gpu-node-1 -nvidia-device-plugin-daemonset-kx2zg 1/1 Running 0 14m 10.0.185.249 gpu-node-2 -nvidia-driver-daemonset-qc7cg 3/3 Running 0 14m 10.0.198.38 gpu-node-1 -nvidia-driver-daemonset-vvlsc 3/3 Running 0 14m 10.0.166.43 gpu-node-2 -nvidia-mig-manager-4gn76 1/1 Running 0 14m 10.0.135.89 gpu-node-1 -nvidia-mig-manager-8s9wj 1/1 Running 0 14m 10.0.253.166 gpu-node-2 -nvidia-operator-validator-twprm 1/1 Running 0 14m 10.0.231.53 gpu-node-1 -nvidia-operator-validator-vwnsb 1/1 Running 0 14m 10.0.194.119 gpu-node-2 +gpu-feature-discovery-bvjjh 1/1 Running 0 61m 10.0.218.175 ip-10-0-251-220.ec2.internal +gpu-feature-discovery-q4k8g 1/1 Running 0 61m 10.0.133.127 ip-10-0-180-136.ec2.internal +gpu-operator-6bf99d6478-lpll4 1/1 Running 0 61m 10.0.4.84 ip-10-0-7-209.ec2.internal +node-feature-discovery-gc-5495c9b5c9-5lv2g 1/1 Running 0 61m 10.0.6.61 ip-10-0-7-209.ec2.internal +node-feature-discovery-master-6f876b9c85-b7wlm 1/1 Running 0 61m 10.0.6.161 ip-10-0-7-209.ec2.internal +node-feature-discovery-worker-lrn2p 1/1 Running 0 61m 10.0.212.66 ip-10-0-251-220.ec2.internal +node-feature-discovery-worker-srp76 1/1 Running 0 61m 10.0.231.205 ip-10-0-180-136.ec2.internal +node-feature-discovery-worker-svrbw 1/1 Running 0 61m 10.0.201.87 ip-10-0-184-187.ec2.internal +nvidia-container-toolkit-daemonset-2kj4m 1/1 Running 0 61m 10.0.236.177 ip-10-0-180-136.ec2.internal +nvidia-container-toolkit-daemonset-98f25 1/1 Running 0 61m 10.0.157.16 ip-10-0-251-220.ec2.internal 
+nvidia-cuda-validator-cpnk4 0/1 Completed 0 59m 10.0.146.2 ip-10-0-180-136.ec2.internal +nvidia-cuda-validator-l665p 0/1 Completed 0 59m 10.0.247.132 ip-10-0-251-220.ec2.internal +nvidia-dcgm-bwb6w 1/1 Running 0 61m 10.0.129.30 ip-10-0-251-220.ec2.internal +nvidia-dcgm-exporter-2xrln 1/1 Running 0 61m 10.0.187.45 ip-10-0-180-136.ec2.internal +nvidia-dcgm-exporter-sscnw 1/1 Running 0 61m 10.0.147.205 ip-10-0-251-220.ec2.internal +nvidia-dcgm-gdm9j 1/1 Running 0 61m 10.0.130.151 ip-10-0-180-136.ec2.internal +nvidia-device-plugin-daemonset-5dmkr 1/1 Running 0 61m 10.0.170.117 ip-10-0-180-136.ec2.internal +nvidia-device-plugin-daemonset-tg9x2 1/1 Running 0 61m 10.0.169.151 ip-10-0-251-220.ec2.internal +nvidia-driver-daemonset-9xv78 3/3 Running 0 61m 10.0.163.144 ip-10-0-251-220.ec2.internal +nvidia-driver-daemonset-fbvmz 3/3 Running 0 61m 10.0.147.204 ip-10-0-180-136.ec2.internal +nvidia-mig-manager-6565z 1/1 Running 0 58m 10.0.243.110 ip-10-0-180-136.ec2.internal +nvidia-mig-manager-jm8tl 1/1 Running 0 58m 10.0.191.228 ip-10-0-251-220.ec2.internal +nvidia-operator-validator-bpg4w 1/1 Running 0 61m 10.0.160.53 ip-10-0-251-220.ec2.internal +nvidia-operator-validator-mws7n 1/1 Running 0 61m 10.0.247.220 ip-10-0-180-136.ec2.internal ``` ### GPU Operator DaemonSets @@ -60,16 +60,16 @@ nvidia-operator-validator-vwnsb 1/1 Running 0 ``` $ kubectl get ds -n gpu-operator NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE -gpu-feature-discovery 2 2 2 2 2 nvidia.com/gpu.deploy.gpu-feature-discovery=true 14m -node-feature-discovery-worker 3 3 3 3 3 14m -nvidia-container-toolkit-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.container-toolkit=true 14m -nvidia-dcgm 2 2 2 2 2 nvidia.com/gpu.deploy.dcgm=true 14m -nvidia-dcgm-exporter 2 2 2 2 2 nvidia.com/gpu.deploy.dcgm-exporter=true 14m -nvidia-device-plugin-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.device-plugin=true 14m -nvidia-device-plugin-mps-control-daemon 0 0 0 0 0 
nvidia.com/gpu.deploy.device-plugin=true,nvidia.com/mps.capable=true 14m -nvidia-driver-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.driver=true 14m -nvidia-mig-manager 2 2 2 2 2 nvidia.com/gpu.deploy.mig-manager=true 14m -nvidia-operator-validator 2 2 2 2 2 nvidia.com/gpu.deploy.operator-validator=true 14m +gpu-feature-discovery 2 2 2 2 2 nvidia.com/gpu.deploy.gpu-feature-discovery=true 61m +node-feature-discovery-worker 3 3 3 3 3 61m +nvidia-container-toolkit-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.container-toolkit=true 61m +nvidia-dcgm 2 2 2 2 2 nvidia.com/gpu.deploy.dcgm=true 61m +nvidia-dcgm-exporter 2 2 2 2 2 nvidia.com/gpu.deploy.dcgm-exporter=true 61m +nvidia-device-plugin-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.device-plugin=true 61m +nvidia-device-plugin-mps-control-daemon 0 0 0 0 0 nvidia.com/gpu.deploy.device-plugin=true,nvidia.com/mps.capable=true 61m +nvidia-driver-daemonset 2 2 2 2 2 nvidia.com/gpu.deploy.driver=true 61m +nvidia-mig-manager 2 2 2 2 2 nvidia.com/gpu.deploy.mig-manager=true 61m +nvidia-operator-validator 2 2 2 2 2 nvidia.com/gpu.deploy.operator-validator=true 61m ``` ## DRA-Mediated GPU Access @@ -84,10 +84,10 @@ GPU devices via ResourceSlices, and pods request access through ResourceClaims. 
``` $ kubectl get resourceslices -o wide NAME NODE DRIVER POOL AGE -gpu-node-1-compute-domain.nvidia.com-q9xqc gpu-node-1 compute-domain.nvidia.com gpu-node-1 11m -gpu-node-1-gpu.nvidia.com-7cbz2 gpu-node-1 gpu.nvidia.com gpu-node-1 11m -gpu-node-2-compute-domain.nvidia.com-2n2cq gpu-node-2 compute-domain.nvidia.com gpu-node-2 11m -gpu-node-2-gpu.nvidia.com-79gvw gpu-node-2 gpu.nvidia.com gpu-node-2 11m +ip-10-0-180-136.ec2.internal-compute-domain.nvidia.com-kfxd7 ip-10-0-180-136.ec2.internal compute-domain.nvidia.com ip-10-0-180-136.ec2.internal 60m +ip-10-0-180-136.ec2.internal-gpu.nvidia.com-8w29z ip-10-0-180-136.ec2.internal gpu.nvidia.com ip-10-0-180-136.ec2.internal 59m +ip-10-0-251-220.ec2.internal-compute-domain.nvidia.com-btqsj ip-10-0-251-220.ec2.internal compute-domain.nvidia.com ip-10-0-251-220.ec2.internal 60m +ip-10-0-251-220.ec2.internal-gpu.nvidia.com-qwdqr ip-10-0-251-220.ec2.internal gpu.nvidia.com ip-10-0-251-220.ec2.internal 59m ``` ### GPU Device Details @@ -100,18 +100,18 @@ items: - apiVersion: resource.k8s.io/v1 kind: ResourceSlice metadata: - creationTimestamp: "2026-03-10T03:29:20Z" - generateName: gpu-node-1-compute-domain.nvidia.com- - generation: 2 - name: gpu-node-1-compute-domain.nvidia.com-q9xqc + creationTimestamp: "2026-04-01T22:14:50Z" + generateName: ip-10-0-180-136.ec2.internal-compute-domain.nvidia.com- + generation: 1 + name: ip-10-0-180-136.ec2.internal-compute-domain.nvidia.com-kfxd7 ownerReferences: - apiVersion: v1 controller: true kind: Node - name: gpu-node-1 - uid: fef55be3-f566-47c8-8bb8-52c117cb3855 - resourceVersion: "1169500" - uid: 8087c1b4-71e0-42c3-9f74-12629e2ee5b5 + name: ip-10-0-180-136.ec2.internal + uid: c01459a2-a385-4843-bc1f-582d283ea94e + resourceVersion: "101864746" + uid: 84642059-2fb9-484f-bb98-7e5ae1802eba spec: devices: - attributes: @@ -127,26 +127,26 @@ items: string: channel name: channel-0 driver: compute-domain.nvidia.com - nodeName: gpu-node-1 + nodeName: ip-10-0-180-136.ec2.internal pool: 
generation: 1 - name: gpu-node-1 + name: ip-10-0-180-136.ec2.internal resourceSliceCount: 1 - apiVersion: resource.k8s.io/v1 kind: ResourceSlice metadata: - creationTimestamp: "2026-03-10T03:29:22Z" - generateName: gpu-node-1-gpu.nvidia.com- + creationTimestamp: "2026-04-01T22:14:52Z" + generateName: ip-10-0-180-136.ec2.internal-gpu.nvidia.com- generation: 2 - name: gpu-node-1-gpu.nvidia.com-7cbz2 + name: ip-10-0-180-136.ec2.internal-gpu.nvidia.com-8w29z ownerReferences: - apiVersion: v1 controller: true kind: Node - name: gpu-node-1 - uid: fef55be3-f566-47c8-8bb8-52c117cb3855 - resourceVersion: "1169562" - uid: 3441669c-08c4-43ff-9b83-42c5f3dddcff + name: ip-10-0-180-136.ec2.internal + uid: c01459a2-a385-4843-bc1f-582d283ea94e + resourceVersion: "101865710" + uid: 89a1966f-5c3f-4664-a5b7-b348a122db07 spec: devices: - attributes: @@ -165,17 +165,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:64:00.0 + string: "0000:53:00.0" resource.kubernetes.io/pcieRoot: - string: pci0000:55 + string: pci0000:44 type: string: gpu uuid: - string: GPU-bc5610b9-79c8-fedd-8899-07539c7f868a + string: GPU-15704b32-f531-14ce-0530-1ac21e4b68e6 capacity: memory: value: 81559Mi - name: gpu-1 + name: gpu-0 - attributes: addressingMode: string: HMM @@ -192,17 +192,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:75:00.0 + string: 0000:64:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:66 + string: pci0000:55 type: string: gpu uuid: - string: GPU-fbc2c554-4d37-8938-0032-f923bad0f716 + string: GPU-edc718f8-e593-6468-b9f9-563d508366ed capacity: memory: value: 81559Mi - name: gpu-2 + name: gpu-1 - attributes: addressingMode: string: HMM @@ -219,17 +219,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:86:00.0 + string: 0000:75:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:77 + string: pci0000:66 type: string: gpu 
uuid: - string: GPU-a65a773d-52bb-bcc1-a8ee-f78c3faa2e2d + string: GPU-e2d9b65e-98cb-5b7a-90f0-e0336573f9e2 capacity: memory: value: 81559Mi - name: gpu-3 + name: gpu-2 - attributes: addressingMode: string: HMM @@ -246,17 +246,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:97:00.0 + string: 0000:86:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:88 + string: pci0000:77 type: string: gpu uuid: - string: GPU-82e45d1b-1618-559f-144c-eab51545030b + string: GPU-3a325419-de5f-778f-cf4e-fe7290362ac5 capacity: memory: value: 81559Mi - name: gpu-4 + name: gpu-3 - attributes: addressingMode: string: HMM @@ -273,17 +273,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:a8:00.0 + string: 0000:97:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:99 + string: pci0000:88 type: string: gpu uuid: - string: GPU-39e28159-8c62-ee71-64db-b748edd61e15 + string: GPU-275ad37d-ebd6-4cf6-3867-0499ba033a12 capacity: memory: value: 81559Mi - name: gpu-5 + name: gpu-4 - attributes: addressingMode: string: HMM @@ -300,17 +300,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:b9:00.0 + string: 0000:a8:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:aa + string: pci0000:99 type: string: gpu uuid: - string: GPU-e64d69ca-b4b3-59b2-e78c-94f26c4db365 + string: GPU-3cab564d-1f63-674b-a831-024600bf985c capacity: memory: value: 81559Mi - name: gpu-6 + name: gpu-5 - attributes: addressingMode: string: HMM @@ -327,17 +327,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:ca:00.0 + string: 0000:b9:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:bb + string: pci0000:aa type: string: gpu uuid: - string: GPU-04d228d3-3b5a-3534-f5cf-969706647d56 + string: GPU-d0f25a6f-9a3f-61b9-c128-3d14759651d7 capacity: memory: value: 81559Mi - name: gpu-7 + name: gpu-6 - 
attributes: addressingMode: string: HMM @@ -354,38 +354,38 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: "0000:53:00.0" + string: 0000:ca:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:44 + string: pci0000:bb type: string: gpu uuid: - string: GPU-c4529c8d-69c4-b61d-e0bc-7b2460096005 + string: GPU-9bc10e9a-e27e-652b-9a1e-e84f7e446206 capacity: memory: value: 81559Mi - name: gpu-0 + name: gpu-7 driver: gpu.nvidia.com - nodeName: gpu-node-1 + nodeName: ip-10-0-180-136.ec2.internal pool: generation: 1 - name: gpu-node-1 + name: ip-10-0-180-136.ec2.internal resourceSliceCount: 1 - apiVersion: resource.k8s.io/v1 kind: ResourceSlice metadata: - creationTimestamp: "2026-03-10T03:29:19Z" - generateName: gpu-node-2-compute-domain.nvidia.com- + creationTimestamp: "2026-04-01T22:14:51Z" + generateName: ip-10-0-251-220.ec2.internal-compute-domain.nvidia.com- generation: 1 - name: gpu-node-2-compute-domain.nvidia.com-2n2cq + name: ip-10-0-251-220.ec2.internal-compute-domain.nvidia.com-btqsj ownerReferences: - apiVersion: v1 controller: true kind: Node - name: gpu-node-2 - uid: b171b90a-eb8f-4662-bd0d-2055b634dc98 - resourceVersion: "1168846" - uid: 3eca27ae-5231-4845-8407-1e24fd9b5683 + name: ip-10-0-251-220.ec2.internal + uid: d55d06fd-ee55-4525-b7da-393b71669e8f + resourceVersion: "101864753" + uid: af18d2bf-b15f-43cb-8d2b-a49098f4f5bd spec: devices: - attributes: @@ -401,26 +401,26 @@ items: string: daemon name: daemon-0 driver: compute-domain.nvidia.com - nodeName: gpu-node-2 + nodeName: ip-10-0-251-220.ec2.internal pool: generation: 1 - name: gpu-node-2 + name: ip-10-0-251-220.ec2.internal resourceSliceCount: 1 - apiVersion: resource.k8s.io/v1 kind: ResourceSlice metadata: - creationTimestamp: "2026-03-10T03:29:21Z" - generateName: gpu-node-2-gpu.nvidia.com- + creationTimestamp: "2026-04-01T22:14:52Z" + generateName: ip-10-0-251-220.ec2.internal-gpu.nvidia.com- generation: 2 - name: gpu-node-2-gpu.nvidia.com-79gvw + 
name: ip-10-0-251-220.ec2.internal-gpu.nvidia.com-qwdqr ownerReferences: - apiVersion: v1 controller: true kind: Node - name: gpu-node-2 - uid: b171b90a-eb8f-4662-bd0d-2055b634dc98 - resourceVersion: "1169576" - uid: 0b3dc1d8-a1ba-4fae-894b-cb90e62ed783 + name: ip-10-0-251-220.ec2.internal + uid: d55d06fd-ee55-4525-b7da-393b71669e8f + resourceVersion: "101865689" + uid: 48e7fc88-8ff6-4c50-9e74-8755d19ede37 spec: devices: - attributes: @@ -439,17 +439,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:75:00.0 + string: 0000:ca:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:66 + string: pci0000:bb type: string: gpu uuid: - string: GPU-dbabb552-a092-0ca9-0580-8d4fe378eb02 + string: GPU-530bd4b0-238b-f0c2-b496-63595812bca8 capacity: memory: value: 81559Mi - name: gpu-2 + name: gpu-7 - attributes: addressingMode: string: HMM @@ -466,17 +466,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:86:00.0 + string: "0000:53:00.0" resource.kubernetes.io/pcieRoot: - string: pci0000:77 + string: pci0000:44 type: string: gpu uuid: - string: GPU-5342927e-e180-84f1-55ba-257f1cbd3ba4 + string: GPU-3f048793-8751-030e-5870-ebbd2b10cef2 capacity: memory: value: 81559Mi - name: gpu-3 + name: gpu-0 - attributes: addressingMode: string: HMM @@ -493,17 +493,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:97:00.0 + string: 0000:64:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:88 + string: pci0000:55 type: string: gpu uuid: - string: GPU-95085215-739e-e7c6-4011-8dbe004af8c3 + string: GPU-cc644abe-17e4-7cb7-500d-ed8c09aea2fb capacity: memory: value: 81559Mi - name: gpu-4 + name: gpu-1 - attributes: addressingMode: string: HMM @@ -520,17 +520,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:a8:00.0 + string: 0000:75:00.0 resource.kubernetes.io/pcieRoot: - string: 
pci0000:99 + string: pci0000:66 type: string: gpu uuid: - string: GPU-a7b658ad-f23e-cea9-2523-569d521700bf + string: GPU-8d0b1081-9549-2b14-7e01-b4a725873c21 capacity: memory: value: 81559Mi - name: gpu-5 + name: gpu-2 - attributes: addressingMode: string: HMM @@ -547,17 +547,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:b9:00.0 + string: 0000:86:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:aa + string: pci0000:77 type: string: gpu uuid: - string: GPU-1e9a0e94-769a-b1e6-36f7-9296e286ef90 + string: GPU-38bbfee9-dc95-ffb5-4034-f9a6c82a45bb capacity: memory: value: 81559Mi - name: gpu-6 + name: gpu-3 - attributes: addressingMode: string: HMM @@ -574,17 +574,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:ca:00.0 + string: 0000:97:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:bb + string: pci0000:88 type: string: gpu uuid: - string: GPU-16b2cd36-9dbe-3ee7-0810-07b330e36e04 + string: GPU-24087b69-8889-6b23-feeb-2905664fbcbf capacity: memory: value: 81559Mi - name: gpu-7 + name: gpu-4 - attributes: addressingMode: string: HMM @@ -601,17 +601,17 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: "0000:53:00.0" + string: 0000:a8:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:44 + string: pci0000:99 type: string: gpu uuid: - string: GPU-92da0328-2f33-b563-d577-9d2b9f21f280 + string: GPU-d2f75162-e86d-0da0-0af4-3fa0b80038cd capacity: memory: value: 81559Mi - name: gpu-0 + name: gpu-5 - attributes: addressingMode: string: HMM @@ -628,22 +628,22 @@ items: productName: string: NVIDIA H100 80GB HBM3 resource.kubernetes.io/pciBusID: - string: 0000:64:00.0 + string: 0000:b9:00.0 resource.kubernetes.io/pcieRoot: - string: pci0000:55 + string: pci0000:aa type: string: gpu uuid: - string: GPU-184dab49-47ce-eeec-2239-3e03fbd4c002 + string: GPU-b00fe5f9-5832-19d6-0276-28d8630f0f4b capacity: memory: 
value: 81559Mi - name: gpu-1 + name: gpu-6 driver: gpu.nvidia.com - nodeName: gpu-node-2 + nodeName: ip-10-0-251-220.ec2.internal pool: generation: 1 - name: gpu-node-2 + name: ip-10-0-251-220.ec2.internal resourceSliceCount: 1 kind: List metadata: @@ -668,14 +668,14 @@ $ kubectl get pod isolation-test -n secure-access-test -o jsonpath={.spec.resour **Pod volumes (no hostPath)** ``` $ kubectl get pod isolation-test -n secure-access-test -o jsonpath={.spec.volumes} -[{"name":"kube-api-access-dl259","projected":{"defaultMode":420,"sources":[{"serviceAccountToken":{"expirationSeconds":3607,"path":"token"}},{"configMap":{"items":[{"key":"ca.crt","path":"ca.crt"}],"name":"kube-root-ca.crt"}},{"downwardAPI":{"items":[{"fieldRef":{"apiVersion":"v1","fieldPath":"metadata.namespace"},"path":"namespace"}]}}]}}] +[{"name":"kube-api-access-vk49g","projected":{"defaultMode":420,"sources":[{"serviceAccountToken":{"expirationSeconds":3607,"path":"token"}},{"configMap":{"items":[{"key":"ca.crt","path":"ca.crt"}],"name":"kube-root-ca.crt"}},{"downwardAPI":{"items":[{"fieldRef":{"apiVersion":"v1","fieldPath":"metadata.namespace"},"path":"namespace"}]}}]}}] ``` **ResourceClaim allocation** ``` $ kubectl get resourceclaim isolated-gpu -n secure-access-test -o wide NAME STATE AGE -isolated-gpu pending 12s +isolated-gpu pending 13s ``` > **Note:** ResourceClaim may show `pending` after pod completion because the DRA controller deallocates claims when the consuming pod terminates. The pod logs below confirm GPU isolation was enforced during execution. 
@@ -686,17 +686,17 @@ isolated-gpu pending 12s ``` $ kubectl logs isolation-test -n secure-access-test === Visible NVIDIA devices === -crw-rw-rw- 1 root root 195, 254 Mar 10 03:40 /dev/nvidia-modeset -crw-rw-rw- 1 root root 507, 0 Mar 10 03:40 /dev/nvidia-uvm -crw-rw-rw- 1 root root 507, 1 Mar 10 03:40 /dev/nvidia-uvm-tools -crw-rw-rw- 1 root root 195, 1 Mar 10 03:40 /dev/nvidia1 -crw-rw-rw- 1 root root 195, 255 Mar 10 03:40 /dev/nvidiactl +crw-rw-rw- 1 root root 195, 254 Apr 1 23:14 /dev/nvidia-modeset +crw-rw-rw- 1 root root 507, 0 Apr 1 23:14 /dev/nvidia-uvm +crw-rw-rw- 1 root root 507, 1 Apr 1 23:14 /dev/nvidia-uvm-tools +crw-rw-rw- 1 root root 195, 7 Apr 1 23:14 /dev/nvidia7 +crw-rw-rw- 1 root root 195, 255 Apr 1 23:14 /dev/nvidiactl === nvidia-smi output === -GPU 0: NVIDIA H100 80GB HBM3 (UUID: GPU-bc5610b9-79c8-fedd-8899-07539c7f868a) +GPU 0: NVIDIA H100 80GB HBM3 (UUID: GPU-530bd4b0-238b-f0c2-b496-63595812bca8) === GPU count === -0, NVIDIA H100 80GB HBM3, GPU-bc5610b9-79c8-fedd-8899-07539c7f868a +0, NVIDIA H100 80GB HBM3, GPU-530bd4b0-238b-f0c2-b496-63595812bca8 Secure accelerator access test completed ``` diff --git a/pkg/evidence/scripts/collect-evidence.sh b/pkg/evidence/scripts/collect-evidence.sh index da9d66a35..13116300c 100755 --- a/pkg/evidence/scripts/collect-evidence.sh +++ b/pkg/evidence/scripts/collect-evidence.sh @@ -657,11 +657,14 @@ collect_service_metrics() { EVIDENCE_FILE="${EVIDENCE_DIR}/ai-service-metrics.md" log_info "Collecting AI Service Metrics evidence → ${EVIDENCE_FILE}" - # Detect workload type: prefer Dynamo if running, otherwise use training path + # Detect workload type: Dynamo inference > NIM inference > PyTorch training local dynamo_ns="dynamo-workload" + local nim_ns="nim-workload" if kubectl get pods -n "${dynamo_ns}" -l nvidia.com/dynamo-component-type=worker --no-headers 2>/dev/null | grep -q .; then collect_service_metrics_dynamo + elif kubectl get pods -n "${nim_ns}" -l app.kubernetes.io/managed-by=k8s-nim-operator 
--no-headers 2>/dev/null | grep -q .; then + collect_service_metrics_nim else # Training path: deploys a standalone PyTorch pod with Prometheus metrics. # Only requires GPU nodes + Prometheus — no Kubeflow Trainer dependency. @@ -900,6 +903,222 @@ EOF log_info "AI service metrics (Dynamo) evidence collection complete." } +# --- NIM inference metrics collection --- +# Collects metrics from a running NIMService deployment. NIM exposes OpenAI-compatible +# inference metrics at /v1/metrics in Prometheus exposition format. +collect_service_metrics_nim() { + write_section_header "AI Service Metrics (NIM Inference)" + + cat >> "${EVIDENCE_FILE}" <<'EOF' +Demonstrates that NVIDIA NIM inference microservices expose Prometheus-format +metrics that can be discovered and collected by the monitoring stack. + +## NIM Inference Workload +EOF + + local NS="nim-workload" + + # Find the NIM service pod + local nim_pod="" + nim_pod=$(kubectl get pods -n "${NS}" -l app.kubernetes.io/managed-by=k8s-nim-operator \ + --field-selector=status.phase=Running -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + + if [ -z "${nim_pod}" ]; then + log_warn "No running NIM pod found in ${NS}" + echo "**Result: SKIP** — No running NIM pod found in ${NS}." >> "${EVIDENCE_FILE}" + return + fi + + # Get the NIMService name from pod labels + local nim_service="" + nim_service=$(kubectl get pod "${nim_pod}" -n "${NS}" -o jsonpath='{.metadata.labels.app\.kubernetes\.io/name}' 2>/dev/null) + + capture "NIMService" kubectl get nimservice -n "${NS}" + capture "NIM workload pods" kubectl get pods -n "${NS}" -o wide + + # Wait for NIM to be serving + log_info "Checking NIM readiness..." + local serving_ready=false + for i in $(seq 1 12); do + if kubectl exec -n "${NS}" "${nim_pod}" -- python3 -c " +import urllib.request +urllib.request.urlopen('http://localhost:8000/v1/health/ready')" &>/dev/null; then + serving_ready=true + break + fi + log_info "NIM not serving yet (attempt ${i}/12), retrying in 15s..." 
+ sleep 15 + done + + if [ "${serving_ready}" != "true" ]; then + log_warn "NIM service not serving after 3 minutes" + echo "**Result: FAIL** — NIM service did not become ready." >> "${EVIDENCE_FILE}" + return + fi + + # Show available models + echo "" >> "${EVIDENCE_FILE}" + echo "**NIM models endpoint**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + kubectl exec -n "${NS}" "${nim_pod}" -- python3 -c " +import urllib.request, json +data = json.loads(urllib.request.urlopen('http://localhost:8000/v1/models').read()) +for m in data['data']: + print(f\"Model: {m['id']}\")" >> "${EVIDENCE_FILE}" 2>&1 + echo '```' >> "${EVIDENCE_FILE}" + + # Get model name for requests + local model_name="" + model_name=$(kubectl exec -n "${NS}" "${nim_pod}" -- python3 -c " +import urllib.request, json +data = json.loads(urllib.request.urlopen('http://localhost:8000/v1/models').read()) +print(data['data'][0]['id'])" 2>/dev/null) + + # Send inference requests to generate non-zero metrics + log_info "Sending 10 inference requests via NIM..." 
+ for i in $(seq 1 10); do + kubectl exec -n "${NS}" "${nim_pod}" -- python3 -c " +import urllib.request, json +req = urllib.request.Request('http://localhost:8000/v1/chat/completions', + data=json.dumps({'model': '${model_name}', 'messages': [{'role': 'user', 'content': 'Explain GPU computing in one sentence.'}], 'max_tokens': 30}).encode(), + headers={'Content-Type': 'application/json'}) +urllib.request.urlopen(req)" &>/dev/null || true + done + + # Collect NIM metrics from /v1/metrics + echo "" >> "${EVIDENCE_FILE}" + echo "**NIM inference metrics endpoint (sampled after generating inference traffic)**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + kubectl exec -n "${NS}" "${nim_pod}" -- python3 -c " +import urllib.request +data = urllib.request.urlopen('http://localhost:8000/v1/metrics').read().decode() +for l in data.split('\n'): + if not l or l.startswith('#') or '_bucket' in l or '_created' in l: + continue + parts = l.rsplit(' ', 1) + if len(parts) == 2 and parts[1] not in ('0', '0.0'): + # Show key inference metrics + if any(k in l for k in ['prompt_tokens', 'generation_tokens', 'time_to_first_token', + 'time_per_output_token', 'request_success', 'num_request', + 'e2e_request_latency', 'request_prompt_tokens', 'request_generation_tokens']): + print(l)" 2>&1 | head -20 >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + + # Create a ServiceMonitor so Prometheus can discover and scrape NIM metrics. + # NIM exposes metrics at /v1/metrics (not /metrics), so we need a custom path. + log_info "Creating ServiceMonitor for NIM metrics discovery..." 
+ kubectl apply -f - <<'SM_EOF' +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: nim-inference + namespace: monitoring + labels: + release: kube-prometheus +spec: + namespaceSelector: + matchNames: + - nim-workload + selector: + matchLabels: + app.kubernetes.io/managed-by: k8s-nim-operator + endpoints: + - port: api + path: /v1/metrics + interval: 15s +SM_EOF + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Prometheus Metrics Discovery + +A ServiceMonitor is created to enable Prometheus auto-discovery of NIM inference +metrics. NIM exposes metrics at `/v1/metrics` in Prometheus exposition format. +EOF + + capture "NIM ServiceMonitor" kubectl get servicemonitor nim-inference -n monitoring -o yaml + + log_info "Waiting for Prometheus to discover and scrape NIM targets (up to 3m)..." + kubectl port-forward svc/kube-prometheus-prometheus -n monitoring 9090:9090 &>/dev/null & + local pf_pid=$! + + if wait_for_port 9090 30 "${pf_pid}"; then + # Wait for NIM targets with health=up (at least one successful scrape). + # Match by namespace since the job name comes from the service name. + local target_found=false + for i in $(seq 1 18); do + if curl -sf 'http://localhost:9090/api/v1/targets?state=active' 2>/dev/null | \ + python3 -c "import sys,json; data=json.load(sys.stdin); exit(0 if any(t['labels'].get('namespace','')=='${NS}' and t.get('health')=='up' for t in data['data']['activeTargets']) else 1)" 2>/dev/null; then + target_found=true + break + fi + log_info "NIM target not yet healthy (attempt ${i}/18), retrying in 10s..." 
+ sleep 10 + done + + if [ "${target_found}" = "true" ]; then + echo "" >> "${EVIDENCE_FILE}" + echo "**Prometheus scrape targets (active)**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + curl -sf 'http://localhost:9090/api/v1/targets?state=active' 2>/dev/null | \ + python3 -c " +import sys,json +data=json.load(sys.stdin) +for t in data['data']['activeTargets']: + ns = t['labels'].get('namespace','') + if ns == '${NS}': + print(json.dumps({'job':t['labels'].get('job',''),'endpoint':t['scrapeUrl'],'health':t['health'],'lastScrape':t['lastScrape']},indent=2))" >> "${EVIDENCE_FILE}" 2>&1 + echo '```' >> "${EVIDENCE_FILE}" + + # Query NIM-specific metrics from Prometheus + local prom_response + prom_response=$(curl -sf --data-urlencode "query={__name__=~\"prompt_tokens_total|generation_tokens_total|time_to_first_token_seconds_sum|time_per_output_token_seconds_sum|e2e_request_latency_seconds_sum\",model_name=~\".*\"}" 'http://localhost:9090/api/v1/query' 2>/dev/null) + + if [ -n "${prom_response}" ] && echo "${prom_response}" | python3 -c "import sys,json; data=json.load(sys.stdin); exit(0 if data['data']['result'] else 1)" 2>/dev/null; then + echo "" >> "${EVIDENCE_FILE}" + echo "**NIM metrics queried from Prometheus**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + echo "${prom_response}" | python3 -c " +import sys,json +data=json.load(sys.stdin) +for r in data['data']['result']: + name=r['metric']['__name__'] + model=r['metric'].get('model_name','') + val=r['value'][1] + print(f'{name}{{model_name=\"{model}\"}} = {val}')" 2>&1 | head -15 >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + fi + + echo "" >> "${EVIDENCE_FILE}" + echo "**Result: PASS** — Prometheus discovers NIM inference workloads via ServiceMonitor and actively scrapes application-level AI inference metrics (token throughput, request latency, time-to-first-token) from the /v1/metrics endpoint." 
>> "${EVIDENCE_FILE}" + else + echo "" >> "${EVIDENCE_FILE}" + echo "**Result: FAIL** — Prometheus did not discover NIM targets within 3 minutes." >> "${EVIDENCE_FILE}" + fi + else + echo "" >> "${EVIDENCE_FILE}" + echo "**Result: FAIL** — Could not connect to Prometheus." >> "${EVIDENCE_FILE}" + fi + kill "${pf_pid}" 2>/dev/null || true + + # Clean up ServiceMonitor + if [ "${NO_CLEANUP}" != "true" ]; then + kubectl delete servicemonitor nim-inference -n monitoring --ignore-not-found 2>/dev/null || true + fi + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Cleanup + +**Delete workload namespace** +``` +$ kubectl delete ns nim-workload +``` +EOF + + log_info "AI service metrics (NIM) evidence collection complete." +} + +# --- PyTorch training workload metrics collection --- +# Deploys a PyTorch training pod that exposes training metrics (loss, throughput, +# GPU memory) on :8080/metrics in Prometheus format via a ServiceMonitor. @@ -1186,8 +1405,11 @@ collect_operator() { log_info "Collecting Robust AI Operator evidence → ${EVIDENCE_FILE}" # Detect which AI operator is present and route to the appropriate collector. + # Priority: Dynamo > NIM Operator > Kubeflow Trainer if kubectl get deploy -n dynamo-system dynamo-platform-dynamo-operator-controller-manager --no-headers 2>/dev/null | grep -q .; then collect_operator_dynamo + elif kubectl get deploy -n nvidia-nim -l app.kubernetes.io/name=k8s-nim-operator --no-headers 2>/dev/null | grep -q .; then + collect_operator_nim elif kubectl get deploy -n kubeflow kubeflow-trainer-controller-manager --no-headers 2>/dev/null | grep -q .; then collect_operator_kubeflow else @@ -1310,6 +1532,130 @@ INVALID_CR log_info "Robust operator (Kubeflow Trainer) evidence collection complete."
} +# --- NIM Operator evidence --- +collect_operator_nim() { + write_section_header "Robust AI Operator (NIM Operator)" + + cat >> "${EVIDENCE_FILE}" <<'EOF' +Demonstrates CNCF AI Conformance requirement that at least one complex AI operator +with a CRD can be installed and functions reliably, including operator pods running, +webhooks operational, and custom resources reconciled. + +## Summary + +1. **NIM Operator** — Controller manager running in `nvidia-nim` +2. **Custom Resource Definitions** — NIMService, NIMCache, NIMPipeline, NIMBuild CRDs registered +3. **Admission Controller** — Validating/mutating webhooks configured and active +4. **Custom Resource Reconciled** — `NIMService` reconciled into running inference pod(s) +5. **Result: PASS** + +--- + +## NIM Operator Health +EOF + capture "NIM operator deployment" kubectl get deploy -n nvidia-nim + capture "NIM operator pods" kubectl get pods -n nvidia-nim + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Custom Resource Definitions +EOF + echo "" >> "${EVIDENCE_FILE}" + echo "**NIM CRDs**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + kubectl get crds 2>/dev/null | grep "apps\.nvidia\.com" >> "${EVIDENCE_FILE}" 2>&1 + echo '```' >> "${EVIDENCE_FILE}" + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Webhooks +EOF + echo "" >> "${EVIDENCE_FILE}" + echo "**NIM Operator webhooks**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + # Match webhooks by name or by backing service in the nvidia-nim namespace + if [[ "${HAS_JQ}" == "true" ]]; then + kubectl get validatingwebhookconfigurations,mutatingwebhookconfigurations -o json 2>/dev/null | \ + jq -r '.items[] | select(.webhooks[]?.clientConfig.service.namespace == "nvidia-nim") | "\(.kind)/\(.metadata.name)"' 2>/dev/null >> "${EVIDENCE_FILE}" 2>&1 || true + else + kubectl get validatingwebhookconfigurations,mutatingwebhookconfigurations 2>/dev/null | grep -iE 'nim|apps\.nvidia\.com' >> "${EVIDENCE_FILE}" 2>&1 || true + fi + echo '```' >> 
"${EVIDENCE_FILE}" + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Custom Resource Reconciliation + +A `NIMService` defines an inference microservice. The operator reconciles it into +a Deployment with GPU resources, a Service, and health monitoring. +EOF + capture "NIMServices" kubectl get nimservices -A + local nim_ns="nim-workload" + local nim_service="" + nim_service=$(kubectl get nimservices -n "${nim_ns}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + if [ -n "${nim_service}" ]; then + capture "NIMService details" kubectl get nimservice "${nim_service}" -n "${nim_ns}" -o yaml + fi + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +### Workload Pods Created by Operator +EOF + capture "NIM workload pods" kubectl get pods -n "${nim_ns}" -l app.kubernetes.io/managed-by=k8s-nim-operator -o wide + + cat >> "${EVIDENCE_FILE}" <<'EOF' + +## Webhook Rejection Test + +Submit an invalid NIMService to verify the admission controller actively +rejects malformed resources. +EOF + echo "" >> "${EVIDENCE_FILE}" + echo "**Invalid CR rejection**" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + local webhook_result + webhook_result=$(kubectl apply -f - 2>&1 <<INVALID_CR +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMService +metadata: + name: webhook-test-invalid + namespace: default +spec: + replicas: -1 +INVALID_CR +) || true + echo "${webhook_result}" >> "${EVIDENCE_FILE}" + echo '```' >> "${EVIDENCE_FILE}" + + echo "" >> "${EVIDENCE_FILE}" + if echo "${webhook_result}" | grep -qi "denied\|forbidden\|invalid\|error"; then + echo "Webhook correctly rejected the invalid resource." >> "${EVIDENCE_FILE}" + else + echo "WARNING: Webhook did not reject the invalid resource."
>> "${EVIDENCE_FILE}" + kubectl delete nimservice webhook-test-invalid -n default --ignore-not-found 2>/dev/null + fi + + # Verdict + echo "" >> "${EVIDENCE_FILE}" + local crd_count + crd_count=$(kubectl get crds 2>/dev/null | grep -c "apps\.nvidia\.com" || true) + local running_pods + running_pods=$(kubectl get pods -n "${nim_ns}" -l app.kubernetes.io/managed-by=k8s-nim-operator --no-headers 2>/dev/null | grep -c "Running" || true) + local webhook_ok + webhook_ok=$(echo "${webhook_result}" | grep -ci "denied\|forbidden\|invalid\|error" || true) + + if [ "${crd_count}" -gt 0 ] && [ "${running_pods}" -gt 0 ] && [ "${webhook_ok}" -gt 0 ]; then + echo "**Result: PASS** — NIM operator running, webhooks operational (rejection verified), ${crd_count} CRDs registered, NIMService reconciled with ${running_pods} healthy inference pod(s)." >> "${EVIDENCE_FILE}" + elif [ "${crd_count}" -gt 0 ] && [ "${running_pods}" -gt 0 ]; then + echo "**Result: PASS** — NIM operator running, ${crd_count} CRDs registered, NIMService reconciled with ${running_pods} healthy inference pod(s)." >> "${EVIDENCE_FILE}" + elif [ "${crd_count}" -gt 0 ]; then + echo "**Result: FAIL** — NIMService found but no healthy inference pods." >> "${EVIDENCE_FILE}" + else + echo "**Result: FAIL** — No NIM CRDs found." >> "${EVIDENCE_FILE}" + fi + + log_info "Robust operator (NIM) evidence collection complete." +} + # --- Dynamo evidence --- collect_operator_dynamo() { write_section_header "Robust AI Operator (Dynamo Platform)"