From 1ca8a584879198974266b82753accec04abe6745 Mon Sep 17 00:00:00 2001
From: Yuan Chen <yuanchen97@gmail.com>
Date: Tue, 31 Mar 2026 15:49:35 -0700
Subject: [PATCH] feat(recipes): add NIM Operator recipe for CNCF AI
 Conformance

Add k8s-nim-operator as a new AICR component and create an H100/EKS/Ubuntu
inference recipe for NIM. This supports the CNCF AI Conformance submission
where NIM on EKS is the certified product and AICR is the validation tooling.

- Add `nim` platform type to recipe criteria with tests
- Register k8s-nim-operator v3.1.0 in component registry with health check
- Create h100-eks-ubuntu-inference-nim overlay with DRA support
- Add NIMService workload manifest (Llama 3.2 1B)
- Add NIM chat demo UI (nim-chat-server.sh, nim-chat.html)
- Fix: load healthCheck.assertFile content in ApplyRegistryDefaults so
  deployment validation actually executes Chainsaw health checks

Closes #473
---
 demos/workloads/inference/nim-chat-server.sh  | 108 ++++++++
 demos/workloads/inference/nim-chat.html       | 239 ++++++++++++++++++
 .../inference/nimservice-llama-3-2-1b.yaml    |  93 +++++++
 pkg/recipe/criteria.go                        |   5 +-
 pkg/recipe/criteria_test.go                   |   4 +-
 pkg/recipe/metadata.go                        |  14 +
 pkg/recipe/metadata_test.go                   |  90 +++++++
 .../checks/k8s-nim-operator/health-check.yaml |  68 +++++
 .../components/k8s-nim-operator/values.yaml   |  34 +++
 .../h100-eks-ubuntu-inference-nim.yaml        |  74 ++++++
 recipes/registry.yaml                         |  19 ++
 11 files changed, 746 insertions(+), 2 deletions(-)
 create mode 100755 demos/workloads/inference/nim-chat-server.sh
 create mode 100644 demos/workloads/inference/nim-chat.html
 create mode 100644 demos/workloads/inference/nimservice-llama-3-2-1b.yaml
 create mode 100644 recipes/checks/k8s-nim-operator/health-check.yaml
 create mode 100644 recipes/components/k8s-nim-operator/values.yaml
 create mode 100644 recipes/overlays/h100-eks-ubuntu-inference-nim.yaml

diff --git a/demos/workloads/inference/nim-chat-server.sh b/demos/workloads/inference/nim-chat-server.sh
new file mode 100755
index 000000000..4b59b58a0
--- /dev/null
+++ b/demos/workloads/inference/nim-chat-server.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Chat UI — single script to launch everything
+# Usage: ./nim-chat-server.sh
+# Then open: http://127.0.0.1:9090/chat.html
+
+set -e
+
+NAMESPACE="${NAMESPACE:-nim-workload}"
+SERVICE="${SERVICE:-svc/llama-3-2-1b}"
+API_PORT=8000
+UI_PORT=9090
+
+cleanup() {
+    echo "Shutting down..."
+    # Best-effort: under `set -e` a failed kill (PID unset or already gone) would abort cleanup.
+    kill "${PF_PID:-}" 2>/dev/null || true; kill "${PY_PID:-}" 2>/dev/null || true
+    exit 0
+}
+trap cleanup EXIT INT TERM
+
+# Kill anything already on our ports
+for port in $API_PORT $UI_PORT; do
+    pids=$(lsof -ti :$port 2>/dev/null || true)
+    if [ -n "$pids" ]; then
+        echo "Killing existing processes on port $port"
+        echo "$pids" | xargs kill 2>/dev/null || true
+        sleep 1
+    fi
+done
+
+# Start port-forward to NIM service
+echo "Starting port-forward to $SERVICE on :$API_PORT..."
+kubectl port-forward -n "$NAMESPACE" "$SERVICE" "$API_PORT":8000 &
+PF_PID=$!
+sleep 2
+
+# Start chat UI + API proxy on UI_PORT
+echo "Starting chat UI on :$UI_PORT..."
+python3 -c "
+import http.server, urllib.request, io
+
+API = 'http://127.0.0.1:${API_PORT}'
+HTML_PATH = '$(dirname "$0")/nim-chat.html'
+
+class H(http.server.BaseHTTPRequestHandler):
+    def do_GET(self):
+        if self.path == '/' or self.path == '/chat.html':
+            html = open(HTML_PATH, 'rb').read() if __import__('os').path.exists(HTML_PATH) else b''
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html')
+            self.send_header('Content-Length', len(html))
+            self.end_headers()
+            self.wfile.write(html)
+        elif self.path.startswith('/v1/'):
+            self._proxy()
+        else:
+            self.send_error(404)
+
+    def do_POST(self):
+        if self.path.startswith('/v1/'):
+            self._proxy()
+        else:
+            self.send_error(404)
+
+    def _proxy(self):
+        length = int(self.headers.get('Content-Length', 0))
+        body = self.rfile.read(length) if length else None
+        req = urllib.request.Request(
+            API + self.path, data=body,
+            headers={'Content-Type': self.headers.get('Content-Type', 'application/json')},
+            method=self.command)
+        try:
+            with urllib.request.urlopen(req) as r:
+                data = r.read()
+                self.send_response(r.status)
+                self.send_header('Content-Type', r.headers.get('Content-Type', 'application/json'))
+                self.send_header('Content-Length', len(data))
+                self.end_headers()
+                self.wfile.write(data)
+        except urllib.error.URLError as e:
+            self.send_error(502, str(e))
+
+    def log_message(self, fmt, *args): pass
+
+http.server.HTTPServer(('127.0.0.1', ${UI_PORT}), H).serve_forever()
+" &
+PY_PID=$!
+
+echo ""
+echo "Ready! Open http://127.0.0.1:${UI_PORT}/chat.html"
+echo "Press Ctrl+C to stop."
+echo ""
+
+wait
diff --git a/demos/workloads/inference/nim-chat.html b/demos/workloads/inference/nim-chat.html
new file mode 100644
index 000000000..eee6384d4
--- /dev/null
+++ b/demos/workloads/inference/nim-chat.html
@@ -0,0 +1,239 @@
+<!DOCTYPE html>
+<!--
+ Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>NIM Chat</title>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #1a1a2e; color: #e0e0e0; height: 100vh; display: flex; flex-direction: column; }
+  header { padding: 16px 24px; background: #16213e; border-bottom: 1px solid #0f3460; display: flex; align-items: center; gap: 12px; }
+  header h1 { font-size: 18px; font-weight: 600; }
+  header span { font-size: 12px; color: #888; background: #0f3460; padding: 2px 8px; border-radius: 10px; }
+  #chat { flex: 1; overflow-y: auto; padding: 24px; display: flex; flex-direction: column; gap: 16px; }
+  .msg { max-width: 720px; width: 100%; margin: 0 auto; display: flex; gap: 12px; }
+  .msg.user { flex-direction: row-reverse; }
+  .msg .avatar { width: 32px; height: 32px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 14px; flex-shrink: 0; }
+  .msg.user .avatar { background: #533483; }
+  .msg.assistant .avatar { background: #76b900; }
+  .msg .bubble { padding: 12px 16px; border-radius: 12px; line-height: 1.6; word-break: break-word; }
+  .msg.user .bubble { background: #533483; border-bottom-right-radius: 4px; }
+  .msg.assistant .bubble { background: #16213e; border: 1px solid #0f3460; border-bottom-left-radius: 4px; }
+  .bubble h1, .bubble h2, .bubble h3 { margin: 12px 0 6px; color: #fff; }
+  .bubble h1 { font-size: 1.3em; } .bubble h2 { font-size: 1.15em; } .bubble h3 { font-size: 1.05em; }
+  .bubble p { margin: 6px 0; }
+  .bubble ul, .bubble ol { margin: 6px 0 6px 20px; }
+  .bubble li { margin: 2px 0; }
+  .bubble code { background: #0d1b2a; padding: 2px 6px; border-radius: 4px; font-size: 0.9em; }
+  .bubble pre { background: #0d1b2a; padding: 12px; border-radius: 8px; overflow-x: auto; margin: 8px 0; }
+  .bubble pre code { background: none; padding: 0; }
+  .bubble strong { color: #fff; }
+  .bubble hr { border: none; border-top: 1px solid #0f3460; margin: 12px 0; }
+  #input-area { padding: 16px 24px; background: #16213e; border-top: 1px solid #0f3460; }
+  #input-row { max-width: 720px; margin: 0 auto; display: flex; gap: 8px; }
+  #input { flex: 1; padding: 12px 16px; border-radius: 12px; border: 1px solid #0f3460; background: #1a1a2e; color: #e0e0e0; font-size: 15px; outline: none; resize: none; font-family: inherit; }
+  #input:focus { border-color: #76b900; }
+  #send { padding: 12px 24px; border-radius: 12px; border: none; background: #76b900; color: white; font-size: 15px; cursor: pointer; font-weight: 600; }
+  #send:hover { background: #8fd400; }
+  #send:disabled { background: #333; cursor: not-allowed; }
+  .status { text-align: center; color: #666; font-size: 13px; padding: 8px; }
+</style>
+</head>
+<body>
+<header>
+  <h1>Llama 3.2 1B Chat</h1>
+  <span>NVIDIA NIM on EKS</span>
+</header>
+<div id="chat">
+  <div class="status">Send a message to start chatting</div>
+</div>
+<div id="input-area">
+  <div id="input-row">
+    <textarea id="input" rows="1" placeholder="Type a message... (Shift+Enter for newline)" autofocus></textarea>
+    <button id="send" onclick="sendMsg()">Send</button>
+  </div>
+</div>
+<script>
+var API = '/v1/chat/completions';
+var MODEL = 'meta/llama-3.2-1b-instruct';
+var messages = [];
+var sending = false;
+
+var chatEl = document.getElementById('chat');
+var inputEl = document.getElementById('input');
+var btnEl = document.getElementById('send');
+
+inputEl.addEventListener('keydown', function(e) {
+  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); sendMsg(); }
+});
+
+inputEl.addEventListener('input', function() {
+  inputEl.style.height = 'auto';
+  inputEl.style.height = Math.min(inputEl.scrollHeight, 120) + 'px';
+});
+
+function escapeHtml(s) {
+  return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+}
+
+function md(src) {  // Minimal Markdown-to-HTML renderer; text is escaped via escapeHtml/inlineFormat.
+  var codeBlocks = [];
+  var text = src.replace(/```(\w*)\n([\s\S]*?)```/g, function(m, lang, code) {
+    codeBlocks.push('<pre><code>' + escapeHtml(code) + '</code></pre>');
+    return '\x00CB' + (codeBlocks.length - 1) + '\x00';
+  });
+
+  var lines = text.split('\n');
+  var html = [];
+  var listTag = '';  // currently open list element: '' (none), 'ul', or 'ol'
+  function setList(tag) { if (tag === listTag) return; if (listTag) html.push('</' + listTag + '>'); if (tag) html.push('<' + tag + '>'); listTag = tag; }
+  for (var i = 0; i < lines.length; i++) {
+    var line = lines[i];
+
+    var cbMatch = line.match(/^\x00CB(\d+)\x00$/);
+    if (cbMatch) {
+      setList('');
+      html.push(codeBlocks[parseInt(cbMatch[1])]);
+      continue;
+    }
+
+    var hMatch = line.match(/^(#{1,3})\s+(.+)$/);
+    if (hMatch) {
+      setList('');
+      var level = hMatch[1].length;
+      html.push('<h' + level + '>' + inlineFormat(hMatch[2]) + '</h' + level + '>');
+      continue;
+    }
+
+    if (line.match(/^---+$/)) {
+      setList('');
+      html.push('<hr>');
+      continue;
+    }
+    // Bullets ("-", "*") render inside <ul>; numbered items ("1.") render inside <ol>.
+    var liMatch = line.match(/^[-*]\s+(.+)$/);
+    if (liMatch) {
+      setList('ul');
+      html.push('<li>' + inlineFormat(liMatch[1]) + '</li>');
+      continue;
+    }
+
+    var olMatch = line.match(/^\d+\.\s+(.+)$/);
+    if (olMatch) {
+      setList('ol');
+      html.push('<li>' + inlineFormat(olMatch[1]) + '</li>');
+      continue;
+    }
+    // Any other line ends the list that is currently open.
+    setList('');
+
+    if (line.trim() === '') {
+      html.push('<br>');
+      continue;
+    }
+
+    html.push('<p>' + inlineFormat(line) + '</p>');
+  }
+
+  setList('');
+  return html.join('\n');
+}
+
+function inlineFormat(text) {
+  var codes = [];
+  text = text.replace(/`([^`]+)`/g, function(m, code) {
+    codes.push('<code>' + escapeHtml(code) + '</code>');
+    return '\x00IC' + (codes.length - 1) + '\x00';
+  });
+
+  text = escapeHtml(text);
+
+  text = text.replace(/\*\*\*(.+?)\*\*\*/g, '<strong><em>$1</em></strong>');
+  text = text.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
+  text = text.replace(/\*(.+?)\*/g, '<em>$1</em>');
+
+  text = text.replace(/\x00IC(\d+)\x00/g, function(m, idx) {
+    return codes[parseInt(idx)];
+  });
+
+  return text;
+}
+
+function addMsg(role, content) {
+  var status = chatEl.querySelector('.status');
+  if (status) status.remove();
+
+  var div = document.createElement('div');
+  div.className = 'msg ' + role;
+  var avatar = role === 'user' ? 'You' : 'NIM';
+  div.innerHTML = '<div class="avatar">' + avatar + '</div><div class="bubble"></div>';
+  chatEl.appendChild(div);
+  chatEl.scrollTop = chatEl.scrollHeight;
+
+  var bubble = div.querySelector('.bubble');
+  if (content) setContent(bubble, content);
+  return bubble;
+}
+
+function setContent(bubble, text) {
+  bubble.innerHTML = md(text.trim());
+}
+
+function sendMsg() {
+  var text = inputEl.value.trim();
+  if (!text || sending) return;
+
+  sending = true;
+  btnEl.disabled = true;
+  inputEl.value = '';
+  inputEl.style.height = 'auto';
+
+  addMsg('user', text);
+  messages.push({ role: 'user', content: text });
+
+  var bubble = addMsg('assistant', '');
+  bubble.textContent = 'Thinking...';
+
+  fetch(API, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: MODEL, messages: messages, max_tokens: 512 }),
+  })
+  .then(function(res) {
+    if (!res.ok) throw new Error('HTTP ' + res.status);
+    return res.json();
+  })
+  .then(function(data) {
+    var content = data.choices[0].message.content;
+    messages.push({ role: 'assistant', content: content });
+    setContent(bubble, content);
+  })
+  .catch(function(err) {
+    bubble.textContent = 'Error: ' + err.message + '. Make sure nim-chat-server.sh is running.';
+  })
+  .finally(function() {
+    sending = false;
+    btnEl.disabled = false;
+    chatEl.scrollTop = chatEl.scrollHeight;
+    inputEl.focus();
+  });
+}
+</script>
+</body>
+</html>
diff --git a/demos/workloads/inference/nimservice-llama-3-2-1b.yaml b/demos/workloads/inference/nimservice-llama-3-2-1b.yaml
new file mode 100644
index 000000000..27dc56f4b
--- /dev/null
+++ b/demos/workloads/inference/nimservice-llama-3-2-1b.yaml
@@ -0,0 +1,93 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Service — Llama 3.2 1B inference deployment.
+# Deploys a single-GPU NIM microservice serving meta/llama-3.2-1b-instruct
+# via an OpenAI-compatible API (/v1/chat/completions, /v1/models).
+#
+# Prerequisites:
+#   - k8s-nim-operator deployed (via AICR NIM recipe)
+#   - NGC pull secret and API key in the target namespace
+#
+# Setup:
+#   kubectl create ns nim-workload
+#   kubectl create secret docker-registry ngc-pull-secret \
+#     --docker-server=nvcr.io --docker-username='$oauthtoken' \
+#     --docker-password="$NGC_CLI_API_KEY" -n nim-workload
+#   kubectl create secret generic ngc-api-secret \
+#     --from-literal=NGC_API_KEY="$NGC_CLI_API_KEY" -n nim-workload
+#
+# Deploy:
+#   kubectl apply -f nimservice-llama-3-2-1b.yaml
+#
+# Test:
+#   kubectl port-forward svc/llama-3-2-1b 8000:8000 -n nim-workload
+#   curl http://localhost:8000/v1/models
+#   curl http://localhost:8000/v1/chat/completions \
+#     -H "Content-Type: application/json" \
+#     -d '{"model":"meta/llama-3.2-1b-instruct","messages":[{"role":"user","content":"Hello!"}],"max_tokens":30}'
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nim-workload
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: nim-model-store
+  namespace: nim-workload
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: gp2
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: llama-3-2-1b
+  namespace: nim-workload
+spec:
+  image:
+    repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
+    tag: "1.8.3"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-pull-secret
+  authSecret: ngc-api-secret
+  replicas: 1
+  resources:
+    limits:
+      nvidia.com/gpu: "1"
+    requests:
+      nvidia.com/gpu: "1"
+  tolerations:
+    - key: dedicated
+      value: worker-workload
+      operator: Equal
+      effect: NoSchedule
+    - key: dedicated
+      value: worker-workload
+      operator: Equal
+      effect: NoExecute
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
+  storage:
+    pvc:
+      name: nim-model-store
diff --git a/pkg/recipe/criteria.go b/pkg/recipe/criteria.go
index c9f370bf4..00997461e 100644
--- a/pkg/recipe/criteria.go
+++ b/pkg/recipe/criteria.go
@@ -180,6 +180,7 @@ const (
 	CriteriaPlatformAny      CriteriaPlatformType = "any"
 	CriteriaPlatformDynamo   CriteriaPlatformType = "dynamo"
 	CriteriaPlatformKubeflow CriteriaPlatformType = "kubeflow"
+	CriteriaPlatformNIM      CriteriaPlatformType = "nim"
 )
 
 // ParseCriteriaPlatformType parses a string into a CriteriaPlatformType.
@@ -191,6 +192,8 @@ func ParseCriteriaPlatformType(s string) (CriteriaPlatformType, error) {
 		return CriteriaPlatformDynamo, nil
 	case "kubeflow":
 		return CriteriaPlatformKubeflow, nil
+	case "nim":
+		return CriteriaPlatformNIM, nil
 	default:
 		return CriteriaPlatformAny, errors.New(errors.ErrCodeInvalidRequest, fmt.Sprintf("invalid platform type: %s", s))
 	}
@@ -198,7 +201,7 @@ func ParseCriteriaPlatformType(s string) (CriteriaPlatformType, error) {
 
 // GetCriteriaPlatformTypes returns all supported platform types sorted alphabetically.
 func GetCriteriaPlatformTypes() []string {
-	return []string{"dynamo", "kubeflow"}
+	return []string{"dynamo", "kubeflow", "nim"}
 }
 
 // Criteria represents the input parameters for recipe matching.
diff --git a/pkg/recipe/criteria_test.go b/pkg/recipe/criteria_test.go
index 4e96ac541..1358ec0ed 100644
--- a/pkg/recipe/criteria_test.go
+++ b/pkg/recipe/criteria_test.go
@@ -768,6 +768,8 @@ func TestParseCriteriaPlatformType(t *testing.T) {
 		{"Dynamo uppercase", "Dynamo", CriteriaPlatformDynamo, false},
 		{"kubeflow", "kubeflow", CriteriaPlatformKubeflow, false},
 		{"Kubeflow uppercase", "Kubeflow", CriteriaPlatformKubeflow, false},
+		{"nim", "nim", CriteriaPlatformNIM, false},
+		{"NIM uppercase", "NIM", CriteriaPlatformNIM, false},
 		{"invalid", "invalid", CriteriaPlatformAny, true},
 	}
 
@@ -789,7 +791,7 @@ func TestGetCriteriaPlatformTypes(t *testing.T) {
 	types := GetCriteriaPlatformTypes()
 
 	// Should return sorted list
-	expected := []string{"dynamo", "kubeflow"}
+	expected := []string{"dynamo", "kubeflow", "nim"}
 	if len(types) != len(expected) {
 		t.Errorf("GetCriteriaPlatformTypes() returned %d types, want %d", len(types), len(expected))
 	}
diff --git a/pkg/recipe/metadata.go b/pkg/recipe/metadata.go
index 4d9885b51..46ac7eeba 100644
--- a/pkg/recipe/metadata.go
+++ b/pkg/recipe/metadata.go
@@ -238,6 +238,20 @@ func (ref *ComponentRef) ApplyRegistryDefaults(config *ComponentConfig) {
 			ref.Path = config.Kustomize.DefaultPath
 		}
 	}
+
+	// Load health check assert file content if not already set
+	if ref.HealthCheckAsserts == "" && config.HealthCheck.AssertFile != "" {
+		provider := GetDataProvider()
+		if provider != nil {
+			data, err := provider.ReadFile(config.HealthCheck.AssertFile)
+			if err != nil {
+				slog.Debug("failed to read health check assert file",
+					"component", ref.Name, "file", config.HealthCheck.AssertFile, "error", err)
+			} else {
+				ref.HealthCheckAsserts = string(data)
+			}
+		}
+	}
 }
 
 // RecipeMetadataSpec contains the specification for a recipe.
diff --git a/pkg/recipe/metadata_test.go b/pkg/recipe/metadata_test.go
index d37c1aad4..a2cbd8710 100644
--- a/pkg/recipe/metadata_test.go
+++ b/pkg/recipe/metadata_test.go
@@ -35,8 +35,10 @@ package recipe
 
 import (
 	"context"
+	"io/fs"
 	"strings"
 	"testing"
+	"testing/fstest"
 )
 
 func TestRecipeMetadataSpecValidateDependencies(t *testing.T) {
@@ -1242,6 +1244,94 @@ func TestComponentRefApplyRegistryDefaults_NamespaceAndChart(t *testing.T) {
 	})
 }
 
+// TestComponentRefApplyRegistryDefaults_HealthCheckAsserts verifies that
+// ApplyRegistryDefaults loads healthCheck.assertFile content into HealthCheckAsserts.
+func TestComponentRefApplyRegistryDefaults_HealthCheckAsserts(t *testing.T) {
+	t.Run("loads assert file from data provider", func(t *testing.T) {
+		// In-memory data provider holding one health check file (named to avoid shadowing io/fs).
+		assertFS := fstest.MapFS{
+			"checks/test-component/health-check.yaml": &fstest.MapFile{
+				Data: []byte("apiVersion: chainsaw.kyverno.io/v1alpha1\nkind: Test\n"),
+			},
+		}
+		old := GetDataProvider()
+		SetDataProvider(&testFSProvider{fs: assertFS})
+		defer SetDataProvider(old)
+
+		config := &ComponentConfig{
+			Name: "test-component",
+			HealthCheck: HealthCheckConfig{
+				AssertFile: "checks/test-component/health-check.yaml",
+			},
+			Helm: HelmConfig{DefaultRepository: "https://example.com"},
+		}
+		ref := &ComponentRef{Name: "test-component"}
+		ref.ApplyRegistryDefaults(config)
+
+		if ref.HealthCheckAsserts == "" {
+			t.Fatal("HealthCheckAsserts should be populated from assertFile")
+		}
+		if !strings.Contains(ref.HealthCheckAsserts, "chainsaw.kyverno.io") {
+			t.Errorf("HealthCheckAsserts = %q, want content containing chainsaw.kyverno.io", ref.HealthCheckAsserts)
+		}
+	})
+
+	t.Run("does not overwrite existing HealthCheckAsserts", func(t *testing.T) {
+		config := &ComponentConfig{
+			Name: "test-component",
+			HealthCheck: HealthCheckConfig{
+				AssertFile: "checks/test-component/health-check.yaml",
+			},
+		}
+		ref := &ComponentRef{
+			Name:               "test-component",
+			HealthCheckAsserts: "existing-content",
+		}
+		ref.ApplyRegistryDefaults(config)
+
+		if ref.HealthCheckAsserts != "existing-content" {
+			t.Errorf("HealthCheckAsserts = %q, want %q (should not overwrite)", ref.HealthCheckAsserts, "existing-content")
+		}
+	})
+
+	t.Run("handles missing assert file gracefully", func(t *testing.T) {
+		emptyFS := fstest.MapFS{}
+		old := GetDataProvider()
+		SetDataProvider(&testFSProvider{fs: emptyFS})
+		defer SetDataProvider(old)
+
+		config := &ComponentConfig{
+			Name: "test-component",
+			HealthCheck: HealthCheckConfig{
+				AssertFile: "checks/nonexistent/health-check.yaml",
+			},
+		}
+		ref := &ComponentRef{Name: "test-component"}
+		ref.ApplyRegistryDefaults(config)
+
+		if ref.HealthCheckAsserts != "" {
+			t.Errorf("HealthCheckAsserts = %q, want empty for missing file", ref.HealthCheckAsserts)
+		}
+	})
+}
+
+// testFSProvider wraps fstest.MapFS to implement DataProvider for testing.
+type testFSProvider struct {
+	fs fstest.MapFS
+}
+
+func (p *testFSProvider) ReadFile(path string) ([]byte, error) {
+	return p.fs.ReadFile(path)
+}
+
+func (p *testFSProvider) WalkDir(root string, fn fs.WalkDirFunc) error {
+	return fs.WalkDir(p.fs, root, fn)
+}
+
+func (p *testFSProvider) Source(path string) string {
+	return path
+}
+
 // TestComponentRefMergeWithPath verifies that the Path field is correctly merged
 // when merging ComponentRefs (overlay into base).
 func TestComponentRefMergeWithPath(t *testing.T) {
diff --git a/recipes/checks/k8s-nim-operator/health-check.yaml b/recipes/checks/k8s-nim-operator/health-check.yaml
new file mode 100644
index 000000000..f60b3a3da
--- /dev/null
+++ b/recipes/checks/k8s-nim-operator/health-check.yaml
@@ -0,0 +1,68 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Operator Health Check
+#
+# Validates that the NVIDIA NIM Operator is running and healthy in the
+# nvidia-nim namespace. Checks that the k8s-nim-operator deployment has
+# at least one available replica and that no pods in the namespace are
+# stuck in Pending, Failed, or Unknown phases.
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: k8s-nim-operator-health-check
+spec:
+  timeouts:
+    assert: 5m
+  steps:
+    - name: validate-deployment-exists
+      try:
+        # Guard against vacuous pass on empty namespace: verify the
+        # k8s-nim-operator deployment exists and has at least one ready replica.
+        - assert:
+            resource:
+              apiVersion: apps/v1
+              kind: Deployment
+              metadata:
+                name: k8s-nim-operator
+                namespace: nvidia-nim
+              status:
+                (availableReplicas > `0`): true
+    - name: validate-all-pods-healthy
+      try:
+        # Assert no pods are in unhealthy phases.
+        - error:
+            resource:
+              apiVersion: v1
+              kind: Pod
+              metadata:
+                namespace: nvidia-nim
+              status:
+                phase: Pending
+        - error:
+            resource:
+              apiVersion: v1
+              kind: Pod
+              metadata:
+                namespace: nvidia-nim
+              status:
+                phase: Failed
+        - error:
+            resource:
+              apiVersion: v1
+              kind: Pod
+              metadata:
+                namespace: nvidia-nim
+              status:
+                phase: Unknown
diff --git a/recipes/components/k8s-nim-operator/values.yaml b/recipes/components/k8s-nim-operator/values.yaml
new file mode 100644
index 000000000..117f6ae2a
--- /dev/null
+++ b/recipes/components/k8s-nim-operator/values.yaml
@@ -0,0 +1,34 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NVIDIA NIM Operator Helm values (v3.1.0)
+# Manages NIM microservice lifecycle: deployment, scaling, health monitoring,
+# and model caching via NIMService, NIMPipeline, and NIMCache CRDs.
+#
+# Requires: cert-manager (for admission webhooks), gpu-operator (for GPU scheduling)
+
+operator:
+  # Enable admission controller with cert-manager for TLS
+  admissionController:
+    enabled: true
+    tls:
+      mode: cert-manager
+      certManager:
+        issuerType: selfsigned
+
+  # Remove default control-plane affinity — EKS has no control-plane nodes
+  affinity: {}
+
+dynamo:
+  enabled: false
diff --git a/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml
new file mode 100644
index 000000000..d5f9ceba2
--- /dev/null
+++ b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml
@@ -0,0 +1,74 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: RecipeMetadata
+apiVersion: aicr.nvidia.com/v1alpha1
+metadata:
+  name: h100-eks-ubuntu-inference-nim
+
+spec:
+  # Inherits from h100-eks-ubuntu-inference (H100 + Ubuntu inference settings)
+  # Adds NVIDIA NIM Operator for managing NIM microservice deployments.
+  base: h100-eks-ubuntu-inference
+
+  criteria:
+    service: eks
+    accelerator: h100
+    os: ubuntu
+    intent: inference
+    platform: nim
+
+  # DRA requires Kubernetes 1.34+ (GA)
+  constraints:
+    - name: K8s.server.version
+      value: ">= 1.34"
+
+  componentRefs:
+    - name: nvidia-dra-driver-gpu
+      type: Helm
+      overrides:
+        gpuResourcesEnabledOverride: true
+
+    - name: k8s-nim-operator
+      type: Helm
+      source: https://helm.ngc.nvidia.com/nvidia
+      version: "3.1.0"
+      valuesFile: components/k8s-nim-operator/values.yaml
+      dependencyRefs:
+        - cert-manager
+        - gpu-operator
+
+  validation:
+    deployment:
+      checks:
+        - operator-health
+        - expected-resources
+        - gpu-operator-version
+        - check-nvidia-smi
+      constraints:
+        - name: Deployment.gpu-operator.version
+          value: ">= v24.6.0"
+    conformance:
+      checks:
+        - platform-health
+        - gpu-operator-health
+        - dra-support
+        - accelerator-metrics
+        - ai-service-metrics
+        - inference-gateway
+        - gang-scheduling
+        - pod-autoscaling
+        - cluster-autoscaling
+        - robust-controller
+        - secure-accelerator-access
diff --git a/recipes/registry.yaml b/recipes/registry.yaml
index 66d747550..cc86eb282 100644
--- a/recipes/registry.yaml
+++ b/recipes/registry.yaml
@@ -406,6 +406,25 @@ components:
         tolerationPaths:
           - tolerations
 
+  - name: k8s-nim-operator
+    displayName: k8s-nim-operator
+    valueOverrideKeys:
+      - nimoperator
+      - nim
+    healthCheck:
+      assertFile: checks/k8s-nim-operator/health-check.yaml
+    helm:
+      defaultRepository: https://helm.ngc.nvidia.com/nvidia
+      defaultChart: k8s-nim-operator
+      defaultVersion: "3.1.0"
+      defaultNamespace: nvidia-nim
+    nodeScheduling:
+      system:
+        nodeSelectorPaths:
+          - operator.nodeSelector
+        tolerationPaths:
+          - operator.tolerations
+
   - name: kubeflow-trainer
     displayName: kubeflow-trainer
     valueOverrideKeys: