diff --git a/demos/workloads/inference/nim-chat-server.sh b/demos/workloads/inference/nim-chat-server.sh
new file mode 100755
index 000000000..4b59b58a0
--- /dev/null
+++ b/demos/workloads/inference/nim-chat-server.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Chat UI — single script to launch everything
+# Usage: ./nim-chat-server.sh
+# Then open: http://127.0.0.1:9090/chat.html
+
+set -euo pipefail
+
+NAMESPACE="${NAMESPACE:-nim-workload}"
+SERVICE="${SERVICE:-svc/llama-3-2-1b}"
+API_PORT=8000
+UI_PORT=9090
+
+# Stop background children on any exit path. PIDs are expanded with
+# ${VAR:-} so the trap is safe under `set -u` before they are assigned.
+cleanup() {
+  echo "Shutting down..."
+  kill "${PF_PID:-}" 2>/dev/null || true
+  kill "${PY_PID:-}" 2>/dev/null || true
+}
+trap cleanup EXIT
+# INT/TERM simply exit; the EXIT trap then performs cleanup exactly once.
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# Kill anything already on our ports
+for port in "$API_PORT" "$UI_PORT"; do
+  pids=$(lsof -ti ":$port" 2>/dev/null || true)
+  if [ -n "$pids" ]; then
+    echo "Killing existing processes on port $port"
+    echo "$pids" | xargs kill 2>/dev/null || true
+    sleep 1
+  fi
+done
+
+# Start port-forward to NIM service
+echo "Starting port-forward to $SERVICE on :$API_PORT..."
+kubectl port-forward -n "$NAMESPACE" "$SERVICE" "$API_PORT":8000 &
+PF_PID=$!
+sleep 2
+# Fail fast if the port-forward already died (bad service, no kube context).
+if ! kill -0 "$PF_PID" 2>/dev/null; then
+  echo "ERROR: kubectl port-forward exited; check NAMESPACE/SERVICE." >&2
+  exit 1
+fi
+
+# Start chat UI + API proxy on UI_PORT
+echo "Starting chat UI on :$UI_PORT..."
+python3 -c "
+import http.server, urllib.request, urllib.error, os
+
+API = 'http://127.0.0.1:${API_PORT}'
+HTML_PATH = '$(dirname "$0")/nim-chat.html'
+
+class H(http.server.BaseHTTPRequestHandler):
+    def do_GET(self):
+        if self.path == '/' or self.path == '/chat.html':
+            # 404 instead of an empty 200 when the page is missing.
+            if not os.path.exists(HTML_PATH):
+                self.send_error(404, 'nim-chat.html not found')
+                return
+            with open(HTML_PATH, 'rb') as f:
+                html = f.read()
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html')
+            self.send_header('Content-Length', str(len(html)))
+            self.end_headers()
+            self.wfile.write(html)
+        elif self.path.startswith('/v1/'):
+            self._proxy()
+        else:
+            self.send_error(404)
+
+    def do_POST(self):
+        if self.path.startswith('/v1/'):
+            self._proxy()
+        else:
+            self.send_error(404)
+
+    def _proxy(self):
+        length = int(self.headers.get('Content-Length', 0))
+        body = self.rfile.read(length) if length else None
+        req = urllib.request.Request(
+            API + self.path, data=body,
+            headers={'Content-Type': self.headers.get('Content-Type', 'application/json')},
+            method=self.command)
+        try:
+            with urllib.request.urlopen(req) as r:
+                data = r.read()
+                self.send_response(r.status)
+                self.send_header('Content-Type', r.headers.get('Content-Type', 'application/json'))
+                self.send_header('Content-Length', str(len(data)))
+                self.end_headers()
+                self.wfile.write(data)
+        except urllib.error.URLError as e:
+            self.send_error(502, str(e))
+
+    def log_message(self, fmt, *args): pass
+
+# Threading server: a slow /v1 completion must not block serving the UI page.
+http.server.ThreadingHTTPServer(('127.0.0.1', ${UI_PORT}), H).serve_forever()
+" &
+PY_PID=$!
+
+echo ""
+echo "Ready! Open http://127.0.0.1:${UI_PORT}/chat.html"
+echo "Press Ctrl+C to stop."
+echo ""
+
+wait
diff --git a/demos/workloads/inference/nim-chat.html b/demos/workloads/inference/nim-chat.html
new file mode 100644
index 000000000..eee6384d4
--- /dev/null
+++ b/demos/workloads/inference/nim-chat.html
@@ -0,0 +1,239 @@
+
+
+
+
+
+
+
+NIM Chat
+
+
+
+
+ Llama 3.2 1B Chat
+ NVIDIA NIM on EKS
+
+
+
+ Send a message to start chatting
+
+
+
+
+
diff --git a/demos/workloads/inference/nimservice-llama-3-2-1b.yaml b/demos/workloads/inference/nimservice-llama-3-2-1b.yaml
new file mode 100644
index 000000000..27dc56f4b
--- /dev/null
+++ b/demos/workloads/inference/nimservice-llama-3-2-1b.yaml
@@ -0,0 +1,93 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Service — Llama 3.2 1B inference deployment.
+# Deploys a single-GPU NIM microservice serving meta/llama-3.2-1b-instruct
+# via an OpenAI-compatible API (/v1/chat/completions, /v1/models).
+#
+# Prerequisites:
+# - k8s-nim-operator deployed (via AICR NIM recipe)
+# - NGC pull secret and API key in the target namespace
+#
+# Setup:
+# kubectl create ns nim-workload
+# kubectl create secret docker-registry ngc-pull-secret \
+# --docker-server=nvcr.io --docker-username='$oauthtoken' \
+# --docker-password="$NGC_CLI_API_KEY" -n nim-workload
+# kubectl create secret generic ngc-api-secret \
+# --from-literal=NGC_API_KEY="$NGC_CLI_API_KEY" -n nim-workload
+#
+# Deploy:
+# kubectl apply -f nimservice-llama-3-2-1b.yaml
+#
+# Test:
+# kubectl port-forward svc/llama-3-2-1b 8000:8000 -n nim-workload
+# curl http://localhost:8000/v1/models
+# curl http://localhost:8000/v1/chat/completions \
+# -H "Content-Type: application/json" \
+# -d '{"model":"meta/llama-3.2-1b-instruct","messages":[{"role":"user","content":"Hello!"}],"max_tokens":30}'
+
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: nim-workload
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: nim-model-store
+ namespace: nim-workload
+spec:
+ accessModes:
+ - ReadWriteOnce
+ storageClassName: gp2
+ resources:
+ requests:
+ storage: 20Gi
+---
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+ name: llama-3-2-1b
+ namespace: nim-workload
+spec:
+ image:
+ repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
+ tag: "1.8.3"
+ pullPolicy: IfNotPresent
+ pullSecrets:
+ - ngc-pull-secret
+ authSecret: ngc-api-secret
+ replicas: 1
+ resources:
+ limits:
+ nvidia.com/gpu: "1"
+ requests:
+ nvidia.com/gpu: "1"
+ tolerations:
+ - key: dedicated
+ value: worker-workload
+ operator: Equal
+ effect: NoSchedule
+ - key: dedicated
+ value: worker-workload
+ operator: Equal
+ effect: NoExecute
+ expose:
+ service:
+ type: ClusterIP
+ port: 8000
+ storage:
+ pvc:
+ name: nim-model-store
diff --git a/pkg/recipe/criteria.go b/pkg/recipe/criteria.go
index c9f370bf4..00997461e 100644
--- a/pkg/recipe/criteria.go
+++ b/pkg/recipe/criteria.go
@@ -180,6 +180,7 @@ const (
CriteriaPlatformAny CriteriaPlatformType = "any"
CriteriaPlatformDynamo CriteriaPlatformType = "dynamo"
CriteriaPlatformKubeflow CriteriaPlatformType = "kubeflow"
+ CriteriaPlatformNIM CriteriaPlatformType = "nim"
)
// ParseCriteriaPlatformType parses a string into a CriteriaPlatformType.
@@ -191,6 +192,8 @@ func ParseCriteriaPlatformType(s string) (CriteriaPlatformType, error) {
return CriteriaPlatformDynamo, nil
case "kubeflow":
return CriteriaPlatformKubeflow, nil
+ case "nim":
+ return CriteriaPlatformNIM, nil
default:
return CriteriaPlatformAny, errors.New(errors.ErrCodeInvalidRequest, fmt.Sprintf("invalid platform type: %s", s))
}
@@ -198,7 +201,7 @@ func ParseCriteriaPlatformType(s string) (CriteriaPlatformType, error) {
// GetCriteriaPlatformTypes returns all supported platform types sorted alphabetically.
func GetCriteriaPlatformTypes() []string {
- return []string{"dynamo", "kubeflow"}
+ return []string{"dynamo", "kubeflow", "nim"}
}
// Criteria represents the input parameters for recipe matching.
diff --git a/pkg/recipe/criteria_test.go b/pkg/recipe/criteria_test.go
index 4e96ac541..1358ec0ed 100644
--- a/pkg/recipe/criteria_test.go
+++ b/pkg/recipe/criteria_test.go
@@ -768,6 +768,8 @@ func TestParseCriteriaPlatformType(t *testing.T) {
{"Dynamo uppercase", "Dynamo", CriteriaPlatformDynamo, false},
{"kubeflow", "kubeflow", CriteriaPlatformKubeflow, false},
{"Kubeflow uppercase", "Kubeflow", CriteriaPlatformKubeflow, false},
+ {"nim", "nim", CriteriaPlatformNIM, false},
+ {"NIM uppercase", "NIM", CriteriaPlatformNIM, false},
{"invalid", "invalid", CriteriaPlatformAny, true},
}
@@ -789,7 +791,7 @@ func TestGetCriteriaPlatformTypes(t *testing.T) {
types := GetCriteriaPlatformTypes()
// Should return sorted list
- expected := []string{"dynamo", "kubeflow"}
+ expected := []string{"dynamo", "kubeflow", "nim"}
if len(types) != len(expected) {
t.Errorf("GetCriteriaPlatformTypes() returned %d types, want %d", len(types), len(expected))
}
diff --git a/pkg/recipe/metadata.go b/pkg/recipe/metadata.go
index 4d9885b51..46ac7eeba 100644
--- a/pkg/recipe/metadata.go
+++ b/pkg/recipe/metadata.go
@@ -238,6 +238,20 @@ func (ref *ComponentRef) ApplyRegistryDefaults(config *ComponentConfig) {
ref.Path = config.Kustomize.DefaultPath
}
}
+
+ // Load health check assert file content if not already set
+ if ref.HealthCheckAsserts == "" && config.HealthCheck.AssertFile != "" {
+ provider := GetDataProvider()
+ if provider != nil {
+ data, err := provider.ReadFile(config.HealthCheck.AssertFile)
+ if err != nil {
+ slog.Debug("failed to read health check assert file",
+ "component", ref.Name, "file", config.HealthCheck.AssertFile, "error", err)
+ } else {
+ ref.HealthCheckAsserts = string(data)
+ }
+ }
+ }
}
// RecipeMetadataSpec contains the specification for a recipe.
diff --git a/pkg/recipe/metadata_test.go b/pkg/recipe/metadata_test.go
index d37c1aad4..a2cbd8710 100644
--- a/pkg/recipe/metadata_test.go
+++ b/pkg/recipe/metadata_test.go
@@ -35,8 +35,10 @@ package recipe
import (
"context"
+ "io/fs"
"strings"
"testing"
+ "testing/fstest"
)
func TestRecipeMetadataSpecValidateDependencies(t *testing.T) {
@@ -1242,6 +1244,94 @@ func TestComponentRefApplyRegistryDefaults_NamespaceAndChart(t *testing.T) {
})
}
+// TestComponentRefApplyRegistryDefaults_HealthCheckAsserts verifies that
+// ApplyRegistryDefaults loads healthCheck.assertFile content into HealthCheckAsserts.
+func TestComponentRefApplyRegistryDefaults_HealthCheckAsserts(t *testing.T) {
+ t.Run("loads assert file from data provider", func(t *testing.T) {
+ // Set up a test data provider with a health check file
+ fs := fstest.MapFS{
+ "checks/test-component/health-check.yaml": &fstest.MapFile{
+ Data: []byte("apiVersion: chainsaw.kyverno.io/v1alpha1\nkind: Test\n"),
+ },
+ }
+ old := GetDataProvider()
+ SetDataProvider(&testFSProvider{fs: fs})
+ defer SetDataProvider(old)
+
+ config := &ComponentConfig{
+ Name: "test-component",
+ HealthCheck: HealthCheckConfig{
+ AssertFile: "checks/test-component/health-check.yaml",
+ },
+ Helm: HelmConfig{DefaultRepository: "https://example.com"},
+ }
+ ref := &ComponentRef{Name: "test-component"}
+ ref.ApplyRegistryDefaults(config)
+
+ if ref.HealthCheckAsserts == "" {
+ t.Fatal("HealthCheckAsserts should be populated from assertFile")
+ }
+ if !strings.Contains(ref.HealthCheckAsserts, "chainsaw.kyverno.io") {
+ t.Errorf("HealthCheckAsserts = %q, want content containing chainsaw.kyverno.io", ref.HealthCheckAsserts)
+ }
+ })
+
+ t.Run("does not overwrite existing HealthCheckAsserts", func(t *testing.T) {
+ config := &ComponentConfig{
+ Name: "test-component",
+ HealthCheck: HealthCheckConfig{
+ AssertFile: "checks/test-component/health-check.yaml",
+ },
+ }
+ ref := &ComponentRef{
+ Name: "test-component",
+ HealthCheckAsserts: "existing-content",
+ }
+ ref.ApplyRegistryDefaults(config)
+
+ if ref.HealthCheckAsserts != "existing-content" {
+ t.Errorf("HealthCheckAsserts = %q, want %q (should not overwrite)", ref.HealthCheckAsserts, "existing-content")
+ }
+ })
+
+ t.Run("handles missing assert file gracefully", func(t *testing.T) {
+ fs := fstest.MapFS{}
+ old := GetDataProvider()
+ SetDataProvider(&testFSProvider{fs: fs})
+ defer SetDataProvider(old)
+
+ config := &ComponentConfig{
+ Name: "test-component",
+ HealthCheck: HealthCheckConfig{
+ AssertFile: "checks/nonexistent/health-check.yaml",
+ },
+ }
+ ref := &ComponentRef{Name: "test-component"}
+ ref.ApplyRegistryDefaults(config)
+
+ if ref.HealthCheckAsserts != "" {
+ t.Errorf("HealthCheckAsserts = %q, want empty for missing file", ref.HealthCheckAsserts)
+ }
+ })
+}
+
+// testFSProvider wraps fstest.MapFS to implement DataProvider for testing.
+type testFSProvider struct {
+ fs fstest.MapFS
+}
+
+func (p *testFSProvider) ReadFile(path string) ([]byte, error) {
+ return p.fs.ReadFile(path)
+}
+
+func (p *testFSProvider) WalkDir(root string, fn fs.WalkDirFunc) error {
+ return fs.WalkDir(p.fs, root, fn)
+}
+
+func (p *testFSProvider) Source(path string) string {
+ return path
+}
+
// TestComponentRefMergeWithPath verifies that the Path field is correctly merged
// when merging ComponentRefs (overlay into base).
func TestComponentRefMergeWithPath(t *testing.T) {
diff --git a/recipes/checks/k8s-nim-operator/health-check.yaml b/recipes/checks/k8s-nim-operator/health-check.yaml
new file mode 100644
index 000000000..f60b3a3da
--- /dev/null
+++ b/recipes/checks/k8s-nim-operator/health-check.yaml
@@ -0,0 +1,68 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NIM Operator Health Check
+#
+# Validates that the NVIDIA NIM Operator is running and healthy in the
+# nvidia-nim namespace. Checks that the k8s-nim-operator deployment has
+# at least one available replica and that no pods in the namespace are
+# stuck in Pending, Failed, or Unknown phases.
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+ name: k8s-nim-operator-health-check
+spec:
+ timeouts:
+ assert: 5m
+ steps:
+ - name: validate-deployment-exists
+ try:
+ # Guard against vacuous pass on empty namespace: verify the
+ # k8s-nim-operator deployment exists and has at least one ready replica.
+ - assert:
+ resource:
+ apiVersion: apps/v1
+ kind: Deployment
+ metadata:
+ name: k8s-nim-operator
+ namespace: nvidia-nim
+ status:
+ (availableReplicas > `0`): true
+ - name: validate-all-pods-healthy
+ try:
+ # Assert no pods are in unhealthy phases.
+ - error:
+ resource:
+ apiVersion: v1
+ kind: Pod
+ metadata:
+ namespace: nvidia-nim
+ status:
+ phase: Pending
+ - error:
+ resource:
+ apiVersion: v1
+ kind: Pod
+ metadata:
+ namespace: nvidia-nim
+ status:
+ phase: Failed
+ - error:
+ resource:
+ apiVersion: v1
+ kind: Pod
+ metadata:
+ namespace: nvidia-nim
+ status:
+ phase: Unknown
diff --git a/recipes/components/k8s-nim-operator/values.yaml b/recipes/components/k8s-nim-operator/values.yaml
new file mode 100644
index 000000000..117f6ae2a
--- /dev/null
+++ b/recipes/components/k8s-nim-operator/values.yaml
@@ -0,0 +1,34 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NVIDIA NIM Operator Helm values (v3.1.0)
+# Manages NIM microservice lifecycle: deployment, scaling, health monitoring,
+# and model caching via NIMService, NIMPipeline, and NIMCache CRDs.
+#
+# Requires: cert-manager (for admission webhooks), gpu-operator (for GPU scheduling)
+
+operator:
+ # Enable admission controller with cert-manager for TLS
+ admissionController:
+ enabled: true
+ tls:
+ mode: cert-manager
+ certManager:
+ issuerType: selfsigned
+
+ # Remove default control-plane affinity — EKS has no control-plane nodes
+ affinity: {}
+
+dynamo:
+ enabled: false
diff --git a/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml
new file mode 100644
index 000000000..d5f9ceba2
--- /dev/null
+++ b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml
@@ -0,0 +1,74 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: RecipeMetadata
+apiVersion: aicr.nvidia.com/v1alpha1
+metadata:
+ name: h100-eks-ubuntu-inference-nim
+
+spec:
+ # Inherits from h100-eks-ubuntu-inference (H100 + Ubuntu inference settings)
+ # Adds NVIDIA NIM Operator for managing NIM microservice deployments.
+ base: h100-eks-ubuntu-inference
+
+ criteria:
+ service: eks
+ accelerator: h100
+ os: ubuntu
+ intent: inference
+ platform: nim
+
+ # DRA requires Kubernetes 1.34+ (GA)
+ constraints:
+ - name: K8s.server.version
+ value: ">= 1.34"
+
+ componentRefs:
+ - name: nvidia-dra-driver-gpu
+ type: Helm
+ overrides:
+ gpuResourcesEnabledOverride: true
+
+ - name: k8s-nim-operator
+ type: Helm
+ source: https://helm.ngc.nvidia.com/nvidia
+ version: "3.1.0"
+ valuesFile: components/k8s-nim-operator/values.yaml
+ dependencyRefs:
+ - cert-manager
+ - gpu-operator
+
+ validation:
+ deployment:
+ checks:
+ - operator-health
+ - expected-resources
+ - gpu-operator-version
+ - check-nvidia-smi
+ constraints:
+ - name: Deployment.gpu-operator.version
+ value: ">= v24.6.0"
+ conformance:
+ checks:
+ - platform-health
+ - gpu-operator-health
+ - dra-support
+ - accelerator-metrics
+ - ai-service-metrics
+ - inference-gateway
+ - gang-scheduling
+ - pod-autoscaling
+ - cluster-autoscaling
+ - robust-controller
+ - secure-accelerator-access
diff --git a/recipes/registry.yaml b/recipes/registry.yaml
index 66d747550..cc86eb282 100644
--- a/recipes/registry.yaml
+++ b/recipes/registry.yaml
@@ -406,6 +406,25 @@ components:
tolerationPaths:
- tolerations
+ - name: k8s-nim-operator
+ displayName: k8s-nim-operator
+ valueOverrideKeys:
+ - nimoperator
+ - nim
+ healthCheck:
+ assertFile: checks/k8s-nim-operator/health-check.yaml
+ helm:
+ defaultRepository: https://helm.ngc.nvidia.com/nvidia
+ defaultChart: k8s-nim-operator
+ defaultVersion: "3.1.0"
+ defaultNamespace: nvidia-nim
+ nodeScheduling:
+ system:
+ nodeSelectorPaths:
+ - operator.nodeSelector
+ tolerationPaths:
+ - operator.tolerations
+
- name: kubeflow-trainer
displayName: kubeflow-trainer
valueOverrideKeys: