diff --git a/demos/workloads/inference/nim-chat-server.sh b/demos/workloads/inference/nim-chat-server.sh index 4b59b58a..a54625f1 100755 --- a/demos/workloads/inference/nim-chat-server.sh +++ b/demos/workloads/inference/nim-chat-server.sh @@ -21,8 +21,8 @@ set -e NAMESPACE="${NAMESPACE:-nim-workload}" SERVICE="${SERVICE:-svc/llama-3-2-1b}" -API_PORT=8000 -UI_PORT=9090 +API_PORT="${API_PORT:-8000}" +UI_PORT="${UI_PORT:-9090}" cleanup() { echo "Shutting down..." @@ -32,13 +32,13 @@ cleanup() { } trap cleanup EXIT INT TERM -# Kill anything already on our ports +# Check if our ports are already in use for port in $API_PORT $UI_PORT; do - pids=$(lsof -ti :$port 2>/dev/null || true) - if [ -n "$pids" ]; then - echo "Killing existing processes on port $port" - echo "$pids" | xargs kill 2>/dev/null || true - sleep 1 + if lsof -ti :$port &>/dev/null; then + echo "Error: port $port is already in use. Free it or set a different port:" + echo " UI_PORT=9091 API_PORT=8001 $0" + lsof -ti :$port 2>/dev/null | xargs ps -p 2>/dev/null | tail -1 + exit 1 fi done @@ -48,6 +48,14 @@ kubectl port-forward -n "$NAMESPACE" "$SERVICE" "$API_PORT":8000 & PF_PID=$! sleep 2 +# Verify port-forward is still running +if ! kill -0 $PF_PID 2>/dev/null; then + echo "Error: port-forward to $SERVICE failed. Check that the service exists:" + echo " kubectl get svc -n $NAMESPACE" + exit 1 +fi + + # Start chat UI + API proxy on UI_PORT echo "Starting chat UI on :$UI_PORT..." python3 -c " diff --git a/pkg/recipe/metadata.go b/pkg/recipe/metadata.go index 46ac7eeb..56b7a94a 100644 --- a/pkg/recipe/metadata.go +++ b/pkg/recipe/metadata.go @@ -239,19 +239,12 @@ func (ref *ComponentRef) ApplyRegistryDefaults(config *ComponentConfig) { } } - // Load health check assert file content if not already set - if ref.HealthCheckAsserts == "" && config.HealthCheck.AssertFile != "" { - provider := GetDataProvider() - if provider != nil { - data, err := provider.ReadFile(config.HealthCheck.AssertFile) - if err != nil { - slog.Debug("failed to read health check assert file", - "component", ref.Name, "file", config.HealthCheck.AssertFile, "error", err) - } else { - ref.HealthCheckAsserts = string(data) - } - } - } + // NOTE: healthCheck.assertFile content is intentionally NOT loaded here. + // The deployment validator image (distroless) does not include the chainsaw + // binary required to execute Chainsaw Test format assertions. Loading the + // content would activate chainsaw-based checks in expected-resources, causing + // runtime failures. Health check files are used by the conformance validator, + // which has its own chainsaw execution path. } // RecipeMetadataSpec contains the specification for a recipe. diff --git a/pkg/recipe/metadata_test.go b/pkg/recipe/metadata_test.go index a2cbd871..2da4b21b 100644 --- a/pkg/recipe/metadata_test.go +++ b/pkg/recipe/metadata_test.go @@ -35,10 +35,8 @@ package recipe import ( "context" - "io/fs" "strings" "testing" - "testing/fstest" ) func TestRecipeMetadataSpecValidateDependencies(t *testing.T) { @@ -1245,19 +1243,11 @@ func TestComponentRefApplyRegistryDefaults_NamespaceAndChart(t *testing.T) { } // TestComponentRefApplyRegistryDefaults_HealthCheckAsserts verifies that -// ApplyRegistryDefaults loads healthCheck.assertFile content into HealthCheckAsserts. +// ApplyRegistryDefaults does NOT load healthCheck.assertFile into HealthCheckAsserts. +// The deployment validator image is distroless and lacks the chainsaw binary, +// so loading assert content would cause runtime failures in expected-resources. func TestComponentRefApplyRegistryDefaults_HealthCheckAsserts(t *testing.T) { - t.Run("loads assert file from data provider", func(t *testing.T) { - // Set up a test data provider with a health check file - fs := fstest.MapFS{ - "checks/test-component/health-check.yaml": &fstest.MapFile{ - Data: []byte("apiVersion: chainsaw.kyverno.io/v1alpha1\nkind: Test\n"), - }, - } - old := GetDataProvider() - SetDataProvider(&testFSProvider{fs: fs}) - defer SetDataProvider(old) - + t.Run("does not load assert file content", func(t *testing.T) { config := &ComponentConfig{ Name: "test-component", HealthCheck: HealthCheckConfig{ @@ -1268,15 +1258,12 @@ func TestComponentRefApplyRegistryDefaults_HealthCheckAsserts(t *testing.T) { ref := &ComponentRef{Name: "test-component"} ref.ApplyRegistryDefaults(config) - if ref.HealthCheckAsserts == "" { - t.Fatal("HealthCheckAsserts should be populated from assertFile") - } - if !strings.Contains(ref.HealthCheckAsserts, "chainsaw.kyverno.io") { - t.Errorf("HealthCheckAsserts = %q, want content containing chainsaw.kyverno.io", ref.HealthCheckAsserts) + if ref.HealthCheckAsserts != "" { + t.Errorf("HealthCheckAsserts = %q, want empty (assert files should not be loaded in ApplyRegistryDefaults)", ref.HealthCheckAsserts) } }) - t.Run("does not overwrite existing HealthCheckAsserts", func(t *testing.T) { + t.Run("preserves existing HealthCheckAsserts", func(t *testing.T) { config := &ComponentConfig{ Name: "test-component", HealthCheck: HealthCheckConfig{ @@ -1290,48 +1277,11 @@ func TestComponentRefApplyRegistryDefaults_HealthCheckAsserts(t *testing.T) { ref.ApplyRegistryDefaults(config) if ref.HealthCheckAsserts != "existing-content" { - t.Errorf("HealthCheckAsserts = %q, want %q (should not overwrite)", ref.HealthCheckAsserts, "existing-content") - } - }) - - t.Run("handles missing assert file gracefully", func(t *testing.T) { - fs := fstest.MapFS{} - old := GetDataProvider() - SetDataProvider(&testFSProvider{fs: fs}) - defer SetDataProvider(old) - - config := &ComponentConfig{ - Name: "test-component", - HealthCheck: HealthCheckConfig{ - AssertFile: "checks/nonexistent/health-check.yaml", - }, - } - ref := &ComponentRef{Name: "test-component"} - ref.ApplyRegistryDefaults(config) - - if ref.HealthCheckAsserts != "" { - t.Errorf("HealthCheckAsserts = %q, want empty for missing file", ref.HealthCheckAsserts) + t.Errorf("HealthCheckAsserts = %q, want %q (should preserve existing)", ref.HealthCheckAsserts, "existing-content") } }) } -// testFSProvider wraps fstest.MapFS to implement DataProvider for testing. -type testFSProvider struct { - fs fstest.MapFS -} - -func (p *testFSProvider) ReadFile(path string) ([]byte, error) { - return p.fs.ReadFile(path) -} - -func (p *testFSProvider) WalkDir(root string, fn fs.WalkDirFunc) error { - return fs.WalkDir(p.fs, root, fn) -} - -func (p *testFSProvider) Source(path string) string { - return path -} - // TestComponentRefMergeWithPath verifies that the Path field is correctly merged // when merging ComponentRefs (overlay into base). func TestComponentRefMergeWithPath(t *testing.T) { diff --git a/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml index d5f9ceba..38c62c0f 100644 --- a/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml +++ b/recipes/overlays/h100-eks-ubuntu-inference-nim.yaml @@ -48,6 +48,10 @@ spec: dependencyRefs: - cert-manager - gpu-operator + expectedResources: + - kind: Deployment + namespace: nvidia-nim + name: k8s-nim-operator validation: deployment: