diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c500b0bd..1584dc6c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -58,6 +58,8 @@ jobs: sudo mv ./kind /usr/local/bin/kind - name: Create Kind cluster run: kind create cluster + - name: Set up Helm + uses: azure/setup-helm@b9e51907a09c216f16ebe8536097933489208112 # v4.3.0 - name: Run e2e tests run: | go mod tidy diff --git a/kagenti-operator/test/e2e/README.md b/kagenti-operator/test/e2e/README.md new file mode 100644 index 00000000..84a54197 --- /dev/null +++ b/kagenti-operator/test/e2e/README.md @@ -0,0 +1,219 @@ +# E2E Tests + +End-to-end tests for the kagenti-operator. The suite runs 8 specs: + +- **Manager tests** (2 specs) — controller pod readiness and Prometheus metrics +- **AgentCard tests** (6 specs) — webhook validation, auto-discovery, duplicate prevention, audit mode, and SPIRE signature verification + +## Prerequisites + +- [Kind](https://kind.sigs.k8s.io/) — `go install sigs.k8s.io/kind@latest` +- [Helm](https://helm.sh/) — `brew install helm` +- [kubectl](https://kubernetes.io/docs/tasks/tools/) — `brew install kubectl` +- Container runtime: **Docker** or **Podman** + +The test suite auto-detects Docker vs Podman. No env vars needed. + +## Run + +```bash +# Create a fresh Kind cluster +kind delete cluster 2>/dev/null; kind create cluster + +# Run all 8 specs (~7 min) +make test-e2e +``` + +The suite automatically builds/loads images, installs Prometheus, CertManager, SPIRE, deploys the controller, runs tests, and tears everything down. 
+ +## Skip pre-installed components + +If Prometheus, CertManager, or SPIRE are already on your cluster: + +```bash +PROMETHEUS_INSTALL_SKIP=true \ +CERT_MANAGER_INSTALL_SKIP=true \ +SPIRE_INSTALL_SKIP=true \ +make test-e2e +``` + +## Run specific scenarios + +```bash +# Webhook + auto-discovery tests only (~4 min) +go test ./test/e2e/ -v -ginkgo.v -ginkgo.focus="should reject AgentCard|should not create|should auto-create|should reject duplicate" + +# Signature verification tests only (~5 min) +go test ./test/e2e/ -v -ginkgo.v -ginkgo.focus="SignatureInvalidAudit|should verify signed" + +# Manager tests only +go test ./test/e2e/ -v -ginkgo.v -ginkgo.focus="Manager" +``` + +## Cleanup + +```bash +kind delete cluster +``` + +## Test scenarios + +| Scenario | Context | What it tests | +|----------|---------|---------------| +| Reject missing targetRef | Without signature | Webhook rejects AgentCard with no `spec.targetRef` | +| No protocol label | Without signature | Workload with `kagenti.io/type=agent` but no `protocol.kagenti.io/*` label gets no auto-created card | +| Auto-discovery | Without signature | Properly labeled workload gets an auto-created AgentCard with correct targetRef, protocol, and Synced=True | +| Duplicate prevention | Without signature | Webhook rejects a second AgentCard targeting the same workload | +| Audit mode | With signature | Unsigned card syncs (Synced=True) but reports SignatureVerified=False with reason SignatureInvalidAudit | +| Signed agent | With signature | SPIRE-signed card gets SignatureVerified=True, correct SPIFFE ID, Synced=True, and Bound=True | + +## Architecture + +### What gets installed + +The test suite sets up the following infrastructure in a Kind cluster: + +``` +BeforeSuite (once per suite) +├── Build & load operator image into Kind +├── Install Prometheus Operator v0.77.1 (metrics/ServiceMonitor CRDs) +├── Install CertManager v1.16.3 (webhook TLS certificates) +├── Build & load agentcard-signer image into Kind +└── 
Install SPIRE via Helm (spire-crds v0.5.0 + spire v0.28.3) + +BeforeAll (per Describe block) +├── make install → applies AgentCard CRD via kustomize +├── make deploy → creates namespace, RBAC, Deployment, webhook, ServiceMonitor +├── Wait for controller pod Running + webhook endpoint ready +└── Create test namespace e2e-agentcard-test (labeled agentcard=true + PSA restricted) +``` + +### How the operator is installed + +``` +make docker-build make install make deploy + │ │ │ + ▼ ▼ ▼ +Build image from kustomize build config/crd kustomize edit set image +Dockerfile │ │ + │ kubectl apply --server-side kustomize build config/default + ▼ │ │ +kind load docker-image ▼ kubectl apply --server-side +(podman fallback) AgentCard CRD created │ + ▼ + kagenti-operator-system: + ├── ServiceAccount + ├── ClusterRole + Binding + ├── Certificate + Issuer (cert-manager) + ├── Webhook Service (port 443) + ├── Metrics Service (port 8443) + ├── Deployment (controller pod) + ├── ValidatingWebhookConfiguration + └── ServiceMonitor (Prometheus) +``` + +### Component interactions + +``` +┌─ cert-manager ───────────────────────────────────────────────────┐ +│ Issues TLS cert for operator webhook │ +│ Injects CA into ValidatingWebhookConfiguration │ +└───────────────────────────┬──────────────────────────────────────┘ + │ TLS cert + ▼ +┌─ kagenti-operator-system ────────────────────────────────────────┐ +│ Controller Manager Pod │ +│ ├── Webhook server (validates AgentCard create/update) │ +│ ├── Metrics server (HTTPS, scraped by Prometheus) │ +│ ├── AgentCardSync controller │ +│ │ watches Deployments → auto-creates AgentCards │ +│ └── AgentCard controller │ +│ fetches card metadata, verifies signatures, evaluates binding│ +└───────────────────────────┬──────────────────────────────────────┘ + │ fetches /.well-known/agent-card.json + ▼ +┌─ e2e-agentcard-test ─────────────────────────────────────────────┐ +│ Agent Deployments (echo-agent, audit-agent, signed-agent) │ +│ Services (expose agents 
for card fetching) │ +│ AgentCard CRs (auto-created or manually applied) │ +└───────────────────────────▲──────────────────────────────────────┘ + │ SPIRE CSI volume provides SVIDs +┌─ spire-system ───────────┴──────────────────────────────────────┐ +│ SPIRE Server → issues SVIDs via ClusterSPIFFEID policies │ +│ SPIRE Agent (DaemonSet) → distributes SVIDs via CSI driver │ +│ spire-bundle ConfigMap → CA certs for signature verification │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Test scenario details + +#### Reject missing targetRef + +Applies an AgentCard with no `spec.targetRef`. The validating webhook checks +`agentcard.Spec.TargetRef != nil` and rejects with `"spec.targetRef is required"`. + +#### No protocol label + +Deploys `noproto-agent` with `kagenti.io/type=agent` but no `protocol.kagenti.io/*` label. +The sync controller's `shouldSyncWorkload()` requires both the agent type AND a protocol +label, so it skips this workload. The test uses `Consistently` for 15s to prove no card appears. + +#### Auto-discovery + +Deploys `echo-agent` with both labels plus an inline Python HTTP server serving +`/.well-known/agent-card.json`. The sync controller auto-creates `echo-agent-deployment-card`. +The main controller reconciles it: fetches the card JSON from the Service endpoint, extracts +protocol from labels, and sets `Synced=True`. Test verifies managed-by label, targetRef fields, +protocol, and sync status. + +#### Duplicate prevention + +With `echo-agent-deployment-card` still present from the previous test (ordered container), +attempts to create `echo-agent-manual-card` targeting the same Deployment. The webhook's +`checkDuplicateTargetRef()` lists all AgentCards in the namespace, finds the existing card +with matching targetRef, and rejects with `"an AgentCard already targets"`. + +#### Audit mode + +Controller is patched with `--require-a2a-signature=true --signature-audit-mode=true`. +Deploys unsigned `audit-agent`. 
The controller verifies the signature (fails — no signature), +but audit mode allows sync to proceed. Status shows `Synced=True` and +`SignatureVerified=False` with reason `SignatureInvalidAudit`. + +#### Signed agent + +The most complex scenario. Controller runs with `--require-a2a-signature=true` (no audit mode). + +1. **ClusterSPIFFEID** tells SPIRE to issue SVIDs to agent pods +2. **signed-agent** Deployment uses an `agentcard-signer` init-container that: + - Connects to SPIRE agent via CSI-mounted socket + - Signs the unsigned card JSON with the pod's SVID + - Writes signed card to a shared emptyDir volume +3. Main container serves the signed card via HTTP +4. Controller fetches the card, verifies the x5c signature chain against the SPIRE trust + bundle, extracts the SPIFFE ID from the leaf cert SAN +5. Identity binding checks that the SPIFFE ID belongs to the configured trust domain + +Test verifies: `SignatureVerified=True` (reason `SignatureValid`), +`signatureSpiffeId = spiffe://example.org/ns/e2e-agentcard-test/sa/signed-agent-sa`, +`Synced=True`, `Bound=True`. + +## Troubleshooting + +**Stale cluster state** — if you see errors about namespaces being terminated or cert-manager TLS failures, delete and recreate the cluster: + +```bash +kind delete cluster && kind create cluster +``` + +**Podman socket errors** — ensure your Podman machine is running: + +```bash +podman machine start +``` + +**Override container tool** — if auto-detection picks the wrong runtime: + +```bash +CONTAINER_TOOL=podman make test-e2e +``` diff --git a/kagenti-operator/test/e2e/e2e_suite_test.go b/kagenti-operator/test/e2e/e2e_suite_test.go index fe7ac8fe..337bd5e6 100644 --- a/kagenti-operator/test/e2e/e2e_suite_test.go +++ b/kagenti-operator/test/e2e/e2e_suite_test.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "testing" + "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -32,18 +33,25 @@ var ( // Optional Environment Variables: // - PROMETHEUS_INSTALL_SKIP=true: Skips Prometheus Operator installation during test setup. // - CERT_MANAGER_INSTALL_SKIP=true: Skips CertManager installation during test setup. - // These variables are useful if Prometheus or CertManager is already installed, avoiding - // re-installation and conflicts. + // - SPIRE_INSTALL_SKIP=true: Skips SPIRE installation during test setup. + // These variables are useful if Prometheus, CertManager, or SPIRE is already installed, + // avoiding re-installation and conflicts. skipPrometheusInstall = os.Getenv("PROMETHEUS_INSTALL_SKIP") == "true" skipCertManagerInstall = os.Getenv("CERT_MANAGER_INSTALL_SKIP") == "true" + skipSpireInstall = os.Getenv("SPIRE_INSTALL_SKIP") == "true" // isPrometheusOperatorAlreadyInstalled will be set true when prometheus CRDs be found on the cluster isPrometheusOperatorAlreadyInstalled = false // isCertManagerAlreadyInstalled will be set true when CertManager CRDs be found on the cluster isCertManagerAlreadyInstalled = false + // isSpireAlreadyInstalled will be set true when SPIRE CRDs are found on the cluster + isSpireAlreadyInstalled = false // projectImage is the name of the image which will be build and loaded // with the code source changes to be tested. projectImage = "example.com/kagenti-operator:v0.0.1" + + // signerImage is the agentcard-signer init-container image + signerImage = "ghcr.io/kagenti/kagenti-operator/agentcard-signer:e2e-test" ) // TestE2E runs the end-to-end (e2e) test suite for the project. 
These tests execute in an isolated, @@ -60,21 +68,20 @@ var _ = BeforeSuite(func() { By("Ensure that Prometheus is enabled") _ = utils.UncommentCode("config/default/kustomization.yaml", "#- ../prometheus", "#") + containerTool := utils.DetectContainerTool() + _, _ = fmt.Fprintf(GinkgoWriter, "Using container tool: %s\n", containerTool) + By("building the manager(Operator) image") - cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectImage)) + cmd := exec.Command("make", "docker-build", + fmt.Sprintf("IMG=%s", projectImage), + fmt.Sprintf("CONTAINER_TOOL=%s", containerTool)) _, err := utils.Run(cmd) ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the manager(Operator) image") - // TODO(user): If you want to change the e2e test vendor from Kind, ensure the image is - // built and available before running the tests. Also, remove the following block. By("loading the manager(Operator) image on Kind") err = utils.LoadImageToKindClusterWithName(projectImage) ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the manager(Operator) image into Kind") - // The tests-e2e are intended to run on a temporary cluster that is created and destroyed for testing. - // To prevent errors when tests run in environments with Prometheus or CertManager already installed, - // we check for their presence before execution. - // Setup Prometheus and CertManager before the suite if not skipped and if not already installed if !skipPrometheusInstall { By("checking if prometheus is installed already") isPrometheusOperatorAlreadyInstalled = utils.IsPrometheusCRDsInstalled() @@ -95,10 +102,34 @@ var _ = BeforeSuite(func() { _, _ = fmt.Fprintf(GinkgoWriter, "WARNING: CertManager is already installed. 
Skipping installation...\n") } } + + By("building the agentcard-signer image") + cmd = exec.Command("make", "build-signer", + fmt.Sprintf("SIGNER_IMG=%s", signerImage), + fmt.Sprintf("CONTAINER_TOOL=%s", containerTool)) + _, err = utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the agentcard-signer image") + + By("loading the agentcard-signer image on Kind") + err = utils.LoadImageToKindClusterWithName(signerImage) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the agentcard-signer image into Kind") + + if !skipSpireInstall { + By("checking if SPIRE is installed already") + isSpireAlreadyInstalled = utils.IsSpireCRDsInstalled() + if !isSpireAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Installing SPIRE...\n") + Expect(utils.InstallSpire("example.org")).To(Succeed(), "Failed to install SPIRE") + Expect(utils.WaitForSpireReady(5*time.Minute)).To(Succeed(), "SPIRE pods not ready in time") + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "WARNING: SPIRE is already installed. 
Skipping installation...\n") + } + } }) var _ = AfterSuite(func() { - // Teardown Prometheus and CertManager after the suite if not skipped and if they were not already installed + // Teardown Prometheus, CertManager, and SPIRE after the suite if not skipped + // and if they were not already installed if !skipPrometheusInstall && !isPrometheusOperatorAlreadyInstalled { _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling Prometheus Operator...\n") utils.UninstallPrometheusOperator() @@ -107,4 +138,8 @@ var _ = AfterSuite(func() { _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling CertManager...\n") utils.UninstallCertManager() } + if !skipSpireInstall && !isSpireAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling SPIRE...\n") + utils.UninstallSpire() + } }) diff --git a/kagenti-operator/test/e2e/e2e_test.go b/kagenti-operator/test/e2e/e2e_test.go index 3f549176..7d8d986c 100644 --- a/kagenti-operator/test/e2e/e2e_test.go +++ b/kagenti-operator/test/e2e/e2e_test.go @@ -22,6 +22,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "time" . "github.com/onsi/ginkgo/v2" @@ -49,42 +50,19 @@ var _ = Describe("Manager", Ordered, func() { // enforce the restricted security policy to the namespace, installing CRDs, // and deploying the controller. 
BeforeAll(func() { - By("creating manager namespace") - cmd := exec.Command("kubectl", "create", "ns", namespace) - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to create namespace") - - By("labeling the namespace to enforce the restricted security policy") - cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace, - "pod-security.kubernetes.io/enforce=restricted") - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy") - - By("installing CRDs") - cmd = exec.Command("make", "install") - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs") - - By("deploying the controller-manager") - cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectImage)) - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager") + Expect(utils.DeployController(namespace, projectImage)).To(Succeed(), "Failed to deploy controller") }) - // After all tests have been executed, clean up by undeploying the controller, uninstalling CRDs, - // and deleting the namespace. AfterAll(func() { By("cleaning up the curl pod for metrics") cmd := exec.Command("kubectl", "delete", "pod", "curl-metrics", "-n", namespace) _, _ = utils.Run(cmd) - By("undeploying the controller-manager") - cmd = exec.Command("make", "undeploy") + By("cleaning up metrics ClusterRoleBinding") + cmd = exec.Command("kubectl", "delete", "clusterrolebinding", metricsRoleBindingName, "--ignore-not-found") _, _ = utils.Run(cmd) - By("uninstalling CRDs") - cmd = exec.Command("make", "uninstall") - _, _ = utils.Run(cmd) + utils.UndeployController() By("removing manager namespace") cmd = exec.Command("kubectl", "delete", "ns", namespace) @@ -262,15 +240,6 @@ var _ = Describe("Manager", Ordered, func() { }) // +kubebuilder:scaffold:e2e-webhooks-checks - - // TODO: Customize the e2e test suite with scenarios specific to your project. 
- // Consider applying sample/CR(s) and check their status and/or verifying - // the reconciliation by using the metrics, i.e.: - // metricsOutput := getMetricsOutput() - // Expect(metricsOutput).To(ContainSubstring( - // fmt.Sprintf(`controller_runtime_reconcile_total{controller="%s",result="success"} 1`, - // strings.ToLower(), - // )) }) }) @@ -332,3 +301,328 @@ type tokenRequest struct { Token string `json:"token"` } `json:"status"` } + +var _ = Describe("AgentCard E2E", Ordered, func() { + const controllerNamespace = "kagenti-operator-system" + const controllerDeployment = "kagenti-operator-controller-manager" + + BeforeAll(func() { + Expect(utils.DeployController(controllerNamespace, projectImage)).To(Succeed(), "Failed to deploy controller") + + By("waiting for controller-manager to be ready") + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "get", "pods", "-l", "control-plane=controller-manager", + "-n", controllerNamespace, + "-o", "go-template={{ range .items }}{{ if not .metadata.deletionTimestamp }}{{ .status.phase }}{{ end }}{{ end }}") + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("Running")) + }, 2*time.Minute, 2*time.Second).Should(Succeed()) + + By("waiting for webhook endpoint to be ready") + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "get", "endpoints", + "kagenti-operator-webhook-service", "-n", controllerNamespace, + "-o", "jsonpath={.subsets[0].addresses[0].ip}") + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).NotTo(BeEmpty(), "webhook endpoint not yet populated") + }, 2*time.Minute, 2*time.Second).Should(Succeed()) + + By("creating test namespace with labels") + cmd := exec.Command("kubectl", "create", "ns", testNamespace) + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + + cmd = exec.Command("kubectl", "label", "--overwrite", "ns", testNamespace, + "agentcard=true", + 
"pod-security.kubernetes.io/enforce=restricted") + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterAll(func() { + By("deleting test namespace") + cmd := exec.Command("kubectl", "delete", "ns", testNamespace, "--ignore-not-found") + _, _ = utils.Run(cmd) + + By("cleaning up ClusterSPIFFEID") + cmd = exec.Command("kubectl", "delete", "clusterspiffeid", "e2e-agentcard-test", "--ignore-not-found") + _, _ = utils.Run(cmd) + + utils.UndeployController() + }) + + AfterEach(func() { + if CurrentSpecReport().Failed() { + // Dump controller logs + cmd := exec.Command("kubectl", "logs", "-l", "control-plane=controller-manager", + "-n", controllerNamespace, "--tail=100") + logs, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n%s\n", logs) + } + + // Dump events in test namespace + cmd = exec.Command("kubectl", "get", "events", "-n", testNamespace, "--sort-by=.lastTimestamp") + events, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Events:\n%s\n", events) + } + + // Dump AgentCards + cmd = exec.Command("kubectl", "get", "agentcards", "-n", testNamespace, "-o", "yaml") + cards, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "AgentCards:\n%s\n", cards) + } + } + }) + + SetDefaultEventuallyTimeout(2 * time.Minute) + SetDefaultEventuallyPollingInterval(time.Second) + + Context("Without signature verification", Ordered, func() { + It("should reject AgentCard without targetRef", func() { + By("attempting to apply AgentCard without targetRef") + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "apply", "-f", "-", "-n", testNamespace) + cmd.Stdin = strings.NewReader(invalidAgentCardFixture()) + output, err := cmd.CombinedOutput() + g.Expect(err).To(HaveOccurred(), "kubectl apply should fail") + g.Expect(string(output)).To(ContainSubstring("spec.targetRef is required")) + }, 1*time.Minute, 2*time.Second).Should(Succeed()) + }) + + It("should not 
create AgentCard for workload without protocol label", func() { + By("deploying noproto-agent without protocol label") + _, err := utils.KubectlApplyStdin(noProtocolAgentFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for deployment to be ready") + Expect(utils.WaitForDeploymentReady("noproto-agent", testNamespace, 2*time.Minute)).To(Succeed()) + + By("verifying no AgentCard is created") + Consistently(func() string { + cmd := exec.Command("kubectl", "get", "agentcards", "-n", testNamespace, + "-o", "jsonpath={.items[*].metadata.name}") + output, _ := utils.Run(cmd) + return output + }, 30*time.Second, 5*time.Second).ShouldNot(ContainSubstring("noproto-agent")) + }) + + It("should auto-create AgentCard for labelled workload", func() { + By("deploying echo-agent with agent and protocol labels") + _, err := utils.KubectlApplyStdin(echoAgentFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for deployment to be ready") + Expect(utils.WaitForDeploymentReady("echo-agent", testNamespace, 2*time.Minute)).To(Succeed()) + + cardName := "echo-agent-deployment-card" + + By("verifying AgentCard is auto-created") + Eventually(func(g Gomega) { + managedBy, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.metadata.labels['app\\.kubernetes\\.io/managed-by']}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(managedBy).To(Equal("kagenti-operator")) + }).Should(Succeed()) + + By("verifying targetRef") + Eventually(func(g Gomega) { + apiVersion, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.spec.targetRef.apiVersion}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(apiVersion).To(Equal("apps/v1")) + + kind, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.spec.targetRef.kind}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(kind).To(Equal("Deployment")) + + name, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + 
"{.spec.targetRef.name}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(name).To(Equal("echo-agent")) + }).Should(Succeed()) + + By("verifying protocol and Synced condition") + Eventually(func(g Gomega) { + protocol, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.protocol}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(protocol).To(Equal("a2a")) + + syncedStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='Synced')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(syncedStatus).To(Equal("True")) + }).Should(Succeed()) + }) + + It("should reject duplicate AgentCard targeting same workload", func() { + By("attempting to create manual AgentCard targeting echo-agent") + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "apply", "-f", "-", "-n", testNamespace) + cmd.Stdin = strings.NewReader(manualAgentCardFixture()) + output, err := cmd.CombinedOutput() + g.Expect(err).To(HaveOccurred(), "kubectl apply should fail for duplicate") + g.Expect(string(output)).To(ContainSubstring("an AgentCard already targets")) + }, 30*time.Second, 2*time.Second).Should(Succeed()) + }) + }) + + Context("With signature verification", Ordered, func() { + var origArgs []string + + BeforeAll(func() { + By("patching controller with signature verification flags") + var err error + origArgs, err = utils.PatchControllerArgs(controllerNamespace, controllerDeployment, []string{ + "--require-a2a-signature=true", + "--spire-trust-domain=example.org", + "--spire-trust-bundle-configmap=spire-bundle", + "--spire-trust-bundle-configmap-namespace=spire-system", + }) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterAll(func() { + By("restoring original controller args") + if origArgs != nil { + err := utils.RestoreControllerArgs(controllerNamespace, controllerDeployment, origArgs) + Expect(err).NotTo(HaveOccurred()) + + By("verifying controller args were restored") + currentArgs, 
readErr := utils.KubectlGetJsonpath("deployment", controllerDeployment, + controllerNamespace, "{.spec.template.spec.containers[0].args}") + Expect(readErr).NotTo(HaveOccurred()) + for _, arg := range []string{"--require-a2a-signature", "--spire-trust-domain"} { + Expect(currentArgs).NotTo(ContainSubstring(arg), + "controller args not fully restored: still contains "+arg) + } + } + }) + + Context("Audit mode", Ordered, func() { + var auditOrigArgs []string + + BeforeAll(func() { + By("adding audit mode flag") + var err error + auditOrigArgs, err = utils.PatchControllerArgs(controllerNamespace, controllerDeployment, []string{ + "--signature-audit-mode=true", + }) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterAll(func() { + By("removing audit mode flag") + if auditOrigArgs != nil { + err := utils.RestoreControllerArgs(controllerNamespace, controllerDeployment, auditOrigArgs) + Expect(err).NotTo(HaveOccurred()) + + By("verifying audit mode flag was removed") + currentArgs, readErr := utils.KubectlGetJsonpath("deployment", controllerDeployment, + controllerNamespace, "{.spec.template.spec.containers[0].args}") + Expect(readErr).NotTo(HaveOccurred()) + Expect(currentArgs).NotTo(ContainSubstring("--signature-audit-mode"), + "controller args not restored: still contains --signature-audit-mode") + } + }) + + It("should allow sync but report SignatureInvalidAudit", func() { + By("deploying audit-agent (unsigned)") + _, err := utils.KubectlApplyStdin(auditAgentFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + Expect(utils.WaitForDeploymentReady("audit-agent", testNamespace, 2*time.Minute)).To(Succeed()) + + By("updating auto-created AgentCard for audit-agent") + Eventually(func(g Gomega) { + _, applyErr := utils.KubectlApplyStdin(auditModeAgentCardFixture(), testNamespace) + g.Expect(applyErr).NotTo(HaveOccurred()) + }, 30*time.Second, 2*time.Second).Should(Succeed()) + + cardName := "audit-agent-deployment-card" + + By("verifying Synced=True (audit mode 
allows sync)") + Eventually(func(g Gomega) { + syncedStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='Synced')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(syncedStatus).To(Equal("True")) + }).Should(Succeed()) + + By("verifying SignatureVerified=False with reason SignatureInvalidAudit") + Eventually(func(g Gomega) { + sigStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='SignatureVerified')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sigStatus).To(Equal("False")) + + sigReason, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='SignatureVerified')].reason}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sigReason).To(Equal("SignatureInvalidAudit")) + }).Should(Succeed()) + }) + }) + + It("should verify signed agent card", func() { + By("creating ClusterSPIFFEID") + _, err := utils.KubectlApplyStdin(clusterSPIFFEIDFixture(), "") + Expect(err).NotTo(HaveOccurred()) + + By("deploying signed-agent stack") + _, err = utils.KubectlApplyStdin(signedAgentFixture(), testNamespace) + Expect(err).NotTo(HaveOccurred()) + Expect(utils.WaitForDeploymentReady("signed-agent", testNamespace, 3*time.Minute)).To(Succeed()) + + By("updating auto-created AgentCard with identityBinding") + Eventually(func(g Gomega) { + _, applyErr := utils.KubectlApplyStdin(signedAgentCardFixture(), testNamespace) + g.Expect(applyErr).NotTo(HaveOccurred()) + }, 30*time.Second, 2*time.Second).Should(Succeed()) + + cardName := "signed-agent-deployment-card" + + By("verifying SignatureVerified=True") + Eventually(func(g Gomega) { + sigStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='SignatureVerified')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sigStatus).To(Equal("True")) + + sigReason, err := 
utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='SignatureVerified')].reason}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sigReason).To(Equal("SignatureValid")) + }, 3*time.Minute).Should(Succeed()) + + By("verifying signatureSpiffeId") + Eventually(func(g Gomega) { + spiffeId, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.signatureSpiffeId}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(spiffeId).To(Equal("spiffe://example.org/ns/e2e-agentcard-test/sa/signed-agent-sa")) + }).Should(Succeed()) + + By("verifying Synced=True") + Eventually(func(g Gomega) { + syncedStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='Synced')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(syncedStatus).To(Equal("True")) + }).Should(Succeed()) + + By("verifying Bound=True") + Eventually(func(g Gomega) { + boundStatus, err := utils.KubectlGetJsonpath("agentcard", cardName, testNamespace, + "{.status.conditions[?(@.type=='Bound')].status}") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(boundStatus).To(Equal("True")) + }).Should(Succeed()) + }) + }) +}) diff --git a/kagenti-operator/test/e2e/fixtures.go b/kagenti-operator/test/e2e/fixtures.go new file mode 100644 index 00000000..4bfc099a --- /dev/null +++ b/kagenti-operator/test/e2e/fixtures.go @@ -0,0 +1,467 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +const testNamespace = "e2e-agentcard-test" + +// echoAgentFixture returns YAML for echo-agent Deployment + Service (used by S1, S3). +func echoAgentFixture() string { + return `apiVersion: apps/v1 +kind: Deployment +metadata: + name: echo-agent + namespace: ` + testNamespace + ` + labels: + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + app.kubernetes.io/name: echo-agent +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: echo-agent + kagenti.io/type: agent + template: + metadata: + labels: + app.kubernetes.io/name: echo-agent + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: echo + image: docker.io/python:3.11-slim + imagePullPolicy: IfNotPresent + command: + - python3 + - -c + - | + import http.server, json + class H(http.server.BaseHTTPRequestHandler): + def do_GET(self): + if self.path == '/.well-known/agent-card.json': + card = {'name': 'Echo Agent', 'version': '1.0.0', + 'url': 'http://echo-agent.` + testNamespace + `.svc:8001'} + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(card).encode()) + else: + self.send_response(404) + self.end_headers() + def log_message(self, *a): pass + http.server.HTTPServer(('', 8001), H).serve_forever() + ports: + - containerPort: 8001 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL +--- +apiVersion: v1 +kind: Service +metadata: + name: echo-agent + namespace: ` + testNamespace + ` +spec: + selector: + app.kubernetes.io/name: echo-agent + ports: + - port: 8001 + targetPort: 8001 +` +} + +// noProtocolAgentFixture returns YAML for noproto-agent Deployment (S2) - has +// kagenti.io/type=agent but NO protocol.kagenti.io/* label. 
+func noProtocolAgentFixture() string { + return `apiVersion: apps/v1 +kind: Deployment +metadata: + name: noproto-agent + namespace: ` + testNamespace + ` + labels: + kagenti.io/type: agent + app.kubernetes.io/name: noproto-agent +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: noproto-agent + kagenti.io/type: agent + template: + metadata: + labels: + app.kubernetes.io/name: noproto-agent + kagenti.io/type: agent + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: pause + image: registry.k8s.io/pause:3.9 + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL +` +} + +// manualAgentCardFixture returns YAML for a manual AgentCard targeting echo-agent (S3). +func manualAgentCardFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentCard +metadata: + name: echo-agent-manual-card + namespace: ` + testNamespace + ` +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: echo-agent +` +} + +// invalidAgentCardFixture returns YAML for an AgentCard WITHOUT spec.targetRef (S6). +func invalidAgentCardFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentCard +metadata: + name: invalid-no-targetref + namespace: ` + testNamespace + ` +spec: + syncPeriod: "30s" +` +} + +// auditAgentFixture returns YAML for audit-agent Deployment + Service (S5). 
+func auditAgentFixture() string { + return `apiVersion: apps/v1 +kind: Deployment +metadata: + name: audit-agent + namespace: ` + testNamespace + ` + labels: + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + app.kubernetes.io/name: audit-agent +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: audit-agent + kagenti.io/type: agent + template: + metadata: + labels: + app.kubernetes.io/name: audit-agent + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: echo + image: docker.io/python:3.11-slim + imagePullPolicy: IfNotPresent + command: + - python3 + - -c + - | + import http.server, json + class H(http.server.BaseHTTPRequestHandler): + def do_GET(self): + if self.path == '/.well-known/agent-card.json': + card = {'name': 'Audit Agent', 'version': '1.0.0', + 'url': 'http://audit-agent.` + testNamespace + `.svc:8002'} + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(card).encode()) + else: + self.send_response(404) + self.end_headers() + def log_message(self, *a): pass + http.server.HTTPServer(('', 8002), H).serve_forever() + ports: + - containerPort: 8002 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL +--- +apiVersion: v1 +kind: Service +metadata: + name: audit-agent + namespace: ` + testNamespace + ` +spec: + selector: + app.kubernetes.io/name: audit-agent + ports: + - port: 8002 + targetPort: 8002 +` +} + +// auditModeAgentCardFixture returns YAML for AgentCard targeting audit-agent. +// Uses the auto-created card name so kubectl apply updates the existing card. 
+func auditModeAgentCardFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentCard +metadata: + name: audit-agent-deployment-card + namespace: ` + testNamespace + ` + labels: + app.kubernetes.io/name: audit-agent + app.kubernetes.io/managed-by: kagenti-operator +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: audit-agent +` +} + +// signedAgentFixture returns YAML for the full signed-agent stack (S4): +// ServiceAccount, Role, RoleBinding, ConfigMap, Deployment (with agentcard-signer +// init-container + SPIRE CSI volume), Service. +func signedAgentFixture() string { + return `apiVersion: v1 +kind: ServiceAccount +metadata: + name: signed-agent-sa + namespace: ` + testNamespace + ` +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: agentcard-signer + namespace: ` + testNamespace + ` +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create", "update", "get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: agentcard-signer + namespace: ` + testNamespace + ` +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: agentcard-signer +subjects: + - kind: ServiceAccount + name: signed-agent-sa + namespace: ` + testNamespace + ` +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: signed-agent-card-unsigned + namespace: ` + testNamespace + ` +data: + agent.json: | + { + "name": "Signed Agent", + "description": "Agent with SPIRE-signed agent card", + "url": "http://signed-agent.` + testNamespace + `.svc.cluster.local:8080", + "version": "1.0.0", + "capabilities": { + "streaming": false, + "pushNotifications": false + }, + "defaultInputModes": ["text/plain"], + "defaultOutputModes": ["text/plain"], + "skills": [ + { + "name": "echo", + "description": "Echo back the input", + "inputModes": ["text/plain"], + "outputModes": ["text/plain"] + } + ] + } +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: signed-agent + namespace: 
` + testNamespace + ` + labels: + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + app.kubernetes.io/name: signed-agent +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: signed-agent + kagenti.io/type: agent + template: + metadata: + labels: + app.kubernetes.io/name: signed-agent + kagenti.io/type: agent + protocol.kagenti.io/a2a: "" + spec: + serviceAccountName: signed-agent-sa + securityContext: + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + initContainers: + - name: sign-agentcard + image: ghcr.io/kagenti/kagenti-operator/agentcard-signer:e2e-test + imagePullPolicy: IfNotPresent + env: + - name: SPIFFE_ENDPOINT_SOCKET + value: unix:///run/spire/agent-sockets/spire-agent.sock + - name: UNSIGNED_CARD_PATH + value: /etc/agentcard/agent.json + - name: AGENT_CARD_PATH + value: /app/.well-known/agent-card.json + - name: SIGN_TIMEOUT + value: "30s" + - name: AGENT_NAME + value: signed-agent + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: spire-agent-socket + mountPath: /run/spire/agent-sockets + readOnly: true + - name: unsigned-card + mountPath: /etc/agentcard + readOnly: true + - name: signed-card + mountPath: /app/.well-known + securityContext: + runAsNonRoot: true + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + resources: + requests: + cpu: 10m + memory: 16Mi + limits: + cpu: 100m + memory: 32Mi + containers: + - name: agent + image: docker.io/python:3.11-slim + imagePullPolicy: IfNotPresent + command: ["python3", "-m", "http.server", "8080", "--directory", "/app"] + ports: + - containerPort: 8080 + volumeMounts: + - name: signed-card + mountPath: /app/.well-known + readOnly: true + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumes: + - name: spire-agent-socket + csi: + driver: csi.spiffe.io + readOnly: true + - name: unsigned-card + configMap: + 
name: signed-agent-card-unsigned + - name: signed-card + emptyDir: + medium: Memory + sizeLimit: 1Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: signed-agent + namespace: ` + testNamespace + ` +spec: + selector: + app.kubernetes.io/name: signed-agent + ports: + - port: 8080 + targetPort: 8080 +` +} + +// signedAgentCardFixture returns YAML for AgentCard with identityBinding for signed-agent (S4). +// Uses the auto-created card name so kubectl apply updates the existing card. +func signedAgentCardFixture() string { + return `apiVersion: agent.kagenti.dev/v1alpha1 +kind: AgentCard +metadata: + name: signed-agent-deployment-card + namespace: ` + testNamespace + ` + labels: + app.kubernetes.io/name: signed-agent + app.kubernetes.io/managed-by: kagenti-operator +spec: + syncPeriod: "30s" + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: signed-agent + identityBinding: + strict: true +` +} + +// clusterSPIFFEIDFixture returns YAML for ClusterSPIFFEID (S4). +func clusterSPIFFEIDFixture() string { + return `apiVersion: spire.spiffe.io/v1alpha1 +kind: ClusterSPIFFEID +metadata: + name: e2e-agentcard-test +spec: + spiffeIDTemplate: "spiffe://{{ .TrustDomain }}/ns/{{ .PodMeta.Namespace }}/sa/{{ .PodSpec.ServiceAccountName }}" + podSelector: + matchLabels: + kagenti.io/type: agent + namespaceSelector: + matchLabels: + agentcard: "true" +` +} diff --git a/kagenti-operator/test/utils/utils.go b/kagenti-operator/test/utils/utils.go index 04a5141c..70659c80 100644 --- a/kagenti-operator/test/utils/utils.go +++ b/kagenti-operator/test/utils/utils.go @@ -19,10 +19,12 @@ package utils import ( "bufio" "bytes" + "encoding/json" "fmt" "os" "os/exec" "strings" + "time" . 
"github.com/onsi/ginkgo/v2" //nolint:golint,revive ) @@ -34,12 +36,30 @@ const ( certmanagerVersion = "v1.16.3" certmanagerURLTmpl = "https://github.com/cert-manager/cert-manager/releases/download/%s/cert-manager.yaml" + + spireCRDsChartVersion = "0.5.0" + spireChartVersion = "0.28.3" ) func warnError(err error) { _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) } +// DetectContainerTool returns the container tool to use for building images. +// Honors the CONTAINER_TOOL env var. Falls back to auto-detection: docker first, then podman. +func DetectContainerTool() string { + if tool := os.Getenv("CONTAINER_TOOL"); tool != "" { + return tool + } + if _, err := exec.LookPath("docker"); err == nil { + return "docker" + } + if _, err := exec.LookPath("podman"); err == nil { + return "podman" + } + return "docker" +} + // Run executes the provided command within this context func Run(cmd *exec.Cmd) (string, error) { dir, _ := GetProjectDir() @@ -165,7 +185,8 @@ func IsCertManagerCRDsInstalled() bool { return false } -// LoadImageToKindClusterWithName loads a local docker image to the kind cluster +// LoadImageToKindClusterWithName loads a local container image to the kind cluster. +// Falls back to podman save + kind load image-archive when kind load docker-image fails. func LoadImageToKindClusterWithName(name string) error { cluster := "kind" if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { @@ -174,7 +195,23 @@ func LoadImageToKindClusterWithName(name string) error { kindOptions := []string{"load", "docker-image", name, "--name", cluster} cmd := exec.Command("kind", kindOptions...) 
_, err := Run(cmd) - return err + if err == nil { + return nil + } + + // Fallback for podman: save image to archive, then load archive into Kind + _, _ = fmt.Fprintf(GinkgoWriter, "kind load docker-image failed, trying podman save fallback...\n") + archivePath := fmt.Sprintf("%s/kind-image-%d.tar", os.TempDir(), time.Now().UnixNano()) + defer func() { _ = os.Remove(archivePath) }() + + cmd = exec.Command("podman", "save", name, "-o", archivePath) + if _, saveErr := Run(cmd); saveErr != nil { + return fmt.Errorf("kind load docker-image failed (%w) and podman save fallback also failed: %v", err, saveErr) + } + + cmd = exec.Command("kind", "load", "image-archive", archivePath, "--name", cluster) + _, archiveErr := Run(cmd) + return archiveErr } // GetNonEmptyLines converts given command output string into individual objects @@ -201,6 +238,262 @@ func GetProjectDir() (string, error) { return wd, nil } +// InstallSpire installs SPIRE via Helm with the given trust domain. +// The SPIFFE hardened charts require CRDs to be installed separately first. 
+func InstallSpire(trustDomain string) error {
+	By("adding SPIFFE Helm repo")
+	cmd := exec.Command("helm", "repo", "add", "spiffe",
+		"https://spiffe.github.io/helm-charts-hardened/")
+	if _, err := Run(cmd); err != nil {
+		// Ignore "already exists" so reruns on a cluster that already has the repo keep working.
+		if !strings.Contains(err.Error(), "already exists") {
+			return err
+		}
+	}
+
+	cmd = exec.Command("helm", "repo", "update")
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+
+	By("installing SPIRE CRDs")
+	cmd = exec.Command("helm", "install", "spire-crds", "spiffe/spire-crds",
+		"--version", spireCRDsChartVersion,
+		"-n", "spire-system",
+		"--create-namespace",
+		"--wait",
+		"--timeout", "2m",
+	)
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+
+	By("installing SPIRE Helm chart")
+	cmd = exec.Command("helm", "install", "spire", "spiffe/spire",
+		"--version", spireChartVersion,
+		"-n", "spire-system",
+		fmt.Sprintf("--set=global.spire.trustDomain=%s", trustDomain),
+		"--wait",
+		"--timeout", "5m",
+	)
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+
+	By("labeling spire-bundle configmap for controller cache visibility")
+	cmd = exec.Command("kubectl", "label", "--overwrite", "configmap", "spire-bundle",
+		"-n", "spire-system", "kagenti.io/defaults=true")
+	_, err := Run(cmd)
+	return err
+}
+
+// UninstallSpire removes the SPIRE Helm releases and the spire-system namespace; failures are logged as warnings, never returned.
+func UninstallSpire() {
+	By("uninstalling SPIRE Helm release")
+	cmd := exec.Command("helm", "uninstall", "spire", "-n", "spire-system")
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+
+	By("uninstalling SPIRE CRDs Helm release")
+	cmd = exec.Command("helm", "uninstall", "spire-crds", "-n", "spire-system")
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+
+	By("deleting spire-system namespace")
+	cmd = exec.Command("kubectl", "delete", "ns", "spire-system", "--ignore-not-found")
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
+// IsSpireCRDsInstalled reports whether the ClusterSPIFFEID CRD exists on the cluster.
+func IsSpireCRDsInstalled() bool {
+	cmd := exec.Command("kubectl", "get", "crd", "clusterspiffeids.spire.spiffe.io")
+	_, err := Run(cmd)
+	return err == nil
+}
+
+// WaitForSpireReady waits for every pod in spire-system to be Ready; NOTE(review): `kubectl wait --all` errors when no pods match, so call this only after the Helm --wait install created them.
+func WaitForSpireReady(timeout time.Duration) error {
+	By("waiting for SPIRE pods to be ready")
+	cmd := exec.Command("kubectl", "wait", "pods",
+		"--all",
+		"-n", "spire-system",
+		"--for=condition=Ready",
+		fmt.Sprintf("--timeout=%s", timeout),
+	)
+	_, err := Run(cmd)
+	return err
+}
+
+// KubectlApplyStdin pipes the given YAML to `kubectl apply -f -`, scoping with -n only when namespace is non-empty.
+func KubectlApplyStdin(yaml, namespace string) (string, error) {
+	args := []string{"apply", "-f", "-"}
+	if namespace != "" {
+		args = append(args, "-n", namespace)
+	}
+	cmd := exec.Command("kubectl", args...)
+	cmd.Stdin = strings.NewReader(yaml)
+	return Run(cmd)
+}
+
+// KubectlGetJsonpath returns the whitespace-trimmed output of `kubectl get <resource> <name> -o jsonpath=<jsonpath>` in the namespace.
+func KubectlGetJsonpath(resource, name, namespace, jsonpath string) (string, error) {
+	cmd := exec.Command("kubectl", "get", resource, name,
+		"-n", namespace,
+		"-o", fmt.Sprintf("jsonpath=%s", jsonpath),
+	)
+	output, err := Run(cmd)
+	return strings.TrimSpace(output), err
+}
+
+// WaitForDeploymentReady blocks until the deployment reports condition Available or the timeout elapses.
+func WaitForDeploymentReady(name, namespace string, timeout time.Duration) error {
+	cmd := exec.Command("kubectl", "wait",
+		fmt.Sprintf("deployment/%s", name),
+		"-n", namespace,
+		"--for=condition=Available",
+		fmt.Sprintf("--timeout=%s", timeout),
+	)
+	_, err := Run(cmd)
+	return err
+}
+
+// WaitForRollout blocks until `kubectl rollout status` reports the deployment rollout complete or the timeout elapses.
+func WaitForRollout(name, namespace string, timeout time.Duration) error {
+	cmd := exec.Command("kubectl", "rollout", "status",
+		fmt.Sprintf("deployment/%s", name),
+		"-n", namespace,
+		fmt.Sprintf("--timeout=%s", timeout),
+	)
+	_, err := Run(cmd)
+	return err
+}
+
+func buildArgsPatch(argsJSON []byte) string {
+	const patchTmpl = `[{"op":"replace",` +
+		`"path":"/spec/template/spec/containers/0/args",` +
+		`"value":%s}]`
+	return fmt.Sprintf(patchTmpl, string(argsJSON))
+}
+
+// PatchControllerArgs appends addArgs to the first container's args of the deployment via a JSON patch and
+// waits for the rollout; it returns the pre-patch args (nil when the field was unset) for RestoreControllerArgs.
+func PatchControllerArgs(namespace, deploy string, addArgs []string) (origArgs []string, err error) {
+	By("getting current controller args")
+	cmd := exec.Command("kubectl", "get", "deployment", deploy,
+		"-n", namespace,
+		"-o", "jsonpath={.spec.template.spec.containers[0].args}",
+	)
+	output, err := Run(cmd)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get current args: %w", err)
+	}
+
+	output = strings.TrimSpace(output)
+	if output != "" {
+		if err := json.Unmarshal([]byte(output), &origArgs); err != nil {
+			return nil, fmt.Errorf("failed to parse current args %q: %w", output, err)
+		}
+	}
+
+	By(fmt.Sprintf("patching controller with args: %v", addArgs))
+	newArgs := make([]string, len(origArgs), len(origArgs)+len(addArgs))
+	copy(newArgs, origArgs)
+	newArgs = append(newArgs, addArgs...)
+	argsJSON, jsonErr := json.Marshal(newArgs)
+	if jsonErr != nil {
+		return origArgs, fmt.Errorf("failed to marshal new args: %w", jsonErr)
+	}
+	patchJSON := buildArgsPatch(argsJSON)
+	cmd = exec.Command("kubectl", "patch", "deployment", deploy,
+		"-n", namespace,
+		"--type=json",
+		fmt.Sprintf("-p=%s", patchJSON),
+	)
+	if _, patchErr := Run(cmd); patchErr != nil {
+		return origArgs, fmt.Errorf("failed to patch args: %w", patchErr)
+	}
+
+	By("waiting for controller rollout after patch")
+	if err := WaitForRollout(deploy, namespace, 2*time.Minute); err != nil {
+		return origArgs, fmt.Errorf("rollout failed after patch: %w", err)
+	}
+
+	return origArgs, nil
+}
+
+// RestoreControllerArgs re-applies origArgs; NOTE(review): a nil slice marshals to JSON null, which unsets args — correct when the field was absent before patching.
+func RestoreControllerArgs(namespace, deploy string, origArgs []string) error {
+	By(fmt.Sprintf("restoring controller args to: %v", origArgs))
+
+	argsJSON, err := json.Marshal(origArgs)
+	if err != nil {
+		return fmt.Errorf("failed to marshal original args: %w", err)
+	}
+
+	patchJSON := buildArgsPatch(argsJSON)
+	cmd := exec.Command("kubectl", "patch", "deployment", deploy,
+		"-n", namespace,
+		"--type=json",
+		fmt.Sprintf("-p=%s", patchJSON),
+	)
+	if _, err := Run(cmd); err != nil {
+		return fmt.Errorf("failed to restore args: %w", err)
+	}
+
+	By("waiting for controller rollout after restore")
+	if err := WaitForRollout(deploy, namespace, 2*time.Minute); err != nil {
+		return fmt.Errorf("rollout failed after restore: %w", err)
+	}
+
+	return nil
+}
+
+// DeployController creates/labels the manager namespace, installs CRDs, and deploys the controller-manager.
+func DeployController(namespace, img string) error {
+	By("creating manager namespace")
+	cmd := exec.Command("kubectl", "create", "ns", namespace)
+	if _, err := Run(cmd); err != nil && !strings.Contains(err.Error(), "already exists") {
+		return err
+	}
+
+	By("labeling the namespace to enforce the restricted security policy")
+	cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace,
+		"pod-security.kubernetes.io/enforce=restricted")
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+
+	By("installing CRDs")
+	cmd = exec.Command("make", "install")
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+
+	By("deploying the controller-manager")
+	cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", img))
+	_, err := Run(cmd)
+	return err
+}
+
+// UndeployController tears down the controller-manager and CRDs best-effort: failures are logged via warnError, never returned.
+func UndeployController() {
+	By("undeploying the controller-manager")
+	cmd := exec.Command("make", "undeploy")
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+
+	By("uninstalling CRDs")
+	cmd = exec.Command("make", "uninstall")
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
 // UncommentCode searches for target in the file and remove the comment prefix
 // of the target content. The target content may span multiple lines.
 func UncommentCode(filename, target, prefix string) error {