diff --git a/kagenti-operator/.gitignore b/kagenti-operator/.gitignore index ada68ff..3231f35 100644 --- a/kagenti-operator/.gitignore +++ b/kagenti-operator/.gitignore @@ -7,6 +7,9 @@ bin/* Dockerfile.cross +# Local dev (make run webhook TLS) +.run/ + # Test binary, built with `go test -c` *.test diff --git a/kagenti-operator/Makefile b/kagenti-operator/Makefile index c5ea356..9c608e8 100644 --- a/kagenti-operator/Makefile +++ b/kagenti-operator/Makefile @@ -112,9 +112,28 @@ lint-config: golangci-lint ## Verify golangci-lint linter configuration build: manifests generate fmt vet ## Build manager binary. go build -o bin/manager cmd/main.go +# Self-signed TLS for the validating webhook when running outside the cluster (controller-runtime +# otherwise expects tls.crt/tls.key under $TMPDIR/k8s-webhook-server/serving-certs). +WEBHOOK_CERT_DIR ?= $(CURDIR)/.run/webhook-certs + .PHONY: run -run: manifests generate fmt vet ## Run a controller from your host. - go run ./cmd/main.go +run: manifests generate fmt vet ## Run a controller from your host (needs cluster + CRDs: make install). + @mkdir -p "$(WEBHOOK_CERT_DIR)" + @if [ ! -f "$(WEBHOOK_CERT_DIR)/tls.crt" ] || [ ! -f "$(WEBHOOK_CERT_DIR)/tls.key" ]; then \ + command -v openssl >/dev/null 2>&1 || { \ + echo "openssl not found: install it, or create tls.crt/tls.key under WEBHOOK_CERT_DIR and re-run"; \ + echo " WEBHOOK_CERT_DIR=$(WEBHOOK_CERT_DIR)"; \ + exit 1; \ + }; \ + openssl req -x509 -newkey rsa:2048 -nodes \ + -keyout "$(WEBHOOK_CERT_DIR)/tls.key" \ + -out "$(WEBHOOK_CERT_DIR)/tls.crt" \ + -days 365 \ + -subj "/CN=localhost"; \ + fi + go run ./cmd/main.go \ + --webhook-cert-path="$(WEBHOOK_CERT_DIR)" \ + --metrics-secure=false # If you wish to build the manager image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it. diff --git a/kagenti-operator/cmd/main.go b/kagenti-operator/cmd/main.go index b312f0f..677f7c1 100644 --- a/kagenti-operator/cmd/main.go +++ b/kagenti-operator/cmd/main.go @@ -44,6 +44,7 @@ import ( agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" "github.com/kagenti/operator/internal/agentcard" "github.com/kagenti/operator/internal/controller" + "github.com/kagenti/operator/internal/keycloak" "github.com/kagenti/operator/internal/signature" "github.com/kagenti/operator/internal/tekton" webhookv1alpha1 "github.com/kagenti/operator/internal/webhook/v1alpha1" @@ -76,6 +77,7 @@ func main() { var requireA2ASignature bool var signatureAuditMode bool var enforceNetworkPolicies bool + var enableOperatorClientRegistration bool var spireTrustDomain string var spireTrustBundleConfigMapName string @@ -107,6 +109,8 @@ func main() { "When true, log signature verification failures but don't block (use for rollout)") flag.BoolVar(&enforceNetworkPolicies, "enforce-network-policies", false, "Create NetworkPolicies to restrict traffic for agents with unverified signatures") + flag.BoolVar(&enableOperatorClientRegistration, "enable-operator-client-registration", false, + "Reconcile Keycloak client registration for workloads with kagenti.io/client-registration-inject=false") flag.StringVar(&spireTrustDomain, "spire-trust-domain", "", "SPIRE trust domain for identity binding (e.g. 'example.org')") @@ -325,6 +329,20 @@ func main() { os.Exit(1) } + if enableOperatorClientRegistration { + if err = (&controller.ClientRegistrationReconciler{ + Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), + Scheme: mgr.GetScheme(), + SpireTrustDomain: spireTrustDomain, + KeycloakAdminTokenCache: &keycloak.CachedAdminTokenProvider{}, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ClientRegistration") + os.Exit(1) + } + setupLog.Info("Operator-managed client registration controller enabled") + } + if controller.TektonConfigCRDExists(mgr.GetConfig()) { if err = (&controller.TektonConfigReconciler{ Client: mgr.GetClient(), diff --git a/kagenti-operator/config/rbac/role.yaml b/kagenti-operator/config/rbac/role.yaml index 816fce8..5a0a263 100644 --- a/kagenti-operator/config/rbac/role.yaml +++ b/kagenti-operator/config/rbac/role.yaml @@ -30,6 +30,17 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - agent.kagenti.dev resources: diff --git a/kagenti-operator/docs/operator-managed-client-registration.md b/kagenti-operator/docs/operator-managed-client-registration.md new file mode 100644 index 0000000..17ebb92 --- /dev/null +++ b/kagenti-operator/docs/operator-managed-client-registration.md @@ -0,0 +1,171 @@ +# Operator-managed Keycloak client registration + +This document describes the split responsibility between **kagenti-operator** and **kagenti-webhook** for registering agent workloads as OAuth clients in Keycloak and delivering credentials to AuthBridge sidecars. + +The implementation lives in two repositories today (`kagenti-operator`, `kagenti-extensions` / `kagenti-webhook`); the same feature branch name is used in both until the code is consolidated into a single repo. + +--- + +## 1. Why this change + +### 1.1 Problem + +By default, the mutating webhook injects a **`kagenti-client-registration`** sidecar (or embeds equivalent behavior inside the combined **authbridge** container). That sidecar: + +- Runs **inside every pod**, uses workload identity (SPIFFE when SPIRE is enabled), and talks to Keycloak to register or refresh the OAuth client. +- Competes for startup ordering and resources with the application and other sidecars. + +Some deployments want **Envoy / SPIFFE / AuthBridge** injection to stay **pod-local**, but prefer **client lifecycle and secrets** to be handled **centrally** by the platform: one registration path, predictable secret names, and no client-registration container in the pod. + +### 1.2 Approach + +Workloads **opt out** of webhook-injected client registration with a well-known label. The **operator** then: + +1. Registers the workload as a Keycloak client using the **Keycloak admin API** (same conceptual contract as the sidecar). +2. Creates a **Secret** in the workload namespace with `client-id.txt` and `client-secret.txt`. +3. Annotates the pod template so the **webhook** can mount that Secret into every container that already uses the **`shared-data`** volume, at the **same paths** the sidecar used (`/shared/client-id.txt`, `/shared/client-secret.txt`). + +The webhook continues to inject **proxy-init**, **envoy** / **authbridge**, and **spiffe-helper** according to existing precedence and feature gates; it only skips the **client-registration** sidecar (or the registration portion of combined authbridge) when the label opts out. + +### 1.3 Benefits + +- **Fewer containers** when the sidecar path is not desired. +- **Centralized registration** using namespace `keycloak-admin-secret` (already provisioned for the sidecar contract). +- **Deterministic secret naming** derived from namespace and workload name (`kagenti-keycloak-client-credentials-`), with **owner references** to the Deployment or StatefulSet. +- **Safe ordering**: the operator creates the Secret **before** setting the pod-template annotation, so new Pods do not reference a missing Secret. +- **Admission reinvocation**: the webhook uses `reinvocationPolicy: IfNeeded` so a second pass can add Secret volume mounts if the operator annotates the template **after** the first injection. + +--- + +## 2. How it works + +### 2.1 Contract (labels and annotations) + +| Key | Value | Meaning | +|-----|--------|---------| +| `kagenti.io/client-registration-inject` | `"false"` | Workload opts **out** of webhook-injected client registration; operator is expected to manage registration **if** other conditions hold. | +| `kagenti.io/keycloak-client-credentials-secret-name` | Secret name | Set by the operator on the **pod template**; webhook reads it from **Pod** annotations at admission time and mounts the Secret. | + +The string values for the label key and the annotation key are **duplicated** in both repos and must stay in sync: + +- Operator: `LabelClientRegistrationInject`, `AnnotationKeycloakClientSecretName` in `clientregistration_controller.go`. +- Webhook: `LabelClientRegistrationInject` in `constants.go`, `AnnotationKeycloakClientSecretName` in `keycloak_client_credentials.go`. + +### 2.2 Which workloads the operator reconciles + +The **ClientRegistration** controller watches **Deployments** and **StatefulSets** whose pod template labels satisfy: + +- `kagenti.io/client-registration-inject` is **exactly** `"false"`. +- `kagenti.io/type` is **`agent`**, or **`tool`** when the cluster feature gate **`injectTools`** is true (tools are skipped if `injectTools` is false). + +Other workloads are ignored by this controller. + +### 2.3 Webhook behavior + +1. **Precedence** (unchanged): `kagenti.io/client-registration-inject=false` disables injection of the client-registration sidecar / registration slice in combined authbridge (`precedence.go`). +2. **After** sidecars and volumes are applied, **`ApplyKeycloakClientCredentialsSecretVolumes`** runs for **every** mutation: + - If the pod (template) annotation `kagenti.io/keycloak-client-credentials-secret-name` is set, the webhook adds a **Secret volume** named like the Secret (`kagenti-keycloak-client-credentials-`) and **subPath mounts** for `client-id.txt` and `client-secret.txt` into **each container that already has a `shared-data` volume mount**. +3. **Reinvocation**: if the pod is already considered “injected” (e.g. envoy or proxy-init present) but operator mounts are still missing, **`NeedsKeycloakClientCredentialsVolumePatch`** returns true and the webhook applies **only** the operator Secret mounts (`authbridge_webhook.go`). + +### 2.4 Operator reconcile flow (simplified) + +1. Read **cluster feature gates** (`kagenti-webhook` ConfigMap in the cluster defaults namespace). If `globalEnabled` or `clientRegistration` is false, skip. +2. Read **`authbridge-config`** in the workload namespace (`KEYCLOAK_URL`, `KEYCLOAK_REALM`, `SPIRE_ENABLED`, etc.). +3. Read **`keycloak-admin-secret`** (admin username/password). +4. Compute **Keycloak client ID**: + - If `SPIRE_ENABLED` is not true: `namespace/workloadName`. + - If SPIRE is enabled: `spiffe:///ns//sa/` (requires a **non-default** `serviceAccountName` and operator **`--spire-trust-domain`**). +5. **Register or fetch** the client via Keycloak admin API (`internal/keycloak`). +6. **Create or update** the credentials Secret; set **owner** to the Deployment/StatefulSet. +7. **Patch** the pod template annotation `kagenti.io/keycloak-client-credentials-secret-name` to the deterministic secret name. + +### 2.5 Feature flags + +| Component | Flag / gate | Role | +|-----------|-------------|------| +| Operator | `--enable-operator-client-registration` (default **true**) | Master switch for the ClientRegistration controller. | +| Operator | `--spire-trust-domain` | Required for SPIFFE-shaped client IDs when `authbridge-config` has `SPIRE_ENABLED=true`. | +| Webhook | `--enable-client-registration` | Cluster-wide gate for client-registration **injection** (precedence still applies). | +| Webhook | Feature gates ConfigMap | `clientRegistration`, `injectTools`, `globalEnabled`, etc., same as for injected sidecars. | + +--- + +## 3. Requirements + +### 3.1 Platform / namespace + +- **`authbridge-config`** ConfigMap in the workload namespace with at least `KEYCLOAK_URL`, `KEYCLOAK_REALM`, and consistent `SPIRE_ENABLED` with the mesh. +- **`keycloak-admin-secret`** in the same namespace with `KEYCLOAK_ADMIN_USERNAME` and `KEYCLOAK_ADMIN_PASSWORD`. +- **Webhook** and **operator** versions that both implement this contract (deploy together). + +### 3.2 Workload + +- **Deployment** or **StatefulSet** (not bare Pods for operator ownership of Secrets). +- Pod template labels: `kagenti.io/client-registration-inject: "false"` and `kagenti.io/type: agent` or `tool` (subject to `injectTools`). +- For **SPIRE-enabled** namespaces: `spec.template.spec.serviceAccountName` must be a **dedicated** ServiceAccount (not `default`). + +### 3.3 Operator configuration + +- When `authbridge-config` sets `SPIRE_ENABLED=true`, configure **`--spire-trust-domain`** to match the SPIRE server trust domain (same value as used for workload SPIFFE IDs). +- Ensure the operator can read **`authbridge-config`** and **`keycloak-admin-secret`** in agent namespaces (RBAC is extended for ConfigMaps and Secrets as needed). + +### 3.4 Webhook configuration + +- **`reinvocationPolicy: IfNeeded`** on the mutating webhook so late annotations still get mounts. +- Pod template must eventually carry **`kagenti.io/keycloak-client-credentials-secret-name`** once the operator has reconciled; until then, auth consumers on `shared-data` may not see credentials (operator retries with backoff). + +--- + +## 4. Migration strategy + +### 4.1 Recommended rollout order + +1. **Upgrade operator** (with ClientRegistration controller and Keycloak client package). +2. **Upgrade webhook** (operator Secret mounts + reinvocation path). +3. **Configure** `--spire-trust-domain` on the operator if agent namespaces use SPIRE (`SPIRE_ENABLED=true`). + +Rolling webhook before operator can leave workloads with `client-registration-inject=false` **without** registration until the operator is available; rolling operator before webhook can create Secrets and annotations **without** mounts until the new webhook is live. Short overlap is acceptable if you migrate workloads **after** both are deployed. + +### 4.2 Adopting operator-managed registration per workload + +1. Ensure the namespace has `authbridge-config` and `keycloak-admin-secret`. +2. On the workload pod template, set **`kagenti.io/client-registration-inject: "false"`**. +3. If SPIRE is on, set a **dedicated** `serviceAccountName`. +4. **Restart** or roll the workload so the webhook sees the new template and the operator reconciles. + +The operator will create or reuse the Keycloak client and Secret; the webhook will inject mounts on create or on reinvocation. + +### 4.3 Rollback + +- Remove **`kagenti.io/client-registration-inject: "false"`** (or set client-registration injection back to the default path) and **remove** the operator annotation if present. +- Roll pods so the **client-registration sidecar** (or combined authbridge with registration) runs again. +- Optionally delete operator-created Secrets named `kagenti-keycloak-client-credentials-*` after confirming Keycloak clients are recreated by the sidecar path if needed. + +Disabling **`--enable-operator-client-registration`** stops new reconciliation but does not remove existing annotations or Secrets; clean those up if you need a full rollback. + +### 4.4 Keycloak client identity + +Switching from **sidecar** to **operator** registration may change the **client ID** string (e.g. from SPIFFE-based to `namespace/name` when SPIRE is off, or same SPIFFE shape when SPIRE is on). Plan for **one-time** Keycloak client cleanup or renamed clients if both paths ran for the same logical workload. + +### 4.5 Future consolidation + +When webhook and operator live in one repository, keep this document as the single **source of truth** for the contract; co-locate constants in one package to avoid drift between annotation/label keys. + +--- + +## 5. Related code + +| Area | Location | +|------|-----------| +| Operator reconciler | `internal/controller/clientregistration_controller.go` | +| Keycloak admin client | `internal/keycloak/` | +| Operator entrypoint / flags | `cmd/main.go` | +| Webhook mounts + reinvocation | `internal/webhook/injector/keycloak_client_credentials.go`, `pod_mutator.go`, `internal/webhook/v1alpha1/authbridge_webhook.go` | +| Injection precedence | `internal/webhook/injector/precedence.go` | + +--- + +## 6. Operational notes + +- If logs show **`cannot resolve Keycloak client id yet`** with reason **`--spire-trust-domain is required`**, configure the operator trust domain to match SPIRE (see platform docs / `kagenti-deps` `spire.trustDomain` on Kind installs). +- Operator reads **`authbridge-config`** via an **uncached API reader** because ConfigMaps may be excluded from the controller-runtime cache for scalability; this matches how the webhook resolves namespace config. diff --git a/kagenti-operator/go.mod b/kagenti-operator/go.mod index 3c9e07a..19c3164 100644 --- a/kagenti-operator/go.mod +++ b/kagenti-operator/go.mod @@ -17,6 +17,7 @@ require ( k8s.io/client-go v0.32.0 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 sigs.k8s.io/controller-runtime v0.20.0 + sigs.k8s.io/yaml v1.4.0 ) require ( @@ -103,5 +104,4 @@ require ( sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/kagenti-operator/internal/controller/clientregistration_controller.go b/kagenti-operator/internal/controller/clientregistration_controller.go new file mode 100644 index 0000000..a57224f --- /dev/null +++ b/kagenti-operator/internal/controller/clientregistration_controller.go @@ -0,0 +1,449 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package controller + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "strings" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/yaml" + + agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" + "github.com/kagenti/operator/internal/keycloak" +) + +// Well-known namespace resources (same contract as kagenti-webhook injector). +const ( + authbridgeConfigConfigMap = "authbridge-config" + keycloakAdminSecret = "keycloak-admin-secret" + + // LabelClientRegistrationInject opts a workload out of webhook-injected client-registration; + // the operator controller then registers the client and sets AnnotationKeycloakClientSecretName. + LabelClientRegistrationInject = "kagenti.io/client-registration-inject" + + // AnnotationKeycloakClientSecretName must match kagenti-webhook injector.AnnotationKeycloakClientSecretName. + AnnotationKeycloakClientSecretName = "kagenti.io/keycloak-client-credentials-secret-name" +) + +// ClientRegistrationReconciler registers OAuth clients in Keycloak and patches agent workloads that +// use kagenti.io/client-registration-inject=false so the webhook still injects envoy/SPIRE but not +// the registration sidecar. The Secret is created before the pod template annotation is set so new Pods +// never reference a missing Secret; the webhook mounts the Secret for injected sidecars that use shared-data. +type ClientRegistrationReconciler struct { + client.Client + // APIReader reads authbridge-config and keycloak-admin-secret from the API server. Those objects + // are not in the manager's ConfigMap cache (see cmd/main.go cache.ByObject for ConfigMap). + APIReader client.Reader + Scheme *runtime.Scheme + + SpireTrustDomain string + // KeycloakAdminTokenCache caches admin password-grant tokens by Keycloak URL and credentials to + // avoid a token request on every reconcile. If nil, PasswordGrantToken is used without caching. + KeycloakAdminTokenCache *keycloak.CachedAdminTokenProvider +} + +func (r *ClientRegistrationReconciler) uncachedReader() client.Reader { + if r.APIReader != nil { + return r.APIReader + } + return r.Client +} + +// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch + +func (r *ClientRegistrationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + globalOn, clientRegGate, injectTools, err := readClusterFeatureGates(ctx, r.Client) + if err != nil { + logger.Error(err, "read cluster feature gates") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + if !globalOn || !clientRegGate { + logger.V(1).Info("skipping operator client registration: cluster feature gates disabled injection") + return ctrl.Result{}, nil + } + + dep := &appsv1.Deployment{} + if err := r.Get(ctx, req.NamespacedName, dep); err == nil { + return r.reconcileOne(ctx, dep, injectTools, dep.Name, &dep.Spec.Template, + func(ctx context.Context) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + d := &appsv1.Deployment{} + if err := r.Get(ctx, req.NamespacedName, d); err != nil { + return err + } + if !injectKeycloakClientCredentialsAnnotation(&d.Spec.Template, keycloakClientCredentialsSecretName(d.Namespace, d.Name)) { + return nil + } + return r.Update(ctx, d) + }) + }) + } + if !apierrors.IsNotFound(err) { + return ctrl.Result{}, err + } + + sts := &appsv1.StatefulSet{} + if err := r.Get(ctx, req.NamespacedName, sts); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + return r.reconcileOne(ctx, sts, injectTools, sts.Name, &sts.Spec.Template, + func(ctx context.Context) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + s := &appsv1.StatefulSet{} + if err := r.Get(ctx, req.NamespacedName, s); err != nil { + return err + } + if !injectKeycloakClientCredentialsAnnotation(&s.Spec.Template, keycloakClientCredentialsSecretName(s.Namespace, s.Name)) { + return nil + } + return r.Update(ctx, s) + }) + }) +} + +func (r *ClientRegistrationReconciler) reconcileOne( + ctx context.Context, + owner client.Object, + injectTools bool, + workloadName string, + template *corev1.PodTemplateSpec, + patchTemplate func(context.Context) error, +) (ctrl.Result, error) { + logger := log.FromContext(ctx) + labels := template.Labels + if reason := keycloakClientCredentialsSkipReason(labels, injectTools); reason != "" { + logger.Info("skipping operator client registration for workload", + "namespace", owner.GetNamespace(), + "workload", workloadName, + "reason", reason) + return ctrl.Result{}, nil + } + + ns := owner.GetNamespace() + + ab, err := readAuthbridgeConfigMap(ctx, r.uncachedReader(), ns) + if err != nil { + logger.Error(err, "read authbridge-config") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + if ab.KeycloakURL == "" || ab.KeycloakRealm == "" { + logger.Info("waiting for KEYCLOAK_URL/KEYCLOAK_REALM in authbridge-config", "namespace", ns) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + adminSecret := &corev1.Secret{} + if err := r.uncachedReader().Get(ctx, types.NamespacedName{Namespace: ns, Name: keycloakAdminSecret}, adminSecret); err != nil { + if apierrors.IsNotFound(err) { + logger.Info("waiting for keycloak-admin-secret", "namespace", ns) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + return ctrl.Result{}, err + } + adminUser := string(adminSecret.Data["KEYCLOAK_ADMIN_USERNAME"]) + adminPass := string(adminSecret.Data["KEYCLOAK_ADMIN_PASSWORD"]) + if adminUser == "" || adminPass == "" { + logger.Info("keycloak-admin-secret missing username/password keys") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + spireEnabled := strings.EqualFold(strings.TrimSpace(ab.SpireEnabled), "true") + clientName := ns + "/" + workloadName + clientID, err := resolveKeycloakClientID(ns, workloadName, template.Spec.ServiceAccountName, spireEnabled, r.SpireTrustDomain) + if err != nil { + logger.Info("cannot resolve Keycloak client id yet", "reason", err.Error()) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + authType := strings.TrimSpace(ab.ClientAuthType) + if authType == "" { + authType = "client-secret" + } + tokenExch := strings.TrimSpace(ab.KeycloakTokenExchangeEnabled) != "false" + audienceScopeOn := strings.TrimSpace(ab.KeycloakAudienceScopeEnabled) != "false" + + kc := keycloak.Admin{BaseURL: ab.KeycloakURL, HTTPClient: keycloak.DefaultHTTPClient()} + var token string + if r.KeycloakAdminTokenCache != nil { + token, err = r.KeycloakAdminTokenCache.Token(ctx, &kc, adminUser, adminPass) + } else { + token, err = kc.PasswordGrantToken(ctx, adminUser, adminPass) + } + if err != nil { + logger.Error(err, "Keycloak admin token failed") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + _, clientSecret, err := kc.RegisterOrFetchClientWithToken(ctx, token, keycloak.ClientRegistrationParams{ + Realm: ab.KeycloakRealm, + ClientID: clientID, + ClientName: clientName, + ClientAuthType: authType, + SpiffeIDPAlias: ab.SpiffeIDPAlias, + TokenExchangeEnable: tokenExch, + }) + if err != nil { + logger.Error(err, "Keycloak client registration failed", "clientId", clientID) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + if err := kc.EnsureAudienceScope(ctx, token, keycloak.AudienceParams{ + Realm: ab.KeycloakRealm, + ClientName: clientName, + AudienceClientID: clientID, + PlatformClientIDs: parsePlatformClientIDs(ab.PlatformClientIDs), + AudienceScopeEnabled: audienceScopeOn, + }); err != nil { + logger.Error(err, "Keycloak audience scope management failed (credentials will still be written)", + "clientId", clientID) + } + + secretName := keycloakClientCredentialsSecretName(ns, workloadName) + if err := r.ensureClientCredentialsSecret(ctx, owner, secretName, clientID, clientSecret); err != nil { + logger.Error(err, "ensure client credentials secret") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + if err := patchTemplate(ctx); err != nil { + logger.Error(err, "patch workload pod template") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + logger.Info("operator client registration applied", + "workload", workloadName, "namespace", ns, "secret", secretName) + return ctrl.Result{}, nil +} + +func injectKeycloakClientCredentialsAnnotation(template *corev1.PodTemplateSpec, secretName string) bool { + if template.Annotations != nil && template.Annotations[AnnotationKeycloakClientSecretName] == secretName { + return false + } + if template.Annotations == nil { + template.Annotations = map[string]string{} + } + template.Annotations[AnnotationKeycloakClientSecretName] = secretName + return true +} + +// keycloakClientCredentialsSkipReason returns a non-empty human-readable reason when this controller should +// not process the workload; empty string means reconcile should continue. +func keycloakClientCredentialsSkipReason(labels map[string]string, injectTools bool) string { + if labels == nil { + return "pod template has no labels" + } + if labels[LabelClientRegistrationInject] != "false" { + return fmt.Sprintf("%s is not \"false\" (only workloads that opt out of webhook-injected client-registration are managed here)", LabelClientRegistrationInject) + } + switch labels[LabelAgentType] { + case LabelValueAgent: + return "" + case string(agentv1alpha1.RuntimeTypeTool): + if !injectTools { + return "kagenti.io/type is tool but cluster injectTools feature gate is disabled" + } + return "" + default: + t := labels[LabelAgentType] + if t == "" { + return "kagenti.io/type label is missing or not agent/tool" + } + return fmt.Sprintf("kagenti.io/type=%q is not agent or tool", t) + } +} + +func workloadWantsOperatorClientReg(labels map[string]string, injectTools bool) bool { + return keycloakClientCredentialsSkipReason(labels, injectTools) == "" +} + +type authbridgeConfig struct { + KeycloakURL string + KeycloakRealm string + SpireEnabled string + ClientAuthType string + SpiffeIDPAlias string + KeycloakTokenExchangeEnabled string + PlatformClientIDs string + KeycloakAudienceScopeEnabled string +} + +func readAuthbridgeConfigMap(ctx context.Context, c client.Reader, namespace string) (authbridgeConfig, error) { + cm := &corev1.ConfigMap{} + err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: authbridgeConfigConfigMap}, cm) + if apierrors.IsNotFound(err) { + return authbridgeConfig{}, nil + } + if err != nil { + return authbridgeConfig{}, err + } + if cm.Data == nil { + return authbridgeConfig{}, nil + } + return authbridgeConfig{ + KeycloakURL: cm.Data["KEYCLOAK_URL"], + KeycloakRealm: cm.Data["KEYCLOAK_REALM"], + SpireEnabled: cm.Data["SPIRE_ENABLED"], + ClientAuthType: cm.Data["CLIENT_AUTH_TYPE"], + SpiffeIDPAlias: cm.Data["SPIFFE_IDP_ALIAS"], + KeycloakTokenExchangeEnabled: cm.Data["KEYCLOAK_TOKEN_EXCHANGE_ENABLED"], + PlatformClientIDs: cm.Data["PLATFORM_CLIENT_IDS"], + KeycloakAudienceScopeEnabled: cm.Data["KEYCLOAK_AUDIENCE_SCOPE_ENABLED"], + }, nil +} + +func parsePlatformClientIDs(raw string) []string { + raw = strings.TrimSpace(raw) + if raw == "" { + return []string{"kagenti"} + } + var out []string + for _, p := range strings.Split(raw, ",") { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + if len(out) == 0 { + return []string{"kagenti"} + } + return out +} + +func readClusterFeatureGates(ctx context.Context, c client.Reader) (globalOn, clientReg, injectTools bool, err error) { + globalOn, clientReg, injectTools = true, true, false + cm := &corev1.ConfigMap{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ClusterDefaultsNamespace, Name: ClusterFeatureGatesConfigMapName}, cm); err != nil { + if apierrors.IsNotFound(err) { + return globalOn, clientReg, injectTools, nil + } + return false, false, false, err + } + if cm.Data == nil { + return globalOn, clientReg, injectTools, nil + } + // Only one ConfigMap data entry is consulted: we return after the first non-empty + // value that unmarshals to a non-empty YAML map (map iteration order); other keys are ignored. + for _, raw := range cm.Data { + raw = strings.TrimSpace(raw) + if raw == "" { + continue + } + var m map[string]interface{} + if err := yaml.Unmarshal([]byte(raw), &m); err != nil || len(m) == 0 { + continue + } + if v, ok := m["globalEnabled"].(bool); ok { + globalOn = v + } + if v, ok := m["clientRegistration"].(bool); ok { + clientReg = v + } + if v, ok := m["injectTools"].(bool); ok { + injectTools = v + } + return globalOn, clientReg, injectTools, nil + } + return globalOn, clientReg, injectTools, nil +} + +func resolveKeycloakClientID(namespace, workloadName, serviceAccount string, spireEnabled bool, trustDomain string) (string, error) { + sa := strings.TrimSpace(serviceAccount) + if sa == "" { + sa = "default" + } + if !spireEnabled { + return namespace + "/" + workloadName, nil + } + if sa == "default" { + return "", fmt.Errorf("SPIRE enabled: set spec.template.spec.serviceAccountName to a dedicated ServiceAccount (not default) on the workload for a stable SPIFFE client ID") + } + if trustDomain == "" { + return "", fmt.Errorf("SPIRE enabled: operator --spire-trust-domain is required for operator-managed client registration") + } + return fmt.Sprintf("spiffe://%s/ns/%s/sa/%s", trustDomain, namespace, sa), nil +} + +func keycloakClientCredentialsSecretName(namespace, workload string) string { + sum := sha256.Sum256([]byte(namespace + "\000" + workload + "\000kagenti-keycloak-client-credentials")) + return "kagenti-keycloak-client-credentials-" + hex.EncodeToString(sum[:8]) +} + +func (r *ClientRegistrationReconciler) ensureClientCredentialsSecret(ctx context.Context, owner client.Object, secretName, clientID, clientSecret string) error { + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: owner.GetNamespace(), + Labels: map[string]string{ + LabelManagedBy: LabelManagedByValue, + }, + }, + } + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, sec, func() error { + if sec.Labels == nil { + sec.Labels = map[string]string{} + } + sec.Labels[LabelManagedBy] = LabelManagedByValue + sec.Type = corev1.SecretTypeOpaque + if sec.StringData == nil { + sec.StringData = map[string]string{} + } + sec.StringData["client-secret.txt"] = clientSecret + sec.StringData["client-id.txt"] = clientID + return controllerutil.SetControllerReference(owner, sec, r.Scheme) + }) + return err +} + +func clientRegistrationWorkloadPredicate(obj client.Object) bool { + switch o := obj.(type) { + case *appsv1.Deployment: + return workloadWantsOperatorClientReg(o.Spec.Template.Labels, true) + case *appsv1.StatefulSet: + return workloadWantsOperatorClientReg(o.Spec.Template.Labels, true) + default: + return false + } +} + +// SetupWithManager registers the controller. injectTools is resolved at reconcile time from cluster +// feature gates; the predicate uses injectTools=true so tool workloads are not dropped before gates load. +func (r *ClientRegistrationReconciler) SetupWithManager(mgr ctrl.Manager) error { + pred := predicate.NewPredicateFuncs(clientRegistrationWorkloadPredicate) + return ctrl.NewControllerManagedBy(mgr). + Named("clientregistration"). + For(&appsv1.Deployment{}, builder.WithPredicates(pred)). + Watches( + &appsv1.StatefulSet{}, + &handler.EnqueueRequestForObject{}, + builder.WithPredicates(pred), + ). + Complete(r) +} diff --git a/kagenti-operator/internal/controller/clientregistration_controller_test.go b/kagenti-operator/internal/controller/clientregistration_controller_test.go new file mode 100644 index 0000000..cce6c03 --- /dev/null +++ b/kagenti-operator/internal/controller/clientregistration_controller_test.go @@ -0,0 +1,375 @@ +package controller + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + + agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestWorkloadWantsOperatorClientReg(t *testing.T) { + cases := []struct { + name string + labels map[string]string + injectTools bool + want bool + }{ + { + name: "agent with opt-out", + labels: map[string]string{ + LabelAgentType: LabelValueAgent, + LabelClientRegistrationInject: "false", + }, + want: true, + }, + { + name: "agent without opt-out", + labels: map[string]string{ + LabelAgentType: LabelValueAgent, + }, + want: false, + }, + { + name: "tool with opt-out and injectTools", + labels: map[string]string{ + LabelAgentType: string(agentv1alpha1.RuntimeTypeTool), + LabelClientRegistrationInject: "false", + }, + injectTools: true, + want: true, + }, + { + name: "tool with opt-out no injectTools", + labels: map[string]string{ + LabelAgentType: string(agentv1alpha1.RuntimeTypeTool), + LabelClientRegistrationInject: "false", + }, + injectTools: false, + want: false, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := workloadWantsOperatorClientReg(tc.labels, tc.injectTools); got != tc.want { + t.Fatalf("want %v got %v", tc.want, got) + } + }) + } +} + +func TestInjectKeycloakClientCredentialsAnnotation(t *testing.T) { + pt := &corev1.PodTemplateSpec{} + secretName := "kagenti-keycloak-client-credentials-deadbeefcafe4242" + if !injectKeycloakClientCredentialsAnnotation(pt, secretName) { + t.Fatal("expected change") + } + if pt.Annotations[AnnotationKeycloakClientSecretName] != secretName { + t.Fatalf("annotation: %v", pt.Annotations) + } + if injectKeycloakClientCredentialsAnnotation(pt, secretName) { + t.Fatal("expected no change") + } +} + +func TestParsePlatformClientIDs(t *testing.T) { + if got := parsePlatformClientIDs(""); len(got) != 1 || got[0] != "kagenti" { + t.Fatalf("empty: %v", got) + } + if got := parsePlatformClientIDs("a, b"); len(got) != 2 || got[0] != "a" || got[1] != "b" { + t.Fatalf("list: %v", got) + } + if got := parsePlatformClientIDs(" , "); len(got) != 1 || got[0] != "kagenti" { + t.Fatalf("all blank: %v", got) + } +} + +func TestResolveKeycloakClientID(t *testing.T) { + id, err := resolveKeycloakClientID("ns1", "dep", "", false, "") + if err != nil || id != "ns1/dep" { + t.Fatalf("non-spire: %q %v", id, err) + } + _, err = resolveKeycloakClientID("ns1", "dep", "", true, "example.org") + if err == nil { + t.Fatal("expected error for default SA with SPIRE") + } + id, err = resolveKeycloakClientID("ns1", "dep", "mysa", true, "example.org") + if err != nil || id != "spiffe://example.org/ns/ns1/sa/mysa" { + t.Fatalf("spire: %q %v", id, err) + } +} + +func clientRegistrationTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + return s +} + +func clusterFeatureGatesConfigMap(clientRegistration bool) *corev1.ConfigMap { + reg := "false" + if clientRegistration { + reg = "true" + } + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ClusterDefaultsNamespace, + Name: ClusterFeatureGatesConfigMapName, + }, + Data: map[string]string{ + "gates.yaml": "globalEnabled: true\nclientRegistration: " + reg + "\ninjectTools: false\n", + }, + } +} + +func testDeploymentForClientReg(ns, name string) *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: name, + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": name, + LabelAgentType: LabelValueAgent, + LabelClientRegistrationInject: "false", + }, + }, + Spec: corev1.PodSpec{}, + }, + }, + } +} + +func authbridgeConfigMapForTest(ns, keycloakURL string) *corev1.ConfigMap { + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: authbridgeConfigConfigMap, + }, + Data: map[string]string{ + "KEYCLOAK_URL": keycloakURL, + "KEYCLOAK_REALM": "kagenti", + "KEYCLOAK_AUDIENCE_SCOPE_ENABLED": "false", + }, + } +} + +func keycloakAdminSecretForTest(ns string) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: keycloakAdminSecret, + }, + Data: map[string][]byte{ + "KEYCLOAK_ADMIN_USERNAME": []byte("admin"), + "KEYCLOAK_ADMIN_PASSWORD": []byte("secret"), + }, + } +} + +func startTestKeycloakServer(t *testing.T) *httptest.Server { + t.Helper() + // Matches keycloak.RegisterOrFetchClientWithToken defaults for the happy-path deployment: + // client ID test-ns/my-dep, client-secret auth, token exchange on (AuthBridge omits KEYCLOAK_TOKEN_EXCHANGE_ENABLED). + inSyncClientRep := map[string]any{ + "id": "uuid-1", + "clientId": "test-ns/my-dep", + "name": "test-ns/my-dep", + "standardFlowEnabled": true, + "directAccessGrantsEnabled": true, + "serviceAccountsEnabled": true, + "fullScopeAllowed": false, + "publicClient": false, + "clientAuthenticatorType": "client-secret", + "attributes": map[string]any{"standard.token.exchange.enabled": []any{"true"}}, + } + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/realms/master/protocol/openid-connect/token" && r.Method == http.MethodPost: + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{"access_token": "tok", "expires_in": 3600}) + case r.Method == http.MethodGet && r.URL.Query().Get("clientId") != "": + cid := r.URL.Query().Get("clientId") + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]string{{"id": "uuid-1", "clientId": cid}}) + case r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/client-secret"): + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"value": "client-secret-value"}) + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/admin/realms/"): + // GET .../realms/{realm}/clients/{uuid} (reconcile path; not list ?clientId=, not .../client-secret) + trim := strings.TrimPrefix(r.URL.Path, "/admin/realms/") + parts := strings.Split(trim, "/") + if len(parts) == 3 && parts[1] == "clients" { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(inSyncClientRep) + return + } + fallthrough + default: + t.Errorf("unexpected Keycloak request %s %s", r.Method, r.URL.Path) + http.NotFound(w, r) + } + })) +} + +func TestClientRegistrationReconciler_Reconcile(t *testing.T) { + const ( + workNs = "test-ns" + depName = "my-dep" + ) + req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: workNs, Name: depName}} + requeue := 30 * time.Second + ctx := context.Background() + + globalOffGates := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ClusterDefaultsNamespace, + Name: ClusterFeatureGatesConfigMapName, + }, + Data: map[string]string{ + "gates.yaml": "globalEnabled: false\nclientRegistration: true\ninjectTools: false\n", + }, + } + + reconcileCases := []struct { + name string + objs []client.Object + wantRequeue time.Duration + check func(t *testing.T, c client.Client) + }{ + { + name: "feature gates disable client registration", + objs: []client.Object{ + clusterFeatureGatesConfigMap(false), + testDeploymentForClientReg(workNs, depName), + }, + check: func(t *testing.T, c client.Client) { + dep := &appsv1.Deployment{} + if err := c.Get(ctx, req.NamespacedName, dep); err != nil { + t.Fatal(err) + } + if dep.Spec.Template.Annotations != nil && dep.Spec.Template.Annotations[AnnotationKeycloakClientSecretName] != "" { + t.Fatalf("expected no credentials annotation when gates off, got %v", dep.Spec.Template.Annotations) + } + }, + }, + { + name: "global feature gate disabled skips before workload fetch", + objs: []client.Object{globalOffGates}, + }, + { + name: "deployment and statefulset not found", + objs: []client.Object{clusterFeatureGatesConfigMap(true)}, + }, + { + name: "missing authbridge config waits with requeue", + objs: []client.Object{ + clusterFeatureGatesConfigMap(true), + testDeploymentForClientReg(workNs, depName), + }, + wantRequeue: requeue, + }, + { + name: "missing keycloak admin secret waits with requeue", + objs: []client.Object{ + clusterFeatureGatesConfigMap(true), + testDeploymentForClientReg(workNs, depName), + authbridgeConfigMapForTest(workNs, "https://keycloak.example"), + }, + wantRequeue: requeue, + }, + } + + for _, tc := range reconcileCases { + t.Run(tc.name, func(t *testing.T) { + scheme := clientRegistrationTestScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tc.objs...).Build() + r := &ClientRegistrationReconciler{Client: c, Scheme: scheme} + res, err := r.Reconcile(ctx, req) + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + if tc.wantRequeue != 0 { + if res.RequeueAfter != tc.wantRequeue { + t.Fatalf("got RequeueAfter=%v, want %v", res.RequeueAfter, tc.wantRequeue) + } + } else if res != (ctrl.Result{}) { + t.Fatalf("got %#v, want zero ctrl.Result", res) + } + if tc.check != nil { + tc.check(t, c) + } + }) + } + + t.Run("happy path registers client patches deployment and creates secret", func(t *testing.T) { + srv := startTestKeycloakServer(t) + defer srv.Close() + + scheme := clientRegistrationTestScheme(t) + dep := testDeploymentForClientReg(workNs, depName) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects( + clusterFeatureGatesConfigMap(true), + dep, + authbridgeConfigMapForTest(workNs, srv.URL), + keycloakAdminSecretForTest(workNs), + ).Build() + r := &ClientRegistrationReconciler{Client: c, Scheme: scheme} + res, err := r.Reconcile(ctx, req) + if err != nil || res != (ctrl.Result{}) { + t.Fatalf("got (%v, %v), want (zero Result, nil)", res, err) + } + + secretName := keycloakClientCredentialsSecretName(workNs, depName) + got := &appsv1.Deployment{} + if err := c.Get(ctx, req.NamespacedName, got); err != nil { + t.Fatal(err) + } + if got.Spec.Template.Annotations == nil || got.Spec.Template.Annotations[AnnotationKeycloakClientSecretName] != secretName { + t.Fatalf("pod template annotation: %#v", got.Spec.Template.Annotations) + } + + sec := &corev1.Secret{} + secKey := types.NamespacedName{Namespace: workNs, Name: secretName} + if err := c.Get(ctx, secKey, sec); err != nil { + t.Fatal(err) + } + // Fake client may leave credential keys in StringData (like an apiserver write path) instead of Data. + clientID := string(sec.Data["client-id.txt"]) + if clientID == "" && sec.StringData != nil { + clientID = sec.StringData["client-id.txt"] + } + clientSecret := string(sec.Data["client-secret.txt"]) + if clientSecret == "" && sec.StringData != nil { + clientSecret = sec.StringData["client-secret.txt"] + } + if clientID != workNs+"/"+depName { + t.Fatalf("client-id: %q (secret %#v)", clientID, sec) + } + if clientSecret != "client-secret-value" { + t.Fatalf("client-secret: %q", clientSecret) + } + }) +} diff --git a/kagenti-operator/internal/keycloak/admin.go b/kagenti-operator/internal/keycloak/admin.go new file mode 100644 index 0000000..f5078ba --- /dev/null +++ b/kagenti-operator/internal/keycloak/admin.go @@ -0,0 +1,506 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package keycloak + +import ( + "bytes" + "context" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +// ClientRegistrationParams mirrors the AuthBridge client-registration sidecar contract. +type ClientRegistrationParams struct { + Realm string + ClientID string // Keycloak OAuth clientId (SPIFFE ID or namespace/name) + ClientName string // Human-readable name field in Keycloak + ClientAuthType string // "client-secret" or "federated-jwt" + SpiffeIDPAlias string // Keycloak SPIFFE IdP alias when using federated-jwt + TokenExchangeEnable bool +} + +type adminTokenResponse struct { + AccessToken string `json:"access_token"` + ExpiresIn int `json:"expires_in"` +} + +type keycloakClientRep struct { + ID string `json:"id,omitempty"` + ClientID string `json:"clientId"` + Name string `json:"name"` + + StandardFlowEnabled bool `json:"standardFlowEnabled"` + DirectAccessGrantsEnabled bool `json:"directAccessGrantsEnabled"` + ServiceAccountsEnabled bool `json:"serviceAccountsEnabled"` + FullScopeAllowed bool `json:"fullScopeAllowed"` + PublicClient bool `json:"publicClient"` + ClientAuthenticatorType string `json:"clientAuthenticatorType"` + Attributes map[string]string `json:"attributes"` +} + +type clientSecretRep struct { + Value string `json:"value"` +} + +// Admin is a minimal Keycloak admin REST client (password grant, client CRUD, secret read). +type Admin struct { + BaseURL string // e.g. https://keycloak.example.com:8080 (no trailing path) + HTTPClient *http.Client +} + +func (a *Admin) httpc() *http.Client { + if a.HTTPClient != nil { + return a.HTTPClient + } + return http.DefaultClient +} + +func trimBaseURL(base string) string { + return strings.TrimRight(strings.TrimSpace(base), "/") +} + +// adminToken returns an access token and its absolute expiry time (from OAuth expires_in). +// If expires_in is absent or zero, a conservative default TTL is applied. +func (a *Admin) adminToken(ctx context.Context, username, password string) (string, time.Time, error) { + base := trimBaseURL(a.BaseURL) + form := url.Values{} + form.Set("grant_type", "password") + form.Set("client_id", "admin-cli") + form.Set("username", username) + form.Set("password", password) + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, + base+"/realms/master/protocol/openid-connect/token", + strings.NewReader(form.Encode())) + if err != nil { + return "", time.Time{}, err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + resp, err := a.httpc().Do(req) + if err != nil { + return "", time.Time{}, err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return "", time.Time{}, fmt.Errorf("keycloak token: status %d: %s", resp.StatusCode, truncate(body, 512)) + } + var tr adminTokenResponse + if err := json.Unmarshal(body, &tr); err != nil { + return "", time.Time{}, fmt.Errorf("keycloak token decode: %w", err) + } + if tr.AccessToken == "" { + return "", time.Time{}, fmt.Errorf("keycloak token: empty access_token") + } + expiresIn := tr.ExpiresIn + if expiresIn <= 0 { + expiresIn = 60 + } + expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second) + return tr.AccessToken, expiresAt, nil +} + +func truncate(b []byte, n int) string { + s := string(b) + if len(s) <= n { + return s + } + return s[:n] + "..." +} + +// PasswordGrantToken returns an admin access token using the master realm password grant (admin-cli). +func (a *Admin) PasswordGrantToken(ctx context.Context, adminUser, adminPass string) (string, error) { + token, _, err := a.adminToken(ctx, adminUser, adminPass) + return token, err +} + +// RegisterOrFetchClient ensures an OAuth client exists and returns its internal UUID and client secret value. +func (a *Admin) RegisterOrFetchClient(ctx context.Context, adminUser, adminPass string, p ClientRegistrationParams) (internalID, secret string, err error) { + token, _, err := a.adminToken(ctx, adminUser, adminPass) + if err != nil { + return "", "", err + } + return a.RegisterOrFetchClientWithToken(ctx, token, p) +} + +// RegisterOrFetchClientWithToken is like RegisterOrFetchClient but reuses an existing admin token. +func (a *Admin) RegisterOrFetchClientWithToken(ctx context.Context, token string, p ClientRegistrationParams) (internalID, secret string, err error) { + authType := p.ClientAuthType + if authType == "" { + authType = "client-secret" + } + + attrs := map[string]string{ + "standard.token.exchange.enabled": fmt.Sprintf("%t", p.TokenExchangeEnable), + } + if authType == "federated-jwt" { + alias := p.SpiffeIDPAlias + if alias == "" { + alias = "spire-spiffe" + } + attrs["jwt.credential.issuer"] = alias + attrs["jwt.credential.sub"] = p.ClientID + } + + rep := keycloakClientRep{ + ClientID: p.ClientID, + Name: p.ClientName, + StandardFlowEnabled: true, + DirectAccessGrantsEnabled: true, + ServiceAccountsEnabled: true, + FullScopeAllowed: false, + PublicClient: false, + ClientAuthenticatorType: authType, + Attributes: attrs, + } + + internalID, err = a.findClientUUID(ctx, token, p.Realm, p.ClientID) + if err != nil { + return "", "", err + } + if internalID == "" { + internalID, err = a.createClient(ctx, token, p.Realm, &rep) + if err != nil { + return "", "", err + } + } else { + if err := a.reconcileExistingClient(ctx, token, p.Realm, internalID, &rep); err != nil { + return "", "", err + } + } + + secret, err = a.readClientSecret(ctx, token, p.Realm, internalID) + if err != nil { + return "", "", err + } + return internalID, secret, nil +} + +func (a *Admin) findClientUUID(ctx context.Context, token, realm, clientID string) (string, error) { + base := trimBaseURL(a.BaseURL) + u, err := url.Parse(base + "/admin/realms/" + url.PathEscape(realm) + "/clients") + if err != nil { + return "", err + } + q := u.Query() + q.Set("clientId", clientID) + u.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := a.httpc().Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("keycloak list clients: status %d: %s", resp.StatusCode, truncate(body, 512)) + } + var list []struct { + ID string `json:"id"` + ClientID string `json:"clientId"` + } + if err := json.Unmarshal(body, &list); err != nil { + return "", fmt.Errorf("keycloak list clients decode: %w", err) + } + for i := range list { + if list[i].ClientID == clientID { + return list[i].ID, nil + } + } + return "", nil +} + +func (a *Admin) createClient(ctx context.Context, token, realm string, rep *keycloakClientRep) (string, error) { + base := trimBaseURL(a.BaseURL) + payload, err := json.Marshal(rep) + if err != nil { + return "", err + } + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/clients" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload)) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpc().Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusCreated { + loc := resp.Header.Get("Location") + if loc != "" { + if id := pathLastSegment(loc); id != "" { + return id, nil + } + } + // Fall through to lookup + return a.findClientUUID(ctx, token, realm, rep.ClientID) + } + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode == http.StatusConflict { + return a.findClientUUID(ctx, token, realm, rep.ClientID) + } + return "", fmt.Errorf("keycloak create client: status %d: %s", resp.StatusCode, truncate(body, 512)) +} + +func pathLastSegment(loc string) string { + loc = strings.TrimRight(loc, "/") + if idx := strings.LastIndex(loc, "/"); idx >= 0 { + return loc[idx+1:] + } + return "" +} + +// reconcileExistingClient loads the Keycloak client and PUTs the representation when managed fields drift. +func (a *Admin) reconcileExistingClient(ctx context.Context, token, realm, internalUUID string, desired *keycloakClientRep) error { + current, err := a.getClientRepresentationMap(ctx, token, realm, internalUUID) + if err != nil { + return err + } + if !clientRepDrifted(current, desired) { + return nil + } + mergeDesiredClientIntoMap(current, desired) + return a.updateClient(ctx, token, realm, internalUUID, current) +} + +func (a *Admin) getClientRepresentationMap(ctx context.Context, token, realm, internalUUID string) (map[string]interface{}, error) { + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/clients/" + url.PathEscape(internalUUID) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := a.httpc().Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("keycloak get client: status %d: %s", resp.StatusCode, truncate(body, 512)) + } + var m map[string]interface{} + if err := json.Unmarshal(body, &m); err != nil { + return nil, fmt.Errorf("keycloak get client decode: %w", err) + } + return m, nil +} + +func keycloakAttrString(attrs interface{}, key string) string { + m, ok := attrs.(map[string]interface{}) + if !ok || m == nil { + return "" + } + v, ok := m[key] + if !ok || v == nil { + return "" + } + switch x := v.(type) { + case string: + return x + case []interface{}: + if len(x) == 0 { + return "" + } + if s, ok := x[0].(string); ok { + return s + } + case []string: + if len(x) > 0 { + return x[0] + } + } + return "" +} + +func boolFromJSONMap(m map[string]interface{}, key string) bool { + v, ok := m[key] + if !ok || v == nil { + return false + } + switch x := v.(type) { + case bool: + return x + case float64: + return x != 0 + } + return false +} + +func stringFromJSONMap(m map[string]interface{}, key string) string { + v, ok := m[key] + if !ok || v == nil { + return "" + } + if s, ok := v.(string); ok { + return s + } + return "" +} + +func clientRepDrifted(current map[string]interface{}, desired *keycloakClientRep) bool { + if stringFromJSONMap(current, "name") != desired.Name { + return true + } + if boolFromJSONMap(current, "standardFlowEnabled") != desired.StandardFlowEnabled { + return true + } + if boolFromJSONMap(current, "directAccessGrantsEnabled") != desired.DirectAccessGrantsEnabled { + return true + } + if boolFromJSONMap(current, "serviceAccountsEnabled") != desired.ServiceAccountsEnabled { + return true + } + if boolFromJSONMap(current, "fullScopeAllowed") != desired.FullScopeAllowed { + return true + } + if boolFromJSONMap(current, "publicClient") != desired.PublicClient { + return true + } + if stringFromJSONMap(current, "clientAuthenticatorType") != desired.ClientAuthenticatorType { + return true + } + attrs := current["attributes"] + for k, want := range desired.Attributes { + if keycloakAttrString(attrs, k) != want { + return true + } + } + return false +} + +func mergeDesiredClientIntoMap(m map[string]interface{}, desired *keycloakClientRep) { + m["clientId"] = desired.ClientID + m["name"] = desired.Name + m["standardFlowEnabled"] = desired.StandardFlowEnabled + m["directAccessGrantsEnabled"] = desired.DirectAccessGrantsEnabled + m["serviceAccountsEnabled"] = desired.ServiceAccountsEnabled + m["fullScopeAllowed"] = desired.FullScopeAllowed + m["publicClient"] = desired.PublicClient + m["clientAuthenticatorType"] = desired.ClientAuthenticatorType + + var attrs map[string]interface{} + if raw, ok := m["attributes"].(map[string]interface{}); ok && raw != nil { + attrs = raw + } else { + attrs = make(map[string]interface{}) + m["attributes"] = attrs + } + for k, v := range desired.Attributes { + attrs[k] = []interface{}{v} + } +} + +func (a *Admin) updateClient(ctx context.Context, token, realm, internalUUID string, rep map[string]interface{}) error { + base := trimBaseURL(a.BaseURL) + payload, err := json.Marshal(rep) + if err != nil { + return err + } + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/clients/" + url.PathEscape(internalUUID) + req, err := http.NewRequestWithContext(ctx, http.MethodPut, endpoint, bytes.NewReader(payload)) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpc().Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusOK { + return nil + } + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("keycloak update client: status %d: %s", resp.StatusCode, truncate(body, 512)) +} + +func (a *Admin) readClientSecret(ctx context.Context, token, realm, internalUUID string) (string, error) { + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/clients/" + url.PathEscape(internalUUID) + "/client-secret" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := a.httpc().Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("keycloak client secret: status %d: %s", resp.StatusCode, truncate(body, 512)) + } + var cs clientSecretRep + if err := json.Unmarshal(body, &cs); err != nil { + return "", fmt.Errorf("keycloak client secret decode: %w", err) + } + return cs.Value, nil +} + +// HTTPClientConfig configures optional TLS and transport for Keycloak admin HTTP clients. +// Use it with NewHTTPClient when Keycloak is served with a certificate from a private CA +// (set TLSConfig.RootCAs) or when you need a custom RoundTripper. +type HTTPClientConfig struct { + // Timeout for the whole client. Zero means 60 seconds. + Timeout time.Duration + // Transport, if non-nil, is used as the client's RoundTripper. TLSConfig is ignored when + // Transport is set; configure TLS on the transport yourself. + Transport http.RoundTripper + // TLSConfig is used when Transport is nil: a clone of http.DefaultTransport gets this TLS + // config (for example RootCAs for a private CA). If nil, default TLS verification applies. + TLSConfig *tls.Config +} + +// NewHTTPClient returns an HTTP client suitable for Keycloak admin calls. +func NewHTTPClient(cfg HTTPClientConfig) *http.Client { + timeout := cfg.Timeout + if timeout == 0 { + timeout = 60 * time.Second + } + var rt http.RoundTripper + switch { + case cfg.Transport != nil: + rt = cfg.Transport + case cfg.TLSConfig != nil: + tr := http.DefaultTransport.(*http.Transport).Clone() + tr.TLSClientConfig = cfg.TLSConfig.Clone() + rt = tr + default: + rt = nil + } + return &http.Client{Timeout: timeout, Transport: rt} +} + +// DefaultHTTPClient returns an HTTP client suitable for Keycloak admin calls. +func DefaultHTTPClient() *http.Client { + return NewHTTPClient(HTTPClientConfig{}) +} diff --git a/kagenti-operator/internal/keycloak/admin_test.go b/kagenti-operator/internal/keycloak/admin_test.go new file mode 100644 index 0000000..cc59b4d --- /dev/null +++ b/kagenti-operator/internal/keycloak/admin_test.go @@ -0,0 +1,161 @@ +package keycloak + +import ( + "context" + "crypto/tls" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func inSyncClientRep() map[string]interface{} { + return map[string]interface{}{ + "id": "uuid-1", + "clientId": "ns/workload", + "name": "ns/workload", + "standardFlowEnabled": true, + "directAccessGrantsEnabled": true, + "serviceAccountsEnabled": true, + "fullScopeAllowed": false, + "publicClient": false, + "clientAuthenticatorType": "client-secret", + "attributes": map[string]interface{}{"standard.token.exchange.enabled": []interface{}{"false"}}, + } +} + +func TestAdmin_RegisterOrFetchClient(t *testing.T) { + var tokenCalls, listCalls, getCalls, createCalls, putCalls, secretCalls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/realms/master/protocol/openid-connect/token": + tokenCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"access_token": "t"}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients") && r.Method == http.MethodGet && r.URL.Query().Get("clientId") != "": + listCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]string{{"id": "uuid-1", "clientId": "ns/workload"}}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && strings.HasSuffix(r.URL.Path, "/client-secret"): + secretCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"value": "topsecret"}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && r.Method == http.MethodGet: + getCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(inSyncClientRep()) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && r.Method == http.MethodPut: + putCalls++ + t.Fatal("unexpected PUT when client is in sync") + case r.URL.Path == "/admin/realms/kagenti/clients" && r.Method == http.MethodPost: + createCalls++ + t.Fatal("unexpected create when client exists") + default: + t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path) + } + })) + defer srv.Close() + + a := Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + id, sec, err := a.RegisterOrFetchClient(context.Background(), "admin", "pw", ClientRegistrationParams{ + Realm: "kagenti", + ClientID: "ns/workload", + ClientName: "ns/workload", + }) + if err != nil { + t.Fatal(err) + } + if id != "uuid-1" || sec != "topsecret" { + t.Fatalf("got id=%q sec=%q", id, sec) + } + if tokenCalls != 1 || listCalls != 1 || getCalls != 1 || secretCalls != 1 || putCalls != 0 { + t.Fatalf("calls token=%d list=%d get=%d put=%d create=%d secret=%d", tokenCalls, listCalls, getCalls, putCalls, createCalls, secretCalls) + } +} + +func TestAdmin_RegisterOrFetchClient_updatesDrift(t *testing.T) { + var getCalls, putCalls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/realms/master/protocol/openid-connect/token": + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"access_token": "t"}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients") && r.Method == http.MethodGet && r.URL.Query().Get("clientId") != "": + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]string{{"id": "uuid-1", "clientId": "ns/workload"}}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && strings.HasSuffix(r.URL.Path, "/client-secret"): + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"value": "topsecret"}) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && r.Method == http.MethodGet: + getCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(inSyncClientRep()) + case strings.HasPrefix(r.URL.Path, "/admin/realms/kagenti/clients/") && r.Method == http.MethodPut: + putCalls++ + if r.URL.Path != "/admin/realms/kagenti/clients/uuid-1" { + t.Fatalf("unexpected PUT path %s", r.URL.Path) + } + var body map[string]interface{} + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + t.Fatal(err) + } + attrs, _ := body["attributes"].(map[string]interface{}) + if attrs == nil { + t.Fatal("expected attributes in PUT body") + } + ex, _ := attrs["standard.token.exchange.enabled"].([]interface{}) + if len(ex) != 1 || ex[0] != "true" { + t.Fatalf("expected token exchange true in PUT, got %#v", attrs["standard.token.exchange.enabled"]) + } + w.WriteHeader(http.StatusNoContent) + case r.URL.Path == "/admin/realms/kagenti/clients" && r.Method == http.MethodPost: + t.Fatal("unexpected create") + default: + t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path) + } + })) + defer srv.Close() + + a := Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + _, _, err := a.RegisterOrFetchClient(context.Background(), "admin", "pw", ClientRegistrationParams{ + Realm: "kagenti", + ClientID: "ns/workload", + ClientName: "ns/workload", + TokenExchangeEnable: true, + }) + if err != nil { + t.Fatal(err) + } + if getCalls != 1 || putCalls != 1 { + t.Fatalf("expected 1 get and 1 put, got get=%d put=%d", getCalls, putCalls) + } +} + +func TestNewHTTPClient_defaults(t *testing.T) { + c := NewHTTPClient(HTTPClientConfig{}) + if c.Timeout != 60*time.Second || c.Transport != nil { + t.Fatalf("expected 60s timeout and nil transport, got timeout=%v transport=%v", c.Timeout, c.Transport) + } +} + +func TestNewHTTPClient_TLSConfig(t *testing.T) { + tlsCfg := &tls.Config{MinVersion: tls.VersionTLS12} + c := NewHTTPClient(HTTPClientConfig{TLSConfig: tlsCfg}) + tr, ok := c.Transport.(*http.Transport) + if !ok || tr.TLSClientConfig == nil { + t.Fatalf("expected *http.Transport with TLSClientConfig, got %T", c.Transport) + } + if tr.TLSClientConfig.MinVersion != tls.VersionTLS12 { + t.Fatalf("MinVersion: got %v want %v", tr.TLSClientConfig.MinVersion, tls.VersionTLS12) + } +} + +func TestNewHTTPClient_customTransport(t *testing.T) { + custom := http.DefaultTransport + c := NewHTTPClient(HTTPClientConfig{Transport: custom, TLSConfig: &tls.Config{MinVersion: tls.VersionTLS13}}) + if c.Transport != custom { + t.Fatal("expected custom transport to be used as-is") + } +} diff --git a/kagenti-operator/internal/keycloak/audience.go b/kagenti-operator/internal/keycloak/audience.go new file mode 100644 index 0000000..800cbef --- /dev/null +++ b/kagenti-operator/internal/keycloak/audience.go @@ -0,0 +1,252 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package keycloak + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" +) + +// AudienceParams configures audience client-scope management (mirrors AuthBridge client_registration.py). +type AudienceParams struct { + Realm string + ClientName string // e.g. namespace/workload — used to derive scope name + AudienceClientID string // OAuth clientId / SPIFFE ID used as custom audience in the mapper + PlatformClientIDs []string // Keycloak clientId strings (e.g. UI client), not internal UUIDs + AudienceScopeEnabled bool // when false, EnsureAudienceScope is a no-op +} + +type clientScopeListItem struct { + ID string `json:"id"` + Name string `json:"name"` +} + +type clientScopeCreateRep struct { + Name string `json:"name"` + Protocol string `json:"protocol"` + Attributes map[string]string `json:"attributes,omitempty"` +} + +type protocolMapperRep struct { + Name string `json:"name"` + Protocol string `json:"protocol"` + ProtocolMapper string `json:"protocolMapper"` + ConsentRequired bool `json:"consentRequired"` + Config map[string]string `json:"config"` +} + +// AudienceScopeName derives the realm client-scope name from CLIENT_NAME (same as Python). +func AudienceScopeName(clientName string) string { + return "agent-" + strings.ReplaceAll(clientName, "/", "-") + "-aud" +} + +// EnsureAudienceScope creates or reuses an audience client scope, adds the oidc-audience mapper, +// registers it as a realm default default client scope, and attaches it to each platform client. +// Missing platform clients are skipped (like Python). Realm / per-client attachment errors are ignored +// except they are swallowed (Python prints only); attachment uses best-effort PUT with 204/409 success. +func (a *Admin) EnsureAudienceScope(ctx context.Context, token string, p AudienceParams) error { + if !p.AudienceScopeEnabled { + return nil + } + scopeName := AudienceScopeName(p.ClientName) + scopeID, err := a.getOrCreateAudienceClientScope(ctx, token, p.Realm, scopeName, p.AudienceClientID) + if err != nil { + return err + } + _ = a.putRealmDefaultDefaultClientScope(ctx, token, p.Realm, scopeID) + for _, plat := range p.PlatformClientIDs { + plat = strings.TrimSpace(plat) + if plat == "" { + continue + } + internal, err := a.findClientUUID(ctx, token, p.Realm, plat) + if err != nil || internal == "" { + continue + } + _ = a.putClientDefaultClientScope(ctx, token, p.Realm, internal, scopeID) + } + return nil +} + +func (a *Admin) getOrCreateAudienceClientScope(ctx context.Context, token, realm, scopeName, audience string) (string, error) { + scopeID, err := a.findClientScopeIDByName(ctx, token, realm, scopeName) + if err != nil { + return "", err + } + if scopeID != "" { + _ = a.ensureAudienceMapper(ctx, token, realm, scopeID, scopeName, audience) + return scopeID, nil + } + + scopeID, err = a.createClientScope(ctx, token, realm, clientScopeCreateRep{ + Name: scopeName, + Protocol: "openid-connect", + Attributes: map[string]string{ + "include.in.token.scope": "true", + "display.on.consent.screen": "true", + }, + }) + if err != nil { + return "", err + } + if scopeID == "" { + return "", fmt.Errorf("create client scope %q returned empty id", scopeName) + } + _ = a.ensureAudienceMapper(ctx, token, realm, scopeID, scopeName, audience) + return scopeID, nil +} + +func (a *Admin) findClientScopeIDByName(ctx context.Context, token, realm, name string) (string, error) { + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/client-scopes" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := a.httpc().Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("keycloak list client-scopes: status %d: %s", resp.StatusCode, truncate(body, 512)) + } + var list []clientScopeListItem + if err := json.Unmarshal(body, &list); err != nil { + return "", fmt.Errorf("keycloak list client-scopes decode: %w", err) + } + for i := range list { + if list[i].Name == name { + return list[i].ID, nil + } + } + return "", nil +} + +func (a *Admin) createClientScope(ctx context.Context, token, realm string, rep clientScopeCreateRep) (string, error) { + base := trimBaseURL(a.BaseURL) + payload, err := json.Marshal(rep) + if err != nil { + return "", err + } + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/client-scopes" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload)) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpc().Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusCreated { + if loc := resp.Header.Get("Location"); loc != "" { + if id := pathLastSegment(loc); id != "" { + return id, nil + } + } + return a.findClientScopeIDByName(ctx, token, realm, rep.Name) + } + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode == http.StatusConflict { + return a.findClientScopeIDByName(ctx, token, realm, rep.Name) + } + return "", fmt.Errorf("keycloak create client-scope: status %d: %s", resp.StatusCode, truncate(body, 512)) +} + +func (a *Admin) ensureAudienceMapper(ctx context.Context, token, realm, scopeID, scopeName, audience string) error { + mapper := protocolMapperRep{ + Name: scopeName, + Protocol: "openid-connect", + ProtocolMapper: "oidc-audience-mapper", + ConsentRequired: false, + Config: map[string]string{ + "included.custom.audience": audience, + "id.token.claim": "false", + "access.token.claim": "true", + "userinfo.token.claim": "false", + }, + } + payload, err := json.Marshal(mapper) + if err != nil { + return err + } + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/client-scopes/" + url.PathEscape(scopeID) + "/protocol-mappers/models" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload)) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpc().Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusConflict { + return nil + } + body, _ := io.ReadAll(resp.Body) + // Mapper may already exist — treat other errors as non-fatal (Python logs and continues). + if resp.StatusCode >= 400 { + return fmt.Errorf("keycloak add audience mapper: status %d: %s", resp.StatusCode, truncate(body, 256)) + } + return nil +} + +func (a *Admin) putRealmDefaultDefaultClientScope(ctx context.Context, token, realm, scopeID string) error { + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/default-default-client-scopes/" + url.PathEscape(scopeID) + return a.putNoBodyExpectSuccess(ctx, token, endpoint) +} + +func (a *Admin) putClientDefaultClientScope(ctx context.Context, token, realm, clientInternalUUID, scopeID string) error { + base := trimBaseURL(a.BaseURL) + endpoint := base + "/admin/realms/" + url.PathEscape(realm) + "/clients/" + url.PathEscape(clientInternalUUID) + "/default-client-scopes/" + url.PathEscape(scopeID) + return a.putNoBodyExpectSuccess(ctx, token, endpoint) +} + +func (a *Admin) putNoBodyExpectSuccess(ctx context.Context, token, endpoint string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, endpoint, nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := a.httpc().Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + // 204 success; 409 often means already linked; 404 can mean already removed / wrong id — ignore like Python prints. + if resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusConflict { + return nil + } + if resp.StatusCode == http.StatusNotFound { + return nil + } + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return nil + } + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("keycloak PUT %s: status %d: %s", endpoint, resp.StatusCode, truncate(body, 256)) +} diff --git a/kagenti-operator/internal/keycloak/audience_test.go b/kagenti-operator/internal/keycloak/audience_test.go new file mode 100644 index 0000000..adb0ea3 --- /dev/null +++ b/kagenti-operator/internal/keycloak/audience_test.go @@ -0,0 +1,81 @@ +package keycloak + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestAudienceScopeName(t *testing.T) { + if got := AudienceScopeName("team1/my-agent"); got != "agent-team1-my-agent-aud" { + t.Fatalf("got %q", got) + } +} + +func TestEnsureAudienceScope(t *testing.T) { + var listScopesCalls, postScopeCalls, postMapperCalls, putRealmCalls, listKagentiCalls, putClientCalls int + var srv *httptest.Server + srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + switch { + case path == "/realms/master/protocol/openid-connect/token": + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"access_token": "tok"}) + case path == "/admin/realms/kagenti/client-scopes" && r.Method == http.MethodGet: + listScopesCalls++ + _ = json.NewEncoder(w).Encode([]clientScopeListItem{}) + case path == "/admin/realms/kagenti/client-scopes" && r.Method == http.MethodPost: + postScopeCalls++ + w.Header().Set("Location", srv.URL+"/admin/realms/kagenti/client-scopes/new-scope-id") + w.WriteHeader(http.StatusCreated) + case strings.Contains(path, "/client-scopes/new-scope-id/protocol-mappers/models") && r.Method == http.MethodPost: + postMapperCalls++ + w.WriteHeader(http.StatusCreated) + case path == "/admin/realms/kagenti/default-default-client-scopes/new-scope-id" && r.Method == http.MethodPut: + putRealmCalls++ + w.WriteHeader(http.StatusNoContent) + case strings.HasPrefix(path, "/admin/realms/kagenti/clients") && r.Method == http.MethodGet && r.URL.Query().Get("clientId") == "kagenti": + listKagentiCalls++ + _ = json.NewEncoder(w).Encode([]map[string]string{{"id": "plat-int", "clientId": "kagenti"}}) + case path == "/admin/realms/kagenti/clients/plat-int/default-client-scopes/new-scope-id" && r.Method == http.MethodPut: + putClientCalls++ + w.WriteHeader(http.StatusNoContent) + default: + t.Fatalf("unexpected %s %s", r.Method, path) + } + })) + defer srv.Close() + + a := Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + token, err := a.PasswordGrantToken(context.Background(), "u", "p") + if err != nil { + t.Fatal(err) + } + err = a.EnsureAudienceScope(context.Background(), token, AudienceParams{ + Realm: "kagenti", + ClientName: "ns/wl", + AudienceClientID: "ns/wl", + PlatformClientIDs: []string{"kagenti"}, + AudienceScopeEnabled: true, + }) + if err != nil { + t.Fatal(err) + } + if listScopesCalls != 1 { + t.Fatalf("listScopesCalls=%d", listScopesCalls) + } + if postScopeCalls != 1 || postMapperCalls != 1 || putRealmCalls != 1 || listKagentiCalls != 1 || putClientCalls != 1 { + t.Fatalf("calls scope=%d mapper=%d realm=%d listK=%d putC=%d", postScopeCalls, postMapperCalls, putRealmCalls, listKagentiCalls, putClientCalls) + } +} + +func TestEnsureAudienceScope_Disabled(t *testing.T) { + a := Admin{} + err := a.EnsureAudienceScope(context.Background(), "t", AudienceParams{AudienceScopeEnabled: false}) + if err != nil { + t.Fatal(err) + } +} diff --git a/kagenti-operator/internal/keycloak/token_cache.go b/kagenti-operator/internal/keycloak/token_cache.go new file mode 100644 index 0000000..87ae596 --- /dev/null +++ b/kagenti-operator/internal/keycloak/token_cache.go @@ -0,0 +1,72 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package keycloak + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "strings" + "sync" + "time" +) + +// tokenCacheSkew is how long before OAuth expiry we refresh the cached admin token. +const tokenCacheSkew = 60 * time.Second + +// CachedAdminTokenProvider caches Keycloak admin password-grant tokens keyed by base URL and +// credentials so frequent reconciles do not issue a new token request every time. +type CachedAdminTokenProvider struct { + mu sync.Mutex + entries map[string]cachedAdminTokenEntry +} + +type cachedAdminTokenEntry struct { + token string + expiresAt time.Time +} + +func adminTokenCacheKey(baseURL, username, password string) string { + base := strings.TrimRight(strings.TrimSpace(baseURL), "/") + sum := sha256.Sum256([]byte(base + "\x00" + username + "\x00" + password)) + return hex.EncodeToString(sum[:]) +} + +// Token returns a valid admin access token, reusing the cache when the token is not near expiry. +func (p *CachedAdminTokenProvider) Token(ctx context.Context, a *Admin, adminUser, adminPass string) (string, error) { + key := adminTokenCacheKey(a.BaseURL, adminUser, adminPass) + now := time.Now() + + p.mu.Lock() + if p.entries != nil { + for k, e := range p.entries { + if !now.Before(e.expiresAt) { + delete(p.entries, k) + } + } + if e, ok := p.entries[key]; ok && now.Before(e.expiresAt.Add(-tokenCacheSkew)) { + tok := e.token + p.mu.Unlock() + return tok, nil + } + } + p.mu.Unlock() + + token, expiresAt, err := a.adminToken(ctx, adminUser, adminPass) + if err != nil { + return "", err + } + + p.mu.Lock() + defer p.mu.Unlock() + if p.entries == nil { + p.entries = make(map[string]cachedAdminTokenEntry) + } + p.entries[key] = cachedAdminTokenEntry{token: token, expiresAt: expiresAt} + return token, nil +} diff --git a/kagenti-operator/internal/keycloak/token_cache_test.go b/kagenti-operator/internal/keycloak/token_cache_test.go new file mode 100644 index 0000000..76a6191 --- /dev/null +++ b/kagenti-operator/internal/keycloak/token_cache_test.go @@ -0,0 +1,99 @@ +package keycloak + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestCachedAdminTokenProvider_Token(t *testing.T) { + var tokenCalls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/realms/master/protocol/openid-connect/token" { + t.Fatalf("unexpected path %s", r.URL.Path) + } + tokenCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "access_token": "tok", + "expires_in": 3600, + }) + })) + defer srv.Close() + + a := &Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + var cache CachedAdminTokenProvider + + ctx := context.Background() + for i := 0; i < 3; i++ { + tok, err := cache.Token(ctx, a, "u", "p") + if err != nil || tok != "tok" { + t.Fatalf("iter %d: tok=%q err=%v", i, tok, err) + } + } + if tokenCalls != 1 { + t.Fatalf("expected 1 token HTTP call, got %d", tokenCalls) + } +} + +func TestCachedAdminTokenProvider_differentCredentials(t *testing.T) { + var tokenCalls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tokenCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "access_token": "tok", + "expires_in": 3600, + }) + })) + defer srv.Close() + + a := &Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + var cache CachedAdminTokenProvider + ctx := context.Background() + + if _, err := cache.Token(ctx, a, "u1", "p"); err != nil { + t.Fatal(err) + } + if _, err := cache.Token(ctx, a, "u2", "p"); err != nil { + t.Fatal(err) + } + if tokenCalls != 2 { + t.Fatalf("expected 2 token calls for different users, got %d", tokenCalls) + } +} + +func TestCachedAdminTokenProvider_refreshNearExpiry(t *testing.T) { + var tokenCalls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tokenCalls++ + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "access_token": "tok", + "expires_in": 90, + }) + })) + defer srv.Close() + + a := &Admin{BaseURL: srv.URL, HTTPClient: srv.Client()} + var cache CachedAdminTokenProvider + ctx := context.Background() + + if _, err := cache.Token(ctx, a, "u", "p"); err != nil { + t.Fatal(err) + } + // Force expiry inside skew window: cached expiresAt is now+90s; skew is 60s, so at now+31s we refresh. + e := cache.entries[adminTokenCacheKey(srv.URL, "u", "p")] + e.expiresAt = time.Now().Add(30 * time.Second) + cache.entries[adminTokenCacheKey(srv.URL, "u", "p")] = e + + if _, err := cache.Token(ctx, a, "u", "p"); err != nil { + t.Fatal(err) + } + if tokenCalls != 2 { + t.Fatalf("expected refresh after near-expiry, tokenCalls=%d", tokenCalls) + } +}