diff --git a/kagenti-operator/internal/webhook/injector/agentruntime_config.go b/kagenti-operator/internal/webhook/injector/agentruntime_config.go index 65ab6a1..eb1d64c 100644 --- a/kagenti-operator/internal/webhook/injector/agentruntime_config.go +++ b/kagenti-operator/internal/webhook/injector/agentruntime_config.go @@ -18,6 +18,7 @@ package injector import ( "context" + "strings" agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -50,6 +51,13 @@ type AgentRuntimeOverrides struct { // ReadAgentRuntimeOverrides reads the AgentRuntime CR for a given workload // using typed access. It lists AgentRuntimes in the namespace and finds the // one whose spec.targetRef.name matches workloadName. +// +// At Pod CREATE time the webhook derives the workload name from GenerateName, +// which yields the ReplicaSet name (e.g. "myapp-7d4f8b9c5") not the Deployment +// name ("myapp"). The AgentRuntime CR's targetRef.name is the Deployment name. +// To bridge this, we first try an exact match, then try matching after stripping +// the pod-template-hash suffix (last "-" segment) from the workload name. +// // Returns (nil, nil) if no matching AgentRuntime CR is found. func ReadAgentRuntimeOverrides(ctx context.Context, c client.Reader, namespace, workloadName string) (*AgentRuntimeOverrides, error) { list := &agentv1alpha1.AgentRuntimeList{} @@ -60,20 +68,26 @@ func ReadAgentRuntimeOverrides(ctx context.Context, c client.Reader, namespace, return nil, nil } + // Derive the Deployment name by stripping the pod-template-hash suffix. + // ReplicaSet names follow the pattern "-". + deploymentName := workloadName + if idx := strings.LastIndex(workloadName, "-"); idx > 0 { + deploymentName = workloadName[:idx] + } + // Find the AgentRuntime whose spec.targetRef.name matches the workload for i := range list.Items { rt := &list.Items[i] - if rt.Spec.TargetRef.Name != workloadName { - continue + if rt.Spec.TargetRef.Name == workloadName || rt.Spec.TargetRef.Name == deploymentName { + arConfigLog.Info("Found matching AgentRuntime CR", + "namespace", namespace, "crName", rt.Name, + "targetRef.name", rt.Spec.TargetRef.Name, "derivedFrom", workloadName) + return extractOverrides(rt), nil } - - arConfigLog.Info("Found matching AgentRuntime CR", - "namespace", namespace, "crName", rt.Name, "targetRef.name", workloadName) - return extractOverrides(rt), nil } arConfigLog.V(1).Info("No AgentRuntime CR targets this workload", - "namespace", namespace, "workloadName", workloadName) + "namespace", namespace, "workloadName", workloadName, "deploymentName", deploymentName) return nil, nil } diff --git a/kagenti-operator/internal/webhook/injector/agentruntime_config_test.go b/kagenti-operator/internal/webhook/injector/agentruntime_config_test.go index 1e33919..5ef77bf 100644 --- a/kagenti-operator/internal/webhook/injector/agentruntime_config_test.go +++ b/kagenti-operator/internal/webhook/injector/agentruntime_config_test.go @@ -154,6 +154,80 @@ func TestReadAgentRuntimeOverrides_PartialOverrides(t *testing.T) { } } +func TestReadAgentRuntimeOverrides_MatchesByReplicaSetName(t *testing.T) { + scheme := newAgentRuntimeScheme() + + // AgentRuntime targets the Deployment name "my-agent" + cr := &agentv1alpha1.AgentRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-agent-runtime", + Namespace: "ns1", + }, + Spec: agentv1alpha1.AgentRuntimeSpec{ + Type: agentv1alpha1.RuntimeTypeAgent, + TargetRef: agentv1alpha1.TargetRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "my-agent", + }, + Trace: &agentv1alpha1.TraceSpec{ + Endpoint: "http://otel:4317", + }, + }, + } + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build() + + // Webhook passes the ReplicaSet name (e.g. "my-agent-7d4f8b9c5") + overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "my-agent-7d4f8b9c5") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if overrides == nil { + t.Fatal("expected non-nil overrides when matching by ReplicaSet name") + } + if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" { + t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint) + } +} + +func TestReadAgentRuntimeOverrides_MatchesByMultiHyphenReplicaSetName(t *testing.T) { + scheme := newAgentRuntimeScheme() + + // Deployment name has multiple hyphens: "api-server-prod" + cr := &agentv1alpha1.AgentRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "api-server-prod-runtime", + Namespace: "ns1", + }, + Spec: agentv1alpha1.AgentRuntimeSpec{ + Type: agentv1alpha1.RuntimeTypeAgent, + TargetRef: agentv1alpha1.TargetRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "api-server-prod", + }, + Trace: &agentv1alpha1.TraceSpec{ + Endpoint: "http://otel:4317", + }, + }, + } + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build() + + // ReplicaSet name: "api-server-prod-7d4f8b9c5" + overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "api-server-prod-7d4f8b9c5") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if overrides == nil { + t.Fatal("expected non-nil overrides for multi-hyphen Deployment name") + } + if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" { + t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint) + } +} + func TestReadAgentRuntimeOverrides_NoTargetRefMatch(t *testing.T) { scheme := newAgentRuntimeScheme() diff --git a/kagenti-operator/internal/webhook/injector/container_builder.go b/kagenti-operator/internal/webhook/injector/container_builder.go index 6dabea0..f286d8f 100644 --- a/kagenti-operator/internal/webhook/injector/container_builder.go +++ b/kagenti-operator/internal/webhook/injector/container_builder.go @@ -31,9 +31,9 @@ var builderLog = logf.Log.WithName("container-builder") const ( // Container names for AuthBridge sidecars - EnvoyProxyContainerName = "envoy-proxy" - ProxyInitContainerName = "proxy-init" - AuthBridgeContainerName = "authbridge" + EnvoyProxyContainerName = "envoy-proxy" + ProxyInitContainerName = "proxy-init" + AuthBridgeContainerName = "authbridge" // Client registration container configuration // Keep in sync with AuthBridge/client-registration/Dockerfile @@ -365,7 +365,6 @@ func (b *ContainerBuilder) BuildEnvoyProxyContainerWithSpireOption(spireEnabled env = b.buildEnvoyProxyEnvLegacy() } - return corev1.Container{ Name: EnvoyProxyContainerName, Image: b.cfg.Images.EnvoyProxy, diff --git a/kagenti-operator/internal/webhook/injector/pod_mutator.go b/kagenti-operator/internal/webhook/injector/pod_mutator.go index e83bc02..e2ce7ad 100644 --- a/kagenti-operator/internal/webhook/injector/pod_mutator.go +++ b/kagenti-operator/internal/webhook/injector/pod_mutator.go @@ -212,6 +212,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp var builder *ContainerBuilder var requiredVolumes []corev1.Volume + var resolved *ResolvedConfig if currentGates.PerWorkloadConfigResolution { // Resolved path: read namespace config and build literal env vars @@ -226,7 +227,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp } // arOverrides was already read above as a gate check. - resolved := ResolveConfig(currentConfig, nsConfig, arOverrides) + resolved = ResolveConfig(currentConfig, nsConfig, arOverrides) builder = NewResolvedContainerBuilder(resolved) requiredVolumes = BuildResolvedVolumes(spireEnabled, "") @@ -292,6 +293,13 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp } } + // Inject OTEL trace env vars into the user's (non-sidecar) containers. + // These come from the AgentRuntime spec.trace overrides and configure + // the agent's own telemetry (e.g., MLflow, LangChain, OpenTelemetry SDK). + if resolved != nil { + injectTraceEnvVars(podSpec, resolved) + } + mutatorLog.Info("Successfully mutated pod spec", "namespace", namespace, "crName", crName, "containers", len(podSpec.Containers), "initContainers", len(podSpec.InitContainers), @@ -338,6 +346,55 @@ func (m *PodMutator) ensureServiceAccount(ctx context.Context, namespace, name s return nil } +// sidecarContainerNames is the set of container names injected by the webhook. +// Used to identify user containers when injecting trace env vars. +// MAINTENANCE: Update this map when new sidecar containers are added. +var sidecarContainerNames = map[string]bool{ + EnvoyProxyContainerName: true, + ProxyInitContainerName: true, + SpiffeHelperContainerName: true, + ClientRegistrationContainerName: true, + AuthBridgeContainerName: true, +} + +// injectTraceEnvVars adds OTEL trace env vars to all user (non-sidecar) +// containers. Existing env vars are not overwritten — if the developer already +// set OTEL_EXPORTER_OTLP_ENDPOINT, the webhook respects their value. +func injectTraceEnvVars(podSpec *corev1.PodSpec, resolved *ResolvedConfig) { + var traceEnv []corev1.EnvVar + if resolved.TraceEndpoint != "" { + traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: resolved.TraceEndpoint}) + } + if resolved.TraceProtocol != "" { + traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: resolved.TraceProtocol}) + } + if resolved.TraceSamplingRate != nil { + traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("%g", *resolved.TraceSamplingRate)}) + } + if len(traceEnv) == 0 { + return + } + + for i := range podSpec.Containers { + if sidecarContainerNames[podSpec.Containers[i].Name] { + continue + } + // Build a set of existing env var names to avoid overwriting + existing := make(map[string]bool, len(podSpec.Containers[i].Env)) + for _, e := range podSpec.Containers[i].Env { + existing[e.Name] = true + } + for _, e := range traceEnv { + if !existing[e.Name] { + podSpec.Containers[i].Env = append(podSpec.Containers[i].Env, e) + } + } + mutatorLog.Info("Injected trace env vars into user container", + "container", podSpec.Containers[i].Name, + "vars", len(traceEnv)) + } +} + func containerExists(containers []corev1.Container, name string) bool { for i := range containers { if containers[i].Name == name { diff --git a/kagenti-operator/internal/webhook/injector/pod_mutator_test.go b/kagenti-operator/internal/webhook/injector/pod_mutator_test.go index 29f5278..09e8359 100644 --- a/kagenti-operator/internal/webhook/injector/pod_mutator_test.go +++ b/kagenti-operator/internal/webhook/injector/pod_mutator_test.go @@ -618,3 +618,81 @@ func TestInjectAuthBridge_NilAnnotations(t *testing.T) { } t.Fatal("proxy-init container not found in initContainers") } + +func TestInjectTraceEnvVars_InjectsIntoUserContainers(t *testing.T) { + rate := 0.75 + resolved := &ResolvedConfig{ + TraceEndpoint: "otel-collector:4317", + TraceProtocol: "grpc", + TraceSamplingRate: &rate, + } + podSpec := &corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "agent"}, + {Name: EnvoyProxyContainerName}, + }, + } + + injectTraceEnvVars(podSpec, resolved) + + // User container should have trace env vars + agentEnv := make(map[string]string) + for _, e := range podSpec.Containers[0].Env { + agentEnv[e.Name] = e.Value + } + if v := agentEnv["OTEL_EXPORTER_OTLP_ENDPOINT"]; v != "otel-collector:4317" { + t.Errorf("OTEL_EXPORTER_OTLP_ENDPOINT = %q, want otel-collector:4317", v) + } + if v := agentEnv["OTEL_EXPORTER_OTLP_PROTOCOL"]; v != "grpc" { + t.Errorf("OTEL_EXPORTER_OTLP_PROTOCOL = %q, want grpc", v) + } + if v := agentEnv["OTEL_TRACES_SAMPLER_ARG"]; v != "0.75" { + t.Errorf("OTEL_TRACES_SAMPLER_ARG = %q, want 0.75", v) + } + + // Sidecar container should NOT have trace env vars + for _, e := range podSpec.Containers[1].Env { + if e.Name == "OTEL_EXPORTER_OTLP_ENDPOINT" { + t.Error("trace env vars should not be injected into sidecar containers") + } + } +} + +func TestInjectTraceEnvVars_DoesNotOverwriteExisting(t *testing.T) { + resolved := &ResolvedConfig{ + TraceEndpoint: "otel-collector:4317", + } + podSpec := &corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "agent", + Env: []corev1.EnvVar{ + {Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: "my-custom-endpoint:4317"}, + }, + }, + }, + } + + injectTraceEnvVars(podSpec, resolved) + + for _, e := range podSpec.Containers[0].Env { + if e.Name == "OTEL_EXPORTER_OTLP_ENDPOINT" && e.Value != "my-custom-endpoint:4317" { + t.Errorf("existing env var was overwritten: got %q, want my-custom-endpoint:4317", e.Value) + } + } +} + +func TestInjectTraceEnvVars_NoOverrides(t *testing.T) { + resolved := &ResolvedConfig{} + podSpec := &corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "agent"}, + }, + } + + injectTraceEnvVars(podSpec, resolved) + + if len(podSpec.Containers[0].Env) != 0 { + t.Errorf("expected no env vars injected, got %d", len(podSpec.Containers[0].Env)) + } +}