Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package injector

import (
"context"
"strings"

agentv1alpha1 "github.com/kagenti/operator/api/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -50,6 +51,13 @@ type AgentRuntimeOverrides struct {
// ReadAgentRuntimeOverrides reads the AgentRuntime CR for a given workload
// using typed access. It lists AgentRuntimes in the namespace and finds the
// one whose spec.targetRef.name matches workloadName.
//
// At Pod CREATE time the webhook derives the workload name from GenerateName,
// which yields the ReplicaSet name (e.g. "myapp-7d4f8b9c5") not the Deployment
// name ("myapp"). The AgentRuntime CR's targetRef.name is the Deployment name.
// To bridge this, we first try an exact match, then try matching after stripping
// the pod-template-hash suffix (last "-<hash>" segment) from the workload name.
//
// Returns (nil, nil) if no matching AgentRuntime CR is found.
func ReadAgentRuntimeOverrides(ctx context.Context, c client.Reader, namespace, workloadName string) (*AgentRuntimeOverrides, error) {
list := &agentv1alpha1.AgentRuntimeList{}
Expand All @@ -60,20 +68,26 @@ func ReadAgentRuntimeOverrides(ctx context.Context, c client.Reader, namespace,
return nil, nil
}

// Derive the Deployment name by stripping the pod-template-hash suffix.
// ReplicaSet names follow the pattern "<deployment>-<pod-template-hash>".
deploymentName := workloadName
if idx := strings.LastIndex(workloadName, "-"); idx > 0 {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The LastIndex stripping is correct for the common case and the exact-match-first approach handles ambiguity well. One edge case worth noting: a Deployment literally named with a hash-like suffix (e.g. my-agent-7d4f8b9c5) would be ambiguous, but exact match takes priority so it still works. Might be worth a brief comment noting this precedence for future readers.

deploymentName = workloadName[:idx]
}

// Find the AgentRuntime whose spec.targetRef.name matches the workload
for i := range list.Items {
rt := &list.Items[i]
if rt.Spec.TargetRef.Name != workloadName {
continue
if rt.Spec.TargetRef.Name == workloadName || rt.Spec.TargetRef.Name == deploymentName {
arConfigLog.Info("Found matching AgentRuntime CR",
"namespace", namespace, "crName", rt.Name,
"targetRef.name", rt.Spec.TargetRef.Name, "derivedFrom", workloadName)
return extractOverrides(rt), nil
}

arConfigLog.Info("Found matching AgentRuntime CR",
"namespace", namespace, "crName", rt.Name, "targetRef.name", workloadName)
return extractOverrides(rt), nil
}

arConfigLog.V(1).Info("No AgentRuntime CR targets this workload",
"namespace", namespace, "workloadName", workloadName)
"namespace", namespace, "workloadName", workloadName, "deploymentName", deploymentName)
return nil, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,80 @@ func TestReadAgentRuntimeOverrides_PartialOverrides(t *testing.T) {
}
}

func TestReadAgentRuntimeOverrides_MatchesByReplicaSetName(t *testing.T) {
scheme := newAgentRuntimeScheme()

// AgentRuntime targets the Deployment name "my-agent"
cr := &agentv1alpha1.AgentRuntime{
ObjectMeta: metav1.ObjectMeta{
Name: "my-agent-runtime",
Namespace: "ns1",
},
Spec: agentv1alpha1.AgentRuntimeSpec{
Type: agentv1alpha1.RuntimeTypeAgent,
TargetRef: agentv1alpha1.TargetRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Thorough test coverage for both single-hyphen and multi-hyphen deployment names. Good test structure.

Name: "my-agent",
},
Trace: &agentv1alpha1.TraceSpec{
Endpoint: "http://otel:4317",
},
},
}

fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build()

// Webhook passes the ReplicaSet name (e.g. "my-agent-7d4f8b9c5")
overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "my-agent-7d4f8b9c5")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if overrides == nil {
t.Fatal("expected non-nil overrides when matching by ReplicaSet name")
}
if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" {
t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint)
}
}

func TestReadAgentRuntimeOverrides_MatchesByMultiHyphenReplicaSetName(t *testing.T) {
scheme := newAgentRuntimeScheme()

// Deployment name has multiple hyphens: "api-server-prod"
cr := &agentv1alpha1.AgentRuntime{
ObjectMeta: metav1.ObjectMeta{
Name: "api-server-prod-runtime",
Namespace: "ns1",
},
Spec: agentv1alpha1.AgentRuntimeSpec{
Type: agentv1alpha1.RuntimeTypeAgent,
TargetRef: agentv1alpha1.TargetRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "api-server-prod",
},
Trace: &agentv1alpha1.TraceSpec{
Endpoint: "http://otel:4317",
},
},
}

fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build()

// ReplicaSet name: "api-server-prod-7d4f8b9c5"
overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "api-server-prod-7d4f8b9c5")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if overrides == nil {
t.Fatal("expected non-nil overrides for multi-hyphen Deployment name")
}
if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" {
t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint)
}
}

func TestReadAgentRuntimeOverrides_NoTargetRefMatch(t *testing.T) {
scheme := newAgentRuntimeScheme()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ var builderLog = logf.Log.WithName("container-builder")

const (
// Container names for AuthBridge sidecars
EnvoyProxyContainerName = "envoy-proxy"
ProxyInitContainerName = "proxy-init"
AuthBridgeContainerName = "authbridge"
EnvoyProxyContainerName = "envoy-proxy"
ProxyInitContainerName = "proxy-init"
AuthBridgeContainerName = "authbridge"

// Client registration container configuration
// Keep in sync with AuthBridge/client-registration/Dockerfile
Expand Down Expand Up @@ -365,7 +365,6 @@ func (b *ContainerBuilder) BuildEnvoyProxyContainerWithSpireOption(spireEnabled
env = b.buildEnvoyProxyEnvLegacy()
}


return corev1.Container{
Name: EnvoyProxyContainerName,
Image: b.cfg.Images.EnvoyProxy,
Expand Down
59 changes: 58 additions & 1 deletion kagenti-operator/internal/webhook/injector/pod_mutator.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp

var builder *ContainerBuilder
var requiredVolumes []corev1.Volume
var resolved *ResolvedConfig

if currentGates.PerWorkloadConfigResolution {
// Resolved path: read namespace config and build literal env vars
Expand All @@ -226,7 +227,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp
}

// arOverrides was already read above as a gate check.
resolved := ResolveConfig(currentConfig, nsConfig, arOverrides)
resolved = ResolveConfig(currentConfig, nsConfig, arOverrides)
builder = NewResolvedContainerBuilder(resolved)
requiredVolumes = BuildResolvedVolumes(spireEnabled, "")

Expand Down Expand Up @@ -292,6 +293,13 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp
}
}

// Inject OTEL trace env vars into the user's (non-sidecar) containers.
// These come from the AgentRuntime spec.trace overrides and configure
// the agent's own telemetry (e.g., MLflow, LangChain, OpenTelemetry SDK).
if resolved != nil {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Trace env vars are only injected when PerWorkloadConfigResolution is enabled (since resolved is nil on the legacy path). This seems intentional — a brief comment noting "trace injection requires the resolved config path" would clarify this for future maintainers.

injectTraceEnvVars(podSpec, resolved)
}

mutatorLog.Info("Successfully mutated pod spec", "namespace", namespace, "crName", crName,
"containers", len(podSpec.Containers),
"initContainers", len(podSpec.InitContainers),
Expand Down Expand Up @@ -338,6 +346,55 @@ func (m *PodMutator) ensureServiceAccount(ctx context.Context, namespace, name s
return nil
}

// sidecarContainerNames is the set of container names injected by the webhook.
// Used to identify user containers when injecting trace env vars.
// MAINTENANCE: Update this map when new sidecar containers are added.
var sidecarContainerNames = map[string]bool{
EnvoyProxyContainerName: true,
ProxyInitContainerName: true,
SpiffeHelperContainerName: true,
ClientRegistrationContainerName: true,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Good approach — centralizing sidecar names in a map with a MAINTENANCE comment is clean and maintainable. The no-overwrite semantics are well-implemented.

AuthBridgeContainerName: true,
}

// injectTraceEnvVars adds OTEL trace env vars to all user (non-sidecar)
// containers. Existing env vars are not overwritten — if the developer already
// set OTEL_EXPORTER_OTLP_ENDPOINT, the webhook respects their value.
func injectTraceEnvVars(podSpec *corev1.PodSpec, resolved *ResolvedConfig) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: The log "vars", len(traceEnv) always logs the total count of candidate trace vars, not the number actually injected (some may be skipped due to existing vars). Consider logging the injected count, or rename to "candidateVars".

var traceEnv []corev1.EnvVar
if resolved.TraceEndpoint != "" {
traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: resolved.TraceEndpoint})
}
if resolved.TraceProtocol != "" {
traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: resolved.TraceProtocol})
}
if resolved.TraceSamplingRate != nil {
traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("%g", *resolved.TraceSamplingRate)})
}
if len(traceEnv) == 0 {
return
}

for i := range podSpec.Containers {
if sidecarContainerNames[podSpec.Containers[i].Name] {
continue
}
// Build a set of existing env var names to avoid overwriting
existing := make(map[string]bool, len(podSpec.Containers[i].Env))
for _, e := range podSpec.Containers[i].Env {
existing[e.Name] = true
}
for _, e := range traceEnv {
if !existing[e.Name] {
podSpec.Containers[i].Env = append(podSpec.Containers[i].Env, e)
}
}
mutatorLog.Info("Injected trace env vars into user container",
"container", podSpec.Containers[i].Name,
"vars", len(traceEnv))
}
}

func containerExists(containers []corev1.Container, name string) bool {
for i := range containers {
if containers[i].Name == name {
Expand Down
78 changes: 78 additions & 0 deletions kagenti-operator/internal/webhook/injector/pod_mutator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,3 +618,81 @@ func TestInjectAuthBridge_NilAnnotations(t *testing.T) {
}
t.Fatal("proxy-init container not found in initContainers")
}

func TestInjectTraceEnvVars_InjectsIntoUserContainers(t *testing.T) {
rate := 0.75
resolved := &ResolvedConfig{
TraceEndpoint: "otel-collector:4317",
TraceProtocol: "grpc",
TraceSamplingRate: &rate,
}
podSpec := &corev1.PodSpec{
Containers: []corev1.Container{
{Name: "agent"},
{Name: EnvoyProxyContainerName},
},
}

injectTraceEnvVars(podSpec, resolved)

// User container should have trace env vars
agentEnv := make(map[string]string)
for _, e := range podSpec.Containers[0].Env {
agentEnv[e.Name] = e.Value
}
if v := agentEnv["OTEL_EXPORTER_OTLP_ENDPOINT"]; v != "otel-collector:4317" {
t.Errorf("OTEL_EXPORTER_OTLP_ENDPOINT = %q, want otel-collector:4317", v)
}
if v := agentEnv["OTEL_EXPORTER_OTLP_PROTOCOL"]; v != "grpc" {
t.Errorf("OTEL_EXPORTER_OTLP_PROTOCOL = %q, want grpc", v)
}
if v := agentEnv["OTEL_TRACES_SAMPLER_ARG"]; v != "0.75" {
t.Errorf("OTEL_TRACES_SAMPLER_ARG = %q, want 0.75", v)
}

// Sidecar container should NOT have trace env vars
for _, e := range podSpec.Containers[1].Env {
if e.Name == "OTEL_EXPORTER_OTLP_ENDPOINT" {
t.Error("trace env vars should not be injected into sidecar containers")
}
}
}

func TestInjectTraceEnvVars_DoesNotOverwriteExisting(t *testing.T) {
resolved := &ResolvedConfig{
TraceEndpoint: "otel-collector:4317",
}
podSpec := &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "agent",
Env: []corev1.EnvVar{
{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: "my-custom-endpoint:4317"},
},
},
},
}

injectTraceEnvVars(podSpec, resolved)

for _, e := range podSpec.Containers[0].Env {
if e.Name == "OTEL_EXPORTER_OTLP_ENDPOINT" && e.Value != "my-custom-endpoint:4317" {
t.Errorf("existing env var was overwritten: got %q, want my-custom-endpoint:4317", e.Value)
}
}
}

func TestInjectTraceEnvVars_NoOverrides(t *testing.T) {
resolved := &ResolvedConfig{}
podSpec := &corev1.PodSpec{
Containers: []corev1.Container{
{Name: "agent"},
},
}

injectTraceEnvVars(podSpec, resolved)

if len(podSpec.Containers[0].Env) != 0 {
t.Errorf("expected no env vars injected, got %d", len(podSpec.Containers[0].Env))
}
}