-
Notifications
You must be signed in to change notification settings - Fork 33
fix: resolve AgentRuntime by Deployment name and inject trace env vars into agent containers #245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat/webhook-migration
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -154,6 +154,80 @@ func TestReadAgentRuntimeOverrides_PartialOverrides(t *testing.T) { | |
| } | ||
| } | ||
|
|
||
| func TestReadAgentRuntimeOverrides_MatchesByReplicaSetName(t *testing.T) { | ||
| scheme := newAgentRuntimeScheme() | ||
|
|
||
| // AgentRuntime targets the Deployment name "my-agent" | ||
| cr := &agentv1alpha1.AgentRuntime{ | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Name: "my-agent-runtime", | ||
| Namespace: "ns1", | ||
| }, | ||
| Spec: agentv1alpha1.AgentRuntimeSpec{ | ||
| Type: agentv1alpha1.RuntimeTypeAgent, | ||
| TargetRef: agentv1alpha1.TargetRef{ | ||
| APIVersion: "apps/v1", | ||
| Kind: "Deployment", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 Thorough test coverage for both single-hyphen and multi-hyphen deployment names. Good test structure. |
||
| Name: "my-agent", | ||
| }, | ||
| Trace: &agentv1alpha1.TraceSpec{ | ||
| Endpoint: "http://otel:4317", | ||
| }, | ||
| }, | ||
| } | ||
|
|
||
| fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build() | ||
|
|
||
| // Webhook passes the ReplicaSet name (e.g. "my-agent-7d4f8b9c5") | ||
| overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "my-agent-7d4f8b9c5") | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if overrides == nil { | ||
| t.Fatal("expected non-nil overrides when matching by ReplicaSet name") | ||
| } | ||
| if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" { | ||
| t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint) | ||
| } | ||
| } | ||
|
|
||
| func TestReadAgentRuntimeOverrides_MatchesByMultiHyphenReplicaSetName(t *testing.T) { | ||
| scheme := newAgentRuntimeScheme() | ||
|
|
||
| // Deployment name has multiple hyphens: "api-server-prod" | ||
| cr := &agentv1alpha1.AgentRuntime{ | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Name: "api-server-prod-runtime", | ||
| Namespace: "ns1", | ||
| }, | ||
| Spec: agentv1alpha1.AgentRuntimeSpec{ | ||
| Type: agentv1alpha1.RuntimeTypeAgent, | ||
| TargetRef: agentv1alpha1.TargetRef{ | ||
| APIVersion: "apps/v1", | ||
| Kind: "Deployment", | ||
| Name: "api-server-prod", | ||
| }, | ||
| Trace: &agentv1alpha1.TraceSpec{ | ||
| Endpoint: "http://otel:4317", | ||
| }, | ||
| }, | ||
| } | ||
|
|
||
| fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cr).Build() | ||
|
|
||
| // ReplicaSet name: "api-server-prod-7d4f8b9c5" | ||
| overrides, err := ReadAgentRuntimeOverrides(context.Background(), fakeClient, "ns1", "api-server-prod-7d4f8b9c5") | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if overrides == nil { | ||
| t.Fatal("expected non-nil overrides for multi-hyphen Deployment name") | ||
| } | ||
| if overrides.TraceEndpoint == nil || *overrides.TraceEndpoint != "http://otel:4317" { | ||
| t.Errorf("TraceEndpoint = %v, want http://otel:4317", overrides.TraceEndpoint) | ||
| } | ||
| } | ||
|
|
||
| func TestReadAgentRuntimeOverrides_NoTargetRefMatch(t *testing.T) { | ||
| scheme := newAgentRuntimeScheme() | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -212,6 +212,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp | |
|
|
||
| var builder *ContainerBuilder | ||
| var requiredVolumes []corev1.Volume | ||
| var resolved *ResolvedConfig | ||
|
|
||
| if currentGates.PerWorkloadConfigResolution { | ||
| // Resolved path: read namespace config and build literal env vars | ||
|
|
@@ -226,7 +227,7 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp | |
| } | ||
|
|
||
| // arOverrides was already read above as a gate check. | ||
| resolved := ResolveConfig(currentConfig, nsConfig, arOverrides) | ||
| resolved = ResolveConfig(currentConfig, nsConfig, arOverrides) | ||
| builder = NewResolvedContainerBuilder(resolved) | ||
| requiredVolumes = BuildResolvedVolumes(spireEnabled, "") | ||
|
|
||
|
|
@@ -292,6 +293,13 @@ func (m *PodMutator) InjectAuthBridge(ctx context.Context, podSpec *corev1.PodSp | |
| } | ||
| } | ||
|
|
||
| // Inject OTEL trace env vars into the user's (non-sidecar) containers. | ||
| // These come from the AgentRuntime spec.trace overrides and configure | ||
| // the agent's own telemetry (e.g., MLflow, LangChain, OpenTelemetry SDK). | ||
| if resolved != nil { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Trace env vars are only injected when |
||
| injectTraceEnvVars(podSpec, resolved) | ||
| } | ||
|
|
||
| mutatorLog.Info("Successfully mutated pod spec", "namespace", namespace, "crName", crName, | ||
| "containers", len(podSpec.Containers), | ||
| "initContainers", len(podSpec.InitContainers), | ||
|
|
@@ -338,6 +346,55 @@ func (m *PodMutator) ensureServiceAccount(ctx context.Context, namespace, name s | |
| return nil | ||
| } | ||
|
|
||
| // sidecarContainerNames is the set of container names injected by the webhook. | ||
| // Used to identify user containers when injecting trace env vars. | ||
| // MAINTENANCE: Update this map when new sidecar containers are added. | ||
| var sidecarContainerNames = map[string]bool{ | ||
| EnvoyProxyContainerName: true, | ||
| ProxyInitContainerName: true, | ||
| SpiffeHelperContainerName: true, | ||
| ClientRegistrationContainerName: true, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 Good approach — centralizing sidecar names in a map with a |
||
| AuthBridgeContainerName: true, | ||
| } | ||
|
|
||
| // injectTraceEnvVars adds OTEL trace env vars to all user (non-sidecar) | ||
| // containers. Existing env vars are not overwritten — if the developer already | ||
| // set OTEL_EXPORTER_OTLP_ENDPOINT, the webhook respects their value. | ||
| func injectTraceEnvVars(podSpec *corev1.PodSpec, resolved *ResolvedConfig) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: The log |
||
| var traceEnv []corev1.EnvVar | ||
| if resolved.TraceEndpoint != "" { | ||
| traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: resolved.TraceEndpoint}) | ||
| } | ||
| if resolved.TraceProtocol != "" { | ||
| traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: resolved.TraceProtocol}) | ||
| } | ||
| if resolved.TraceSamplingRate != nil { | ||
| traceEnv = append(traceEnv, corev1.EnvVar{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("%g", *resolved.TraceSamplingRate)}) | ||
| } | ||
| if len(traceEnv) == 0 { | ||
| return | ||
| } | ||
|
|
||
| for i := range podSpec.Containers { | ||
| if sidecarContainerNames[podSpec.Containers[i].Name] { | ||
| continue | ||
| } | ||
| // Build a set of existing env var names to avoid overwriting | ||
| existing := make(map[string]bool, len(podSpec.Containers[i].Env)) | ||
| for _, e := range podSpec.Containers[i].Env { | ||
| existing[e.Name] = true | ||
| } | ||
| for _, e := range traceEnv { | ||
| if !existing[e.Name] { | ||
| podSpec.Containers[i].Env = append(podSpec.Containers[i].Env, e) | ||
| } | ||
| } | ||
| mutatorLog.Info("Injected trace env vars into user container", | ||
| "container", podSpec.Containers[i].Name, | ||
| "vars", len(traceEnv)) | ||
| } | ||
| } | ||
|
|
||
| func containerExists(containers []corev1.Container, name string) bool { | ||
| for i := range containers { | ||
| if containers[i].Name == name { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion: The
LastIndexstripping is correct for the common case and the exact-match-first approach handles ambiguity well. One edge case worth noting: a Deployment literally named with a hash-like suffix (e.g.my-agent-7d4f8b9c5) would be ambiguous, but exact match takes priority so it still works. Might be worth a brief comment noting this precedence for future readers.