diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go index 7be211b96c210d..5250620120caea 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go @@ -46,6 +46,11 @@ type staticConfig struct { // containerRegistry is the container registry to use for the autoinstrumentation logic containerRegistry string + // registryAllowList restricts which registries can be used for library injection. + // When non-empty, libraries from registries not in this list will not be injected. + // An empty list allows all registries (default). + registryAllowList []string + // mutateUnlabelled is used to control if we require workloads to have a label when using Local Lib Injection. mutateUnlabelled bool @@ -126,6 +131,7 @@ func NewConfig(datadogConfig config.Component) (*Config, error) { } containerRegistry := mutatecommon.ContainerRegistry(datadogConfig, "admission_controller.auto_instrumentation.container_registry") + registryAllowList := datadogConfig.GetStringSlice("admission_controller.auto_instrumentation.container_registry_allow_list") mutateUnlabelled := datadogConfig.GetBool("admission_controller.mutate_unlabelled") return &Config{ @@ -134,6 +140,7 @@ func NewConfig(datadogConfig config.Component) (*Config, error) { LanguageDetection: NewLanguageDetectionConfig(datadogConfig), Instrumentation: instrumentationConfig, containerRegistry: containerRegistry, + registryAllowList: registryAllowList, mutateUnlabelled: mutateUnlabelled, initResources: initResources, initSecurityContext: initSecurityContext, diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator.go index f4712b7834c0fd..ab5bd6ea7daa97 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator.go @@ -9,6 +9,7 @@ package libraryinjection import ( "fmt" + "slices" "strconv" "time" @@ -28,6 +29,26 @@ import ( // // Returns an error if the injection fails. func InjectAPMLibraries(pod *corev1.Pod, cfg LibraryInjectionConfig) error { + // Check the registry allow list before any injection. Both the injector image and + // any library images (which may come from custom-image annotations pointing to + // arbitrary registries) must be in the allow list. + if len(cfg.RegistryAllowList) > 0 { + if !slices.Contains(cfg.RegistryAllowList, cfg.Injector.Package.Registry) { + msg := fmt.Sprintf("registry %q is not in the allow list", cfg.Injector.Package.Registry) + log.Warnf("Skipping APM library injection for pod %s: %s", mutatecommon.PodString(pod), msg) + annotation.Set(pod, annotation.InjectionError, msg) + return nil + } + for _, lib := range cfg.Libraries { + if lib.Package.Registry != "" && !slices.Contains(cfg.RegistryAllowList, lib.Package.Registry) { + msg := fmt.Sprintf("registry %q is not in the allow list", lib.Package.Registry) + log.Warnf("Skipping APM library injection for pod %s: %s", mutatecommon.PodString(pod), msg) + annotation.Set(pod, annotation.InjectionError, msg) + return nil + } + } + } + // Select the provider based on the injection mode (annotation or default) factory := NewProviderFactory(InjectionMode(cfg.InjectionMode)) provider := factory.GetProviderForPod(pod, cfg) diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator_test.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator_test.go index df82e07358e87f..0480d3e9861c04 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator_test.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/mutator_test.go @@ -19,6 +19,120 @@ import ( "github.com/DataDog/datadog-agent/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection" ) +func TestInjectAPMLibraries_RegistryAllowList(t *testing.T) { + tests := []struct { + name string + registryAllowList []string + injectorRegistry string + expectInjected bool + expectErrorAnnot bool + }{ + { + name: "empty allow list permits any registry", + registryAllowList: []string{}, + injectorRegistry: "registry.datadoghq.com", + expectInjected: true, + expectErrorAnnot: false, + }, + { + name: "registry in allow list permits injection", + registryAllowList: []string{"gcr.io/datadoghq", "public.ecr.aws/datadog"}, + injectorRegistry: "gcr.io/datadoghq", + expectInjected: true, + expectErrorAnnot: false, + }, + { + name: "registry not in allow list blocks injection", + registryAllowList: []string{"fake.registry.invalid"}, + injectorRegistry: "registry.datadoghq.com", + expectInjected: false, + expectErrorAnnot: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "app"}}, + }, + } + + err := libraryinjection.InjectAPMLibraries(pod, libraryinjection.LibraryInjectionConfig{ + InjectionMode: "auto", + KubeServerVersion: &version.Info{GitVersion: "v1.30.9"}, + RegistryAllowList: tt.registryAllowList, + Injector: libraryinjection.InjectorConfig{ + Package: libraryinjection.NewLibraryImageFromFullRef(tt.injectorRegistry+"/apm-inject:0.52.0", "0.52.0"), + }, + }) + + require.NoError(t, err) + + _, hasErrorAnnot := annotation.Get(pod, annotation.InjectionError) + require.Equal(t, tt.expectErrorAnnot, hasErrorAnnot) + + if tt.expectErrorAnnot { + val, _ := annotation.Get(pod, annotation.InjectionError) + require.Contains(t, val, "not in the allow list") + } + + // Verify injection state: check whether LD_PRELOAD was set on the container. + injected := false + for _, env := range pod.Spec.Containers[0].Env { + if env.Name == "LD_PRELOAD" { + injected = true + break + } + } + require.Equal(t, tt.expectInjected, injected) + }) + } +} + +func TestInjectAPMLibraries_RegistryAllowListBlocksLibraryRegistry(t *testing.T) { + // A library specified via custom-image annotation may come from a different registry + // than the injector. The allow list must also cover library registries. + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "app"}}, + }, + } + + err := libraryinjection.InjectAPMLibraries(pod, libraryinjection.LibraryInjectionConfig{ + InjectionMode: "auto", + KubeServerVersion: &version.Info{GitVersion: "v1.30.9"}, + RegistryAllowList: []string{"registry.datadoghq.com"}, + Injector: libraryinjection.InjectorConfig{ + Package: libraryinjection.NewLibraryImageFromFullRef("registry.datadoghq.com/apm-inject:0.52.0", "0.52.0"), + }, + Libraries: []libraryinjection.LibraryConfig{ + { + Language: "python", + Package: libraryinjection.NewLibraryImageFromFullRef("evil.registry.invalid/dd-lib-python-init:v3.18.1", "v3.18.1"), + }, + }, + }) + require.NoError(t, err) + + val, ok := annotation.Get(pod, annotation.InjectionError) + require.True(t, ok, "expected injection-error annotation to be set") + require.Contains(t, val, "not in the allow list") + + // Injection should be blocked — LD_PRELOAD should not be set. + for _, env := range pod.Spec.Containers[0].Env { + require.NotEqual(t, "LD_PRELOAD", env.Name, "expected LD_PRELOAD to not be set") + } +} + func TestInjectAPMLibraries_StopsGracefullyWhenProviderUnavailable(t *testing.T) { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/provider.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/provider.go index 1ab7f80ec37a2a..55546317008d44 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/provider.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/libraryinjection/provider.go @@ -110,6 +110,11 @@ type LibraryInjectionConfig struct { // InjectionType identifies the type of injection, e.g. "single step" or "lib injection" (for metrics). InjectionType string + + // RegistryAllowList is an optional list of allowed container registries for library injection. + // When non-empty, only libraries from these registries will be injected. + // An empty list allows all registries (default). + RegistryAllowList []string } // LibraryInjectionProvider defines the strategy for injecting APM libraries into pods. diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/namespace_mutator.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/namespace_mutator.go index 62808663a81bfc..ec015cd78d3894 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/namespace_mutator.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/namespace_mutator.go @@ -111,6 +111,7 @@ func (m *mutatorCore) apmInjectionMutator(config extractedPodLibInfo, autoDetect Debug: m.isDebugEnabled(pod), AutoDetected: autoDetected, InjectionType: injectionType, + RegistryAllowList: m.config.registryAllowList, Injector: libraryinjection.InjectorConfig{ Package: m.resolveInjectorImage(pod), }, diff --git a/pkg/config/setup/common_settings.go b/pkg/config/setup/common_settings.go index 018397f5aa739b..4eb2ca6e88917d 100644 --- a/pkg/config/setup/common_settings.go +++ b/pkg/config/setup/common_settings.go @@ -665,13 +665,24 @@ func initCoreAgentFull(config pkgconfigmodel.Setup) { config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.patcher.fallback_to_file_provider", false) // to be enabled only in e2e tests config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.patcher.file_provider_path", "/etc/datadog-agent/patch/auto-instru.json") // to be used only in e2e tests config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.inject_auto_detected_libraries", true) // allows injecting libraries for languages detected by automatic language detection feature - config.BindEnv("admission_controller.auto_instrumentation.init_resources.cpu") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' - config.BindEnv("admission_controller.auto_instrumentation.init_resources.memory") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' - config.BindEnv("admission_controller.auto_instrumentation.init_security_context") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' - config.BindEnv("admission_controller.auto_instrumentation.asm.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_APPSEC_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for ASM which is implemented in the client libraries - config.BindEnv("admission_controller.auto_instrumentation.iast.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_IAST_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for IAST which is implemented in the client libraries - config.BindEnv("admission_controller.auto_instrumentation.asm_sca.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_APPSEC_SCA_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for SCA - config.BindEnv("admission_controller.auto_instrumentation.profiling.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_PROFILING_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for profiling + config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.container_registry_allow_list", []string{}) // restricts which registries can be used for library injection + config.ParseEnvAsStringSlice("admission_controller.auto_instrumentation.container_registry_allow_list", func(s string) []string { + var result []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r != "" { + result = append(result, r) + } + } + return result + }) + config.BindEnv("admission_controller.auto_instrumentation.init_resources.cpu") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' + config.BindEnv("admission_controller.auto_instrumentation.init_resources.memory") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' + config.BindEnv("admission_controller.auto_instrumentation.init_security_context") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' + config.BindEnv("admission_controller.auto_instrumentation.asm.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_APPSEC_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for ASM which is implemented in the client libraries + config.BindEnv("admission_controller.auto_instrumentation.iast.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_IAST_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for IAST which is implemented in the client libraries + config.BindEnv("admission_controller.auto_instrumentation.asm_sca.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_APPSEC_SCA_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for SCA + config.BindEnv("admission_controller.auto_instrumentation.profiling.enabled", "DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_PROFILING_ENABLED") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' // config for profiling config.BindEnvAndSetDefault("admission_controller.cws_instrumentation.enabled", false) config.BindEnvAndSetDefault("admission_controller.cws_instrumentation.pod_endpoint", "/inject-pod-cws") config.BindEnvAndSetDefault("admission_controller.cws_instrumentation.command_endpoint", "/inject-command-cws") diff --git a/releasenotes/notes/ssi-registry-allow-list-1fb5c3991073fbf8.yaml b/releasenotes/notes/ssi-registry-allow-list-1fb5c3991073fbf8.yaml new file mode 100644 index 00000000000000..0fd148fb6e238b --- /dev/null +++ b/releasenotes/notes/ssi-registry-allow-list-1fb5c3991073fbf8.yaml @@ -0,0 +1,18 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +features: + - | + Add ``admission_controller.auto_instrumentation.container_registry_allow_list`` + configuration option (env var ``DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_CONTAINER_REGISTRY_ALLOW_LIST``) + to restrict which container registries can be used as sources for APM library + injection via Single Step Instrumentation. When set to a non-empty + comma-separated list, the admission controller will skip injection for any pod + whose injector image registry is not in the list, and will set the + ``internal.apm.datadoghq.com/injection-error`` annotation with the reason. + An empty list (the default) allows injection from any registry. diff --git a/test/new-e2e/tests/ssi/ssi_test.go b/test/new-e2e/tests/ssi/ssi_test.go index 023d3a1b13b105..a2e18622cf6001 100644 --- a/test/new-e2e/tests/ssi/ssi_test.go +++ b/test/new-e2e/tests/ssi/ssi_test.go @@ -45,6 +45,9 @@ var namespaceSelectionHelmValues string //go:embed testdata/workload_selection.yaml var workloadSelectionHelmValues string +//go:embed testdata/registry_allow_list.yaml +var registryAllowListHelmValues string + // ssiSuite runs all SSI test groups on a single cluster, calling UpdateEnv at the start of // each group to update the env (workloads, helm values). type ssiSuite struct { @@ -349,6 +352,98 @@ func (v *ssiSuite) TestWorkloadSelection() { }) } +func (v *ssiSuite) TestRegistryAllowList() { + // All three apps run in the same cluster with allow list = registry.datadoghq.com. + // - "allowed": default injector and library, both from registry.datadoghq.com — injection proceeds. + // - "injector-blocked": injector image overridden to fake.registry.invalid — injection blocked. + // - "library-blocked": injector is allowed, but python-lib.custom-image points to + // fake.registry.invalid — injection blocked by library registry check. + v.UpdateEnv(Provisioner(ProvisionerOptions{ + AgentOptions: []kubernetesagentparams.Option{ + kubernetesagentparams.WithHelmValues(registryAllowListHelmValues), + }, + AgentDependentWorkloadAppFunc: func(e config.Env, kubeProvider *kubernetes.Provider, dependsOnAgent pulumi.ResourceOption) (*compkube.Workload, error) { + return singlestep.Scenario(e, kubeProvider, "registry-allow-list", []singlestep.Namespace{ + { + Name: "registry-allow-list", + Apps: []singlestep.App{ + { + Name: "registry-allow-list-allowed", + Image: "registry.datadoghq.com/injector-dev/python", + Version: "16ad9d4b", + Port: 8080, + }, + { + Name: "registry-allow-list-injector-blocked", + Image: "registry.datadoghq.com/injector-dev/python", + Version: "16ad9d4b", + Port: 8080, + PodAnnotations: map[string]string{ + // Override injector to a registry not in the allow list. + "admission.datadoghq.com/apm-inject.custom-image": "fake.registry.invalid/apm-inject:0.54.0", + }, + }, + { + Name: "registry-allow-list-library-blocked", + Image: "registry.datadoghq.com/injector-dev/python", + Version: "16ad9d4b", + Port: 8080, + PodAnnotations: map[string]string{ + // Override python library to a registry not in the allow list. + "admission.datadoghq.com/python-lib.custom-image": "fake.registry.invalid/dd-lib-python-init:v3.18.1", + }, + }, + }, + }, + }, dependsOnAgent) + }, + })) + + v.Run("InjectionAllowedByAllowList", func() { + intake := v.Env().FakeIntake.Client() + k8s := v.Env().KubernetesCluster.Client() + + pod := FindPodInNamespace(v.T(), k8s, "registry-allow-list", "registry-allow-list-allowed") + podValidator := testutils.NewPodValidator(pod, testutils.InjectionModeAuto) + podValidator.RequireInjection(v.T(), []string{"registry-allow-list-allowed"}) + podValidator.RequireInjectorVersion(v.T(), "0.54.0") + podValidator.RequireLibraryVersions(v.T(), map[string]string{"python": "v3.18.1"}) + + require.Eventually(v.T(), func() bool { + traces := FindTracesForService(v.T(), intake, "registry-allow-list-allowed") + return len(traces) != 0 + }, 1*time.Minute, 10*time.Second, "did not find any traces at intake for DD_SERVICE %s", "registry-allow-list-allowed") + }) + + v.Run("InjectorRegistryBlockedByAllowList", func() { + k8s := v.Env().KubernetesCluster.Client() + pod := FindPodInNamespace(v.T(), k8s, "registry-allow-list", "registry-allow-list-injector-blocked") + + // The injector image is overridden to fake.registry.invalid via pod annotation, + // which is not in the allow list. Injection should be skipped entirely. + podValidator := testutils.NewPodValidator(pod, testutils.InjectionModeAuto) + podValidator.RequireNoInjection(v.T()) + + errAnnotation := pod.Annotations["internal.apm.datadoghq.com/injection-error"] + require.NotEmpty(v.T(), errAnnotation, "expected injection-error annotation to be set") + require.Contains(v.T(), errAnnotation, "not in the allow list") + }) + + v.Run("LibraryRegistryBlockedByAllowList", func() { + k8s := v.Env().KubernetesCluster.Client() + pod := FindPodInNamespace(v.T(), k8s, "registry-allow-list", "registry-allow-list-library-blocked") + + // The injector is from the allowed registry, but the python library is overridden + // to fake.registry.invalid via annotation. Injection should be skipped entirely. + podValidator := testutils.NewPodValidator(pod, testutils.InjectionModeAuto) + podValidator.RequireNoInjection(v.T()) + + errAnnotation := pod.Annotations["internal.apm.datadoghq.com/injection-error"] + require.NotEmpty(v.T(), errAnnotation, "expected injection-error annotation to be set") + require.Contains(v.T(), errAnnotation, "not in the allow list") + }) +} + func isOpenShift() bool { switch getProvisionerType() { case ProvisionerOpenShift, ProvisionerOpenShiftLocal: diff --git a/test/new-e2e/tests/ssi/testdata/registry_allow_list.yaml b/test/new-e2e/tests/ssi/testdata/registry_allow_list.yaml new file mode 100644 index 00000000000000..278ebd56aa364e --- /dev/null +++ b/test/new-e2e/tests/ssi/testdata/registry_allow_list.yaml @@ -0,0 +1,30 @@ +--- +clusterAgent: + admissionController: + configMode: "hostip" + env: + - name: DD_ADMISSION_CONTROLLER_AUTO_INSTRUMENTATION_CONTAINER_REGISTRY_ALLOW_LIST + value: "registry.datadoghq.com" + +# Temporary until the Datadog CSI driver is released +datadog-csi-driver: + image: + repository: "gcr.io/datadoghq/csi-driver" + tag: "1.2.0" + +datadog: + csi: + enabled: true + apm: + instrumentation: + enabled: true + injector: + imageTag: "0.54.0" + enabledNamespaces: [] + targets: + - name: "apps" + namespaceSelector: + matchNames: + - "registry-allow-list" + ddTraceVersions: + python: "v3.18.1"