diff --git a/BUILD.bazel b/BUILD.bazel index 0fa34cbc261ff3..c9574954e59527 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -103,6 +103,8 @@ ALL_BUILD_TAGS = [ # gazelle:exclude comp/otelcol/ddflareextension/impl # gazelle:exclude comp/otelcol/ddprofilingextension/fx # gazelle:exclude comp/otelcol/ddprofilingextension/impl +# gazelle:exclude comp/otelcol/dogtelextension/fx +# gazelle:exclude comp/otelcol/dogtelextension/impl # gazelle:exclude comp/otelcol/logsagentpipeline # gazelle:exclude comp/otelcol/otlp # gazelle:exclude comp/otelcol/status/fx diff --git a/cmd/otel-agent/config/agent_config.go b/cmd/otel-agent/config/agent_config.go index d6806f3fe64ec0..4eeb51e24ec2aa 100644 --- a/cmd/otel-agent/config/agent_config.go +++ b/cmd/otel-agent/config/agent_config.go @@ -10,6 +10,7 @@ import ( "context" "errors" "fmt" + "reflect" "strconv" "strings" @@ -26,6 +27,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/config" secretnooptypes "github.com/DataDog/datadog-agent/comp/core/secrets/noop-impl/types" + dogtelextensionimpl "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/impl" "github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/exporter/datadogexporter" pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" @@ -228,6 +230,91 @@ func NewConfigComponent(ctx context.Context, ddCfg string, uris []string) (confi pkgconfig.Set("proxy.https", ddc.ProxyURL, pkgconfigmodel.SourceLocalConfigProcess) } + // Apply dogtelextension config only in standalone mode. In connected mode + // the core agent owns these settings; we must not override them here. + if pkgconfig.GetBool("otel_standalone") { + extcfg, err := getDogtelExtensionConfig(cfg) + if err != nil { + return nil, err + } + if extcfg != nil { + if extcfg.EnableMetadataCollection != nil { + pkgconfig.Set("enable_metadata_collection", *extcfg.EnableMetadataCollection, pkgconfigmodel.SourceFile) + } + // MetadataInterval configures the host metadata provider collection interval. + // The host provider reads this from the "metadata_providers" list entry named "host". + // Merge into the existing list rather than replacing it wholesale, so that + // other providers configured in datadog.yaml (e.g. "resources") are preserved. + if extcfg.MetadataInterval > 0 { + existing := pkgconfig.Get("metadata_providers") + var providers []map[string]interface{} + switch ev := existing.(type) { + case []map[string]interface{}: + providers = ev + case []interface{}: + // YAML v2 stores maps within sequences as map[interface{}]interface{}; + // convert each entry to map[string]interface{} before modifying. + for _, item := range ev { + switch m := item.(type) { + case map[string]interface{}: + providers = append(providers, m) + default: + if rv := reflect.ValueOf(item); rv.Kind() == reflect.Map { + converted := make(map[string]interface{}, rv.Len()) + for _, k := range rv.MapKeys() { + converted[fmt.Sprintf("%v", k.Interface())] = rv.MapIndex(k).Interface() + } + providers = append(providers, converted) + } + } + } + } + found := false + for _, p := range providers { + if p["name"] == "host" { + p["interval"] = extcfg.MetadataInterval + found = true + break + } + } + if !found { + providers = append(providers, map[string]interface{}{ + "name": "host", + "interval": extcfg.MetadataInterval, + }) + } + pkgconfig.Set("metadata_providers", providers, pkgconfigmodel.SourceFile) + } + if extcfg.Hostname != "" { + pkgconfig.Set("hostname", extcfg.Hostname, pkgconfigmodel.SourceFile) + } + if extcfg.SecretBackendCommand != "" { + pkgconfig.Set("secret_backend_command", extcfg.SecretBackendCommand, pkgconfigmodel.SourceFile) + } + if len(extcfg.SecretBackendArguments) > 0 { + pkgconfig.Set("secret_backend_arguments", extcfg.SecretBackendArguments, pkgconfigmodel.SourceFile) + } + if extcfg.SecretBackendTimeout > 0 { + pkgconfig.Set("secret_backend_timeout", extcfg.SecretBackendTimeout, pkgconfigmodel.SourceFile) + } + if extcfg.SecretBackendOutputMaxSize > 0 { + pkgconfig.Set("secret_backend_output_max_size", extcfg.SecretBackendOutputMaxSize, pkgconfigmodel.SourceFile) + } + if extcfg.KubernetesKubeletHost != "" { + pkgconfig.Set("kubernetes_kubelet_host", extcfg.KubernetesKubeletHost, pkgconfigmodel.SourceFile) + } + if extcfg.KubeletTLSVerify != nil { + pkgconfig.Set("kubelet_tls_verify", *extcfg.KubeletTLSVerify, pkgconfigmodel.SourceFile) + } + if extcfg.KubernetesHTTPKubeletPort > 0 { + pkgconfig.Set("kubernetes_http_kubelet_port", extcfg.KubernetesHTTPKubeletPort, pkgconfigmodel.SourceFile) + } + if extcfg.KubernetesHTTPSKubeletPort > 0 { + pkgconfig.Set("kubernetes_https_kubelet_port", extcfg.KubernetesHTTPSKubeletPort, pkgconfigmodel.SourceFile) + } + } + } + return pkgconfig, nil } @@ -248,6 +335,49 @@ func getServiceConfig(cfg *confmap.Conf) (*service.Config, error) { return pipelineConfig, nil } +// getDogtelExtensionConfig parses the first "dogtel*" entry in the +// extensions section of the OTel config and returns the typed +// dogtelextensionimpl.Config. Returns nil (no error) when no dogtelextension +// is present. Pointer fields (EnableMetadataCollection, KubeletTLSVerify) are +// nil when the user did not set them, preserving the DD agent defaults. +func getDogtelExtensionConfig(cfg *confmap.Conf) (*dogtelextensionimpl.Config, error) { + for k, v := range cfg.ToStringMap() { + if k != "extensions" { + continue + } + extensions, ok := v.(map[string]any) + if !ok { + return nil, errors.New("invalid extensions config") + } + var dogtelNames []string + for name := range extensions { + if strings.HasPrefix(name, "dogtel") { + dogtelNames = append(dogtelNames, name) + } + } + if len(dogtelNames) > 1 { + return nil, fmt.Errorf("multiple dogtel extensions found (%s): only one is allowed", strings.Join(dogtelNames, ", ")) + } + for name, val := range extensions { + if !strings.HasPrefix(name, "dogtel") { + continue + } + extcfg := &dogtelextensionimpl.Config{} + if val != nil { + m, ok := val.(map[string]any) + if !ok { + return nil, fmt.Errorf("invalid dogtelextension config for %q", name) + } + if err := confmap.NewFromStringMap(m).Unmarshal(extcfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal dogtelextension config: %w", err) + } + } + return extcfg, nil + } + } + return nil, nil +} + func getDDExporterConfig(cfg *confmap.Conf) (*datadogconfig.Config, error) { var configs []*datadogconfig.Config for k, v := range cfg.ToStringMap() { diff --git a/cmd/otel-agent/config/agent_config_test.go b/cmd/otel-agent/config/agent_config_test.go index c8504188b382f2..0b6472a73a1619 100644 --- a/cmd/otel-agent/config/agent_config_test.go +++ b/cmd/otel-agent/config/agent_config_test.go @@ -12,6 +12,7 @@ import ( "os" "testing" + "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/featuregate" configmock "github.com/DataDog/datadog-agent/pkg/config/mock" @@ -503,6 +504,238 @@ func TestLogsEnabledViaDatadogConfig(t *testing.T) { assert.True(t, c.GetBool("logs_enabled"), "logs_enabled should be true from datadog config") } +// TestDogtelExtensionConfig_FullStandaloneConfig verifies that all dogtelextension +// standalone config fields are applied to the DD agent config. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_FullStandaloneConfig() { + t := suite.T() + t.Setenv("DD_OTEL_STANDALONE", "true") + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_standalone.yaml"}) + require.NoError(t, err) + + assert.Equal(t, true, c.GetBool("enable_metadata_collection")) + assert.Equal(t, "my-standalone-host", c.Get("hostname")) + assert.Equal(t, "/usr/local/bin/secret-provider", c.Get("secret_backend_command")) + assert.Equal(t, []string{"--timeout", "30"}, c.GetStringSlice("secret_backend_arguments")) + assert.Equal(t, 60, c.GetInt("secret_backend_timeout")) + assert.Equal(t, 8192, c.GetInt("secret_backend_output_max_size")) + assert.Equal(t, "10.0.0.1", c.Get("kubernetes_kubelet_host")) + assert.Equal(t, false, c.GetBool("kubelet_tls_verify")) + assert.Equal(t, 10255, c.GetInt("kubernetes_http_kubelet_port")) + assert.Equal(t, 10250, c.GetInt("kubernetes_https_kubelet_port")) +} + +// TestDogtelExtensionConfig_PartialConfig verifies that only the dogtelextension +// fields that are explicitly set override the corresponding DD agent config keys. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_PartialConfig() { + t := suite.T() + t.Setenv("DD_OTEL_STANDALONE", "true") + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_standalone_partial.yaml"}) + require.NoError(t, err) + + assert.Equal(t, "192.168.1.100", c.Get("kubernetes_kubelet_host")) + assert.Equal(t, false, c.GetBool("kubelet_tls_verify")) + // Fields not set in dogtelextension must not override DD agent defaults. + assert.Equal(t, "", c.GetString("hostname")) + assert.Equal(t, "", c.GetString("secret_backend_command")) +} + +// TestDogtelExtensionConfig_MetadataDisabled verifies that setting +// enable_metadata_collection: false propagates to the DD agent config. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_MetadataDisabled() { + t := suite.T() + t.Setenv("DD_OTEL_STANDALONE", "true") + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_standalone_no_metadata.yaml"}) + require.NoError(t, err) + + assert.Equal(t, false, c.GetBool("enable_metadata_collection")) +} + +// TestDogtelExtensionConfig_MetadataInterval verifies that metadata_interval is +// applied to the metadata_providers host entry so the host metadata collector +// uses the configured interval. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_MetadataInterval() { + t := suite.T() + t.Setenv("DD_OTEL_STANDALONE", "true") + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_standalone.yaml"}) + require.NoError(t, err) + + providers := c.Get("metadata_providers") + require.NotNil(t, providers) + providerList, ok := providers.([]map[string]interface{}) + require.True(t, ok) + require.Len(t, providerList, 1) + assert.Equal(t, "host", providerList[0]["name"]) + assert.Equal(t, 600, providerList[0]["interval"]) +} + +// TestDogtelExtensionConfig_MetadataIntervalMerge verifies that setting +// metadata_interval in the dogtel extension merges into the existing +// metadata_providers list rather than replacing it wholesale. Providers other +// than "host" must be preserved, and an existing "host" entry must have its +// interval updated in place. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_MetadataIntervalMerge() { + t := suite.T() + t.Setenv("DD_OTEL_STANDALONE", "true") + // datadog_with_metadata_providers.yaml pre-seeds two providers: + // {name: resources, interval: 300} and {name: host, interval: 60} + // The dogtel extension in config_standalone.yaml sets metadata_interval: 600. + // The host entry's interval must be updated to 600; the resources entry must survive. + c, err := NewConfigComponent(context.Background(), "testdata/datadog_with_metadata_providers.yaml", []string{"testdata/config_standalone.yaml"}) + require.NoError(t, err) + + providers := c.Get("metadata_providers") + require.NotNil(t, providers) + providerList, ok := providers.([]map[string]interface{}) + require.True(t, ok, "metadata_providers should be []map[string]interface{}") + + byName := map[string]map[string]interface{}{} + for _, p := range providerList { + if name, ok := p["name"].(string); ok { + byName[name] = p + } + } + + require.Contains(t, byName, "host", "host provider must be present") + assert.Equal(t, 600, byName["host"]["interval"], "host interval must be updated to metadata_interval value") + + require.Contains(t, byName, "resources", "resources provider must be preserved") + assert.Equal(t, 300, byName["resources"]["interval"], "resources interval must remain unchanged") +} + +// TestDogtelExtensionConfig_NoDogtelExtension verifies that a config without +// the dogtelextension section is still processed correctly (no error, no overrides). +// This test does NOT set DD_OTEL_STANDALONE, verifying that the block is skipped +// entirely in connected mode regardless of whether a dogtelextension is present. +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_NoDogtelExtension() { + t := suite.T() + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_default.yaml"}) + require.NoError(t, err) + + // No dogtelextension + not standalone → hostname not set. + assert.Equal(t, "", c.GetString("hostname")) + assert.Equal(t, "", c.GetString("secret_backend_command")) + assert.Equal(t, "", c.GetString("kubernetes_kubelet_host")) +} + +// TestDogtelExtensionConfig_ConnectedModeIgnored verifies that dogtelextension +// config is NOT applied when otel_standalone is false (connected mode). +func (suite *ConfigTestSuite) TestDogtelExtensionConfig_ConnectedModeIgnored() { + t := suite.T() + // otel_standalone is false by default — dogtelextension fields must be ignored. + c, err := NewConfigComponent(context.Background(), "", []string{"testdata/config_standalone.yaml"}) + require.NoError(t, err) + + assert.Equal(t, "", c.GetString("hostname")) + assert.Equal(t, "", c.GetString("secret_backend_command")) + assert.Equal(t, "", c.GetString("kubernetes_kubelet_host")) +} + +// TestGetDogtelExtensionConfig_NilExtensionSection verifies that getDogtelExtensionConfig +// returns nil without error when the extensions section is absent. +func TestGetDogtelExtensionConfig_NilExtensionSection(t *testing.T) { + cfg := confmap.NewFromStringMap(map[string]any{ + "exporters": map[string]any{}, + }) + extcfg, err := getDogtelExtensionConfig(cfg) + require.NoError(t, err) + assert.Nil(t, extcfg) +} + +// TestGetDogtelExtensionConfig_EmptyDogtelSection verifies that an empty dogtel +// extension section returns a zero-value struct with all pointer fields nil. +func TestGetDogtelExtensionConfig_EmptyDogtelSection(t *testing.T) { + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": map[string]any{ + "dogtel": nil, + }, + }) + extcfg, err := getDogtelExtensionConfig(cfg) + require.NoError(t, err) + require.NotNil(t, extcfg) + assert.Equal(t, "", extcfg.Hostname) + assert.Nil(t, extcfg.KubeletTLSVerify) + assert.Nil(t, extcfg.EnableMetadataCollection) + assert.Equal(t, 0, extcfg.MetadataInterval) +} + +// TestGetDogtelExtensionConfig_EnableMetadataCollectionFalse verifies that +// enable_metadata_collection: false is correctly parsed as a *bool pointing to false, +// not left as nil. +func TestGetDogtelExtensionConfig_EnableMetadataCollectionFalse(t *testing.T) { + falseVal := false + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": map[string]any{ + "dogtel": map[string]any{ + "enable_metadata_collection": falseVal, + }, + }, + }) + extcfg, err := getDogtelExtensionConfig(cfg) + require.NoError(t, err) + require.NotNil(t, extcfg) + require.NotNil(t, extcfg.EnableMetadataCollection) + assert.False(t, *extcfg.EnableMetadataCollection) +} + +// TestGetDogtelExtensionConfig_KubeletTLSVerify verifies that kubelet_tls_verify +// can be explicitly set to false (distinguishable from the unset/nil state). +func TestGetDogtelExtensionConfig_KubeletTLSVerify(t *testing.T) { + falseVal := false + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": map[string]any{ + "dogtel": map[string]any{ + "kubelet_tls_verify": falseVal, + }, + }, + }) + extcfg, err := getDogtelExtensionConfig(cfg) + require.NoError(t, err) + require.NotNil(t, extcfg) + require.NotNil(t, extcfg.KubeletTLSVerify) + assert.False(t, *extcfg.KubeletTLSVerify) +} + +// TestGetDogtelExtensionConfig_InvalidExtensions verifies that a malformed +// extensions section returns an error. +func TestGetDogtelExtensionConfig_InvalidExtensions(t *testing.T) { + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": "not-a-map", + }) + _, err := getDogtelExtensionConfig(cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid extensions config") +} + +// TestGetDogtelExtensionConfig_MultipleDogtelEntries verifies that having more than one +// "dogtel*" extension returns an error instead of silently picking one. +func TestGetDogtelExtensionConfig_MultipleDogtelEntries(t *testing.T) { + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": map[string]any{ + "dogtel": nil, + "dogtel/second": nil, + }, + }) + _, err := getDogtelExtensionConfig(cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "multiple dogtel extensions found") +} + +// TestGetDogtelExtensionConfig_SingleNamedDogtelEntry verifies that a single +// named "dogtel/" entry (not literally "dogtel") is accepted without error. +func TestGetDogtelExtensionConfig_SingleNamedDogtelEntry(t *testing.T) { + cfg := confmap.NewFromStringMap(map[string]any{ + "extensions": map[string]any{ + "dogtel/custom": map[string]any{ + "hostname": "myhost", + }, + }, + }) + extcfg, err := getDogtelExtensionConfig(cfg) + require.NoError(t, err) + require.NotNil(t, extcfg) + assert.Equal(t, "myhost", extcfg.Hostname) +} + // TestSuite runs the CalculatorTestSuite func TestSuite(t *testing.T) { suite.Run(t, new(ConfigTestSuite)) diff --git a/cmd/otel-agent/config/testdata/config_standalone.yaml b/cmd/otel-agent/config/testdata/config_standalone.yaml new file mode 100644 index 00000000000000..55f6753ac13b10 --- /dev/null +++ b/cmd/otel-agent/config/testdata/config_standalone.yaml @@ -0,0 +1,28 @@ +extensions: + dogtel: + enable_metadata_collection: true + metadata_interval: 600 + hostname: "my-standalone-host" + secret_backend_command: "/usr/local/bin/secret-provider" + secret_backend_arguments: ["--timeout", "30"] + secret_backend_timeout: 60 + secret_backend_output_max_size: 8192 + kubernetes_kubelet_host: "10.0.0.1" + kubelet_tls_verify: false + kubernetes_http_kubelet_port: 10255 + kubernetes_https_kubelet_port: 10250 +receivers: + otlp: + protocols: + grpc: +exporters: + datadog: + api: + key: STANDALONE_API_KEY + site: datadoghq.com +service: + extensions: [dogtel] + pipelines: + traces: + receivers: [otlp] + exporters: [datadog] diff --git a/cmd/otel-agent/config/testdata/config_standalone_no_metadata.yaml b/cmd/otel-agent/config/testdata/config_standalone_no_metadata.yaml new file mode 100644 index 00000000000000..3250b4b6b8bc5d --- /dev/null +++ b/cmd/otel-agent/config/testdata/config_standalone_no_metadata.yaml @@ -0,0 +1,18 @@ +extensions: + dogtel: + enable_metadata_collection: false + metadata_interval: 120 +receivers: + otlp: + protocols: + grpc: +exporters: + datadog: + api: + key: STANDALONE_API_KEY +service: + extensions: [dogtel] + pipelines: + traces: + receivers: [otlp] + exporters: [datadog] diff --git a/cmd/otel-agent/config/testdata/config_standalone_partial.yaml b/cmd/otel-agent/config/testdata/config_standalone_partial.yaml new file mode 100644 index 00000000000000..36ad22c8c5642b --- /dev/null +++ b/cmd/otel-agent/config/testdata/config_standalone_partial.yaml @@ -0,0 +1,18 @@ +extensions: + dogtel: + kubernetes_kubelet_host: "192.168.1.100" + kubelet_tls_verify: false +receivers: + otlp: + protocols: + grpc: +exporters: + datadog: + api: + key: STANDALONE_API_KEY +service: + extensions: [dogtel] + pipelines: + traces: + receivers: [otlp] + exporters: [datadog] diff --git a/cmd/otel-agent/config/testdata/datadog_with_metadata_providers.yaml b/cmd/otel-agent/config/testdata/datadog_with_metadata_providers.yaml new file mode 100644 index 00000000000000..7d06c3237f7f15 --- /dev/null +++ b/cmd/otel-agent/config/testdata/datadog_with_metadata_providers.yaml @@ -0,0 +1,5 @@ +metadata_providers: + - name: resources + interval: 300 + - name: host + interval: 60 diff --git a/cmd/otel-agent/subcommands/run/command.go b/cmd/otel-agent/subcommands/run/command.go index 76b8f75dfc848c..0ae68e2f6f4a22 100644 --- a/cmd/otel-agent/subcommands/run/command.go +++ b/cmd/otel-agent/subcommands/run/command.go @@ -24,6 +24,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/configsync/configsyncimpl" delegatedauthnoopfx "github.com/DataDog/datadog-agent/comp/core/delegatedauth/fx-noop" fxinstrumentation "github.com/DataDog/datadog-agent/comp/core/fxinstrumentation/fx" + "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameimpl" "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" "github.com/DataDog/datadog-agent/comp/core/hostname/remotehostnameimpl" ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" @@ -34,10 +35,14 @@ import ( pid "github.com/DataDog/datadog-agent/comp/core/pid/def" pidfx "github.com/DataDog/datadog-agent/comp/core/pid/fx" pidimpl "github.com/DataDog/datadog-agent/comp/core/pid/impl" + secretsfx "github.com/DataDog/datadog-agent/comp/core/secrets/fx" secretsnoopfx "github.com/DataDog/datadog-agent/comp/core/secrets/fx-noop" tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" + taggerfx "github.com/DataDog/datadog-agent/comp/core/tagger/fx" remoteTaggerFx "github.com/DataDog/datadog-agent/comp/core/tagger/fx-optional-remote" "github.com/DataDog/datadog-agent/comp/core/telemetry/telemetryimpl" + workloadfilterfx "github.com/DataDog/datadog-agent/comp/core/workloadfilter/fx" + wmcatalog "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/catalog-otel" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" workloadmetafx "github.com/DataDog/datadog-agent/comp/core/workloadmeta/fx" workloadmetainit "github.com/DataDog/datadog-agent/comp/core/workloadmeta/init" @@ -46,7 +51,10 @@ import ( "github.com/DataDog/datadog-agent/comp/forwarder/defaultforwarder" "github.com/DataDog/datadog-agent/comp/forwarder/orchestrator/orchestratorinterface" logconfig "github.com/DataDog/datadog-agent/comp/logs/agent/config" + "github.com/DataDog/datadog-agent/comp/metadata/host/hostimpl" "github.com/DataDog/datadog-agent/comp/metadata/inventoryagent/inventoryagentimpl" + "github.com/DataDog/datadog-agent/comp/metadata/inventoryhost/inventoryhostimpl" + "github.com/DataDog/datadog-agent/comp/metadata/runner/runnerimpl" collectorcontribFx "github.com/DataDog/datadog-agent/comp/otelcol/collector-contrib/fx" collectordef "github.com/DataDog/datadog-agent/comp/otelcol/collector/def" collectorfx "github.com/DataDog/datadog-agent/comp/otelcol/collector/fx" @@ -154,7 +162,21 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti ) } + if acfg.GetBool("otel_standalone") { + return fxutil.Run( + commonAgentFxOptions(ctx, params, acfg, uris, opts...), + standaloneAgentFxOptions(params), + ) + } return fxutil.Run( + commonAgentFxOptions(ctx, params, acfg, uris, opts...), + connectedAgentFxOptions(params), + ) +} + +// commonAgentFxOptions returns FX options shared by both standalone and connected agent modes. +func commonAgentFxOptions(ctx context.Context, params *cliParams, acfg coreconfig.Component, uris []string, opts ...fx.Option) fx.Option { + return fx.Options( ForwarderBundle(), logtracefx.Module(), inventoryagentimpl.Module(), @@ -174,17 +196,16 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti return acfg, nil }), fxutil.ProvideOptional[coreconfig.Component](), - secretsnoopfx.Module(), workloadmetafx.Module(workloadmeta.Params{ AgentType: workloadmeta.NodeAgent, InitHelper: workloadmetainit.GetWorkloadmetaInit(), }), + wmcatalog.GetCatalog(), + workloadfilterfx.Module(), fx.Supply(uris), fx.Provide(func(h hostnameinterface.Component) (serializerexporter.SourceProviderFunc, error) { return h.Get, nil }), - remotehostnameimpl.Module(), - fx.Provide(func(_ coreconfig.Component) log.Params { return log.ForDaemon(params.LoggerName, "log_file", pkgconfigsetup.DefaultOTelAgentLogFile) }), @@ -210,7 +231,6 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti return "", err } l.Info("Using ", "hostname", hn) - return hn, nil }), @@ -223,19 +243,14 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti fx.Options(opts...), fx.Invoke(func(_ collectordef.Component, _ defaultforwarder.Forwarder, _ option.Option[logsagentpipeline.Component], _ pid.Component) { }), - - configsyncimpl.Module(configsyncimpl.NewParams(params.SyncTimeout, true, params.SyncOnInitTimeout)), - remoteTaggerFx.Module(tagger.OptionalRemoteParams{Disable: isCmdPortNegative}, tagger.NewRemoteParams()), telemetryimpl.Module(), fx.Provide(func(cfg traceconfigdef.Component) telemetry.TelemetryCollector { return telemetry.NewCollector(cfg.Object()) }), gzipfx.Module(), - // ctx is required to be supplied from here, as Windows needs to inject its own context // to allow the agent to work as a service. fx.Provide(func() context.Context { return ctx }), // fx.Supply(ctx) fails with a missing type error. - // TODO: consider adding configsync.Component as an explicit dependency for traceconfig // to avoid this sort of dependency tree hack. fx.Provide(func(params traceconfigdef.Params, cfg coreconfig.Component, taggerComp tagger.Component, ipcComp ipc.Component, _ configsync.Component) (traceconfigdef.Component, error) { @@ -250,7 +265,6 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti return provides.Comp, err }), fx.Supply(traceconfigdef.Params{FailIfAPIKeyMissing: false}), - fx.Supply(&traceagentcomp.Params{ CPUProfile: "", MemProfile: "", @@ -265,6 +279,43 @@ func runOTelAgentCommand(ctx context.Context, params *cliParams, opts ...fx.Opti ) } +// standaloneAgentFxOptions returns FX options specific to standalone mode (DD_OTEL_STANDALONE=true). +// In standalone mode the otel-agent runs without a core Datadog Agent on the same host. +func standaloneAgentFxOptions(params *cliParams) fx.Option { + return fx.Options( + // Metadata collection (host inventory, runner) for dogtelextension + runnerimpl.Module(), + hostimpl.Module(), + inventoryhostimpl.Module(), + // Real secrets backend so ENC[] handles in OTel/DD config are resolved locally + secretsfx.Module(), + // Resolve hostname locally; no core agent to ask + hostnameimpl.Module(), + // No on-init config sync (no core agent to sync from); periodic sync is also + // effectively disabled by the default agent_ipc.config_refresh_interval=0 + configsyncimpl.Module(configsyncimpl.NewParams(params.SyncTimeout, false, params.SyncOnInitTimeout)), + // Local workloadmeta-backed tagger so the infraattributes processor can enrich + // spans with K8s tags (pod, namespace, deployment, ...) without a core agent + taggerfx.Module(), + ) +} + +// connectedAgentFxOptions returns FX options specific to connected mode (DD_OTEL_STANDALONE=false). +// In connected mode the otel-agent runs alongside a core Datadog Agent on the same host. +func connectedAgentFxOptions(params *cliParams) fx.Option { + return fx.Options( + // No local metadata runner; core agent handles host metadata + // Noop secrets; core agent resolves ENC[] handles + secretsnoopfx.Module(), + // Ask core agent for hostname first, fall back to local resolution + remotehostnameimpl.Module(), + // Sync config from core agent on init and periodically + configsyncimpl.Module(configsyncimpl.NewParams(params.SyncTimeout, true, params.SyncOnInitTimeout)), + // Remote tagger proxying tag lookups to core agent + remoteTaggerFx.Module(tagger.OptionalRemoteParams{Disable: isCmdPortNegative}, tagger.NewRemoteParams()), + ) +} + // ForwarderBundle returns the fx.Option for the forwarder bundle. // TODO: cleanup the forwarder instantiation with fx. // This is a bit of a hack because we need to enforce configsync.Component diff --git a/cmd/otel-agent/subcommands/run/command_test.go b/cmd/otel-agent/subcommands/run/command_test.go index 0a21edf0266405..181ed61242cca3 100644 --- a/cmd/otel-agent/subcommands/run/command_test.go +++ b/cmd/otel-agent/subcommands/run/command_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -//go:build otlp +//go:build otlp && test package run @@ -43,6 +43,34 @@ func TestFxRun_NoDatadogExporter(t *testing.T) { }) } +func TestFxRun_Standalone(t *testing.T) { + t.Setenv("DD_OTELCOLLECTOR_ENABLED", "true") + t.Setenv("DD_OTEL_STANDALONE", "true") + fxutil.TestRun(t, func() error { + ctx := context.Background() + params := &cliParams{ + GlobalParams: &subcommands.GlobalParams{ + ConfPaths: []string{"test_config.yaml"}, + }, + } + return runOTelAgentCommand(ctx, params) + }) +} + +func TestFxRun_NoDatadogExporter_Standalone(t *testing.T) { + t.Setenv("DD_OTELCOLLECTOR_ENABLED", "true") + t.Setenv("DD_OTEL_STANDALONE", "true") + fxutil.TestRun(t, func() error { + ctx := context.Background() + params := &cliParams{ + GlobalParams: &subcommands.GlobalParams{ + ConfPaths: []string{"test_config_no_dd_standalone.yaml"}, + }, + } + return runOTelAgentCommand(ctx, params) + }) +} + func TestFxRun_Disabled(t *testing.T) { t.Setenv("DD_OTELCOLLECTOR_ENABLED", "false") ctx := context.Background() diff --git a/cmd/otel-agent/subcommands/run/test_config_no_dd_standalone.yaml b/cmd/otel-agent/subcommands/run/test_config_no_dd_standalone.yaml new file mode 100644 index 00000000000000..3f07eb7dbcbb39 --- /dev/null +++ b/cmd/otel-agent/subcommands/run/test_config_no_dd_standalone.yaml @@ -0,0 +1,33 @@ +extensions: + dogtel: + enable_metadata_collection: true + hostname: "my-standalone-host" + +receivers: + otlp: + protocols: + http: + endpoint: "localhost:4318" + grpc: + endpoint: "localhost:4317" + +exporters: + debug: + +service: + extensions: [dogtel] + pipelines: + traces: + receivers: [otlp] + exporters: [debug] + telemetry: + metrics: + readers: + - pull: + exporter: + prometheus: + host: "localhost" + port: 8888 + without_scope_info: true + without_type_suffix: true + without_units: true diff --git a/comp/README.md b/comp/README.md index d0366a370fc102..682f908fcd293e 100644 --- a/comp/README.md +++ b/comp/README.md @@ -511,6 +511,10 @@ Package ddflareextension defines the OpenTelemetry Extension component. Package ddprofilingextension defines the otel agent ddprofilingextension component. +### [comp/otelcol/dogtelextension](https://pkg.go.dev/github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension) + +Package dogtelextension provides Datadog agent functionalities for OTel collector + ### [comp/otelcol/logsagentpipeline](https://pkg.go.dev/github.com/DataDog/datadog-agent/comp/otelcol/logsagentpipeline) Package logsagentpipeline contains logs agent pipeline component diff --git a/comp/core/workloadmeta/collectors/catalog-otel/catalog.go b/comp/core/workloadmeta/collectors/catalog-otel/catalog.go new file mode 100644 index 00000000000000..341eb649612791 --- /dev/null +++ b/comp/core/workloadmeta/collectors/catalog-otel/catalog.go @@ -0,0 +1,43 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +// Package catalog is the workloadmeta collector catalog for the otel-agent. +// It includes collectors for Kubernetes (kubelet) and container runtimes so that +// the local tagger can enrich OTel spans/metrics/logs with K8s entity tags. +package catalog + +import ( + "go.uber.org/fx" + + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/containerd" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/crio" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/docker" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/ecs" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/kubelet" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/kubemetadata" + "github.com/DataDog/datadog-agent/comp/core/workloadmeta/collectors/internal/podman" +) + +// GetCatalog returns the FX options for the otel-agent workloadmeta collectors. +func GetCatalog() fx.Option { + options := []fx.Option{ + containerd.GetFxOptions(), + crio.GetFxOptions(), + docker.GetFxOptions(), + ecs.GetFxOptions(), + kubelet.GetFxOptions(), + kubemetadata.GetFxOptions(), + podman.GetFxOptions(), + } + + // remove nil options (collectors disabled by build tags) + opts := make([]fx.Option, 0, len(options)) + for _, item := range options { + if item != nil { + opts = append(opts, item) + } + } + return fx.Options(opts...) +} diff --git a/comp/otelcol/collector/impl/collector.go b/comp/otelcol/collector/impl/collector.go index c5411bf823be26..1610360120d34a 100644 --- a/comp/otelcol/collector/impl/collector.go +++ b/comp/otelcol/collector/impl/collector.go @@ -31,13 +31,16 @@ import ( "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" corelog "github.com/DataDog/datadog-agent/comp/core/log/def" + secrets "github.com/DataDog/datadog-agent/comp/core/secrets/def" tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" "github.com/DataDog/datadog-agent/comp/core/telemetry" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" compdef "github.com/DataDog/datadog-agent/comp/def" collectorcontrib "github.com/DataDog/datadog-agent/comp/otelcol/collector-contrib/def" collector "github.com/DataDog/datadog-agent/comp/otelcol/collector/def" ddextension "github.com/DataDog/datadog-agent/comp/otelcol/ddflareextension/impl" ddprofilingextension "github.com/DataDog/datadog-agent/comp/otelcol/ddprofilingextension/impl" + dogtelextension "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/impl" "github.com/DataDog/datadog-agent/comp/otelcol/logsagentpipeline" "github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/exporter/datadogexporter" "github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/exporter/logsagentexporter" @@ -85,6 +88,8 @@ type Requires struct { Ipc ipc.Component Telemetry telemetry.Component AgentTelemetry agenttelemetry.Component + WorkloadMeta workloadmeta.Component + Secrets secrets.Component Params Params } @@ -197,6 +202,17 @@ func addFactories(reqs Requires, factories otelcol.Factories, gatewayUsage otel. factories.Connectors[datadogConnectorType] = apmstats.NewConnectorFactory(datadogConnectorType, tracesToTracesStability, tracesToMetricsStability, reqs.Tagger, reqs.Hostname.Get, nil) factories.Extensions[ddextension.Type] = ddextension.NewFactoryForAgent(&factories, newConfigProviderSettings(reqs.URIs, reqs.Converter, false), option.New(reqs.Ipc), byoc) factories.Extensions[ddprofilingextension.Type] = ddprofilingextension.NewFactoryForAgent(reqs.TraceAgent, reqs.Log) + factories.Extensions[dogtelextension.Type] = dogtelextension.NewFactoryForAgent( + reqs.Config, + reqs.Log, + reqs.Serializer, + reqs.Hostname, + reqs.WorkloadMeta, + reqs.Tagger, + reqs.Ipc, + reqs.Telemetry, + reqs.Secrets, + ) } var buildInfo = component.BuildInfo{ diff --git a/comp/otelcol/dogtelextension/README.md b/comp/otelcol/dogtelextension/README.md new file mode 100644 index 00000000000000..efd72c8922b563 --- /dev/null +++ b/comp/otelcol/dogtelextension/README.md @@ -0,0 +1,310 @@ +# Datadog Agent Extension for OpenTelemetry Collector + +The `dogtelextension` packages Datadog Agent functionalities for use in the OpenTelemetry Collector (otel-agent) when running in **standalone mode**. + +## ⚠️ Important: Standalone Mode Only + +**This extension should ONLY be enabled when `DD_OTEL_STANDALONE=true`.** + +- **Standalone mode**: Use this extension when otel-agent runs independently without a core Datadog Agent +- **Connected mode**: Do NOT use this extension when otel-agent runs alongside the core Datadog Agent (the core agent already provides these functionalities) + +The extension will automatically disable itself if `DD_OTEL_STANDALONE` is not set to `true`. + +## Features + +### 1. Remote Tagger gRPC Server +Provides a minimal tagger gRPC server that allows other agents (process-agent, trace-agent, etc.) to stream entity tags. + +**Key capabilities:** +- Streams entity changes to connected clients +- Fetches individual entities with tags +- Supports TLS and authentication via IPC component +- Configurable message sizes and concurrent sync limits + +### 2. Kubernetes Tag Enrichment (Standalone Mode) +In standalone mode, the otel-agent runs a **local tagger** backed by workloadmeta collectors (kubelet, containerd, docker, ECS, crio, podman). The `infraattributes` processor uses this local tagger to enrich spans, metrics, and logs with K8s entity tags — `kube_deployment`, `kube_namespace`, `pod_name`, `kube_replica_set`, etc. — without a core Datadog Agent on the same host. + +**Required deployment configuration:** +```yaml +env: + - name: DD_KUBERNETES_KUBELET_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: DD_KUBELET_TLS_VERIFY + value: "false" # or configure a CA cert +``` + +**Required RBAC:** The ServiceAccount needs `get` on `nodes/proxy` so the kubelet collector can list pods: +```yaml +- apiGroups: [""] + resources: ["nodes/proxy"] + verbs: ["get"] +``` + +### 3. Workload Detection Integration +Ensures workload metadata (via `workloadmeta.Component`) is accessible to the extension and other components. + +**Note:** Workload detection itself is provided by the `workloadmetafx.Module()` already present in otel-agent. + +### 4. Secrets Resolution (Conditional) +Supports secrets resolution when running in standalone mode (`DD_OTEL_STANDALONE=true`). + +**Note:** Secrets are configured at the otel-agent level, not within the extension itself. See configuration section below. + +### 5. Host Metadata Submission +**Status:** ✅ Implemented via FX modules in otel-agent. + +Host metadata collection is enabled through FX modules added to otel-agent startup in [cmd/otel-agent/subcommands/run/command.go](../../cmd/otel-agent/subcommands/run/command.go): + +```go +runnerimpl.Module(), // Metadata scheduler and submission +hostimpl.Module(), // Host metadata (V5 payload) +inventoryhostimpl.Module(), // Inventory host metadata +``` + +This provides: +- Host metadata (OS, hostname, cloud provider info, agent version) +- Inventory metadata (installed packages, system configuration) +- Scheduled submission to Datadog API (every 5 minutes by default) +- Controlled by `enable_metadata_collection` config in datadog.yaml + +## Configuration + +The extension is configured via the OpenTelemetry Collector configuration file. + +**⚠️ Only include this extension when DD_OTEL_STANDALONE=true** + +```yaml +# Datadog Agent config (datadog.yaml or via DD_OTEL_STANDALONE env var) +# REQUIRED: Set this to true to enable dogtelextension +otel_standalone: true + +# OTel Collector config (otel-config.yaml) +extensions: + dogtel: + # Tagger server settings + enable_tagger_server: true # Enable tagger gRPC server (default: false) + tagger_server_port: 5000 # Port to listen on, 0 = auto-assign (default: 0) + tagger_server_addr: "localhost" # Address to bind to (default: localhost) + tagger_max_message_size: 4194304 # Max gRPC message size in bytes (default: 4MB) + tagger_max_concurrent_sync: 5 # Max concurrent sync connections (default: 5) + + # Metadata collection (via FX modules in otel-agent) + enable_metadata_collection: true # Informational flag (default: true) + metadata_interval: 300 # Interval in seconds (default: 300) + +service: + extensions: [dogtel] # Only include when DD_OTEL_STANDALONE=true +``` + +### Environment Variables + +- **`DD_OTEL_STANDALONE`** (REQUIRED): Must be set to `true` to enable this extension + - Enables standalone mode with full secrets resolution + - Enables metadata collection via FX modules + - Activates dogtelextension functionalities + +## Deployment Modes + +### Connected Mode (Default: `DD_OTEL_STANDALONE=false`) +- **dogtelextension:** ❌ **DO NOT enable** - core agent provides these functionalities +- **Secrets:** Uses no-op secrets (expects core agent for secrets) +- **Tagger:** Uses remote tagger client to connect to core agent +- **Metadata:** Handled by core agent +- **Use case:** otel-agent running alongside core Datadog agent + +### Standalone Mode (`DD_OTEL_STANDALONE=true`) +- **dogtelextension:** ✅ **ENABLE THIS EXTENSION** +- **Secrets:** Full secrets resolution enabled +- **Tagger:** Runs tagger server for other agents +- **Metadata:** Collected via FX modules (runner, host, inventoryhost) +- **Workload detection:** Provided by workloadmeta component +- **Use case:** otel-agent running independently without core agent + +## Architecture + +### Extension Factory Pattern +The extension uses the factory pattern to integrate with OpenTelemetry Collector: + +- `NewFactory()`: Basic factory for standalone OTel builds (returns error) +- `NewFactoryForAgent()`: Factory with FX component injection for otel-agent + +FX-injected components: +- `config.Component`: Core configuration +- `log.Component`: Logging +- `serializer.MetricSerializer`: Metrics serialization +- `hostnameinterface.Component`: Hostname resolution +- `workloadmeta.Component`: Workload metadata +- `tagger.Component`: Tagging service +- `ipc.Component`: IPC/authentication +- `telemetry.Component`: Telemetry +- `secrets.Component`: Secrets resolution + +### Tagger Server Implementation +The tagger server is a minimal implementation of `pb.AgentSecureServer` that: +1. Wraps `comp/core/tagger/server.Server` +2. Implements only tagger methods (`TaggerStreamEntities`, `TaggerFetchEntity`) +3. Uses IPC component for authentication and TLS +4. Starts in a goroutine during `Start()` +5. Gracefully stops during `Shutdown()` + +## Integration + +### Registering the Extension +In `comp/otelcol/collector/impl/collector.go`: + +```go +factories.Extensions[dogtelextension.Type] = dogtelextension.NewFactoryForAgent( + reqs.Config, + reqs.Log, + reqs.Serializer, + reqs.Hostname, + reqs.Workloadmeta, + reqs.Tagger, + reqs.Ipc, + reqs.Telemetry, + reqs.Secrets, +) +``` + +### Conditional Secrets +In `cmd/otel-agent/subcommands/run/command.go`: + +```go +// Determine standalone mode +standaloneMode := cfg.GetBool("otel_standalone") // From DD_OTEL_STANDALONE + +// Choose secrets module +var secretsModule fx.Option +if standaloneMode { + secretsModule = secretsfx.Module() // Real secrets +} else { + secretsModule = secretsnoopfx.Module() // No-op +} + +// Use in FX options +fxutil.Run( + secretsModule, + // ... other modules ... +) +``` + +## Development + +### Building +The extension is automatically included when building otel-agent with the `otlp` build tag: + +```bash +dda inv otel-agent.build +``` + +### Testing +```bash +# Unit tests +go test ./comp/otelcol/dogtelextension/... + +# Integration test with otel-agent +./bin/otel-agent/otel-agent run --config test-config.yaml +``` + +### Example Standalone Mode Configuration + +**Datadog Agent config (datadog.yaml):** +```yaml +api_key: ${DD_API_KEY} +hostname: my-host +otel_standalone: true # REQUIRED for dogtelextension +enable_metadata_collection: true +``` + +**OTel Collector config (otel-config.yaml):** +```yaml +receivers: + otlp: + protocols: + grpc: + endpoint: localhost:4317 + +exporters: + datadog: + api: + key: ${env:DD_API_KEY} + +extensions: + dogtel: # Only include in standalone mode + enable_tagger_server: true + tagger_server_port: 5000 + enable_metadata_collection: true + +service: + extensions: [dogtel] + pipelines: + traces: + receivers: [otlp] + exporters: [datadog] +``` + +**Run command:** +```bash +DD_OTEL_STANDALONE=true ./bin/otel-agent/otel-agent \ + --core-config=datadog.yaml \ + --config=file:otel-config.yaml +``` + +## Limitations + +### Phase 1 (Implemented) +- ✅ Tagger gRPC server +- ✅ K8s tag enrichment via local tagger (kubelet + container runtime collectors) +- ✅ Workload detection integration +- ✅ Conditional secrets (via otel-agent config) +- ✅ Host metadata submission (via FX modules in otel-agent) +- ❌ System metrics collection (deferred to Phase 2) + +### Future Enhancements (Phase 2) +- System checks integration (CPU, memory, disk, load, uptime, io) +- Collector component with demultiplexer +- Check scheduler +- Direct metadata collection within extension (alternative to current FX-based approach) + +## Troubleshooting + +### Extension Not Starting / Disabled +**Symptom:** Logs show "dogtelextension disabled (not in standalone mode)" + +**Solution:** +- Set `DD_OTEL_STANDALONE=true` environment variable +- Or set `otel_standalone: true` in datadog.yaml +- The extension requires standalone mode to function +- In connected mode (with core agent), remove this extension from your OTel config + +### Tagger Server Not Starting +- **First:** Ensure `DD_OTEL_STANDALONE=true` is set +- Check that `enable_tagger_server: true` in extension config +- Verify port is not in use: `lsof -i :` +- Check logs for TLS/authentication errors +- Ensure IPC component is properly initialized + +### Metadata Collection Not Working +- **First:** Ensure `DD_OTEL_STANDALONE=true` is set +- Metadata collection only runs in standalone mode +- Verify metadata modules are loaded in otel-agent startup (check for conditional FX modules) +- Check logs for "Starting metadata runner" or metadata-related errors +- Verify `enable_metadata_collection: true` in datadog.yaml +- Check that serializer and hostname components are available + +### Secrets Not Resolving +- **First:** Ensure `DD_OTEL_STANDALONE=true` is set +- Secrets resolution requires standalone mode +- Verify secrets backend is configured in datadog.yaml +- Check logs for secrets component errors +- In connected mode, secrets resolution is intentionally disabled (no-op) + +## References + +- [OTel Collector Extension Development](https://opentelemetry.io/docs/collector/building/extension/) +- [Datadog Agent Architecture](../../docs/dev/README.md) +- [Tagger Component](../../comp/core/tagger/README.md) +- [Workload Metadata](../../comp/core/workloadmeta/README.md) diff --git a/comp/otelcol/dogtelextension/def/BUILD.bazel b/comp/otelcol/dogtelextension/def/BUILD.bazel new file mode 100644 index 00000000000000..e7ee3db5592e83 --- /dev/null +++ b/comp/otelcol/dogtelextension/def/BUILD.bazel @@ -0,0 +1,9 @@ +load("@rules_go//go:def.bzl", "go_library") + +go_library( + name = "def", + srcs = ["component.go"], + importpath = "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/def", + visibility = ["//visibility:public"], + deps = ["@io_opentelemetry_go_collector_extension//:extension"], +) diff --git a/comp/otelcol/dogtelextension/def/component.go b/comp/otelcol/dogtelextension/def/component.go new file mode 100644 index 00000000000000..5584127110b29b --- /dev/null +++ b/comp/otelcol/dogtelextension/def/component.go @@ -0,0 +1,26 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package dogtelextension provides Datadog agent functionalities for OTel collector +package dogtelextension + +import ( + "go.opentelemetry.io/collector/extension" +) + +// team: opentelemetry-agent + +// Component provides Datadog agent functionalities for OTel collector including: +// - Host metadata submission +// - Remote tagger gRPC server +// - Secrets resolution (conditional) +// - Workload detection integration +type Component interface { + extension.Extension // Implement OTel Extension lifecycle + + // GetTaggerServerPort returns the port where tagger gRPC server is listening. + // Returns 0 if server is not started. + GetTaggerServerPort() int +} diff --git a/comp/otelcol/dogtelextension/fx/fx.go b/comp/otelcol/dogtelextension/fx/fx.go new file mode 100644 index 00000000000000..10428a603aa13a --- /dev/null +++ b/comp/otelcol/dogtelextension/fx/fx.go @@ -0,0 +1,28 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package fx provides the FX module for dogtelextension +package fx + +import ( + dogtelextension "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/def" + dogtelextensionimpl "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/impl" + "github.com/DataDog/datadog-agent/pkg/util/fxutil" +) + +// Module defines the fx options for this component. +// +// Note: This FX module is not currently used, as the dogtelextension is +// instantiated directly by the OTel collector framework, not via FX. +// The extension factory (NewFactoryForAgent) receives FX-injected components +// as parameters instead. +// +// This module is provided for potential future use if the architecture changes. +func Module() fxutil.Module { + return fxutil.Component( + fxutil.ProvideComponentConstructor(dogtelextensionimpl.NewExtension), + fxutil.ProvideOptional[dogtelextension.Component](), + ) +} diff --git a/comp/otelcol/dogtelextension/impl/config.go b/comp/otelcol/dogtelextension/impl/config.go new file mode 100644 index 00000000000000..126d185b415001 --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/config.go @@ -0,0 +1,76 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package dogtelextensionimpl provides the implementation of the dogtelextension component. +package dogtelextensionimpl + +import ( + "fmt" +) + +// Config defines the configuration for the dogtelextension +type Config struct { + // Metadata collection settings. + // EnableMetadataCollection is a pointer so that nil ("not set") is + // distinguishable from an explicit false, allowing NewConfigComponent to + // leave the DD agent default (true) intact when the field is absent. + EnableMetadataCollection *bool `mapstructure:"enable_metadata_collection"` + MetadataInterval int `mapstructure:"metadata_interval"` // seconds; 0 = use agent default (1800) + + // Tagger server settings + EnableTaggerServer bool `mapstructure:"enable_tagger_server"` + TaggerServerPort int `mapstructure:"tagger_server_port"` // 0 = auto-assign + TaggerServerAddr string `mapstructure:"tagger_server_addr"` // Default: localhost + TaggerMaxMessageSize int `mapstructure:"tagger_max_message_size"` // Default: 4MB + TaggerMaxConcurrentSync int `mapstructure:"tagger_max_concurrent_sync"` // Default: 5 + + // Standalone mode (controls secrets and other features) + // This is typically set via DD_OTEL_STANDALONE environment variable + StandaloneMode bool `mapstructure:"standalone_mode"` + + // Hostname overrides the agent hostname used in standalone mode. + // Maps to the "hostname" DD agent config key. + // When empty the agent resolves the hostname automatically. + Hostname string `mapstructure:"hostname"` + + // Secrets backend settings (standalone mode). + // These map directly to the corresponding DD agent config keys so that + // ENC[] handles in the OTel config can be resolved without a separate + // datadog.yaml file. + SecretBackendCommand string `mapstructure:"secret_backend_command"` // path to the resolver binary + SecretBackendArguments []string `mapstructure:"secret_backend_arguments"` // extra CLI arguments + SecretBackendTimeout int `mapstructure:"secret_backend_timeout"` // seconds; 0 = agent default (30s) + SecretBackendOutputMaxSize int `mapstructure:"secret_backend_output_max_size"` // bytes; 0 = agent default + + // Kubernetes / kubelet settings for K8s tag enrichment (standalone mode). + // When the otel-agent runs on a Kubernetes node these allow the workloadmeta + // kubelet collector and the local tagger to reach the kubelet API without + // needing a separate datadog.yaml. + KubernetesKubeletHost string `mapstructure:"kubernetes_kubelet_host"` // e.g. "status.hostIP" or an explicit IP + KubeletTLSVerify *bool `mapstructure:"kubelet_tls_verify"` // nil = keep DD agent default (true) + KubernetesHTTPKubeletPort int `mapstructure:"kubernetes_http_kubelet_port"` // 0 = agent default (10255) + KubernetesHTTPSKubeletPort int `mapstructure:"kubernetes_https_kubelet_port"` // 0 = agent default (10250) +} + +// Validate validates the configuration +func (cfg *Config) Validate() error { + if cfg.TaggerServerPort < 0 || cfg.TaggerServerPort > 65535 { + return fmt.Errorf("invalid tagger_server_port: %d (must be 0-65535)", cfg.TaggerServerPort) + } + + if cfg.TaggerMaxMessageSize <= 0 { + cfg.TaggerMaxMessageSize = 4 * 1024 * 1024 // 4MB default + } + + if cfg.MetadataInterval < 0 { + return fmt.Errorf("invalid metadata_interval: %d (must be >= 0)", cfg.MetadataInterval) + } + + if cfg.TaggerMaxConcurrentSync <= 0 { + cfg.TaggerMaxConcurrentSync = 5 // Default + } + + return nil +} diff --git a/comp/otelcol/dogtelextension/impl/config_test.go b/comp/otelcol/dogtelextension/impl/config_test.go new file mode 100644 index 00000000000000..15e469c17d02f0 --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/config_test.go @@ -0,0 +1,121 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package dogtelextensionimpl + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConfigValidate_ValidDefaults(t *testing.T) { + cfg := createDefaultConfig().(*Config) + require.NoError(t, cfg.Validate()) +} + +func TestConfigValidate_NegativePort(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerServerPort = -1 + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid tagger_server_port") +} + +func TestConfigValidate_PortTooHigh(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerServerPort = 65536 + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid tagger_server_port") +} + +func TestConfigValidate_PortZeroAllowed(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerServerPort = 0 + require.NoError(t, cfg.Validate()) +} + +func TestConfigValidate_PortMaxAllowed(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerServerPort = 65535 + require.NoError(t, cfg.Validate()) +} + +func TestConfigValidate_NegativeMetadataInterval(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.MetadataInterval = -1 + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid metadata_interval") +} + +func TestConfigValidate_AutoFixMaxMessageSize(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerMaxMessageSize = 0 + require.NoError(t, cfg.Validate()) + assert.Equal(t, 4*1024*1024, cfg.TaggerMaxMessageSize) +} + +func TestConfigValidate_AutoFixConcurrentSync(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.TaggerMaxConcurrentSync = 0 + require.NoError(t, cfg.Validate()) + assert.Equal(t, 5, cfg.TaggerMaxConcurrentSync) +} + +func TestCreateDefaultConfig(t *testing.T) { + cfg := createDefaultConfig().(*Config) + require.NotNil(t, cfg.EnableMetadataCollection) + assert.True(t, *cfg.EnableMetadataCollection) + assert.Equal(t, 300, cfg.MetadataInterval) + assert.False(t, cfg.EnableTaggerServer) + assert.Equal(t, 0, cfg.TaggerServerPort) + assert.Equal(t, "localhost", cfg.TaggerServerAddr) + assert.Equal(t, 4*1024*1024, cfg.TaggerMaxMessageSize) + assert.Equal(t, 5, cfg.TaggerMaxConcurrentSync) + assert.False(t, cfg.StandaloneMode) + // Standalone-mode fields default to zero/nil so they do not + // override the DD agent config when not explicitly set. + assert.Equal(t, "", cfg.Hostname) + assert.Equal(t, "", cfg.SecretBackendCommand) + assert.Nil(t, cfg.SecretBackendArguments) + assert.Equal(t, 0, cfg.SecretBackendTimeout) + assert.Equal(t, 0, cfg.SecretBackendOutputMaxSize) + assert.Equal(t, "", cfg.KubernetesKubeletHost) + assert.Nil(t, cfg.KubeletTLSVerify) + assert.Equal(t, 0, cfg.KubernetesHTTPKubeletPort) + assert.Equal(t, 0, cfg.KubernetesHTTPSKubeletPort) +} + +func TestConfigValidate_StandaloneFields_Valid(t *testing.T) { + falseVal := false + cfg := createDefaultConfig().(*Config) + cfg.Hostname = "my-host" + cfg.SecretBackendCommand = "/usr/local/bin/secret-provider" + cfg.SecretBackendArguments = []string{"--arg1"} + cfg.SecretBackendTimeout = 30 + cfg.SecretBackendOutputMaxSize = 4096 + cfg.KubernetesKubeletHost = "10.0.0.1" + cfg.KubeletTLSVerify = &falseVal + cfg.KubernetesHTTPKubeletPort = 10255 + cfg.KubernetesHTTPSKubeletPort = 10250 + require.NoError(t, cfg.Validate()) +} + +func TestConfigValidate_KubeletTLSVerify_NilIsValid(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.KubeletTLSVerify = nil + require.NoError(t, cfg.Validate()) +} + +func TestConfigValidate_KubeletTLSVerify_ExplicitFalse(t *testing.T) { + falseVal := false + cfg := createDefaultConfig().(*Config) + cfg.KubeletTLSVerify = &falseVal + require.NoError(t, cfg.Validate()) + assert.False(t, *cfg.KubeletTLSVerify) +} diff --git a/comp/otelcol/dogtelextension/impl/extension.go b/comp/otelcol/dogtelextension/impl/extension.go new file mode 100644 index 00000000000000..6834a41037974e --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/extension.go @@ -0,0 +1,159 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package dogtelextensionimpl + +import ( + "context" + "net" + "time" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/pdata/pcommon" + "google.golang.org/grpc" + + coreconfig "github.com/DataDog/datadog-agent/comp/core/config" + "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" + ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" + log "github.com/DataDog/datadog-agent/comp/core/log/def" + secrets "github.com/DataDog/datadog-agent/comp/core/secrets/def" + secretnooptypes "github.com/DataDog/datadog-agent/comp/core/secrets/noop-impl/types" + tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" + "github.com/DataDog/datadog-agent/comp/core/telemetry" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/comp/metadata/runner" + dogtelmetrics "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/impl/metrics" + agentmetrics "github.com/DataDog/datadog-agent/pkg/metrics" + "github.com/DataDog/datadog-agent/pkg/serializer" +) + +// dogtelExtension implements the dogtelextension.Component interface +type dogtelExtension struct { + config *Config + log log.Component + coreConfig coreconfig.Component + + // Core components injected from FX + serializer serializer.MetricSerializer + hostname hostnameinterface.Component + workloadmeta workloadmeta.Component + tagger tagger.Component + ipc ipc.Component + telemetry telemetry.Component + secrets secrets.Component + + // Build info for metric tags + buildInfo component.BuildInfo + + // Metadata components (created by extension) + metadataRunner runner.Component + + // Tagger gRPC server + taggerServer *grpc.Server + taggerServerPort int + taggerListener net.Listener +} + +// Start implements extension.Extension +func (e *dogtelExtension) Start(_ context.Context, _ component.Host) error { + // Check if running in standalone mode + standalone := e.coreConfig.GetBool("otel_standalone") + if !standalone { + e.log.Warn("dogtelextension is enabled but DD_OTEL_STANDALONE is false") + e.log.Warn("This extension should only be used in standalone mode (DD_OTEL_STANDALONE=true)") + e.log.Warn("In connected mode, the core Datadog Agent provides these functionalities") + e.log.Info("dogtelextension disabled (not in standalone mode)") + return nil + } + + e.log.Info("Starting dogtelextension in standalone mode") + + // Warn if the noop secrets implementation is wired in standalone mode. + // In standalone mode command.go selects secretsfx.Module() (real impl); finding + // the noop here indicates a misconfiguration and ENC[] handles will not be resolved. + if isSecretsNoop(e.secrets) { + e.log.Warn("dogtelextension: secrets component is noop — ENC[] handles in OTel config will NOT be resolved") + e.log.Warn("Ensure secretsfx.Module() (not secretsnoopfx.Module()) is wired when DD_OTEL_STANDALONE=true") + } + + // Start tagger gRPC server if enabled + if e.config.EnableTaggerServer { + if err := e.startTaggerServer(); err != nil { + e.log.Errorf("Failed to start tagger server: %v", err) + return err + } + } + + // Start metadata collection if enabled + metadataEnabled := e.config.EnableMetadataCollection != nil && *e.config.EnableMetadataCollection + if metadataEnabled && e.metadataRunner != nil { + e.log.Info("Metadata collection is enabled") + } + + e.log.Infof("dogtelextension started successfully (tagger_port=%d, metadata_enabled=%t)", + e.taggerServerPort, metadataEnabled) + + // Send liveness metric to indicate the extension is running + if err := e.sendLivenessMetric(context.Background()); err != nil { + e.log.Warnf("Failed to send liveness metric: %v", err) + } + + return nil +} + +// sendLivenessMetric sends a gauge metric indicating the extension is running. +func (e *dogtelExtension) sendLivenessMetric(ctx context.Context) error { + hostname := e.hostname.GetSafe(ctx) + now := pcommon.NewTimestampFromTime(time.Now()) + buildTags := dogtelmetrics.TagsFromBuildInfo(e.buildInfo) + serie := dogtelmetrics.CreateLivenessSerie(hostname, uint64(now), buildTags) + + var serieErr error + agentmetrics.Serialize( + agentmetrics.NewIterableSeries(func(_ *agentmetrics.Serie) {}, 200, 4000), + agentmetrics.NewIterableSketches(func(_ *agentmetrics.SketchSeries) {}, 200, 4000), + func(seriesSink agentmetrics.SerieSink, _ agentmetrics.SketchesSink) { + seriesSink.Append(serie) + }, + func(serieSource agentmetrics.SerieSource) { + serieErr = e.serializer.SendIterableSeries(serieSource) + }, + func(_ agentmetrics.SketchesSource) {}, + ) + return serieErr +} + +// Shutdown implements extension.Extension +func (e *dogtelExtension) Shutdown(_ context.Context) error { + e.log.Info("Shutting down dogtelextension") + + // Stop tagger server gracefully + e.stopTaggerServer() + + // Stop metadata collection + if e.metadataRunner != nil { + // Metadata runner stops via its own lifecycle hooks + e.log.Debug("Metadata runner will stop via FX lifecycle") + } + + e.log.Info("dogtelextension shutdown complete") + return nil +} + +// GetTaggerServerPort implements dogtelextension.Component +func (e *dogtelExtension) GetTaggerServerPort() int { + return e.taggerServerPort +} + +// isSecretsNoop reports whether s is the noop secrets implementation. +// In standalone mode the real secretsfx should always be injected; finding +// the noop indicates a wiring mistake and secrets handles won't be resolved. +func isSecretsNoop(s secrets.Component) bool { + if s == nil { + return false + } + _, ok := s.(*secretnooptypes.SecretNoop) + return ok +} diff --git a/comp/otelcol/dogtelextension/impl/extension_test.go b/comp/otelcol/dogtelextension/impl/extension_test.go new file mode 100644 index 00000000000000..86b554258a1dda --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/extension_test.go @@ -0,0 +1,210 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test + +package dogtelextensionimpl + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + + configmock "github.com/DataDog/datadog-agent/comp/core/config" + "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" + ipcmock "github.com/DataDog/datadog-agent/comp/core/ipc/mock" + logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" + secrets "github.com/DataDog/datadog-agent/comp/core/secrets/def" + secretnooptypes "github.com/DataDog/datadog-agent/comp/core/secrets/noop-impl/types" + "github.com/DataDog/datadog-agent/comp/core/telemetry/noopsimpl" + agentmetrics "github.com/DataDog/datadog-agent/pkg/metrics" + serializermock "github.com/DataDog/datadog-agent/pkg/serializer/mocks" +) + +// newTestExtension creates a dogtelExtension wired with test doubles. +func newTestExtension(t *testing.T, cfgOverrides map[string]interface{}, extCfg *Config) *dogtelExtension { + t.Helper() + if extCfg == nil { + extCfg = createDefaultConfig().(*Config) + } + hostname, _ := hostnameinterface.NewMock("test-host") + return &dogtelExtension{ + config: extCfg, + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, cfgOverrides), + serializer: serializermock.NewMetricSerializer(t), + hostname: hostname, + telemetry: noopsimpl.GetCompatComponent(), + ipc: ipcmock.New(t), + buildInfo: component.BuildInfo{}, + } +} + +// TestStart_ConnectedMode verifies that Start is a no-op when otel_standalone=false. +func TestStart_ConnectedMode(t *testing.T) { + ext := newTestExtension(t, map[string]interface{}{"otel_standalone": false}, nil) + err := ext.Start(context.Background(), nil) + require.NoError(t, err) + assert.Nil(t, ext.taggerServer) + assert.Equal(t, 0, ext.taggerServerPort) +} + +// TestStart_StandaloneMode_TaggerDisabled verifies that Start succeeds in standalone +// mode without starting the tagger server when it is disabled. +func TestStart_StandaloneMode_TaggerDisabled(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.EnableTaggerServer = false + + hostname, _ := hostnameinterface.NewMock("test-host") + serializer := serializermock.NewMetricSerializer(t) + serializer.On("SendIterableSeries", mock.Anything).Return(nil) + + ext := &dogtelExtension{ + config: cfg, + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, map[string]interface{}{"otel_standalone": true}), + serializer: serializer, + hostname: hostname, + telemetry: noopsimpl.GetCompatComponent(), + ipc: ipcmock.New(t), + buildInfo: component.BuildInfo{Version: "1.0", Command: "otel-agent"}, + } + + err := ext.Start(context.Background(), nil) + require.NoError(t, err) + assert.Nil(t, ext.taggerServer) +} + +// TestShutdown_NoTaggerServer verifies Shutdown does not panic when no server is running. +func TestShutdown_NoTaggerServer(t *testing.T) { + ext := newTestExtension(t, nil, nil) + err := ext.Shutdown(context.Background()) + require.NoError(t, err) +} + +// TestGetTaggerServerPort returns the stored port. +func TestGetTaggerServerPort(t *testing.T) { + ext := newTestExtension(t, nil, nil) + assert.Equal(t, 0, ext.GetTaggerServerPort()) + + ext.taggerServerPort = 15555 + assert.Equal(t, 15555, ext.GetTaggerServerPort()) +} + +// TestSendLivenessMetric_Success verifies SendIterableSeries is called with a SerieSource. +func TestSendLivenessMetric_Success(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("my-host") + + serializer := serializermock.NewMetricSerializer(t) + serializer.On("SendIterableSeries", mock.AnythingOfType("*metrics.IterableSeries")).Return(nil) + + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializer, + hostname: hostname, + buildInfo: component.BuildInfo{Version: "7.0.0", Command: "otel-agent"}, + } + + err := ext.sendLivenessMetric(context.Background()) + require.NoError(t, err) + serializer.AssertExpectations(t) +} + +// TestSendLivenessMetric_SerializerError verifies errors from SendIterableSeries are propagated. +func TestSendLivenessMetric_SerializerError(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("my-host") + wantErr := errors.New("serializer unavailable") + + serializer := serializermock.NewMetricSerializer(t) + serializer.On("SendIterableSeries", mock.Anything).Return(wantErr) + + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializer, + hostname: hostname, + } + + err := ext.sendLivenessMetric(context.Background()) + require.ErrorIs(t, err, wantErr) +} + +// TestSendLivenessMetric_UsesHostname verifies the hostname is passed to the liveness serie. +func TestSendLivenessMetric_UsesHostname(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("expected-host") + + var captured []*agentmetrics.Serie + serializer := serializermock.NewMetricSerializer(t) + serializer.On("SendIterableSeries", mock.Anything).Run(func(args mock.Arguments) { + src := args.Get(0).(agentmetrics.SerieSource) + // Consume all series while the goroutine is running. + for src.MoveNext() { + if s := src.Current(); s != nil { + sc := *s + captured = append(captured, &sc) + } + } + }).Return(nil) + + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializer, + hostname: hostname, + buildInfo: component.BuildInfo{}, + } + + err := ext.sendLivenessMetric(context.Background()) + require.NoError(t, err) + require.Len(t, captured, 1) + assert.Equal(t, "otel.dogtel_extension.running", captured[0].Name) + assert.Equal(t, "expected-host", captured[0].Host) + assert.Equal(t, 1.0, captured[0].Points[0].Value) +} + +// TestIsSecretsNoop_WithNoopImpl verifies that the noop impl is detected. +func TestIsSecretsNoop_WithNoopImpl(t *testing.T) { + var s secrets.Component = &secretnooptypes.SecretNoop{} + assert.True(t, isSecretsNoop(s)) +} + +// TestIsSecretsNoop_WithNilSecrets verifies that a nil component returns false. +func TestIsSecretsNoop_WithNilSecrets(t *testing.T) { + assert.False(t, isSecretsNoop(nil)) +} + +// TestStart_StandaloneMode_NoopSecretsWarning verifies that Start succeeds even +// when the noop secrets impl is injected in standalone mode (the warning is logged +// but does not prevent startup). +func TestStart_StandaloneMode_NoopSecretsWarning(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.EnableTaggerServer = false + + hostname, _ := hostnameinterface.NewMock("test-host") + sz := serializermock.NewMetricSerializer(t) + sz.On("SendIterableSeries", mock.Anything).Return(nil) + + ext := &dogtelExtension{ + config: cfg, + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, map[string]interface{}{"otel_standalone": true}), + serializer: sz, + hostname: hostname, + telemetry: noopsimpl.GetCompatComponent(), + ipc: ipcmock.New(t), + // Deliberately inject the noop impl to simulate a misconfiguration. + secrets: &secretnooptypes.SecretNoop{}, + } + + // Start should succeed; the warning is logged but not fatal. + err := ext.Start(context.Background(), nil) + require.NoError(t, err) +} diff --git a/comp/otelcol/dogtelextension/impl/factory.go b/comp/otelcol/dogtelextension/impl/factory.go new file mode 100644 index 00000000000000..1f555117f4a103 --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/factory.go @@ -0,0 +1,171 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package dogtelextensionimpl + +import ( + "context" + "errors" + "fmt" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/extension" + + coreconfig "github.com/DataDog/datadog-agent/comp/core/config" + "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" + ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" + log "github.com/DataDog/datadog-agent/comp/core/log/def" + secrets "github.com/DataDog/datadog-agent/comp/core/secrets/def" + tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" + "github.com/DataDog/datadog-agent/comp/core/telemetry" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + dogtelextension "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension/def" + "github.com/DataDog/datadog-agent/pkg/serializer" +) + +var ( + // Type is the type string for this extension + Type = component.MustNewType("dogtel") +) + +const ( + stability = component.StabilityLevelAlpha +) + +// componentHolder stores FX-injected components for extension creation +type componentHolder struct { + config coreconfig.Component + log log.Component + serializer serializer.MetricSerializer + hostname hostnameinterface.Component + workloadmeta workloadmeta.Component + tagger tagger.Component + ipc ipc.Component + telemetry telemetry.Component + secrets secrets.Component +} + +// NewFactory creates a basic factory (for standalone OTel collector builds) +// This factory will return an error since the extension requires agent components +func NewFactory() extension.Factory { + return extension.NewFactory( + Type, + createDefaultConfig, + func(_ context.Context, _ extension.Settings, _ component.Config) (extension.Extension, error) { + return nil, errors.New("dogtelextension requires agent components, use NewFactoryForAgent()") + }, + stability, + ) +} + +// NewFactoryForAgent creates factory with FX component injection (for otel-agent) +func NewFactoryForAgent( + config coreconfig.Component, + log log.Component, + serializer serializer.MetricSerializer, + hostname hostnameinterface.Component, + workloadmeta workloadmeta.Component, + tagger tagger.Component, + ipc ipc.Component, + telemetry telemetry.Component, + secrets secrets.Component, +) extension.Factory { + components := &componentHolder{ + config: config, + log: log, + serializer: serializer, + hostname: hostname, + workloadmeta: workloadmeta, + tagger: tagger, + ipc: ipc, + telemetry: telemetry, + secrets: secrets, + } + + return extension.NewFactory( + Type, + createDefaultConfig, + func(_ context.Context, params extension.Settings, cfg component.Config) (extension.Extension, error) { + return newExtension(cfg.(*Config), components, params.BuildInfo) + }, + stability, + ) +} + +// Requires defines the dependencies needed to create a dogtelExtension via FX. +type Requires struct { + Config coreconfig.Component + Log log.Component + Serializer serializer.MetricSerializer + Hostname hostnameinterface.Component + Workloadmeta workloadmeta.Component + Tagger tagger.Component + IPC ipc.Component + Telemetry telemetry.Component + Secrets secrets.Component +} + +// NewExtension creates a new dogtelextension instance for use with FX. +func NewExtension(reqs Requires) (dogtelextension.Component, error) { + cfg := createDefaultConfig().(*Config) + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + return &dogtelExtension{ + config: cfg, + log: reqs.Log, + coreConfig: reqs.Config, + serializer: reqs.Serializer, + hostname: reqs.Hostname, + workloadmeta: reqs.Workloadmeta, + tagger: reqs.Tagger, + ipc: reqs.IPC, + telemetry: reqs.Telemetry, + secrets: reqs.Secrets, + buildInfo: component.BuildInfo{}, + }, nil +} + +func createDefaultConfig() component.Config { + trueVal := true + return &Config{ + EnableMetadataCollection: &trueVal, + MetadataInterval: 300, // 5 minutes + EnableTaggerServer: false, + TaggerServerPort: 0, // Auto-assign + TaggerServerAddr: "localhost", + TaggerMaxMessageSize: 4 * 1024 * 1024, // 4MB + TaggerMaxConcurrentSync: 5, + StandaloneMode: false, // Default: connected mode + } +} + +func newExtension( + cfg *Config, + components *componentHolder, + buildInfo component.BuildInfo, +) (dogtelextension.Component, error) { + // Validate configuration + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + + // Create extension with all dependencies + ext := &dogtelExtension{ + config: cfg, + log: components.log, + serializer: components.serializer, + hostname: components.hostname, + workloadmeta: components.workloadmeta, + tagger: components.tagger, + ipc: components.ipc, + telemetry: components.telemetry, + secrets: components.secrets, + coreConfig: components.config, + buildInfo: buildInfo, + } + + return ext, nil +} diff --git a/comp/otelcol/dogtelextension/impl/factory_test.go b/comp/otelcol/dogtelextension/impl/factory_test.go new file mode 100644 index 00000000000000..579f31b97c29dd --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/factory_test.go @@ -0,0 +1,109 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test + +package dogtelextensionimpl + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/extension" + + configmock "github.com/DataDog/datadog-agent/comp/core/config" + "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface" + ipcmock "github.com/DataDog/datadog-agent/comp/core/ipc/mock" + logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" + "github.com/DataDog/datadog-agent/comp/core/telemetry/noopsimpl" + serializermock "github.com/DataDog/datadog-agent/pkg/serializer/mocks" +) + +// TestNewFactory_ReturnsErrorOnCreate verifies NewFactory produces a factory +// that refuses to create an extension (agent components are required). +func TestNewFactory_ReturnsErrorOnCreate(t *testing.T) { + factory := NewFactory() + assert.Equal(t, Type, factory.Type()) + + cfg := factory.CreateDefaultConfig() + require.NotNil(t, cfg) + + settings := extension.Settings{ID: component.NewID(Type)} + _, err := factory.Create(context.Background(), settings, cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "dogtelextension requires agent components") +} + +// TestNewFactoryForAgent_CreatesExtension verifies NewFactoryForAgent produces +// a working factory that creates an extension successfully. +func TestNewFactoryForAgent_CreatesExtension(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("test-host") + factory := NewFactoryForAgent( + configmock.NewMockWithOverrides(t, nil), + logmock.New(t), + serializermock.NewMetricSerializer(t), + hostname, + nil, // workloadmeta + nil, // tagger + ipcmock.New(t), + noopsimpl.GetCompatComponent(), + nil, // secrets + ) + + assert.Equal(t, Type, factory.Type()) + + cfg := factory.CreateDefaultConfig() + settings := extension.Settings{ + ID: component.NewID(Type), + BuildInfo: component.BuildInfo{Version: "1.0.0", Command: "otel-agent"}, + } + ext, err := factory.Create(context.Background(), settings, cfg) + require.NoError(t, err) + require.NotNil(t, ext) +} + +// TestNewFactoryForAgent_InvalidConfigErrors verifies the factory rejects +// an invalid configuration at create time. +func TestNewFactoryForAgent_InvalidConfigErrors(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("test-host") + factory := NewFactoryForAgent( + configmock.NewMockWithOverrides(t, nil), + logmock.New(t), + serializermock.NewMetricSerializer(t), + hostname, + nil, + nil, + ipcmock.New(t), + noopsimpl.GetCompatComponent(), + nil, + ) + + cfg := factory.CreateDefaultConfig().(*Config) + cfg.TaggerServerPort = -5 + + settings := extension.Settings{ID: component.NewID(Type)} + _, err := factory.Create(context.Background(), settings, cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid configuration") +} + +// TestNewExtension_FX_InvalidConfig verifies the FX constructor (NewExtension) +// rejects an invalid default config. +func TestNewExtension_FX_DefaultSucceeds(t *testing.T) { + hostname, _ := hostnameinterface.NewMock("test-host") + ext, err := NewExtension(Requires{ + Config: configmock.NewMockWithOverrides(t, nil), + Log: logmock.New(t), + Serializer: serializermock.NewMetricSerializer(t), + Hostname: hostname, + IPC: ipcmock.New(t), + Telemetry: noopsimpl.GetCompatComponent(), + }) + require.NoError(t, err) + require.NotNil(t, ext) +} diff --git a/comp/otelcol/dogtelextension/impl/metadata/metadata.go b/comp/otelcol/dogtelextension/impl/metadata/metadata.go new file mode 100644 index 00000000000000..75cf6a316cb67c --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/metadata/metadata.go @@ -0,0 +1,19 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package metadata provides OpenTelemetry component metadata for dogtelextension +package metadata + +import ( + "go.opentelemetry.io/collector/component" +) + +var ( + // Type is the component type for this extension + Type = component.MustNewType("dogtel") + + // ScopeName is the scope name for telemetry produced by this extension + ScopeName = "github.com/DataDog/datadog-agent/comp/otelcol/dogtelextension" +) diff --git a/comp/otelcol/dogtelextension/impl/metrics/metrics.go b/comp/otelcol/dogtelextension/impl/metrics/metrics.go new file mode 100644 index 00000000000000..4af759be09eb34 --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/metrics/metrics.go @@ -0,0 +1,43 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package metrics provides metric creation helpers for the dogtelextension. +package metrics + +import ( + "go.opentelemetry.io/collector/component" + + "github.com/DataDog/datadog-agent/pkg/metrics" + "github.com/DataDog/datadog-agent/pkg/tagset" +) + +// TagsFromBuildInfo returns a list of tags derived from buildInfo to be used when creating metrics. +func TagsFromBuildInfo(buildInfo component.BuildInfo) []string { + var tags []string + if buildInfo.Version != "" { + tags = append(tags, "version:"+buildInfo.Version) + } + if buildInfo.Command != "" { + tags = append(tags, "command:"+buildInfo.Command) + } + return tags +} + +// CreateLivenessSerie creates a liveness metric serie to report that the dogtel extension is running. +// The timestamp should be in Unix nanoseconds. +func CreateLivenessSerie(hostname string, timestampNs uint64, tags []string) *metrics.Serie { + // Transform UnixNano timestamp into Unix timestamp (seconds) + timestamp := float64(timestampNs / 1e9) + + return &metrics.Serie{ + Name: "otel.dogtel_extension.running", + Points: []metrics.Point{{Ts: timestamp, Value: 1.0}}, + Tags: tagset.NewCompositeTags(tags, nil), + Host: hostname, + MType: metrics.APIGaugeType, + SourceTypeName: "otel.dogtel_extension", + Source: metrics.MetricSourceOpenTelemetryCollectorUnknown, + } +} diff --git a/comp/otelcol/dogtelextension/impl/metrics/metrics_test.go b/comp/otelcol/dogtelextension/impl/metrics/metrics_test.go new file mode 100644 index 00000000000000..68eb4d0b8c6dbe --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/metrics/metrics_test.go @@ -0,0 +1,97 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package metrics + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + + "github.com/DataDog/datadog-agent/pkg/metrics" +) + +func TestTagsFromBuildInfo(t *testing.T) { + tests := []struct { + name string + buildInfo component.BuildInfo + expected []string + }{ + { + name: "empty build info", + buildInfo: component.BuildInfo{}, + expected: nil, + }, + { + name: "version only", + buildInfo: component.BuildInfo{ + Version: "1.2.3", + }, + expected: []string{"version:1.2.3"}, + }, + { + name: "command only", + buildInfo: component.BuildInfo{ + Command: "otel-agent", + }, + expected: []string{"command:otel-agent"}, + }, + { + name: "version and command", + buildInfo: component.BuildInfo{ + Version: "1.2.3", + Command: "otel-agent", + }, + expected: []string{"version:1.2.3", "command:otel-agent"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tags := TagsFromBuildInfo(tt.buildInfo) + assert.Equal(t, tt.expected, tags) + }) + } +} + +func TestCreateLivenessSerie(t *testing.T) { + hostname := "test-host" + // 1000 seconds in nanoseconds + timestampNs := uint64(1000 * 1e9) + tags := []string{"version:1.0.0", "command:otel-agent"} + + serie := CreateLivenessSerie(hostname, timestampNs, tags) + + require.NotNil(t, serie) + assert.Equal(t, "otel.dogtel_extension.running", serie.Name) + assert.Equal(t, hostname, serie.Host) + assert.Equal(t, metrics.APIGaugeType, serie.MType) + assert.Equal(t, "otel.dogtel_extension", serie.SourceTypeName) + assert.Equal(t, metrics.MetricSourceOpenTelemetryCollectorUnknown, serie.Source) + + require.Len(t, serie.Points, 1) + assert.Equal(t, float64(1000), serie.Points[0].Ts) + assert.Equal(t, 1.0, serie.Points[0].Value) + + assert.Equal(t, tags, serie.Tags.UnsafeToReadOnlySliceString()) +} + +func TestCreateLivenessSerie_TimestampConversion(t *testing.T) { + // Verify nanoseconds are correctly converted to seconds + timestampNs := uint64(1704067200 * 1e9) // 2024-01-01 00:00:00 UTC in nanoseconds + serie := CreateLivenessSerie("host", timestampNs, nil) + + require.NotNil(t, serie) + require.Len(t, serie.Points, 1) + assert.Equal(t, float64(1704067200), serie.Points[0].Ts) +} + +func TestCreateLivenessSerie_EmptyTags(t *testing.T) { + serie := CreateLivenessSerie("host", uint64(1000*1e9), nil) + require.NotNil(t, serie) + assert.Empty(t, serie.Tags.UnsafeToReadOnlySliceString()) +} diff --git a/comp/otelcol/dogtelextension/impl/tagger_server.go b/comp/otelcol/dogtelextension/impl/tagger_server.go new file mode 100644 index 00000000000000..414c032242e76a --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/tagger_server.go @@ -0,0 +1,135 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package dogtelextensionimpl + +import ( + "context" + "fmt" + "net" + "time" + + grpcauth "github.com/grpc-ecosystem/go-grpc-middleware/auth" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + + taggerserver "github.com/DataDog/datadog-agent/comp/core/tagger/server" + pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/core" + grpcutil "github.com/DataDog/datadog-agent/pkg/util/grpc" +) + +var taggerServerGracefulStopTimeout = 5 * time.Second + +// taggerServerWrapper implements minimal pb.AgentSecureServer for tagger only +type taggerServerWrapper struct { + pb.UnimplementedAgentSecureServer + taggerSrv *taggerserver.Server +} + +// TaggerStreamEntities implements pb.AgentSecureServer +func (w *taggerServerWrapper) TaggerStreamEntities(in *pb.StreamTagsRequest, out pb.AgentSecure_TaggerStreamEntitiesServer) error { + return w.taggerSrv.TaggerStreamEntities(in, out) +} + +// TaggerFetchEntity implements pb.AgentSecureServer +func (w *taggerServerWrapper) TaggerFetchEntity(ctx context.Context, in *pb.FetchEntityRequest) (*pb.FetchEntityResponse, error) { + return w.taggerSrv.TaggerFetchEntity(ctx, in) +} + +// startTaggerServer starts the minimal tagger gRPC server +func (e *dogtelExtension) startTaggerServer() error { + // 1. Create listener + addr := fmt.Sprintf("%s:%d", e.config.TaggerServerAddr, e.config.TaggerServerPort) + lis, err := net.Listen("tcp", addr) + if err != nil { + return fmt.Errorf("failed to create listener on %s: %w", addr, err) + } + e.taggerListener = lis + + // Store the actual port (useful when auto-assigning with port 0) + e.taggerServerPort = lis.Addr().(*net.TCPAddr).Port + + // 2. Create tagger server component (from comp/core/tagger/server) + maxEventSize := e.config.TaggerMaxMessageSize / 2 + taggerSrv := taggerserver.NewServer( + e.tagger, + e.telemetry, + maxEventSize, + e.config.TaggerMaxConcurrentSync, + ) + + // 3. Setup gRPC server with authentication + var grpcOpts []grpc.ServerOption + + // Get TLS credentials from IPC component + tlsConf := e.ipc.GetTLSServerConfig() + if tlsConf != nil { + creds := credentials.NewTLS(tlsConf) + grpcOpts = append(grpcOpts, grpc.Creds(creds)) + e.log.Debug("Tagger server: TLS enabled") + } else { + e.log.Warn("Tagger server: TLS not configured, running without TLS") + } + + // Add auth interceptor from IPC + authInterceptor := grpcauth.UnaryServerInterceptor(grpcutil.StaticAuthInterceptor(e.ipc.GetAuthToken())) + grpcOpts = append(grpcOpts, grpc.UnaryInterceptor(authInterceptor)) + + // Add stream auth interceptor + streamAuthInterceptor := grpcauth.StreamServerInterceptor(grpcutil.StaticAuthInterceptor(e.ipc.GetAuthToken())) + grpcOpts = append(grpcOpts, grpc.StreamInterceptor(streamAuthInterceptor)) + + // Set max message size + grpcOpts = append(grpcOpts, + grpc.MaxRecvMsgSize(e.config.TaggerMaxMessageSize), + grpc.MaxSendMsgSize(e.config.TaggerMaxMessageSize), + ) + + // 4. Create gRPC server + e.taggerServer = grpc.NewServer(grpcOpts...) + + // 5. Register tagger service + pb.RegisterAgentSecureServer(e.taggerServer, &taggerServerWrapper{ + taggerSrv: taggerSrv, + }) + + // 6. Start serving in goroutine + go func() { + e.log.Infof("Starting tagger gRPC server on %s (port %d)", addr, e.taggerServerPort) + if err := e.taggerServer.Serve(lis); err != nil { + e.log.Errorf("Tagger server error: %v", err) + } + }() + + return nil +} + +// stopTaggerServer stops the tagger gRPC server. It attempts a graceful stop +// (waiting for in-flight RPCs to finish) but falls back to a forced Stop() if +// any stream subscriber is still connected after taggerServerGracefulStopTimeout. +// This prevents TaggerStreamEntities long-lived streams from blocking process exit. +func (e *dogtelExtension) stopTaggerServer() { + if e.taggerServer != nil { + e.log.Info("Stopping tagger gRPC server") + stopped := make(chan struct{}) + go func() { + e.taggerServer.GracefulStop() + close(stopped) + }() + select { + case <-stopped: + // clean shutdown + case <-time.After(taggerServerGracefulStopTimeout): + e.log.Warn("Tagger gRPC server did not stop gracefully within timeout; forcing stop") + e.taggerServer.Stop() + <-stopped // wait for GracefulStop goroutine to return + } + e.taggerServer = nil + } + if e.taggerListener != nil { + e.taggerListener.Close() + e.taggerListener = nil + } +} diff --git a/comp/otelcol/dogtelextension/impl/tagger_server_test.go b/comp/otelcol/dogtelextension/impl/tagger_server_test.go new file mode 100644 index 00000000000000..4c12f8a8c3e9e1 --- /dev/null +++ b/comp/otelcol/dogtelextension/impl/tagger_server_test.go @@ -0,0 +1,152 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test + +package dogtelextensionimpl + +import ( + "context" + "net" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + configmock "github.com/DataDog/datadog-agent/comp/core/config" + ipcmock "github.com/DataDog/datadog-agent/comp/core/ipc/mock" + logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" + pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/core" + serializermock "github.com/DataDog/datadog-agent/pkg/serializer/mocks" +) + +// startBareGRPCServer starts a real gRPC server on an ephemeral port and +// returns the server and the listener. The caller owns both. +func startBareGRPCServer(t *testing.T) (*grpc.Server, net.Listener) { + t.Helper() + lis, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + srv := grpc.NewServer() + pb.RegisterAgentSecureServer(srv, &pb.UnimplementedAgentSecureServer{}) + go func() { + _ = srv.Serve(lis) + }() + return srv, lis +} + +// TestStopTaggerServer_NilServer verifies stopTaggerServer is a no-op and does +// not panic when neither taggerServer nor taggerListener are set. +func TestStopTaggerServer_NilServer(t *testing.T) { + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializermock.NewMetricSerializer(t), + ipc: ipcmock.New(t), + } + // Must not panic and must leave fields nil. + assert.NotPanics(t, func() { ext.stopTaggerServer() }) + assert.Nil(t, ext.taggerServer) + assert.Nil(t, ext.taggerListener) +} + +// TestStopTaggerServer_CleanStop verifies that stopTaggerServer completes +// quickly when no clients are connected (GracefulStop returns immediately). +func TestStopTaggerServer_CleanStop(t *testing.T) { + srv, lis := startBareGRPCServer(t) + + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializermock.NewMetricSerializer(t), + ipc: ipcmock.New(t), + taggerServer: srv, + taggerListener: lis, + } + + start := time.Now() + ext.stopTaggerServer() + elapsed := time.Since(start) + + // Should finish well within the graceful-stop timeout. + assert.Less(t, elapsed, taggerServerGracefulStopTimeout, + "stopTaggerServer took longer than expected for an idle server") + assert.Nil(t, ext.taggerServer) + assert.Nil(t, ext.taggerListener) +} + +// TestStopTaggerServer_ForcedStop verifies that stopTaggerServer does not block +// indefinitely when GracefulStop would stall (simulated by holding an open +// stream). It must fall back to Stop() and return within a reasonable bound +// (well under 2× the grace timeout). +func TestStopTaggerServer_ForcedStop(t *testing.T) { + lis, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + + // blockingWrapper never finishes TaggerStreamEntities, simulating a + // long-lived subscriber that would prevent GracefulStop from returning. + blockingWrapper := &blockingTaggerServer{done: make(chan struct{})} + srv := grpc.NewServer() + pb.RegisterAgentSecureServer(srv, blockingWrapper) + go func() { _ = srv.Serve(lis) }() + + // Dial and open a streaming RPC so GracefulStop will wait for it. + conn, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + t.Cleanup(func() { conn.Close() }) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + client := pb.NewAgentSecureClient(conn) + stream, err := client.TaggerStreamEntities(ctx, &pb.StreamTagsRequest{}) + require.NoError(t, err) + // Consume the stream in the background so the RPC is live. + go func() { _, _ = stream.Recv() }() + + // Give the server a moment to register the in-flight stream. + time.Sleep(50 * time.Millisecond) + + // Use a very short grace timeout so the test runs fast. + origTimeout := taggerServerGracefulStopTimeout + taggerServerGracefulStopTimeout = 200 * time.Millisecond + t.Cleanup(func() { taggerServerGracefulStopTimeout = origTimeout }) + + ext := &dogtelExtension{ + log: logmock.New(t), + coreConfig: configmock.NewMockWithOverrides(t, nil), + serializer: serializermock.NewMetricSerializer(t), + ipc: ipcmock.New(t), + taggerServer: srv, + taggerListener: lis, + } + + // stopTaggerServer must return within grace timeout + a small margin. + deadline := time.Now().Add(taggerServerGracefulStopTimeout + 500*time.Millisecond) + ext.stopTaggerServer() + + assert.True(t, time.Now().Before(deadline), + "stopTaggerServer blocked past the expected deadline") + assert.Nil(t, ext.taggerServer) + assert.Nil(t, ext.taggerListener) +} + +// blockingTaggerServer is a pb.AgentSecureServer whose TaggerStreamEntities +// blocks until either the done channel is closed or the stream context is +// cancelled (which happens when the gRPC server calls Stop()). +type blockingTaggerServer struct { + pb.UnimplementedAgentSecureServer + done chan struct{} +} + +func (b *blockingTaggerServer) TaggerStreamEntities(_ *pb.StreamTagsRequest, stream pb.AgentSecure_TaggerStreamEntitiesServer) error { + select { + case <-b.done: + case <-stream.Context().Done(): + } + return nil +} diff --git a/pkg/config/setup/common_settings.go b/pkg/config/setup/common_settings.go index 018397f5aa739b..e6887fef8f192c 100644 --- a/pkg/config/setup/common_settings.go +++ b/pkg/config/setup/common_settings.go @@ -820,6 +820,9 @@ func initCoreAgentFull(config pkgconfigmodel.Setup) { }) config.BindEnvAndSetDefault("otelcollector.gateway.mode", false) config.BindEnvAndSetDefault("otelcollector.installation_method", "") + // otel_standalone controls whether otel-agent runs in standalone mode (with full secrets, tagger server) + // or connected mode (expects core agent for secrets and tagger) + config.BindEnvAndSetDefault("otel_standalone", false) // inventories config.BindEnvAndSetDefault("inventories_enabled", true) diff --git a/releasenotes/notes/otel-agent-standalone-3d7b2197f6f3dd94.yaml b/releasenotes/notes/otel-agent-standalone-3d7b2197f6f3dd94.yaml new file mode 100644 index 00000000000000..3a1ea27499ee68 --- /dev/null +++ b/releasenotes/notes/otel-agent-standalone-3d7b2197f6f3dd94.yaml @@ -0,0 +1,14 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + The OTel Agent now supports a standalone mode (``DD_OTEL_STANDALONE=true``) that + runs without a co-resident core Datadog Agent. In standalone mode a new + ``dogtelextension`` OpenTelemetry Collector extension provides Datadog Agent + functionality directly. \ No newline at end of file diff --git a/tasks/build_tags.py b/tasks/build_tags.py index 7f7f6a4565db2a..bbda7f5d42de18 100644 --- a/tasks/build_tags.py +++ b/tasks/build_tags.py @@ -250,7 +250,7 @@ CWS_INSTRUMENTATION_TAGS = {"netgo", "osusergo"} -OTEL_AGENT_TAGS = {"otlp", "zlib", "zstd"} +OTEL_AGENT_TAGS = {"otlp", "zlib", "zstd", "kubelet"} LOADER_TAGS = set()