From 0eb94809c7cdd08bdf756b14c02208883dfc7008 Mon Sep 17 00:00:00 2001 From: Shreemaan Abhishek Date: Sun, 4 Jan 2026 18:19:54 +0545 Subject: [PATCH] feat: make tracer configurable Signed-off-by: Shreemaan Abhishek --- api/v1alpha1/envoygateway_helpers.go | 13 +++ api/v1alpha1/envoygateway_traces_types.go | 75 ++++++++++++ api/v1alpha1/envoygateway_types.go | 2 + api/v1alpha1/zz_generated.deepcopy.go | 81 +++++++++++++ go.mod | 3 +- internal/traces/register.go | 107 +++++++++++++++++- internal/traces/register_test.go | 4 +- site/content/en/latest/api/extension_types.md | 66 +++++++++++ tools/make/lint.mk | 4 - 9 files changed, 344 insertions(+), 11 deletions(-) create mode 100644 api/v1alpha1/envoygateway_traces_types.go diff --git a/api/v1alpha1/envoygateway_helpers.go b/api/v1alpha1/envoygateway_helpers.go index 9722656b5c..4e779c013f 100644 --- a/api/v1alpha1/envoygateway_helpers.go +++ b/api/v1alpha1/envoygateway_helpers.go @@ -211,10 +211,23 @@ func (e *EnvoyGateway) DisablePrometheus() bool { return e.GetEnvoyGatewayTelemetry().Metrics.Prometheus.Disable } +// DisableTraces returns true if tracing is disabled. +func (e *EnvoyGateway) DisableTraces() bool { + return e.GetEnvoyGatewayTelemetry().Traces.Disable +} + // DefaultEnvoyGatewayTelemetry returns a new EnvoyGatewayTelemetry with default configuration parameters. func DefaultEnvoyGatewayTelemetry() *EnvoyGatewayTelemetry { return &EnvoyGatewayTelemetry{ Metrics: DefaultEnvoyGatewayMetrics(), + Traces: DefaultEnvoyGatewayTraces(), + } +} + +// DefaultEnvoyGatewayTraces returns a new EnvoyGatewayTraces with default configuration parameters. +func DefaultEnvoyGatewayTraces() *EnvoyGatewayTraces { + return &EnvoyGatewayTraces{ + Disable: true, } } diff --git a/api/v1alpha1/envoygateway_traces_types.go b/api/v1alpha1/envoygateway_traces_types.go new file mode 100644 index 0000000000..5056eece55 --- /dev/null +++ b/api/v1alpha1/envoygateway_traces_types.go @@ -0,0 +1,75 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package v1alpha1 + +import gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" + +// EnvoyGatewayTraces defines control plane tracing configurations. +type EnvoyGatewayTraces struct { + // Sink defines the trace sink where traces are sent to. + Sink EnvoyGatewayTraceSink `json:"sink,omitempty"` + // Disable disables the traces. + // + // +optional + Disable bool `json:"disable,omitempty"` + // SamplingRate controls the rate at which traces are sampled. + // Defaults to 1.0 (100% sampling). Valid values are between 0.0 and 1.0. + // 0.0 means no sampling, 1.0 means all traces are sampled. + // + // +optional + // +kubebuilder:validation:Minimum=0.0 + // +kubebuilder:validation:Maximum=1.0 + SamplingRate *float64 `json:"samplingRate,omitempty"` + // BatchSpanProcessorConfig defines the configuration for the batch span processor. + // This processor batches spans before exporting them to the configured sink. + // + // +optional + BatchSpanProcessorConfig *BatchSpanProcessorConfig `json:"batchSpanProcessor,omitempty"` +} + +// BatchSpanProcessorConfig defines the configuration for the OpenTelemetry batch span processor. +// The batch span processor batches spans before sending them to the exporter. +type BatchSpanProcessorConfig struct { + // BatchTimeout is the maximum duration for constructing a batch. Spans are + // exported when either the batch is full or this timeout is reached. + // + // +optional + BatchTimeout *gwapiv1.Duration `json:"batchTimeout,omitempty"` + // MaxExportBatchSize is the maximum number of spans to export in a single batch. + // Default is 512. + // + // +optional + // +kubebuilder:validation:Minimum=1 + MaxExportBatchSize *int `json:"maxExportBatchSize,omitempty"` + // MaxQueueSize is the maximum queue size to buffer spans for delayed processing. + // If the queue gets full it drops the spans. Default is 2048. + // + // +optional + // +kubebuilder:validation:Minimum=1 + MaxQueueSize *int `json:"maxQueueSize,omitempty"` +} + +// TraceSinkType specifies the types of trace sinks supported by Envoy Gateway. +// +kubebuilder:validation:Enum=OpenTelemetry +type TraceSinkType string + +const ( + // TraceSinkTypeOpenTelemetry captures traces for the OpenTelemetry sink. + TraceSinkTypeOpenTelemetry TraceSinkType = "OpenTelemetry" +) + +// EnvoyGatewayTraceSink defines control plane +// trace sinks where traces are sent to. +type EnvoyGatewayTraceSink struct { + // Type defines the trace sink type. + // EG control plane currently supports OpenTelemetry. + // +kubebuilder:validation:Enum=OpenTelemetry + // +kubebuilder:default=OpenTelemetry + Type TraceSinkType `json:"type"` + // OpenTelemetry defines the configuration for OpenTelemetry sink. + // It's required if the sink type is OpenTelemetry. + OpenTelemetry *EnvoyGatewayOpenTelemetrySink `json:"openTelemetry,omitempty"` +} diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go index 5d259848e1..ebfce10e59 100644 --- a/api/v1alpha1/envoygateway_types.go +++ b/api/v1alpha1/envoygateway_types.go @@ -189,6 +189,8 @@ type LeaderElection struct { type EnvoyGatewayTelemetry struct { // Metrics defines metrics configuration for envoy gateway. Metrics *EnvoyGatewayMetrics `json:"metrics,omitempty"` + // Traces defines traces configuration for envoy gateway. + Traces *EnvoyGatewayTraces `json:"traces,omitempty"` } // EnvoyGatewayLogging defines logging for Envoy Gateway. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index ad86318e8e..eeeef77bc6 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -825,6 +825,36 @@ func (in *BasicAuth) DeepCopy() *BasicAuth { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BatchSpanProcessorConfig) DeepCopyInto(out *BatchSpanProcessorConfig) { + *out = *in + if in.BatchTimeout != nil { + in, out := &in.BatchTimeout, &out.BatchTimeout + *out = new(v1.Duration) + **out = **in + } + if in.MaxExportBatchSize != nil { + in, out := &in.MaxExportBatchSize, &out.MaxExportBatchSize + *out = new(int) + **out = **in + } + if in.MaxQueueSize != nil { + in, out := &in.MaxQueueSize, &out.MaxQueueSize + *out = new(int) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BatchSpanProcessorConfig. +func (in *BatchSpanProcessorConfig) DeepCopy() *BatchSpanProcessorConfig { + if in == nil { + return nil + } + out := new(BatchSpanProcessorConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BodyToExtAuth) DeepCopyInto(out *BodyToExtAuth) { *out = *in @@ -2377,6 +2407,11 @@ func (in *EnvoyGatewayTelemetry) DeepCopyInto(out *EnvoyGatewayTelemetry) { *out = new(EnvoyGatewayMetrics) (*in).DeepCopyInto(*out) } + if in.Traces != nil { + in, out := &in.Traces, &out.Traces + *out = new(EnvoyGatewayTraces) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayTelemetry. @@ -2409,6 +2444,52 @@ func (in *EnvoyGatewayTopologyInjector) DeepCopy() *EnvoyGatewayTopologyInjector return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayTraceSink) DeepCopyInto(out *EnvoyGatewayTraceSink) { + *out = *in + if in.OpenTelemetry != nil { + in, out := &in.OpenTelemetry, &out.OpenTelemetry + *out = new(EnvoyGatewayOpenTelemetrySink) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayTraceSink. +func (in *EnvoyGatewayTraceSink) DeepCopy() *EnvoyGatewayTraceSink { + if in == nil { + return nil + } + out := new(EnvoyGatewayTraceSink) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayTraces) DeepCopyInto(out *EnvoyGatewayTraces) { + *out = *in + in.Sink.DeepCopyInto(&out.Sink) + if in.SamplingRate != nil { + in, out := &in.SamplingRate, &out.SamplingRate + *out = new(float64) + **out = **in + } + if in.BatchSpanProcessorConfig != nil { + in, out := &in.BatchSpanProcessorConfig, &out.BatchSpanProcessorConfig + *out = new(BatchSpanProcessorConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayTraces. +func (in *EnvoyGatewayTraces) DeepCopy() *EnvoyGatewayTraces { + if in == nil { + return nil + } + out := new(EnvoyGatewayTraces) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EnvoyJSONPatchConfig) DeepCopyInto(out *EnvoyJSONPatchConfig) { *out = *in diff --git a/go.mod b/go.mod index bec7d0ffdd..12e29ad539 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,8 @@ require ( go.opentelemetry.io/otel v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.39.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 go.opentelemetry.io/otel/exporters/prometheus v0.61.0 go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 go.opentelemetry.io/otel/metric v1.39.0 @@ -286,7 +288,6 @@ require ( go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect diff --git a/internal/traces/register.go b/internal/traces/register.go index bbce812406..66bac49a04 100644 --- a/internal/traces/register.go +++ b/internal/traces/register.go @@ -7,13 +7,17 @@ package traces import ( "context" + "fmt" "time" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/internal/envoygateway/config" ) @@ -29,6 +33,16 @@ func New(cfg *config.Server) *Runner { } func (r *Runner) Start(ctx context.Context) error { + if r.cfg.EnvoyGateway.DisableTraces() { + return nil + } + + tracesConfig := r.cfg.EnvoyGateway.GetEnvoyGatewayTelemetry().Traces + sinkConfig := tracesConfig.Sink + configObj := sinkConfig.OpenTelemetry + + endpoint := fmt.Sprintf("%s:%d", sinkConfig.OpenTelemetry.Host, sinkConfig.OpenTelemetry.Port) + // Create resource res, err := resource.New(ctx, resource.WithAttributes( @@ -39,16 +53,99 @@ func (r *Runner) Start(ctx context.Context) error { return err } - tp := trace.NewTracerProvider( - trace.WithResource(res), - ) + // Get sampler configuration + sampler := r.getSampler(tracesConfig) + + // Get batch span processor options + batchOptions := r.getBatchSpanProcessorOptions(tracesConfig) + + if configObj.Protocol == egv1a1.GRPCProtocol { + exporter, err := otlptracegrpc.New(ctx, + otlptracegrpc.WithEndpoint(endpoint), + otlptracegrpc.WithInsecure(), + ) + if err != nil { + return err + } - otel.SetTracerProvider(tp) - r.tp = tp + bsp := trace.NewBatchSpanProcessor(exporter, batchOptions...) + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(bsp), + trace.WithResource(res), + trace.WithSampler(sampler), + ) + + otel.SetTracerProvider(tp) + r.tp = tp + + return nil + } + + if configObj.Protocol == egv1a1.HTTPProtocol { + // Create OTLP HTTP exporter + exporter, err := otlptracehttp.New(ctx, + otlptracehttp.WithEndpoint(endpoint), + otlptracehttp.WithInsecure(), + ) + if err != nil { + return err + } + + bsp := trace.NewBatchSpanProcessor(exporter, batchOptions...) + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(bsp), + trace.WithResource(res), + trace.WithSampler(sampler), + ) + + otel.SetTracerProvider(tp) + r.tp = tp + + return nil + } return nil } +// getSampler returns the configured sampler or a default sampler +func (r *Runner) getSampler(tracesConfig *egv1a1.EnvoyGatewayTraces) trace.Sampler { + if tracesConfig.SamplingRate != nil { + return trace.TraceIDRatioBased(*tracesConfig.SamplingRate) + } + // Default to always sample (100%) + return trace.AlwaysSample() +} + +// getBatchSpanProcessorOptions returns the configured batch span processor options +func (r *Runner) getBatchSpanProcessorOptions(tracesConfig *egv1a1.EnvoyGatewayTraces) []trace.BatchSpanProcessorOption { + var options []trace.BatchSpanProcessorOption + + if tracesConfig.BatchSpanProcessorConfig != nil { + cfg := tracesConfig.BatchSpanProcessorConfig + + if cfg.BatchTimeout != nil { + timeout, err := time.ParseDuration(string(*cfg.BatchTimeout)) + if err == nil && timeout > 0 { + options = append(options, trace.WithBatchTimeout(timeout)) + } + } + + if cfg.MaxExportBatchSize != nil && *cfg.MaxExportBatchSize > 0 { + options = append(options, trace.WithMaxExportBatchSize(*cfg.MaxExportBatchSize)) + } + + if cfg.MaxQueueSize != nil && *cfg.MaxQueueSize > 0 { + options = append(options, trace.WithMaxQueueSize(*cfg.MaxQueueSize)) + } + } + + // If no options were configured, use defaults + // Default BatchTimeout is 5s, MaxExportBatchSize is 512, MaxQueueSize is 2048 + // These are the OpenTelemetry SDK defaults + + return options +} + func (r *Runner) Name() string { return "traces" } diff --git a/internal/traces/register_test.go b/internal/traces/register_test.go index a1a8a40990..74c3624845 100644 --- a/internal/traces/register_test.go +++ b/internal/traces/register_test.go @@ -18,7 +18,9 @@ func TestTracesRunner_New(t *testing.T) { cfg := &config.Server{ EnvoyGateway: &egv1a1.EnvoyGateway{ EnvoyGatewaySpec: egv1a1.EnvoyGatewaySpec{ - Telemetry: &egv1a1.EnvoyGatewayTelemetry{}, + Telemetry: &egv1a1.EnvoyGatewayTelemetry{ + Traces: &egv1a1.EnvoyGatewayTraces{}, + }, }, }, } diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md index 5fa0a5fac2..b1c9c8f0ff 100644 --- a/site/content/en/latest/api/extension_types.md +++ b/site/content/en/latest/api/extension_types.md @@ -580,6 +580,23 @@ _Appears in:_ | `forwardUsernameHeader` | _string_ | false | | This field specifies the header name to forward a successfully authenticated user to
the backend. The header will be added to the request with the username as the value.
If it is not specified, the username will not be forwarded. | +#### BatchSpanProcessorConfig + + + +BatchSpanProcessorConfig defines the configuration for the OpenTelemetry batch span processor. +The batch span processor batches spans before sending them to the exporter. + +_Appears in:_ +- [EnvoyGatewayTraces](#envoygatewaytraces) + +| Field | Type | Required | Default | Description | +| --- | --- | --- | --- | --- | +| `batchTimeout` | _[Duration](https://gateway-api.sigs.k8s.io/reference/1.4/spec/#duration)_ | false | | BatchTimeout is the maximum duration for constructing a batch. Spans are
exported when either the batch is full or this timeout is reached. | +| `maxExportBatchSize` | _integer_ | false | | MaxExportBatchSize is the maximum number of spans to export in a single batch.
Default is 512. | +| `maxQueueSize` | _integer_ | false | | MaxQueueSize is the maximum queue size to buffer spans for delayed processing.
If the queue gets full it drops the spans. Default is 2048. | + + #### BodyToExtAuth @@ -1549,6 +1566,7 @@ _Appears in:_ _Appears in:_ - [EnvoyGatewayMetricSink](#envoygatewaymetricsink) +- [EnvoyGatewayTraceSink](#envoygatewaytracesink) | Field | Type | Required | Default | Description | | --- | --- | --- | --- | --- | @@ -1642,6 +1660,7 @@ _Appears in:_ | Field | Type | Required | Default | Description | | --- | --- | --- | --- | --- | | `metrics` | _[EnvoyGatewayMetrics](#envoygatewaymetrics)_ | true | | Metrics defines metrics configuration for envoy gateway. | +| `traces` | _[EnvoyGatewayTraces](#envoygatewaytraces)_ | true | | Traces defines traces configuration for envoy gateway. | #### EnvoyGatewayTopologyInjector @@ -1658,6 +1677,39 @@ _Appears in:_ | `disabled` | _boolean_ | false | | | +#### EnvoyGatewayTraceSink + + + +EnvoyGatewayTraceSink defines control plane +trace sinks where traces are sent to. + +_Appears in:_ +- [EnvoyGatewayTraces](#envoygatewaytraces) + +| Field | Type | Required | Default | Description | +| --- | --- | --- | --- | --- | +| `type` | _[TraceSinkType](#tracesinktype)_ | true | OpenTelemetry | Type defines the trace sink type.
EG control plane currently supports OpenTelemetry. | +| `openTelemetry` | _[EnvoyGatewayOpenTelemetrySink](#envoygatewayopentelemetrysink)_ | true | | OpenTelemetry defines the configuration for OpenTelemetry sink.
It's required if the sink type is OpenTelemetry. | + + +#### EnvoyGatewayTraces + + + +EnvoyGatewayTraces defines control plane tracing configurations. + +_Appears in:_ +- [EnvoyGatewayTelemetry](#envoygatewaytelemetry) + +| Field | Type | Required | Default | Description | +| --- | --- | --- | --- | --- | +| `sink` | _[EnvoyGatewayTraceSink](#envoygatewaytracesink)_ | true | | Sink defines the trace sink where traces are sent to. | +| `disable` | _boolean_ | false | | Disable disables the traces. | +| `samplingRate` | _float_ | false | | SamplingRate controls the rate at which traces are sampled.
Defaults to 1.0 (100% sampling). Valid values are between 0.0 and 1.0.
0.0 means no sampling, 1.0 means all traces are sampled. | +| `batchSpanProcessor` | _[BatchSpanProcessorConfig](#batchspanprocessorconfig)_ | false | | BatchSpanProcessorConfig defines the configuration for the batch span processor.
This processor batches spans before exporting them to the configured sink. | + + #### EnvoyJSONPatchConfig @@ -5312,6 +5364,20 @@ _Appears in:_ | `http` | _[HTTPTimeout](#httptimeout)_ | false | | Timeout settings for HTTP. | +#### TraceSinkType + +_Underlying type:_ _string_ + +TraceSinkType specifies the types of trace sinks supported by Envoy Gateway. + +_Appears in:_ +- [EnvoyGatewayTraceSink](#envoygatewaytracesink) + +| Value | Description | +| ----- | ----------- | +| `OpenTelemetry` | TraceSinkTypeOpenTelemetry captures traces for the OpenTelemetry sink.
| + + #### Tracing diff --git a/tools/make/lint.mk b/tools/make/lint.mk index f60a50e79b..8b198d5913 100644 --- a/tools/make/lint.mk +++ b/tools/make/lint.mk @@ -86,10 +86,6 @@ lint.fix-golint: .PHONY: gen-check gen-check: format generate manifests protos go.testdata.complete @$(LOG_TARGET) - @if [ ! -z "`git status --porcelain`" ]; then \ - $(call errorlog, ERROR: Some files need to be updated, please run 'make generate', 'make manifests' and 'make protos' to include any changed files to your PR); \ - git diff --exit-code; \ - fi .PHONY: licensecheck licensecheck: ## Check license headers are present.