From 2f2212a004d98654238521a004a30dccb4a36f2e Mon Sep 17 00:00:00 2001 From: Nic Waller Date: Mon, 8 Dec 2025 13:41:28 -0800 Subject: [PATCH 1/2] feat(PL-4281): enable Otel retry --- internal/observability/tracer.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/observability/tracer.go b/internal/observability/tracer.go index 25bd6a2..7bb5d8c 100644 --- a/internal/observability/tracer.go +++ b/internal/observability/tracer.go @@ -2,6 +2,7 @@ package observability import ( "context" + "time" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -9,10 +10,9 @@ import ( "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" - "go.opentelemetry.io/otel/trace/noop" - semconv "go.opentelemetry.io/otel/semconv/v1.26.0" oteltrace "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" ) type TracerOptions struct { @@ -33,6 +33,12 @@ func SetupTracer(opts TracerOptions) (func() error, error) { otlpClient := otlptracegrpc.NewClient( otlptracegrpc.WithInsecure(), otlptracegrpc.WithEndpoint(opts.OTLPEndpoint), + otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ + Enabled: true, + InitialInterval: 5 * time.Second, + MaxInterval: 15 * time.Second, + MaxElapsedTime: 90 * time.Second, + }), ) exporter, err := otlptrace.New(context.Background(), otlpClient) From 73c9e9384f4391d53b0aa25af760df13a3d20bcb Mon Sep 17 00:00:00 2001 From: Nic Waller Date: Mon, 8 Dec 2025 15:04:19 -0800 Subject: [PATCH 2/2] fix: also retry after network-level failures --- internal/observability/tracer.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/internal/observability/tracer.go b/internal/observability/tracer.go index 7bb5d8c..9b0fb53 100644 --- a/internal/observability/tracer.go +++ b/internal/observability/tracer.go @@ -4,6 +4,9 @@ import ( "context" "time" + "google.golang.org/grpc" + "google.golang.org/grpc/backoff" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" @@ -33,6 +36,21 @@ func SetupTracer(opts TracerOptions) (func() error, error) { otlpClient := otlptracegrpc.NewClient( otlptracegrpc.WithInsecure(), otlptracegrpc.WithEndpoint(opts.OTLPEndpoint), + otlptracegrpc.WithTimeout(90*time.Second), + // Configure gRPC connection retry for network-level failures (e.g., "connection refused") + otlptracegrpc.WithDialOption( + grpc.WithConnectParams(grpc.ConnectParams{ + Backoff: backoff.Config{ + // this delay/multiplier config gives retries at roughly [3, 9, 21, 45, 93] seconds + BaseDelay: 3 * time.Second, + Multiplier: 2.0, + Jitter: 0.2, + MaxDelay: 120 * time.Second, + }, + MinConnectTimeout: 5 * time.Second, + }), + ), + // Configure application-level retry for retryable errors (e.g., rate limits, temporary server errors) otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ Enabled: true, InitialInterval: 5 * time.Second,