diff --git a/datadog-opentelemetry/Cargo.toml b/datadog-opentelemetry/Cargo.toml index 084d872c..5fcd935a 100644 --- a/datadog-opentelemetry/Cargo.toml +++ b/datadog-opentelemetry/Cargo.toml @@ -107,3 +107,13 @@ path = "benches/inject_benchmark.rs" name = "extract_benchmark" harness = false path = "benches/extract_benchmark.rs" + +[[bench]] +name = "datadog_sampling_benchmark" +harness = false +path = "benches/datadog_sampling_benchmark.rs" + +[[bench]] +name = "otel_sampling_benchmark" +harness = false +path = "benches/otel_sampling_benchmark.rs" diff --git a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs new file mode 100644 index 00000000..a05a8174 --- /dev/null +++ b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs @@ -0,0 +1,447 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::core::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampling::OtelSamplingData; +use datadog_opentelemetry::sampling::SamplingRule; +use datadog_opentelemetry::sampling::{DatadogSampler, SamplingData}; +use opentelemetry::{trace::SpanKind, KeyValue, TraceId}; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + is_parent_sampled: Option, + should_keep: Option, +} + +fn create_benchmark_configs() -> Vec { + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + vec![ + // 1. All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRule::new(1.0, None, None, None, None, None)], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 3. Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 4. Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 7. Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 10. Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 12. Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 14. Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "key10".to_string(), + "value10".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: None, + should_keep: Some(true), + }, + // 15. Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(true), + should_keep: Some(true), + }, + // 16. Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(false), + should_keep: Some(false), + }, + ] +} + +fn bench_datadog_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + + for config in configs { + let sampler = DatadogSampler::new(config.rules, -1); + let resource = Arc::new(RwLock::new(config.resource)); + let data = OtelSamplingData::new( + black_box(config.is_parent_sampled), + black_box(&config.trace_id), + black_box(config.span_name), + black_box(config.span_kind.clone()), + black_box(&config.attributes), + black_box(resource.as_ref()), + ); + + c.bench_function( + &format!("datadog_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample(&sampler, &data, config.should_keep); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +fn bench_sample(sampler: &DatadogSampler, data: &impl SamplingData, should_keep: Option) { + let result = black_box(sampler).sample(black_box(data)); + if let Some(should_keep) = should_keep { + assert_eq!(result.get_priority().is_keep(), should_keep); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!( + name = memory_benches; + config = memory_allocated_measurement(&GLOBAL); + targets = bench_datadog_sampling +); +criterion_group!( + name = wall_time_benches; + config = Criterion::default(); + targets = bench_datadog_sampling +); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs new file mode 100644 index 00000000..6ad3adad --- /dev/null +++ b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs @@ -0,0 +1,517 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::configuration::{Config, SamplingRuleConfig}; +use datadog_opentelemetry::core::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampler::Sampler; +use opentelemetry::{trace::SamplingDecision, trace::SpanKind, KeyValue, TraceId}; +use opentelemetry_sdk::trace::ShouldSample; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + parent_context: Option, + expected_decision: Option, +} + +fn create_benchmark_configs() -> Vec { + use opentelemetry::trace::{SpanContext, SpanId, TraceContextExt, TraceFlags, TraceState}; + + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + // Helper to create parent context + let create_parent_context = |is_sampled: bool| { + let flags = if is_sampled { + TraceFlags::SAMPLED + } else { + TraceFlags::default() + }; + let span_context = SpanContext::new( + trace_id, + SpanId::from(0x1234567890123456_u64), + flags, + false, + TraceState::default(), + ); + opentelemetry::Context::current().with_remote_span_context(span_context) + }; + + vec![ + // 1. All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 3. Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 4. Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 7. Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 10. Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 12. Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 14. Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("key10".to_string(), "value10".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 15. Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(true)), + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 16. Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(false)), + expected_decision: Some(SamplingDecision::RecordOnly), + }, + ] +} + +fn bench_otel_span_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + let links: Vec = vec![]; + + for config in configs { + let test_resource = Arc::new(RwLock::new(config.resource)); + let test_config = Arc::new( + Config::builder() + .set_trace_rate_limit(-1) + .set_trace_sampling_rules(config.rules) + .build(), + ); + let test_sampler = Sampler::new(test_config, test_resource.clone(), None); + + c.bench_function( + &format!("otel_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample( + &test_sampler, + config.parent_context.as_ref(), + config.trace_id, + config.span_name, + &config.span_kind, + &config.attributes, + &links, + &config.expected_decision, + ); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +#[allow(clippy::too_many_arguments)] +fn bench_sample( + sampler: &Sampler, + parent_context: Option<&opentelemetry::Context>, + trace_id: TraceId, + span_name: &str, + span_kind: &SpanKind, + attributes: &[KeyValue], + links: &[opentelemetry::trace::Link], + expected_decision: &Option, +) { + let result = black_box(sampler).should_sample( + black_box(parent_context), + black_box(trace_id), + black_box(span_name), + black_box(span_kind), + black_box(attributes), + black_box(links), + ); + if let Some(expected_decision) = expected_decision { + assert_eq!(result.decision, *expected_decision); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!(name = memory_benches; config = memory_allocated_measurement(&GLOBAL); targets = bench_otel_span_sampling); +criterion_group!(name = wall_time_benches; config = Criterion::default(); targets = bench_otel_span_sampling); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/src/core/sampling.rs b/datadog-opentelemetry/src/core/sampling.rs index cb67c4ee..90f32f91 100644 --- a/datadog-opentelemetry/src/core/sampling.rs +++ b/datadog-opentelemetry/src/core/sampling.rs @@ -38,7 +38,25 @@ impl SamplingPriority { self.value } - pub(crate) fn is_keep(&self) -> bool { + /// Returns whether this sampling priority indicates the trace should be kept. + /// + /// # Returns + /// + /// `true` if the priority value is positive (indicating the trace should be kept), + /// `false` otherwise (indicating the trace should be dropped). + /// + /// # Examples + /// + /// ``` + /// use datadog_opentelemetry::core::sampling::priority; + /// + /// assert!(priority::AUTO_KEEP.is_keep()); + /// assert!(priority::USER_KEEP.is_keep()); + /// assert!(!priority::AUTO_REJECT.is_keep()); + /// assert!(!priority::USER_REJECT.is_keep()); + /// ``` + #[inline(always)] + pub fn is_keep(&self) -> bool { self.value > 0 } } diff --git a/datadog-opentelemetry/src/lib.rs b/datadog-opentelemetry/src/lib.rs index d096c6c3..5f04a693 100644 --- a/datadog-opentelemetry/src/lib.rs +++ b/datadog-opentelemetry/src/lib.rs @@ -188,7 +188,11 @@ pub mod mappings; #[cfg(feature = "test-utils")] pub mod propagation; #[cfg(feature = "test-utils")] +pub mod sampler; +#[cfg(feature = "test-utils")] pub mod sampling; +#[cfg(feature = "test-utils")] +pub mod span_processor; #[cfg(not(feature = "test-utils"))] pub(crate) mod core; @@ -197,7 +201,11 @@ pub(crate) mod mappings; #[cfg(not(feature = "test-utils"))] pub(crate) mod propagation; #[cfg(not(feature = "test-utils"))] +mod sampler; +#[cfg(not(feature = "test-utils"))] pub(crate) mod sampling; +#[cfg(not(feature = "test-utils"))] +mod span_processor; mod ddtrace_transform; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] @@ -205,9 +213,7 @@ mod logs_reader; #[cfg(any(feature = "metrics-grpc", feature = "metrics-http"))] mod metrics_reader; mod otlp_utils; -mod sampler; mod span_exporter; -mod span_processor; mod spans_metrics; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] mod telemetry_logs_exporter; @@ -418,7 +424,11 @@ fn make_tracer( let resource_slot = Arc::new(RwLock::new(Resource::builder_empty().build())); // Sampler only needs config for initialization (reads initial sampling rules) // Runtime updates come via config callback, so no need for shared config - let sampler = Sampler::new(config.clone(), resource_slot.clone(), registry.clone()); + let sampler = Sampler::new( + config.clone(), + resource_slot.clone(), + Some(registry.clone()), + ); let agent_response_handler = sampler.on_agent_response(); diff --git a/datadog-opentelemetry/src/mappings/transform/mod.rs b/datadog-opentelemetry/src/mappings/transform/mod.rs index 5251ca05..b0ebf284 100644 --- a/datadog-opentelemetry/src/mappings/transform/mod.rs +++ b/datadog-opentelemetry/src/mappings/transform/mod.rs @@ -212,7 +212,7 @@ fn otel_span_to_dd_span_minimal<'a>( let code: u32 = if let Some(http_status_code) = span.get_attr_num(DATADOG_HTTP_STATUS_CODE) { http_status_code } else { - get_otel_status_code(span) + get_otel_status_code(span).unwrap_or(0) }; if code != 0 { dd_span.meta.insert( diff --git a/datadog-opentelemetry/src/mappings/transform/otel_util.rs b/datadog-opentelemetry/src/mappings/transform/otel_util.rs index e9194088..343cd36f 100644 --- a/datadog-opentelemetry/src/mappings/transform/otel_util.rs +++ b/datadog-opentelemetry/src/mappings/transform/otel_util.rs @@ -198,14 +198,9 @@ pub fn get_otel_resource_v2<'a>(span: &impl OtelSpan<'a>) -> Cow<'a, str> { } // https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/otel_util.go#L571 -pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> u32 { - if let Some(code) = span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) { - return code; - } - if let Some(code) = span.get_attr_num(HTTP_STATUS_CODE) { - return code; - } - 0 +pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> Option { + span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) + .or_else(|| span.get_attr_num(HTTP_STATUS_CODE)) } const SPAN_TYPE_SQL: &str = "sql"; diff --git a/datadog-opentelemetry/src/sampler.rs b/datadog-opentelemetry/src/sampler.rs index 00553165..db022d0a 100644 --- a/datadog-opentelemetry/src/sampler.rs +++ b/datadog-opentelemetry/src/sampler.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! Datadog OTel Sampler + use opentelemetry::trace::{TraceContextExt, TraceState}; use opentelemetry_sdk::{trace::ShouldSample, Resource}; use std::sync::{Arc, RwLock}; @@ -10,34 +12,52 @@ use crate::{ configuration::Config, constants::SAMPLING_DECISION_MAKER_TAG_KEY, sampling::SamplingDecision, }, - sampling::{DatadogSampler, SamplingRule, SamplingRulesCallback}, + sampling::{DatadogSampler, OtelSamplingData, SamplingRule, SamplingRulesCallback}, span_processor::{RegisterTracePropagationResult, TracePropagationData}, text_map_propagator::{self, DatadogExtractData}, TraceRegistry, }; +/// OpenTelemetry sampler implementation for Datadog tracing. +/// +/// Implements the `ShouldSample` trait to make sampling decisions for traces based on +/// Datadog's sampling rules, rate limits, and service-based sampling rates. #[derive(Debug, Clone)] pub struct Sampler { sampler: DatadogSampler, - trace_registry: TraceRegistry, + resource: Arc>, + trace_registry: Option, cfg: Arc, } impl Sampler { + /// Creates a new Datadog sampler. + /// + /// # Arguments + /// + /// * `cfg` - Configuration containing sampling rules and rate limits + /// * `resource` - OpenTelemetry resource with service information + /// * `trace_registry` - Optional trace registry for managing in-flight traces (None for + /// benchmarking) pub fn new( cfg: Arc, resource: Arc>, - trace_registry: TraceRegistry, + // This is an Option to allow benchmarking different parts of sampling + trace_registry: Option, ) -> Self { let rules = SamplingRule::from_configs(cfg.trace_sampling_rules().to_vec()); - let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit(), resource); + let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit()); Self { cfg, sampler, + resource, trace_registry, } } + /// Returns a callback for processing agent responses. + /// + /// The callback updates service-based sampling rates based on the agent's response. pub fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { self.sampler.on_agent_response() } @@ -81,10 +101,18 @@ impl ShouldSample for Sampler { .filter(|c| !is_parent_deferred && c.has_active_span()) .map(|c| c.span().span_context().trace_flags().is_sampled()); - let result = self - .sampler - .sample(is_parent_sampled, trace_id, name, span_kind, attributes); - let trace_propagation_data = if let Some(trace_root_info) = &result.trace_root_info { + let data = OtelSamplingData::new( + is_parent_sampled, + &trace_id, + name, + span_kind.clone(), + attributes, + self.resource.as_ref(), + ); + let result = self.sampler.sample(&data); + let trace_propagation_data = if let Some(trace_root_info) = + result.get_trace_root_sampling_info() + { // If the parent was deferred, we try to merge propagation tags with what we extracted let (mut tags, origin) = if is_parent_deferred { if let Some(DatadogExtractData { @@ -100,7 +128,7 @@ impl ShouldSample for Sampler { } else { (None, None) }; - let mechanism = trace_root_info.mechanism; + let mechanism = trace_root_info.mechanism(); tags.get_or_insert_default().insert( SAMPLING_DECISION_MAKER_TAG_KEY.to_string(), mechanism.to_cow().into_owned(), @@ -108,7 +136,7 @@ impl ShouldSample for Sampler { Some(TracePropagationData { sampling_decision: SamplingDecision { - priority: Some(trace_root_info.priority), + priority: Some(result.get_priority()), mechanism: Some(mechanism), }, origin, @@ -140,36 +168,40 @@ impl ShouldSample for Sampler { None }; if let Some(trace_propagation_data) = trace_propagation_data { - match self - .trace_registry - .register_local_root_trace_propagation_data( + if let Some(trace_registry) = &self.trace_registry { + match trace_registry.register_local_root_trace_propagation_data( trace_id.to_bytes(), trace_propagation_data, ) { - RegisterTracePropagationResult::Existing(sampling_decision) => { - return opentelemetry::trace::SamplingResult { - // If at this point the sampling decision is still None, we will - // end up sending the span to the agent without a sampling priority, which - // will latter take a decision. - // So the span is marked as RecordAndSample because we treat it as such - decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { - opentelemetry::trace::SamplingDecision::RecordAndSample - } else { - opentelemetry::trace::SamplingDecision::RecordOnly - }, - attributes: Vec::new(), - trace_state: parent_context - .map(|c| c.span().span_context().trace_state().clone()) - .unwrap_or_default(), - }; + RegisterTracePropagationResult::Existing(sampling_decision) => { + return opentelemetry::trace::SamplingResult { + // If at this point the sampling decision is still None, we will + // end up sending the span to the agent without a sampling priority, + // which will later take a decision. + // So the span is marked as RecordAndSample because we treat it as such + decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + }, + attributes: Vec::new(), + trace_state: parent_context + .map(|c| c.span().span_context().trace_state().clone()) + .unwrap_or_default(), + }; + } + RegisterTracePropagationResult::New => {} } - RegisterTracePropagationResult::New => {} } } opentelemetry::trace::SamplingResult { - decision: result.to_otel_decision(), - attributes: result.to_dd_sampling_tags(), + decision: crate::sampling::otel_mappings::priority_to_otel_decision( + result.get_priority(), + ), + attributes: result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(), trace_state: parent_context .map(|c| c.span().span_context().trace_state().clone()) .unwrap_or_default(), @@ -205,7 +237,11 @@ mod tests { ); let test_resource = Arc::new(RwLock::new(Resource::builder().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [1; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -227,7 +263,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [2; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -246,7 +286,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id = TraceId::from_bytes([2; 16]); let span_id = SpanId::from_bytes([3; 8]); diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index 07e1bc7b..4e2df073 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -6,31 +6,22 @@ use crate::core::constants::{ RL_EFFECTIVE_RATE, SAMPLING_AGENT_RATE_TAG_KEY, SAMPLING_DECISION_MAKER_TAG_KEY, SAMPLING_PRIORITY_TAG_KEY, SAMPLING_RULE_RATE_TAG_KEY, }; -use crate::core::sampling::{mechanism, SamplingMechanism, SamplingPriority}; +use crate::core::sampling::{mechanism, priority, SamplingMechanism, SamplingPriority}; /// Type alias for sampling rules update callback /// Consolidated callback type used across crates for remote config sampling updates pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; -use crate::mappings::{ - get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, - get_otel_service, get_otel_status_code, OtelSpan, -}; -use opentelemetry::trace::SamplingDecision; -use opentelemetry::trace::TraceId; -use opentelemetry::KeyValue; +use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; use std::collections::HashMap; -use std::sync::{Arc, RwLock}; use super::agent_service_sampler::{AgentRates, ServicesSampler}; // Import the attr constants use super::constants::pattern::NO_RULE; use super::glob_matcher::GlobMatcher; -use super::otel_mappings::PreSampledSpan; use super::rate_limiter::RateLimiter; use super::rate_sampler::RateSampler; use super::rules_sampler::RulesSampler; -use super::utils; fn matcher_from_rule(rule: &str) -> Option { (rule != NO_RULE).then(|| GlobMatcher::new(rule)) @@ -110,9 +101,9 @@ impl SamplingRule { /// Checks if this rule matches the given span's attributes and name /// The name is derived from the attributes and span kind - fn matches(&self, span: &PreSampledSpan) -> bool { - // Get the operation name from the attributes and span kind - let name: std::borrow::Cow<'_, str> = get_otel_operation_name_v2(span); + fn matches(&self, span: &impl SpanProperties) -> bool { + // Get the operation name from the span + let name = span.operation_name(); // Check name using glob matcher if specified if let Some(ref matcher) = self.name_matcher { @@ -123,21 +114,21 @@ impl SamplingRule { // Check service if specified using glob matcher if let Some(ref matcher) = self.service_matcher { - // Get service directly from the resource - let service_from_resource = get_otel_service(span); + // Get service from the span + let service = span.service(); - // Match against the service from resource - if !matcher.matches(&service_from_resource) { + // Match against the service + if !matcher.matches(&service) { return false; } } // Get the resource string for matching - let resource_str: std::borrow::Cow<'_, str> = get_otel_resource_v2(span); + let resource_str = span.resource(); // Check resource if specified using glob matcher if let Some(ref matcher) = self.resource_matcher { - // Use the resource generated by get_otel_resource_v2 + // Use the resource from the span if !matcher.matches(resource_str.as_ref()) { return false; } @@ -161,10 +152,9 @@ impl SamplingRule { // Logic for other tags: // First, try to match directly with the provided tag key let direct_match = span - .attributes - .iter() - .find(|kv| kv.key.as_str() == rule_tag_key_str) - .and_then(|kv| self.match_attribute_value(&kv.value, matcher)); + .attributes() + .find(|attr| attr.key() == rule_tag_key_str) + .and_then(|attr| self.match_attribute_value(attr.value(), matcher)); if direct_match.unwrap_or(false) { continue; @@ -175,12 +165,13 @@ impl SamplingRule { // is a Datadog key (e.g., "http.method") and the attribute is an // OTel key (e.g., "http.request.method") if rule_tag_key_str.starts_with("http.") { - let tag_match = span.attributes.iter().any(|kv| { - let dd_key_from_otel_attr = get_dd_key_for_otlp_attribute(kv.key.as_str()); - if dd_key_from_otel_attr == rule_tag_key_str { - return self - .match_attribute_value(&kv.value, matcher) - .unwrap_or(false); + let tag_match = span.attributes().any(|attr| { + if let Some(alternate_key) = span.get_alternate_key(attr.key()) { + if alternate_key == rule_tag_key_str { + return self + .match_attribute_value(attr.value(), matcher) + .unwrap_or(false); + } } false }); @@ -206,26 +197,18 @@ impl SamplingRule { fn match_http_status_code_rule( &self, matcher: &GlobMatcher, - span: &PreSampledSpan, + span: &impl SpanProperties, ) -> Option { - let status_code_u32 = get_otel_status_code(span); - if status_code_u32 != 0 { - // Assuming 0 means not found - let status_value = opentelemetry::Value::I64(i64::from(status_code_u32)); + span.status_code().and_then(|status_code| { + let status_value = opentelemetry::Value::I64(i64::from(status_code)); self.match_attribute_value(&status_value, matcher) - } else { - None // Status code not found in attributes - } + }) } // Helper method to match attribute values considering different value types - fn match_attribute_value( - &self, - value: &opentelemetry::Value, - matcher: &GlobMatcher, - ) -> Option { + fn match_attribute_value(&self, value: &impl ValueLike, matcher: &GlobMatcher) -> Option { // Floating point values are handled with special rules - if let Some(float_val) = utils::extract_float_value(value) { + if let Some(float_val) = value.extract_float() { // Check if the float has a non-zero decimal part let has_decimal = float_val != (float_val as i64) as f64; @@ -240,11 +223,13 @@ impl SamplingRule { } // For non-float values, use normal matching - utils::extract_string_value(value).map(|string_value| matcher.matches(&string_value)) + value + .extract_string() + .map(|string_value| matcher.matches(&string_value)) } /// Samples a trace ID using this rule's sample rate - pub fn sample(&self, trace_id: TraceId) -> bool { + pub fn sample(&self, trace_id: &impl TraceIdLike) -> bool { // Delegate to the internal rate sampler's new sample method self.rate_sampler.sample(trace_id) } @@ -280,18 +265,11 @@ pub struct DatadogSampler { /// Rate limiter for limiting the number of spans per second rate_limiter: RateLimiter, - - /// Resource with service information, wrapped in Arc> for sharing - resource: Arc>, } impl DatadogSampler { /// Creates a new DatadogSampler with the given rules - pub fn new( - rules: Vec, - rate_limit: i32, - resource: Arc>, - ) -> Self { + pub fn new(rules: Vec, rate_limit: i32) -> Self { // Create rate limiter with default value of 100 if not provided let limiter = RateLimiter::new(rate_limit, None); @@ -299,7 +277,6 @@ impl DatadogSampler { rules: RulesSampler::new(rules), service_samplers: ServicesSampler::default(), rate_limiter: limiter, - resource, } } @@ -335,17 +312,17 @@ impl DatadogSampler { } /// Computes a key for service-based sampling - fn service_key<'a>(&self, span: &impl OtelSpan<'a>) -> String { - // Get service directly from resource - let service = get_otel_service(span).into_owned(); - // Get env from attributes - let env = get_otel_env(span); + fn service_key(&self, span: &impl SpanProperties) -> String { + // Get service from span + let service = span.service().into_owned(); + // Get env from span + let env = span.env(); format!("service:{service},env:{env}") } /// Finds the highest precedence rule that matches the span - fn find_matching_rule(&self, span: &PreSampledSpan) -> Option { + fn find_matching_rule(&self, span: &impl SpanProperties) -> Option { self.rules.find_matching_rule(|rule| rule.matches(span)) } @@ -372,44 +349,37 @@ impl DatadogSampler { } /// Sample an incoming span based on the parent context and attributes - pub(crate) fn sample( - &self, - is_parent_sampled: Option, - trace_id: TraceId, - _name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], - ) -> DdSamplingResult { - if let Some(is_parent_sampled) = is_parent_sampled { + pub fn sample(&self, data: &impl SamplingData) -> DdSamplingResult { + if let Some(is_parent_sampled) = data.is_parent_sampled() { + let priority = match is_parent_sampled { + false => priority::AUTO_REJECT, + true => priority::AUTO_KEEP, + }; // If a parent exists, inherit its sampling decision and trace state return DdSamplingResult { - is_keep: is_parent_sampled, + priority, trace_root_info: None, }; } // Apply rules-based sampling - self.sample_root(trace_id, _name, span_kind, attributes) + data.with_span_properties(self, |sampler, span| sampler.sample_root(data, span)) } /// Sample the root span of a trace fn sample_root( &self, - trace_id: TraceId, - name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], + data: &impl SamplingData, + span: &impl SpanProperties, ) -> DdSamplingResult { let mut is_keep = true; let mut used_agent_sampler = false; let sample_rate; let mut rl_effective_rate: Option = None; - - let resource_guard = self.resource.read().unwrap(); - let span = PreSampledSpan::new(name, span_kind.clone(), attributes, &resource_guard); + let trace_id = data.trace_id(); // Find a matching rule - let matching_rule = self.find_matching_rule(&span); + let matching_rule = self.find_matching_rule(span); // Apply sampling logic if let Some(rule) = &matching_rule { @@ -426,7 +396,7 @@ impl DatadogSampler { } } else { // Try service-based sampling from Agent - let service_key = self.service_key(&span); + let service_key = self.service_key(span); if let Some(sampler) = self.service_samplers.get(&service_key) { // Use the service-based sampler used_agent_sampler = true; @@ -447,10 +417,9 @@ impl DatadogSampler { let mechanism = self.get_sampling_mechanism(matching_rule.as_ref(), used_agent_sampler); DdSamplingResult { - is_keep, + priority: mechanism.to_priority(is_keep), trace_root_info: Some(TraceRootSamplingInfo { mechanism, - priority: mechanism.to_priority(is_keep), rate: sample_rate, rl_effective_rate, }), @@ -458,87 +427,103 @@ impl DatadogSampler { } } -pub(crate) struct DdSamplingResult { - pub is_keep: bool, - pub trace_root_info: Option, +pub struct TraceRootSamplingInfo { + mechanism: SamplingMechanism, + rate: f64, + rl_effective_rate: Option, } -pub(crate) struct TraceRootSamplingInfo { - pub priority: SamplingPriority, - pub mechanism: SamplingMechanism, - pub rate: f64, - pub rl_effective_rate: Option, +impl TraceRootSamplingInfo { + /// Returns the sampling mechanism used for this trace root + pub fn mechanism(&self) -> SamplingMechanism { + self.mechanism + } + + /// Returns the sample rate used for this trace root + pub fn rate(&self) -> f64 { + self.rate + } + + /// Returns the effective rate limit if rate limiting was applied + pub fn rl_effective_rate(&self) -> Option { + self.rl_effective_rate + } +} + +pub struct DdSamplingResult { + priority: SamplingPriority, + trace_root_info: Option, } impl DdSamplingResult { + #[inline(always)] + pub fn get_priority(&self) -> SamplingPriority { + self.priority + } + + pub fn get_trace_root_sampling_info(&self) -> &Option { + &self.trace_root_info + } + /// Returns Datadog-specific sampling tags to be added as attributes /// /// # Parameters - /// * `decision` - The sampling decision (RecordAndSample or Drop) - /// * `mechanism` - The sampling mechanism used to make the decision - /// * `sample_rate` - The sample rate to use for the decision - /// * `rl_effective_rate` - The effective rate limit if rate limiting was applied + /// * `factory` - The attribute factory to use for creating attributes /// /// # Returns - /// A vector of attributes to add to the sampling result - pub fn to_dd_sampling_tags(&self) -> Vec { - let mut result = Vec::new(); + /// An optional vector of attributes to add to the sampling result + pub fn to_dd_sampling_tags(&self, factory: &F) -> Option> + where + F: crate::sampling::AttributeFactory, + { let Some(root_info) = &self.trace_root_info else { - return result; // No root info, return empty attributes + return None; // No root info, return empty attributes }; + let mut result: Vec; // Add rate limiting tag if applicable - if let Some(limit) = root_info.rl_effective_rate { - result.push(KeyValue::new(RL_EFFECTIVE_RATE, limit as i64)); + if let Some(limit) = root_info.rl_effective_rate() { + result = Vec::with_capacity(4); + result.push(factory.create_i64(RL_EFFECTIVE_RATE, limit as i64)); + } else { + result = Vec::with_capacity(3); } // Add the sampling decision trace tag with the mechanism - let mechanism = root_info.mechanism; - result.push(KeyValue::new( - SAMPLING_DECISION_MAKER_TAG_KEY, - mechanism.to_cow(), - )); + let mechanism = root_info.mechanism(); + result.push(factory.create_string(SAMPLING_DECISION_MAKER_TAG_KEY, mechanism.to_cow())); // Add the sample rate tag with the correct key based on the mechanism match mechanism { mechanism::AGENT_RATE_BY_SERVICE => { - result.push(KeyValue::new(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate)); + result.push(factory.create_f64(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate())); } mechanism::REMOTE_USER_TRACE_SAMPLING_RULE | mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE | mechanism::LOCAL_USER_TRACE_SAMPLING_RULE => { - result.push(KeyValue::new(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate)); + result.push(factory.create_f64(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate())); } _ => {} } - let priority = root_info.priority; - result.push(KeyValue::new( - SAMPLING_PRIORITY_TAG_KEY, - priority.into_i8() as i64, - )); - - result - } + let priority = self.priority; + result.push(factory.create_i64(SAMPLING_PRIORITY_TAG_KEY, priority.into_i8() as i64)); - /// Converts the sampling result to a SamplingResult for OpenTelemetry - pub fn to_otel_decision(&self) -> SamplingDecision { - if self.is_keep { - SamplingDecision::RecordAndSample - } else { - SamplingDecision::RecordOnly - } + Some(result) } } #[cfg(test)] mod tests { use super::*; + use crate::mappings::get_otel_operation_name_v2; use crate::sampling::constants::{ attr::{ENV_TAG, RESOURCE_TAG}, pattern, }; - use opentelemetry::{trace::SpanKind, Key, KeyValue, Value}; + use crate::sampling::otel_mappings::{OtelSamplingData, PreSampledSpan}; + use opentelemetry::trace::{SpanKind, TraceId}; + use opentelemetry::{Key, KeyValue, Value}; use opentelemetry_sdk::Resource as SdkResource; use opentelemetry_semantic_conventions::{ attribute::{ @@ -547,6 +532,7 @@ mod tests { resource::SERVICE_NAME, trace::{HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME}, }; + use std::sync::{Arc, RwLock}; fn create_empty_resource() -> opentelemetry_sdk::Resource { opentelemetry_sdk::Resource::builder_empty().build() @@ -583,6 +569,25 @@ mod tests { ] } + // Helper function to create SamplingData for testing + fn create_sampling_data<'a>( + is_parent_sampled: Option, + trace_id: &'a TraceId, + name: &'a str, + span_kind: SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, + ) -> OtelSamplingData<'a> { + OtelSamplingData::new( + is_parent_sampled, + trace_id, + name, + span_kind, + attributes, + resource, + ) + } + #[test] fn test_sampling_rule_creation() { let rule = SamplingRule::new( @@ -692,41 +697,41 @@ mod tests { let trace_id = create_trace_id(); // Rule with rate 1.0 should always sample - assert!(rule_always.sample(trace_id)); + assert!(rule_always.sample(&trace_id)); // Rule with rate 0.0 should never sample - assert!(!rule_never.sample(trace_id)); + assert!(!rule_never.sample(&trace_id)); } #[test] fn test_datadog_sampler_creation() { // Create a sampler with default config - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); assert!(sampler.rules.is_empty()); assert!(sampler.service_samplers.is_empty()); // Create a sampler with rules let rule = SamplingRule::new(0.5, None, None, None, None, None); - let sampler_with_rules = DatadogSampler::new(vec![rule], 200, create_empty_resource_arc()); + let sampler_with_rules = DatadogSampler::new(vec![rule], 200); assert_eq!(sampler_with_rules.rules.len(), 1); } #[test] fn test_service_key_generation() { - // Use create_resource to initialize the sampler with a service name in its resource + // Create resource with test service name let test_service_name = "test-service".to_string(); - let sampler_resource = create_resource(test_service_name.clone()); - let sampler = DatadogSampler::new(vec![], 100, sampler_resource); + let resource = create_resource(test_service_name.clone()); + let sampler = DatadogSampler::new(vec![], 100); // Test with service and env // The 'service' in create_attributes is not used for the service part of the key, // but ENV_TAG is still correctly picked up from attributes. let attrs = create_attributes("resource", "production"); - let res = &sampler.resource.read().unwrap(); + let res = &resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Internal, attrs.as_slice(), res); assert_eq!( sampler.service_key(&span), - // Expect the service name from the sampler's resource + // Expect the service name from the resource format!("service:{test_service_name},env:production") ); @@ -741,14 +746,14 @@ mod tests { ); assert_eq!( sampler.service_key(&span), - // Expect the service name from the sampler's resource and an empty env + // Expect the service name from the resource and an empty env format!("service:{test_service_name},env:") ); } #[test] fn test_update_service_rates() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Update with service rates let mut rates = HashMap::new(); @@ -812,18 +817,13 @@ mod tests { Some("default".to_string()), // Lowest priority ); - // Sampler is mutable to allow resource updates - let mut sampler = DatadogSampler::new( - vec![rule1.clone(), rule2.clone(), rule3.clone()], - 100, - create_empty_resource_arc(), // Initial resource, will be updated before each check - ); + let sampler = DatadogSampler::new(vec![rule1.clone(), rule2.clone(), rule3.clone()], 100); // Test with a specific service that should match the first rule (rule1) { - sampler.resource = create_resource("service1".to_string()); + let resource = create_resource("service1".to_string()); let attrs1 = create_attributes("resource_val_for_attr1", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs1.as_slice(), &res); let matching_rule_for_attrs1 = sampler.find_matching_rule(&span); assert!( @@ -837,9 +837,9 @@ mod tests { // Test with a specific service that should match the second rule (rule2) { - sampler.resource = create_resource("service2".to_string()); + let resource = create_resource("service2".to_string()); let attrs2 = create_attributes("resource_val_for_attr2", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs2.as_slice(), &res); let matching_rule_for_attrs2 = sampler.find_matching_rule(&span); assert!( @@ -853,9 +853,9 @@ mod tests { // Test with a service that matches the wildcard rule (rule3) { - sampler.resource = create_resource("service3".to_string()); + let resource = create_resource("service3".to_string()); let attrs3 = create_attributes("resource_val_for_attr3", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs3.as_slice(), &res); let matching_rule_for_attrs3 = sampler.find_matching_rule(&span); assert!( @@ -869,9 +869,9 @@ mod tests { // Test with a service that doesn't match any rule's service pattern { - sampler.resource = create_resource("other_sampler_service".to_string()); + let resource = create_resource("other_sampler_service".to_string()); let attrs4 = create_attributes("resource_val_for_attr4", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs4.as_slice(), &res); let matching_rule_for_attrs4 = sampler.find_matching_rule(&span); assert!( @@ -883,7 +883,7 @@ mod tests { #[test] fn test_get_sampling_mechanism() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Create rules with different provenances let rule_customer = @@ -921,16 +921,17 @@ mod tests { let is_sampled = true; let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; let sampling_result = DdSamplingResult { - is_keep: true, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: 0.5, rl_effective_rate: None, }), }; - let attrs = sampling_result.to_dd_sampling_tags(); + let attrs = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // Verify the number of attributes assert_eq!(attrs.len(), 3); @@ -982,15 +983,16 @@ mod tests { let is_sampled = false; let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; let sampling_result = DdSamplingResult { - is_keep: false, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: 0.5, rl_effective_rate: Some(rate_limit), }), }; - let attrs_with_limit = sampling_result.to_dd_sampling_tags(); + let attrs_with_limit = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // With rate limiting, there should be one more attribute assert_eq!(attrs_with_limit.len(), 4); @@ -1017,16 +1019,17 @@ mod tests { let is_sampled = false; let mechanism = mechanism::AGENT_RATE_BY_SERVICE; let sampling_result = DdSamplingResult { - is_keep: false, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: agent_rate, rl_effective_rate: None, }), }; - let agent_attrs = sampling_result.to_dd_sampling_tags(); + let agent_attrs = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // Verify the number of attributes (should be 3) assert_eq!(agent_attrs.len(), 3); @@ -1059,43 +1062,47 @@ mod tests { #[test] fn test_should_sample_parent_context() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Create empty slices for attributes and links let empty_attrs: &[KeyValue] = &[]; + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; + let resource = create_empty_resource_arc(); // Test with sampled parent context - // let parent_sampled = create_parent_context(true); - let result_sampled = sampler.sample( + let data_sampled = create_sampling_data( Some(true), - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind.clone(), empty_attrs, + resource.as_ref(), ); + let result_sampled = sampler.sample(&data_sampled); // Should inherit the sampling decision from parent - assert_eq!( - result_sampled.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(result_sampled.to_dd_sampling_tags().is_empty()); + assert!(result_sampled.get_priority().is_keep()); + assert!(result_sampled + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_none()); // Test with non-sampled parent context - let result_not_sampled = sampler.sample( + let data_not_sampled = create_sampling_data( Some(false), - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind, empty_attrs, + resource.as_ref(), ); + let result_not_sampled = sampler.sample(&data_not_sampled); // Should inherit the sampling decision from parent - assert_eq!( - result_not_sampled.to_otel_decision(), - SamplingDecision::RecordOnly - ); - assert!(result_not_sampled.to_dd_sampling_tags().is_empty()); + assert!(!result_not_sampled.get_priority().is_keep()); + assert!(result_not_sampled + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_none()); } #[test] @@ -1110,46 +1117,53 @@ mod tests { None, ); - let sampler = DatadogSampler::new(vec![rule], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![rule], 100); + + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; + let resource = create_empty_resource_arc(); // Test with matching attributes let attrs = create_attributes("resource", "prod"); - let result = sampler.sample( + let data = create_sampling_data( None, - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind.clone(), attrs.as_slice(), + resource.as_ref(), ); + let result = sampler.sample(&data); // Should sample and add attributes - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - assert!(!result.to_dd_sampling_tags().is_empty()); + assert!(result.get_priority().is_keep()); + assert!(result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_some()); // Test with non-matching attributes let attrs_no_match = create_attributes("other-resource", "prod"); - let result_no_match = sampler.sample( + let data_no_match = create_sampling_data( None, - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind, attrs_no_match.as_slice(), + resource.as_ref(), ); + let result_no_match = sampler.sample(&data_no_match); // Should still sample (default behavior when no rules match) and add attributes - assert_eq!( - result_no_match.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(!result_no_match.to_dd_sampling_tags().is_empty()); + assert!(result_no_match.get_priority().is_keep()); + assert!(result_no_match + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_some()); } #[test] fn test_should_sample_with_service_rates() { - // Initialize sampler with a default service, e.g., "test-service" - // The sampler's own service name will be used for the 'service:' part of the service_key - let mut sampler = - DatadogSampler::new(vec![], 100, create_resource("test-service".to_string())); + // Initialize sampler + let sampler = DatadogSampler::new(vec![], 100); // Add service rates for different service+env combinations let mut rates = HashMap::new(); @@ -1158,39 +1172,43 @@ mod tests { sampler.update_service_rates(rates); + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; + // Test with attributes that should lead to "service:test-service,env:prod" key - // Sampler's resource is already for "test-service" + let resource_test_service = create_resource("test-service".to_string()); let attrs_sample = create_attributes("any_resource_name_matching_env", "prod"); - let result_sample = sampler.sample( + let data_sample = create_sampling_data( None, - create_trace_id(), + &trace_id, "span_for_test_service", - &SpanKind::Client, + span_kind.clone(), attrs_sample.as_slice(), + resource_test_service.as_ref(), ); + let result_sample = sampler.sample(&data_sample); // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> // rate 1.0 - assert_eq!( - result_sample.to_otel_decision(), - SamplingDecision::RecordAndSample, + assert!( + result_sample.get_priority().is_keep(), "Span for test-service/prod should be sampled" ); // Test with attributes that should lead to "service:other-service,env:prod" key - // Update sampler's resource to be "other-service" - sampler.resource = create_resource("other-service".to_string()); + let resource_other_service = create_resource("other-service".to_string()); let attrs_no_sample = create_attributes("any_resource_name_matching_env", "prod"); - let result_no_sample = sampler.sample( + let data_no_sample = create_sampling_data( None, - create_trace_id(), + &trace_id, "span_for_other_service", - &SpanKind::Client, + span_kind, attrs_no_sample.as_slice(), + resource_other_service.as_ref(), ); + let result_no_sample = sampler.sample(&data_no_sample); // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 - assert_eq!( - result_no_sample.to_otel_decision(), - SamplingDecision::RecordOnly, + assert!( + !result_no_sample.get_priority().is_keep(), "Span for other-service/prod should be dropped" ); } @@ -1220,12 +1238,14 @@ mod tests { // Should match integer float let integer_float_attrs = create_attributes_with_float("float_tag", 42.0); - assert!(rule_integer.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, integer_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule_integer.matches(&span)); // Test case 2: Rule with wildcard pattern and non-integer float let rule_wildcard = SamplingRule::new( @@ -1239,12 +1259,14 @@ mod tests { // Should match non-integer float with wildcard pattern let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(rule_wildcard.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule_wildcard.matches(&span)); // Test case 3: Rule with specific pattern and non-integer float // With our simplified logic, non-integer floats will never match non-wildcard patterns @@ -1262,12 +1284,14 @@ mod tests { // Should NOT match the exact decimal value because non-integer floats only match wildcards let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(!rule_specific.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule_specific.matches(&span)); // Test case 4: Pattern with partial wildcard '*' for suffix let rule_prefix = SamplingRule::new( 0.5, @@ -1283,12 +1307,14 @@ mod tests { // Should NOT match decimal values as we don't do partial pattern matching for non-integer // floats - assert!(!rule_prefix.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule_prefix.matches(&span)); } #[test] @@ -1310,30 +1336,36 @@ mod tests { let otel_attrs = vec![KeyValue::new("http.response.status_code", 500)]; // The rule should match because both use the same OpenTelemetry attribute name - assert!(rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, otel_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule.matches(&span)); // Attributes that don't match the value pattern shouldn't match let non_matching_attrs = vec![KeyValue::new("http.response.status_code", 200)]; - assert!(!rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, non_matching_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule.matches(&span)); // Attributes that have no mapping to the rule tag shouldn't match let unrelated_attrs = vec![KeyValue::new("unrelated.attribute", "value")]; - assert!(!rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, unrelated_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule.matches(&span)); } #[test] @@ -1357,12 +1389,9 @@ mod tests { ]; // The rule should match because all three criteria are satisfied through mapping - assert!(rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &mixed_attrs, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + assert!(rule.matches(&span)); // If any criteria is not met, the rule shouldn't match let missing_method = vec![ @@ -1371,12 +1400,9 @@ mod tests { KeyValue::new("url.full", "https://example.com/api/v1/resource"), ]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_method, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &missing_method, &resource); + assert!(!rule.matches(&span)); // Wrong value should also not match let wrong_method = vec![ @@ -1385,12 +1411,9 @@ mod tests { KeyValue::new("url.full", "https://example.com/api/v1/resource"), ]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &wrong_method, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &wrong_method, &resource); + assert!(!rule.matches(&span)); } #[test] @@ -1417,34 +1440,37 @@ mod tests { KeyValue::new(otel_response_status_key_str, 503), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", span_kind_client, &mixed_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); + &empty_resource, + ); + assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); // Case 2: Datadog convention for status code (503 matches "5*") + Direct custom.tag match let dd_attrs_match = vec![ KeyValue::new(dd_status_key_str, 503), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &dd_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); + &empty_resource, + ); + assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); // Case 3: Missing the custom tag should fail (status code would match) let missing_custom_tag_attrs = vec![KeyValue::new(otel_response_status_key_str, 503)]; + let span = PreSampledSpan::new( + "test-span", + SpanKind::Client, + &missing_custom_tag_attrs, + &empty_resource, + ); assert!( - !rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_custom_tag_attrs, - &empty_resource - )), + !rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span missing custom.tag" ); @@ -1453,24 +1479,26 @@ mod tests { KeyValue::new(otel_response_status_key_str, 200), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(!rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &non_matching_otel_status_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); + &empty_resource, + ); + assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); // Case 5: No recognizable status code + custom.tag present let no_status_code_attrs = vec![ KeyValue::new("another.tag", "irrelevant"), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(!rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &no_status_code_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); + &empty_resource, + ); + assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); // Case 6: Rule uses OTel key http.response.status_code directly, span has matching OTel // key. @@ -1483,12 +1511,13 @@ mod tests { KeyValue::new(otel_response_status_key_str, 200), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule2.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &otel_key_rule_match_attrs, - &empty_resource - )), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); + &empty_resource, + ); + assert!(rule2.matches(&span), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); } #[test] @@ -1522,14 +1551,11 @@ mod tests { ); // Create a sampler with these rules - let sampler = DatadogSampler::new( - vec![http_rule, db_rule, messaging_rule], - 100, - create_empty_resource_arc(), - ); + let sampler = DatadogSampler::new(vec![http_rule, db_rule, messaging_rule], 100); // Create a trace ID for testing let trace_id = create_trace_id(); + let resource = create_empty_resource_arc(); // Test cases for different span kinds and attributes @@ -1541,27 +1567,26 @@ mod tests { let empty_resource: SdkResource = create_empty_resource(); // Print the operation name that will be generated - let http_client_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &http_client_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Client, &http_client_attrs, &empty_resource); + let http_client_op_name = get_otel_operation_name_v2(&span); assert_eq!( http_client_op_name, "http.client.request", "HTTP client operation name should be correct" ); - let result = sampler.sample( + let span_kind_client = SpanKind::Client; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Client, + span_kind_client.clone(), &http_client_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 2. HTTP server request let http_server_attrs = vec![KeyValue::new( @@ -1570,27 +1595,26 @@ mod tests { )]; // Print the operation name that will be generated - let http_server_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Server, - &http_server_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Server, &http_server_attrs, &empty_resource); + let http_server_op_name = get_otel_operation_name_v2(&span); assert_eq!( http_server_op_name, "http.server.request", "HTTP server operation name should be correct" ); - let result = sampler.sample( + let span_kind_server = SpanKind::Server; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Server, + span_kind_server.clone(), &http_server_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 3. Database query let db_attrs = vec![KeyValue::new( @@ -1599,27 +1623,25 @@ mod tests { )]; // Print the operation name that will be generated - let db_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &db_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Client, &db_attrs, &empty_resource); + let db_op_name = get_otel_operation_name_v2(&span); assert_eq!( db_op_name, "postgresql.query", "Database operation name should be correct" ); - let result = sampler.sample( + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Client, // DB queries use client span kind + span_kind_client, // DB queries use client span kind &db_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the db_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 4. Messaging operation let messaging_attrs = vec![ @@ -1634,53 +1656,51 @@ mod tests { ]; // Print the operation name that will be generated - let messaging_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Consumer, - &messaging_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Consumer, &messaging_attrs, &empty_resource); + let messaging_op_name = get_otel_operation_name_v2(&span); assert_eq!( messaging_op_name, "kafka.process", "Messaging operation name should be correct" ); - let result = sampler.sample( + let span_kind_consumer = SpanKind::Consumer; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Consumer, // Messaging uses consumer span kind + span_kind_consumer, // Messaging uses consumer span kind &messaging_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the messaging_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 5. Generic internal span (should not match any rules) let internal_attrs = vec![KeyValue::new("custom.tag", "value")]; // Print the operation name that will be generated - let internal_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Internal, - &internal_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Internal, &internal_attrs, &empty_resource); + let internal_op_name = get_otel_operation_name_v2(&span); assert_eq!( internal_op_name, "internal", "Internal operation name should be the span kind" ); - let result = sampler.sample( + let span_kind_internal = SpanKind::Internal; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Internal, + span_kind_internal, &internal_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should still be sampled (default behavior when no rules match) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 6. Server with protocol but no HTTP method let server_protocol_attrs = vec![KeyValue::new( @@ -1689,28 +1709,31 @@ mod tests { )]; // Print the operation name that will be generated - let server_protocol_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( + let span = PreSampledSpan::new( "", SpanKind::Server, &server_protocol_attrs, &empty_resource, - )); + ); + let server_protocol_op_name = get_otel_operation_name_v2(&span); assert_eq!( server_protocol_op_name, "http.server.request", "Server with protocol operation name should use protocol" ); - let result = sampler.sample( + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Server, + span_kind_server, &server_protocol_attrs, + resource.as_ref(), ); + let result = sampler.sample(&data); // Should not match our http rule since operation name would be "http.server.request" // But should still be sampled (default behavior) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); } #[test] @@ -1732,7 +1755,7 @@ mod tests { .build(), )); - let sampler = DatadogSampler::new(vec![initial_rule], 100, test_resource); + let sampler = DatadogSampler::new(vec![initial_rule], 100); // Verify initial state assert_eq!(sampler.rules.len(), 1); @@ -1772,7 +1795,7 @@ mod tests { KeyValue::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name * "http.client.request" */ ]; - let resource_guard = sampler.resource.read().unwrap(); + let resource_guard = test_resource.read().unwrap(); let span = PreSampledSpan::new( "test-span", SpanKind::Client, diff --git a/datadog-opentelemetry/src/sampling/mod.rs b/datadog-opentelemetry/src/sampling/mod.rs index 2f995a30..1dd05d7d 100644 --- a/datadog-opentelemetry/src/sampling/mod.rs +++ b/datadog-opentelemetry/src/sampling/mod.rs @@ -11,7 +11,12 @@ pub(crate) mod otel_mappings; pub(crate) mod rate_limiter; pub(crate) mod rate_sampler; pub(crate) mod rules_sampler; +mod types; pub(crate) mod utils; // Re-export key public types pub use datadog_sampler::{DatadogSampler, SamplingRule, SamplingRulesCallback}; +pub use otel_mappings::{OtelAttributeFactory, OtelSamplingData}; +pub use types::{ + AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, +}; diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index 95d763b7..5ccd5e2c 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -2,11 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 use std::borrow::Cow; +use std::sync::RwLock; -use crate::mappings::{AttributeIndices, AttributeKey, OtelSpan}; -use opentelemetry::Key; +use crate::mappings::{ + get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, + get_otel_service, get_otel_status_code, AttributeIndices, AttributeKey, OtelSpan, +}; +use crate::sampling::{SamplingData, SpanProperties}; +use opentelemetry::{Key, KeyValue}; -pub(crate) struct PreSampledSpan<'a> { +pub struct PreSampledSpan<'a> { pub name: &'a str, pub span_kind: opentelemetry::trace::SpanKind, pub attributes: &'a [opentelemetry::KeyValue], @@ -73,3 +78,170 @@ impl<'a> OtelSpan<'a> for PreSampledSpan<'a> { self.resource.len() } } + +impl SpanProperties for PreSampledSpan<'_> { + type Attribute = opentelemetry::KeyValue; + type AttributesIter<'b> + = std::slice::Iter<'b, opentelemetry::KeyValue> + where + Self: 'b; + + fn operation_name(&self) -> Cow<'_, str> { + get_otel_operation_name_v2(self) + } + + fn service(&self) -> Cow<'_, str> { + get_otel_service(self) + } + + fn env(&self) -> Cow<'_, str> { + get_otel_env(self) + } + + fn resource(&self) -> Cow<'_, str> { + get_otel_resource_v2(self) + } + + fn status_code(&self) -> Option { + get_otel_status_code(self) + } + + fn attributes(&self) -> Self::AttributesIter<'_> { + self.attributes.iter() + } + + fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { + let mapped = get_dd_key_for_otlp_attribute(key); + // If the mapping returned an empty string or the same key, there's no alternate + if mapped.is_empty() || mapped.as_ref() == key { + None + } else { + Some(mapped) + } + } +} + +/// OpenTelemetry Sampling Data implementation. +/// +/// Provides the necessary data for making sampling decisions on OpenTelemetry spans. +/// This struct contains references to span metadata including the trace ID, span name, +/// span kind, attributes, and resource information. +pub struct OtelSamplingData<'a> { + is_parent_sampled: Option, + trace_id: &'a opentelemetry::trace::TraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, +} + +impl<'a> OtelSamplingData<'a> { + /// Creates a new OpenTelemetry sampling data instance. + /// + /// # Arguments + /// + /// * `is_parent_sampled` - Whether the parent span was sampled, if known + /// * `trace_id` - The trace ID for this span + /// * `name` - The span name + /// * `span_kind` - The kind of span (e.g., Server, Client) + /// * `attributes` - The span's attributes + /// * `resource` - The OpenTelemetry resource containing service metadata + pub fn new( + is_parent_sampled: Option, + trace_id: &'a opentelemetry::trace::TraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, + ) -> Self { + Self { + is_parent_sampled, + trace_id, + name, + span_kind, + attributes, + resource, + } + } +} + +impl SamplingData for OtelSamplingData<'_> { + type TraceId = opentelemetry::trace::TraceId; + type Properties<'b> + = PreSampledSpan<'b> + where + Self: 'b; + + fn is_parent_sampled(&self) -> Option { + self.is_parent_sampled + } + fn trace_id(&self) -> &Self::TraceId { + self.trace_id + } + + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: for<'b> Fn(&S, &PreSampledSpan<'b>) -> T, + { + let resource_guard = self.resource.read().unwrap(); + let span = PreSampledSpan::new( + self.name, + self.span_kind.clone(), + self.attributes, + &resource_guard, + ); + f(s, &span) + } +} + +impl crate::sampling::TraceIdLike for opentelemetry::trace::TraceId { + type Item = opentelemetry::trace::TraceId; + + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.to_bytes()) + } + + fn inner(&self) -> &Self::Item { + self + } +} + +/// Factory for creating OpenTelemetry KeyValue attributes. +pub struct OtelAttributeFactory; + +impl crate::sampling::AttributeFactory for OtelAttributeFactory { + type Attribute = opentelemetry::KeyValue; + + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } +} + +/// Converts a Datadog sampling priority to an OpenTelemetry sampling decision. +/// +/// # Arguments +/// +/// * `priority` - The Datadog sampling priority +/// +/// # Returns +/// +/// The corresponding OpenTelemetry sampling decision: +/// - `RecordAndSample` if the priority indicates the trace should be kept +/// - `RecordOnly` if the priority indicates the trace should be dropped +pub(crate) fn priority_to_otel_decision( + priority: crate::core::sampling::SamplingPriority, +) -> opentelemetry::trace::SamplingDecision { + if priority.is_keep() { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + } +} diff --git a/datadog-opentelemetry/src/sampling/rate_sampler.rs b/datadog-opentelemetry/src/sampling/rate_sampler.rs index ba6d77de..97a2304b 100644 --- a/datadog-opentelemetry/src/sampling/rate_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rate_sampler.rs @@ -1,11 +1,10 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use opentelemetry::trace::TraceId; -use std::fmt; - use super::constants::{numeric, rate}; +use crate::sampling::TraceIdLike; use numeric::{KNUTH_FACTOR, MAX_UINT_64BITS}; +use std::fmt; /// Keeps (100 * `sample_rate`)% of the traces randomly. #[derive(Clone)] @@ -50,7 +49,7 @@ impl RateSampler { /// Determines if a trace should be sampled based on its trace_id and the configured rate. /// Returns true if the trace should be kept, false otherwise. - pub fn sample(&self, trace_id: TraceId) -> bool { + pub fn sample(&self, trace_id: &T) -> bool { // Fast-path for sample rate of 0.0 (always drop) or 1.0 (always sample) if self.sample_rate <= rate::MIN_SAMPLE_RATE { return false; @@ -60,8 +59,7 @@ impl RateSampler { } // Convert trace_id to u128 and then cast to u64 to get the lower 64 bits - let trace_id_u128 = u128::from_be_bytes(trace_id.to_bytes()); - let trace_id_64bits = trace_id_u128 as u64; + let trace_id_64bits = trace_id.to_u128() as u64; let hashed_id = trace_id_64bits.wrapping_mul(KNUTH_FACTOR); @@ -124,7 +122,7 @@ mod tests { bytes_zero[15] = 1; // Example ID let trace_id_zero = TraceId::from_bytes(bytes_zero); assert!( - !sampler_zero.sample(trace_id_zero), + !sampler_zero.sample(&trace_id_zero), "sampler_zero should return false" ); @@ -134,7 +132,7 @@ mod tests { bytes_one[15] = 2; // Example ID let trace_id_one = TraceId::from_bytes(bytes_one); assert!( - sampler_one.sample(trace_id_one), + sampler_one.sample(&trace_id_one), "sampler_one should return true" ); @@ -149,7 +147,7 @@ mod tests { let sample_hash = sample_u64.wrapping_mul(KNUTH_FACTOR); assert!(sample_hash <= threshold); assert!( - sampler_half.sample(trace_id_sample), + sampler_half.sample(&trace_id_sample), "sampler_half should sample trace_id_sample" ); @@ -165,7 +163,7 @@ mod tests { "Drop hash {drop_hash} should be > threshold {threshold}", ); assert!( - !sampler_half.sample(trace_id_drop), + !sampler_half.sample(&trace_id_drop), "sampler_half should drop trace_id_drop" ); } @@ -177,7 +175,7 @@ mod tests { let bytes_to_sample = [0u8; 16]; let trace_id_to_sample = TraceId::from_bytes(bytes_to_sample); assert!( - sampler_half.sample(trace_id_to_sample), + sampler_half.sample(&trace_id_to_sample), "Sampler with 0.5 rate should sample trace ID 0" ); } diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/datadog-opentelemetry/src/sampling/rules_sampler.rs index e679a09e..7e412e9d 100644 --- a/datadog-opentelemetry/src/sampling/rules_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rules_sampler.rs @@ -24,9 +24,9 @@ impl RulesSampler { } /// Finds the first matching rule for a span - pub fn find_matching_rule(&self, matcher: F) -> Option + pub fn find_matching_rule(&self, mut matcher: F) -> Option where - F: Fn(&SamplingRule) -> bool, + F: FnMut(&SamplingRule) -> bool, { self.inner .read() diff --git a/datadog-opentelemetry/src/sampling/types.rs b/datadog-opentelemetry/src/sampling/types.rs new file mode 100644 index 00000000..3e7e164a --- /dev/null +++ b/datadog-opentelemetry/src/sampling/types.rs @@ -0,0 +1,207 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Type definitions and traits for sampling + +use std::borrow::Cow; + +/// A trait for converting trace IDs to a numeric representation. +/// +/// Provides a common interface for converting trace IDs from different tracing systems +/// into a 128-bit unsigned integer for use in hash-based operations. +/// +/// # Examples +/// +/// ```ignore +/// use opentelemetry::trace::TraceId; +/// use datadog_opentelemetry::sampling::TraceIdLike; +/// +/// #[derive(Clone, PartialEq, Eq)] +/// struct MyTraceId(TraceId); +/// +/// impl TraceIdLike for MyTraceId { +/// type Item = TraceId; +/// +/// fn to_u128(&self) -> u128 { +/// u128::from_be_bytes(self.0.to_bytes()) +/// } +/// +/// fn inner(&self) -> &Self::Item { +/// &self.0 +/// } +/// } +/// ``` +pub trait TraceIdLike { + /// The underlying trace ID type. + type Item: PartialEq + Eq; + + /// Converts the trace ID to a 128-bit unsigned integer. + /// + /// The conversion should be deterministic: the same trace ID must always produce + /// the same `u128` value. Typically implemented by interpreting the trace ID's + /// bytes as a big-endian integer. + fn to_u128(&self) -> u128; + + /// Returns a reference to the underlying trace ID. + /// + /// Used internally for trait object equality comparisons. + fn inner(&self) -> &Self::Item; +} + +impl PartialEq for dyn TraceIdLike { + fn eq(&self, other: &Self) -> bool { + self.inner() == other.inner() + } +} + +/// A trait for accessing span attribute key-value pairs. +/// +/// Provides methods for retrieving the key and value of a span attribute. +pub trait AttributeLike { + /// The type of the value that implements `ValueLike`. + type Value: ValueLike; + + /// Returns the attribute key as a string. + fn key(&self) -> &str; + + /// Returns a reference to the attribute value. + fn value(&self) -> &Self::Value; +} + +/// A trait for extracting typed values from attribute values. +/// +/// Provides methods for converting attribute values to common types used in sampling logic. +pub trait ValueLike { + /// Extracts a float value if the value can be represented as `f64`. + /// + /// Returns `Some(f64)` for numeric types, `None` otherwise. + fn extract_float(&self) -> Option; + + /// Extracts a string representation of the value. + /// + /// Returns `Some(Cow)` for types that can be converted to strings, `None` otherwise. + fn extract_string(&self) -> Option>; +} + +impl AttributeLike for opentelemetry::KeyValue { + type Value = opentelemetry::Value; + + fn key(&self) -> &str { + self.key.as_str() + } + + fn value(&self) -> &Self::Value { + &self.value + } +} + +impl ValueLike for opentelemetry::Value { + fn extract_float(&self) -> Option { + crate::sampling::utils::extract_float_value(self) + } + + fn extract_string(&self) -> Option> { + crate::sampling::utils::extract_string_value(self) + } +} + +/// A trait for creating sampling attributes. +/// +/// This trait abstracts the creation of attributes for sampling tags, +/// allowing different implementations for different attribute types. +pub trait AttributeFactory { + /// The type of attribute created by this factory. + type Attribute: Sized; + + /// Creates an attribute with an i64 value. + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute; + + /// Creates an attribute with an f64 value. + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute; + + /// Creates an attribute with a string value. + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute; +} + +/// A trait for accessing span properties needed for sampling decisions. +/// +/// Provides methods for retrieving span metadata like operation name, service, environment, +/// resource name, and status codes used by sampling rules. +pub trait SpanProperties { + /// The type of attribute that implements `AttributeLike`. + type Attribute: AttributeLike; + + /// The type of iterator over span attributes. + type AttributesIter<'a>: Iterator + where + Self: 'a; + + /// Returns the operation name for the span. + /// + /// The operation name is derived from span attributes and kind according to + /// OpenTelemetry semantic conventions. + fn operation_name(&self) -> Cow<'_, str>; + + /// Returns the service name for the span. + /// + /// The service name is extracted from resource attributes. + fn service(&self) -> Cow<'_, str>; + + /// Returns the environment name for the span. + /// + /// The environment is extracted from span or resource attributes. + fn env(&self) -> Cow<'_, str>; + + /// Returns the resource name for the span. + /// + /// The resource name is derived from span attributes and kind. + fn resource(&self) -> Cow<'_, str>; + + /// Returns the HTTP status code if present. + /// + /// Returns `None` if the span does not have an HTTP status code attribute. + fn status_code(&self) -> Option; + + /// Returns an iterator over span attributes. + fn attributes(&self) -> Self::AttributesIter<'_>; + + /// Returns an alternate key for the given attribute key. + /// + /// This is used for mapping between different attribute naming conventions + /// (e.g., OpenTelemetry to Datadog). Returns `Some(alternate_key)` if a mapping exists, + /// or `None` if the attribute key has no alternate mapping. + fn get_alternate_key<'b>(&self, key: &'b str) -> Option>; +} + +/// A trait for accessing sampling data, combining trace ID and span properties. +/// +/// This trait provides unified access to both the trace ID and span properties +/// needed for making sampling decisions. +pub trait SamplingData { + /// The type that implements `TraceIdLike`. + type TraceId: TraceIdLike; + + /// The type that implements `SpanProperties`. + type Properties<'a>: SpanProperties + where + Self: 'a; + + /// Returns whether the parent span was sampled. + /// + /// Returns: + /// - `Some(true)` if the parent span was sampled + /// - `Some(false)` if the parent span was not sampled + /// - `None` if there is no parent sampling information + fn is_parent_sampled(&self) -> Option; + + /// Returns a reference to the trace ID. + fn trace_id(&self) -> &Self::TraceId; + + /// Returns the span properties via a callback. + /// + /// This method constructs the span properties and passes them to the provided + /// callback function. The properties are only valid for the duration of the callback. + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: Fn(&S, &Self::Properties<'_>) -> T; +} diff --git a/datadog-opentelemetry/src/span_processor.rs b/datadog-opentelemetry/src/span_processor.rs index df5f2b02..e594f8ac 100644 --- a/datadog-opentelemetry/src/span_processor.rs +++ b/datadog-opentelemetry/src/span_processor.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! Datadog OTel SpanProcessor + use hashbrown::{hash_map, HashMap as BHashMap}; use std::{ collections::HashMap, @@ -69,7 +71,7 @@ struct InnerTraceRegistry { config: Arc, } -pub enum RegisterTracePropagationResult { +pub(crate) enum RegisterTracePropagationResult { Existing(SamplingDecision), New, } @@ -262,7 +264,7 @@ struct CachePadded(T); /// - The finished spans of the trace /// - The number of open spans in the trace /// - The sampling decision of the trace -pub(crate) struct TraceRegistry { +pub struct TraceRegistry { // Example: // inner: Arc<[CacheAligned>; N]>; // to access a trace we do inner[hash(trace_id) % N].read() @@ -271,6 +273,10 @@ pub(crate) struct TraceRegistry { } impl TraceRegistry { + /// Creates a new trace registry. + /// + /// The registry uses sharding to minimize lock contention when multiple threads + /// are creating and finishing spans concurrently. pub fn new(config: Arc) -> Self { Self { inner: Arc::new(std::array::from_fn(|_| { @@ -297,6 +303,7 @@ impl TraceRegistry { /// /// If the trace is already registered with a non None sampling decision, /// it will return the existing sampling decision instead + #[allow(private_interfaces)] pub fn register_local_root_trace_propagation_data( &self, trace_id: [u8; 16], @@ -321,6 +328,7 @@ impl TraceRegistry { } /// Register a new span with the given trace ID and span ID. + #[allow(private_interfaces)] pub fn register_span( &self, trace_id: [u8; 16], @@ -345,6 +353,10 @@ impl TraceRegistry { inner.finish_span(trace_id, span_data) } + /// Retrieves the trace propagation data for a given trace ID. + /// + /// Returns the sampling decision, origin, and internal tags associated with the trace. + #[allow(private_interfaces)] pub fn get_trace_propagation_data(&self, trace_id: [u8; 16]) -> TracePropagationData { let inner = self .get_shard(trace_id) @@ -354,6 +366,10 @@ impl TraceRegistry { inner.get_trace_propagation_data(trace_id).clone() } + /// Aggregates and returns metrics from all registry shards. + /// + /// Collects counters for spans created/finished, trace segments, and partial flushes + /// across all shards in the registry. pub fn get_metrics(&self) -> TraceRegistryMetrics { let mut stats = TraceRegistryMetrics::default(); for shard_idx in 0..TRACE_REGISTRY_SHARDS { @@ -369,12 +385,21 @@ impl TraceRegistry { } } +/// Metrics collected by the trace registry. +/// +/// Tracks the lifecycle of spans and traces through the registry, useful for +/// monitoring and debugging trace collection behavior. #[derive(Default, Debug)] pub struct TraceRegistryMetrics { + /// Number of spans created and registered in the registry. pub spans_created: usize, + /// Number of spans that have finished processing. pub spans_finished: usize, + /// Number of trace segments created (complete or partial traces). pub trace_segments_created: usize, + /// Number of trace segments closed and sent to the exporter. pub trace_segments_closed: usize, + /// Number of times traces were partially flushed before completion. pub trace_partial_flush_count: usize, }