diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c40517406..867361bdc7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - Add new frame fields for MetricKit flamegraphs. ([#5539](https://github.com/getsentry/relay/pull/5539))
 - Apply clock drift correction to logs and trace metrics. ([#5609](https://github.com/getsentry/relay/pull/5609))
+- Trim spans with a new EAP trimming processor. ([#5616](https://github.com/getsentry/relay/pull/5616))

 **Internal**:

diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs
index 5697b03a9c..92bb46310b 100644
--- a/relay-config/src/config.rs
+++ b/relay-config/src/config.rs
@@ -657,6 +657,16 @@ pub struct Limits {
     max_replay_uncompressed_size: ByteSize,
     /// The maximum size for a replay recording Kafka message.
     pub max_replay_message_size: ByteSize,
+    /// The byte size limit up to which Relay will retain
+    /// keys of invalid/removed attributes.
+    ///
+    /// This is only relevant for EAP items (spans, logs, …).
+    /// In principle, we want to record all deletions of attributes,
+    /// but we have to institute some limit to protect our infrastructure
+    /// against excessive metadata sizes.
+    ///
+    /// Defaults to 10KiB.
+    pub max_removed_attribute_key_size: ByteSize,
     /// The maximum number of threads to spawn for CPU and web work, each.
     ///
     /// The total number of threads spawned will roughly be `2 * max_thread_count`. Defaults to
@@ -735,6 +745,7 @@ impl Default for Limits {
             idle_timeout: None,
             max_connections: None,
             tcp_listen_backlog: 1024,
+            max_removed_attribute_key_size: ByteSize::kibibytes(10),
         }
     }
 }
@@ -2469,6 +2480,11 @@ impl Config {
         self.values.limits.max_concurrent_queries
     }

+    /// Returns the maximum combined size of keys of invalid attributes.
+    pub fn max_removed_attribute_key_size(&self) -> usize {
+        self.values.limits.max_removed_attribute_key_size.as_bytes()
+    }
+
     /// The maximum number of seconds a query is allowed to take across retries.
     pub fn query_timeout(&self) -> Duration {
         Duration::from_secs(self.values.limits.query_timeout)
diff --git a/relay-dynamic-config/src/project.rs b/relay-dynamic-config/src/project.rs
index bf1d409d3e..c358d9fdf4 100644
--- a/relay-dynamic-config/src/project.rs
+++ b/relay-dynamic-config/src/project.rs
@@ -50,6 +50,9 @@ pub struct ProjectConfig {
     /// Retention settings for different products.
     #[serde(default, skip_serializing_if = "RetentionsConfig::is_empty")]
     pub retentions: RetentionsConfig,
+    /// Trimming settings for different products.
+    #[serde(default, skip_serializing_if = "TrimmingConfigs::is_empty")]
+    pub trimming: TrimmingConfigs,
     /// Usage quotas for this project.
     #[serde(skip_serializing_if = "Vec::is_empty")]
     pub quotas: Vec<Quota>,
@@ -159,6 +162,7 @@ impl Default for ProjectConfig {
             event_retention: None,
             downsampled_event_retention: None,
             retentions: Default::default(),
+            trimming: Default::default(),
             quotas: Vec::new(),
             sampling: None,
             measurements: None,
@@ -205,6 +209,8 @@ pub struct LimitedProjectConfig {
     pub filter_settings: ProjectFiltersConfig,
     #[serde(skip_serializing_if = "DataScrubbingConfig::is_disabled")]
     pub datascrubbing_settings: DataScrubbingConfig,
+    #[serde(skip_serializing_if = "TrimmingConfigs::is_empty")]
+    pub trimming: TrimmingConfigs,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub sampling: Option<ErrorBoundary<SamplingConfig>>,
     #[serde(skip_serializing_if = "SessionMetricsConfig::is_disabled")]
@@ -277,6 +283,30 @@ impl RetentionsConfig {
     }
 }

+/// Per-category settings for item trimming.
+#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TrimmingConfig {
+    /// The maximum size in bytes above which an item should be trimmed.
+    pub max_size: u32,
+}
+
+/// Settings for item trimming.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct TrimmingConfigs {
+    /// Trimming settings for spans.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub span: Option<TrimmingConfig>,
+}
+
+impl TrimmingConfigs {
+    fn is_empty(&self) -> bool {
+        let Self { span } = self;
+        span.is_none()
+    }
+}
+
 fn is_false(value: &bool) -> bool {
     !*value
 }
diff --git a/relay-event-normalization/src/eap/mod.rs b/relay-event-normalization/src/eap/mod.rs
index b3b06698c7..84e2ec7ef5 100644
--- a/relay-event-normalization/src/eap/mod.rs
+++ b/relay-event-normalization/src/eap/mod.rs
@@ -26,11 +26,11 @@ mod ai;
 mod size;
 pub mod time;
 pub mod trace_metric;
-#[allow(unused)]
 mod trimming;

 pub use self::ai::normalize_ai;
 pub use self::size::*;
+pub use self::trimming::TrimmingProcessor;

 /// Infers the sentry.op attribute and inserts it into [`Attributes`] if not already set.
 pub fn normalize_sentry_op(attributes: &mut Annotated<Attributes>) {
diff --git a/relay-event-normalization/src/eap/trimming.rs b/relay-event-normalization/src/eap/trimming.rs
index 58bde9d05c..65e4414dc7 100644
--- a/relay-event-normalization/src/eap/trimming.rs
+++ b/relay-event-normalization/src/eap/trimming.rs
@@ -359,6 +359,12 @@ impl Processor for TrimmingProcessor {
             return Ok(());
         }

+        // This counts the lengths of all attribute keys, regardless of whether
+        // the attribute itself is valid or invalid. Strictly speaking, this is
+        // inconsistent with the trimming logic, which only counts keys of valid
+        // attributes. However, since this value is only used to set the `original_value`
+        // on the attributes collection for documentation purposes, we accept this
+        // discrepancy for now. In any case, this is fine to change.
         let original_length = size::attributes_size(attributes);

         // Sort attributes by key + value size so small attributes are more likely to be preserved.
diff --git a/relay-event-schema/src/protocol/span_v2.rs b/relay-event-schema/src/protocol/span_v2.rs
index 5aa9a028a4..bb77e049a1 100644
--- a/relay-event-schema/src/protocol/span_v2.rs
+++ b/relay-event-schema/src/protocol/span_v2.rs
@@ -15,6 +15,7 @@ pub struct SpanV2 {
     pub trace_id: Annotated<TraceId>,

     /// The ID of the span enclosing this span.
+    #[metastructure(trim = false)]
     pub parent_span_id: Annotated<SpanId>,

     /// The Span ID.
@@ -22,30 +23,31 @@
     pub span_id: Annotated<SpanId>,

     /// Span type (see `OperationType` docs).
-    #[metastructure(required = true)]
+    #[metastructure(required = true, trim = false)]
     pub name: Annotated<OperationType>,

     /// The span's status.
-    #[metastructure(required = true)]
+    #[metastructure(required = true, trim = false)]
     pub status: Annotated<SpanV2Status>,

     /// Whether this span is the root span of a segment.
+    #[metastructure(trim = false)]
     pub is_segment: Annotated<bool>,

     /// Timestamp when the span started.
-    #[metastructure(required = true)]
+    #[metastructure(required = true, trim = false)]
     pub start_timestamp: Annotated<Timestamp>,

     /// Timestamp when the span was ended.
-    #[metastructure(required = true)]
+    #[metastructure(required = true, trim = false)]
     pub end_timestamp: Annotated<Timestamp>,

     /// Links from this span to other spans.
-    #[metastructure(pii = "maybe")]
+    #[metastructure(pii = "maybe", trim = true)]
     pub links: Annotated<Array<SpanV2Link>>,

     /// Arbitrary attributes on a span.
- #[metastructure(pii = "true", trim = false)] + #[metastructure(pii = "true", trim = true)] pub attributes: Annotated, /// Additional arbitrary fields for forwards compatibility. @@ -177,11 +179,11 @@ pub struct SpanV2Link { pub sampled: Annotated, /// Span link attributes, similar to span attributes/data. - #[metastructure(pii = "maybe", trim = false)] + #[metastructure(pii = "maybe", trim = true)] pub attributes: Annotated, /// Additional arbitrary fields for forwards compatibility. - #[metastructure(additional_properties, pii = "maybe", trim = false)] + #[metastructure(additional_properties, pii = "maybe")] pub other: Object, } diff --git a/relay-server/src/processing/spans/process.rs b/relay-server/src/processing/spans/process.rs index 49e8395a68..fcd44cf402 100644 --- a/relay-server/src/processing/spans/process.rs +++ b/relay-server/src/processing/spans/process.rs @@ -1,9 +1,7 @@ use std::collections::BTreeMap; use std::time::Duration; -use relay_event_normalization::{ - GeoIpLookup, RequiredMode, SchemaProcessor, TrimmingProcessor, eap, -}; +use relay_event_normalization::{GeoIpLookup, RequiredMode, SchemaProcessor, eap}; use relay_event_schema::processor::{ProcessingState, ValueType, process_value}; use relay_event_schema::protocol::{Span, SpanId, SpanV2}; use relay_protocol::Annotated; @@ -190,7 +188,22 @@ fn normalize_span( eap::write_legacy_attributes(&mut span.attributes); }; - process_value(span, &mut TrimmingProcessor::new(), ProcessingState::root())?; + // Set a max_bytes value on the root state if it's defined in the project config. + // This causes the whole item to be trimmed down to the limit. + let max_bytes = ctx + .project_info + .config() + .trimming + .span + .map(|cfg| cfg.max_size as usize); + let trimming_root = ProcessingState::root_builder().max_bytes(max_bytes).build(); + + process_value( + span, + &mut eap::TrimmingProcessor::new(ctx.config.max_removed_attribute_key_size()), + &trimming_root, + )?; + process_value( span, &mut SchemaProcessor::new().with_required(RequiredMode::DeleteParent), diff --git a/tests/integration/test_spansv2.py b/tests/integration/test_spansv2.py index 2aea511d69..b58ef9aaaf 100644 --- a/tests/integration/test_spansv2.py +++ b/tests/integration/test_spansv2.py @@ -173,6 +173,195 @@ def test_spansv2_basic( ] +def test_spansv2_trimming_basic( + mini_sentry, + relay, + relay_with_processing, + spans_consumer, + metrics_consumer, +): + """ + An adaptation of `test_spansv2_basic` that has a size limit for spans and attributes large enough + to demonstrate that trimming works. + """ + spans_consumer = spans_consumer() + metrics_consumer = metrics_consumer() + + project_id = 42 + project_config = mini_sentry.add_full_project_config(project_id) + project_config["config"].update( + { + "features": [ + "organizations:standalone-span-ingestion", + "projects:span-v2-experimental-processing", + ], + "retentions": {"span": {"standard": 42, "downsampled": 1337}}, + # This is sufficient for all builtin attributes not + # to be trimmed. The span fields that aren't trimmed + # also still count for the size limit. 
+ "trimming": {"span": {"maxSize": 453}}, + } + ) + + config = { + "limits": { + "max_removed_attribute_key_size": 30, + }, + **TEST_CONFIG, + } + + relay = relay(relay_with_processing(options=config), options=config) + + ts = datetime.now(timezone.utc) + envelope = envelope_with_spans( + { + "start_timestamp": ts.timestamp(), + "end_timestamp": ts.timestamp() + 0.5, + "trace_id": "5b8efff798038103d269b633813fc60c", + "span_id": "eee19b7ec3c1b175", + "is_segment": True, + "name": "some op", + "status": "ok", + "attributes": { + "custom.string.attribute": { + "value": "This is actually a pretty long string", + "type": "string", + }, + # This attribute will get trimmed in the middle of the third string. + "custom.array.attribute": { + "value": [ + "A string", + "Another longer string", + "Yet another string", + ], + "type": "array", + }, + "custom.invalid.attribute": {"value": True, "type": "string"}, + # This attribute will be removed because the `max_removed_attribute_key_bytes` (30B) + # is already consumed by the previous invalid attribute + "second.custom.invalid.attribute": {"value": None, "type": "integer"}, + }, + }, + trace_info={ + "trace_id": "5b8efff798038103d269b633813fc60c", + "public_key": project_config["publicKeys"][0]["publicKey"], + "release": "foo@1.0", + "environment": "prod", + "transaction": "/my/fancy/endpoint", + }, + ) + + relay.send_envelope(project_id, envelope) + + span = spans_consumer.get_span() + + assert span == { + "trace_id": "5b8efff798038103d269b633813fc60c", + "span_id": "eee19b7ec3c1b175", + "attributes": { + "custom.array.attribute": { + "type": "array", + "value": ["A string", "Another longer string", "Yet anothe..."], + }, + "custom.string.attribute": { + "type": "string", + "value": "This is actually a pretty long string", + }, + "custom.invalid.attribute": None, + "sentry.browser.name": {"type": "string", "value": "Python Requests"}, + "sentry.browser.version": {"type": "string", "value": "2.32"}, + "sentry.dsc.environment": {"type": "string", "value": "prod"}, + "sentry.dsc.public_key": { + "type": "string", + "value": project_config["publicKeys"][0]["publicKey"], + }, + "sentry.dsc.release": {"type": "string", "value": "foo@1.0"}, + "sentry.dsc.transaction": {"type": "string", "value": "/my/fancy/endpoint"}, + "sentry.dsc.trace_id": { + "type": "string", + "value": "5b8efff798038103d269b633813fc60c", + }, + "sentry.observed_timestamp_nanos": { + "type": "string", + "value": time_within(ts, expect_resolution="ns"), + }, + "sentry.op": {"type": "string", "value": "default"}, + }, + "_meta": { + "attributes": { + "": {"len": 505}, + "custom.array.attribute": { + "value": { + "2": { + "": { + "len": 18, + "rem": [ + [ + "!limit", + "s", + 10, + 13, + ], + ], + }, + }, + }, + }, + "custom.invalid.attribute": { + "": { + "err": ["invalid_data"], + "val": {"type": "string", "value": True}, + } + }, + } + }, + "name": "some op", + "received": time_within(ts), + "start_timestamp": time_is(ts), + "end_timestamp": time_is(ts.timestamp() + 0.5), + "is_segment": True, + "status": "ok", + "retention_days": 42, + "downsampled_retention_days": 1337, + "key_id": 123, + "organization_id": 1, + "project_id": 42, + } + + assert metrics_consumer.get_metrics(n=2, with_headers=False) == [ + { + "name": "c:spans/count_per_root_project@none", + "org_id": 1, + "project_id": 42, + "received_at": time_within(ts, precision="s"), + "retention_days": 90, + "tags": { + "decision": "keep", + "is_segment": "true", + "target_project_id": "42", + "transaction": 
"/my/fancy/endpoint", + }, + "timestamp": time_within_delta(), + "type": "c", + "value": 1.0, + }, + { + "name": "c:spans/usage@none", + "org_id": 1, + "project_id": 42, + "received_at": time_within(ts, precision="s"), + "retention_days": 90, + "tags": { + "was_transaction": "false", + "is_segment": "true", + }, + "timestamp": time_within_delta(), + "type": "c", + "value": 1.0, + }, + ] + + @pytest.mark.parametrize( "rule_type", ["project", "trace"],