diff --git a/CHANGELOG.md b/CHANGELOG.md index 217da8dc8df..045b3891e26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Emit outcomes for spans trimmed from a transaction. ([#5410](https://github.com/getsentry/relay/pull/5410)) - Support `sample` alias in CSP reports. ([#5554](https://github.com/getsentry/relay/pull/5554)) +- Fix inconsistencies with Insights' expected attributes. ([#5561](https://github.com/getsentry/relay/pull/5561)) **Internal**: diff --git a/relay-conventions/sentry-conventions b/relay-conventions/sentry-conventions index 2c01cdc3720..0f5058dd7d4 160000 --- a/relay-conventions/sentry-conventions +++ b/relay-conventions/sentry-conventions @@ -1 +1 @@ -Subproject commit 2c01cdc37209a8062047bc7e35035fd1380373fc +Subproject commit 0f5058dd7d4c2fda6f980e82a1e37c0b0a4891c3 diff --git a/relay-conventions/src/consts.rs b/relay-conventions/src/consts.rs index 53aea2afa3d..f9e89c9c17a 100644 --- a/relay-conventions/src/consts.rs +++ b/relay-conventions/src/consts.rs @@ -19,6 +19,7 @@ convention_attributes!( CLIENT_SAMPLE_RATE => "sentry.client_sample_rate", DB_QUERY_TEXT => "db.query.text", DB_STATEMENT => "db.statement", + DB_SYSTEM => "db.system", DB_SYSTEM_NAME => "db.system.name", DB_OPERATION_NAME => "db.operation.name", DB_COLLECTION_NAME => "db.collection.name", @@ -74,6 +75,7 @@ convention_attributes!( SENTRY_DOMAIN => "sentry.domain", SENTRY_GROUP => "sentry.group", SENTRY_NORMALIZED_DESCRIPTION => "sentry.normalized_description", + SENTRY_STATUS_CODE => "sentry.status_code", SERVER_ADDRESS => "server.address", SPAN_KIND => "sentry.kind", STATUS_MESSAGE => "sentry.status.message", diff --git a/relay-event-normalization/src/eap/mod.rs b/relay-event-normalization/src/eap/mod.rs index 275dec38297..83c68e50c84 100644 --- a/relay-event-normalization/src/eap/mod.rs +++ b/relay-event-normalization/src/eap/mod.rs @@ -537,19 +537,29 @@ fn normalize_http_attributes( }; // Skip normalization if not an http span. - // This is equivalent to conditionally scrubbing by span category in the V1 pipeline. - if !attributes.contains_key(HTTP_REQUEST_METHOD) - && !attributes.contains_key(LEGACY_HTTP_REQUEST_METHOD) + if attributes + .get_value(SENTRY_CATEGORY) + .is_none_or(|category| category.as_str().unwrap_or_default() != "http") { return; } let op = attributes.get_value(OP).and_then(|v| v.as_str()); + let (description_method, description_url) = match attributes + .get_value(DESCRIPTION) + .and_then(|v| v.as_str()) + .and_then(|description| description.split_once(' ')) + { + Some((method, url)) => (Some(method), Some(url)), + _ => (None, None), + }; + let method = attributes .get_value(HTTP_REQUEST_METHOD) .or_else(|| attributes.get_value(LEGACY_HTTP_REQUEST_METHOD)) - .and_then(|v| v.as_str()); + .and_then(|v| v.as_str()) + .or(description_method); let server_address = attributes .get_value(SERVER_ADDRESS) @@ -558,12 +568,7 @@ fn normalize_http_attributes( let url: Option<&str> = attributes .get_value(URL_FULL) .and_then(|v| v.as_str()) - .or_else(|| { - attributes - .get_value(DESCRIPTION) - .and_then(|v| v.as_str()) - .and_then(|description| description.split_once(' ').map(|(_, url)| url)) - }); + .or(description_url); let url_scheme = attributes.get_value(URL_SCHEME).and_then(|v| v.as_str()); // If the span op is "http.client" and the method and url are present, @@ -615,13 +620,14 @@ pub fn write_legacy_attributes(attributes: &mut Annotated) { // Map of new sentry conventions attributes to legacy SpanV1 attributes let current_to_legacy_attributes = [ // DB attributes + (DB_QUERY_TEXT, DESCRIPTION), (NORMALIZED_DB_QUERY, SENTRY_NORMALIZED_DESCRIPTION), - (NORMALIZED_DB_QUERY_HASH, SENTRY_GROUP), (DB_OPERATION_NAME, SENTRY_ACTION), - (DB_COLLECTION_NAME, SENTRY_DOMAIN), + (DB_SYSTEM_NAME, DB_SYSTEM), // HTTP attributes (SERVER_ADDRESS, SENTRY_DOMAIN), (HTTP_REQUEST_METHOD, SENTRY_ACTION), + (HTTP_RESPONSE_STATUS_CODE, SENTRY_STATUS_CODE), ]; for (current_attribute, legacy_attribute) in current_to_legacy_attributes { @@ -632,6 +638,47 @@ pub fn write_legacy_attributes(attributes: &mut Annotated) { attributes.insert(legacy_attribute, attr.value.clone()); } } + + if !attributes.contains_key(SENTRY_DOMAIN) + && let Some(db_domain) = attributes + .get_value(DB_COLLECTION_NAME) + .and_then(|value| value.as_str()) + .map(|collection_name| collection_name.to_owned()) + { + // sentry.domain must be wrapped in preceding and trailing commas, for old hacky reasons. + if db_domain.starts_with(",") && db_domain.ends_with(",") { + attributes.insert(SENTRY_DOMAIN, db_domain); + } else { + let mut comma_separated_domain = String::with_capacity(db_domain.len() + 2); + if !db_domain.starts_with(",") { + comma_separated_domain.push(','); + } + comma_separated_domain.push_str(&db_domain); + if !db_domain.ends_with(",") { + comma_separated_domain.push(','); + } + attributes.insert(SENTRY_DOMAIN, comma_separated_domain); + } + } + + if let Some(&Value::String(method)) = attributes.get_value(HTTP_REQUEST_METHOD).as_ref() + && let Some(&Value::String(url)) = attributes.get_value(URL_FULL).as_ref() + { + attributes.insert(DESCRIPTION, format!("{method} {url}")) + } + + if !attributes.contains_key(DESCRIPTION) + && let Some(&Value::Array(keys)) = attributes.get_value("cache.key").as_ref() + && let description = keys + .iter() + .flat_map(|k| k.value()) + .flat_map(|k| k.as_str()) + .collect::>() + .join(", ") + && !description.is_empty() + { + attributes.insert(DESCRIPTION, description); + } } #[cfg(test)] @@ -1434,6 +1481,10 @@ mod tests { "type": "string", "value": "http.client" }, + "sentry.category": { + "type": "string", + "value": "http" + }, "http.request.method": { "type": "string", "value": "GET" @@ -1455,6 +1506,10 @@ mod tests { "type": "string", "value": "GET" }, + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1476,6 +1531,10 @@ mod tests { let mut attributes = Annotated::::from_json( r#" { + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1505,6 +1564,10 @@ mod tests { "type": "string", "value": "GET" }, + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1530,6 +1593,10 @@ mod tests { let mut attributes = Annotated::::from_json( r#" { + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1558,6 +1625,10 @@ mod tests { "type": "string", "value": "GET" }, + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1645,6 +1716,10 @@ mod tests { let mut attributes = Annotated::::from_json( r#" { + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.op": { "type": "string", "value": "http.client" @@ -1674,6 +1749,10 @@ mod tests { "type": "string", "value": "GET" }, + "sentry.category": { + "type": "string", + "value": "http" + }, "sentry.description": { "type": "string", "value": "GET https://application.www.xn--85x722f.xn--55qx5d.cn" @@ -1748,6 +1827,10 @@ mod tests { "type": "string", "value": "{\"find\": \"documents\", \"foo\": \"bar\"}" }, + "db.system": { + "type": "string", + "value": "mongodb" + }, "db.system.name": { "type": "string", "value": "mongodb" @@ -1756,13 +1839,13 @@ mod tests { "type": "string", "value": "FIND" }, - "sentry.domain": { + "sentry.description": { "type": "string", - "value": "documents" + "value": "{\"find\": \"documents\", \"foo\": \"bar\"}" }, - "sentry.group": { + "sentry.domain": { "type": "string", - "value": "aedc5c7e8cec726b" + "value": ",documents," }, "sentry.normalized_db_query": { "type": "string", @@ -2070,4 +2153,167 @@ mod tests { } "#); } + + #[test] + fn test_write_legacy_attributes_cache_key() { + let mut attributes = Annotated::::from_json( + r#" + { + "cache.key": { + "type": "array", + "value": ["key1", "key2", "key3"] + } + } + "#, + ) + .unwrap(); + + write_legacy_attributes(&mut attributes); + + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" + { + "cache.key": { + "type": "array", + "value": [ + "key1", + "key2", + "key3" + ] + }, + "sentry.description": { + "type": "string", + "value": "key1, key2, key3" + } + } + "#); + } + + #[test] + fn test_write_legacy_attributes_cache_key_single() { + let mut attributes = Annotated::::from_json( + r#" + { + "cache.key": { + "type": "array", + "value": ["single_key"] + } + } + "#, + ) + .unwrap(); + + write_legacy_attributes(&mut attributes); + + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" + { + "cache.key": { + "type": "array", + "value": [ + "single_key" + ] + }, + "sentry.description": { + "type": "string", + "value": "single_key" + } + } + "#); + } + + #[test] + fn test_write_legacy_attributes_cache_key_empty() { + let mut attributes = Annotated::::from_json( + r#" + { + "cache.key": { + "type": "array", + "value": [] + } + } + "#, + ) + .unwrap(); + + write_legacy_attributes(&mut attributes); + + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" + { + "cache.key": { + "type": "array", + "value": [] + } + } + "#); + } + + #[test] + fn test_write_legacy_attributes_cache_key_with_nulls() { + let mut attributes = Annotated::::from_json( + r#" + { + "cache.key": { + "type": "array", + "value": ["key1", null, "key2"] + } + } + "#, + ) + .unwrap(); + + write_legacy_attributes(&mut attributes); + + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" + { + "cache.key": { + "type": "array", + "value": [ + "key1", + null, + "key2" + ] + }, + "sentry.description": { + "type": "string", + "value": "key1, key2" + } + } + "#); + } + + #[test] + fn test_write_legacy_attributes_cache_key_does_not_overwrite_description() { + let mut attributes = Annotated::::from_json( + r#" + { + "cache.key": { + "type": "array", + "value": ["key1", "key2"] + }, + "sentry.description": { + "type": "string", + "value": "existing description" + } + } + "#, + ) + .unwrap(); + + write_legacy_attributes(&mut attributes); + + insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#" + { + "cache.key": { + "type": "array", + "value": [ + "key1", + "key2" + ] + }, + "sentry.description": { + "type": "string", + "value": "existing description" + } + } + "#); + } } diff --git a/relay-server/src/processing/spans/process.rs b/relay-server/src/processing/spans/process.rs index a155108047c..b797101a81c 100644 --- a/relay-server/src/processing/spans/process.rs +++ b/relay-server/src/processing/spans/process.rs @@ -271,9 +271,18 @@ fn span_duration(span: &SpanV2) -> Option { #[cfg(test)] mod tests { use chrono::DateTime; + use relay_conventions::{ + DB_QUERY_TEXT, DB_SYSTEM, DB_SYSTEM_NAME, DESCRIPTION, HTTP_REQUEST_METHOD, OP, + SENTRY_ACTION, SENTRY_CATEGORY, SENTRY_DOMAIN, SENTRY_NORMALIZED_DESCRIPTION, URL_FULL, + }; + use relay_event_schema::protocol::{ + AttributeType, AttributeValue, Attributes, EventId, SpanKind, + }; use relay_pii::PiiConfig; - use relay_protocol::SerializableAnnotated; + use relay_protocol::{SerializableAnnotated, Value}; + use crate::Envelope; + use crate::extractors::RequestMeta; use crate::services::projects::project::ProjectInfo; use super::*; @@ -759,4 +768,283 @@ mod tests { }); assert!(r.is_err()); } + + fn prepare_normalize_span_params( + string_attributes: Vec<(&str, &str)>, + float_attributes: Vec<(&str, f64)>, + array_attributes: Vec<(&str, Vec<&str>)>, + ) -> ( + Annotated, + EnvelopeHeaders, + GeoIpLookup, + Context<'static>, + ) { + let mut attributes = Attributes::new(); + string_attributes + .into_iter() + .for_each(|(key, value)| attributes.insert(key, value.to_owned())); + float_attributes + .into_iter() + .for_each(|(key, value)| attributes.insert(key, value)); + array_attributes.into_iter().for_each(|(key, values)| { + let array_value = AttributeValue { + ty: AttributeType::Array.into(), + value: Value::Array( + values + .into_iter() + .map(|v| Value::String(v.to_owned()).into()) + .collect(), + ) + .into(), + }; + attributes.insert(key, array_value); + }); + let attrs_json = + serde_json::to_string(&SerializableAnnotated(&Annotated::new(attributes))).unwrap(); + let span_json = format!( + r#"{{ + "trace_id": "5b8efff798038103d269b633813fc60c", + "span_id": "eee19b7ec3c1b175", + "start_timestamp": 1715000000.0, + "end_timestamp": 1715000001.0, + "name": "test", + "status": "ok", + "attributes": {attrs_json} + }}"# + ); + let span = Annotated::::from_json(&span_json).unwrap(); + + let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" + .parse() + .unwrap(); + let meta = RequestMeta::new(dsn); + let envelope = Envelope::from_request(Some(EventId::new()), meta); + let headers = envelope.headers().to_owned(); + + (span, headers, GeoIpLookup::empty(), Context::for_test()) + } + + fn assert_attributes_contains( + span: &Annotated, + string_attributes: Vec<(&str, &str)>, + float_attributes: Vec<(&str, f64)>, + ) { + let attrs = span.value().unwrap().attributes.value().unwrap(); + string_attributes.into_iter().for_each(|(key, value)| { + assert_eq!(attrs.get_value(key).and_then(|v| v.as_str()), Some(value),) + }); + float_attributes.into_iter().for_each(|(key, value)| { + assert_eq!(attrs.get_value(key).and_then(|v| v.as_f64()), Some(value),) + }); + } + + #[test] + fn test_insights_backend_queries_support_modern() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![ + (DB_SYSTEM_NAME, "postgresql"), + (DB_QUERY_TEXT, "select * from users where id = 1"), + ], + vec![], + vec![], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![ + (DESCRIPTION, "select * from users where id = 1"), + ( + SENTRY_NORMALIZED_DESCRIPTION, + "SELECT * FROM users WHERE id = %s", + ), + (SENTRY_CATEGORY, "db"), + (DB_SYSTEM, "postgresql"), + (SENTRY_ACTION, "SELECT"), + (SENTRY_DOMAIN, ",users,"), + ], + vec![], + ); + } + + #[test] + fn test_insights_backend_queries_support_legacy() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![ + (DB_SYSTEM, "postgresql"), + (DESCRIPTION, "select * from users where id = 1"), + ], + vec![], + vec![], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![ + (DESCRIPTION, "select * from users where id = 1"), + ( + SENTRY_NORMALIZED_DESCRIPTION, + "SELECT * FROM users WHERE id = %s", + ), + (SENTRY_CATEGORY, "db"), + (DB_SYSTEM, "postgresql"), + (SENTRY_ACTION, "SELECT"), + (SENTRY_DOMAIN, ",users,"), + ], + vec![], + ); + } + + #[test] + fn test_insights_backend_outbound_api_requests_support_modern() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![ + ("sentry.kind", SpanKind::Client.as_str()), + (HTTP_REQUEST_METHOD, "GET"), + (URL_FULL, "https://www.example.com/path?param=value"), + ], + vec![("http.response.status_code", 502.)], + vec![], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![ + (SENTRY_CATEGORY, "http"), + (OP, "http.client"), + (DESCRIPTION, "GET https://www.example.com/path?param=value"), + (SENTRY_ACTION, "GET"), + (SENTRY_DOMAIN, "*.example.com"), + ], + vec![("sentry.status_code", 502.)], + ); + } + + #[test] + fn test_insights_backend_outbound_api_requests_support_legacy_absolute() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![ + (OP, "http.client"), + (DESCRIPTION, "GET https://www.example.com/path?param=value"), + ], + vec![("http.response.status_code", 502.)], + vec![], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![ + (SENTRY_CATEGORY, "http"), + (OP, "http.client"), + (DESCRIPTION, "GET https://www.example.com/path?param=value"), + (SENTRY_ACTION, "GET"), + (SENTRY_DOMAIN, "*.example.com"), + ], + vec![("sentry.status_code", 502.)], + ); + } + + #[test] + fn test_insights_backend_outbound_api_requests_support_legacy_relative() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![ + (OP, "http.client"), + (DESCRIPTION, "GET /path?param=value"), + ("server.address", "www.example.com"), + ], + vec![("http.response.status_code", 502.)], + vec![], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![ + (SENTRY_CATEGORY, "http"), + (OP, "http.client"), + (DESCRIPTION, "GET /path?param=value"), + (SENTRY_ACTION, "GET"), + (SENTRY_DOMAIN, "*.example.com"), + ], + vec![("sentry.status_code", 502.)], + ); + } + + #[test] + fn test_insights_backend_caches_put() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![], + vec![("cache.item_size", 1024.)], + vec![("cache.key", vec!["key1", "key2"])], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![(OP, "cache.put"), (DESCRIPTION, "key1, key2")], + vec![("cache.item_size", 1024.)], + ); + let cache_key_attribute = span + .value() + .and_then(|v| v.attributes.value()) + .and_then(|span| span.get_value("cache.key")); + insta::assert_debug_snapshot!(cache_key_attribute, @r###" + Some( + Array( + [ + String( + "key1", + ), + String( + "key2", + ), + ], + ), + ) + "###); + } + + #[test] + fn test_insights_backend_caches_get() { + let (mut span, headers, geo_lookup, ctx) = prepare_normalize_span_params( + vec![], + vec![("cache.hit", 1.), ("cache.item_size", 1024.)], + vec![("cache.key", vec!["key1", "key2"])], + ); + + normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + + assert_attributes_contains( + &span, + vec![(OP, "cache.get"), (DESCRIPTION, "key1, key2")], + vec![("cache.hit", 1.), ("cache.item_size", 1024.)], + ); + let cache_key_attribute = span + .value() + .and_then(|v| v.attributes.value()) + .and_then(|span| span.get_value("cache.key")); + insta::assert_debug_snapshot!(cache_key_attribute, @r###" + Some( + Array( + [ + String( + "key1", + ), + String( + "key2", + ), + ], + ), + ) + "###); + } } diff --git a/relay-spans/src/v2_to_v1.rs b/relay-spans/src/v2_to_v1.rs index cb0f566da1e..5ec17d34a79 100644 --- a/relay-spans/src/v2_to_v1.rs +++ b/relay-spans/src/v2_to_v1.rs @@ -268,6 +268,14 @@ pub fn derive_op_for_v2_span(attributes: &Annotated) -> String { return faas_trigger.to_owned(); } + if attributes.contains_key("cache.key") { + if attributes.contains_key("cache.hit") { + return String::from("cache.get"); + } else { + return String::from("cache.put"); + } + } + op } diff --git a/tests/integration/test_spansv2.py b/tests/integration/test_spansv2.py index 08509cfd0d7..b980d5dc3bb 100644 --- a/tests/integration/test_spansv2.py +++ b/tests/integration/test_spansv2.py @@ -1234,6 +1234,7 @@ def test_spansv2_attribute_normalization( "sentry.browser.version": {"type": "string", "value": "2.32"}, "sentry.category": {"type": "string", "value": "db"}, "sentry.op": {"type": "string", "value": "db"}, + "db.system": {"type": "string", "value": "mysql"}, "db.system.name": {"type": "string", "value": "mysql"}, "db.operation.name": {"type": "string", "value": "SELECT"}, "sentry.action": {"type": "string", "value": "SELECT"}, @@ -1242,7 +1243,11 @@ def test_spansv2_attribute_normalization( "value": "SELECT id FROM users WHERE id = 1 AND name = 'Test'", }, "db.collection.name": {"type": "string", "value": "users"}, - "sentry.domain": {"type": "string", "value": "users"}, + "sentry.description": { + "type": "string", + "value": "SELECT id FROM users WHERE id = 1 AND name = 'Test'", + }, + "sentry.domain": {"type": "string", "value": ",users,"}, "sentry.normalized_db_query": { "type": "string", "value": "SELECT id FROM users WHERE id = %s AND name = %s", @@ -1255,7 +1260,6 @@ def test_spansv2_attribute_normalization( "type": "string", "value": "f79af0ba3d26284c", }, - "sentry.group": {"type": "string", "value": "f79af0ba3d26284c"}, "sentry.observed_timestamp_nanos": { "type": "string", "value": time_within(ts, expect_resolution="ns"), @@ -1272,6 +1276,10 @@ def test_spansv2_attribute_normalization( "sentry.browser.name": {"type": "string", "value": "Python Requests"}, "sentry.browser.version": {"type": "string", "value": "2.32"}, "sentry.category": {"type": "string", "value": "http"}, + "sentry.description": { + "type": "string", + "value": "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj", + }, "sentry.op": {"type": "string", "value": "http.client"}, "sentry.observed_timestamp_nanos": { "type": "string",