Skip to content

Commit f824fd3

Browse files
constantiniusgeorge-sentry
authored andcommitted
feat(snuba): add a metric in query_trace_data to see what spans report span.status "ok" but have an associated error (#112090)
Emit a metric for cases when we detect a span with status "ok" but errors associated. This queries the additional fields "span.status", "origin", "sdk.version" which we use as tags in the metric. Contributes to https://linear.app/getsentry/issue/TET-2102/detect-spanstatus-inconsistencies-and-report-to-sdk-teams
1 parent 09cf4c5 commit f824fd3

File tree

2 files changed

+66
-1
lines changed

2 files changed

+66
-1
lines changed

src/sentry/snuba/trace.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Any, Literal, NotRequired, TypedDict
66

77
from sentry.uptime.subscriptions.regions import get_region_config
8+
from sentry.utils import metrics
89
from sentry.utils.concurrent import ContextPropagatingThreadPoolExecutor
910

1011
logger = logging.getLogger(__name__)
@@ -634,6 +635,11 @@ def query_trace_data(
634635
# the thread pool, database connections can hang around as the threads are not cleaned
635636
# up. Because of that, tests can fail during tear down as there are active connections
636637
# to the database preventing a DROP.
638+
metric_attributes = {"span.status", "origin", "sdk.version"}
639+
all_additional_attributes = list({*(additional_attributes or []), *metric_attributes})
640+
# Attributes added only for metric tagging that should not appear in the response
641+
metric_only_attributes = metric_attributes - set(additional_attributes or [])
642+
637643
errors_query = _errors_query(snuba_params, trace_id, error_id)
638644
occurrence_query = _perf_issues_query(snuba_params, trace_id, organization)
639645
uptime_query = _uptime_results_query(snuba_params, trace_id) if include_uptime else None
@@ -650,7 +656,7 @@ def query_trace_data(
650656
params=snuba_params,
651657
referrer=referrer.value,
652658
config=SearchResolverConfig(),
653-
additional_attributes=additional_attributes,
659+
additional_attributes=all_additional_attributes,
654660
)
655661
errors_future = query_thread_pool.submit(
656662
_run_errors_query,
@@ -775,6 +781,18 @@ def query_trace_data(
775781
if span["id"] in id_to_error:
776782
errors = id_to_error.pop(span["id"])
777783
span["errors"].extend(errors)
784+
if span.get("span.status", "") == "ok":
785+
metrics.incr(
786+
"performance.trace.span_with_errors_ok_status",
787+
sample_rate=0.01,
788+
tags={
789+
"sdk_name": span.get("sdk.name", ""),
790+
"sdk_version": span.get("sdk.version", ""),
791+
"origin": span.get("origin", ""),
792+
"project_id": str(span.get("project.id", "")),
793+
"project_slug": span.get("project.slug", ""),
794+
},
795+
)
778796
if span["id"] in id_to_occurrence:
779797
occurrences: list[TraceOccurrenceEvent] = [
780798
{
@@ -803,6 +821,11 @@ def query_trace_data(
803821
snuba_params.end_date.timestamp() - span_max_ts,
804822
)
805823

824+
if metric_only_attributes:
825+
for span in spans_data:
826+
for attr in metric_only_attributes:
827+
span.pop(attr, None)
828+
806829
with sentry_sdk.start_span(op="process.errors_data"):
807830
for errors in id_to_error.values():
808831
result.extend(errors)

tests/snuba/api/endpoints/test_organization_trace.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from sentry.issues.issue_occurrence import IssueOccurrence
1919
from sentry.models.group import Group
2020
from sentry.search.events.types import SnubaParams
21+
from sentry.snuba.spans_rpc import Spans
2122
from sentry.snuba.trace import _run_errors_query_eap
2223
from sentry.testutils.cases import OccurrenceTestCase, SnubaTestCase, UptimeResultEAPTestCase
2324
from sentry.testutils.helpers.datetime import before_now
@@ -414,6 +415,47 @@ def test_with_errors_data_with_overlapping_span_id(self) -> None:
414415
assert error_event_2["event_id"] in [error.event_id, error_2.event_id]
415416
assert error_event_1["event_id"] != error_event_2["event_id"]
416417

418+
@mock.patch("sentry.snuba.trace.metrics")
419+
def test_emits_metric_for_error_on_ok_span(self, mock_metrics) -> None:
420+
self.load_trace()
421+
_, start = self.get_start_end_from_day_ago(123)
422+
root_span_id = self.root_event.data["contexts"]["trace"]["span_id"]
423+
error_data = load_data(
424+
"javascript",
425+
timestamp=start,
426+
)
427+
error_data["contexts"]["trace"] = {
428+
"type": "trace",
429+
"trace_id": self.trace_id,
430+
"span_id": root_span_id,
431+
}
432+
error_data["tags"] = [["transaction", "/transaction/gen1-0"]]
433+
self.store_event(error_data, project_id=self.gen1_project.id)
434+
435+
original_run_trace_query = Spans.run_trace_query
436+
437+
def patched_run_trace_query(**kwargs):
438+
spans = original_run_trace_query(**kwargs)
439+
for span in spans:
440+
if span["id"] == root_span_id:
441+
span["span.status"] = "ok"
442+
return spans
443+
444+
with (
445+
self.feature(self.FEATURES),
446+
mock.patch.object(Spans, "run_trace_query", side_effect=patched_run_trace_query),
447+
):
448+
response = self.client_get(
449+
data={"timestamp": self.day_ago},
450+
)
451+
assert response.status_code == 200, response.content
452+
453+
mock_metrics.incr.assert_any_call(
454+
"performance.trace.span_with_errors_ok_status",
455+
sample_rate=0.01,
456+
tags=mock.ANY,
457+
)
458+
417459
def test_with_performance_issues(self) -> None:
418460
self.load_trace()
419461
with self.feature(self.FEATURES):

0 commit comments

Comments
 (0)