diff --git a/src/sentry/search/eap/constants.py b/src/sentry/search/eap/constants.py
index 0e558ebd1b3712..8f43e175cd584e 100644
--- a/src/sentry/search/eap/constants.py
+++ b/src/sentry/search/eap/constants.py
@@ -20,6 +20,7 @@
     SupportedTraceItemType.PREPROD: TraceItemType.TRACE_ITEM_TYPE_PREPROD,
     SupportedTraceItemType.ATTACHMENTS: TraceItemType.TRACE_ITEM_TYPE_ATTACHMENT,
     SupportedTraceItemType.PROCESSING_ERRORS: TraceItemType.TRACE_ITEM_TYPE_PROCESSING_ERROR,
+    SupportedTraceItemType.OCCURRENCES: TraceItemType.TRACE_ITEM_TYPE_OCCURRENCE,
 }
 
 SUPPORTED_STATS_TYPES = {"attributeDistributions"}
diff --git a/src/sentry/search/eap/occurrences/attributes.py b/src/sentry/search/eap/occurrences/attributes.py
index e17cbee68ff4fd..27131eb950fe63 100644
--- a/src/sentry/search/eap/occurrences/attributes.py
+++ b/src/sentry/search/eap/occurrences/attributes.py
@@ -1,3 +1,5 @@
+from typing import Literal
+
 from sentry.search.eap import constants
 from sentry.search.eap.columns import (
     ResolvedAttribute,
@@ -294,3 +296,76 @@
 OCCURRENCE_VIRTUAL_CONTEXTS = {
     **project_virtual_contexts(),
 }
+
+OCCURRENCE_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS: dict[
+    Literal["string", "number", "boolean"], dict[str, str]
+] = {
+    "string": {
+        definition.internal_name: definition.public_alias
+        for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+        if not definition.secondary_alias and definition.search_type == "string"
+    }
+    | {
+        # sentry.service is the project id as a string, but map to project for convenience
+        "sentry.service": "project",
+    },
+    "boolean": {
+        definition.internal_name: definition.public_alias
+        for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+        if not definition.secondary_alias and definition.search_type == "boolean"
+    },
+    "number": {
+        definition.internal_name: definition.public_alias
+        for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+        # Include boolean attributes because they're stored as numbers (0 or 1)
+        if not definition.secondary_alias and definition.search_type != "string"
+    },
+}
+
+OCCURRENCE_PRIVATE_ATTRIBUTES: set[str] = {
+    definition.internal_name
+    for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+    if definition.private
+}
+
+# For dynamic internal attributes (eg. meta information for attributes) we match by the beginning of the key.
+OCCURRENCE_PRIVATE_ATTRIBUTE_PREFIXES: set[str] = {constants.META_PREFIX}
+
+OCCURRENCE_REPLACEMENT_ATTRIBUTES: set[str] = {
+    definition.replacement
+    for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+    if definition.replacement
+}
+
+OCCURRENCE_REPLACEMENT_MAP: dict[str, str] = {
+    definition.public_alias: definition.replacement
+    for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values()
+    if definition.replacement
+}
+
+OCCURRENCE_INTERNAL_TO_SECONDARY_ALIASES_MAPPING: dict[str, set[str]] = {}
+
+for definition in OCCURRENCE_ATTRIBUTE_DEFINITIONS.values():
+    if not definition.secondary_alias:
+        continue
+
+    secondary_aliases = OCCURRENCE_INTERNAL_TO_SECONDARY_ALIASES_MAPPING.get(
+        definition.internal_name, set()
+    )
+    secondary_aliases.add(definition.public_alias)
+    OCCURRENCE_INTERNAL_TO_SECONDARY_ALIASES_MAPPING[definition.internal_name] = secondary_aliases
+
+# Attributes excluded from stats queries (e.g., attribute distributions)
+# These are typically system-level identifiers that don't provide useful distribution insights
+OCCURRENCE_STATS_EXCLUDED_ATTRIBUTES_PUBLIC_ALIAS: set[str] = {
+    "id",
+    "trace",
+    "span_id",
+    "group_id",
+    "issue_occurrence_id",
+    "primary_hash",
+    "fingerprint",
+    "resource_id",
+    "profile_id",
+    "replay_id",
+}
diff --git a/src/sentry/search/eap/utils.py b/src/sentry/search/eap/utils.py
index db0192406ed096..e89ec30e37d888 100644
--- a/src/sentry/search/eap/utils.py
+++ b/src/sentry/search/eap/utils.py
@@ -6,6 +6,16 @@
 from sentry.search.eap.columns import ColumnDefinitions, ResolvedAttribute
 from sentry.search.eap.constants import SENTRY_INTERNAL_PREFIXES
+from sentry.search.eap.occurrences.attributes import (
+    OCCURRENCE_ATTRIBUTE_DEFINITIONS,
+    OCCURRENCE_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
+    OCCURRENCE_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
+    OCCURRENCE_PRIVATE_ATTRIBUTE_PREFIXES,
+    OCCURRENCE_PRIVATE_ATTRIBUTES,
+    OCCURRENCE_REPLACEMENT_ATTRIBUTES,
+    OCCURRENCE_REPLACEMENT_MAP,
+)
+from sentry.search.eap.occurrences.definitions import OCCURRENCE_DEFINITIONS
 from sentry.search.eap.ourlogs.attributes import (
     LOGS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
     LOGS_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
@@ -73,6 +83,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
     SupportedTraceItemType.PREPROD: PREPROD_SIZE_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
 }
 
 PUBLIC_ALIAS_TO_INTERNAL_MAPPING: dict[SupportedTraceItemType, dict[str, ResolvedAttribute]] = {
@@ -80,6 +91,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: OURLOG_ATTRIBUTE_DEFINITIONS,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_ATTRIBUTE_DEFINITIONS,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_ATTRIBUTE_DEFINITIONS,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_ATTRIBUTE_DEFINITIONS,
 }
 
 
@@ -88,6 +100,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: LOGS_PRIVATE_ATTRIBUTES,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_PRIVATE_ATTRIBUTES,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_PRIVATE_ATTRIBUTES,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_PRIVATE_ATTRIBUTES,
 }
 
 PRIVATE_ATTRIBUTE_PREFIXES: dict[SupportedTraceItemType, set[str]] = {
@@ -95,6 +108,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: LOGS_PRIVATE_ATTRIBUTE_PREFIXES,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_PRIVATE_ATTRIBUTE_PREFIXES,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_PRIVATE_ATTRIBUTE_PREFIXES,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_PRIVATE_ATTRIBUTE_PREFIXES,
 }
 
 SENTRY_CONVENTIONS_REPLACEMENT_ATTRIBUTES: dict[SupportedTraceItemType, set[str]] = {
@@ -102,6 +116,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: LOGS_REPLACEMENT_ATTRIBUTES,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_REPLACEMENT_ATTRIBUTES,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_REPLACEMENT_ATTRIBUTES,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_REPLACEMENT_ATTRIBUTES,
 }
 
 SENTRY_CONVENTIONS_REPLACEMENT_MAPPINGS: dict[SupportedTraceItemType, dict[str, str]] = {
@@ -109,6 +124,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: LOGS_REPLACEMENT_MAP,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_REPLACEMENT_MAP,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_REPLACEMENT_MAP,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_REPLACEMENT_MAP,
 }
 
 
@@ -117,6 +133,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: LOGS_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
 }
 
 TRACE_ITEM_TYPE_DEFINITIONS: dict[SupportedTraceItemType, ColumnDefinitions] = {
@@ -124,6 +141,7 @@ def add_start_end_conditions(
     SupportedTraceItemType.LOGS: OURLOG_DEFINITIONS,
     SupportedTraceItemType.TRACEMETRICS: TRACE_METRICS_DEFINITIONS,
     SupportedTraceItemType.PROFILE_FUNCTIONS: PROFILE_FUNCTIONS_DEFINITIONS,
+    SupportedTraceItemType.OCCURRENCES: OCCURRENCE_DEFINITIONS,
 }
 
 
diff --git a/src/sentry/snuba/occurrences_rpc.py b/src/sentry/snuba/occurrences_rpc.py
index 708dea54892f84..2502e8698440f3 100644
--- a/src/sentry/snuba/occurrences_rpc.py
+++ b/src/sentry/snuba/occurrences_rpc.py
@@ -1,8 +1,14 @@
 import logging
+from collections import defaultdict
 from enum import Enum
 from typing import Any
 
 import sentry_sdk
+from sentry_protos.snuba.v1.endpoint_trace_item_stats_pb2 import (
+    AttributeDistributionsRequest,
+    StatsType,
+    TraceItemStatsRequest,
+)
 from sentry_protos.snuba.v1.request_common_pb2 import PageToken
 from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
 from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
@@ -13,12 +19,19 @@
 
 from sentry.models.group import Group
 from sentry.search.eap.columns import ColumnDefinitions, ResolvedAttribute, ResolvedColumn
+from sentry.search.eap.constants import SUPPORTED_STATS_TYPES
 from sentry.search.eap.occurrences.definitions import OCCURRENCE_DEFINITIONS
 from sentry.search.eap.resolver import SearchResolver
 from sentry.search.eap.rpc_utils import and_trace_item_filters
-from sentry.search.eap.types import AdditionalQueries, EAPResponse, SearchResolverConfig
+from sentry.search.eap.types import (
+    AdditionalQueries,
+    EAPResponse,
+    SearchResolverConfig,
+    SupportedTraceItemType,
+)
 from sentry.search.events.types import SAMPLING_MODES, SnubaData, SnubaParams
 from sentry.snuba import rpc_dataset_common
+from sentry.utils import snuba_rpc
 from sentry.utils.snuba import process_value
 
 logger = logging.getLogger(__name__)
@@ -253,6 +266,87 @@
 
         return results
 
+    @classmethod
+    @sentry_sdk.trace
+    def run_stats_query(
+        cls,
+        *,
+        params: SnubaParams,
+        stats_types: set[str],
+        query_string: str,
+        referrer: str,
+        config: SearchResolverConfig,
+        search_resolver: SearchResolver | None = None,
+        attributes: list[AttributeKey] | None = None,
+        max_buckets: int = 75,
+        skip_translate_internal_to_public_alias: bool = False,
+        occurrence_category: OccurrenceCategory | None = None,
+    ) -> list[dict[str, Any]]:
+        search_resolver = search_resolver or cls.get_resolver(params, config)
+        stats_filter, _, _ = search_resolver.resolve_query(query_string)
+
+        stats_filter = and_trace_item_filters(
+            stats_filter, cls._build_category_filter(occurrence_category)
+        )
+
+        meta = search_resolver.resolve_meta(
+            referrer=referrer,
+            sampling_mode=params.sampling_mode,
+        )
+        stats_request = TraceItemStatsRequest(
+            filter=stats_filter,
+            meta=meta,
+            stats_types=[],
+        )
+
+        if not set(stats_types).intersection(SUPPORTED_STATS_TYPES):
+            return []
+
+        if "attributeDistributions" in stats_types:
+            stats_request.stats_types.append(
+                StatsType(
+                    attribute_distributions=AttributeDistributionsRequest(
+                        max_buckets=max_buckets,
+                        attributes=attributes,
+                    )
+                )
+            )
+
+        response = snuba_rpc.trace_item_stats_rpc(stats_request)
+        stats = []
+
+        from sentry.search.eap.utils import can_expose_attribute, translate_internal_to_public_alias
+
+        for result in response.results:
+            if "attributeDistributions" in stats_types and result.HasField(
+                "attribute_distributions"
+            ):
+                attrs: dict[str, list[dict[str, Any]]] = defaultdict(list)
+                for attribute in result.attribute_distributions.attributes:
+                    if not can_expose_attribute(
+                        attribute.attribute_name, SupportedTraceItemType.OCCURRENCES
+                    ):
+                        continue
+
+                    for bucket in attribute.buckets:
+                        if skip_translate_internal_to_public_alias:
+                            attrs[attribute.attribute_name].append(
+                                {"label": bucket.label, "value": bucket.value}
+                            )
+                        else:
+                            public_alias, _, _ = translate_internal_to_public_alias(
+                                attribute.attribute_name,
+                                "string",
+                                SupportedTraceItemType.OCCURRENCES,
+                            )
+                            public_alias = public_alias or attribute.attribute_name
+                            attrs[public_alias].append(
+                                {"label": bucket.label, "value": bucket.value}
+                            )
+                stats.append({"attribute_distributions": {"data": attrs}})
+
+        return stats
+
     @classmethod
     def _fetch_issue_labels(
         cls,
diff --git a/src/sentry/snuba/referrer.py b/src/sentry/snuba/referrer.py
index 93bda3ba687119..2ebdce9b578eaf 100644
--- a/src/sentry/snuba/referrer.py
+++ b/src/sentry/snuba/referrer.py
@@ -582,6 +582,7 @@ class Referrer(StrEnum):
     API_SPANS_FREQUENCY_STATS_RPC = "api.spans.fields-stats.rpc"
     API_SPANS_TAG_VALUES_RPC = "api.spans.tags-values.rpc"
     API_SPANS_TRACE_VIEW = "api.spans.trace-view"
+    API_OCCURRENCES_FREQUENCY_STATS_RPC = "api.occurrences.fields-stats.rpc"
     API_TRACE_METRICS_TAG_KEYS_RPC = "api.tracemetrics.tags-keys.rpc"
     API_TRACE_METRICS_TAG_VALUES_RPC = "api.tracemetrics.tags-values.rpc"
 
diff --git a/tests/sentry/snuba/test_occurrences_rpc.py b/tests/sentry/snuba/test_occurrences_rpc.py
index 5ee629be84863b..86a7219b44923e 100644
--- a/tests/sentry/snuba/test_occurrences_rpc.py
+++ b/tests/sentry/snuba/test_occurrences_rpc.py
@@ -1,4 +1,6 @@
 from datetime import datetime, timedelta, timezone
+from typing import Any
+from uuid import uuid4
 
 import pytest
 from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
@@ -23,9 +25,9 @@
 from sentry.search.eap.resolver import SearchResolver
 from sentry.search.eap.types import SearchResolverConfig
 from sentry.search.events.types import SnubaParams
-from sentry.snuba.occurrences_rpc import Occurrences
+from sentry.snuba.occurrences_rpc import OccurrenceCategory, Occurrences
 from sentry.snuba.rpc_dataset_common import RPCBase
-from sentry.testutils.cases import TestCase
+from sentry.testutils.cases import OccurrenceTestCase, SnubaTestCase, TestCase
 
 
 class OccurrencesRPCTest(TestCase):
@@ -323,3 +325,145 @@ def test_validate_granularity_required_for_grouped_timeseries(self) -> None:
                 referrer="test",
                 config=self.config,
             )
+
+
+class OccurrencesStatsRPCTest(TestCase, SnubaTestCase, OccurrenceTestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.now = datetime.now(timezone.utc)
+
+    def _query_stats(
+        self,
+        query_string: str = "",
+        stats_types: set[str] | None = None,
+        attributes: list[AttributeKey] | None = None,
+        max_buckets: int = 75,
+        occurrence_category: OccurrenceCategory | None = None,
+        skip_translate_internal_to_public_alias: bool = False,
+    ) -> list[dict[str, Any]]:
+        snuba_params = SnubaParams(
+            start=self.now - timedelta(hours=1),
+            end=self.now + timedelta(hours=1),
+            organization=self.organization,
+            projects=[self.project],
+        )
+        return Occurrences.run_stats_query(
+            params=snuba_params,
+            stats_types=stats_types or {"attributeDistributions"},
+            query_string=query_string,
+            referrer="test.eap_occurrences_stats",
+            config=SearchResolverConfig(),
+            attributes=attributes,
+            max_buckets=max_buckets,
+            skip_translate_internal_to_public_alias=skip_translate_internal_to_public_alias,
+            occurrence_category=occurrence_category,
+        )
+
+    def test_stats_returns_attribute_distributions(self) -> None:
+        group = self.create_group(project=self.project)
+        for level in ["error", "error", "warning"]:
+            occ = self.create_eap_occurrence(
+                group_id=group.id,
+                level=level,
+                timestamp=self.now - timedelta(minutes=5),
+            )
+            self.store_eap_items([occ])
+
+        result = self._query_stats()
+        assert len(result) == 1
+        assert "attribute_distributions" in result[0]
+        data = result[0]["attribute_distributions"]["data"]
+        assert "level" in data
+        level_buckets = data["level"]
+        labels = {bucket["label"] for bucket in level_buckets}
+        assert "error" in labels
+        assert "warning" in labels
+
+    def test_stats_with_query_filter(self) -> None:
+        group = self.create_group(project=self.project)
+        for level in ["error", "warning"]:
+            occ = self.create_eap_occurrence(
+                group_id=group.id,
+                level=level,
+                timestamp=self.now - timedelta(minutes=5),
+            )
+            self.store_eap_items([occ])
+
+        result = self._query_stats(query_string="level:error")
+        assert len(result) == 1
+        data = result[0]["attribute_distributions"]["data"]
+        # With the filter, only error occurrences are included
+        assert "level" in data
+        labels = {bucket["label"] for bucket in data["level"]}
+        assert "error" in labels
+        assert "warning" not in labels
+
+    def test_stats_with_specific_attributes(self) -> None:
+        group = self.create_group(project=self.project)
+        occ = self.create_eap_occurrence(
+            group_id=group.id,
+            level="error",
+            timestamp=self.now - timedelta(minutes=5),
+        )
+        self.store_eap_items([occ])
+
+        result = self._query_stats(
+            attributes=[AttributeKey(name="level", type=AttributeKey.TYPE_STRING)],
+        )
+        assert len(result) == 1
+        data = result[0]["attribute_distributions"]["data"]
+        assert "level" in data
+
+    def test_stats_excludes_private_attributes(self) -> None:
+        group = self.create_group(project=self.project)
+        occ = self.create_eap_occurrence(
+            group_id=group.id,
+            level="error",
+            timestamp=self.now - timedelta(minutes=5),
+        )
+        self.store_eap_items([occ])
+
+        result = self._query_stats()
+        assert len(result) == 1
+        data = result[0]["attribute_distributions"]["data"]
+        # Private attributes should not appear
+        assert "sentry.item_type" not in data
+        assert "sentry.organization_id" not in data
+
+    def test_stats_with_occurrence_category_filter(self) -> None:
+        group_error = self.create_group(project=self.project)
+        group_generic = self.create_group(project=self.project)
+
+        # Error events have no issue_occurrence_id
+        error_occ = self.create_eap_occurrence(
+            group_id=group_error.id,
+            level="error",
+            timestamp=self.now - timedelta(minutes=5),
+        )
+        # Issue platform events have an issue_occurrence_id
+        generic_occ = self.create_eap_occurrence(
+            group_id=group_generic.id,
+            level="warning",
+            issue_occurrence_id=uuid4().hex,
+            timestamp=self.now - timedelta(minutes=5),
+        )
+        self.store_eap_items([error_occ, generic_occ])
+
+        # Filter to only error category
+        result = self._query_stats(occurrence_category=OccurrenceCategory.ERROR)
+        assert len(result) == 1
+        data = result[0]["attribute_distributions"]["data"]
+        assert "level" in data
+        labels = {bucket["label"] for bucket in data["level"]}
+        assert "error" in labels
+        assert "warning" not in labels
+
+    def test_stats_unsupported_stats_type(self) -> None:
+        result = self._query_stats(stats_types={"unsupported"})
+        assert result == []
+
+    def test_stats_empty_results(self) -> None:
+        result = self._query_stats(query_string="level:nonexistent")
+        assert len(result) == 1
+        data = result[0]["attribute_distributions"]["data"]
+        assert len(data) == 0