|
7 | 7 | from collections.abc import Callable, Mapping, MutableMapping, Sequence |
8 | 8 | from dataclasses import dataclass |
9 | 9 | from datetime import datetime, timedelta, timezone |
10 | | -from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, overload |
| 10 | +from typing import TYPE_CHECKING, Any, Literal, TypedDict, overload |
11 | 11 |
|
12 | 12 | import orjson |
13 | 13 | import psycopg2.errors |
|
19 | 19 | from django.db.models import Max, Q |
20 | 20 | from django.db.models.signals import post_save |
21 | 21 | from django.utils.encoding import force_str |
22 | | -from urllib3.connectionpool import HTTPConnectionPool |
23 | | -from urllib3.exceptions import MaxRetryError, TimeoutError |
24 | | -from urllib3.response import BaseHTTPResponse |
25 | 22 | from usageaccountant import UsageUnit |
26 | 23 |
|
27 | 24 | from sentry import ( |
|
60 | 57 | get_grouping_config_dict_for_project, |
61 | 58 | ) |
62 | 59 | from sentry.grouping.enhancer import get_enhancements_version |
63 | | -from sentry.grouping.grouptype import ErrorGroupType |
64 | 60 | from sentry.grouping.ingest.config import is_in_transition, update_or_set_grouping_config_if_needed |
65 | 61 | from sentry.grouping.ingest.hashing import ( |
66 | 62 | find_grouphash_with_group, |
|
91 | 87 | from sentry.issue_detection.performance_problem import PerformanceProblem |
92 | 88 | from sentry.issues.issue_occurrence import IssueOccurrence |
93 | 89 | from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka |
94 | | -from sentry.killswitches import killswitch_matches_context |
95 | 90 | from sentry.lang.native.utils import STORE_CRASH_REPORTS_ALL, convert_crashreport_count |
96 | 91 | from sentry.models.activity import Activity |
97 | 92 | from sentry.models.environment import Environment |
|
114 | 109 | from sentry.models.releaseenvironment import ReleaseEnvironment |
115 | 110 | from sentry.models.releaseprojectenvironment import ReleaseProjectEnvironment |
116 | 111 | from sentry.models.releases.release_project import ReleaseProject |
117 | | -from sentry.net.http import connection_from_url |
118 | 112 | from sentry.plugins.base import plugins |
119 | 113 | from sentry.quotas.base import index_data_category |
120 | 114 | from sentry.receivers.features import record_event_processed |
121 | 115 | from sentry.receivers.onboarding import record_release_received |
122 | 116 | from sentry.reprocessing2 import is_reprocessed_event |
123 | | -from sentry.seer.signed_seer_api import SeerViewerContext, make_signed_seer_api_request |
124 | 117 | from sentry.services.eventstore.processing import event_processing_store |
125 | 118 | from sentry.signals import ( |
126 | 119 | first_event_received, |
|
137 | 130 | from sentry.utils import metrics |
138 | 131 | from sentry.utils.audit import create_system_audit_entry |
139 | 132 | from sentry.utils.cache import cache_key_for_event |
140 | | -from sentry.utils.circuit_breaker import ( |
141 | | - ERROR_COUNT_CACHE_KEY, |
142 | | - CircuitBreakerPassthrough, |
143 | | - circuit_breaker_activated, |
144 | | -) |
145 | 133 | from sentry.utils.dates import to_datetime |
146 | | -from sentry.utils.event import has_event_minified_stack_trace, has_stacktrace, is_handled |
| 134 | +from sentry.utils.event import has_event_minified_stack_trace |
147 | 135 | from sentry.utils.eventuser import EventUser |
148 | 136 | from sentry.utils.metrics import MutableTags |
149 | 137 | from sentry.utils.outcomes import Outcome, OutcomeAggregator, track_outcome |
|
173 | 161 |
|
174 | 162 | HIGH_SEVERITY_THRESHOLD = 0.1 |
175 | 163 |
|
176 | | -SEER_ERROR_COUNT_KEY = ERROR_COUNT_CACHE_KEY("sentry.seer.severity-failures") |
177 | | - |
178 | 164 |
|
179 | 165 | @dataclass |
180 | 166 | class GroupInfo: |
@@ -1569,18 +1555,7 @@ def _create_group( |
1569 | 1555 | # add sdk tag to metadata |
1570 | 1556 | group_data.setdefault("metadata", {}).update(sdk_metadata_from_event(event)) |
1571 | 1557 |
|
1572 | | - # add severity to metadata for alert filtering |
1573 | 1558 | severity: Mapping[str, Any] = {} |
1574 | | - try: |
1575 | | - group_type = group_creation_kwargs.get("type", None) |
1576 | | - severity = _get_severity_metadata_for_group(event, project.id, group_type) |
1577 | | - group_data["metadata"].update(severity) |
1578 | | - except Exception as e: |
1579 | | - logger.exception( |
1580 | | - "Failed to get severity metadata for group", |
1581 | | - repr(e), |
1582 | | - extra={"event_id": event.event_id}, |
1583 | | - ) |
1584 | 1559 |
|
1585 | 1560 | # the kwargs only include priority for non-error issue platform events, which takes precedence. |
1586 | 1561 | priority = group_creation_kwargs.get("priority", None) |
@@ -1958,149 +1933,6 @@ def _process_existing_aggregate( |
1958 | 1933 | return bool(is_regression) |
1959 | 1934 |
|
1960 | 1935 |
|
1961 | | -severity_connection_pool = connection_from_url( |
1962 | | - settings.SEER_GROUPING_URL, |
1963 | | - retries=settings.SEER_SEVERITY_RETRIES, |
1964 | | - timeout=settings.SEER_SEVERITY_TIMEOUT, # Defaults to 300 milliseconds |
1965 | | -) |
1966 | | - |
1967 | | - |
1968 | | -class SeverityScoreRequest(TypedDict): |
1969 | | - message: str |
1970 | | - has_stacktrace: int |
1971 | | - handled: bool | None |
1972 | | - org_id: int |
1973 | | - project_id: int |
1974 | | - trigger_timeout: NotRequired[bool] |
1975 | | - trigger_error: NotRequired[bool] |
1976 | | - |
1977 | | - |
1978 | | -def make_severity_score_request( |
1979 | | - body: SeverityScoreRequest, |
1980 | | - connection_pool: HTTPConnectionPool | None = None, |
1981 | | - timeout: int | float | None = None, |
1982 | | - viewer_context: SeerViewerContext | None = None, |
1983 | | -) -> BaseHTTPResponse: |
1984 | | - payload: SeverityScoreRequest = {**body} |
1985 | | - if options.get("processing.severity-backlog-test.timeout"): |
1986 | | - payload["trigger_timeout"] = True |
1987 | | - if options.get("processing.severity-backlog-test.error"): |
1988 | | - payload["trigger_error"] = True |
1989 | | - return make_signed_seer_api_request( |
1990 | | - connection_pool or severity_connection_pool, |
1991 | | - "/v0/issues/severity-score", |
1992 | | - body=orjson.dumps(payload), |
1993 | | - timeout=timeout, |
1994 | | - viewer_context=viewer_context, |
1995 | | - ) |
1996 | | - |
1997 | | - |
1998 | | -def _get_severity_metadata_for_group( |
1999 | | - event: Event, project_id: int, group_type: int | None |
2000 | | -) -> Mapping[str, Any]: |
2001 | | - """ |
2002 | | - Returns severity metadata for an event if all of the following are true |
2003 | | - - the feature flag is enabled |
2004 | | - - the event platform supports severity |
2005 | | - - the event group type is an error |
2006 | | -
|
2007 | | - Returns {} if conditions aren't met or on exception. |
2008 | | - """ |
2009 | | - from sentry.receivers.rules import PLATFORMS_WITH_PRIORITY_ALERTS |
2010 | | - |
2011 | | - if killswitch_matches_context( |
2012 | | - "issues.severity.skip-seer-requests", {"project_id": event.project_id} |
2013 | | - ): |
2014 | | - logger.warning( |
2015 | | - "get_severity_metadata_for_group.seer_killswitch_enabled", |
2016 | | - extra={"event_id": event.event_id, "project_id": project_id}, |
2017 | | - ) |
2018 | | - metrics.incr("issues.severity.seer_killswitch_enabled") |
2019 | | - return {} |
2020 | | - |
2021 | | - seer_based_priority_enabled = features.has( |
2022 | | - "organizations:seer-based-priority", event.project.organization, actor=None |
2023 | | - ) |
2024 | | - if not seer_based_priority_enabled: |
2025 | | - return {} |
2026 | | - |
2027 | | - feature_enabled = features.has("projects:first-event-severity-calculation", event.project) |
2028 | | - if not feature_enabled: |
2029 | | - return {} |
2030 | | - |
2031 | | - is_supported_platform = ( |
2032 | | - any(event.platform.startswith(platform) for platform in PLATFORMS_WITH_PRIORITY_ALERTS) |
2033 | | - if event.platform |
2034 | | - else False |
2035 | | - ) |
2036 | | - if not is_supported_platform: |
2037 | | - return {} |
2038 | | - |
2039 | | - is_error_group = group_type == ErrorGroupType.type_id if group_type else True |
2040 | | - if not is_error_group: |
2041 | | - return {} |
2042 | | - |
2043 | | - passthrough_data = options.get( |
2044 | | - "issues.severity.seer-circuit-breaker-passthrough-limit", |
2045 | | - CircuitBreakerPassthrough(limit=1, window=10), |
2046 | | - ) |
2047 | | - if circuit_breaker_activated("sentry.seer.severity", passthrough_data=passthrough_data): |
2048 | | - logger.warning( |
2049 | | - "get_severity_metadata_for_group.circuit_breaker_activated", |
2050 | | - extra={"event_id": event.event_id, "project_id": project_id}, |
2051 | | - ) |
2052 | | - return {} |
2053 | | - |
2054 | | - from sentry import ratelimits as ratelimiter |
2055 | | - |
2056 | | - ratelimit = options.get("issues.severity.seer-global-rate-limit") |
2057 | | - # This is temporary until we update the option values to be a dict |
2058 | | - if "limit" not in ratelimit or "window" not in ratelimit: |
2059 | | - return {} |
2060 | | - |
2061 | | - if ratelimiter.backend.is_limited( |
2062 | | - "seer:severity-calculation:global-limit", |
2063 | | - limit=ratelimit["limit"], |
2064 | | - window=ratelimit["window"], |
2065 | | - ): |
2066 | | - logger.warning( |
2067 | | - "get_severity_metadata_for_group.rate_limited_globally", |
2068 | | - extra={"event_id": event.event_id, "project_id": project_id}, |
2069 | | - ) |
2070 | | - metrics.incr("issues.severity.rate_limited_globally") |
2071 | | - return {} |
2072 | | - |
2073 | | - ratelimit = options.get("issues.severity.seer-project-rate-limit") |
2074 | | - # This is temporary until we update the option values to be a dict |
2075 | | - if "limit" not in ratelimit or "window" not in ratelimit: |
2076 | | - return {} |
2077 | | - |
2078 | | - if ratelimiter.backend.is_limited( |
2079 | | - f"seer:severity-calculation:{project_id}", |
2080 | | - limit=ratelimit["limit"], |
2081 | | - window=ratelimit["window"], |
2082 | | - ): |
2083 | | - logger.warning( |
2084 | | - "get_severity_metadata_for_group.rate_limited_for_project", |
2085 | | - extra={"event_id": event.event_id, "project_id": project_id}, |
2086 | | - ) |
2087 | | - metrics.incr("issues.severity.rate_limited_for_project", tags={"project_id": project_id}) |
2088 | | - return {} |
2089 | | - |
2090 | | - try: |
2091 | | - severity, reason = _get_severity_score(event) |
2092 | | - |
2093 | | - return { |
2094 | | - "severity": severity, |
2095 | | - "severity_reason": reason, |
2096 | | - } |
2097 | | - except Exception as e: |
2098 | | - logger.warning("Failed to calculate severity score for group", repr(e)) |
2099 | | - update_severity_error_count() |
2100 | | - metrics.incr("issues.severity.error") |
2101 | | - return {} |
2102 | | - |
2103 | | - |
2104 | 1936 | def _get_priority_for_group(severity: Mapping[str, Any], kwargs: Mapping[str, Any]) -> int: |
2105 | 1937 | """ |
2106 | 1938 | Returns priority for an event based on severity score and log level. |
@@ -2141,105 +1973,6 @@ def _get_priority_for_group(severity: Mapping[str, Any], kwargs: Mapping[str, An |
2141 | 1973 | return PriorityLevel.MEDIUM |
2142 | 1974 |
|
2143 | 1975 |
|
2144 | | -def update_severity_error_count(reset=False) -> None: |
2145 | | - timeout = 60 * 60 # 1 hour |
2146 | | - if reset: |
2147 | | - cache.set(SEER_ERROR_COUNT_KEY, 0, timeout=timeout) |
2148 | | - return |
2149 | | - |
2150 | | - try: |
2151 | | - cache.incr(SEER_ERROR_COUNT_KEY) |
2152 | | - cache.touch(SEER_ERROR_COUNT_KEY, timeout=timeout) |
2153 | | - except ValueError: |
2154 | | - cache.set(SEER_ERROR_COUNT_KEY, 1, timeout=timeout) |
2155 | | - |
2156 | | - |
2157 | | -def _get_severity_score(event: Event) -> tuple[float, str]: |
2158 | | - # Short circuit the severity value if we know the event is fatal or info/debug |
2159 | | - level = str(event.data.get("level", "error")) |
2160 | | - if LOG_LEVELS_MAP[level] == logging.FATAL: |
2161 | | - return 1.0, "log_level_fatal" |
2162 | | - if LOG_LEVELS_MAP[level] <= logging.INFO: |
2163 | | - return 0.0, "log_level_info" |
2164 | | - |
2165 | | - op = "event_manager._get_severity_score" |
2166 | | - logger_data = {"event_id": event.data["event_id"], "op": op} |
2167 | | - severity = 1.0 |
2168 | | - reason = None |
2169 | | - |
2170 | | - event_type = get_event_type(event.data) |
2171 | | - metadata = event_type.get_metadata(event.data) |
2172 | | - |
2173 | | - exception_type = metadata.get("type") |
2174 | | - exception_value = metadata.get("value") |
2175 | | - |
2176 | | - if exception_type: |
2177 | | - title = exception_type |
2178 | | - if exception_value: |
2179 | | - title += f": {exception_value}" |
2180 | | - |
2181 | | - # We truncate the title to 128 characters as any more than that is unlikely to be helpful |
2182 | | - # and would slow down the model. |
2183 | | - title = trim(title, 128) |
2184 | | - else: |
2185 | | - # Fall back to using just the title for events without an exception. |
2186 | | - title = event.title |
2187 | | - |
2188 | | - # If all we have is `<unlabeled event>` (or one of its equally unhelpful friends), bail |
2189 | | - if title in PLACEHOLDER_EVENT_TITLES: |
2190 | | - logger_data.update({"event_type": event_type.key, "title": title}) |
2191 | | - logger.warning( |
2192 | | - "Unable to get severity score because of unusable `message` value '%s'", |
2193 | | - title, |
2194 | | - extra=logger_data, |
2195 | | - ) |
2196 | | - return 0.0, "bad_title" |
2197 | | - |
2198 | | - payload: SeverityScoreRequest = { |
2199 | | - "message": title, |
2200 | | - "has_stacktrace": int(has_stacktrace(event.data)), |
2201 | | - "handled": is_handled(event.data), |
2202 | | - "org_id": event.project.organization_id, |
2203 | | - "project_id": event.project_id, |
2204 | | - } |
2205 | | - |
2206 | | - logger_data["payload"] = payload |
2207 | | - |
2208 | | - with sentry_sdk.start_span(op=op): |
2209 | | - try: |
2210 | | - with metrics.timer(op): |
2211 | | - timeout = options.get( |
2212 | | - "issues.severity.seer-timeout", |
2213 | | - settings.SEER_SEVERITY_TIMEOUT, |
2214 | | - ) |
2215 | | - viewer_context = SeerViewerContext(organization_id=event.project.organization_id) |
2216 | | - response = make_severity_score_request( |
2217 | | - payload, timeout=timeout, viewer_context=viewer_context |
2218 | | - ) |
2219 | | - severity = orjson.loads(response.data).get("severity") |
2220 | | - reason = "ml" |
2221 | | - except MaxRetryError: |
2222 | | - reason = "microservice_max_retry" |
2223 | | - update_severity_error_count() |
2224 | | - metrics.incr("issues.severity.error", tags={"reason": "max_retries"}) |
2225 | | - logger.exception("Seer severity microservice max retries exceeded") |
2226 | | - except TimeoutError: |
2227 | | - reason = "microservice_timeout" |
2228 | | - update_severity_error_count() |
2229 | | - metrics.incr("issues.severity.error", tags={"reason": "timeout"}) |
2230 | | - logger.exception("Seer severity microservice timeout") |
2231 | | - except Exception: |
2232 | | - reason = "microservice_error" |
2233 | | - update_severity_error_count() |
2234 | | - metrics.incr("issues.severity.error", tags={"reason": "unknown"}) |
2235 | | - logger.exception("Seer severity microservice error") |
2236 | | - sentry_sdk.capture_exception() |
2237 | | - else: |
2238 | | - update_severity_error_count(reset=True) |
2239 | | - |
2240 | | - return severity, reason |
2241 | | - |
2242 | | - |
2243 | 1976 | Attachment = CachedAttachment |
2244 | 1977 |
|
2245 | 1978 |
|
|
0 commit comments