Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions src/sentry/incidents/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ class AlertRuleNameAlreadyUsedError(Exception):

# Default values for `SnubaQuery.resolution`, in minutes.
DEFAULT_ALERT_RULE_RESOLUTION = 1
# Comparison alerts query twice (current + comparison window), so we multiply the
# resolution interval (i.e. evaluate less frequently) to compensate for the increased query load.
DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER = 2
DEFAULT_ALERT_RULE_WINDOW_TO_RESOLUTION = {
30: 2,
Expand All @@ -505,13 +507,25 @@ class AlertRuleNameAlreadyUsedError(Exception):
}


def get_alert_resolution(time_window: int, organization: Organization) -> int:
def get_alert_resolution(time_window: int, organization: Organization) -> timedelta:
"""
Return the Snuba subscription evaluation interval for a given alert time window.

Larger time windows don't need fine-grained resolution, so we map them to
coarser buckets to reduce query load. See DEFAULT_ALERT_RULE_WINDOW_TO_RESOLUTION.

:param time_window: The alert's aggregation window, in minutes.
:param organization: The organization (reserved for future per-org overrides).
:return: The evaluation interval as a timedelta.
"""
index = bisect.bisect_right(SORTED_TIMEWINDOWS, time_window)

if index == 0:
return DEFAULT_ALERT_RULE_RESOLUTION
minutes = DEFAULT_ALERT_RULE_RESOLUTION
else:
minutes = DEFAULT_ALERT_RULE_WINDOW_TO_RESOLUTION[SORTED_TIMEWINDOWS[index - 1]]

return DEFAULT_ALERT_RULE_WINDOW_TO_RESOLUTION[SORTED_TIMEWINDOWS[index - 1]]
return timedelta(minutes=minutes)


class _OwnerKwargs(TypedDict):
Expand Down Expand Up @@ -585,7 +599,7 @@ def create_alert_rule(
raise ResourceDoesNotExist("Your organization does not have access to this feature.")

if detection_type == AlertRuleDetectionType.DYNAMIC:
resolution = time_window
resolution = timedelta(minutes=time_window)
# NOTE: we hardcode seasonality for EA
seasonality = AlertRuleSeasonality.AUTO
if not sensitivity:
Expand Down Expand Up @@ -617,8 +631,7 @@ def create_alert_rule(
raise ValidationError("Comparison delta is not a valid field for this alert type")

if comparison_delta is not None:
# Since comparison alerts make twice as many queries, run the queries less frequently.
resolution = resolution * DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER
resolution *= DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER
comparison_delta = int(timedelta(minutes=comparison_delta).total_seconds())

with transaction.atomic(router.db_for_write(SnubaQuery)):
Expand All @@ -629,7 +642,7 @@ def create_alert_rule(
query=query,
aggregate=aggregate,
time_window=timedelta(minutes=time_window),
resolution=timedelta(minutes=resolution),
resolution=resolution,
environment=environment,
event_types=event_types,
extrapolation_mode=extrapolation_mode,
Expand Down Expand Up @@ -900,11 +913,9 @@ def update_alert_rule(
)

if resolution_comparison_delta is not None:
updated_query_fields["resolution"] = timedelta(
minutes=(resolution * DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER)
)
else:
updated_query_fields["resolution"] = timedelta(minutes=resolution)
resolution *= DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER

updated_query_fields["resolution"] = resolution

if detection_type:
updated_fields["detection_type"] = detection_type
Expand All @@ -917,12 +928,12 @@ def update_alert_rule(
updated_fields["sensitivity"] = None
updated_fields["seasonality"] = None
elif detection_type == AlertRuleDetectionType.DYNAMIC:
# NOTE: we set seasonality for EA
if time_window is not None:
updated_query_fields["resolution"] = timedelta(minutes=time_window)
else:
# snuba_query.time_window is already in seconds
updated_query_fields["resolution"] = timedelta(seconds=snuba_query.time_window)
# NOTE: we set seasonality for EA
updated_fields["seasonality"] = AlertRuleSeasonality.AUTO
updated_fields["comparison_delta"] = None
if (
Expand Down
65 changes: 57 additions & 8 deletions src/sentry/incidents/metric_issue_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

from sentry import features, quotas
from sentry.constants import ObjectStatus
from sentry.incidents.logic import enable_disable_subscriptions
from sentry.incidents.logic import (
DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER,
enable_disable_subscriptions,
get_alert_resolution,
)
from sentry.incidents.models.alert_rule import AlertRuleDetectionType
from sentry.relay.config.metric_extraction import on_demand_metrics_feature_flags
from sentry.search.eap.trace_metrics.validator import validate_trace_metrics_aggregate
Expand Down Expand Up @@ -245,6 +249,27 @@
f"{extrapolation_mode.name.lower()} extrapolation mode is not supported for new detectors. Allowed modes are: client_and_server_weighted, unknown."
)

def _get_resolution_for_window(
self,
time_window_seconds: int | float,
detection_type: str | None,
comparison_delta: int | float | None,
) -> timedelta:
"""
Compute the appropriate SnubaQuery resolution for a given time window
(in seconds), mirroring the logic in create_alert_rule / update_alert_rule.
"""
organization = self.context["organization"]

if detection_type == AlertRuleDetectionType.DYNAMIC:

Check failure on line 264 in src/sentry/incidents/metric_issue_detector.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

detection_type comparison will never match AlertRuleDetectionType.DYNAMIC

The parameter `detection_type` is typed as `str | None` and receives string values like `"dynamic"` from `config.get("detection_type")`. The comparison at line 264 compares this string against `AlertRuleDetectionType.DYNAMIC`, which is a TextChoices enum, not a string. This comparison will always be False because `"dynamic" == AlertRuleDetectionType.DYNAMIC` evaluates to False. As a result, dynamic detection type alerts will get the wrong resolution calculation (the `else` branch instead of the `if` branch). The correct pattern is shown at line 479: `AlertRuleDetectionType.DYNAMIC.value`.
Comment thread
kcons marked this conversation as resolved.
Comment thread
kcons marked this conversation as resolved.
resolution = timedelta(seconds=time_window_seconds)
else:
resolution = get_alert_resolution(int(time_window_seconds) // 60, organization)
if comparison_delta is not None:
resolution *= DEFAULT_CMP_ALERT_RULE_RESOLUTION_MULTIPLIER

return resolution

def get_quota(self) -> DetectorQuota:
organization = self.context.get("organization")
request = self.context.get("request")
Expand Down Expand Up @@ -295,7 +320,10 @@
return False

def update_data_source(
self, instance: Detector, data_source: SnubaQueryDataSourceType, seer_updated: bool = False
self,
instance: Detector,
data_source: SnubaQueryDataSourceType,
seer_updated: bool = False,
) -> None:
try:
source_instance = DataSource.objects.get(detector=instance)
Expand All @@ -312,7 +340,7 @@
except SnubaQuery.DoesNotExist:
raise serializers.ValidationError("SnubaQuery not found, can't update")

event_types = SnubaQueryEventType.objects.filter(snuba_query_id=snuba_query.id)
event_types = snuba_query.event_types

if self.is_editing_transaction_dataset(snuba_query, data_source):
raise serializers.ValidationError(
Expand Down Expand Up @@ -342,16 +370,23 @@
data_source.get("extrapolation_mode", snuba_query.extrapolation_mode)
)

new_time_window = data_source.get("time_window", snuba_query.time_window)
resolution = self._get_resolution_for_window(
new_time_window,
instance.config.get("detection_type"),
instance.config.get("comparison_delta"),
)

update_snuba_query(
snuba_query=snuba_query,
query_type=data_source.get("query_type", snuba_query.type),
dataset=data_source.get("dataset", snuba_query.dataset),
query_type=data_source.get("query_type", SnubaQuery.Type(snuba_query.type)),
dataset=data_source.get("dataset", Dataset(snuba_query.dataset)),
Comment on lines +382 to +383
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3

query=data_source.get("query", snuba_query.query),
aggregate=data_source.get("aggregate", snuba_query.aggregate),
time_window=timedelta(seconds=data_source.get("time_window", snuba_query.time_window)),
resolution=timedelta(seconds=data_source.get("resolution", snuba_query.resolution)),
time_window=timedelta(seconds=new_time_window),
resolution=resolution,
environment=data_source.get("environment", snuba_query.environment),
event_types=data_source.get("event_types", [event_type for event_type in event_types]),
event_types=data_source.get("event_types", event_types),
extrapolation_mode=extrapolation_mode,
)

Expand Down Expand Up @@ -413,6 +448,10 @@

if data_source is not None:
self.update_data_source(instance, data_source, seer_updated)
elif "config" in validated_data:
# Config changed (e.g. detection_type or comparison_delta) without a
# data_source update — recalculate resolution to match the new config.
self.update_data_source(instance, {}, seer_updated)

instance.save()

Expand All @@ -421,10 +460,20 @@

def create(self, validated_data: dict[str, Any]) -> Detector:
if "data_sources" in validated_data:
config = validated_data.get("config", {})
detection_type = config.get("detection_type")
comparison_delta = config.get("comparison_delta")

for validated_data_source in validated_data["data_sources"]:
self._validate_transaction_dataset_deprecation(validated_data_source.get("dataset"))
self._validate_extrapolation_mode(validated_data_source.get("extrapolation_mode"))

time_window = validated_data_source.get("time_window")
if time_window is not None:
validated_data_source["resolution"] = self._get_resolution_for_window(
time_window, detection_type, comparison_delta
)

detector = super().create(validated_data)

if detector.config.get("detection_type") == AlertRuleDetectionType.DYNAMIC.value:
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/snuba/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ class Type(Enum):
size=100,
)
aggregate = models.TextField()
# The aggregation window for the query, in seconds.
time_window = models.IntegerField()
# How often the subscription query is evaluated, in seconds. Scaled up for
# larger time windows and comparison alerts to reduce query load.
resolution = models.IntegerField()
extrapolation_mode = models.IntegerField(
choices=ExtrapolationMode.as_choices(),
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/snuba/snuba_query_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def create_source(self, validated_data: dict[str, Any]) -> QuerySubscription:
query=validated_data["query"],
aggregate=validated_data["aggregate"],
time_window=timedelta(seconds=validated_data["time_window"]),
resolution=timedelta(minutes=1),
resolution=validated_data.get("resolution", timedelta(minutes=1)),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👀 - should we do anything to see if the existing metric alerts have the correct resolutions?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, but by my count there are fewer than 87 potentially impacted cases so far in the US (and probably far fewer than that), so I'm not entirely sure it's worth pursuing. This is more of a case where we want to close the door before we let in the crowds.

environment=validated_data["environment"],
event_types=validated_data["event_types"],
group_by=validated_data.get("group_by"),
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/workflow_engine/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ class DataConditionType(TypedDict):


# TODO - Move this to snuba module
class SnubaQueryDataSourceType(TypedDict):
class SnubaQueryDataSourceType(TypedDict, total=False):
query_type: SnubaQuery.Type
dataset: Dataset
query: str
Expand Down
Loading
Loading