From ce30505b87583ffd3dbbf632b984b1d740de6c45 Mon Sep 17 00:00:00 2001 From: Kyle Consalus Date: Thu, 26 Mar 2026 15:20:40 -0700 Subject: [PATCH 1/3] perf(workflows): Avoid ~0.6s regression in backported OrganizationCombinedRuleIndexEndpoint.get --- .../organization_alert_rule_index.py | 59 +++++++++++-------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/src/sentry/incidents/endpoints/organization_alert_rule_index.py b/src/sentry/incidents/endpoints/organization_alert_rule_index.py index deba984655a047..6789e30130a097 100644 --- a/src/sentry/incidents/endpoints/organization_alert_rule_index.py +++ b/src/sentry/incidents/endpoints/organization_alert_rule_index.py @@ -3,6 +3,7 @@ from copy import deepcopy from datetime import UTC, datetime +from django.db import connections, router, transaction from django.db.models import ( Case, DateTimeField, @@ -515,33 +516,45 @@ def _get_workflow_engine( ), ) - # Build intermediaries for pagination - intermediaries: list[CombinedQuerysetIntermediary] = [] - def has_type(rule_type: str) -> bool: return not type_filter or rule_type in type_filter - if has_type("alert_rule"): - intermediaries.append(CombinedQuerysetIntermediary(metric_detectors, sort_key)) - if has_type("rule"): - intermediaries.append(CombinedQuerysetIntermediary(issue_workflows, sort_key)) - if has_type("uptime"): - intermediaries.append(CombinedQuerysetIntermediary(uptime_rules, sort_key)) - if has_type("monitor"): - intermediaries.append(CombinedQuerysetIntermediary(crons_rules, sort_key)) + # Disable JIT for the combined paginator queries. + # The planner thinks our metric detector query is going to be very slow because DetectorGroup + # in general has many Groups per Detector, even though for metrics detectors (our case here) it's effectively + # one-to-one. + # It decides to spend ~400ms JITing the query, thinking it is justified due to the bulk of the data, but it is + # wrong. What's worse, we send this query twice, and pay for the JIT twice. + # Disabling it makes this endpoint considerably faster. + # The risk of other regression here should be low; our API endpoint isn't generally doing the sort of bulk + # work that benefits from JIT. + db = router.db_for_write(Detector) + with transaction.atomic(using=db): + with connections[db].cursor() as cursor: + cursor.execute("SET LOCAL jit = off") + + intermediaries: list[CombinedQuerysetIntermediary] = [] + if has_type("alert_rule"): + intermediaries.append(CombinedQuerysetIntermediary(metric_detectors, sort_key)) + if has_type("rule"): + intermediaries.append(CombinedQuerysetIntermediary(issue_workflows, sort_key)) + if has_type("uptime"): + intermediaries.append(CombinedQuerysetIntermediary(uptime_rules, sort_key)) + if has_type("monitor"): + intermediaries.append(CombinedQuerysetIntermediary(crons_rules, sort_key)) - response = self.paginate( - request, - paginator_cls=CombinedQuerysetPaginator, - on_results=lambda x: serialize( - x, request.user, WorkflowEngineCombinedRuleSerializer(expand=expand) - ), - default_per_page=25, - intermediaries=intermediaries, - desc=not is_asc, - cursor_cls=StringCursor if case_insensitive else Cursor, - case_insensitive=case_insensitive, - ) + response = self.paginate( + request, + paginator_cls=CombinedQuerysetPaginator, + on_results=lambda x: serialize( + x, request.user, WorkflowEngineCombinedRuleSerializer(expand=expand) + ), + default_per_page=25, + intermediaries=intermediaries, + desc=not is_asc, + cursor_cls=StringCursor if case_insensitive else Cursor, + case_insensitive=case_insensitive, + ) response[MAX_QUERY_SUBSCRIPTIONS_HEADER] = get_max_metric_alert_subscriptions(organization) return response From 75ca47d21aa161324336270e500dc49a1048844b Mon Sep 17 00:00:00 2001 From: Kyle Consalus Date: Thu, 26 Mar 2026 16:03:47 -0700 Subject: [PATCH 2/3] Make it a context with a reset rather than a transactoin --- .../organization_alert_rule_index.py | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/sentry/incidents/endpoints/organization_alert_rule_index.py b/src/sentry/incidents/endpoints/organization_alert_rule_index.py index 6789e30130a097..b1add7745c0c0b 100644 --- a/src/sentry/incidents/endpoints/organization_alert_rule_index.py +++ b/src/sentry/incidents/endpoints/organization_alert_rule_index.py @@ -1,9 +1,10 @@ import logging from collections.abc import Sequence +from contextlib import contextmanager from copy import deepcopy from datetime import UTC, datetime -from django.db import connections, router, transaction +from django.db import connections, router from django.db.models import ( Case, DateTimeField, @@ -113,6 +114,26 @@ logger = logging.getLogger(__name__) + +@contextmanager +def _postgres_jit_disabled(using: str): + """ + Disable PostgreSQL JIT compilation on the given database connection for the duration of + the block. + + Uses session-level SET/RESET rather than SET LOCAL (which requires a transaction) so that + callers don't need to open a transaction — important when the calling code may issue queries + against multiple databases. + """ + with connections[using].cursor() as cursor: + cursor.execute("SET jit = off") + try: + yield + finally: + with connections[using].cursor() as cursor: + cursor.execute("RESET jit") + + # Sentinel values for incident_status annotation when sorting combined rules # Used to ensure proper sort order for rules without active incidents INCIDENT_STATUS_NONE = -1 # Metric alerts with no active incident @@ -519,7 +540,7 @@ def _get_workflow_engine( def has_type(rule_type: str) -> bool: return not type_filter or rule_type in type_filter - # Disable JIT for the combined paginator queries. + # Disable JIT on the Detector/DetectorGroup database for the combined paginator queries. # The planner thinks our metric detector query is going to be very slow because DetectorGroup # in general has many Groups per Detector, even though for metrics detectors (our case here) it's effectively # one-to-one. @@ -528,11 +549,11 @@ def has_type(rule_type: str) -> bool: # Disabling it makes this endpoint considerably faster. # The risk of other regression here should be low; our API endpoint isn't generally doing the sort of bulk # work that benefits from JIT. - db = router.db_for_write(Detector) - with transaction.atomic(using=db): - with connections[db].cursor() as cursor: - cursor.execute("SET LOCAL jit = off") - + # + # Note: Monitor lives on a different database, so we can't use a transaction here + # (SET LOCAL would require one). Session-level SET + RESET is fine for a request-scoped + # connection. + with _postgres_jit_disabled(using=router.db_for_write(Detector)): intermediaries: list[CombinedQuerysetIntermediary] = [] if has_type("alert_rule"): intermediaries.append(CombinedQuerysetIntermediary(metric_detectors, sort_key)) From 504d533b468b16617cd149ee5804fd8952a4cec5 Mon Sep 17 00:00:00 2001 From: Kyle Consalus Date: Fri, 27 Mar 2026 11:24:25 -0700 Subject: [PATCH 3/3] use in_test_hide_transaction_boundary --- .../organization_alert_rule_index.py | 36 ++++++------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/src/sentry/incidents/endpoints/organization_alert_rule_index.py b/src/sentry/incidents/endpoints/organization_alert_rule_index.py index b1add7745c0c0b..94577be68a725d 100644 --- a/src/sentry/incidents/endpoints/organization_alert_rule_index.py +++ b/src/sentry/incidents/endpoints/organization_alert_rule_index.py @@ -1,10 +1,9 @@ import logging from collections.abc import Sequence -from contextlib import contextmanager from copy import deepcopy from datetime import UTC, datetime -from django.db import connections, router +from django.db import connections, router, transaction from django.db.models import ( Case, DateTimeField, @@ -45,6 +44,7 @@ from sentry.apidocs.utils import inline_sentry_response_serializer from sentry.constants import ObjectStatus from sentry.db.models.manager.base_query_set import BaseQuerySet +from sentry.db.postgres.transactions import in_test_hide_transaction_boundary from sentry.exceptions import InvalidParams from sentry.incidents.endpoints.bases import OrganizationAlertRuleBaseEndpoint from sentry.incidents.endpoints.serializers.alert_rule import ( @@ -115,25 +115,6 @@ logger = logging.getLogger(__name__) -@contextmanager -def _postgres_jit_disabled(using: str): - """ - Disable PostgreSQL JIT compilation on the given database connection for the duration of - the block. - - Uses session-level SET/RESET rather than SET LOCAL (which requires a transaction) so that - callers don't need to open a transaction — important when the calling code may issue queries - against multiple databases. - """ - with connections[using].cursor() as cursor: - cursor.execute("SET jit = off") - try: - yield - finally: - with connections[using].cursor() as cursor: - cursor.execute("RESET jit") - - # Sentinel values for incident_status annotation when sorting combined rules # Used to ensure proper sort order for rules without active incidents INCIDENT_STATUS_NONE = -1 # Metric alerts with no active incident @@ -549,11 +530,14 @@ def has_type(rule_type: str) -> bool: # Disabling it makes this endpoint considerably faster. # The risk of other regression here should be low; our API endpoint isn't generally doing the sort of bulk # work that benefits from JIT. - # - # Note: Monitor lives on a different database, so we can't use a transaction here - # (SET LOCAL would require one). Session-level SET + RESET is fine for a request-scoped - # connection. - with _postgres_jit_disabled(using=router.db_for_write(Detector)): + # in_test_hide_transaction_boundary is safe here: this transaction is only + # used to scope SET LOCAL, not to guard data mutations. No writes happen + # inside this block, so there's no cross-db atomicity concern to enforce. + db = router.db_for_write(Detector) + with in_test_hide_transaction_boundary(), transaction.atomic(using=db): + with connections[db].cursor() as cursor: + cursor.execute("SET LOCAL jit = off") + intermediaries: list[CombinedQuerysetIntermediary] = [] if has_type("alert_rule"): intermediaries.append(CombinedQuerysetIntermediary(metric_detectors, sort_key))