Skip to content

Commit f131856

Browse files
swartzrockclaude
authored and committed
fix(deletions): Avoid DoesNotExist crash when FK target is already deleted (#112375)
DetectorDeletionTask and WorkflowDeletionTask both accessed FK descriptors (instance.workflow_condition_group, instance.when_condition_group) to check whether to include a related DataConditionGroup in child relations. When the referenced row has already been deleted, Django raises DoesNotExist. Because _run_deletion swallows all exceptions in production without re-raising, this caused the deletion task to silently fail and enter an infinite retry loop via reattempt_deletions. Orgs with detectors or workflows in this state can stay stuck in DELETION_IN_PROGRESS indefinitely. Fix by reading the raw FK id field instead of going through the descriptor, which avoids the database lookup and the potential DoesNotExist entirely. Fixes [SENTRY-5M8C](https://sentry.sentry.io/issues/7344636431) Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 4517271 commit f131856

File tree

4 files changed

+87
-5
lines changed

4 files changed

+87
-5
lines changed

src/sentry/deletions/defaults/detector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ def get_child_relations(self, instance: Detector) -> list[BaseRelation]:
2626
):
2727
model_relations.append(ModelRelation(DataSource, {"detector": instance.id}))
2828

29-
if instance.workflow_condition_group:
29+
if instance.workflow_condition_group_id:
3030
model_relations.append(
31-
ModelRelation(DataConditionGroup, {"id": instance.workflow_condition_group.id})
31+
ModelRelation(DataConditionGroup, {"id": instance.workflow_condition_group_id})
3232
)
3333

3434
return model_relations

src/sentry/deletions/defaults/workflow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ def get_child_relations(self, instance: Workflow) -> list[BaseRelation]:
2121

2222
model_relations.append(ModelRelation(DataConditionGroup, {"id__in": action_filter_ids}))
2323

24-
if instance.when_condition_group:
24+
if instance.when_condition_group_id:
2525
model_relations.append(
26-
ModelRelation(DataConditionGroup, {"id": instance.when_condition_group.id})
26+
ModelRelation(DataConditionGroup, {"id": instance.when_condition_group_id})
2727
)
2828

2929
return model_relations

tests/sentry/deletions/test_detector.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44

55
from sentry.constants import ObjectStatus
6-
from sentry.deletions.tasks.scheduled import run_scheduled_deletions
6+
from sentry.deletions.tasks.scheduled import reattempt_deletions, run_scheduled_deletions
77
from sentry.incidents.grouptype import MetricIssue
88
from sentry.snuba.models import QuerySubscription, SnubaQuery
99
from sentry.testutils.hybrid_cloud import HybridCloudTestMixin
@@ -125,6 +125,58 @@ def test_data_source_not_deleted(self) -> None:
125125
assert DataSource.objects.filter(id=self.data_source.id).exists()
126126
assert DataSourceDetector.objects.filter(id=data_source_detector_2.id).exists()
127127

128+
def test_dangling_fk_causes_stuck_deletion_loop(self) -> None:
129+
"""Regression test for the infinite-retry failure mode this fix addresses.
130+
131+
Before the fix, accessing instance.workflow_condition_group (a descriptor) with
132+
a dangling FK raised DataConditionGroup.DoesNotExist inside get_child_relations.
133+
In production, _run_deletion swallows the exception and exits cleanly, leaving
134+
the deletion record stuck with in_progress=True and the detector undeleted.
135+
reattempt_deletions eventually resets in_progress=False, but the next run hits
136+
the same error — resulting in an infinite loop.
137+
"""
138+
from datetime import timedelta
139+
140+
from django.utils import timezone
141+
142+
from sentry.deletions.models.scheduleddeletion import CellScheduledDeletion
143+
144+
schedule = self.ScheduledDeletion.schedule(instance=self.detector, days=0)
145+
146+
with (
147+
# Simulate the pre-fix crash: descriptor access raises DoesNotExist for a dangling FK
148+
mock.patch(
149+
"sentry.deletions.defaults.detector.DetectorDeletionTask.get_child_relations",
150+
side_effect=DataConditionGroup.DoesNotExist,
151+
),
152+
# Simulate production: _run_deletion swallows exceptions instead of re-raising
153+
mock.patch("sentry.deletions.tasks.scheduled.in_test_environment", return_value=False),
154+
mock.patch("sentry.deletions.tasks.scheduled.sentry_sdk"),
155+
):
156+
# First run: exception is swallowed; detector not deleted, record stuck in_progress
157+
with self.tasks():
158+
run_scheduled_deletions()
159+
160+
assert Detector.objects_for_deletion.filter(id=self.detector.id).exists()
161+
assert CellScheduledDeletion.objects.filter(id=schedule.id, in_progress=True).exists()
162+
163+
# reattempt_deletions resets in_progress after 6+ hours — but the root cause remains
164+
CellScheduledDeletion.objects.filter(id=schedule.id).update(
165+
date_scheduled=timezone.now() - timedelta(hours=7)
166+
)
167+
with self.tasks():
168+
reattempt_deletions()
169+
170+
schedule.refresh_from_db()
171+
assert not schedule.in_progress
172+
173+
# Second run: same failure — the loop repeats indefinitely
174+
with self.tasks():
175+
run_scheduled_deletions()
176+
177+
assert Detector.objects_for_deletion.filter(id=self.detector.id).exists()
178+
assert CellScheduledDeletion.objects.filter(id=schedule.id, in_progress=True).exists()
179+
128180
def test_delete_uptime_detector(self) -> None:
129181
detector = self.create_uptime_detector()
130182
uptime_sub = get_uptime_subscription(detector)
@@ -176,6 +228,20 @@ def test_delete_uptime_detector_succeeds_when_remove_seat_fails(
176228
# Verify the error path in DetectorDeletionTask was actually exercised.
177229
mock_remove_seat_subscriptions.assert_called_once()
178230

231+
def test_dangling_workflow_condition_group(self) -> None:
232+
"""Deletion succeeds when workflow_condition_group_id references a deleted DataConditionGroup."""
233+
# Simulate a dangling FK — points to a non-existent DataConditionGroup row
234+
Detector.objects_for_deletion.filter(id=self.detector.id).update(
235+
workflow_condition_group_id=999999
236+
)
237+
238+
self.ScheduledDeletion.schedule(instance=self.detector, days=0)
239+
240+
with self.tasks():
241+
run_scheduled_deletions()
242+
243+
assert not Detector.objects_for_deletion.filter(id=self.detector.id).exists()
244+
179245
def test_delete_uptime_subscription_without_detector(self) -> None:
180246
"""UptimeSubscription deletion proceeds when the detector no longer exists."""
181247
detector = self.create_uptime_detector()

tests/sentry/deletions/test_workflow.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ def setUp(self) -> None:
5959
self.workflow.status = ObjectStatus.PENDING_DELETION
6060
self.workflow.save()
6161

62+
def test_dangling_when_condition_group(self) -> None:
63+
"""Deletion succeeds when when_condition_group_id references a deleted DataConditionGroup."""
64+
from sentry.workflow_engine.models import Workflow
65+
66+
# Simulate a dangling FK — points to a non-existent DataConditionGroup row
67+
Workflow.objects_for_deletion.filter(id=self.workflow.id).update(
68+
when_condition_group_id=999999
69+
)
70+
71+
self.ScheduledDeletion.schedule(instance=self.workflow, days=0)
72+
73+
with self.tasks():
74+
run_scheduled_deletions()
75+
76+
assert not Workflow.objects_for_deletion.filter(id=self.workflow.id).exists()
77+
6278
@pytest.mark.parametrize(
6379
"instance_attr",
6480
[

0 commit comments

Comments
 (0)