Skip to content

Commit f131856

Browse files
swartzrockclaude
authored and committed
fix(deletions): Avoid DoesNotExist crash when FK target is already deleted (#112375)
DetectorDeletionTask and WorkflowDeletionTask both accessed FK descriptors (instance.workflow_condition_group, instance.when_condition_group) to check whether to include a related DataConditionGroup in child relations. When the referenced row has already been deleted, Django raises DoesNotExist. Because _run_deletion swallows all exceptions in production without re-raising, this caused the deletion task to silently fail and enter an infinite retry loop via reattempt_deletions. Orgs with detectors or workflows in this state can stay stuck in DELETION_IN_PROGRESS indefinitely. Fix by reading the raw FK id field instead of going through the descriptor, which avoids the database lookup and the potential DoesNotExist entirely. Fixes [SENTRY-5M8C](https://sentry.sentry.io/issues/7344636431) Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 4517271 commit f131856

File tree

4 files changed

+87
-5
lines changed

4 files changed

+87
-5
lines changed

src/sentry/deletions/defaults/detector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ def get_child_relations(self, instance: Detector) -> list[BaseRelation]:
2626
):
2727
model_relations.append(ModelRelation(DataSource, {"detector": instance.id}))
2828

29-
if instance.workflow_condition_group:
29+
if instance.workflow_condition_group_id:
3030
model_relations.append(
31-
ModelRelation(DataConditionGroup, {"id": instance.workflow_condition_group.id})
31+
ModelRelation(DataConditionGroup, {"id": instance.workflow_condition_group_id})
3232
)
3333

3434
return model_relations

src/sentry/deletions/defaults/workflow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ def get_child_relations(self, instance: Workflow) -> list[BaseRelation]:
2121

2222
model_relations.append(ModelRelation(DataConditionGroup, {"id__in": action_filter_ids}))
2323

24-
if instance.when_condition_group:
24+
if instance.when_condition_group_id:
2525
model_relations.append(
26-
ModelRelation(DataConditionGroup, {"id": instance.when_condition_group.id})
26+
ModelRelation(DataConditionGroup, {"id": instance.when_condition_group_id})
2727
)
2828

2929
return model_relations

tests/sentry/deletions/test_detector.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44

55
from sentry.constants import ObjectStatus
6-
from sentry.deletions.tasks.scheduled import run_scheduled_deletions
6+
from sentry.deletions.tasks.scheduled import reattempt_deletions, run_scheduled_deletions
77
from sentry.incidents.grouptype import MetricIssue
88
from sentry.snuba.models import QuerySubscription, SnubaQuery
99
from sentry.testutils.hybrid_cloud import HybridCloudTestMixin
@@ -125,6 +125,58 @@ def test_data_source_not_deleted(self) -> None:
125125
assert DataSource.objects.filter(id=self.data_source.id).exists()
126126
assert DataSourceDetector.objects.filter(id=data_source_detector_2.id).exists()
127127

128+
def test_dangling_fk_causes_stuck_deletion_loop(self) -> None:
129+
"""Regression test for the infinite-retry failure mode this fix addresses.
130+
131+
Before the fix, accessing instance.workflow_condition_group (a descriptor) with
132+
a dangling FK raised DataConditionGroup.DoesNotExist inside get_child_relations.
133+
In production, _run_deletion swallows the exception and exits cleanly, leaving
134+
the deletion record stuck with in_progress=True and the detector undeleted.
135+
reattempt_deletions eventually resets in_progress=False, but the next run hits
136+
the same error — resulting in an infinite loop.
137+
"""
138+
from datetime import timedelta
139+
140+
from django.utils import timezone
141+
142+
from sentry.deletions.models.scheduleddeletion import CellScheduledDeletion
143+
144+
schedule = self.ScheduledDeletion.schedule(instance=self.detector, days=0)
145+
146+
with (
147+
# Simulate the pre-fix crash: descriptor access raises DoesNotExist for a dangling FK
148+
mock.patch(
149+
"sentry.deletions.defaults.detector.DetectorDeletionTask.get_child_relations",
150+
side_effect=DataConditionGroup.DoesNotExist,
151+
),
152+
# Simulate production: _run_deletion swallows exceptions instead of re-raising
153+
mock.patch("sentry.deletions.tasks.scheduled.in_test_environment", return_value=False),
154+
mock.patch("sentry.deletions.tasks.scheduled.sentry_sdk"),
155+
):
156+
# First run: exception is swallowed; detector not deleted, record stuck in_progress
157+
with self.tasks():
158+
run_scheduled_deletions()
159+
160+
assert Detector.objects_for_deletion.filter(id=self.detector.id).exists()
161+
assert CellScheduledDeletion.objects.filter(id=schedule.id, in_progress=True).exists()
162+
163+
# reattempt_deletions resets in_progress after 6+ hours — but the root cause remains
164+
CellScheduledDeletion.objects.filter(id=schedule.id).update(
165+
date_scheduled=timezone.now() - timedelta(hours=7)
166+
)
167+
with self.tasks():
168+
reattempt_deletions()
169+
170+
schedule.refresh_from_db()
171+
assert not schedule.in_progress
172+
173+
# Second run: same failure — the loop repeats indefinitely
174+
with self.tasks():
175+
run_scheduled_deletions()
176+
177+
assert Detector.objects_for_deletion.filter(id=self.detector.id).exists()
178+
assert CellScheduledDeletion.objects.filter(id=schedule.id, in_progress=True).exists()
179+
128180
def test_delete_uptime_detector(self) -> None:
129181
detector = self.create_uptime_detector()
130182
uptime_sub = get_uptime_subscription(detector)
@@ -176,6 +228,20 @@ def test_delete_uptime_detector_succeeds_when_remove_seat_fails(
176228
# Verify the error path in DetectorDeletionTask was actually exercised.
177229
mock_remove_seat_subscriptions.assert_called_once()
178230

231+
def test_dangling_workflow_condition_group(self) -> None:
232+
"""Deletion succeeds when workflow_condition_group_id references a deleted DataConditionGroup."""
233+
# Simulate a dangling FK — points to a non-existent DataConditionGroup row
234+
Detector.objects_for_deletion.filter(id=self.detector.id).update(
235+
workflow_condition_group_id=999999
236+
)
237+
238+
self.ScheduledDeletion.schedule(instance=self.detector, days=0)
239+
240+
with self.tasks():
241+
run_scheduled_deletions()
242+
243+
assert not Detector.objects_for_deletion.filter(id=self.detector.id).exists()
244+
179245
def test_delete_uptime_subscription_without_detector(self) -> None:
180246
"""UptimeSubscription deletion proceeds when the detector no longer exists."""
181247
detector = self.create_uptime_detector()

tests/sentry/deletions/test_workflow.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ def setUp(self) -> None:
5959
self.workflow.status = ObjectStatus.PENDING_DELETION
6060
self.workflow.save()
6161

62+
def test_dangling_when_condition_group(self) -> None:
63+
"""Deletion succeeds when when_condition_group_id references a deleted DataConditionGroup."""
64+
from sentry.workflow_engine.models import Workflow
65+
66+
# Simulate a dangling FK — points to a non-existent DataConditionGroup row
67+
Workflow.objects_for_deletion.filter(id=self.workflow.id).update(
68+
when_condition_group_id=999999
69+
)
70+
71+
self.ScheduledDeletion.schedule(instance=self.workflow, days=0)
72+
73+
with self.tasks():
74+
run_scheduled_deletions()
75+
76+
assert not Workflow.objects_for_deletion.filter(id=self.workflow.id).exists()
77+
6278
@pytest.mark.parametrize(
6379
"instance_attr",
6480
[

0 commit comments

Comments
 (0)