Skip to content
1 change: 1 addition & 0 deletions src/sentry/models/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ class Group(Model):
priority_locked_at = models.DateTimeField(null=True)
seer_fixability_score = models.FloatField(null=True)
seer_autofix_last_triggered = models.DateTimeField(null=True)
# This actually represents the last timestamp when the explorer agent completes a step
seer_explorer_autofix_last_triggered = models.DateTimeField(null=True)

objects: ClassVar[GroupManager] = GroupManager(cache_fields=("id",))
Expand Down
3 changes: 0 additions & 3 deletions src/sentry/seer/autofix/autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from enum import StrEnum
from typing import TYPE_CHECKING, Literal

from django.utils import timezone
from pydantic import BaseModel

from sentry.seer.autofix.artifact_schemas import (
Expand Down Expand Up @@ -248,8 +247,6 @@ def trigger_autofix_explorer(
artifact_schema=artifact_schema,
)

group.update(seer_explorer_autofix_last_triggered=timezone.now())

payload = {
"run_id": run_id,
"group_id": group.id,
Expand Down
73 changes: 27 additions & 46 deletions src/sentry/seer/autofix/on_completion_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
from typing import TYPE_CHECKING

from django.utils import timezone

from sentry import features
from sentry.models.group import Group
from sentry.models.organization import Organization
Expand Down Expand Up @@ -76,13 +78,20 @@
)
return

group_id = state.metadata.get("group_id") if state.metadata else None
if group_id is None:
return

group = Group.objects.get(id=group_id, project__organization_id=organization.id)

Check failure on line 85 in src/sentry/seer/autofix/on_completion_hook.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[TVD-3MR] Unhandled Group.DoesNotExist when group is deleted between fetch_run_status and lookup (additional location)

The refactoring in this hunk removes DoesNotExist handlers from `_maybe_trigger_supergroups_embedding` (removed lines 236-248) and `_maybe_continue_pipeline` (removed lines 310-328), replacing them with a `group` parameter passed from `execute()`. However, the `execute()` method at line 85 calls `Group.objects.get(id=group_id, project__organization_id=organization.id)` without any try/except handler. If the group is deleted between when the run was created and when this completion hook executes, a `Group.DoesNotExist` exception will crash the hook, preventing webhook delivery and pipeline continuation.
Comment thread
sentry-warden[bot] marked this conversation as resolved.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The Group.objects.get() call lacks exception handling. If a group is deleted mid-process, the hook will fail and abort subsequent steps.
Severity: MEDIUM

Suggested Fix

Wrap the Group.objects.get(id=group_id, ...) call in a try/except Group.DoesNotExist block. Inside the except block, log a warning and return gracefully, similar to how the previous version of the code handled this case.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/seer/autofix/on_completion_hook.py#L85

Potential issue: The `Group.objects.get()` call at line 85 is not wrapped in a
`try/except` block. If a group is deleted between when an autofix is triggered and when
the completion webhook fires, a `Group.DoesNotExist` exception will be raised. This
unhandled exception propagates up and causes the entire hook to abort. As a result,
subsequent actions like sending completion webhooks, continuing the autofix pipeline,
and triggering supergroup embeddings will not execute. This is a regression from the
previous implementation, which defensively handled this known scenario.

Did we get this right? 👍 / 👎 to inform future reviews.

Comment thread
cursor[bot] marked this conversation as resolved.
group.update(seer_explorer_autofix_last_triggered=timezone.now())
Comment thread
sentry[bot] marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.

# Send webhook for the completed step
cls._send_step_webhook(organization, run_id, state)
cls._send_step_webhook(organization, run_id, state, group)

cls._maybe_trigger_supergroups_embedding(organization, run_id, state)
cls._maybe_trigger_supergroups_embedding(organization, run_id, state, group)

# Continue the automated pipeline if stopping_point hasn't been reached
cls._maybe_continue_pipeline(organization, run_id, state)
cls._maybe_continue_pipeline(organization, run_id, state, group)

@classmethod
def find_latest_artifact_for_step(cls, state: SeerRunState, key: str) -> Artifact | None:
Expand All @@ -95,19 +104,24 @@
return None

@classmethod
def _send_step_webhook(cls, organization, run_id, state: SeerRunState):
def _send_step_webhook(
cls,
organization: Organization,
run_id: int,
state: SeerRunState,
group: Group,
):
"""
Send webhook for the completed step.

Determines which step just completed and sends the appropriate webhook event.
"""
current_step = cls._get_current_step(state)

webhook_payload = {"run_id": run_id}

group_id = state.metadata.get("group_id") if state.metadata else None
if group_id is not None:
webhook_payload["group_id"] = group_id
webhook_payload = {
"run_id": run_id,
"group_id": group.id,
}

# Iterate through blocks in reverse order (most recent first)
# to find which step just completed
Expand Down Expand Up @@ -213,25 +227,13 @@
organization: Organization,
run_id: int,
state: SeerRunState,
group: Group,

Check failure on line 230 in src/sentry/seer/autofix/on_completion_hook.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Unhandled Group.DoesNotExist when group is deleted between fetch_run_status and lookup

The refactoring in this hunk removes DoesNotExist handlers from `_maybe_trigger_supergroups_embedding` (removed lines 236-248) and `_maybe_continue_pipeline` (removed lines 310-328), replacing them with a `group` parameter passed from `execute()`. However, the `execute()` method at line 85 calls `Group.objects.get(id=group_id, project__organization_id=organization.id)` without any try/except handler. If the group is deleted between when the run was created and when this completion hook executes, a `Group.DoesNotExist` exception will crash the hook, preventing webhook delivery and pipeline continuation.
) -> None:
"""Trigger supergroups embedding if feature flag is enabled."""
current_step = cls._get_current_step(state)
if current_step != AutofixStep.ROOT_CAUSE:
return

group_id = state.metadata.get("group_id") if state.metadata else None
if group_id is None:
return

try:
group = Group.objects.get(id=group_id)
except Group.DoesNotExist:
logger.warning(
"autofix.supergroup_embedding.group_not_found",
extra={"group_id": group_id},
)
return

if not features.has("projects:supergroup-embeddings-explorer", group.project):
return

Expand All @@ -242,7 +244,7 @@
try:
trigger_supergroups_embedding(
organization_id=organization.id,
group_id=group_id,
group_id=group.id,
project_id=group.project_id,
artifact_data=root_cause_artifact.data,
)
Expand All @@ -252,7 +254,7 @@
extra={
"run_id": run_id,
"organization_id": organization.id,
"group_id": group_id,
"group_id": group.id,
},
)

Expand Down Expand Up @@ -289,6 +291,7 @@
organization: Organization,
run_id: int,
state: SeerRunState,
group: Group,
) -> None:
"""
Continue to the next step if stopping_point hasn't been reached.
Expand All @@ -307,28 +310,6 @@
return

stopping_point = AutofixStoppingPoint(metadata["stopping_point"])
group_id = metadata.get("group_id")

if not group_id:
logger.warning(
"autofix.on_completion_hook.no_group_id_in_metadata",
extra={"run_id": run_id, "organization_id": organization.id},
)
return

# Get the group
try:
group = Group.objects.get(id=group_id, project__organization=organization)
except Group.DoesNotExist:
logger.warning(
"autofix.on_completion_hook.group_not_found",
extra={
"run_id": run_id,
"organization_id": organization.id,
"group_id": group_id,
},
)
return

if current_step is None:
logger.warning(
Expand Down
9 changes: 8 additions & 1 deletion src/sentry/seer/autofix/trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
)
from sentry.utils.cache import cache

# This timeout should be longer than what we expect the seer agents to take.
# This is because we do not want to trigger another run while the previous
# run is still running.
#
# NOTE: The timeout on the seer job is 5 minutes.
SEER_AUTOMATION_TIMEOUT_SECONDS = 30 * 60

if TYPE_CHECKING:
from sentry.models.group import Group
from sentry.utils.locking.manager import LockManager
Expand Down Expand Up @@ -171,7 +178,7 @@ def get_seat_based_seer_automation_skip_reason(

# Atomically set cache to prevent duplicate dispatches (returns False if key exists)
automation_dispatch_cache_key = f"seer-automation-dispatched:{group.id}"
if not cache.add(automation_dispatch_cache_key, True, timeout=300):
if not cache.add(automation_dispatch_cache_key, True, timeout=SEER_AUTOMATION_TIMEOUT_SECONDS):
return "automation_already_dispatched" # Another process already dispatched automation

# Check if project has connected repositories - requirement for new pricing
Expand Down
24 changes: 0 additions & 24 deletions tests/sentry/seer/autofix/test_autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,30 +354,6 @@ def test_trigger_autofix_explorer_passes_group_id_in_metadata(
call_kwargs = mock_client.start_run.call_args.kwargs
assert call_kwargs["metadata"] == {"group_id": self.group.id, "referrer": "unknown"}

@patch("sentry.seer.autofix.autofix_agent.broadcast_webhooks_for_organization.delay")
@patch("sentry.seer.autofix.autofix_agent.SeerExplorerClient")
def test_trigger_autofix_explorer_updates_explorer_last_triggered(
self, mock_client_class, mock_broadcast
):
"""trigger_autofix_explorer sets seer_explorer_autofix_last_triggered on the group."""
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.start_run.return_value = 123

assert self.group.seer_explorer_autofix_last_triggered is None

trigger_autofix_explorer(
group=self.group,
step=AutofixStep.ROOT_CAUSE,
referrer=AutofixReferrer.UNKNOWN,
run_id=None,
)

self.group.refresh_from_db()

assert self.group.seer_autofix_last_triggered is None
assert self.group.seer_explorer_autofix_last_triggered is not None


class TestTriggerCodingAgentHandoff(TestCase):
"""Tests for trigger_coding_agent_handoff function."""
Expand Down
36 changes: 17 additions & 19 deletions tests/sentry/seer/autofix/test_autofix_on_completion_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,14 @@ def setUp(self):
def test_maybe_continue_pipeline_no_metadata(self, mock_trigger):
"""Does not continue when metadata is missing."""
state = run_state(blocks=[root_cause_memory_block()])
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)
mock_trigger.assert_not_called()

@patch("sentry.seer.autofix.on_completion_hook.trigger_autofix_explorer")
def test_maybe_continue_pipeline_no_stopping_point_in_metadata(self, mock_trigger):
"""Does not continue when stopping_point is missing from metadata."""
state = run_state(blocks=[root_cause_memory_block()], metadata={"group_id": self.group.id})
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)
mock_trigger.assert_not_called()

@patch("sentry.seer.autofix.on_completion_hook.trigger_autofix_explorer")
Expand All @@ -220,7 +220,7 @@ def test_maybe_continue_pipeline_at_stopping_point(self, mock_trigger):
"stopping_point": AutofixStoppingPoint.ROOT_CAUSE.value,
},
)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)
mock_trigger.assert_not_called()

@patch("sentry.seer.autofix.on_completion_hook.get_project_seer_preferences")
Expand All @@ -237,7 +237,7 @@ def test_maybe_continue_pipeline_continues_to_next_step(self, mock_trigger, mock
"stopping_point": AutofixStoppingPoint.CODE_CHANGES.value,
},
)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)
mock_trigger.assert_called_once()
call_kwargs = mock_trigger.call_args.kwargs
assert call_kwargs["group"].id == self.group.id
Expand All @@ -258,7 +258,7 @@ def test_maybe_continue_pipeline_pushes_changes_for_open_pr(self, mock_push_chan
"stopping_point": AutofixStoppingPoint.OPEN_PR.value,
},
)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)
mock_push_changes.assert_called_once_with(
self.group,
123,
Expand Down Expand Up @@ -290,6 +290,8 @@ class TestAutofixOnCompletionHookWebhooks(TestCase):
def setUp(self):
super().setUp()
self.organization = self.create_organization()
self.project = self.create_project(organization=self.organization)
self.group = self.create_group(project=self.project)

@patch("sentry.seer.autofix.on_completion_hook.broadcast_webhooks_for_organization.delay")
def test_send_step_webhook_artifact_types(self, mock_broadcast):
Expand Down Expand Up @@ -328,7 +330,7 @@ class TestCaseDict(TypedDict):
for i, test_case in enumerate(test_cases):
mock_broadcast.reset_mock()
state = run_state(blocks=[test_case["block"]])
AutofixOnCompletionHook._send_step_webhook(self.organization, run_id, state)
AutofixOnCompletionHook._send_step_webhook(self.organization, run_id, state, self.group)

mock_broadcast.assert_called_once()
call_kwargs = mock_broadcast.call_args.kwargs
Expand All @@ -352,7 +354,7 @@ def test_send_step_webhook_coding(self, mock_broadcast):
code_changes_memory_block(),
]
)
AutofixOnCompletionHook._send_step_webhook(self.organization, 123, state)
AutofixOnCompletionHook._send_step_webhook(self.organization, 123, state, self.group)

mock_broadcast.assert_called_once()
call_kwargs = mock_broadcast.call_args.kwargs
Expand All @@ -371,7 +373,7 @@ def test_send_step_webhook_no_artifacts_no_webhook(self, mock_broadcast):
artifacts=[],
)
state = run_state(blocks=[block])
AutofixOnCompletionHook._send_step_webhook(self.organization, 123, state)
AutofixOnCompletionHook._send_step_webhook(self.organization, 123, state, self.group)

mock_broadcast.assert_not_called()

Expand All @@ -391,7 +393,9 @@ def test_triggers_embedding_on_root_cause(self, mock_trigger_sg):
"""Triggers supergroups embedding when root cause completes with feature flag enabled."""
block = root_cause_memory_block()
state = run_state(blocks=[block], metadata={"group_id": self.group.id})
AutofixOnCompletionHook._maybe_trigger_supergroups_embedding(self.organization, 123, state)
AutofixOnCompletionHook._maybe_trigger_supergroups_embedding(
self.organization, 123, state, self.group
)

mock_trigger_sg.assert_called_once_with(
organization_id=self.organization.id,
Expand All @@ -407,15 +411,9 @@ def test_skips_embedding_when_flag_disabled(self, mock_trigger_sg):
blocks=[root_cause_memory_block()],
metadata={"group_id": self.group.id},
)
AutofixOnCompletionHook._maybe_trigger_supergroups_embedding(self.organization, 123, state)

mock_trigger_sg.assert_not_called()

@patch("sentry.seer.autofix.on_completion_hook.trigger_supergroups_embedding")
def test_skips_embedding_when_no_group_id(self, mock_trigger_sg):
"""Does not trigger supergroups embedding when group_id is missing from metadata."""
state = run_state(blocks=[root_cause_memory_block()])
AutofixOnCompletionHook._maybe_trigger_supergroups_embedding(self.organization, 123, state)
AutofixOnCompletionHook._maybe_trigger_supergroups_embedding(
self.organization, 123, state, self.group
)

mock_trigger_sg.assert_not_called()

Expand Down Expand Up @@ -559,7 +557,7 @@ def test_maybe_continue_pipeline_triggers_handoff_when_configured(
},
)

AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state)
AutofixOnCompletionHook._maybe_continue_pipeline(self.organization, 123, state, self.group)

mock_trigger_handoff.assert_called_once()

Expand Down
Loading