Skip to content

Commit 4fdfd88

Browse files
yuvmen and claude committed
ref(seer): Batch Snuba queries for event fetching and improve tests
Replace per-group get_latest_event() calls with batched Snuba queries via bulk_snuba_queries for the event fetching phase. Uses a tight timestamp window around each group's last_seen. Also reduces inter-batch delay to 1s, rewrites cursor resumption test to verify only post-cursor groups are processed, and adds exact batch boundary edge case test. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 86e0a24 commit 4fdfd88

File tree

2 files changed

+114
-16
lines changed

2 files changed

+114
-16
lines changed

src/sentry/tasks/seer/backfill_supergroups_lightweight.py

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import logging
2+
from collections.abc import Sequence
23
from datetime import UTC, datetime, timedelta
34

5+
from snuba_sdk import Column, Condition, Direction, Entity, Limit, Op, OrderBy, Query, Request
6+
47
from sentry import features, options
58
from sentry.api.serializers import EventSerializer, serialize
69
from sentry.eventstore import backend as eventstore
@@ -12,16 +15,18 @@
1215
SeerViewerContext,
1316
make_lightweight_rca_cluster_request,
1417
)
18+
from sentry.snuba.dataset import Dataset
1519
from sentry.tasks.base import instrumented_task
1620
from sentry.taskworker.namespaces import seer_tasks
1721
from sentry.types.group import GroupSubStatus
1822
from sentry.utils import metrics
23+
from sentry.utils.snuba import bulk_snuba_queries
1924

2025
logger = logging.getLogger(__name__)
2126

2227
BACKFILL_LAST_SEEN_DAYS = 90
2328
BATCH_SIZE = 50
24-
INTER_BATCH_DELAY_S = 5
29+
INTER_BATCH_DELAY_S = 1
2530

2631

2732
@instrumented_task(
@@ -102,20 +107,7 @@ def backfill_supergroups_lightweight_for_org(
102107
return
103108

104109
# Phase 1: Batch fetch event data
105-
group_event_pairs: list[tuple[Group, dict]] = []
106-
for group in groups:
107-
event = group.get_latest_event()
108-
if not event:
109-
continue
110-
111-
ready_event = eventstore.get_event_by_id(
112-
group.project_id, event.event_id, group_id=group.id
113-
)
114-
if not ready_event:
115-
continue
116-
117-
serialized_event = serialize(ready_event, None, EventSerializer())
118-
group_event_pairs.append((group, serialized_event))
110+
group_event_pairs = _batch_fetch_events(groups, organization_id)
119111

120112
# Phase 2: Send to Seer (per-group for now, bulk-ready)
121113
failure_count = 0
@@ -184,3 +176,59 @@ def backfill_supergroups_lightweight_for_org(
184176
"supergroups_backfill_lightweight.org_completed",
185177
extra={"organization_id": organization_id},
186178
)
179+
180+
181+
def _batch_fetch_events(groups: Sequence[Group], organization_id: int) -> list[tuple[Group, dict]]:
    """Resolve the latest event for every group via one batched Snuba call.

    One single-row Snuba query per group (ordered by timestamp descending)
    discovers the group's most recent event_id; the full events are then
    loaded from nodestore and serialized for Seer. Groups with no matching
    Snuba row, or whose event is missing from nodestore, are skipped.
    """
    current_time = datetime.now(UTC)
    fallback_start = current_time - timedelta(days=BACKFILL_LAST_SEEN_DAYS)

    def _latest_event_request(group: Group) -> Request:
        # Scan only a narrow window around last_seen when available; otherwise
        # fall back to the full backfill window.
        window_start = (
            group.last_seen - timedelta(hours=1) if group.last_seen else fallback_start
        )
        return Request(
            dataset=Dataset.Events.value,
            app_id="supergroups_backfill",
            query=Query(
                match=Entity(Dataset.Events.value),
                select=[Column("event_id"), Column("group_id"), Column("project_id")],
                where=[
                    Condition(Column("project_id"), Op.EQ, group.project_id),
                    Condition(Column("group_id"), Op.EQ, group.id),
                    Condition(Column("timestamp"), Op.GTE, window_start),
                    Condition(Column("timestamp"), Op.LT, current_time + timedelta(minutes=5)),
                ],
                orderby=[OrderBy(Column("timestamp"), Direction.DESC)],
                limit=Limit(1),
            ),
            tenant_ids={"organization_id": organization_id},
        )

    requests = [_latest_event_request(group) for group in groups]
    results = bulk_snuba_queries(
        requests, referrer="supergroups_backfill_lightweight.get_latest_events"
    )

    # Results come back in request order; pair each one back with its group,
    # then hydrate from nodestore and serialize.
    pairs: list[tuple[Group, dict]] = []
    for group, result in zip(groups, results):
        rows = result.get("data", [])
        if not rows:
            continue

        full_event = eventstore.get_event_by_id(
            group.project_id, rows[0]["event_id"], group_id=group.id
        )
        if not full_event:
            continue

        pairs.append((group, serialize(full_event, None, EventSerializer())))

    return pairs

tests/sentry/tasks/seer/test_backfill_supergroups_lightweight.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,60 @@ def test_skips_non_error_groups(self, mock_request):
174174
"sentry.tasks.seer.backfill_supergroups_lightweight.make_lightweight_rca_cluster_request"
175175
)
176176
def test_resumes_from_cursor(self, mock_request):
    """Resuming with a cursor at the first group processes only the groups after it."""
    mock_request.return_value = MagicMock(status=200)

    second_event = self.store_event(
        project_id=self.project.id,
        data={"message": "second error", "level": "error", "fingerprint": ["group2"]},
    )
    assert second_event.group is not None
    second_event.group.substatus = GroupSubStatus.NEW
    second_event.group.save(update_fields=["substatus"])

    # Cursor points at the first (setUp) group, so only the second should run.
    backfill_supergroups_lightweight_for_org(
        self.organization.id,
        last_project_id=self.project.id,
        last_group_id=self.group.id,
    )

    mock_request.assert_called_once()
    sent_payload = mock_request.call_args.args[0]
    assert sent_payload["group_id"] == second_event.group.id
196+
197+
@with_feature("organizations:supergroups-lightweight-rca-clustering-write")
198+
@patch(
199+
"sentry.tasks.seer.backfill_supergroups_lightweight.make_lightweight_rca_cluster_request"
200+
)
201+
def test_chains_then_completes_on_exact_batch_boundary(self, mock_request):
    """A batch of exactly BATCH_SIZE self-chains once, then the resumed run stops cleanly."""
    mock_request.return_value = MagicMock(status=200)
    chain_target = (
        "sentry.tasks.seer.backfill_supergroups_lightweight."
        "backfill_supergroups_lightweight_for_org.apply_async"
    )

    # setUp already made one group; add BATCH_SIZE - 1 more for an exact full batch.
    for idx in range(BATCH_SIZE - 1):
        stored = self.store_event(
            project_id=self.project.id,
            data={
                "message": f"error {idx}",
                "level": "error",
                "fingerprint": [f"boundary-{idx}"],
            },
        )
        assert stored.group is not None
        stored.group.substatus = GroupSubStatus.NEW
        stored.group.save(update_fields=["substatus"])

    # A completely full first batch must schedule a continuation.
    with patch(chain_target) as chain_spy:
        backfill_supergroups_lightweight_for_org(self.organization.id)
    chain_spy.assert_called_once()
    resume_kwargs = chain_spy.call_args.kwargs["kwargs"]

    # Resuming past the final group finds nothing and must not chain again.
    mock_request.reset_mock()
    with patch(chain_target) as chain_spy:
        backfill_supergroups_lightweight_for_org(self.organization.id, **resume_kwargs)
    mock_request.assert_not_called()
    chain_spy.assert_not_called()

0 commit comments

Comments
 (0)