Skip to content

Commit 0f0768b

Browse files
mjq and Claude authored
ref(flamegraph): Replace transactions dataset in profile candidates (#112406)
Calling the flamegraph endpoint with `data_source` set to `profiles` still hits the transactions dataset when searching for matching profiles. Switch to query the Spans EAP dataset via the existing `get_spans_based_candidates` instead. (By adding `is_transaction:true` to the spans query, you'll get equivalent results as the previous transactions query). This also removes the exponential chunking loop which was a workaround for querying the transactions dataset efficiently - with EAP we should be able to just make the query in one go (as we already do when `data_source` is `spans`). Fixes DAIN-1463. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6a669ba commit 0f0768b

File tree

2 files changed

+63
-167
lines changed

2 files changed

+63
-167
lines changed

src/sentry/profiles/flamegraph.py

Lines changed: 34 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -450,75 +450,48 @@ def get_profile_candidates_from_profiles(self) -> ProfileCandidates:
450450
raise ValueError("`organization` is required and cannot be `None`")
451451

452452
max_profiles = options.get("profiling.flamegraph.profile-set.size")
453-
initial_chunk_delta_hours = options.get(
454-
"profiling.flamegraph.query.initial_chunk_delta.hours"
455-
)
456-
max_chunk_delta_hours = options.get("profiling.flamegraph.query.max_delta.hours")
457-
multiplier = options.get("profiling.flamegraph.query.multiplier")
458453

459-
initial_chunk_delta = timedelta(hours=initial_chunk_delta_hours)
460-
max_chunk_delta = timedelta(hours=max_chunk_delta_hours)
461-
462-
referrer = Referrer.API_PROFILING_PROFILE_FLAMEGRAPH_PROFILE_CANDIDATES.value
463-
transaction_profile_candidates: list[TransactionProfileCandidate] = []
464-
profiler_metas: list[ProfilerMeta] = []
465-
466-
assert self.snuba_params.start is not None and self.snuba_params.end is not None
467-
snuba_params = self.snuba_params.copy()
468-
469-
for chunk_start, chunk_end in split_datetime_range_exponential(
470-
self.snuba_params.start,
471-
self.snuba_params.end,
472-
initial_chunk_delta,
473-
max_chunk_delta,
474-
multiplier,
475-
reverse=True,
476-
):
477-
snuba_params.start = chunk_start
478-
snuba_params.end = chunk_end
479-
480-
builder = self.get_transactions_based_candidate_query(
481-
query=self.query, limit=max_profiles, snuba_params=snuba_params
482-
)
483-
results = builder.run_query(referrer)
484-
results = builder.process_results(results)
485-
486-
for row in results["data"]:
487-
if row["profile.id"] is not None:
488-
transaction_profile_candidates.append(
489-
{
490-
"project_id": row["project.id"],
491-
"profile_id": row["profile.id"],
492-
}
493-
)
494-
elif row["profiler.id"] is not None and row["thread.id"]:
495-
profiler_metas.append(
496-
ProfilerMeta(
497-
project_id=row["project.id"],
498-
profiler_id=row["profiler.id"],
499-
thread_id=row["thread.id"],
500-
start=row["precise.start_ts"],
501-
end=row["precise.finish_ts"],
502-
transaction_id=row["id"],
503-
)
504-
)
454+
transaction_constraint = "is_transaction:true"
455+
query = (
456+
f"({transaction_constraint}) and ({self.query})"
457+
if self.query
458+
else transaction_constraint
459+
)
460+
results = self.get_spans_based_candidates(query=query, limit=max_profiles)
505461

506-
if len(transaction_profile_candidates) + len(profiler_metas) >= max_profiles:
507-
break
462+
transaction_profile_candidates: list[TransactionProfileCandidate] = [
463+
{
464+
"project_id": row["project.id"],
465+
"profile_id": row["profile.id"],
466+
}
467+
for row in results["data"]
468+
if row["profile.id"]
469+
]
508470

509471
max_continuous_profile_candidates = max(
510472
max_profiles - len(transaction_profile_candidates), 0
511473
)
512474

475+
profiler_metas = [
476+
ProfilerMeta(
477+
project_id=row["project.id"],
478+
profiler_id=row["profiler.id"],
479+
thread_id=row["thread.id"],
480+
start=row["precise.start_ts"],
481+
end=row["precise.finish_ts"],
482+
transaction_id=row["transaction.event_id"] or None,
483+
)
484+
for row in results["data"]
485+
if row["profiler.id"] and row["thread.id"]
486+
]
487+
513488
continuous_profile_candidates: list[ContinuousProfileCandidate] = []
514-
continuous_duration = 0.0
515489

516-
# If there are continuous profiles attached to transactions, we prefer those as
490+
# If there are continuous profiles attached to spans, we prefer those as
517491
# the active thread id gives us more user friendly flamegraphs than without.
518492
if profiler_metas and max_continuous_profile_candidates > 0:
519-
snuba_params.end = self.snuba_params.end
520-
continuous_profile_candidates, continuous_duration = self.get_chunks_for_profilers(
521-
profiler_metas, max_continuous_profile_candidates, snuba_params
493+
continuous_profile_candidates, _ = self.get_chunks_for_profilers(
494+
profiler_metas, max_continuous_profile_candidates
522495
)
523496

524497
seen_chunks = {
@@ -559,6 +532,7 @@ def get_profile_candidates_from_profiles(self) -> ProfileCandidates:
559532
limit=Limit(max_profiles),
560533
)
561534

535+
referrer = Referrer.API_PROFILING_PROFILE_FLAMEGRAPH_PROFILE_CANDIDATES.value
562536
all_results = bulk_snuba_queries(
563537
[
564538
Request(
@@ -646,7 +620,7 @@ def get_spans_based_candidates(self, query: str | None, limit: int) -> EAPRespon
646620
# add constraints in order to fetch only spans with profiles
647621
profiling_constraint = "(has:profile.id) or (has:profiler.id has:thread.id)"
648622
if query is not None and len(query) > 0:
649-
query = f"{query} and {profiling_constraint}"
623+
query = f"({query}) and ({profiling_constraint})"
650624
else:
651625
query = profiling_constraint
652626
return Spans.run_table_query(
@@ -661,6 +635,7 @@ def get_spans_based_candidates(self, query: str | None, limit: int) -> EAPRespon
661635
"profiler.id",
662636
"thread.id",
663637
"timestamp",
638+
"transaction.event_id", # holds the transaction ID, if this span was originally a transaction
664639
],
665640
orderby=["-timestamp"],
666641
offset=0,

tests/sentry/api/endpoints/test_organization_profiling_profiles.py

Lines changed: 29 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -530,122 +530,38 @@ def test_queries_profile_candidates_from_transactions_with_data(
530530
},
531531
)
532532

533-
def test_queries_profile_candidates_from_profiles_with_continuous_profiles_without_transactions(
533+
def test_queries_profile_candidates_from_profiles_with_continuous_profiles(
534534
self,
535535
):
536-
# this transaction has transaction profile
537536
profile_id = uuid4().hex
538-
self.store_transaction(
539-
transaction="foo",
540-
profile_id=profile_id,
541-
project=self.project,
537+
span_with_transaction_profile = self.create_span(
538+
project=self.project, start_ts=self.ten_mins_ago, duration=1000
542539
)
540+
span_with_transaction_profile.update({"profile_id": profile_id, "is_segment": True})
541+
self.store_span(span_with_transaction_profile)
543542

544-
# this transaction has continuous profile with a matching chunk (to be mocked below)
545543
profiler_id = uuid4().hex
546544
thread_id = "12345"
547-
profiler_transaction_id = uuid4().hex
548-
profiler_transaction = self.store_transaction(
549-
transaction="foo",
550-
profiler_id=profiler_id,
551-
thread_id=thread_id,
552-
transaction_id=profiler_transaction_id,
545+
span_with_continuous_profile = self.create_span(
546+
{"sentry_tags": {"profiler_id": profiler_id, "thread.id": thread_id}},
553547
project=self.project,
548+
start_ts=self.ten_mins_ago,
549+
duration=1000,
554550
)
555-
start_timestamp = datetime.fromtimestamp(profiler_transaction["start_timestamp"])
556-
finish_timestamp = datetime.fromtimestamp(profiler_transaction["timestamp"])
557-
buffer = timedelta(seconds=3)
558-
# not able to write profile chunks to the table yet so mock it's response here
559-
# so that the profiler transaction looks like it has a profile chunk within
560-
# the specified time range
561-
chunk = {
562-
"project_id": self.project.id,
563-
"profiler_id": profiler_id,
564-
"chunk_id": uuid4().hex,
565-
"start_timestamp": (start_timestamp - buffer).isoformat(),
566-
"end_timestamp": (finish_timestamp + buffer).isoformat(),
567-
}
551+
del span_with_continuous_profile["profile_id"]
552+
span_with_continuous_profile["is_segment"] = True
553+
self.store_span(span_with_continuous_profile)
568554

569-
with (
570-
patch(
571-
"sentry.profiles.flamegraph.FlamegraphExecutor._query_chunks_for_profilers"
572-
) as mock_query_chunks_for_profilers,
573-
patch(
574-
"sentry.api.endpoints.organization_profiling_profiles.proxy_profiling_service"
575-
) as mock_proxy_profiling_service,
576-
):
577-
# Mock the chunks query for the profiler_meta
578-
mock_query_chunks_for_profilers.return_value = [{"data": [chunk]}]
579-
mock_proxy_profiling_service.return_value = HttpResponse(status=200)
580-
581-
response = self.do_request(
582-
{
583-
"project": [self.project.id],
584-
"dataSource": "profiles",
585-
},
586-
)
587-
588-
assert response.status_code == 200, response.content
589-
590-
# Verify that chunks were queried for the profiler
591-
mock_query_chunks_for_profilers.assert_called_once()
592-
593-
mock_proxy_profiling_service.assert_called_once_with(
594-
method="POST",
595-
path=f"/organizations/{self.project.organization.id}/flamegraph",
596-
json_data={
597-
"transaction": [
598-
{
599-
"project_id": self.project.id,
600-
"profile_id": profile_id,
601-
},
602-
],
603-
"continuous": [
604-
{
605-
"project_id": self.project.id,
606-
"profiler_id": profiler_id,
607-
"chunk_id": chunk["chunk_id"],
608-
"thread_id": thread_id,
609-
"start": str(int(profiler_transaction["start_timestamp"] * 1e9)),
610-
"end": str(int(profiler_transaction["timestamp"] * 1e9)),
611-
"transaction_id": profiler_transaction_id,
612-
},
613-
],
614-
},
615-
)
616-
617-
def test_queries_profile_candidates_from_profiles_with_continuous_profiles_with_transactions(
618-
self,
619-
):
620-
# this transaction has transaction profile
621-
profile_id = uuid4().hex
622-
profile_transaction_id = uuid4().hex
623-
self.store_transaction(
624-
transaction="foo",
625-
profile_id=profile_id,
626-
transaction_id=profile_transaction_id,
627-
project=self.project,
555+
start_timestamp = datetime.fromtimestamp(
556+
span_with_continuous_profile["start_timestamp_precise"]
628557
)
629-
630-
# this transaction has continuous profile with a matching chunk (to be mocked below)
631-
profiler_id = uuid4().hex
632-
thread_id = "12345"
633-
profiler_transaction_id = uuid4().hex
634-
profiler_transaction = self.store_transaction(
635-
transaction="foo",
636-
profiler_id=profiler_id,
637-
thread_id=thread_id,
638-
transaction_id=profiler_transaction_id,
639-
project=self.project,
558+
finish_timestamp = datetime.fromtimestamp(
559+
span_with_continuous_profile["end_timestamp_precise"]
640560
)
641-
642-
start_timestamp = datetime.fromtimestamp(profiler_transaction["start_timestamp"])
643-
finish_timestamp = datetime.fromtimestamp(profiler_transaction["timestamp"])
644561
buffer = timedelta(seconds=3)
645-
# not able to write profile chunks to the table yet so mock it's response here
646-
# so that the profiler transaction looks like it has a profile chunk within
647-
# the specified time range
648-
chunk_1 = {
562+
# not able to write profile chunks to the table yet so mock its response here
563+
# so that the span looks like it has a profile chunk within the specified time range
564+
chunk = {
649565
"project_id": self.project.id,
650566
"profiler_id": profiler_id,
651567
"chunk_id": uuid4().hex,
@@ -662,13 +578,14 @@ def test_queries_profile_candidates_from_profiles_with_continuous_profiles_with_
662578
) as mock_proxy_profiling_service,
663579
):
664580
# Mock the chunks query for the profiler_meta
665-
mock_query_chunks_for_profilers.return_value = [{"data": [chunk_1]}]
581+
mock_query_chunks_for_profilers.return_value = [{"data": [chunk]}]
666582
mock_proxy_profiling_service.return_value = HttpResponse(status=200)
667583

668584
response = self.do_request(
669585
{
670586
"project": [self.project.id],
671587
"dataSource": "profiles",
588+
"statsPeriod": "1h",
672589
},
673590
)
674591

@@ -691,11 +608,15 @@ def test_queries_profile_candidates_from_profiles_with_continuous_profiles_with_
691608
{
692609
"project_id": self.project.id,
693610
"profiler_id": profiler_id,
694-
"chunk_id": chunk_1["chunk_id"],
611+
"chunk_id": chunk["chunk_id"],
695612
"thread_id": thread_id,
696-
"start": str(int(profiler_transaction["start_timestamp"] * 1e9)),
697-
"end": str(int(profiler_transaction["timestamp"] * 1e9)),
698-
"transaction_id": profiler_transaction_id,
613+
"start": str(
614+
int(span_with_continuous_profile["start_timestamp_precise"] * 1e9)
615+
),
616+
"end": str(
617+
int(span_with_continuous_profile["end_timestamp_precise"] * 1e9)
618+
),
619+
"transaction_id": span_with_continuous_profile["event_id"],
699620
},
700621
],
701622
},

0 commit comments

Comments (0)