Skip to content

Commit 54d7c86

Browse files
mtopo27claude
andauthored
feat(preprod): Use sequential (n-1) comparisons for size analysis diff monitors (#111482)
Change size analysis diff monitors to use sequential (n-1) comparisons instead of git-based (merge-base) comparisons. When a new build arrives, each diff-threshold monitor (`absolute_diff`, `relative_diff`) now resolves its base by finding the most recent prior artifact matching the detector's query filters and structural identity (`app_id`, `artifact_type`, `build_configuration`), ordered by `date_added`. Previously, monitors were evaluated inside the git-based comparison path (`_run_size_analysis_comparison` → `maybe_emit_issues`), which meant: - Artifacts without git metadata (no `commit_comparison`) never triggered monitors - All diff detectors shared a single git-derived base, even if they had different query filters - Monitor evaluation was coupled to the status check comparison flow Now the two paths are independent: - **Git path** — persists `PreprodArtifactSizeComparison` records and triggers status checks. No longer evaluates monitors. - **Sequential path** — `maybe_emit_issues_from_diff_size_results` runs unconditionally after the git block (even without git metadata). Each detector resolves its own base via `get_sequential_base_artifact`, with batch optimization (one DB lookup per unique query string across detectors). `absolute` threshold monitors are unaffected — they're already handled separately by `maybe_emit_issues_from_absolute_size_results`. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5294cdb commit 54d7c86

File tree

4 files changed

+480
-234
lines changed

4 files changed

+480
-234
lines changed

src/sentry/preprod/artifact_search.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,49 @@ def apply_filters(
315315
q = ~q
316316
queryset = queryset.filter(q)
317317
return queryset
318+
319+
320+
def get_sequential_base_artifact(
321+
artifact: PreprodArtifact,
322+
query: str,
323+
organization: Organization,
324+
) -> PreprodArtifact | None:
325+
"""
326+
Find the most recent prior artifact matching the given query and structural
327+
identity fields, with completed size metrics.
328+
329+
Used by sequential (n-1) monitors to resolve the base artifact for diff
330+
comparisons. Unlike the git-based lookup (get_base_artifact_for_commit),
331+
this orders by date_added rather than commit ancestry.
332+
333+
Args:
334+
artifact: The current (head) artifact to find a base for.
335+
query: The detector's query string (e.g. "app_name:MyApp"). May be empty.
336+
organization: The organization for scoping query filters.
337+
338+
Returns:
339+
The most recent prior PreprodArtifact matching the criteria, or None.
340+
"""
341+
queryset = PreprodArtifact.objects.get_queryset()
342+
343+
if query and query.strip():
344+
search_filters = parse_search_query(
345+
query, config=search_config, get_field_type=get_field_type
346+
)
347+
queryset = apply_filters(queryset, search_filters, organization)
348+
349+
queryset = queryset.filter(
350+
project_id=artifact.project_id,
351+
app_id=artifact.app_id,
352+
artifact_type=artifact.artifact_type,
353+
build_configuration=artifact.build_configuration,
354+
)
355+
356+
queryset = queryset.filter(
357+
preprodartifactsizemetrics__state=PreprodArtifactSizeMetrics.SizeAnalysisState.COMPLETED,
358+
preprodartifactsizemetrics__metrics_artifact_type=PreprodArtifactSizeMetrics.MetricsArtifactType.MAIN_ARTIFACT,
359+
)
360+
361+
queryset = queryset.exclude(pk=artifact.pk)
362+
363+
return queryset.order_by("-date_added").first()

src/sentry/preprod/size_analysis/tasks.py

Lines changed: 127 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
from django.utils import timezone
99

1010
from sentry import features
11+
from sentry.exceptions import InvalidSearchQuery
1112
from sentry.models.files.file import File
13+
from sentry.preprod.artifact_search import get_sequential_base_artifact
1214
from sentry.preprod.models import (
1315
PreprodArtifact,
1416
PreprodArtifactSizeComparison,
@@ -21,7 +23,7 @@
2123
SizeAnalysisMetadata,
2224
SizeAnalysisValue,
2325
)
24-
from sentry.preprod.size_analysis.models import ComparisonResults, SizeAnalysisResults
26+
from sentry.preprod.size_analysis.models import SizeAnalysisResults
2527
from sentry.preprod.size_analysis.utils import build_size_metrics_map, can_compare_size_metrics
2628
from sentry.preprod.size_analysis.webhooks import send_size_analysis_webhook
2729
from sentry.preprod.vcs.status_checks.size.tasks import create_preprod_status_check_task
@@ -96,6 +98,8 @@ def compare_preprod_artifact_size_analysis(
9698
"caller": "compare_build_config_mismatch",
9799
}
98100
)
101+
# Still evaluate sequential monitors even on build config mismatch
102+
maybe_emit_issues_from_diff_size_results(artifact, org_id)
99103
return
100104

101105
base_size_metrics_qs = PreprodArtifactSizeMetrics.objects.filter(
@@ -279,6 +283,9 @@ def compare_preprod_artifact_size_analysis(
279283
finally:
280284
send_size_analysis_webhook(artifact=artifact, organization_id=org_id)
281285

286+
# Evaluate sequential monitors unconditionally — independent of git metadata
287+
maybe_emit_issues_from_diff_size_results(artifact, org_id)
288+
282289

283290
@instrumented_task(
284291
name="sentry.preprod.tasks.manual_size_analysis_comparison",
@@ -543,50 +550,39 @@ def _run_size_analysis_comparison(
543550
comparison.state = PreprodArtifactSizeComparison.State.SUCCESS
544551
comparison.save()
545552

546-
maybe_emit_issues(
547-
comparison_results=comparison_results,
548-
head_metric=head_size_metric,
549-
base_metric=base_size_metric,
550-
)
551-
552553
logger.info(
553554
"preprod.size_analysis.compare.success",
554555
extra={"comparison_id": comparison.id},
555556
)
556557

557558

558-
def maybe_emit_issues(
559-
comparison_results: ComparisonResults,
560-
head_metric: PreprodArtifactSizeMetrics,
561-
base_metric: PreprodArtifactSizeMetrics,
559+
def maybe_emit_issues_from_diff_size_results(
560+
artifact: PreprodArtifact,
561+
org_id: int,
562562
) -> None:
563563
try:
564-
_maybe_emit_issues(
565-
comparison_results=comparison_results, head_metric=head_metric, base_metric=base_metric
566-
)
564+
_maybe_emit_issues_from_diff_size_results(artifact=artifact, org_id=org_id)
567565
except Exception:
568-
logger.exception("Error emitting issues")
566+
logger.exception("Error emitting issues from diff size results")
569567

570568

571569
def _get_platform(artifact: PreprodArtifact) -> str:
572570
return artifact.platform or "unknown"
573571

574572

575-
def _maybe_emit_issues(
576-
comparison_results: ComparisonResults,
577-
head_metric: PreprodArtifactSizeMetrics,
578-
base_metric: PreprodArtifactSizeMetrics,
573+
def _maybe_emit_issues_from_diff_size_results(
574+
artifact: PreprodArtifact,
575+
org_id: int,
579576
) -> None:
580-
project = head_metric.preprod_artifact.project
577+
project = artifact.project
581578
project_id = project.id
582-
organization_id = project.organization.id
583579

584580
if not features.has("organizations:preprod-issues", project.organization):
585581
logger.info(
586-
"preprod.size_analysis.compare.issues.disabled",
582+
"preprod.size_analysis.diff_results.issues.disabled",
587583
extra={
588584
"project_id": project_id,
589-
"organization_id": organization_id,
585+
"organization_id": org_id,
590586
},
591587
)
592588
return
@@ -600,62 +596,129 @@ def _maybe_emit_issues(
600596
)
601597
if not detectors:
602598
logger.info(
603-
"preprod.size_analysis.no_detectors",
599+
"preprod.size_analysis.diff_results.no_detectors",
604600
extra={"project_id": project_id},
605601
)
606602
return
607603

608-
# Only process diff-based detectors from the comparison path
609604
detectors = [d for d in detectors if d.config.get("threshold_type") in DIFF_THRESHOLD_TYPES]
610605
if not detectors:
611606
logger.info(
612-
"preprod.size_analysis.no_diff_detectors",
607+
"preprod.size_analysis.diff_results.no_diff_detectors",
613608
extra={"project_id": project_id},
614609
)
615610
return
616611

617-
diff = comparison_results.size_metric_diff_item
618-
head_artifact = head_metric.preprod_artifact
619-
base_artifact = base_metric.preprod_artifact
612+
head_metrics = list(
613+
PreprodArtifactSizeMetrics.objects.filter(
614+
preprod_artifact=artifact,
615+
metrics_artifact_type=PreprodArtifactSizeMetrics.MetricsArtifactType.MAIN_ARTIFACT,
616+
state=PreprodArtifactSizeMetrics.SizeAnalysisState.COMPLETED,
617+
).select_related("preprod_artifact", "preprod_artifact__mobile_app_info")
618+
)
619+
if not head_metrics:
620+
logger.info(
621+
"preprod.size_analysis.diff_results.no_head_metrics",
622+
extra={"artifact_id": artifact.id},
623+
)
624+
return
620625

621-
metadata: SizeAnalysisMetadata = {
622-
"platform": _get_platform(head_artifact),
623-
"head_metric_id": head_metric.id,
624-
"base_metric_id": base_metric.id,
625-
"head_artifact_id": head_artifact.id,
626-
"base_artifact_id": base_artifact.id,
627-
"head_artifact": head_artifact,
628-
"base_artifact": base_artifact,
629-
}
626+
head_metric = head_metrics[0]
627+
organization = project.organization
630628

631-
size_data: SizeAnalysisValue = {
632-
"head_install_size_bytes": diff.head_install_size,
633-
"head_download_size_bytes": diff.head_download_size,
634-
"base_install_size_bytes": diff.base_install_size,
635-
"base_download_size_bytes": diff.base_download_size,
636-
"metadata": metadata,
637-
}
629+
# Batch-resolve sequential bases by unique query string
630+
query_to_base: dict[str, PreprodArtifact | None] = {}
631+
for detector in detectors:
632+
query = detector.config.get("query", "")
633+
normalized_query = query.strip() if query else ""
634+
if normalized_query not in query_to_base:
635+
try:
636+
base = get_sequential_base_artifact(artifact, normalized_query, organization)
637+
except InvalidSearchQuery:
638+
logger.exception(
639+
"preprod.size_analysis.diff_results.invalid_query",
640+
extra={"detector_id": detector.id, "query": normalized_query},
641+
)
642+
base = None
643+
query_to_base[normalized_query] = base
644+
645+
# Run comparisons for unique sequential bases (persists file-level diffs)
646+
base_artifact_to_metric: dict[int, PreprodArtifactSizeMetrics] = {}
647+
seen_base_ids: set[int] = set()
648+
for base_artifact in query_to_base.values():
649+
if base_artifact is None or base_artifact.id in seen_base_ids:
650+
continue
651+
seen_base_ids.add(base_artifact.id)
652+
653+
base_metrics = list(
654+
PreprodArtifactSizeMetrics.objects.filter(
655+
preprod_artifact=base_artifact,
656+
metrics_artifact_type=PreprodArtifactSizeMetrics.MetricsArtifactType.MAIN_ARTIFACT,
657+
state=PreprodArtifactSizeMetrics.SizeAnalysisState.COMPLETED,
658+
).select_related("preprod_artifact", "preprod_artifact__mobile_app_info")
659+
)
660+
if not base_metrics:
661+
continue
638662

639-
data_packet: SizeAnalysisDataPacket = DataPacket(
640-
source_id=f"preprod-size-analysis:{project_id}",
641-
packet=size_data,
642-
)
663+
base_metric = base_metrics[0]
664+
base_artifact_to_metric[base_artifact.id] = base_metric
643665

644-
logger.info(
645-
"preprod.size_analysis.process_detectors.starting",
646-
extra={
647-
"project_id": project_id,
648-
"detector_count": len(detectors),
649-
},
650-
)
651-
results = process_detectors(data_packet, detectors)
652-
logger.info(
653-
"preprod.size_analysis.process_detectors.completed",
654-
extra={
655-
"project_id": project_id,
656-
"detector_count": len(results),
657-
},
658-
)
666+
with transaction.atomic(router.db_for_write(PreprodArtifactSizeComparison)):
667+
PreprodArtifactSizeComparison.objects.get_or_create(
668+
head_size_analysis=head_metric,
669+
base_size_analysis=base_metric,
670+
organization_id=org_id,
671+
defaults={"state": PreprodArtifactSizeComparison.State.PENDING},
672+
)
673+
_run_size_analysis_comparison(org_id, head_metric, base_metric)
674+
675+
for detector in detectors:
676+
query = detector.config.get("query", "")
677+
normalized_query = query.strip() if query else ""
678+
base_artifact = query_to_base.get(normalized_query)
679+
if base_artifact is None:
680+
logger.info(
681+
"preprod.size_analysis.diff_results.no_base",
682+
extra={"detector_id": detector.id, "artifact_id": artifact.id},
683+
)
684+
continue
685+
686+
detector_base_metric = base_artifact_to_metric.get(base_artifact.id)
687+
if detector_base_metric is None:
688+
continue
689+
690+
metadata: SizeAnalysisMetadata = {
691+
"platform": _get_platform(artifact),
692+
"head_metric_id": head_metric.id,
693+
"base_metric_id": detector_base_metric.id,
694+
"head_artifact_id": artifact.id,
695+
"base_artifact_id": base_artifact.id,
696+
"head_artifact": artifact,
697+
"base_artifact": base_artifact,
698+
}
699+
700+
size_data: SizeAnalysisValue = {
701+
"head_install_size_bytes": head_metric.max_install_size or 0,
702+
"head_download_size_bytes": head_metric.max_download_size or 0,
703+
"base_install_size_bytes": detector_base_metric.max_install_size or 0,
704+
"base_download_size_bytes": detector_base_metric.max_download_size or 0,
705+
"metadata": metadata,
706+
}
707+
708+
data_packet: SizeAnalysisDataPacket = DataPacket(
709+
source_id=f"preprod-size-analysis:{project_id}",
710+
packet=size_data,
711+
)
712+
713+
logger.info(
714+
"preprod.size_analysis.diff_results.evaluating",
715+
extra={
716+
"detector_id": detector.id,
717+
"artifact_id": artifact.id,
718+
"base_artifact_id": base_artifact.id,
719+
},
720+
)
721+
process_detectors(data_packet, [detector])
659722

660723

661724
def maybe_emit_issues_from_absolute_size_results(

0 commit comments

Comments
 (0)