From fa5d13f992e5ba348db37f8b01cd6250d51c9ae6 Mon Sep 17 00:00:00 2001 From: Chris Klochek Date: Fri, 10 Apr 2026 13:34:16 -0400 Subject: [PATCH 1/2] fix(uptime): two possible fixes for cron monitor incident timing inconsistency --- src/sentry/monitors/logic/incident_occurrence.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sentry/monitors/logic/incident_occurrence.py b/src/sentry/monitors/logic/incident_occurrence.py index ede1c595a8af73..7bd1a670107b59 100644 --- a/src/sentry/monitors/logic/incident_occurrence.py +++ b/src/sentry/monitors/logic/incident_occurrence.py @@ -217,6 +217,8 @@ def send_incident_occurrence( "monitor.slug": str(monitor_env.monitor.slug), "monitor.incident": str(incident.id), }, + # option 1: timestamp <- received.isoformat(). This seems reasonable, because the cron incident starts not when + # we've processed it, but when relay got the message (which is approximately when the cron failure occurred.) "timestamp": current_timestamp.isoformat(), } @@ -293,6 +295,8 @@ def resolve_incident_group(incident: MonitorIncident, project_id: int) -> None: project_id=project_id, new_status=GroupStatus.RESOLVED, new_substatus=None, + # option 2: make this current_time. Then, because of monitor ordering guarantees, this will always be ahead of + # the start time update_date=incident.resolving_timestamp, ) produce_occurrence_to_kafka( From 3484af518874d89a4be247d451c5cf3a110e588d Mon Sep 17 00:00:00 2001 From: Chris Klochek Date: Tue, 14 Apr 2026 16:46:30 -0400 Subject: [PATCH 2/2] fix(uptime): fix cron monitor incident timing inconsistency Ensure we use the same kind of 'time' for timestamps when computing the start and end of cron open periods. We currently use the consumer processing time for the start of an open period, but use the relay time for the close of it. In the event that a cron start is massively delayed due to backlogging, we wind up with the end coming before the start. 
This corrects that inconsistency by using the relay time for the open period start. --- src/sentry/monitors/logic/incident_occurrence.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/sentry/monitors/logic/incident_occurrence.py b/src/sentry/monitors/logic/incident_occurrence.py index 7bd1a670107b59..547f2b33f48800 100644 --- a/src/sentry/monitors/logic/incident_occurrence.py +++ b/src/sentry/monitors/logic/incident_occurrence.py @@ -4,7 +4,7 @@ import uuid from collections import Counter from collections.abc import Mapping, Sequence -from datetime import datetime, timezone +from datetime import datetime from typing import TYPE_CHECKING from arroyo import Topic as ArroyoTopic @@ -138,8 +138,6 @@ def send_incident_occurrence( """ monitor_env = failed_checkin.monitor_environment - current_timestamp = datetime.now(timezone.utc) - # Get last successful check-in to show in evidence display last_successful_checkin_timestamp = "Never" last_successful_checkin = monitor_env.get_last_successful_checkin() @@ -217,9 +215,7 @@ def send_incident_occurrence( "monitor.slug": str(monitor_env.monitor.slug), "monitor.incident": str(incident.id), }, - # option 1: timestamp <- received.isoformat(). This seems reasonable, because the cron incident starts not when - # we've processed it, but when relay got the message (which is approximately when the cron failure occurred.) - "timestamp": current_timestamp.isoformat(), + "timestamp": received.isoformat(), } if trace_id: @@ -295,8 +291,6 @@ def resolve_incident_group(incident: MonitorIncident, project_id: int) -> None: project_id=project_id, new_status=GroupStatus.RESOLVED, new_substatus=None, - # option 2: make this current_time. Then, because of monitor ordering guarantees, this will always be ahead of - # the start time update_date=incident.resolving_timestamp, ) produce_occurrence_to_kafka(