Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
"sentry.workflow_engine.tasks.cleanup",
"sentry.tasks.seer.explorer_index",
"sentry.tasks.seer.context_engine_index",
"sentry.tasks.seer.lightweight_rca_cluster",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably name this lightweight_rca_embedding or just lightweight rca?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's like the command - to trigger clustering. I basically treat the task not as a task that generates a lightweight RCA with clustering as a side effect, but as one that purposely triggers clustering, because before we didn't even save the lightweight RCA.
The endpoint I added is even called /cluster-lightweight, so that's like the command here, and I think the name fits. Does that make sense? I don't feel strongly about it though, I just want it all to be coherent.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe it should be more explicit like just "trigger_supergroup_clustering_lightweight"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i also don't feel too strongly, i think a slightly more consistent name would be 'lightweight rca embedding' / 'lightweight rca generation' but i think cluster also fits

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I prefer cluster to both of these. Embedding is kind of technical and is not what the caller really intends, and generation is somewhat inaccurate because of the way we set it up: the point is to cluster by lightweight RCA, not to generate it. We didn't even save the generated RCA until we realized we need it for resummarization, so it's like a side effect now.

Just so we're all on the same page - I am just trying to be consistent with the way I phrased and treated it up until now. I even leaned toward making it all about lightweight RCA generation with the clustering being a side effect, but went the other way around in the API and flow, so I think we should stay consistent.

# Used for tests
"sentry.taskworker.tasks.examples",
)
Expand Down
8 changes: 8 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,14 @@
flags=FLAG_MODIFIABLE_RATE | FLAG_AUTOMATOR_MODIFIABLE,
)

# Supergroups / Lightweight RCA
register(
"supergroups.lightweight-enabled-orgs",
type=Sequence,
default=[],
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# ## sentry.killswitches
#
# The following options are documented in sentry.killswitches in more detail
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/seer/autofix/issue_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
make_signed_seer_api_request,
make_summarize_issue_request,
)
from sentry.seer.supergroups.lightweight_rca import trigger_lightweight_rca
from sentry.seer.supergroups.explorer_lightweight_rca import trigger_explorer_lightweight_rca
from sentry.services import eventstore
from sentry.services.eventstore.models import Event, GroupEvent
from sentry.tasks.base import instrumented_task
Expand Down Expand Up @@ -226,10 +226,10 @@ def _trigger_autofix_task(
stopping_point=stopping_point,
)
try:
trigger_lightweight_rca(group)
trigger_explorer_lightweight_rca(group)
except Exception:
logger.exception(
"lightweight_rca.trigger_error_in_trigger_autofix_task",
"explorer_lightweight_rca.trigger_error_in_trigger_autofix_task",
extra={"group_id": group_id},
)
else:
Expand Down
22 changes: 22 additions & 0 deletions src/sentry/seer/signed_seer_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,14 @@ class SupergroupsEmbeddingRequest(TypedDict):
artifact_data: dict[str, Any]


class LightweightRCAClusterRequest(TypedDict):
group_id: int
issue: dict[str, Any]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
issue: dict[str, Any]
group: dict[str, Any]

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seer has this convention where "issue" is the word used in APIs. I am mimicking the IssueSummary endpoint here, and it's in other places as well. The model used in code there is IssueDetails, so there's this odd split where Issue is the model and group_id is the number it gets...

organization_slug: str
organization_id: int
project_id: int


class SupergroupsGetRequest(TypedDict):
organization_id: int
supergroup_id: int
Expand Down Expand Up @@ -485,6 +493,20 @@ def make_supergroups_embedding_request(
)


def make_lightweight_rca_cluster_request(
body: LightweightRCAClusterRequest,
timeout: int | float | None = None,
viewer_context: SeerViewerContext | None = None,
) -> BaseHTTPResponse:
return make_signed_seer_api_request(
seer_autofix_default_connection_pool,
"/v0/issues/supergroups/cluster-lightweight",
body=orjson.dumps(body, option=orjson.OPT_NON_STR_KEYS),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, what is this orjson option?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's to allow dict keys that are not strings, in this case integers. From what I understand, when we send event data it contains these kinds of keys and it's required that we allow them; the issue summary endpoint does the same thing for the same reason, I believe.

timeout=timeout,
viewer_context=viewer_context,
)


def make_supergroups_get_request(
body: SupergroupsGetRequest,
viewer_context: SeerViewerContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)


def trigger_lightweight_rca(group: Group) -> int | None:
def trigger_explorer_lightweight_rca(group: Group) -> int | None:
"""
Trigger a lightweight Explorer RCA run for the given group.

Expand All @@ -26,7 +26,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
"""
has_feature = features.has("projects:supergroup-lightweight-rca", group.project)
logger.info(
"lightweight_rca.feature_flag_check",
"explorer_lightweight_rca.feature_flag_check",
extra={
"group_id": group.id,
"project_id": group.project.id,
Expand Down Expand Up @@ -66,7 +66,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
)

logger.info(
"lightweight_rca.starting_run",
"explorer_lightweight_rca.starting_run",
extra={
"group_id": group.id,
"project_id": group.project.id,
Expand All @@ -83,7 +83,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
)

logger.info(
"lightweight_rca.run_started",
"explorer_lightweight_rca.run_started",
extra={
"group_id": group.id,
"project_id": group.project.id,
Expand All @@ -94,7 +94,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
return run_id
except Exception:
logger.exception(
"lightweight_rca.trigger_failed",
"explorer_lightweight_rca.trigger_failed",
extra={
"group_id": group.id,
"organization_id": group.organization.id,
Expand Down
68 changes: 68 additions & 0 deletions src/sentry/seer/supergroups/lightweight_rca_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

import logging

from sentry.api.serializers import EventSerializer, serialize
from sentry.eventstore import backend as eventstore
from sentry.models.group import Group
from sentry.seer.models import SeerApiError
Comment thread
sentry[bot] marked this conversation as resolved.
from sentry.seer.signed_seer_api import (
LightweightRCAClusterRequest,
SeerViewerContext,
make_lightweight_rca_cluster_request,
)

logger = logging.getLogger(__name__)


def trigger_lightweight_rca_cluster(group: Group) -> None:
    """
    Ask Seer to run lightweight RCA clustering for ``group``.

    The group's latest event is re-fetched from the event store, serialized, and
    posted to Seer's lightweight clustering endpoint together with basic issue,
    organization and project identifiers. Seer is expected to generate a
    lightweight root cause analysis from that data and cluster the issue into
    supergroups based on embedding similarity.

    Returns without contacting Seer when the group has no latest event or when
    that event cannot be fetched from the event store.

    Raises:
        SeerApiError: if Seer responds with an HTTP status of 400 or above.
    """
    latest_event = group.get_latest_event()
    if not latest_event:
        logger.info("lightweight_rca_cluster.no_event", extra={"group_id": group.id})
        return

    # Re-fetch by id so that only an event the event store can actually return is sent.
    ready_event = eventstore.get_event_by_id(
        group.project.id,
        latest_event.event_id,
        group_id=group.id,
    )
    if not ready_event:
        not_ready_context = {"group_id": group.id, "event_id": latest_event.event_id}
        logger.info("lightweight_rca_cluster.event_not_ready", extra=not_ready_context)
        return

    event_payload = serialize(ready_event, None, EventSerializer())

    request_body = LightweightRCAClusterRequest(
        group_id=group.id,
        issue={
            "id": group.id,
            "title": group.title,
            "short_id": group.qualified_short_id,
            "events": [event_payload],
        },
        organization_slug=group.organization.slug,
        organization_id=group.organization.id,
        project_id=group.project.id,
    )

    response = make_lightweight_rca_cluster_request(
        request_body,
        timeout=30,
        viewer_context=SeerViewerContext(organization_id=group.organization.id),
    )
    if response.status >= 400:
        raise SeerApiError("Lightweight RCA cluster request failed", response.status)

    success_context = {
        "group_id": group.id,
        "project_id": group.project.id,
        "organization_id": group.organization.id,
    }
    logger.info("lightweight_rca_cluster.success", extra=success_context)
17 changes: 17 additions & 0 deletions src/sentry/tasks/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,22 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
)


def kick_off_lightweight_rca_cluster(job: PostProcessJob) -> None:
    """
    Post-process step that schedules lightweight RCA clustering for new groups.

    Does nothing unless the job's group is new and its organization id is listed
    in the ``supergroups.lightweight-enabled-orgs`` option; otherwise it enqueues
    ``trigger_lightweight_rca_cluster_task`` with the group id.
    """
    # Imported inside the function, as in the original step.
    from sentry.tasks.seer.lightweight_rca_cluster import trigger_lightweight_rca_cluster_task

    is_new_group = job["group_state"]["is_new"]
    if not is_new_group:
        return

    group = job["event"].group

    allowed_org_ids: list[int] = options.get("supergroups.lightweight-enabled-orgs")
    if group.organization.id in allowed_org_ids:
        trigger_lightweight_rca_cluster_task.delay(group.id)


GROUP_CATEGORY_POST_PROCESS_PIPELINE: dict[
GroupCategory, list[Callable[[PostProcessJob], None]]
] = {
Expand All @@ -1596,6 +1612,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
handle_owner_assignment,
handle_auto_assignment,
kick_off_seer_automation,
kick_off_lightweight_rca_cluster,
process_workflow_engine_issue_alerts,
process_resource_change_bounds,
process_data_forwarding,
Expand Down
32 changes: 32 additions & 0 deletions src/sentry/tasks/seer/lightweight_rca_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging

from sentry.models.group import Group
from sentry.seer.supergroups.lightweight_rca_cluster import trigger_lightweight_rca_cluster
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import ingest_errors_tasks

logger = logging.getLogger(__name__)


@instrumented_task(
name="sentry.tasks.seer.lightweight_rca_cluster.trigger_lightweight_rca_cluster_task",
namespace=ingest_errors_tasks,
)
def trigger_lightweight_rca_cluster_task(group_id: int, **kwargs) -> None:
try:
group = Group.objects.get(id=group_id)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we have the group in post_process when we pass it here? do we need another fetch?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's standard for these tasks: you can't pass a model into them, so they have to refetch it.

except Group.DoesNotExist:
logger.info(
"lightweight_rca_cluster_task.group_not_found",
extra={"group_id": group_id},
)
return

try:
trigger_lightweight_rca_cluster(group)
except Exception:
logger.exception(
"lightweight_rca_cluster_task.failed",
extra={"group_id": group_id},
)
raise
26 changes: 13 additions & 13 deletions tests/sentry/seer/autofix/test_issue_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,14 +960,14 @@ def setUp(self) -> None:
event_data = load_data("python")
self.event = self.store_event(data=event_data, project_id=self.project.id)

@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
@patch("sentry.seer.autofix.issue_summary.trigger_autofix_explorer", return_value=42)
def test_lightweight_rca_called_on_explorer_path(
self,
mock_explorer,
mock_lightweight_rca,
mock_explorer_lightweight_rca,
):
"""trigger_lightweight_rca is called when the explorer path is taken"""
"""trigger_explorer_lightweight_rca is called when the explorer path is taken"""
_trigger_autofix_task(
group_id=self.group.id,
event_id=self.event.event_id,
Expand All @@ -976,18 +976,18 @@ def test_lightweight_rca_called_on_explorer_path(
)

mock_explorer.assert_called_once()
mock_lightweight_rca.assert_called_once_with(self.group)
mock_explorer_lightweight_rca.assert_called_once_with(self.group)

@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
@patch(
"sentry.seer.autofix.issue_summary.trigger_autofix", return_value=Mock(data={"run_id": 42})
)
def test_lightweight_rca_not_called_on_legacy_path(
self,
mock_autofix,
mock_lightweight_rca,
mock_explorer_lightweight_rca,
):
"""trigger_lightweight_rca is NOT called on the legacy autofix path"""
"""trigger_explorer_lightweight_rca is NOT called on the legacy autofix path"""
with self.feature(
{
"organizations:seer-explorer": False,
Expand All @@ -1002,17 +1002,17 @@ def test_lightweight_rca_not_called_on_legacy_path(
)

mock_autofix.assert_called_once()
mock_lightweight_rca.assert_not_called()
mock_explorer_lightweight_rca.assert_not_called()

@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
@patch("sentry.seer.autofix.issue_summary.trigger_autofix_explorer", return_value=42)
def test_lightweight_rca_failure_does_not_block_explorer(
self,
mock_explorer,
mock_lightweight_rca,
mock_explorer_lightweight_rca,
):
"""Failure in trigger_lightweight_rca doesn't prevent the explorer autofix from completing"""
mock_lightweight_rca.side_effect = Exception("lightweight RCA failed")
"""Failure in trigger_explorer_lightweight_rca doesn't prevent the explorer autofix from completing"""
mock_explorer_lightweight_rca.side_effect = Exception("lightweight RCA failed")

_trigger_autofix_task(
group_id=self.group.id,
Expand All @@ -1022,7 +1022,7 @@ def test_lightweight_rca_failure_does_not_block_explorer(
)

mock_explorer.assert_called_once()
mock_lightweight_rca.assert_called_once_with(self.group)
mock_explorer_lightweight_rca.assert_called_once_with(self.group)


class TestFetchUserPreference:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from unittest.mock import MagicMock, patch

from sentry.seer.supergroups.lightweight_rca import trigger_lightweight_rca
from sentry.seer.supergroups.explorer_lightweight_rca import trigger_explorer_lightweight_rca
from sentry.testutils.cases import TestCase


class TestTriggerLightweightRca(TestCase):
class TestTriggerExplorerLightweightRca(TestCase):
def setUp(self) -> None:
super().setUp()
self.user = self.create_user()
Expand All @@ -13,18 +13,18 @@ def setUp(self) -> None:
self.group = self.create_group(project=self.project)

def test_returns_none_when_feature_flag_off(self) -> None:
run_id = trigger_lightweight_rca(self.group)
run_id = trigger_explorer_lightweight_rca(self.group)

assert run_id is None

@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
def test_creates_client_with_correct_params(self, mock_client_cls):
mock_client = MagicMock()
mock_client.start_run.return_value = 42
mock_client_cls.return_value = mock_client

with self.feature("projects:supergroup-lightweight-rca"):
run_id = trigger_lightweight_rca(self.group)
run_id = trigger_explorer_lightweight_rca(self.group)

assert run_id == 42
mock_client_cls.assert_called_once()
Expand All @@ -38,14 +38,14 @@ def test_creates_client_with_correct_params(self, mock_client_cls):
assert kwargs["category_key"] == "lightweight_rca"
assert kwargs["category_value"] == str(self.group.id)

@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
def test_start_run_called_with_correct_params(self, mock_client_cls):
mock_client = MagicMock()
mock_client.start_run.return_value = 42
mock_client_cls.return_value = mock_client

with self.feature("projects:supergroup-lightweight-rca"):
trigger_lightweight_rca(self.group)
trigger_explorer_lightweight_rca(self.group)

mock_client.start_run.assert_called_once()
kwargs = mock_client.start_run.call_args[1]
Expand All @@ -54,11 +54,11 @@ def test_start_run_called_with_correct_params(self, mock_client_cls):
assert kwargs["metadata"] == {"group_id": self.group.id}
assert "root cause" in kwargs["prompt"].lower()

@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
def test_returns_none_on_error(self, mock_client_cls):
mock_client_cls.side_effect = Exception("connection failed")

with self.feature("projects:supergroup-lightweight-rca"):
run_id = trigger_lightweight_rca(self.group)
run_id = trigger_explorer_lightweight_rca(self.group)

assert run_id is None
Loading
Loading