Skip to content
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:seer-explorer-streaming", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine for Seer Explorer
manager.add("organizations:seer-explorer-context-engine", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine experimental contexts
manager.add("organizations:context-engine-experiments", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable frontend override for context engine (only for AI/ML/Reasoning platform team)
manager.add("organizations:seer-explorer-context-engine-allow-fe-override", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable frontend override UI component for context engine (only for AI/ML/Reasoning platform team)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/seer/autofix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,7 @@ def get_autofix_repos_from_project_code_mappings(
"owner": repo_name_sections[0],
"name": "/".join(repo_name_sections[1:]),
"external_id": repo.external_id,
"languages": repo.languages or [],
}
repo_key = (repo_dict["provider"], repo_dict["owner"], repo_dict["name"])

Expand Down
29 changes: 29 additions & 0 deletions src/sentry/seer/signed_seer_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,35 @@ class LlmGenerateRequest(TypedDict):
response_schema: NotRequired[dict[str, Any]]


class RepoDetails(TypedDict):
project_ids: list[int]
provider: str
owner: str
name: str
external_id: str
languages: list[str]
integration_id: NotRequired[str | None]


class ExplorerIndexOrgRepoRequest(TypedDict):
    """Request body for the org repo knowledge index endpoint."""

    # ID of the organization whose repositories are being indexed.
    org_id: int
    # One entry per repository to index.
    repos: list[RepoDetails]


def make_org_repo_knowledge_index_request(
    body: ExplorerIndexOrgRepoRequest,
    timeout: int | float | None = None,
    viewer_context: SeerViewerContext | None = None,
) -> BaseHTTPResponse:
    """Send a signed org repo knowledge index request to Seer.

    Args:
        body: Organization ID and repository details to index; serialized
            with ``orjson`` before sending.
        timeout: Forwarded unchanged to ``make_signed_seer_api_request``.
        viewer_context: Forwarded unchanged to ``make_signed_seer_api_request``.

    Returns:
        The response returned by ``make_signed_seer_api_request``.
    """
    endpoint = "/v1/automation/explorer/index/org-repo-knowledge"
    serialized_body = orjson.dumps(body)
    return make_signed_seer_api_request(
        seer_autofix_default_connection_pool,
        endpoint,
        body=serialized_body,
        timeout=timeout,
        viewer_context=viewer_context,
    )
Comment thread
cursor[bot] marked this conversation as resolved.


def make_org_project_knowledge_index_request(
body: OrgProjectKnowledgeIndexRequest,
timeout: int | float | None = None,
Expand Down
96 changes: 96 additions & 0 deletions src/sentry/tasks/seer/context_engine_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.search.events.types import SnubaParams
from sentry.seer.autofix.utils import (
bulk_get_project_preferences,
get_autofix_repos_from_project_code_mappings,
)
from sentry.seer.explorer.context_engine_utils import (
EVENT_COUNT_LOOKBACK_DAYS,
ProjectEventCounts,
Expand All @@ -30,12 +34,15 @@
)
from sentry.seer.models import SeerApiError
from sentry.seer.signed_seer_api import (
ExplorerIndexOrgRepoRequest,
ExplorerIndexSentryKnowledgeRequest,
OrgProjectKnowledgeIndexRequest,
OrgProjectKnowledgeProjectData,
RepoDetails,
SeerViewerContext,
make_index_sentry_knowledge_request,
make_org_project_knowledge_index_request,
make_org_repo_knowledge_index_request,
)
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import seer_tasks
Expand Down Expand Up @@ -213,6 +220,90 @@ def build_service_map(organization_id: int, *args, **kwargs) -> None:
raise


@instrumented_task(
    name="sentry.tasks.seer.context_engine_index.index_repos",
    namespace=seer_tasks,
    processing_deadline_duration=10 * 60,  # 10 minutes
    retry=Retry(times=3, on=(SeerApiError,), delay=60),
)
def index_repos(organization_id: int, *args, **kwargs) -> None:
    """Collect an organization's Seer preference repos and send them to Seer for indexing.

    Repositories are gathered from the Seer preferences of every active project
    in the organization and de-duplicated on ``(provider, owner, name)``; each
    entry records every project that references it. Languages come from the
    project's autofix code-mapping repos when the same repo appears there.

    The task is a no-op when the ``explorer.context_engine_indexing.enable``
    option or the ``organizations:context-engine-experiments`` feature is off,
    when the organization does not exist, when it has no active projects, or
    when no repositories were collected.

    Args:
        organization_id: ID of the organization to index.
        *args: Ignored.
        **kwargs: Ignored.

    Raises:
        SeerApiError: If Seer responds with a status >= 400. The task's retry
            policy is configured for this exception.
    """
    if not options.get("explorer.context_engine_indexing.enable"):
        logger.info("explorer.context_engine_indexing.enable flag is disabled")
        return

    try:
        organization = Organization.objects.get(id=organization_id)
    except Organization.DoesNotExist:
        logger.error("Organization not found", extra={"org_id": organization_id})
        return

    if not features.has("organizations:context-engine-experiments", organization):
        logger.info("organizations:context-engine-experiments flag is disabled")
        return

    logger.info(
        "Starting repo index task",
        extra={"org_id": organization_id},
    )

    projects = list(
        Project.objects.filter(organization_id=organization_id, status=ObjectStatus.ACTIVE)
    )
    project_map = {p.id: p for p in projects}

    if not project_map:
        logger.info("No projects found for organization", extra={"org_id": organization_id})
        return

    org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {}

    preferences_by_id = bulk_get_project_preferences(organization_id, list(project_map.keys()))

    for project_id, project in project_map.items():
        # Preferences are keyed by the stringified project id. Projects with
        # no preferences are skipped on purpose: without preferences a
        # project cannot use Seer.
        existing_pref = preferences_by_id.get(str(project_id))
        if not existing_pref:
            continue

        project_pref_repos = existing_pref.get("repositories") or []
        if not project_pref_repos:
            # The code-mapping lookup below only supplies languages for
            # preference repos, so it is pointless without any.
            continue

        # Use autofix repos to get repo languages.
        language_map: dict[tuple[str, str, str], list[str]] = {}
        for autofix_repo in get_autofix_repos_from_project_code_mappings(project):
            key = (autofix_repo["provider"], autofix_repo["owner"], autofix_repo["name"])
            # Not every autofix repo source is guaranteed to carry "languages"
            # (review flagged repos configured via SEER_AUTOFIX_FORCE_USE_REPOS),
            # so fall back to an empty list instead of raising KeyError.
            language_map[key] = autofix_repo.get("languages") or []

        for repo in project_pref_repos:
            key = (repo["provider"], repo["owner"], repo["name"])
            languages = language_map.get(key, [])

            repo_definition = org_repo_definitions.get(key)
            if repo_definition is None:
                org_repo_definitions[key] = {
                    "project_ids": [project_id],
                    "provider": repo["provider"],
                    "owner": repo["owner"],
                    "name": repo["name"],
                    "external_id": repo["external_id"],
                    # Copy so definitions never alias the per-project map.
                    "languages": list(languages),
                    "integration_id": repo.get("integration_id"),
                }
                continue

            # A project whose preferences list the same repo twice must not
            # be recorded twice.
            if project_id not in repo_definition["project_ids"]:
                repo_definition["project_ids"].append(project_id)

            # Backfill languages when the project that first registered the
            # repo had no language data for it but this project does.
            if languages and not repo_definition["languages"]:
                repo_definition["languages"] = list(languages)

    if not org_repo_definitions:
        # Nothing to index; skip the Seer round-trip, mirroring the
        # no-projects early return above.
        logger.info("No repos found for organization", extra={"org_id": organization_id})
        return

    viewer_context = SeerViewerContext(organization_id=organization_id)
    response = make_org_repo_knowledge_index_request(
        ExplorerIndexOrgRepoRequest(
            org_id=organization.id, repos=list(org_repo_definitions.values())
        ),
        timeout=30,
        viewer_context=viewer_context,
    )

    if response.status >= 400:
        raise SeerApiError("Seer request failed", response.status)

    logger.info("Successfully indexed repos for org", extra={"org_id": organization_id})


def get_allowed_org_ids_context_engine_indexing() -> list[int]:
"""
Get the list of allowed organizations for context engine indexing.
Expand Down Expand Up @@ -283,12 +374,17 @@ def schedule_context_engine_indexing_tasks() -> None:
return

allowed_org_ids = get_allowed_org_ids_context_engine_indexing()
now = datetime.now(UTC)

dispatched = 0
for org_id in allowed_org_ids:
try:
index_org_project_knowledge.apply_async(args=[org_id])
build_service_map.apply_async(args=[org_id])

if now.weekday() == 6: # Sunday
index_repos.apply_async(args=[org_id])

dispatched += 1
except Exception:
logger.exception(
Expand Down
1 change: 1 addition & 0 deletions tests/sentry/autofix/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def test_get_repos_from_project_code_mappings_with_data(self) -> None:
"owner": "getsentry",
"name": "sentry",
"external_id": "123",
"languages": [],
}
]
assert repos == expected_repos
Expand Down
Loading
Loading