Skip to content

Commit c94c014

Browse files
shruthilayaj and claude
authored and committed
feat: Add repo indexing job (#112136)
Schedule a repo indexing job for the context engine. This is behind a new "experimental" feature flag so we can see how this context works out on Sentry Seer Explorer runs. The index job only runs on Sundays because we don't want to eat into GitHub API quotas and interfere with code review and autofix. Depends on: getsentry/seer#5594. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4917ad0 commit c94c014

File tree

6 files changed

+433
-8
lines changed

6 files changed

+433
-8
lines changed

src/sentry/features/temporary.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
302302
manager.add("organizations:seer-explorer-streaming", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
303303
# Enable context engine for Seer Explorer
304304
manager.add("organizations:seer-explorer-context-engine", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
305+
# Enable context engine experimental contexts
306+
manager.add("organizations:context-engine-experiments", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
305307
# Enable frontend override for context engine (only for AI/ML/Reasoning platform team)
306308
manager.add("organizations:seer-explorer-context-engine-allow-fe-override", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
307309
# Enable frontend override UI component for context engine (only for AI/ML/Reasoning platform team)

src/sentry/seer/autofix/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,7 @@ def get_autofix_repos_from_project_code_mappings(
789789
"owner": repo_name_sections[0],
790790
"name": "/".join(repo_name_sections[1:]),
791791
"external_id": repo.external_id,
792+
"languages": repo.languages or [],
792793
}
793794
repo_key = (repo_dict["provider"], repo_dict["owner"], repo_dict["name"])
794795

src/sentry/seer/signed_seer_api.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,35 @@ class LlmGenerateRequest(TypedDict):
226226
response_schema: NotRequired[dict[str, Any]]
227227

228228

229+
class RepoDetails(TypedDict):
230+
project_ids: list[int]
231+
provider: str
232+
owner: str
233+
name: str
234+
external_id: str
235+
languages: list[str]
236+
integration_id: NotRequired[str | None]
237+
238+
239+
class ExplorerIndexOrgRepoRequest(TypedDict):
    """Request body for the org-repo knowledge index endpoint."""

    # ID of the organization whose repositories are being indexed.
    org_id: int

    # One entry per repository to index.
    repos: list[RepoDetails]
242+
243+
244+
def make_org_repo_knowledge_index_request(
    body: ExplorerIndexOrgRepoRequest,
    timeout: int | float | None = None,
    viewer_context: SeerViewerContext | None = None,
) -> BaseHTTPResponse:
    """
    Send a signed org-repo knowledge index request to Seer.

    :param body: Request payload; serialized with ``orjson`` before sending.
    :param timeout: Passed through unchanged to the signed request helper.
    :param viewer_context: Passed through unchanged to the signed request helper.
    :return: The response returned by ``make_signed_seer_api_request``.
    """
    endpoint = "/v1/automation/explorer/index/org-repo-knowledge"
    payload = orjson.dumps(body)

    return make_signed_seer_api_request(
        seer_autofix_default_connection_pool,
        endpoint,
        body=payload,
        timeout=timeout,
        viewer_context=viewer_context,
    )
256+
257+
229258
def make_org_project_knowledge_index_request(
230259
body: OrgProjectKnowledgeIndexRequest,
231260
timeout: int | float | None = None,

src/sentry/tasks/seer/context_engine_index.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
from sentry.models.organization import Organization
1515
from sentry.models.project import Project
1616
from sentry.search.events.types import SnubaParams
17+
from sentry.seer.autofix.utils import (
18+
bulk_get_project_preferences,
19+
get_autofix_repos_from_project_code_mappings,
20+
)
1721
from sentry.seer.explorer.context_engine_utils import (
1822
EVENT_COUNT_LOOKBACK_DAYS,
1923
ProjectEventCounts,
@@ -30,12 +34,15 @@
3034
)
3135
from sentry.seer.models import SeerApiError
3236
from sentry.seer.signed_seer_api import (
37+
ExplorerIndexOrgRepoRequest,
3338
ExplorerIndexSentryKnowledgeRequest,
3439
OrgProjectKnowledgeIndexRequest,
3540
OrgProjectKnowledgeProjectData,
41+
RepoDetails,
3642
SeerViewerContext,
3743
make_index_sentry_knowledge_request,
3844
make_org_project_knowledge_index_request,
45+
make_org_repo_knowledge_index_request,
3946
)
4047
from sentry.tasks.base import instrumented_task
4148
from sentry.taskworker.namespaces import seer_tasks
@@ -213,6 +220,90 @@ def build_service_map(organization_id: int, *args, **kwargs) -> None:
213220
raise
214221

215222

223+
@instrumented_task(
    name="sentry.tasks.seer.context_engine_index.index_repos",
    namespace=seer_tasks,
    processing_deadline_duration=10 * 60,  # 10 minutes
    retry=Retry(times=3, on=(SeerApiError,), delay=60),
)
def index_repos(organization_id: int, *args, **kwargs) -> None:
    """
    Collect the repositories configured in the organization's project
    preferences and send them to Seer for repo knowledge indexing.

    The task returns without calling Seer when any of the following holds:
    the ``explorer.context_engine_indexing.enable`` option is off, the
    organization does not exist, the ``organizations:context-engine-experiments``
    feature is disabled for it, or it has no active projects.

    Repositories are de-duplicated across projects by
    ``(provider, owner, name)``; each entry records every project that lists
    the repository. Languages come from the project's code mappings.

    :param organization_id: ID of the organization to index.
    :raises SeerApiError: If Seer responds with a status code >= 400. The
        task's retry policy is configured on this exception type.
    """
    if not options.get("explorer.context_engine_indexing.enable"):
        logger.info("explorer.context_engine_indexing.enable flag is disabled")
        return

    try:
        organization = Organization.objects.get(id=organization_id)
    except Organization.DoesNotExist:
        logger.error("Organization not found", extra={"org_id": organization_id})
        return

    if not features.has("organizations:context-engine-experiments", organization):
        logger.info("organizations:context-engine-experiments flag is disabled")
        return

    logger.info(
        "Starting repo index task",
        extra={"org_id": organization_id},
    )

    projects = list(
        Project.objects.filter(organization_id=organization_id, status=ObjectStatus.ACTIVE)
    )
    project_map = {p.id: p for p in projects}

    if not project_map:
        logger.info("No projects found for organization", extra={"org_id": organization_id})
        return

    org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {}

    preferences_by_id = bulk_get_project_preferences(organization_id, list(project_map.keys()))

    for project_id, project in project_map.items():
        # Preferences are looked up by the stringified project id.
        existing_pref = preferences_by_id.get(str(project_id))
        if not existing_pref:
            continue

        project_pref_repos = existing_pref.get("repositories") or []
        if not project_pref_repos:
            # Nothing to index for this project, so skip the code-mapping lookup.
            continue

        # Use autofix repos (from code mappings) to get repo languages.
        language_map: dict[tuple[str, str, str], list[str]] = {
            (autofix_repo["provider"], autofix_repo["owner"], autofix_repo["name"]): autofix_repo[
                "languages"
            ]
            for autofix_repo in get_autofix_repos_from_project_code_mappings(project)
        }

        for repo in project_pref_repos:
            key = (repo["provider"], repo["owner"], repo["name"])
            languages = language_map.get(key, [])

            repo_definition = org_repo_definitions.get(key)
            if repo_definition is None:
                org_repo_definitions[key] = {
                    "project_ids": [project_id],
                    "provider": repo["provider"],
                    "owner": repo["owner"],
                    "name": repo["name"],
                    "external_id": repo["external_id"],
                    "languages": languages,
                    "integration_id": repo.get("integration_id"),
                }
                continue

            # A project may list the same repo more than once; record it once.
            if project_id not in repo_definition["project_ids"]:
                repo_definition["project_ids"].append(project_id)

            # The first project that referenced this repo may have had no code
            # mapping for it; backfill languages once any project supplies them.
            if languages and not repo_definition["languages"]:
                repo_definition["languages"] = languages

    viewer_context = SeerViewerContext(organization_id=organization_id)
    response = make_org_repo_knowledge_index_request(
        ExplorerIndexOrgRepoRequest(
            org_id=organization.id, repos=list(org_repo_definitions.values())
        ),
        timeout=30,
        viewer_context=viewer_context,
    )

    if response.status >= 400:
        raise SeerApiError("Seer request failed", response.status)

    logger.info("Successfully indexed repos for org", extra={"org_id": organization_id})
305+
306+
216307
def get_allowed_org_ids_context_engine_indexing() -> list[int]:
217308
"""
218309
Get the list of allowed organizations for context engine indexing.
@@ -283,12 +374,17 @@ def schedule_context_engine_indexing_tasks() -> None:
283374
return
284375

285376
allowed_org_ids = get_allowed_org_ids_context_engine_indexing()
377+
now = datetime.now(UTC)
286378

287379
dispatched = 0
288380
for org_id in allowed_org_ids:
289381
try:
290382
index_org_project_knowledge.apply_async(args=[org_id])
291383
build_service_map.apply_async(args=[org_id])
384+
385+
if now.weekday() == 6: # Sunday
386+
index_repos.apply_async(args=[org_id])
387+
292388
dispatched += 1
293389
except Exception:
294390
logger.exception(

tests/sentry/autofix/test_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def test_get_repos_from_project_code_mappings_with_data(self) -> None:
4444
"owner": "getsentry",
4545
"name": "sentry",
4646
"external_id": "123",
47+
"languages": [],
4748
}
4849
]
4950
assert repos == expected_repos

0 commit comments

Comments
 (0)