-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
feat: Add repo indexing job #112136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add repo indexing job #112136
Changes from all commits
ebc8938
c6b7523
8ece1bf
b7850b5
a5ed0a4
1d53649
3243fdb
4abf5aa
f8a59de
58b0a70
e1ab127
f63942d
f3b33d2
cf5c5a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,10 @@ | |
| from sentry.models.organization import Organization | ||
| from sentry.models.project import Project | ||
| from sentry.search.events.types import SnubaParams | ||
| from sentry.seer.autofix.utils import ( | ||
| bulk_get_project_preferences, | ||
| get_autofix_repos_from_project_code_mappings, | ||
| ) | ||
| from sentry.seer.explorer.context_engine_utils import ( | ||
| EVENT_COUNT_LOOKBACK_DAYS, | ||
| ProjectEventCounts, | ||
|
|
@@ -30,12 +34,15 @@ | |
| ) | ||
| from sentry.seer.models import SeerApiError | ||
| from sentry.seer.signed_seer_api import ( | ||
| ExplorerIndexOrgRepoRequest, | ||
| ExplorerIndexSentryKnowledgeRequest, | ||
| OrgProjectKnowledgeIndexRequest, | ||
| OrgProjectKnowledgeProjectData, | ||
| RepoDetails, | ||
| SeerViewerContext, | ||
| make_index_sentry_knowledge_request, | ||
| make_org_project_knowledge_index_request, | ||
| make_org_repo_knowledge_index_request, | ||
| ) | ||
| from sentry.tasks.base import instrumented_task | ||
| from sentry.taskworker.namespaces import seer_tasks | ||
|
|
@@ -213,6 +220,90 @@ def build_service_map(organization_id: int, *args, **kwargs) -> None: | |
| raise | ||
|
|
||
|
|
||
| @instrumented_task( | ||
| name="sentry.tasks.seer.context_engine_index.index_repos", | ||
| namespace=seer_tasks, | ||
| processing_deadline_duration=10 * 60, # 10 minutes | ||
| retry=Retry(times=3, on=(SeerApiError,), delay=60), | ||
| ) | ||
| def index_repos(organization_id: int, *args, **kwargs) -> None: | ||
| if not options.get("explorer.context_engine_indexing.enable"): | ||
| logger.info("explorer.context_engine_indexing.enable flag is disabled") | ||
| return | ||
|
|
||
| try: | ||
| organization = Organization.objects.get(id=organization_id) | ||
| except Organization.DoesNotExist: | ||
| logger.error("Organization not found", extra={"org_id": organization_id}) | ||
| return | ||
|
|
||
| if not features.has("organizations:context-engine-experiments", organization): | ||
| logger.info("organizations:context-engine-experiments flag is disabled") | ||
| return | ||
|
|
||
| logger.info( | ||
| "Starting repo index task", | ||
| extra={"org_id": organization_id}, | ||
| ) | ||
|
|
||
| projects = list( | ||
| Project.objects.filter(organization_id=organization_id, status=ObjectStatus.ACTIVE) | ||
| ) | ||
| project_map = {p.id: p for p in projects} | ||
|
|
||
| if not project_map: | ||
| logger.info("No projects found for organization", extra={"org_id": organization_id}) | ||
| return | ||
|
|
||
| org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {} | ||
|
|
||
| preferences_by_id = bulk_get_project_preferences(organization_id, list(project_map.keys())) | ||
|
sentry-warden[bot] marked this conversation as resolved.
sentry[bot] marked this conversation as resolved.
|
||
|
|
||
| for project_id, project in project_map.items(): | ||
| existing_pref = preferences_by_id.get(str(project_id)) | ||
| if not existing_pref: | ||
| continue | ||
|
|
||
| project_pref_repos = existing_pref.get("repositories") or [] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NoneType AttributeError when project has no preferences (High Severity)
Reviewed by Cursor Bugbot for commit 3243fdb. Configure here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should skip projects that don't have preferences set up. If a project does not have preferences, then customers basically can't use Seer for that project. |
||
|
|
||
| autofix_repos = get_autofix_repos_from_project_code_mappings(project) | ||
| # Use autofix repos to get repo languages | ||
| language_map: dict[tuple[str, str, str], list[str]] = {} | ||
| for autofix_repo in autofix_repos: | ||
| key = (autofix_repo["provider"], autofix_repo["owner"], autofix_repo["name"]) | ||
| language_map[key] = autofix_repo["languages"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: The […] Suggested Fix: Use the […] Prompt for AI Agent |
||
|
|
||
| for repo in project_pref_repos: | ||
| key = (repo["provider"], repo["owner"], repo["name"]) | ||
| if key in org_repo_definitions: | ||
| repo_definition = org_repo_definitions[key] | ||
| repo_definition["project_ids"].append(project_id) | ||
|
Contributor
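There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Repo languages lost during cross-project deduplication (Low Severity). When a repo already exists in `org_repo_definitions`, only the current `project_id` is appended to its `project_ids`; the languages found for that repo in the current project's `language_map` are not merged into the existing entry. Additional Locations (1). Reviewed by Cursor Bugbot for commit e1ab127. Configure here. |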
||
| else: | ||
| org_repo_definitions[key] = { | ||
| "project_ids": [project_id], | ||
| "provider": repo["provider"], | ||
| "owner": repo["owner"], | ||
| "name": repo["name"], | ||
| "external_id": repo["external_id"], | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: The code unsafely accesses keys on a raw dictionary from an API response, which will raise a `KeyError` if any of those keys is missing. Suggested Fix: Use the safe `.get()` accessor for these keys. Prompt for AI Agent |
||
| "languages": language_map.get(key, []), | ||
| "integration_id": repo.get("integration_id"), | ||
| } | ||
|
|
||
| viewer_context = SeerViewerContext(organization_id=organization_id) | ||
| response = make_org_repo_knowledge_index_request( | ||
| ExplorerIndexOrgRepoRequest( | ||
| org_id=organization.id, repos=list(org_repo_definitions.values()) | ||
| ), | ||
| timeout=30, | ||
| viewer_context=viewer_context, | ||
| ) | ||
|
cursor[bot] marked this conversation as resolved.
|
||
|
|
||
| if response.status >= 400: | ||
| raise SeerApiError("Seer request failed", response.status) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing early return when no repos are collected (Low Severity). The indexing request is still sent to Seer when `org_repo_definitions` is empty. Reviewed by Cursor Bugbot for commit cf5c5a4. Configure here. |
||
|
|
||
| logger.info("Successfully indexed repos for org", extra={"org_id": organization_id}) | ||
|
|
||
|
|
||
| def get_allowed_org_ids_context_engine_indexing() -> list[int]: | ||
| """ | ||
| Get the list of allowed organizations for context engine indexing. | ||
|
|
@@ -283,12 +374,17 @@ def schedule_context_engine_indexing_tasks() -> None: | |
| return | ||
|
|
||
| allowed_org_ids = get_allowed_org_ids_context_engine_indexing() | ||
| now = datetime.now(UTC) | ||
|
|
||
| dispatched = 0 | ||
| for org_id in allowed_org_ids: | ||
| try: | ||
| index_org_project_knowledge.apply_async(args=[org_id]) | ||
| build_service_map.apply_async(args=[org_id]) | ||
|
|
||
| if now.weekday() == 6: # Sunday | ||
| index_repos.apply_async(args=[org_id]) | ||
|
|
||
| dispatched += 1 | ||
| except Exception: | ||
| logger.exception( | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.