Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sentry.models.organization import Organization
from sentry.models.repository import Repository
from sentry.shared_integrations.exceptions import IntegrationError
from sentry.utils.cursors import Cursor, CursorResult


class IntegrationRepository(TypedDict):
Expand Down Expand Up @@ -54,6 +55,13 @@ def get(
installation has access to, filtering locally instead of
using the provider's search API which may return results
beyond the installation's scope.
:qparam int per_page: When present (without ``search``), enables cursor-based
pagination. Providers that support paginated browsing return
one page of results with ``Link`` headers. Providers that
don't support it fall back to returning the full list.
The paginated path always returns installation-accessible
repos (``accessibleOnly`` is ignored).
:qparam string cursor: Pagination cursor (only used when ``per_page`` is set).
"""
integration = self.get_integration(organization.id, integration_id)

Expand All @@ -71,19 +79,41 @@ def get(
search = request.GET.get("search")
accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true"

try:
repositories = install.get_repositories(
search,
accessible_only=accessible_only,
use_cache=accessible_only and bool(search),
)
except (IntegrationError, IdentityNotValid) as e:
return self.respond({"detail": str(e)}, status=400)
# When per_page is present and there's no search query,
# try the paginated path. This lets pagination-aware callers
# (e.g. the SCM onboarding repo selector) get fast page-at-a-time
# results, while existing callers that don't send per_page
# continue to receive the full list.
paginate = "per_page" in request.GET and not search
if paginate:
per_page = self.get_per_page(request)
cursor = self.get_cursor_from_request(request)
offset = max(0, cursor.offset) if cursor is not None else 0
try:
repositories, has_next = install.get_repositories_paginated(
offset=offset, per_page=per_page
)
except NotImplementedError:
paginate = False
except (IntegrationError, IdentityNotValid) as e:
return self.respond({"detail": str(e)}, status=400)

if not paginate:
try:
repositories = install.get_repositories(
search,
accessible_only=accessible_only,
use_cache=accessible_only and bool(search),
)
except (IntegrationError, IdentityNotValid) as e:
return self.respond({"detail": str(e)}, status=400)
has_next = False

installable_only = request.GET.get("installableOnly", "false").lower() == "true"

# Include a repository if the request is for all repositories, or if we want
# installable-only repositories and the repository isn't already installed
# installableOnly filtering happens after pagination, so pages
# may contain fewer items than per_page when installed repos are
# excluded. Acceptable for infinite-scroll consumers.
serialized_repositories = [
IntegrationRepository(
name=repo["name"],
Expand All @@ -95,8 +125,19 @@ def get(
for repo in repositories
if not installable_only or repo["identifier"] not in installed_repo_names
]
return self.respond(

response = self.respond(
{"repos": serialized_repositories, "searchable": install.repo_search}
)

if paginate and (has_next or offset > 0):
cursor_result: CursorResult[IntegrationRepository] = CursorResult(
results=[],
prev=Cursor(0, max(0, offset - per_page), True, offset > 0),
next=Cursor(0, offset + per_page, False, has_next),
)
self.add_cursor_headers(request, response, cursor_result)

return response

return self.respond({"detail": "Repositories not supported"}, status=400)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth seeing if there's a way to make this use our generic `self.paginate` interface. I don't know if it will work well for an API call like this, but it could be worth a quick try.

20 changes: 20 additions & 0 deletions src/sentry/integrations/github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,26 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
page_number_limit=page_number_limit,
)

def get_repos_page(
    self, page: int = 1, per_page: int = 100
) -> tuple[list[dict[str, Any]], int]:
    """
    Retrieve one page of the repositories this installation can access.

    Issues a single GET to ``/installation/repositories`` with the given
    ``page`` and ``per_page`` query parameters, wrapped in a
    ``GET_REPOSITORIES`` SCM interaction event.

    Returns a ``(repositories, total_count)`` tuple read from the
    ``repositories`` and ``total_count`` keys of the response.
    https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
    """
    interaction = SCMIntegrationInteractionEvent(
        interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES,
        provider_key=self.integration_name,
        integration_id=self.integration.id,
    )
    with interaction.capture():
        query_params = {"per_page": per_page, "page": page}
        payload = self.get("/installation/repositories", params=query_params)
        return payload["repositories"], payload["total_count"]

def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]:
"""
Return all repos accessible to this installation, cached in
Expand Down
48 changes: 34 additions & 14 deletions src/sentry/integrations/github/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,30 +341,19 @@ def get_repositories(
"""
client = self.get_client()

def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
return [
{
"name": i["name"],
"identifier": i["full_name"],
"external_id": self.get_repo_external_id(i),
"default_branch": i.get("default_branch"),
}
for i in raw_repos
]

def _get_all_repos():
if use_cache:
return client.get_repos_cached()
return client.get_repos(page_number_limit=page_number_limit)

if not query:
all_repos = _get_all_repos()
return to_repo_info(r for r in all_repos if not r.get("archived"))
return self._to_repo_info(r for r in all_repos if not r.get("archived"))

if accessible_only:
all_repos = _get_all_repos()
query_lower = query.lower()
return to_repo_info(
return self._to_repo_info(
r
for r in all_repos
if not r.get("archived") and query_lower in r["full_name"].lower()
Expand All @@ -373,7 +362,38 @@ def _get_all_repos():
assert not use_cache, "use_cache is not supported with the Search API path"
full_query = build_repository_query(self.model.metadata, self.model.name, query)
response = client.search_repositories(full_query)
return to_repo_info(response.get("items", []))
return self._to_repo_info(response.get("items", []))

def _to_repo_info(self, raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
    """
    Convert raw repository payloads into ``RepositoryInfo`` dicts.

    For each entry, ``name`` comes from ``"name"``, ``identifier`` from
    ``"full_name"``, ``external_id`` from ``self.get_repo_external_id``,
    and ``default_branch`` from ``"default_branch"`` (``None`` when the
    key is absent).
    """
    repo_infos: list[RepositoryInfo] = []
    for raw in raw_repos:
        repo_infos.append(
            {
                "name": raw["name"],
                "identifier": raw["full_name"],
                "external_id": self.get_repo_external_id(raw),
                "default_branch": raw.get("default_branch"),
            }
        )
    return repo_infos

def get_repositories_paginated(
    self,
    offset: int = 0,
    per_page: int = 100,
) -> tuple[list[RepositoryInfo], bool]:
    """Return one page of non-archived repos and a "more pages" flag.

    The cursor ``offset`` is mapped to a 1-based GitHub page number
    (``offset // per_page + 1``) and a single ``get_repos_page`` call is
    made for that page. Archived repositories are dropped before the
    page is converted with ``_to_repo_info``.
    """
    github_client = self.get_client()
    github_page = 1 + offset // per_page
    raw_page, total_count = github_client.get_repos_page(
        page=github_page, per_page=per_page
    )

    unarchived = []
    for repo in raw_page:
        if repo.get("archived"):
            continue
        unarchived.append(repo)

    # total_count also counts archived repos, so this flag can be
    # optimistic and a page can hold fewer than per_page entries. That is
    # fine for infinite-scroll consumers: at worst they make one extra
    # request that comes back empty.
    more_pages = total_count > github_page * per_page
    return self._to_repo_info(unarchived), more_pages

def get_unmigratable_repositories(self) -> list[RpcRepository]:
accessible_repos = self.get_repositories()
Expand Down
15 changes: 15 additions & 0 deletions src/sentry/integrations/source_code_management/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ def get_repositories(
"""
raise NotImplementedError

def get_repositories_paginated(
    self,
    offset: int = 0,
    per_page: int = 100,
) -> tuple[list[RepositoryInfo], bool]:
    """
    Fetch a single page of repositories, reporting whether more remain.

    The result is a ``(repos, has_next)`` pair. This default
    implementation always raises ``NotImplementedError``; providers
    without paginated browsing should simply not override it, since the
    endpoint catches ``NotImplementedError`` and falls back to
    ``get_repositories()``.
    """
    raise NotImplementedError


ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient")

Expand Down
Loading
Loading