diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index dd44a5b6f65628..1b0a2b3253dcdb 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -15,6 +15,7 @@ from sentry.models.organization import Organization from sentry.models.repository import Repository from sentry.shared_integrations.exceptions import IntegrationError +from sentry.utils.cursors import Cursor, CursorResult class IntegrationRepository(TypedDict): @@ -54,6 +55,13 @@ def get( installation has access to, filtering locally instead of using the provider's search API which may return results beyond the installation's scope. + :qparam int per_page: When present (without ``search``), enables cursor-based + pagination. Providers that support paginated browsing return + one page of results with ``Link`` headers. Providers that + don't support it fall back to returning the full list. + The paginated path always returns installation-accessible + repos (``accessibleOnly`` is ignored). + :qparam string cursor: Pagination cursor (only used when ``per_page`` is set). """ integration = self.get_integration(organization.id, integration_id) @@ -71,19 +79,41 @@ def get( search = request.GET.get("search") accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true" - try: - repositories = install.get_repositories( - search, - accessible_only=accessible_only, - use_cache=accessible_only and bool(search), - ) - except (IntegrationError, IdentityNotValid) as e: - return self.respond({"detail": str(e)}, status=400) + # When per_page is present and there's no search query, + # try the paginated path. This lets pagination-aware callers + # (e.g. the SCM onboarding repo selector) get fast page-at-a-time + # results, while existing callers that don't send per_page + # continue to receive the full list. + paginate = "per_page" in request.GET and not search + if paginate: + per_page = self.get_per_page(request) + cursor = self.get_cursor_from_request(request) + offset = max(0, cursor.offset) if cursor is not None else 0 + try: + repositories, has_next = install.get_repositories_paginated( + offset=offset, per_page=per_page + ) + except NotImplementedError: + paginate = False + except (IntegrationError, IdentityNotValid) as e: + return self.respond({"detail": str(e)}, status=400) + + if not paginate: + try: + repositories = install.get_repositories( + search, + accessible_only=accessible_only, + use_cache=accessible_only and bool(search), + ) + except (IntegrationError, IdentityNotValid) as e: + return self.respond({"detail": str(e)}, status=400) + has_next = False installable_only = request.GET.get("installableOnly", "false").lower() == "true" - # Include a repository if the request is for all repositories, or if we want - # installable-only repositories and the repository isn't already installed + # installableOnly filtering happens after pagination, so pages + # may contain fewer items than per_page when installed repos are + # excluded. Acceptable for infinite-scroll consumers. serialized_repositories = [ IntegrationRepository( name=repo["name"], @@ -95,8 +125,19 @@ def get( for repo in repositories if not installable_only or repo["identifier"] not in installed_repo_names ] - return self.respond( + + response = self.respond( {"repos": serialized_repositories, "searchable": install.repo_search} ) + if paginate and (has_next or offset > 0): + cursor_result: CursorResult[IntegrationRepository] = CursorResult( + results=[], + prev=Cursor(0, max(0, offset - per_page), True, offset > 0), + next=Cursor(0, offset + per_page, False, has_next), + ) + self.add_cursor_headers(request, response, cursor_result) + + return response + return self.respond({"detail": "Repositories not supported"}, status=400) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 3825c82669cbcf..f2862dc1d41d00 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -558,6 +558,26 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + def get_repos_page( + self, page: int = 1, per_page: int = 100 + ) -> tuple[list[dict[str, Any]], int]: + """ + Fetch a single page of repositories accessible to this installation. + + Returns (repositories, total_count). + https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation + """ + with SCMIntegrationInteractionEvent( + interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES, + provider_key=self.integration_name, + integration_id=self.integration.id, + ).capture(): + response = self.get( + "/installation/repositories", + params={"per_page": per_page, "page": page}, + ) + return response["repositories"], response["total_count"] + def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ Return all repos accessible to this installation, cached in diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index f4f92a9c4fd2e8..6958aa70eafcc3 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -341,17 +341,6 @@ def get_repositories( """ client = self.get_client() - def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: - return [ - { - "name": i["name"], - "identifier": i["full_name"], - "external_id": self.get_repo_external_id(i), - "default_branch": i.get("default_branch"), - } - for i in raw_repos - ] - def _get_all_repos(): if use_cache: return client.get_repos_cached() @@ -359,12 +348,12 @@ def _get_all_repos(): if not query: all_repos = _get_all_repos() - return to_repo_info(r for r in all_repos if not r.get("archived")) + return self._to_repo_info(r for r in all_repos if not r.get("archived")) if accessible_only: all_repos = _get_all_repos() query_lower = query.lower() - return to_repo_info( + return self._to_repo_info( r for r in all_repos if not r.get("archived") and query_lower in r["full_name"].lower() @@ -373,7 +362,38 @@ def _get_all_repos(): assert not use_cache, "use_cache is not supported with the Search API path" full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) - return to_repo_info(response.get("items", [])) + return self._to_repo_info(response.get("items", [])) + + def _to_repo_info(self, raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: + return [ + { + "name": i["name"], + "identifier": i["full_name"], + "external_id": self.get_repo_external_id(i), + "default_branch": i.get("default_branch"), + } + for i in raw_repos + ] + + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool]: + """Fetch a single page of repos from the GitHub API. + + Converts the cursor offset to a GitHub page number and makes + one API call per page request. + """ + client = self.get_client() + page_number = (offset // per_page) + 1 + repos, total_count = client.get_repos_page(page=page_number, per_page=per_page) + active_repos = [r for r in repos if not r.get("archived")] + # total_count includes archived repos, so has_next may overestimate + # and pages may contain fewer than per_page items. Acceptable for + # infinite-scroll consumers (worst case: one extra empty fetch). + has_next = (page_number * per_page) < total_count + return self._to_repo_info(active_repos), has_next def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index 731f99d2cfc390..5dfbd817ed2d6d 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -87,6 +87,21 @@ def get_repositories( """ raise NotImplementedError + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool]: + """ + Return a page of repositories and whether more pages exist. + + Returns ``(repos, has_next)``. Providers that don't support + paginated browsing should leave this unimplemented; the + endpoint catches ``NotImplementedError`` and falls back to + ``get_repositories()``. + """ + raise NotImplementedError + ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient") diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 48c46a02548e43..8b00f3e4bb3c54 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -1,8 +1,25 @@ +from typing import Any from unittest.mock import MagicMock, patch from sentry.testutils.cases import APITestCase +def _make_github_repo( + id: int, + name: str, + full_name: str, + default_branch: str | None = "main", + archived: bool = False, +) -> dict[str, Any]: + return { + "id": id, + "name": name, + "full_name": full_name, + "default_branch": default_branch, + "archived": archived, + } + + class OrganizationIntegrationReposTest(APITestCase): def setUp(self) -> None: super().setUp() @@ -335,3 +352,198 @@ def test_no_repository_method(self) -> None: response = self.client.get(path, format="json") assert response.status_code == 400 + + +ALL_REPOS = [_make_github_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] +TOTAL_COUNT = len(ALL_REPOS) +MOCK_TARGET = "sentry.integrations.github.client.GitHubBaseClient.get_repos_page" + + +class OrganizationIntegrationReposPaginatedTest(APITestCase): + """Tests for cursor-based pagination via GitHub API page params.""" + + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.org = self.create_organization(owner=self.user, name="baz") + self.project = self.create_project(organization=self.org) + self.integration = self.create_integration( + organization=self.org, provider="github", name="Example", external_id="github:1" + ) + self.path = ( + f"/api/0/organizations/{self.org.slug}/integrations/{self.integration.id}/repos/" + ) + + @patch(MOCK_TARGET) + def test_first_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT) + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=1, per_page=2) + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-1" + assert repos[1]["identifier"] == "Example/repo-2" + assert response.data["searchable"] is True + assert 'rel="next"' in response["Link"] + assert 'results="true"' in response["Link"].split("next")[1] + + @patch(MOCK_TARGET) + def test_second_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[2:4], TOTAL_COUNT) + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json" + ) + + assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=2, per_page=2) + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-3" + assert repos[1]["identifier"] == "Example/repo-4" + + @patch(MOCK_TARGET) + def test_last_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[4:], TOTAL_COUNT) + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json" + ) + + assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=3, per_page=2) + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/repo-5" + link = response["Link"] + next_part = link.split("next")[1] + assert 'results="false"' in next_part + + @patch(MOCK_TARGET) + def test_excludes_archived(self, mock_page: MagicMock) -> None: + mock_page.return_value = ( + [ + _make_github_repo(1, "active", "Example/active"), + _make_github_repo(2, "archived", "Example/archived", archived=True), + ], + 2, + ) + response = self.client.get(self.path, data={"per_page": "100"}, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/active" + + @patch(MOCK_TARGET) + def test_installable_only(self, mock_page: MagicMock) -> None: + mock_page.return_value = ( + [ + _make_github_repo(1, "installed-repo", "Example/installed-repo"), + _make_github_repo(2, "available-repo", "Example/available-repo"), + ], + 2, + ) + self.create_repo( + project=self.project, + integration_id=self.integration.id, + name="Example/installed-repo", + ) + response = self.client.get( + self.path, data={"per_page": "100", "installableOnly": "true"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/available-repo" + assert repos[0]["isInstalled"] is False + + @patch(MOCK_TARGET) + def test_no_cursor_on_single_page(self, mock_page: MagicMock) -> None: + """When all repos fit in one page, no Link header is added.""" + mock_page.return_value = ([_make_github_repo(1, "repo-1", "Example/repo-1")], 1) + response = self.client.get(self.path, data={"per_page": "100"}, format="json") + + assert response.status_code == 200, response.content + assert "Link" not in response + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + ) + def test_without_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: + """Without per_page, existing behavior: full list, no pagination.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get(self.path, format="json") + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with(None, accessible_only=False, use_cache=False) + assert "Link" not in response + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + ) + def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: + """When search is present, per_page is ignored -- full list returned.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get( + self.path, data={"search": "repo", "per_page": "2"}, format="json" + ) + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with("repo", accessible_only=False, use_cache=False) + assert "Link" not in response + + def test_per_page_invalid_returns_400(self) -> None: + """Invalid per_page values (0, negative, non-numeric, >100) return 400.""" + for value in ("0", "-1", "abc", "200"): + response = self.client.get(self.path, data={"per_page": value}, format="json") + assert response.status_code == 400, f"per_page={value} should return 400" + + @patch(MOCK_TARGET) + def test_negative_cursor_offset_clamped_to_zero(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT) + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:-5:0"}, format="json" + ) + + assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=1, per_page=2) + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-1" + + @patch(MOCK_TARGET) + def test_integration_error_returns_400(self, mock_page: MagicMock) -> None: + from sentry.shared_integrations.exceptions import IntegrationError + + mock_page.side_effect = IntegrationError("token revoked") + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 400 + assert response.data["detail"] == "token revoked" + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories_paginated", + side_effect=NotImplementedError, + ) + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + ) + def test_not_implemented_falls_back_to_full_list( + self, get_repositories: MagicMock, _mock_paginated: MagicMock + ) -> None: + """When a provider raises NotImplementedError, per_page falls back to get_repositories.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with(None, accessible_only=False, use_cache=False) + assert len(response.data["repos"]) == 1 + assert "Link" not in response