From a384179f04345e92e3c5654f03d6be8dba710c26 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 11:35:07 -0500 Subject: [PATCH 01/13] feat(github): Add paginated repos endpoint for SCM onboarding Add a new /repos-paginated/ endpoint that paginates over the cached accessible repo list for GitHub. Other providers fall back to returning the full list from get_repositories(). This is purpose-built for the SCM onboarding repo selector dropdown, where we need fast initial page loads via cursor-based pagination. Search is intentionally excluded -- the FE uses the existing /repos/ endpoint with accessibleOnly for that. The pagination interface is provider-agnostic: - BaseRepositoryIntegration.get_repositories_paginated() returns None by default (unsupported) - GitHubIntegration overrides it to slice the Django-cached repo list - The endpoint dispatches based on the return value Refs VDY-46 --- src/sentry/api/urls.py | 8 + ...rganization_integration_repos_paginated.py | 107 ++++++++ src/sentry/integrations/github/integration.py | 21 ++ .../source_code_management/repository.py | 15 ++ ...rganization_integration_repos_paginated.py | 235 ++++++++++++++++++ 5 files changed, 386 insertions(+) create mode 100644 src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py create mode 100644 tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py index 65037711689f7e..5b05765b22f0b2 100644 --- a/src/sentry/api/urls.py +++ b/src/sentry/api/urls.py @@ -282,6 +282,9 @@ from sentry.integrations.api.endpoints.organization_integration_repos import ( OrganizationIntegrationReposEndpoint, ) +from sentry.integrations.api.endpoints.organization_integration_repos_paginated import ( + OrganizationIntegrationReposPaginatedEndpoint, +) from sentry.integrations.api.endpoints.organization_integration_request import ( OrganizationIntegrationRequestEndpoint, ) @@ -1990,6 +1993,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]: OrganizationIntegrationReposEndpoint.as_view(), name="sentry-api-0-organization-integration-repos", ), + re_path( + r"^(?P[^/]+)/integrations/(?P[^/]+)/repos-paginated/$", + OrganizationIntegrationReposPaginatedEndpoint.as_view(), + name="sentry-api-0-organization-integration-repos-paginated", + ), re_path( r"^(?P[^/]+)/integrations/(?P[^/]+)/channels/$", OrganizationIntegrationChannelsEndpoint.as_view(), diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py b/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py new file mode 100644 index 00000000000000..2d1b3c55f9a4fa --- /dev/null +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py @@ -0,0 +1,107 @@ +from typing import Any + +from rest_framework.request import Request +from rest_framework.response import Response + +from sentry.api.api_owners import ApiOwner +from sentry.api.api_publish_status import ApiPublishStatus +from sentry.api.base import cell_silo_endpoint +from sentry.auth.exceptions import IdentityNotValid +from sentry.constants import ObjectStatus +from sentry.integrations.api.bases.organization_integrations import ( + CellOrganizationIntegrationBaseEndpoint, +) +from sentry.integrations.api.endpoints.organization_integration_repos import ( + IntegrationRepository, +) +from sentry.integrations.source_code_management.repository import RepositoryIntegration +from sentry.models.organization import Organization +from sentry.models.repository import Repository +from sentry.shared_integrations.exceptions import IntegrationError +from sentry.utils.cursors import Cursor, CursorResult + + +@cell_silo_endpoint +class OrganizationIntegrationReposPaginatedEndpoint(CellOrganizationIntegrationBaseEndpoint): + publish_status = { + "GET": ApiPublishStatus.PRIVATE, + } + owner = ApiOwner.ISSUES + + def get( + self, + request: Request, + organization: Organization, + integration_id: int, + **kwds: Any, + ) -> Response: + """ + Paginated list of repositories for an integration. + + Providers that implement ``get_repositories_paginated()`` return + cursor-paginated results. All others fall back to the full list + from ``get_repositories()``. + + For search, use the existing ``/repos/`` endpoint with + ``search`` and ``accessibleOnly`` params. + """ + integration = self.get_integration(organization.id, integration_id) + + if integration.status == ObjectStatus.DISABLED: + return self.respond({"repos": []}) + + install = integration.get_installation(organization_id=organization.id) + + if not isinstance(install, RepositoryIntegration): + return self.respond({"detail": "Repositories not supported"}, status=400) + + per_page = min(int(request.GET.get("per_page", 100)), 100) + cursor = self._parse_cursor(request) + installable_only = request.GET.get("installableOnly", "false").lower() == "true" + + installed_repos = Repository.objects.filter( + integration_id=integration.id, organization_id=organization.id + ).exclude(status=ObjectStatus.HIDDEN) + installed_repo_names = {repo.name for repo in installed_repos} + + try: + paginated = install.get_repositories_paginated(offset=cursor.offset, per_page=per_page) + if paginated is not None: + repositories, has_next = paginated + else: + repositories = install.get_repositories() + has_next = False + except (IntegrationError, IdentityNotValid) as e: + return self.respond({"detail": str(e)}, status=400) + + serialized = [ + IntegrationRepository( + name=repo["name"], + identifier=repo["identifier"], + defaultBranch=repo.get("default_branch"), + isInstalled=repo["identifier"] in installed_repo_names, + ) + for repo in repositories + if not installable_only or repo["identifier"] not in installed_repo_names + ] + + response = self.respond({"repos": serialized, "searchable": install.repo_search}) + + if has_next or cursor.offset > 0: + cursor_result = CursorResult( + results=[], + prev=Cursor(0, max(0, cursor.offset - per_page), True, cursor.offset > 0), + next=Cursor(0, cursor.offset + per_page, False, has_next), + ) + self.add_cursor_headers(request, response, cursor_result) + + return response + + def _parse_cursor(self, request: Request) -> Cursor: + cursor_param = request.GET.get("cursor", "") + if not cursor_param: + return Cursor(0, 0, False) + try: + return Cursor.from_string(cursor_param) + except (ValueError, TypeError): + return Cursor(0, 0, False) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index f4f92a9c4fd2e8..477bde0ec611ee 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -375,6 +375,27 @@ def _get_all_repos(): response = client.search_repositories(full_query) return to_repo_info(response.get("items", [])) + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool]: + client = self.get_client() + all_repos = client.get_accessible_repos_cached() + repos = [r for r in all_repos if not r.get("archived")] + + page = repos[offset : offset + per_page] + has_next = len(repos) > offset + per_page + return [ + { + "name": r["name"], + "identifier": r["full_name"], + "external_id": self.get_repo_external_id(r), + "default_branch": r.get("default_branch"), + } + for r in page + ], has_next + def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() accessible_repo_names = [r["identifier"] for r in accessible_repos] diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index 731f99d2cfc390..c68bb474c3c687 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -87,6 +87,21 @@ def get_repositories( """ raise NotImplementedError + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool] | None: + """ + Return a page of repositories and whether more pages exist. + + Returns ``(repos, has_next)`` for providers that support + paginated browsing, or ``None`` if the provider does not + implement pagination (callers should fall back to + ``get_repositories()``). + """ + return None + ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient") diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py new file mode 100644 index 00000000000000..fde62e62757e3e --- /dev/null +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py @@ -0,0 +1,235 @@ +from unittest.mock import MagicMock, patch + +from sentry.integrations.github.client import CachedRepo +from sentry.testutils.cases import APITestCase + + +def _make_cached_repo( + id: int, + name: str, + full_name: str, + default_branch: str | None = "main", + archived: bool = False, +) -> CachedRepo: + return { + "id": id, + "name": name, + "full_name": full_name, + "default_branch": default_branch, + "archived": archived, + } + + +CACHED_REPOS = [_make_cached_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] + + +class OrganizationIntegrationReposPaginatedGitHubTest(APITestCase): + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.org = self.create_organization(owner=self.user, name="baz") + self.project = self.create_project(organization=self.org) + self.integration = self.create_integration( + organization=self.org, provider="github", name="Example", external_id="github:1" + ) + self.path = ( + f"/api/0/organizations/{self.org.slug}" + f"/integrations/{self.integration.id}/repos-paginated/" + ) + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_first_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-1" + assert repos[1]["identifier"] == "Example/repo-2" + assert response.data["searchable"] is True + assert 'rel="next"' in response["Link"] + assert 'results="true"' in response["Link"].split("next")[1] + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_second_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-3" + assert repos[1]["identifier"] == "Example/repo-4" + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_last_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/repo-5" + # next cursor should indicate no more results + link = response["Link"] + next_part = link.split("next")[1] + assert 'results="false"' in next_part + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_excludes_archived(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = [ + _make_cached_repo(1, "active", "Example/active"), + _make_cached_repo(2, "archived", "Example/archived", archived=True), + ] + response = self.client.get(self.path, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/active" + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_installable_only(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = [ + _make_cached_repo(1, "installed-repo", "Example/installed-repo"), + _make_cached_repo(2, "available-repo", "Example/available-repo"), + ] + self.create_repo( + project=self.project, + integration_id=self.integration.id, + name="Example/installed-repo", + ) + + response = self.client.get(self.path, data={"installableOnly": "true"}, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/available-repo" + assert repos[0]["isInstalled"] is False + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_is_installed_field(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = [ + _make_cached_repo(1, "installed-repo", "Example/installed-repo"), + _make_cached_repo(2, "other-repo", "Example/other-repo"), + ] + self.create_repo( + project=self.project, + integration_id=self.integration.id, + name="Example/installed-repo", + ) + + response = self.client.get(self.path, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert repos[0]["isInstalled"] is True + assert repos[1]["isInstalled"] is False + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_cache_used_on_second_request(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + self.client.get(self.path, format="json") + self.client.get(self.path, format="json") + assert mock_cache.call_count == 2 + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: + """When all repos fit in one page, no Link header is added.""" + mock_cache.return_value = [ + _make_cached_repo(1, "repo-1", "Example/repo-1"), + ] + response = self.client.get(self.path, data={"per_page": "100"}, format="json") + + assert response.status_code == 200, response.content + assert "Link" not in response + + +class OrganizationIntegrationReposPaginatedNonGitHubTest(APITestCase): + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.org = self.create_organization(owner=self.user, name="baz") + self.integration = self.create_integration( + organization=self.org, provider="bitbucket", name="Example", external_id="bitbucket:1" + ) + self.path = ( + f"/api/0/organizations/{self.org.slug}" + f"/integrations/{self.integration.id}/repos-paginated/" + ) + + @patch( + "sentry.integrations.bitbucket.integration.BitbucketIntegration.get_repositories", + ) + def test_non_github_returns_full_list(self, mock_get_repos: MagicMock) -> None: + mock_get_repos.return_value = [ + { + "name": "repo-1", + "identifier": "Example/repo-1", + "default_branch": "main", + }, + { + "name": "repo-2", + "identifier": "Example/repo-2", + "default_branch": "develop", + }, + ] + response = self.client.get(self.path, format="json") + + assert response.status_code == 200, response.content + assert len(response.data["repos"]) == 2 + assert "Link" not in response + + +class OrganizationIntegrationReposPaginatedEdgeCasesTest(APITestCase): + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.org = self.create_organization(owner=self.user, name="baz") + + def test_non_repository_integration(self) -> None: + integration = self.create_integration( + organization=self.org, provider="jira", name="Jira", external_id="jira:1" + ) + path = ( + f"/api/0/organizations/{self.org.slug}/integrations/{integration.id}/repos-paginated/" + ) + response = self.client.get(path, format="json") + assert response.status_code == 400 + + def test_disabled_integration(self) -> None: + integration = self.create_integration( + organization=self.org, + provider="github", + name="Disabled", + external_id="github:disabled", + status=1, # ObjectStatus.DISABLED + ) + path = ( + f"/api/0/organizations/{self.org.slug}/integrations/{integration.id}/repos-paginated/" + ) + response = self.client.get(path, format="json") + assert response.status_code == 200 + assert response.data == {"repos": []} From c62b01b8911f062146a4a4d34b22ba17d1ff64ab Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 11:54:43 -0500 Subject: [PATCH 02/13] Revert "feat(github): Add paginated repos endpoint for SCM onboarding" This reverts commit fe092f3770040f8cfd48dcb9698011778ae6f8ea. --- src/sentry/api/urls.py | 8 - ...rganization_integration_repos_paginated.py | 107 -------- src/sentry/integrations/github/integration.py | 21 -- .../source_code_management/repository.py | 15 -- ...rganization_integration_repos_paginated.py | 235 ------------------ 5 files changed, 386 deletions(-) delete mode 100644 src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py delete mode 100644 tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py index 5b05765b22f0b2..65037711689f7e 100644 --- a/src/sentry/api/urls.py +++ b/src/sentry/api/urls.py @@ -282,9 +282,6 @@ from sentry.integrations.api.endpoints.organization_integration_repos import ( OrganizationIntegrationReposEndpoint, ) -from sentry.integrations.api.endpoints.organization_integration_repos_paginated import ( - OrganizationIntegrationReposPaginatedEndpoint, -) from sentry.integrations.api.endpoints.organization_integration_request import ( OrganizationIntegrationRequestEndpoint, ) @@ -1993,11 +1990,6 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]: OrganizationIntegrationReposEndpoint.as_view(), name="sentry-api-0-organization-integration-repos", ), - re_path( - r"^(?P[^/]+)/integrations/(?P[^/]+)/repos-paginated/$", - OrganizationIntegrationReposPaginatedEndpoint.as_view(), - name="sentry-api-0-organization-integration-repos-paginated", - ), re_path( r"^(?P[^/]+)/integrations/(?P[^/]+)/channels/$", OrganizationIntegrationChannelsEndpoint.as_view(), diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py b/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py deleted file mode 100644 index 2d1b3c55f9a4fa..00000000000000 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos_paginated.py +++ /dev/null @@ -1,107 +0,0 @@ -from typing import Any - -from rest_framework.request import Request -from rest_framework.response import Response - -from sentry.api.api_owners import ApiOwner -from sentry.api.api_publish_status import ApiPublishStatus -from sentry.api.base import cell_silo_endpoint -from sentry.auth.exceptions import IdentityNotValid -from sentry.constants import ObjectStatus -from sentry.integrations.api.bases.organization_integrations import ( - CellOrganizationIntegrationBaseEndpoint, -) -from sentry.integrations.api.endpoints.organization_integration_repos import ( - IntegrationRepository, -) -from sentry.integrations.source_code_management.repository import RepositoryIntegration -from sentry.models.organization import Organization -from sentry.models.repository import Repository -from sentry.shared_integrations.exceptions import IntegrationError -from sentry.utils.cursors import Cursor, CursorResult - - -@cell_silo_endpoint -class OrganizationIntegrationReposPaginatedEndpoint(CellOrganizationIntegrationBaseEndpoint): - publish_status = { - "GET": ApiPublishStatus.PRIVATE, - } - owner = ApiOwner.ISSUES - - def get( - self, - request: Request, - organization: Organization, - integration_id: int, - **kwds: Any, - ) -> Response: - """ - Paginated list of repositories for an integration. - - Providers that implement ``get_repositories_paginated()`` return - cursor-paginated results. All others fall back to the full list - from ``get_repositories()``. - - For search, use the existing ``/repos/`` endpoint with - ``search`` and ``accessibleOnly`` params. - """ - integration = self.get_integration(organization.id, integration_id) - - if integration.status == ObjectStatus.DISABLED: - return self.respond({"repos": []}) - - install = integration.get_installation(organization_id=organization.id) - - if not isinstance(install, RepositoryIntegration): - return self.respond({"detail": "Repositories not supported"}, status=400) - - per_page = min(int(request.GET.get("per_page", 100)), 100) - cursor = self._parse_cursor(request) - installable_only = request.GET.get("installableOnly", "false").lower() == "true" - - installed_repos = Repository.objects.filter( - integration_id=integration.id, organization_id=organization.id - ).exclude(status=ObjectStatus.HIDDEN) - installed_repo_names = {repo.name for repo in installed_repos} - - try: - paginated = install.get_repositories_paginated(offset=cursor.offset, per_page=per_page) - if paginated is not None: - repositories, has_next = paginated - else: - repositories = install.get_repositories() - has_next = False - except (IntegrationError, IdentityNotValid) as e: - return self.respond({"detail": str(e)}, status=400) - - serialized = [ - IntegrationRepository( - name=repo["name"], - identifier=repo["identifier"], - defaultBranch=repo.get("default_branch"), - isInstalled=repo["identifier"] in installed_repo_names, - ) - for repo in repositories - if not installable_only or repo["identifier"] not in installed_repo_names - ] - - response = self.respond({"repos": serialized, "searchable": install.repo_search}) - - if has_next or cursor.offset > 0: - cursor_result = CursorResult( - results=[], - prev=Cursor(0, max(0, cursor.offset - per_page), True, cursor.offset > 0), - next=Cursor(0, cursor.offset + per_page, False, has_next), - ) - self.add_cursor_headers(request, response, cursor_result) - - return response - - def _parse_cursor(self, request: Request) -> Cursor: - cursor_param = request.GET.get("cursor", "") - if not cursor_param: - return Cursor(0, 0, False) - try: - return Cursor.from_string(cursor_param) - except (ValueError, TypeError): - return Cursor(0, 0, False) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 477bde0ec611ee..f4f92a9c4fd2e8 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -375,27 +375,6 @@ def _get_all_repos(): response = client.search_repositories(full_query) return to_repo_info(response.get("items", [])) - def get_repositories_paginated( - self, - offset: int = 0, - per_page: int = 100, - ) -> tuple[list[RepositoryInfo], bool]: - client = self.get_client() - all_repos = client.get_accessible_repos_cached() - repos = [r for r in all_repos if not r.get("archived")] - - page = repos[offset : offset + per_page] - has_next = len(repos) > offset + per_page - return [ - { - "name": r["name"], - "identifier": r["full_name"], - "external_id": self.get_repo_external_id(r), - "default_branch": r.get("default_branch"), - } - for r in page - ], has_next - def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() accessible_repo_names = [r["identifier"] for r in accessible_repos] diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index c68bb474c3c687..731f99d2cfc390 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -87,21 +87,6 @@ def get_repositories( """ raise NotImplementedError - def get_repositories_paginated( - self, - offset: int = 0, - per_page: int = 100, - ) -> tuple[list[RepositoryInfo], bool] | None: - """ - Return a page of repositories and whether more pages exist. - - Returns ``(repos, has_next)`` for providers that support - paginated browsing, or ``None`` if the provider does not - implement pagination (callers should fall back to - ``get_repositories()``). - """ - return None - ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient") diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py deleted file mode 100644 index fde62e62757e3e..00000000000000 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos_paginated.py +++ /dev/null @@ -1,235 +0,0 @@ -from unittest.mock import MagicMock, patch - -from sentry.integrations.github.client import CachedRepo -from sentry.testutils.cases import APITestCase - - -def _make_cached_repo( - id: int, - name: str, - full_name: str, - default_branch: str | None = "main", - archived: bool = False, -) -> CachedRepo: - return { - "id": id, - "name": name, - "full_name": full_name, - "default_branch": default_branch, - "archived": archived, - } - - -CACHED_REPOS = [_make_cached_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] - - -class OrganizationIntegrationReposPaginatedGitHubTest(APITestCase): - def setUp(self) -> None: - super().setUp() - self.login_as(user=self.user) - self.org = self.create_organization(owner=self.user, name="baz") - self.project = self.create_project(organization=self.org) - self.integration = self.create_integration( - organization=self.org, provider="github", name="Example", external_id="github:1" - ) - self.path = ( - f"/api/0/organizations/{self.org.slug}" - f"/integrations/{self.integration.id}/repos-paginated/" - ) - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_first_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get(self.path, data={"per_page": "2"}, format="json") - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert len(repos) == 2 - assert repos[0]["identifier"] == "Example/repo-1" - assert repos[1]["identifier"] == "Example/repo-2" - assert response.data["searchable"] is True - assert 'rel="next"' in response["Link"] - assert 'results="true"' in response["Link"].split("next")[1] - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_second_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get( - self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json" - ) - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert len(repos) == 2 - assert repos[0]["identifier"] == "Example/repo-3" - assert repos[1]["identifier"] == "Example/repo-4" - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_last_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get( - self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json" - ) - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert len(repos) == 1 - assert repos[0]["identifier"] == "Example/repo-5" - # next cursor should indicate no more results - link = response["Link"] - next_part = link.split("next")[1] - assert 'results="false"' in next_part - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_excludes_archived(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = [ - _make_cached_repo(1, "active", "Example/active"), - _make_cached_repo(2, "archived", "Example/archived", archived=True), - ] - response = self.client.get(self.path, format="json") - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert len(repos) == 1 - assert repos[0]["identifier"] == "Example/active" - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_installable_only(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = [ - _make_cached_repo(1, "installed-repo", "Example/installed-repo"), - _make_cached_repo(2, "available-repo", "Example/available-repo"), - ] - self.create_repo( - project=self.project, - integration_id=self.integration.id, - name="Example/installed-repo", - ) - - response = self.client.get(self.path, data={"installableOnly": "true"}, format="json") - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert len(repos) == 1 - assert repos[0]["identifier"] == "Example/available-repo" - assert repos[0]["isInstalled"] is False - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_is_installed_field(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = [ - _make_cached_repo(1, "installed-repo", "Example/installed-repo"), - _make_cached_repo(2, "other-repo", "Example/other-repo"), - ] - self.create_repo( - project=self.project, - integration_id=self.integration.id, - name="Example/installed-repo", - ) - - response = self.client.get(self.path, format="json") - - assert response.status_code == 200, response.content - repos = response.data["repos"] - assert repos[0]["isInstalled"] is True - assert repos[1]["isInstalled"] is False - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_cache_used_on_second_request(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - self.client.get(self.path, format="json") - self.client.get(self.path, format="json") - assert mock_cache.call_count == 2 - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", - ) - def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: - """When all repos fit in one page, no Link header is added.""" - mock_cache.return_value = [ - _make_cached_repo(1, "repo-1", "Example/repo-1"), - ] - response = self.client.get(self.path, data={"per_page": "100"}, format="json") - - assert response.status_code == 200, response.content - assert "Link" not in response - - -class OrganizationIntegrationReposPaginatedNonGitHubTest(APITestCase): - def setUp(self) -> None: - super().setUp() - self.login_as(user=self.user) - self.org = self.create_organization(owner=self.user, name="baz") - self.integration = self.create_integration( - organization=self.org, provider="bitbucket", name="Example", external_id="bitbucket:1" - ) - self.path = ( - f"/api/0/organizations/{self.org.slug}" - f"/integrations/{self.integration.id}/repos-paginated/" - ) - - @patch( - "sentry.integrations.bitbucket.integration.BitbucketIntegration.get_repositories", - ) - def test_non_github_returns_full_list(self, mock_get_repos: MagicMock) -> None: - mock_get_repos.return_value = [ - { - "name": "repo-1", - "identifier": "Example/repo-1", - "default_branch": "main", - }, - { - "name": "repo-2", - "identifier": "Example/repo-2", - "default_branch": "develop", - }, - ] - response = self.client.get(self.path, format="json") - - assert response.status_code == 200, response.content - assert len(response.data["repos"]) == 2 - assert "Link" not in response - - -class OrganizationIntegrationReposPaginatedEdgeCasesTest(APITestCase): - def setUp(self) -> None: - super().setUp() - self.login_as(user=self.user) - self.org = self.create_organization(owner=self.user, name="baz") - - def test_non_repository_integration(self) -> None: - integration = self.create_integration( - organization=self.org, provider="jira", name="Jira", external_id="jira:1" - ) - path = ( - f"/api/0/organizations/{self.org.slug}/integrations/{integration.id}/repos-paginated/" - ) - response = self.client.get(path, format="json") - assert response.status_code == 400 - - def test_disabled_integration(self) -> None: - integration = self.create_integration( - organization=self.org, - provider="github", - name="Disabled", - external_id="github:disabled", - status=1, # ObjectStatus.DISABLED - ) - path = ( - f"/api/0/organizations/{self.org.slug}/integrations/{integration.id}/repos-paginated/" - ) - response = self.client.get(path, format="json") - assert response.status_code == 200 - assert response.data == {"repos": []} From 9d0d15148ebb098ef3a957bc8dca64d530374397 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:02:53 -0500 Subject: [PATCH 03/13] feat(github): Add cursor-based pagination to integration repos endpoint Add opt-in pagination to the existing /repos/ endpoint, triggered when a caller sends the per_page query param without a search query. Existing consumers that don't send per_page continue to receive the full repo list unchanged. The pagination interface is provider-agnostic: - BaseRepositoryIntegration.get_repositories_paginated() returns None by default (provider does not support pagination) - GitHubIntegration overrides it to slice the Django-cached repo list - The endpoint dispatches based on the return value, falling back to get_repositories() when pagination is not supported This supports the SCM onboarding repo selector, which needs fast page-at-a-time loading for orgs with many GitHub repos. Refs VDY-46 --- .../organization_integration_repos.py | 63 +++++-- src/sentry/integrations/github/integration.py | 21 +++ .../source_code_management/repository.py | 15 ++ .../test_organization_integration_repos.py | 165 ++++++++++++++++++ 4 files changed, 254 insertions(+), 10 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index dd44a5b6f65628..83181fdd137a00 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -15,6 +15,7 @@ from sentry.models.organization import Organization from sentry.models.repository import Repository from sentry.shared_integrations.exceptions import IntegrationError +from sentry.utils.cursors import Cursor, CursorResult class IntegrationRepository(TypedDict): @@ -54,6 +55,11 @@ def get( installation has access to, filtering locally instead of using the provider's search API which may return results beyond the installation's scope. + :qparam int per_page: When present (without ``search``), enables cursor-based + pagination. Providers that support paginated browsing return + one page of results with ``Link`` headers. Providers that + don't support it fall back to returning the full list. + :qparam string cursor: Pagination cursor (only used when ``per_page`` is set). """ integration = self.get_integration(organization.id, integration_id) @@ -71,19 +77,36 @@ def get( search = request.GET.get("search") accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true" - try: - repositories = install.get_repositories( - search, - accessible_only=accessible_only, - use_cache=accessible_only and bool(search), + # When per_page is present and there's no search query, + # try the paginated path. This lets pagination-aware callers + # (e.g. the SCM onboarding repo selector) get fast page-at-a-time + # results, while existing callers that don't send per_page + # continue to receive the full list. + paginate = "per_page" in request.GET and not search + if paginate: + per_page = min(int(request.GET.get("per_page", 100)), 100) + cursor = self._parse_cursor(request) + paginated = install.get_repositories_paginated( + offset=cursor.offset, per_page=per_page ) - except (IntegrationError, IdentityNotValid) as e: - return self.respond({"detail": str(e)}, status=400) + else: + paginated = None + + if paginated is not None: + repositories, has_next = paginated + else: + try: + repositories = install.get_repositories( + search, + accessible_only=accessible_only, + use_cache=accessible_only and bool(search), + ) + except (IntegrationError, IdentityNotValid) as e: + return self.respond({"detail": str(e)}, status=400) + has_next = False installable_only = request.GET.get("installableOnly", "false").lower() == "true" - # Include a repository if the request is for all repositories, or if we want - # installable-only repositories and the repository isn't already installed serialized_repositories = [ IntegrationRepository( name=repo["name"], @@ -95,8 +118,28 @@ def get( for repo in repositories if not installable_only or repo["identifier"] not in installed_repo_names ] - return self.respond( + + response = self.respond( {"repos": serialized_repositories, "searchable": install.repo_search} ) + if paginated is not None and (has_next or cursor.offset > 0): + cursor_result = CursorResult( + results=[], + prev=Cursor(0, max(0, cursor.offset - per_page), True, cursor.offset > 0), + next=Cursor(0, cursor.offset + per_page, False, has_next), + ) + self.add_cursor_headers(request, response, cursor_result) + + return response + return self.respond({"detail": "Repositories not supported"}, status=400) + + def _parse_cursor(self, request: Request) -> Cursor: + cursor_param = request.GET.get("cursor", "") + if not cursor_param: + return Cursor(0, 0, False) + try: + return Cursor.from_string(cursor_param) + except (ValueError, TypeError): + return Cursor(0, 0, False) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index f4f92a9c4fd2e8..477bde0ec611ee 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -375,6 +375,27 @@ def _get_all_repos(): response = client.search_repositories(full_query) return to_repo_info(response.get("items", [])) + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool]: + client = self.get_client() + all_repos = client.get_accessible_repos_cached() + repos = [r for r in all_repos if not r.get("archived")] + + page = repos[offset : offset + per_page] + has_next = len(repos) > offset + per_page + return [ + { + "name": r["name"], + "identifier": r["full_name"], + "external_id": self.get_repo_external_id(r), + "default_branch": r.get("default_branch"), + } + for r in page + ], has_next + def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() accessible_repo_names = [r["identifier"] for r in accessible_repos] diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index 731f99d2cfc390..c68bb474c3c687 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -87,6 +87,21 @@ def get_repositories( """ raise NotImplementedError + def get_repositories_paginated( + self, + offset: int = 0, + per_page: int = 100, + ) -> tuple[list[RepositoryInfo], bool] | None: + """ + Return a page of repositories and whether more pages exist. + + Returns ``(repos, has_next)`` for providers that support + paginated browsing, or ``None`` if the provider does not + implement pagination (callers should fall back to + ``get_repositories()``). + """ + return None + ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient") diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 48c46a02548e43..ac188f29a243bf 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -1,8 +1,25 @@ from unittest.mock import MagicMock, patch +from sentry.integrations.github.client import CachedRepo from sentry.testutils.cases import APITestCase +def _make_cached_repo( + id: int, + name: str, + full_name: str, + default_branch: str | None = "main", + archived: bool = False, +) -> CachedRepo: + return { + "id": id, + "name": name, + "full_name": full_name, + "default_branch": default_branch, + "archived": archived, + } + + class OrganizationIntegrationReposTest(APITestCase): def setUp(self) -> None: super().setUp() @@ -335,3 +352,151 @@ def test_no_repository_method(self) -> None: response = self.client.get(path, format="json") assert response.status_code == 400 + + +CACHED_REPOS = [_make_cached_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] + + +class OrganizationIntegrationReposPaginatedTest(APITestCase): + """Tests for cursor-based pagination triggered by sending per_page.""" + + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.org = self.create_organization(owner=self.user, name="baz") + self.project = self.create_project(organization=self.org) + self.integration = self.create_integration( + organization=self.org, provider="github", name="Example", external_id="github:1" + ) + self.path = ( + f"/api/0/organizations/{self.org.slug}/integrations/{self.integration.id}/repos/" + ) + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_first_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-1" + assert repos[1]["identifier"] == "Example/repo-2" + assert response.data["searchable"] is True + assert 'rel="next"' in response["Link"] + assert 'results="true"' in response["Link"].split("next")[1] + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_second_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-3" + assert repos[1]["identifier"] == "Example/repo-4" + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_last_page(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/repo-5" + link = response["Link"] + next_part = link.split("next")[1] + assert 'results="false"' in next_part + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_excludes_archived(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = [ + _make_cached_repo(1, "active", "Example/active"), + _make_cached_repo(2, "archived", "Example/archived", archived=True), + ] + response = self.client.get(self.path, data={"per_page": "100"}, format="json") + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/active" + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_installable_only(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = [ + _make_cached_repo(1, "installed-repo", "Example/installed-repo"), + _make_cached_repo(2, "available-repo", "Example/available-repo"), + ] + self.create_repo( + project=self.project, + integration_id=self.integration.id, + name="Example/installed-repo", + ) + response = self.client.get( + self.path, data={"per_page": "100", "installableOnly": "true"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 1 + assert repos[0]["identifier"] == "Example/available-repo" + assert repos[0]["isInstalled"] is False + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: + """When all repos fit in one page, no Link header is added.""" + mock_cache.return_value = [_make_cached_repo(1, "repo-1", "Example/repo-1")] + response = self.client.get(self.path, data={"per_page": "100"}, format="json") + + assert response.status_code == 200, response.content + assert "Link" not in response + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + return_value=[], + ) + def test_without_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: + """Without per_page, existing behavior: full list, no pagination.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get(self.path, format="json") + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with(None, accessible_only=False) + assert "Link" not in response + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + return_value=[], + ) + def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: + """When search is present, per_page is ignored — full list returned.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get( + self.path, data={"search": "repo", "per_page": "2"}, format="json" + ) + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with("repo", accessible_only=False) + assert "Link" not in response From 4b400c7d4552d57e38c9c0adf16b34ae89d6ac68 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:12:51 -0500 Subject: [PATCH 04/13] ref(github): Extract _to_repo_info method for reuse Promote the local to_repo_info closure to a method on GitHubIntegration so both get_repositories and get_repositories_paginated share the same formatting logic. Refs VDY-46 --- src/sentry/integrations/github/integration.py | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 477bde0ec611ee..11746b4484ee7e 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -341,17 +341,6 @@ def get_repositories( """ client = self.get_client() - def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: - return [ - { - "name": i["name"], - "identifier": i["full_name"], - "external_id": self.get_repo_external_id(i), - "default_branch": i.get("default_branch"), - } - for i in raw_repos - ] - def _get_all_repos(): if use_cache: return client.get_repos_cached() @@ -359,12 +348,12 @@ def _get_all_repos(): if not query: all_repos = _get_all_repos() - return to_repo_info(r for r in all_repos if not r.get("archived")) + return self._to_repo_info(r for r in all_repos if not r.get("archived")) if accessible_only: all_repos = _get_all_repos() query_lower = query.lower() - return to_repo_info( + return self._to_repo_info( r for r in all_repos if not r.get("archived") and query_lower in r["full_name"].lower() @@ -373,7 +362,18 @@ def _get_all_repos(): assert not use_cache, "use_cache is not supported with the Search API path" full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) - return to_repo_info(response.get("items", [])) + return self._to_repo_info(response.get("items", [])) + + def _to_repo_info(self, raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: + return [ + { + "name": i["name"], + "identifier": i["full_name"], + "external_id": self.get_repo_external_id(i), + "default_branch": i.get("default_branch"), + } + for i in raw_repos + ] def get_repositories_paginated( self, @@ -386,15 +386,7 @@ def get_repositories_paginated( page = repos[offset : offset + per_page] has_next = len(repos) > offset + per_page - return [ - { - "name": r["name"], - "identifier": r["full_name"], - "external_id": self.get_repo_external_id(r), - "default_branch": r.get("default_branch"), - } - for r in page - ], has_next + return self._to_repo_info(page), has_next def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() From 78d378b080e5612533dd5f07675457879591863e Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:21:03 -0500 Subject: [PATCH 05/13] fix(github): Clamp per_page lower bound and add docstring Clamp per_page to at least 1 to prevent infinite empty pages (per_page=0) or incorrect has_next (per_page=-1). Add docstring to get_repositories_paginated noting it always serves from the cached accessible-repos list. Refs VDY-46 --- .../api/endpoints/organization_integration_repos.py | 2 +- src/sentry/integrations/github/integration.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 83181fdd137a00..854d364130ca9f 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -84,7 +84,7 @@ def get( # continue to receive the full list. paginate = "per_page" in request.GET and not search if paginate: - per_page = min(int(request.GET.get("per_page", 100)), 100) + per_page = max(1, min(int(request.GET.get("per_page", 100)), 100)) cursor = self._parse_cursor(request) paginated = install.get_repositories_paginated( offset=cursor.offset, per_page=per_page diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 11746b4484ee7e..8f7b81e67ece95 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -380,6 +380,11 @@ def get_repositories_paginated( offset: int = 0, per_page: int = 100, ) -> tuple[list[RepositoryInfo], bool]: + """Paginate over the cached accessible-repos list. + + Always serves from ``get_accessible_repos_cached()`` so every + page is fast after the initial cache warm-up. + """ client = self.get_client() all_repos = client.get_accessible_repos_cached() repos = [r for r in all_repos if not r.get("archived")] From 642a4810e01090f7f4e965ba3306b82f4f68ef4f Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:22:50 -0500 Subject: [PATCH 06/13] docs(github): Note installableOnly filtering after pagination slice --- .../api/endpoints/organization_integration_repos.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 854d364130ca9f..947ccac56be67d 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -107,6 +107,11 @@ def get( installable_only = request.GET.get("installableOnly", "false").lower() == "true" + # NOTE: installableOnly filtering happens after pagination slicing, + # so pages may contain fewer items than per_page when installed repos + # are filtered out. This is acceptable for the infinite-scroll use + # case (SCM onboarding) but would need to move before slicing if + # exact page sizes matter for a future consumer. serialized_repositories = [ IntegrationRepository( name=repo["name"], From b8e1e80029984a862ab01f7dcd075821134bfcc0 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:26:58 -0500 Subject: [PATCH 07/13] fix(github): Harden paginated path input validation Wrap get_repositories_paginated in try/except for IntegrationError and IdentityNotValid so token revocations return 400 instead of 500. Validate per_page as an integer (non-numeric input like ?per_page=abc defaults to 100 instead of raising ValueError). Clamp cursor offset to >= 0 so crafted cursors like 0:-5:0 cannot produce unexpected slicing behavior. Refs VDY-46 --- .../organization_integration_repos.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 947ccac56be67d..9ed3adb123d5cc 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -84,11 +84,16 @@ def get( # continue to receive the full list. paginate = "per_page" in request.GET and not search if paginate: - per_page = max(1, min(int(request.GET.get("per_page", 100)), 100)) + try: + per_page = max(1, min(int(request.GET["per_page"]), 100)) + except (ValueError, TypeError): + per_page = 100 cursor = self._parse_cursor(request) - paginated = install.get_repositories_paginated( - offset=cursor.offset, per_page=per_page - ) + offset = max(0, cursor.offset) + try: + paginated = install.get_repositories_paginated(offset=offset, per_page=per_page) + except (IntegrationError, IdentityNotValid) as e: + return self.respond({"detail": str(e)}, status=400) else: paginated = None @@ -128,11 +133,11 @@ def get( {"repos": serialized_repositories, "searchable": install.repo_search} ) - if paginated is not None and (has_next or cursor.offset > 0): + if paginated is not None and (has_next or offset > 0): cursor_result = CursorResult( results=[], - prev=Cursor(0, max(0, cursor.offset - per_page), True, cursor.offset > 0), - next=Cursor(0, cursor.offset + per_page, False, has_next), + prev=Cursor(0, max(0, offset - per_page), True, offset > 0), + next=Cursor(0, offset + per_page, False, has_next), ) self.add_cursor_headers(request, response, cursor_result) From 4760bef558181b28105afef0caac80fea4f578d3 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:28:29 -0500 Subject: [PATCH 08/13] fix(github): Add type annotation for cursor_result --- .../api/endpoints/organization_integration_repos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 9ed3adb123d5cc..cf5e4660c342b3 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -134,7 +134,7 @@ def get( ) if paginated is not None and (has_next or offset > 0): - cursor_result = CursorResult( + cursor_result: CursorResult[IntegrationRepository] = CursorResult( results=[], prev=Cursor(0, max(0, offset - per_page), True, offset > 0), next=Cursor(0, offset + per_page, False, has_next), From a856a1eaae2e0cb0747e8c5b01dee30912378604 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:30:59 -0500 Subject: [PATCH 09/13] test(github): Add edge case tests and docstring improvements Add tests for input validation edge cases: per_page=0, per_page=-1, per_page=abc, per_page=200, negative cursor offset, and IntegrationError in the paginated path. Document that accessibleOnly is ignored in the paginated path and that CursorResult is only used for Link header generation. Remove redundant return_value=[] from test patch decorators. Refs VDY-46 --- .../organization_integration_repos.py | 3 + .../test_organization_integration_repos.py | 70 ++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index cf5e4660c342b3..22d8a8e7587410 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -59,6 +59,8 @@ def get( pagination. Providers that support paginated browsing return one page of results with ``Link`` headers. Providers that don't support it fall back to returning the full list. + The paginated path always returns installation-accessible + repos (``accessibleOnly`` is ignored). :qparam string cursor: Pagination cursor (only used when ``per_page`` is set). """ integration = self.get_integration(organization.id, integration_id) @@ -134,6 +136,7 @@ def get( ) if paginated is not None and (has_next or offset > 0): + # CursorResult only used for Link header generation cursor_result: CursorResult[IntegrationRepository] = CursorResult( results=[], prev=Cursor(0, max(0, offset - per_page), True, offset > 0), diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index ac188f29a243bf..02733ff4342dbc 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -471,7 +471,6 @@ def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: @patch( "sentry.integrations.github.integration.GitHubIntegration.get_repositories", - return_value=[], ) def test_without_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: """Without per_page, existing behavior: full list, no pagination.""" @@ -486,10 +485,9 @@ def test_without_per_page_uses_full_list(self, get_repositories: MagicMock) -> N @patch( "sentry.integrations.github.integration.GitHubIntegration.get_repositories", - return_value=[], ) def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) -> None: - """When search is present, per_page is ignored — full list returned.""" + """When search is present, per_page is ignored -- full list returned.""" get_repositories.return_value = [ {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, ] @@ -500,3 +498,69 @@ def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) assert response.status_code == 200, response.content get_repositories.assert_called_once_with("repo", accessible_only=False) assert "Link" not in response + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_per_page_zero_clamped_to_one(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "0"}, format="json") + + assert response.status_code == 200, response.content + assert len(response.data["repos"]) == 1 + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_per_page_negative_clamped_to_one(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "-1"}, format="json") + + assert response.status_code == 200, response.content + assert len(response.data["repos"]) == 1 + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_per_page_non_numeric_defaults_to_100(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "abc"}, format="json") + + assert response.status_code == 200, response.content + assert len(response.data["repos"]) == 5 + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_per_page_over_max_clamped_to_100(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get(self.path, data={"per_page": "200"}, format="json") + + assert response.status_code == 200, response.content + assert len(response.data["repos"]) == 5 + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_negative_cursor_offset_clamped_to_zero(self, mock_cache: MagicMock) -> None: + mock_cache.return_value = CACHED_REPOS + response = self.client.get( + self.path, data={"per_page": "2", "cursor": "0:-5:0"}, format="json" + ) + + assert response.status_code == 200, response.content + repos = response.data["repos"] + assert len(repos) == 2 + assert repos[0]["identifier"] == "Example/repo-1" + + @patch( + "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + ) + def test_integration_error_returns_400(self, mock_cache: MagicMock) -> None: + from sentry.shared_integrations.exceptions import IntegrationError + + mock_cache.side_effect = IntegrationError("token revoked") + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 400 + assert response.data["detail"] == "token revoked" From 89c140a8627d534f5cc8f2ee4fe10d316ae4684c Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 12:35:32 -0500 Subject: [PATCH 10/13] ref(github): Address review nits on paginated repos Document cache consistency caveat in get_repositories_paginated docstring: cache expiry between pages can cause duplicates/skips, acceptable for infinite-scroll. Make _parse_cursor a staticmethod. Add comment clarifying per_page and offset are guaranteed set when paginated is not None. Refs VDY-46 --- .../api/endpoints/organization_integration_repos.py | 4 +++- src/sentry/integrations/github/integration.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 22d8a8e7587410..a3230c4a6b908b 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -135,6 +135,7 @@ def get( {"repos": serialized_repositories, "searchable": install.repo_search} ) + # per_page and offset are guaranteed set when paginated is not None if paginated is not None and (has_next or offset > 0): # CursorResult only used for Link header generation cursor_result: CursorResult[IntegrationRepository] = CursorResult( @@ -148,7 +149,8 @@ def get( return self.respond({"detail": "Repositories not supported"}, status=400) - def _parse_cursor(self, request: Request) -> Cursor: + @staticmethod + def _parse_cursor(request: Request) -> Cursor: cursor_param = request.GET.get("cursor", "") if not cursor_param: return Cursor(0, 0, False) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 8f7b81e67ece95..bb0bb5215f2ed0 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -383,7 +383,10 @@ def get_repositories_paginated( """Paginate over the cached accessible-repos list. Always serves from ``get_accessible_repos_cached()`` so every - page is fast after the initial cache warm-up. + page is fast after the initial cache warm-up. If the cache + expires between page fetches the underlying list can change, + which may cause duplicates or skipped repos across pages. + This is acceptable for infinite-scroll consumers. """ client = self.get_client() all_repos = client.get_accessible_repos_cached() From 8103616a41e47208698ed1a252bc74be12ffb49c Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 16:26:16 -0500 Subject: [PATCH 11/13] ref(github): Update paginated path for get_repos_cached rename Fix references to the old get_accessible_repos_cached name in get_repositories_paginated and pagination tests after rebase. --- src/sentry/integrations/github/integration.py | 8 +++--- .../test_organization_integration_repos.py | 28 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index bb0bb5215f2ed0..10f505929a2010 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -382,14 +382,14 @@ def get_repositories_paginated( ) -> tuple[list[RepositoryInfo], bool]: """Paginate over the cached accessible-repos list. - Always serves from ``get_accessible_repos_cached()`` so every - page is fast after the initial cache warm-up. If the cache - expires between page fetches the underlying list can change, + Always serves from ``get_repos_cached()`` so every page is + fast after the initial cache warm-up. If the cache expires + between page fetches the underlying list can change, which may cause duplicates or skipped repos across pages. This is acceptable for infinite-scroll consumers. """ client = self.get_client() - all_repos = client.get_accessible_repos_cached() + all_repos = client.get_repos_cached() repos = [r for r in all_repos if not r.get("archived")] page = repos[offset : offset + per_page] diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 02733ff4342dbc..a8dc933084d071 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -373,7 +373,7 @@ def setUp(self) -> None: ) @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_first_page(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -389,7 +389,7 @@ def test_first_page(self, mock_cache: MagicMock) -> None: assert 'results="true"' in response["Link"].split("next")[1] @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_second_page(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -404,7 +404,7 @@ def test_second_page(self, mock_cache: MagicMock) -> None: assert repos[1]["identifier"] == "Example/repo-4" @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_last_page(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -421,7 +421,7 @@ def test_last_page(self, mock_cache: MagicMock) -> None: assert 'results="false"' in next_part @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_excludes_archived(self, mock_cache: MagicMock) -> None: mock_cache.return_value = [ @@ -436,7 +436,7 @@ def test_excludes_archived(self, mock_cache: MagicMock) -> None: assert repos[0]["identifier"] == "Example/active" @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_installable_only(self, mock_cache: MagicMock) -> None: mock_cache.return_value = [ @@ -459,7 +459,7 @@ def test_installable_only(self, mock_cache: MagicMock) -> None: assert repos[0]["isInstalled"] is False @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: """When all repos fit in one page, no Link header is added.""" @@ -480,7 +480,7 @@ def test_without_per_page_uses_full_list(self, get_repositories: MagicMock) -> N response = self.client.get(self.path, format="json") assert response.status_code == 200, response.content - get_repositories.assert_called_once_with(None, accessible_only=False) + get_repositories.assert_called_once_with(None, accessible_only=False, use_cache=False) assert "Link" not in response @patch( @@ -496,11 +496,11 @@ def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) ) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with("repo", accessible_only=False) + get_repositories.assert_called_once_with("repo", accessible_only=False, use_cache=False) assert "Link" not in response @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_per_page_zero_clamped_to_one(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -510,7 +510,7 @@ def test_per_page_zero_clamped_to_one(self, mock_cache: MagicMock) -> None: assert len(response.data["repos"]) == 1 @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_per_page_negative_clamped_to_one(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -520,7 +520,7 @@ def test_per_page_negative_clamped_to_one(self, mock_cache: MagicMock) -> None: assert len(response.data["repos"]) == 1 @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_per_page_non_numeric_defaults_to_100(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -530,7 +530,7 @@ def test_per_page_non_numeric_defaults_to_100(self, mock_cache: MagicMock) -> No assert len(response.data["repos"]) == 5 @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_per_page_over_max_clamped_to_100(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -540,7 +540,7 @@ def test_per_page_over_max_clamped_to_100(self, mock_cache: MagicMock) -> None: assert len(response.data["repos"]) == 5 @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_negative_cursor_offset_clamped_to_zero(self, mock_cache: MagicMock) -> None: mock_cache.return_value = CACHED_REPOS @@ -554,7 +554,7 @@ def test_negative_cursor_offset_clamped_to_zero(self, mock_cache: MagicMock) -> assert repos[0]["identifier"] == "Example/repo-1" @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_accessible_repos_cached", + "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", ) def test_integration_error_returns_400(self, mock_cache: MagicMock) -> None: from sentry.shared_integrations.exceptions import IntegrationError From 64cce913a38b6b4e55839512846a9c4503dcfb49 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 21:36:20 -0500 Subject: [PATCH 12/13] ref(github): Use real GitHub API pagination instead of cached list slicing Paginated repo browsing now makes a single GitHub API call per page via GET /installation/repositories?page=N&per_page=M instead of fetching all repos into a cache and slicing. This avoids loading the full repo list into memory on every request. Also switches to the built-in get_per_page() and get_cursor_from_request() helpers, raises NotImplementedError in the base class instead of returning None, and removes the custom _parse_cursor method. Refs VDY-46 --- .../organization_integration_repos.py | 40 ++--- src/sentry/integrations/github/client.py | 20 +++ src/sentry/integrations/github/integration.py | 20 +-- .../source_code_management/repository.py | 12 +- .../test_organization_integration_repos.py | 142 +++++++----------- 5 files changed, 96 insertions(+), 138 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index a3230c4a6b908b..c8dda76052e4c3 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -86,22 +86,19 @@ def get( # continue to receive the full list. paginate = "per_page" in request.GET and not search if paginate: + per_page = self.get_per_page(request) + cursor = self.get_cursor_from_request(request) + offset = max(0, cursor.offset) if cursor is not None else 0 try: - per_page = max(1, min(int(request.GET["per_page"]), 100)) - except (ValueError, TypeError): - per_page = 100 - cursor = self._parse_cursor(request) - offset = max(0, cursor.offset) - try: - paginated = install.get_repositories_paginated(offset=offset, per_page=per_page) + repositories, has_next = install.get_repositories_paginated( + offset=offset, per_page=per_page + ) + except NotImplementedError: + paginate = False except (IntegrationError, IdentityNotValid) as e: return self.respond({"detail": str(e)}, status=400) - else: - paginated = None - if paginated is not None: - repositories, has_next = paginated - else: + if not paginate: try: repositories = install.get_repositories( search, @@ -114,11 +111,6 @@ def get( installable_only = request.GET.get("installableOnly", "false").lower() == "true" - # NOTE: installableOnly filtering happens after pagination slicing, - # so pages may contain fewer items than per_page when installed repos - # are filtered out. This is acceptable for the infinite-scroll use - # case (SCM onboarding) but would need to move before slicing if - # exact page sizes matter for a future consumer. serialized_repositories = [ IntegrationRepository( name=repo["name"], @@ -135,9 +127,7 @@ def get( {"repos": serialized_repositories, "searchable": install.repo_search} ) - # per_page and offset are guaranteed set when paginated is not None - if paginated is not None and (has_next or offset > 0): - # CursorResult only used for Link header generation + if paginate and (has_next or offset > 0): cursor_result: CursorResult[IntegrationRepository] = CursorResult( results=[], prev=Cursor(0, max(0, offset - per_page), True, offset > 0), @@ -148,13 +138,3 @@ def get( return response return self.respond({"detail": "Repositories not supported"}, status=400) - - @staticmethod - def _parse_cursor(request: Request) -> Cursor: - cursor_param = request.GET.get("cursor", "") - if not cursor_param: - return Cursor(0, 0, False) - try: - return Cursor.from_string(cursor_param) - except (ValueError, TypeError): - return Cursor(0, 0, False) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 3825c82669cbcf..f2862dc1d41d00 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -558,6 +558,26 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + def get_repos_page( + self, page: int = 1, per_page: int = 100 + ) -> tuple[list[dict[str, Any]], int]: + """ + Fetch a single page of repositories accessible to this installation. + + Returns (repositories, total_count). + https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation + """ + with SCMIntegrationInteractionEvent( + interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES, + provider_key=self.integration_name, + integration_id=self.integration.id, + ).capture(): + response = self.get( + "/installation/repositories", + params={"per_page": per_page, "page": page}, + ) + return response["repositories"], response["total_count"] + def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ Return all repos accessible to this installation, cached in diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 10f505929a2010..9b1477bf0e398c 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -380,21 +380,17 @@ def get_repositories_paginated( offset: int = 0, per_page: int = 100, ) -> tuple[list[RepositoryInfo], bool]: - """Paginate over the cached accessible-repos list. + """Fetch a single page of repos from the GitHub API. - Always serves from ``get_repos_cached()`` so every page is - fast after the initial cache warm-up. If the cache expires - between page fetches the underlying list can change, - which may cause duplicates or skipped repos across pages. - This is acceptable for infinite-scroll consumers. + Converts the cursor offset to a GitHub page number and makes + one API call per page request. """ client = self.get_client() - all_repos = client.get_repos_cached() - repos = [r for r in all_repos if not r.get("archived")] - - page = repos[offset : offset + per_page] - has_next = len(repos) > offset + per_page - return self._to_repo_info(page), has_next + page_number = (offset // per_page) + 1 + repos, total_count = client.get_repos_page(page=page_number, per_page=per_page) + active_repos = [r for r in repos if not r.get("archived")] + has_next = (page_number * per_page) < total_count + return self._to_repo_info(active_repos), has_next def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index c68bb474c3c687..5dfbd817ed2d6d 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -91,16 +91,16 @@ def get_repositories_paginated( self, offset: int = 0, per_page: int = 100, - ) -> tuple[list[RepositoryInfo], bool] | None: + ) -> tuple[list[RepositoryInfo], bool]: """ Return a page of repositories and whether more pages exist. - Returns ``(repos, has_next)`` for providers that support - paginated browsing, or ``None`` if the provider does not - implement pagination (callers should fall back to - ``get_repositories()``). + Returns ``(repos, has_next)``. Providers that don't support + paginated browsing should leave this unimplemented; the + endpoint catches ``NotImplementedError`` and falls back to + ``get_repositories()``. """ - return None + raise NotImplementedError ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient") diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index a8dc933084d071..3f51ead27447b5 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -1,16 +1,16 @@ +from typing import Any from unittest.mock import MagicMock, patch -from sentry.integrations.github.client import CachedRepo from sentry.testutils.cases import APITestCase -def _make_cached_repo( +def _make_github_repo( id: int, name: str, full_name: str, default_branch: str | None = "main", archived: bool = False, -) -> CachedRepo: +) -> dict[str, Any]: return { "id": id, "name": name, @@ -354,11 +354,13 @@ def test_no_repository_method(self) -> None: assert response.status_code == 400 -CACHED_REPOS = [_make_cached_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] +ALL_REPOS = [_make_github_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)] +TOTAL_COUNT = len(ALL_REPOS) +MOCK_TARGET = "sentry.integrations.github.client.GitHubBaseClient.get_repos_page" class OrganizationIntegrationReposPaginatedTest(APITestCase): - """Tests for cursor-based pagination triggered by sending per_page.""" + """Tests for cursor-based pagination via GitHub API page params.""" def setUp(self) -> None: super().setUp() @@ -372,14 +374,13 @@ def setUp(self) -> None: f"/api/0/organizations/{self.org.slug}/integrations/{self.integration.id}/repos/" ) - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_first_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS + @patch(MOCK_TARGET) + def test_first_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT) response = self.client.get(self.path, data={"per_page": "2"}, format="json") assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=1, per_page=2) repos = response.data["repos"] assert len(repos) == 2 assert repos[0]["identifier"] == "Example/repo-1" @@ -388,31 +389,29 @@ def test_first_page(self, mock_cache: MagicMock) -> None: assert 'rel="next"' in response["Link"] assert 'results="true"' in response["Link"].split("next")[1] - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_second_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS + @patch(MOCK_TARGET) + def test_second_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[2:4], TOTAL_COUNT) response = self.client.get( self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json" ) assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=2, per_page=2) repos = response.data["repos"] assert len(repos) == 2 assert repos[0]["identifier"] == "Example/repo-3" assert repos[1]["identifier"] == "Example/repo-4" - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_last_page(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS + @patch(MOCK_TARGET) + def test_last_page(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[4:], TOTAL_COUNT) response = self.client.get( self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json" ) assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=3, per_page=2) repos = response.data["repos"] assert len(repos) == 1 assert repos[0]["identifier"] == "Example/repo-5" @@ -420,14 +419,15 @@ def test_last_page(self, mock_cache: MagicMock) -> None: next_part = link.split("next")[1] assert 'results="false"' in next_part - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_excludes_archived(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = [ - _make_cached_repo(1, "active", "Example/active"), - _make_cached_repo(2, "archived", "Example/archived", archived=True), - ] + @patch(MOCK_TARGET) + def test_excludes_archived(self, mock_page: MagicMock) -> None: + mock_page.return_value = ( + [ + _make_github_repo(1, "active", "Example/active"), + _make_github_repo(2, "archived", "Example/archived", archived=True), + ], + 2, + ) response = self.client.get(self.path, data={"per_page": "100"}, format="json") assert response.status_code == 200, response.content @@ -435,14 +435,15 @@ def test_excludes_archived(self, mock_cache: MagicMock) -> None: assert len(repos) == 1 assert repos[0]["identifier"] == "Example/active" - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_installable_only(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = [ - _make_cached_repo(1, "installed-repo", "Example/installed-repo"), - _make_cached_repo(2, "available-repo", "Example/available-repo"), - ] + @patch(MOCK_TARGET) + def test_installable_only(self, mock_page: MagicMock) -> None: + mock_page.return_value = ( + [ + _make_github_repo(1, "installed-repo", "Example/installed-repo"), + _make_github_repo(2, "available-repo", "Example/available-repo"), + ], + 2, + ) self.create_repo( project=self.project, integration_id=self.integration.id, @@ -458,12 +459,10 @@ def test_installable_only(self, mock_cache: MagicMock) -> None: assert repos[0]["identifier"] == "Example/available-repo" assert repos[0]["isInstalled"] is False - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None: + @patch(MOCK_TARGET) + def test_no_cursor_on_single_page(self, mock_page: MagicMock) -> None: """When all repos fit in one page, no Link header is added.""" - mock_cache.return_value = [_make_cached_repo(1, "repo-1", "Example/repo-1")] + mock_page.return_value = ([_make_github_repo(1, "repo-1", "Example/repo-1")], 1) response = self.client.get(self.path, data={"per_page": "100"}, format="json") assert response.status_code == 200, response.content @@ -499,67 +498,30 @@ def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock) get_repositories.assert_called_once_with("repo", accessible_only=False, use_cache=False) assert "Link" not in response - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_per_page_zero_clamped_to_one(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get(self.path, data={"per_page": "0"}, format="json") + def test_per_page_invalid_returns_400(self) -> None: + """Invalid per_page values (0, negative, non-numeric, >100) return 400.""" + for value in ("0", "-1", "abc", "200"): + response = self.client.get(self.path, data={"per_page": value}, format="json") + assert response.status_code == 400, f"per_page={value} should return 400" - assert response.status_code == 200, response.content - assert len(response.data["repos"]) == 1 - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_per_page_negative_clamped_to_one(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get(self.path, data={"per_page": "-1"}, format="json") - - assert response.status_code == 200, response.content - assert len(response.data["repos"]) == 1 - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_per_page_non_numeric_defaults_to_100(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get(self.path, data={"per_page": "abc"}, format="json") - - assert response.status_code == 200, response.content - assert len(response.data["repos"]) == 5 - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_per_page_over_max_clamped_to_100(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS - response = self.client.get(self.path, data={"per_page": "200"}, format="json") - - assert response.status_code == 200, response.content - assert len(response.data["repos"]) == 5 - - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_negative_cursor_offset_clamped_to_zero(self, mock_cache: MagicMock) -> None: - mock_cache.return_value = CACHED_REPOS + @patch(MOCK_TARGET) + def test_negative_cursor_offset_clamped_to_zero(self, mock_page: MagicMock) -> None: + mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT) response = self.client.get( self.path, data={"per_page": "2", "cursor": "0:-5:0"}, format="json" ) assert response.status_code == 200, response.content + mock_page.assert_called_once_with(page=1, per_page=2) repos = response.data["repos"] assert len(repos) == 2 assert repos[0]["identifier"] == "Example/repo-1" - @patch( - "sentry.integrations.github.client.GitHubBaseClient.get_repos_cached", - ) - def test_integration_error_returns_400(self, mock_cache: MagicMock) -> None: + @patch(MOCK_TARGET) + def test_integration_error_returns_400(self, mock_page: MagicMock) -> None: from sentry.shared_integrations.exceptions import IntegrationError - mock_cache.side_effect = IntegrationError("token revoked") + mock_page.side_effect = IntegrationError("token revoked") response = self.client.get(self.path, data={"per_page": "2"}, format="json") assert response.status_code == 400 From 000570be34f7adcc0ed7701e25152a47b87d2183 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 21:54:49 -0500 Subject: [PATCH 13/13] ref(github): Add comments for pagination edge cases and fallback test Document that archived repos and installableOnly filtering can cause pages to have fewer items than per_page and has_next to overestimate. Add test for NotImplementedError fallback when a provider does not support paginated browsing. Refs VDY-46 --- .../organization_integration_repos.py | 3 +++ src/sentry/integrations/github/integration.py | 3 +++ .../test_organization_integration_repos.py | 21 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index c8dda76052e4c3..1b0a2b3253dcdb 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -111,6 +111,9 @@ def get( installable_only = request.GET.get("installableOnly", "false").lower() == "true" + # installableOnly filtering happens after pagination, so pages + # may contain fewer items than per_page when installed repos are + # excluded. Acceptable for infinite-scroll consumers. serialized_repositories = [ IntegrationRepository( name=repo["name"], diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 9b1477bf0e398c..6958aa70eafcc3 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -389,6 +389,9 @@ def get_repositories_paginated( page_number = (offset // per_page) + 1 repos, total_count = client.get_repos_page(page=page_number, per_page=per_page) active_repos = [r for r in repos if not r.get("archived")] + # total_count includes archived repos, so has_next may overestimate + # and pages may contain fewer than per_page items. Acceptable for + # infinite-scroll consumers (worst case: one extra empty fetch). has_next = (page_number * per_page) < total_count return self._to_repo_info(active_repos), has_next diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 3f51ead27447b5..8b00f3e4bb3c54 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -526,3 +526,24 @@ def test_integration_error_returns_400(self, mock_page: MagicMock) -> None: assert response.status_code == 400 assert response.data["detail"] == "token revoked" + + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories_paginated", + side_effect=NotImplementedError, + ) + @patch( + "sentry.integrations.github.integration.GitHubIntegration.get_repositories", + ) + def test_not_implemented_falls_back_to_full_list( + self, get_repositories: MagicMock, _mock_paginated: MagicMock + ) -> None: + """When a provider raises NotImplementedError, per_page falls back to get_repositories.""" + get_repositories.return_value = [ + {"name": "repo-1", "identifier": "Example/repo-1", "default_branch": "main"}, + ] + response = self.client.get(self.path, data={"per_page": "2"}, format="json") + + assert response.status_code == 200, response.content + get_repositories.assert_called_once_with(None, accessible_only=False, use_cache=False) + assert len(response.data["repos"]) == 1 + assert "Link" not in response