getsentry · jaydgoss · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
@@ -15,6 +15,7 @@
 from sentry.models.organization import Organization
 from sentry.models.repository import Repository
 from sentry.shared_integrations.exceptions import IntegrationError
+from sentry.utils.cursors import Cursor, CursorResult
 
 
 class IntegrationRepository(TypedDict):
@@ -54,6 +55,13 @@ def get(
                                      installation has access to, filtering locally instead of
                                      using the provider's search API which may return results
                                      beyond the installation's scope.
+        :qparam int per_page: When present (without ``search``), enables cursor-based
+                              pagination. Providers that support paginated browsing return
+                              one page of results with ``Link`` headers. Providers that
+                              don't support it fall back to returning the full list.
+                              The paginated path always returns installation-accessible
+                              repos (``accessibleOnly`` is ignored).
+        :qparam string cursor: Pagination cursor (only used when ``per_page`` is set).
         """
         integration = self.get_integration(organization.id, integration_id)
 
@@ -71,19 +79,41 @@ def get(
             search = request.GET.get("search")
             accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true"
 
-            try:
-                repositories = install.get_repositories(
-                    search,
-                    accessible_only=accessible_only,
-                    use_cache=accessible_only and bool(search),
-                )
-            except (IntegrationError, IdentityNotValid) as e:
-                return self.respond({"detail": str(e)}, status=400)
+            # When per_page is present and there's no search query,
+            # try the paginated path. This lets pagination-aware callers
+            # (e.g. the SCM onboarding repo selector) get fast page-at-a-time
+            # results, while existing callers that don't send per_page
+            # continue to receive the full list.
+            paginate = "per_page" in request.GET and not search
+            if paginate:
+                per_page = self.get_per_page(request)
+                cursor = self.get_cursor_from_request(request)
+                offset = max(0, cursor.offset) if cursor is not None else 0
+                try:
+                    repositories, has_next = install.get_repositories_paginated(
+                        offset=offset, per_page=per_page
+                    )
+                except NotImplementedError:
+                    paginate = False
+                except (IntegrationError, IdentityNotValid) as e:
+                    return self.respond({"detail": str(e)}, status=400)
+
+            if not paginate:
+                try:
+                    repositories = install.get_repositories(
+                        search,
+                        accessible_only=accessible_only,
+                        use_cache=accessible_only and bool(search),
+                    )
+                except (IntegrationError, IdentityNotValid) as e:
+                    return self.respond({"detail": str(e)}, status=400)
+                has_next = False
 
             installable_only = request.GET.get("installableOnly", "false").lower() == "true"
 
-            # Include a repository if the request is for all repositories, or if we want
-            # installable-only repositories and the repository isn't already installed
+            # installableOnly filtering happens after pagination, so pages
+            # may contain fewer items than per_page when installed repos are
+            # excluded. Acceptable for infinite-scroll consumers.
             serialized_repositories = [
                 IntegrationRepository(
                     name=repo["name"],
@@ -95,8 +125,19 @@ def get(
                 for repo in repositories
                 if not installable_only or repo["identifier"] not in installed_repo_names
             ]
-            return self.respond(
+
+            response = self.respond(
                 {"repos": serialized_repositories, "searchable": install.repo_search}
             )
 
+            if paginate and (has_next or offset > 0):
+                cursor_result: CursorResult[IntegrationRepository] = CursorResult(
+                    results=[],
+                    prev=Cursor(0, max(0, offset - per_page), True, offset > 0),
+                    next=Cursor(0, offset + per_page, False, has_next),
+                )
+                self.add_cursor_headers(request, response, cursor_result)
+
+            return response
+
         return self.respond({"detail": "Repositories not supported"}, status=400)
@@ -558,6 +558,26 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
                 page_number_limit=page_number_limit,
             )
 
+    def get_repos_page(
+        self, page: int = 1, per_page: int = 100
+    ) -> tuple[list[dict[str, Any]], int]:
+        """
+        Request one page of the repositories this installation can access.
+
+        Issues a single GET to ``/installation/repositories`` inside an
+        ``SCMIntegrationInteractionEvent`` capture and returns
+        ``(repositories, total_count)`` read from the response body.
+        https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
+        """
+        interaction = SCMIntegrationInteractionEvent(
+            interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES,
+            provider_key=self.integration_name,
+            integration_id=self.integration.id,
+        )
+        query_params = {"per_page": per_page, "page": page}
+        with interaction.capture():
+            # Keep the request and the key lookups inside the capture so
+            # any failure is attributed to this interaction.
+            payload = self.get("/installation/repositories", params=query_params)
+            return payload["repositories"], payload["total_count"]
+
     def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]:
         """
         Return all repos accessible to this installation, cached in

@@ -341,30 +341,19 @@ def get_repositories(
         """
         client = self.get_client()
 
-        def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
-            return [
-                {
-                    "name": i["name"],
-                    "identifier": i["full_name"],
-                    "external_id": self.get_repo_external_id(i),
-                    "default_branch": i.get("default_branch"),
-                }
-                for i in raw_repos
-            ]
-
         def _get_all_repos():
             if use_cache:
                 return client.get_repos_cached()
             return client.get_repos(page_number_limit=page_number_limit)
 
         if not query:
             all_repos = _get_all_repos()
-            return to_repo_info(r for r in all_repos if not r.get("archived"))
+            return self._to_repo_info(r for r in all_repos if not r.get("archived"))
 
         if accessible_only:
             all_repos = _get_all_repos()
             query_lower = query.lower()
-            return to_repo_info(
+            return self._to_repo_info(
                 r
                 for r in all_repos
                 if not r.get("archived") and query_lower in r["full_name"].lower()
@@ -373,7 +362,38 @@ def _get_all_repos():
         assert not use_cache, "use_cache is not supported with the Search API path"
         full_query = build_repository_query(self.model.metadata, self.model.name, query)
         response = client.search_repositories(full_query)
-        return to_repo_info(response.get("items", []))
+        return self._to_repo_info(response.get("items", []))
+
+    def _to_repo_info(self, raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
+        """Map raw repository payloads to ``RepositoryInfo`` dicts.
+
+        ``name`` and ``full_name`` are required keys on each payload;
+        ``default_branch`` is optional and becomes ``None`` when absent.
+        The external id comes from ``self.get_repo_external_id``.
+        """
+        return [
+            {
+                "name": repo["name"],
+                "identifier": repo["full_name"],
+                "external_id": self.get_repo_external_id(repo),
+                "default_branch": repo.get("default_branch"),
+            }
+            for repo in raw_repos
+        ]
+
+    def get_repositories_paginated(
+        self,
+        offset: int = 0,
+        per_page: int = 100,
+    ) -> tuple[list[RepositoryInfo], bool]:
+        """Fetch a single page of repos from the GitHub API.
+
+        Converts the cursor offset to a 1-based GitHub page number and makes
+        one API call per page request. Archived repositories are dropped
+        from the returned page.
+
+        :param offset: Zero-based item offset of the page's first repo.
+            Negative values are treated as ``0``.
+        :param per_page: Page size; must be at least 1.
+        :returns: ``(repos, has_next)`` where ``has_next`` is derived from
+            the ``total_count`` reported by the API.
+        :raises ValueError: If ``per_page`` is less than 1.
+        """
+        if per_page < 1:
+            # Fail with a clear message instead of a ZeroDivisionError in the
+            # floor division below (or a meaningless page size sent upstream).
+            raise ValueError("per_page must be a positive integer")
+        # Clamp so a negative offset cannot produce page number 0 or below.
+        page_number = (max(0, offset) // per_page) + 1
+        client = self.get_client()
+        repos, total_count = client.get_repos_page(page=page_number, per_page=per_page)
+        active_repos = [r for r in repos if not r.get("archived")]
+        # total_count includes archived repos, so has_next may overestimate
+        # and pages may contain fewer than per_page items. Acceptable for
+        # infinite-scroll consumers (worst case: one extra empty fetch).
+        has_next = (page_number * per_page) < total_count
+        return self._to_repo_info(active_repos), has_next
 
     def get_unmigratable_repositories(self) -> list[RpcRepository]:
         accessible_repos = self.get_repositories()

@@ -87,6 +87,21 @@ def get_repositories(
         """
         raise NotImplementedError
 
+    def get_repositories_paginated(
+        self,
+        offset: int = 0,
+        per_page: int = 100,
+    ) -> tuple[list[RepositoryInfo], bool]:
+        """
+        Fetch one page of repositories together with a "more pages" flag.
+
+        Implementations return a ``(repos, has_next)`` tuple. This base
+        version always raises ``NotImplementedError``: providers without
+        paginated browsing simply do not override it, and the endpoint
+        reacts to that error by falling back to ``get_repositories()``.
+        """
+        raise NotImplementedError
+
 
 ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient")