Skip to content

Commit ae08ef3

Browse files
committed
feat(integrations): Add paginated repo fetching for GitHub integration
Add opt-in cursor-based pagination to the integration repos endpoint, scoped to GitHub only. When `paginate=true` is passed (and no search query), the endpoint calls `get_repositories_page` which fetches a single page from GitHub's `/installation/repositories` API and returns standard Sentry cursor pagination headers. This avoids aggregating all GitHub pages into one response, which is slow for large installations. The paginated path is gated behind `paginate=true` and duck-typed via `hasattr(install, "get_repositories_page")`, so only GitHub is affected. All existing callers get unchanged behavior. Also extracts `_format_repos` to share repo formatting between `get_repositories` and `get_repositories_page`, and `_serialize_repos` to share serialization in the endpoint. Default page size is 25. The `per_page` query param allows callers to override up to 100. Refs VDY-46
1 parent b32d74d commit ae08ef3

File tree

4 files changed

+210
-25
lines changed

4 files changed

+210
-25
lines changed

src/sentry/integrations/api/endpoints/organization_integration_repos.py

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from sentry.models.organization import Organization
1616
from sentry.models.repository import Repository
1717
from sentry.shared_integrations.exceptions import IntegrationError
18+
from sentry.utils.cursors import Cursor, CursorResult
1819

1920

2021
class IntegrationRepository(TypedDict):
@@ -69,28 +70,65 @@ def get(
6970
if isinstance(install, RepositoryIntegration):
7071
search = request.GET.get("search")
7172
accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true"
73+
paginate = request.GET.get("paginate", "false").lower() == "true"
74+
75+
# Paginated path: opt-in via paginate=true, only when not
76+
# searching, and only for integrations that support it.
77+
if paginate and not search and hasattr(install, "get_repositories_page"):
78+
cursor_param = request.GET.get("cursor")
79+
cursor = Cursor.from_string(cursor_param) if cursor_param else Cursor(0, 0, False)
80+
per_page = min(int(request.GET.get("per_page", 25)), 100)
81+
page_number = (cursor.offset // per_page) + 1
82+
83+
try:
84+
repositories, has_next = install.get_repositories_page(
85+
page=page_number, per_page=per_page
86+
)
87+
except (IntegrationError, IdentityNotValid) as e:
88+
return self.respond({"detail": str(e)}, status=400)
89+
90+
response = self.respond(
91+
{
92+
"repos": self._serialize_repos(repositories, installed_repo_names, request),
93+
"searchable": install.repo_search,
94+
}
95+
)
96+
cursor_result = CursorResult(
97+
results=[],
98+
prev=Cursor(0, max(0, cursor.offset - per_page), True, cursor.offset > 0),
99+
next=Cursor(0, cursor.offset + per_page, False, has_next),
100+
)
101+
self.add_cursor_headers(request, response, cursor_result)
102+
return response
72103

73104
try:
74105
repositories = install.get_repositories(search, accessible_only=accessible_only)
75106
except (IntegrationError, IdentityNotValid) as e:
76107
return self.respond({"detail": str(e)}, status=400)
77108

78-
installable_only = request.GET.get("installableOnly", "false").lower() == "true"
79-
80-
# Include a repository if the request is for all repositories, or if we want
81-
# installable-only repositories and the repository isn't already installed
82-
serialized_repositories = [
83-
IntegrationRepository(
84-
name=repo["name"],
85-
identifier=repo["identifier"],
86-
defaultBranch=repo.get("default_branch"),
87-
isInstalled=repo["identifier"] in installed_repo_names,
88-
)
89-
for repo in repositories
90-
if not installable_only or repo["identifier"] not in installed_repo_names
91-
]
92109
return self.respond(
93-
{"repos": serialized_repositories, "searchable": install.repo_search}
110+
{
111+
"repos": self._serialize_repos(repositories, installed_repo_names, request),
112+
"searchable": install.repo_search,
113+
}
94114
)
95115

96116
return self.respond({"detail": "Repositories not supported"}, status=400)
117+
118+
@staticmethod
def _serialize_repos(
    repositories: list[dict[str, Any]],
    installed_repo_names: set[str],
    request: Request,
) -> list[IntegrationRepository]:
    """
    Convert provider repository dicts into the endpoint's response entries.

    Every entry is flagged with whether its identifier appears in
    ``installed_repo_names``. When the request carries
    ``installableOnly=true``, entries that are already installed are
    omitted from the result.
    """
    skip_installed = request.GET.get("installableOnly", "false").lower() == "true"

    serialized: list[IntegrationRepository] = []
    for repo in repositories:
        if skip_installed and repo["identifier"] in installed_repo_names:
            continue
        serialized.append(
            IntegrationRepository(
                name=repo["name"],
                identifier=repo["identifier"],
                defaultBranch=repo.get("default_branch"),
                isInstalled=repo["identifier"] in installed_repo_names,
            )
        )
    return serialized

src/sentry/integrations/github/client.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,29 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
549549
page_number_limit=page_number_limit,
550550
)
551551

552+
def get_repos_page(
    self, page: int = 1, per_page: int | None = None
) -> tuple[list[dict[str, Any]], bool]:
    """
    Request one page of ``/installation/repositories`` for this installation.

    Returns a ``(repositories, has_next_page)`` tuple: the ``"repositories"``
    entry of the response, and whether ``get_next_link`` found a next link
    on it. ``per_page`` falls back to ``self.page_size`` when it is None.

    In contrast to get_repos(), no further pages are followed, so callers
    can drive cursor-based pagination themselves.
    """
    query_params = {
        "per_page": self.page_size if per_page is None else per_page,
        "page": page,
    }
    interaction = SCMIntegrationInteractionEvent(
        interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES,
        provider_key=self.integration_name,
        integration_id=self.integration.id,
    )
    with interaction.capture():
        resp = self.get("/installation/repositories", params=query_params)
        repositories = resp["repositories"]
        has_next_page = get_next_link(resp) is not None
        return repositories, has_next_page
574+
552575
def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]:
553576
"""
554577
Find repositories matching a query.

src/sentry/integrations/github/integration.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -335,29 +335,36 @@ def get_repositories(
335335
"""
336336
if not query or accessible_only:
337337
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)
338-
repos = [
339-
{
340-
"name": i["name"],
341-
"identifier": i["full_name"],
342-
"default_branch": i.get("default_branch"),
343-
}
344-
for i in all_repos
345-
if not i.get("archived")
346-
]
338+
repos = self._format_repos(all_repos)
347339
if query:
348340
query_lower = query.lower()
349341
repos = [r for r in repos if query_lower in r["identifier"].lower()]
350342
return repos
351343

352344
full_query = build_repository_query(self.model.metadata, self.model.name, query)
353345
response = self.get_client().search_repositories(full_query)
346+
return self._format_repos(response.get("items", []))
347+
348+
def get_repositories_page(
    self, page: int = 1, per_page: int = 25
) -> tuple[list[dict[str, Any]], bool]:
    """
    Return one page of repositories in the integration's formatted shape.

    The page is fetched through the client's get_repos_page() and passed
    through _format_repos(), which drops archived repositories. The result
    is ``(formatted_repos, has_next_page)``.
    """
    client = self.get_client()
    page_of_repos, more_pages = client.get_repos_page(page=page, per_page=per_page)
    formatted = self._format_repos(page_of_repos)
    return formatted, more_pages
357+
358+
@staticmethod
359+
def _format_repos(raw_repos: list[dict[str, Any]]) -> list[dict[str, Any]]:
354360
return [
355361
{
356362
"name": i["name"],
357363
"identifier": i["full_name"],
358364
"default_branch": i.get("default_branch"),
359365
}
360-
for i in response.get("items", [])
366+
for i in raw_repos
367+
if not i.get("archived")
361368
]
362369

363370
def get_unmigratable_repositories(self) -> list[RpcRepository]:

tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,123 @@ def test_accessible_only_with_installable_only(self, get_repositories: MagicMock
262262
"searchable": True,
263263
}
264264

265+
@patch(
    "sentry.integrations.github.integration.GitHubIntegration.get_repositories_page",
    return_value=([], False),
)
def test_paginate_first_page(self, get_repositories_page: MagicMock) -> None:
    """paginate=true uses get_repositories_page for single-page fetching."""
    get_repositories_page.return_value = (
        [
            {"name": "repo-a", "identifier": "Example/repo-a", "default_branch": "main"},
            {"name": "repo-b", "identifier": "Example/repo-b", "default_branch": "main"},
        ],
        True,
    )
    response = self.client.get(self.path, format="json", data={"paginate": "true"})

    assert response.status_code == 200, response.content
    get_repositories_page.assert_called_once_with(page=1, per_page=25)
    assert response.data == {
        "repos": [
            {
                "name": "repo-a",
                "identifier": "Example/repo-a",
                "defaultBranch": "main",
                "isInstalled": False,
            },
            {
                "name": "repo-b",
                "identifier": "Example/repo-b",
                "defaultBranch": "main",
                "isInstalled": False,
            },
        ],
        "searchable": True,
    }
    # Pin each rel separately: a bare 'results="true"' check would also pass
    # if only the previous link claimed results.
    # has_next=True, so the next link must advertise results ...
    assert 'rel="next"; results="true"' in response["Link"]
    # ... and on the first page (offset 0) there is nothing before it.
    assert 'rel="previous"; results="false"' in response["Link"]
300+
301+
@patch(
    "sentry.integrations.github.integration.GitHubIntegration.get_repositories_page",
    return_value=([], False),
)
def test_paginate_second_page(self, get_repositories_page: MagicMock) -> None:
    """A cursor with offset 25 requests page 2 at the default page size."""
    last_page = [{"name": "repo-c", "identifier": "Example/repo-c", "default_branch": "main"}]
    get_repositories_page.return_value = (last_page, False)

    query = {"paginate": "true", "cursor": "0:25:0"}
    response = self.client.get(self.path, format="json", data=query)

    assert response.status_code == 200, response.content
    get_repositories_page.assert_called_once_with(page=2, per_page=25)

    link_header = response["Link"]
    # No further page was reported, so "next" carries no results.
    assert 'rel="next"; results="false"' in link_header
    # The offset is past zero, so "previous" does carry results.
    assert 'rel="previous"; results="true"' in link_header
321+
322+
@patch(
    "sentry.integrations.github.integration.GitHubIntegration.get_repositories",
    return_value=[],
)
def test_paginate_with_search_falls_through(self, get_repositories: MagicMock) -> None:
    """A search term takes precedence: paginate=true is ignored and no Link header is set."""
    matching_repo = {
        "name": "rad-repo",
        "identifier": "Example/rad-repo",
        "default_branch": "main",
    }
    get_repositories.return_value = [matching_repo]

    query = {"paginate": "true", "search": "rad"}
    response = self.client.get(self.path, format="json", data=query)

    assert response.status_code == 200, response.content
    get_repositories.assert_called_once()
    assert "Link" not in response
338+
339+
@patch(
    "sentry.integrations.github.integration.GitHubIntegration.get_repositories",
    return_value=[],
)
def test_no_paginate_param_uses_existing_path(self, get_repositories: MagicMock) -> None:
    """A request without paginate goes through get_repositories and sets no Link header."""
    only_repo = {
        "name": "rad-repo",
        "identifier": "Example/rad-repo",
        "default_branch": "main",
    }
    get_repositories.return_value = [only_repo]

    response = self.client.get(self.path, format="json")

    assert response.status_code == 200, response.content
    get_repositories.assert_called_once()
    assert "Link" not in response
353+
354+
@patch(
    "sentry.integrations.github.integration.GitHubIntegration.get_repositories_page",
    return_value=([], False),
)
def test_paginate_installable_only(self, get_repositories_page: MagicMock) -> None:
    """installableOnly=true drops already-installed repos from a paginated response."""
    page_repos = [
        {"name": "installed", "identifier": "Example/installed", "default_branch": "main"},
        {"name": "new-repo", "identifier": "Example/new-repo", "default_branch": "main"},
    ]
    get_repositories_page.return_value = (page_repos, False)

    # Register one of the two repos so the filter has something to drop.
    self.create_repo(
        project=self.project,
        integration_id=self.integration.id,
        name="Example/installed",
    )

    query = {"paginate": "true", "installableOnly": "true"}
    response = self.client.get(self.path, format="json", data=query)

    assert response.status_code == 200, response.content
    returned_repos = response.data["repos"]
    assert len(returned_repos) == 1
    assert returned_repos[0]["identifier"] == "Example/new-repo"
381+
265382
def test_no_repository_method(self) -> None:
266383
integration = self.create_integration(
267384
organization=self.org, provider="jira", name="Example", external_id="example:1"

0 commit comments

Comments
 (0)