Skip to content

Commit bc5a262

Browse files
committed
ref(github): Use real GitHub API pagination instead of cached list slicing
Paginated repo browsing now makes a single GitHub API call per page via GET /installation/repositories?page=N&per_page=M instead of fetching all repos into a cache and slicing. This avoids loading the full repo list into memory on every request. Also switches to the built-in get_per_page() and get_cursor_from_request() helpers, raises NotImplementedError in the base class instead of returning None, and removes the custom _parse_cursor method. Refs VDY-46
1 parent 41c5d02 commit bc5a262

File tree

5 files changed

+96
-138
lines changed

5 files changed

+96
-138
lines changed

src/sentry/integrations/api/endpoints/organization_integration_repos.py

Lines changed: 10 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,19 @@ def get(
8585
# continue to receive the full list.
8686
paginate = "per_page" in request.GET and not search
8787
if paginate:
88+
per_page = self.get_per_page(request)
89+
cursor = self.get_cursor_from_request(request)
90+
offset = max(0, cursor.offset) if cursor is not None else 0
8891
try:
89-
per_page = max(1, min(int(request.GET["per_page"]), 100))
90-
except (ValueError, TypeError):
91-
per_page = 100
92-
cursor = self._parse_cursor(request)
93-
offset = max(0, cursor.offset)
94-
try:
95-
paginated = install.get_repositories_paginated(offset=offset, per_page=per_page)
92+
repositories, has_next = install.get_repositories_paginated(
93+
offset=offset, per_page=per_page
94+
)
95+
except NotImplementedError:
96+
paginate = False
9697
except (IntegrationError, IdentityNotValid) as e:
9798
return self.respond({"detail": str(e)}, status=400)
98-
else:
99-
paginated = None
10099

101-
if paginated is not None:
102-
repositories, has_next = paginated
103-
else:
100+
if not paginate:
104101
try:
105102
repositories = install.get_repositories(
106103
search,
@@ -113,11 +110,6 @@ def get(
113110

114111
installable_only = request.GET.get("installableOnly", "false").lower() == "true"
115112

116-
# NOTE: installableOnly filtering happens after pagination slicing,
117-
# so pages may contain fewer items than per_page when installed repos
118-
# are filtered out. This is acceptable for the infinite-scroll use
119-
# case (SCM onboarding) but would need to move before slicing if
120-
# exact page sizes matter for a future consumer.
121113
serialized_repositories = [
122114
IntegrationRepository(
123115
name=repo["name"],
@@ -133,9 +125,7 @@ def get(
133125
{"repos": serialized_repositories, "searchable": install.repo_search}
134126
)
135127

136-
# per_page and offset are guaranteed set when paginated is not None
137-
if paginated is not None and (has_next or offset > 0):
138-
# CursorResult only used for Link header generation
128+
if paginate and (has_next or offset > 0):
139129
cursor_result: CursorResult[IntegrationRepository] = CursorResult(
140130
results=[],
141131
prev=Cursor(0, max(0, offset - per_page), True, offset > 0),
@@ -146,13 +136,3 @@ def get(
146136
return response
147137

148138
return self.respond({"detail": "Repositories not supported"}, status=400)
149-
150-
@staticmethod
151-
def _parse_cursor(request: Request) -> Cursor:
152-
cursor_param = request.GET.get("cursor", "")
153-
if not cursor_param:
154-
return Cursor(0, 0, False)
155-
try:
156-
return Cursor.from_string(cursor_param)
157-
except (ValueError, TypeError):
158-
return Cursor(0, 0, False)

src/sentry/integrations/github/client.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,26 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
558558
page_number_limit=page_number_limit,
559559
)
560560

561+
def get_repos_page(
562+
self, page: int = 1, per_page: int = 100
563+
) -> tuple[list[dict[str, Any]], int]:
564+
"""
565+
Fetch a single page of repositories accessible to this installation.
566+
567+
Returns (repositories, total_count).
568+
https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
569+
"""
570+
with SCMIntegrationInteractionEvent(
571+
interaction_type=SCMIntegrationInteractionType.GET_REPOSITORIES,
572+
provider_key=self.integration_name,
573+
integration_id=self.integration.id,
574+
).capture():
575+
response = self.get(
576+
"/installation/repositories",
577+
params={"per_page": per_page, "page": page},
578+
)
579+
return response["repositories"], response["total_count"]
580+
561581
def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]:
562582
"""
563583
Return all repos accessible to this installation, cached in

src/sentry/integrations/github/integration.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -380,21 +380,17 @@ def get_repositories_paginated(
380380
offset: int = 0,
381381
per_page: int = 100,
382382
) -> tuple[list[RepositoryInfo], bool]:
383-
"""Paginate over the cached accessible-repos list.
383+
"""Fetch a single page of repos from the GitHub API.
384384
385-
Always serves from ``get_repos_cached()`` so every page is
386-
fast after the initial cache warm-up. If the cache expires
387-
between page fetches the underlying list can change,
388-
which may cause duplicates or skipped repos across pages.
389-
This is acceptable for infinite-scroll consumers.
385+
Converts the cursor offset to a GitHub page number and makes
386+
one API call per page request.
390387
"""
391388
client = self.get_client()
392-
all_repos = client.get_repos_cached()
393-
repos = [r for r in all_repos if not r.get("archived")]
394-
395-
page = repos[offset : offset + per_page]
396-
has_next = len(repos) > offset + per_page
397-
return self._to_repo_info(page), has_next
389+
page_number = (offset // per_page) + 1
390+
repos, total_count = client.get_repos_page(page=page_number, per_page=per_page)
391+
active_repos = [r for r in repos if not r.get("archived")]
392+
has_next = (page_number * per_page) < total_count
393+
return self._to_repo_info(active_repos), has_next
398394

399395
def get_unmigratable_repositories(self) -> list[RpcRepository]:
400396
accessible_repos = self.get_repositories()

src/sentry/integrations/source_code_management/repository.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,16 @@ def get_repositories_paginated(
9191
self,
9292
offset: int = 0,
9393
per_page: int = 100,
94-
) -> tuple[list[RepositoryInfo], bool] | None:
94+
) -> tuple[list[RepositoryInfo], bool]:
9595
"""
9696
Return a page of repositories and whether more pages exist.
9797
98-
Returns ``(repos, has_next)`` for providers that support
99-
paginated browsing, or ``None`` if the provider does not
100-
implement pagination (callers should fall back to
101-
``get_repositories()``).
98+
Returns ``(repos, has_next)``. Providers that don't support
99+
paginated browsing should leave this unimplemented; the
100+
endpoint catches ``NotImplementedError`` and falls back to
101+
``get_repositories()``.
102102
"""
103-
return None
103+
raise NotImplementedError
104104

105105

106106
ClientT = TypeVar("ClientT", bound="RepositoryClient", default="RepositoryClient")

tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py

Lines changed: 52 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1+
from typing import Any
12
from unittest.mock import MagicMock, patch
23

3-
from sentry.integrations.github.client import CachedRepo
44
from sentry.testutils.cases import APITestCase
55

66

7-
def _make_cached_repo(
7+
def _make_github_repo(
88
id: int,
99
name: str,
1010
full_name: str,
1111
default_branch: str | None = "main",
1212
archived: bool = False,
13-
) -> CachedRepo:
13+
) -> dict[str, Any]:
1414
return {
1515
"id": id,
1616
"name": name,
@@ -289,11 +289,13 @@ def test_no_repository_method(self) -> None:
289289
assert response.status_code == 400
290290

291291

292-
CACHED_REPOS = [_make_cached_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)]
292+
ALL_REPOS = [_make_github_repo(i, f"repo-{i}", f"Example/repo-{i}") for i in range(1, 6)]
293+
TOTAL_COUNT = len(ALL_REPOS)
294+
MOCK_TARGET = "sentry.integrations.github.client.GitHubBaseClient.get_repos_page"
293295

294296

295297
class OrganizationIntegrationReposPaginatedTest(APITestCase):
296-
"""Tests for cursor-based pagination triggered by sending per_page."""
298+
"""Tests for cursor-based pagination via GitHub API page params."""
297299

298300
def setUp(self) -> None:
299301
super().setUp()
@@ -307,14 +309,13 @@ def setUp(self) -> None:
307309
f"/api/0/organizations/{self.org.slug}/integrations/{self.integration.id}/repos/"
308310
)
309311

310-
@patch(
311-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
312-
)
313-
def test_first_page(self, mock_cache: MagicMock) -> None:
314-
mock_cache.return_value = CACHED_REPOS
312+
@patch(MOCK_TARGET)
313+
def test_first_page(self, mock_page: MagicMock) -> None:
314+
mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT)
315315
response = self.client.get(self.path, data={"per_page": "2"}, format="json")
316316

317317
assert response.status_code == 200, response.content
318+
mock_page.assert_called_once_with(page=1, per_page=2)
318319
repos = response.data["repos"]
319320
assert len(repos) == 2
320321
assert repos[0]["identifier"] == "Example/repo-1"
@@ -323,61 +324,61 @@ def test_first_page(self, mock_cache: MagicMock) -> None:
323324
assert 'rel="next"' in response["Link"]
324325
assert 'results="true"' in response["Link"].split("next")[1]
325326

326-
@patch(
327-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
328-
)
329-
def test_second_page(self, mock_cache: MagicMock) -> None:
330-
mock_cache.return_value = CACHED_REPOS
327+
@patch(MOCK_TARGET)
328+
def test_second_page(self, mock_page: MagicMock) -> None:
329+
mock_page.return_value = (ALL_REPOS[2:4], TOTAL_COUNT)
331330
response = self.client.get(
332331
self.path, data={"per_page": "2", "cursor": "0:2:0"}, format="json"
333332
)
334333

335334
assert response.status_code == 200, response.content
335+
mock_page.assert_called_once_with(page=2, per_page=2)
336336
repos = response.data["repos"]
337337
assert len(repos) == 2
338338
assert repos[0]["identifier"] == "Example/repo-3"
339339
assert repos[1]["identifier"] == "Example/repo-4"
340340

341-
@patch(
342-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
343-
)
344-
def test_last_page(self, mock_cache: MagicMock) -> None:
345-
mock_cache.return_value = CACHED_REPOS
341+
@patch(MOCK_TARGET)
342+
def test_last_page(self, mock_page: MagicMock) -> None:
343+
mock_page.return_value = (ALL_REPOS[4:], TOTAL_COUNT)
346344
response = self.client.get(
347345
self.path, data={"per_page": "2", "cursor": "0:4:0"}, format="json"
348346
)
349347

350348
assert response.status_code == 200, response.content
349+
mock_page.assert_called_once_with(page=3, per_page=2)
351350
repos = response.data["repos"]
352351
assert len(repos) == 1
353352
assert repos[0]["identifier"] == "Example/repo-5"
354353
link = response["Link"]
355354
next_part = link.split("next")[1]
356355
assert 'results="false"' in next_part
357356

358-
@patch(
359-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
360-
)
361-
def test_excludes_archived(self, mock_cache: MagicMock) -> None:
362-
mock_cache.return_value = [
363-
_make_cached_repo(1, "active", "Example/active"),
364-
_make_cached_repo(2, "archived", "Example/archived", archived=True),
365-
]
357+
@patch(MOCK_TARGET)
358+
def test_excludes_archived(self, mock_page: MagicMock) -> None:
359+
mock_page.return_value = (
360+
[
361+
_make_github_repo(1, "active", "Example/active"),
362+
_make_github_repo(2, "archived", "Example/archived", archived=True),
363+
],
364+
2,
365+
)
366366
response = self.client.get(self.path, data={"per_page": "100"}, format="json")
367367

368368
assert response.status_code == 200, response.content
369369
repos = response.data["repos"]
370370
assert len(repos) == 1
371371
assert repos[0]["identifier"] == "Example/active"
372372

373-
@patch(
374-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
375-
)
376-
def test_installable_only(self, mock_cache: MagicMock) -> None:
377-
mock_cache.return_value = [
378-
_make_cached_repo(1, "installed-repo", "Example/installed-repo"),
379-
_make_cached_repo(2, "available-repo", "Example/available-repo"),
380-
]
373+
@patch(MOCK_TARGET)
374+
def test_installable_only(self, mock_page: MagicMock) -> None:
375+
mock_page.return_value = (
376+
[
377+
_make_github_repo(1, "installed-repo", "Example/installed-repo"),
378+
_make_github_repo(2, "available-repo", "Example/available-repo"),
379+
],
380+
2,
381+
)
381382
self.create_repo(
382383
project=self.project,
383384
integration_id=self.integration.id,
@@ -393,12 +394,10 @@ def test_installable_only(self, mock_cache: MagicMock) -> None:
393394
assert repos[0]["identifier"] == "Example/available-repo"
394395
assert repos[0]["isInstalled"] is False
395396

396-
@patch(
397-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
398-
)
399-
def test_no_cursor_on_single_page(self, mock_cache: MagicMock) -> None:
397+
@patch(MOCK_TARGET)
398+
def test_no_cursor_on_single_page(self, mock_page: MagicMock) -> None:
400399
"""When all repos fit in one page, no Link header is added."""
401-
mock_cache.return_value = [_make_cached_repo(1, "repo-1", "Example/repo-1")]
400+
mock_page.return_value = ([_make_github_repo(1, "repo-1", "Example/repo-1")], 1)
402401
response = self.client.get(self.path, data={"per_page": "100"}, format="json")
403402

404403
assert response.status_code == 200, response.content
@@ -434,67 +433,30 @@ def test_search_with_per_page_uses_full_list(self, get_repositories: MagicMock)
434433
get_repositories.assert_called_once_with("repo", accessible_only=False, use_cache=False)
435434
assert "Link" not in response
436435

437-
@patch(
438-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
439-
)
440-
def test_per_page_zero_clamped_to_one(self, mock_cache: MagicMock) -> None:
441-
mock_cache.return_value = CACHED_REPOS
442-
response = self.client.get(self.path, data={"per_page": "0"}, format="json")
436+
def test_per_page_invalid_returns_400(self) -> None:
437+
"""Invalid per_page values (0, negative, non-numeric, >100) return 400."""
438+
for value in ("0", "-1", "abc", "200"):
439+
response = self.client.get(self.path, data={"per_page": value}, format="json")
440+
assert response.status_code == 400, f"per_page={value} should return 400"
443441

444-
assert response.status_code == 200, response.content
445-
assert len(response.data["repos"]) == 1
446-
447-
@patch(
448-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
449-
)
450-
def test_per_page_negative_clamped_to_one(self, mock_cache: MagicMock) -> None:
451-
mock_cache.return_value = CACHED_REPOS
452-
response = self.client.get(self.path, data={"per_page": "-1"}, format="json")
453-
454-
assert response.status_code == 200, response.content
455-
assert len(response.data["repos"]) == 1
456-
457-
@patch(
458-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
459-
)
460-
def test_per_page_non_numeric_defaults_to_100(self, mock_cache: MagicMock) -> None:
461-
mock_cache.return_value = CACHED_REPOS
462-
response = self.client.get(self.path, data={"per_page": "abc"}, format="json")
463-
464-
assert response.status_code == 200, response.content
465-
assert len(response.data["repos"]) == 5
466-
467-
@patch(
468-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
469-
)
470-
def test_per_page_over_max_clamped_to_100(self, mock_cache: MagicMock) -> None:
471-
mock_cache.return_value = CACHED_REPOS
472-
response = self.client.get(self.path, data={"per_page": "200"}, format="json")
473-
474-
assert response.status_code == 200, response.content
475-
assert len(response.data["repos"]) == 5
476-
477-
@patch(
478-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
479-
)
480-
def test_negative_cursor_offset_clamped_to_zero(self, mock_cache: MagicMock) -> None:
481-
mock_cache.return_value = CACHED_REPOS
442+
@patch(MOCK_TARGET)
443+
def test_negative_cursor_offset_clamped_to_zero(self, mock_page: MagicMock) -> None:
444+
mock_page.return_value = (ALL_REPOS[:2], TOTAL_COUNT)
482445
response = self.client.get(
483446
self.path, data={"per_page": "2", "cursor": "0:-5:0"}, format="json"
484447
)
485448

486449
assert response.status_code == 200, response.content
450+
mock_page.assert_called_once_with(page=1, per_page=2)
487451
repos = response.data["repos"]
488452
assert len(repos) == 2
489453
assert repos[0]["identifier"] == "Example/repo-1"
490454

491-
@patch(
492-
"sentry.integrations.github.client.GitHubBaseClient.get_repos_cached",
493-
)
494-
def test_integration_error_returns_400(self, mock_cache: MagicMock) -> None:
455+
@patch(MOCK_TARGET)
456+
def test_integration_error_returns_400(self, mock_page: MagicMock) -> None:
495457
from sentry.shared_integrations.exceptions import IntegrationError
496458

497-
mock_cache.side_effect = IntegrationError("token revoked")
459+
mock_page.side_effect = IntegrationError("token revoked")
498460
response = self.client.get(self.path, data={"per_page": "2"}, format="json")
499461

500462
assert response.status_code == 400

0 commit comments

Comments
 (0)