Skip to content

Commit b691b5d

Browse files
authored
perf(github): Cache accessible repos for accessibleOnly search (#112548)
## Summary

- The `OrganizationIntegrationReposEndpoint` (`/integrations/{id}/repos/`) lets the frontend search for GitHub repos available to a GitHub App installation. When called with `accessibleOnly=true` and a search query (as the SCM onboarding repo selector does on each debounced keystroke), the previous implementation fetched all installation-accessible repos from the GitHub API (up to 50 pages of 100 = 5,000 repos) on every request, then filtered them with a Python list comprehension.
- Cache the full repo list in `sentry.cache.default_cache` (Redis) for 5 minutes and filter locally on subsequent requests, reducing each typed query from O(pages) GitHub API calls to zero while the cache is warm.

## Test plan

- [ ] Existing `get_repositories` tests pass (6/6)
- [ ] New `test_get_repositories_accessible_only_caches_repos` verifies that the cache-hit path skips `/installation/repositories` calls
- [ ] Manual testing: second `accessibleOnly` search returns instantly from cache

Refs VDY-68
1 parent a1a9615 commit b691b5d

File tree

13 files changed

+111
-27
lines changed

13 files changed

+111
-27
lines changed

src/sentry/integrations/api/endpoints/organization_integration_repos.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,11 @@ def get(
7272
accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true"
7373

7474
try:
75-
repositories = install.get_repositories(search, accessible_only=accessible_only)
75+
repositories = install.get_repositories(
76+
search,
77+
accessible_only=accessible_only,
78+
use_cache=accessible_only and bool(search),
79+
)
7680
except (IntegrationError, IdentityNotValid) as e:
7781
return self.respond({"detail": str(e)}, status=400)
7882

src/sentry/integrations/bitbucket/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def get_repositories(
137137
query: str | None = None,
138138
page_number_limit: int | None = None,
139139
accessible_only: bool = False,
140+
use_cache: bool = False,
140141
) -> list[RepositoryInfo]:
141142
username = self.model.metadata.get("uuid", self.username)
142143
if not query:

src/sentry/integrations/bitbucket_server/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ def get_repositories(
288288
query: str | None = None,
289289
page_number_limit: int | None = None,
290290
accessible_only: bool = False,
291+
use_cache: bool = False,
291292
) -> list[RepositoryInfo]:
292293
if not query:
293294
resp = self.get_client().get_repos()

src/sentry/integrations/example/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def get_repositories(
154154
query: str | None = None,
155155
page_number_limit: int | None = None,
156156
accessible_only: bool = False,
157+
use_cache: bool = False,
157158
) -> list[RepositoryInfo]:
158159
return [{"name": "repo", "identifier": "user/repo", "external_id": "1"}]
159160

src/sentry/integrations/github/client.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import orjson
1010
import sentry_sdk
11+
from django.core.cache import cache
1112
from requests import PreparedRequest
1213

1314
from sentry.constants import ObjectStatus
@@ -56,6 +57,14 @@
5657
JWT_AUTH_ROUTES = ("/app/installations", "access_tokens")
5758

5859

60+
class CachedRepo(TypedDict):
61+
id: int
62+
name: str
63+
full_name: str
64+
default_branch: str | None
65+
archived: bool | None
66+
67+
5968
class GithubRateLimitInfo:
6069
def __init__(self, info: dict[str, int]) -> None:
6170
self.limit = info["limit"]
@@ -549,6 +558,33 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
549558
page_number_limit=page_number_limit,
550559
)
551560

561+
def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]:
562+
"""
563+
Return all repos accessible to this installation, cached in
564+
Django cache for ``ttl`` seconds.
565+
566+
Only the fields used by get_repositories() are stored to keep
567+
the cache payload small.
568+
"""
569+
cache_key = f"github:repos:{self.integration.id}"
570+
cached = cache.get(cache_key)
571+
if cached is not None:
572+
return cached
573+
574+
all_repos = self.get_repos()
575+
repos: list[CachedRepo] = [
576+
{
577+
"id": r["id"],
578+
"name": r["name"],
579+
"full_name": r["full_name"],
580+
"default_branch": r.get("default_branch"),
581+
"archived": r.get("archived"),
582+
}
583+
for r in all_repos
584+
]
585+
cache.set(cache_key, repos, ttl)
586+
return repos
587+
552588
def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]:
553589
"""
554590
Find repositories matching a query.

src/sentry/integrations/github/integration.py

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
import re
5-
from collections.abc import Callable, Mapping, MutableMapping, Sequence
5+
from collections.abc import Callable, Iterable, Mapping, MutableMapping, Sequence
66
from dataclasses import dataclass
77
from enum import StrEnum
88
from typing import Any, NotRequired, TypedDict
@@ -325,46 +325,55 @@ def get_repositories(
325325
query: str | None = None,
326326
page_number_limit: int | None = None,
327327
accessible_only: bool = False,
328+
use_cache: bool = False,
328329
) -> list[RepositoryInfo]:
329330
"""
330331
args:
331332
* query - a query to filter the repositories by
332333
* accessible_only - when True with a query, fetch only installation-
333334
accessible repos and filter locally instead of using the Search API
334335
(which may return repos outside the installation's scope)
336+
* use_cache - when True, serve repos from a short-lived cache instead
337+
of re-fetching all pages from GitHub on every call
335338
336339
This fetches all repositories accessible to the Github App
337340
https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
338341
"""
339-
if not query or accessible_only:
340-
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)
341-
repos: list[RepositoryInfo] = [
342+
client = self.get_client()
343+
344+
def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
345+
return [
342346
{
343347
"name": i["name"],
344348
"identifier": i["full_name"],
345349
"external_id": self.get_repo_external_id(i),
346350
"default_branch": i.get("default_branch"),
347351
}
348-
for i in all_repos
349-
if not i.get("archived")
352+
for i in raw_repos
350353
]
351-
if query:
352-
query_lower = query.lower()
353-
repos = [r for r in repos if query_lower in str(r["identifier"]).lower()]
354-
return repos
355354

355+
def _get_all_repos():
356+
if use_cache:
357+
return client.get_repos_cached()
358+
return client.get_repos(page_number_limit=page_number_limit)
359+
360+
if not query:
361+
all_repos = _get_all_repos()
362+
return to_repo_info(r for r in all_repos if not r.get("archived"))
363+
364+
if accessible_only:
365+
all_repos = _get_all_repos()
366+
query_lower = query.lower()
367+
return to_repo_info(
368+
r
369+
for r in all_repos
370+
if not r.get("archived") and query_lower in r["full_name"].lower()
371+
)
372+
373+
assert not use_cache, "use_cache is not supported with the Search API path"
356374
full_query = build_repository_query(self.model.metadata, self.model.name, query)
357-
response = self.get_client().search_repositories(full_query)
358-
search_repos: list[RepositoryInfo] = [
359-
{
360-
"name": i["name"],
361-
"identifier": i["full_name"],
362-
"external_id": self.get_repo_external_id(i),
363-
"default_branch": i.get("default_branch"),
364-
}
365-
for i in response.get("items", [])
366-
]
367-
return search_repos
375+
response = client.search_repositories(full_query)
376+
return to_repo_info(response.get("items", []))
368377

369378
def get_unmigratable_repositories(self) -> list[RpcRepository]:
370379
accessible_repos = self.get_repositories()

src/sentry/integrations/github_enterprise/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def get_repositories(
226226
query: str | None = None,
227227
page_number_limit: int | None = None,
228228
accessible_only: bool = False,
229+
use_cache: bool = False,
229230
) -> list[RepositoryInfo]:
230231
if not query:
231232
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)

src/sentry/integrations/gitlab/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ def get_repositories(
179179
query: str | None = None,
180180
page_number_limit: int | None = None,
181181
accessible_only: bool = False,
182+
use_cache: bool = False,
182183
) -> list[RepositoryInfo]:
183184
try:
184185
# Note: gitlab projects are the same things as repos everywhere else

src/sentry/integrations/perforce/integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ def get_repositories(
361361
query: str | None = None,
362362
page_number_limit: int | None = None,
363363
accessible_only: bool = False,
364+
use_cache: bool = False,
364365
) -> list[RepositoryInfo]:
365366
"""
366367
Get list of depots/streams from Perforce server.

src/sentry/integrations/source_code_management/repository.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def get_repositories(
5959
query: str | None = None,
6060
page_number_limit: int | None = None,
6161
accessible_only: bool = False,
62+
use_cache: bool = False,
6263
) -> list[RepositoryInfo]:
6364
"""
6465
Get a list of available repositories for an installation

0 commit comments

Comments
 (0)