Skip to content

Commit 1d3c1d8

Browse files
committed
perf(github): Cache accessible repo IDs for accessibleOnly search
When accessibleOnly=true with a search query, the old path fetched all installation repos (up to 5,000) on every debounced keystroke, then filtered with a Python list comprehension. Replace this with a cached set of accessible repo IDs (5-min Redis TTL) combined with the GitHub Search API, reducing each typed query from O(pages) API calls to a single search call plus a Redis lookup. Refs VDY-68
1 parent fb92eb8 commit 1d3c1d8

File tree

3 files changed

+118
-12
lines changed

3 files changed

+118
-12
lines changed

src/sentry/integrations/github/client.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import orjson
1010
import sentry_sdk
11+
from django.core.cache import cache
1112
from requests import PreparedRequest
1213

1314
from sentry.constants import ObjectStatus
@@ -549,6 +550,22 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
549550
page_number_limit=page_number_limit,
550551
)
551552

553+
def get_accessible_repo_ids(self, ttl: int = 300) -> set[int]:
554+
"""
555+
Return the set of GitHub repo IDs accessible to this installation.
556+
Cached in Django cache (Redis) for ``ttl`` seconds to avoid
557+
re-fetching all pages on every keystroke.
558+
"""
559+
cache_key = f"github:accessible_repo_ids:{self.integration.id}"
560+
cached = cache.get(cache_key)
561+
if cached is not None:
562+
return set(cached)
563+
564+
all_repos = self.get_repos()
565+
repo_ids = {r["id"] for r in all_repos if not r.get("archived")}
566+
cache.set(cache_key, list(repo_ids), ttl)
567+
return repo_ids
568+
552569
def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]:
553570
"""
554571
Find repositories matching a query.

src/sentry/integrations/github/integration.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,12 @@ def get_repositories(
336336
This fetches all repositories accessible to the Github App
337337
https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
338338
"""
339-
if not query or accessible_only:
340-
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)
341-
repos: list[RepositoryInfo] = [
339+
client = self.get_client()
340+
341+
# No query: fetch all accessible repos (existing behavior)
342+
if not query:
343+
all_repos = client.get_repos(page_number_limit=page_number_limit)
344+
return [
342345
{
343346
"name": i["name"],
344347
"identifier": i["full_name"],
@@ -348,14 +351,27 @@ def get_repositories(
348351
for i in all_repos
349352
if not i.get("archived")
350353
]
351-
if query:
352-
query_lower = query.lower()
353-
repos = [r for r in repos if query_lower in str(r["identifier"]).lower()]
354-
return repos
355354

355+
# Query + accessible_only: use Search API + cached ID set
356+
if accessible_only:
357+
accessible_ids = client.get_accessible_repo_ids()
358+
full_query = build_repository_query(self.model.metadata, self.model.name, query)
359+
response = client.search_repositories(full_query)
360+
return [
361+
{
362+
"name": i["name"],
363+
"identifier": i["full_name"],
364+
"external_id": self.get_repo_external_id(i),
365+
"default_branch": i.get("default_branch"),
366+
}
367+
for i in response.get("items", [])
368+
if i["id"] in accessible_ids
369+
]
370+
371+
# Query without accessible_only: existing search behavior
356372
full_query = build_repository_query(self.model.metadata, self.model.name, query)
357-
response = self.get_client().search_repositories(full_query)
358-
search_repos: list[RepositoryInfo] = [
373+
response = client.search_repositories(full_query)
374+
return [
359375
{
360376
"name": i["name"],
361377
"identifier": i["full_name"],
@@ -364,7 +380,6 @@ def get_repositories(
364380
}
365381
for i in response.get("items", [])
366382
]
367-
return search_repos
368383

369384
def get_unmigratable_repositories(self) -> list[RpcRepository]:
370385
accessible_repos = self.get_repositories()

tests/sentry/integrations/github/test_integration.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -679,16 +679,40 @@ def test_get_repositories_search_param(self) -> None:
679679

680680
@responses.activate
681681
def test_get_repositories_accessible_only(self) -> None:
682-
"""When accessible_only=True, fetches installation repos and filters locally."""
682+
"""accessible_only+query uses Search API filtered by cached accessible IDs."""
683683
with self.tasks():
684684
self.assert_setup_flow()
685685

686+
querystring = urlencode({"q": "fork:true org:Test Organization foo"})
687+
responses.add(
688+
responses.GET,
689+
f"{self.base_url}/search/repositories?{querystring}",
690+
json={
691+
"items": [
692+
{
693+
"id": 1296269,
694+
"name": "foo",
695+
"full_name": "Test-Organization/foo",
696+
"default_branch": "master",
697+
},
698+
{
699+
"id": 9999999,
700+
"name": "foo-external",
701+
"full_name": "Other-Org/foo-external",
702+
"default_branch": "main",
703+
},
704+
]
705+
},
706+
)
707+
686708
integration = Integration.objects.get(provider=self.provider.key)
687709
installation = get_installation_of_type(
688710
GitHubIntegration, integration, self.organization.id
689711
)
690712

691713
result = installation.get_repositories("foo", accessible_only=True)
714+
# foo-external is filtered out: its id (9999999) is the archived repo's id,
715+
# which is excluded from the accessible set
692716
assert result == [
693717
{
694718
"name": "foo",
@@ -700,10 +724,17 @@ def test_get_repositories_accessible_only(self) -> None:
700724

701725
@responses.activate
702726
def test_get_repositories_accessible_only_no_match(self) -> None:
703-
"""When accessible_only=True and nothing matches, returns empty list."""
727+
"""When accessible_only=True and search returns no accessible repos, returns empty list."""
704728
with self.tasks():
705729
self.assert_setup_flow()
706730

731+
querystring = urlencode({"q": "fork:true org:Test Organization nonexistent"})
732+
responses.add(
733+
responses.GET,
734+
f"{self.base_url}/search/repositories?{querystring}",
735+
json={"items": []},
736+
)
737+
707738
integration = Integration.objects.get(provider=self.provider.key)
708739
installation = get_installation_of_type(
709740
GitHubIntegration, integration, self.organization.id
@@ -712,6 +743,49 @@ def test_get_repositories_accessible_only_no_match(self) -> None:
712743
result = installation.get_repositories("nonexistent", accessible_only=True)
713744
assert result == []
714745

746+
@responses.activate
747+
def test_get_repositories_accessible_only_caches_ids(self) -> None:
748+
"""Second accessible_only call uses cached IDs instead of re-fetching all repos."""
749+
with self.tasks():
750+
self.assert_setup_flow()
751+
752+
querystring = urlencode({"q": "fork:true org:Test Organization foo"})
753+
responses.add(
754+
responses.GET,
755+
f"{self.base_url}/search/repositories?{querystring}",
756+
json={
757+
"items": [
758+
{
759+
"id": 1296269,
760+
"name": "foo",
761+
"full_name": "Test-Organization/foo",
762+
"default_branch": "master",
763+
},
764+
]
765+
},
766+
)
767+
768+
integration = Integration.objects.get(provider=self.provider.key)
769+
installation = get_installation_of_type(
770+
GitHubIntegration, integration, self.organization.id
771+
)
772+
773+
# First call: cache miss, fetches /installation/repositories + search
774+
result1 = installation.get_repositories("foo", accessible_only=True)
775+
install_repo_calls = [
776+
c for c in responses.calls if "/installation/repositories" in c.request.url
777+
]
778+
first_fetch_count = len(install_repo_calls)
779+
assert first_fetch_count > 0
780+
781+
# Second call: cache hit, only search is called (no new /installation/repositories)
782+
result2 = installation.get_repositories("foo", accessible_only=True)
783+
install_repo_calls = [
784+
c for c in responses.calls if "/installation/repositories" in c.request.url
785+
]
786+
assert len(install_repo_calls) == first_fetch_count
787+
assert result1 == result2
788+
715789
@responses.activate
716790
def test_get_repositories_all_and_pagination(self) -> None:
717791
"""Fetch all repositories and test the pagination logic."""

0 commit comments

Comments
 (0)