Skip to content

Commit 07de4ec

Browse files
committed
ref(github): Cache full repo list and filter locally instead of Search API
Switch from Search API + cached ID set to caching the full repo list and filtering locally. This avoids the Search API's shared 30 req/min rate limit and uses sentry.cache.default_cache (Redis-backed) instead of django.core.cache (DummyCache in Sentry). Refs VDY-68
1 parent 1d3c1d8 commit 07de4ec

File tree

3 files changed

+36
-70
lines changed

3 files changed

+36
-70
lines changed

src/sentry/integrations/github/client.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88

99
import orjson
1010
import sentry_sdk
11-
from django.core.cache import cache
1211
from requests import PreparedRequest
1312

13+
from sentry.cache import default_cache
1414
from sentry.constants import ObjectStatus
1515
from sentry.integrations.github.blame import (
1616
create_blame_query,
@@ -550,21 +550,33 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
550550
page_number_limit=page_number_limit,
551551
)
552552

553-
def get_accessible_repo_ids(self, ttl: int = 300) -> set[int]:
553+
def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]:
554554
"""
555-
Return the set of GitHub repo IDs accessible to this installation.
556-
Cached in Django cache (Redis) for ``ttl`` seconds to avoid
557-
re-fetching all pages on every keystroke.
555+
Return all non-archived repos accessible to this installation.
556+
Cached in Django cache for ``ttl`` seconds so that debounced
557+
search keystrokes don't re-fetch all pages from GitHub.
558558
"""
559-
cache_key = f"github:accessible_repo_ids:{self.integration.id}"
560-
cached = cache.get(cache_key)
559+
cache_key = f"github:accessible_repos:{self.integration.id}"
560+
cached = default_cache.get(cache_key)
561561
if cached is not None:
562-
return set(cached)
562+
logger.info(
563+
"get_accessible_repos_cached.cache_hit",
564+
extra={"integration_id": self.integration.id, "count": len(cached)},
565+
)
566+
return cached
563567

568+
logger.info(
569+
"get_accessible_repos_cached.cache_miss",
570+
extra={"integration_id": self.integration.id},
571+
)
564572
all_repos = self.get_repos()
565-
repo_ids = {r["id"] for r in all_repos if not r.get("archived")}
566-
cache.set(cache_key, list(repo_ids), ttl)
567-
return repo_ids
573+
repos = [r for r in all_repos if not r.get("archived")]
574+
default_cache.set(cache_key, repos, ttl)
575+
logger.info(
576+
"get_accessible_repos_cached.cached",
577+
extra={"integration_id": self.integration.id, "count": len(repos)},
578+
)
579+
return repos
568580

569581
def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]:
570582
"""

src/sentry/integrations/github/integration.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -352,20 +352,21 @@ def get_repositories(
352352
if not i.get("archived")
353353
]
354354

355-
# Query + accessible_only: use Search API + cached ID set
355+
# Query + accessible_only: filter cached repo list locally.
356+
# Avoids re-fetching all pages on every debounced keystroke and
357+
# avoids the Search API's 30 req/min shared rate limit.
356358
if accessible_only:
357-
accessible_ids = client.get_accessible_repo_ids()
358-
full_query = build_repository_query(self.model.metadata, self.model.name, query)
359-
response = client.search_repositories(full_query)
359+
all_repos = client.get_accessible_repos_cached()
360+
query_lower = query.lower()
360361
return [
361362
{
362363
"name": i["name"],
363364
"identifier": i["full_name"],
364365
"external_id": self.get_repo_external_id(i),
365366
"default_branch": i.get("default_branch"),
366367
}
367-
for i in response.get("items", [])
368-
if i["id"] in accessible_ids
368+
for i in all_repos
369+
if query_lower in i["full_name"].lower()
369370
]
370371

371372
# Query without accessible_only: existing search behavior

tests/sentry/integrations/github/test_integration.py

Lines changed: 6 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -679,40 +679,16 @@ def test_get_repositories_search_param(self) -> None:
679679

680680
@responses.activate
681681
def test_get_repositories_accessible_only(self) -> None:
682-
"""accessible_only+query uses Search API filtered by cached accessible IDs."""
682+
"""accessible_only+query filters cached repo list locally."""
683683
with self.tasks():
684684
self.assert_setup_flow()
685685

686-
querystring = urlencode({"q": "fork:true org:Test Organization foo"})
687-
responses.add(
688-
responses.GET,
689-
f"{self.base_url}/search/repositories?{querystring}",
690-
json={
691-
"items": [
692-
{
693-
"id": 1296269,
694-
"name": "foo",
695-
"full_name": "Test-Organization/foo",
696-
"default_branch": "master",
697-
},
698-
{
699-
"id": 9999999,
700-
"name": "foo-external",
701-
"full_name": "Other-Org/foo-external",
702-
"default_branch": "main",
703-
},
704-
]
705-
},
706-
)
707-
708686
integration = Integration.objects.get(provider=self.provider.key)
709687
installation = get_installation_of_type(
710688
GitHubIntegration, integration, self.organization.id
711689
)
712690

713691
result = installation.get_repositories("foo", accessible_only=True)
714-
# foo-external is filtered out: its id (9999999) is the archived repo's id,
715-
# which is excluded from the accessible set
716692
assert result == [
717693
{
718694
"name": "foo",
@@ -724,17 +700,10 @@ def test_get_repositories_accessible_only(self) -> None:
724700

725701
@responses.activate
726702
def test_get_repositories_accessible_only_no_match(self) -> None:
727-
"""When accessible_only=True and search returns no accessible repos, returns empty list."""
703+
"""accessible_only+query with no matching repos returns empty list."""
728704
with self.tasks():
729705
self.assert_setup_flow()
730706

731-
querystring = urlencode({"q": "fork:true org:Test Organization nonexistent"})
732-
responses.add(
733-
responses.GET,
734-
f"{self.base_url}/search/repositories?{querystring}",
735-
json={"items": []},
736-
)
737-
738707
integration = Integration.objects.get(provider=self.provider.key)
739708
installation = get_installation_of_type(
740709
GitHubIntegration, integration, self.organization.id
@@ -744,41 +713,25 @@ def test_get_repositories_accessible_only_no_match(self) -> None:
744713
assert result == []
745714

746715
@responses.activate
747-
def test_get_repositories_accessible_only_caches_ids(self) -> None:
748-
"""Second accessible_only call uses cached IDs instead of re-fetching all repos."""
716+
def test_get_repositories_accessible_only_caches_repos(self) -> None:
717+
"""Second accessible_only call uses cached repos instead of re-fetching from GitHub."""
749718
with self.tasks():
750719
self.assert_setup_flow()
751720

752-
querystring = urlencode({"q": "fork:true org:Test Organization foo"})
753-
responses.add(
754-
responses.GET,
755-
f"{self.base_url}/search/repositories?{querystring}",
756-
json={
757-
"items": [
758-
{
759-
"id": 1296269,
760-
"name": "foo",
761-
"full_name": "Test-Organization/foo",
762-
"default_branch": "master",
763-
},
764-
]
765-
},
766-
)
767-
768721
integration = Integration.objects.get(provider=self.provider.key)
769722
installation = get_installation_of_type(
770723
GitHubIntegration, integration, self.organization.id
771724
)
772725

773-
# First call: cache miss, fetches /installation/repositories + search
726+
# First call: cache miss, fetches /installation/repositories
774727
result1 = installation.get_repositories("foo", accessible_only=True)
775728
install_repo_calls = [
776729
c for c in responses.calls if "/installation/repositories" in c.request.url
777730
]
778731
first_fetch_count = len(install_repo_calls)
779732
assert first_fetch_count > 0
780733

781-
# Second call: cache hit, only search is called (no new /installation/repositories)
734+
# Second call: cache hit, no new /installation/repositories calls
782735
result2 = installation.get_repositories("foo", accessible_only=True)
783736
install_repo_calls = [
784737
c for c in responses.calls if "/installation/repositories" in c.request.url

0 commit comments

Comments
 (0)