From 1d3c1d85c8103f4dfa5693c42003ac37da4854e6 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 16:59:12 -0500 Subject: [PATCH 01/18] perf(github): Cache accessible repo IDs for accessibleOnly search When accessibleOnly=true with a search query, the old path fetched all installation repos (up to 5,000) on every debounced keystroke, then filtered with a Python list comprehension. Replace this with a cached set of accessible repo IDs (5-min Redis TTL) combined with the GitHub Search API, reducing each typed query from O(pages) API calls to a single search call plus a Redis lookup. Refs VDY-68 --- src/sentry/integrations/github/client.py | 17 ++++ src/sentry/integrations/github/integration.py | 35 ++++++--- .../integrations/github/test_integration.py | 78 ++++++++++++++++++- 3 files changed, 118 insertions(+), 12 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 5abaa1c2852de5..ced42a5b37518e 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -8,6 +8,7 @@ import orjson import sentry_sdk +from django.core.cache import cache from requests import PreparedRequest from sentry.constants import ObjectStatus @@ -549,6 +550,22 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + def get_accessible_repo_ids(self, ttl: int = 300) -> set[int]: + """ + Return the set of GitHub repo IDs accessible to this installation. + Cached in Django cache (Redis) for ``ttl`` seconds to avoid + re-fetching all pages on every keystroke. + """ + cache_key = f"github:accessible_repo_ids:{self.integration.id}" + cached = cache.get(cache_key) + if cached is not None: + return set(cached) + + all_repos = self.get_repos() + repo_ids = {r["id"] for r in all_repos if not r.get("archived")} + cache.set(cache_key, list(repo_ids), ttl) + return repo_ids + def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]: """ Find repositories matching a query. diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 7fb151b633cb12..4a2cecc388cb90 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -336,9 +336,12 @@ def get_repositories( This fetches all repositories accessible to the Github App https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation """ - if not query or accessible_only: - all_repos = self.get_client().get_repos(page_number_limit=page_number_limit) - repos: list[RepositoryInfo] = [ + client = self.get_client() + + # No query: fetch all accessible repos (existing behavior) + if not query: + all_repos = client.get_repos(page_number_limit=page_number_limit) + return [ { "name": i["name"], "identifier": i["full_name"], @@ -348,14 +351,27 @@ def get_repositories( for i in all_repos if not i.get("archived") ] - if query: - query_lower = query.lower() - repos = [r for r in repos if query_lower in str(r["identifier"]).lower()] - return repos + # Query + accessible_only: use Search API + cached ID set + if accessible_only: + accessible_ids = client.get_accessible_repo_ids() + full_query = build_repository_query(self.model.metadata, self.model.name, query) + response = client.search_repositories(full_query) + return [ + { + "name": i["name"], + "identifier": i["full_name"], + "external_id": self.get_repo_external_id(i), + "default_branch": i.get("default_branch"), + } + for i in response.get("items", []) + if i["id"] in accessible_ids + ] + + # Query without accessible_only: existing search behavior full_query = build_repository_query(self.model.metadata, self.model.name, query) - response = self.get_client().search_repositories(full_query) - search_repos: list[RepositoryInfo] = [ + response = client.search_repositories(full_query) + return [ { "name": i["name"], "identifier": i["full_name"], @@ -364,7 +380,6 @@ def get_repositories( } for i in response.get("items", []) ] - return search_repos def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() diff --git a/tests/sentry/integrations/github/test_integration.py b/tests/sentry/integrations/github/test_integration.py index a8981259957280..0957d903d3503a 100644 --- a/tests/sentry/integrations/github/test_integration.py +++ b/tests/sentry/integrations/github/test_integration.py @@ -679,16 +679,40 @@ def test_get_repositories_search_param(self) -> None: @responses.activate def test_get_repositories_accessible_only(self) -> None: - """When accessible_only=True, fetches installation repos and filters locally.""" + """accessible_only+query uses Search API filtered by cached accessible IDs.""" with self.tasks(): self.assert_setup_flow() + querystring = urlencode({"q": "fork:true org:Test Organization foo"}) + responses.add( + responses.GET, + f"{self.base_url}/search/repositories?{querystring}", + json={ + "items": [ + { + "id": 1296269, + "name": "foo", + "full_name": "Test-Organization/foo", + "default_branch": "master", + }, + { + "id": 9999999, + "name": "foo-external", + "full_name": "Other-Org/foo-external", + "default_branch": "main", + }, + ] + }, + ) + integration = Integration.objects.get(provider=self.provider.key) installation = get_installation_of_type( GitHubIntegration, integration, self.organization.id ) result = installation.get_repositories("foo", accessible_only=True) + # foo-external is filtered out: its id (9999999) is the archived repo's id, + # which is excluded from the accessible set assert result == [ { "name": "foo", @@ -700,10 +724,17 @@ def test_get_repositories_accessible_only(self) -> None: @responses.activate def test_get_repositories_accessible_only_no_match(self) -> None: - """When accessible_only=True and nothing matches, returns empty list.""" + """When accessible_only=True and search returns no accessible repos, returns empty list.""" with self.tasks(): self.assert_setup_flow() + querystring = urlencode({"q": "fork:true org:Test Organization nonexistent"}) + responses.add( + responses.GET, + f"{self.base_url}/search/repositories?{querystring}", + json={"items": []}, + ) + integration = Integration.objects.get(provider=self.provider.key) installation = get_installation_of_type( GitHubIntegration, integration, self.organization.id @@ -712,6 +743,49 @@ def test_get_repositories_accessible_only_no_match(self) -> None: result = installation.get_repositories("nonexistent", accessible_only=True) assert result == [] + @responses.activate + def test_get_repositories_accessible_only_caches_ids(self) -> None: + """Second accessible_only call uses cached IDs instead of re-fetching all repos.""" + with self.tasks(): + self.assert_setup_flow() + + querystring = urlencode({"q": "fork:true org:Test Organization foo"}) + responses.add( + responses.GET, + f"{self.base_url}/search/repositories?{querystring}", + json={ + "items": [ + { + "id": 1296269, + "name": "foo", + "full_name": "Test-Organization/foo", + "default_branch": "master", + }, + ] + }, + ) + + integration = Integration.objects.get(provider=self.provider.key) + installation = get_installation_of_type( + GitHubIntegration, integration, self.organization.id + ) + + # First call: cache miss, fetches /installation/repositories + search + result1 = installation.get_repositories("foo", accessible_only=True) + install_repo_calls = [ + c for c in responses.calls if "/installation/repositories" in c.request.url + ] + first_fetch_count = len(install_repo_calls) + assert first_fetch_count > 0 + + # Second call: cache hit, only search is called (no new /installation/repositories) + result2 = installation.get_repositories("foo", accessible_only=True) + install_repo_calls = [ + c for c in responses.calls if "/installation/repositories" in c.request.url + ] + assert len(install_repo_calls) == first_fetch_count + assert result1 == result2 + @responses.activate def test_get_repositories_all_and_pagination(self) -> None: """Fetch all repositories and test the pagination logic.""" From 07de4ec8a865092ffbfa515bd89809e1976a27ef Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 17:23:14 -0500 Subject: [PATCH 02/18] ref(github): Cache full repo list and filter locally instead of Search API Switch from Search API + cached ID set to caching the full repo list and filtering locally. This avoids the Search API's shared 30 req/min rate limit and uses sentry.cache.default_cache (Redis-backed) instead of django.core.cache (DummyCache in Sentry). Refs VDY-68 --- src/sentry/integrations/github/client.py | 34 +++++++---- src/sentry/integrations/github/integration.py | 13 ++-- .../integrations/github/test_integration.py | 59 ++----------------- 3 files changed, 36 insertions(+), 70 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index ced42a5b37518e..dfc6a8d349add4 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -8,9 +8,9 @@ import orjson import sentry_sdk -from django.core.cache import cache from requests import PreparedRequest +from sentry.cache import default_cache from sentry.constants import ObjectStatus from sentry.integrations.github.blame import ( create_blame_query, @@ -550,21 +550,33 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) - def get_accessible_repo_ids(self, ttl: int = 300) -> set[int]: + def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: """ - Return the set of GitHub repo IDs accessible to this installation. - Cached in Django cache (Redis) for ``ttl`` seconds to avoid - re-fetching all pages on every keystroke. + Return all non-archived repos accessible to this installation. + Cached in Django cache for ``ttl`` seconds so that debounced + search keystrokes don't re-fetch all pages from GitHub. """ - cache_key = f"github:accessible_repo_ids:{self.integration.id}" - cached = cache.get(cache_key) + cache_key = f"github:accessible_repos:{self.integration.id}" + cached = default_cache.get(cache_key) if cached is not None: - return set(cached) + logger.info( + "get_accessible_repos_cached.cache_hit", + extra={"integration_id": self.integration.id, "count": len(cached)}, + ) + return cached + logger.info( + "get_accessible_repos_cached.cache_miss", + extra={"integration_id": self.integration.id}, + ) all_repos = self.get_repos() - repo_ids = {r["id"] for r in all_repos if not r.get("archived")} - cache.set(cache_key, list(repo_ids), ttl) - return repo_ids + repos = [r for r in all_repos if not r.get("archived")] + default_cache.set(cache_key, repos, ttl) + logger.info( + "get_accessible_repos_cached.cached", + extra={"integration_id": self.integration.id, "count": len(repos)}, + ) + return repos def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]: """ diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 4a2cecc388cb90..4f39743e3257b9 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -352,11 +352,12 @@ def get_repositories( if not i.get("archived") ] - # Query + accessible_only: use Search API + cached ID set + # Query + accessible_only: filter cached repo list locally. + # Avoids re-fetching all pages on every debounced keystroke and + # avoids the Search API's 30 req/min shared rate limit. if accessible_only: - accessible_ids = client.get_accessible_repo_ids() - full_query = build_repository_query(self.model.metadata, self.model.name, query) - response = client.search_repositories(full_query) + all_repos = client.get_accessible_repos_cached() + query_lower = query.lower() return [ { "name": i["name"], @@ -364,8 +365,8 @@ def get_repositories( "external_id": self.get_repo_external_id(i), "default_branch": i.get("default_branch"), } - for i in response.get("items", []) - if i["id"] in accessible_ids + for i in all_repos + if query_lower in i["full_name"].lower() ] # Query without accessible_only: existing search behavior diff --git a/tests/sentry/integrations/github/test_integration.py b/tests/sentry/integrations/github/test_integration.py index 0957d903d3503a..8fa197081977cd 100644 --- a/tests/sentry/integrations/github/test_integration.py +++ b/tests/sentry/integrations/github/test_integration.py @@ -679,40 +679,16 @@ def test_get_repositories_search_param(self) -> None: @responses.activate def test_get_repositories_accessible_only(self) -> None: - """accessible_only+query uses Search API filtered by cached accessible IDs.""" + """accessible_only+query filters cached repo list locally.""" with self.tasks(): self.assert_setup_flow() - querystring = urlencode({"q": "fork:true org:Test Organization foo"}) - responses.add( - responses.GET, - f"{self.base_url}/search/repositories?{querystring}", - json={ - "items": [ - { - "id": 1296269, - "name": "foo", - "full_name": "Test-Organization/foo", - "default_branch": "master", - }, - { - "id": 9999999, - "name": "foo-external", - "full_name": "Other-Org/foo-external", - "default_branch": "main", - }, - ] - }, - ) - integration = Integration.objects.get(provider=self.provider.key) installation = get_installation_of_type( GitHubIntegration, integration, self.organization.id ) result = installation.get_repositories("foo", accessible_only=True) - # foo-external is filtered out: its id (9999999) is the archived repo's id, - # which is excluded from the accessible set assert result == [ { "name": "foo", @@ -724,17 +700,10 @@ def test_get_repositories_accessible_only(self) -> None: @responses.activate def test_get_repositories_accessible_only_no_match(self) -> None: - """When accessible_only=True and search returns no accessible repos, returns empty list.""" + """accessible_only+query with no matching repos returns empty list.""" with self.tasks(): self.assert_setup_flow() - querystring = urlencode({"q": "fork:true org:Test Organization nonexistent"}) - responses.add( - responses.GET, - f"{self.base_url}/search/repositories?{querystring}", - json={"items": []}, - ) - integration = Integration.objects.get(provider=self.provider.key) installation = get_installation_of_type( GitHubIntegration, integration, self.organization.id @@ -744,33 +713,17 @@ def test_get_repositories_accessible_only_no_match(self) -> None: assert result == [] @responses.activate - def test_get_repositories_accessible_only_caches_ids(self) -> None: - """Second accessible_only call uses cached IDs instead of re-fetching all repos.""" + def test_get_repositories_accessible_only_caches_repos(self) -> None: + """Second accessible_only call uses cached repos instead of re-fetching from GitHub.""" with self.tasks(): self.assert_setup_flow() - querystring = urlencode({"q": "fork:true org:Test Organization foo"}) - responses.add( - responses.GET, - f"{self.base_url}/search/repositories?{querystring}", - json={ - "items": [ - { - "id": 1296269, - "name": "foo", - "full_name": "Test-Organization/foo", - "default_branch": "master", - }, - ] - }, - ) - integration = Integration.objects.get(provider=self.provider.key) installation = get_installation_of_type( GitHubIntegration, integration, self.organization.id ) - # First call: cache miss, fetches /installation/repositories + search + # First call: cache miss, fetches /installation/repositories result1 = installation.get_repositories("foo", accessible_only=True) install_repo_calls = [ c for c in responses.calls if "/installation/repositories" in c.request.url @@ -778,7 +731,7 @@ def test_get_repositories_accessible_only_caches_ids(self) -> None: first_fetch_count = len(install_repo_calls) assert first_fetch_count > 0 - # Second call: cache hit, only search is called (no new /installation/repositories) + # Second call: cache hit, no new /installation/repositories calls result2 = installation.get_repositories("foo", accessible_only=True) install_repo_calls = [ c for c in responses.calls if "/installation/repositories" in c.request.url From b41e1c586fc79dcc6581d6596e2827cbc943b3cb Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 17:55:17 -0500 Subject: [PATCH 03/18] ref(github): Move archived filter out of cache into get_repositories Keep the cached repo list unfiltered so the cache is a faithful snapshot of the GitHub API response. Apply the archived filter in get_repositories alongside the other transforms. Also let the accessible_only path handle both with and without a query. Refs VDY-68 --- src/sentry/integrations/github/client.py | 5 ++-- src/sentry/integrations/github/integration.py | 25 +++++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index dfc6a8d349add4..449ec10759f46d 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -552,7 +552,7 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: """ - Return all non-archived repos accessible to this installation. + Return all repos accessible to this installation. Cached in Django cache for ``ttl`` seconds so that debounced search keystrokes don't re-fetch all pages from GitHub. """ @@ -569,8 +569,7 @@ def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: "get_accessible_repos_cached.cache_miss", extra={"integration_id": self.integration.id}, ) - all_repos = self.get_repos() - repos = [r for r in all_repos if not r.get("archived")] + repos = self.get_repos() default_cache.set(cache_key, repos, ttl) logger.info( "get_accessible_repos_cached.cached", diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 4f39743e3257b9..78882c16d3360e 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -338,10 +338,12 @@ def get_repositories( """ client = self.get_client() - # No query: fetch all accessible repos (existing behavior) - if not query: - all_repos = client.get_repos(page_number_limit=page_number_limit) - return [ + # accessible_only: filter cached repo list locally. + # Avoids re-fetching all pages on every debounced keystroke and + # avoids the Search API's 30 req/min shared rate limit. + if accessible_only: + all_repos = client.get_accessible_repos_cached() + repos: list[RepositoryInfo] = [ { "name": i["name"], "identifier": i["full_name"], @@ -351,13 +353,14 @@ def get_repositories( for i in all_repos if not i.get("archived") ] - - # Query + accessible_only: filter cached repo list locally. - # Avoids re-fetching all pages on every debounced keystroke and - # avoids the Search API's 30 req/min shared rate limit. - if accessible_only: - all_repos = client.get_accessible_repos_cached() + if not query: + return repos query_lower = query.lower() + return [r for r in repos if query_lower in r["identifier"].lower()] + + # No query: fetch all accessible repos + if not query: + all_repos = client.get_repos(page_number_limit=page_number_limit) return [ { "name": i["name"], @@ -366,7 +369,7 @@ def get_repositories( "default_branch": i.get("default_branch"), } for i in all_repos - if query_lower in i["full_name"].lower() + if not i.get("archived") ] # Query without accessible_only: existing search behavior From c06ce7ae6e8e6a317317766bb250658c8f697c83 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 17:58:59 -0500 Subject: [PATCH 04/18] chore(github): Remove debug logging from repo cache --- src/sentry/integrations/github/client.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 449ec10759f46d..ede8edc2a28405 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -559,22 +559,10 @@ def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: cache_key = f"github:accessible_repos:{self.integration.id}" cached = default_cache.get(cache_key) if cached is not None: - logger.info( - "get_accessible_repos_cached.cache_hit", - extra={"integration_id": self.integration.id, "count": len(cached)}, - ) return cached - logger.info( - "get_accessible_repos_cached.cache_miss", - extra={"integration_id": self.integration.id}, - ) repos = self.get_repos() default_cache.set(cache_key, repos, ttl) - logger.info( - "get_accessible_repos_cached.cached", - extra={"integration_id": self.integration.id, "count": len(repos)}, - ) return repos def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]: From 0261ee219c52c02fd6951222998ba7005c36c459 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:04:22 -0500 Subject: [PATCH 05/18] ref(github): Extract to_repo_info helper to DRY up get_repositories --- src/sentry/integrations/github/integration.py | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 78882c16d3360e..3bcc95fb541ac7 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -338,21 +338,23 @@ def get_repositories( """ client = self.get_client() - # accessible_only: filter cached repo list locally. - # Avoids re-fetching all pages on every debounced keystroke and - # avoids the Search API's 30 req/min shared rate limit. - if accessible_only: - all_repos = client.get_accessible_repos_cached() - repos: list[RepositoryInfo] = [ + def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo]: + return [ { "name": i["name"], "identifier": i["full_name"], "external_id": self.get_repo_external_id(i), "default_branch": i.get("default_branch"), } - for i in all_repos + for i in raw_repos if not i.get("archived") ] + + # accessible_only: filter cached repo list locally. + # Avoids re-fetching all pages on every debounced keystroke and + # avoids the Search API's 30 req/min shared rate limit. + if accessible_only: + repos = to_repo_info(client.get_accessible_repos_cached()) if not query: return repos query_lower = query.lower() @@ -360,30 +362,12 @@ def get_repositories( # No query: fetch all accessible repos if not query: - all_repos = client.get_repos(page_number_limit=page_number_limit) - return [ - { - "name": i["name"], - "identifier": i["full_name"], - "external_id": self.get_repo_external_id(i), - "default_branch": i.get("default_branch"), - } - for i in all_repos - if not i.get("archived") - ] + return to_repo_info(client.get_repos(page_number_limit=page_number_limit)) # Query without accessible_only: existing search behavior full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) - return [ - { - "name": i["name"], - "identifier": i["full_name"], - "external_id": self.get_repo_external_id(i), - "default_branch": i.get("default_branch"), - } - for i in response.get("items", []) - ] + return to_repo_info(response.get("items", [])) def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() From b8dd4e29dec368dbc35f037a9cfce1e434887496 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:09:31 -0500 Subject: [PATCH 06/18] fix(github): Only filter archived repos from installation responses The Search API does not return archived repos, so the archived filter should only apply to the /installation/repositories paths. --- src/sentry/integrations/github/integration.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 3bcc95fb541ac7..ba5e1f46bae707 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -347,14 +347,14 @@ def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo] "default_branch": i.get("default_branch"), } for i in raw_repos - if not i.get("archived") ] # accessible_only: filter cached repo list locally. # Avoids re-fetching all pages on every debounced keystroke and # avoids the Search API's 30 req/min shared rate limit. if accessible_only: - repos = to_repo_info(client.get_accessible_repos_cached()) + all_repos = client.get_accessible_repos_cached() + repos = to_repo_info(r for r in all_repos if not r.get("archived")) if not query: return repos query_lower = query.lower() @@ -362,7 +362,8 @@ def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo] # No query: fetch all accessible repos if not query: - return to_repo_info(client.get_repos(page_number_limit=page_number_limit)) + all_repos = client.get_repos(page_number_limit=page_number_limit) + return to_repo_info(r for r in all_repos if not r.get("archived")) # Query without accessible_only: existing search behavior full_query = build_repository_query(self.model.metadata, self.model.name, query) From 434bdcb43084189d5fea39cdeacdc9c017f787c2 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:16:08 -0500 Subject: [PATCH 07/18] docs(github): Clarify comments in get_repositories --- src/sentry/integrations/github/integration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index ba5e1f46bae707..cefdc4658cd5ab 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -349,8 +349,8 @@ def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo] for i in raw_repos ] - # accessible_only: filter cached repo list locally. - # Avoids re-fetching all pages on every debounced keystroke and + # accessible_only: fetch all accessible repos (cached) for listing and querying + # avoids re-fetching all pages on every debounced keystroke and # avoids the Search API's 30 req/min shared rate limit. if accessible_only: all_repos = client.get_accessible_repos_cached() @@ -360,7 +360,7 @@ def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo] query_lower = query.lower() return [r for r in repos if query_lower in r["identifier"].lower()] - # No query: fetch all accessible repos + # No query: fetch all accessible repos (without cache) if not query: all_repos = client.get_repos(page_number_limit=page_number_limit) return to_repo_info(r for r in all_repos if not r.get("archived")) From 1589a682fc06f1beccf1178513475dbb0b360daa Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:28:32 -0500 Subject: [PATCH 08/18] ref(github): Reorder get_repositories and combine filters Move no-query path first since accessible_only is only useful with a query (repeated keystrokes). Combine archived and query filters into a single pass through to_repo_info. --- src/sentry/integrations/github/integration.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index cefdc4658cd5ab..856e36e44e54ce 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -349,22 +349,23 @@ def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo] for i in raw_repos ] - # accessible_only: fetch all accessible repos (cached) for listing and querying - # avoids re-fetching all pages on every debounced keystroke and - # avoids the Search API's 30 req/min shared rate limit. - if accessible_only: - all_repos = client.get_accessible_repos_cached() - repos = to_repo_info(r for r in all_repos if not r.get("archived")) - if not query: - return repos - query_lower = query.lower() - return [r for r in repos if query_lower in r["identifier"].lower()] - # No query: fetch all accessible repos (without cache) if not query: all_repos = client.get_repos(page_number_limit=page_number_limit) return to_repo_info(r for r in all_repos if not r.get("archived")) + # accessible_only: fetch and filter accessible repos (cached) + # avoids re-fetching all pages on every debounced keystroke and + # avoids the Search API's 30 req/min shared rate limit. + if accessible_only: + all_repos_cached = client.get_accessible_repos_cached() + query_lower = query.lower() + return to_repo_info( + r + for r in all_repos_cached + if not r.get("archived") and query_lower in r["full_name"].lower() + ) + # Query without accessible_only: existing search behavior full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) From d642bfb0761a8fbd0fac573973a9b3fa1ec731e6 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:46:49 -0500 Subject: [PATCH 09/18] fix(github): Use Iterable instead of Sequence for generator args --- src/sentry/integrations/github/integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 856e36e44e54ce..e7087564ec6736 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -2,7 +2,7 @@ import logging import re -from collections.abc import Callable, Mapping, MutableMapping, Sequence +from collections.abc import Callable, Iterable, Mapping, MutableMapping, Sequence from dataclasses import dataclass from enum import StrEnum from typing import Any, NotRequired, TypedDict @@ -338,7 +338,7 @@ def get_repositories( """ client = self.get_client() - def to_repo_info(raw_repos: Sequence[Mapping[str, Any]]) -> list[RepositoryInfo]: + def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: return [ { "name": i["name"], From a594905d71a5dd9e6ceeee6645b56575fba8d8f7 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:48:49 -0500 Subject: [PATCH 10/18] perf(github): Cache only required fields for accessible repos Strip raw GitHub repo dicts down to the 5 fields used by get_repositories before storing in the cache. Reduces per-integration cache size from ~3KB per repo to ~100 bytes. --- src/sentry/integrations/github/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index ede8edc2a28405..ba725262f2a652 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -550,18 +550,25 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + # Fields from the GitHub API response needed by get_repositories(). + _CACHED_REPO_FIELDS = ("id", "name", "full_name", "default_branch", "archived") + def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: """ Return all repos accessible to this installation. Cached in Django cache for ``ttl`` seconds so that debounced search keystrokes don't re-fetch all pages from GitHub. + + Only the fields used by get_repositories() are stored to keep + the cache payload small. """ cache_key = f"github:accessible_repos:{self.integration.id}" cached = default_cache.get(cache_key) if cached is not None: return cached - repos = self.get_repos() + all_repos = self.get_repos() + repos = [{k: r.get(k) for k in self._CACHED_REPO_FIELDS} for r in all_repos] default_cache.set(cache_key, repos, ttl) return repos From c0987be3ed4040a3063cbab360b13fcbb5f1e288 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 18:55:09 -0500 Subject: [PATCH 11/18] ref(github): Add CachedRepo TypedDict for cached repo shape --- src/sentry/integrations/github/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index ba725262f2a652..aed1e7af625f1d 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -550,10 +550,17 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + class CachedRepo(TypedDict): + id: int + name: str + full_name: str + default_branch: str | None + archived: bool | None + # Fields from the GitHub API response needed by get_repositories(). _CACHED_REPO_FIELDS = ("id", "name", "full_name", "default_branch", "archived") - def get_accessible_repos_cached(self, ttl: int = 300) -> list[dict[str, Any]]: + def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ Return all repos accessible to this installation. Cached in Django cache for ``ttl`` seconds so that debounced From 5b01fd634e9af58fe5a6ca660759db5c6309d45c Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 19:01:07 -0500 Subject: [PATCH 12/18] ref(github): Use django cache instead of sentry default_cache getsentry configures CACHES with memcached in production, so django.core.cache.cache works and matches the pattern used by the rest of the integrations codebase. --- src/sentry/integrations/github/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index aed1e7af625f1d..7c3989faeefd4f 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -8,9 +8,9 @@ import orjson import sentry_sdk +from django.core.cache import cache from requests import PreparedRequest -from sentry.cache import default_cache from sentry.constants import ObjectStatus from sentry.integrations.github.blame import ( create_blame_query, @@ -570,13 +570,13 @@ def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: the cache payload small. """ cache_key = f"github:accessible_repos:{self.integration.id}" - cached = default_cache.get(cache_key) + cached = cache.get(cache_key) if cached is not None: return cached all_repos = self.get_repos() repos = [{k: r.get(k) for k in self._CACHED_REPO_FIELDS} for r in all_repos] - default_cache.set(cache_key, repos, ttl) + cache.set(cache_key, repos, ttl) return repos def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]: From c0742717520f5ec86152ab34469e1104751e6470 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 19:03:25 -0500 Subject: [PATCH 13/18] ref(github): Use explicit field picks instead of dict comprehension --- src/sentry/integrations/github/client.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 7c3989faeefd4f..8716f69b25508e 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -557,9 +557,6 @@ class CachedRepo(TypedDict): default_branch: str | None archived: bool | None - # Fields from the GitHub API response needed by get_repositories(). - _CACHED_REPO_FIELDS = ("id", "name", "full_name", "default_branch", "archived") - def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ Return all repos accessible to this installation. @@ -575,7 +572,16 @@ def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: return cached all_repos = self.get_repos() - repos = [{k: r.get(k) for k in self._CACHED_REPO_FIELDS} for r in all_repos] + repos: list[GitHubBaseClient.CachedRepo] = [ + { + "id": r["id"], + "name": r["name"], + "full_name": r["full_name"], + "default_branch": r.get("default_branch"), + "archived": r.get("archived"), + } + for r in all_repos + ] cache.set(cache_key, repos, ttl) return repos From b24d6f514dc33863041804105a2eb8ecc3e255e6 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Wed, 8 Apr 2026 19:07:28 -0500 Subject: [PATCH 14/18] ref(github): Move CachedRepo TypedDict to module level --- src/sentry/integrations/github/client.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 8716f69b25508e..3b81ade91eda58 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -57,6 +57,14 @@ JWT_AUTH_ROUTES = ("/app/installations", "access_tokens") +class CachedRepo(TypedDict): + id: int + name: str + full_name: str + default_branch: str | None + archived: bool | None + + class GithubRateLimitInfo: def __init__(self, info: dict[str, int]) -> None: self.limit = info["limit"] @@ -550,13 +558,6 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) - class CachedRepo(TypedDict): - id: int - name: str - full_name: str - default_branch: str | None - archived: bool | None - def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ Return all repos accessible to this installation. @@ -572,7 +573,7 @@ def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: return cached all_repos = self.get_repos() - repos: list[GitHubBaseClient.CachedRepo] = [ + repos: list[CachedRepo] = [ { "id": r["id"], "name": r["name"], From da09de12e0bb09402af5ab5cf475ebc0d0acc229 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 16:06:14 -0500 Subject: [PATCH 15/18] ref(github): Decouple use_cache from accessible_only in get_repositories Add explicit use_cache parameter to get_repositories instead of implicitly tying caching to the accessible_only flag. This makes caching an independent concern that callers opt into explicitly. --- .../organization_integration_repos.py | 4 +++- src/sentry/integrations/github/integration.py | 24 ++++++++++++------- .../source_code_management/repository.py | 1 + .../test_organization_integration_repos.py | 6 ++--- .../integrations/github/test_integration.py | 4 ++-- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index a633deca9c809f..525d039ef2df71 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -71,7 +71,9 @@ def get( accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true" try: - repositories = install.get_repositories(search, accessible_only=accessible_only) + repositories = install.get_repositories( + search, accessible_only=accessible_only, use_cache=accessible_only + ) except (IntegrationError, IdentityNotValid) as e: return self.respond({"detail": str(e)}, status=400) diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index e7087564ec6736..462f28b84cf8a4 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -325,6 +325,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ args: @@ -332,6 +333,8 @@ def get_repositories( * accessible_only - when True with a query, fetch only installation- accessible repos and filter locally instead of using the Search API (which may return repos outside the installation's scope) + * use_cache - when True, serve repos from a short-lived cache instead + of re-fetching all pages from GitHub on every call This fetches all repositories accessible to the Github App https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation @@ -349,24 +352,29 @@ def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo] for i in raw_repos ] - # No query: fetch all accessible repos (without cache) + def _get_all_repos(): + if use_cache: + return client.get_accessible_repos_cached() + return client.get_repos(page_number_limit=page_number_limit) + + # No query: return all non-archived repos if not query: - all_repos = client.get_repos(page_number_limit=page_number_limit) + all_repos = _get_all_repos() return to_repo_info(r for r in all_repos if not r.get("archived")) - # accessible_only: fetch and filter accessible repos (cached) - # avoids re-fetching all pages on every debounced keystroke and - # avoids the Search API's 30 req/min shared rate limit. + # accessible_only: fetch accessible repos and filter locally + # avoids the Search API's 30 req/min shared rate limit if accessible_only: - all_repos_cached = client.get_accessible_repos_cached() + all_repos = _get_all_repos() query_lower = query.lower() return to_repo_info( r - for r in all_repos_cached + for r in all_repos if not r.get("archived") and query_lower in r["full_name"].lower() ) - # Query without accessible_only: existing search behavior + # Query without accessible_only: use the Search API + assert not use_cache, "use_cache is not supported with the Search API path" full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) return to_repo_info(response.get("items", [])) diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index 0ba84a1bfd18f9..731f99d2cfc390 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -59,6 +59,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ Get a list of available repositories for an installation diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 53a2d06b5694c4..1f697596821be9 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -200,7 +200,7 @@ def test_accessible_only_passes_param(self, get_repositories: MagicMock) -> None ) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with("rad", accessible_only=True) + get_repositories.assert_called_once_with("rad", accessible_only=True, use_cache=True) assert response.data == { "repos": [ { @@ -224,7 +224,7 @@ def test_accessible_only_without_search(self, get_repositories: MagicMock) -> No response = self.client.get(self.path, format="json", data={"accessibleOnly": "true"}) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with(None, accessible_only=True) + get_repositories.assert_called_once_with(None, accessible_only=True, use_cache=True) @patch( "sentry.integrations.github.integration.GitHubIntegration.get_repositories", return_value=[] @@ -249,7 +249,7 @@ def test_accessible_only_with_installable_only(self, get_repositories: MagicMock ) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with("Example", accessible_only=True) + get_repositories.assert_called_once_with("Example", accessible_only=True, use_cache=True) assert response.data == { "repos": [ { diff --git a/tests/sentry/integrations/github/test_integration.py b/tests/sentry/integrations/github/test_integration.py index 8fa197081977cd..73c5ab052282ef 100644 --- a/tests/sentry/integrations/github/test_integration.py +++ b/tests/sentry/integrations/github/test_integration.py @@ -724,7 +724,7 @@ def test_get_repositories_accessible_only_caches_repos(self) -> None: ) # First call: cache miss, fetches /installation/repositories - result1 = installation.get_repositories("foo", accessible_only=True) + result1 = installation.get_repositories("foo", accessible_only=True, use_cache=True) install_repo_calls = [ c for c in responses.calls if "/installation/repositories" in c.request.url ] @@ -732,7 +732,7 @@ def test_get_repositories_accessible_only_caches_repos(self) -> None: assert first_fetch_count > 0 # Second call: cache hit, no new /installation/repositories calls - result2 = installation.get_repositories("foo", accessible_only=True) + result2 = installation.get_repositories("foo", accessible_only=True, use_cache=True) install_repo_calls = [ c for c in responses.calls if "/installation/repositories" in c.request.url ] From 68df5e606c58b3353034b4fb7d863879b2ca4130 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 16:08:50 -0500 Subject: [PATCH 16/18] ref(integrations): Add use_cache param to all get_repositories overrides --- src/sentry/integrations/bitbucket/integration.py | 1 + src/sentry/integrations/bitbucket_server/integration.py | 1 + src/sentry/integrations/example/integration.py | 1 + src/sentry/integrations/github_enterprise/integration.py | 1 + src/sentry/integrations/gitlab/integration.py | 1 + src/sentry/integrations/perforce/integration.py | 1 + src/sentry/integrations/vsts/integration.py | 1 + 7 files changed, 7 insertions(+) diff --git a/src/sentry/integrations/bitbucket/integration.py b/src/sentry/integrations/bitbucket/integration.py index 2456c02e1e5fad..057d735e374bcb 100644 --- a/src/sentry/integrations/bitbucket/integration.py +++ b/src/sentry/integrations/bitbucket/integration.py @@ -137,6 +137,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: username = self.model.metadata.get("uuid", self.username) if not query: diff --git a/src/sentry/integrations/bitbucket_server/integration.py b/src/sentry/integrations/bitbucket_server/integration.py index 6d3dc20c34c1b6..4b21374ba3dc0f 100644 --- a/src/sentry/integrations/bitbucket_server/integration.py +++ b/src/sentry/integrations/bitbucket_server/integration.py @@ -288,6 +288,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: if not query: resp = self.get_client().get_repos() diff --git a/src/sentry/integrations/example/integration.py b/src/sentry/integrations/example/integration.py index 7c2a8c4461cd26..c0b1f4ec5ea988 100644 --- a/src/sentry/integrations/example/integration.py +++ b/src/sentry/integrations/example/integration.py @@ -154,6 +154,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: return [{"name": "repo", "identifier": "user/repo", "external_id": "1"}] diff --git a/src/sentry/integrations/github_enterprise/integration.py b/src/sentry/integrations/github_enterprise/integration.py index 84ae6d7576afa0..e26af025029759 100644 --- a/src/sentry/integrations/github_enterprise/integration.py +++ b/src/sentry/integrations/github_enterprise/integration.py @@ -226,6 +226,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: if not query: all_repos = self.get_client().get_repos(page_number_limit=page_number_limit) diff --git a/src/sentry/integrations/gitlab/integration.py b/src/sentry/integrations/gitlab/integration.py index 38e77b70a83ca9..7339028288588b 100644 --- a/src/sentry/integrations/gitlab/integration.py +++ b/src/sentry/integrations/gitlab/integration.py @@ -179,6 +179,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: try: # Note: gitlab projects are the same things as repos everywhere else diff --git a/src/sentry/integrations/perforce/integration.py b/src/sentry/integrations/perforce/integration.py index 1f36340886dd7a..4ba28eb133b46d 100644 --- a/src/sentry/integrations/perforce/integration.py +++ b/src/sentry/integrations/perforce/integration.py @@ -361,6 +361,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ Get list of depots/streams from Perforce server. diff --git a/src/sentry/integrations/vsts/integration.py b/src/sentry/integrations/vsts/integration.py index 77393319440e0b..91f21b0753de08 100644 --- a/src/sentry/integrations/vsts/integration.py +++ b/src/sentry/integrations/vsts/integration.py @@ -315,6 +315,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: try: repos = self.get_client().get_repos() From 155c015d5891add28c6d296dadd5a87a81966316 Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 16:12:38 -0500 Subject: [PATCH 17/18] ref(github): Rename get_accessible_repos_cached to get_repos_cached Rename method and cache key to reflect that the cache is not specific to the accessible_only path. Remove implementation-detail comments from get_repositories. --- src/sentry/integrations/github/client.py | 9 ++++----- src/sentry/integrations/github/integration.py | 6 +----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 3b81ade91eda58..3825c82669cbcf 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -558,16 +558,15 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) - def get_accessible_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: + def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: """ - Return all repos accessible to this installation. - Cached in Django cache for ``ttl`` seconds so that debounced - search keystrokes don't re-fetch all pages from GitHub. + Return all repos accessible to this installation, cached in + Django cache for ``ttl`` seconds. Only the fields used by get_repositories() are stored to keep the cache payload small. """ - cache_key = f"github:accessible_repos:{self.integration.id}" + cache_key = f"github:repos:{self.integration.id}" cached = cache.get(cache_key) if cached is not None: return cached diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 462f28b84cf8a4..f4f92a9c4fd2e8 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -354,16 +354,13 @@ def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo] def _get_all_repos(): if use_cache: - return client.get_accessible_repos_cached() + return client.get_repos_cached() return client.get_repos(page_number_limit=page_number_limit) - # No query: return all non-archived repos if not query: all_repos = _get_all_repos() return to_repo_info(r for r in all_repos if not r.get("archived")) - # accessible_only: fetch accessible repos and filter locally - # avoids the Search API's 30 req/min shared rate limit if accessible_only: all_repos = _get_all_repos() query_lower = query.lower() @@ -373,7 +370,6 @@ def _get_all_repos(): if not r.get("archived") and query_lower in r["full_name"].lower() ) - # Query without accessible_only: use the Search API assert not use_cache, "use_cache is not supported with the Search API path" full_query = build_repository_query(self.model.metadata, self.model.name, query) response = client.search_repositories(full_query) From 3bf81a4dc0e490d758fb2b88a00689b15c53414f Mon Sep 17 00:00:00 2001 From: Jay Goss Date: Thu, 9 Apr 2026 16:22:16 -0500 Subject: [PATCH 18/18] fix(github): Only use cache when search query is present Avoid caching the initial no-query load when accessibleOnly is set. Cache is only useful for debounced keystroke searches, not the first page load which should always return fresh data. --- .../api/endpoints/organization_integration_repos.py | 4 +++- .../api/endpoints/test_organization_integration_repos.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index 525d039ef2df71..e7c1d9ddeb6b52 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -72,7 +72,9 @@ def get( try: repositories = install.get_repositories( - search, accessible_only=accessible_only, use_cache=accessible_only + search, + accessible_only=accessible_only, + use_cache=accessible_only and bool(search), ) except (IntegrationError, IdentityNotValid) as e: return self.respond({"detail": str(e)}, status=400) diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 1f697596821be9..4616406b9f99e1 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -224,7 +224,7 @@ def test_accessible_only_without_search(self, get_repositories: MagicMock) -> No response = self.client.get(self.path, format="json", data={"accessibleOnly": "true"}) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with(None, accessible_only=True, use_cache=True) + get_repositories.assert_called_once_with(None, accessible_only=True, use_cache=False) @patch( "sentry.integrations.github.integration.GitHubIntegration.get_repositories", return_value=[]