diff --git a/src/sentry/integrations/api/endpoints/organization_integration_repos.py b/src/sentry/integrations/api/endpoints/organization_integration_repos.py index a633deca9c809f..e7c1d9ddeb6b52 100644 --- a/src/sentry/integrations/api/endpoints/organization_integration_repos.py +++ b/src/sentry/integrations/api/endpoints/organization_integration_repos.py @@ -71,7 +71,11 @@ def get( accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true" try: - repositories = install.get_repositories(search, accessible_only=accessible_only) + repositories = install.get_repositories( + search, + accessible_only=accessible_only, + use_cache=accessible_only and bool(search), + ) except (IntegrationError, IdentityNotValid) as e: return self.respond({"detail": str(e)}, status=400) diff --git a/src/sentry/integrations/bitbucket/integration.py b/src/sentry/integrations/bitbucket/integration.py index 2456c02e1e5fad..057d735e374bcb 100644 --- a/src/sentry/integrations/bitbucket/integration.py +++ b/src/sentry/integrations/bitbucket/integration.py @@ -137,6 +137,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: username = self.model.metadata.get("uuid", self.username) if not query: diff --git a/src/sentry/integrations/bitbucket_server/integration.py b/src/sentry/integrations/bitbucket_server/integration.py index 6d3dc20c34c1b6..4b21374ba3dc0f 100644 --- a/src/sentry/integrations/bitbucket_server/integration.py +++ b/src/sentry/integrations/bitbucket_server/integration.py @@ -288,6 +288,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: if not query: resp = self.get_client().get_repos() diff --git a/src/sentry/integrations/example/integration.py b/src/sentry/integrations/example/integration.py index 7c2a8c4461cd26..c0b1f4ec5ea988 100644 --- a/src/sentry/integrations/example/integration.py +++ b/src/sentry/integrations/example/integration.py @@ -154,6 +154,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: return [{"name": "repo", "identifier": "user/repo", "external_id": "1"}] diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index 5abaa1c2852de5..3825c82669cbcf 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -8,6 +8,7 @@ import orjson import sentry_sdk +from django.core.cache import cache from requests import PreparedRequest from sentry.constants import ObjectStatus @@ -56,6 +57,14 @@ JWT_AUTH_ROUTES = ("/app/installations", "access_tokens") +class CachedRepo(TypedDict): + id: int + name: str + full_name: str + default_branch: str | None + archived: bool | None + + class GithubRateLimitInfo: def __init__(self, info: dict[str, int]) -> None: self.limit = info["limit"] @@ -549,6 +558,33 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any] page_number_limit=page_number_limit, ) + def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]: + """ + Return all repos accessible to this installation, cached in + Django cache for ``ttl`` seconds. + + Only the fields used by get_repositories() are stored to keep + the cache payload small. + """ + cache_key = f"github:repos:{self.integration.id}" + cached = cache.get(cache_key) + if cached is not None: + return cached + + all_repos = self.get_repos() + repos: list[CachedRepo] = [ + { + "id": r["id"], + "name": r["name"], + "full_name": r["full_name"], + "default_branch": r.get("default_branch"), + "archived": r.get("archived"), + } + for r in all_repos + ] + cache.set(cache_key, repos, ttl) + return repos + def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]: """ Find repositories matching a query. diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 7fb151b633cb12..f4f92a9c4fd2e8 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -2,7 +2,7 @@ import logging import re -from collections.abc import Callable, Mapping, MutableMapping, Sequence +from collections.abc import Callable, Iterable, Mapping, MutableMapping, Sequence from dataclasses import dataclass from enum import StrEnum from typing import Any, NotRequired, TypedDict @@ -325,6 +325,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ args: @@ -332,39 +333,47 @@ def get_repositories( * accessible_only - when True with a query, fetch only installation- accessible repos and filter locally instead of using the Search API (which may return repos outside the installation's scope) + * use_cache - when True, serve repos from a short-lived cache instead + of re-fetching all pages from GitHub on every call This fetches all repositories accessible to the Github App https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation """ - if not query or accessible_only: - all_repos = self.get_client().get_repos(page_number_limit=page_number_limit) - repos: list[RepositoryInfo] = [ + client = self.get_client() + + def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]: + return [ { "name": i["name"], "identifier": i["full_name"], "external_id": self.get_repo_external_id(i), "default_branch": i.get("default_branch"), } - for i in all_repos - if not i.get("archived") + for i in raw_repos ] - if query: - query_lower = query.lower() - repos = [r for r in repos if query_lower in str(r["identifier"]).lower()] - return repos + def _get_all_repos(): + if use_cache: + return client.get_repos_cached() + return client.get_repos(page_number_limit=page_number_limit) + + if not query: + all_repos = _get_all_repos() + return to_repo_info(r for r in all_repos if not r.get("archived")) + + if accessible_only: + all_repos = _get_all_repos() + query_lower = query.lower() + return to_repo_info( + r + for r in all_repos + if not r.get("archived") and query_lower in r["full_name"].lower() + ) + + assert not use_cache, "use_cache is not supported with the Search API path" full_query = build_repository_query(self.model.metadata, self.model.name, query) - response = self.get_client().search_repositories(full_query) - search_repos: list[RepositoryInfo] = [ - { - "name": i["name"], - "identifier": i["full_name"], - "external_id": self.get_repo_external_id(i), - "default_branch": i.get("default_branch"), - } - for i in response.get("items", []) - ] - return search_repos + response = client.search_repositories(full_query) + return to_repo_info(response.get("items", [])) def get_unmigratable_repositories(self) -> list[RpcRepository]: accessible_repos = self.get_repositories() diff --git a/src/sentry/integrations/github_enterprise/integration.py b/src/sentry/integrations/github_enterprise/integration.py index 84ae6d7576afa0..e26af025029759 100644 --- a/src/sentry/integrations/github_enterprise/integration.py +++ b/src/sentry/integrations/github_enterprise/integration.py @@ -226,6 +226,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: if not query: all_repos = self.get_client().get_repos(page_number_limit=page_number_limit) diff --git a/src/sentry/integrations/gitlab/integration.py b/src/sentry/integrations/gitlab/integration.py index 38e77b70a83ca9..7339028288588b 100644 --- a/src/sentry/integrations/gitlab/integration.py +++ b/src/sentry/integrations/gitlab/integration.py @@ -179,6 +179,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: try: # Note: gitlab projects are the same things as repos everywhere else diff --git a/src/sentry/integrations/perforce/integration.py b/src/sentry/integrations/perforce/integration.py index 1f36340886dd7a..4ba28eb133b46d 100644 --- a/src/sentry/integrations/perforce/integration.py +++ b/src/sentry/integrations/perforce/integration.py @@ -361,6 +361,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ Get list of depots/streams from Perforce server. diff --git a/src/sentry/integrations/source_code_management/repository.py b/src/sentry/integrations/source_code_management/repository.py index 0ba84a1bfd18f9..731f99d2cfc390 100644 --- a/src/sentry/integrations/source_code_management/repository.py +++ b/src/sentry/integrations/source_code_management/repository.py @@ -59,6 +59,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: """ Get a list of available repositories for an installation diff --git a/src/sentry/integrations/vsts/integration.py b/src/sentry/integrations/vsts/integration.py index 77393319440e0b..91f21b0753de08 100644 --- a/src/sentry/integrations/vsts/integration.py +++ b/src/sentry/integrations/vsts/integration.py @@ -315,6 +315,7 @@ def get_repositories( query: str | None = None, page_number_limit: int | None = None, accessible_only: bool = False, + use_cache: bool = False, ) -> list[RepositoryInfo]: try: repos = self.get_client().get_repos() diff --git a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py index 53a2d06b5694c4..4616406b9f99e1 100644 --- a/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py +++ b/tests/sentry/integrations/api/endpoints/test_organization_integration_repos.py @@ -200,7 +200,7 @@ def test_accessible_only_passes_param(self, get_repositories: MagicMock) -> None ) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with("rad", accessible_only=True) + get_repositories.assert_called_once_with("rad", accessible_only=True, use_cache=True) assert response.data == { "repos": [ { @@ -224,7 +224,7 @@ def test_accessible_only_without_search(self, get_repositories: MagicMock) -> No response = self.client.get(self.path, format="json", data={"accessibleOnly": "true"}) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with(None, accessible_only=True) + get_repositories.assert_called_once_with(None, accessible_only=True, use_cache=False) @patch( "sentry.integrations.github.integration.GitHubIntegration.get_repositories", return_value=[] @@ -249,7 +249,7 @@ def test_accessible_only_with_installable_only(self, get_repositories: MagicMock ) assert response.status_code == 200, response.content - get_repositories.assert_called_once_with("Example", accessible_only=True) + get_repositories.assert_called_once_with("Example", accessible_only=True, use_cache=True) assert response.data == { "repos": [ { diff --git a/tests/sentry/integrations/github/test_integration.py b/tests/sentry/integrations/github/test_integration.py index a8981259957280..73c5ab052282ef 100644 --- a/tests/sentry/integrations/github/test_integration.py +++ b/tests/sentry/integrations/github/test_integration.py @@ -679,7 +679,7 @@ def test_get_repositories_search_param(self) -> None: @responses.activate def test_get_repositories_accessible_only(self) -> None: - """When accessible_only=True, fetches installation repos and filters locally.""" + """accessible_only+query filters cached repo list locally.""" with self.tasks(): self.assert_setup_flow() @@ -700,7 +700,7 @@ def test_get_repositories_accessible_only(self) -> None: @responses.activate def test_get_repositories_accessible_only_no_match(self) -> None: - """When accessible_only=True and nothing matches, returns empty list.""" + """accessible_only+query with no matching repos returns empty list.""" with self.tasks(): self.assert_setup_flow() @@ -712,6 +712,33 @@ def test_get_repositories_accessible_only_no_match(self) -> None: result = installation.get_repositories("nonexistent", accessible_only=True) assert result == [] + @responses.activate + def test_get_repositories_accessible_only_caches_repos(self) -> None: + """Second accessible_only call uses cached repos instead of re-fetching from GitHub.""" + with self.tasks(): + self.assert_setup_flow() + + integration = Integration.objects.get(provider=self.provider.key) + installation = get_installation_of_type( + GitHubIntegration, integration, self.organization.id + ) + + # First call: cache miss, fetches /installation/repositories + result1 = installation.get_repositories("foo", accessible_only=True, use_cache=True) + install_repo_calls = [ + c for c in responses.calls if "/installation/repositories" in c.request.url + ] + first_fetch_count = len(install_repo_calls) + assert first_fetch_count > 0 + + # Second call: cache hit, no new /installation/repositories calls + result2 = installation.get_repositories("foo", accessible_only=True, use_cache=True) + install_repo_calls = [ + c for c in responses.calls if "/installation/repositories" in c.request.url + ] + assert len(install_repo_calls) == first_fetch_count + assert result1 == result2 + @responses.activate def test_get_repositories_all_and_pagination(self) -> None: """Fetch all repositories and test the pagination logic."""