Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
1d3c1d8
perf(github): Cache accessible repo IDs for accessibleOnly search
jaydgoss Apr 8, 2026
07de4ec
ref(github): Cache full repo list and filter locally instead of Searc…
jaydgoss Apr 8, 2026
b41e1c5
ref(github): Move archived filter out of cache into get_repositories
jaydgoss Apr 8, 2026
c06ce7a
chore(github): Remove debug logging from repo cache
jaydgoss Apr 8, 2026
0261ee2
ref(github): Extract to_repo_info helper to DRY up get_repositories
jaydgoss Apr 8, 2026
b8dd4e2
fix(github): Only filter archived repos from installation responses
jaydgoss Apr 8, 2026
434bdcb
docs(github): Clarify comments in get_repositories
jaydgoss Apr 8, 2026
1589a68
ref(github): Reorder get_repositories and combine filters
jaydgoss Apr 8, 2026
d642bfb
fix(github): Use Iterable instead of Sequence for generator args
jaydgoss Apr 8, 2026
a594905
perf(github): Cache only required fields for accessible repos
jaydgoss Apr 8, 2026
c0987be
ref(github): Add CachedRepo TypedDict for cached repo shape
jaydgoss Apr 8, 2026
5b01fd6
ref(github): Use django cache instead of sentry default_cache
jaydgoss Apr 9, 2026
c074271
ref(github): Use explicit field picks instead of dict comprehension
jaydgoss Apr 9, 2026
b24d6f5
ref(github): Move CachedRepo TypedDict to module level
jaydgoss Apr 9, 2026
da09de1
ref(github): Decouple use_cache from accessible_only in get_repositories
jaydgoss Apr 9, 2026
68df5e6
ref(integrations): Add use_cache param to all get_repositories overrides
jaydgoss Apr 9, 2026
155c015
ref(github): Rename get_accessible_repos_cached to get_repos_cached
jaydgoss Apr 9, 2026
3bf81a4
fix(github): Only use cache when search query is present
jaydgoss Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def get(
accessible_only = request.GET.get("accessibleOnly", "false").lower() == "true"

try:
repositories = install.get_repositories(search, accessible_only=accessible_only)
repositories = install.get_repositories(
search,
accessible_only=accessible_only,
use_cache=accessible_only and bool(search),
)
except (IntegrationError, IdentityNotValid) as e:
return self.respond({"detail": str(e)}, status=400)

Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/bitbucket/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
username = self.model.metadata.get("uuid", self.username)
if not query:
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/bitbucket_server/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
if not query:
resp = self.get_client().get_repos()
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/example/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
return [{"name": "repo", "identifier": "user/repo", "external_id": "1"}]

Expand Down
36 changes: 36 additions & 0 deletions src/sentry/integrations/github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import orjson
import sentry_sdk
from django.core.cache import cache
from requests import PreparedRequest

from sentry.constants import ObjectStatus
Expand Down Expand Up @@ -56,6 +57,14 @@
JWT_AUTH_ROUTES = ("/app/installations", "access_tokens")


class CachedRepo(TypedDict):
id: int
name: str
full_name: str
default_branch: str | None
archived: bool | None


class GithubRateLimitInfo:
def __init__(self, info: dict[str, int]) -> None:
self.limit = info["limit"]
Expand Down Expand Up @@ -549,6 +558,33 @@ def get_repos(self, page_number_limit: int | None = None) -> list[dict[str, Any]
page_number_limit=page_number_limit,
)

def get_repos_cached(self, ttl: int = 300) -> list[CachedRepo]:
    """
    List every repo this installation can access, going through the
    Django cache.

    A cached list (keyed by the integration id) is returned as-is when
    present. Otherwise the full list is pulled with ``get_repos()``,
    reduced to the ``CachedRepo`` keys so the stored payload stays
    small, written to the cache for ``ttl`` seconds, and returned.
    """
    key = f"github:repos:{self.integration.id}"
    hit = cache.get(key)

    # Anything other than None is treated as a hit, so a cached empty
    # list is served without another round of GitHub requests.
    if hit is None:
        trimmed: list[CachedRepo] = [
            {
                "id": repo["id"],
                "name": repo["name"],
                "full_name": repo["full_name"],
                # These two are read with .get(), so they may be None.
                "default_branch": repo.get("default_branch"),
                "archived": repo.get("archived"),
            }
            for repo in self.get_repos()
        ]
        cache.set(key, trimmed, ttl)
        return trimmed

    return hit

def search_repositories(self, query: bytes) -> Mapping[str, Sequence[Any]]:
"""
Find repositories matching a query.
Expand Down
51 changes: 30 additions & 21 deletions src/sentry/integrations/github/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import re
from collections.abc import Callable, Mapping, MutableMapping, Sequence
from collections.abc import Callable, Iterable, Mapping, MutableMapping, Sequence
from dataclasses import dataclass
from enum import StrEnum
from typing import Any, NotRequired, TypedDict
Expand Down Expand Up @@ -325,46 +325,55 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
"""
args:
* query - a query to filter the repositories by
* accessible_only - when True with a query, fetch only installation-
accessible repos and filter locally instead of using the Search API
(which may return repos outside the installation's scope)
* use_cache - when True, serve repos from a short-lived cache instead
of re-fetching all pages from GitHub on every call

This fetches all repositories accessible to the Github App
https://docs.github.com/en/rest/apps/installations#list-repositories-accessible-to-the-app-installation
"""
if not query or accessible_only:
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)
repos: list[RepositoryInfo] = [
client = self.get_client()

def to_repo_info(raw_repos: Iterable[Mapping[str, Any]]) -> list[RepositoryInfo]:
return [
{
"name": i["name"],
"identifier": i["full_name"],
"external_id": self.get_repo_external_id(i),
"default_branch": i.get("default_branch"),
}
for i in all_repos
if not i.get("archived")
for i in raw_repos
]
if query:
query_lower = query.lower()
repos = [r for r in repos if query_lower in str(r["identifier"]).lower()]
return repos

def _get_all_repos():
if use_cache:
return client.get_repos_cached()
return client.get_repos(page_number_limit=page_number_limit)

if not query:
all_repos = _get_all_repos()
return to_repo_info(r for r in all_repos if not r.get("archived"))

if accessible_only:
all_repos = _get_all_repos()
query_lower = query.lower()
return to_repo_info(
r
for r in all_repos
if not r.get("archived") and query_lower in r["full_name"].lower()
)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that rather than making the cache use conditional on accessible_only, we should have use_cache=False. We should have an assert like assert not use_cache or not query since the cache doesn't work with the query

Copy link
Copy Markdown
Member Author

@jaydgoss jaydgoss Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little confused by this comment because the cache is set up to work with the accessible_only variant of querying.

accessible_only is a means to query only accessible repos. The default query behavior was exposing repos that Sentry does not necessarily have access to (public repos not selected during installation configuration).

The cache in its current form is only enabled for the accessible_only path because I didn't want to alter behavior for existing consumers.

That said, the cache could instead be opt-in and applied to either the not-query path or the accessible_only path. Is that along the lines of your thinking?

Further changes here are introduced with pagination support #112591

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My main point is that it doesn't really make sense to make the cache specific to accessible only. We're caching all the repos, and then we're filtering to accessible only in python. So if we add use_cache=False, then in your usages you pass use_cache=True, is_accessible=True it's more general and allows us to use the cache in other places later, if we want to. In general, I feel like Claude tends to do things in an overly specific way so I just wanted to guide us away from that here.

Copy link
Copy Markdown
Member Author

@jaydgoss jaydgoss Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will alter things so that cache can be applied to either the not query or accessible_only paths.

Filtering to accessible repos is not handled via Python filtering (it's a different GitHub API path); the "archived" filter is kind of a red herring there.

accessible vs non accessible:

accessible repo fetch: client.get_repos(...)
accessibility-agnostic repo search: client.search_repositories(...)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry, I misread here... although I'm confused about the split that exists in the current code because it looks like it fetches all repos from github and then filters out archived repos when it's accessible.

It seems like if there's no query, we should always be using self.get_client().get_repos? I'm not totally sure why we'd use the search api when we're fetching everything

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, discussed on slack and I understand what's going on a bit better here now.

Yeah, I think it still makes sense to have use_cache be explicit. Then whenever is_accessible is true, we can optionally use the cache or not based on it, and just have assert not use_cache or not is_accessible to make sure that we don't confuse folks who use is_accessible.

Probably it'd be nice to get rid of is_accessible completely but idk if we're relying on the behaviour implicitly somewhere.


assert not use_cache, "use_cache is not supported with the Search API path"
full_query = build_repository_query(self.model.metadata, self.model.name, query)
response = self.get_client().search_repositories(full_query)
search_repos: list[RepositoryInfo] = [
{
"name": i["name"],
"identifier": i["full_name"],
"external_id": self.get_repo_external_id(i),
"default_branch": i.get("default_branch"),
}
for i in response.get("items", [])
]
return search_repos
response = client.search_repositories(full_query)
return to_repo_info(response.get("items", []))

def get_unmigratable_repositories(self) -> list[RpcRepository]:
accessible_repos = self.get_repositories()
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/github_enterprise/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use_cache parameter is accepted but ignored in GitHub Enterprise integration

The use_cache parameter was added to the method signature for interface compliance but is never used in the implementation. When accessibleOnly=true is passed to the repos endpoint, the endpoint calls get_repositories(..., use_cache=True) (line 76 of organization_integration_repos.py). The GitHub integration correctly calls client.get_repos_cached() when use_cache=True, but the GitHub Enterprise integration silently ignores this flag and always calls get_client().get_repos() without caching. GitHub Enterprise users will not receive the caching benefit this PR is intended to provide.

Verification

Read github_enterprise/integration.py lines 224-254 to confirm use_cache parameter is never referenced in method body. Read github/integration.py lines 355-376 to see correct implementation using use_cache. Read organization_integration_repos.py line 76 to confirm use_cache=accessible_only is passed to all integrations.

Identified by Warden sentry-backend-bugs · CHR-VP4

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix attempt detected (commit 3bf81a4)

The use_cache parameter was added to the GitHub Enterprise get_repositories signature for interface compliance, but the method implementation still ignores it and unconditionally calls get_client().get_repos() without any conditional caching logic, identical to the before state in the critical execution paths.

The original issue appears unresolved. Please review and try again.

Evaluated by Warden

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change is siloed to github only, not GHE

) -> list[RepositoryInfo]:
if not query:
all_repos = self.get_client().get_repos(page_number_limit=page_number_limit)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/gitlab/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
try:
# Note: gitlab projects are the same things as repos everywhere else
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/perforce/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
"""
Get list of depots/streams from Perforce server.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
"""
Get a list of available repositories for an installation
Expand Down
1 change: 1 addition & 0 deletions src/sentry/integrations/vsts/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def get_repositories(
query: str | None = None,
page_number_limit: int | None = None,
accessible_only: bool = False,
use_cache: bool = False,
) -> list[RepositoryInfo]:
try:
repos = self.get_client().get_repos()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def test_accessible_only_passes_param(self, get_repositories: MagicMock) -> None
)

assert response.status_code == 200, response.content
get_repositories.assert_called_once_with("rad", accessible_only=True)
get_repositories.assert_called_once_with("rad", accessible_only=True, use_cache=True)
assert response.data == {
"repos": [
{
Expand All @@ -224,7 +224,7 @@ def test_accessible_only_without_search(self, get_repositories: MagicMock) -> No
response = self.client.get(self.path, format="json", data={"accessibleOnly": "true"})

assert response.status_code == 200, response.content
get_repositories.assert_called_once_with(None, accessible_only=True)
get_repositories.assert_called_once_with(None, accessible_only=True, use_cache=False)

@patch(
"sentry.integrations.github.integration.GitHubIntegration.get_repositories", return_value=[]
Expand All @@ -249,7 +249,7 @@ def test_accessible_only_with_installable_only(self, get_repositories: MagicMock
)

assert response.status_code == 200, response.content
get_repositories.assert_called_once_with("Example", accessible_only=True)
get_repositories.assert_called_once_with("Example", accessible_only=True, use_cache=True)
assert response.data == {
"repos": [
{
Expand Down
31 changes: 29 additions & 2 deletions tests/sentry/integrations/github/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def test_get_repositories_search_param(self) -> None:

@responses.activate
def test_get_repositories_accessible_only(self) -> None:
"""When accessible_only=True, fetches installation repos and filters locally."""
"""accessible_only+query filters cached repo list locally."""
with self.tasks():
self.assert_setup_flow()

Expand All @@ -700,7 +700,7 @@ def test_get_repositories_accessible_only(self) -> None:

@responses.activate
def test_get_repositories_accessible_only_no_match(self) -> None:
"""When accessible_only=True and nothing matches, returns empty list."""
"""accessible_only+query with no matching repos returns empty list."""
with self.tasks():
self.assert_setup_flow()

Expand All @@ -712,6 +712,33 @@ def test_get_repositories_accessible_only_no_match(self) -> None:
result = installation.get_repositories("nonexistent", accessible_only=True)
assert result == []

@responses.activate
def test_get_repositories_accessible_only_caches_repos(self) -> None:
    """A repeated accessible_only search with use_cache=True does not re-fetch from GitHub."""
    with self.tasks():
        self.assert_setup_flow()

    integration = Integration.objects.get(provider=self.provider.key)
    installation = get_installation_of_type(
        GitHubIntegration, integration, self.organization.id
    )

    def installation_repo_fetches() -> int:
        # Number of recorded HTTP calls that hit /installation/repositories.
        return sum(
            1 for call in responses.calls if "/installation/repositories" in call.request.url
        )

    # Cache miss: the first search has to go to /installation/repositories.
    first_result = installation.get_repositories("foo", accessible_only=True, use_cache=True)
    fetches_after_first = installation_repo_fetches()
    assert fetches_after_first > 0

    # Cache hit: the second search must not add any such calls.
    second_result = installation.get_repositories("foo", accessible_only=True, use_cache=True)
    assert installation_repo_fetches() == fetches_after_first
    assert first_result == second_result

@responses.activate
def test_get_repositories_all_and_pagination(self) -> None:
"""Fetch all repositories and test the pagination logic."""
Expand Down
Loading