Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/sentry/integrations/source_code_management/repo_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class RepoTree(NamedTuple):
# When the number of remaining API requests is less than this value, it will
# fall back to the cache.
MINIMUM_REQUESTS_REMAINING = 200
NOT_FOUND_CACHE_SECONDS = 3600 * 24


class RepoTreesIntegration(ABC):
Expand Down Expand Up @@ -211,6 +212,16 @@ def get_cached_repo_files(
)
cache.set(key, [], self.CACHE_SECONDS + shifted_seconds)
tree = None
except ApiError as error:
if _is_not_found_error(error):
logger.info(
"Caching empty files result for missing repo or ref",
extra={"repo": repo_full_name},
)
cache.set(key, [], NOT_FOUND_CACHE_SECONDS)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

404 cache missing staggered expiration unlike other cases

Low Severity

The 404 cache uses a fixed NOT_FOUND_CACHE_SECONDS duration without adding shifted_seconds, while the ApiConflictError (409) and success paths both use self.CACHE_SECONDS + shifted_seconds to stagger cache expirations across repos. When _populate_trees processes many missing repos, all their negative-cache entries expire at the exact same time, causing a burst of repeated 404 API calls on the next run instead of spreading them out.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit c29b65f. Configure here.

tree = None
else:
raise
if tree:
# Keep files; discard directories
repo_files = [node["path"] for node in tree if node["type"] == "blob"]
Expand Down Expand Up @@ -296,3 +307,11 @@ def should_include(file_path: str) -> bool:
if any(file_path.startswith(path) for path in EXCLUDED_PATHS):
return False
return True


def _is_not_found_error(error: ApiError) -> bool:
if error.code == 404:
return True

error_message = error.json.get("message") if error.json else error.text
return error_message in ("Not Found", "Not Found.")
32 changes: 32 additions & 0 deletions tests/sentry/integrations/github/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,38 @@ def test_get_cached_repo_files_with_all_files(self) -> None:
files = self.install.get_cached_repo_files(self.repo.name, "master", 0)
assert files == ["src/foo.py"]

@responses.activate
def test_get_cached_repo_files_caches_not_found(self) -> None:
responses.add(
method=responses.GET,
url=f"https://api.github.com/repos/{self.repo.name}/git/trees/master?recursive=1",
status=404,
json={"message": "Not Found"},
)
repo_key = f"github:repo:{self.repo.name}:source-code"
assert cache.get(repo_key) is None

files = self.install.get_cached_repo_files(self.repo.name, "master", 0)
assert files == []
assert cache.get(repo_key) == []

# Negative-cache hit should avoid an additional API request.
files = self.install.get_cached_repo_files(self.repo.name, "master", 0)
assert files == []
assert len(responses.calls) == 1

@responses.activate
def test_get_cached_repo_files_raises_non_not_found_api_error(self) -> None:
responses.add(
method=responses.GET,
url=f"https://api.github.com/repos/{self.repo.name}/git/trees/master?recursive=1",
status=500,
json={"message": "Server Error"},
)

with pytest.raises(ApiError):
self.install.get_cached_repo_files(self.repo.name, "master", 0)

@mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1")
@responses.activate
def test_update_comment(self, get_jwt) -> None:
Expand Down
Loading