Skip to content

Commit dbd31cb

Browse files
authored
feat(releases): Cache calls to compare-commits (#112494)
A customer has a large number of GitHub API rate limit emails coming in and the highest referrer is compare-commits. This branch aims to temporarily cache and protect from many incoming calls to compare-commits and see if it solves the problem.
1 parent d0c0daf commit dbd31cb

File tree

5 files changed

+325
-57
lines changed

5 files changed

+325
-57
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ module = [
682682
"sentry.tasks.beacon",
683683
"sentry.tasks.codeowners.*",
684684
"sentry.tasks.commit_context",
685+
"sentry.tasks.commits",
685686
"sentry.tasks.on_demand_metrics",
686687
"sentry.tasks.post_process",
687688
"sentry.tasks.reprocessing2",

src/sentry/features/temporary.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def register_temporary_features(manager: FeatureManager) -> None:
138138
manager.add("organizations:integrations-claude-code", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
139139
manager.add("organizations:integrations-cursor", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
140140
manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
141+
manager.add("organizations:integrations-github-fetch-commits-compare-cache", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
141142
manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
142143
manager.add("organizations:github-repo-auto-sync", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
143144
manager.add("organizations:github-repo-auto-sync-apply", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)

src/sentry/tasks/commits.py

Lines changed: 151 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
from __future__ import annotations
22

33
import logging
4+
from collections.abc import Mapping, Sequence
5+
from typing import Any
46

57
import sentry_sdk
68
from django.urls import reverse
79
from sentry_sdk import set_tag
810
from taskbroker_client.retry import Retry
911

12+
from sentry import features
1013
from sentry.constants import ObjectStatus
1114
from sentry.exceptions import InvalidIdentity, PluginError
1215
from sentry.integrations.source_code_management.metrics import (
@@ -28,13 +31,23 @@
2831
from sentry.users.models.user import User
2932
from sentry.users.services.user import RpcUser
3033
from sentry.users.services.user.service import user_service
34+
from sentry.utils.cache import cache
3135
from sentry.utils.email import MessageBuilder
36+
from sentry.utils.hashlib import hash_values
3237
from sentry.utils.http import absolute_uri
3338

3439
logger = logging.getLogger(__name__)
3540

41+
GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE = (
42+
"organizations:integrations-github-fetch-commits-compare-cache"
43+
)
44+
GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS = 120
45+
GITHUB_CACHEABLE_REPOSITORY_PROVIDERS = frozenset(
46+
("integrations:github", "integrations:github_enterprise")
47+
)
48+
3649

37-
def generate_invalid_identity_email(identity, commit_failure=False):
50+
def generate_invalid_identity_email(identity: Any, commit_failure: bool = False) -> MessageBuilder:
3851
new_context = {
3952
"identity": identity,
4053
"auth_url": absolute_uri(reverse("socialauth_associate", args=[identity.provider])),
@@ -49,7 +62,9 @@ def generate_invalid_identity_email(identity, commit_failure=False):
4962
)
5063

5164

52-
def generate_fetch_commits_error_email(release, repo, error_message):
65+
def generate_fetch_commits_error_email(
66+
release: Release, repo: Repository, error_message: str
67+
) -> MessageBuilder:
5368
new_context = {"release": release, "error_message": error_message, "repo": repo}
5469

5570
return MessageBuilder(
@@ -63,7 +78,7 @@ def generate_fetch_commits_error_email(release, repo, error_message):
6378
# we're future proofing this function a bit so it could be used with other code
6479

6580

66-
def handle_invalid_identity(identity, commit_failure=False):
81+
def handle_invalid_identity(identity: Any, commit_failure: bool = False) -> None:
6782
# email the user
6883
msg = generate_invalid_identity_email(identity, commit_failure)
6984
msg.send_async(to=[identity.user.email])
@@ -72,6 +87,103 @@ def handle_invalid_identity(identity, commit_failure=False):
7287
identity.delete()
7388

7489

90+
def get_github_compare_commits_cache_key(
91+
organization_id: int,
92+
repository_id: int,
93+
provider: str | None,
94+
start_sha: str | None,
95+
end_sha: str,
96+
) -> str:
97+
digest = hash_values(
98+
[organization_id, repository_id, provider or "", start_sha or "", end_sha],
99+
seed="fetch-commits:compare-commits",
100+
)
101+
return f"fetch-commits:compare-commits:v1:{digest}"
102+
103+
104+
def fetch_compare_commits(
105+
*,
106+
cache_enabled: bool,
107+
repo: Repository,
108+
provider: Any,
109+
is_integration_repo_provider: bool,
110+
start_sha: str | None,
111+
end_sha: str,
112+
user: RpcUser | None,
113+
lifecycle: Any,
114+
) -> list[dict[str, Any]]:
115+
if cache_enabled:
116+
cache_key = get_github_compare_commits_cache_key(
117+
repo.organization_id, repo.id, repo.provider, start_sha, end_sha
118+
)
119+
cached_repo_commits = cache.get(cache_key)
120+
lifecycle.add_extra("compare_commits_cache_enabled", True)
121+
if cached_repo_commits is not None:
122+
lifecycle.add_extra("compare_commits_cache_hit", True)
123+
return cached_repo_commits
124+
125+
lifecycle.add_extra("compare_commits_cache_hit", False)
126+
else:
127+
lifecycle.add_extra("compare_commits_cache_enabled", False)
128+
129+
if is_integration_repo_provider:
130+
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
131+
else:
132+
# XXX: This only works for plugins that support actor context
133+
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)
134+
135+
if cache_enabled:
136+
cache.set(
137+
cache_key,
138+
repo_commits,
139+
GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS,
140+
)
141+
return repo_commits
142+
143+
144+
def get_repo_and_provider_for_ref(
145+
*,
146+
release: Release,
147+
ref: Mapping[str, str],
148+
user_id: int,
149+
) -> tuple[Repository, Any, bool, str] | None:
150+
repo = (
151+
Repository.objects.filter(
152+
organization_id=release.organization_id,
153+
name=ref["repository"],
154+
status=ObjectStatus.ACTIVE,
155+
)
156+
.order_by("-pk")
157+
.first()
158+
)
159+
if not repo:
160+
logger.info(
161+
"repository.missing",
162+
extra={
163+
"organization_id": release.organization_id,
164+
"user_id": user_id,
165+
"repository": ref["repository"],
166+
},
167+
)
168+
return None
169+
170+
is_integration_repo_provider = is_integration_provider(repo.provider)
171+
binding_key = (
172+
"integration-repository.provider" if is_integration_repo_provider else "repository.provider"
173+
)
174+
try:
175+
provider_cls = bindings.get(binding_key).get(repo.provider)
176+
except KeyError:
177+
return None
178+
179+
provider = provider_cls(id=repo.provider)
180+
provider_key = (
181+
provider_cls.repo_provider if is_integration_repo_provider else provider_cls.auth_provider
182+
)
183+
184+
return repo, provider, is_integration_repo_provider, provider_key
185+
186+
75187
@instrumented_task(
76188
name="sentry.tasks.commits.fetch_commits",
77189
namespace=issues_tasks,
@@ -80,9 +192,14 @@ def handle_invalid_identity(identity, commit_failure=False):
80192
silo_mode=SiloMode.CELL,
81193
)
82194
@retry(exclude=(Release.DoesNotExist, User.DoesNotExist))
83-
def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **kwargs):
84-
# TODO(dcramer): this function could use some cleanup/refactoring as it's a bit unwieldy
85-
commit_list = []
195+
def fetch_commits(
196+
release_id: int,
197+
user_id: int,
198+
refs: Sequence[Mapping[str, str]],
199+
prev_release_id: int | None = None,
200+
**kwargs: Any,
201+
) -> None:
202+
commit_list: list[dict[str, Any]] = []
86203

87204
release = Release.objects.get(id=release_id)
88205
set_tag("organization.slug", release.organization.slug)
@@ -97,37 +214,16 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k
97214
except Release.DoesNotExist:
98215
pass
99216

100-
for ref in refs:
101-
repo = (
102-
Repository.objects.filter(
103-
organization_id=release.organization_id,
104-
name=ref["repository"],
105-
status=ObjectStatus.ACTIVE,
106-
)
107-
.order_by("-pk")
108-
.first()
109-
)
110-
if not repo:
111-
logger.info(
112-
"repository.missing",
113-
extra={
114-
"organization_id": release.organization_id,
115-
"user_id": user_id,
116-
"repository": ref["repository"],
117-
},
118-
)
119-
continue
217+
organization = release.organization
218+
github_compare_commits_cache_feature_enabled = features.has(
219+
GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user
220+
)
120221

121-
is_integration_repo_provider = is_integration_provider(repo.provider)
122-
binding_key = (
123-
"integration-repository.provider"
124-
if is_integration_repo_provider
125-
else "repository.provider"
126-
)
127-
try:
128-
provider_cls = bindings.get(binding_key).get(repo.provider)
129-
except KeyError:
222+
for ref in refs:
223+
resolved = get_repo_and_provider_for_ref(release=release, ref=ref, user_id=user_id)
224+
if resolved is None:
130225
continue
226+
repo, provider, is_integration_repo_provider, provider_key = resolved
131227

132228
# if previous commit isn't provided, try to get from
133229
# previous release otherwise, try to get
@@ -146,13 +242,6 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k
146242
pass
147243

148244
end_sha = ref["commit"]
149-
provider = provider_cls(id=repo.provider)
150-
151-
provider_key = (
152-
provider_cls.repo_provider
153-
if is_integration_repo_provider
154-
else provider_cls.auth_provider
155-
)
156245

157246
with SCMIntegrationInteractionEvent(
158247
SCMIntegrationInteractionType.COMPARE_COMMITS,
@@ -171,10 +260,23 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k
171260
}
172261
)
173262
try:
174-
if is_integration_repo_provider:
175-
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
176-
else:
177-
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)
263+
provider_name = repo.provider
264+
compare_commits_cache_enabled = (
265+
github_compare_commits_cache_feature_enabled
266+
and isinstance(provider_name, str)
267+
and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
268+
and start_sha is not None
269+
)
270+
repo_commits = fetch_compare_commits(
271+
cache_enabled=compare_commits_cache_enabled,
272+
repo=repo,
273+
provider=provider,
274+
is_integration_repo_provider=is_integration_repo_provider,
275+
start_sha=start_sha,
276+
end_sha=end_sha,
277+
user=user,
278+
lifecycle=lifecycle,
279+
)
178280
except NotImplementedError:
179281
pass
180282
except IntegrationResourceNotFoundError:
@@ -277,11 +379,11 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k
277379
Deploy.notify_if_ready(deploy_id, fetch_complete=True)
278380

279381

280-
def is_integration_provider(provider):
281-
return provider and provider.startswith("integrations:")
382+
def is_integration_provider(provider: str | None) -> bool:
383+
return bool(provider and provider.startswith("integrations:"))
282384

283385

284-
def get_emails_for_user_or_org(user: RpcUser | None, orgId: int):
386+
def get_emails_for_user_or_org(user: RpcUser | None, orgId: int) -> list[str]:
285387
emails: list[str] = []
286388
if not user:
287389
return []

tests/sentry/integrations/github/test_repository.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,33 @@ def test_patchset_caching(self, get_jwt: mock.MagicMock) -> None:
206206
# Now that patchset was cached, github shouldn't have been called again
207207
assert len(responses.calls) == 1
208208

209+
@mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1")
210+
@responses.activate
211+
def test_compare_commits_reuses_cached_patchset_across_calls(
212+
self, get_jwt: mock.MagicMock
213+
) -> None:
214+
responses.add(
215+
responses.GET,
216+
"https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef",
217+
json=orjson.loads(COMPARE_COMMITS_EXAMPLE),
218+
)
219+
responses.add(
220+
responses.GET,
221+
"https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef",
222+
json=orjson.loads(COMPARE_COMMITS_EXAMPLE),
223+
)
224+
responses.add(
225+
responses.GET,
226+
"https://api.github.com/repos/getsentry/example-repo/commits/6dcb09b5b57875f334f61aebed695e2e4193db5e",
227+
json=orjson.loads(GET_COMMIT_EXAMPLE),
228+
)
229+
230+
first = self.provider.compare_commits(self.repository, "xyz123", "abcdef")
231+
second = self.provider.compare_commits(self.repository, "xyz123", "abcdef")
232+
233+
assert first == second
234+
assert len(responses.calls) == 3
235+
209236
@responses.activate
210237
def test_compare_commits_failure(self) -> None:
211238
responses.add(

0 commit comments

Comments
 (0)