From 6b1a01df95e06c9f612db86e31d060f54133fec2 Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:53:23 -0400 Subject: [PATCH 1/6] feat(commits): Cache calls to compare-commits - Added a new org feature flag in `src/sentry/features/temporary.py`: - `organizations:integrations-github-fetch-commits-compare-cache` - Refactored commit fetching in `src/sentry/tasks/commits.py`: - Added `fetch_compare_commits(...)` helper to centralize compare-commits behavior. - Added cache key generation with `get_github_compare_commits_cache_key(...)`. - Added cache-backed reuse of GitHub compare-commits results (TTL: 120s), gated by the new feature flag. - Limited caching to GitHub providers (`integrations:github`, `integrations:github_enterprise`) and only when `start_sha` is present. - Added lifecycle extras for cache telemetry (`compare_commits_cache_enabled`, `compare_commits_cache_hit`). - `fetch_commits(...)` now evaluates the feature flag once and routes compare calls through the helper. - Added provider-level test coverage in `tests/sentry/integrations/github/test_repository.py`: - New test verifies repeated compare calls reuse cached patchset data and reduce API calls. - Added task-level cache behavior tests in `tests/sentry/tasks/test_commits.py`: - Cache disabled: compare called on each fetch. - Cache enabled: compare result reused across releases with the same compare range. - Cache key variance: different `end_sha` values do not share cache entries. - Included a follow-up typing fix in `src/sentry/tasks/commits.py`: - Narrowed `repo.provider` to `str` before cache-key creation to satisfy mypy (`str | None` -> `str`). --- src/sentry/features/temporary.py | 1 + src/sentry/tasks/commits.py | 87 +++++++++- .../integrations/github/test_repository.py | 27 +++ tests/sentry/tasks/test_commits.py | 156 ++++++++++++++++++ 4 files changed, 267 insertions(+), 4 deletions(-) diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 1122038668af8e..c0b0c8a52856de 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -133,6 +133,7 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:integrations-claude-code", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-cursor", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) + manager.add("organizations:integrations-github-fetch-commits-compare-cache", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Project Management Integrations Feature Parity Flags diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index a1056a65f2420b..d119842e47d38f 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -7,6 +7,7 @@ from sentry_sdk import set_tag from taskbroker_client.retry import Retry +from sentry import features from sentry.constants import ObjectStatus from sentry.exceptions import InvalidIdentity, PluginError from sentry.integrations.source_code_management.metrics import ( @@ -28,11 +29,21 @@ from sentry.users.models.user import User from sentry.users.services.user import RpcUser from sentry.users.services.user.service import user_service +from sentry.utils.cache import cache from sentry.utils.email import MessageBuilder +from sentry.utils.hashlib import md5_text from sentry.utils.http import absolute_uri logger = logging.getLogger(__name__) +GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE = ( + "organizations:integrations-github-fetch-commits-compare-cache" +) +GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS = 120 +GITHUB_CACHEABLE_REPOSITORY_PROVIDERS = frozenset( + ("integrations:github", "integrations:github_enterprise") +) + def generate_invalid_identity_email(identity, commit_failure=False): new_context = { @@ -63,6 +74,63 @@ def generate_fetch_commits_error_email(release, repo, error_message): # we're future proofing this function a bit so it could be used with other code +def get_github_compare_commits_cache_key( + organization_id: int, repository_id: int, provider: str, start_sha: str | None, end_sha: str +) -> str: + digest = md5_text( + organization_id, repository_id, provider, start_sha or "", end_sha + ).hexdigest() + return f"fetch-commits:compare-commits:v1:{digest}" + + +def fetch_compare_commits( + *, + cache_enabled: bool, + repo: Repository, + provider, + is_integration_repo_provider: bool, + start_sha: str | None, + end_sha: str, + user: RpcUser | None, + lifecycle, +): + cache_key = None + provider = repo.provider + if ( + cache_enabled + and isinstance(provider, str) + and provider in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS + and start_sha is not None + ): + cache_key = get_github_compare_commits_cache_key( + repo.organization_id, repo.id, provider, start_sha, end_sha + ) + + if cache_key is not None: + cached_repo_commits = cache.get(cache_key) + lifecycle.add_extra("compare_commits_cache_enabled", True) + if cached_repo_commits is not None: + lifecycle.add_extra("compare_commits_cache_hit", True) + return cached_repo_commits + + lifecycle.add_extra("compare_commits_cache_hit", False) + else: + lifecycle.add_extra("compare_commits_cache_enabled", False) + + if is_integration_repo_provider: + repo_commits = provider.compare_commits(repo, start_sha, end_sha) + else: + repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user) + + if cache_key is not None: + cache.set( + cache_key, + repo_commits, + GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS, + ) + return repo_commits + + def handle_invalid_identity(identity, commit_failure=False): # email the user msg = generate_invalid_identity_email(identity, commit_failure) @@ -97,6 +165,11 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k except Release.DoesNotExist: pass + organization = release.organization + github_compare_commits_cache_enabled = features.has( + GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user + ) + for ref in refs: repo = ( Repository.objects.filter( @@ -171,10 +244,16 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k } ) try: - if is_integration_repo_provider: - repo_commits = provider.compare_commits(repo, start_sha, end_sha) - else: - repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user) + repo_commits = fetch_compare_commits( + cache_enabled=github_compare_commits_cache_enabled, + repo=repo, + provider=provider, + is_integration_repo_provider=is_integration_repo_provider, + start_sha=start_sha, + end_sha=end_sha, + user=user, + lifecycle=lifecycle, + ) except NotImplementedError: pass except IntegrationResourceNotFoundError: diff --git a/tests/sentry/integrations/github/test_repository.py b/tests/sentry/integrations/github/test_repository.py index f974e752ba636e..76afa972ab9403 100644 --- a/tests/sentry/integrations/github/test_repository.py +++ b/tests/sentry/integrations/github/test_repository.py @@ -206,6 +206,33 @@ def test_patchset_caching(self, get_jwt: mock.MagicMock) -> None: # Now that patchset was cached, github shouldn't have been called again assert len(responses.calls) == 1 + @mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") + @responses.activate + def test_compare_commits_reuses_cached_patchset_across_calls( + self, get_jwt: mock.MagicMock + ) -> None: + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef", + json=orjson.loads(COMPARE_COMMITS_EXAMPLE), + ) + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef", + json=orjson.loads(COMPARE_COMMITS_EXAMPLE), + ) + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/commits/6dcb09b5b57875f334f61aebed695e2e4193db5e", + json=orjson.loads(GET_COMMIT_EXAMPLE), + ) + + first = self.provider.compare_commits(self.repository, "xyz123", "abcdef") + second = self.provider.compare_commits(self.repository, "xyz123", "abcdef") + + assert first == second + assert len(responses.calls) == 3 + @responses.activate def test_compare_commits_failure(self) -> None: responses.add( diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py index 96736c5fab418a..c8e26b0000a4c2 100644 --- a/tests/sentry/tasks/test_commits.py +++ b/tests/sentry/tasks/test_commits.py @@ -17,11 +17,19 @@ from sentry.testutils.asserts import assert_slo_metric from sentry.testutils.cases import TestCase from sentry.testutils.silo import assume_test_silo_mode, control_silo_test +from sentry.utils.cache import cache from social_auth.models import UserSocialAuth @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") class FetchCommitsTest(TestCase): + def _github_compare_commits_result(self, repo_name: str, end_sha: str) -> list[dict[str, str]]: + return [ + {"id": "62de626b7c7cfb8e77efb4273b1a3df4123e6216", "repository": repo_name}, + {"id": "58de626b7c7cfb8e77efb4273b1a3df4123e6345", "repository": repo_name}, + {"id": end_sha, "repository": repo_name}, + ] + def _test_simple_action(self, user, org): repo = Repository.objects.create(name="example", provider="dummy", organization_id=org.id) release = Release.objects.create(organization_id=org.id, version="abcabcabc") @@ -86,6 +94,154 @@ def test_duplicate_repositories(self, mock_record: MagicMock) -> None: Repository.objects.create(name="example", provider="dummy", organization_id=org.id) self._test_simple_action(user=self.user, org=org) + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_flag_disabled( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + org = self.create_organization(owner=self.user, name="baz") + repo = Repository.objects.create( + name="example", + provider="integrations:github", + organization_id=org.id, + ) + previous_release = Release.objects.create(organization_id=org.id, version="old-release") + previous_commit = Commit.objects.create( + organization_id=org.id, repository_id=repo.id, key="a" * 40 + ) + ReleaseHeadCommit.objects.create( + organization_id=org.id, + repository_id=repo.id, + release=previous_release, + commit=previous_commit, + ) + + refs = [{"repository": repo.name, "commit": "b" * 40}] + mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) + + first_release = Release.objects.create(organization_id=org.id, version="new-release-1") + second_release = Release.objects.create(organization_id=org.id, version="new-release-2") + + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs, + previous_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs, + previous_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 2 + + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_flag_enabled( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + org = self.create_organization(owner=self.user, name="baz") + repo = Repository.objects.create( + name="example", + provider="integrations:github", + organization_id=org.id, + ) + previous_release = Release.objects.create(organization_id=org.id, version="old-release") + previous_commit = Commit.objects.create( + organization_id=org.id, repository_id=repo.id, key="a" * 40 + ) + ReleaseHeadCommit.objects.create( + organization_id=org.id, + repository_id=repo.id, + release=previous_release, + commit=previous_commit, + ) + + refs = [{"repository": repo.name, "commit": "b" * 40}] + mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) + + first_release = Release.objects.create(organization_id=org.id, version="new-release-1") + second_release = Release.objects.create(organization_id=org.id, version="new-release-2") + + with self.feature( + {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} + ): + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs, + previous_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs, + previous_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 1 + + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_key_variance_on_end_sha( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + org = self.create_organization(owner=self.user, name="baz") + repo = Repository.objects.create( + name="example", + provider="integrations:github", + organization_id=org.id, + ) + previous_release = Release.objects.create(organization_id=org.id, version="old-release") + previous_commit = Commit.objects.create( + organization_id=org.id, repository_id=repo.id, key="a" * 40 + ) + ReleaseHeadCommit.objects.create( + organization_id=org.id, + repository_id=repo.id, + release=previous_release, + commit=previous_commit, + ) + + refs_first = [{"repository": repo.name, "commit": "b" * 40}] + refs_second = [{"repository": repo.name, "commit": "c" * 40}] + mock_compare_commits.side_effect = [ + self._github_compare_commits_result(repo.name, "b" * 40), + self._github_compare_commits_result(repo.name, "c" * 40), + ] + + first_release = Release.objects.create(organization_id=org.id, version="new-release-1") + second_release = Release.objects.create(organization_id=org.id, version="new-release-2") + + with self.feature( + {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} + ): + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs_first, + previous_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs_second, + previous_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 2 + def test_release_locked(self, mock_record_event: MagicMock) -> None: self.login_as(user=self.user) org = self.create_organization(owner=self.user, name="baz") From c4b15e3fc9787415b3cc701ff46c62fe44d546dd Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:15:04 -0400 Subject: [PATCH 2/6] More changes --- pyproject.toml | 1 + src/sentry/tasks/commits.py | 152 +++++++++++++++++++++--------------- 2 files changed, 88 insertions(+), 65 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cf052bb24e5359..49aff6b2f4e498 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -677,6 +677,7 @@ module = [ "sentry.tasks.beacon", "sentry.tasks.codeowners.*", "sentry.tasks.commit_context", + "sentry.tasks.commits", "sentry.tasks.on_demand_metrics", "sentry.tasks.reprocessing2", "sentry.tasks.seer.delete_seer_grouping_records", diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index d119842e47d38f..f26ef2a4a210a8 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -1,6 +1,8 @@ from __future__ import annotations import logging +from collections.abc import Mapping, Sequence +from typing import Any import sentry_sdk from django.urls import reverse @@ -45,7 +47,7 @@ ) -def generate_invalid_identity_email(identity, commit_failure=False): +def generate_invalid_identity_email(identity: Any, commit_failure: bool = False) -> MessageBuilder: new_context = { "identity": identity, "auth_url": absolute_uri(reverse("socialauth_associate", args=[identity.provider])), @@ -60,7 +62,9 @@ def generate_invalid_identity_email(identity, commit_failure=False): ) -def generate_fetch_commits_error_email(release, repo, error_message): +def generate_fetch_commits_error_email( + release: Release, repo: Repository, error_message: str +) -> MessageBuilder: new_context = {"release": release, "error_message": error_message, "repo": repo} return MessageBuilder( @@ -75,10 +79,14 @@ def generate_fetch_commits_error_email(release, repo, error_message): def get_github_compare_commits_cache_key( - organization_id: int, repository_id: int, provider: str, start_sha: str | None, end_sha: str + organization_id: int, + repository_id: int, + provider: str | None, + start_sha: str | None, + end_sha: str, ) -> str: digest = md5_text( - organization_id, repository_id, provider, start_sha or "", end_sha + organization_id, repository_id, provider or "", start_sha or "", end_sha ).hexdigest() return f"fetch-commits:compare-commits:v1:{digest}" @@ -87,26 +95,17 @@ def fetch_compare_commits( *, cache_enabled: bool, repo: Repository, - provider, + provider: Any, is_integration_repo_provider: bool, start_sha: str | None, end_sha: str, user: RpcUser | None, - lifecycle, -): - cache_key = None - provider = repo.provider - if ( - cache_enabled - and isinstance(provider, str) - and provider in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS - and start_sha is not None - ): + lifecycle: Any, +) -> list[dict[str, Any]]: + if cache_enabled: cache_key = get_github_compare_commits_cache_key( - repo.organization_id, repo.id, provider, start_sha, end_sha + repo.organization_id, repo.id, repo.provider, start_sha, end_sha ) - - if cache_key is not None: cached_repo_commits = cache.get(cache_key) lifecycle.add_extra("compare_commits_cache_enabled", True) if cached_repo_commits is not None: @@ -120,9 +119,10 @@ def fetch_compare_commits( if is_integration_repo_provider: repo_commits = provider.compare_commits(repo, start_sha, end_sha) else: + # XXX: This only works for plugins that support actor context repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user) - if cache_key is not None: + if cache_enabled: cache.set( cache_key, repo_commits, @@ -131,7 +131,7 @@ def fetch_compare_commits( return repo_commits -def handle_invalid_identity(identity, commit_failure=False): +def handle_invalid_identity(identity: Any, commit_failure: bool = False) -> None: # email the user msg = generate_invalid_identity_email(identity, commit_failure) msg.send_async(to=[identity.user.email]) @@ -140,6 +140,49 @@ def handle_invalid_identity(identity, commit_failure=False): identity.delete() +def get_repo_and_provider_for_ref( + *, + release: Release, + ref: Mapping[str, str], + user_id: int, +) -> tuple[Repository, Any, bool, str] | None: + repo = ( + Repository.objects.filter( + organization_id=release.organization_id, + name=ref["repository"], + status=ObjectStatus.ACTIVE, + ) + .order_by("-pk") + .first() + ) + if not repo: + logger.info( + "repository.missing", + extra={ + "organization_id": release.organization_id, + "user_id": user_id, + "repository": ref["repository"], + }, + ) + return None + + is_integration_repo_provider = is_integration_provider(repo.provider) + binding_key = ( + "integration-repository.provider" if is_integration_repo_provider else "repository.provider" + ) + try: + provider_cls = bindings.get(binding_key).get(repo.provider) + except KeyError: + return None + + provider = provider_cls(id=repo.provider) + provider_key = ( + provider_cls.repo_provider if is_integration_repo_provider else provider_cls.auth_provider + ) + + return repo, provider, is_integration_repo_provider, provider_key + + @instrumented_task( name="sentry.tasks.commits.fetch_commits", namespace=issues_tasks, @@ -148,9 +191,15 @@ def handle_invalid_identity(identity, commit_failure=False): silo_mode=SiloMode.CELL, ) @retry(exclude=(Release.DoesNotExist, User.DoesNotExist)) -def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **kwargs): +def fetch_commits( + release_id: int, + user_id: int, + refs: Sequence[Mapping[str, str]], + prev_release_id: int | None = None, + **kwargs: Any, +) -> None: # TODO(dcramer): this function could use some cleanup/refactoring as it's a bit unwieldy - commit_list = [] + commit_list: list[dict[str, Any]] = [] release = Release.objects.get(id=release_id) set_tag("organization.slug", release.organization.slug) @@ -166,41 +215,12 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k pass organization = release.organization - github_compare_commits_cache_enabled = features.has( - GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user - ) for ref in refs: - repo = ( - Repository.objects.filter( - organization_id=release.organization_id, - name=ref["repository"], - status=ObjectStatus.ACTIVE, - ) - .order_by("-pk") - .first() - ) - if not repo: - logger.info( - "repository.missing", - extra={ - "organization_id": release.organization_id, - "user_id": user_id, - "repository": ref["repository"], - }, - ) - continue - - is_integration_repo_provider = is_integration_provider(repo.provider) - binding_key = ( - "integration-repository.provider" - if is_integration_repo_provider - else "repository.provider" - ) - try: - provider_cls = bindings.get(binding_key).get(repo.provider) - except KeyError: + resolved = get_repo_and_provider_for_ref(release=release, ref=ref, user_id=user_id) + if resolved is None: continue + repo, provider, is_integration_repo_provider, provider_key = resolved # if previous commit isn't provided, try to get from # previous release otherwise, try to get @@ -219,13 +239,6 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k pass end_sha = ref["commit"] - provider = provider_cls(id=repo.provider) - - provider_key = ( - provider_cls.repo_provider - if is_integration_repo_provider - else provider_cls.auth_provider - ) with SCMIntegrationInteractionEvent( SCMIntegrationInteractionType.COMPARE_COMMITS, @@ -244,8 +257,17 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k } ) try: + provider_name = repo.provider + compare_commits_cache_enabled = ( + features.has( + GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user + ) + and isinstance(provider_name, str) + and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS + and start_sha is not None + ) repo_commits = fetch_compare_commits( - cache_enabled=github_compare_commits_cache_enabled, + cache_enabled=compare_commits_cache_enabled, repo=repo, provider=provider, is_integration_repo_provider=is_integration_repo_provider, @@ -356,11 +378,11 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k Deploy.notify_if_ready(deploy_id, fetch_complete=True) -def is_integration_provider(provider): - return provider and provider.startswith("integrations:") +def is_integration_provider(provider: str | None) -> bool: + return bool(provider and provider.startswith("integrations:")) -def get_emails_for_user_or_org(user: RpcUser | None, orgId: int): +def get_emails_for_user_or_org(user: RpcUser | None, orgId: int) -> list[str]: emails: list[str] = [] if not user: return [] From 6bab863c27f9afcb38772ed094ebc99cbc7cd442 Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:16:26 -0400 Subject: [PATCH 3/6] More changes --- src/sentry/tasks/commits.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index f26ef2a4a210a8..bb83dda5c6f2e1 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -215,6 +215,9 @@ def fetch_commits( pass organization = release.organization + github_compare_commits_cache_feature_enabled = features.has( + GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user + ) for ref in refs: resolved = get_repo_and_provider_for_ref(release=release, ref=ref, user_id=user_id) @@ -259,9 +262,7 @@ def fetch_commits( try: provider_name = repo.provider compare_commits_cache_enabled = ( - features.has( - GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user - ) + github_compare_commits_cache_feature_enabled and isinstance(provider_name, str) and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS and start_sha is not None From 2dda0cd70bf34cb5e06768c7fb1f11e003dc4f5b Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:33:40 -0400 Subject: [PATCH 4/6] Fix bug --- src/sentry/tasks/commits.py | 19 +++++++++---------- tests/sentry/tasks/test_commits.py | 26 +++++++++++++------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index bb83dda5c6f2e1..bfe8b50c828c2d 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -78,6 +78,15 @@ def generate_fetch_commits_error_email( # we're future proofing this function a bit so it could be used with other code +def handle_invalid_identity(identity: Any, commit_failure: bool = False) -> None: + # email the user + msg = generate_invalid_identity_email(identity, commit_failure) + msg.send_async(to=[identity.user.email]) + + # now remove the identity, as its invalid + identity.delete() + + def get_github_compare_commits_cache_key( organization_id: int, repository_id: int, @@ -131,15 +140,6 @@ def fetch_compare_commits( return repo_commits -def handle_invalid_identity(identity: Any, commit_failure: bool = False) -> None: - # email the user - msg = generate_invalid_identity_email(identity, commit_failure) - msg.send_async(to=[identity.user.email]) - - # now remove the identity, as its invalid - identity.delete() - - def get_repo_and_provider_for_ref( *, release: Release, @@ -198,7 +198,6 @@ def fetch_commits( prev_release_id: int | None = None, **kwargs: Any, ) -> None: - # TODO(dcramer): this function could use some cleanup/refactoring as it's a bit unwieldy commit_list: list[dict[str, Any]] = [] release = Release.objects.get(id=release_id) diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py index c8e26b0000a4c2..d570a7c9e8d988 100644 --- a/tests/sentry/tasks/test_commits.py +++ b/tests/sentry/tasks/test_commits.py @@ -52,7 +52,7 @@ def _test_simple_action(self, user, org): release_id=release2.id, user_id=user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) commit_list = list( @@ -129,13 +129,13 @@ def test_github_compare_commits_cache_flag_disabled( release_id=first_release.id, user_id=self.user.id, refs=refs, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) fetch_commits( release_id=second_release.id, user_id=self.user.id, refs=refs, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) assert mock_compare_commits.call_count == 2 @@ -178,13 +178,13 @@ def test_github_compare_commits_cache_flag_enabled( release_id=first_release.id, user_id=self.user.id, refs=refs, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) fetch_commits( release_id=second_release.id, user_id=self.user.id, refs=refs, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) assert mock_compare_commits.call_count == 1 @@ -231,13 +231,13 @@ def test_github_compare_commits_cache_key_variance_on_end_sha( release_id=first_release.id, user_id=self.user.id, refs=refs_first, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) fetch_commits( release_id=second_release.id, user_id=self.user.id, refs=refs_second, - previous_release_id=previous_release.id, + prev_release_id=previous_release.id, ) assert mock_compare_commits.call_count == 2 @@ -264,7 +264,7 @@ def test_release_locked(self, mock_record_event: MagicMock) -> None: release_id=new_release.id, user_id=self.user.id, refs=refs, - previous_release_id=old_release.id, + prev_release_id=old_release.id, ) count_query = ReleaseHeadCommit.objects.filter(release=new_release) # No release commits should be made as the task should return early. @@ -297,7 +297,7 @@ def test_fetch_error_invalid_identity( mock_compare_commits.side_effect = InvalidIdentity(identity=usa) fetch_commits( - release_id=release2.id, user_id=self.user.id, refs=refs, previous_release_id=release.id + release_id=release2.id, user_id=self.user.id, refs=refs, prev_release_id=release.id ) mock_handle_invalid_identity.assert_called_once_with(identity=usa, commit_failure=True) @@ -334,7 +334,7 @@ def test_fetch_error_plugin_error( release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -375,7 +375,7 @@ def test_fetch_error_plugin_error_for_sentry_app( release_id=release2.id, user_id=sentry_app.proxy_user_id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -415,7 +415,7 @@ def test_fetch_error_random_exception( release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -456,7 +456,7 @@ def test_fetch_error_random_exception_integration(self, mock_record: MagicMock) release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] From 1df104b6a82ff3df7e3179636d1ad6fc1527ff9d Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:38:12 -0400 Subject: [PATCH 5/6] Address hash problem --- src/sentry/tasks/commits.py | 9 +++++---- tests/sentry/tasks/test_commits.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index bfe8b50c828c2d..bb68885efa4d89 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -33,7 +33,7 @@ from sentry.users.services.user.service import user_service from sentry.utils.cache import cache from sentry.utils.email import MessageBuilder -from sentry.utils.hashlib import md5_text +from sentry.utils.hashlib import hash_values from sentry.utils.http import absolute_uri logger = logging.getLogger(__name__) @@ -94,9 +94,10 @@ def get_github_compare_commits_cache_key( start_sha: str | None, end_sha: str, ) -> str: - digest = md5_text( - organization_id, repository_id, provider or "", start_sha or "", end_sha - ).hexdigest() + digest = hash_values( + [organization_id, repository_id, provider or "", start_sha or "", end_sha], + seed="fetch-commits:compare-commits", + ) return f"fetch-commits:compare-commits:v1:{digest}" diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py index d570a7c9e8d988..b5deeedb92366b 100644 --- a/tests/sentry/tasks/test_commits.py +++ b/tests/sentry/tasks/test_commits.py @@ -13,7 +13,11 @@ from sentry.models.releaseheadcommit import ReleaseHeadCommit from sentry.models.repository import Repository from sentry.silo.base import SiloMode -from sentry.tasks.commits import fetch_commits, handle_invalid_identity +from sentry.tasks.commits import ( + fetch_commits, + get_github_compare_commits_cache_key, + handle_invalid_identity, +) from sentry.testutils.asserts import assert_slo_metric from sentry.testutils.cases import TestCase from sentry.testutils.silo import assume_test_silo_mode, control_silo_test @@ -30,6 +34,14 @@ def _github_compare_commits_result(self, repo_name: str, end_sha: str) -> list[d {"id": end_sha, "repository": repo_name}, ] + def test_github_compare_commits_cache_key_avoids_ambiguous_id_collisions( + self, mock_record: MagicMock + ) -> None: + key_one = get_github_compare_commits_cache_key(1, 23, "integrations:github", "a", "b") + key_two = get_github_compare_commits_cache_key(12, 3, "integrations:github", "a", "b") + + assert key_one != key_two + def _test_simple_action(self, user, org): repo = Repository.objects.create(name="example", provider="dummy", organization_id=org.id) release = Release.objects.create(organization_id=org.id, version="abcabcabc") From e192014d2ca07a52a1f7be5da1dcf22e338dff8d Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:46:22 -0400 Subject: [PATCH 6/6] Simplify tests --- tests/sentry/tasks/test_commits.py | 89 ++++++++++-------------------- 1 file changed, 29 insertions(+), 60 deletions(-) diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py index b5deeedb92366b..7cf3f4bad547e1 100644 --- a/tests/sentry/tasks/test_commits.py +++ b/tests/sentry/tasks/test_commits.py @@ -34,6 +34,29 @@ def _github_compare_commits_result(self, repo_name: str, end_sha: str) -> list[d {"id": end_sha, "repository": repo_name}, ] + def _setup_github_compare_commits_cache_context(self): + org = self.create_organization(owner=self.user, name="baz") + repo = Repository.objects.create( + name="example", + provider="integrations:github", + organization_id=org.id, + ) + previous_release = Release.objects.create(organization_id=org.id, version="old-release") + previous_commit = Commit.objects.create( + organization_id=org.id, repository_id=repo.id, key="a" * 40 + ) + ReleaseHeadCommit.objects.create( + organization_id=org.id, + repository_id=repo.id, + release=previous_release, + commit=previous_commit, + ) + + refs = [{"repository": repo.name, "commit": "b" * 40}] + first_release = Release.objects.create(organization_id=org.id, version="new-release-1") + second_release = Release.objects.create(organization_id=org.id, version="new-release-2") + return org, repo, previous_release, first_release, second_release, refs + def test_github_compare_commits_cache_key_avoids_ambiguous_id_collisions( self, mock_record: MagicMock ) -> None: @@ -113,29 +136,11 @@ def test_github_compare_commits_cache_flag_disabled( self.login_as(user=self.user) cache.clear() - org = self.create_organization(owner=self.user, name="baz") - repo = Repository.objects.create( - name="example", - provider="integrations:github", - organization_id=org.id, - ) - previous_release = Release.objects.create(organization_id=org.id, version="old-release") - previous_commit = Commit.objects.create( - organization_id=org.id, repository_id=repo.id, key="a" * 40 + _, repo, previous_release, first_release, second_release, refs = ( + self._setup_github_compare_commits_cache_context() ) - ReleaseHeadCommit.objects.create( - organization_id=org.id, - repository_id=repo.id, - release=previous_release, - commit=previous_commit, - ) - - refs = [{"repository": repo.name, "commit": "b" * 40}] mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) - first_release = Release.objects.create(organization_id=org.id, version="new-release-1") - second_release = Release.objects.create(organization_id=org.id, version="new-release-2") - with self.tasks(): fetch_commits( release_id=first_release.id, @@ -159,29 +164,11 @@ def test_github_compare_commits_cache_flag_enabled( self.login_as(user=self.user) cache.clear() - org = self.create_organization(owner=self.user, name="baz") - repo = Repository.objects.create( - name="example", - provider="integrations:github", - organization_id=org.id, - ) - previous_release = Release.objects.create(organization_id=org.id, version="old-release") - previous_commit = Commit.objects.create( - organization_id=org.id, repository_id=repo.id, key="a" * 40 + org, repo, previous_release, first_release, second_release, refs = ( + self._setup_github_compare_commits_cache_context() ) - ReleaseHeadCommit.objects.create( - organization_id=org.id, - repository_id=repo.id, - release=previous_release, - commit=previous_commit, - ) - - refs = [{"repository": repo.name, "commit": "b" * 40}] mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) - first_release = Release.objects.create(organization_id=org.id, version="new-release-1") - second_release = Release.objects.create(organization_id=org.id, version="new-release-2") - with self.feature( {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} ): @@ -208,33 +195,15 @@ def test_github_compare_commits_cache_key_variance_on_end_sha( self.login_as(user=self.user) cache.clear() - org = self.create_organization(owner=self.user, name="baz") - repo = Repository.objects.create( - name="example", - provider="integrations:github", - organization_id=org.id, - ) - previous_release = Release.objects.create(organization_id=org.id, version="old-release") - previous_commit = Commit.objects.create( - organization_id=org.id, repository_id=repo.id, key="a" * 40 + org, repo, previous_release, first_release, second_release, refs_first = ( + self._setup_github_compare_commits_cache_context() ) - ReleaseHeadCommit.objects.create( - organization_id=org.id, - repository_id=repo.id, - release=previous_release, - commit=previous_commit, - ) - - refs_first = [{"repository": repo.name, "commit": "b" * 40}] refs_second = [{"repository": repo.name, "commit": "c" * 40}] mock_compare_commits.side_effect = [ self._github_compare_commits_result(repo.name, "b" * 40), self._github_compare_commits_result(repo.name, "c" * 40), ] - first_release = Release.objects.create(organization_id=org.id, version="new-release-1") - second_release = Release.objects.create(organization_id=org.id, version="new-release-2") - with self.feature( {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} ):