Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ module = [
"sentry.integrations.discord.webhooks.*",
"sentry.integrations.github.actions.*",
"sentry.integrations.github.handlers.*",
"sentry.integrations.github.repository",
"sentry.integrations.github.webhook",
"sentry.integrations.github_enterprise.actions.*",
"sentry.integrations.github_enterprise.handlers.*",
Expand Down
44 changes: 29 additions & 15 deletions src/sentry/integrations/github/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,7 @@ def build_repository_config(
"integration_id": data["integration_id"],
}

def compare_commits(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function would call two different endpoints: compare/ and sometimes commit/.

In a sense, we're not doing any comparison of commits, thus, even making the name more confusing.

self, repo: Repository, start_sha: str | None, end_sha: str
) -> Sequence[Mapping[str, Any]]:
def eval_commits(client: Any) -> Sequence[Mapping[str, Any]]:
# use config name because that is kept in sync via webhooks
name = repo.config["name"]
if start_sha is None:
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the difference between calling one endpoint vs the other one.

res = client.get_last_commits(name, end_sha)
return self._format_commits(client, name, res[:20])
else:
commits = client.compare_commits(name, start_sha, end_sha)
return self._format_commits(client, name, commits)

def _get_installation_and_client(self, repo: Repository) -> tuple[IntegrationInstallation, Any]:
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're adding two new functions and they will share this logic.

integration_id = repo.integration_id
if integration_id is None:
raise NotImplementedError("GitHub apps requires an integration id to fetch commits")
Expand All @@ -90,13 +78,39 @@ def eval_commits(client: Any) -> Sequence[Mapping[str, Any]]:
)

installation = integration.get_installation(organization_id=repo.organization_id)
client = installation.get_client()
return installation, installation.get_client()

def fetch_recent_commits(self, repo: Repository, end_sha: str) -> Sequence[Mapping[str, Any]]:
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one focuses on getting the last 20 commits. Not an idempotent call.

installation, client = self._get_installation_and_client(repo)
# use config name because that is kept in sync via webhooks
name = repo.config["name"]

try:
commits = client.get_last_commits(name, end_sha)
return self._format_commits(client, name, commits[:20])
Comment on lines +89 to +90
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: could we update the client to take a pagination size of 20 vs retrieving 30 and trimming the number down? https://docs.github.com/en/rest/commits/commits?apiVersion=2026-03-10#list-commits-on-a-repository

except Exception as e:
installation.raise_error(e)

def fetch_commits_for_compare_range(
self, repo: Repository, start_sha: str, end_sha: str
) -> Sequence[Mapping[str, Any]]:
installation, client = self._get_installation_and_client(repo)
# use config name because that is kept in sync via webhooks
name = repo.config["name"]

try:
return eval_commits(client)
commits = client.compare_commits(name, start_sha, end_sha)
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an idempotent call. In the next PR I will be moving the caching logic here.

return self._format_commits(client, name, commits)
except Exception as e:
installation.raise_error(e)

def compare_commits(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for backward compatibility.

self, repo: Repository, start_sha: str | None, end_sha: str
) -> Sequence[Mapping[str, Any]]:
if start_sha is None:
return self.fetch_recent_commits(repo, end_sha)
return self.fetch_commits_for_compare_range(repo, start_sha, end_sha)

def _format_commits(
self,
client: Any,
Expand Down
69 changes: 49 additions & 20 deletions src/sentry/tasks/commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,13 @@ def get_github_compare_commits_cache_key(
return f"fetch-commits:compare-commits:v1:{digest}"


def fetch_compare_commits(
def fetch_commits_for_compare_range(
*,
cache_enabled: bool,
repo: Repository,
provider: Any,
is_integration_repo_provider: bool,
start_sha: str | None,
start_sha: str,
end_sha: str,
user: RpcUser | None,
lifecycle: Any,
Expand All @@ -127,7 +127,11 @@ def fetch_compare_commits(
lifecycle.add_extra("compare_commits_cache_enabled", False)

if is_integration_repo_provider:
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
# New method to decouple the provider from the task
if hasattr(provider, "fetch_commits_for_compare_range"):
repo_commits = provider.fetch_commits_for_compare_range(repo, start_sha, end_sha)
else:
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For backward compatibility.

else:
# XXX: This only works for plugins that support actor context
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)
Expand All @@ -141,6 +145,23 @@ def fetch_compare_commits(
return repo_commits


def fetch_recent_commits(
*,
repo: Repository,
provider: Any,
is_integration_repo_provider: bool,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be standardized across provider clients consistently? Mainly asking out of ignorance for GitLab and Bitbucket's APIs.

end_sha: str,
user: RpcUser | None,
) -> list[dict[str, Any]]:
if is_integration_repo_provider:
if hasattr(provider, "fetch_recent_commits"):
return provider.fetch_recent_commits(repo, end_sha)
return provider.compare_commits(repo, None, end_sha)

# XXX: This only works for plugins that support actor context
return provider.compare_commits(repo, None, end_sha, actor=user)


def get_repo_and_provider_for_ref(
*,
release: Release,
Expand Down Expand Up @@ -260,23 +281,31 @@ def fetch_commits(
}
)
try:
provider_name = repo.provider
compare_commits_cache_enabled = (
github_compare_commits_cache_feature_enabled
and isinstance(provider_name, str)
and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
and start_sha is not None
)
repo_commits = fetch_compare_commits(
cache_enabled=compare_commits_cache_enabled,
repo=repo,
provider=provider,
is_integration_repo_provider=is_integration_repo_provider,
start_sha=start_sha,
end_sha=end_sha,
user=user,
lifecycle=lifecycle,
)
if start_sha is None:
repo_commits = fetch_recent_commits(
repo=repo,
provider=provider,
is_integration_repo_provider=is_integration_repo_provider,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The fetch_recent_commits function is missing a lifecycle parameter, which prevents the compare_commits_cache_enabled metric from being recorded when start_sha is None.
Severity: LOW

Suggested Fix

Add a lifecycle parameter to the fetch_recent_commits function signature. Pass the lifecycle object to it from fetch_commits. Inside fetch_recent_commits, add the compare_commits_cache_enabled metric to the lifecycle to restore the previous telemetry behavior.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/tasks/commits.py#L288

Potential issue: A refactoring separated commit fetching logic into
`fetch_recent_commits` and `fetch_commits_for_compare_range`. When `start_sha` is
`None`, `fetch_commits` calls `fetch_recent_commits`. However, the `lifecycle` parameter
was not added to the new `fetch_recent_commits` function. Consequently, the
`compare_commits_cache_enabled` metric is no longer recorded for this code path. This is
a regression in observability and does not impact the core functionality of fetching
commits, nor does it cause crashes or incorrect application behavior.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is sort of an anti-pattern anyway. Function calls like this shouldn't necessarily be coupled to the metrics in the top-level tasks imo.

end_sha=end_sha,
user=user,
)
else:
provider_name = repo.provider
compare_commits_cache_enabled = (
github_compare_commits_cache_feature_enabled
and isinstance(provider_name, str)
and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
)
repo_commits = fetch_commits_for_compare_range(
cache_enabled=compare_commits_cache_enabled,
repo=repo,
provider=provider,
is_integration_repo_provider=is_integration_repo_provider,
start_sha=start_sha,
end_sha=end_sha,
user=user,
lifecycle=lifecycle,
)
except NotImplementedError:
pass
except IntegrationResourceNotFoundError:
Expand Down
44 changes: 29 additions & 15 deletions tests/sentry/tasks/test_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,17 +130,21 @@ def test_duplicate_repositories(self, mock_record: MagicMock) -> None:
Repository.objects.create(name="example", provider="dummy", organization_id=org.id)
self._test_simple_action(user=self.user, org=org)

@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
@patch(
"sentry.integrations.github.repository.GitHubRepositoryProvider.fetch_commits_for_compare_range"
)
def test_github_compare_commits_cache_flag_disabled(
self, mock_compare_commits: MagicMock, mock_record: MagicMock
self, mock_fetch_commits_for_compare_range: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()

_, repo, previous_release, first_release, second_release, refs = (
self._setup_github_compare_commits_cache_context()
)
mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40)
mock_fetch_commits_for_compare_range.return_value = self._github_compare_commits_result(
repo.name, "b" * 40
)

with self.tasks():
fetch_commits(
Expand All @@ -156,19 +160,23 @@ def test_github_compare_commits_cache_flag_disabled(
prev_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 2
assert mock_fetch_commits_for_compare_range.call_count == 2

@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
@patch(
"sentry.integrations.github.repository.GitHubRepositoryProvider.fetch_commits_for_compare_range"
)
def test_github_compare_commits_cache_flag_enabled(
self, mock_compare_commits: MagicMock, mock_record: MagicMock
self, mock_fetch_commits_for_compare_range: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()

org, repo, previous_release, first_release, second_release, refs = (
self._setup_github_compare_commits_cache_context()
)
mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40)
mock_fetch_commits_for_compare_range.return_value = self._github_compare_commits_result(
repo.name, "b" * 40
)

with self.feature(
{"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]}
Expand All @@ -187,11 +195,13 @@ def test_github_compare_commits_cache_flag_enabled(
prev_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 1
assert mock_fetch_commits_for_compare_range.call_count == 1

@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
@patch(
"sentry.integrations.github.repository.GitHubRepositoryProvider.fetch_commits_for_compare_range"
)
def test_github_compare_commits_cache_key_variance_on_end_sha(
self, mock_compare_commits: MagicMock, mock_record: MagicMock
self, mock_fetch_commits_for_compare_range: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()
Expand All @@ -200,7 +210,7 @@ def test_github_compare_commits_cache_key_variance_on_end_sha(
self._setup_github_compare_commits_cache_context()
)
refs_second = [{"repository": repo.name, "commit": "c" * 40}]
mock_compare_commits.side_effect = [
mock_fetch_commits_for_compare_range.side_effect = [
self._github_compare_commits_result(repo.name, "b" * 40),
self._github_compare_commits_result(repo.name, "c" * 40),
]
Expand All @@ -222,12 +232,14 @@ def test_github_compare_commits_cache_key_variance_on_end_sha(
prev_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 2
assert mock_fetch_commits_for_compare_range.call_count == 2

@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
@patch(
"sentry.integrations.github.repository.GitHubRepositoryProvider.fetch_commits_for_compare_range"
)
def test_github_compare_commits_cache_ttl(
self,
mock_compare_commits: MagicMock,
mock_fetch_commits_for_compare_range: MagicMock,
mock_record: MagicMock,
) -> None:
self.login_as(user=self.user)
Expand All @@ -236,7 +248,9 @@ def test_github_compare_commits_cache_ttl(
org, repo, previous_release, first_release, _, refs = (
self._setup_github_compare_commits_cache_context()
)
mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40)
mock_fetch_commits_for_compare_range.return_value = self._github_compare_commits_result(
repo.name, "b" * 40
)

with self.feature(
{"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]}
Expand Down
Loading