Skip to content

Commit d20aa17

Browse files
authored
feat(integrations): Genericise repository syncing task (#112519)
This uses more generic functions in the syncing task. It turns out there's more work to do for each provider to make them generic, so not enabling everything yet.
1 parent 39b1ebf commit d20aa17

File tree

3 files changed

+76
-37
lines changed

3 files changed

+76
-37
lines changed

src/sentry/features/temporary.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
140140
manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
141141
manager.add("organizations:github-repo-auto-sync", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
142142
manager.add("organizations:github-repo-auto-sync-apply", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
143+
manager.add("organizations:github_enterprise-repo-auto-sync", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
144+
manager.add("organizations:github_enterprise-repo-auto-sync-apply", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
143145
manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
144146
manager.add("organizations:integrations-slack-staging", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
145147
manager.add("organizations:scm-source-context", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)

src/sentry/integrations/source_code_management/sync_repos.py

Lines changed: 61 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@
88
as needed.
99
"""
1010

11+
from __future__ import annotations
12+
1113
import logging
1214
from datetime import timedelta
1315

1416
from taskbroker_client.retry import Retry
1517

1618
from sentry import features
1719
from sentry.constants import ObjectStatus
18-
from sentry.integrations.github.tasks.link_all_repos import get_repo_config
20+
from sentry.features.exceptions import FeatureNotRegistered
1921
from sentry.integrations.models.organization_integration import OrganizationIntegration
2022
from sentry.integrations.services.integration import integration_service
2123
from sentry.integrations.services.repository.service import repository_service
@@ -24,8 +26,12 @@
2426
SCMIntegrationInteractionType,
2527
)
2628
from sentry.integrations.source_code_management.repo_audit import log_repo_change
29+
from sentry.integrations.source_code_management.repository import RepositoryIntegration
2730
from sentry.organizations.services.organization import organization_service
28-
from sentry.plugins.providers.integration_repository import get_integration_repository_provider
31+
from sentry.plugins.providers.integration_repository import (
32+
RepositoryInputConfig,
33+
get_integration_repository_provider,
34+
)
2935
from sentry.shared_integrations.exceptions import ApiError
3036
from sentry.silo.base import SiloMode
3137
from sentry.tasks.base import instrumented_task, retry
@@ -35,6 +41,28 @@
3541

3642
logger = logging.getLogger(__name__)
3743

44+
# Providers to include in the periodic sync. Each must implement
45+
# get_repositories() returning RepositoryInfo with external_id.
46+
# Perforce is excluded because it cannot derive external_id from its API.
47+
# Providers to include in the periodic sync.
48+
# Other providers (GitLab, Bitbucket, Bitbucket Server, VSTS) have
49+
# build_repository_config methods that expect additional data beyond what
50+
# RepositoryInfo provides (e.g. url, instance, project_id). They need
51+
# follow-up work to either enrich the sync config or simplify their
52+
# build_repository_config before they can be added here.
53+
SCM_SYNC_PROVIDERS = [
54+
"github",
55+
"github_enterprise",
56+
]
57+
58+
59+
def _has_feature(flag: str, org: object) -> bool:
60+
"""Check a feature flag, returning False if the flag isn't registered."""
61+
try:
62+
return features.has(flag, org)
63+
except FeatureNotRegistered:
64+
return False
65+
3866

3967
@instrumented_task(
4068
name="sentry.integrations.source_code_management.sync_repos.sync_repos_for_org",
@@ -48,8 +76,8 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
4876
"""
4977
Sync repositories for a single OrganizationIntegration.
5078
51-
Fetches all repos from GitHub, diffs against Sentry's Repository table,
52-
and creates/disables/re-enables repos as needed.
79+
Fetches all repos from the SCM provider, diffs against Sentry's
80+
Repository table, and creates/disables/re-enables repos as needed.
5381
"""
5482
try:
5583
oi = OrganizationIntegration.objects.get(
@@ -85,23 +113,25 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
85113
return
86114

87115
rpc_org = org_context.organization
88-
if not features.has("organizations:github-repo-auto-sync", rpc_org):
116+
provider_key = integration.provider
117+
118+
if not _has_feature(f"organizations:{provider_key}-repo-auto-sync", rpc_org):
89119
return
90120

91-
provider = f"integrations:{integration.provider}"
92-
dry_run = not features.has("organizations:github-repo-auto-sync-apply", rpc_org)
121+
provider = f"integrations:{provider_key}"
122+
dry_run = not _has_feature(f"organizations:{provider_key}-repo-auto-sync-apply", rpc_org)
93123

94124
with SCMIntegrationInteractionEvent(
95125
interaction_type=SCMIntegrationInteractionType.SYNC_REPOS,
96126
integration_id=integration.id,
97127
organization_id=organization_id,
98-
provider_key=integration.provider,
128+
provider_key=provider_key,
99129
).capture():
100130
installation = integration.get_installation(organization_id=organization_id)
101-
client = installation.get_client()
131+
assert isinstance(installation, RepositoryIntegration)
102132

103133
try:
104-
github_repos = client.get_repos()
134+
provider_repos = installation.get_repositories()
105135
except ApiError as e:
106136
if installation.is_rate_limited_error(e):
107137
logger.info(
@@ -113,7 +143,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
113143
)
114144
raise
115145

116-
github_external_ids = {str(repo["id"]) for repo in github_repos}
146+
provider_external_ids = {repo["external_id"] for repo in provider_repos}
117147

118148
all_repos = repository_service.get_repositories(
119149
organization_id=organization_id,
@@ -128,19 +158,19 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
128158
sentry_active_ids = {r.external_id for r in active_repos}
129159
sentry_disabled_ids = {r.external_id for r in disabled_repos}
130160

131-
new_ids = github_external_ids - sentry_active_ids - sentry_disabled_ids
132-
removed_ids = sentry_active_ids - github_external_ids
133-
restored_ids = sentry_disabled_ids & github_external_ids
161+
new_ids = provider_external_ids - sentry_active_ids - sentry_disabled_ids
162+
removed_ids = sentry_active_ids - provider_external_ids
163+
restored_ids = sentry_disabled_ids & provider_external_ids
134164

135165
metric_tags = {
136-
"provider": integration.provider,
166+
"provider": provider_key,
137167
"dry_run": str(dry_run),
138168
}
139169
metrics.distribution("scm.repo_sync.new_repos", len(new_ids), tags=metric_tags)
140170
metrics.distribution("scm.repo_sync.removed_repos", len(removed_ids), tags=metric_tags)
141171
metrics.distribution("scm.repo_sync.restored_repos", len(restored_ids), tags=metric_tags)
142172
metrics.distribution(
143-
"scm.repo_sync.provider_total", len(github_external_ids), tags=metric_tags
173+
"scm.repo_sync.provider_total", len(provider_external_ids), tags=metric_tags
144174
)
145175
metrics.distribution(
146176
"scm.repo_sync.sentry_active", len(sentry_active_ids), tags=metric_tags
@@ -153,11 +183,11 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
153183
logger.info(
154184
"scm.repo_sync.diff",
155185
extra={
156-
"provider": integration.provider,
186+
"provider": provider_key,
157187
"integration_id": integration.id,
158188
"organization_id": organization_id,
159189
"dry_run": dry_run,
160-
"provider_total": len(github_external_ids),
190+
"provider_total": len(provider_external_ids),
161191
"sentry_active": len(sentry_active_ids),
162192
"sentry_disabled": len(sentry_disabled_ids),
163193
"new": len(new_ids),
@@ -173,10 +203,14 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
173203

174204
if new_ids:
175205
integration_repo_provider = get_integration_repository_provider(integration)
176-
repo_configs = [
177-
get_repo_config(repo, integration.id)
178-
for repo in github_repos
179-
if str(repo["id"]) in new_ids
206+
repo_configs: list[RepositoryInputConfig] = [
207+
{
208+
"external_id": repo["external_id"],
209+
"integration_id": integration.id,
210+
"identifier": str(repo["identifier"]),
211+
}
212+
for repo in provider_repos
213+
if repo["external_id"] in new_ids
180214
]
181215
if repo_configs:
182216
created_repos, reactivated_repos, _ = integration_repo_provider.create_repositories(
@@ -189,7 +223,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
189223
organization_id=organization_id,
190224
repo=repo,
191225
source="automatic SCM syncing",
192-
provider=integration.provider,
226+
provider=provider_key,
193227
)
194228

195229
for repo in reactivated_repos:
@@ -198,7 +232,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
198232
organization_id=organization_id,
199233
repo=repo,
200234
source="automatic SCM syncing",
201-
provider=integration.provider,
235+
provider=provider_key,
202236
)
203237

204238
if removed_ids:
@@ -217,7 +251,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
217251
organization_id=organization_id,
218252
repo=removed_repo,
219253
source="automatic SCM syncing",
220-
provider=integration.provider,
254+
provider=provider_key,
221255
)
222256

223257
if restored_ids:
@@ -232,7 +266,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
232266
organization_id=organization_id,
233267
repo=repo,
234268
source="automatic SCM syncing",
235-
provider=integration.provider,
269+
provider=provider_key,
236270
)
237271

238272

@@ -246,7 +280,7 @@ def scm_repo_sync_beat() -> None:
246280
name="scm_repo_sync",
247281
schedule_key="scm-repo-sync-beat",
248282
queryset=OrganizationIntegration.objects.filter(
249-
integration__provider__in=["github", "github_enterprise"],
283+
integration__provider__in=SCM_SYNC_PROVIDERS,
250284
integration__status=ObjectStatus.ACTIVE,
251285
status=ObjectStatus.ACTIVE,
252286
),

tests/sentry/integrations/source_code_management/test_sync_repos.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ def _add_repos_response(self, repos: list[dict[str, object]]) -> None:
4444
def test_creates_new_repos(self, _: MagicMock) -> None:
4545
self._add_repos_response(
4646
[
47-
{"id": 1, "full_name": "getsentry/sentry"},
48-
{"id": 2, "full_name": "getsentry/snuba"},
47+
{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"},
48+
{"id": 2, "full_name": "getsentry/snuba", "name": "snuba"},
4949
]
5050
)
5151

@@ -82,7 +82,7 @@ def test_disables_removed_repos(self, _: MagicMock) -> None:
8282
)
8383

8484
# GitHub no longer returns this repo
85-
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry"}])
85+
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"}])
8686

8787
with self.feature(
8888
["organizations:github-repo-auto-sync", "organizations:github-repo-auto-sync-apply"]
@@ -121,7 +121,7 @@ def test_re_enables_restored_repos(self, _: MagicMock) -> None:
121121
)
122122

123123
# GitHub returns the repo again
124-
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry"}])
124+
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"}])
125125

126126
with self.feature(
127127
["organizations:github-repo-auto-sync", "organizations:github-repo-auto-sync-apply"]
@@ -150,7 +150,7 @@ def test_no_changes_needed(self, _: MagicMock) -> None:
150150
status=ObjectStatus.ACTIVE,
151151
)
152152

153-
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry"}])
153+
self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"}])
154154

155155
with self.feature(
156156
["organizations:github-repo-auto-sync", "organizations:github-repo-auto-sync-apply"]
@@ -184,8 +184,8 @@ def test_dry_run_without_apply_flag(self, _: MagicMock) -> None:
184184
"""With auto-sync on but apply off, the task computes the diff but doesn't apply changes."""
185185
self._add_repos_response(
186186
[
187-
{"id": 1, "full_name": "getsentry/sentry"},
188-
{"id": 2, "full_name": "getsentry/snuba"},
187+
{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"},
188+
{"id": 2, "full_name": "getsentry/snuba", "name": "snuba"},
189189
]
190190
)
191191

@@ -249,12 +249,15 @@ def test_creates_new_repos_for_ghe(self, mock_get_repos: MagicMock) -> None:
249249
)
250250

251251
mock_get_repos.return_value = [
252-
{"id": 1, "full_name": "testorg/repo1"},
253-
{"id": 2, "full_name": "testorg/repo2"},
252+
{"id": 1, "full_name": "testorg/repo1", "name": "repo1"},
253+
{"id": 2, "full_name": "testorg/repo2", "name": "repo2"},
254254
]
255255

256256
with self.feature(
257-
["organizations:github-repo-auto-sync", "organizations:github-repo-auto-sync-apply"]
257+
[
258+
"organizations:github_enterprise-repo-auto-sync",
259+
"organizations:github_enterprise-repo-auto-sync-apply",
260+
]
258261
):
259262
sync_repos_for_org(oi.id)
260263

0 commit comments

Comments
 (0)