Skip to content

Commit 8cc27c8

Browse files
committed
chore(integrations): Move repository syncing code to generic place.
This moves the repository syncing code into a more generic location, but doesn't change it at all. A follow-up will add the changes needed to make this work for all SCMs.
1 parent 463da02 commit 8cc27c8

File tree

5 files changed

+276
-296
lines changed

5 files changed

+276
-296
lines changed

src/sentry/conf/server.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
880880
"sentry.integrations.github.tasks.pr_comment",
881881
"sentry.integrations.github.tasks.sync_repos",
882882
"sentry.integrations.github.tasks.sync_repos_on_install_change",
883+
"sentry.integrations.source_code_management.sync_repos",
883884
"sentry.integrations.gitlab.tasks",
884885
"sentry.integrations.jira.tasks",
885886
"sentry.integrations.opsgenie.tasks",
@@ -1251,8 +1252,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
12511252
"task": "sdk.control:sentry.tasks.release_registry.fetch_release_registry_data_control",
12521253
"schedule": crontab("*/5", "*", "*", "*", "*"),
12531254
},
1254-
"github-repo-sync-beat": {
1255-
"task": "integrations.control:sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
1255+
"scm-repo-sync-beat": {
1256+
"task": "integrations.control:sentry.integrations.source_code_management.sync_repos.scm_repo_sync_beat",
12561257
"schedule": timedelta(minutes=1),
12571258
},
12581259
}

src/sentry/integrations/github/tasks/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from .codecov_account_unlink import codecov_account_unlink
33
from .link_all_repos import link_all_repos
44
from .pr_comment import github_comment_workflow
5-
from .sync_repos import github_repo_sync_beat, sync_repos_for_org
5+
from .sync_repos import github_repo_sync_beat, scm_repo_sync_beat, sync_repos_for_org
66
from .sync_repos_on_install_change import sync_repos_on_install_change
77

88
__all__ = (
@@ -11,6 +11,7 @@
1111
"github_comment_workflow",
1212
"github_repo_sync_beat",
1313
"link_all_repos",
14+
"scm_repo_sync_beat",
1415
"sync_repos_for_org",
1516
"sync_repos_on_install_change",
1617
)
src/sentry/integrations/github/tasks/sync_repos.py

Lines changed: 13 additions & 249 deletions
Original file line numberDiff line numberDiff line change
@@ -1,256 +1,20 @@
11
"""
2-
Periodic repo sync for GitHub integrations.
3-
4-
The beat task (`github_repo_sync_beat`) runs on a schedule and uses
5-
CursoredScheduler to iterate over all active GitHub OrganizationIntegrations.
6-
For each one, it dispatches `sync_repos_for_org` which diffs GitHub's repo
7-
list against Sentry's Repository table and creates/disables/re-enables as needed.
2+
Backwards-compatible re-exports. The sync task has moved to
3+
sentry.integrations.source_code_management.sync_repos.
84
"""
95

10-
import logging
11-
from datetime import timedelta
12-
13-
from taskbroker_client.retry import Retry
14-
15-
from sentry import features
16-
from sentry.constants import ObjectStatus
17-
from sentry.integrations.models.organization_integration import OrganizationIntegration
18-
from sentry.integrations.services.integration import integration_service
19-
from sentry.integrations.services.repository.service import repository_service
20-
from sentry.integrations.source_code_management.metrics import (
21-
SCMIntegrationInteractionEvent,
22-
SCMIntegrationInteractionType,
6+
from sentry.integrations.source_code_management.sync_repos import (
7+
scm_repo_sync_beat as scm_repo_sync_beat,
238
)
24-
from sentry.integrations.source_code_management.repo_audit import log_repo_change
25-
from sentry.organizations.services.organization import organization_service
26-
from sentry.plugins.providers.integration_repository import get_integration_repository_provider
27-
from sentry.shared_integrations.exceptions import ApiError
28-
from sentry.silo.base import SiloMode
29-
from sentry.tasks.base import instrumented_task, retry
30-
from sentry.taskworker.namespaces import integrations_control_tasks
31-
from sentry.utils import metrics
32-
from sentry.utils.cursored_scheduler import CursoredScheduler
33-
34-
from .link_all_repos import get_repo_config
35-
36-
logger = logging.getLogger(__name__)
37-
38-
39-
@instrumented_task(
40-
name="sentry.integrations.github.tasks.sync_repos.sync_repos_for_org",
41-
namespace=integrations_control_tasks,
42-
retry=Retry(times=3, delay=120),
43-
processing_deadline_duration=120,
44-
silo_mode=SiloMode.CONTROL,
9+
from sentry.integrations.source_code_management.sync_repos import (
10+
sync_repos_for_org as sync_repos_for_org,
4511
)
46-
@retry()
47-
def sync_repos_for_org(organization_integration_id: int) -> None:
48-
"""
49-
Sync repositories for a single OrganizationIntegration.
50-
51-
Fetches all repos from GitHub, diffs against Sentry's Repository table,
52-
and creates/disables/re-enables repos as needed.
53-
"""
54-
try:
55-
oi = OrganizationIntegration.objects.get(
56-
id=organization_integration_id,
57-
status=ObjectStatus.ACTIVE,
58-
)
59-
except OrganizationIntegration.DoesNotExist:
60-
logger.info(
61-
"sync_repos_for_org.missing_org_integration",
62-
extra={"organization_integration_id": organization_integration_id},
63-
)
64-
return
65-
66-
integration = integration_service.get_integration(
67-
integration_id=oi.integration_id, status=ObjectStatus.ACTIVE
68-
)
69-
if integration is None:
70-
logger.info(
71-
"sync_repos_for_org.missing_integration",
72-
extra={"integration_id": oi.integration_id},
73-
)
74-
return
75-
76-
organization_id = oi.organization_id
77-
org_context = organization_service.get_organization_by_id(
78-
id=organization_id, include_projects=False, include_teams=False
79-
)
80-
if org_context is None:
81-
logger.info(
82-
"sync_repos_for_org.missing_organization",
83-
extra={"organization_id": organization_id},
84-
)
85-
return
86-
87-
rpc_org = org_context.organization
88-
if not features.has("organizations:github-repo-auto-sync", rpc_org):
89-
return
90-
91-
provider = f"integrations:{integration.provider}"
92-
dry_run = not features.has("organizations:github-repo-auto-sync-apply", rpc_org)
93-
94-
with SCMIntegrationInteractionEvent(
95-
interaction_type=SCMIntegrationInteractionType.SYNC_REPOS,
96-
integration_id=integration.id,
97-
organization_id=organization_id,
98-
provider_key=integration.provider,
99-
).capture():
100-
installation = integration.get_installation(organization_id=organization_id)
101-
client = installation.get_client()
102-
103-
try:
104-
github_repos = client.get_repos()
105-
except ApiError as e:
106-
if installation.is_rate_limited_error(e):
107-
logger.info(
108-
"sync_repos_for_org.rate_limited",
109-
extra={
110-
"integration_id": integration.id,
111-
"organization_id": organization_id,
112-
},
113-
)
114-
raise
115-
116-
github_external_ids = {str(repo["id"]) for repo in github_repos}
11712

118-
all_repos = repository_service.get_repositories(
119-
organization_id=organization_id,
120-
integration_id=integration.id,
121-
providers=[provider],
122-
)
123-
active_repos = [r for r in all_repos if r.status == ObjectStatus.ACTIVE and r.external_id]
124-
disabled_repos = [
125-
r for r in all_repos if r.status == ObjectStatus.DISABLED and r.external_id
126-
]
13+
# Legacy alias
14+
github_repo_sync_beat = scm_repo_sync_beat
12715

128-
sentry_active_ids = {r.external_id for r in active_repos}
129-
sentry_disabled_ids = {r.external_id for r in disabled_repos}
130-
131-
new_ids = github_external_ids - sentry_active_ids - sentry_disabled_ids
132-
removed_ids = sentry_active_ids - github_external_ids
133-
restored_ids = sentry_disabled_ids & github_external_ids
134-
135-
metric_tags = {
136-
"provider": integration.provider,
137-
"dry_run": str(dry_run),
138-
}
139-
metrics.distribution("scm.repo_sync.new_repos", len(new_ids), tags=metric_tags)
140-
metrics.distribution("scm.repo_sync.removed_repos", len(removed_ids), tags=metric_tags)
141-
metrics.distribution("scm.repo_sync.restored_repos", len(restored_ids), tags=metric_tags)
142-
metrics.distribution(
143-
"scm.repo_sync.provider_total", len(github_external_ids), tags=metric_tags
144-
)
145-
metrics.distribution(
146-
"scm.repo_sync.sentry_active", len(sentry_active_ids), tags=metric_tags
147-
)
148-
metrics.distribution(
149-
"scm.repo_sync.sentry_disabled", len(sentry_disabled_ids), tags=metric_tags
150-
)
151-
152-
if new_ids or removed_ids or restored_ids:
153-
logger.info(
154-
"scm.repo_sync.diff",
155-
extra={
156-
"provider": integration.provider,
157-
"integration_id": integration.id,
158-
"organization_id": organization_id,
159-
"dry_run": dry_run,
160-
"provider_total": len(github_external_ids),
161-
"sentry_active": len(sentry_active_ids),
162-
"sentry_disabled": len(sentry_disabled_ids),
163-
"new": len(new_ids),
164-
"removed": len(removed_ids),
165-
"restored": len(restored_ids),
166-
},
167-
)
168-
169-
if dry_run:
170-
return
171-
172-
repo_by_external_id = {r.external_id: r for r in active_repos + disabled_repos}
173-
174-
if new_ids:
175-
integration_repo_provider = get_integration_repository_provider(integration)
176-
repo_configs = [
177-
get_repo_config(repo, integration.id)
178-
for repo in github_repos
179-
if str(repo["id"]) in new_ids
180-
]
181-
if repo_configs:
182-
created_repos, reactivated_repos, _ = integration_repo_provider.create_repositories(
183-
configs=repo_configs, organization=rpc_org
184-
)
185-
186-
for repo in created_repos:
187-
log_repo_change(
188-
event_name="REPO_ADDED",
189-
organization_id=organization_id,
190-
repo=repo,
191-
source="automatic SCM syncing",
192-
provider=integration.provider,
193-
)
194-
195-
for repo in reactivated_repos:
196-
log_repo_change(
197-
event_name="REPO_ENABLED",
198-
organization_id=organization_id,
199-
repo=repo,
200-
source="automatic SCM syncing",
201-
provider=integration.provider,
202-
)
203-
204-
if removed_ids:
205-
repository_service.disable_repositories_by_external_ids(
206-
organization_id=organization_id,
207-
integration_id=integration.id,
208-
provider=provider,
209-
external_ids=list(removed_ids),
210-
)
211-
212-
for eid in removed_ids:
213-
removed_repo = repo_by_external_id.get(eid)
214-
if removed_repo:
215-
log_repo_change(
216-
event_name="REPO_DISABLED",
217-
organization_id=organization_id,
218-
repo=removed_repo,
219-
source="automatic SCM syncing",
220-
provider=integration.provider,
221-
)
222-
223-
if restored_ids:
224-
for repo in disabled_repos:
225-
if repo.external_id in restored_ids:
226-
repo.status = ObjectStatus.ACTIVE
227-
repository_service.update_repository(
228-
organization_id=organization_id, update=repo
229-
)
230-
log_repo_change(
231-
event_name="REPO_ENABLED",
232-
organization_id=organization_id,
233-
repo=repo,
234-
source="automatic SCM syncing",
235-
provider=integration.provider,
236-
)
237-
238-
239-
@instrumented_task(
240-
name="sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
241-
namespace=integrations_control_tasks,
242-
silo_mode=SiloMode.CONTROL,
243-
)
244-
def github_repo_sync_beat() -> None:
245-
scheduler = CursoredScheduler(
246-
name="github_repo_sync",
247-
schedule_key="github-repo-sync-beat",
248-
queryset=OrganizationIntegration.objects.filter(
249-
integration__provider__in=["github", "github_enterprise"],
250-
integration__status=ObjectStatus.ACTIVE,
251-
status=ObjectStatus.ACTIVE,
252-
),
253-
task=sync_repos_for_org,
254-
cycle_duration=timedelta(hours=24),
255-
)
256-
scheduler.tick()
16+
__all__ = [
17+
"github_repo_sync_beat",
18+
"scm_repo_sync_beat",
19+
"sync_repos_for_org",
20+
]

0 commit comments

Comments
 (0)