Skip to content

Commit 04ffffa

Browse files
committed
feat(repos): Add task to periodically sync repos from github
This adds a task that runs every 24 hours to make sure that the repositories available on the GitHub side match the repositories we have on the Sentry side.
1 parent 9a5dc41 commit 04ffffa

File tree

6 files changed

+424
-0
lines changed

6 files changed

+424
-0
lines changed

src/sentry/conf/server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
12481248
"task": "sdk.control:sentry.tasks.release_registry.fetch_release_registry_data_control",
12491249
"schedule": crontab("*/5", "*", "*", "*", "*"),
12501250
},
1251+
"github-repo-sync-beat": {
1252+
"task": "integrations.control:sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
1253+
"schedule": timedelta(minutes=1),
1254+
},
12511255
}
12521256

12531257
if SILO_MODE == "CONTROL":

src/sentry/features/temporary.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def register_temporary_features(manager: FeatureManager) -> None:
137137
manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
138138
manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
139139
manager.add("organizations:github-repo-auto-sync", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
140+
manager.add("organizations:github-repo-auto-sync-apply", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
140141
manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
141142
manager.add("organizations:scm-source-context", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
142143
# Project Management Integrations Feature Parity Flags

src/sentry/integrations/github/tasks/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
from .codecov_account_unlink import codecov_account_unlink
33
from .link_all_repos import link_all_repos
44
from .pr_comment import github_comment_workflow
5+
from .sync_repos import github_repo_sync_beat, sync_repos_for_org
56
from .sync_repos_on_install_change import sync_repos_on_install_change
67

78
__all__ = (
89
"codecov_account_link",
910
"codecov_account_unlink",
1011
"github_comment_workflow",
12+
"github_repo_sync_beat",
1113
"link_all_repos",
14+
"sync_repos_for_org",
1215
"sync_repos_on_install_change",
1316
)
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
"""
2+
Periodic repo sync for GitHub integrations.
3+
4+
The beat task (`github_repo_sync_beat`) runs on a schedule and uses
5+
CursoredScheduler to iterate over all active GitHub OrganizationIntegrations.
6+
For each one, it dispatches `sync_repos_for_org` which diffs GitHub's repo
7+
list against Sentry's Repository table and creates/disables/re-enables as needed.
8+
"""
9+
10+
import logging
11+
from datetime import timedelta
12+
13+
from taskbroker_client.retry import Retry
14+
15+
from sentry import features
16+
from sentry.constants import ObjectStatus
17+
from sentry.integrations.models.organization_integration import OrganizationIntegration
18+
from sentry.integrations.services.integration import integration_service
19+
from sentry.integrations.services.repository.service import repository_service
20+
from sentry.integrations.source_code_management.metrics import (
21+
SCMIntegrationInteractionEvent,
22+
SCMIntegrationInteractionType,
23+
)
24+
from sentry.organizations.services.organization import organization_service
25+
from sentry.plugins.providers.integration_repository import (
26+
RepoExistsError,
27+
get_integration_repository_provider,
28+
)
29+
from sentry.shared_integrations.exceptions import ApiError
30+
from sentry.silo.base import SiloMode
31+
from sentry.tasks.base import instrumented_task, retry
32+
from sentry.taskworker.namespaces import integrations_control_tasks
33+
from sentry.utils import metrics
34+
from sentry.utils.cursored_scheduler import CursoredScheduler
35+
36+
from .link_all_repos import get_repo_config
37+
38+
logger = logging.getLogger(__name__)
39+
40+
41+
@instrumented_task(
    name="sentry.integrations.github.tasks.sync_repos.sync_repos_for_org",
    namespace=integrations_control_tasks,
    retry=Retry(times=3, delay=120),
    processing_deadline_duration=120,
    silo_mode=SiloMode.CONTROL,
)
@retry(exclude=(RepoExistsError, KeyError))
def sync_repos_for_org(organization_integration_id: int) -> None:
    """
    Sync repositories for a single OrganizationIntegration.

    Fetches all repos from GitHub, diffs them against Sentry's Repository
    table by external id, and creates/disables/re-enables repos as needed.

    The task is a no-op (after an info log) when the OrganizationIntegration,
    its Integration, or the organization cannot be loaded as active, and it
    returns silently when the ``organizations:github-repo-auto-sync`` feature
    is off for the organization.

    When ``organizations:github-repo-auto-sync-apply`` is off the task runs in
    dry-run mode: the diff is still computed, emitted as metrics and logged,
    but no repository is created, disabled or re-enabled.

    Args:
        organization_integration_id: Primary key of the
            OrganizationIntegration to sync.

    Raises:
        ApiError: Re-raised when listing the GitHub repos fails (rate-limit
            failures are logged first).
    """
    try:
        oi = OrganizationIntegration.objects.get(
            id=organization_integration_id,
            status=ObjectStatus.ACTIVE,
        )
    except OrganizationIntegration.DoesNotExist:
        logger.info(
            "sync_repos_for_org.missing_org_integration",
            extra={"organization_integration_id": organization_integration_id},
        )
        return

    integration = integration_service.get_integration(
        integration_id=oi.integration_id, status=ObjectStatus.ACTIVE
    )
    if integration is None:
        logger.info(
            "sync_repos_for_org.missing_integration",
            extra={"integration_id": oi.integration_id},
        )
        return

    organization_id = oi.organization_id
    rpc_org = organization_service.get(id=organization_id)
    if rpc_org is None:
        logger.info(
            "sync_repos_for_org.missing_organization",
            extra={"organization_id": organization_id},
        )
        return

    # Master switch: without it the org is not part of the periodic sync at all.
    if not features.has("organizations:github-repo-auto-sync", rpc_org):
        return

    provider = f"integrations:{integration.provider}"
    # Without the "-apply" flag we only observe (metrics + log), never mutate.
    dry_run = not features.has("organizations:github-repo-auto-sync-apply", rpc_org)

    with SCMIntegrationInteractionEvent(
        interaction_type=SCMIntegrationInteractionType.SYNC_REPOS,
        integration_id=integration.id,
        organization_id=organization_id,
        provider_key=integration.provider,
    ).capture():
        installation = integration.get_installation(organization_id=organization_id)
        client = installation.get_client()

        try:
            github_repos = client.get_repos()
        except ApiError as e:
            # Rate limiting gets its own log line; every ApiError is re-raised.
            if installation.is_rate_limited_error(e):
                logger.info(
                    "sync_repos_for_org.rate_limited",
                    extra={
                        "integration_id": integration.id,
                        "organization_id": organization_id,
                    },
                )
            raise

        # Compared as strings against Repository.external_id below.
        github_external_ids = {str(repo["id"]) for repo in github_repos}

        all_repos = repository_service.get_repositories(
            organization_id=organization_id,
            integration_id=integration.id,
            providers=[provider],
        )
        # Repos without an external_id cannot be matched and are left untouched.
        active_repos = [r for r in all_repos if r.status == ObjectStatus.ACTIVE and r.external_id]
        disabled_repos = [
            r for r in all_repos if r.status == ObjectStatus.DISABLED and r.external_id
        ]

        sentry_active_ids = {r.external_id for r in active_repos}
        sentry_disabled_ids = {r.external_id for r in disabled_repos}

        # new:      on GitHub, unknown to Sentry (neither active nor disabled)
        # removed:  active in Sentry, no longer on GitHub
        # restored: disabled in Sentry, present on GitHub again
        new_ids = github_external_ids - sentry_active_ids - sentry_disabled_ids
        removed_ids = sentry_active_ids - github_external_ids
        restored_ids = sentry_disabled_ids & github_external_ids

        metric_tags = {
            "provider": integration.provider,
            "dry_run": str(dry_run),
        }
        metrics.distribution("scm.repo_sync.new_repos", len(new_ids), tags=metric_tags)
        metrics.distribution("scm.repo_sync.removed_repos", len(removed_ids), tags=metric_tags)
        metrics.distribution("scm.repo_sync.restored_repos", len(restored_ids), tags=metric_tags)
        metrics.distribution(
            "scm.repo_sync.provider_total", len(github_external_ids), tags=metric_tags
        )
        metrics.distribution(
            "scm.repo_sync.sentry_active", len(sentry_active_ids), tags=metric_tags
        )
        metrics.distribution(
            "scm.repo_sync.sentry_disabled", len(sentry_disabled_ids), tags=metric_tags
        )

        # Only log when there is an actual difference to report.
        if new_ids or removed_ids or restored_ids:
            logger.info(
                "scm.repo_sync.diff",
                extra={
                    "provider": integration.provider,
                    "integration_id": integration.id,
                    "organization_id": organization_id,
                    "dry_run": dry_run,
                    "provider_total": len(github_external_ids),
                    "sentry_active": len(sentry_active_ids),
                    "sentry_disabled": len(sentry_disabled_ids),
                    "new": len(new_ids),
                    "removed": len(removed_ids),
                    "restored": len(restored_ids),
                },
            )

        if dry_run:
            return

        if new_ids:
            integration_repo_provider = get_integration_repository_provider(integration)
            repo_configs = [
                get_repo_config(repo, integration.id)
                for repo in github_repos
                if str(repo["id"]) in new_ids
            ]
            if repo_configs:
                try:
                    integration_repo_provider.create_repositories(
                        configs=repo_configs, organization=rpc_org
                    )
                except RepoExistsError:
                    # Still tolerated (it is also excluded from retries above),
                    # but recorded instead of being dropped silently so an
                    # incomplete create is visible in the logs.
                    logger.info(
                        "sync_repos_for_org.repo_exists",
                        extra={
                            "integration_id": integration.id,
                            "organization_id": organization_id,
                            "attempted": len(repo_configs),
                        },
                    )

        if removed_ids:
            repository_service.disable_repositories_by_external_ids(
                organization_id=organization_id,
                integration_id=integration.id,
                provider=provider,
                external_ids=list(removed_ids),
            )

        if restored_ids:
            for repo in disabled_repos:
                if repo.external_id in restored_ids:
                    repo.status = ObjectStatus.ACTIVE
                    repository_service.update_repository(
                        organization_id=organization_id, update=repo
                    )
201+
202+
203+
@instrumented_task(
    name="sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
    namespace=integrations_control_tasks,
    silo_mode=SiloMode.CONTROL,
)
def github_repo_sync_beat() -> None:
    """
    Beat entry point for the periodic GitHub repo sync.

    Builds a CursoredScheduler over every active OrganizationIntegration that
    belongs to an active GitHub integration, with `sync_repos_for_org` as the
    task to dispatch, and advances it by one tick.
    """
    # Both the OrganizationIntegration and its parent Integration must be active.
    active_github_org_integrations = OrganizationIntegration.objects.filter(
        integration__provider="github",
        integration__status=ObjectStatus.ACTIVE,
        status=ObjectStatus.ACTIVE,
    )

    # NOTE(review): cycle_duration presumably is the time taken to walk the
    # whole queryset once — confirm against CursoredScheduler.
    repo_sync_scheduler = CursoredScheduler(
        name="github_repo_sync",
        schedule_key="github-repo-sync-beat",
        queryset=active_github_org_integrations,
        task=sync_repos_for_org,
        cycle_duration=timedelta(hours=24),
    )
    repo_sync_scheduler.tick()

src/sentry/integrations/source_code_management/metrics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class SCMIntegrationInteractionType(StrEnum):
4141

4242
# Tasks
4343
LINK_ALL_REPOS = "link_all_repos"
44+
SYNC_REPOS = "sync_repos"
4445
SYNC_REPOS_ON_INSTALL_CHANGE = "sync_repos_on_install_change"
4546

4647
# GitHub only

0 commit comments

Comments
 (0)