Skip to content

Commit f57c63e

Browse files
authored
feat(repos): Add task to periodically sync repos from github (#112043)
This adds a task that runs on a 24-hour cycle to make sure that the repositories available on the GitHub side match the repositories we have on the Sentry side.
1 parent 0d0c53a commit f57c63e

File tree

6 files changed

+428
-0
lines changed

6 files changed

+428
-0
lines changed

src/sentry/conf/server.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
882882
"sentry.integrations.github.tasks.codecov_account_unlink",
883883
"sentry.integrations.github.tasks.link_all_repos",
884884
"sentry.integrations.github.tasks.pr_comment",
885+
"sentry.integrations.github.tasks.sync_repos",
885886
"sentry.integrations.github.tasks.sync_repos_on_install_change",
886887
"sentry.integrations.gitlab.tasks",
887888
"sentry.integrations.jira.tasks",
@@ -1250,6 +1251,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
12501251
"task": "sdk.control:sentry.tasks.release_registry.fetch_release_registry_data_control",
12511252
"schedule": crontab("*/5", "*", "*", "*", "*"),
12521253
},
1254+
"github-repo-sync-beat": {
1255+
"task": "integrations.control:sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
1256+
"schedule": timedelta(minutes=1),
1257+
},
12531258
}
12541259

12551260
if SILO_MODE == "CONTROL":

src/sentry/features/temporary.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def register_temporary_features(manager: FeatureManager) -> None:
138138
manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
139139
manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
140140
manager.add("organizations:github-repo-auto-sync", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
141+
manager.add("organizations:github-repo-auto-sync-apply", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
141142
manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
142143
manager.add("organizations:integrations-slack-staging", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
143144
manager.add("organizations:scm-source-context", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)

src/sentry/integrations/github/tasks/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
from .codecov_account_unlink import codecov_account_unlink
33
from .link_all_repos import link_all_repos
44
from .pr_comment import github_comment_workflow
5+
from .sync_repos import github_repo_sync_beat, sync_repos_for_org
56
from .sync_repos_on_install_change import sync_repos_on_install_change
67

78
__all__ = (
89
"codecov_account_link",
910
"codecov_account_unlink",
1011
"github_comment_workflow",
12+
"github_repo_sync_beat",
1113
"link_all_repos",
14+
"sync_repos_for_org",
1215
"sync_repos_on_install_change",
1316
)
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
"""
2+
Periodic repo sync for GitHub integrations.
3+
4+
The beat task (`github_repo_sync_beat`) runs on a schedule and uses
5+
CursoredScheduler to iterate over all active GitHub OrganizationIntegrations.
6+
For each one, it dispatches `sync_repos_for_org` which diffs GitHub's repo
7+
list against Sentry's Repository table and creates/disables/re-enables as needed.
8+
"""
9+
10+
import logging
11+
from datetime import timedelta
12+
13+
from taskbroker_client.retry import Retry
14+
15+
from sentry import features
16+
from sentry.constants import ObjectStatus
17+
from sentry.integrations.models.organization_integration import OrganizationIntegration
18+
from sentry.integrations.services.integration import integration_service
19+
from sentry.integrations.services.repository.service import repository_service
20+
from sentry.integrations.source_code_management.metrics import (
21+
SCMIntegrationInteractionEvent,
22+
SCMIntegrationInteractionType,
23+
)
24+
from sentry.organizations.services.organization import organization_service
25+
from sentry.plugins.providers.integration_repository import (
26+
RepoExistsError,
27+
get_integration_repository_provider,
28+
)
29+
from sentry.shared_integrations.exceptions import ApiError
30+
from sentry.silo.base import SiloMode
31+
from sentry.tasks.base import instrumented_task, retry
32+
from sentry.taskworker.namespaces import integrations_control_tasks
33+
from sentry.utils import metrics
34+
from sentry.utils.cursored_scheduler import CursoredScheduler
35+
36+
from .link_all_repos import get_repo_config
37+
38+
logger = logging.getLogger(__name__)
39+
40+
41+
@instrumented_task(
    name="sentry.integrations.github.tasks.sync_repos.sync_repos_for_org",
    namespace=integrations_control_tasks,
    retry=Retry(times=3, delay=120),
    processing_deadline_duration=120,
    silo_mode=SiloMode.CONTROL,
)
@retry()
def sync_repos_for_org(organization_integration_id: int) -> None:
    """
    Sync repositories for a single OrganizationIntegration.

    Fetches all repos from GitHub, diffs against Sentry's Repository table,
    and creates/disables/re-enables repos as needed.

    The task returns early (after an info log, except for the feature check)
    when the OrganizationIntegration, its Integration, or the organization
    cannot be loaded in an active state, or when the
    ``organizations:github-repo-auto-sync`` feature is off for the org.

    When ``organizations:github-repo-auto-sync-apply`` is off the task runs
    in dry-run mode: the diff is measured and logged but nothing is written.

    :param organization_integration_id: primary key of the
        OrganizationIntegration to sync.
    :raises ApiError: re-raised unchanged when listing the GitHub repos fails.
    """
    try:
        oi = OrganizationIntegration.objects.get(
            id=organization_integration_id,
            status=ObjectStatus.ACTIVE,
        )
    except OrganizationIntegration.DoesNotExist:
        logger.info(
            "sync_repos_for_org.missing_org_integration",
            extra={"organization_integration_id": organization_integration_id},
        )
        return

    integration = integration_service.get_integration(
        integration_id=oi.integration_id, status=ObjectStatus.ACTIVE
    )
    if integration is None:
        logger.info(
            "sync_repos_for_org.missing_integration",
            extra={"integration_id": oi.integration_id},
        )
        return

    organization_id = oi.organization_id
    org_context = organization_service.get_organization_by_id(
        id=organization_id, include_projects=False, include_teams=False
    )
    if org_context is None:
        logger.info(
            "sync_repos_for_org.missing_organization",
            extra={"organization_id": organization_id},
        )
        return

    rpc_org = org_context.organization
    if not features.has("organizations:github-repo-auto-sync", rpc_org):
        return

    provider = f"integrations:{integration.provider}"
    # Without the "-apply" flag we only observe: metrics and logs, no writes.
    dry_run = not features.has("organizations:github-repo-auto-sync-apply", rpc_org)

    with SCMIntegrationInteractionEvent(
        interaction_type=SCMIntegrationInteractionType.SYNC_REPOS,
        integration_id=integration.id,
        organization_id=organization_id,
        provider_key=integration.provider,
    ).capture():
        installation = integration.get_installation(organization_id=organization_id)
        client = installation.get_client()

        try:
            github_repos = client.get_repos()
        except ApiError as e:
            if installation.is_rate_limited_error(e):
                logger.info(
                    "sync_repos_for_org.rate_limited",
                    extra={
                        "integration_id": integration.id,
                        "organization_id": organization_id,
                    },
                )
            # Always propagate: rate limiting is only logged, never swallowed.
            # NOTE(review): presumably the retry policy picks this up - confirm.
            raise

        # GitHub ids are stringified so they compare against Repository.external_id.
        github_external_ids = {str(repo["id"]) for repo in github_repos}

        all_repos = repository_service.get_repositories(
            organization_id=organization_id,
            integration_id=integration.id,
            providers=[provider],
        )
        active_repos = [r for r in all_repos if r.status == ObjectStatus.ACTIVE and r.external_id]
        disabled_repos = [
            r for r in all_repos if r.status == ObjectStatus.DISABLED and r.external_id
        ]

        sentry_active_ids = {r.external_id for r in active_repos}
        sentry_disabled_ids = {r.external_id for r in disabled_repos}

        # new:      on GitHub, unknown to Sentry as either active or disabled
        # removed:  active in Sentry, no longer on GitHub
        # restored: disabled in Sentry, present on GitHub again
        # NOTE(review): repos in any other status are not excluded from
        # `new_ids`, if the service returns them - confirm this is intended.
        new_ids = github_external_ids - sentry_active_ids - sentry_disabled_ids
        removed_ids = sentry_active_ids - github_external_ids
        restored_ids = sentry_disabled_ids & github_external_ids

        metric_tags = {
            "provider": integration.provider,
            "dry_run": str(dry_run),
        }
        metrics.distribution("scm.repo_sync.new_repos", len(new_ids), tags=metric_tags)
        metrics.distribution("scm.repo_sync.removed_repos", len(removed_ids), tags=metric_tags)
        metrics.distribution("scm.repo_sync.restored_repos", len(restored_ids), tags=metric_tags)
        metrics.distribution(
            "scm.repo_sync.provider_total", len(github_external_ids), tags=metric_tags
        )
        metrics.distribution(
            "scm.repo_sync.sentry_active", len(sentry_active_ids), tags=metric_tags
        )
        metrics.distribution(
            "scm.repo_sync.sentry_disabled", len(sentry_disabled_ids), tags=metric_tags
        )

        if new_ids or removed_ids or restored_ids:
            logger.info(
                "scm.repo_sync.diff",
                extra={
                    "provider": integration.provider,
                    "integration_id": integration.id,
                    "organization_id": organization_id,
                    "dry_run": dry_run,
                    "provider_total": len(github_external_ids),
                    "sentry_active": len(sentry_active_ids),
                    "sentry_disabled": len(sentry_disabled_ids),
                    "new": len(new_ids),
                    "removed": len(removed_ids),
                    "restored": len(restored_ids),
                },
            )

        if dry_run:
            return

        if new_ids:
            integration_repo_provider = get_integration_repository_provider(integration)
            repo_configs = [
                get_repo_config(repo, integration.id)
                for repo in github_repos
                if str(repo["id"]) in new_ids
            ]
            if repo_configs:
                try:
                    integration_repo_provider.create_repositories(
                        configs=repo_configs, organization=rpc_org
                    )
                except RepoExistsError:
                    # Still best-effort (the sync continues), but leave a
                    # trace instead of silently dropping the failure.
                    logger.info(
                        "sync_repos_for_org.repo_exists",
                        extra={
                            "integration_id": integration.id,
                            "organization_id": organization_id,
                            "new": len(repo_configs),
                        },
                    )

        if removed_ids:
            repository_service.disable_repositories_by_external_ids(
                organization_id=organization_id,
                integration_id=integration.id,
                provider=provider,
                external_ids=list(removed_ids),
            )

        if restored_ids:
            for repo in disabled_repos:
                if repo.external_id in restored_ids:
                    repo.status = ObjectStatus.ACTIVE
                    repository_service.update_repository(
                        organization_id=organization_id, update=repo
                    )
204+
205+
206+
@instrumented_task(
    name="sentry.integrations.github.tasks.sync_repos.github_repo_sync_beat",
    namespace=integrations_control_tasks,
    silo_mode=SiloMode.CONTROL,
)
def github_repo_sync_beat() -> None:
    """
    Beat entry point for the periodic GitHub repo sync.

    Builds a CursoredScheduler over every active OrganizationIntegration
    whose integration is an active GitHub integration, using
    `sync_repos_for_org` as the task and a 24-hour cycle duration, then
    calls `tick()` on it once.
    """
    # Both the org-level link and the integration itself must be active.
    active_github_org_integrations = OrganizationIntegration.objects.filter(
        integration__provider="github",
        integration__status=ObjectStatus.ACTIVE,
        status=ObjectStatus.ACTIVE,
    )

    CursoredScheduler(
        name="github_repo_sync",
        schedule_key="github-repo-sync-beat",
        queryset=active_github_org_integrations,
        task=sync_repos_for_org,
        cycle_duration=timedelta(hours=24),
    ).tick()

src/sentry/integrations/source_code_management/metrics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class SCMIntegrationInteractionType(StrEnum):
4141

4242
# Tasks
4343
LINK_ALL_REPOS = "link_all_repos"
44+
SYNC_REPOS = "sync_repos"
4445
SYNC_REPOS_ON_INSTALL_CHANGE = "sync_repos_on_install_change"
4546

4647
# GitHub only

0 commit comments

Comments
 (0)