Skip to content

Commit 8e262d2

Browse files
committed
Optimize DB query construction in calculating CODEOWNERS associations
1 parent 8c56f6a commit 8e262d2

File tree

2 files changed

+87
-34
lines changed

2 files changed

+87
-34
lines changed

src/sentry/api/validators/project_codeowners.py

Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22

33
from collections import defaultdict
44
from collections.abc import Collection, Mapping, Sequence
5-
from functools import reduce
6-
from operator import or_
75
from typing import Any
86

9-
from django.db.models import Subquery
10-
from django.db.models.query_utils import Q
7+
from django.db.models.functions import Lower
118

9+
from sentry.constants import ObjectStatus
1210
from sentry.integrations.models.external_actor import ExternalActor
1311
from sentry.integrations.types import ExternalProviders
1412
from sentry.issues.ownership.grammar import parse_code_owners
@@ -41,13 +39,14 @@ def build_codeowners_associations(
4139
filter=dict(emails=emails, organization_id=project.organization_id)
4240
)
4341

44-
# GitHub team and user names are case-insensitive
45-
# We build a query that filters on each name we parsed case-insensitively
46-
queries = [Q(external_name__iexact=xname) for xname in usernames + team_names]
47-
if queries:
48-
query = reduce(or_, queries)
49-
external_actors = ExternalActor.objects.filter(
50-
query,
42+
# GitHub team and user names are case-insensitive.
43+
# Deduplicate and lowercase names, then use a single IN query to filter.
44+
unique_lower_names = {name.lower() for name in usernames + team_names}
45+
if unique_lower_names:
46+
external_actors = ExternalActor.objects.annotate(
47+
external_name_lower=Lower("external_name")
48+
).filter(
49+
external_name_lower__in=unique_lower_names,
5150
organization_id=project.organization_id,
5251
provider__in=[
5352
ExternalProviders.GITHUB.value,
@@ -70,38 +69,67 @@ def build_codeowners_associations(
7069
team_ids_to_external_names: dict[int, list[str]] = defaultdict(list)
7170
user_ids_to_external_names: dict[int, list[str]] = defaultdict(list)
7271

72+
# Pre-build lowercase -> original name mappings
73+
lower_to_team_names: dict[str, list[str]] = defaultdict(list)
74+
for name in team_names:
75+
lower_to_team_names[name.lower()].append(name)
76+
lower_to_usernames: dict[str, list[str]] = defaultdict(list)
77+
for name in usernames:
78+
lower_to_usernames[name.lower()].append(name)
79+
7380
for xa in external_actors:
81+
xa_lower = xa.external_name.lower()
7482
if xa.team_id is not None:
75-
team_ids_to_external_names[xa.team_id].extend(
76-
filter(lambda team_name: team_name.lower() == xa.external_name.lower(), team_names)
77-
)
83+
team_ids_to_external_names[xa.team_id].extend(lower_to_team_names.get(xa_lower, []))
7884
if xa.user_id is not None:
79-
user_ids_to_external_names[xa.user_id].extend(
80-
filter(lambda username: username.lower() == xa.external_name.lower(), usernames)
81-
)
82-
83-
for user in user_service.get_many(
84-
filter=dict(user_ids=list(user_ids_to_external_names.keys()))
85-
):
86-
organization_members_ids = OrganizationMember.objects.filter(
87-
user_id=user.id, organization_id=project.organization_id
88-
).values_list("id", flat=True)
89-
team_ids = OrganizationMemberTeam.objects.filter(
90-
organizationmember_id__in=Subquery(organization_members_ids)
91-
).values_list("team_id", flat=True)
92-
projects = Project.objects.get_for_team_ids(Subquery(team_ids))
93-
94-
if project in projects:
85+
user_ids_to_external_names[xa.user_id].extend(lower_to_usernames.get(xa_lower, []))
86+
87+
# Determine which matched users have project access via team membership
88+
user_ids_with_access: set[int] = set()
89+
user_ids = list(user_ids_to_external_names.keys())
90+
if user_ids and project.status == ObjectStatus.ACTIVE:
91+
om_rows = list(
92+
OrganizationMember.objects.filter(
93+
user_id__in=user_ids,
94+
organization_id=project.organization_id,
95+
).values_list("id", "user_id")
96+
)
97+
om_id_to_user_id = {om_id: uid for om_id, uid in om_rows}
98+
99+
omt_rows = OrganizationMemberTeam.objects.filter(
100+
organizationmember_id__in=list(om_id_to_user_id.keys())
101+
).values_list("organizationmember_id", "team_id")
102+
103+
user_team_ids: dict[int, set[int]] = defaultdict(set)
104+
for om_id, team_id in omt_rows:
105+
user_team_ids[om_id_to_user_id[om_id]].add(team_id)
106+
107+
all_team_ids = {tid for tids in user_team_ids.values() for tid in tids}
108+
project_team_ids = set(
109+
project.teams.filter(id__in=all_team_ids).values_list("id", flat=True)
110+
)
111+
user_ids_with_access = {
112+
uid for uid, tids in user_team_ids.items() if tids & project_team_ids
113+
}
114+
115+
for user in user_service.get_many(filter=dict(user_ids=user_ids)):
116+
if user.id in user_ids_with_access:
95117
for external_name in user_ids_to_external_names[user.id]:
96118
users_dict[external_name] = user.email
97119
else:
98120
users_without_access.add(f"{user.get_display_name()}")
99121
users_without_access_external_names.update(user_ids_to_external_names[user.id])
100122

101-
for team in Team.objects.filter(id__in=list(team_ids_to_external_names.keys())):
102-
# make sure the sentry team has access to the project
103-
# tied to the codeowner
104-
if project in team.get_projects():
123+
# Determine which matched teams have project access
124+
team_ids_list = list(team_ids_to_external_names.keys())
125+
team_ids_with_access = (
126+
set(project.teams.filter(id__in=team_ids_list).values_list("id", flat=True))
127+
if team_ids_list and project.status == ObjectStatus.ACTIVE
128+
else set()
129+
)
130+
131+
for team in Team.objects.filter(id__in=team_ids_list):
132+
if team.id in team_ids_with_access:
105133
for external_name in team_ids_to_external_names[team.id]:
106134
teams_dict[external_name] = f"#{team.slug}"
107135
else:

tests/sentry/tasks/test_code_owners.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from rest_framework.exceptions import NotFound
55
from taskbroker_client.retry import RetryTaskError
66

7+
from sentry.api.validators.project_codeowners import build_codeowners_associations
78
from sentry.integrations.models.external_actor import ExternalActor
89
from sentry.models.commit import Commit
910
from sentry.models.commitfilechange import CommitFileChange, post_bulk_create
@@ -355,3 +356,27 @@ def test_codeowners_auto_sync_handles_not_found_error(
355356
assert code_owners.raw == original_raw
356357
# Notification should have been sent
357358
mock_send_email.assert_called_once_with()
359+
360+
def test_build_associations_case_insensitive_matching(self) -> None:
361+
self.create_external_user(
362+
user=self.user,
363+
external_name="@ShashankJarmale",
364+
integration=self.integration,
365+
)
366+
# CODEOWNERS uses lowercase, ExternalActor stored with mixed case
367+
raw = "docs/* @shashankjarmale\n"
368+
associations, _ = build_codeowners_associations(raw, self.project)
369+
assert "@shashankjarmale" in associations
370+
assert associations["@shashankjarmale"] == self.user.email
371+
372+
def test_build_associations_duplicate_names(self) -> None:
373+
self.create_external_user(
374+
user=self.user,
375+
external_name="@ShashankJarmale",
376+
integration=self.integration,
377+
)
378+
self.create_external_team(integration=self.integration)
379+
raw = "docs/* @ShashankJarmale @getsentry/ecosystem\napi/* @ShashankJarmale\nsrc/* @ShashankJarmale\n"
380+
associations, _ = build_codeowners_associations(raw, self.project)
381+
assert associations["@ShashankJarmale"] == self.user.email
382+
assert associations["@getsentry/ecosystem"] == f"#{self.team.slug}"

0 commit comments

Comments
 (0)