diff --git a/src/sentry/api/validators/project_codeowners.py b/src/sentry/api/validators/project_codeowners.py index cf90dec4248fa4..c022163de022cf 100644 --- a/src/sentry/api/validators/project_codeowners.py +++ b/src/sentry/api/validators/project_codeowners.py @@ -2,13 +2,11 @@ from collections import defaultdict from collections.abc import Collection, Mapping, Sequence -from functools import reduce -from operator import or_ from typing import Any -from django.db.models import Subquery -from django.db.models.query_utils import Q +from django.db.models.functions import Lower +from sentry.constants import ObjectStatus from sentry.integrations.models.external_actor import ExternalActor from sentry.integrations.types import ExternalProviders from sentry.issues.ownership.grammar import parse_code_owners @@ -41,22 +39,23 @@ def build_codeowners_associations( filter=dict(emails=emails, organization_id=project.organization_id) ) - # GitHub team and user names are case-insensitive - # We build a query that filters on each name we parsed case-insensitively - queries = [Q(external_name__iexact=xname) for xname in usernames + team_names] - if queries: - query = reduce(or_, queries) - external_actors = ExternalActor.objects.filter( - query, - organization_id=project.organization_id, - provider__in=[ - ExternalProviders.GITHUB.value, - ExternalProviders.GITHUB_ENTERPRISE.value, - ExternalProviders.GITLAB.value, - ], + # GitHub team and user names are case-insensitive. + # Deduplicate and lowercase names, then use a single IN query to filter. + unique_lower_names = {name.lower() for name in usernames + team_names} + if unique_lower_names: + external_actors = list( + ExternalActor.objects.annotate(external_name_lower=Lower("external_name")).filter( + external_name_lower__in=unique_lower_names, + organization_id=project.organization_id, + provider__in=[ + ExternalProviders.GITHUB.value, + ExternalProviders.GITHUB_ENTERPRISE.value, + ExternalProviders.GITLAB.value, + ], + ) ) else: - external_actors = ExternalActor.objects.none() + external_actors = [] # Convert CODEOWNERS into IssueOwner syntax users_dict = {} @@ -70,38 +69,67 @@ def build_codeowners_associations( team_ids_to_external_names: dict[int, list[str]] = defaultdict(list) user_ids_to_external_names: dict[int, list[str]] = defaultdict(list) + # Pre-build lowercase -> original name mappings + lower_to_team_names: dict[str, list[str]] = defaultdict(list) + for name in team_names: + lower_to_team_names[name.lower()].append(name) + lower_to_usernames: dict[str, list[str]] = defaultdict(list) + for name in usernames: + lower_to_usernames[name.lower()].append(name) + for xa in external_actors: + xa_lower = xa.external_name.lower() if xa.team_id is not None: - team_ids_to_external_names[xa.team_id].extend( - filter(lambda team_name: team_name.lower() == xa.external_name.lower(), team_names) - ) + team_ids_to_external_names[xa.team_id].extend(lower_to_team_names.get(xa_lower, [])) if xa.user_id is not None: - user_ids_to_external_names[xa.user_id].extend( - filter(lambda username: username.lower() == xa.external_name.lower(), usernames) - ) + user_ids_to_external_names[xa.user_id].extend(lower_to_usernames.get(xa_lower, [])) + + # Determine which matched users have project access via team membership + user_ids_with_access: set[int] = set() + user_ids = list(user_ids_to_external_names.keys()) + if user_ids and project.status == ObjectStatus.ACTIVE: + om_rows = list( + OrganizationMember.objects.filter( + user_id__in=user_ids, + organization_id=project.organization_id, + ).values_list("id", "user_id") + ) + om_id_to_user_id: dict[int, int] = {om_id: uid for om_id, uid in om_rows if uid is not None} + + omt_rows = OrganizationMemberTeam.objects.filter( + organizationmember_id__in=list(om_id_to_user_id.keys()) + ).values_list("organizationmember_id", "team_id") - for user in user_service.get_many( - filter=dict(user_ids=list(user_ids_to_external_names.keys())) - ): - organization_members_ids = OrganizationMember.objects.filter( - user_id=user.id, organization_id=project.organization_id - ).values_list("id", flat=True) - team_ids = OrganizationMemberTeam.objects.filter( - organizationmember_id__in=Subquery(organization_members_ids) - ).values_list("team_id", flat=True) - projects = Project.objects.get_for_team_ids(Subquery(team_ids)) - - if project in projects: + user_team_ids: dict[int, set[int]] = defaultdict(set) + for om_id, team_id in omt_rows: + user_team_ids[om_id_to_user_id[om_id]].add(team_id) + + all_team_ids = {tid for tids in user_team_ids.values() for tid in tids} + project_team_ids = set( + project.teams.filter(id__in=all_team_ids).values_list("id", flat=True) + ) + user_ids_with_access = { + uid for uid, tids in user_team_ids.items() if tids & project_team_ids + } + + for user in user_service.get_many(filter=dict(user_ids=user_ids)): + if user.id in user_ids_with_access: for external_name in user_ids_to_external_names[user.id]: users_dict[external_name] = user.email else: users_without_access.add(f"{user.get_display_name()}") users_without_access_external_names.update(user_ids_to_external_names[user.id]) - for team in Team.objects.filter(id__in=list(team_ids_to_external_names.keys())): - # make sure the sentry team has access to the project - # tied to the codeowner - if project in team.get_projects(): + # Determine which matched teams have project access + team_ids_list = list(team_ids_to_external_names.keys()) + team_ids_with_access = ( + set(project.teams.filter(id__in=team_ids_list).values_list("id", flat=True)) + if team_ids_list and project.status == ObjectStatus.ACTIVE + else set() + ) + + for team in Team.objects.filter(id__in=team_ids_list): + if team.id in team_ids_with_access: for external_name in team_ids_to_external_names[team.id]: teams_dict[external_name] = f"#{team.slug}" else: diff --git a/tests/sentry/tasks/test_code_owners.py b/tests/sentry/tasks/test_code_owners.py index cc00ea69a4799d..e0a87bb1687b0f 100644 --- a/tests/sentry/tasks/test_code_owners.py +++ b/tests/sentry/tasks/test_code_owners.py @@ -4,6 +4,7 @@ from rest_framework.exceptions import NotFound from taskbroker_client.retry import RetryTaskError +from sentry.api.validators.project_codeowners import build_codeowners_associations from sentry.integrations.models.external_actor import ExternalActor from sentry.models.commit import Commit from sentry.models.commitfilechange import CommitFileChange, post_bulk_create @@ -355,3 +356,27 @@ def test_codeowners_auto_sync_handles_not_found_error( assert code_owners.raw == original_raw # Notification should have been sent mock_send_email.assert_called_once_with() + + def test_build_associations_case_insensitive_matching(self) -> None: + self.create_external_user( + user=self.user, + external_name="@ShashankJarmale", + integration=self.integration, + ) + # CODEOWNERS uses lowercase, ExternalActor stored with mixed case + raw = "docs/* @shashankjarmale\n" + associations, _ = build_codeowners_associations(raw, self.project) + assert "@shashankjarmale" in associations + assert associations["@shashankjarmale"] == self.user.email + + def test_build_associations_duplicate_names(self) -> None: + self.create_external_user( + user=self.user, + external_name="@ShashankJarmale", + integration=self.integration, + ) + self.create_external_team(integration=self.integration) + raw = "docs/* @ShashankJarmale @getsentry/ecosystem\napi/* @ShashankJarmale\nsrc/* @ShashankJarmale\n" + associations, _ = build_codeowners_associations(raw, self.project) + assert associations["@ShashankJarmale"] == self.user.email + assert associations["@getsentry/ecosystem"] == f"#{self.team.slug}"