Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 68 additions & 40 deletions src/sentry/api/validators/project_codeowners.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

from collections import defaultdict
from collections.abc import Collection, Mapping, Sequence
from functools import reduce
from operator import or_
from typing import Any

from django.db.models import Subquery
from django.db.models.query_utils import Q
from django.db.models.functions import Lower

from sentry.constants import ObjectStatus
from sentry.integrations.models.external_actor import ExternalActor
from sentry.integrations.types import ExternalProviders
from sentry.issues.ownership.grammar import parse_code_owners
Expand Down Expand Up @@ -41,22 +39,23 @@ def build_codeowners_associations(
filter=dict(emails=emails, organization_id=project.organization_id)
)

# GitHub team and user names are case-insensitive
# We build a query that filters on each name we parsed case-insensitively
queries = [Q(external_name__iexact=xname) for xname in usernames + team_names]
if queries:
query = reduce(or_, queries)
external_actors = ExternalActor.objects.filter(
query,
organization_id=project.organization_id,
provider__in=[
ExternalProviders.GITHUB.value,
ExternalProviders.GITHUB_ENTERPRISE.value,
ExternalProviders.GITLAB.value,
],
# GitHub team and user names are case-insensitive.
# Deduplicate and lowercase names, then use a single IN query to filter.
unique_lower_names = {name.lower() for name in usernames + team_names}
if unique_lower_names:
external_actors = list(
ExternalActor.objects.annotate(external_name_lower=Lower("external_name")).filter(
external_name_lower__in=unique_lower_names,
organization_id=project.organization_id,
provider__in=[
ExternalProviders.GITHUB.value,
ExternalProviders.GITHUB_ENTERPRISE.value,
ExternalProviders.GITLAB.value,
],
)
)
else:
external_actors = ExternalActor.objects.none()
external_actors = []

# Convert CODEOWNERS into IssueOwner syntax
users_dict = {}
Expand All @@ -70,38 +69,67 @@ def build_codeowners_associations(
team_ids_to_external_names: dict[int, list[str]] = defaultdict(list)
user_ids_to_external_names: dict[int, list[str]] = defaultdict(list)

# Pre-build lowercase -> original name mappings
lower_to_team_names: dict[str, list[str]] = defaultdict(list)
for name in team_names:
lower_to_team_names[name.lower()].append(name)
lower_to_usernames: dict[str, list[str]] = defaultdict(list)
for name in usernames:
lower_to_usernames[name.lower()].append(name)

for xa in external_actors:
xa_lower = xa.external_name.lower()
if xa.team_id is not None:
team_ids_to_external_names[xa.team_id].extend(
filter(lambda team_name: team_name.lower() == xa.external_name.lower(), team_names)
)
team_ids_to_external_names[xa.team_id].extend(lower_to_team_names.get(xa_lower, []))
if xa.user_id is not None:
user_ids_to_external_names[xa.user_id].extend(
filter(lambda username: username.lower() == xa.external_name.lower(), usernames)
)
user_ids_to_external_names[xa.user_id].extend(lower_to_usernames.get(xa_lower, []))

# Determine which matched users have project access via team membership
user_ids_with_access: set[int] = set()
user_ids = list(user_ids_to_external_names.keys())
if user_ids and project.status == ObjectStatus.ACTIVE:
om_rows = list(
OrganizationMember.objects.filter(
user_id__in=user_ids,
organization_id=project.organization_id,
).values_list("id", "user_id")
)
om_id_to_user_id: dict[int, int] = {om_id: uid for om_id, uid in om_rows if uid is not None}

omt_rows = OrganizationMemberTeam.objects.filter(
organizationmember_id__in=list(om_id_to_user_id.keys())
).values_list("organizationmember_id", "team_id")

for user in user_service.get_many(
filter=dict(user_ids=list(user_ids_to_external_names.keys()))
):
organization_members_ids = OrganizationMember.objects.filter(
user_id=user.id, organization_id=project.organization_id
).values_list("id", flat=True)
team_ids = OrganizationMemberTeam.objects.filter(
organizationmember_id__in=Subquery(organization_members_ids)
).values_list("team_id", flat=True)
projects = Project.objects.get_for_team_ids(Subquery(team_ids))

if project in projects:
user_team_ids: dict[int, set[int]] = defaultdict(set)
for om_id, team_id in omt_rows:
user_team_ids[om_id_to_user_id[om_id]].add(team_id)

all_team_ids = {tid for tids in user_team_ids.values() for tid in tids}
project_team_ids = set(
project.teams.filter(id__in=all_team_ids).values_list("id", flat=True)
)
user_ids_with_access = {
uid for uid, tids in user_team_ids.items() if tids & project_team_ids
}

for user in user_service.get_many(filter=dict(user_ids=user_ids)):
if user.id in user_ids_with_access:
for external_name in user_ids_to_external_names[user.id]:
users_dict[external_name] = user.email
else:
users_without_access.add(f"{user.get_display_name()}")
users_without_access_external_names.update(user_ids_to_external_names[user.id])

for team in Team.objects.filter(id__in=list(team_ids_to_external_names.keys())):
# make sure the sentry team has access to the project
# tied to the codeowner
if project in team.get_projects():
# Determine which matched teams have project access
team_ids_list = list(team_ids_to_external_names.keys())
team_ids_with_access = (
set(project.teams.filter(id__in=team_ids_list).values_list("id", flat=True))
if team_ids_list and project.status == ObjectStatus.ACTIVE
else set()
)

for team in Team.objects.filter(id__in=team_ids_list):
if team.id in team_ids_with_access:
for external_name in team_ids_to_external_names[team.id]:
teams_dict[external_name] = f"#{team.slug}"
else:
Expand Down
25 changes: 25 additions & 0 deletions tests/sentry/tasks/test_code_owners.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rest_framework.exceptions import NotFound
from taskbroker_client.retry import RetryTaskError

from sentry.api.validators.project_codeowners import build_codeowners_associations
from sentry.integrations.models.external_actor import ExternalActor
from sentry.models.commit import Commit
from sentry.models.commitfilechange import CommitFileChange, post_bulk_create
Expand Down Expand Up @@ -355,3 +356,27 @@ def test_codeowners_auto_sync_handles_not_found_error(
assert code_owners.raw == original_raw
# Notification should have been sent
mock_send_email.assert_called_once_with()

def test_build_associations_case_insensitive_matching(self) -> None:
self.create_external_user(
user=self.user,
external_name="@ShashankJarmale",
integration=self.integration,
)
# CODEOWNERS uses lowercase, ExternalActor stored with mixed case
raw = "docs/* @shashankjarmale\n"
associations, _ = build_codeowners_associations(raw, self.project)
assert "@shashankjarmale" in associations
assert associations["@shashankjarmale"] == self.user.email

def test_build_associations_duplicate_names(self) -> None:
self.create_external_user(
user=self.user,
external_name="@ShashankJarmale",
integration=self.integration,
)
self.create_external_team(integration=self.integration)
raw = "docs/* @ShashankJarmale @getsentry/ecosystem\napi/* @ShashankJarmale\nsrc/* @ShashankJarmale\n"
associations, _ = build_codeowners_associations(raw, self.project)
assert associations["@ShashankJarmale"] == self.user.email
assert associations["@getsentry/ecosystem"] == f"#{self.team.slug}"
Loading