From 023f3ce130f5f8ee8f5bc3347ce95f7d51f1f3cf Mon Sep 17 00:00:00 2001
From: simon <simon.faltum@databricks.com>
Date: Wed, 18 Mar 2026 15:02:51 +0100
Subject: [PATCH 1/4] Add GitHub Action to suggest PR reviewers based on git
 history

Add a reviewer suggestion workflow that posts a PR comment with 1-2
suggested reviewers based on recency-weighted git history of changed
files. This is additive only: CODEOWNERS and auto-assign stay unchanged.

The action triggers on PR open and ready-for-review events, skips
drafts and fork PRs, and updates its comment in-place on re-runs.
---
 .github/workflows/suggest-reviewers.yml |  28 +++
 tools/suggest_reviewers.py              | 236 ++++++++++++++++++++++++
 2 files changed, 264 insertions(+)
 create mode 100644 .github/workflows/suggest-reviewers.yml
 create mode 100644 tools/suggest_reviewers.py

diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml
new file mode 100644
index 0000000000..01dafab55e
--- /dev/null
+++ b/.github/workflows/suggest-reviewers.yml
@@ -0,0 +1,28 @@
+name: suggest-reviewers
+
+on:
+  pull_request:
+    types: [opened, ready_for_review]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  suggest-reviewers:
+    runs-on: ubuntu-latest
+    if: ${{ !github.event.pull_request.draft && !github.event.pull_request.head.repo.fork }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+
+      - name: Suggest reviewers
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+        run: python3 tools/suggest_reviewers.py
diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
new file mode 100644
index 0000000000..7abc757abc
--- /dev/null
+++ b/tools/suggest_reviewers.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.12"
+# ///
+
+import os
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+
+MENTION_REVIEWERS = True
+
+CODEOWNERS_FALLBACK = "@andrewnester @shreyas-goenka @denik @pietern @anton-107 @simonfaltum"
+
+AUTHOR_ALIASES = {
+    "Denis Bilenko": "denik",
+    "Pieter Noordhuis": "pietern",
+    "Andrew Nester": "andrewnester",
+    "shreyas-goenka": "shreyas-goenka",
+    "Shreyas Goenka": "shreyas-goenka",
+    "Lennart Kats": "lennartkats-db",
+    "simon": "simonfaltum",
+    "Simon Faltum": "simonfaltum",
+    "Ilya Kuznetsov": "ilyakuz-db",
+    "Anton Nekipelov": "anton-107",
+    "Fabian Jakobs": "fabian-jakobs",
+    "Gleb Kanterov": "kanterov",
+    "Jeff Cheng": "jefferycheng1",
+    "Miles Yucht": "mgyucht",
+    "Ilia Babanov": "ilia-db",
+}
+
+MARKER = "<!-- REVIEWER_SUGGESTION -->"
+
+
+def classify_file(path: str) -> float:
+    p = Path(path)
+    if p.name.startswith("out.") or p.name == "output.txt":
+        return 0.0
+    if path.startswith(("cmd/workspace/", "cmd/account/")):
+        return 0.0
+    if path.startswith(("acceptance/", "integration/")):
+        return 0.2
+    if path.endswith("_test.go"):
+        return 0.3
+    if path.endswith(".go"):
+        return 1.0
+    return 0.5
+
+
+def get_changed_files(pr_number: str) -> list[str]:
+    result = subprocess.run(
+        ["gh", "pr", "diff", "--name-only", pr_number],
+        capture_output=True,
+        encoding="utf-8",
+    )
+    if result.returncode != 0:
+        return []
+    return [f.strip() for f in result.stdout.splitlines() if f.strip()]
+
+
+def git_log(path: str) -> list[tuple[str, datetime]]:
+    result = subprocess.run(
+        ["git", "log", "-50", "--no-merges", "--format=%an|%aI", "--", path],
+        capture_output=True,
+        encoding="utf-8",
+    )
+    if result.returncode != 0:
+        return []
+    entries = []
+    for line in result.stdout.splitlines():
+        line = line.strip()
+        if not line or "|" not in line:
+            continue
+        name, date_str = line.split("|", 1)
+        try:
+            entries.append((name, datetime.fromisoformat(date_str)))
+        except ValueError:
+            continue
+    return entries
+
+
+def score_contributors(
+    files: list[str], pr_author: str, now: datetime
+) -> tuple[dict[str, float], dict[str, dict[str, float]], int]:
+    scores: dict[str, float] = {}
+    dir_scores: dict[str, dict[str, float]] = {}
+    scored_count = 0
+    author_login = pr_author.lower()
+
+    for filepath in files:
+        weight = classify_file(filepath)
+        if weight == 0.0:
+            continue
+
+        history = git_log(filepath)
+        if not history:
+            parent = str(Path(filepath).parent)
+            if parent and parent != ".":
+                history = git_log(parent)
+        if not history:
+            continue
+
+        top_dir = str(Path(filepath).parent) or "."
+        file_contributed = False
+
+        for name, commit_date in history:
+            if name.endswith("[bot]"):
+                continue
+            login = AUTHOR_ALIASES.get(name)
+            if not login or login.lower() == author_login:
+                continue
+
+            days_ago = max(0, (now - commit_date).total_seconds() / 86400)
+            recency = 0.5 ** (days_ago / 150)
+            s = weight * recency
+
+            scores[login] = scores.get(login, 0) + s
+            dir_scores.setdefault(login, {})
+            dir_scores[login][top_dir] = dir_scores[login].get(top_dir, 0) + s
+            file_contributed = True
+
+        if file_contributed:
+            scored_count += 1
+
+    return scores, dir_scores, scored_count
+
+
+def top_dirs(ds: dict[str, float], n: int = 3) -> list[str]:
+    return [d for d, _ in sorted(ds.items(), key=lambda x: -x[1])[:n]]
+
+
+def format_reviewer(login: str, dirs: list[str]) -> str:
+    mention = f"@{login}" if MENTION_REVIEWERS else login
+    dir_str = ", ".join(f"`{d}/`" for d in dirs)
+    return f"- {mention} -- recent work in {dir_str}"
+
+
+def compute_confidence(sorted_scores: list[tuple[str, float]], scored_count: int) -> str:
+    if scored_count < 3 or len(sorted_scores) < 2:
+        return "low"
+    if len(sorted_scores) >= 3 and sorted_scores[0][1] > 2 * sorted_scores[2][1]:
+        return "high"
+    if len(sorted_scores) >= 3 and sorted_scores[0][1] > 1.5 * sorted_scores[2][1]:
+        return "medium"
+    return "low"
+
+
+def build_comment(
+    sorted_scores: list[tuple[str, float]],
+    dir_scores: dict[str, dict[str, float]],
+    total_files: int,
+    scored_count: int,
+) -> str:
+    if not sorted_scores:
+        return (
+            f"{MARKER}\n"
+            "## Suggested reviewers\n\n"
+            "Could not determine reviewers from git history. "
+            f"Please pick from CODEOWNERS: {CODEOWNERS_FALLBACK}\n"
+        )
+
+    reviewers = [sorted_scores[0]]
+    if len(sorted_scores) >= 2 and sorted_scores[0][1] < 1.35 * sorted_scores[1][1]:
+        reviewers.append(sorted_scores[1])
+
+    confidence = compute_confidence(sorted_scores, scored_count)
+
+    lines = [MARKER, "## Suggested reviewers", ""]
+    for login, _ in reviewers:
+        dirs = top_dirs(dir_scores.get(login, {}))
+        lines.append(format_reviewer(login, dirs))
+    lines.append("")
+    lines.append(f"Confidence: {confidence}")
+    lines.append("")
+    lines.append(
+        f"<sub>Based on git history of {total_files} changed files "
+        f"({scored_count} scored). "
+        f"CODEOWNERS fallback: {CODEOWNERS_FALLBACK}</sub>"
+    )
+    return "\n".join(lines) + "\n"
+
+
+def find_existing_comment(repo: str, pr_number: str) -> str | None:
+    result = subprocess.run(
+        [
+            "gh",
+            "api",
+            f"repos/{repo}/issues/{pr_number}/comments",
+            "--paginate",
+            "--jq",
+            f'.[] | select(.body | contains("{MARKER}")) | .id',
+        ],
+        capture_output=True,
+        encoding="utf-8",
+    )
+    if result.returncode != 0:
+        return None
+    for comment_id in result.stdout.splitlines():
+        comment_id = comment_id.strip()
+        if comment_id:
+            return comment_id
+    return None
+
+
+def main():
+    repo = os.environ["GITHUB_REPOSITORY"]
+    pr_number = os.environ["PR_NUMBER"]
+    pr_author = os.environ["PR_AUTHOR"]
+
+    files = get_changed_files(pr_number)
+    if not files:
+        print("No changed files found.")
+        return
+
+    now = datetime.now(timezone.utc)
+    scores, dir_scores, scored_count = score_contributors(files, pr_author, now)
+    sorted_scores = sorted(scores.items(), key=lambda x: -x[1])
+    comment = build_comment(sorted_scores, dir_scores, len(files), scored_count)
+
+    print(comment)
+    existing_id = find_existing_comment(repo, pr_number)
+    if existing_id:
+        subprocess.run(
+            ["gh", "api", f"repos/{repo}/issues/comments/{existing_id}", "-X", "PATCH", "-f", f"body={comment}"],
+            check=True,
+        )
+    else:
+        subprocess.run(
+            ["gh", "pr", "comment", pr_number, "--body", comment],
+            check=True,
+        )
+
+
+if __name__ == "__main__":
+    main()

From 01908e097964352c95c3dbac9e46c8cceedd3617 Mon Sep 17 00:00:00 2001
From: simon <simon.faltum@databricks.com>
Date: Wed, 18 Mar 2026 15:53:01 +0100
Subject: [PATCH 2/4] Replace hardcoded alias table with GitHub API author
 resolution

Use gh api to resolve commit SHAs to GitHub logins automatically,
removing the need to maintain a manual author alias map. Also make
`gh pr diff` and existing-comment lookup failures exit non-zero
instead of being silently ignored, replace the hardcoded CODEOWNERS
fallback with a link, add --since to bound git log, and add the
synchronize trigger so suggestions update on new pushes.
---
 .github/workflows/suggest-reviewers.yml |  2 +-
 tools/suggest_reviewers.py              | 76 +++++++++++++++----------
 2 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml
index 01dafab55e..47a8cc6cca 100644
--- a/.github/workflows/suggest-reviewers.yml
+++ b/.github/workflows/suggest-reviewers.yml
@@ -2,7 +2,7 @@ name: suggest-reviewers
 
 on:
   pull_request:
-    types: [opened, ready_for_review]
+    types: [opened, synchronize, ready_for_review]
 
 permissions:
   contents: read
diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
index 7abc757abc..f3273fa0d9 100644
--- a/tools/suggest_reviewers.py
+++ b/tools/suggest_reviewers.py
@@ -5,33 +5,18 @@
 
 import os
 import subprocess
+import sys
 from datetime import datetime, timezone
 from pathlib import Path
 
 MENTION_REVIEWERS = True
 
-CODEOWNERS_FALLBACK = "@andrewnester @shreyas-goenka @denik @pietern @anton-107 @simonfaltum"
-
-AUTHOR_ALIASES = {
-    "Denis Bilenko": "denik",
-    "Pieter Noordhuis": "pietern",
-    "Andrew Nester": "andrewnester",
-    "shreyas-goenka": "shreyas-goenka",
-    "Shreyas Goenka": "shreyas-goenka",
-    "Lennart Kats": "lennartkats-db",
-    "simon": "simonfaltum",
-    "Simon Faltum": "simonfaltum",
-    "Ilya Kuznetsov": "ilyakuz-db",
-    "Anton Nekipelov": "anton-107",
-    "Fabian Jakobs": "fabian-jakobs",
-    "Gleb Kanterov": "kanterov",
-    "Jeff Cheng": "jefferycheng1",
-    "Miles Yucht": "mgyucht",
-    "Ilia Babanov": "ilia-db",
-}
+CODEOWNERS_LINK = "[CODEOWNERS](.github/CODEOWNERS)"
 
 MARKER = "<!-- REVIEWER_SUGGESTION -->"
 
+_login_cache: dict[str, str | None] = {}
+
 
 def classify_file(path: str) -> float:
     p = Path(path)
@@ -55,13 +40,23 @@ def get_changed_files(pr_number: str) -> list[str]:
         encoding="utf-8",
     )
     if result.returncode != 0:
-        return []
+        print(f"gh pr diff failed: {result.stderr.strip()}", file=sys.stderr)
+        sys.exit(1)
     return [f.strip() for f in result.stdout.splitlines() if f.strip()]
 
 
-def git_log(path: str) -> list[tuple[str, datetime]]:
+def git_log(path: str) -> list[tuple[str, str, datetime]]:
     result = subprocess.run(
-        ["git", "log", "-50", "--no-merges", "--format=%an|%aI", "--", path],
+        [
+            "git",
+            "log",
+            "-50",
+            "--no-merges",
+            "--since=12 months ago",
+            "--format=%H|%an|%aI",
+            "--",
+            path,
+        ],
         capture_output=True,
         encoding="utf-8",
     )
@@ -72,16 +67,34 @@ def git_log(path: str) -> list[tuple[str, datetime]]:
         line = line.strip()
         if not line or "|" not in line:
             continue
-        name, date_str = line.split("|", 1)
+        parts = line.split("|", 2)
+        if len(parts) != 3:
+            continue
+        sha, name, date_str = parts
         try:
-            entries.append((name, datetime.fromisoformat(date_str)))
+            entries.append((sha, name, datetime.fromisoformat(date_str)))
         except ValueError:
             continue
     return entries
 
 
+def resolve_login(repo: str, sha: str, author_name: str) -> str | None:
+    if author_name in _login_cache:
+        return _login_cache[author_name]
+    result = subprocess.run(
+        ["gh", "api", f"repos/{repo}/commits/{sha}", "--jq", ".author.login"],
+        capture_output=True,
+        encoding="utf-8",
+    )
+    login = result.stdout.strip() if result.returncode == 0 else None
+    if not login:
+        login = None
+    _login_cache[author_name] = login
+    return login
+
+
 def score_contributors(
-    files: list[str], pr_author: str, now: datetime
+    files: list[str], pr_author: str, now: datetime, repo: str
 ) -> tuple[dict[str, float], dict[str, dict[str, float]], int]:
     scores: dict[str, float] = {}
     dir_scores: dict[str, dict[str, float]] = {}
@@ -104,10 +117,10 @@ def score_contributors(
         top_dir = str(Path(filepath).parent) or "."
         file_contributed = False
 
-        for name, commit_date in history:
+        for sha, name, commit_date in history:
             if name.endswith("[bot]"):
                 continue
-            login = AUTHOR_ALIASES.get(name)
+            login = resolve_login(repo, sha, name)
             if not login or login.lower() == author_login:
                 continue
 
@@ -157,7 +170,7 @@ def build_comment(
             f"{MARKER}\n"
             "## Suggested reviewers\n\n"
             "Could not determine reviewers from git history. "
-            f"Please pick from CODEOWNERS: {CODEOWNERS_FALLBACK}\n"
+            f"Please pick from {CODEOWNERS_LINK}.\n"
         )
 
     reviewers = [sorted_scores[0]]
@@ -176,7 +189,7 @@ def build_comment(
     lines.append(
         f"<sub>Based on git history of {total_files} changed files "
         f"({scored_count} scored). "
-        f"CODEOWNERS fallback: {CODEOWNERS_FALLBACK}</sub>"
+        f"See {CODEOWNERS_LINK} for path-specific owners.</sub>"
     )
     return "\n".join(lines) + "\n"
 
@@ -195,7 +208,8 @@ def find_existing_comment(repo: str, pr_number: str) -> str | None:
         encoding="utf-8",
     )
     if result.returncode != 0:
-        return None
+        print(f"gh api comments failed: {result.stderr.strip()}", file=sys.stderr)
+        sys.exit(1)
     for comment_id in result.stdout.splitlines():
         comment_id = comment_id.strip()
         if comment_id:
@@ -214,7 +228,7 @@ def main():
         return
 
     now = datetime.now(timezone.utc)
-    scores, dir_scores, scored_count = score_contributors(files, pr_author, now)
+    scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo)
     sorted_scores = sorted(scores.items(), key=lambda x: -x[1])
     comment = build_comment(sorted_scores, dir_scores, len(files), scored_count)
 

From f21bfc783fa3fa0210c392b24a28e13d1f046a97 Mon Sep 17 00:00:00 2001
From: simon <simon.faltum@databricks.com>
Date: Wed, 18 Mar 2026 16:03:04 +0100
Subject: [PATCH 3/4] Fix runner, add eligible reviewers section, expand to 1-3
 suggestions

Switch to deco runner group (IP allow list). Add CODEOWNERS parsing
to show eligible reviewers alongside the git-history-based suggestions.
Expand from 1-2 to 1-3 suggested reviewers and raise the score-ratio
threshold for adding another reviewer from 1.35x to 1.5x.
---
 .github/workflows/suggest-reviewers.yml |   4 +-
 tools/suggest_reviewers.py              | 211 +++++++++++++++---------
 2 files changed, 133 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml
index 47a8cc6cca..3f60e77420 100644
--- a/.github/workflows/suggest-reviewers.yml
+++ b/.github/workflows/suggest-reviewers.yml
@@ -10,7 +10,9 @@ permissions:
 
 jobs:
   suggest-reviewers:
-    runs-on: ubuntu-latest
+    runs-on:
+      group: databricks-deco-testing-runner-group
+      labels: ubuntu-latest-deco
     if: ${{ !github.event.pull_request.draft && !github.event.pull_request.head.repo.fork }}
 
     steps:
diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
index f3273fa0d9..2d1d91501b 100644
--- a/tools/suggest_reviewers.py
+++ b/tools/suggest_reviewers.py
@@ -3,6 +3,7 @@
 # requires-python = ">=3.12"
 # ///
 
+import fnmatch
 import os
 import subprocess
 import sys
@@ -10,11 +11,8 @@
 from pathlib import Path
 
 MENTION_REVIEWERS = True
-
 CODEOWNERS_LINK = "[CODEOWNERS](.github/CODEOWNERS)"
-
 MARKER = "<!-- REVIEWER_SUGGESTION -->"
-
 _login_cache: dict[str, str | None] = {}
 
 
@@ -28,51 +26,36 @@ def classify_file(path: str) -> float:
         return 0.2
     if path.endswith("_test.go"):
         return 0.3
-    if path.endswith(".go"):
-        return 1.0
-    return 0.5
+    return 1.0 if path.endswith(".go") else 0.5
 
 
 def get_changed_files(pr_number: str) -> list[str]:
-    result = subprocess.run(
+    r = subprocess.run(
         ["gh", "pr", "diff", "--name-only", pr_number],
         capture_output=True,
         encoding="utf-8",
     )
-    if result.returncode != 0:
-        print(f"gh pr diff failed: {result.stderr.strip()}", file=sys.stderr)
+    if r.returncode != 0:
+        print(f"gh pr diff failed: {r.stderr.strip()}", file=sys.stderr)
         sys.exit(1)
-    return [f.strip() for f in result.stdout.splitlines() if f.strip()]
+    return [f.strip() for f in r.stdout.splitlines() if f.strip()]
 
 
 def git_log(path: str) -> list[tuple[str, str, datetime]]:
-    result = subprocess.run(
-        [
-            "git",
-            "log",
-            "-50",
-            "--no-merges",
-            "--since=12 months ago",
-            "--format=%H|%an|%aI",
-            "--",
-            path,
-        ],
+    r = subprocess.run(
+        ["git", "log", "-50", "--no-merges", "--since=12 months ago", "--format=%H|%an|%aI", "--", path],
         capture_output=True,
         encoding="utf-8",
     )
-    if result.returncode != 0:
+    if r.returncode != 0:
         return []
     entries = []
-    for line in result.stdout.splitlines():
-        line = line.strip()
-        if not line or "|" not in line:
-            continue
-        parts = line.split("|", 2)
+    for line in r.stdout.splitlines():
+        parts = line.strip().split("|", 2)
         if len(parts) != 3:
             continue
-        sha, name, date_str = parts
         try:
-            entries.append((sha, name, datetime.fromisoformat(date_str)))
+            entries.append((parts[0], parts[1], datetime.fromisoformat(parts[2])))
         except ValueError:
             continue
     return entries
@@ -81,18 +64,49 @@ def git_log(path: str) -> list[tuple[str, str, datetime]]:
 def resolve_login(repo: str, sha: str, author_name: str) -> str | None:
     if author_name in _login_cache:
         return _login_cache[author_name]
-    result = subprocess.run(
+    r = subprocess.run(
         ["gh", "api", f"repos/{repo}/commits/{sha}", "--jq", ".author.login"],
         capture_output=True,
         encoding="utf-8",
     )
-    login = result.stdout.strip() if result.returncode == 0 else None
-    if not login:
-        login = None
+    login = r.stdout.strip() or None if r.returncode == 0 else None
     _login_cache[author_name] = login
     return login
 
 
+def _codeowners_match(pattern: str, filepath: str) -> bool:
+    if pattern.startswith("/"):
+        pattern = pattern[1:]
+        if pattern.endswith("/"):
+            return filepath.startswith(pattern)
+        return fnmatch.fnmatch(filepath, pattern) or filepath == pattern
+    return fnmatch.fnmatch(filepath, pattern) or fnmatch.fnmatch(Path(filepath).name, pattern)
+
+
+def parse_codeowners(changed_files: list[str]) -> list[str]:
+    path = Path(".github/CODEOWNERS")
+    if not path.exists():
+        return []
+    rules: list[tuple[str, list[str]]] = []
+    for line in path.read_text().splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        parts = line.split()
+        owners = [p for p in parts[1:] if p.startswith("@")]
+        if len(parts) >= 2 and owners:
+            rules.append((parts[0], owners))
+
+    all_owners: set[str] = set()
+    for filepath in changed_files:
+        matched = []
+        for pattern, owners in rules:
+            if _codeowners_match(pattern, filepath):
+                matched = owners
+        all_owners.update(matched)
+    return sorted(all_owners)
+
+
 def score_contributors(
     files: list[str], pr_author: str, now: datetime, repo: str
 ) -> tuple[dict[str, float], dict[str, dict[str, float]], int]:
@@ -105,7 +119,6 @@ def score_contributors(
         weight = classify_file(filepath)
         if weight == 0.0:
             continue
-
         history = git_log(filepath)
         if not history:
             parent = str(Path(filepath).parent)
@@ -116,26 +129,20 @@ def score_contributors(
 
         top_dir = str(Path(filepath).parent) or "."
         file_contributed = False
-
         for sha, name, commit_date in history:
             if name.endswith("[bot]"):
                 continue
             login = resolve_login(repo, sha, name)
             if not login or login.lower() == author_login:
                 continue
-
             days_ago = max(0, (now - commit_date).total_seconds() / 86400)
-            recency = 0.5 ** (days_ago / 150)
-            s = weight * recency
-
+            s = weight * (0.5 ** (days_ago / 150))
             scores[login] = scores.get(login, 0) + s
             dir_scores.setdefault(login, {})
             dir_scores[login][top_dir] = dir_scores[login].get(top_dir, 0) + s
             file_contributed = True
-
         if file_contributed:
             scored_count += 1
-
     return scores, dir_scores, scored_count
 
 
@@ -143,59 +150,100 @@ def top_dirs(ds: dict[str, float], n: int = 3) -> list[str]:
     return [d for d, _ in sorted(ds.items(), key=lambda x: -x[1])[:n]]
 
 
-def format_reviewer(login: str, dirs: list[str]) -> str:
+def fmt_reviewer(login: str, dirs: list[str]) -> str:
     mention = f"@{login}" if MENTION_REVIEWERS else login
-    dir_str = ", ".join(f"`{d}/`" for d in dirs)
-    return f"- {mention} -- recent work in {dir_str}"
+    return f"- {mention} -- recent work in {', '.join(f'`{d}/`' for d in dirs)}"
 
 
-def compute_confidence(sorted_scores: list[tuple[str, float]], scored_count: int) -> str:
-    if scored_count < 3 or len(sorted_scores) < 2:
+def select_reviewers(ss: list[tuple[str, float]]) -> list[tuple[str, float]]:
+    if not ss:
+        return []
+    out = [ss[0]]
+    if len(ss) >= 2 and ss[0][1] < 1.5 * ss[1][1]:
+        out.append(ss[1])
+        if len(ss) >= 3 and ss[1][1] < 1.5 * ss[2][1]:
+            out.append(ss[2])
+    return out
+
+
+def compute_confidence(ss: list[tuple[str, float]], scored_count: int) -> str:
+    if scored_count < 3 or len(ss) < 2:
         return "low"
-    if len(sorted_scores) >= 3 and sorted_scores[0][1] > 2 * sorted_scores[2][1]:
+    if len(ss) >= 3 and ss[0][1] > 2 * ss[2][1]:
         return "high"
-    if len(sorted_scores) >= 3 and sorted_scores[0][1] > 1.5 * sorted_scores[2][1]:
+    if len(ss) >= 3 and ss[0][1] > 1.5 * ss[2][1]:
         return "medium"
     return "low"
 
 
+def fmt_eligible(owners: list[str]) -> str:
+    if MENTION_REVIEWERS:
+        return ", ".join(owners)
+    return ", ".join(o.lstrip("@") for o in owners)
+
+
 def build_comment(
     sorted_scores: list[tuple[str, float]],
     dir_scores: dict[str, dict[str, float]],
     total_files: int,
     scored_count: int,
+    eligible_owners: list[str],
+    pr_author: str,
 ) -> str:
-    if not sorted_scores:
-        return (
-            f"{MARKER}\n"
-            "## Suggested reviewers\n\n"
-            "Could not determine reviewers from git history. "
-            f"Please pick from {CODEOWNERS_LINK}.\n"
-        )
-
-    reviewers = [sorted_scores[0]]
-    if len(sorted_scores) >= 2 and sorted_scores[0][1] < 1.35 * sorted_scores[1][1]:
-        reviewers.append(sorted_scores[1])
-
-    confidence = compute_confidence(sorted_scores, scored_count)
-
-    lines = [MARKER, "## Suggested reviewers", ""]
-    for login, _ in reviewers:
-        dirs = top_dirs(dir_scores.get(login, {}))
-        lines.append(format_reviewer(login, dirs))
-    lines.append("")
-    lines.append(f"Confidence: {confidence}")
-    lines.append("")
-    lines.append(
-        f"<sub>Based on git history of {total_files} changed files "
+    reviewers = select_reviewers(sorted_scores)
+    suggested_logins = {login.lower() for login, _ in reviewers}
+    eligible = [
+        o
+        for o in eligible_owners
+        if o.lstrip("@").lower() != pr_author.lower() and o.lstrip("@").lower() not in suggested_logins
+    ]
+
+    lines = [MARKER]
+    if reviewers:
+        lines += [
+            "## Suggested reviewers",
+            "",
+            "Based on git history of the changed files, these people are best suited to review:",
+            "",
+        ]
+        for login, _ in reviewers:
+            lines.append(fmt_reviewer(login, top_dirs(dir_scores.get(login, {}))))
+        lines += ["", f"Confidence: {compute_confidence(sorted_scores, scored_count)}"]
+        if eligible:
+            lines += [
+                "",
+                "## Eligible reviewers",
+                "",
+                "Based on CODEOWNERS, these people or teams could also review:",
+                "",
+                fmt_eligible(eligible),
+            ]
+    elif eligible:
+        lines += [
+            "## Eligible reviewers",
+            "",
+            "Could not determine reviewers from git history. Based on CODEOWNERS, these people or teams could review:",
+            "",
+            fmt_eligible(eligible),
+        ]
+    else:
+        lines += [
+            "## Suggested reviewers",
+            "",
+            f"Could not determine reviewers from git history. Please pick from {CODEOWNERS_LINK}.",
+        ]
+
+    lines += [
+        "",
+        f"<sub>Suggestions based on git history of {total_files} changed files "
         f"({scored_count} scored). "
-        f"See {CODEOWNERS_LINK} for path-specific owners.</sub>"
-    )
+        f"See {CODEOWNERS_LINK} for path-specific ownership rules.</sub>",
+    ]
     return "\n".join(lines) + "\n"
 
 
 def find_existing_comment(repo: str, pr_number: str) -> str | None:
-    result = subprocess.run(
+    r = subprocess.run(
         [
             "gh",
             "api",
@@ -207,13 +255,13 @@ def find_existing_comment(repo: str, pr_number: str) -> str | None:
         capture_output=True,
         encoding="utf-8",
     )
-    if result.returncode != 0:
-        print(f"gh api comments failed: {result.stderr.strip()}", file=sys.stderr)
+    if r.returncode != 0:
+        print(f"gh api comments failed: {r.stderr.strip()}", file=sys.stderr)
         sys.exit(1)
-    for comment_id in result.stdout.splitlines():
-        comment_id = comment_id.strip()
-        if comment_id:
-            return comment_id
+    for cid in r.stdout.splitlines():
+        cid = cid.strip()
+        if cid:
+            return cid
     return None
 
 
@@ -230,7 +278,8 @@ def main():
     now = datetime.now(timezone.utc)
     scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo)
     sorted_scores = sorted(scores.items(), key=lambda x: -x[1])
-    comment = build_comment(sorted_scores, dir_scores, len(files), scored_count)
+    eligible = parse_codeowners(files)
+    comment = build_comment(sorted_scores, dir_scores, len(files), scored_count, eligible, pr_author)
 
     print(comment)
     existing_id = find_existing_comment(repo, pr_number)

From 81c87b97c1c01a8be75278053f113773d585eb76 Mon Sep 17 00:00:00 2001
From: simon <simon.faltum@databricks.com>
Date: Wed, 18 Mar 2026 17:48:10 +0100
Subject: [PATCH 4/4] Address review: use uv, remove cmd/workspace and
 cmd/account skip

Remove the 0.0 weight override for cmd/workspace/ and cmd/account/
since those directories contain non-generated files too (denik).
Run the script with uv (installed via setup-uv) so that its
`requires-python = ">=3.12"` metadata is honored (pietern).
---
 .github/workflows/suggest-reviewers.yml | 7 ++++++-
 tools/suggest_reviewers.py              | 2 --
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml
index 3f60e77420..fee541d09f 100644
--- a/.github/workflows/suggest-reviewers.yml
+++ b/.github/workflows/suggest-reviewers.yml
@@ -21,10 +21,15 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Install uv
+        uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
+        with:
+          version: "0.6.5"
+
       - name: Suggest reviewers
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GITHUB_REPOSITORY: ${{ github.repository }}
           PR_NUMBER: ${{ github.event.pull_request.number }}
           PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-        run: python3 tools/suggest_reviewers.py
+        run: uv run tools/suggest_reviewers.py
diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
index 2d1d91501b..cf27ba8ca8 100644
--- a/tools/suggest_reviewers.py
+++ b/tools/suggest_reviewers.py
@@ -20,8 +20,6 @@ def classify_file(path: str) -> float:
     p = Path(path)
     if p.name.startswith("out.") or p.name == "output.txt":
         return 0.0
-    if path.startswith(("cmd/workspace/", "cmd/account/")):
-        return 0.0
     if path.startswith(("acceptance/", "integration/")):
         return 0.2
     if path.endswith("_test.go"):