From 023f3ce130f5f8ee8f5bc3347ce95f7d51f1f3cf Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 18 Mar 2026 15:02:51 +0100 Subject: [PATCH 1/4] Add GitHub Action to suggest PR reviewers based on git history Add a reviewer suggestion workflow that posts a PR comment with 1-2 suggested reviewers based on recency-weighted git history of changed files. This is additive only, CODEOWNERS and auto-assign stay unchanged. The action triggers on PR open and ready-for-review events, skips drafts and fork PRs, and updates its comment in-place on re-runs. --- .github/workflows/suggest-reviewers.yml | 28 +++ tools/suggest_reviewers.py | 236 ++++++++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 .github/workflows/suggest-reviewers.yml create mode 100644 tools/suggest_reviewers.py diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml new file mode 100644 index 0000000000..01dafab55e --- /dev/null +++ b/.github/workflows/suggest-reviewers.yml @@ -0,0 +1,28 @@ +name: suggest-reviewers + +on: + pull_request: + types: [opened, ready_for_review] + +permissions: + contents: read + pull-requests: write + +jobs: + suggest-reviewers: + runs-on: ubuntu-latest + if: ${{ !github.event.pull_request.draft && !github.event.pull_request.head.repo.fork }} + + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: Suggest reviewers + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + run: python3 tools/suggest_reviewers.py diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py new file mode 100644 index 0000000000..7abc757abc --- /dev/null +++ b/tools/suggest_reviewers.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.12" +# /// + +import os +import subprocess +from datetime import datetime, timezone +from pathlib import Path + +MENTION_REVIEWERS = True + +CODEOWNERS_FALLBACK = "@andrewnester @shreyas-goenka @denik @pietern @anton-107 @simonfaltum" + +AUTHOR_ALIASES = { + "Denis Bilenko": "denik", + "Pieter Noordhuis": "pietern", + "Andrew Nester": "andrewnester", + "shreyas-goenka": "shreyas-goenka", + "Shreyas Goenka": "shreyas-goenka", + "Lennart Kats": "lennartkats-db", + "simon": "simonfaltum", + "Simon Faltum": "simonfaltum", + "Ilya Kuznetsov": "ilyakuz-db", + "Anton Nekipelov": "anton-107", + "Fabian Jakobs": "fabian-jakobs", + "Gleb Kanterov": "kanterov", + "Jeff Cheng": "jefferycheng1", + "Miles Yucht": "mgyucht", + "Ilia Babanov": "ilia-db", +} + +MARKER = "" + + +def classify_file(path: str) -> float: + p = Path(path) + if p.name.startswith("out.") or p.name == "output.txt": + return 0.0 + if path.startswith(("cmd/workspace/", "cmd/account/")): + return 0.0 + if path.startswith(("acceptance/", "integration/")): + return 0.2 + if path.endswith("_test.go"): + return 0.3 + if path.endswith(".go"): + return 1.0 + return 0.5 + + +def get_changed_files(pr_number: str) -> list[str]: + result = subprocess.run( + ["gh", "pr", "diff", "--name-only", pr_number], + capture_output=True, + encoding="utf-8", + ) + if result.returncode != 0: + return [] + return [f.strip() for f in result.stdout.splitlines() if f.strip()] + + +def git_log(path: str) -> list[tuple[str, datetime]]: + result = subprocess.run( + ["git", "log", "-50", "--no-merges", "--format=%an|%aI", "--", path], + capture_output=True, + encoding="utf-8", + ) + if result.returncode != 0: + return [] + entries = [] + for line in result.stdout.splitlines(): + line = line.strip() + if not line or "|" not in line: + continue + name, date_str = line.split("|", 1) + try: + entries.append((name, datetime.fromisoformat(date_str))) + except ValueError: + continue + return entries + + +def score_contributors( + files: list[str], pr_author: str, now: datetime +) -> tuple[dict[str, float], dict[str, dict[str, float]], int]: + scores: dict[str, float] = {} + dir_scores: dict[str, dict[str, float]] = {} + scored_count = 0 + author_login = pr_author.lower() + + for filepath in files: + weight = classify_file(filepath) + if weight == 0.0: + continue + + history = git_log(filepath) + if not history: + parent = str(Path(filepath).parent) + if parent and parent != ".": + history = git_log(parent) + if not history: + continue + + top_dir = str(Path(filepath).parent) or "." + file_contributed = False + + for name, commit_date in history: + if name.endswith("[bot]"): + continue + login = AUTHOR_ALIASES.get(name) + if not login or login.lower() == author_login: + continue + + days_ago = max(0, (now - commit_date).total_seconds() / 86400) + recency = 0.5 ** (days_ago / 150) + s = weight * recency + + scores[login] = scores.get(login, 0) + s + dir_scores.setdefault(login, {}) + dir_scores[login][top_dir] = dir_scores[login].get(top_dir, 0) + s + file_contributed = True + + if file_contributed: + scored_count += 1 + + return scores, dir_scores, scored_count + + +def top_dirs(ds: dict[str, float], n: int = 3) -> list[str]: + return [d for d, _ in sorted(ds.items(), key=lambda x: -x[1])[:n]] + + +def format_reviewer(login: str, dirs: list[str]) -> str: + mention = f"@{login}" if MENTION_REVIEWERS else login + dir_str = ", ".join(f"`{d}/`" for d in dirs) + return f"- {mention} -- recent work in {dir_str}" + + +def compute_confidence(sorted_scores: list[tuple[str, float]], scored_count: int) -> str: + if scored_count < 3 or len(sorted_scores) < 2: + return "low" + if len(sorted_scores) >= 3 and sorted_scores[0][1] > 2 * sorted_scores[2][1]: + return "high" + if len(sorted_scores) >= 3 and sorted_scores[0][1] > 1.5 * sorted_scores[2][1]: + return "medium" + return "low" + + +def build_comment( + sorted_scores: list[tuple[str, float]], + dir_scores: dict[str, dict[str, float]], + total_files: int, + scored_count: int, +) -> str: + if not sorted_scores: + return ( + f"{MARKER}\n" + "## Suggested reviewers\n\n" + "Could not determine reviewers from git history. " + f"Please pick from CODEOWNERS: {CODEOWNERS_FALLBACK}\n" + ) + + reviewers = [sorted_scores[0]] + if len(sorted_scores) >= 2 and sorted_scores[0][1] < 1.35 * sorted_scores[1][1]: + reviewers.append(sorted_scores[1]) + + confidence = compute_confidence(sorted_scores, scored_count) + + lines = [MARKER, "## Suggested reviewers", ""] + for login, _ in reviewers: + dirs = top_dirs(dir_scores.get(login, {})) + lines.append(format_reviewer(login, dirs)) + lines.append("") + lines.append(f"Confidence: {confidence}") + lines.append("") + lines.append( + f"Based on git history of {total_files} changed files " + f"({scored_count} scored). " + f"CODEOWNERS fallback: {CODEOWNERS_FALLBACK}" + ) + return "\n".join(lines) + "\n" + + +def find_existing_comment(repo: str, pr_number: str) -> str | None: + result = subprocess.run( + [ + "gh", + "api", + f"repos/{repo}/issues/{pr_number}/comments", + "--paginate", + "--jq", + f'.[] | select(.body | contains("{MARKER}")) | .id', + ], + capture_output=True, + encoding="utf-8", + ) + if result.returncode != 0: + return None + for comment_id in result.stdout.splitlines(): + comment_id = comment_id.strip() + if comment_id: + return comment_id + return None + + +def main(): + repo = os.environ["GITHUB_REPOSITORY"] + pr_number = os.environ["PR_NUMBER"] + pr_author = os.environ["PR_AUTHOR"] + + files = get_changed_files(pr_number) + if not files: + print("No changed files found.") + return + + now = datetime.now(timezone.utc) + scores, dir_scores, scored_count = score_contributors(files, pr_author, now) + sorted_scores = sorted(scores.items(), key=lambda x: -x[1]) + comment = build_comment(sorted_scores, dir_scores, len(files), scored_count) + + print(comment) + existing_id = find_existing_comment(repo, pr_number) + if existing_id: + subprocess.run( + ["gh", "api", f"repos/{repo}/issues/comments/{existing_id}", "-X", "PATCH", "-f", f"body={comment}"], + check=True, + ) + else: + subprocess.run( + ["gh", "pr", "comment", pr_number, "--body", comment], + check=True, + ) + + +if __name__ == "__main__": + main() From 01908e097964352c95c3dbac9e46c8cceedd3617 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 18 Mar 2026 15:53:01 +0100 Subject: [PATCH 2/4] Replace hardcoded alias table with GitHub API author resolution Use gh api to resolve commit SHAs to GitHub logins automatically, removing the need to maintain a manual author alias map. Also fix error handling for gh subprocess failures, replace hardcoded CODEOWNERS fallback with a link, add --since to bound git log, and add synchronize trigger so suggestions update on new pushes. --- .github/workflows/suggest-reviewers.yml | 2 +- tools/suggest_reviewers.py | 76 +++++++++++++++---------- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml index 01dafab55e..47a8cc6cca 100644 --- a/.github/workflows/suggest-reviewers.yml +++ b/.github/workflows/suggest-reviewers.yml @@ -2,7 +2,7 @@ name: suggest-reviewers on: pull_request: - types: [opened, ready_for_review] + types: [opened, synchronize, ready_for_review] permissions: contents: read diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py index 7abc757abc..f3273fa0d9 100644 --- a/tools/suggest_reviewers.py +++ b/tools/suggest_reviewers.py @@ -5,33 +5,18 @@ import os import subprocess +import sys from datetime import datetime, timezone from pathlib import Path MENTION_REVIEWERS = True -CODEOWNERS_FALLBACK = "@andrewnester @shreyas-goenka @denik @pietern @anton-107 @simonfaltum" - -AUTHOR_ALIASES = { - "Denis Bilenko": "denik", - "Pieter Noordhuis": "pietern", - "Andrew Nester": "andrewnester", - "shreyas-goenka": "shreyas-goenka", - "Shreyas Goenka": "shreyas-goenka", - "Lennart Kats": "lennartkats-db", - "simon": "simonfaltum", - "Simon Faltum": "simonfaltum", - "Ilya Kuznetsov": "ilyakuz-db", - "Anton Nekipelov": "anton-107", - "Fabian Jakobs": "fabian-jakobs", - "Gleb Kanterov": "kanterov", - "Jeff Cheng": "jefferycheng1", - "Miles Yucht": "mgyucht", - "Ilia Babanov": "ilia-db", -} +CODEOWNERS_LINK = "[CODEOWNERS](.github/CODEOWNERS)" MARKER = "" +_login_cache: dict[str, str | None] = {} + def classify_file(path: str) -> float: p = Path(path) @@ -55,13 +40,23 @@ def get_changed_files(pr_number: str) -> list[str]: encoding="utf-8", ) if result.returncode != 0: - return [] + print(f"gh pr diff failed: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) return [f.strip() for f in result.stdout.splitlines() if f.strip()] -def git_log(path: str) -> list[tuple[str, datetime]]: +def git_log(path: str) -> list[tuple[str, str, datetime]]: result = subprocess.run( - ["git", "log", "-50", "--no-merges", "--format=%an|%aI", "--", path], + [ + "git", + "log", + "-50", + "--no-merges", + "--since=12 months ago", + "--format=%H|%an|%aI", + "--", + path, + ], capture_output=True, encoding="utf-8", ) @@ -72,16 +67,34 @@ def git_log(path: str) -> list[tuple[str, datetime]]: line = line.strip() if not line or "|" not in line: continue - name, date_str = line.split("|", 1) + parts = line.split("|", 2) + if len(parts) != 3: + continue + sha, name, date_str = parts try: - entries.append((name, datetime.fromisoformat(date_str))) + entries.append((sha, name, datetime.fromisoformat(date_str))) except ValueError: continue return entries +def resolve_login(repo: str, sha: str, author_name: str) -> str | None: + if author_name in _login_cache: + return _login_cache[author_name] + result = subprocess.run( + ["gh", "api", f"repos/{repo}/commits/{sha}", "--jq", ".author.login"], + capture_output=True, + encoding="utf-8", + ) + login = result.stdout.strip() if result.returncode == 0 else None + if not login: + login = None + _login_cache[author_name] = login + return login + + def score_contributors( - files: list[str], pr_author: str, now: datetime + files: list[str], pr_author: str, now: datetime, repo: str ) -> tuple[dict[str, float], dict[str, dict[str, float]], int]: scores: dict[str, float] = {} dir_scores: dict[str, dict[str, float]] = {} @@ -104,10 +117,10 @@ def score_contributors( top_dir = str(Path(filepath).parent) or "." file_contributed = False - for name, commit_date in history: + for sha, name, commit_date in history: if name.endswith("[bot]"): continue - login = AUTHOR_ALIASES.get(name) + login = resolve_login(repo, sha, name) if not login or login.lower() == author_login: continue @@ -157,7 +170,7 @@ def build_comment( f"{MARKER}\n" "## Suggested reviewers\n\n" "Could not determine reviewers from git history. " - f"Please pick from CODEOWNERS: {CODEOWNERS_FALLBACK}\n" + f"Please pick from {CODEOWNERS_LINK}.\n" ) reviewers = [sorted_scores[0]] @@ -176,7 +189,7 @@ def build_comment( lines.append( f"Based on git history of {total_files} changed files " f"({scored_count} scored). " - f"CODEOWNERS fallback: {CODEOWNERS_FALLBACK}" + f"See {CODEOWNERS_LINK} for path-specific owners." ) return "\n".join(lines) + "\n" @@ -195,7 +208,8 @@ def find_existing_comment(repo: str, pr_number: str) -> str | None: encoding="utf-8", ) if result.returncode != 0: - return None + print(f"gh api comments failed: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) for comment_id in result.stdout.splitlines(): comment_id = comment_id.strip() if comment_id: @@ -214,7 +228,7 @@ def main(): return now = datetime.now(timezone.utc) - scores, dir_scores, scored_count = score_contributors(files, pr_author, now) + scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo) sorted_scores = sorted(scores.items(), key=lambda x: -x[1]) comment = build_comment(sorted_scores, dir_scores, len(files), scored_count) From f21bfc783fa3fa0210c392b24a28e13d1f046a97 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 18 Mar 2026 16:03:04 +0100 Subject: [PATCH 3/4] Fix runner, add eligible reviewers section, expand to 1-3 suggestions Switch to deco runner group (IP allow list). Add CODEOWNERS parsing to show eligible reviewers alongside the git-history-based suggestions. Expand from 1-2 to 1-3 suggested reviewers with 1.5x threshold. --- .github/workflows/suggest-reviewers.yml | 4 +- tools/suggest_reviewers.py | 211 +++++++++++++++--------- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml index 47a8cc6cca..3f60e77420 100644 --- a/.github/workflows/suggest-reviewers.yml +++ b/.github/workflows/suggest-reviewers.yml @@ -10,7 +10,9 @@ permissions: jobs: suggest-reviewers: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco if: ${{ !github.event.pull_request.draft && !github.event.pull_request.head.repo.fork }} steps: diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py index f3273fa0d9..2d1d91501b 100644 --- a/tools/suggest_reviewers.py +++ b/tools/suggest_reviewers.py @@ -3,6 +3,7 @@ # requires-python = ">=3.12" # /// +import fnmatch import os import subprocess import sys @@ -10,11 +11,8 @@ from pathlib import Path MENTION_REVIEWERS = True - CODEOWNERS_LINK = "[CODEOWNERS](.github/CODEOWNERS)" - MARKER = "" - _login_cache: dict[str, str | None] = {} @@ -28,51 +26,36 @@ def classify_file(path: str) -> float: return 0.2 if path.endswith("_test.go"): return 0.3 - if path.endswith(".go"): - return 1.0 - return 0.5 + return 1.0 if path.endswith(".go") else 0.5 def get_changed_files(pr_number: str) -> list[str]: - result = subprocess.run( + r = subprocess.run( ["gh", "pr", "diff", "--name-only", pr_number], capture_output=True, encoding="utf-8", ) - if result.returncode != 0: - print(f"gh pr diff failed: {result.stderr.strip()}", file=sys.stderr) + if r.returncode != 0: + print(f"gh pr diff failed: {r.stderr.strip()}", file=sys.stderr) sys.exit(1) - return [f.strip() for f in result.stdout.splitlines() if f.strip()] + return [f.strip() for f in r.stdout.splitlines() if f.strip()] def git_log(path: str) -> list[tuple[str, str, datetime]]: - result = subprocess.run( - [ - "git", - "log", - "-50", - "--no-merges", - "--since=12 months ago", - "--format=%H|%an|%aI", - "--", - path, - ], + r = subprocess.run( + ["git", "log", "-50", "--no-merges", "--since=12 months ago", "--format=%H|%an|%aI", "--", path], capture_output=True, encoding="utf-8", ) - if result.returncode != 0: + if r.returncode != 0: return [] entries = [] - for line in result.stdout.splitlines(): - line = line.strip() - if not line or "|" not in line: - continue - parts = line.split("|", 2) + for line in r.stdout.splitlines(): + parts = line.strip().split("|", 2) if len(parts) != 3: continue - sha, name, date_str = parts try: - entries.append((sha, name, datetime.fromisoformat(date_str))) + entries.append((parts[0], parts[1], datetime.fromisoformat(parts[2]))) except ValueError: continue return entries @@ -81,18 +64,49 @@ def git_log(path: str) -> list[tuple[str, str, datetime]]: def resolve_login(repo: str, sha: str, author_name: str) -> str | None: if author_name in _login_cache: return _login_cache[author_name] - result = subprocess.run( + r = subprocess.run( ["gh", "api", f"repos/{repo}/commits/{sha}", "--jq", ".author.login"], capture_output=True, encoding="utf-8", ) - login = result.stdout.strip() if result.returncode == 0 else None - if not login: - login = None + login = r.stdout.strip() or None if r.returncode == 0 else None _login_cache[author_name] = login return login +def _codeowners_match(pattern: str, filepath: str) -> bool: + if pattern.startswith("/"): + pattern = pattern[1:] + if pattern.endswith("/"): + return filepath.startswith(pattern) + return fnmatch.fnmatch(filepath, pattern) or filepath == pattern + return fnmatch.fnmatch(filepath, pattern) or fnmatch.fnmatch(Path(filepath).name, pattern) + + +def parse_codeowners(changed_files: list[str]) -> list[str]: + path = Path(".github/CODEOWNERS") + if not path.exists(): + return [] + rules: list[tuple[str, list[str]]] = [] + for line in path.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + parts = line.split() + owners = [p for p in parts[1:] if p.startswith("@")] + if len(parts) >= 2 and owners: + rules.append((parts[0], owners)) + + all_owners: set[str] = set() + for filepath in changed_files: + matched = [] + for pattern, owners in rules: + if _codeowners_match(pattern, filepath): + matched = owners + all_owners.update(matched) + return sorted(all_owners) + + def score_contributors( files: list[str], pr_author: str, now: datetime, repo: str ) -> tuple[dict[str, float], dict[str, dict[str, float]], int]: @@ -105,7 +119,6 @@ def score_contributors( weight = classify_file(filepath) if weight == 0.0: continue - history = git_log(filepath) if not history: parent = str(Path(filepath).parent) @@ -116,26 +129,20 @@ def score_contributors( top_dir = str(Path(filepath).parent) or "." file_contributed = False - for sha, name, commit_date in history: if name.endswith("[bot]"): continue login = resolve_login(repo, sha, name) if not login or login.lower() == author_login: continue - days_ago = max(0, (now - commit_date).total_seconds() / 86400) - recency = 0.5 ** (days_ago / 150) - s = weight * recency - + s = weight * (0.5 ** (days_ago / 150)) scores[login] = scores.get(login, 0) + s dir_scores.setdefault(login, {}) dir_scores[login][top_dir] = dir_scores[login].get(top_dir, 0) + s file_contributed = True - if file_contributed: scored_count += 1 - return scores, dir_scores, scored_count @@ -143,59 +150,100 @@ def top_dirs(ds: dict[str, float], n: int = 3) -> list[str]: return [d for d, _ in sorted(ds.items(), key=lambda x: -x[1])[:n]] -def format_reviewer(login: str, dirs: list[str]) -> str: +def fmt_reviewer(login: str, dirs: list[str]) -> str: mention = f"@{login}" if MENTION_REVIEWERS else login - dir_str = ", ".join(f"`{d}/`" for d in dirs) - return f"- {mention} -- recent work in {dir_str}" + return f"- {mention} -- recent work in {', '.join(f'`{d}/`' for d in dirs)}" -def compute_confidence(sorted_scores: list[tuple[str, float]], scored_count: int) -> str: - if scored_count < 3 or len(sorted_scores) < 2: +def select_reviewers(ss: list[tuple[str, float]]) -> list[tuple[str, float]]: + if not ss: + return [] + out = [ss[0]] + if len(ss) >= 2 and ss[0][1] < 1.5 * ss[1][1]: + out.append(ss[1]) + if len(ss) >= 3 and ss[1][1] < 1.5 * ss[2][1]: + out.append(ss[2]) + return out + + +def compute_confidence(ss: list[tuple[str, float]], scored_count: int) -> str: + if scored_count < 3 or len(ss) < 2: return "low" - if len(sorted_scores) >= 3 and sorted_scores[0][1] > 2 * sorted_scores[2][1]: + if len(ss) >= 3 and ss[0][1] > 2 * ss[2][1]: return "high" - if len(sorted_scores) >= 3 and sorted_scores[0][1] > 1.5 * sorted_scores[2][1]: + if len(ss) >= 3 and ss[0][1] > 1.5 * ss[2][1]: return "medium" return "low" +def fmt_eligible(owners: list[str]) -> str: + if MENTION_REVIEWERS: + return ", ".join(owners) + return ", ".join(o.lstrip("@") for o in owners) + + def build_comment( sorted_scores: list[tuple[str, float]], dir_scores: dict[str, dict[str, float]], total_files: int, scored_count: int, + eligible_owners: list[str], + pr_author: str, ) -> str: - if not sorted_scores: - return ( - f"{MARKER}\n" - "## Suggested reviewers\n\n" - "Could not determine reviewers from git history. " - f"Please pick from {CODEOWNERS_LINK}.\n" - ) - - reviewers = [sorted_scores[0]] - if len(sorted_scores) >= 2 and sorted_scores[0][1] < 1.35 * sorted_scores[1][1]: - reviewers.append(sorted_scores[1]) - - confidence = compute_confidence(sorted_scores, scored_count) - - lines = [MARKER, "## Suggested reviewers", ""] - for login, _ in reviewers: - dirs = top_dirs(dir_scores.get(login, {})) - lines.append(format_reviewer(login, dirs)) - lines.append("") - lines.append(f"Confidence: {confidence}") - lines.append("") - lines.append( - f"Based on git history of {total_files} changed files " + reviewers = select_reviewers(sorted_scores) + suggested_logins = {login.lower() for login, _ in reviewers} + eligible = [ + o + for o in eligible_owners + if o.lstrip("@").lower() != pr_author.lower() and o.lstrip("@").lower() not in suggested_logins + ] + + lines = [MARKER] + if reviewers: + lines += [ + "## Suggested reviewers", + "", + "Based on git history of the changed files, these people are best suited to review:", + "", + ] + for login, _ in reviewers: + lines.append(fmt_reviewer(login, top_dirs(dir_scores.get(login, {})))) + lines += ["", f"Confidence: {compute_confidence(sorted_scores, scored_count)}"] + if eligible: + lines += [ + "", + "## Eligible reviewers", + "", + "Based on CODEOWNERS, these people or teams could also review:", + "", + fmt_eligible(eligible), + ] + elif eligible: + lines += [ + "## Eligible reviewers", + "", + "Could not determine reviewers from git history. Based on CODEOWNERS, these people or teams could review:", + "", + fmt_eligible(eligible), + ] + else: + lines += [ + "## Suggested reviewers", + "", + f"Could not determine reviewers from git history. Please pick from {CODEOWNERS_LINK}.", + ] + + lines += [ + "", + f"Suggestions based on git history of {total_files} changed files " f"({scored_count} scored). " - f"See {CODEOWNERS_LINK} for path-specific owners." - ) + f"See {CODEOWNERS_LINK} for path-specific ownership rules.", + ] return "\n".join(lines) + "\n" def find_existing_comment(repo: str, pr_number: str) -> str | None: - result = subprocess.run( + r = subprocess.run( [ "gh", "api", @@ -207,13 +255,13 @@ def find_existing_comment(repo: str, pr_number: str) -> str | None: capture_output=True, encoding="utf-8", ) - if result.returncode != 0: - print(f"gh api comments failed: {result.stderr.strip()}", file=sys.stderr) + if r.returncode != 0: + print(f"gh api comments failed: {r.stderr.strip()}", file=sys.stderr) sys.exit(1) - for comment_id in result.stdout.splitlines(): - comment_id = comment_id.strip() - if comment_id: - return comment_id + for cid in r.stdout.splitlines(): + cid = cid.strip() + if cid: + return cid return None @@ -230,7 +278,8 @@ def main(): now = datetime.now(timezone.utc) scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo) sorted_scores = sorted(scores.items(), key=lambda x: -x[1]) - comment = build_comment(sorted_scores, dir_scores, len(files), scored_count) + eligible = parse_codeowners(files) + comment = build_comment(sorted_scores, dir_scores, len(files), scored_count, eligible, pr_author) print(comment) existing_id = find_existing_comment(repo, pr_number) From 81c87b97c1c01a8be75278053f113773d585eb76 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 18 Mar 2026 17:48:10 +0100 Subject: [PATCH 4/4] Address review: use uv, remove cmd/workspace and cmd/account skip Remove the 0.0 weight override for cmd/workspace/ and cmd/account/ since those directories contain non-generated files too (denik). Use uv + setup-uv to pin Python 3.12 (pietern). --- .github/workflows/suggest-reviewers.yml | 7 ++++++- tools/suggest_reviewers.py | 2 -- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml index 3f60e77420..fee541d09f 100644 --- a/.github/workflows/suggest-reviewers.yml +++ b/.github/workflows/suggest-reviewers.yml @@ -21,10 +21,15 @@ jobs: with: fetch-depth: 0 + - name: Install uv + uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + with: + version: "0.6.5" + - name: Suggest reviewers env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_REPOSITORY: ${{ github.repository }} PR_NUMBER: ${{ github.event.pull_request.number }} PR_AUTHOR: ${{ github.event.pull_request.user.login }} - run: python3 tools/suggest_reviewers.py + run: uv run tools/suggest_reviewers.py diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py index 2d1d91501b..cf27ba8ca8 100644 --- a/tools/suggest_reviewers.py +++ b/tools/suggest_reviewers.py @@ -20,8 +20,6 @@ def classify_file(path: str) -> float: p = Path(path) if p.name.startswith("out.") or p.name == "output.txt": return 0.0 - if path.startswith(("cmd/workspace/", "cmd/account/")): - return 0.0 if path.startswith(("acceptance/", "integration/")): return 0.2 if path.endswith("_test.go"):