Fix reviewer suggestion confidence for sole experts (#4905)

simonfaltum · web-flow · commit 6426d2623561 · 2026-04-06T16:34:47.000Z
## Why

The `suggest-reviewers` GitHub Action had two issues:

1. When a single person was the sole contributor to all changed files, confidence was reported as "low" instead of "high". This happened because `compute_confidence` treated `len(ss) < 2` as insufficient data, when it actually means there's one clear expert.
2. Output files (`out.*`, `output.txt`) were scored `0.0` and completely excluded from analysis. Reviewers should still verify output changes make sense, so these files deserve a small (but non-zero) contribution to scoring.

## Changes

**Before:** Sole-author PRs always got "low" confidence. Output files were invisible to the scoring algorithm. PRs with exactly 2 contributors also always got "low" confidence (the `>= 3` guards prevented any comparison).

**Now:**

- `len(ss) == 1` (sole contributor) returns "high" confidence
- `len(ss) == 2` compares top vs second using the same 2x/1.5x thresholds
- Output files get weight `0.01 / total_files` instead of `0.0`, contributing signal without dominating scores
- Removed dead `if weight == 0.0` guard that could never trigger after the weight change

## Test plan

- Verified against PR #4857 (4 files, 2 output, pietern sole author): would now produce "high" instead of "low"
- Logic review of all `compute_confidence` branches for correctness

This pull request was AI-assisted by Isaac.
diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
@@ -16,10 +16,10 @@
 _login_cache: dict[str, str | None] = {}
 
 
-def classify_file(path: str) -> float:
+def classify_file(path: str, total_files: int) -> float:
     p = Path(path)
     if p.name.startswith("out.") or p.name == "output.txt":
-        return 0.0
+        return 0.01 / max(total_files, 1)
     if path.startswith(("acceptance/", "integration/")):
         return 0.2
     if path.endswith("_test.go"):
@@ -113,10 +113,9 @@ def score_contributors(
     scored_count = 0
     author_login = pr_author.lower()
 
+    total_files = len(files)
     for filepath in files:
-        weight = classify_file(filepath)
-        if weight == 0.0:
-            continue
+        weight = classify_file(filepath, total_files)
         history = git_log(filepath)
         if not history:
             parent = str(Path(filepath).parent)
@@ -165,11 +164,21 @@ def select_reviewers(ss: list[tuple[str, float]]) -> list[tuple[str, float]]:
 
 
 def compute_confidence(ss: list[tuple[str, float]], scored_count: int) -> str:
-    if scored_count < 3 or len(ss) < 2:
+    if not ss:
+        return "low"
+    if len(ss) == 1:
+        return "high"
+    if len(ss) == 2:
+        if ss[0][1] > 2 * ss[1][1]:
+            return "high"
+        if ss[0][1] > 1.5 * ss[1][1]:
+            return "medium"
+        return "low"
+    if scored_count < 3:
         return "low"
-    if len(ss) >= 3 and ss[0][1] > 2 * ss[2][1]:
+    if ss[0][1] > 2 * ss[2][1]:
         return "high"
-    if len(ss) >= 3 and ss[0][1] > 1.5 * ss[2][1]:
+    if ss[0][1] > 1.5 * ss[2][1]:
         return "medium"
     return "low"