From 0ff5e9e02d2fd7875ec36d919d1ba3017989083c Mon Sep 17 00:00:00 2001
From: simon <simon.faltum@databricks.com>
Date: Mon, 6 Apr 2026 13:43:28 +0200
Subject: [PATCH] Fix reviewer suggestion confidence for sole experts and
 output file scoring

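The confidence calculation returned "low" whenever there were fewer than two
candidates, so a single person who was the sole contributor to all changed
files was rated "low" even though that is the highest-confidence case. A sole
candidate now yields "high", and with exactly two candidates the top score is
compared against the second instead of always falling through to "low".

Also, output files (out.*, output.txt) were scored 0.0 and completely excluded
from reviewer analysis. They now get a small weight (0.01/total_files) so they
contribute signal without dominating scores.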

Co-authored-by: Isaac
---
 tools/suggest_reviewers.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py
index cf27ba8ca8..76609e5566 100644
--- a/tools/suggest_reviewers.py
+++ b/tools/suggest_reviewers.py
@@ -16,10 +16,10 @@
 _login_cache: dict[str, str | None] = {}
 
 
-def classify_file(path: str) -> float:
+def classify_file(path: str, total_files: int) -> float:
     p = Path(path)
     if p.name.startswith("out.") or p.name == "output.txt":
-        return 0.0
+        return 0.01 / max(total_files, 1)
     if path.startswith(("acceptance/", "integration/")):
         return 0.2
     if path.endswith("_test.go"):
@@ -113,10 +113,9 @@ def score_contributors(
     scored_count = 0
     author_login = pr_author.lower()
 
+    total_files = len(files)
     for filepath in files:
-        weight = classify_file(filepath)
-        if weight == 0.0:
-            continue
+        weight = classify_file(filepath, total_files)
         history = git_log(filepath)
         if not history:
             parent = str(Path(filepath).parent)
@@ -165,11 +164,21 @@ def select_reviewers(ss: list[tuple[str, float]]) -> list[tuple[str, float]]:
 
 
 def compute_confidence(ss: list[tuple[str, float]], scored_count: int) -> str:
-    if scored_count < 3 or len(ss) < 2:
+    if not ss:
+        return "low"
+    if len(ss) == 1:
+        return "high"
+    if len(ss) == 2:
+        if ss[0][1] > 2 * ss[1][1]:
+            return "high"
+        if ss[0][1] > 1.5 * ss[1][1]:
+            return "medium"
+        return "low"
+    if scored_count < 3:
         return "low"
-    if len(ss) >= 3 and ss[0][1] > 2 * ss[2][1]:
+    if ss[0][1] > 2 * ss[2][1]:
         return "high"
-    if len(ss) >= 3 and ss[0][1] > 1.5 * ss[2][1]:
+    if ss[0][1] > 1.5 * ss[2][1]:
         return "medium"
     return "low"