From b0dc3240856534207401e5a8078bea50a5a81c6f Mon Sep 17 00:00:00 2001
From: Alexey Zimarev <alex@zimarev.com>
Date: Mon, 13 Apr 2026 15:37:48 +0200
Subject: [PATCH 1/2] [DEV-1438] wire CLI to session-scoped judge-facts
 endpoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Matches the server-side pivot in kurrent-io/Kurrent.Capacitor#476: judge
facts are now scoped to the session's **project (repo)** and shared among
all team members working on it, rather than siloed per-user.

Client-side changes:

- Endpoint URLs:
  GET /api/judge-facts?category=X                        -> GET /api/sessions/{id}/judge-facts?category=X
  POST /api/judge-facts                                  -> POST /api/sessions/{id}/judge-facts
- JudgeFactPayload drops source_session_id: the session is now
  identified by the URL path rather than the request body.
- Prompt template rewritten to frame retained facts as *project-level*
  observations, with updated examples and an explicit anti-example
  ("Alice tends to force-push" — individual, not codebase-level).
- FetchAllJudgeFactsAsync takes sessionId so per-category GETs hit the
  correct session-scoped URL.

Sessions without a detected repo get empty fact lists from the server,
so judges silently see "(no patterns retained yet)" — no special-casing
needed on the CLI side.

Existing EvalCommand tests continue to pass without changes (ParseVerdict,
ExtractRetainFact, Aggregate, FormatKnownPatterns, BuildQuestionPrompt are
all server-independent). Full suite 205/205, AOT publish clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/kapacitor/Commands/EvalCommand.cs         | 21 ++++++++++---------
 src/kapacitor/Models.cs                       | 14 ++++++-------
 .../Resources/prompt-eval-question.txt        | 15 ++++++-------
 3 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/src/kapacitor/Commands/EvalCommand.cs b/src/kapacitor/Commands/EvalCommand.cs
index 0662f5e..a23d06b 100644
--- a/src/kapacitor/Commands/EvalCommand.cs
+++ b/src/kapacitor/Commands/EvalCommand.cs
@@ -91,10 +91,12 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
         }
 
         // 2. Fetch retained judge facts per category so we can inject them
-        //    into each judge's prompt as "known patterns" — DEV-1434.
-        //    Failures don't abort the run; the judges just won't see prior
-        //    patterns this time.
-        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl);
+        //    into each judge's prompt as "known patterns" — DEV-1434 /
+        //    DEV-1438. Facts are scoped to the session's repo server-side,
+        //    so sessions without a detected repository return empty lists
+        //    and the judges simply see no prior patterns. Failures don't
+        //    abort the run.
+        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl, sessionId);
 
         // 3. Run each question in sequence. Failures on individual questions
         //    are logged but don't abort the whole run — a partial result set
@@ -139,7 +141,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
 
             // If the judge emitted a retain_fact, persist it for future evals.
             if (ExtractRetainFact(result.Result) is { } retainedFact) {
-                await PostJudgeFactAsync(httpClient, baseUrl, q.Category, retainedFact, context.SessionId, evalRunId);
+                await PostJudgeFactAsync(httpClient, baseUrl, sessionId, q.Category, retainedFact, evalRunId);
             }
         }
 
@@ -244,12 +246,12 @@ internal static string FormatKnownPatterns(List<JudgeFact> facts) {
         }
     }
 
-    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl) {
+    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl, string sessionId) {
         var result = new Dictionary<string, List<JudgeFact>>();
 
         foreach (var category in Categories) {
             try {
-                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/judge-facts?category={category}");
+                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/sessions/{sessionId}/judge-facts?category={category}");
                 if (!resp.IsSuccessStatusCode) {
                     Log($"Failed to fetch judge facts for {category}: HTTP {(int)resp.StatusCode}");
 
@@ -268,11 +270,10 @@ static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(H
         return result;
     }
 
-    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string category, string fact, string sessionId, string evalRunId) {
+    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string sessionId, string category, string fact, string evalRunId) {
         var payload = new JudgeFactPayload {
             Category        = category,
             Fact            = fact,
-            SourceSessionId = sessionId,
             SourceEvalRunId = evalRunId
         };
 
@@ -280,7 +281,7 @@ static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, stri
         using var content = new StringContent(payloadJson, Encoding.UTF8, "application/json");
 
         try {
-            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/judge-facts", content);
+            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/sessions/{sessionId}/judge-facts", content);
             Log(
                 resp.IsSuccessStatusCode
                     ? $"  retained fact for category {category}"
diff --git a/src/kapacitor/Models.cs b/src/kapacitor/Models.cs
index ca7aaef..1485e8f 100644
--- a/src/kapacitor/Models.cs
+++ b/src/kapacitor/Models.cs
@@ -258,11 +258,12 @@ record EvalCategoryResult {
     public List<EvalQuestionVerdict> Questions { get; init; } = [];
 }
 
-// Cross-eval memory — DEV-1434. Judges may optionally emit a retain_fact
-// when they spot a cross-cutting pattern; the CLI POSTs it to the server's
-// judge-facts endpoint which appends to a per-category stream. Facts from
-// past evaluations are fetched at eval startup and injected into each
-// judge's prompt as "known patterns".
+// Cross-eval memory — DEV-1434 / DEV-1438. Judges may optionally emit a
+// retain_fact when they spot a cross-cutting pattern; the CLI POSTs it to
+// the session-scoped endpoint and the server derives repo scope from the
+// session (facts live on JudgeFacts-repo-{repoHash}-{category} streams).
+// Facts accumulated on the same repo by any team member are fetched at
+// eval startup and injected into each judge's prompt as "known patterns".
 record JudgeFactPayload {
     [JsonPropertyName("category")]
     public required string Category { get; init; }
@@ -270,9 +271,6 @@ record JudgeFactPayload {
     [JsonPropertyName("fact")]
     public required string Fact { get; init; }
 
-    [JsonPropertyName("source_session_id")]
-    public required string SourceSessionId { get; init; }
-
     [JsonPropertyName("source_eval_run_id")]
     public required string SourceEvalRunId { get; init; }
 }
diff --git a/src/kapacitor/Resources/prompt-eval-question.txt b/src/kapacitor/Resources/prompt-eval-question.txt
index be95539..2219693 100644
--- a/src/kapacitor/Resources/prompt-eval-question.txt
+++ b/src/kapacitor/Resources/prompt-eval-question.txt
@@ -24,9 +24,9 @@ Subagent activity (if any) carries `agent_id` / `agent_type`. Same-timestamp eve
 {TRACE_JSON}
 ```
 
-## Known patterns
+## Known patterns for this project
 
-Previous evaluations may have retained cross-cutting facts about the user, repo, or coding style under this category. Treat them as prior context — corroborating evidence if present, but do not punish the agent for a pattern that isn't actually visible in this session's trace.
+Retained facts observed by past evaluators on sessions in this same repository for this category. Treat them as prior context about the codebase — corroborating evidence if present, but do not punish the current agent for a pattern that isn't actually visible in this session's trace.
 
 {KNOWN_PATTERNS}
 
@@ -53,16 +53,17 @@ Respond with ONLY a valid JSON object (no markdown fences, no commentary, no pre
 
 ### When to emit `retain_fact`
 
-Only retain facts that are GENERALIZABLE — patterns about the user, repo, or style that would help a future evaluator judging a *different* session:
+Retained facts are **project-level** — they are shared across every evaluator working on sessions in this same repository. Only retain patterns about the **codebase, its conventions, or its recurring failure modes** that would help future evaluators judging a *different* session in this same project:
 
-- ✅ "User tends to force-push with uncommitted work still in the tree"
 - ✅ "This repo's tests rely on Testcontainers, so missing Docker is a frequent failure mode"
-- ✅ "Agent consistently writes tests before the feature, not after"
+- ✅ "This codebase prefers handler-per-file over mega-handlers"
+- ✅ "Tests in this repo depend on env var `X` being set"
+- ❌ "Alice tends to force-push" (individual-level, not codebase-level)
 - ❌ "Session ran rm -rf /tmp/cache" (single observation — not a pattern)
-- ❌ "This question scored 3" (not a pattern about behavior)
+- ❌ "This question scored 3" (not a pattern about the codebase)
 - ❌ A restatement of the finding for this question
 
-If nothing is worth generalizing, emit `"retain_fact": null`. Do NOT emit a fact just to have one — retained facts are injected into every future judge prompt and noise dilutes their usefulness.
+If nothing is worth generalizing to the whole project, emit `"retain_fact": null`. Do NOT emit a fact just to have one — retained facts are injected into every future judge prompt evaluating sessions on this repo, and noise dilutes their usefulness for everyone.
 
 ## Scoring
 

From 297d23cb63608d8fb3d77599f61ee8f223615f7a Mon Sep 17 00:00:00 2001
From: Alexey Zimarev <alex@zimarev.com>
Date: Mon, 13 Apr 2026 16:12:55 +0200
Subject: [PATCH 2/2] [DEV-1438] URL-escape sessionId when embedding in
 session-scoped paths

Review response on PR #13: FetchAllJudgeFactsAsync (like the other code
building session-scoped URLs in this file) interpolated sessionId
directly into the path without escaping. Most sessionIds are GUIDs, but
meta-session slugs are free-form and KAPACITOR_SESSION_ID could in
principle carry anything, so reserved URL characters (e.g. /, ?, #)
would corrupt the path.

Encode once at the top of HandleEval and reuse the escaped form for the
four session-scoped URLs in EvalCommand: /eval-context (existing), /evals
(existing), /judge-facts GET, /judge-facts POST. The category query
parameter is also escaped for hygiene even though the canonical four
categories are safe ASCII.

Note: the same raw-interpolation pattern exists in other CLI commands
(RecapCommand, WhatsDoneCommand, etc.). Not fixing those here to keep the
PR focused; a follow-up could centralize URL construction.

Full suite 205/205, AOT publish clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/kapacitor/Commands/EvalCommand.cs | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/kapacitor/Commands/EvalCommand.cs b/src/kapacitor/Commands/EvalCommand.cs
index a23d06b..973bef5 100644
--- a/src/kapacitor/Commands/EvalCommand.cs
+++ b/src/kapacitor/Commands/EvalCommand.cs
@@ -40,13 +40,18 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
 
         using var httpClient = await HttpClientExtensions.CreateAuthenticatedClientAsync();
 
+        // Session IDs are typically UUIDs but meta-session slugs are free-form
+        // user input; escape once and reuse for every session-scoped URL so
+        // reserved path characters don't corrupt the request.
+        var encodedSessionId = Uri.EscapeDataString(sessionId);
+
         // 1. Fetch the compacted eval context. We keep the raw JSON for
         //    embedding in judge prompts and parse it once for progress logging.
         string              traceJson;
         EvalContextResult? context;
 
         try {
-            var url = $"{baseUrl}/api/sessions/{sessionId}/eval-context"
+            var url = $"{baseUrl}/api/sessions/{encodedSessionId}/eval-context"
                 + (chain ? "?chain=true" : "")
                 + (thresholdBytes is { } t ? (chain ? "&" : "?") + $"threshold={t}" : "");
 
@@ -96,7 +101,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
         //    so sessions without a detected repository return empty lists
         //    and the judges simply see no prior patterns. Failures don't
         //    abort the run.
-        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl, sessionId);
+        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl, encodedSessionId);
 
         // 3. Run each question in sequence. Failures on individual questions
         //    are logged but don't abort the whole run — a partial result set
@@ -141,7 +146,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
 
             // If the judge emitted a retain_fact, persist it for future evals.
             if (ExtractRetainFact(result.Result) is { } retainedFact) {
-                await PostJudgeFactAsync(httpClient, baseUrl, sessionId, q.Category, retainedFact, evalRunId);
+                await PostJudgeFactAsync(httpClient, baseUrl, encodedSessionId, q.Category, retainedFact, evalRunId);
             }
         }
 
@@ -158,7 +163,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
         Render(aggregate, sessionId);
 
         // 5. Persist to the server.
-        var postUrl     = $"{baseUrl}/api/sessions/{sessionId}/evals";
+        var postUrl     = $"{baseUrl}/api/sessions/{encodedSessionId}/evals";
         var payloadJson = JsonSerializer.Serialize(aggregate, KapacitorJsonContext.Default.SessionEvalCompletedPayload);
         using var httpContent = new StringContent(payloadJson, Encoding.UTF8, "application/json");
 
@@ -246,12 +251,16 @@ internal static string FormatKnownPatterns(List<JudgeFact> facts) {
         }
     }
 
-    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl, string sessionId) {
+    /// <param name="encodedSessionId">Already URL-path-escaped — see HandleEval.</param>
+    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl, string encodedSessionId) {
         var result = new Dictionary<string, List<JudgeFact>>();
 
         foreach (var category in Categories) {
             try {
-                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/sessions/{sessionId}/judge-facts?category={category}");
+                // Categories are internal constants (safe ASCII), but escape
+                // for hygiene — costs nothing and insulates the URL from any
+                // future category that might include unusual characters.
+                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/sessions/{encodedSessionId}/judge-facts?category={Uri.EscapeDataString(category)}");
                 if (!resp.IsSuccessStatusCode) {
                     Log($"Failed to fetch judge facts for {category}: HTTP {(int)resp.StatusCode}");
 
@@ -270,7 +279,8 @@ static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(H
         return result;
     }
 
-    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string sessionId, string category, string fact, string evalRunId) {
+    /// <param name="encodedSessionId">Already URL-path-escaped — see HandleEval.</param>
+    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string encodedSessionId, string category, string fact, string evalRunId) {
         var payload = new JudgeFactPayload {
             Category        = category,
             Fact            = fact,
@@ -281,7 +291,7 @@ static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, stri
         using var content = new StringContent(payloadJson, Encoding.UTF8, "application/json");
 
         try {
-            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/sessions/{sessionId}/judge-facts", content);
+            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/sessions/{encodedSessionId}/judge-facts", content);
             Log(
                 resp.IsSuccessStatusCode
                     ? $"  retained fact for category {category}"