kurrent-io · alexeyzimarev · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/src/kapacitor/Commands/EvalCommand.cs b/src/kapacitor/Commands/EvalCommand.cs
@@ -40,13 +40,18 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
 
         using var httpClient = await HttpClientExtensions.CreateAuthenticatedClientAsync();
 
+        // Session IDs are typically UUIDs but meta-session slugs are free-form
+        // user input; escape once and reuse for every session-scoped URL so
+        // reserved path characters don't corrupt the request.
+        var encodedSessionId = Uri.EscapeDataString(sessionId);
+
         // 1. Fetch the compacted eval context. We keep the raw JSON for
         //    embedding in judge prompts and parse it once for progress logging.
         string              traceJson;
         EvalContextResult? context;
 
         try {
-            var url = $"{baseUrl}/api/sessions/{sessionId}/eval-context"
+            var url = $"{baseUrl}/api/sessions/{encodedSessionId}/eval-context"
                 + (chain ? "?chain=true" : "")
                 + (thresholdBytes is { } t ? (chain ? "&" : "?") + $"threshold={t}" : "");
 
@@ -91,10 +96,12 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
         }
 
         // 2. Fetch retained judge facts per category so we can inject them
-        //    into each judge's prompt as "known patterns" — DEV-1434.
-        //    Failures don't abort the run; the judges just won't see prior
-        //    patterns this time.
-        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl);
+        //    into each judge's prompt as "known patterns" — DEV-1434 /
+        //    DEV-1438. Facts are scoped to the session's repo server-side,
+        //    so sessions without a detected repository return empty lists
+        //    and the judges simply see no prior patterns. Failures don't
+        //    abort the run.
+        var knownFactsByCategory = await FetchAllJudgeFactsAsync(httpClient, baseUrl, encodedSessionId);
 
         // 3. Run each question in sequence. Failures on individual questions
         //    are logged but don't abort the whole run — a partial result set
@@ -139,7 +146,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
 
             // If the judge emitted a retain_fact, persist it for future evals.
             if (ExtractRetainFact(result.Result) is { } retainedFact) {
-                await PostJudgeFactAsync(httpClient, baseUrl, q.Category, retainedFact, context.SessionId, evalRunId);
+                await PostJudgeFactAsync(httpClient, baseUrl, encodedSessionId, q.Category, retainedFact, evalRunId);
             }
         }
 
@@ -156,7 +163,7 @@ public static async Task<int> HandleEval(string baseUrl, string sessionId, strin
         Render(aggregate, sessionId);
 
         // 5. Persist to the server.
-        var postUrl     = $"{baseUrl}/api/sessions/{sessionId}/evals";
+        var postUrl     = $"{baseUrl}/api/sessions/{encodedSessionId}/evals";
         var payloadJson = JsonSerializer.Serialize(aggregate, KapacitorJsonContext.Default.SessionEvalCompletedPayload);
         using var httpContent = new StringContent(payloadJson, Encoding.UTF8, "application/json");
 
@@ -244,12 +251,16 @@ internal static string FormatKnownPatterns(List<JudgeFact> facts) {
         }
     }
 
-    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl) {
+    /// <param name="encodedSessionId">Already URL-path-escaped — see HandleEval.</param>
+    static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(HttpClient httpClient, string baseUrl, string encodedSessionId) {
         var result = new Dictionary<string, List<JudgeFact>>();
 
         foreach (var category in Categories) {
             try {
-                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/judge-facts?category={category}");
+                // Categories are internal constants (safe ASCII), but escape
+                // for hygiene — costs nothing and insulates the URL from any
+                // future category that might include unusual characters.
+                using var resp = await httpClient.GetWithRetryAsync($"{baseUrl}/api/sessions/{encodedSessionId}/judge-facts?category={Uri.EscapeDataString(category)}");
                 if (!resp.IsSuccessStatusCode) {
                     Log($"Failed to fetch judge facts for {category}: HTTP {(int)resp.StatusCode}");
 
@@ -268,19 +279,19 @@ static async Task<Dictionary<string, List<JudgeFact>>> FetchAllJudgeFactsAsync(H
         return result;
     }
 
-    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string category, string fact, string sessionId, string evalRunId) {
+    /// <param name="encodedSessionId">Already URL-path-escaped — see HandleEval.</param>
+    static async Task PostJudgeFactAsync(HttpClient httpClient, string baseUrl, string encodedSessionId, string category, string fact, string evalRunId) {
         var payload = new JudgeFactPayload {
             Category        = category,
             Fact            = fact,
-            SourceSessionId = sessionId,
             SourceEvalRunId = evalRunId
         };
 
         var payloadJson = JsonSerializer.Serialize(payload, KapacitorJsonContext.Default.JudgeFactPayload);
         using var content = new StringContent(payloadJson, Encoding.UTF8, "application/json");
 
         try {
-            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/judge-facts", content);
+            using var resp = await httpClient.PostWithRetryAsync($"{baseUrl}/api/sessions/{encodedSessionId}/judge-facts", content);
             Log(
                 resp.IsSuccessStatusCode
                     ? $"  retained fact for category {category}"

diff --git a/src/kapacitor/Models.cs b/src/kapacitor/Models.cs
@@ -258,21 +258,19 @@ record EvalCategoryResult {
     public List<EvalQuestionVerdict> Questions { get; init; } = [];
 }
 
-// Cross-eval memory — DEV-1434. Judges may optionally emit a retain_fact
-// when they spot a cross-cutting pattern; the CLI POSTs it to the server's
-// judge-facts endpoint which appends to a per-category stream. Facts from
-// past evaluations are fetched at eval startup and injected into each
-// judge's prompt as "known patterns".
+// Cross-eval memory — DEV-1434 / DEV-1438. Judges may optionally emit a
+// retain_fact when they spot a cross-cutting pattern; the CLI POSTs it to
+// the session-scoped endpoint and the server derives repo scope from the
+// session (facts live on JudgeFacts-repo-{repoHash}-{category} streams).
+// Facts accumulated on the same repo by any team member are fetched at
+// eval startup and injected into each judge's prompt as "known patterns".
 record JudgeFactPayload {
     [JsonPropertyName("category")]
     public required string Category { get; init; }
 
     [JsonPropertyName("fact")]
     public required string Fact { get; init; }
 
-    [JsonPropertyName("source_session_id")]
-    public required string SourceSessionId { get; init; }
-
     [JsonPropertyName("source_eval_run_id")]
     public required string SourceEvalRunId { get; init; }
 }

diff --git a/src/kapacitor/Resources/prompt-eval-question.txt b/src/kapacitor/Resources/prompt-eval-question.txt
@@ -24,9 +24,9 @@ Subagent activity (if any) carries `agent_id` / `agent_type`. Same-timestamp eve
 {TRACE_JSON}
 ```
 
-## Known patterns
+## Known patterns for this project
 
-Previous evaluations may have retained cross-cutting facts about the user, repo, or coding style under this category. Treat them as prior context — corroborating evidence if present, but do not punish the agent for a pattern that isn't actually visible in this session's trace.
+These are retained facts observed by past evaluators on sessions in this same repository for this category. Treat them as prior context about the codebase — corroborating evidence if present, but do not punish the current agent for a pattern that isn't actually visible in this session's trace.
 
 {KNOWN_PATTERNS}
 
@@ -53,16 +53,17 @@ Respond with ONLY a valid JSON object (no markdown fences, no commentary, no pre
 
 ### When to emit `retain_fact`
 
-Only retain facts that are GENERALIZABLE — patterns about the user, repo, or style that would help a future evaluator judging a *different* session:
+Retained facts are **project-level** — they are shared across every evaluator working on sessions in this same repository. Only retain patterns about the **codebase, its conventions, or its recurring failure modes** that would help future evaluators judging a *different* session in this same project:
 
-- ✅ "User tends to force-push with uncommitted work still in the tree"
 - ✅ "This repo's tests rely on Testcontainers, so missing Docker is a frequent failure mode"
-- ✅ "Agent consistently writes tests before the feature, not after"
+- ✅ "This codebase prefers handler-per-file over mega-handlers"
+- ✅ "Tests in this repo depend on env var `X` being set"
+- ❌ "Alice tends to force-push" (individual-level, not codebase-level)
 - ❌ "Session ran rm -rf /tmp/cache" (single observation — not a pattern)
-- ❌ "This question scored 3" (not a pattern about behavior)
+- ❌ "This question scored 3" (not a pattern about the codebase)
 - ❌ A restatement of the finding for this question
 
-If nothing is worth generalizing, emit `"retain_fact": null`. Do NOT emit a fact just to have one — retained facts are injected into every future judge prompt and noise dilutes their usefulness.
+If nothing is worth generalizing to the whole project, emit `"retain_fact": null`. Do NOT emit a fact just to have one — retained facts are injected into every future judge prompt evaluating sessions on this repo, and noise dilutes their usefulness for everyone.
 
 ## Scoring