AGIBuild
diff --git a/docs/superpowers/plans/2026-03-24-llama-server-hardening.md b/docs/superpowers/plans/2026-03-24-llama-server-hardening.md
Lines changed: 191 additions & 0 deletions
diff --git a/docs/superpowers/plans/2026-03-24-new-design-coverage-sweep.md b/docs/superpowers/plans/2026-03-24-new-design-coverage-sweep.md
Lines changed: 47 additions & 0 deletions
diff --git a/src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs b/src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs
Lines changed: 11 additions & 47 deletions
diff --git a/src/LiveLingo.Core/Processing/LlamaServerChatRequest.cs b/src/LiveLingo.Core/Processing/LlamaServerChatRequest.cs
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,191 @@
+# Llama Server Hardening Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Harden the local llama-server integration so chat responses are parsed consistently, startup survives caller cancellation, and runtime downloads are more resilient.
+
+**Architecture:** Centralize `/v1/chat/completions` parsing in a single helper used by translation and text-processing flows. Keep model startup as a shared background task owned by `QwenModelHost`, and strengthen runtime download behavior inside `NativeRuntimeUpdater` so transport failures do not corrupt the bootstrap path.
+
+**Tech Stack:** .NET 10, xUnit, NSubstitute, System.Text.Json, HttpClient
+
+---
+
+### Task 1: Centralize chat response parsing
+
+**Files:**
+- Modify: `src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs`
+- Modify: `src/LiveLingo.Core/Processing/QwenTextProcessor.cs`
+- Create: `src/LiveLingo.Core/Processing/LlamaServerChatResponse.cs`
+- Test: `tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs`
+
+- [ ] **Step 1: Write the failing test**
+
+```csharp
+[Fact]
+public void GetAssistantText_reads_text_from_content_array()
+{
+    const string json = """
+        {"choices":[{"message":{"content":[{"type":"text","text":"Hi"}]}}]}
+        """;
+    using var doc = JsonDocument.Parse(json);
+    Assert.Equal("Hi", LlamaServerChatResponse.GetAssistantText(doc.RootElement));
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaServerChatResponseTests`
+Expected: FAIL because the helper does not yet exist or does not handle the response shape.
+
+- [ ] **Step 3: Write minimal implementation**
+
+Create a helper that:
+- reads `choices[0].message.content`
+- falls back to `reasoning_content` when `content` is blank
+- accepts either string content or OpenAI-style content arrays
+- strips Qwen `<think>` wrappers in one place
+- emits a short diagnostic string for empty-output logs
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaServerChatResponseTests`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs \
+  src/LiveLingo.Core/Processing/QwenTextProcessor.cs \
+  src/LiveLingo.Core/Processing/LlamaServerChatResponse.cs \
+  tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs
+git commit -m "test: harden llama chat response parsing"
+```
+
+### Task 2: Make Qwen model startup shareable across cancelled callers
+
+**Files:**
+- Modify: `src/LiveLingo.Core/Processing/QwenModelHost.cs`
+- Test: `tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs`
+
+- [ ] **Step 1: Write the failing test**
+
+```csharp
+[Fact]
+public async Task GetOrStartServerAsync_keeps_background_load_running_after_first_waiter_cancels()
+{
+    // Arrange host with a gate-controlled server startup.
+    // Cancel the first caller before startup completes.
+    // Assert a second caller later receives the same loaded endpoint.
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenModelHostTests`
+Expected: FAIL because startup is still bound to the first caller's cancellation path.
+
+- [ ] **Step 3: Write minimal implementation**
+
+Update `QwenModelHost` so:
+- one shared `_ensureServerTask` owns startup/download work
+- the task is created under lock and run with `CancellationToken.None`
+- each caller can still cancel only its own wait via `WaitAsync(ct)`
+- reset paths clear the cached task
+- completion still verifies the server reached `Loaded`
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenModelHostTests`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/LiveLingo.Core/Processing/QwenModelHost.cs \
+  tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs
+git commit -m "test: preserve shared llama startup across cancellations"
+```
+
+### Task 3: Make runtime downloads resumable and non-destructive
+
+**Files:**
+- Modify: `src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs`
+- Test: `tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs`
+
+- [ ] **Step 1: Write the failing test**
+
+```csharp
+[Fact]
+public async Task EnsureLatestLlamaServerAsync_resumes_partial_archive_download()
+{
+    // Arrange an HTTP handler that first returns partial content and then serves the tail.
+    // Seed an archive file with partial bytes.
+    // Assert the completed file length matches the advertised total.
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter NativeRuntimeUpdaterTests`
+Expected: FAIL because downloads always restart from scratch.
+
+- [ ] **Step 3: Write minimal implementation**
+
+Add a resumable download helper that:
+- reuses a stable archive filename
+- sends `Range` when partial bytes already exist
+- appends only on `206 Partial Content`
+- retries transient HTTP / IO / timeout failures with backoff
+- validates final size when `Content-Range` exposes total length
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter NativeRuntimeUpdaterTests`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs \
+  tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs
+git commit -m "test: resume llama runtime downloads"
+```
+
+### Task 4: Verify integration points only for the redesigned path
+
+**Files:**
+- Modify: `src/LiveLingo.Core/Processing/LlamaServerProcessManager.cs`
+- Verify: `tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs`
+- Verify: `tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs`
+- Verify: `tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs`
+
+- [ ] **Step 1: Add the smallest failing test or assertion for startup arguments if needed**
+
+```csharp
+// Add only if there is a practical seam for argument verification.
+```
+
+- [ ] **Step 2: Run targeted tests to verify the redesigned path fails where expected**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests"`
+Expected: FAIL only for a test added in Step 1; the suites completed in Tasks 1–3 should already pass at this point.
+
+- [ ] **Step 3: Finalize implementation**
+
+Ensure the server starts with reasoning disabled and that translation now throws `InvalidOperationException` on empty assistant output instead of silently echoing the source text.
+
+- [ ] **Step 4: Run targeted verification**
+
+Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests"`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/LiveLingo.Core/Processing/LlamaServerProcessManager.cs \
+  src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs \
+  src/LiveLingo.Core/Processing/QwenTextProcessor.cs \
+  src/LiveLingo.Core/Processing/QwenModelHost.cs \
+  src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs
+git commit -m "feat: harden local llama server integration"
+```
@@ -0,0 +1,47 @@
+# New Design Coverage Sweep Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Raise test coverage around the redesigned llama-server call boundary without drifting into unrelated legacy test repair.
+
+**Architecture:** Add focused unit tests around the new shared chat request / response path, especially the two concrete callers that still lacked direct behavioral tests: `LlamaTranslationEngine` and `QwenTextProcessor`. Keep production changes minimal and only when tests reveal missing seams or mismatched behavior.
+
+**Tech Stack:** .NET 10, xUnit, NSubstitute, HttpClient, System.Text.Json
+
+---
+
+### Task 1: Cover `LlamaTranslationEngine` request/response behavior
+
+**Files:**
+- Create: `tests/LiveLingo.Core.Tests/Engines/LlamaTranslationEngineTests.cs`
+- Modify: `src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs` (only if tests expose a missing seam)
+
+- [ ] **Step 1: Write the failing tests**
+  - request body includes shared stop sequences and `stream = false`
+  - response content arrays are parsed into final translation text
+  - empty assistant output throws `InvalidOperationException`
+- [ ] **Step 2: Run `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaTranslationEngineTests` and verify red**
+- [ ] **Step 3: Apply the minimal production fix only if needed**
+- [ ] **Step 4: Re-run the same filter and verify green**
+
+### Task 2: Cover `QwenTextProcessor` fallback behavior
+
+**Files:**
+- Create: `tests/LiveLingo.Core.Tests/Processing/QwenTextProcessorTests.cs`
+- Modify: `src/LiveLingo.Core/Processing/QwenTextProcessor.cs` (only if tests expose a missing seam)
+
+- [ ] **Step 1: Write the failing tests**
+  - request body uses the shared request factory defaults
+  - empty assistant output falls back to original text
+  - transport failure falls back to original text
+- [ ] **Step 2: Run `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenTextProcessorTests` and verify red**
+- [ ] **Step 3: Apply the minimal production fix only if needed**
+- [ ] **Step 4: Re-run the same filter and verify green**
+
+### Task 3: Re-verify the redesigned Core path
+
+**Files:**
+- Verify only
+
+- [ ] **Step 1: Run** `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaTranslationEngineTests|QwenTextProcessorTests|LlamaServerChatRequestTests|LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests|LlamaServerProcessManagerTests|TranslationPipelineTests"`
+- [ ] **Step 2: Confirm all targeted redesigned-path tests pass**
@@ -29,8 +29,6 @@ public sealed class LlamaTranslationEngine : ITranslationEngine
     public IReadOnlyList<LanguageInfo> SupportedLanguages { get; } =
         Languages.Select(kv => new LanguageInfo(kv.Key, kv.Value.DisplayName)).ToList();
 
-    internal static readonly string[] StopSequences = ["</s>", "<|im_end|>", "</think>"];
-
     public LlamaTranslationEngine(QwenModelHost host, HttpClient http, ILogger<LlamaTranslationEngine> logger)
     {
         _host = host;
@@ -49,52 +47,26 @@ public async Task<string> TranslateAsync(
         var endpoint = await _host.GetOrStartServerAsync(ct);
         var url = $"{endpoint}/v1/chat/completions";
 
-        var systemPrompt = $"You are an expert translation engine. Your task is to translate the source text from {srcName} to {tgtName}.\n\nRules:\n1. Output ONLY the final {tgtName} translation.\n2. Do NOT output any {srcName} text.\n3. Do NOT output any explanations, conversational text, or notes.\n4. Do NOT use <think> tags or output any thought process.";
-        var userPrompt = $"Translate the following {srcName} text to {tgtName}:\n\n<source>\n{text}\n</source>";
-
-        _logger.LogDebug("Translation prompt for {Src}→{Tgt}: {Prompt}", sourceLanguage, targetLanguage, userPrompt);
+        var requestBody = LlamaServerChatRequest.CreateTranslation(text, srcName, tgtName);
 
-        var requestBody = new
-        {
-            messages = new[]
-            {
-                new { role = "system", content = systemPrompt },
-                new { role = "user", content = userPrompt }
-            },
-            max_tokens = 512,
-            temperature = 0.1f,
-            top_p = 0.95f,
-            stop = StopSequences,
-            stream = false
-        };
+        _logger.LogDebug("Translation prompt for {Src}→{Tgt}: {Prompt}", sourceLanguage, targetLanguage, requestBody.Messages[1].Content);
 
         var response = await _http.PostAsJsonAsync(url, requestBody, ct);
         response.EnsureSuccessStatusCode();
 
         var json = await response.Content.ReadAsStringAsync(ct);
         using var doc = JsonDocument.Parse(json);
-        var result = doc.RootElement
-            .GetProperty("choices")[0]
-            .GetProperty("message")
-            .GetProperty("content")
-            .GetString()?.Trim() ?? string.Empty;
-
-        // Clean up <think> tags if the model still generated them
-        if (result.Contains("</think>"))
-        {
-            var parts = result.Split("</think>");
-            result = parts.Last().Trim();
-        }
-        else if (result.StartsWith("<think>"))
-        {
-            // Model generated <think> but didn't finish it
-            result = string.Empty;
-        }
+        var result = LlamaServerChatResponse.GetAssistantText(doc.RootElement);
+        result = LlamaServerChatResponse.StripQwenThinkTags(result);
 
         if (string.IsNullOrWhiteSpace(result))
         {
-            _logger.LogWarning("Translation returned empty output for {Src}→{Tgt}", sourceLanguage, targetLanguage);
-            return text;
+            _logger.LogWarning(
+                "Translation returned empty output for {Src}→{Tgt}. {Diag}",
+                sourceLanguage,
+                targetLanguage,
+                LlamaServerChatResponse.DescribeFirstChoiceForLog(doc.RootElement));
+            throw new InvalidOperationException("Translation returned empty output.");
         }
 
         _logger.LogDebug("Translated {Src}→{Tgt}: {In} → {Out}", sourceLanguage, targetLanguage, text, result);
@@ -106,14 +78,6 @@ public bool SupportsLanguagePair(string sourceLanguage, string targetLanguage) =
 
     public void Dispose() { }
 
-    internal static string BuildPrompt(string text, string sourceLanguage, string targetLanguage)
-    {
-        var srcName = GetLanguageName(sourceLanguage);
-        var tgtName = GetLanguageName(targetLanguage);
-        var systemPrompt = $"You are a professional translator. Translate the user's text from {srcName} to {tgtName}. Output ONLY the translated text, nothing else. Do not output any thought process or explanation. Do not use <think> tags.";
-        return $"<|im_start|>system\n{systemPrompt}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n";
-    }
-
     private static string GetLanguageName(string code) =>
         Languages.TryGetValue(code, out var info) ? info.EnglishName : code;
-}
+}
@@ -0,0 +1,56 @@
+using System.Text.Json.Serialization;
+
+namespace LiveLingo.Core.Processing;
+
+/// <summary>
+/// One chat message inside a <c>LlamaServerChatRequest</c>.
+/// </summary>
+/// <param name="Role">Message role, serialized as <c>role</c>; the request factories use "system" and "user".</param>
+/// <param name="Content">Message text, serialized as <c>content</c>.</param>
+public sealed record LlamaServerChatMessage(
+    [property: JsonPropertyName("role")] string Role,
+    [property: JsonPropertyName("content")] string Content);
+
+/// <summary>
+/// Request body for the llama-server <c>/v1/chat/completions</c> endpoint.
+/// Each positional property is serialized under the snake_case name given by its
+/// <see cref="JsonPropertyNameAttribute"/>.
+/// </summary>
+/// <param name="Messages">Chat messages in order; both factories emit a system message followed by a user message.</param>
+/// <param name="MaxTokens">Serialized as <c>max_tokens</c>.</param>
+/// <param name="Temperature">Serialized as <c>temperature</c>.</param>
+/// <param name="TopP">Serialized as <c>top_p</c>.</param>
+/// <param name="Stop">Stop strings, serialized as <c>stop</c>.</param>
+/// <param name="Stream">Serialized as <c>stream</c>; both factories set it to <c>false</c>.</param>
+public sealed record LlamaServerChatRequest(
+    [property: JsonPropertyName("messages")] IReadOnlyList<LlamaServerChatMessage> Messages,
+    [property: JsonPropertyName("max_tokens")] int MaxTokens,
+    [property: JsonPropertyName("temperature")] float Temperature,
+    [property: JsonPropertyName("top_p")] float TopP,
+    [property: JsonPropertyName("stop")] IReadOnlyList<string> Stop,
+    [property: JsonPropertyName("stream")] bool Stream)
+{
+    /// <summary>
+    /// Stop strings used by both factory methods.
+    /// </summary>
+    /// <remarks>
+    /// The same array instance is passed as <see cref="Stop"/> to every request the
+    /// factories create, so treat it as read-only: mutating it changes all requests.
+    /// </remarks>
+    public static readonly string[] DefaultStopSequences = ["</s>", "<|im_end|>", "</think>"];
+
+    /// <summary>
+    /// Builds a translation request: a system message carrying the translation rules,
+    /// then a user message wrapping <paramref name="text"/> in <c>&lt;source&gt;</c> tags.
+    /// </summary>
+    /// <param name="text">Source text to translate; embedded verbatim in the user message.</param>
+    /// <param name="sourceLanguageName">Source language name interpolated into the prompts.</param>
+    /// <param name="targetLanguageName">Target language name interpolated into the prompts.</param>
+    /// <returns>
+    /// A request with <c>MaxTokens</c> 512, <c>Temperature</c> 0.1, <c>TopP</c> 0.95,
+    /// <see cref="DefaultStopSequences"/> and streaming disabled. The user prompt is at
+    /// <c>Messages[1]</c>.
+    /// </returns>
+    public static LlamaServerChatRequest CreateTranslation(
+        string text,
+        string sourceLanguageName,
+        string targetLanguageName)
+    {
+        // Rule 4 keeps the emphatic "Do NOT" used by rules 2-3 so the prompt stays
+        // identical to the one the translation engine sent before it was centralized here.
+        var systemPrompt =
+            $"You are an expert translation engine. Your task is to translate the source text from {sourceLanguageName} to {targetLanguageName}.\n\n" +
+            "Rules:\n" +
+            $"1. Output ONLY the final {targetLanguageName} translation.\n" +
+            $"2. Do NOT output any {sourceLanguageName} text.\n" +
+            "3. Do NOT output any explanations, conversational text, or notes.\n" +
+            "4. Do NOT use <think> tags or output any thought process.";
+        var userPrompt = $"Translate the following {sourceLanguageName} text to {targetLanguageName}:\n\n<source>\n{text}\n</source>";
+
+        return new LlamaServerChatRequest(
+            [
+                new LlamaServerChatMessage("system", systemPrompt),
+                new LlamaServerChatMessage("user", userPrompt)
+            ],
+            MaxTokens: 512,
+            Temperature: 0.1f,
+            TopP: 0.95f,
+            Stop: DefaultStopSequences,
+            Stream: false);
+    }
+
+    /// <summary>
+    /// Builds a text-processing request: the caller's system prompt with
+    /// " Do not use &lt;think&gt; tags." appended, then <paramref name="text"/> as the user message.
+    /// </summary>
+    /// <param name="systemPrompt">Instruction text for the system message.</param>
+    /// <param name="text">Text to process; sent unchanged as the user message.</param>
+    /// <returns>
+    /// A request with <c>MaxTokens</c> 512, <c>Temperature</c> 0.3, <c>TopP</c> 0.9,
+    /// <see cref="DefaultStopSequences"/> and streaming disabled.
+    /// </returns>
+    public static LlamaServerChatRequest CreateTextProcessor(string systemPrompt, string text) =>
+        new(
+            [
+                new LlamaServerChatMessage("system", $"{systemPrompt} Do not use <think> tags."),
+                new LlamaServerChatMessage("user", text)
+            ],
+            MaxTokens: 512,
+            Temperature: 0.3f,
+            TopP: 0.9f,
+            Stop: DefaultStopSequences,
+            Stream: false);
+}