Skip to content

Commit 64cc394

Browse files
committed
Harden llama runtime flow and desktop settings sync
1 parent 38e6db5 commit 64cc394

31 files changed

Lines changed: 2764 additions & 123 deletions
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
# Llama Server Hardening Implementation Plan
2+
3+
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
4+
5+
**Goal:** Harden the local llama-server integration so chat responses are parsed consistently, startup survives caller cancellation, and runtime downloads are more resilient.
6+
7+
**Architecture:** Centralize `/v1/chat/completions` parsing in a single helper used by translation and text-processing flows. Keep model startup as a shared background task owned by `QwenModelHost`, and strengthen runtime download behavior inside `NativeRuntimeUpdater` so transport failures do not corrupt the bootstrap path.
8+
9+
**Tech Stack:** .NET 10, xUnit, NSubstitute, System.Text.Json, HttpClient
10+
11+
---
12+
13+
### Task 1: Centralize chat response parsing
14+
15+
**Files:**
16+
- Modify: `src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs`
17+
- Modify: `src/LiveLingo.Core/Processing/QwenTextProcessor.cs`
18+
- Create: `src/LiveLingo.Core/Processing/LlamaServerChatResponse.cs`
19+
- Test: `tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs`
20+
21+
- [ ] **Step 1: Write the failing test**
22+
23+
```csharp
24+
[Fact]
25+
public void GetAssistantText_reads_text_from_content_array()
26+
{
27+
const string json = """
28+
{"choices":[{"message":{"content":[{"type":"text","text":"Hi"}]}}]}
29+
""";
30+
using var doc = JsonDocument.Parse(json);
31+
Assert.Equal("Hi", LlamaServerChatResponse.GetAssistantText(doc.RootElement));
32+
}
33+
```
34+
35+
- [ ] **Step 2: Run test to verify it fails**
36+
37+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaServerChatResponseTests`
38+
Expected: FAIL because the helper does not yet exist or does not handle the response shape.
39+
40+
- [ ] **Step 3: Write minimal implementation**
41+
42+
Create a helper that:
43+
- reads `choices[0].message.content`
44+
- falls back to `reasoning_content` when `content` is blank
45+
- accepts either string content or OpenAI-style content arrays
46+
- strips Qwen `<think>` wrappers in one place
47+
- emits a short diagnostic string for empty-output logs
48+
49+
- [ ] **Step 4: Run test to verify it passes**
50+
51+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaServerChatResponseTests`
52+
Expected: PASS
53+
54+
- [ ] **Step 5: Commit**
55+
56+
```bash
57+
git add src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs \
58+
src/LiveLingo.Core/Processing/QwenTextProcessor.cs \
59+
src/LiveLingo.Core/Processing/LlamaServerChatResponse.cs \
60+
tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs
61+
git commit -m "test: harden llama chat response parsing"
62+
```
63+
64+
### Task 2: Make Qwen model startup shareable across cancelled callers
65+
66+
**Files:**
67+
- Modify: `src/LiveLingo.Core/Processing/QwenModelHost.cs`
68+
- Test: `tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs`
69+
70+
- [ ] **Step 1: Write the failing test**
71+
72+
```csharp
73+
[Fact]
74+
public async Task GetOrStartServerAsync_keeps_background_load_running_after_first_waiter_cancels()
75+
{
76+
// Arrange host with a gate-controlled server startup.
77+
// Cancel the first caller before startup completes.
78+
// Assert a second caller later receives the same loaded endpoint.
79+
}
80+
```
81+
82+
- [ ] **Step 2: Run test to verify it fails**
83+
84+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenModelHostTests`
85+
Expected: FAIL because startup is still bound to the first caller cancellation path.
86+
87+
- [ ] **Step 3: Write minimal implementation**
88+
89+
Update `QwenModelHost` so:
90+
- one shared `_ensureServerTask` owns startup/download work
91+
- the task is created under lock and run with `CancellationToken.None`
92+
- each caller can still cancel only its own wait via `WaitAsync(ct)`
93+
- reset paths clear the cached task
94+
- completion still verifies the server reached `Loaded`
95+
96+
- [ ] **Step 4: Run test to verify it passes**
97+
98+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenModelHostTests`
99+
Expected: PASS
100+
101+
- [ ] **Step 5: Commit**
102+
103+
```bash
104+
git add src/LiveLingo.Core/Processing/QwenModelHost.cs \
105+
tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs
106+
git commit -m "test: preserve shared llama startup across cancellations"
107+
```
108+
109+
### Task 3: Make runtime downloads resumable and non-destructive
110+
111+
**Files:**
112+
- Modify: `src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs`
113+
- Test: `tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs`
114+
115+
- [ ] **Step 1: Write the failing test**
116+
117+
```csharp
118+
[Fact]
119+
public async Task EnsureLatestLlamaServerAsync_resumes_partial_archive_download()
120+
{
121+
// Arrange an HTTP handler that first returns partial content and then serves the tail.
122+
// Seed an archive file with partial bytes.
123+
// Assert the completed file length matches the advertised total.
124+
}
125+
```
126+
127+
- [ ] **Step 2: Run test to verify it fails**
128+
129+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter NativeRuntimeUpdaterTests`
130+
Expected: FAIL because downloads always restart from scratch.
131+
132+
- [ ] **Step 3: Write minimal implementation**
133+
134+
Add a resumable download helper that:
135+
- reuses a stable archive filename
136+
- sends `Range` when partial bytes already exist
137+
- appends only on `206 Partial Content`
138+
- retries transient HTTP / IO / timeout failures with backoff
139+
- validates final size when `Content-Range` exposes total length
140+
141+
- [ ] **Step 4: Run test to verify it passes**
142+
143+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter NativeRuntimeUpdaterTests`
144+
Expected: PASS
145+
146+
- [ ] **Step 5: Commit**
147+
148+
```bash
149+
git add src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs \
150+
tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs
151+
git commit -m "test: resume llama runtime downloads"
152+
```
153+
154+
### Task 4: Verify integration points only for the redesigned path
155+
156+
**Files:**
157+
- Modify: `src/LiveLingo.Core/Processing/LlamaServerProcessManager.cs`
158+
- Verify: `tests/LiveLingo.Core.Tests/Processing/LlamaServerChatResponseTests.cs`
159+
- Verify: `tests/LiveLingo.Core.Tests/Processing/QwenModelHostTests.cs`
160+
- Verify: `tests/LiveLingo.Core.Tests/Processing/NativeRuntimeUpdaterTests.cs`
161+
162+
- [ ] **Step 1: Add the smallest failing test or assertion for startup arguments if needed**
163+
164+
```csharp
165+
// Add only if there is a practical seam for argument verification.
166+
```
167+
168+
- [ ] **Step 2: Run targeted tests to verify the redesigned path fails where expected**
169+
170+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests"`
171+
Expected: FAIL until all redesigned-path changes are complete.
172+
173+
- [ ] **Step 3: Finalize implementation**
174+
175+
Ensure the server starts with reasoning disabled and that translation now fails loudly on empty assistant output instead of silently echoing source text.
176+
177+
- [ ] **Step 4: Run targeted verification**
178+
179+
Run: `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests"`
180+
Expected: PASS
181+
182+
- [ ] **Step 5: Commit**
183+
184+
```bash
185+
git add src/LiveLingo.Core/Processing/LlamaServerProcessManager.cs \
186+
src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs \
187+
src/LiveLingo.Core/Processing/QwenTextProcessor.cs \
188+
src/LiveLingo.Core/Processing/QwenModelHost.cs \
189+
src/LiveLingo.Core/Processing/NativeRuntimeUpdater.cs
190+
git commit -m "feat: harden local llama server integration"
191+
```
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# New Design Coverage Sweep Implementation Plan
2+
3+
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
4+
5+
**Goal:** Raise test coverage around the redesigned llama-server call boundary without drifting into unrelated legacy test repair.
6+
7+
**Architecture:** Add focused unit tests around the new shared chat request / response path, especially the two concrete callers that still lacked direct behavioral tests: `LlamaTranslationEngine` and `QwenTextProcessor`. Keep production changes minimal and only when tests reveal missing seams or mismatched behavior.
8+
9+
**Tech Stack:** .NET 10, xUnit, NSubstitute, HttpClient, System.Text.Json
10+
11+
---
12+
13+
### Task 1: Cover `LlamaTranslationEngine` request/response behavior
14+
15+
**Files:**
16+
- Create: `tests/LiveLingo.Core.Tests/Engines/LlamaTranslationEngineTests.cs`
17+
- Modify: `src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs` (only if tests expose a missing seam)
18+
19+
- [ ] **Step 1: Write the failing tests**
20+
- request body includes shared stop sequences and `stream = false`
21+
- response content arrays are parsed into final translation text
22+
- empty assistant output throws `InvalidOperationException`
23+
- [ ] **Step 2: Run `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter LlamaTranslationEngineTests` and verify red**
24+
- [ ] **Step 3: Apply the minimal production fix only if needed**
25+
- [ ] **Step 4: Re-run the same filter and verify green**
26+
27+
### Task 2: Cover `QwenTextProcessor` fallback behavior
28+
29+
**Files:**
30+
- Create: `tests/LiveLingo.Core.Tests/Processing/QwenTextProcessorTests.cs`
31+
- Modify: `src/LiveLingo.Core/Processing/QwenTextProcessor.cs` (only if tests expose a missing seam)
32+
33+
- [ ] **Step 1: Write the failing tests**
34+
- request body uses the shared request factory defaults
35+
- empty assistant output falls back to original text
36+
- transport failure falls back to original text
37+
- [ ] **Step 2: Run `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter QwenTextProcessorTests` and verify red**
38+
- [ ] **Step 3: Apply the minimal production fix only if needed**
39+
- [ ] **Step 4: Re-run the same filter and verify green**
40+
41+
### Task 3: Re-verify the redesigned Core path
42+
43+
**Files:**
44+
- Verify only
45+
46+
- [ ] **Step 1: Run** `dotnet test tests/LiveLingo.Core.Tests/LiveLingo.Core.Tests.csproj --filter "LlamaTranslationEngineTests|QwenTextProcessorTests|LlamaServerChatRequestTests|LlamaServerChatResponseTests|QwenModelHostTests|NativeRuntimeUpdaterTests|LlamaServerProcessManagerTests|TranslationPipelineTests"`
47+
- [ ] **Step 2: Confirm all targeted redesigned-path tests pass**

src/LiveLingo.Core/Engines/LlamaTranslationEngine.cs

Lines changed: 11 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ public sealed class LlamaTranslationEngine : ITranslationEngine
2929
public IReadOnlyList<LanguageInfo> SupportedLanguages { get; } =
3030
Languages.Select(kv => new LanguageInfo(kv.Key, kv.Value.DisplayName)).ToList();
3131

32-
internal static readonly string[] StopSequences = ["</s>", "<|im_end|>", "</think>"];
33-
3432
public LlamaTranslationEngine(QwenModelHost host, HttpClient http, ILogger<LlamaTranslationEngine> logger)
3533
{
3634
_host = host;
@@ -49,52 +47,26 @@ public async Task<string> TranslateAsync(
4947
var endpoint = await _host.GetOrStartServerAsync(ct);
5048
var url = $"{endpoint}/v1/chat/completions";
5149

52-
var systemPrompt = $"You are an expert translation engine. Your task is to translate the source text from {srcName} to {tgtName}.\n\nRules:\n1. Output ONLY the final {tgtName} translation.\n2. Do NOT output any {srcName} text.\n3. Do NOT output any explanations, conversational text, or notes.\n4. Do NOT use <think> tags or output any thought process.";
53-
var userPrompt = $"Translate the following {srcName} text to {tgtName}:\n\n<source>\n{text}\n</source>";
54-
55-
_logger.LogDebug("Translation prompt for {Src}→{Tgt}: {Prompt}", sourceLanguage, targetLanguage, userPrompt);
50+
var requestBody = LlamaServerChatRequest.CreateTranslation(text, srcName, tgtName);
5651

57-
var requestBody = new
58-
{
59-
messages = new[]
60-
{
61-
new { role = "system", content = systemPrompt },
62-
new { role = "user", content = userPrompt }
63-
},
64-
max_tokens = 512,
65-
temperature = 0.1f,
66-
top_p = 0.95f,
67-
stop = StopSequences,
68-
stream = false
69-
};
52+
_logger.LogDebug("Translation prompt for {Src}→{Tgt}: {Prompt}", sourceLanguage, targetLanguage, requestBody.Messages[1].Content);
7053

7154
var response = await _http.PostAsJsonAsync(url, requestBody, ct);
7255
response.EnsureSuccessStatusCode();
7356

7457
var json = await response.Content.ReadAsStringAsync(ct);
7558
using var doc = JsonDocument.Parse(json);
76-
var result = doc.RootElement
77-
.GetProperty("choices")[0]
78-
.GetProperty("message")
79-
.GetProperty("content")
80-
.GetString()?.Trim() ?? string.Empty;
81-
82-
// Clean up <think> tags if the model still generated them
83-
if (result.Contains("</think>"))
84-
{
85-
var parts = result.Split("</think>");
86-
result = parts.Last().Trim();
87-
}
88-
else if (result.StartsWith("<think>"))
89-
{
90-
// Model generated <think> but didn't finish it
91-
result = string.Empty;
92-
}
59+
var result = LlamaServerChatResponse.GetAssistantText(doc.RootElement);
60+
result = LlamaServerChatResponse.StripQwenThinkTags(result);
9361

9462
if (string.IsNullOrWhiteSpace(result))
9563
{
96-
_logger.LogWarning("Translation returned empty output for {Src}→{Tgt}", sourceLanguage, targetLanguage);
97-
return text;
64+
_logger.LogWarning(
65+
"Translation returned empty output for {Src}→{Tgt}. {Diag}",
66+
sourceLanguage,
67+
targetLanguage,
68+
LlamaServerChatResponse.DescribeFirstChoiceForLog(doc.RootElement));
69+
throw new InvalidOperationException("Translation returned empty output.");
9870
}
9971

10072
_logger.LogDebug("Translated {Src}→{Tgt}: {In} → {Out}", sourceLanguage, targetLanguage, text, result);
@@ -106,14 +78,6 @@ public bool SupportsLanguagePair(string sourceLanguage, string targetLanguage) =
10678

10779
public void Dispose() { }
10880

109-
internal static string BuildPrompt(string text, string sourceLanguage, string targetLanguage)
110-
{
111-
var srcName = GetLanguageName(sourceLanguage);
112-
var tgtName = GetLanguageName(targetLanguage);
113-
var systemPrompt = $"You are a professional translator. Translate the user's text from {srcName} to {tgtName}. Output ONLY the translated text, nothing else. Do not output any thought process or explanation. Do not use <think> tags.";
114-
return $"<|im_start|>system\n{systemPrompt}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n";
115-
}
116-
11781
private static string GetLanguageName(string code) =>
11882
Languages.TryGetValue(code, out var info) ? info.EnglishName : code;
119-
}
83+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
using System.Text.Json.Serialization;

namespace LiveLingo.Core.Processing;

/// <summary>A single chat message sent to the llama-server OpenAI-compatible endpoint.</summary>
public sealed record LlamaServerChatMessage(
    [property: JsonPropertyName("role")] string Role,
    [property: JsonPropertyName("content")] string Content);

/// <summary>
/// Request body for llama-server's <c>/v1/chat/completions</c> endpoint.
/// The factory methods centralize the prompts, stop sequences, and sampling
/// defaults shared by the translation and text-processing callers so the two
/// flows cannot drift apart.
/// </summary>
public sealed record LlamaServerChatRequest(
    [property: JsonPropertyName("messages")] IReadOnlyList<LlamaServerChatMessage> Messages,
    [property: JsonPropertyName("max_tokens")] int MaxTokens,
    [property: JsonPropertyName("temperature")] float Temperature,
    [property: JsonPropertyName("top_p")] float TopP,
    [property: JsonPropertyName("stop")] IReadOnlyList<string> Stop,
    [property: JsonPropertyName("stream")] bool Stream)
{
    /// <summary>
    /// Stop sequences used by every request shape: end-of-sequence, Qwen end-of-turn,
    /// and the closing think tag (cuts off any leaked reasoning output).
    /// </summary>
    public static readonly string[] DefaultStopSequences = ["</s>", "<|im_end|>", "</think>"];

    /// <summary>
    /// Builds a low-temperature translation request for <paramref name="text"/>
    /// from <paramref name="sourceLanguageName"/> to <paramref name="targetLanguageName"/>.
    /// </summary>
    public static LlamaServerChatRequest CreateTranslation(
        string text,
        string sourceLanguageName,
        string targetLanguageName)
    {
        var systemPrompt =
            $"You are an expert translation engine. Your task is to translate the source text from {sourceLanguageName} to {targetLanguageName}.\n\n" +
            $"Rules:\n" +
            $"1. Output ONLY the final {targetLanguageName} translation.\n" +
            $"2. Do NOT output any {sourceLanguageName} text.\n" +
            $"3. Do NOT output any explanations, conversational text, or notes.\n" +
            // "Do NOT" restores the emphasis used by rules 1-3 and by the
            // pre-refactor prompt this factory replaced.
            $"4. Do NOT use <think> tags or output any thought process.";
        var userPrompt = $"Translate the following {sourceLanguageName} text to {targetLanguageName}:\n\n<source>\n{text}\n</source>";

        return new LlamaServerChatRequest(
            [
                new LlamaServerChatMessage("system", systemPrompt),
                new LlamaServerChatMessage("user", userPrompt)
            ],
            MaxTokens: 512,
            Temperature: 0.1f,
            TopP: 0.95f,
            Stop: DefaultStopSequences,
            Stream: false);
    }

    /// <summary>
    /// Builds a text-processing request with a caller-supplied system prompt;
    /// a slightly higher temperature than translation since rewriting tolerates
    /// more variety.
    /// </summary>
    public static LlamaServerChatRequest CreateTextProcessor(string systemPrompt, string text) =>
        new(
            [
                new LlamaServerChatMessage("system", $"{systemPrompt} Do not use <think> tags."),
                new LlamaServerChatMessage("user", text)
            ],
            MaxTokens: 512,
            Temperature: 0.3f,
            TopP: 0.9f,
            Stop: DefaultStopSequences,
            Stream: false);
}

0 commit comments

Comments (0)