Skip to content

Commit 38e6db5

Browse files
committed
Add LlamaServerProcessManager implementation
Made-with: Cursor
1 parent 7f97d05 commit 38e6db5

File tree

2 files changed

+257
-0
lines changed

2 files changed

+257
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
namespace LiveLingo.Core.Processing;

/// <summary>
/// Manages the lifecycle of a background llama-server process: starting it for a
/// given model, exposing its local HTTP endpoint, and stopping it on demand or on dispose.
/// </summary>
public interface ILlamaServerProcessManager : IDisposable
{
    /// <summary>
    /// Gets the current HTTP endpoint URL (e.g. "http://127.0.0.1:50123") if the server is running.
    /// </summary>
    string? CurrentEndpointUrl { get; }

    /// <summary>
    /// Gets the current load state of the managed server (unloaded, loading, or loaded).
    /// </summary>
    ModelLoadState State { get; }

    /// <summary>
    /// Raised whenever <see cref="State"/> changes. Only transitions are reported;
    /// setting the same state twice does not raise the event.
    /// </summary>
    event Action<ModelLoadState>? StateChanged;

    /// <summary>
    /// Starts the llama-server process with the specified model and settings.
    /// If the server is already running with the same model, it does nothing.
    /// If it's running with a different model, it stops the old one and starts a new one.
    /// </summary>
    /// <param name="modelPath">Filesystem path to the GGUF model to serve.</param>
    /// <param name="contextSize">Context window size passed to the server (-c).</param>
    /// <param name="inferenceThreads">Number of inference threads (--threads).</param>
    /// <param name="ct">Cancels waiting for the server to become ready.</param>
    Task EnsureServerRunningAsync(string modelPath, int contextSize, int inferenceThreads, CancellationToken ct = default);

    /// <summary>
    /// Gracefully stops the background llama-server process.
    /// </summary>
    Task StopServerAsync();
}
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using Microsoft.Extensions.Logging;

namespace LiveLingo.Core.Processing;

/// <summary>
/// Default <see cref="ILlamaServerProcessManager"/> implementation. Spawns llama-server
/// as a child process on a dynamically chosen loopback port and waits for its
/// "server is listening" log line before reporting the model as loaded.
/// All lifecycle transitions are serialized through a single async lock.
/// </summary>
public sealed class LlamaServerProcessManager : ILlamaServerProcessManager
{
    private readonly INativeRuntimeUpdater _updater;
    private readonly ILogger<LlamaServerProcessManager> _logger;

    // Async-compatible mutex guarding every start/stop transition (lock cannot span awaits).
    private readonly SemaphoreSlim _lock = new(1, 1);

    private Process? _process;
    private string? _currentModelPath;
    private string? _currentEndpointUrl;
    private volatile ModelLoadState _state = ModelLoadState.Unloaded;
    private bool _disposed;

    public string? CurrentEndpointUrl => _currentEndpointUrl;

    public ModelLoadState State => _state;

    public event Action<ModelLoadState>? StateChanged;

    public LlamaServerProcessManager(INativeRuntimeUpdater updater, ILogger<LlamaServerProcessManager> logger)
    {
        _updater = updater;
        _logger = logger;
    }

    /// <summary>Transitions to <paramref name="state"/> and notifies listeners; no-op if unchanged.</summary>
    private void SetState(ModelLoadState state)
    {
        if (_state == state) return;
        _state = state;
        StateChanged?.Invoke(state);
    }

    /// <summary>
    /// Ensures a llama-server instance is running for <paramref name="modelPath"/>.
    /// Restarts the server if a different model is currently loaded.
    /// </summary>
    /// <exception cref="FileNotFoundException">The server executable could not be located.</exception>
    /// <exception cref="TimeoutException">The server did not report readiness within 60 seconds.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was canceled.</exception>
    public async Task EnsureServerRunningAsync(string modelPath, int contextSize, int inferenceThreads, CancellationToken ct = default)
    {
        await _lock.WaitAsync(ct);
        try
        {
            // Fast path: already serving the requested model.
            if (_process is { HasExited: false } && string.Equals(_currentModelPath, modelPath, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            await StopServerInternalAsync();

            SetState(ModelLoadState.Loading);

            var serverExe = await _updater.EnsureLatestLlamaServerAsync(ct);
            if (serverExe is null || !File.Exists(serverExe))
            {
                throw new FileNotFoundException("Failed to locate or download llama-server executable.");
            }

            var port = GetAvailablePort();
            _currentEndpointUrl = $"http://127.0.0.1:{port}";

            var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

            var startInfo = new ProcessStartInfo
            {
                FileName = serverExe,
                Arguments = $"-m \"{modelPath}\" -c {contextSize} --port {port} --threads {inferenceThreads}",
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            // Capture a local so the event handlers below stay bound to THIS process
            // instance even after _process is swapped by a later restart.
            var process = new Process { StartInfo = startInfo, EnableRaisingEvents = true };
            _process = process;

            DataReceivedEventHandler onLog = (_, e) =>
            {
                if (!string.IsNullOrWhiteSpace(e.Data))
                {
                    HandleServerLog(e.Data, tcs);
                }
            };
            process.OutputDataReceived += onLog;
            process.ErrorDataReceived += onLog;

            process.Exited += (_, _) =>
            {
                // StopServerInternalAsync disables EnableRaisingEvents before an
                // intentional kill, so reaching here means the server crashed.
                int exitCode;
                try { exitCode = process.ExitCode; }
                catch { exitCode = -1; } // process object may already be disposed

                _logger.LogWarning("llama-server process exited unexpectedly with code {ExitCode}.", exitCode);

                // Only clear shared state if this is still the active process; a stale
                // handler must not wipe out the endpoint of a newly started server.
                if (ReferenceEquals(_process, process))
                {
                    _currentEndpointUrl = null;
                    _currentModelPath = null;
                    SetState(ModelLoadState.Unloaded);
                }

                tcs.TrySetException(new InvalidOperationException($"llama-server exited prematurely with code {exitCode}"));
            };

            _logger.LogInformation("Starting llama-server on port {Port} for model {ModelPath}", port, modelPath);
            process.Start();
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();

            // Wait until server reports it's ready or fails.
            using var reg = ct.Register(() => tcs.TrySetCanceled(ct));

            // Timeout just in case it hangs; linked CTS lets us release the timer early.
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            var timeoutTask = Task.Delay(TimeSpan.FromSeconds(60), timeoutCts.Token);
            var completedTask = await Task.WhenAny(tcs.Task, timeoutTask);

            // Distinguish a real timeout from user cancellation (which also completes
            // timeoutTask); cancellation is surfaced by the tcs await below instead.
            if (completedTask == timeoutTask && !ct.IsCancellationRequested)
            {
                await StopServerInternalAsync();
                throw new TimeoutException("llama-server did not start within the expected time.");
            }

            timeoutCts.Cancel(); // stop the 60-second timer from lingering

            await tcs.Task; // throw if faulted/canceled

            _currentModelPath = modelPath;
            SetState(ModelLoadState.Loaded);
        }
        catch
        {
            await StopServerInternalAsync();
            SetState(ModelLoadState.Unloaded);
            throw;
        }
        finally
        {
            _lock.Release();
        }
    }

    /// <summary>
    /// Inspects one server log line: completes <paramref name="tcs"/> on the readiness
    /// banner, and surfaces error-looking lines to the logger.
    /// </summary>
    private void HandleServerLog(string logLine, TaskCompletionSource tcs)
    {
        // llama-server typically says "server is listening on http..." when ready.
        if (logLine.Contains("server is listening", StringComparison.OrdinalIgnoreCase))
        {
            tcs.TrySetResult();
        }
        else if (logLine.Contains("ERR", StringComparison.OrdinalIgnoreCase) ||
                 logLine.Contains("failed", StringComparison.OrdinalIgnoreCase) ||
                 logLine.Contains("error", StringComparison.OrdinalIgnoreCase))
        {
            // Specifically log errors to avoid spamming Serilog with normal progress.
            // Ignore some common false positives, except genuine model-load failures.
            if (!logLine.Contains("failed to initialize", StringComparison.OrdinalIgnoreCase) ||
                logLine.Contains("llama_model_load", StringComparison.OrdinalIgnoreCase))
            {
                _logger.LogError("llama-server: {Log}", logLine);
            }
        }
    }

    /// <summary>
    /// Asks the OS for a free loopback TCP port. Note: inherently racy (TOCTOU) — the
    /// port could be grabbed between release here and llama-server binding it.
    /// </summary>
    private int GetAvailablePort()
    {
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        var port = ((IPEndPoint)listener.LocalEndpoint).Port;
        listener.Stop();
        return port;
    }

    /// <inheritdoc />
    public async Task StopServerAsync()
    {
        await _lock.WaitAsync();
        try
        {
            await StopServerInternalAsync();
            SetState(ModelLoadState.Unloaded);
        }
        finally
        {
            _lock.Release();
        }
    }

    /// <summary>
    /// Kills and disposes the current process (if any) and clears endpoint/model state.
    /// Caller must hold <see cref="_lock"/>. Never throws.
    /// </summary>
    private Task StopServerInternalAsync()
    {
        if (_process != null)
        {
            try
            {
                if (!_process.HasExited)
                {
                    // Suppress the Exited event so an intentional kill is not
                    // reported (and handled) as an unexpected crash.
                    _process.EnableRaisingEvents = false;
                    _process.Kill(true);
                    _process.WaitForExit(3000);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Error while killing llama-server process.");
            }
            finally
            {
                _process.Dispose();
                _process = null;
            }
        }

        _currentEndpointUrl = null;
        _currentModelPath = null;
        return Task.CompletedTask;
    }

    /// <summary>Stops any running server and releases the internal lock. Idempotent.</summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        try
        {
            // Synchronous wait is acceptable here: Dispose has no async counterpart
            // on this type and StopServerInternalAsync completes synchronously.
            _lock.Wait();
            try
            {
                StopServerInternalAsync().GetAwaiter().GetResult();
            }
            finally
            {
                _lock.Release();
            }
        }
        catch (ObjectDisposedException) { }
        finally
        {
            _lock.Dispose();
        }
    }
}

0 commit comments

Comments
 (0)