using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;

namespace LiveLingo.Core.Processing;

/// <summary>
/// Owns the lifecycle of a local llama-server child process: downloading/locating the
/// executable, starting it for a given model, waiting until it reports readiness on its
/// stdout/stderr, and tearing it down. All lifecycle transitions are serialized through
/// a single <see cref="SemaphoreSlim"/>.
/// </summary>
public sealed class LlamaServerProcessManager : ILlamaServerProcessManager
{
    // How long llama-server gets to announce readiness before we give up.
    private static readonly TimeSpan StartupTimeout = TimeSpan.FromSeconds(60);

    private readonly INativeRuntimeUpdater _updater;
    private readonly ILogger<LlamaServerProcessManager> _logger;

    // Serializes start/stop/dispose; use SemaphoreSlim (not lock) because start is async.
    private readonly SemaphoreSlim _lock = new(1, 1);

    private Process? _process;
    private string? _currentModelPath;
    private string? _currentEndpointUrl;
    private volatile ModelLoadState _state = ModelLoadState.Unloaded;
    private bool _disposed;

    /// <summary>Base URL of the running server, or null when no server is up.</summary>
    public string? CurrentEndpointUrl => _currentEndpointUrl;

    /// <summary>Current load state of the managed server/model.</summary>
    public ModelLoadState State => _state;

    /// <summary>Raised whenever <see cref="State"/> transitions to a new value.</summary>
    public event Action<ModelLoadState>? StateChanged;

    public LlamaServerProcessManager(INativeRuntimeUpdater updater, ILogger<LlamaServerProcessManager> logger)
    {
        _updater = updater;
        _logger = logger;
    }

    // Transitions state and notifies subscribers; no-op when the state is unchanged.
    private void SetState(ModelLoadState state)
    {
        if (_state == state) return;
        _state = state;
        StateChanged?.Invoke(state);
    }

    /// <summary>
    /// Ensures a llama-server instance is running for <paramref name="modelPath"/>.
    /// If a live server already hosts that model, returns immediately; otherwise any
    /// existing server is stopped and a new one is started and awaited until ready.
    /// </summary>
    /// <param name="modelPath">Path to the GGUF model file to load.</param>
    /// <param name="contextSize">Context window size passed as <c>-c</c>.</param>
    /// <param name="inferenceThreads">Thread count passed as <c>--threads</c>.</param>
    /// <param name="ct">Cancels the wait (and start) of the server.</param>
    /// <exception cref="ArgumentException">Model path is null/blank.</exception>
    /// <exception cref="FileNotFoundException">llama-server executable unavailable.</exception>
    /// <exception cref="TimeoutException">Server never reported readiness in time.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    /// <exception cref="InvalidOperationException">Server exited before becoming ready.</exception>
    public async Task EnsureServerRunningAsync(string modelPath, int contextSize, int inferenceThreads, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(modelPath))
        {
            throw new ArgumentException("Model path must be provided.", nameof(modelPath));
        }

        await _lock.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Fast path: the requested model is already being served by a live process.
            if (_process is { HasExited: false } &&
                string.Equals(_currentModelPath, modelPath, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            StopServerInternal();
            SetState(ModelLoadState.Loading);

            var serverExe = await _updater.EnsureLatestLlamaServerAsync(ct).ConfigureAwait(false);
            if (serverExe is null || !File.Exists(serverExe))
            {
                throw new FileNotFoundException("Failed to locate or download llama-server executable.");
            }

            var port = GetAvailablePort();
            _currentEndpointUrl = $"http://127.0.0.1:{port}";

            // Completed by HandleServerLog when the server reports readiness, faulted by
            // the Exited handler on premature death, cancelled via the ct registration.
            var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

            var startInfo = new ProcessStartInfo
            {
                FileName = serverExe,
                Arguments = $"-m \"{modelPath}\" -c {contextSize} --port {port} --threads {inferenceThreads}",
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            // Keep a local reference so the event handlers never observe a null/replaced
            // _process field (the field is nulled by StopServerInternal on teardown).
            var process = new Process { StartInfo = startInfo, EnableRaisingEvents = true };
            _process = process;

            DataReceivedEventHandler onOutput = (_, e) =>
            {
                if (!string.IsNullOrWhiteSpace(e.Data))
                {
                    HandleServerLog(e.Data, tcs);
                }
            };
            process.OutputDataReceived += onOutput;
            process.ErrorDataReceived += onOutput;

            // Fires only for UNEXPECTED exits: StopServerInternal suppresses it via
            // EnableRaisingEvents = false before killing the process deliberately.
            process.Exited += (_, _) =>
            {
                int? exitCode = null;
                try { exitCode = process.ExitCode; }
                catch (InvalidOperationException) { /* process handle already released */ }

                _logger.LogWarning("llama-server process exited unexpectedly with code {ExitCode}.", exitCode);
                _currentEndpointUrl = null;
                _currentModelPath = null;
                SetState(ModelLoadState.Unloaded);
                tcs.TrySetException(new InvalidOperationException($"llama-server exited prematurely with code {exitCode}"));
            };

            _logger.LogInformation("Starting llama-server on port {Port} for model {ModelPath}", port, modelPath);
            process.Start();
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();

            // Propagate caller cancellation into the readiness wait.
            using var reg = ct.Register(() => tcs.TrySetCanceled(ct));

            // NOTE: the timeout timer deliberately does NOT observe ct — otherwise caller
            // cancellation would complete the delay task first and be misreported as a
            // TimeoutException. Cancellation surfaces through tcs instead.
            var timeoutTask = Task.Delay(StartupTimeout, CancellationToken.None);
            var completed = await Task.WhenAny(tcs.Task, timeoutTask).ConfigureAwait(false);
            if (completed == timeoutTask)
            {
                throw new TimeoutException("llama-server did not start within the expected time.");
            }

            await tcs.Task.ConfigureAwait(false); // rethrows fault/cancellation if any

            _currentModelPath = modelPath;
            SetState(ModelLoadState.Loaded);
        }
        catch
        {
            // Leave no half-started server behind on any failure path.
            StopServerInternal();
            SetState(ModelLoadState.Unloaded);
            throw;
        }
        finally
        {
            _lock.Release();
        }
    }

    /// <summary>
    /// Inspects one line of llama-server output: completes <paramref name="tcs"/> when the
    /// server announces it is listening, and logs lines that look like errors.
    /// </summary>
    private void HandleServerLog(string logLine, TaskCompletionSource tcs)
    {
        // llama-server typically says "server is listening on http..." when ready.
        if (logLine.Contains("server is listening", StringComparison.OrdinalIgnoreCase))
        {
            tcs.TrySetResult();
            return;
        }

        var looksLikeError =
            logLine.Contains("ERR", StringComparison.OrdinalIgnoreCase) ||
            logLine.Contains("failed", StringComparison.OrdinalIgnoreCase) ||
            logLine.Contains("error", StringComparison.OrdinalIgnoreCase);

        if (!looksLikeError) return;

        // Only log errors (not normal progress) and skip the known "failed to initialize"
        // false positive — unless the line also mentions llama_model_load, which is a
        // genuine model-loading failure we always want recorded.
        if (!logLine.Contains("failed to initialize", StringComparison.OrdinalIgnoreCase) ||
            logLine.Contains("llama_model_load"))
        {
            _logger.LogError("llama-server: {Log}", logLine);
        }
    }

    /// <summary>
    /// Asks the OS for a currently-free loopback port. The port is released before the
    /// server binds it, so a rare TOCTOU race with another process is possible.
    /// </summary>
    private static int GetAvailablePort()
    {
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        try
        {
            return ((IPEndPoint)listener.LocalEndpoint).Port;
        }
        finally
        {
            listener.Stop();
        }
    }

    /// <summary>Stops the managed llama-server process, if one is running.</summary>
    public async Task StopServerAsync()
    {
        await _lock.WaitAsync().ConfigureAwait(false);
        try
        {
            StopServerInternal();
            SetState(ModelLoadState.Unloaded);
        }
        finally
        {
            _lock.Release();
        }
    }

    /// <summary>
    /// Kills and disposes the current process and clears endpoint/model bookkeeping.
    /// Callers must hold <see cref="_lock"/>. Entirely synchronous by design.
    /// </summary>
    private void StopServerInternal()
    {
        if (_process is not null)
        {
            try
            {
                // This shutdown is intentional: suppress the Exited handler so it cannot
                // log a spurious warning, flip state, or fault a pending startup TCS.
                _process.EnableRaisingEvents = false;
                if (!_process.HasExited)
                {
                    _process.Kill(entireProcessTree: true);
                    _process.WaitForExit(3000);
                }
            }
            catch (Exception ex)
            {
                // Best effort: the process may already be gone or inaccessible.
                _logger.LogDebug(ex, "Error while killing llama-server process.");
            }
            finally
            {
                _process.Dispose();
                _process = null;
            }
        }

        _currentEndpointUrl = null;
        _currentModelPath = null;
    }

    /// <summary>Synchronously stops any running server and releases the lifecycle lock.</summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        try
        {
            _lock.Wait();
            try
            {
                StopServerInternal();
            }
            finally
            {
                _lock.Release();
            }
        }
        catch (ObjectDisposedException)
        {
            // The lock was already disposed by a racing Dispose; nothing left to stop.
        }
        finally
        {
            _lock.Dispose();
        }
    }
}