diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index cd3255176..5611472af 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -344,6 +344,7 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]: ping_message_factory=_ping_message_factory, ) else: + exit_stack.close() return iterator_or_completion