matatonic · bi1101 · Aug 28, 2024
diff --git a/speech.py b/speech.py
@@ -18,6 +18,9 @@
 from pydantic import BaseModel
 import uvicorn
 
+# Pause inserted between text segments, in seconds; read from the SILENCE_LENGTH environment variable (default 0.2)
+SILENCE_LENGTH = float(os.getenv("SILENCE_LENGTH", "0.2"))
+
 @contextlib.asynccontextmanager
 async def lifespan(app):
     yield
@@ -349,10 +352,15 @@ def generator():
             logger.debug(f"{voice} wav samples: {audio_path}")
 
             try:
-                for text in all_text:
+                for i, text in enumerate(all_text):
                     for chunk in xtts.tts(text=text, language=language, audio_path=audio_path, **hf_generate_kwargs):
                         exception_check(ex_q)
                         in_q.put(chunk)
+
+                    # Insert SILENCE_LENGTH seconds of silence (divided by speed) between text segments, but not after the last one
+                    if i < len(all_text) - 1:
+                        silence_chunk = bytes([0] * int(24000 * SILENCE_LENGTH / speed * 4))  # Configurable silence length
+                        in_q.put(silence_chunk)   
 
             except BrokenPipeError as e: # client disconnect lands here
                 logger.info("Client disconnected - 'Broken pipe'")