From 10db1bda8cee09805fcfe254516c2b25b2d9cb80 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 25 Aug 2025 13:59:46 +0300
Subject: [PATCH 01/22] Add a dependency group for Respeecher

---
 pyproject.toml |  1 +
 uv.lock        | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d61c3efe3d..a4ea467639 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,6 +85,7 @@ openrouter = []
 perplexity = []
 playht = [ "websockets>=13.1,<15.0" ]
 qwen = []
+respeecher = [ "respeecher~=1.0" ]
 rime = [ "websockets>=13.1,<15.0" ]
 riva = [ "nvidia-riva-client~=2.21.1" ]
 runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.117.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
diff --git a/uv.lock b/uv.lock
index bbe6a1ca87..19612d0a31 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4315,6 +4315,9 @@ openpipe = [
 playht = [
     { name = "websockets" },
 ]
+respeecher = [
+    { name = "respeecher" },
+]
 rime = [
     { name = "websockets" },
 ]
@@ -4452,6 +4455,7 @@ requires-dist = [
     { name = "python-dotenv", marker = "extra == 'runner'", specifier = ">=1.0.0,<2.0.0" },
     { name = "pyvips", extras = ["binary"], marker = "extra == 'moondream'", specifier = "~=3.0.0" },
     { name = "resampy", specifier = "~=0.4.3" },
+    { name = "respeecher", marker = "extra == 'respeecher'", specifier = "~=1.0" },
     { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = "~=2.23.1" },
     { name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
     { name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.0" },
@@ -4483,7 +4487,7 @@ requires-dist = [
     { name = "websockets", marker = "extra == 'soniox'", specifier = ">=13.1,<15.0" },
     { name = "websockets", marker = "extra == 'websocket'", specifier = ">=13.1,<15.0" },
 ]
-provides-extras = ["anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "deepseek", "daily", "deepgram", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "inworld", "krisp", "koala", "langchain", "livekit", "lmnt", "local", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nim", "neuphonic", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "rime", "riva", "runner", "sambanova", "sentry", "local-smart-turn", "remote-smart-turn", "silero", "simli", "soniox", "soundfile", "speechmatics", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "whisper"]
+provides-extras = ["anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "deepseek", "daily", "deepgram", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "inworld", "krisp", "koala", "langchain", "livekit", "lmnt", "local", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nim", "neuphonic", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "respeecher", "rime", "riva", "runner", "sambanova", "sentry", "local-smart-turn", "remote-smart-turn", "silero", "simli", "soniox", "soundfile", "speechmatics", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "whisper"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -5749,6 +5753,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4d/b9/3b00ac340a1aab3389ebcc52c779914a44aadf7b0cb7a3bf053195735607/resampy-0.4.3-py3-none-any.whl", hash = "sha256:ad2ed64516b140a122d96704e32bc0f92b23f45419e8b8f478e5a05f83edcebd", size = 3076529, upload-time = "2024-03-05T20:36:02.439Z" },
 ]
 
+[[package]]
+name = "respeecher"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "pydantic-core" },
+    { name = "typing-extensions" },
+    { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/a5/9eed1454f3c737733dffbd66c2212ea6a72168442393db6b52c138ba4dc7/respeecher-1.0.1.tar.gz", hash = "sha256:74a1f423d8a5fdd600b1634a1830525a0cb3996252687cc477c76254ce4d79a1", size = 28762, upload-time = "2025-08-25T10:50:53.533Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2e/bf/adc9d59abfdc6fee22ea8af97088b1713e090d4ab640a53d49e01e38b863/respeecher-1.0.1-py3-none-any.whl", hash = "sha256:7c940791cc310f939b2a9b7dc99f11830402b6b7eb934d7fb0282b38386f1f5c", size = 50565, upload-time = "2025-08-25T10:50:52.537Z" },
+]
+
 [[package]]
 name = "rich"
 version = "14.1.0"

From 24565b20615e62a6aea5c4b4cd6ccb145a95b80f Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 25 Aug 2025 15:33:12 +0300
Subject: [PATCH 02/22] Add Respeecher to env.example

---
 env.example | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/env.example b/env.example
index fae6bb2a4d..81d662425a 100644
--- a/env.example
+++ b/env.example
@@ -140,3 +140,6 @@ SENTRY_DSN=...
 
 # Heygen
 HEYGEN_API_KEY=...
+
+# Respeecher
+RESPEECHER_API_KEY=...

From c488706a2728f9b095d755fd3c3821908017fe1f Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 25 Aug 2025 15:36:53 +0300
Subject: [PATCH 03/22] Spit the audio context functionality out

---
 src/pipecat/services/tts_service.py | 188 +++++++++++++++++++---------
 1 file changed, 129 insertions(+), 59 deletions(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 5562764944..77d1286935 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -702,29 +702,11 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             self._bot_speaking = False
 
 
-class AudioContextWordTTSService(WebsocketWordTTSService):
-    """Websocket-based TTS service with word timestamps and audio context management.
-
-    This is a base class for websocket-based TTS services that support word
-    timestamps and also allow correlating the generated audio with the requested
-    text.
-
-    Each request could be multiple sentences long which are grouped by
-    context. For this to work, the TTS service needs to support handling
-    multiple requests at once (i.e. multiple simultaneous contexts).
-
-    The audio received from the TTS will be played in context order. That is, if
-    we requested audio for a context "A" and then audio for context "B", the
-    audio from context ID "A" will be played first.
-    """
-
-    def __init__(self, **kwargs):
-        """Initialize the Audio Context Word TTS service.
+class _AudioContextContainer:
+    """A container for audio contexts."""
 
-        Args:
-            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
-        """
-        super().__init__(**kwargs)
+    def __init__(self):
+        """Initialize the container."""
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
 
@@ -777,43 +759,6 @@ def audio_context_available(self, context_id: str) -> bool:
         """
         return context_id in self._contexts
 
-    async def start(self, frame: StartFrame):
-        """Start the audio context TTS service.
-
-        Args:
-            frame: The start frame containing initialization parameters.
-        """
-        await super().start(frame)
-        self._create_audio_context_task()
-
-    async def stop(self, frame: EndFrame):
-        """Stop the audio context TTS service.
-
-        Args:
-            frame: The end frame.
-        """
-        await super().stop(frame)
-        if self._audio_context_task:
-            # Indicate no more audio contexts are available. this will end the
-            # task cleanly after all contexts have been processed.
-            await self._contexts_queue.put(None)
-            await self.wait_for_task(self._audio_context_task)
-            self._audio_context_task = None
-
-    async def cancel(self, frame: CancelFrame):
-        """Cancel the audio context TTS service.
-
-        Args:
-            frame: The cancel frame.
-        """
-        await super().cancel(frame)
-        await self._stop_audio_context_task()
-
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
-        await super()._handle_interruption(frame, direction)
-        await self._stop_audio_context_task()
-        self._create_audio_context_task()
-
     def _create_audio_context_task(self):
         if not self._audio_context_task:
             self._contexts_queue = WatchdogQueue(self.task_manager)
@@ -868,3 +813,128 @@ async def _handle_audio_context(self, context_id: str):
                 # We didn't get audio, so let's consider this context finished.
                 logger.trace(f"{self} time out on audio context {context_id}")
                 break
+
+
+class AudioContextTTSService(WebsocketTTSService, _AudioContextContainer):
+    """Websocket-based TTS service with audio context management.
+
+    This is a base class for websocket-based TTS services that allow correlating
+    the generated audio with the requested text.
+
+    Each request could be multiple sentences long which are grouped by
+    context. For this to work, the TTS service needs to support handling
+    multiple requests at once (i.e. multiple simultaneous contexts).
+
+    The audio received from the TTS will be played in context order. That is, if
+    we requested audio for a context "A" and then audio for context "B", the
+    audio from context ID "A" will be played first.
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the Audio Context TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
+        """
+        WebSocketTTSService.__init__(self, **kwargs)
+        _AudioContextContainer.__init__(self)
+
+    async def start(self, frame: StartFrame):
+        """Start the audio context TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._create_audio_context_task()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the audio context TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        if self._audio_context_task:
+            # Indicate no more audio contexts are available. this will end the
+            # task cleanly after all contexts have been processed.
+            await self._contexts_queue.put(None)
+            await self.wait_for_task(self._audio_context_task)
+            self._audio_context_task = None
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the audio context TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._stop_audio_context_task()
+
+    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self._stop_audio_context_task()
+        self._create_audio_context_task()
+
+
+class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextContainer):
+    """Websocket-based TTS service with word timestamps and audio context management.
+
+    This is a base class for websocket-based TTS services that support word
+    timestamps and also allow correlating the generated audio with the requested
+    text.
+
+    Each request could be multiple sentences long which are grouped by
+    context. For this to work, the TTS service needs to support handling
+    multiple requests at once (i.e. multiple simultaneous contexts).
+
+    The audio received from the TTS will be played in context order. That is, if
+    we requested audio for a context "A" and then audio for context "B", the
+    audio from context ID "A" will be played first.
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the Audio Context Word TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
+        WebSocketTTSService.__init__(self, **kwargs)
+        _AudioContextContainer.__init__(self)
+
+    async def start(self, frame: StartFrame):
+        """Start the audio context TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._create_audio_context_task()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the audio context TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        if self._audio_context_task:
+            # Indicate no more audio contexts are available. this will end the
+            # task cleanly after all contexts have been processed.
+            await self._contexts_queue.put(None)
+            await self.wait_for_task(self._audio_context_task)
+            self._audio_context_task = None
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the audio context TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
+        await super().cancel(frame)
+        await self._stop_audio_context_task()
+
+    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self._stop_audio_context_task()
+        self._create_audio_context_task()

From 1417791aaa7e626ba43608c1dddcac0e47718974 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Thu, 28 Aug 2025 15:38:25 +0300
Subject: [PATCH 04/22] Fix some typos

---
 src/pipecat/services/tts_service.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 77d1286935..966babb35a 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -836,7 +836,7 @@ def __init__(self, **kwargs):
         Args:
             **kwargs: Additional arguments passed to the parent WebsocketTTSService.
         """
-        WebSocketTTSService.__init__(self, **kwargs)
+        WebsocketTTSService.__init__(self, **kwargs)
         _AudioContextContainer.__init__(self)
 
     async def start(self, frame: StartFrame):
@@ -899,7 +899,7 @@ def __init__(self, **kwargs):
         Args:
             **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
         """
-        WebSocketTTSService.__init__(self, **kwargs)
+        WebsocketWordTTSService.__init__(self, **kwargs)
         _AudioContextContainer.__init__(self)
 
     async def start(self, frame: StartFrame):

From 051c5fc9f0c9111e1d8dad39fb190c30aabf762c Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Thu, 28 Aug 2025 16:11:07 +0300
Subject: [PATCH 05/22] Make the type checker happy

---
 src/pipecat/services/tts_service.py | 33 ++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 966babb35a..e224ef5b5a 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -7,8 +7,8 @@
 """Base classes for Text-to-speech services."""
 
 import asyncio
-from abc import abstractmethod
-from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional, Sequence, Tuple
+from abc import ABC, abstractmethod
+from typing import Any, AsyncGenerator, Coroutine, Dict, List, Mapping, Optional, Sequence, Tuple
 
 from loguru import logger
 
@@ -37,6 +37,7 @@
 from pipecat.services.ai_service import AIService
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
+from pipecat.utils.asyncio.task_manager import BaseTaskManager
 from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.base_text_filter import BaseTextFilter
@@ -702,7 +703,7 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             self._bot_speaking = False
 
 
-class _AudioContextContainer:
+class _AudioContextContainer(ABC):
     """A container for audio contexts."""
 
     def __init__(self):
@@ -814,6 +815,32 @@ async def _handle_audio_context(self, context_id: str):
                 logger.trace(f"{self} time out on audio context {context_id}")
                 break
 
+    @abstractmethod
+    def reset_watchdog(self) -> None:
+        pass
+
+    @abstractmethod
+    def create_task(self, coroutine: Coroutine) -> asyncio.Task:
+        pass
+
+    @abstractmethod
+    async def cancel_task(self, task: asyncio.Task) -> None:
+        pass
+
+    @abstractmethod
+    async def push_frame(self, frame: Frame) -> None:
+        pass
+
+    @property
+    @abstractmethod
+    def sample_rate(self) -> int:
+        pass
+
+    @property
+    @abstractmethod
+    def task_manager(self) -> BaseTaskManager:
+        pass
+
 
 class AudioContextTTSService(WebsocketTTSService, _AudioContextContainer):
     """Websocket-based TTS service with audio context management.

From ee2f11dc4c9af6981638e7b6a465dc96b45189cd Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Thu, 28 Aug 2025 18:06:20 +0300
Subject: [PATCH 06/22] Rename a class

---
 src/pipecat/services/tts_service.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index e224ef5b5a..f4cd1891eb 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -703,11 +703,11 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             self._bot_speaking = False
 
 
-class _AudioContextContainer(ABC):
-    """A container for audio contexts."""
+class _AudioContextBaseService(ABC):
+    """A service that supports audio contexts."""
 
     def __init__(self):
-        """Initialize the container."""
+        """Initialize the service."""
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
 
@@ -842,7 +842,7 @@ def task_manager(self) -> BaseTaskManager:
         pass
 
 
-class AudioContextTTSService(WebsocketTTSService, _AudioContextContainer):
+class AudioContextTTSService(WebsocketTTSService, _AudioContextBaseService):
     """Websocket-based TTS service with audio context management.
 
     This is a base class for websocket-based TTS services that allow correlating
@@ -864,7 +864,7 @@ def __init__(self, **kwargs):
             **kwargs: Additional arguments passed to the parent WebsocketTTSService.
         """
         WebsocketTTSService.__init__(self, **kwargs)
-        _AudioContextContainer.__init__(self)
+        _AudioContextBaseService.__init__(self)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.
@@ -904,7 +904,7 @@ async def _handle_interruption(self, frame: StartInterruptionFrame, direction: F
         self._create_audio_context_task()
 
 
-class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextContainer):
+class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextBaseService):
     """Websocket-based TTS service with word timestamps and audio context management.
 
     This is a base class for websocket-based TTS services that support word
@@ -927,7 +927,7 @@ def __init__(self, **kwargs):
             **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
         """
         WebsocketWordTTSService.__init__(self, **kwargs)
-        _AudioContextContainer.__init__(self)
+        _AudioContextBaseService.__init__(self)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.

From eb467d4e27322a768086062672efb2ed9d1e033b Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Thu, 28 Aug 2025 19:43:02 +0300
Subject: [PATCH 07/22] Add a link to the future docs page

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index fb66a4f271..7c086c8bfc 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ You can connect to Pipecat from any platform using our official SDKs:
 | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Speech-to-Text      | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper)                                                                                                                                                                                          |
 | LLMs                | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together)                                                                                          |
-| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
+| Text-to-Speech      | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Respeecher](https://docs.pipecat.ai/server/services/tts/respeecher), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
 | Speech-to-Speech    | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | Transport           | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | Serializers         | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |

From c9a12b47089342c5464f57203103a0fd2d6b2b65 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Thu, 28 Aug 2025 19:48:20 +0300
Subject: [PATCH 08/22] Add initial implementation

---
 src/pipecat/services/respeecher/__init__.py |   0
 src/pipecat/services/respeecher/tts.py      | 290 ++++++++++++++++++++
 2 files changed, 290 insertions(+)
 create mode 100644 src/pipecat/services/respeecher/__init__.py
 create mode 100644 src/pipecat/services/respeecher/tts.py

diff --git a/src/pipecat/services/respeecher/__init__.py b/src/pipecat/services/respeecher/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
new file mode 100644
index 0000000000..00c60b95eb
--- /dev/null
+++ b/src/pipecat/services/respeecher/tts.py
@@ -0,0 +1,290 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Respeecher Space text-to-speech service implementation."""
+
+import base64
+import json
+import uuid
+from typing import AsyncGenerator, Optional
+
+from loguru import logger
+from pydantic import BaseModel
+
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    ErrorFrame,
+    Frame,
+    StartFrame,
+    StartInterruptionFrame,
+    TTSAudioRawFrame,
+    TTSStartedFrame,
+    TTSStoppedFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.tts_service import AudioContextTTSService
+from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
+from pipecat.utils.tracing.service_decorators import traced_tts
+
+# See .env.example for Respeecher configuration needed
+try:
+    from respeecher import SamplingParams
+    from respeecher.tts import StreamingEncoding
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use Respeecher, you need to `pip install pipecat-ai[respeecher]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class RespeecherTTSService(AudioContextTTSService):
+    """Respeecher Space TTS service with WebSocket streaming and audio contexts.
+
+    Provides text-to-speech using Respeecher's streaming WebSocket API.
+    Supports audio context management and voice customization via sampling parameters.
+    """
+
+    class InputParams(BaseModel):
+        """Input parameters for Respeecher TTS configuration.
+
+        Parameters:
+            sampling_params: Sampling parameters used for speech synthesis.
+        """
+
+        sampling_params: SamplingParams = SamplingParams()
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        voice_id: str,
+        model: str = "public/tts/en-rt",
+        url: str = "wss://api.respeecher.com/v1",
+        sample_rate: Optional[int] = None,
+        encoding: StreamingEncoding = "pcm_s16le",
+        params: Optional[InputParams] = None,
+        **kwargs,
+    ):
+        """Initialize the Respeecher TTS service.
+
+        Args:
+            api_key: Respeecher API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            model: Model path for the Respeecher TTS API.
+            url: WebSocket base URL for Respeecher TTS API.
+            sample_rate: Audio sample rate. If None, uses default.
+            encoding: Audio encoding format.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to the parent service.
+        """
+        super().__init__(
+            push_text_frames=False,
+            pause_frame_processing=True,
+            sample_rate=sample_rate,
+            **kwargs,
+        )
+
+        self._params = params or RespeecherTTSService.InputParams()
+        self._api_key = api_key
+        self._url = url
+        self._output_format = {
+            "encoding": encoding,
+            "sample_rate": sample_rate,
+        }
+        self.set_model_name(model)
+        self.set_voice(voice_id)
+
+        self._context_id = None
+        self._receive_task = None
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True
+        """
+        return True
+
+    async def set_model(self, model: str):
+        """Set the TTS model.
+
+        Args:
+            model: The model name to use for synthesis.
+        """
+        self._model_id = model
+        await super().set_model(model)
+        await self._disconnect()
+        await self._connect()
+        logger.info(f"Switching TTS model to: [{model}]")
+
+    def _build_msg(self, text: str = "", continue_transcript: bool = True):
+        msg = {
+            "transcript": text,
+            "continue": continue_transcript,
+            "context_id": self._context_id,
+            "voice": {
+                "id": self._voice_id,
+                "sampling_params": self._params.sampling_params.model_dump(),
+            },
+            "output_format": self._output_format,
+        }
+
+        return json.dumps(msg)
+
+    async def start(self, frame: StartFrame):
+        """Start the Respeecher TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
+        await super().start(frame)
+        self._output_format["sample_rate"] = self.sample_rate
+        await self._connect()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the Respeecher TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().stop(frame)
+        await self._disconnect()
+
+    async def cancel(self, frame: CancelFrame):
+        """Stop the Respeecher TTS service.
+
+        Args:
+            frame: The end frame.
+        """
+        await super().cancel(frame)
+        await self._disconnect()
+
+    async def _connect(self):
+        await self._connect_websocket()
+
+        if self._websocket and not self._receive_task:
+            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
+
+    async def _disconnect(self):
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+
+        await self._disconnect_websocket()
+
+    async def _connect_websocket(self):
+        try:
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+            logger.debug("Connecting to Respeecher")
+            self._websocket = await websocket_connect(
+                f"{self._url}/{self._model_name}?api_key={self._api_key}"
+            )
+        except Exception as e:
+            logger.error(f"{self} initialization error: {e}")
+            self._websocket = None
+            await self._call_event_handler("on_connection_error", f"{e}")
+
+    async def _disconnect_websocket(self):
+        try:
+            await self.stop_all_metrics()
+
+            if self._websocket:
+                logger.debug("Disconnecting from Respeecher")
+                await self._websocket.close()
+        except Exception as e:
+            logger.error(f"{self} error closing websocket: {e}")
+        finally:
+            self._context_id = None
+            self._websocket = None
+
+    def _get_websocket(self):
+        if self._websocket:
+            return self._websocket
+        raise Exception("Websocket not connected")
+
+    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+        await super()._handle_interruption(frame, direction)
+        await self.stop_all_metrics()
+        if self._context_id:
+            cancel_msg = json.dumps({"context_id": self._context_id, "cancel": True})
+            await self._get_websocket().send(cancel_msg)
+            self._context_id = None
+
+    async def flush_audio(self):
+        """Flush any pending audio and finalize the current context."""
+        if not self._context_id or not self._websocket:
+            return
+        logger.trace(f"{self}: flushing audio")
+        msg = self._build_msg(text="", continue_transcript=False)
+        await self._websocket.send(msg)
+        self._context_id = None
+
+    async def _receive_messages(self):
+        async for message in WatchdogAsyncIterator(
+            self._get_websocket(), manager=self.task_manager
+        ):
+            msg = json.loads(message)
+            if not msg or not self.audio_context_available(msg["context_id"]):
+                continue
+            if msg["type"] == "done":
+                await self.stop_ttfb_metrics()
+                await self.remove_audio_context(msg["context_id"])
+            elif msg["type"] == "chunk":
+                await self.stop_ttfb_metrics()
+                frame = TTSAudioRawFrame(
+                    audio=base64.b64decode(msg["data"]),
+                    sample_rate=self.sample_rate,
+                    num_channels=1,
+                )
+                await self.append_to_audio_context(msg["context_id"], frame)
+            elif msg["type"] == "error":
+                logger.error(f"{self} error: {msg}")
+                await self.push_frame(TTSStoppedFrame())
+                await self.stop_all_metrics()
+                await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
+                self._context_id = None
+            else:
+                logger.error(f"{self} error, unknown message type: {msg}")
+
+    @traced_tts
+    async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
+        """Generate speech from text using Respeecher's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
+        logger.debug(f"{self}: Generating TTS [{text}]")
+
+        try:
+            if not self._websocket or self._websocket.state is State.CLOSED:
+                await self._connect()
+
+            if not self._context_id:
+                await self.start_ttfb_metrics()
+                yield TTSStartedFrame()
+                self._context_id = str(uuid.uuid4())
+                await self.create_audio_context(self._context_id)
+
+            msg = self._build_msg(text=text)
+
+            try:
+                await self._get_websocket().send(msg)
+                await self.start_tts_usage_metrics(text)
+            except Exception as e:
+                logger.error(f"{self} error sending message: {e}")
+                yield TTSStoppedFrame()
+                await self._disconnect()
+                await self._connect()
+                return
+            yield None
+        except Exception as e:
+            logger.error(f"{self} exception: {e}")

From 5efe01be3906c4239c21ffd9031b2ee073b925ca Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Fri, 29 Aug 2025 21:00:39 +0300
Subject: [PATCH 09/22] Fix some issues

---
 src/pipecat/services/respeecher/tts.py | 37 +++++++++-----------------
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index 00c60b95eb..598b83ed40 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -99,7 +99,6 @@ def __init__(
         self.set_model_name(model)
         self.set_voice(voice_id)
 
-        self._context_id = None
         self._receive_task = None
 
     def can_generate_metrics(self) -> bool:
@@ -118,18 +117,17 @@ async def set_model(self, model: str):
         """
         self._model_id = model
         await super().set_model(model)
+        logger.info(f"Switching TTS model to: [{model}]")
         await self._disconnect()
         await self._connect()
-        logger.info(f"Switching TTS model to: [{model}]")
 
-    def _build_msg(self, text: str = "", continue_transcript: bool = True):
+    def _build_msg(self, text: str, context_id: str):
         msg = {
             "transcript": text,
-            "continue": continue_transcript,
-            "context_id": self._context_id,
+            "context_id": context_id,
             "voice": {
                 "id": self._voice_id,
-                "sampling_params": self._params.sampling_params.model_dump(),
+                "sampling_params": self._params.sampling_params.model_dump(exclude_none=True),
             },
             "output_format": self._output_format,
         }
@@ -183,7 +181,7 @@ async def _connect_websocket(self):
                 return
             logger.debug("Connecting to Respeecher")
             self._websocket = await websocket_connect(
-                f"{self._url}/{self._model_name}?api_key={self._api_key}"
+                f"{self._url}/{self._model_name}/tts/websocket?api_key={self._api_key}"
             )
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
@@ -200,7 +198,6 @@ async def _disconnect_websocket(self):
         except Exception as e:
             logger.error(f"{self} error closing websocket: {e}")
         finally:
-            self._context_id = None
             self._websocket = None
 
     def _get_websocket(self):
@@ -211,19 +208,10 @@ def _get_websocket(self):
     async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
-        if self._context_id:
-            cancel_msg = json.dumps({"context_id": self._context_id, "cancel": True})
+        for context_id in self._contexts:
+            cancel_msg = json.dumps({"context_id": context_id, "cancel": True})
             await self._get_websocket().send(cancel_msg)
-            self._context_id = None
-
-    async def flush_audio(self):
-        """Flush any pending audio and finalize the current context."""
-        if not self._context_id or not self._websocket:
-            return
-        logger.trace(f"{self}: flushing audio")
-        msg = self._build_msg(text="", continue_transcript=False)
-        await self._websocket.send(msg)
-        self._context_id = None
+            await self.remove_audio_context(context_id)
 
     async def _receive_messages(self):
         async for message in WatchdogAsyncIterator(
@@ -248,7 +236,6 @@ async def _receive_messages(self):
                 await self.push_frame(TTSStoppedFrame())
                 await self.stop_all_metrics()
                 await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
-                self._context_id = None
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
 
@@ -268,13 +255,13 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
             if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
 
-            if not self._context_id:
+            if not self._contexts:
                 await self.start_ttfb_metrics()
                 yield TTSStartedFrame()
-                self._context_id = str(uuid.uuid4())
-                await self.create_audio_context(self._context_id)
 
-            msg = self._build_msg(text=text)
+            context_id = str(uuid.uuid4())
+            await self.create_audio_context(context_id)
+            msg = self._build_msg(text=text, context_id=context_id)
 
             try:
                 await self._get_websocket().send(msg)

From adadc6ab7c866d91eec170e96e7c0d221d6c6198 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 12:54:01 +0300
Subject: [PATCH 10/22] Refactor

---
 src/pipecat/services/respeecher/tts.py | 45 ++++++++++------
 src/pipecat/services/tts_service.py    | 74 ++++++++++++++++++--------
 2 files changed, 81 insertions(+), 38 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index 598b83ed40..2bb87ec4de 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -12,7 +12,7 @@
 from typing import AsyncGenerator, Optional
 
 from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, TypeAdapter, ValidationError
 
 from pipecat.frames.frames import (
     CancelFrame,
@@ -33,6 +33,7 @@
 # See .env.example for Respeecher configuration needed
 try:
     from respeecher import SamplingParams
+    from respeecher.tts import Response as TTSResponse
     from respeecher.tts import StreamingEncoding
     from websockets.asyncio.client import connect as websocket_connect
     from websockets.protocol import State
@@ -68,6 +69,7 @@ def __init__(
         sample_rate: Optional[int] = None,
         encoding: StreamingEncoding = "pcm_s16le",
         params: Optional[InputParams] = None,
+        additional_silence_between_sentences_time_s: float = 0,
         **kwargs,
     ):
         """Initialize the Respeecher TTS service.
@@ -80,12 +82,14 @@ def __init__(
             sample_rate: Audio sample rate. If None, uses default.
             encoding: Audio encoding format.
             params: Additional input parameters for voice customization.
+            additional_silence_between_sentences_time_s: Duration of additional silence to insert between sentences in seconds.
             **kwargs: Additional arguments passed to the parent service.
         """
         super().__init__(
             push_text_frames=False,
             pause_frame_processing=True,
             sample_rate=sample_rate,
+            silence_between_contexts_time_s=additional_silence_between_sentences_time_s,
             **kwargs,
         )
 
@@ -217,27 +221,38 @@ async def _receive_messages(self):
         async for message in WatchdogAsyncIterator(
             self._get_websocket(), manager=self.task_manager
         ):
-            msg = json.loads(message)
-            if not msg or not self.audio_context_available(msg["context_id"]):
+            try:
+                response = TypeAdapter(TTSResponse).validate_json(message)
+            except ValidationError as e:
+                logger.error(f"{self} cannot parse message: {e}")
+                continue
+
+            if response.context_id is not None and not self.audio_context_available(
+                response.context_id
+            ):
+                logger.error(
+                    f"{self} error, received {response.type} for unknown context_id: {response.context_id}"
+                )
                 continue
-            if msg["type"] == "done":
+
+            if response.type == "error":
+                logger.error(f"{self} error: {response}")
+                await self.push_frame(TTSStoppedFrame())
+                await self.stop_all_metrics()
+                await self.push_error(ErrorFrame(f"{self} error: {response.error}"))
+                continue
+
+            if response.type == "done":
                 await self.stop_ttfb_metrics()
-                await self.remove_audio_context(msg["context_id"])
-            elif msg["type"] == "chunk":
+                await self.remove_audio_context(response.context_id)
+            elif response.type == "chunk":
                 await self.stop_ttfb_metrics()
                 frame = TTSAudioRawFrame(
-                    audio=base64.b64decode(msg["data"]),
+                    audio=base64.b64decode(response.data),
                     sample_rate=self.sample_rate,
                     num_channels=1,
                 )
-                await self.append_to_audio_context(msg["context_id"], frame)
-            elif msg["type"] == "error":
-                logger.error(f"{self} error: {msg}")
-                await self.push_frame(TTSStoppedFrame())
-                await self.stop_all_metrics()
-                await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
-            else:
-                logger.error(f"{self} error, unknown message type: {msg}")
+                await self.append_to_audio_context(response.context_id, frame)
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index f4cd1891eb..4415a79ddc 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -347,12 +347,7 @@ async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirect
             direction: The direction to push the frame.
         """
         if self._push_silence_after_stop and isinstance(frame, TTSStoppedFrame):
-            silence_num_bytes = int(self._silence_time_s * self.sample_rate * 2)  # 16-bit
-            silence_frame = TTSAudioRawFrame(
-                audio=b"\x00" * silence_num_bytes,
-                sample_rate=self.sample_rate,
-                num_channels=1,
-            )
+            silence_frame = self.silence_frame(self._silence_time_s)
             silence_frame.transport_destination = self._transport_destination
             await self.push_frame(silence_frame)
 
@@ -369,6 +364,20 @@ async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirect
         ):
             await self._stop_frame_queue.put(frame)
 
+    def silence_frame(self, duration_s: float) -> TTSAudioRawFrame:
+        """Create a frame of silence.
+
+        Args:
+            duration_s: Silence duration in seconds.
+        """
+        silence_num_bytes = int(duration_s * self.sample_rate * 2)  # 16-bit
+
+        return TTSAudioRawFrame(
+            audio=b"\x00" * silence_num_bytes,
+            sample_rate=self.sample_rate,
+            num_channels=1,
+        )
+
     async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
         self._processing_text = False
         await self._text_aggregator.handle_interruption()
@@ -703,13 +712,25 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             self._bot_speaking = False
 
 
-class _AudioContextBaseService(ABC):
-    """A service that supports audio contexts."""
+class _AudioContextServiceMixin(ABC):
+    """A service that supports audio contexts.
+
+    This class does not inherit from other service base classes to avoid
+    diamond inheritance.
+    """
 
-    def __init__(self):
-        """Initialize the service."""
+    def __init__(
+        self,
+        silence_between_contexts_time_s: float = 1.0,
+    ):
+        """Initialize the service.
+
+        Args:
+            silence_between_contexts_time_s: Duration of silence to push between contexts.
+        """
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
+        self._silence_between_contexts_time_s = silence_between_contexts_time_s
 
     async def create_audio_context(self, context_id: str):
         """Create a new audio context for grouping related audio.
@@ -787,11 +808,9 @@ async def _audio_context_task_handler(self):
                 del self._contexts[context_id]
 
                 # Append some silence between sentences.
-                silence = b"\x00" * self.sample_rate
-                frame = TTSAudioRawFrame(
-                    audio=silence, sample_rate=self.sample_rate, num_channels=1
-                )
-                await self.push_frame(frame)
+                if self._silence_between_contexts_time_s > 0:
+                    silence_frame = self.silence_frame(self._silence_between_contexts_time_s)
+                    await self.push_frame(silence_frame)
             else:
                 running = False
 
@@ -831,9 +850,8 @@ async def cancel_task(self, task: asyncio.Task) -> None:
     async def push_frame(self, frame: Frame) -> None:
         pass
 
-    @property
     @abstractmethod
-    def sample_rate(self) -> int:
+    def silence_frame(self, duration_s: float) -> TTSAudioRawFrame:
         pass
 
     @property
@@ -842,7 +860,7 @@ def task_manager(self) -> BaseTaskManager:
         pass
 
 
-class AudioContextTTSService(WebsocketTTSService, _AudioContextBaseService):
+class AudioContextTTSService(WebsocketTTSService, _AudioContextServiceMixin):
     """Websocket-based TTS service with audio context management.
 
     This is a base class for websocket-based TTS services that allow correlating
@@ -857,14 +875,19 @@ class AudioContextTTSService(WebsocketTTSService, _AudioContextBaseService):
     audio from context ID "A" will be played first.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(
+        self,
+        silence_between_contexts_time_s: float = 1.0,
+        **kwargs,
+    ):
         """Initialize the Audio Context TTS service.
 
         Args:
+            silence_between_contexts_time_s: Duration of silence to push between contexts.
             **kwargs: Additional arguments passed to the parent WebsocketTTSService.
         """
         WebsocketTTSService.__init__(self, **kwargs)
-        _AudioContextBaseService.__init__(self)
+        _AudioContextServiceMixin.__init__(self, silence_between_contexts_time_s)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.
@@ -904,7 +927,7 @@ async def _handle_interruption(self, frame: StartInterruptionFrame, direction: F
         self._create_audio_context_task()
 
 
-class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextBaseService):
+class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextServiceMixin):
     """Websocket-based TTS service with word timestamps and audio context management.
 
     This is a base class for websocket-based TTS services that support word
@@ -920,14 +943,19 @@ class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextBaseServi
     audio from context ID "A" will be played first.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(
+        self,
+        silence_between_contexts_time_s: float = 1.0,
+        **kwargs,
+    ):
         """Initialize the Audio Context Word TTS service.
 
         Args:
+            silence_between_contexts_time_s: Duration of silence to push between contexts.
             **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
         """
         WebsocketWordTTSService.__init__(self, **kwargs)
-        _AudioContextBaseService.__init__(self)
+        _AudioContextServiceMixin.__init__(self, silence_between_contexts_time_s)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.

From 554ff8e83fc097b287313d3fc5b07a81bd847544 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 13:02:44 +0300
Subject: [PATCH 11/22] Forbid overriding encoding

---
 src/pipecat/services/respeecher/tts.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index 2bb87ec4de..65eee0472b 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -34,7 +34,6 @@
 try:
     from respeecher import SamplingParams
     from respeecher.tts import Response as TTSResponse
-    from respeecher.tts import StreamingEncoding
     from websockets.asyncio.client import connect as websocket_connect
     from websockets.protocol import State
 except ModuleNotFoundError as e:
@@ -67,7 +66,6 @@ def __init__(
         model: str = "public/tts/en-rt",
         url: str = "wss://api.respeecher.com/v1",
         sample_rate: Optional[int] = None,
-        encoding: StreamingEncoding = "pcm_s16le",
         params: Optional[InputParams] = None,
         additional_silence_between_sentences_time_s: float = 0,
         **kwargs,
@@ -80,7 +78,6 @@ def __init__(
             model: Model path for the Respeecher TTS API.
             url: WebSocket base URL for Respeecher TTS API.
             sample_rate: Audio sample rate. If None, uses default.
-            encoding: Audio encoding format.
             params: Additional input parameters for voice customization.
             additional_silence_between_sentences_time_s: Duration of additional silence to insert between sentences in seconds.
             **kwargs: Additional arguments passed to the parent service.
@@ -97,7 +94,7 @@ def __init__(
         self._api_key = api_key
         self._url = url
         self._output_format = {
-            "encoding": encoding,
+            "encoding": "pcm_s16le",
             "sample_rate": sample_rate,
         }
         self.set_model_name(model)

From d26e2fb0488369664c28406980c1ec8118b7f3f1 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 13:13:59 +0300
Subject: [PATCH 12/22] Use a TypedDict for convenience

---
 src/pipecat/services/respeecher/tts.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index 65eee0472b..d1324231e7 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -34,6 +34,9 @@
 try:
     from respeecher import SamplingParams
     from respeecher.tts import Response as TTSResponse
+    from respeecher.voices import (
+        SamplingParamsParams as SamplingParams,  # TypedDict instead of a Pydantic model
+    )
     from websockets.asyncio.client import connect as websocket_connect
     from websockets.protocol import State
 except ModuleNotFoundError as e:
@@ -56,7 +59,7 @@ class InputParams(BaseModel):
             sampling_params: Sampling parameters used for speech synthesis.
         """
 
-        sampling_params: SamplingParams = SamplingParams()
+        sampling_params: SamplingParams = {}
 
     def __init__(
         self,
@@ -128,7 +131,7 @@ def _build_msg(self, text: str, context_id: str):
             "context_id": context_id,
             "voice": {
                 "id": self._voice_id,
-                "sampling_params": self._params.sampling_params.model_dump(exclude_none=True),
+                "sampling_params": self._params.sampling_params,
             },
             "output_format": self._output_format,
         }

From 44b3ed044db70dd4c3e4523a96896b0952e40d15 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 13:21:01 +0300
Subject: [PATCH 13/22] Add some more type checking

---
 src/pipecat/services/respeecher/tts.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index d1324231e7..d0ad06f2f1 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -32,7 +32,7 @@
 
 # See .env.example for Respeecher configuration needed
 try:
-    from respeecher import SamplingParams
+    from respeecher.tts import ContextfulGenerationRequestParams, StreamingOutputFormatParams
     from respeecher.tts import Response as TTSResponse
     from respeecher.voices import (
         SamplingParamsParams as SamplingParams,  # TypedDict instead of a Pydantic model
@@ -96,9 +96,9 @@ def __init__(
         self._params = params or RespeecherTTSService.InputParams()
         self._api_key = api_key
         self._url = url
-        self._output_format = {
+        self._output_format: StreamingOutputFormatParams = {
             "encoding": "pcm_s16le",
-            "sample_rate": sample_rate,
+            "sample_rate": sample_rate or 0,
         }
         self.set_model_name(model)
         self.set_voice(voice_id)
@@ -126,7 +126,7 @@ async def set_model(self, model: str):
         await self._connect()
 
     def _build_msg(self, text: str, context_id: str):
-        msg = {
+        msg: ContextfulGenerationRequestParams = {
             "transcript": text,
             "context_id": context_id,
             "voice": {

From 999d073ef7d3631666a98e4d4e73c5b4b0b97f7d Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 14:28:59 +0300
Subject: [PATCH 14/22] Add a foundational example

---
 .../07ad-interruptible-respeecher.py          | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 examples/foundational/07ad-interruptible-respeecher.py

diff --git a/examples/foundational/07ad-interruptible-respeecher.py b/examples/foundational/07ad-interruptible-respeecher.py
new file mode 100644
index 0000000000..a70fd4d33f
--- /dev/null
+++ b/examples/foundational/07ad-interruptible-respeecher.py
@@ -0,0 +1,124 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.deepgram.stt import DeepgramSTTService
+from pipecat.services.openai.llm import OpenAILLMService
+from pipecat.services.respeecher.tts import RespeecherTTSService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
+from pipecat.transports.services.daily import DailyParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+
+    tts = RespeecherTTSService(
+        api_key=os.getenv("RESPEECHER_API_KEY"),
+        voice_id="samantha",
+        sampling_params={
+            # Optional sampling params overrides
+            # See https://space.respeecher.com/docs/api/tts/sampling-params-guide
+            # "temperature": 0.5
+        },
+    )
+
+    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = OpenAILLMContext(messages)
+    context_aggregator = llm.create_context_aggregator(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([context_aggregator.user().get_context_frame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()

From f640eae9fbe70d18ff2fedc8d111a66798b3ada7 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 15:13:33 +0300
Subject: [PATCH 15/22] Fix an example

---
 .../foundational/07ad-interruptible-respeecher.py    | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/foundational/07ad-interruptible-respeecher.py b/examples/foundational/07ad-interruptible-respeecher.py
index a70fd4d33f..c07183075a 100644
--- a/examples/foundational/07ad-interruptible-respeecher.py
+++ b/examples/foundational/07ad-interruptible-respeecher.py
@@ -55,11 +55,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
     tts = RespeecherTTSService(
         api_key=os.getenv("RESPEECHER_API_KEY"),
         voice_id="samantha",
-        sampling_params={
-            # Optional sampling params overrides
-            # See https://space.respeecher.com/docs/api/tts/sampling-params-guide
-            # "temperature": 0.5
-        },
+        params=RespeecherTTSService.InputParams(
+            sampling_params={
+                # Optional sampling params overrides
+                # See https://space.respeecher.com/docs/api/tts/sampling-params-guide
+                # "temperature": 0.5
+            },
+        ),
     )
 
     llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

From 1e19dac2df74d23db3db24238dc583819f9137b4 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 15:26:50 +0300
Subject: [PATCH 16/22] Move sampling params into settings

---
 src/pipecat/services/respeecher/tts.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index d0ad06f2f1..a8438c9c01 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -93,13 +93,15 @@ def __init__(
             **kwargs,
         )
 
-        self._params = params or RespeecherTTSService.InputParams()
+        params = params or RespeecherTTSService.InputParams()
+
         self._api_key = api_key
         self._url = url
         self._output_format: StreamingOutputFormatParams = {
             "encoding": "pcm_s16le",
             "sample_rate": sample_rate or 0,
         }
+        self._settings = {"sampling_params": params.sampling_params}
         self.set_model_name(model)
         self.set_voice(voice_id)
 
@@ -131,7 +133,7 @@ def _build_msg(self, text: str, context_id: str):
             "context_id": context_id,
             "voice": {
                 "id": self._voice_id,
-                "sampling_params": self._params.sampling_params,
+                "sampling_params": self._settings["sampling_params"],
             },
             "output_format": self._output_format,
         }

From d565fc0a4a28d67a92da06efc743efff2af06ec9 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 17:14:12 +0300
Subject: [PATCH 17/22] Switch back to single context

---
 src/pipecat/services/respeecher/tts.py | 32 ++++++++++++++++++--------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index a8438c9c01..f9bbbc2c91 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -105,6 +105,7 @@ def __init__(
         self.set_model_name(model)
         self.set_voice(voice_id)
 
+        self._context_id: str | None = None
         self._receive_task = None
 
     def can_generate_metrics(self) -> bool:
@@ -127,10 +128,13 @@ async def set_model(self, model: str):
         await self._disconnect()
         await self._connect()
 
-    def _build_msg(self, text: str, context_id: str):
+    def _build_msg(self, text: str, continue_transcript: bool = True):
+        assert self._context_id is not None
+
         msg: ContextfulGenerationRequestParams = {
             "transcript": text,
-            "context_id": context_id,
+            "continue": continue_transcript,
+            "context_id": self._context_id,
             "voice": {
                 "id": self._voice_id,
                 "sampling_params": self._settings["sampling_params"],
@@ -191,6 +195,7 @@ async def _connect_websocket(self):
             )
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
+            self._context_id = None
             self._websocket = None
             await self._call_event_handler("on_connection_error", f"{e}")
 
@@ -214,10 +219,19 @@ def _get_websocket(self):
     async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
-        for context_id in self._contexts:
-            cancel_msg = json.dumps({"context_id": context_id, "cancel": True})
+        if self._context_id:
+            cancel_msg = json.dumps({"context_id": self._context_id, "cancel": True})
             await self._get_websocket().send(cancel_msg)
-            await self.remove_audio_context(context_id)
+            self._context_id = None
+
+    async def flush_audio(self):
+        """Flush any pending audio and finalize the current context."""
+        if not self._context_id or not self._websocket:
+            return
+        logger.trace(f"{self}: flushing audio")
+        msg = self._build_msg(text="", continue_transcript=False)
+        await self._websocket.send(msg)
+        self._context_id = None
 
     async def _receive_messages(self):
         async for message in WatchdogAsyncIterator(
@@ -272,13 +286,13 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
             if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
 
-            if not self._contexts:
+            if not self._context_id:
                 await self.start_ttfb_metrics()
                 yield TTSStartedFrame()
+                self._context_id = str(uuid.uuid4())
+                await self.create_audio_context(self._context_id)
 
-            context_id = str(uuid.uuid4())
-            await self.create_audio_context(context_id)
-            msg = self._build_msg(text=text, context_id=context_id)
+            msg = self._build_msg(text=text)
 
             try:
                 await self._get_websocket().send(msg)

From fa4624b14e9677306e0bd63e99e9a967734d101d Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 18:07:29 +0300
Subject: [PATCH 18/22] Refactor

---
 src/pipecat/services/respeecher/tts.py | 37 +++++++++++++++++--------
 src/pipecat/services/tts_service.py    | 38 +++++++-------------------
 2 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index f9bbbc2c91..a5433726fb 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -19,6 +19,7 @@
     EndFrame,
     ErrorFrame,
     Frame,
+    LLMFullResponseEndFrame,
     StartFrame,
     StartInterruptionFrame,
     TTSAudioRawFrame,
@@ -70,7 +71,7 @@ def __init__(
         url: str = "wss://api.respeecher.com/v1",
         sample_rate: Optional[int] = None,
         params: Optional[InputParams] = None,
-        additional_silence_between_sentences_time_s: float = 0,
+        aggregate_sentences: bool = False,
         **kwargs,
     ):
         """Initialize the Respeecher TTS service.
@@ -82,14 +83,14 @@ def __init__(
             url: WebSocket base URL for Respeecher TTS API.
             sample_rate: Audio sample rate. If None, uses default.
             params: Additional input parameters for voice customization.
-            additional_silence_between_sentences_time_s: Duration of additional silence to insert between sentences in seconds.
+            aggregate_sentences: Whether to aggregate text into sentences client-side.
             **kwargs: Additional arguments passed to the parent service.
         """
         super().__init__(
             push_text_frames=False,
             pause_frame_processing=True,
             sample_rate=sample_rate,
-            silence_between_contexts_time_s=additional_silence_between_sentences_time_s,
+            aggregate_sentences=aggregate_sentences,
             **kwargs,
         )
 
@@ -128,10 +129,10 @@ async def set_model(self, model: str):
         await self._disconnect()
         await self._connect()
 
-    def _build_msg(self, text: str, continue_transcript: bool = True):
+    def _build_request(self, text: str, continue_transcript: bool = True):
         assert self._context_id is not None
 
-        msg: ContextfulGenerationRequestParams = {
+        request: ContextfulGenerationRequestParams = {
             "transcript": text,
             "continue": continue_transcript,
             "context_id": self._context_id,
@@ -142,7 +143,7 @@ def _build_msg(self, text: str, continue_transcript: bool = True):
             "output_format": self._output_format,
         }
 
-        return json.dumps(msg)
+        return json.dumps(request)
 
     async def start(self, frame: StartFrame):
         """Start the Respeecher TTS service.
@@ -220,17 +221,29 @@ async def _handle_interruption(self, frame: StartInterruptionFrame, direction: F
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
         if self._context_id:
-            cancel_msg = json.dumps({"context_id": self._context_id, "cancel": True})
-            await self._get_websocket().send(cancel_msg)
+            cancel_request = json.dumps({"context_id": self._context_id, "cancel": True})
+            await self._get_websocket().send(cancel_request)
             self._context_id = None
 
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with context awareness.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
+            await self.flush_audio()
+
     async def flush_audio(self):
         """Flush any pending audio and finalize the current context."""
         if not self._context_id or not self._websocket:
             return
         logger.trace(f"{self}: flushing audio")
-        msg = self._build_msg(text="", continue_transcript=False)
-        await self._websocket.send(msg)
+        flush_request = self._build_request(text="", continue_transcript=False)
+        await self._websocket.send(flush_request)
         self._context_id = None
 
     async def _receive_messages(self):
@@ -292,10 +305,10 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame | None, None]:
                 self._context_id = str(uuid.uuid4())
                 await self.create_audio_context(self._context_id)
 
-            msg = self._build_msg(text=text)
+            generation_request = self._build_request(text=text)
 
             try:
-                await self._get_websocket().send(msg)
+                await self._get_websocket().send(generation_request)
                 await self.start_tts_usage_metrics(text)
             except Exception as e:
                 logger.error(f"{self} error sending message: {e}")
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 4415a79ddc..3cf0e73d73 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -719,18 +719,10 @@ class _AudioContextServiceMixin(ABC):
     diamond inheritance.
     """
 
-    def __init__(
-        self,
-        silence_between_contexts_time_s: float = 1.0,
-    ):
-        """Initialize the service.
-
-        Args:
-            silence_between_contexts_time_s: Duration of silence to push between contexts.
-        """
+    def __init__(self):
+        """Initialize the service."""
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
-        self._silence_between_contexts_time_s = silence_between_contexts_time_s
 
     async def create_audio_context(self, context_id: str):
         """Create a new audio context for grouping related audio.
@@ -807,10 +799,10 @@ async def _audio_context_task_handler(self):
                 # We just finished processing the context, so we can safely remove it.
                 del self._contexts[context_id]
 
-                # Append some silence between sentences.
-                if self._silence_between_contexts_time_s > 0:
-                    silence_frame = self.silence_frame(self._silence_between_contexts_time_s)
-                    await self.push_frame(silence_frame)
+                # Append some silence between contexts.
+                SILENCE_BETWEEN_CONTEXTS = 1
+                silence_frame = self.silence_frame(SILENCE_BETWEEN_CONTEXTS)
+                await self.push_frame(silence_frame)
             else:
                 running = False
 
@@ -875,19 +867,14 @@ class AudioContextTTSService(WebsocketTTSService, _AudioContextServiceMixin):
     audio from context ID "A" will be played first.
     """
 
-    def __init__(
-        self,
-        silence_between_contexts_time_s: float = 1.0,
-        **kwargs,
-    ):
+    def __init__(self, **kwargs):
         """Initialize the Audio Context TTS service.
 
         Args:
-            silence_between_contexts_time_s: Duration of silence to push between contexts.
             **kwargs: Additional arguments passed to the parent WebsocketTTSService.
         """
         WebsocketTTSService.__init__(self, **kwargs)
-        _AudioContextServiceMixin.__init__(self, silence_between_contexts_time_s)
+        _AudioContextServiceMixin.__init__(self)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.
@@ -943,19 +930,14 @@ class AudioContextWordTTSService(WebsocketWordTTSService, _AudioContextServiceMi
     audio from context ID "A" will be played first.
     """
 
-    def __init__(
-        self,
-        silence_between_contexts_time_s: float = 1.0,
-        **kwargs,
-    ):
+    def __init__(self, **kwargs):
         """Initialize the Audio Context Word TTS service.
 
         Args:
-            silence_between_contexts_time_s: Duration of silence to push between contexts.
             **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
         """
         WebsocketWordTTSService.__init__(self, **kwargs)
-        _AudioContextServiceMixin.__init__(self, silence_between_contexts_time_s)
+        _AudioContextServiceMixin.__init__(self)
 
     async def start(self, frame: StartFrame):
         """Start the audio context TTS service.

From 233fef46cf2e0a17b0676b6b5cbefabcdc206852 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Mon, 1 Sep 2025 19:07:23 +0300
Subject: [PATCH 19/22] Remove the watchdog

---
 src/pipecat/services/respeecher/tts.py | 5 +----
 src/pipecat/services/tts_service.py    | 7 -------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index a5433726fb..1106a6fa6b 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -28,7 +28,6 @@
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import AudioContextTTSService
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_tts
 
 # See .env.example for Respeecher configuration needed
@@ -247,9 +246,7 @@ async def flush_audio(self):
         self._context_id = None
 
     async def _receive_messages(self):
-        async for message in WatchdogAsyncIterator(
-            self._get_websocket(), manager=self.task_manager
-        ):
+        async for message in self._get_websocket():
             try:
                 response = TypeAdapter(TTSResponse).validate_json(message)
             except ValidationError as e:
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index aede2dc1e9..62bfdfbd7a 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -38,7 +38,6 @@
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
 from pipecat.utils.asyncio.task_manager import BaseTaskManager
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.base_text_filter import BaseTextFilter
 from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator
@@ -820,20 +819,14 @@ async def _handle_audio_context(self, context_id: str):
         while running:
             try:
                 frame = await asyncio.wait_for(queue.get(), timeout=AUDIO_CONTEXT_TIMEOUT)
-                self.reset_watchdog()
                 if frame:
                     await self.push_frame(frame)
                 running = frame is not None
             except asyncio.TimeoutError:
-                self.reset_watchdog()
                 # We didn't get audio, so let's consider this context finished.
                 logger.trace(f"{self} time out on audio context {context_id}")
                 break
 
-    @abstractmethod
-    def reset_watchdog(self) -> None:
-        pass
-
     @abstractmethod
     def create_task(self, coroutine: Coroutine) -> asyncio.Task:
         pass

From b0e971bc765abdbf71483c8a45270a519799ca79 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Tue, 2 Sep 2025 18:27:37 +0300
Subject: [PATCH 20/22] Fix a docstring

---
 src/pipecat/services/respeecher/tts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pipecat/services/respeecher/tts.py b/src/pipecat/services/respeecher/tts.py
index 1106a6fa6b..ad834eb422 100644
--- a/src/pipecat/services/respeecher/tts.py
+++ b/src/pipecat/services/respeecher/tts.py
@@ -164,10 +164,10 @@ async def stop(self, frame: EndFrame):
         await self._disconnect()
 
     async def cancel(self, frame: CancelFrame):
-        """Stop the Respeecher TTS service.
+        """Cancel the Respeecher TTS service.
 
         Args:
-            frame: The end frame.
+            frame: The cancel frame.
         """
         await super().cancel(frame)
         await self._disconnect()

From fbdb3502ebf7d0be99553b9776bf05612ba8026e Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Fri, 5 Sep 2025 11:35:44 +0300
Subject: [PATCH 21/22] Adjust a dependency specifier

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d942a53c12..243c9fc30f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,7 +87,7 @@ openrouter = []
 perplexity = []
 playht = [ "websockets>=13.1,<15.0" ]
 qwen = []
-respeecher = [ "respeecher~=1.0" ]
+respeecher = [ "respeecher>=1.0,<2" ]
 rime = [ "websockets>=13.1,<15.0" ]
 riva = [ "nvidia-riva-client~=2.21.1" ]
 runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.117.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
diff --git a/uv.lock b/uv.lock
index d2d86eb857..b2d02ce29e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4470,7 +4470,7 @@ requires-dist = [
     { name = "python-dotenv", marker = "extra == 'runner'", specifier = ">=1.0.0,<2.0.0" },
     { name = "pyvips", extras = ["binary"], marker = "extra == 'moondream'", specifier = "~=3.0.0" },
     { name = "resampy", specifier = "~=0.4.3" },
-    { name = "respeecher", marker = "extra == 'respeecher'", specifier = "~=1.0" },
+    { name = "respeecher", marker = "extra == 'respeecher'", specifier = ">=1.0,<2" },
     { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = "~=2.23.1" },
     { name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
     { name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.0" },

From e1a9c1baa39648252abd23ede3fe583ef6623246 Mon Sep 17 00:00:00 2001
From: Nazar Vinnichuk <nv@respeecher.com>
Date: Tue, 23 Sep 2025 17:44:10 +0300
Subject: [PATCH 22/22] Fix a type hint

---
 src/pipecat/services/tts_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 20cba81329..de0936f0d2 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -968,7 +968,7 @@ async def cancel(self, frame: CancelFrame):
         await super().cancel(frame)
         await self._stop_audio_context_task()
 
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         await self._stop_audio_context_task()
         self._create_audio_context_task()