diff --git a/examples/other/text-to-speech/README.md b/examples/other/text-to-speech/README.md index 0165b8dfc8..915e5b9364 100644 --- a/examples/other/text-to-speech/README.md +++ b/examples/other/text-to-speech/README.md @@ -1,3 +1,30 @@ -# Text-to-speech +# Text-to-Speech Examples -This small example shows how you can generate real-time audio data from text. \ No newline at end of file +These examples demonstrate real-time text-to-speech generation using various TTS plugins with LiveKit. + +## Environment Variables + +### Plugin API Keys +Set the API key for your chosen plugin. + +### LiveKit Connection +For connecting to LiveKit Cloud: +- `LIVEKIT_URL` - Your LiveKit server URL +- `LIVEKIT_API_KEY` - LiveKit API key +- `LIVEKIT_API_SECRET` - LiveKit API secret + +## Running Examples + +Execute the example to connect to a LiveKit room and stream TTS audio: + +```bash +uv run examples/other/text-to-speech/{your_plugin}_tts.py start +``` + +The agent will join the room and stream synthesized speech to participants. + +### Running Locally + +Running the examples with `console` mode won't play audio since the examples use `rtc.LocalAudioTrack`, which requires the LiveKit room infrastructure for audio playback. The `LocalAudioTrack` is designed to publish audio streams to LiveKit rooms where they are processed and distributed to participants. Without a room connection, the audio frames are generated but not routed to any playback device. + +To test TTS output locally without a LiveKit room, you would need to modify the example file to save the generated audio frames to a WAV file instead of publishing them to a track. The saved WAV file can then be played using any audio player on your system. 
diff --git a/examples/other/text-to-speech/requirements.txt b/examples/other/text-to-speech/requirements.txt index 5da9ab7014..99f3655204 100644 --- a/examples/other/text-to-speech/requirements.txt +++ b/examples/other/text-to-speech/requirements.txt @@ -2,5 +2,6 @@ livekit-agents>=0.12.18 livekit-plugins-openai>=0.12.2 livekit-plugins-cartesia>=0.4.11 livekit-plugins-elevenlabs>=0.8.1 +livekit-plugins-respeecher>=0.0.1 livekit-plugins-speechify>=0.1.0 python-dotenv~=1.0 diff --git a/examples/other/text-to-speech/respeecher_tts.py b/examples/other/text-to-speech/respeecher_tts.py new file mode 100644 index 0000000000..f525c48d58 --- /dev/null +++ b/examples/other/text-to-speech/respeecher_tts.py @@ -0,0 +1,57 @@ +import asyncio +import logging + +from dotenv import load_dotenv + +from livekit import rtc +from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli +from livekit.plugins import respeecher + +load_dotenv() + +logger = logging.getLogger("respeecher-tts-demo") +logger.setLevel(logging.INFO) + + +async def entrypoint(job: JobContext): + logger.info("starting tts example agent") + + tts = respeecher.TTS() + + source = rtc.AudioSource(tts.sample_rate, tts.num_channels) + track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source) + options = rtc.TrackPublishOptions() + options.source = rtc.TrackSource.SOURCE_MICROPHONE + + await job.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_NONE) + publication = await job.room.local_participant.publish_track(track, options) + await publication.wait_for_subscription() + + async with tts.stream() as stream: + + async def _playback_task(): + count = 0 + async for audio in stream: + count += 1 + await source.capture_frame(audio.frame) + + task = asyncio.create_task(_playback_task()) + + text = "Hello from Respeecher! I hope you are having a great day." 
+ + # split into two word chunks to simulate LLM streaming + words = text.split() + for i in range(0, len(words), 2): + chunk = " ".join(words[i : i + 2]) + if chunk: + logger.info(f'pushing chunk: "{chunk} "') + stream.push_text(chunk + " ") + + # Mark end of input segment + stream.flush() + stream.end_input() + await asyncio.gather(task) + + +if __name__ == "__main__": + cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) diff --git a/livekit-plugins/livekit-plugins-respeecher/README.md b/livekit-plugins/livekit-plugins-respeecher/README.md new file mode 100644 index 0000000000..65fd0fed0c --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/README.md @@ -0,0 +1,27 @@ +# Respeecher plugin for LiveKit Agents + +Support for [Respeecher](https://respeecher.com/)'s TTS in LiveKit Agents. + +More information is available in the docs for the [Respeecher](https://docs.livekit.io/agents/integrations/tts/respeecher/) integration. + +## Installation + +```bash +pip install livekit-plugins-respeecher +``` + +## Pre-requisites + +You'll need an API key from Respeecher. It can be set as an environment variable: `RESPEECHER_API_KEY` or passed to the `respeecher.TTS()` constructor. + +To get the key, log in to [Respeecher Space](https://space.respeecher.com/). + +## Example + +To try out the Respeecher plugin, run the example: + +```bash +uv run python examples/other/text-to-speech/respeecher_tts.py start +``` + +Check [`examples/other/text-to-speech/README.md`](../../examples/other/text-to-speech/README.md) for running details. \ No newline at end of file diff --git a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/__init__.py b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/__init__.py new file mode 100644 index 0000000000..944fadd4cb --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/__init__.py @@ -0,0 +1,44 @@ +# Copyright 2025 LiveKit, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Respeecher plugin for LiveKit Agents + +Voice cloning and synthesis plugin for LiveKit Agents using Respeecher API. +""" + +from .tts import TTS, ChunkedStream +from .version import __version__ + +__all__ = ["TTS", "ChunkedStream", "__version__"] + +from livekit.agents import Plugin + +from .log import logger + + +class RespeecherPlugin(Plugin): + def __init__(self) -> None: + super().__init__(__name__, __version__, __package__, logger) + + +Plugin.register_plugin(RespeecherPlugin()) + +# Cleanup docs of unexported modules +_module = dir() +NOT_IN_ALL = [m for m in _module if m not in __all__] + +__pdoc__ = {} + +for n in NOT_IN_ALL: + __pdoc__[n] = False diff --git a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/log.py b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/log.py new file mode 100644 index 0000000000..61bea91261 --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/log.py @@ -0,0 +1,3 @@ +import logging + +logger = logging.getLogger("livekit.plugins.respeecher") diff --git a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/models.py b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/models.py new file mode 100644 index 0000000000..e378a6a0e7 --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/models.py @@ -0,0 +1,37 @@ +from dataclasses import 
dataclass +from typing import Any, Literal, Optional + +TTSModels = Literal[ + # Respeecher's English model, multilanguage models will be added later + "/public/tts/en-rt", +] + +TTSEncoding = Literal["pcm_s16le",] + + +"""Check https://space.respeecher.com/docs/api/tts/sampling-params-guide for details""" +SamplingParams = dict[str, Any] + + +@dataclass +class VoiceSettings: + """Voice settings for Respeecher TTS""" + + sampling_params: Optional[SamplingParams] = None + + +class Voice(dict): + """Voice model for Respeecher - behaves like a dict with guaranteed `id` and optional `sampling_params`""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + if "id" not in self: + raise ValueError("Voice must have an 'id' field") + + @property + def id(self) -> str: + return str(self["id"]) + + @property + def sampling_params(self) -> Optional[SamplingParams]: + return self.get("sampling_params") diff --git a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/py.typed b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/tts.py b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/tts.py new file mode 100644 index 0000000000..547c685f82 --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/tts.py @@ -0,0 +1,407 @@ +# Copyright 2025 LiveKit, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import asyncio +import base64 +import json +import os +import weakref +from dataclasses import dataclass + +import aiohttp + +from livekit.agents import ( + APIConnectionError, + APIConnectOptions, + APIError, + APIStatusError, + APITimeoutError, + tokenize, + tts, + utils, +) +from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr +from livekit.agents.utils import is_given + +from .log import logger +from .models import TTSEncoding, TTSModels, Voice, VoiceSettings +from .version import __version__ + +API_VERSION = __version__ +API_AUTH_HEADER = "X-API-Key" +API_VERSION_HEADER = "LiveKit-Plugin-Respeecher-Version" +API_BASE_URL = "https://api.respeecher.com/v1" + + +@dataclass +class _TTSOptions: + model: TTSModels | str + encoding: TTSEncoding + sample_rate: int + voice_id: str + voice_settings: NotGivenOr[VoiceSettings] + api_key: str + base_url: str + + +class TTS(tts.TTS): + def __init__( + self, + *, + api_key: NotGivenOr[str] = NOT_GIVEN, + model: TTSModels | str = "/public/tts/en-rt", + encoding: TTSEncoding = "pcm_s16le", + voice_id: str = "samantha", + voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN, + sample_rate: int = 24000, + tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN, + http_session: aiohttp.ClientSession | None = None, + base_url: str = API_BASE_URL, + ) -> None: + """ + Create a new instance of Respeecher TTS. + + Args: + api_key: Respeecher API key. If not provided, uses RESPEECHER_API_KEY env variable. + model: The Respeecher TTS model to use. + encoding: Audio encoding format. + voice_id: ID of the voice to use. Each model exposes its own set of voices, so call list_voices() to look up the IDs that are valid for the selected model. + voice_settings: Optional voice settings including sampling parameters. + sample_rate: Audio sample rate in Hz. + tokenizer: Sentence tokenizer used to split streamed text into sentences. If not provided, the blingfire sentence tokenizer is used.
+ http_session: Optional aiohttp session to use for requests. + base_url: The base URL for the Respeecher API. + """ + + super().__init__( + capabilities=tts.TTSCapabilities( + streaming=True, + aligned_transcript=False, + ), + sample_rate=sample_rate, + num_channels=1, + ) + + respeecher_api_key = api_key if is_given(api_key) else os.environ.get("RESPEECHER_API_KEY") + if not respeecher_api_key: + raise ValueError("RESPEECHER_API_KEY must be set") + + self._opts = _TTSOptions( + model=model, + encoding=encoding, + sample_rate=sample_rate, + voice_id=voice_id, + voice_settings=voice_settings, + api_key=respeecher_api_key, + base_url=base_url, + ) + self._session = http_session + self._streams = weakref.WeakSet[SynthesizeStream]() + self._sentence_tokenizer = ( + tokenizer if is_given(tokenizer) else tokenize.blingfire.SentenceTokenizer() + ) + self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse]( + connect_cb=self._connect_ws, + close_cb=self._close_ws, + ) + + async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse: + """Open a WebSocket to the current model's `/tts/websocket` endpoint (pool connect callback). + + Raises APIConnectionError when the derived URL is not `wss://`. + """ + session = self._ensure_session() + # The API key and plugin identification are sent as query parameters instead of headers. + # NOTE(review): aiohttp's ws_connect() does accept `headers=`; confirm that the endpoint + # requires query-string auth before changing this. + # An http:// base URL maps to ws:// and is therefore rejected by the check below. + ws_url = self._opts.base_url.replace("https://", "wss://").replace("http://", "ws://") + if not ws_url.startswith("wss://"): + logger.error("Insecure WebSocket connection detected, wss:// required") + raise APIConnectionError("Secure WebSocket connection (wss://) required") + + full_ws_url = f"{ws_url}{self._opts.model}/tts/websocket?api_key={self._opts.api_key}&source={API_VERSION_HEADER}&version={API_VERSION}" + return await asyncio.wait_for(session.ws_connect(full_ws_url), timeout) + + async def _close_ws(self, ws: aiohttp.ClientWebSocketResponse) -> None: + """Close a pooled WebSocket (pool close callback).""" + await ws.close() + + def _ensure_session(self) -> aiohttp.ClientSession: + """Return the HTTP session, fetching one from `utils.http_context` on first use if none is set.""" + if not self._session: + self._session = utils.http_context.http_session() + return self._session + + async def list_voices(self) -> list[Voice]: + """List available voices 
from Respeecher API""" + async with self._ensure_session().get( + f"{self._opts.base_url}{self._opts.model}/voices", + headers={ + API_AUTH_HEADER: self._opts.api_key, + API_VERSION_HEADER: API_VERSION, + }, + ) as resp: + resp.raise_for_status() + data = await resp.json() + # Voice() raises ValueError for any entry that lacks an "id" field + voices = [Voice(voice_data) for voice_data in data] + + if not voices: + raise APIError("No voices are available") + + return voices + + def update_options( + self, + *, + voice_id: NotGivenOr[str] = NOT_GIVEN, + voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN, + model: NotGivenOr[TTSModels | str] = NOT_GIVEN, + ) -> None: + """Update TTS options. + + Only the arguments that are given are applied. Changing the model also invalidates the + pooled WebSocket connections, because the model is part of the connection URL. + """ + if is_given(model) and model != self._opts.model: + self._opts.model = model + # Drop pooled connections so that the next request reconnects with the new model. + # invalidate() is synchronous: it works without a running event loop and does not + # leave an unreferenced fire-and-forget task behind. + self._pool.invalidate() + + if is_given(voice_id): + self._opts.voice_id = voice_id + if is_given(voice_settings): + self._opts.voice_settings = voice_settings + + def synthesize( + self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS + ) -> ChunkedStream: + """Return a ChunkedStream that synthesizes `text` in one request.""" + return ChunkedStream(tts=self, input_text=text, conn_options=conn_options) + + def prewarm(self) -> None: + """Ask the connection pool to prewarm a connection.""" + self._pool.prewarm() + + def stream( + self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS + ) -> SynthesizeStream: + """Create a SynthesizeStream and track it so that aclose() can close it.""" + stream = SynthesizeStream(tts=self, conn_options=conn_options) + self._streams.add(stream) + return stream + + async def aclose(self) -> None: + """Close all tracked streams and the WebSocket connection pool.""" + for stream in list(self._streams): + await stream.aclose() + + self._streams.clear() + await self._pool.aclose() + + # The HTTP session is either supplied by the caller or obtained from utils.http_context, + # so it is not owned by this instance: drop the reference instead of closing it. + self._session = None + + +class ChunkedStream(tts.ChunkedStream): + """Synthesize text using Respeecher HTTPS endpoint""" + + def __init__(self, *, tts: TTS, input_text: str, conn_options: 
APIConnectOptions) -> None: + super().__init__(tts=tts, input_text=input_text, conn_options=conn_options) + self._tts: TTS = tts + + async def _run(self, output_emitter: tts.AudioEmitter) -> None: + """Run the TTS synthesis""" + json_data = { + "transcript": self._input_text, + "voice": { + "id": self._tts._opts.voice_id, + }, + "output_format": { + "sample_rate": self._tts._opts.sample_rate, + "encoding": self._tts._opts.encoding, + }, + } + + if ( + is_given(self._tts._opts.voice_settings) + and self._tts._opts.voice_settings.sampling_params + ): + json_data["voice"]["sampling_params"] = self._tts._opts.voice_settings.sampling_params # type: ignore[index] + + http_url = f"{self._tts._opts.base_url}{self._tts._opts.model}/tts/bytes" + + try: + async with self._tts._ensure_session().post( + http_url, + headers={ + API_AUTH_HEADER: self._tts._opts.api_key, + API_VERSION_HEADER: API_VERSION, + "Content-Type": "application/json", + }, + json=json_data, + timeout=aiohttp.ClientTimeout(total=30, sock_connect=self._conn_options.timeout), + ) as resp: + resp.raise_for_status() + + output_emitter.initialize( + request_id=utils.shortuuid(), + sample_rate=self._tts._opts.sample_rate, + num_channels=1, + mime_type="audio/wav", + ) + + async for data, _ in resp.content.iter_chunks(): + output_emitter.push(data) + + output_emitter.flush() + except asyncio.TimeoutError: + raise APITimeoutError() from None + except aiohttp.ClientResponseError as e: + raise APIStatusError( + message=e.message, status_code=e.status, request_id=None, body=None + ) from None + except Exception as e: + raise APIConnectionError() from e + + +class SynthesizeStream(tts.SynthesizeStream): + """Streamed API using WebSocket for real-time synthesis""" + + def __init__(self, *, tts: TTS, conn_options: APIConnectOptions): + super().__init__(tts=tts, conn_options=conn_options) + + async def aclose(self) -> None: + await super().aclose() + + async def _run(self, output_emitter: tts.AudioEmitter) -> None: + 
context_id = utils.shortuuid() + output_emitter.initialize( + request_id=context_id, + sample_rate=self._tts._opts.sample_rate, # type: ignore[attr-defined] + num_channels=1, + stream=True, + mime_type="audio/pcm", + ) + output_emitter.start_segment(segment_id=context_id) + + sent_tokenizer_stream = self._tts._sentence_tokenizer.stream() # type: ignore[attr-defined] + + async def _input_task() -> None: + async for data in self._input_ch: + if isinstance(data, self._FlushSentinel): + sent_tokenizer_stream.flush() + continue + sent_tokenizer_stream.push_text(data) + sent_tokenizer_stream.end_input() + + async def _sentence_stream_task(ws: aiohttp.ClientWebSocketResponse) -> None: + async for sent in sent_tokenizer_stream: + generate_request = { + "context_id": context_id, + "transcript": sent.token, + "voice": { + "id": self._tts._opts.voice_id, # type: ignore[attr-defined] + }, + "continue": True, + "output_format": { + "encoding": self._tts._opts.encoding, # type: ignore[attr-defined] + "sample_rate": self._tts._opts.sample_rate, # type: ignore[attr-defined] + }, + } + if ( + is_given(self._tts._opts.voice_settings) # type: ignore[attr-defined] + and self._tts._opts.voice_settings.sampling_params # type: ignore[attr-defined] + ): + generate_request["voice"]["sampling_params"] = ( + self._tts._opts.voice_settings.sampling_params # type: ignore[attr-defined] + ) + + self._mark_started() + await ws.send_str(json.dumps(generate_request)) + + # Send final message with continue=False + end_request = { + "context_id": context_id, + "transcript": "", + "voice": { + "id": self._tts._opts.voice_id, # type: ignore[attr-defined] + }, + "continue": False, + "output_format": { + "encoding": self._tts._opts.encoding, # type: ignore[attr-defined] + "sample_rate": self._tts._opts.sample_rate, # type: ignore[attr-defined] + }, + } + if ( + is_given(self._tts._opts.voice_settings) # type: ignore[attr-defined] + and self._tts._opts.voice_settings.sampling_params # type: 
ignore[attr-defined] + ): + end_request["voice"]["sampling_params"] = ( # type: ignore[index] + self._tts._opts.voice_settings.sampling_params # type: ignore[attr-defined] + ) + await ws.send_str(json.dumps(end_request)) + + async def _recv_task(ws: aiohttp.ClientWebSocketResponse) -> None: + while True: + msg = await ws.receive() + if msg.type in ( + aiohttp.WSMsgType.CLOSED, + aiohttp.WSMsgType.CLOSE, + aiohttp.WSMsgType.CLOSING, + ): + raise APIStatusError( + "Respeecher connection closed unexpectedly", request_id=context_id + ) + + if msg.type != aiohttp.WSMsgType.TEXT: + logger.warning("Unexpected Respeecher message type %s", msg.type) + continue + + data = json.loads(msg.data) + + if data.get("context_id") != context_id: + logger.warning( + "Received a message with context_id=%s instead of expected %s", + data.get("context_id"), + context_id, + ) + continue + + if data.get("type") == "error": + raise APIError(f"Respeecher returned error: {data.get('error')}") + + if data.get("type") == "chunk": + audio_data = base64.b64decode(data["data"]) + output_emitter.push(audio_data) + + elif data.get("type") == "done": + if sent_tokenizer_stream.closed: + output_emitter.end_input() + break + + try: + async with self._tts._pool.connection(timeout=self._conn_options.timeout) as ws: # type: ignore[attr-defined] + tasks = [ + asyncio.create_task(_input_task()), + asyncio.create_task(_sentence_stream_task(ws)), + asyncio.create_task(_recv_task(ws)), + ] + + try: + await asyncio.gather(*tasks) + finally: + await sent_tokenizer_stream.aclose() + await utils.aio.gracefully_cancel(*tasks) + except asyncio.TimeoutError: + raise APITimeoutError() from None + except aiohttp.ClientResponseError as e: + raise APIStatusError( + message=e.message, status_code=e.status, request_id=None, body=None + ) from None + except Exception as e: + raise APIConnectionError() from e + finally: + output_emitter.end_segment() diff --git 
a/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/version.py b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/version.py new file mode 100644 index 0000000000..a951f9245a --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/livekit/plugins/respeecher/version.py @@ -0,0 +1,15 @@ +# Copyright 2025 LiveKit, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.1.0" diff --git a/livekit-plugins/livekit-plugins-respeecher/pyproject.toml b/livekit-plugins/livekit-plugins-respeecher/pyproject.toml new file mode 100644 index 0000000000..ee827f1c0d --- /dev/null +++ b/livekit-plugins/livekit-plugins-respeecher/pyproject.toml @@ -0,0 +1,38 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "livekit-plugins-respeecher" +dynamic = ["version"] +description = "LiveKit Agents Plugin for Respeecher" +readme = "README.md" +license = "Apache-2.0" +requires-python = ">=3.9.0" +authors = [{ name = "LiveKit", email = "hello@livekit.io" }] +keywords = ["realtime", "audio", "livekit", "tts"] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Topic :: Multimedia :: Sound/Audio", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: 
Only", +] +dependencies = ["livekit-agents>=1.2.6", "aiohttp"] + +[project.urls] +Documentation = "https://docs.livekit.io" +Website = "https://livekit.io/" +Source = "https://github.com/livekit/agents" + +[tool.hatch.version] +path = "livekit/plugins/respeecher/version.py" + +[tool.hatch.build.targets.wheel] +packages = ["livekit"] + +[tool.hatch.build.targets.sdist] +include = ["/livekit"] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e04425c023..b50e81f164 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ livekit-plugins-nltk = { workspace = true } livekit-plugins-openai = { workspace = true } livekit-plugins-playai = { workspace = true } livekit-plugins-resemble = { workspace = true } +livekit-plugins-respeecher = { workspace = true } livekit-plugins-rime = { workspace = true } livekit-plugins-sarvam = { workspace = true } livekit-plugins-silero = { workspace = true } diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index f4c10f8174..6792729dd6 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -54,6 +54,7 @@ services: - AWS_SECRET_ACCESS_KEY - NEUPHONIC_API_KEY - RESEMBLE_API_KEY + - RESPEECHER_API_KEY - SPEECHIFY_API_KEY - HUME_API_KEY - SPITCH_API_KEY @@ -75,6 +76,7 @@ services: - "api.play.ht:172.30.0.10" - "f.cluster.resemble.ai:172.30.0.10" - "websocket.cluster.resemble.ai:172.30.0.10" + - "api.respeecher.com:172.30.0.10" - "users.rime.ai:172.30.0.10" - "api.hume.ai:172.30.0.10" - "api.lmnt.com:172.30.0.10" diff --git a/tests/test_tts.py b/tests/test_tts.py index 1dec0cf3c3..8c6ee7d691 100644 --- a/tests/test_tts.py +++ b/tests/test_tts.py @@ -33,6 +33,7 @@ openai, playai, resemble, + respeecher, rime, speechify, spitch, @@ -222,6 +223,13 @@ async def assert_valid_synthesized_audio( }, id="resemble", ), + pytest.param( + lambda: { + "tts": respeecher.TTS(), + "proxy-upstream": "api.respeecher.com:443", + }, + id="respeecher", + ), pytest.param( lambda: { "tts": 
rime.TTS(), @@ -435,6 +443,13 @@ async def test_tts_synthesize_error_propagation(): }, id="resemble", ), + pytest.param( + lambda: { + "tts": respeecher.TTS(), + "proxy-upstream": "api.respeecher.com:443", + }, + id="respeecher", + ), pytest.param( lambda: { "tts": google.TTS(),