Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ https://github.com/KoljaB/RealtimeTTS/assets/7604638/87dcd9a5-3a4e-4f57-be45-837
- **High-Quality Audio**
- generates clear and natural-sounding speech
- **Multiple TTS Engine Support**
- supports OpenAI TTS, Elevenlabs, Azure Speech Services, Coqui TTS, StyleTTS2, Piper, gTTS, Edge TTS, Parler TTS, Kokoro, Cartesia, Faster Qwen 3, NeuTTS, PocketTTS, Modelslab, CAMB AI, MiniMax and System TTS
- supports OpenAI TTS, Elevenlabs, Azure Speech Services, Coqui TTS, StyleTTS2, Piper, gTTS, Edge TTS, Parler TTS, Kokoro, Cartesia, Faster Qwen 3, NeuTTS, PocketTTS, Modelslab, CAMB AI, MiniMax, Typecast and System TTS
- **Multilingual**
- **Robust and Reliable**:
- ensures continuous operation through a fallback mechanism
Expand Down Expand Up @@ -140,6 +140,7 @@ This library uses:
- **OrpheusEngine** 🏠: Llama‑powered TTS with emotion tags
- **CambEngine** 🌐: CAMB AI MARS models with 140+ languages
- **MiniMaxEngine** 🌐: MiniMax Cloud TTS with 12 voice presets
- **TypecastEngine** 🌐: Typecast Cloud TTS with emotion and prosody control
- **ZipVoiceEngine** 🏠: 123M zero‑shot model, state‑of‑the‑art quality
- **PocketTTSEngine** 🏠: Kyutai Labs 100M model, CPU-optimized with voice cloning
- **NeuTTSEngine** 🏠: Voice cloning with 3-second reference audio
Expand Down Expand Up @@ -183,6 +184,7 @@ Install only required dependencies using these options:
- **coqui**: Coqui TTS engine
- **camb**: CAMB AI MARS TTS
- **minimax**: MiniMax Cloud TTS
- **typecast**: Typecast Cloud TTS
- **minimal**: Core package only (for custom engine development)

Example: `pip install realtimetts[all]`, `pip install realtimetts[azure]`, `pip install realtimetts[azure,elevenlabs,openai]`
Expand Down Expand Up @@ -235,6 +237,13 @@ To use the `MiniMaxEngine`, you need:
- Available models: `speech-2.8-hd` (high quality), `speech-2.8-turbo` (fast)
- 12 voice presets including English and multilingual options

### TypecastEngine
To use the `TypecastEngine`, you need:
- Typecast API key (provided via TypecastEngine constructor parameter "api_key" or in the environment variable TYPECAST_API_KEY)
- Voice ID (provided via TypecastEngine constructor parameter "voice_id" or in the environment variable TYPECAST_VOICE_ID)
- Available models: `ssfm-v30` (default, supports smart emotion), `ssfm-v21`
- Emotion presets (ssfm-v30): `normal`, `happy`, `sad`, `angry`, `whisper`, `toneup`, `tonedown`

### ElevenlabsEngine
For the `ElevenlabsEngine`, you need:
- Elevenlabs API key (provided via ElevenlabsEngine constructor parameter "api_key" or in the environment variable ELEVENLABS_API_KEY)
Expand Down
16 changes: 16 additions & 0 deletions RealtimeTTS/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"MiniMaxEngine", "MiniMaxVoice",
"CartesiaEngine", "CartesiaVoice",
"FasterQwenEngine", "FasterQwenVoice",
"TypecastEngine", "TypecastVoice",
]


Expand Down Expand Up @@ -276,6 +277,19 @@ def _load_fasterqwen_engine():
return FasterQwenEngine


def _load_typecast_engine():
try:
from .engines.typecast_engine import TypecastEngine, TypecastVoice
except ImportError as e:
raise ImportError(
"Failed to load TypecastEngine and TypecastVoice. "
"Make sure the `cast` CLI is installed: https://typecast.ai"
) from e
globals()["TypecastEngine"] = TypecastEngine
globals()["TypecastVoice"] = TypecastVoice
return TypecastEngine


# Mapping names to their lazy loader functions.
_lazy_imports = {
"SystemEngine": _load_system_engine,
Expand Down Expand Up @@ -316,6 +330,8 @@ def _load_fasterqwen_engine():
"CartesiaVoice": _load_cartesia_engine,
"FasterQwenEngine": _load_fasterqwen_engine,
"FasterQwenVoice": _load_fasterqwen_engine,
"TypecastEngine": _load_typecast_engine,
"TypecastVoice": _load_typecast_engine,
}


Expand Down
12 changes: 11 additions & 1 deletion RealtimeTTS/engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
"ModelsLabEngine", "ModelsLabVoice",
"MiniMaxEngine", "MiniMaxVoice",
"CartesiaEngine", "CartesiaVoice",
"FasterQwenEngine", "FasterQwenVoice"
"FasterQwenEngine", "FasterQwenVoice",
"TypecastEngine", "TypecastVoice"
]


Expand Down Expand Up @@ -166,6 +167,13 @@ def _load_fasterqwen_engine():
return FasterQwenEngine


def _load_typecast_engine():
from .typecast_engine import TypecastEngine, TypecastVoice
globals()["TypecastEngine"] = TypecastEngine
globals()["TypecastVoice"] = TypecastVoice
return TypecastEngine


# Map attribute names to lazy loader functions.
_lazy_imports = {
"AzureEngine": _load_azure_engine,
Expand Down Expand Up @@ -208,6 +216,8 @@ def _load_fasterqwen_engine():
"CartesiaVoice": _load_cartesia_engine,
"FasterQwenEngine": _load_fasterqwen_engine,
"FasterQwenVoice": _load_fasterqwen_engine,
"TypecastEngine": _load_typecast_engine,
"TypecastVoice": _load_typecast_engine,
}


Expand Down
236 changes: 236 additions & 0 deletions RealtimeTTS/engines/typecast_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
from .base_engine import BaseEngine
from typing import Union
import os
import io
import wave
import pyaudio


class TypecastVoice:
    """Lightweight record describing a single Typecast voice."""

    def __init__(self, name: str, voice_id: str, gender: str = "", age: str = "",
                 use_cases: list = None, models: list = None):
        # None (the safe non-mutable default) is normalized to an empty list.
        self.name = name
        self.voice_id = voice_id
        self.gender = gender
        self.age = age
        self.use_cases = use_cases if use_cases else []
        self.models = models if models else []

    def __repr__(self):
        # Human-readable form used in voice listings: "<name> (<voice_id>)".
        return "{} ({})".format(self.name, self.voice_id)


class TypecastEngine(BaseEngine):
    """
    RealtimeTTS engine for the Typecast Cloud TTS API, backed by the official
    typecast-python SDK. Synthesized WAV audio is decoded and streamed to the
    engine's queue as raw PCM chunks.
    """

    def __init__(
        self,
        voice_id: str = None,
        model: str = "ssfm-v30",
        tempo: float = None,
        pitch: int = None,
        volume: int = None,
        language: str = None,
        emotion_type: str = "preset",
        emotion_preset: str = "normal",
        emotion_intensity: float = None,
        seed: int = None,
        api_key: str = None,
        host: str = None,
        debug: bool = False,
        chunk_size: int = 4096,
    ):
        """
        Initializes a Typecast TTS engine using the official typecast-python SDK.

        Args:
            voice_id (str): Typecast voice ID. If unspecified, uses TYPECAST_VOICE_ID env var.
            model (str): Model version. "ssfm-v30" or "ssfm-v21". Defaults to "ssfm-v30".
            tempo (float): Speech speed multiplier (0.5–2.0).
            pitch (int): Pitch in semitones (-12 to +12).
            volume (int): Volume (0–200).
            language (str): Language code (ISO 639-3). Auto-detected if None.
            emotion_type (str): Emotion mode. "preset" or "smart" (ssfm-v30 only).
                "smart" infers emotion from context automatically.
            emotion_preset (str): Emotion preset when emotion_type is "preset".
                ssfm-v30: normal, happy, sad, angry, whisper, toneup, tonedown.
                ssfm-v21: normal, happy, sad, angry.
            emotion_intensity (float): Emotion intensity (0.0–2.0). Defaults to 1.0.
            seed (int): Random seed for reproducible output.
            api_key (str): Typecast API key. If unspecified, uses TYPECAST_API_KEY env var.
            host (str): API host URL override. If unspecified, the SDK uses
                TYPECAST_API_HOST env var or the default endpoint.
            debug (bool): Print debug information.
            chunk_size (int): Audio chunk size in bytes for streaming.

        Raises:
            ValueError: If no API key is supplied via parameter or environment.
        """
        self.api_key = api_key or os.environ.get("TYPECAST_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Typecast API key is required. Provide it via api_key parameter "
                "or TYPECAST_API_KEY environment variable."
            )

        self.voice_id = voice_id or os.environ.get("TYPECAST_VOICE_ID")
        self.model = model
        self.tempo = tempo
        self.pitch = pitch
        self.volume = volume
        self.language = language
        self.emotion_type = emotion_type
        self.emotion_preset = emotion_preset
        self.emotion_intensity = emotion_intensity
        self.seed = seed
        self.host = host  # If None, SDK falls back to TYPECAST_API_HOST env var or default
        self.debug = debug
        self.chunk_size = chunk_size
        self._client = None  # Lazily created SDK client; see _get_client()

    def post_init(self):
        # Called by BaseEngine after construction; identifies this engine.
        self.engine_name = "typecast"

    def _get_client(self):
        """Returns the cached Typecast SDK client, creating it on first use.

        Raises:
            ImportError: If the typecast-python package is not installed.
        """
        if self._client is None:
            try:
                from typecast import Typecast
            except ImportError as e:
                raise ImportError(
                    "typecast-python package is required. Install with:\n"
                    "pip install typecast-python"
                ) from e
            kwargs = {"api_key": self.api_key}
            if self.host:
                kwargs["host"] = self.host
            self._client = Typecast(**kwargs)
        return self._client

    def _build_prompt(self):
        """Builds the appropriate prompt object based on emotion_type and model.

        Returns:
            A Prompt (ssfm-v21), SmartPrompt or PresetPrompt (ssfm-v30)
            instance, or None when ssfm-v21 has no emotion settings to send.
        """
        from typecast.models import Prompt, PresetPrompt, SmartPrompt

        if self.model == "ssfm-v21":
            # ssfm-v21 only understands the generic Prompt; "smart" emotion
            # is not available for this model.
            kwargs = {}
            if self.emotion_preset is not None:
                kwargs["emotion_preset"] = self.emotion_preset
            if self.emotion_intensity is not None:
                kwargs["emotion_intensity"] = self.emotion_intensity
            return Prompt(**kwargs) if kwargs else None

        # ssfm-v30
        if self.emotion_type == "smart":
            # Let the model infer emotion from the textual context.
            return SmartPrompt()
        else:
            kwargs = {}
            if self.emotion_preset is not None:
                kwargs["emotion_preset"] = self.emotion_preset
            if self.emotion_intensity is not None:
                kwargs["emotion_intensity"] = self.emotion_intensity
            return PresetPrompt(**kwargs)

    def get_stream_info(self):
        """
        Returns PyAudio stream configuration for Typecast (WAV: mono, 44100Hz, 16-bit).

        NOTE(review): the format is hard-coded; this assumes every Typecast WAV
        response is 16-bit / mono / 44.1 kHz — confirm against the API's actual
        output, since synthesize() streams raw frames without resampling.
        """
        return pyaudio.paInt16, 1, 44100

    def synthesize(self, text: str) -> bool:
        """
        Synthesizes text to audio using the typecast-python SDK and streams to queue.

        Args:
            text (str): Text to synthesize.

        Returns:
            bool: True on success, False on failure.
        """
        if not self.voice_id:
            print("[TypecastEngine] Error: voice_id is not set. "
                  "Pass voice_id to the constructor or set TYPECAST_VOICE_ID.")
            return False

        if self.debug:
            print(f"[TypecastEngine] Synthesizing: \"{text}\"")

        try:
            # Imported inside the try so a missing SDK is reported through the
            # normal failure path (print + return False) instead of raising an
            # uncaught ModuleNotFoundError to the caller.
            from typecast.models import TTSRequest, TTSModel, Output

            output_kwargs = {"audio_format": "wav"}
            if self.tempo is not None:
                output_kwargs["audio_tempo"] = self.tempo
            if self.pitch is not None:
                output_kwargs["audio_pitch"] = self.pitch
            if self.volume is not None:
                output_kwargs["volume"] = self.volume

            request = TTSRequest(
                text=text,
                voice_id=self.voice_id,
                model=TTSModel(self.model),
                language=self.language,
                prompt=self._build_prompt(),
                seed=self.seed,
                output=Output(**output_kwargs),
            )

            client = self._get_client()
            response = client.text_to_speech(request)

            with wave.open(io.BytesIO(response.audio_data), "rb") as wf:
                # Read at least one frame per iteration: a chunk_size smaller
                # than one frame would make readframes(0) return b"" and
                # silently drop the whole clip.
                frames_per_chunk = max(
                    1, self.chunk_size // (wf.getsampwidth() * wf.getnchannels())
                )
                while not self.stop_synthesis_event.is_set():
                    data = wf.readframes(frames_per_chunk)
                    if not data:
                        break
                    self.queue.put(data)

        except Exception as e:
            print(f"[TypecastEngine] Error during synthesis: {e}")
            return False

        return True

    def get_voices(self) -> list:
        """
        Returns available Typecast voices via the SDK.

        Returns:
            list[TypecastVoice]: Available voices; empty list on any error.
        """
        try:
            voices = self._get_client().voices_v2()
            return [
                TypecastVoice(
                    name=v.voice_name,
                    voice_id=v.voice_id,
                    gender=v.gender.value if v.gender else "",
                    age=v.age.value if v.age else "",
                    use_cases=v.use_cases or [],
                    models=[m.version for m in v.models] if v.models else [],
                )
                for v in voices
            ]
        except Exception as e:
            print(f"[TypecastEngine] Error fetching voices: {e}")
            return []

    def set_voice(self, voice: Union[str, TypecastVoice]):
        """
        Sets the voice for synthesis.

        Args:
            voice: Voice name/ID string or TypecastVoice object. A string is
                matched case-insensitively against voice names (substring) or
                exactly against voice IDs; if nothing matches, it is stored
                verbatim as the voice_id.
        """
        if isinstance(voice, TypecastVoice):
            self.voice_id = voice.voice_id
            return
        for v in self.get_voices():
            if voice.lower() in v.name.lower() or voice == v.voice_id:
                self.voice_id = v.voice_id
                return
        # Fallback: treat as raw voice_id
        self.voice_id = voice

    def set_voice_parameters(self, **voice_parameters):
        """
        Sets voice parameters.

        Supported keys: tempo, pitch, volume, language, seed, model,
        emotion_type, emotion_preset, emotion_intensity

        Unknown keys (no matching attribute) are silently ignored.
        """
        for key, value in voice_parameters.items():
            if hasattr(self, key):
                setattr(self, key, value)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- **minimax**: MiniMax Cloud TTS
- **cartesia** Cartesia API integration
- **qwen** Faster Qwen3 TTS integration
- **typecast**: Typecast Cloud TTS
- **minimal**: Core package only (for custom engine development)

You can install multiple engines by separating them with commas. For example:
Expand Down Expand Up @@ -94,6 +95,7 @@ def parse_requirements(filename):
"minimax": base_requirements + [requirements.get("requests", "requests")],
"orpheus": base_requirements + [requirements.get("snac", "snac")],
"qwen": base_requirements + [requirements.get("faster-qwen3-tts", "faster-qwen3-tts")],
"typecast": base_requirements + ["typecast-python"],
"jp": base_requirements +["mecab-python3>=1.0.6", "unidic-lite>=1.0.8", "cutlet", "fugashi>=1.4.0", "jaconv>=0.4.0", "mojimoji>=0.0.13", "pyopenjtalk>=0.4.0"],
"zh": base_requirements +["pypinyin>=0.53.0", "ordered_set>=4.1.0", "jieba>=0.42.1", "cn2an>=0.5.23"],
"ko": base_requirements +["hangul_romanize"],
Expand Down
Loading