From ae44a48227df42dee450e0ded4248674e3dee79e Mon Sep 17 00:00:00 2001
From: Anshuman Suri <as9rw@virginia.edu>
Date: Tue, 5 Aug 2025 14:11:05 -0400
Subject: [PATCH 01/14] Typing, minor README edits, gitignore

---
 .gitignore             |  1 +
 README.md              | 14 +-------------
 kittentts/.gitignore   |  1 +
 kittentts/get_model.py | 15 ++++++++-------
 4 files changed, 11 insertions(+), 20 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 kittentts/.gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aace59f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.egg-info/
\ No newline at end of file
diff --git a/README.md b/README.md
index 81536da..5812ba2 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,6 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 
 [Join our discord](https://discord.gg/upcyF5s6)
 
-
 ## ✨ Features
 
 - **Ultra-lightweight**: Model size less than 25MB
@@ -14,8 +13,6 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 - **High-quality voices**: Several premium voice options available
 - **Fast inference**: Optimized for real-time speech synthesis
 
-
-
 ## 🚀 Quick Start
 
 ### Installation
@@ -24,9 +21,7 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
-
-
- ### Basic Usage 
+### Basic Usage 
 
 ```
 from kittentts import KittenTTS
@@ -42,20 +37,13 @@ sf.write('output.wav', audio, 24000)
 
 ```
 
-
-
-
-
 ## 💻 System Requirements
 
 Works literally everywhere
 
-
-
 ## Checklist 
 
 - [x] Release a preview model
 - [ ] Release the fully trained model weights
 - [ ] Release mobile SDK 
 - [ ] Release web version 
-
diff --git a/kittentts/.gitignore b/kittentts/.gitignore
new file mode 100644
index 0000000..763624e
--- /dev/null
+++ b/kittentts/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
\ No newline at end of file
diff --git a/kittentts/get_model.py b/kittentts/get_model.py
index f91c28c..af2febf 100644
--- a/kittentts/get_model.py
+++ b/kittentts/get_model.py
@@ -1,5 +1,6 @@
 import json
 import os
+import numpy as np
 from huggingface_hub import hf_hub_download
 from .onnx_model import KittenTTS_1_Onnx
 
@@ -22,8 +23,8 @@ def __init__(self, model_name="KittenML/kitten-tts-nano-0.1", cache_dir=None):
             repo_id = model_name
             
         self.model = download_from_huggingface(repo_id=repo_id, cache_dir=cache_dir)
-    
-    def generate(self, text, voice="expr-voice-5-m", speed=1.0):
+
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0) -> np.ndarray:
         """Generate audio from text.
         
         Args:
@@ -35,8 +36,8 @@ def generate(self, text, voice="expr-voice-5-m", speed=1.0):
             Audio data as numpy array
         """
         return self.model.generate(text, voice=voice, speed=speed)
-    
-    def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, sample_rate=24000):
+
+    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", speed: float = 1.0, sample_rate: int = 24000):
         """Generate audio from text and save to file.
         
         Args:
@@ -46,7 +47,7 @@ def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0,
             speed: Speech speed (1.0 = normal)
             sample_rate: Audio sample rate
         """
-        return self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
+        self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
     
     @property
     def available_voices(self):
@@ -54,7 +55,7 @@ def available_voices(self):
         return self.model.available_voices
 
 
-def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def download_from_huggingface(repo_id: str="KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS_1_Onnx:
     """Download model files from Hugging Face repository.
     
     Args:
@@ -97,6 +98,6 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=
     return model
 
 
-def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def get_model(repo_id: str="KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS:
     """Get a KittenTTS model (legacy function for backward compatibility)."""
     return KittenTTS(repo_id, cache_dir)

From 5f9fe405ad6ad27773229f0addaedaabe94a65ba Mon Sep 17 00:00:00 2001
From: Anshuman Suri <as9rw@virginia.edu>
Date: Tue, 5 Aug 2025 14:39:24 -0400
Subject: [PATCH 02/14] Minor edits

---
 README.md               |  2 +-
 kittentts/get_model.py  |  2 ++
 kittentts/onnx_model.py | 18 ++++++------------
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 5812ba2..ac37b0a 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentt
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.1")
 
-audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f' )
+audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f')
 
 # available_voices : [  'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',  'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ]
 
diff --git a/kittentts/get_model.py b/kittentts/get_model.py
index af2febf..ed8354a 100644
--- a/kittentts/get_model.py
+++ b/kittentts/get_model.py
@@ -35,6 +35,8 @@ def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0)
         Returns:
             Audio data as numpy array
         """
+        if not text:
+            raise ValueError("Input text cannot be empty.")
         return self.model.generate(text, voice=voice, speed=speed)
 
     def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", speed: float = 1.0, sample_rate: int = 24000):
diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
index e93c463..904484f 100644
--- a/kittentts/onnx_model.py
+++ b/kittentts/onnx_model.py
@@ -5,7 +5,7 @@
 import onnxruntime as ort
 
 
-def basic_english_tokenize(text):
+def basic_english_tokenize(text: str) -> list:
     """Basic English tokenizer that splits on whitespace and punctuation."""
     import re
     tokens = re.findall(r"\w+|[^\w\s]", text)
@@ -27,14 +27,9 @@ def __init__(self, dummy=None):
 
         self.word_index_dictionary = dicts
 
-    def __call__(self, text):
-        indexes = []
-        for char in text:
-            try:
-                indexes.append(self.word_index_dictionary[char])
-            except KeyError:
-                pass
-        return indexes
+    def __call__(self, text: str) -> list:
+        dicts = self.word_index_dictionary
+        return [dicts[char] for char in text if char in dicts]
 
 
 class KittenTTS_1_Onnx:
@@ -48,7 +43,6 @@ def __init__(self, model_path="kitten_tts_nano_preview.onnx", voices_path="voice
         self.model_path = model_path
         self.voices = np.load(voices_path)
         self.session = ort.InferenceSession(model_path)
-        
         self.phonemizer = phonemizer.backend.EspeakBackend(
             language="en-us", preserve_punctuation=True, with_stress=True
         )
@@ -124,10 +118,10 @@ def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice
 
 # Example usage
 if __name__ == "__main__":
-    tts = KittenTTS()
+    tts = KittenTTS_1_Onnx()
     
     text = """
     It begins with an "Ugh!" Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.
     """
 
-    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")
\ No newline at end of file
+    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")

From 3883bdf80d9e9e4bdf0d1d4707fa68d995d41c56 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 13:22:03 +0300
Subject: [PATCH 03/14] Trim generated audio based on edge silence

---
 kittentts/onnx_model.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
index e93c463..b9c16c4 100644
--- a/kittentts/onnx_model.py
+++ b/kittentts/onnx_model.py
@@ -100,10 +100,14 @@ def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0)
         onnx_inputs = self._prepare_inputs(text, voice, speed)
         
         outputs = self.session.run(None, onnx_inputs)
-        
-        # Trim audio
-        audio = outputs[0][5000:-10000]
 
+        audio = outputs[0]  # shape (n,)
+        # Trim edge silence from audio
+        non_silent = np.abs(audio) >= 0.01
+        if np.any(non_silent):
+            indices = np.where(non_silent)[0]
+            start, end = indices[0], indices[-1]
+            audio = audio[start : end + 1]
         return audio
     
     def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", 

From 8e7213027eef7b65f7aa238498e561f6ae648a28 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 13:38:46 +0300
Subject: [PATCH 04/14] Remove duplicate packaging files; use Hatchling as
 packaging backend

---
 MANIFEST.in      |  9 ---------
 pyproject.toml   | 33 +++++++++++++++++++--------------
 requirements.txt |  8 --------
 setup.py         | 46 ----------------------------------------------
 4 files changed, 19 insertions(+), 77 deletions(-)
 delete mode 100644 MANIFEST.in
 delete mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 953bb15..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,9 +0,0 @@
-include README.md
-include LICENSE
-include requirements.txt
-recursive-include kittentts *.py
-recursive-include kittentts *.json
-recursive-include kittentts *.txt
-recursive-include kittentts *.onnx
-global-exclude __pycache__
-global-exclude *.py[co]
diff --git a/pyproject.toml b/pyproject.toml
index c2d1e5c..55ef4df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,41 +1,46 @@
 [build-system]
-requires = ["setuptools>=45", "wheel"]
-build-backend = "setuptools.build_meta"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
 [project]
 name = "kittentts"
-version = "0.1.0"
 description = "Ultra-lightweight text-to-speech model with just 15 million parameters"
 readme = "README.md"
 requires-python = ">=3.8"
-license = {text = "Apache 2.0"}
+license = "Apache-2.0"
 authors = [
     {name = "KittenML"}
 ]
 keywords = ["text-to-speech", "tts", "speech-synthesis", "neural-networks", "onnx"]
 classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Multimedia :: Sound/Audio :: Speech",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
 dependencies = [
-    "num2words",
-    "spacy",
     "espeakng_loader",
+    "huggingface_hub",
     "misaki[en]>=0.9.4",
+    "num2words",
+    "numpy",
     "onnxruntime",
     "soundfile",
-    "numpy",
-    "huggingface_hub",
+    "spacy",
 ]
+dynamic = ["version"]
 
 [project.urls]
 Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
 Issues = "https://github.com/kittenml/kittentts/issues"
 
-[tool.setuptools.packages.find]
-where = ["."]
-include = ["kittentts*"]
-
-[tool.setuptools.package-data]
-kittentts = ["*.json", "*.txt", "*.onnx"]
+[tool.hatch.version]
+path = "kittentts/__init__.py"
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 37bfbb3..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-num2words
-spacy
-espeakng_loader
-misaki[en]>=0.9.4
-onnxruntime
-soundfile
-numpy
-huggingface_hub
diff --git a/setup.py b/setup.py
deleted file mode 100644
index d0ac187..0000000
--- a/setup.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from setuptools import setup, find_packages
-
-with open("README.md", "r", encoding="utf-8") as fh:
-    long_description = fh.read()
-
-setup(
-    name="kittentts",
-    version="0.1.0",
-    author="KittenML",
-    author_email="",
-    description="Ultra-lightweight text-to-speech model with just 15 million parameters",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/kittenml/kittentts",
-    packages=find_packages(),
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "Topic :: Multimedia :: Sound/Audio :: Speech",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-    python_requires=">=3.8",
-    install_requires=[
-        "num2words",
-        "spacy",
-        "espeakng_loader",
-        "misaki[en]>=0.9.4",
-        "onnxruntime",
-        "soundfile",
-        "numpy",
-        "huggingface_hub",
-    ],
-    keywords="text-to-speech, tts, speech-synthesis, neural-networks, onnx",
-    project_urls={
-        "Bug Reports": "https://github.com/kittenml/kittentts/issues",
-        "Source": "https://github.com/kittenml/kittentts",
-    },
-)

From 03853c70da170fa727ace65215af369a6f504030 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 13:45:43 +0300
Subject: [PATCH 05/14] Remove unnecessary misaki dependency

* Remove the `misaki` dependency, but directly depend on `phonemizer-fork` instead.
* Do the side-effect phonemizer initialization call by hand
---
 kittentts/onnx_model.py |  6 +++++-
 pyproject.toml          | 10 +++++-----
 requirements.txt        | 10 +++++-----
 setup.py                | 10 +++++-----
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
index e93c463..a53c42d 100644
--- a/kittentts/onnx_model.py
+++ b/kittentts/onnx_model.py
@@ -1,8 +1,12 @@
-from misaki import en, espeak
 import numpy as np
 import phonemizer
 import soundfile as sf
 import onnxruntime as ort
+import espeakng_loader
+from phonemizer.backend.espeak.wrapper import EspeakWrapper
+
+EspeakWrapper.set_library(espeakng_loader.get_library_path())
+EspeakWrapper.set_data_path(espeakng_loader.get_data_path())
 
 
 def basic_english_tokenize(text):
diff --git a/pyproject.toml b/pyproject.toml
index c2d1e5c..246e83b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,14 +18,14 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
 dependencies = [
-    "num2words",
-    "spacy",
     "espeakng_loader",
-    "misaki[en]>=0.9.4",
+    "huggingface_hub",
+    "num2words",
+    "numpy",
     "onnxruntime",
+    "phonemizer-fork~=3.3.2",
     "soundfile",
-    "numpy",
-    "huggingface_hub",
+    "spacy",
 ]
 
 [project.urls]
diff --git a/requirements.txt b/requirements.txt
index 37bfbb3..5c68793 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-num2words
-spacy
 espeakng_loader
-misaki[en]>=0.9.4
+huggingface_hub
+num2words
+numpy
 onnxruntime
+phonemizer-fork~=3.3.2
 soundfile
-numpy
-huggingface_hub
+spacy
diff --git a/setup.py b/setup.py
index d0ac187..9259fa3 100644
--- a/setup.py
+++ b/setup.py
@@ -29,14 +29,14 @@
     ],
     python_requires=">=3.8",
     install_requires=[
-        "num2words",
-        "spacy",
         "espeakng_loader",
-        "misaki[en]>=0.9.4",
+        "huggingface_hub",
+        "num2words",
+        "numpy",
         "onnxruntime",
+        "phonemizer-fork~=3.3.2",
         "soundfile",
-        "numpy",
-        "huggingface_hub",
+        "spacy",
     ],
     keywords="text-to-speech, tts, speech-synthesis, neural-networks, onnx",
     project_urls={

From 0d7d96e0e2a17374ef425a3a268ef0254ecbe27e Mon Sep 17 00:00:00 2001
From: vincent d warmerdam <vincentwarmerdam@gmail.com>
Date: Wed, 6 Aug 2025 13:23:10 +0200
Subject: [PATCH 06/14] syntax highlighting

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 81536da..e9b3a32 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentt
 
  ### Basic Usage 
 
-```
+```python
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.1")
 
@@ -39,7 +39,6 @@ audio = m.generate("This high quality TTS model works without a GPU", voice='exp
 # Save the audio
 import soundfile as sf
 sf.write('output.wav', audio, 24000)
-
 ```
 
 

From 0b4ad69ee103974f46e906e4ade9e6891ff921ba Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 16:44:38 +0100
Subject: [PATCH 07/14] Add CLI binary interface for Kitten TTS

- Add executable kitten-tts wrapper script
- Add kittentts/cli.py with full command-line interface
- Configure console script entry point in pyproject.toml
- Implement audio fade-out with customizable duration (default: 0.2s)
- Add automatic dots suffix to prevent audio cutoff
- Support all available voices, speed control, and audio formats
- Add joblib dependency for proper package installation
- Include comprehensive help documentation and examples

Features:
- Text-to-speech synthesis via command line
- Multiple voice options (expr-voice-2-m/-f through expr-voice-5-m/-f)
- Adjustable speech speed and fade-out duration
- Audio file output (WAV, FLAC, OGG) or direct playback
- Automatic text preprocessing to prevent abrupt cutoffs
---
 kitten-tts       |  19 ++++
 kittentts/cli.py | 241 +++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml   |   3 +
 3 files changed, 263 insertions(+)
 create mode 100755 kitten-tts
 create mode 100755 kittentts/cli.py

diff --git a/kitten-tts b/kitten-tts
new file mode 100755
index 0000000..f7f49e0
--- /dev/null
+++ b/kitten-tts
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+"""
+Kitten TTS Binary - Text-to-Speech Command Line Tool
+This is a wrapper script for kittentts/cli.py.
+"""
+
+import sys
+import os
+
+# Get the directory where this script is located
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Import and run the CLI
+sys.path.insert(0, script_dir)
+
+from kittentts.cli import main
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/kittentts/cli.py b/kittentts/cli.py
new file mode 100755
index 0000000..093b8c3
--- /dev/null
+++ b/kittentts/cli.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""
+Kitten TTS CLI - Text-to-Speech Command Line Tool
+
+Usage:
+    kitten-tts "Hello world"                           # Speak text
+    kitten-tts "Hello world" --voice expr-voice-2-f    # Use specific voice
+    kitten-tts "Hello world" --output output.wav       # Save to file
+    kitten-tts --list-voices                           # List available voices
+    kitten-tts --help                                  # Show help
+"""
+
+import argparse
+import sys
+import os
+import numpy as np
+import soundfile as sf
+
+# Add the parent of this package directory to sys.path so that `import kittentts`
+# resolves when this file (kittentts/cli.py) is run directly as a script.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
+
+# Default fade out duration in seconds
+DEFAULT_FADE_OUT = 0.2
+
+try:
+    from kittentts import KittenTTS
+except ImportError:
+    print("Error: KittenTTS not found. Please install it with:")
+    print("pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl")
+    sys.exit(1)
+
+
+def apply_fade_out(audio_data, sample_rate=24000, fade_duration=DEFAULT_FADE_OUT):
+    """Apply a quadratic fade out to audio data.
+
+    Args:
+        audio_data: NumPy array of audio samples
+        sample_rate: Audio sample rate (default: 24000)
+        fade_duration: Fade out duration in seconds (default: DEFAULT_FADE_OUT, i.e. 0.2s)
+
+    Returns:
+        Audio data with fade out applied
+    """
+    if len(audio_data) == 0:
+        return audio_data
+
+    fade_samples = int(fade_duration * sample_rate)
+    if fade_samples >= len(audio_data):
+        fade_samples = len(audio_data) // 2  # Limit fade to half of audio if very short
+
+    # Create the fade curve: a linear ramp from 1 to 0, squared
+    fade_curve = np.linspace(1, 0, fade_samples) ** 2  # Quadratic fade for smoother curve
+
+    # Apply fade to the end of audio
+    audio_with_fade = audio_data.copy()
+    audio_with_fade[-fade_samples:] *= fade_curve
+
+    return audio_with_fade
+
+
+def list_voices(model):
+    """List all available voices."""
+    print("Available voices:")
+    for voice in model.available_voices:
+        print(f"  - {voice}")
+
+
+def play_audio_simple(audio_data, sample_rate=24000):
+    """Simple audio playback using system command."""
+    # Save to temporary file and play with system command
+    temp_file = "temp_kitten_tts_output.wav"
+    try:
+        sf.write(temp_file, audio_data, sample_rate)
+
+        # Try different system audio players based on OS
+        import subprocess
+        import platform
+
+        system = platform.system()
+        if system == "Darwin":  # macOS
+            subprocess.run(["afplay", temp_file], check=True)
+        elif system == "Linux":
+            # Try common Linux audio players
+            for player in ["aplay", "paplay", "mpg123", "mplayer"]:
+                try:
+                    subprocess.run([player, temp_file], check=True)
+                    break
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    continue
+            else:
+                print(f"Audio saved to {temp_file} (no suitable audio player found)")
+        elif system == "Windows":
+            subprocess.run(["start", temp_file], shell=True, check=True)
+        else:
+            print(f"Audio saved to {temp_file} (unsupported OS for direct playback)")
+
+        # Clean up temp file
+        try:
+            os.remove(temp_file)
+        except:
+            pass
+
+    except Exception as e:
+        print(f"Error playing audio: {e}")
+        print(f"Audio saved to {temp_file}")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Kitten TTS - Ultra-lightweight text-to-speech synthesis",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s "Hello world"                           # Speak text
+  %(prog)s "Hello world" --voice expr-voice-2-f    # Use specific voice
+  %(prog)s "Hello world" --output output.wav       # Save to file
+  %(prog)s "Hello world" --speed 1.2               # Faster speech
+  %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  %(prog)s --list-voices                          # List available voices
+        """
+    )
+
+    parser.add_argument(
+        "text",
+        nargs="?",
+        help="Text to synthesize into speech"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="KittenML/kitten-tts-nano-0.2",
+        help="Model name or path (default: KittenML/kitten-tts-nano-0.2)"
+    )
+
+    parser.add_argument(
+        "--voice",
+        default="expr-voice-2-m",
+        help="Voice to use (default: expr-voice-2-m)"
+    )
+
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed (1.0 = normal, higher = faster, lower = slower)"
+    )
+
+    parser.add_argument(
+        "--fade-out",
+        type=float,
+        default=DEFAULT_FADE_OUT,
+        help=f"Fade out duration in seconds (default: {DEFAULT_FADE_OUT}, use 0 to disable)"
+    )
+
+    parser.add_argument(
+        "--output", "-o",
+        help="Output file path (saves as WAV). If not specified, plays through speakers."
+    )
+
+    parser.add_argument(
+        "--list-voices",
+        action="store_true",
+        help="List available voices and exit"
+    )
+
+    parser.add_argument(
+        "--format",
+        choices=["wav", "flac", "ogg"],
+        default="wav",
+        help="Audio format for output file (default: wav)"
+    )
+
+    args = parser.parse_args()
+
+    # Handle --list-voices
+    if args.list_voices:
+        try:
+            model = KittenTTS(args.model)
+            list_voices(model)
+            return 0
+        except Exception as e:
+            print(f"Error loading model: {e}", file=sys.stderr)
+            return 1
+
+    # Check if text is provided
+    if not args.text:
+        parser.print_help()
+        print("\nError: Text input is required", file=sys.stderr)
+        return 1
+
+    try:
+        # Initialize the model
+        print(f"Loading model: {args.model}...")
+        model = KittenTTS(args.model)
+
+        # Validate voice
+        if args.voice not in model.available_voices:
+            print(f"Error: Voice '{args.voice}' not available.", file=sys.stderr)
+            print(f"Available voices: {', '.join(model.available_voices)}")
+            return 1
+
+        # Add dots at the end to prevent cutoff (simple fix)
+        if not args.text.endswith('...'):
+            args.text = args.text + '...'
+            print(f"Added dots to prevent audio cutoff")
+
+        # Generate audio
+        print(f"Generating speech using voice: {args.voice}...")
+        audio = model.generate(args.text, voice=args.voice, speed=args.speed)
+
+        # Apply fade out if specified
+        if args.fade_out > 0:
+            print(f"Applying {args.fade_out}s fade out...")
+            audio = apply_fade_out(audio, sample_rate=24000, fade_duration=args.fade_out)
+
+        if args.output:
+            # Save to file
+            print(f"Saving audio to: {args.output}")
+            sf.write(args.output, audio, 24000)
+            print("Done!")
+        else:
+            # Play through speakers
+            print("Playing audio...")
+            play_audio_simple(audio)
+            print("Done!")
+
+        return 0
+
+    except KeyboardInterrupt:
+        print("\nInterrupted by user")
+        return 1
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index c2d1e5c..7a460b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,9 @@ Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
 Issues = "https://github.com/kittenml/kittentts/issues"
 
+[project.scripts]
+kitten-tts = "kittentts.cli:main"
+
 [tool.setuptools.packages.find]
 where = ["."]
 include = ["kittentts*"]

From 6fa98b8da6b5ade50e2a1b7c93e23233d93a9e11 Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 16:58:42 +0100
Subject: [PATCH 08/14] Add stdin pipeline support to CLI interface

- Implemented pipeline/stdin reading functionality
- Added support for piping text to kitten-tts command
- Updated help documentation with pipeline usage examples
- Enhanced error handling for stdin operations
- Maintained backward compatibility with argument-based input

Usage examples:
  echo "hello world" | ./kitten-tts
  cat text_file.txt | ./kitten-tts --output audio.wav
---
 kittentts/cli.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/kittentts/cli.py b/kittentts/cli.py
index 093b8c3..008916e 100755
--- a/kittentts/cli.py
+++ b/kittentts/cli.py
@@ -119,6 +119,7 @@ def main():
   %(prog)s "Hello world" --output output.wav       # Save to file
   %(prog)s "Hello world" --speed 1.2               # Faster speech
   %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  echo "Hello world" | %(prog)s                    # Read from stdin
   %(prog)s --list-voices                          # List available voices
         """
     )
@@ -126,7 +127,7 @@ def main():
     parser.add_argument(
         "text",
         nargs="?",
-        help="Text to synthesize into speech"
+        help="Text to synthesize into speech (if not provided, reads from stdin)"
     )
 
     parser.add_argument(
@@ -185,11 +186,26 @@ def main():
             print(f"Error loading model: {e}", file=sys.stderr)
             return 1
 
-    # Check if text is provided
-    if not args.text:
-        parser.print_help()
-        print("\nError: Text input is required", file=sys.stderr)
-        return 1
+    # Get text from command line or stdin
+    if args.text:
+        text = args.text
+    else:
+        # Read from stdin
+        try:
+            if sys.stdin.isatty():
+                # No pipe, interactive mode
+                parser.print_help()
+                print("\nError: Text input is required (provide as argument or pipe from stdin)", file=sys.stderr)
+                return 1
+            else:
+                # Pipe detected, read from stdin
+                text = sys.stdin.read().strip()
+                if not text:
+                    print("\nError: No text received from stdin", file=sys.stderr)
+                    return 1
+        except Exception as e:
+            print(f"Error reading from stdin: {e}", file=sys.stderr)
+            return 1
 
     try:
         # Initialize the model
@@ -203,13 +219,13 @@ def main():
             return 1
 
         # Add dots at the end to prevent cutoff (simple fix)
-        if not args.text.endswith('...'):
-            args.text = args.text + '...'
+        if not text.endswith('...'):
+            text = text + '...'
             print(f"Added dots to prevent audio cutoff")
 
         # Generate audio
         print(f"Generating speech using voice: {args.voice}...")
-        audio = model.generate(args.text, voice=args.voice, speed=args.speed)
+        audio = model.generate(text, voice=args.voice, speed=args.speed)
 
         # Apply fade out if specified
         if args.fade_out > 0:

From bdf63530c0c93e7e44a9903673c7d3347b9cfdda Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 17:08:14 +0100
Subject: [PATCH 09/14] Add CLI documentation to README.md

- Added comprehensive CLI usage section
- Documented installation and setup steps for CLI
- Listed all CLI features and available voices
- Added examples for both argument and stdin/pipeline usage
- Organized Python API and CLI sections separately
- Updated features list to highlight CLI functionality
---
 README.md | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0f46ec7..cf893fb 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ Email the creators with any questions : info@stellonlabs.com
 - **CPU-optimized**: Runs without GPU on any device
 - **High-quality voices**: Several premium voice options available
 - **Fast inference**: Optimized for real-time speech synthesis
+- **Command-line interface**: Easy-to-use CLI with pipeline support
 
 
 
@@ -30,9 +31,10 @@ pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentt
 
 
 
- ### Basic Usage 
+ ### Basic Usage
 
-```
+#### Python API
+```python
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.2")
 
@@ -43,9 +45,43 @@ audio = m.generate("This high quality TTS model works without a GPU", voice='exp
 # Save the audio
 import soundfile as sf
 sf.write('output.wav', audio, 24000)
+```
+
+#### Command Line Interface (CLI)
+
+```bash
+# Clone the repository
+git clone https://github.com/KittenML/KittenTTS.git
+cd KittenTTS
 
+# Create and activate virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Use the CLI
+./kitten-tts "Hello world"                           # Speak text
+./kitten-tts "Hello world" --output hello.wav       # Save to file
+echo "Hello world" | ./kitten-tts                   # Read from stdin
+./kitten-tts --list-voices                          # List available voices
 ```
 
+**CLI Features:**
+- **Text input** via arguments or stdin (pipeline support)
+- **8 different voices** (expr-voice-2/m/f through expr-voice-5/m/f)
+- **Speed control** with `--speed` option
+- **Audio fade-out** with `--fade-out` option (default: 0.2s)
+- **Multiple formats** (WAV, FLAC, OGG)
+- **Cross-platform audio playback** (macOS, Linux, Windows)
+
+**Available Voices:**
+- `expr-voice-2-m` / `expr-voice-2-f`
+- `expr-voice-3-m` / `expr-voice-3-f`
+- `expr-voice-4-m` / `expr-voice-4-f`
+- `expr-voice-5-m` / `expr-voice-5-f`
+
 
 
 

From 283d38cd95b6dd4252ccd99e8694de4b22f2da1f Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 17:09:39 +0100
Subject: [PATCH 10/14] Improve CLI documentation with collapsible section

- Organized CLI documentation in a collapsible details section
- Added structured subsections (Installation, Basic Usage, Advanced Options)
- Improved readability with better organization
- Maintained all CLI features and examples
- Made README more concise while preserving comprehensive information
---
 README.md | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index cf893fb..0400c55 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,11 @@ sf.write('output.wav', audio, 24000)
 
 #### Command Line Interface (CLI)
 
+<details>
+<summary>Click to expand CLI usage instructions</summary>
+
+##### Installation
+
 ```bash
 # Clone the repository
 git clone https://github.com/KittenML/KittenTTS.git
@@ -60,28 +65,51 @@ source venv/bin/activate  # On Windows: venv\Scripts\activate
 
 # Install dependencies
 pip install -r requirements.txt
+```
+
+##### Basic Usage
 
-# Use the CLI
+```bash
 ./kitten-tts "Hello world"                           # Speak text
 ./kitten-tts "Hello world" --output hello.wav       # Save to file
 echo "Hello world" | ./kitten-tts                   # Read from stdin
 ./kitten-tts --list-voices                          # List available voices
 ```
 
-**CLI Features:**
+##### Advanced Options
+
+```bash
+# With specific voice and fade-out
+./kitten-tts "Hello world" --voice expr-voice-2-f --fade-out 0.3
+
+# Adjust speech speed
+./kitten-tts "Hello world" --speed 1.5
+
+# Different audio formats
+./kitten-tts "Hello world" --output audio.flac --format flac
+
+# Pipeline usage with files
+cat text_file.txt | ./kitten-tts --output speech.wav
+```
+
+##### CLI Features
+
 - **Text input** via arguments or stdin (pipeline support)
 - **8 different voices** (expr-voice-2/m/f through expr-voice-5/m/f)
-- **Speed control** with `--speed` option
-- **Audio fade-out** with `--fade-out` option (default: 0.2s)
+- **Speed control** with `--speed` option (1.0 = normal)
+- **Audio fade-out** with `--fade-out` option (default: 0.2s, use 0 to disable)
 - **Multiple formats** (WAV, FLAC, OGG)
 - **Cross-platform audio playback** (macOS, Linux, Windows)
 
-**Available Voices:**
+##### Available Voices
+
 - `expr-voice-2-m` / `expr-voice-2-f`
 - `expr-voice-3-m` / `expr-voice-3-f`
 - `expr-voice-4-m` / `expr-voice-4-f`
 - `expr-voice-5-m` / `expr-voice-5-f`
 
+</details>
+
 
 
 

From ece72eff7302abca741c8c4a866d779d3c84b09d Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 17:10:41 +0100
Subject: [PATCH 11/14] Simplify CLI section title

- Changed 'Click to expand CLI usage instructions' to 'CLI Usage Instructions'
- More concise and cleaner collapsible section header
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0400c55..e5ad399 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ sf.write('output.wav', audio, 24000)
 #### Command Line Interface (CLI)
 
 <details>
-<summary>Click to expand CLI usage instructions</summary>
+<summary>CLI Usage Instructions</summary>
 
 ##### Installation
 

From 6b76cdeeb8f86745dc0017449bc0937a7c475dea Mon Sep 17 00:00:00 2001
From: Kirby Rs <andkirby@gmail.com>
Date: Sat, 8 Nov 2025 11:12:57 -0500
Subject: [PATCH 12/14] Update checklist in README for CLI support

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index e5ad399..1bf0e68 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,7 @@ Works literally everywhere
 ## Checklist 
 
 - [x] Release a preview model
+- [x] CLI support
 - [ ] Release the fully trained model weights
 - [ ] Release mobile SDK 
 - [ ] Release web version 

From 2348ebece62e7b9264d5fd093abd1acdc983e82c Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 21:47:15 +0100
Subject: [PATCH 13/14] Optimize CLI startup speed and audio playback system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Major improvements:

🚀 CLI Performance:
- Implement lazy imports for instant help display (0.04s vs 2.2s)
- Add optimized entry point that only loads heavy dependencies when needed
- Refactor CLI into separate entry and processing modules

🎵 Audio System Enhancements:
- Add direct audio streaming with sounddevice library
- Implement fallback to system temp directory for temp files
- Fix permission issues when running from root directory
- Add proper temp file cleanup and error handling

📦 Package Structure:
- Update pyproject.toml to use optimized entry point
- Make package imports lazy to improve startup performance
- Add sounddevice as optional streaming dependency

💡 User Experience:
- Help commands now appear instantly
- Audio works from any directory including root
- Graceful fallback when sounddevice unavailable
- Maintains full CLI functionality with all existing features
---
 kitten-tts                           |  2 +-
 kittentts/__init__.py                | 13 +++-
 kittentts/cli_entry.py               | 90 ++++++++++++++++++++++++++++
 kittentts/{cli.py => cli_process.py} | 64 ++++++++++++++++----
 pyproject.toml                       |  7 ++-
 requirements.txt                     |  1 +
 6 files changed, 161 insertions(+), 16 deletions(-)
 create mode 100644 kittentts/cli_entry.py
 rename kittentts/{cli.py => cli_process.py} (78%)

diff --git a/kitten-tts b/kitten-tts
index f7f49e0..21f42fd 100755
--- a/kitten-tts
+++ b/kitten-tts
@@ -13,7 +13,7 @@ script_dir = os.path.dirname(os.path.abspath(__file__))
 # Import and run the CLI
 sys.path.insert(0, script_dir)
 
-from kittentts.cli import main
+from kittentts.cli_entry import main
 
 if __name__ == "__main__":
     sys.exit(main())
\ No newline at end of file
diff --git a/kittentts/__init__.py b/kittentts/__init__.py
index 9cf1a2d..6b46051 100644
--- a/kittentts/__init__.py
+++ b/kittentts/__init__.py
@@ -1,7 +1,16 @@
-from kittentts.get_model import get_model, KittenTTS
-
 __version__ = "0.1.0"
 __author__ = "KittenML"
 __description__ = "Ultra-lightweight text-to-speech model with just 15 million parameters"
 
+# Lazy imports - only load heavy dependencies when actually needed
+def get_model(*args, **kwargs):
+    """Lazy import of get_model"""
+    from .get_model import get_model as _get_model
+    return _get_model(*args, **kwargs)
+
+def KittenTTS(*args, **kwargs):
+    """Lazy import of KittenTTS"""
+    from .get_model import KittenTTS as _KittenTTS
+    return _KittenTTS(*args, **kwargs)
+
 __all__ = ["get_model", "KittenTTS"]
diff --git a/kittentts/cli_entry.py b/kittentts/cli_entry.py
new file mode 100644
index 0000000..3b73e10
--- /dev/null
+++ b/kittentts/cli_entry.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""
+Optimized entry point for KittenTTS with fast help and lazy imports
+"""
+
+import argparse
+import sys
+
+def show_help():
+    """Show help message without importing heavy dependencies"""
+    parser = argparse.ArgumentParser(
+        description="Kitten TTS - Ultra-lightweight text-to-speech synthesis",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s "Hello world"                           # Speak text
+  %(prog)s "Hello world" --voice expr-voice-2-f    # Use specific voice
+  %(prog)s "Hello world" --output output.wav       # Save to file
+  %(prog)s "Hello world" --speed 1.2               # Faster speech
+  %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  echo "Hello world" | %(prog)s                    # Read from stdin
+  %(prog)s --list-voices                          # List available voices
+        """
+    )
+
+    parser.add_argument(
+        "text",
+        nargs="?",
+        help="Text to synthesize into speech (if not provided, reads from stdin)"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="KittenML/kitten-tts-nano-0.2",
+        help="Model name or path (default: KittenML/kitten-tts-nano-0.2)"
+    )
+
+    parser.add_argument(
+        "--voice",
+        default="expr-voice-2-m",
+        help="Voice to use (default: expr-voice-2-m)"
+    )
+
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed (1.0 = normal, higher = faster, lower = slower)"
+    )
+
+    parser.add_argument(
+        "--fade-out",
+        type=float,
+        default=0.2,
+        help="Fade out duration in seconds (default: 0.2, use 0 to disable)"
+    )
+
+    parser.add_argument(
+        "--output", "-o",
+        help="Output file path (saves as WAV). If not specified, plays through speakers."
+    )
+
+    parser.add_argument(
+        "--list-voices",
+        action="store_true",
+        help="List available voices and exit"
+    )
+
+    parser.add_argument(
+        "--format",
+        choices=["wav", "flac", "ogg"],
+        default="wav",
+        help="Audio format for output file (default: wav)"
+    )
+
+    parser.print_help()
+
+def main():
+    """Fast entry point: instant help; piped stdin or any argument runs the full CLI."""
+    # With no arguments, show help only when nothing is piped in on stdin.
+    interactive = sys.stdin is None or sys.stdin.isatty()
+    if (len(sys.argv) == 1 and interactive) or sys.argv[1:] in (['-h'], ['--help']):
+        show_help()
+        return 0
+    # Everything else needs the real CLI; import lazily so help stays instant.
+    from .cli_process import main as cli_main
+    return cli_main()
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/kittentts/cli.py b/kittentts/cli_process.py
similarity index 78%
rename from kittentts/cli.py
rename to kittentts/cli_process.py
index 008916e..8372836 100755
--- a/kittentts/cli.py
+++ b/kittentts/cli_process.py
@@ -15,6 +15,7 @@
 import os
 import numpy as np
 import soundfile as sf
+import tempfile
 
 # Add the current directory to Python path so we can import kittentts
 # We need to add the parent directory since we're inside kittentts/cli.py
@@ -25,12 +26,18 @@
 # Default fade out duration in seconds
 DEFAULT_FADE_OUT = 0.2
 
-try:
-    from kittentts import KittenTTS
-except ImportError:
-    print("Error: KittenTTS not found. Please install it with:")
-    print("pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl")
-    sys.exit(1)
+
+# Lazy import - only load KittenTTS when actually needed (not for help)
+def get_kittentts():
+    try:
+        # Import directly from get_model to avoid package-level imports
+        from kittentts.get_model import KittenTTS
+        return KittenTTS
+    except ImportError:
+        print("Error: KittenTTS not found. Please install it with:")
+        print(
+            "pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl")
+        sys.exit(1)
 
 
 def apply_fade_out(audio_data, sample_rate=24000, fade_duration=DEFAULT_FADE_OUT):
@@ -69,10 +76,37 @@ def list_voices(model):
 
 
 def play_audio_simple(audio_data, sample_rate=24000):
-    """Simple audio playback using system command."""
-    # Save to temporary file and play with system command
-    temp_file = "temp_kitten_tts_output.wav"
+    """Play audio directly via sounddevice, falling back to a temp-file player on failure."""
     try:
+        # Imported here so that a missing sounddevice install is caught by ImportError below
+        import sounddevice as sd
+        import numpy as np
+
+        # Hand sounddevice float32 samples; convert only when the dtype differs
+        if audio_data.dtype != np.float32:
+            audio_data = audio_data.astype(np.float32)
+
+        # Play audio directly
+        sd.play(audio_data, sample_rate)
+        sd.wait()  # Wait for playback to complete
+
+    except ImportError:
+        # Fallback to temp file method if sounddevice not available
+        print("sounddevice not available, falling back to temp file method...")
+        play_audio_with_tempfile(audio_data, sample_rate)
+    except Exception as e:
+        # Any other failure while streaming: report it and fall back to the temp file method
+        print(f"Direct streaming failed: {e}")
+        play_audio_with_tempfile(audio_data, sample_rate)
+
+
+def play_audio_with_tempfile(audio_data, sample_rate=24000):
+    """Fallback method using temporary file in system temp directory."""
+    temp_file = None
+    try:
+        # Create temp file in system temp directory
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            temp_file = tmp.name
         sf.write(temp_file, audio_data, sample_rate)
 
         # Try different system audio players based on OS
@@ -99,13 +133,17 @@ def play_audio_simple(audio_data, sample_rate=24000):
 
         # Clean up temp file
         try:
-            os.remove(temp_file)
+            if temp_file and os.path.exists(temp_file):
+                os.remove(temp_file)
         except:
             pass
 
     except Exception as e:
         print(f"Error playing audio: {e}")
-        print(f"Audio saved to {temp_file}")
+        if temp_file and os.path.exists(temp_file):
+            print(f"Audio saved to {temp_file}")
+        else:
+            print("Audio could not be saved - temp file creation failed")
 
 
 def main():
@@ -179,6 +217,7 @@ def main():
     # Handle --list-voices
     if args.list_voices:
         try:
+            KittenTTS = get_kittentts()
             model = KittenTTS(args.model)
             list_voices(model)
             return 0
@@ -210,6 +249,7 @@ def main():
     try:
         # Initialize the model
         print(f"Loading model: {args.model}...")
+        KittenTTS = get_kittentts()
         model = KittenTTS(args.model)
 
         # Validate voice
@@ -254,4 +294,4 @@ def main():
 
 
 if __name__ == "__main__":
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/pyproject.toml b/pyproject.toml
index 7a460b8..addfa97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,13 +28,18 @@ dependencies = [
     "huggingface_hub",
 ]
 
+[project.optional-dependencies]
+streaming = [
+    "sounddevice",
+]
+
 [project.urls]
 Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
 Issues = "https://github.com/kittenml/kittentts/issues"
 
 [project.scripts]
-kitten-tts = "kittentts.cli:main"
+kitten-tts = "kittentts.cli_entry:main"
 
 [tool.setuptools.packages.find]
 where = ["."]
diff --git a/requirements.txt b/requirements.txt
index 37bfbb3..64ffd5b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ onnxruntime
 soundfile
 numpy
 huggingface_hub
+sounddevice

From fba13265e95fc627a87b1b5ed855930d8e6ba323 Mon Sep 17 00:00:00 2001
From: Kirby Rs <bizkirby@gmail.com>
Date: Sat, 8 Nov 2025 23:32:04 +0100
Subject: [PATCH 14/14] Add old_trim parameter to generate method for backward
 compatibility

---
 kittentts/cli_process.py | 10 ++++----
 kittentts/get_model.py   |  4 ++--
 kittentts/onnx_model.py  | 52 ++++++++++++++++++++++------------------
 3 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/kittentts/cli_process.py b/kittentts/cli_process.py
index 8372836..142e3c6 100755
--- a/kittentts/cli_process.py
+++ b/kittentts/cli_process.py
@@ -24,7 +24,7 @@
 sys.path.insert(0, parent_dir)
 
 # Default fade out duration in seconds
-DEFAULT_FADE_OUT = 0.2
+DEFAULT_FADE_OUT = 0.3
 
 
 # Lazy import - only load KittenTTS when actually needed (not for help)
@@ -259,13 +259,13 @@ def main():
             return 1
 
         # Add dots at the end to prevent cutoff (simple fix)
-        if not text.endswith('...'):
-            text = text + '...'
-            print(f"Added dots to prevent audio cutoff")
+        # if not text.endswith('...'):
+            # text = text + '...'
+            # print(f"Added dots to prevent audio cutoff")
 
         # Generate audio
         print(f"Generating speech using voice: {args.voice}...")
-        audio = model.generate(text, voice=args.voice, speed=args.speed)
+        audio = model.generate(text, voice=args.voice, speed=args.speed, old_trim=True)
 
         # Apply fade out if specified
         if args.fade_out > 0:
diff --git a/kittentts/get_model.py b/kittentts/get_model.py
index ed8354a..7cd984f 100644
--- a/kittentts/get_model.py
+++ b/kittentts/get_model.py
@@ -24,7 +24,7 @@ def __init__(self, model_name="KittenML/kitten-tts-nano-0.1", cache_dir=None):
             
         self.model = download_from_huggingface(repo_id=repo_id, cache_dir=cache_dir)
 
-    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0) -> np.ndarray:
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0, old_trim=False) -> np.ndarray:
         """Generate audio from text.
         
         Args:
@@ -37,7 +37,7 @@ def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0)
         """
         if not text:
             raise ValueError("Input text cannot be empty.")
-        return self.model.generate(text, voice=voice, speed=speed)
+        return self.model.generate(text, voice=voice, speed=speed, old_trim=old_trim)
 
     def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", speed: float = 1.0, sample_rate: int = 24000):
         """Generate audio from text and save to file.
diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
index f6ab787..7fe663a 100644
--- a/kittentts/onnx_model.py
+++ b/kittentts/onnx_model.py
@@ -24,7 +24,7 @@ def __init__(self, dummy=None):
         _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
 
         symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
-        
+
         dicts = {}
         for i in range(len(symbols)):
             dicts[symbols[i]] = i
@@ -51,40 +51,40 @@ def __init__(self, model_path="kitten_tts_nano_preview.onnx", voices_path="voice
             language="en-us", preserve_punctuation=True, with_stress=True
         )
         self.text_cleaner = TextCleaner()
-        
+
         # Available voices
         self.available_voices = [
-            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f', 
+            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
             'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f'
         ]
-    
+
     def _prepare_inputs(self, text: str, voice: str, speed: float = 1.0) -> dict:
         """Prepare ONNX model inputs from text and voice parameters."""
         if voice not in self.available_voices:
             raise ValueError(f"Voice '{voice}' not available. Choose from: {self.available_voices}")
-        
+
         # Phonemize the input text
         phonemes_list = self.phonemizer.phonemize([text])
-        
+
         # Process phonemes to get token IDs
         phonemes = basic_english_tokenize(phonemes_list[0])
         phonemes = ' '.join(phonemes)
         tokens = self.text_cleaner(phonemes)
-        
+
         # Add start and end tokens
         tokens.insert(0, 0)
         tokens.append(0)
-        
+
         input_ids = np.array([tokens], dtype=np.int64)
         ref_s = self.voices[voice]
-        
+
         return {
             "input_ids": input_ids,
             "style": ref_s,
             "speed": np.array([speed], dtype=np.float32),
         }
-    
-    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0) -> np.ndarray:
+
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0, old_trim=False) -> np.ndarray:
         """Synthesize speech from text.
         
         Args:
@@ -96,20 +96,26 @@ def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0)
             Audio data as numpy array
         """
         onnx_inputs = self._prepare_inputs(text, voice, speed)
-        
+
         outputs = self.session.run(None, onnx_inputs)
 
-        audio = outputs[0]  # shape (n,)
-        # Trim edge silence from audio
-        non_silent = np.abs(audio) >= 0.01
-        if np.any(non_silent):
-            indices = np.where(non_silent)[0]
-            start, end = indices[0], indices[-1]
-            audio = audio[start : end + 1]
+        if old_trim:
+            return outputs[0][5000:-10000]
+        else:
+            # new trim approach, PR link:
+            # https://github.com/KittenML/KittenTTS/pull/22/commits/3883bdf80d9e9e4bdf0d1d4707fa68d995d41c56
+            audio = outputs[0]  # shape (n,)
+            # Trim edge silence from audio
+            non_silent = np.abs(audio) >= 0.1
+            if np.any(non_silent):
+                indices = np.where(non_silent)[0]
+                start, end = indices[0], indices[-1]
+                audio = audio[start: end + 1]
+
         return audio
-    
-    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", 
-                          speed: float = 1.0, sample_rate: int = 24000) -> None:
+
+    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m",
+                         speed: float = 1.0, sample_rate: int = 24000) -> None:
         """Synthesize speech and save to file.
         
         Args:
@@ -127,7 +133,7 @@ def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice
 # Example usage
 if __name__ == "__main__":
     tts = KittenTTS_1_Onnx()
-    
+
     text = """
     It begins with an "Ugh!" Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.
     """