KittenML · andkirby · Aug 5, 2025 · Aug 5, 2025 · Aug 6, 2025 · Aug 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+*.egg-info/
diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/README.md b/README.md
@@ -10,15 +10,13 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 
 Email the creators with any questions : info@stellonlabs.com
 
-
 ## ✨ Features
 
 - **Ultra-lightweight**: Model size less than 25MB
 - **CPU-optimized**: Runs without GPU on any device
 - **High-quality voices**: Several premium voice options available
 - **Fast inference**: Optimized for real-time speech synthesis
-
-
+- **Command-line interface**: Easy-to-use CLI with pipeline support
 
 ## 🚀 Quick Start
 
@@ -28,38 +26,97 @@ Email the creators with any questions : info@stellonlabs.com
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
+### Basic Usage
 
-
- ### Basic Usage 
-
-```
+#### Python API
+```python
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.2")
 
-audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f' )
+audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f')
 
 # available_voices : [  'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',  'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ]
 
 # Save the audio
 import soundfile as sf
 sf.write('output.wav', audio, 24000)
+```
+
+#### Command Line Interface (CLI)
 
+<details>
+<summary>CLI Usage Instructions</summary>
+
+##### Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/KittenML/KittenTTS.git
+cd KittenTTS
+
+# Create and activate virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
 ```
 
+##### Basic Usage
 
+```bash
+./kitten-tts "Hello world"                           # Speak text
+./kitten-tts "Hello world" --output hello.wav       # Save to file
+echo "Hello world" | ./kitten-tts                   # Read from stdin
+./kitten-tts --list-voices                          # List available voices
+```
 
+##### Advanced Options
 
+```bash
+# With specific voice and fade-out
+./kitten-tts "Hello world" --voice expr-voice-2-f --fade-out 0.3
 
-## 💻 System Requirements
+# Adjust speech speed
+./kitten-tts "Hello world" --speed 1.5
 
-Works literally everywhere
+# Different audio formats
+./kitten-tts "Hello world" --output audio.flac --format flac
+
+# Pipeline usage with files
+cat text_file.txt | ./kitten-tts --output speech.wav
+```
+
+##### CLI Features
+
+- **Text input** via arguments or stdin (pipeline support)
+- **8 different voices** (`expr-voice-2-m`/`-f` through `expr-voice-5-m`/`-f`)
+- **Speed control** with `--speed` option (1.0 = normal)
+- **Audio fade-out** with `--fade-out` option (default: 0.2s, use 0 to disable)
+- **Multiple formats** (WAV, FLAC, OGG)
+- **Cross-platform audio playback** (macOS, Linux, Windows)
+
+##### Available Voices
+
+- `expr-voice-2-m` / `expr-voice-2-f`
+- `expr-voice-3-m` / `expr-voice-3-f`
+- `expr-voice-4-m` / `expr-voice-4-f`
+- `expr-voice-5-m` / `expr-voice-5-f`
+
+</details>
 
 
 
+
+
+## 💻 System Requirements
+
+Works literally everywhere
+
 ## Checklist 
 
 - [x] Release a preview model
+- [x] CLI support
 - [ ] Release the fully trained model weights
 - [ ] Release mobile SDK 
 - [ ] Release web version 
-
diff --git a/kitten-tts b/kitten-tts
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+"""
+Kitten TTS Binary - Text-to-Speech Command Line Tool
+
+Thin launcher: makes the ``kittentts`` package that sits next to this script
+importable, then delegates to ``kittentts.cli_entry.main``.
+"""
+
+import sys
+import os
+
+# Directory that really contains this script. realpath (unlike abspath) follows
+# symlinks, so a link to this launcher placed elsewhere (e.g. somewhere on
+# PATH) still locates the package beside the original file.
+script_dir = os.path.dirname(os.path.realpath(__file__))
+
+# Put that directory first on sys.path so the import below resolves to the
+# sibling kittentts package.
+sys.path.insert(0, script_dir)
+
+# Must come after the sys.path tweak above.
+from kittentts.cli_entry import main
+
+if __name__ == "__main__":
+    # Propagate main()'s return value as the process exit status.
+    sys.exit(main())
diff --git a/kittentts/.gitignore b/kittentts/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/kittentts/__init__.py b/kittentts/__init__.py
@@ -1,7 +1,16 @@
-from kittentts.get_model import get_model, KittenTTS
-
 __version__ = "0.1.0"
 __author__ = "KittenML"
 __description__ = "Ultra-lightweight text-to-speech model with just 15 million parameters"
 
+# Lazy imports - only load heavy dependencies when actually needed
+_LAZY_EXPORTS = ("get_model", "KittenTTS")
+
+
+def __getattr__(name):
+    """Resolve ``get_model`` / ``KittenTTS`` on first attribute access (PEP 562).
+
+    Python calls this hook only when ``name`` is not already a package
+    attribute. The implementation module is imported at that point, and both
+    public names are then stored in the package namespace so later lookups
+    bypass this hook.
+
+    Returning the real objects (instead of wrapper functions) keeps
+    ``KittenTTS`` a class, so ``isinstance`` checks and subclassing work.
+
+    Raises:
+        AttributeError: if ``name`` is not one of the lazily exported names.
+    """
+    if name not in _LAZY_EXPORTS:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+    # import_module avoids `from . import get_model`, which would look the
+    # name up on this package and re-enter this hook.
+    from importlib import import_module
+    impl = import_module(".get_model", __name__)
+
+    # Importing the submodule makes the import system bind the package
+    # attribute ``get_model`` to the *module* object. Rebind both public names
+    # to the real callables so ``kittentts.get_model(...)`` stays callable.
+    for export in _LAZY_EXPORTS:
+        globals()[export] = getattr(impl, export)
+    return globals()[name]
+
+
 __all__ = ["get_model", "KittenTTS"]
diff --git a/kittentts/cli_entry.py b/kittentts/cli_entry.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""
+Optimized entry point for KittenTTS with fast help and lazy imports
+"""
+
+import argparse
+import sys
+
+def show_help():
+    """Print the CLI usage text using only argparse.
+
+    Builds a throwaway parser that declares the CLI options and prints its
+    help; nothing is parsed and nothing is returned. Only the standard
+    library is used here, so showing help does not load the heavy
+    dependencies.
+
+    NOTE(review): the real argument handling lives in ``cli_process`` (not
+    visible here) and presumably declares the same options -- keep the two
+    definitions in sync when changing either.
+    """
+    parser = argparse.ArgumentParser(
+        description="Kitten TTS - Ultra-lightweight text-to-speech synthesis",
+        # Raw formatter keeps the hand-aligned examples in the epilog intact.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s "Hello world"                           # Speak text
+  %(prog)s "Hello world" --voice expr-voice-2-f    # Use specific voice
+  %(prog)s "Hello world" --output output.wav       # Save to file
+  %(prog)s "Hello world" --speed 1.2               # Faster speech
+  %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  echo "Hello world" | %(prog)s                    # Read from stdin
+  %(prog)s --list-voices                          # List available voices
+        """
+    )
+
+    parser.add_argument(
+        "text",
+        nargs="?",
+        help="Text to synthesize into speech (if not provided, reads from stdin)"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="KittenML/kitten-tts-nano-0.2",
+        help="Model name or path (default: KittenML/kitten-tts-nano-0.2)"
+    )
+
+    parser.add_argument(
+        "--voice",
+        default="expr-voice-2-m",
+        help="Voice to use (default: expr-voice-2-m)"
+    )
+
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed (1.0 = normal, higher = faster, lower = slower)"
+    )
+
+    parser.add_argument(
+        "--fade-out",
+        type=float,
+        default=0.2,
+        help="Fade out duration in seconds (default: 0.2, use 0 to disable)"
+    )
+
+    parser.add_argument(
+        "--output", "-o",
+        # The file format is chosen by --format below, so do not promise WAV here.
+        help="Output file path (format set by --format). If not specified, plays through speakers."
+    )
+
+    parser.add_argument(
+        "--list-voices",
+        action="store_true",
+        help="List available voices and exit"
+    )
+
+    parser.add_argument(
+        "--format",
+        choices=["wav", "flac", "ogg"],
+        default="wav",
+        help="Audio format for output file (default: wav)"
+    )
+
+    parser.print_help()
+
+def main():
+    """Entry point: print help cheaply, otherwise defer to the full CLI.
+
+    Help is printed through ``show_help`` when the only argument is
+    ``-h``/``--help``, or when there are no arguments and stdin is a
+    terminal. With no arguments but piped stdin (for example
+    ``echo "Hello world" | kitten-tts``) control passes to the full CLI, so
+    the text can be read from stdin as the ``text`` argument's help promises.
+
+    Returns:
+        0 after printing help; otherwise whatever ``cli_process.main`` returns.
+    """
+    cli_args = sys.argv[1:]
+    wants_help = cli_args in (["-h"], ["--help"])
+    # sys.stdin can be None when no console is attached; treat that as
+    # "nothing piped" so a bare invocation still shows help.
+    stdin_is_terminal = sys.stdin is None or sys.stdin.isatty()
+
+    if wants_help or (not cli_args and stdin_is_terminal):
+        show_help()
+        return 0
+
+    # Imported lazily so the help path above never loads the full CLI module.
+    from .cli_process import main as cli_main
+    return cli_main()
+
+# main() uses a package-relative import, so run this module as
+# `python -m kittentts.cli_entry`; executing the file by path has no parent package.
+if __name__ == "__main__":
+    sys.exit(main())