diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aace59f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.egg-info/
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 953bb15..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,9 +0,0 @@
-include README.md
-include LICENSE
-include requirements.txt
-recursive-include kittentts *.py
-recursive-include kittentts *.json
-recursive-include kittentts *.txt
-recursive-include kittentts *.onnx
-global-exclude __pycache__
-global-exclude *.py[co]
diff --git a/README.md b/README.md
index 0f46ec7..b38a95f 100644
--- a/README.md
+++ b/README.md
@@ -10,15 +10,13 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 
 Email the creators with any questions : info@stellonlabs.com
 
-
 ## ✨ Features
 
 - **Ultra-lightweight**: Model size less than 25MB
 - **CPU-optimized**: Runs without GPU on any device
 - **High-quality voices**: Several premium voice options available
 - **Fast inference**: Optimized for real-time speech synthesis
-
-
+- **Command-line interface**: Easy-to-use CLI with pipeline support
 
 ## 🚀 Quick Start
 
@@ -28,38 +26,97 @@ Email the creators with any questions : info@stellonlabs.com
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
+### Basic Usage
 
-
- ### Basic Usage 
-
-```
+#### Python API
+```python
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.2")
 
-audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f' )
+audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f')
 
 # available_voices : [  'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',  'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ]
 
 # Save the audio
 import soundfile as sf
 sf.write('output.wav', audio, 24000)
+```
+
+#### Command Line Interface (CLI)
 
+<details>
+<summary>CLI Usage Instructions</summary>
+
+##### Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/KittenML/KittenTTS.git
+cd KittenTTS
+
+# Create and activate virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install the package and its dependencies
+pip install -e .
 ```
 
+##### Basic Usage
 
+```bash
+./kitten-tts "Hello world"                           # Speak text
+./kitten-tts "Hello world" --output hello.wav       # Save to file
+echo "Hello world" | ./kitten-tts                   # Read from stdin
+./kitten-tts --list-voices                          # List available voices
+```
 
+##### Advanced Options
 
+```bash
+# With specific voice and fade-out
+./kitten-tts "Hello world" --voice expr-voice-2-f --fade-out 0.3
 
-## 💻 System Requirements
+# Adjust speech speed
+./kitten-tts "Hello world" --speed 1.5
 
-Works literally everywhere
+# Different audio formats
+./kitten-tts "Hello world" --output audio.flac --format flac
+
+# Pipeline usage with files
+cat text_file.txt | ./kitten-tts --output speech.wav
+```
+
+##### CLI Features
+
+- **Text input** via arguments or stdin (pipeline support)
+- **8 different voices** (`expr-voice-2-m`/`-f` through `expr-voice-5-m`/`-f`)
+- **Speed control** with `--speed` option (1.0 = normal)
+- **Audio fade-out** with `--fade-out` option (default: 0.2s, use 0 to disable)
+- **Multiple formats** (WAV, FLAC, OGG)
+- **Cross-platform audio playback** (macOS, Linux, Windows)
+
+##### Available Voices
+
+- `expr-voice-2-m` / `expr-voice-2-f`
+- `expr-voice-3-m` / `expr-voice-3-f`
+- `expr-voice-4-m` / `expr-voice-4-f`
+- `expr-voice-5-m` / `expr-voice-5-f`
+
+</details>
 
 
 
+
+
+## 💻 System Requirements
+
+Works literally everywhere
+
 ## Checklist 
 
 - [x] Release a preview model
+- [x] CLI support
 - [ ] Release the fully trained model weights
 - [ ] Release mobile SDK 
 - [ ] Release web version 
-
diff --git a/kitten-tts b/kitten-tts
new file mode 100755
index 0000000..21f42fd
--- /dev/null
+++ b/kitten-tts
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+"""
+Kitten TTS Binary - Text-to-Speech Command Line Tool
+Thin wrapper script that runs main() from kittentts/cli_entry.py
+"""
+
+import sys
+import os
+
+# Get the directory where this script is located
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Put that directory first on sys.path so the adjacent kittentts package is importable
+sys.path.insert(0, script_dir)
+
+from kittentts.cli_entry import main
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/kittentts/.gitignore b/kittentts/.gitignore
new file mode 100644
index 0000000..763624e
--- /dev/null
+++ b/kittentts/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
\ No newline at end of file
diff --git a/kittentts/__init__.py b/kittentts/__init__.py
index 9cf1a2d..6b46051 100644
--- a/kittentts/__init__.py
+++ b/kittentts/__init__.py
@@ -1,7 +1,16 @@
-from kittentts.get_model import get_model, KittenTTS
-
 __version__ = "0.1.0"
 __author__ = "KittenML"
 __description__ = "Ultra-lightweight text-to-speech model with just 15 million parameters"
 
+# Lazy exports (PEP 562): heavy dependencies load on first attribute access.
+def __getattr__(name):
+    """Resolve ``get_model`` / ``KittenTTS`` from ``.get_model`` on first use."""
+    if name not in ("get_model", "KittenTTS"):
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    from importlib import import_module
+    impl = import_module(".get_model", __name__)
+    # Importing binds the submodule as ``kittentts.get_model``; rebind both names to the real objects.
+    globals().update(get_model=impl.get_model, KittenTTS=impl.KittenTTS)
+    return globals()[name]
+
 __all__ = ["get_model", "KittenTTS"]
diff --git a/kittentts/cli_entry.py b/kittentts/cli_entry.py
new file mode 100644
index 0000000..3b73e10
--- /dev/null
+++ b/kittentts/cli_entry.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""
+Optimized entry point for KittenTTS with fast help and lazy imports
+"""
+
+import argparse
+import sys
+
+def show_help():
+    """Print CLI help from a throwaway parser so no heavy dependency is imported."""
+    parser = argparse.ArgumentParser(
+        description="Kitten TTS - Ultra-lightweight text-to-speech synthesis",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s "Hello world"                           # Speak text
+  %(prog)s "Hello world" --voice expr-voice-2-f    # Use specific voice
+  %(prog)s "Hello world" --output output.wav       # Save to file
+  %(prog)s "Hello world" --speed 1.2               # Faster speech
+  %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  echo "Hello world" | %(prog)s                    # Read from stdin
+  %(prog)s --list-voices                          # List available voices
+        """
+    )
+
+    parser.add_argument(
+        "text",
+        nargs="?",
+        help="Text to synthesize into speech (if not provided, reads from stdin)"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="KittenML/kitten-tts-nano-0.2",
+        help="Model name or path (default: KittenML/kitten-tts-nano-0.2)"
+    )
+
+    parser.add_argument(
+        "--voice",
+        default="expr-voice-2-m",
+        help="Voice to use (default: expr-voice-2-m)"
+    )
+
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed (1.0 = normal, higher = faster, lower = slower)"
+    )
+
+    parser.add_argument(
+        "--fade-out",
+        type=float,
+        default=0.2,
+        help="Fade out duration in seconds (default: 0.2, use 0 to disable)"
+    )
+
+    parser.add_argument(
+        "--output", "-o",
+        help="Output file path. If not specified, plays through speakers."
+    )
+
+    parser.add_argument(
+        "--list-voices",
+        action="store_true",
+        help="List available voices and exit"
+    )
+
+    parser.add_argument(
+        "--format",
+        choices=["wav", "flac", "ogg"],
+        default=None,
+        help="Audio format for output file (default: inferred from the output file extension)"
+    )
+
+    parser.print_help()
+
+def main():
+    """Entry point: serve help quickly, defer all other work to the full CLI."""
+    cli_args = sys.argv[1:]
+    # Bare invocation means "help" only on a terminal; piped stdin must be read.
+    on_tty = sys.stdin is not None and sys.stdin.isatty()
+    if cli_args in (["-h"], ["--help"]) or (not cli_args and on_tty):
+        show_help()
+        return 0
+    from .cli_process import main as cli_main  # lazy: loads numpy/soundfile
+    return cli_main()
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/kittentts/cli_process.py b/kittentts/cli_process.py
new file mode 100755
index 0000000..142e3c6
--- /dev/null
+++ b/kittentts/cli_process.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+"""
+Kitten TTS CLI - Text-to-Speech Command Line Tool
+
+Usage:
+    kitten-tts "Hello world"                           # Speak text
+    kitten-tts "Hello world" --voice expr-voice-2-f    # Use specific voice
+    kitten-tts "Hello world" --output output.wav       # Save to file
+    kitten-tts --list-voices                           # List available voices
+    kitten-tts --help                                  # Show help
+"""
+
+import argparse
+import sys
+import os
+import numpy as np
+import soundfile as sf
+import tempfile
+
+# Make the ``kittentts`` package importable when this file is run directly:
+# this module lives inside the package, so its parent directory goes on sys.path.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
+
+# Default fade out duration in seconds (the 0.2s documented in --help and the README)
+DEFAULT_FADE_OUT = 0.2
+
+
+# Lazy import - only load KittenTTS when actually needed (not for help)
+def get_kittentts():
+    """Lazily import and return the ``KittenTTS`` class; on ImportError report to stderr and exit(1)."""
+    try:
+        from kittentts.get_model import KittenTTS
+        return KittenTTS
+    except ImportError as exc:
+        # Include the cause: a missing dependency (not only a missing package) lands here
+        print(f"Error: KittenTTS could not be imported ({exc}). Please install it with:", file=sys.stderr)
+        print("pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl", file=sys.stderr)
+        sys.exit(1)
+
+
+def apply_fade_out(audio_data, sample_rate=24000, fade_duration=DEFAULT_FADE_OUT):
+    """Return a copy of ``audio_data`` whose tail is faded out quadratically.
+
+    Args:
+        audio_data: NumPy array of audio samples.
+        sample_rate: Audio sample rate in Hz (default: 24000).
+        fade_duration: Fade length in seconds (default: ``DEFAULT_FADE_OUT``).
+
+    Returns:
+        The input unchanged when there is nothing to fade (empty audio or a
+        fade shorter than one sample); otherwise a faded copy.
+    """
+    fade_samples = int(fade_duration * sample_rate)
+    if fade_samples >= len(audio_data):
+        fade_samples = len(audio_data) // 2  # Cap the fade at half of very short audio
+
+    # A non-positive length must bail out: ``audio[-0:]`` selects the WHOLE
+    # array, so multiplying it by an empty curve would raise a shape error.
+    if fade_samples <= 0:
+        return audio_data
+
+    # Quadratic ramp from 1 down to 0, applied to the last ``fade_samples`` samples
+    audio_with_fade = audio_data.copy()
+    audio_with_fade[-fade_samples:] *= np.linspace(1, 0, fade_samples) ** 2
+
+    return audio_with_fade
+
+
+def list_voices(model):
+    """Print a header followed by one bullet line per entry in ``model.available_voices``."""
+    bullet_lines = [f"  - {name}" for name in model.available_voices]
+    # One print call; the joined text is identical to printing line by line.
+    print("\n".join(["Available voices:", *bullet_lines]))
+
+
+def play_audio_simple(audio_data, sample_rate=24000):
+    """Play audio through sounddevice, falling back to play_audio_with_tempfile on any failure."""
+    try:
+        # sounddevice is optional; its absence is handled by the ImportError branch below
+        import sounddevice as sd
+        import numpy as np
+
+        # Playback below is fed float32 samples; convert any other dtype first
+        if audio_data.dtype != np.float32:
+            audio_data = audio_data.astype(np.float32)
+
+        # Start playback, then block until it has finished
+        sd.play(audio_data, sample_rate)
+        sd.wait()  # Wait for playback to complete
+
+    except ImportError:
+        # Fallback to temp file method if sounddevice not available
+        print("sounddevice not available, falling back to temp file method...")
+        play_audio_with_tempfile(audio_data, sample_rate)
+    except Exception as e:
+        # Any other playback error: report it and fall back to the temp file method
+        print(f"Direct streaming failed: {e}")
+        play_audio_with_tempfile(audio_data, sample_rate)
+
+
+def play_audio_with_tempfile(audio_data, sample_rate=24000):
+    """Write audio to a temp WAV and play it with an OS player; keep the file if it was not played."""
+    import platform
+    import subprocess
+    temp_file = None
+    try:
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            temp_file = tmp.name
+        sf.write(temp_file, audio_data, sample_rate)
+        played = False
+        system = platform.system()
+        if system == "Darwin":  # macOS
+            subprocess.run(["afplay", temp_file], check=True)
+            played = True
+        elif system == "Linux":
+            for player in ["aplay", "paplay", "mpg123", "mplayer"]:
+                try:
+                    subprocess.run([player, temp_file], check=True)
+                    played = True
+                    break
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    continue
+            else:
+                print(f"Audio saved to {temp_file} (no suitable audio player found)")
+        elif system == "Windows":
+            # winsound blocks until playback ends, so the file is not deleted while still in use
+            import winsound
+            winsound.PlaySound(temp_file, winsound.SND_FILENAME)
+            played = True
+        else:
+            print(f"Audio saved to {temp_file} (unsupported OS for direct playback)")
+
+        # Delete only after playback, so "Audio saved to ..." never names a removed file
+        if played:
+            try:
+                os.remove(temp_file)
+            except OSError:
+                pass  # best-effort cleanup; a leftover temp file is harmless
+    except Exception as e:
+        print(f"Error playing audio: {e}")
+        if temp_file and os.path.exists(temp_file):
+            print(f"Audio saved to {temp_file}")
+        else:
+            print("Audio could not be saved - temp file creation failed")
+
+
+def main():
+    """Parse CLI arguments, synthesize speech, then save or play it; return the exit code."""
+    parser = argparse.ArgumentParser(
+        description="Kitten TTS - Ultra-lightweight text-to-speech synthesis",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s "Hello world"                           # Speak text
+  %(prog)s "Hello world" --voice expr-voice-2-f    # Use specific voice
+  %(prog)s "Hello world" --output output.wav       # Save to file
+  %(prog)s "Hello world" --speed 1.2               # Faster speech
+  %(prog)s "Hello world" --fade-out 0.1            # 0.1s fade out
+  echo "Hello world" | %(prog)s                    # Read from stdin
+  %(prog)s --list-voices                          # List available voices
+        """
+    )
+
+    parser.add_argument(
+        "text",
+        nargs="?",
+        help="Text to synthesize into speech (if not provided, reads from stdin)"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="KittenML/kitten-tts-nano-0.2",
+        help="Model name or path (default: KittenML/kitten-tts-nano-0.2)"
+    )
+
+    parser.add_argument(
+        "--voice",
+        default="expr-voice-2-m",
+        help="Voice to use (default: expr-voice-2-m)"
+    )
+
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed (1.0 = normal, higher = faster, lower = slower)"
+    )
+
+    parser.add_argument(
+        "--fade-out",
+        type=float,
+        default=DEFAULT_FADE_OUT,
+        help=f"Fade out duration in seconds (default: {DEFAULT_FADE_OUT}, use 0 to disable)"
+    )
+
+    parser.add_argument(
+        "--output", "-o",
+        help="Output file path. If not specified, plays through speakers."
+    )
+
+    parser.add_argument(
+        "--list-voices",
+        action="store_true",
+        help="List available voices and exit"
+    )
+
+    parser.add_argument(
+        "--format",
+        choices=["wav", "flac", "ogg"],
+        default=None,
+        help="Audio format for output file (default: inferred from the output file extension)"
+    )
+
+    args = parser.parse_args()
+
+    # Handle --list-voices
+    if args.list_voices:
+        try:
+            KittenTTS = get_kittentts()
+            model = KittenTTS(args.model)
+            list_voices(model)
+            return 0
+        except Exception as e:
+            print(f"Error loading model: {e}", file=sys.stderr)
+            return 1
+
+    # Get text from command line or stdin
+    if args.text:
+        text = args.text
+    else:
+        # Read from stdin
+        try:
+            if sys.stdin.isatty():
+                # No pipe, interactive mode
+                parser.print_help()
+                print("\nError: Text input is required (provide as argument or pipe from stdin)", file=sys.stderr)
+                return 1
+            else:
+                # Pipe detected, read from stdin
+                text = sys.stdin.read().strip()
+                if not text:
+                    print("\nError: No text received from stdin", file=sys.stderr)
+                    return 1
+        except Exception as e:
+            print(f"Error reading from stdin: {e}", file=sys.stderr)
+            return 1
+
+    try:
+        # Initialize the model
+        print(f"Loading model: {args.model}...")
+        KittenTTS = get_kittentts()
+        model = KittenTTS(args.model)
+
+        # Validate voice
+        if args.voice not in model.available_voices:
+            print(f"Error: Voice '{args.voice}' not available.", file=sys.stderr)
+            print(f"Available voices: {', '.join(model.available_voices)}", file=sys.stderr)
+            return 1
+
+        # Generate audio
+        print(f"Generating speech using voice: {args.voice}...")
+        # old_trim=True keeps the fixed-offset trim of the raw model output
+        audio = model.generate(text, voice=args.voice, speed=args.speed, old_trim=True)
+
+        # Apply fade out if specified
+        if args.fade_out > 0:
+            print(f"Applying {args.fade_out}s fade out...")
+            audio = apply_fade_out(audio, sample_rate=24000, fade_duration=args.fade_out)
+
+        if args.output:
+            # Save to file
+            print(f"Saving audio to: {args.output}")
+            # An explicit --format wins; otherwise soundfile infers the format
+            # from the output file extension.
+            file_format = args.format.upper() if args.format else None
+            sf.write(args.output, audio, 24000, format=file_format)
+            print("Done!")
+        else:
+            # Play through speakers
+            print("Playing audio...")
+            play_audio_simple(audio)
+            print("Done!")
+
+        return 0
+
+    except KeyboardInterrupt:
+        print("\nInterrupted by user")
+        return 1
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/kittentts/get_model.py b/kittentts/get_model.py
index f91c28c..7cd984f 100644
--- a/kittentts/get_model.py
+++ b/kittentts/get_model.py
@@ -1,5 +1,6 @@
 import json
 import os
+import numpy as np
 from huggingface_hub import hf_hub_download
 from .onnx_model import KittenTTS_1_Onnx
 
@@ -22,8 +23,8 @@ def __init__(self, model_name="KittenML/kitten-tts-nano-0.1", cache_dir=None):
             repo_id = model_name
             
         self.model = download_from_huggingface(repo_id=repo_id, cache_dir=cache_dir)
-    
-    def generate(self, text, voice="expr-voice-5-m", speed=1.0):
+
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0, old_trim=False) -> np.ndarray:
         """Generate audio from text.
         
         Args:
@@ -34,9 +35,11 @@ def generate(self, text, voice="expr-voice-5-m", speed=1.0):
         Returns:
             Audio data as numpy array
         """
-        return self.model.generate(text, voice=voice, speed=speed)
-    
-    def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, sample_rate=24000):
+        if not text:
+            raise ValueError("Input text cannot be empty.")
+        return self.model.generate(text, voice=voice, speed=speed, old_trim=old_trim)
+
+    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", speed: float = 1.0, sample_rate: int = 24000):
         """Generate audio from text and save to file.
         
         Args:
@@ -46,7 +49,7 @@ def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0,
             speed: Speech speed (1.0 = normal)
             sample_rate: Audio sample rate
         """
-        return self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
+        self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
     
     @property
     def available_voices(self):
@@ -54,7 +57,7 @@ def available_voices(self):
         return self.model.available_voices
 
 
-def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def download_from_huggingface(repo_id: str="KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS_1_Onnx:
     """Download model files from Hugging Face repository.
     
     Args:
@@ -97,6 +100,6 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=
     return model
 
 
-def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def get_model(repo_id: str="KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS:
     """Get a KittenTTS model (legacy function for backward compatibility)."""
     return KittenTTS(repo_id, cache_dir)
diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
index e93c463..7fe663a 100644
--- a/kittentts/onnx_model.py
+++ b/kittentts/onnx_model.py
@@ -1,11 +1,15 @@
-from misaki import en, espeak
 import numpy as np
 import phonemizer
 import soundfile as sf
 import onnxruntime as ort
+import espeakng_loader
+from phonemizer.backend.espeak.wrapper import EspeakWrapper
 
+EspeakWrapper.set_library(espeakng_loader.get_library_path())
+EspeakWrapper.set_data_path(espeakng_loader.get_data_path())
 
-def basic_english_tokenize(text):
+
+def basic_english_tokenize(text: str) -> list:
     """Basic English tokenizer that splits on whitespace and punctuation."""
     import re
     tokens = re.findall(r"\w+|[^\w\s]", text)
@@ -20,21 +24,16 @@ def __init__(self, dummy=None):
         _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
 
         symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
-        
+
         dicts = {}
         for i in range(len(symbols)):
             dicts[symbols[i]] = i
 
         self.word_index_dictionary = dicts
 
-    def __call__(self, text):
-        indexes = []
-        for char in text:
-            try:
-                indexes.append(self.word_index_dictionary[char])
-            except KeyError:
-                pass
-        return indexes
+    def __call__(self, text: str) -> list:
+        lookup = self.word_index_dictionary.get  # unknown characters map to None and are dropped
+        return [index for index in map(lookup, text) if index is not None]
 
 
 class KittenTTS_1_Onnx:
@@ -48,45 +47,44 @@ def __init__(self, model_path="kitten_tts_nano_preview.onnx", voices_path="voice
         self.model_path = model_path
         self.voices = np.load(voices_path)
         self.session = ort.InferenceSession(model_path)
-        
         self.phonemizer = phonemizer.backend.EspeakBackend(
             language="en-us", preserve_punctuation=True, with_stress=True
         )
         self.text_cleaner = TextCleaner()
-        
+
         # Available voices
         self.available_voices = [
-            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f', 
+            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
             'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f'
         ]
-    
+
     def _prepare_inputs(self, text: str, voice: str, speed: float = 1.0) -> dict:
         """Prepare ONNX model inputs from text and voice parameters."""
         if voice not in self.available_voices:
             raise ValueError(f"Voice '{voice}' not available. Choose from: {self.available_voices}")
-        
+
         # Phonemize the input text
         phonemes_list = self.phonemizer.phonemize([text])
-        
+
         # Process phonemes to get token IDs
         phonemes = basic_english_tokenize(phonemes_list[0])
         phonemes = ' '.join(phonemes)
         tokens = self.text_cleaner(phonemes)
-        
+
         # Add start and end tokens
         tokens.insert(0, 0)
         tokens.append(0)
-        
+
         input_ids = np.array([tokens], dtype=np.int64)
         ref_s = self.voices[voice]
-        
+
         return {
             "input_ids": input_ids,
             "style": ref_s,
             "speed": np.array([speed], dtype=np.float32),
         }
-    
-    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0) -> np.ndarray:
+
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0, old_trim=False) -> np.ndarray:
         """Synthesize speech from text.
         
         Args:
@@ -98,16 +96,26 @@ def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0)
             Audio data as numpy array
         """
         onnx_inputs = self._prepare_inputs(text, voice, speed)
-        
+
         outputs = self.session.run(None, onnx_inputs)
-        
-        # Trim audio
-        audio = outputs[0][5000:-10000]
+
+        if old_trim:
+            return outputs[0][5000:-10000]
+        else:
+            # new trim approach, PR link:
+            # https://github.com/KittenML/KittenTTS/pull/22/commits/3883bdf80d9e9e4bdf0d1d4707fa68d995d41c56
+            audio = outputs[0]  # shape (n,)
+            # Trim edge silence from audio
+            non_silent = np.abs(audio) >= 0.1
+            if np.any(non_silent):
+                indices = np.where(non_silent)[0]
+                start, end = indices[0], indices[-1]
+                audio = audio[start: end + 1]
 
         return audio
-    
-    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", 
-                          speed: float = 1.0, sample_rate: int = 24000) -> None:
+
+    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m",
+                         speed: float = 1.0, sample_rate: int = 24000) -> None:
         """Synthesize speech and save to file.
         
         Args:
@@ -124,10 +132,10 @@ def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice
 
 # Example usage
 if __name__ == "__main__":
-    tts = KittenTTS()
-    
+    tts = KittenTTS_1_Onnx()
+
     text = """
     It begins with an "Ugh!" Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.
     """
 
-    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")
\ No newline at end of file
+    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")
diff --git a/pyproject.toml b/pyproject.toml
index c2d1e5c..189a24f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,31 +1,45 @@
 [build-system]
-requires = ["setuptools>=45", "wheel"]
-build-backend = "setuptools.build_meta"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
 [project]
 name = "kittentts"
-version = "0.1.0"
 description = "Ultra-lightweight text-to-speech model with just 15 million parameters"
 readme = "README.md"
 requires-python = ">=3.8"
-license = {text = "Apache 2.0"}
+license = "Apache-2.0"
 authors = [
     {name = "KittenML"}
 ]
 keywords = ["text-to-speech", "tts", "speech-synthesis", "neural-networks", "onnx"]
 classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Multimedia :: Sound/Audio :: Speech",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
 dependencies = [
-    "num2words",
-    "spacy",
     "espeakng_loader",
+    "huggingface_hub",
-    "misaki[en]>=0.9.4",
+    "num2words",
+    "numpy",
     "onnxruntime",
+    "phonemizer-fork~=3.3.2",
     "soundfile",
-    "numpy",
-    "huggingface_hub",
+    "spacy",
+]
+dynamic = ["version"]
+
+[project.optional-dependencies]
+streaming = [
+    "sounddevice",
 ]
 
 [project.urls]
@@ -33,9 +46,8 @@ Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
 Issues = "https://github.com/kittenml/kittentts/issues"
 
-[tool.setuptools.packages.find]
-where = ["."]
-include = ["kittentts*"]
+[project.scripts]
+kitten-tts = "kittentts.cli_entry:main"
 
-[tool.setuptools.package-data]
-kittentts = ["*.json", "*.txt", "*.onnx"]
+[tool.hatch.version]
+path = "kittentts/__init__.py"
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 37bfbb3..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-num2words
-spacy
-espeakng_loader
-misaki[en]>=0.9.4
-onnxruntime
-soundfile
-numpy
-huggingface_hub
diff --git a/setup.py b/setup.py
deleted file mode 100644
index d0ac187..0000000
--- a/setup.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from setuptools import setup, find_packages
-
-with open("README.md", "r", encoding="utf-8") as fh:
-    long_description = fh.read()
-
-setup(
-    name="kittentts",
-    version="0.1.0",
-    author="KittenML",
-    author_email="",
-    description="Ultra-lightweight text-to-speech model with just 15 million parameters",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/kittenml/kittentts",
-    packages=find_packages(),
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "Topic :: Multimedia :: Sound/Audio :: Speech",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-    python_requires=">=3.8",
-    install_requires=[
-        "num2words",
-        "spacy",
-        "espeakng_loader",
-        "misaki[en]>=0.9.4",
-        "onnxruntime",
-        "soundfile",
-        "numpy",
-        "huggingface_hub",
-    ],
-    keywords="text-to-speech, tts, speech-synthesis, neural-networks, onnx",
-    project_urls={
-        "Bug Reports": "https://github.com/kittenml/kittentts/issues",
-        "Source": "https://github.com/kittenml/kittentts",
-    },
-)