From fe7670bc631e34ba7780309542451c8b267f2532 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 13:12:26 +0300
Subject: [PATCH 1/4] Add basic CLI

---
 README.md             | 10 ++++++---
 kittentts/__main__.py | 51 +++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml        |  3 +++
 setup.py              |  5 +++++
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 kittentts/__main__.py

diff --git a/README.md b/README.md
index 81536da..f0ea40e 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,7 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
-
-
- ### Basic Usage 
+### API Usage
 
 ```
 from kittentts import KittenTTS
@@ -42,6 +40,12 @@ sf.write('output.wav', audio, 24000)
 
 ```
 
+### CLI Usage
+
+```
+kittentts --output output.wav --text "This high quality TTS model works without a GPU"
+```
+
 
 
 
diff --git a/kittentts/__main__.py b/kittentts/__main__.py
new file mode 100644
index 0000000..bcbd842
--- /dev/null
+++ b/kittentts/__main__.py
@@ -0,0 +1,51 @@
+import argparse
+import datetime
+
+voices = [
+    "expr-voice-2-m",
+    "expr-voice-2-f",
+    "expr-voice-3-m",
+    "expr-voice-3-f",
+    "expr-voice-4-m",
+    "expr-voice-4-f",
+    "expr-voice-5-m",
+    "expr-voice-5-f",
+]
+
+
+def run(*, model: str, voice: str, output: str, text: str) -> datetime.timedelta:
+    from kittentts import KittenTTS
+    import soundfile as sf
+
+    m = KittenTTS(model)
+    t0 = datetime.datetime.now()
+    audio = m.generate(text, voice=voice)
+    sf.write(output, audio, 24000)
+    t1 = datetime.datetime.now()
+    return t1 - t0
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(prog="kittentts", description="Run Kitten TTS model")
+    ap.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model to use")
+    ap.add_argument("--text", required=True, help="Text to synthesize")
+    ap.add_argument("--voice", default="expr-voice-2-f", help="Voice to use", choices=voices)
+    ap.add_argument("--output", help="Output audio file")
+
+    args = ap.parse_args()
+
+    if not args.output:
+        ts = datetime.datetime.now().isoformat(timespec="seconds").replace(":", "-")
+        args.output = f"{args.voice}-{ts}.wav"
+
+    gen_time = run(
+        model=args.model,
+        voice=args.voice,
+        output=args.output,
+        text=args.text,
+    )
+    print(f"Generated audio in {gen_time}, saved to {args.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index c2d1e5c..944ff70 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,9 @@ dependencies = [
     "huggingface_hub",
 ]
 
+[project.scripts]
+kittentts = "kittentts.__main__:main"
+
 [project.urls]
 Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
diff --git a/setup.py b/setup.py
index d0ac187..a59cc07 100644
--- a/setup.py
+++ b/setup.py
@@ -43,4 +43,9 @@
         "Bug Reports": "https://github.com/kittenml/kittentts/issues",
         "Source": "https://github.com/kittenml/kittentts",
     },
+    entry_points={
+        "console_scripts": [
+            "kittentts=kittentts.__main__:main",
+        ],
+    },
 )

From ca7c23d1990811f9ffd89353d9a6f56751c0528a Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 13:12:47 +0300
Subject: [PATCH 2/4] Add standard Python gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..59d11d7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.egg-info
+*.py[cod]
+*.wav

From 42ad5ccd60285c1fe2b33b5d7746f9e29524df6a Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 15:19:39 +0300
Subject: [PATCH 3/4] Add support for stdout output

---
 kittentts/__main__.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/kittentts/__main__.py b/kittentts/__main__.py
index bcbd842..a694839 100644
--- a/kittentts/__main__.py
+++ b/kittentts/__main__.py
@@ -1,5 +1,7 @@
 import argparse
 import datetime
+import io
+import sys
 
 voices = [
     "expr-voice-2-m",
@@ -20,7 +22,13 @@ def run(*, model: str, voice: str, output: str, text: str) -> datetime.timedelta
     m = KittenTTS(model)
     t0 = datetime.datetime.now()
     audio = m.generate(text, voice=voice)
-    sf.write(output, audio, 24000)
+    if output == "-":
+        # sf requires a seekable buffer for writing.
+        bio = io.BytesIO()
+        sf.write(bio, audio, 24000, format="WAV", subtype="PCM_16")
+        sys.stdout.buffer.write(bio.getvalue())
+    else:
+        sf.write(output, audio, 24000)
     t1 = datetime.datetime.now()
     return t1 - t0
 
@@ -30,7 +38,7 @@ def main() -> None:
     ap.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model to use")
     ap.add_argument("--text", required=True, help="Text to synthesize")
     ap.add_argument("--voice", default="expr-voice-2-f", help="Voice to use", choices=voices)
-    ap.add_argument("--output", help="Output audio file")
+    ap.add_argument("--output", help="Output audio file (- for stdout; use with care)")
 
     args = ap.parse_args()
 
@@ -44,7 +52,7 @@ def main() -> None:
         output=args.output,
         text=args.text,
     )
-    print(f"Generated audio in {gen_time}, saved to {args.output}")
+    print(f"Generated audio in {gen_time}, saved to {args.output}", file=sys.stderr)
 
 
 if __name__ == "__main__":

From 0aacfcd3c2674c0b53afae35ad4b64106517d06e Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Wed, 6 Aug 2025 17:14:41 +0300
Subject: [PATCH 4/4] Add optional `--speed`

---
 kittentts/__main__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kittentts/__main__.py b/kittentts/__main__.py
index a694839..73c7da6 100644
--- a/kittentts/__main__.py
+++ b/kittentts/__main__.py
@@ -15,13 +15,13 @@
 ]
 
 
-def run(*, model: str, voice: str, output: str, text: str) -> datetime.timedelta:
+def run(*, model: str, voice: str, output: str, text: str, speed: float=1.0) -> datetime.timedelta:
     from kittentts import KittenTTS
     import soundfile as sf
 
     m = KittenTTS(model)
     t0 = datetime.datetime.now()
-    audio = m.generate(text, voice=voice)
+    audio = m.generate(text, voice=voice, speed=speed)
     if output == "-":
         # sf requires a seekable buffer for writing.
         bio = io.BytesIO()
@@ -38,6 +38,7 @@ def main() -> None:
     ap.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model to use")
     ap.add_argument("--text", required=True, help="Text to synthesize")
     ap.add_argument("--voice", default="expr-voice-2-f", help="Voice to use", choices=voices)
+    ap.add_argument("--speed", type=float, default=1.0, help="Speech speed (1.0 = normal)")
     ap.add_argument("--output", help="Output audio file (- for stdout; use with care)")
 
     args = ap.parse_args()
@@ -51,6 +52,7 @@ def main() -> None:
         voice=args.voice,
         output=args.output,
         text=args.text,
+        speed=args.speed,
     )
     print(f"Generated audio in {gen_time}, saved to {args.output}", file=sys.stderr)