KittenML · AABelkhiria · Aug 8, 2025
diff --git a/.docker/Dockerfile b/.docker/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install system dependencies for espeak-ng; remove the apt lists in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends espeak-ng \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python requirements before copying the source so this layer can be cached.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+RUN pip install --no-cache-dir .
+
+COPY .docker/run.py .
+
+# Set the entrypoint
+ENTRYPOINT ["python", "run.py"]
diff --git a/.docker/README.md b/.docker/README.md
@@ -0,0 +1,21 @@
+# KittenTTS Docker Image
+
+## Building the Image
+
+To build the Docker image, run the following command from the root of the project:
+
+```bash
+docker build -t kittentts -f .docker/Dockerfile .
+```
+
+## Running the Container
+
+To generate audio, you can run the Docker container with the following command.
+This example will create an `output` directory on your host if it doesn't exist and save the generated audio file there.
+
+```bash
+docker run --rm \
+    -v "$(pwd)/output:/app/output" kittentts \
+    --text "Hello world, this audio was generated from inside a Docker container." \
+    --output "/app/output/hello_docker.wav"
+```
diff --git a/.docker/run.py b/.docker/run.py
@@ -0,0 +1,63 @@
+import argparse
+from kittentts import KittenTTS
+import os
+
+def main():
+    """
+    Command-line interface to generate audio using KittenTTS.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate speech from text using the KittenTTS library.",
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+
+    parser.add_argument(
+        "--text",
+        type=str,
+        required=True,
+        help="The text to be synthesized."
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        required=True,
+        help="Path to save the output WAV file. E.g., /app/output/speech.wav"
+    )
+    parser.add_argument(
+        "--voice",
+        type=str,
+        default="expr-voice-5-m",
+        help="The voice to use for synthesis. Default: 'expr-voice-5-m'"
+    )
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed multiplier. Default: 1.0"
+    )
+
+    args = parser.parse_args()
+
+    print("Initializing KittenTTS model...")
+    tts = KittenTTS()
+
+    print(f"Available voices: {tts.available_voices}")
+    print(f"Generating audio for text: '{args.text}'")
+
+    # Ensure the output directory exists
+    output_dir = os.path.dirname(args.output)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    # Generate the audio file
+    tts.generate_to_file(
+        text=args.text,
+        output_path=args.output,
+        voice=args.voice,
+        speed=args.speed
+    )
+
+    print(f"Success! Audio saved to {args.output}")
+
+if __name__ == "__main__":
+    main()