diff --git a/.docker/Dockerfile b/.docker/Dockerfile
new file mode 100644
index 0000000..baebcfb
--- /dev/null
+++ b/.docker/Dockerfile
@@ -0,0 +1,22 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+
+# Install system dependencies for espeak-ng. The apt package lists are
+# removed in the same layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends espeak-ng \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+RUN pip install --no-cache-dir .
+
+COPY .docker/run.py .
+
+# Set the entrypoint
+ENTRYPOINT ["python", "run.py"]
diff --git a/.docker/README.md b/.docker/README.md
new file mode 100644
index 0000000..350648b
--- /dev/null
+++ b/.docker/README.md
@@ -0,0 +1,23 @@
+# KittenTTS Docker Image
+
+## Building the Image
+
+To build the Docker image, run the following command from the root of the project:
+
+```bash
+docker build -t kittentts -f .docker/Dockerfile .
+```
+
+## Running the Container
+
+To generate audio, you can run the Docker container with the following command.
+This example will create an `output` directory on your host if it doesn't exist and save the generated audio file there.
+Docker options such as `-v` must come before the image name; everything after `kittentts` is passed to `run.py`.
+
+```bash
+docker run --rm \
+  -v "$(pwd)/output:/app/output" \
+  kittentts \
+  --text "Hello world, this audio was generated from inside a Docker container." \
+  --output "/app/output/hello_docker.wav"
+```
diff --git a/.docker/run.py b/.docker/run.py
new file mode 100644
index 0000000..6cb935d
--- /dev/null
+++ b/.docker/run.py
@@ -0,0 +1,69 @@
+"""Command-line entry point used by the KittenTTS Docker image."""
+import argparse
+import os
+
+from kittentts import KittenTTS
+
+
+def main(argv=None):
+    """Generate a WAV file from text using KittenTTS.
+
+    Args:
+        argv: Optional list of command-line arguments; ``None`` makes
+            argparse read them from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate speech from text using the KittenTTS library.",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument(
+        "--text",
+        type=str,
+        required=True,
+        help="The text to be synthesized.",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        required=True,
+        help="Path to save the output WAV file. \nE.g., /app/output/speech.wav",
+    )
+    parser.add_argument(
+        "--voice",
+        type=str,
+        default="expr-voice-5-m",
+        help="The voice to use for synthesis. Default: 'expr-voice-5-m'",
+    )
+    parser.add_argument(
+        "--speed",
+        type=float,
+        default=1.0,
+        help="Speech speed multiplier. Default: 1.0",
+    )
+    args = parser.parse_args(argv)
+
+    print("Initializing KittenTTS model...")
+    tts = KittenTTS()
+
+    print(f"Available voices: {tts.available_voices}")
+    print(f"Generating audio for text: '{args.text}'")
+
+    # A bare filename has an empty dirname and os.makedirs("") raises
+    # FileNotFoundError, so only create a directory when one was given.
+    # exist_ok=True also tolerates a directory that already exists.
+    output_dir = os.path.dirname(args.output)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    tts.generate_to_file(
+        text=args.text,
+        output_path=args.output,
+        voice=args.voice,
+        speed=args.speed,
+    )
+
+    print(f"Success! Audio saved to {args.output}")
+
+
+if __name__ == "__main__":
+    main()