Skip to content

Commit 70368f8

Browse files
authored
chore: smaller demo image (#17)
1 parent 37223d9 commit 70368f8

File tree

2 files changed

+15
-19
lines changed

2 files changed

+15
-19
lines changed

Dockerfile.demo

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,18 +153,12 @@ RUN --mount=type=cache,id=cargo-registry-whisper,target=/usr/local/cargo/registr
153153
mkdir -p /build/plugins/native && \
154154
cp target-portable/release/libwhisper.so /build/plugins/native/
155155

156-
# Download Whisper models (quantized only - no unquantized ggml-base.en.bin)
157-
# - ggml-base.en-q5_1.bin: English STT (quantized)
158-
# - ggml-tiny.en-q5_1.bin: Lightweight English STT
159-
# - ggml-base-q5_1.bin: Multilingual (for ES->EN translation)
156+
# Download Whisper models (demo image uses a single tiny multilingual model).
157+
# - ggml-tiny-q5_1.bin: Tiny multilingual STT (quantized)
160158
# - silero_vad.onnx: VAD model for Whisper
161159
RUN mkdir -p /build/models && \
162-
curl -L -o /build/models/ggml-base.en-q5_1.bin \
163-
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin && \
164-
curl -L -o /build/models/ggml-tiny.en-q5_1.bin \
165-
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin && \
166-
curl -L -o /build/models/ggml-base-q5_1.bin \
167-
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin && \
160+
curl -L -o /build/models/ggml-tiny-q5_1.bin \
161+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin && \
168162
curl -L -o /build/models/silero_vad.onnx \
169163
https://raw.githubusercontent.com/snakers4/silero-vad/master/src/silero_vad/data/silero_vad.onnx
170164

@@ -498,8 +492,15 @@ COPY --chown=app:app samples/pipelines /opt/streamkit/samples/pipelines
498492
COPY --chown=app:app samples/audio/system/*.ogg samples/audio/system/*.ogg.license /opt/streamkit/samples/audio/system/
499493
COPY --chown=app:app samples/audio/system/*.opus samples/audio/system/*.opus.license /opt/streamkit/samples/audio/system/
500494

501-
# Remove pipelines that won't work (NLLB + WASM dependencies not included)
502-
RUN rm -f \
495+
# Demo image uses a single tiny multilingual Whisper model, but the committed sample pipelines
496+
# reference a mix of base.en/tiny.en/multilingual model filenames. Rewrite them inside the image
497+
# so the shipped samples work without bundling multiple Whisper models.
498+
RUN find /opt/streamkit/samples/pipelines -type f \( -name '*.yml' -o -name '*.yaml' \) -print0 | \
499+
xargs -0 sed -i \
500+
-e 's#models/ggml-base\.en-q5_1\.bin#models/ggml-tiny-q5_1.bin#g' \
501+
-e 's#models/ggml-tiny\.en-q5_1\.bin#models/ggml-tiny-q5_1.bin#g' \
502+
-e 's#models/ggml-base-q5_1\.bin#models/ggml-tiny-q5_1.bin#g' && \
503+
rm -f \
503504
/opt/streamkit/samples/pipelines/oneshot/speech_to_text_translate.yml \
504505
/opt/streamkit/samples/pipelines/oneshot/gain_filter_rust.yml \
505506
/opt/streamkit/samples/pipelines/dynamic/speech-translate-en-es.yaml \

docker-skit-demo.toml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,10 @@ keep_models_loaded = true
4444
[resources.prewarm]
4545
enabled = true
4646

47-
# Whisper STT - base English model (used by most sample pipelines)
47+
# Whisper STT - demo image uses a single tiny multilingual model
4848
[[resources.prewarm.plugins]]
4949
kind = "plugin::native::whisper"
50-
params = { model_path = "models/ggml-base.en-q5_1.bin", use_gpu = false, gpu_device = 0, n_threads = 2, vad_model_path = "models/silero_vad.onnx" }
51-
52-
# Whisper STT - multilingual model (used by speech-translate-helsinki-es-en.yaml)
53-
[[resources.prewarm.plugins]]
54-
kind = "plugin::native::whisper"
55-
params = { model_path = "models/ggml-base-q5_1.bin", use_gpu = false, gpu_device = 0, n_threads = 2, vad_model_path = "models/silero_vad.onnx" }
50+
params = { model_path = "models/ggml-tiny-q5_1.bin", use_gpu = false, gpu_device = 0, n_threads = 2, vad_model_path = "models/silero_vad.onnx" }
5651

5752
# VAD (Voice Activity Detection)
5853
[[resources.prewarm.plugins]]

0 commit comments

Comments
 (0)