diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 0da07c01..3bdcc103 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -8,6 +8,32 @@ on: push: tags: - 'v*.*.*' + workflow_dispatch: + inputs: + tag: + description: "Image tag to publish (defaults to sha-)" + required: false + default: "" + push: + description: "Push to GHCR (disable for build-only smoke test)" + required: false + default: true + type: boolean + build_cpu: + description: "Build CPU image" + required: false + default: true + type: boolean + build_gpu: + description: "Build GPU image" + required: false + default: true + type: boolean + build_demo: + description: "Build demo image" + required: false + default: true + type: boolean env: REGISTRY: ghcr.io @@ -16,6 +42,7 @@ env: jobs: build-cpu: name: Build CPU Image + if: ${{ github.event_name == 'push' || inputs.build_cpu }} runs-on: self-hosted permissions: contents: read @@ -37,13 +64,22 @@ jobs: - name: Extract version from tag id: meta run: | - VERSION=${GITHUB_REF#refs/tags/} + if [[ "${GITHUB_REF}" == refs/tags/* ]]; then + VERSION=${GITHUB_REF#refs/tags/} + else + VERSION_INPUT='${{ inputs.tag }}' + if [[ -n "${VERSION_INPUT}" ]]; then + VERSION="${VERSION_INPUT}" + else + VERSION="sha-${GITHUB_SHA::12}" + fi + fi echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "Building version: ${VERSION}" { echo "tags<> $GITHUB_OUTPUT echo "Building version: ${VERSION}" { echo "tags<> $GITHUB_OUTPUT + echo "Building version: ${VERSION}" + { + echo "tags<> "$GITHUB_OUTPUT" + + - name: Build and push Demo image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile.demo + push: ${{ github.event_name == 'push' || inputs.push }} tags: ${{ steps.meta.outputs.tags }} cache-from: type=local,src=/mnt/docker-cache cache-to: type=local,dest=/mnt/docker-cache,mode=max diff --git a/DOCKER.md b/DOCKER.md index 17ea3b17..33469d52 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -10,11 +10,65 @@ SPDX-License-Identifier: MPL-2.0 > The documentation site is the canonical source for deployment guidance: . > This file is a repo-local snapshot for convenience and may lag behind the website docs. -This guide covers building and running StreamKit Docker images. The Docker images are slim (~200-400+ MB) and contain the server binary (with the web UI embedded), sample pipelines, and a few small audio samples. Models and plugins must be mounted externally. +This guide covers building and running StreamKit Docker images. The official “slim” images are small (~200-400+ MB) and contain the server binary (with the web UI embedded), sample pipelines, and a few small audio samples; models and plugins must be mounted externally. > [!NOTE] > Official Docker images are published for `linux/amd64` (x86_64). On ARM hosts (Raspberry Pi, Apple Silicon, etc.), use “Build from Source” or run with amd64 emulation. +## Image Variants + +- `Dockerfile` (CPU, slim): includes server + sample pipelines + a few small audio samples; mount models/plugins externally. +- `Dockerfile.gpu` (GPU, slim): includes server + sample pipelines; mount models/plugins externally. +- `Dockerfile.demo` (CPU, demo): bundles core native plugins and the models needed by the shipped sample pipelines (including Helsinki OPUS-MT and SenseVoice). This image is much larger and intended for demos/evaluation, not production. + +### Demo Image Quick Start + +```bash +docker build -f Dockerfile.demo -t streamkit:demo . 
+ +docker run \ + -p 127.0.0.1:4545:4545/tcp \ + -p 127.0.0.1:4545:4545/udp \ + streamkit:demo +``` + +If you want the OpenAI-powered sample pipelines, pass `OPENAI_API_KEY` without putting it directly in the command: + +```bash +# Option A: inherit from your current shell environment +export OPENAI_API_KEY=sk-... +docker run --env OPENAI_API_KEY \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + streamkit:demo + +# Option B: env-file (recommended for local dev; keep it out of git) +printf 'OPENAI_API_KEY=%s\n' 'sk-...' > streamkit.env +chmod 600 streamkit.env +docker run --env-file streamkit.env \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + streamkit:demo +``` + +### Debugging native crashes (gdb) + +The demo image includes `gdb`. To attach to the running server inside Docker, run with ptrace enabled: + +```bash +docker run --rm --name streamkit-demo \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --user root \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + streamkit:demo +``` + +Then, inside the container, the StreamKit server is typically PID 1: + +```bash +ps -eo pid,cmd +gdb -p 1 +``` + ## Quick Start ### 1. 
Build the Image diff --git a/Dockerfile.demo b/Dockerfile.demo new file mode 100644 index 00000000..2b2732b9 --- /dev/null +++ b/Dockerfile.demo @@ -0,0 +1,543 @@ +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +# Demo image: CPU-only with Helsinki translation, all core plugins, and sample pipelines +# syntax=docker/dockerfile:1 + +# Version configuration +ARG SHERPA_ONNX_VERSION=1.12.17 + +# Stage 1: Build Rust dependencies +FROM rust:1.92-slim-bookworm AS rust-deps + +WORKDIR /build + +# Install build dependencies (includes all deps needed by workspace plugins) +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + g++ \ + cmake \ + libopus-dev \ + libclang-dev \ + clang \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Copy workspace files to build dependencies (plugins are now excluded) +COPY Cargo.toml Cargo.lock ./ +COPY apps ./apps +COPY crates ./crates +COPY sdks ./sdks +COPY wit ./wit + +# Create dummy ui/dist directory so server's RustEmbed doesn't fail +# (will be replaced with real UI in Stage 3) +RUN mkdir -p ui/dist && echo 'Building...' 
> ui/dist/index.html + +# Build dependencies with cache mount (plugins are excluded from workspace) +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/target \ + cargo build --release -p streamkit-server --bin skit --features "moq" && \ + # Copy compiled artifacts out of cache mount so they persist in the layer + mkdir -p /build/target-out && \ + cp -r /build/target/release /build/target-out/ + +# Stage 2: Build UI +FROM oven/bun:1.3.5-alpine AS ui-builder + +WORKDIR /build/ui + +# Install UI dependencies +COPY ui/package.json ui/bun.lock* ./ +RUN --mount=type=cache,target=/root/.bun/install/cache \ + bun install --frozen-lockfile + +# Copy UI source and build +COPY ui/ ./ +RUN bun run build + +# Stage 3: Build final server binary with UI embedded +FROM rust:1.92-slim-bookworm AS rust-builder + +WORKDIR /build + +# Install build dependencies (same as stage 1, includes all deps needed by workspace) +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + g++ \ + cmake \ + libopus-dev \ + libclang-dev \ + clang \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Copy workspace files (plugins are excluded from workspace) +COPY Cargo.toml Cargo.lock ./ +COPY apps ./apps +COPY crates ./crates +COPY sdks ./sdks +COPY wit ./wit + +# Copy built UI from stage 2 +COPY --from=ui-builder /build/ui/dist ./ui/dist + +# Remove the server binary from the previous stage to force rebuild with new UI +# We'll use cache mount with artifacts from stage 1 +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/target \ + --mount=type=bind,from=rust-deps,source=/build/target-out/release,target=/build/target-init \ + bash -c '\ + # Copy pre-built dependencies if target is empty (first build) \ + if [ ! 
-d "/build/target/release/deps" ]; then \ + echo "Initializing target from cache..."; \ + cp -r /build/target-init/* /build/target/release/ || true; \ + fi; \ + # Remove server binary to force rebuild with new UI \ + rm -rf /build/target/release/skit \ + /build/target/release/skit.d \ + /build/target/release/deps/streamkit_server-* \ + /build/target/release/.fingerprint/streamkit-server-*; \ + # Build only the server binary \ + cargo build --release --features "moq" --bin skit; \ + # Copy final binary out of cache mount \ + mkdir -p /build/bin && cp /build/target/release/skit /build/bin/skit \ + ' + +# Stage 4: Build Whisper plugin +FROM rust:1.92-slim-bookworm AS whisper-builder + +WORKDIR /build + +# Install build dependencies for whisper.cpp +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + g++ \ + cmake \ + curl \ + libclang-dev \ + clang \ + git \ + && rm -rf /var/lib/apt/lists/* + +# whisper.cpp/ggml defaults to enabling `GGML_NATIVE` (i.e. `-march=native`) when not cross-compiling. +# That can produce binaries that crash with SIGILL on older CPUs. Setting SOURCE_DATE_EPOCH disables +# `GGML_NATIVE_DEFAULT` upstream, making the build portable by default. +ENV SOURCE_DATE_EPOCH=1 + +# Extra defense-in-depth: ensure the toolchain doesn't auto-enable newer x86 features. 
+ENV CFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" \ + CXXFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" + +# Copy only what's needed to build whisper plugin +# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/whisper ./plugins/native/whisper + +# Build whisper plugin +RUN --mount=type=cache,id=cargo-registry-whisper,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-whisper,target=/usr/local/cargo/git \ + --mount=type=cache,id=whisper-target-portable-v2,target=/build/plugins/native/whisper/target-portable \ + cd plugins/native/whisper && \ + cargo build --release --target-dir target-portable && \ + mkdir -p /build/plugins/native && \ + cp target-portable/release/libwhisper.so /build/plugins/native/ + +# Download Whisper models (quantized only - no unquantized ggml-base.en.bin) +# - ggml-base.en-q5_1.bin: English STT (quantized) +# - ggml-tiny.en-q5_1.bin: Lightweight English STT +# - ggml-base-q5_1.bin: Multilingual (for ES->EN translation) +# - silero_vad.onnx: VAD model for Whisper +RUN mkdir -p /build/models && \ + curl -L -o /build/models/ggml-base.en-q5_1.bin \ + https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin && \ + curl -L -o /build/models/ggml-tiny.en-q5_1.bin \ + https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin && \ + curl -L -o /build/models/ggml-base-q5_1.bin \ + https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin && \ + curl -L -o /build/models/silero_vad.onnx \ + https://raw.githubusercontent.com/snakers4/silero-vad/master/src/silero_vad/data/silero_vad.onnx + +# Stage 5: Build Kokoro TTS plugin +FROM rust:1.92-slim-bookworm AS kokoro-builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + wget \ + bzip2 \ 
+ libclang-dev \ + clang \ + && rm -rf /var/lib/apt/lists/* + +# Download and install sherpa-onnx shared library +ARG SHERPA_ONNX_VERSION +RUN cd /tmp && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v${SHERPA_ONNX_VERSION}/sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 && \ + tar xf sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 && \ + cp sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared/lib/*.so* /usr/local/lib/ && \ + ldconfig + +# Copy only what's needed to build kokoro plugin +# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/kokoro ./plugins/native/kokoro + +# Build kokoro plugin +RUN --mount=type=cache,id=cargo-registry-kokoro,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-kokoro,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/plugins/native/kokoro/target \ + cd plugins/native/kokoro && \ + RUSTFLAGS="-L /usr/local/lib" cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libkokoro.so /build/plugins/native/ + +# Download Kokoro TTS models +RUN mkdir -p /build/models && \ + cd /build/models && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_1.tar.bz2 && \ + tar xf kokoro-multi-lang-v1_1.tar.bz2 && \ + rm kokoro-multi-lang-v1_1.tar.bz2 + +# Stage 6: Build Piper TTS plugin +FROM rust:1.92-slim-bookworm AS piper-builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + wget \ + bzip2 \ + libclang-dev \ + clang \ + && rm -rf /var/lib/apt/lists/* + +# Copy sherpa-onnx from kokoro-builder (reuse to avoid duplicate downloads) +COPY --from=kokoro-builder /usr/local/lib/*.so* /usr/local/lib/ +RUN ldconfig + +# Copy only what's needed to build piper plugin 
+# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/piper ./plugins/native/piper + +# Build piper plugin +RUN --mount=type=cache,id=cargo-registry-piper,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-piper,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/plugins/native/piper/target \ + cd plugins/native/piper && \ + RUSTFLAGS="-L /usr/local/lib" cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libpiper.so /build/plugins/native/ + +# Download Piper TTS models (English + Spanish for translation output) +RUN mkdir -p /build/models && \ + cd /build/models && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 && \ + tar xf vits-piper-en_US-libritts_r-medium.tar.bz2 && \ + rm vits-piper-en_US-libritts_r-medium.tar.bz2 && \ + cd vits-piper-en_US-libritts_r-medium && \ + if [ ! -f "model.onnx" ] && [ -f "en_US-libritts_r-medium.onnx" ]; then \ + ln -sf en_US-libritts_r-medium.onnx model.onnx; \ + fi && \ + cd /build/models && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-es_MX-claude-high.tar.bz2 && \ + tar xf vits-piper-es_MX-claude-high.tar.bz2 && \ + rm vits-piper-es_MX-claude-high.tar.bz2 && \ + cd vits-piper-es_MX-claude-high && \ + if [ ! 
-f "model.onnx" ] && [ -f "es_MX-claude-high.onnx" ]; then \ + ln -sf es_MX-claude-high.onnx model.onnx; \ + fi + +# Stage 7: Build SenseVoice STT plugin +FROM rust:1.92-slim-bookworm AS sensevoice-builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + wget \ + bzip2 \ + libclang-dev \ + clang \ + && rm -rf /var/lib/apt/lists/* + +# Copy sherpa-onnx from kokoro-builder (reuse to avoid duplicate downloads) +COPY --from=kokoro-builder /usr/local/lib/*.so* /usr/local/lib/ +RUN ldconfig + +# Copy only what's needed to build sensevoice plugin +# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/sensevoice ./plugins/native/sensevoice + +# Build sensevoice plugin +RUN --mount=type=cache,id=cargo-registry-sensevoice,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-sensevoice,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/plugins/native/sensevoice/target \ + cd plugins/native/sensevoice && \ + RUSTFLAGS="-L /usr/local/lib" cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libsensevoice.so /build/plugins/native/ + +# Download SenseVoice models +RUN mkdir -p /build/models && \ + cd /build/models && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09.tar.bz2 && \ + tar xf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09.tar.bz2 && \ + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09.tar.bz2 + +# Stage 8: Build VAD plugin +FROM rust:1.92-slim-bookworm AS vad-builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + wget \ + bzip2 \ + libclang-dev \ + clang \ + && rm -rf 
/var/lib/apt/lists/* + +# Copy sherpa-onnx from kokoro-builder (reuse to avoid duplicate downloads) +COPY --from=kokoro-builder /usr/local/lib/*.so* /usr/local/lib/ +RUN ldconfig + +# Copy only what's needed to build vad plugin +# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/vad ./plugins/native/vad + +# Build vad plugin +RUN --mount=type=cache,id=cargo-registry-vad,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-vad,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/plugins/native/vad/target \ + cd plugins/native/vad && \ + RUSTFLAGS="-L /usr/local/lib" cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libvad.so /build/plugins/native/ + +# Download ten-vad model +RUN mkdir -p /build/models && \ + curl -L -o /build/models/ten-vad.onnx \ + https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx + +# Stage 9: Build Matcha TTS plugin +FROM rust:1.92-slim-bookworm AS matcha-builder + +WORKDIR /build + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + wget \ + bzip2 \ + libclang-dev \ + clang \ + && rm -rf /var/lib/apt/lists/* + +# Copy sherpa-onnx from kokoro-builder (reuse to avoid duplicate downloads) +COPY --from=kokoro-builder /usr/local/lib/*.so* /usr/local/lib/ +RUN ldconfig + +# Copy only what's needed to build matcha plugin +# Note: Cargo.toml needed for workspace dependency resolution in core/ +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/matcha ./plugins/native/matcha + +# Build matcha plugin +RUN --mount=type=cache,id=cargo-registry-matcha,target=/usr/local/cargo/registry \ + --mount=type=cache,id=cargo-git-matcha,target=/usr/local/cargo/git \ + 
--mount=type=cache,target=/build/plugins/native/matcha/target \ + cd plugins/native/matcha && \ + RUSTFLAGS="-L /usr/local/lib" cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libmatcha.so /build/plugins/native/ + +# Download Matcha TTS models +RUN mkdir -p /build/models && \ + cd /build/models && \ + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 && \ + tar xf matcha-icefall-en_US-ljspeech.tar.bz2 && \ + rm matcha-icefall-en_US-ljspeech.tar.bz2 && \ + cd matcha-icefall-en_US-ljspeech && \ + wget -O vocos-22khz-univ.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx + +# Stage 10: Build Helsinki Translation plugin (CPU-only) +FROM rust:1.92-slim-bookworm AS helsinki-builder + +WORKDIR /build + +# Install dependencies (Rust + Python for model conversion) +RUN apt-get update && apt-get install -y \ + curl \ + git \ + pkg-config \ + libssl-dev \ + libclang-dev \ + clang \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# Copy only what's needed to build helsinki plugin +COPY Cargo.toml Cargo.lock ./ +COPY crates/core ./crates/core +COPY sdks/plugin-sdk ./sdks/plugin-sdk +COPY plugins/native/helsinki ./plugins/native/helsinki + +# Build helsinki plugin (CPU-only, no cuda feature) +RUN --mount=type=cache,id=helsinki-cargo-registry,target=/usr/local/cargo/registry \ + --mount=type=cache,id=helsinki-cargo-git,target=/usr/local/cargo/git \ + --mount=type=cache,id=helsinki-target,target=/build/plugins/native/helsinki/target \ + cd plugins/native/helsinki && \ + cargo build --release --target-dir target && \ + mkdir -p /build/plugins/native && \ + cp target/release/libhelsinki.so /build/plugins/native/ + +# Download and convert OPUS-MT models (EN<->ES) +RUN PIP_BREAK_SYSTEM_PACKAGES=1 pip3 install --no-cache-dir \ + transformers \ + sentencepiece \ + safetensors \ + torch \ + tokenizers && \ + 
python3 plugins/native/helsinki/download-models.py + +# Runtime stage +FROM debian:bookworm-slim + +# Install runtime dependencies (include gdb for debugging demo image crashes) +RUN apt-get update && apt-get install -y \ + ca-certificates \ + libssl3 \ + libopus0 \ + libgomp1 \ + gdb \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create app user +RUN useradd -m -u 1000 -s /bin/bash app + +# Copy binary from rust builder +COPY --from=rust-builder /build/bin/skit /usr/local/bin/skit + +# Copy sherpa-onnx shared libraries from kokoro-builder +COPY --from=kokoro-builder /usr/local/lib/*.so* /usr/local/lib/ +RUN ldconfig + +# Copy whisper plugin and models +COPY --from=whisper-builder /build/plugins /opt/streamkit/plugins +COPY --from=whisper-builder /build/models /opt/streamkit/models + +# Copy kokoro plugin and models (merge into /opt/streamkit/plugins and /opt/streamkit/models) +COPY --from=kokoro-builder /build/plugins/native/* /opt/streamkit/plugins/native/ +COPY --from=kokoro-builder /build/models/kokoro-multi-lang-v1_1 /opt/streamkit/models/kokoro-multi-lang-v1_1 + +# Copy piper plugin and models (merge into /opt/streamkit/plugins and /opt/streamkit/models) +COPY --from=piper-builder /build/plugins/native/* /opt/streamkit/plugins/native/ +COPY --from=piper-builder /build/models/vits-piper-en_US-libritts_r-medium /opt/streamkit/models/vits-piper-en_US-libritts_r-medium +COPY --from=piper-builder /build/models/vits-piper-es_MX-claude-high /opt/streamkit/models/vits-piper-es_MX-claude-high + +# Copy sensevoice plugin and models (merge into /opt/streamkit/plugins and /opt/streamkit/models) +COPY --from=sensevoice-builder /build/plugins/native/* /opt/streamkit/plugins/native/ +COPY --from=sensevoice-builder /build/models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09 /opt/streamkit/models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09 + +# Copy helsinki plugin and models +COPY --from=helsinki-builder /build/plugins/native/* 
/opt/streamkit/plugins/native/ +COPY --from=helsinki-builder /build/models/opus-mt-en-es /opt/streamkit/models/opus-mt-en-es +COPY --from=helsinki-builder /build/models/opus-mt-es-en /opt/streamkit/models/opus-mt-es-en + +# Copy vad plugin and model (merge into /opt/streamkit/plugins and /opt/streamkit/models) +COPY --from=vad-builder /build/plugins/native/* /opt/streamkit/plugins/native/ +COPY --from=vad-builder /build/models/ten-vad.onnx /opt/streamkit/models/ten-vad.onnx + +# Copy matcha plugin and models (merge into /opt/streamkit/plugins and /opt/streamkit/models) +COPY --from=matcha-builder /build/plugins/native/* /opt/streamkit/plugins/native/ +COPY --from=matcha-builder /build/models/matcha-icefall-en_US-ljspeech /opt/streamkit/models/matcha-icefall-en_US-ljspeech + +# Copy sample pipelines + small bundled audio samples (Opus/Ogg only) +COPY --chown=app:app samples/pipelines /opt/streamkit/samples/pipelines +COPY --chown=app:app samples/audio/system/*.ogg samples/audio/system/*.ogg.license /opt/streamkit/samples/audio/system/ +COPY --chown=app:app samples/audio/system/*.opus samples/audio/system/*.opus.license /opt/streamkit/samples/audio/system/ + +# Remove pipelines that won't work (NLLB + WASM dependencies not included) +RUN rm -f \ + /opt/streamkit/samples/pipelines/oneshot/speech_to_text_translate.yml \ + /opt/streamkit/samples/pipelines/oneshot/gain_filter_rust.yml \ + /opt/streamkit/samples/pipelines/dynamic/speech-translate-en-es.yaml \ + /opt/streamkit/samples/pipelines/dynamic/speech-translate-es-en.yaml + +# Copy demo configuration +RUN mkdir -p /opt/streamkit/config +COPY --chown=app:app docker-skit-demo.toml /opt/streamkit/config/skit.toml + +# Ensure runtime-writable directories exist, and keep /app/* as compatibility symlinks. 
+RUN mkdir -p /opt/streamkit/.plugins /opt/streamkit/logs && \ + chown -R app:app /opt/streamkit && \ + mkdir -p /app && \ + ln -sfn /opt/streamkit/config/skit.toml /opt/streamkit/skit.toml && \ + ln -sfn /opt/streamkit/plugins /app/plugins && \ + ln -sfn /opt/streamkit/models /app/models && \ + ln -sfn /opt/streamkit/samples /app/samples && \ + ln -sfn /opt/streamkit/config /app/config && \ + ln -sfn /opt/streamkit/.plugins /app/.plugins && \ + ln -sfn /opt/streamkit/logs /app/logs + +WORKDIR /opt/streamkit +USER app + +# Expose HTTP and UDP ports +EXPOSE 4545/tcp +EXPOSE 4545/udp + +# OCI image labels +LABEL org.opencontainers.image.title="StreamKit Demo" +LABEL org.opencontainers.image.description="Demo image with pre-configured plugins for speech-to-text, text-to-speech, and translation" +LABEL org.opencontainers.image.source="https://github.com/streamer45/streamkit" +LABEL org.opencontainers.image.licenses="MPL-2.0" +LABEL org.opencontainers.image.vendor="StreamKit Contributors" + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \ + CMD curl -f http://localhost:4545/healthz || exit 1 + +# Default command +CMD ["skit", "serve"] diff --git a/Dockerfile.full b/Dockerfile.full index f9ca55a6..4321a5fb 100644 --- a/Dockerfile.full +++ b/Dockerfile.full @@ -130,6 +130,15 @@ RUN apt-get update && apt-get install -y \ git \ && rm -rf /var/lib/apt/lists/* +# whisper.cpp/ggml defaults to enabling `GGML_NATIVE` (i.e. `-march=native`) when not cross-compiling. +# That can produce binaries that crash with SIGILL on older CPUs. Setting SOURCE_DATE_EPOCH disables +# `GGML_NATIVE_DEFAULT` upstream, making the build portable by default. +ENV SOURCE_DATE_EPOCH=1 + +# Extra defense-in-depth: ensure the toolchain doesn't auto-enable newer x86 features. 
+ENV CFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" \ + CXXFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" + # Copy only what's needed to build whisper plugin # Note: Cargo.toml needed for workspace dependency resolution in core/ COPY Cargo.toml Cargo.lock ./ @@ -140,11 +149,11 @@ COPY plugins/native/whisper ./plugins/native/whisper # Build whisper plugin RUN --mount=type=cache,id=cargo-registry-whisper,target=/usr/local/cargo/registry \ --mount=type=cache,id=cargo-git-whisper,target=/usr/local/cargo/git \ - --mount=type=cache,target=/build/plugins/native/whisper/target \ + --mount=type=cache,id=whisper-target-portable-v2,target=/build/plugins/native/whisper/target-portable \ cd plugins/native/whisper && \ - cargo build --release --target-dir target && \ + cargo build --release --target-dir target-portable && \ mkdir -p /build/plugins/native && \ - cp target/release/libwhisper.so /build/plugins/native/ + cp target-portable/release/libwhisper.so /build/plugins/native/ # Download Whisper models (base.en and tiny.en) and Silero VAD model RUN mkdir -p /build/models && \ diff --git a/Dockerfile.full-gpu b/Dockerfile.full-gpu index 16064f20..80c3ab4f 100644 --- a/Dockerfile.full-gpu +++ b/Dockerfile.full-gpu @@ -137,6 +137,15 @@ RUN apt-get update && apt-get install -y \ ENV PATH="/root/.cargo/bin:${PATH}" +# whisper.cpp/ggml defaults to enabling `GGML_NATIVE` (i.e. `-march=native`) when not cross-compiling. +# That can produce binaries that crash with SIGILL on older CPUs. Setting SOURCE_DATE_EPOCH disables +# `GGML_NATIVE_DEFAULT` upstream, making the build portable by default. +ENV SOURCE_DATE_EPOCH=1 + +# Extra defense-in-depth: ensure the toolchain doesn't auto-enable newer x86 features. 
+ENV CFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" \ + CXXFLAGS="-O3 -pipe -fPIC -march=x86-64 -mtune=generic" + # Copy only what's needed to build whisper plugin # Note: Cargo.toml needed for workspace dependency resolution in core/ COPY Cargo.toml Cargo.lock ./ @@ -151,11 +160,11 @@ RUN cd plugins/native/whisper && \ # Build whisper plugin with CUDA support RUN --mount=type=cache,id=whisper-cargo-registry,target=/root/.cargo/registry \ --mount=type=cache,id=whisper-cargo-git,target=/root/.cargo/git \ - --mount=type=cache,id=whisper-target,target=/build/plugins/native/whisper/target \ + --mount=type=cache,id=whisper-target-portable,target=/build/plugins/native/whisper/target-portable \ cd plugins/native/whisper && \ - cargo build --release --target-dir target -j$(nproc) && \ + cargo build --release --target-dir target-portable -j$(nproc) && \ mkdir -p /build/plugins/native && \ - cp target/release/libwhisper.so /build/plugins/native/ + cp target-portable/release/libwhisper.so /build/plugins/native/ # Download Whisper models (base.en, base multilingual, and tiny.en) and Silero VAD model RUN mkdir -p /build/models && \ diff --git a/README.md b/README.md index c63f9e3f..276cac71 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,37 @@ docker run --rm \ # Press Ctrl+C to stop ``` +### Demo image (batteries included) + +The `:latest-demo` image bundles core plugins plus the models needed by the shipped sample pipelines (much larger image; intended for demos/evaluation, not production). + +```bash +docker run --rm \ + -p 127.0.0.1:4545:4545/tcp \ + -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:latest-demo +``` + +If you want the OpenAI-powered sample pipelines, pass `OPENAI_API_KEY` without putting it directly in the command: + +```bash +# Inherit OPENAI_API_KEY from your current shell environment (recommended). +# (Make sure it's set on the host before you run this.) 
+docker run --rm --env OPENAI_API_KEY \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:latest-demo +``` + +Or use an env-file so the secret never appears in your shell history: + +```bash +printf 'OPENAI_API_KEY=%s\n' 'sk-...' > streamkit.env +chmod 600 streamkit.env +docker run --rm --env-file streamkit.env \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:latest-demo +``` + ### Next steps - Open the UI at `http://localhost:4545` and load an example from the **Samples** list (bundled in the image under `/opt/streamkit/samples/pipelines`). diff --git a/apps/skit/src/permissions.rs b/apps/skit/src/permissions.rs index dc8edd4a..4eb8fd97 100644 --- a/apps/skit/src/permissions.rs +++ b/apps/skit/src/permissions.rs @@ -171,6 +171,9 @@ impl Permissions { "core::telemetry_tap".to_string(), "core::telemetry_out".to_string(), "core::sink".to_string(), + // Plugins are represented as node kinds too (e.g. plugin::native::whisper). + // This must be aligned with allowed_plugins for RBAC to work as expected. + "plugin::*".to_string(), ], allowed_plugins: vec![ // Users can use already-loaded plugins. 
@@ -444,6 +447,14 @@ mod tests { assert!(!perms.is_plugin_allowed("plugin::native::network")); } + #[test] + fn test_default_user_allows_plugin_nodes() { + let user = Permissions::user(); + assert!(user.is_node_allowed("plugin::native::whisper")); + assert!(user.is_node_allowed("plugin::native::kokoro")); + assert!(user.is_node_allowed("plugin::wasm::gain_filter_rust")); + } + #[test] fn test_global_session_limits() { let config = PermissionsConfig { max_concurrent_sessions: Some(10), ..Default::default() }; diff --git a/docker-skit-demo.toml b/docker-skit-demo.toml new file mode 100644 index 00000000..76fd82b4 --- /dev/null +++ b/docker-skit-demo.toml @@ -0,0 +1,129 @@ +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +# Demo configuration with pre-warming and script support for OpenAI/weather pipelines. +# +# Usage: +# docker run -e OPENAI_API_KEY=sk-... \ +# -p 127.0.0.1:4545:4545/tcp \ +# -p 127.0.0.1:4545:4545/udp \ +# ghcr.io/streamer45/streamkit:latest-demo + +[server] +address = "0.0.0.0:4545" +samples_dir = "/opt/streamkit/samples/pipelines" +max_body_size = 104857600 + +# MoQ Gateway URL for the frontend to connect via WebTransport +# Default assumes Docker is running locally with ports mapped to localhost +# Override with SK_SERVER__MOQ_GATEWAY_URL env var for remote deployments +moq_gateway_url = "http://127.0.0.1:4545/moq" + +[plugins] +directory = "/opt/streamkit/plugins" + +[log] +console_enable = true +file_enable = false +console_level = "info" + +[telemetry] +enable = true +tokio_console = false + +[engine] +packet_batch_size = 8 +node_input_capacity = 8 +pin_distributor_capacity = 4 + +[resources] +keep_models_loaded = true + +# Pre-warming: Load models at startup to eliminate first-use latency +[resources.prewarm] +enabled = true + +# Whisper STT - base English model (used by most sample pipelines) +[[resources.prewarm.plugins]] +kind = "plugin::native::whisper" +params = { model_path = 
"models/ggml-base.en-q5_1.bin", use_gpu = false, gpu_device = 0, n_threads = 2, vad_model_path = "models/silero_vad.onnx" } + +# Whisper STT - multilingual model (used by speech-translate-helsinki-es-en.yaml) +[[resources.prewarm.plugins]] +kind = "plugin::native::whisper" +params = { model_path = "models/ggml-base-q5_1.bin", use_gpu = false, gpu_device = 0, n_threads = 2, vad_model_path = "models/silero_vad.onnx" } + +# VAD (Voice Activity Detection) +[[resources.prewarm.plugins]] +kind = "plugin::native::vad" +params = { model_path = "models/ten-vad.onnx", output_mode = "events", num_threads = 1, provider = "cpu" } + +# Helsinki OPUS-MT Translation (EN<->ES) +[[resources.prewarm.plugins]] +kind = "plugin::native::helsinki" +params = { model_dir = "models/opus-mt-en-es", source_language = "en", target_language = "es", device = "cpu", max_length = 512, warmup = true } + +[[resources.prewarm.plugins]] +kind = "plugin::native::helsinki" +params = { model_dir = "models/opus-mt-es-en", source_language = "es", target_language = "en", device = "cpu", max_length = 512, warmup = true } + +# Kokoro TTS (default voice) +[[resources.prewarm.plugins]] +kind = "plugin::native::kokoro" +params = { model_dir = "models/kokoro-multi-lang-v1_1", speaker_id = 0, speed = 1.0, num_threads = 4, execution_provider = "cpu" } + +# Piper TTS (English + Spanish voices) +[[resources.prewarm.plugins]] +kind = "plugin::native::piper" +params = { model_dir = "models/vits-piper-en_US-libritts_r-medium", speaker_id = 0, speed = 1.0, num_threads = 4 } + +[[resources.prewarm.plugins]] +kind = "plugin::native::piper" +params = { model_dir = "models/vits-piper-es_MX-claude-high", speaker_id = 0, speed = 1.0, num_threads = 4 } + +# Matcha TTS +[[resources.prewarm.plugins]] +kind = "plugin::native::matcha" +params = { model_dir = "models/matcha-icefall-en_US-ljspeech", speaker_id = 0, speed = 1.0, num_threads = 4, execution_provider = "cpu" } + +# SenseVoice STT +[[resources.prewarm.plugins]] +kind = 
"plugin::native::sensevoice" +params = { model_dir = "models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", language = "auto", num_threads = 4, execution_provider = "cpu", use_vad = true, vad_model_path = "models/silero_vad.onnx" } + +# Script node configuration for OpenAI/weather pipelines +[script] + +# OpenAI API Key for LLM integration (voice-agent-openai.yaml) +[script.secrets.openai_key] +env = "OPENAI_API_KEY" +type = "apikey" +description = "OpenAI API key for GPT-4 integration in voice agent pipelines" +allowed_fetch_urls = ["https://api.openai.com/*"] + +# Allow fetch() calls to OpenAI API +[[script.global_fetch_allowlist]] +url = "https://api.openai.com/v1/chat/completions" +methods = ["POST"] + +# Allow Open-Meteo (voice-weather-open-meteo.yaml) +[[script.global_fetch_allowlist]] +url = "https://geocoding-api.open-meteo.com/*" +methods = ["GET"] + +[[script.global_fetch_allowlist]] +url = "https://api.open-meteo.com/*" +methods = ["GET"] + +# Allow useless-facts API (useless-facts-tts.yml) +[[script.global_fetch_allowlist]] +url = "https://uselessfacts.jsph.pl/*" +methods = ["GET"] + +[permissions] +default_role = "user" +# Docker containers must bind to 0.0.0.0 for published ports to work. +# This is only safe when the published ports are bound to localhost (recommended), +# e.g. `-p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp`, or otherwise firewalled. +allow_insecure_no_auth = true diff --git a/docs/src/content/docs/deployment/docker.md b/docs/src/content/docs/deployment/docker.md index 2e24ec7c..1af764c1 100644 --- a/docs/src/content/docs/deployment/docker.md +++ b/docs/src/content/docs/deployment/docker.md @@ -18,6 +18,53 @@ docker run --rm \ skit serve # optional: this is the image default ``` +## Demo Image (Batteries Included) + +StreamKit also publishes a `-demo` image intended for demos/evaluation. 
 It bundles core plugins plus the models needed by the shipped sample pipelines, so it should work out of the box (but is much larger than the slim images). + +```bash +docker run --rm \ + -p 127.0.0.1:4545:4545/tcp \ + -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:${TAG}-demo +``` + +If you want the OpenAI-powered sample pipelines, pass `OPENAI_API_KEY` without putting it directly in the command: + +```bash +# Inherit OPENAI_API_KEY from your current shell environment (recommended). +# (Make sure it's set on the host before you run this.) +docker run --rm --env OPENAI_API_KEY \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:${TAG}-demo +``` + +Or use an env-file so the secret never appears on the `docker` command line (create the file with an editor instead of `printf` if you also want to keep it out of your shell history): + +```bash +printf 'OPENAI_API_KEY=%s\n' 'sk-...' > streamkit.env +chmod 600 streamkit.env +docker run --rm --env-file streamkit.env \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:${TAG}-demo +``` + +### Debugging native crashes (gdb) + +The demo image includes `gdb`. To attach to the running server inside Docker, run with ptrace enabled: + +```bash +docker run --rm --name streamkit-demo \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --user root \ + -p 127.0.0.1:4545:4545/tcp -p 127.0.0.1:4545:4545/udp \ + ghcr.io/streamer45/streamkit:${TAG}-demo + +# In a second terminal, inspect processes and attach *inside* the container: +docker exec -it streamkit-demo ps -eo pid,cmd +docker exec -it streamkit-demo gdb -p 1 +``` + > [!NOTE] > Official Docker images are published for `linux/amd64` (x86_64). On ARM hosts, use “Build from Source” or run with amd64 emulation. @@ -51,6 +98,18 @@ services: # streamkit-plugins: ``` +### Demo Image with Secrets + +Use `env_file` to avoid putting secrets in your `docker-compose.yml`: + +```yaml +services: + streamkit: + image: ghcr.io/streamer45/streamkit:v0.1.0-demo + env_file: + - ./streamkit.env +``` + ## Building Images ### CPU-only Image