diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d7dcdef..825de4ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,26 +14,117 @@ concurrency: cancel-in-progress: true jobs: + # ---------- Change detection ---------- + changes: + name: Detect Changes + runs-on: ubuntu-22.04 + permissions: + contents: read + pull-requests: read + outputs: + ci: ${{ steps.filter.outputs.ci }} + rust: ${{ steps.filter.outputs.rust }} + ui: ${{ steps.filter.outputs.ui }} + plugins: ${{ steps.filter.outputs.plugins }} + e2e: ${{ steps.filter.outputs.e2e }} + steps: + - uses: actions/checkout@v5 + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + ci: + - '.github/**' + rust: + - 'crates/**' + - 'apps/**' + - 'sdks/**' + - 'Cargo.toml' + - 'Cargo.lock' + - 'deny.toml' + - 'rust-toolchain.toml' + - 'clippy.toml' + - 'rustfmt.toml' + - '.cargo/config.toml' + ui: + - 'ui/**' + plugins: + - 'plugins/native/**' + e2e: + - 'e2e/**' + + # ---------- Sub-workflows ---------- skit: name: Skit + needs: changes + if: >- + github.event_name == 'workflow_dispatch' || + needs.changes.outputs.ci == 'true' || + needs.changes.outputs.rust == 'true' || + needs.changes.outputs.ui == 'true' uses: ./.github/workflows/skit.yml ui: name: UI + needs: changes + if: >- + github.event_name == 'workflow_dispatch' || + needs.changes.outputs.ci == 'true' || + needs.changes.outputs.ui == 'true' uses: ./.github/workflows/ui.yml plugins: name: Plugins + needs: changes + if: >- + github.event_name == 'workflow_dispatch' || + needs.changes.outputs.ci == 'true' || + needs.changes.outputs.plugins == 'true' || + needs.changes.outputs.rust == 'true' uses: ./.github/workflows/plugins.yml e2e: name: E2E + needs: changes + if: >- + github.event_name == 'workflow_dispatch' || + needs.changes.outputs.ci == 'true' || + needs.changes.outputs.rust == 'true' || + needs.changes.outputs.ui == 'true' || + needs.changes.outputs.e2e == 'true' uses: ./.github/workflows/e2e.yml + # ---------- Always-run checks ---------- + reuse: + name: REUSE Compliance + runs-on: ubuntu-22.04 + permissions: + contents: read + steps: + - uses: actions/checkout@v5 + - uses: fsfe/reuse-action@v6 + + # ---------- Gate ---------- all-checks: name: All Checks Passed + if: always() runs-on: ubuntu-22.04 - needs: [skit, ui, plugins, e2e] + needs: [changes, skit, ui, plugins, e2e, reuse] steps: - - name: All checks passed - run: echo "All CI checks passed successfully!" + - name: Verify results + run: | + results=( + "${{ needs.changes.result }}" + "${{ needs.skit.result }}" + "${{ needs.ui.result }}" + "${{ needs.plugins.result }}" + "${{ needs.e2e.result }}" + "${{ needs.reuse.result }}" + ) + for r in "${results[@]}"; do + if [[ "$r" == "failure" || "$r" == "cancelled" ]]; then + echo "::error::CI check failed or was cancelled ($r)" + exit 1 + fi + done + echo "All checks passed or were appropriately skipped." diff --git a/.github/workflows/plugins.yml b/.github/workflows/plugins.yml index 7c8dbcd8..cb1a01b8 100644 --- a/.github/workflows/plugins.yml +++ b/.github/workflows/plugins.yml @@ -6,6 +6,7 @@ name: Plugins CI on: workflow_call: + # Allow standalone runs for debugging plugin builds without a full CI cycle. workflow_dispatch: env: @@ -15,137 +16,71 @@ env: SHERPA_ONNX_VERSION: "1.12.17" jobs: - # Format check for all plugins (no native deps required) - format: - name: Format Check - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v5 - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@master - with: - toolchain: "1.92.0" - components: rustfmt - - - name: Check formatting - VAD - working-directory: plugins/native/vad - run: cargo fmt -- --check - - - name: Check formatting - Whisper - working-directory: plugins/native/whisper - run: cargo fmt -- --check - - - name: Check formatting - Kokoro - working-directory: plugins/native/kokoro - run: cargo fmt -- --check - - - name: Check formatting - Piper - working-directory: plugins/native/piper - run: cargo fmt -- --check - - - name: Check formatting - Matcha - working-directory: plugins/native/matcha - run: cargo fmt -- --check - - - name: Check formatting - SenseVoice - working-directory: plugins/native/sensevoice - run: cargo fmt -- --check - - - name: Check formatting - NLLB - working-directory: plugins/native/nllb - run: cargo fmt -- --check - - - name: Check formatting - Slint - working-directory: plugins/native/slint - run: cargo fmt -- --check - - # Lint plugins that can build without pre-installed native libraries - lint-simple: - name: Lint (Simple Plugins) + lint: + name: Lint (${{ matrix.group }}) runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: + # Plugins with no heavy native dependencies + - group: simple + plugins: "slint supertonic whisper helsinki pocket-tts aac-encoder" + apt_extra: "libfontconfig1-dev libfdk-aac-dev" + # Plugins that link against sherpa-onnx + - group: sherpa + plugins: "vad kokoro piper matcha sensevoice parakeet" + apt_extra: "wget" + # NLLB requires CTranslate2 + - group: nllb + plugins: "nllb" + apt_extra: "libopenblas-dev" steps: - uses: actions/checkout@v5 - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y cmake pkg-config libclang-dev libfontconfig1-dev - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@master - with: - toolchain: "1.92.0" - components: clippy + sudo apt-get install -y cmake pkg-config libclang-dev ${{ matrix.apt_extra }} - - uses: mozilla-actions/sccache-action@v0.0.9 - - - uses: Swatinem/rust-cache@v2 + # --- sherpa-onnx (sherpa group only) --- + - name: Cache sherpa-onnx + if: matrix.group == 'sherpa' + id: cache-sherpa + uses: actions/cache@v4 with: - workspaces: | - plugins/native/vad - plugins/native/slint - cache-on-failure: true + path: | + /usr/local/lib/libsherpa* + /usr/local/include/sherpa-onnx + key: sherpa-onnx-v2-${{ env.SHERPA_ONNX_VERSION }}-${{ runner.os }} - - name: Clippy - VAD - working-directory: plugins/native/vad - run: cargo clippy -- -D warnings - - - name: Clippy - Slint - working-directory: plugins/native/slint - run: cargo clippy -- -D warnings - - # Lint Whisper plugin (builds whisper.cpp from source) - lint-whisper: - name: Lint (Whisper) - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v5 - - - name: Install system dependencies + - name: Install sherpa-onnx + if: matrix.group == 'sherpa' && steps.cache-sherpa.outputs.cache-hit != 'true' run: | - sudo apt-get update - sudo apt-get install -y cmake pkg-config libclang-dev - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@master - with: - toolchain: "1.92.0" - components: clippy - - - uses: mozilla-actions/sccache-action@v0.0.9 - - - uses: Swatinem/rust-cache@v2 - with: - workspaces: | - plugins/native/whisper - cache-on-failure: true - - - name: Clippy - Whisper - working-directory: plugins/native/whisper - run: cargo clippy -- -D warnings - - # Lint NLLB plugin (requires CTranslate2) - lint-nllb: - name: Lint (NLLB) - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v5 + cd /tmp + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v${SHERPA_ONNX_VERSION}/sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 + tar xf sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 + sudo cp -r sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared/lib/* /usr/local/lib/ + sudo cp -r sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared/include/* /usr/local/include/ + sudo ldconfig - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y cmake pkg-config libclang-dev libopenblas-dev + - name: Refresh linker cache (sherpa-onnx from cache) + if: matrix.group == 'sherpa' && steps.cache-sherpa.outputs.cache-hit == 'true' + run: sudo ldconfig + # --- CTranslate2 (nllb group only) --- - name: Cache CTranslate2 + if: matrix.group == 'nllb' id: cache-ct2 uses: actions/cache@v4 with: - path: /usr/local/lib/libctranslate2* - key: ctranslate2-4.5.0-openmp-comp-${{ runner.os }} + path: | + /usr/local/lib/libctranslate2* + /usr/local/include/ctranslate2 + key: ctranslate2-v2-4.5.0-openmp-comp-${{ runner.os }} - name: Build CTranslate2 - if: steps.cache-ct2.outputs.cache-hit != 'true' + if: matrix.group == 'nllb' && steps.cache-ct2.outputs.cache-hit != 'true' run: | git clone --depth 1 --recurse-submodules --branch v4.5.0 https://github.com/OpenNMT/CTranslate2.git cd CTranslate2 @@ -161,82 +96,50 @@ jobs: sudo make install sudo ldconfig + - name: Refresh linker cache (CTranslate2 from cache) + if: matrix.group == 'nllb' && steps.cache-ct2.outputs.cache-hit == 'true' + run: sudo ldconfig + + # --- Rust toolchain & caches --- - name: Install Rust toolchain uses: dtolnay/rust-toolchain@master with: toolchain: "1.92.0" - components: clippy + components: rustfmt, clippy - uses: mozilla-actions/sccache-action@v0.0.9 - - uses: Swatinem/rust-cache@v2 - with: - workspaces: | - plugins/native/nllb - cache-on-failure: true - - - name: Clippy - NLLB - working-directory: plugins/native/nllb - run: cargo clippy -- -D warnings - - # Lint sherpa-onnx based plugins (Kokoro, Piper, Matcha, SenseVoice) - lint-sherpa: - name: Lint (Sherpa-ONNX Plugins) - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v5 - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y cmake pkg-config libclang-dev wget - - - name: Cache sherpa-onnx - id: cache-sherpa - uses: actions/cache@v4 - with: - path: /usr/local/lib/libsherpa* - key: sherpa-onnx-${{ env.SHERPA_ONNX_VERSION }}-${{ runner.os }} - - - name: Install sherpa-onnx - if: steps.cache-sherpa.outputs.cache-hit != 'true' + - name: Build workspace list for cache + id: cache-ws run: | - cd /tmp - wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v${SHERPA_ONNX_VERSION}/sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 - tar xf sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared.tar.bz2 - sudo cp -r sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared/lib/* /usr/local/lib/ - sudo cp -r sherpa-onnx-v${SHERPA_ONNX_VERSION}-linux-x64-shared/include/* /usr/local/include/ - sudo ldconfig - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@master - with: - toolchain: "1.92.0" - components: clippy - - - uses: mozilla-actions/sccache-action@v0.0.9 + { + echo "list< ../../../target/plugins" + done + echo "EOF" + } >> "$GITHUB_OUTPUT" - uses: Swatinem/rust-cache@v2 with: - workspaces: | - plugins/native/kokoro - plugins/native/piper - plugins/native/matcha - plugins/native/sensevoice + workspaces: ${{ steps.cache-ws.outputs.list }} cache-on-failure: true - - name: Clippy - Kokoro - working-directory: plugins/native/kokoro - run: cargo clippy -- -D warnings - - - name: Clippy - Piper - working-directory: plugins/native/piper - run: cargo clippy -- -D warnings - - - name: Clippy - Matcha - working-directory: plugins/native/matcha - run: cargo clippy -- -D warnings + # --- Lint --- + - name: Format check + run: | + for plugin in ${{ matrix.plugins }}; do + echo "::group::fmt — ${plugin}" + (cd "plugins/native/${plugin}" && cargo fmt -- --check) + echo "::endgroup::" + done - - name: Clippy - SenseVoice - working-directory: plugins/native/sensevoice - run: cargo clippy -- -D warnings + - name: Clippy + run: | + for plugin in ${{ matrix.plugins }}; do + echo "::group::clippy — ${plugin}" + # Shared target dir deduplicates common crate compilation across plugins in the group. + # If flaky clippy results appear, conflicting feature flags between plugins could be the cause. + (cd "plugins/native/${plugin}" && CARGO_TARGET_DIR="${{ github.workspace }}/target/plugins" cargo clippy -- -D warnings) + echo "::endgroup::" + done diff --git a/.github/workflows/reuse.yml b/.github/workflows/reuse.yml deleted file mode 100644 index 270d6eeb..00000000 --- a/.github/workflows/reuse.yml +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Claudio Costa -# -# SPDX-License-Identifier: MPL-2.0 - -name: REUSE Compliance Check - -on: - push: - branches: [main] - pull_request: - branches: [main] - -permissions: - contents: read - -jobs: - reuse-compliance-check: - runs-on: ubuntu-22.04 - steps: - - name: Checkout repository - uses: actions/checkout@v5 - - - name: REUSE Compliance Check - uses: fsfe/reuse-action@v6 diff --git a/.github/workflows/ui.yml b/.github/workflows/ui.yml index 744bd32c..06c25412 100644 --- a/.github/workflows/ui.yml +++ b/.github/workflows/ui.yml @@ -4,8 +4,8 @@ on: workflow_call: jobs: - lint: - name: Lint (TypeScript + ESLint) + ci: + name: Lint, Test & Build runs-on: ubuntu-22.04 defaults: run: @@ -29,89 +29,14 @@ jobs: - name: Install dependencies run: bun install --frozen-lockfile - - name: Run lint + - name: Lint run: bun run lint - test: - name: Test - runs-on: ubuntu-22.04 - defaults: - run: - working-directory: ./ui - steps: - - uses: actions/checkout@v5 - - - name: Setup Bun - uses: oven-sh/setup-bun@v2 - with: - bun-version: "1.3.5" - - - name: Cache node_modules - uses: actions/cache@v4 - with: - path: ui/node_modules - key: ${{ runner.os }}-bun-${{ hashFiles('ui/bun.lock') }} - restore-keys: | - ${{ runner.os }}-bun- - - - name: Install dependencies - run: bun install --frozen-lockfile - - - name: Run tests + - name: Test run: bun run test:run - build: - name: Build - runs-on: ubuntu-22.04 - defaults: - run: - working-directory: ./ui - steps: - - uses: actions/checkout@v5 - - - name: Setup Bun - uses: oven-sh/setup-bun@v2 - with: - bun-version: "1.3.5" - - - name: Cache node_modules - uses: actions/cache@v4 - with: - path: ui/node_modules - key: ${{ runner.os }}-bun-${{ hashFiles('ui/bun.lock') }} - restore-keys: | - ${{ runner.os }}-bun- - - - name: Install dependencies - run: bun install --frozen-lockfile - - - name: Build production bundle + - name: Build run: bun run build - knip: - name: Knip (unused code) - runs-on: ubuntu-22.04 - defaults: - run: - working-directory: ./ui - steps: - - uses: actions/checkout@v5 - - - name: Setup Bun - uses: oven-sh/setup-bun@v2 - with: - bun-version: "1.3.5" - - - name: Cache node_modules - uses: actions/cache@v4 - with: - path: ui/node_modules - key: ${{ runner.os }}-bun-${{ hashFiles('ui/bun.lock') }} - restore-keys: | - ${{ runner.os }}-bun- - - - name: Install dependencies - run: bun install --frozen-lockfile - - - name: Run knip + - name: Knip (unused code) run: bun run knip diff --git a/plugins/native/aac-encoder/src/lib.rs b/plugins/native/aac-encoder/src/lib.rs index a15e8aec..c903fce0 100644 --- a/plugins/native/aac-encoder/src/lib.rs +++ b/plugins/native/aac-encoder/src/lib.rs @@ -102,7 +102,10 @@ impl AacEncoderNode { // 21.333… µs per frame; using integer arithmetic: // timestamp = sequence * 1024 * 1_000_000 / 48_000 // duration = next_timestamp − this_timestamp - let ts = |seq: u64| (seq as u128 * 1_024 * 1_000_000 / 48_000) as u64; + // Allow: the division by 48_000 keeps the result well within u64 + // range for any realistic sequence count. + #[allow(clippy::cast_possible_truncation)] + let ts = |seq: u64| (u128::from(seq) * 1_024 * 1_000_000 / 48_000) as u64; let timestamp_us = ts(self.sequence); let duration_us = ts(self.sequence + 1) - timestamp_us; @@ -145,10 +148,13 @@ impl NativeProcessorNode for AacEncoderNode { }), ], ) - .output("out", PacketType::EncodedAudio(EncodedAudioFormat { - codec: AudioCodec::Aac, - codec_private: None, - })) + .output( + "out", + PacketType::EncodedAudio(EncodedAudioFormat { + codec: AudioCodec::Aac, + codec_private: None, + }), + ) .param_schema(serde_json::json!({ "type": "object", "properties": { diff --git a/plugins/native/helsinki/Cargo.lock b/plugins/native/helsinki/Cargo.lock index db4b8382..d3d605f1 100644 --- a/plugins/native/helsinki/Cargo.lock +++ b/plugins/native/helsinki/Cargo.lock @@ -851,7 +851,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "helsinki-plugin-native" -version = "0.1.0" +version = "0.2.0" dependencies = [ "candle-core", "candle-nn", @@ -1791,9 +1791,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -1804,9 +1804,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301858a4023d78debd2353c7426dc486001bddc91ae31a76fb1f55132f7e2633" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" dependencies = [ "proc-macro2", "quote", @@ -1944,7 +1944,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "streamkit-core" -version = "0.1.0" +version = "0.2.0" dependencies = [ "async-trait", "base64 0.22.1", @@ -1962,7 +1962,7 @@ dependencies = [ [[package]] name = "streamkit-plugin-sdk-native" -version = "0.1.0" +version = "0.2.0" dependencies = [ "async-trait", "bytes", @@ -2139,9 +2139,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" dependencies = [ "pin-project-lite", "tokio-macros", @@ -2149,9 +2149,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -2160,9 +2160,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -2234,9 +2234,9 @@ dependencies = [ [[package]] name = "ts-rs" -version = "11.1.0" +version = "12.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4994acea2522cd2b3b85c1d9529a55991e3ad5e25cdcd3de9d505972c4379424" +checksum = "756050066659291d47a554a9f558125db17428b073c5ffce1daf5dcb0f7231d8" dependencies = [ "serde_json", "thiserror 2.0.17", @@ -2245,9 +2245,9 @@ dependencies = [ [[package]] name = "ts-rs-macros" -version = "11.1.0" +version = "12.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee6ff59666c9cbaec3533964505d39154dc4e0a56151fdea30a09ed0301f62e2" +checksum = "38d90eea51bc7988ef9e674bf80a85ba6804739e535e9cab48e4bb34a8b652aa" dependencies = [ "proc-macro2", "quote", diff --git a/plugins/native/helsinki/src/config.rs b/plugins/native/helsinki/src/config.rs index 16d135cc..f18c4ca7 100644 --- a/plugins/native/helsinki/src/config.rs +++ b/plugins/native/helsinki/src/config.rs @@ -110,16 +110,10 @@ impl HelsinkiConfig { // Validate max_length if self.max_length < 32 { - return Err(format!( - "max_length must be at least 32, got {}", - self.max_length - )); + return Err(format!("max_length must be at least 32, got {}", self.max_length)); } if self.max_length > 2048 { - return Err(format!( - "max_length must be at most 2048, got {}", - self.max_length - )); + return Err(format!("max_length must be at most 2048, got {}", self.max_length)); } Ok(()) @@ -132,14 +126,11 @@ impl HelsinkiConfig { } /// Check if the model directory matches the expected language pair. - pub fn check_model_language_match(&self) -> Result<(), String> { + pub fn check_model_language_match(&self) { let model_dir_lower = self.model_dir.to_lowercase(); // Expected pattern: opus-mt-{src}-{tgt} - let expected_suffix = format!( - "opus-mt-{}-{}", - self.source_language, self.target_language - ); + let expected_suffix = format!("opus-mt-{}-{}", self.source_language, self.target_language); if !model_dir_lower.contains(&expected_suffix) { tracing::warn!( @@ -150,8 +141,6 @@ impl HelsinkiConfig { expected_suffix ); } - - Ok(()) } } @@ -186,10 +175,7 @@ mod tests { #[test] fn test_validate_invalid_language() { - let config = HelsinkiConfig { - source_language: "fr".to_string(), - ..Default::default() - }; + let config = HelsinkiConfig { source_language: "fr".to_string(), ..Default::default() }; assert!(config.validate().is_err()); } } diff --git a/plugins/native/helsinki/src/lib.rs b/plugins/native/helsinki/src/lib.rs index ca9520f5..ee2fb530 100644 --- a/plugins/native/helsinki/src/lib.rs +++ b/plugins/native/helsinki/src/lib.rs @@ -47,8 +47,7 @@ fn preview_for_log(text: &str, max_chars: usize) -> String { fn canonicalize_model_dir(model_dir: &str) -> String { std::fs::canonicalize(model_dir) - .map(|path| path.to_string_lossy().to_string()) - .unwrap_or_else(|_| model_dir.to_string()) + .map_or_else(|_| model_dir.to_string(), |path| path.to_string_lossy().to_string()) } fn warmup_translate( @@ -68,7 +67,7 @@ fn warmup_translate( start.elapsed().as_millis(), result.chars().count() ); - } + }, Err(e) => { plugin_warn!( logger, @@ -76,7 +75,7 @@ fn warmup_translate( start.elapsed().as_millis(), e ); - } + }, } } @@ -151,15 +150,11 @@ impl NativeProcessorNode for HelsinkiPlugin { } fn new(params: Option, logger: Logger) -> Result { - plugin_info!( - logger, - "Helsinki plugin new() called with params: {:?}", - params - ); + plugin_info!(logger, "Helsinki plugin new() called with params: {:?}", params); let mut config: HelsinkiConfig = if let Some(p) = params { serde_json::from_value(p).map_err(|e| { - let error_msg = format!("Invalid config: {}", e); + let error_msg = format!("Invalid config: {e}"); plugin_error!(logger, "{}", error_msg); error_msg })? @@ -185,9 +180,7 @@ impl NativeProcessorNode for HelsinkiPlugin { } // Warn if model directory doesn't match language pair - if let Err(e) = config.check_model_language_match() { - plugin_error!(logger, "{}", e); - } + config.check_model_language_match(); plugin_info!( logger, @@ -207,11 +200,7 @@ impl NativeProcessorNode for HelsinkiPlugin { plugin_info!(logger, "Helsinki plugin initialized successfully"); - Ok(Self { - config, - translator, - logger, - }) + Ok(Self { config, translator, logger }) } fn process(&mut self, _pin: &str, packet: Packet, output: &OutputSender) -> Result<(), String> { @@ -219,12 +208,7 @@ impl NativeProcessorNode for HelsinkiPlugin { let text: String = match &packet { Packet::Text(t) => t.as_ref().to_string(), Packet::Transcription(t) => t.text.clone(), - _ => { - return Err(format!( - "Expected Text or Transcription packet, got {:?}", - packet - )) - } + _ => return Err(format!("Expected Text or Transcription packet, got {packet:?}")), }; // Skip empty text @@ -261,8 +245,8 @@ impl NativeProcessorNode for HelsinkiPlugin { fn update_params(&mut self, params: Option) -> Result<(), String> { if let Some(p) = params { - let mut new_config: HelsinkiConfig = serde_json::from_value(p) - .map_err(|e| format!("Invalid config: {}", e))?; + let mut new_config: HelsinkiConfig = + serde_json::from_value(p).map_err(|e| format!("Invalid config: {e}"))?; new_config.validate()?; @@ -283,10 +267,7 @@ impl NativeProcessorNode for HelsinkiPlugin { || new_config.device_index != self.config.device_index; if needs_reload { - plugin_info!( - self.logger, - "Model parameters changed, reloading translator" - ); + plugin_info!(self.logger, "Model parameters changed, reloading translator"); self.translator = get_or_load_translator(&new_config, &self.logger)?; } diff --git a/plugins/native/helsinki/src/model.rs b/plugins/native/helsinki/src/model.rs index de84a482..2eea9633 100644 --- a/plugins/native/helsinki/src/model.rs +++ b/plugins/native/helsinki/src/model.rs @@ -9,9 +9,9 @@ use std::path::Path; use std::sync::{Arc, LazyLock, Mutex}; use candle_core::{DType, Device}; +use candle_nn::Activation; use candle_nn::VarBuilder; use candle_transformers::models::marian::{Config, MTModel}; -use candle_nn::Activation; use serde::Deserialize; use serde_json::Value as JsonValue; use streamkit_plugin_sdk_native::prelude::*; @@ -22,6 +22,8 @@ use crate::config::HelsinkiConfig; /// HuggingFace/Transformers config.json format for Marian models. /// Maps to Candle's Config struct with proper type conversions. +// Allow: fields mirror the HuggingFace Transformers config.json schema. +#[allow(clippy::struct_excessive_bools)] #[derive(Debug, Deserialize)] struct HfMarianConfig { vocab_size: usize, @@ -52,18 +54,24 @@ struct HfMarianConfig { share_encoder_decoder_embeddings: bool, } -fn default_max_position_embeddings() -> usize { 512 } -fn default_true() -> bool { true } -fn default_activation() -> String { "gelu".to_string() } +const fn default_max_position_embeddings() -> usize { + 512 +} +const fn default_true() -> bool { + true +} +fn default_activation() -> String { + "gelu".to_string() +} impl HfMarianConfig { /// Convert to Candle's Config struct. fn to_candle_config(&self) -> Config { let activation = match self.activation_function.as_str() { "swish" | "silu" => Activation::Silu, - "gelu" | "gelu_new" => Activation::Gelu, "relu" => Activation::Relu, - _ => Activation::Gelu, // Default to GELU for unknown activations + // Covers "gelu", "gelu_new", and any unknown activation function. + _ => Activation::Gelu, }; Config { @@ -138,10 +146,7 @@ fn is_gpu_available() -> bool { let available = check_cuda_available(); GPU_AVAILABILITY.store(if available { 1 } else { 2 }, Ordering::Relaxed); - tracing::info!( - "[Helsinki Plugin] GPU availability check: available={}", - available - ); + tracing::info!("[Helsinki Plugin] GPU availability check: available={}", available); available } @@ -177,14 +182,15 @@ pub fn get_device(config: &HelsinkiConfig) -> Result { "cuda" => { #[cfg(feature = "cuda")] { - Device::new_cuda(config.device_index) - .map_err(|e| format!("CUDA device {} not available: {}", config.device_index, e)) + Device::new_cuda(config.device_index).map_err(|e| { + format!("CUDA device {} not available: {}", config.device_index, e) + }) } #[cfg(not(feature = "cuda"))] { Err("CUDA support not compiled in. Rebuild with --features cuda".to_string()) } - } + }, "auto" => { #[cfg(feature = "cuda")] { @@ -198,11 +204,8 @@ pub fn get_device(config: &HelsinkiConfig) -> Result { { Ok(Device::Cpu) } - } - other => Err(format!( - "Invalid device '{}'. Use 'cpu', 'cuda', or 'auto'", - other - )), + }, + other => Err(format!("Invalid device '{other}'. Use 'cpu', 'cuda', or 'auto'")), } } @@ -213,9 +216,9 @@ fn load_config(model_dir: &str, source_lang: &str, target_lang: &str) -> Result< if config_path.exists() { // Load from file and convert to Candle format let config_str = std::fs::read_to_string(&config_path) - .map_err(|e| format!("Failed to read config.json: {}", e))?; + .map_err(|e| format!("Failed to read config.json: {e}"))?; let hf_config: HfMarianConfig = serde_json::from_str(&config_str) - .map_err(|e| format!("Failed to parse config.json: {}", e))?; + .map_err(|e| format!("Failed to parse config.json: {e}"))?; tracing::info!( "[Helsinki Plugin] Loaded config from file: vocab_size={}, d_model={}, encoder_layers={}, decoder_layers={}", @@ -230,7 +233,9 @@ fn load_config(model_dir: &str, source_lang: &str, target_lang: &str) -> Result< // Use preset based on language pair tracing::warn!( "[Helsinki Plugin] config.json not found in {}, using preset for {}-{}", - model_dir, source_lang, target_lang + model_dir, + source_lang, + target_lang ); match (source_lang, target_lang) { ("en", "es") => Ok(Config::opus_mt_en_es()), @@ -240,8 +245,7 @@ fn load_config(model_dir: &str, source_lang: &str, target_lang: &str) -> Result< ("en", "hi") => Ok(Config::opus_mt_en_hi()), ("fr", "en") => Ok(Config::opus_mt_fr_en()), _ => Err(format!( - "No preset config for {}->{} and config.json not found in {}", - source_lang, target_lang, model_dir + "No preset config for {source_lang}->{target_lang} and config.json not found in {model_dir}" )), } } @@ -261,7 +265,7 @@ fn load_weights(model_dir: &str, device: &Device) -> Result, // Load safetensors with f32 dtype let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[model_path], DType::F32, device) - .map_err(|e| format!("Failed to load model weights: {}", e))? + .map_err(|e| format!("Failed to load model weights: {e}"))? }; Ok(vb) @@ -277,24 +281,23 @@ fn load_tokenizers(model_dir: &str) -> Result<(Tokenizer, Tokenizer), String> { validate_tokenizer_json(&source_path)?; validate_tokenizer_json(&target_path)?; let source = Tokenizer::from_file(&source_path) - .map_err(|e| format!("Failed to load source_tokenizer.json: {}", e))?; + .map_err(|e| format!("Failed to load source_tokenizer.json: {e}"))?; let target = Tokenizer::from_file(&target_path) - .map_err(|e| format!("Failed to load target_tokenizer.json: {}", e))?; + .map_err(|e| format!("Failed to load target_tokenizer.json: {e}"))?; return Ok((source, target)); } if shared_path.exists() { validate_tokenizer_json(&shared_path)?; let source = Tokenizer::from_file(&shared_path) - .map_err(|e| format!("Failed to load tokenizer.json: {}", e))?; + .map_err(|e| format!("Failed to load tokenizer.json: {e}"))?; let target = Tokenizer::from_file(&shared_path) - .map_err(|e| format!("Failed to load tokenizer.json: {}", e))?; + .map_err(|e| format!("Failed to load tokenizer.json: {e}"))?; return Ok((source, target)); } Err(format!( - "Tokenizers not found in {}. Expected source_tokenizer.json + target_tokenizer.json (preferred) or tokenizer.json. Re-run: just download-helsinki-models", - model_dir + "Tokenizers not found in {model_dir}. Expected source_tokenizer.json + target_tokenizer.json (preferred) or tokenizer.json. Re-run: just download-helsinki-models" )) } @@ -303,17 +306,11 @@ pub fn get_or_load_translator( config: &HelsinkiConfig, logger: &Logger, ) -> Result>, String> { - let cache_key = ( - config.model_dir.clone(), - config.normalized_device(), - config.device_index, - ); + let cache_key = (config.model_dir.clone(), config.normalized_device(), config.device_index); // Check cache first { - let cache = TRANSLATOR_CACHE - .lock() - .map_err(|e| format!("Cache lock failed: {}", e))?; + let cache = TRANSLATOR_CACHE.lock().map_err(|e| format!("Cache lock failed: {e}"))?; if let Some(entry) = cache.get(&cache_key) { plugin_info!(logger, "CACHE HIT: Reusing Helsinki translator"); @@ -321,18 +318,11 @@ pub fn get_or_load_translator( } } - plugin_warn!( - logger, - "CACHE MISS: Loading Helsinki model from {}", - config.model_dir - ); + plugin_warn!(logger, "CACHE MISS: Loading Helsinki model from {}", config.model_dir); // Load model configuration - let model_config = load_config( - &config.model_dir, - &config.source_language, - &config.target_language, - )?; + let model_config = + load_config(&config.model_dir, &config.source_language, &config.target_language)?; // Initialize device let device = get_device(config)?; @@ -342,8 +332,8 @@ pub fn get_or_load_translator( let vb = load_weights(&config.model_dir, &device)?; // Create model - let model = MTModel::new(&model_config, vb) - .map_err(|e| format!("Failed to create MTModel: {}", e))?; + let model = + MTModel::new(&model_config, vb).map_err(|e| format!("Failed to create MTModel: {e}"))?; // Load tokenizers let (source_tokenizer, target_tokenizer) = load_tokenizers(&config.model_dir)?; @@ -365,16 +355,9 @@ pub fn get_or_load_translator( // Store in cache { - let mut cache = TRANSLATOR_CACHE - .lock() - .map_err(|e| format!("Cache lock failed: {}", e))?; - - cache.insert( - cache_key, - CachedTranslatorEntry { - translator: translator.clone(), - }, - ); + let mut cache = TRANSLATOR_CACHE.lock().map_err(|e| format!("Cache lock failed: {e}"))?; + + cache.insert(cache_key, CachedTranslatorEntry { translator: translator.clone() }); } Ok(translator) @@ -382,9 +365,7 @@ pub fn get_or_load_translator( fn validate_tokenizer(tokenizer: &Tokenizer, cfg: &Config) -> Result<(), String> { let text = "tokenizer self-check"; - let enc = tokenizer - .encode(text, true) - .map_err(|e| format!("Tokenizer encode failed: {}", e))?; + let enc = tokenizer.encode(text, true).map_err(|e| format!("Tokenizer encode failed: {e}"))?; let ids = enc.get_ids(); if ids.is_empty() { @@ -393,31 +374,26 @@ fn validate_tokenizer(tokenizer: &Tokenizer, cfg: &Config) -> Result<(), String> // Ensure the tokenizer understands the model's special-token ids by round-tripping them. let specials = [cfg.eos_token_id, cfg.pad_token_id, cfg.decoder_start_token_id]; - let decoded = tokenizer - .decode(&specials, false) - .map_err(|e| format!("Tokenizer decode failed: {}", e))?; + let decoded = + tokenizer.decode(&specials, false).map_err(|e| format!("Tokenizer decode failed: {e}"))?; if decoded.trim().is_empty() { - return Err(format!( - "Tokenizer appears incompatible with model ids (decoded specials empty). \ + return Err("Tokenizer appears incompatible with model ids (decoded specials empty). \ Please regenerate tokenizer.json via: just download-helsinki-models" - )); + .to_string()); } Ok(()) } fn validate_tokenizer_json(path: &Path) -> Result<(), String> { - let raw = std::fs::read_to_string(path) - .map_err(|e| format!("Failed to read tokenizer.json: {}", e))?; + let raw = + std::fs::read_to_string(path).map_err(|e| format!("Failed to read tokenizer.json: {e}"))?; let json: JsonValue = - serde_json::from_str(&raw).map_err(|e| format!("Failed to parse tokenizer.json: {}", e))?; + serde_json::from_str(&raw).map_err(|e| format!("Failed to parse tokenizer.json: {e}"))?; - let model_type = json - .get("model") - .and_then(|m| m.get("type")) - .and_then(|t| t.as_str()) - .unwrap_or("unknown"); + let model_type = + json.get("model").and_then(|m| m.get("type")).and_then(|t| t.as_str()).unwrap_or("unknown"); // MarianTokenizerFast should produce a SentencePiece/Unigram-based tokenizer. // A WordLevel tokenizer here is a known-bad fallback that yields garbage translations. diff --git a/plugins/native/helsinki/src/translation.rs b/plugins/native/helsinki/src/translation.rs index 9a38d07b..23afacc6 100644 --- a/plugins/native/helsinki/src/translation.rs +++ b/plugins/native/helsinki/src/translation.rs @@ -17,9 +17,8 @@ pub fn translate( text: &str, config: &HelsinkiConfig, ) -> Result { - let mut translator = translator - .lock() - .map_err(|e| format!("Failed to lock translator: {}", e))?; + let mut translator = + translator.lock().map_err(|e| format!("Failed to lock translator: {e}"))?; // Reset KV cache for new sequence translator.model.reset_kv_cache(); @@ -28,7 +27,7 @@ pub fn translate( let encoding = translator .source_tokenizer .encode(text, false) - .map_err(|e| format!("Tokenization failed: {}", e))?; + .map_err(|e| format!("Tokenization failed: {e}"))?; let mut input_ids: Vec = encoding.get_ids().to_vec(); if input_ids.is_empty() { @@ -42,16 +41,16 @@ pub fn translate( // Convert to tensor let input_tensor = Tensor::new(&input_ids[..], &translator.device) - .map_err(|e| format!("Failed to create input tensor: {}", e))? + .map_err(|e| format!("Failed to create input tensor: {e}"))? .unsqueeze(0) - .map_err(|e| format!("Failed to unsqueeze input: {}", e))?; + .map_err(|e| format!("Failed to unsqueeze input: {e}"))?; // Run encoder let encoder_output = translator .model .encoder() .forward(&input_tensor, 0) - .map_err(|e| format!("Encoder forward failed: {}", e))?; + .map_err(|e| format!("Encoder forward failed: {e}"))?; // Autoregressive decoding let decoder_start_token_id = translator.config.decoder_start_token_id; @@ -68,30 +67,29 @@ pub fn translate( .last() .ok_or_else(|| "Internal error: decoder_input is empty".to_string())?; let decoder_tensor = Tensor::new(&[input_token], &translator.device) - .map_err(|e| format!("Failed to create decoder tensor: {}", e))? + .map_err(|e| format!("Failed to create decoder tensor: {e}"))? .unsqueeze(0) - .map_err(|e| format!("Failed to unsqueeze decoder input: {}", e))?; + .map_err(|e| format!("Failed to unsqueeze decoder input: {e}"))?; // Run decoder let logits = translator .model .decode(&decoder_tensor, &encoder_output, step) - .map_err(|e| format!("Decoder forward failed: {}", e))?; + .map_err(|e| format!("Decoder forward failed: {e}"))?; // Get last token logits (shape: [batch, seq_len, vocab]) - let seq_len = logits.dim(1).map_err(|e| format!("Failed to get dim: {}", e))?; - let last_logits = logits - .i((.., seq_len - 1, ..)) - .map_err(|e| format!("Failed to slice logits: {}", e))?; + let seq_len = logits.dim(1).map_err(|e| format!("Failed to get dim: {e}"))?; + let last_logits = + logits.i((.., seq_len - 1, ..)).map_err(|e| format!("Failed to slice logits: {e}"))?; // Greedy sampling: take argmax let next_token = last_logits .argmax(D::Minus1) - .map_err(|e| format!("Argmax failed: {}", e))? + .map_err(|e| format!("Argmax failed: {e}"))? .squeeze(0) - .map_err(|e| format!("Squeeze failed: {}", e))? + .map_err(|e| format!("Squeeze failed: {e}"))? .to_scalar::() - .map_err(|e| format!("to_scalar failed: {}", e))?; + .map_err(|e| format!("to_scalar failed: {e}"))?; // Check for EOS if next_token == eos_token_id || next_token == pad_token_id { @@ -111,7 +109,8 @@ pub fn translate( let decoded = translator .target_tokenizer .decode(&output_ids, true) - .map_err(|e| format!("Decoding failed: {}", e))?; + .map_err(|e| format!("Decoding failed: {e}"))?; + drop(translator); Ok(decoded.trim().to_string()) } diff --git a/plugins/native/parakeet/src/parakeet_node.rs b/plugins/native/parakeet/src/parakeet_node.rs index fdfcd6b7..7f0d77be 100644 --- a/plugins/native/parakeet/src/parakeet_node.rs +++ b/plugins/native/parakeet/src/parakeet_node.rs @@ -496,7 +496,11 @@ impl ParakeetNode { None } else { let lang = unsafe { CStr::from_ptr(result.lang).to_string_lossy().into_owned() }; - if lang.is_empty() { None } else { Some(lang) } + if lang.is_empty() { + None + } else { + Some(lang) + } }; // Cleanup @@ -637,10 +641,7 @@ unsafe fn create_recognizer( model: empty_cstr.as_ptr(), }, }, - lm_config: ffi::SherpaOnnxOfflineLMConfig { - model: empty_cstr.as_ptr(), - scale: 0.0, - }, + lm_config: ffi::SherpaOnnxOfflineLMConfig { model: empty_cstr.as_ptr(), scale: 0.0 }, decoding_method: decoding_method_cstr.as_ptr(), max_active_paths: 4, hotwords_file: empty_cstr.as_ptr(), diff --git a/plugins/native/parakeet/src/vad.rs b/plugins/native/parakeet/src/vad.rs index b479ddb4..a20986f6 100644 --- a/plugins/native/parakeet/src/vad.rs +++ b/plugins/native/parakeet/src/vad.rs @@ -141,6 +141,8 @@ impl SileroVAD { /// Update speech threshold #[allow(dead_code)] + // Allow: f32::clamp is not const-stable, so this cannot be const fn. + #[allow(clippy::missing_const_for_fn)] pub fn set_threshold(&mut self, threshold: f32) { self.threshold = threshold.clamp(0.0, 1.0); }