|
| 1 | +// SPDX-FileCopyrightText: Copyright (c) 2024 Pocket TTS Contributors |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: MIT OR Apache-2.0 |
| 4 | + |
| 5 | +use crate::ModelState; |
| 6 | +use crate::models::transformer::StreamingTransformer; |
| 7 | +use crate::modules::mlp::{LayerNorm, ModulationParams, SimpleMLPAdaLN}; |
| 8 | +use candle_core::{Result, Tensor}; |
| 9 | +use candle_nn::{Linear, Module, VarBuilder}; |
| 10 | + |
| 11 | +pub fn lsd_decode( |
| 12 | + flow_net: &SimpleMLPAdaLN, |
| 13 | + modulations: &[Vec<ModulationParams>], |
| 14 | + x_0: &Tensor, |
| 15 | +) -> Result<Tensor> { |
| 16 | + let mut current = x_0.clone(); |
| 17 | + let num_steps = modulations.len(); |
| 18 | + |
| 19 | + let step_factor = 1.0 / num_steps as f64; |
| 20 | + for step_mod in modulations { |
| 21 | + // Use forward_step_cached with pre-computed modulation batch for this ODE step |
| 22 | + let flow_dir = flow_net.forward_step_cached(¤t, step_mod)?; |
| 23 | + current = (current + flow_dir.affine(step_factor, 0.0)?)?; |
| 24 | + } |
| 25 | + Ok(current) |
| 26 | +} |
| 27 | + |
/// Flow-matching language model: a streaming transformer backbone that
/// conditions a small flow network (`SimpleMLPAdaLN`) used to decode each
/// next latent frame via ODE integration (see `lsd_decode`).
#[derive(Clone)]
pub struct FlowLMModel {
    /// Flow network producing the per-step flow direction, conditioned on the
    /// transformer's last output frame.
    pub flow_net: SimpleMLPAdaLN,
    /// Streaming transformer backbone (stateful; driven via `ModelState`).
    pub transformer: StreamingTransformer,
    /// Projects latent frames (`ldim`) into the transformer width (`dim`); no bias.
    pub input_linear: Linear,
    /// LayerNorm applied to transformer output before the EOS head.
    pub out_norm: LayerNorm,
    /// Scalar end-of-sequence score head (`dim` -> 1).
    pub out_eos: Linear,
    /// Learned beginning-of-sequence embedding, length `ldim`.
    pub bos_emb: Tensor,
    /// Latent normalization statistics, length `ldim` each.
    /// NOTE(review): not referenced in `forward` below — presumably used by
    /// callers to (de)normalize latents; confirm at call sites.
    pub emb_mean: Tensor,
    pub emb_std: Tensor,
    /// Latent dimension of input/output frames.
    pub ldim: usize,
    /// Transformer model dimension.
    pub dim: usize,
    /// Optional symmetric clamp for sampled noise (truncated normal);
    /// `None` means unclamped Gaussian noise.
    pub noise_clamp: Option<f32>,
}
| 42 | + |
| 43 | +fn sample_noise( |
| 44 | + device: &candle_core::Device, |
| 45 | + shape: (usize, usize), |
| 46 | + temp: f32, |
| 47 | + clamp: Option<f32>, |
| 48 | +) -> Result<Tensor> { |
| 49 | + let std = temp.sqrt(); |
| 50 | + match clamp { |
| 51 | + None => Tensor::randn(0.0f32, std, shape, device), |
| 52 | + Some(limit) => { |
| 53 | + // Rejection sampling for truncated normal |
| 54 | + let count = shape.0 * shape.1; |
| 55 | + let mut data = Vec::with_capacity(count); |
| 56 | + let mut rng = rand::thread_rng(); |
| 57 | + let dist = rand_distr::Normal::new(0.0f32, std) |
| 58 | + .map_err(|e| candle_core::Error::Msg(e.to_string()))?; |
| 59 | + |
| 60 | + while data.len() < count { |
| 61 | + let v = rand_distr::Distribution::sample(&dist, &mut rng); |
| 62 | + if v.abs() <= limit { |
| 63 | + data.push(v); |
| 64 | + } |
| 65 | + } |
| 66 | + Tensor::from_vec(data, shape, device) |
| 67 | + } |
| 68 | + } |
| 69 | +} |
| 70 | + |
impl FlowLMModel {
    /// Build a `FlowLMModel` from pre-constructed sub-networks and weights
    /// loaded through `vb`.
    ///
    /// Loads: `input_linear` (ldim -> dim, no bias), `out_norm` (LayerNorm over
    /// `dim`, eps 1e-5, affine), `out_eos` (dim -> 1), and the three length-`ldim`
    /// vectors `bos_emb`, `emb_mean`, `emb_std`. `noise_clamp` starts as `None`
    /// (unclamped noise); callers may set it afterwards.
    pub fn new(
        flow_net: SimpleMLPAdaLN,
        transformer: StreamingTransformer,
        ldim: usize,
        dim: usize,
        vb: VarBuilder,
    ) -> Result<Self> {
        let input_linear = candle_nn::linear_no_bias(ldim, dim, vb.pp("input_linear"))?;
        let out_norm = LayerNorm::new(dim, 1e-5, true, vb.pp("out_norm"))?;
        let out_eos = candle_nn::linear(dim, 1, vb.pp("out_eos"))?;
        let bos_emb = vb.get(ldim, "bos_emb")?;
        let emb_mean = vb.get(ldim, "emb_mean")?;
        let emb_std = vb.get(ldim, "emb_std")?;

        Ok(Self {
            flow_net,
            transformer,
            input_linear,
            out_norm,
            out_eos,
            bos_emb,
            emb_mean,
            emb_std,
            ldim,
            dim,
            noise_clamp: None, // Default to no clamp
        })
    }

    /// One autoregressive generation step: run the transformer over the latent
    /// sequence (optionally prefixed by text embeddings), score end-of-sequence,
    /// then decode the next latent frame by integrating the flow ODE from noise.
    ///
    /// Returns `(next_latent, is_eos)` where `is_eos` is true when the raw EOS
    /// score exceeds `eos_threshold`.
    #[allow(clippy::too_many_arguments)]
    pub fn forward(
        &self,
        sequence: &Tensor,          // [B, T, ldim] latent frames
        text_embeddings: &Tensor,   // [B, S, dim] conditioning prefix (S may be 0)
        model_state: &mut ModelState, // streaming KV/cache state, mutated in place
        time_embeddings: &Tensor,   // ODE step time embeddings for the flow net
        temp: f32,                  // noise variance for the initial ODE sample
        eos_threshold: f32,         // raw-score threshold for end-of-sequence
        step: usize,                // current streaming step index
    ) -> Result<(Tensor, bool)> {
        // sequence is [B, T, ldim]
        // text_embeddings is [B, S, dim]

        // Handle BOS (if NaN, use bos_emb) - simplistic check for NaN
        // In Candle we can use `Tensor::where_cond`
        // But for now let's assume sequence passed in doesn't have NaNs or handled upstream.
        // Original: sequence = torch.where(torch.isnan(sequence), self.bos_emb, sequence)

        // Let's assume BOS is handled by caller for now or if sequence empty.

        let x = self.input_linear.forward(sequence)?;
        let s_len = text_embeddings.dims()[1];

        // Cat text embeddings and sequence embeddings only if text_embeddings is not empty
        let transformer_out_pre_norm = if s_len > 0 {
            let input = Tensor::cat(&[text_embeddings, &x], 1)?;
            let mut out = self.transformer.forward(&input, model_state, step)?;
            // Remove prefix (text embeddings length) so only the latent
            // positions remain: [B, T, dim].
            out = out.narrow(1, s_len, out.dims()[1] - s_len)?;
            out
        } else {
            self.transformer.forward(&x, model_state, step)?
        };

        let transformer_out = self.out_norm.forward(&transformer_out_pre_norm)?;

        // Only use the last frame for generation: [B, 1, dim] -> [B, dim].
        let last_frame = transformer_out
            .narrow(1, transformer_out.dims()[1] - 1, 1)?
            .squeeze(1)?;

        // NOTE(review): the double squeeze + to_scalar implies this path only
        // works for batch size B == 1 — confirm callers never pass B > 1.
        let eos_score = self
            .out_eos
            .forward(&last_frame)?
            .squeeze(0)?
            .squeeze(0)?
            .to_scalar::<f32>()?;
        let is_eos = eos_score > eos_threshold;

        // Generate noise with optional clamping
        let noise = sample_noise(
            last_frame.device(),
            (last_frame.dims()[0], self.ldim),
            temp,
            self.noise_clamp,
        )?;

        // Pre-compute all modulations for this frame's ODE steps (8 steps * N blocks) in batch
        let c_emb = self.flow_net.embed_condition(&last_frame)?;
        let modulations = self
            .flow_net
            .precompute_modulations(&c_emb, time_embeddings)?;

        let next_latent = lsd_decode(&self.flow_net, &modulations, &noise)?;

        Ok((next_latent, is_eos))
    }
}
0 commit comments