diff --git a/src/serve/demo.html b/src/serve/demo.html index 4a41f52..f86a364 100644 --- a/src/serve/demo.html +++ b/src/serve/demo.html @@ -222,7 +222,7 @@

Settings

- +
@@ -268,6 +268,25 @@

Settings

let settings = loadSettings();

// On page load, ask the server for its voice list (GET /v1/voices) and
// rebuild the voice <select> from the response. Any failure (network error,
// non-JSON body, missing element) is swallowed so the option already present
// in the markup stays in place.
(async function loadVoices() {
  const dropdown = document.getElementById('s-voice');
  try {
    const response = await fetch('/v1/voices');
    const payload = await response.json();
    const voices = payload.voices;
    // Nothing usable came back: leave the existing option untouched.
    if (!voices || !(voices.length > 0)) return;

    dropdown.innerHTML = '';
    for (const voice of voices) {
      const entry = document.createElement('option');
      entry.value = voice.id;
      entry.textContent = `${voice.id} — ${voice.name}`;
      // Re-select the voice stored in the user's settings, if it is offered.
      if (voice.id === settings.voice) entry.selected = true;
      dropdown.appendChild(entry);
    }
  } catch (_) {
    /* keep default option */
  }
})();

// DOM refs
const orbEl = document.getElementById('orb');
const orbLabel = document.getElementById('orb-label');
// ---------------------------------------------------------------------------
// Thinking block filter
// ---------------------------------------------------------------------------

/// Incrementally strip `<think>...</think>` blocks from a token stream.
///
/// Reasoning models (`Qwen`, `DeepSeek`) wrap internal reasoning in these tags.
/// Since tokens arrive one at a time, the opening/closing tags may be split
/// across multiple calls, so the matcher state lives in the caller:
///
/// * `inside_think` — `true` while the stream is inside a thinking block.
/// * `tag_buf` — the characters seen so far of a *possible* tag. Between
///   calls it is either empty or a proper prefix of `<think>` / `</think>`
///   (compared ASCII-case-insensitively).
///
/// Matching is done character by character. As soon as the buffered text can
/// no longer become one of the two tags it is released (emitted when outside
/// a block, dropped when inside) and the offending character is examined
/// again on its own, so a stray `<` never hides a real tag that follows it.
///
/// Returns the portion of `token` that is visible (outside thinking blocks).
/// Text still held in `tag_buf` when the stream ends has not been returned by
/// any call; a caller that wants it must append it itself when
/// `*inside_think` is `false`.
fn strip_think_incremental(token: &str, inside_think: &mut bool, tag_buf: &mut String) -> String {
    const OPEN_TAG: &str = "<think>";
    const CLOSE_TAG: &str = "</think>";

    /// True when `candidate` is an ASCII-case-insensitive prefix of `tag`.
    fn is_prefix_of(candidate: &str, tag: &str) -> bool {
        tag.as_bytes()
            .get(..candidate.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(candidate.as_bytes()))
    }

    let mut visible = String::with_capacity(token.len());

    for ch in token.chars() {
        if !tag_buf.is_empty() {
            // A candidate tag is in progress: extend it and re-classify.
            tag_buf.push(ch);
            let could_open = is_prefix_of(tag_buf, OPEN_TAG);
            let could_close = is_prefix_of(tag_buf, CLOSE_TAG);

            if could_open && tag_buf.len() == OPEN_TAG.len() {
                *inside_think = true;
                tag_buf.clear();
                continue;
            }
            if could_close && tag_buf.len() == CLOSE_TAG.len() {
                *inside_think = false;
                tag_buf.clear();
                continue;
            }
            if could_open || could_close {
                // Still ambiguous — wait for more characters.
                continue;
            }

            // Not a think tag after all. Release everything buffered before
            // `ch`, then fall through so `ch` is handled as a fresh character
            // (it may itself be the `<` that starts a real tag).
            tag_buf.pop();
            if !*inside_think {
                visible.push_str(tag_buf);
            }
            tag_buf.clear();
        }

        if ch == '<' {
            tag_buf.push(ch);
        } else if !*inside_think {
            visible.push(ch);
        }
    }

    visible
}

// ---------------------------------------------------------------------------
// TTS helper
// ---------------------------------------------------------------------------