TTS playback: shared 250-character chunk cap and active-chunk tracking

What this patch changes (everything below is read from the hunks themselves):

* src/main.js: adds the module-level constant TTS_CHUNK_MAX_CHARS = 250 and
  uses it in two places that previously held the literal 500: the default of
  buildTtsChunks' maxChars option and the buildTtsChunks call in
  startVoicePlaybackForMessage.
* src/main.js: adds an activeIndex field (initially null) to the playback job.
  tryStartPlayback sets it to the chunk index right after assigning job.audio.
  The handler that marks a chunk 'done' (presumably the 'ended' listener; its
  registration is outside the visible context), the 'error' listener and
  cancelCurrentTtsJob reset it to null; the two handlers now also clear
  job.audio before advancing playIndex.
* src/main.js: tryStartPlayback gains a guard at its top. When activeIndex is
  a number and job.status[activeIndex] is neither 'done' nor 'error', it
  returns early if job.audio is missing or has not ended, calling
  playAudioWithUnlock first when that audio is paused. When the status is
  'done' or 'error', activeIndex is reset to null and the function continues.
* tests/tts-chunker.test.mjs: the test reads src/main.js as text and now
  checks, with regular expressions, that the constant is declared as 250 and
  that both the buildTtsChunks default and the call site reference
  TTS_CHUNK_MAX_CHARS instead of a numeric literal.

NOTE(review): the new guard returns without scheduling anything when
activeIndex is set but job.audio is null. In the visible hunks the two fields
are always set and cleared together; confirm that no code outside these hunks
clears job.audio alone, otherwise playback could stall on that chunk.

NOTE(review): line breaks, blank context lines and indentation in the hunks
below were restored from a whitespace-collapsed copy so that each hunk matches
its @@ line counts. Verify against the real files, or apply with
whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

diff --git a/src/main.js b/src/main.js
index c6719b8..c40e887 100644
--- a/src/main.js
+++ b/src/main.js
@@ -802,6 +802,7 @@ async function playMessageAudio(message) {
 // -------------------- Voice playback (TTS) --------------------
 let currentTtsJob = null;
 const SILENT_WAV_DATA_URL = 'data:audio/wav;base64,UklGRhYAAABXQVZFZm10IBIAAAABAAEAIlYAAESsAAACABAAZGF0YQAAAAA=';
+const TTS_CHUNK_MAX_CHARS = 250;
 const TTS_CHUNK_ERROR = Symbol('tts-chunk-error');
 let audioUnlocked = false;
 let audioUnlockPromise = null;
@@ -958,9 +959,9 @@ function groupSentences(sentences, groupSize = 2) {
 }
 
 // Build TTS chunks by character length, prefer ending at sentence boundaries.
-// - maxChars: hard cap per chunk (default 500)
+// - maxChars: hard cap per chunk (default 250)
 // - If a single sentence exceeds max, split it on whitespace near the limit.
-function buildTtsChunks(text, { maxChars = 500 } = {}) {
+function buildTtsChunks(text, { maxChars = TTS_CHUNK_MAX_CHARS } = {}) {
   const sents = splitIntoSentences(text);
   const chunks = [];
   let i = 0;
@@ -1049,6 +1050,7 @@ function cancelCurrentTtsJob() {
       try { currentTtsJob.audio.pause(); } catch {}
       currentTtsJob.audio = null;
     }
+    currentTtsJob.activeIndex = null;
     currentTtsJob = null;
   } catch {}
 }
@@ -1057,7 +1059,7 @@ function startVoicePlaybackForMessage(message, voice) {
   cancelCurrentTtsJob();
   const raw = stripNonSpokenParts(message.content || '');
   if (!raw) return;
-  const chunks = buildTtsChunks(raw, { maxChars: 500 });
+  const chunks = buildTtsChunks(raw, { maxChars: TTS_CHUNK_MAX_CHARS });
   if (!chunks.length) return;
 
   const job = {
@@ -1070,6 +1072,7 @@ function startVoicePlaybackForMessage(message, voice) {
     // Playback ordering
     results: new Array(chunks.length), // urls by index
     playIndex: 0,
+    activeIndex: null,
     // Misc
     timers: [],
     audio: null,
@@ -1145,6 +1148,23 @@ function startVoicePlaybackForMessage(message, voice) {
 
 function tryStartPlayback(job) {
   if (job.cancelled) return;
+  if (typeof job.activeIndex === 'number') {
+    const activeStatus = job.status[job.activeIndex];
+    if (activeStatus !== 'done' && activeStatus !== 'error') {
+      const activeAudio = job.audio;
+      if (activeAudio && !activeAudio.ended) {
+        if (activeAudio.paused) {
+          void playAudioWithUnlock(activeAudio);
+        }
+        return;
+      }
+      if (!activeAudio) {
+        return;
+      }
+    } else {
+      job.activeIndex = null;
+    }
+  }
   // If already playing, nothing to do; the 'ended' handler will pick next
   if (job.audio && !job.audio.ended && !job.audio.paused) return;
   while (job.playIndex < job.groups.length) {
@@ -1167,6 +1187,7 @@ function tryStartPlayback(job) {
     try { audio.playsInline = true; } catch {}
     try { audio.crossOrigin = 'anonymous'; } catch {}
     job.audio = audio;
+    job.activeIndex = index;
     let started = false;
     let watchdog = null;
     const clearWatchdog = () => { if (watchdog) { clearTimeout(watchdog); watchdog = null; } };
@@ -1226,12 +1247,16 @@ function tryStartPlayback(job) {
       clearTimeout(stallTimer);
       clearWatchdog();
       setTtsChunkState(job, index, 'done');
+      job.activeIndex = null;
+      job.audio = null;
       job.playIndex += 1;
       tryStartPlayback(job);
     });
     audio.addEventListener('error', () => {
       if (job.cancelled) return;
       setTtsChunkState(job, index, 'error');
+      job.activeIndex = null;
+      job.audio = null;
       job.playIndex += 1; // skip broken chunk
       tryStartPlayback(job);
     });
diff --git a/tests/tts-chunker.test.mjs b/tests/tts-chunker.test.mjs
index 5ca1f6d..4c5be6e 100644
--- a/tests/tts-chunker.test.mjs
+++ b/tests/tts-chunker.test.mjs
@@ -1,15 +1,21 @@
 import assert from 'node:assert/strict';
 import { readFile } from 'node:fs/promises';
 
-export const name = 'TTS chunker: halved payload (500 char cap) wired in call site';
+export const name = 'TTS chunker: halved payload (250 char cap) wired in call site';
 
 export async function run() {
   const js = await readFile(new URL('../src/main.js', import.meta.url), 'utf8');
-  const defMatch = js.match(/function\s+buildTtsChunks\s*\(text,\s*\{\s*maxChars\s*=\s*(\d+)\s*\}\s*=\s*\{\}\)\s*\{/);
-  const defaultMax = defMatch ? Number(defMatch[1]) : null;
-  assert.equal(defaultMax, 500, 'default maxChars must be 500');
+  const constMatch = js.match(/const\s+TTS_CHUNK_MAX_CHARS\s*=\s*(\d+)\s*;/);
+  const constantValue = constMatch ? Number(constMatch[1]) : null;
+  assert.equal(constantValue, 250, 'TTS chunk limit constant must be 250 characters');
 
-  const callMatch = js.match(/buildTtsChunks\(raw,\s*\{\s*maxChars:\s*(\d+)\s*\}\s*\)/);
-  const callMax = callMatch ? Number(callMatch[1]) : null;
-  assert.equal(callMax, 500, 'startVoicePlaybackForMessage should request 500-char chunks');
+  assert.ok(
+    /function\s+buildTtsChunks\s*\(text,\s*\{\s*maxChars\s*=\s*TTS_CHUNK_MAX_CHARS\s*\}\s*=\s*\{\}\)\s*\{/.test(js),
+    'buildTtsChunks should default to TTS_CHUNK_MAX_CHARS',
+  );
+
+  assert.ok(
+    /buildTtsChunks\(raw,\s*\{\s*maxChars:\s*TTS_CHUNK_MAX_CHARS\s*\}\s*\)/.test(js),
+    'startVoicePlaybackForMessage should request TTS_CHUNK_MAX_CHARS-sized chunks',
+  );
 }