From 16df1e5a671349d0f76a6fee4b466b5fda778302 Mon Sep 17 00:00:00 2001 From: CheneyY Date: Tue, 17 Mar 2026 16:59:27 +0800 Subject: [PATCH 1/3] fix: Resolve Mac Chrome TTS invocation and Qwen TTS missing parameters issues Core changes: 1. lib/audio/tts-providers.ts: - Add default voice parameter 'Cherry' for Qwen TTS - Ensure language_type parameter is correctly passed 2. lib/utils/audio-player.ts: - Add BrowserTTSOptions interface definition - Implement playWithBrowserTTS method supporting native Web Speech API - Add async voices loading handling logic - Add setBrowserTTSEnabled method for enabling/disabling browser TTS fallback 3. lib/playback/engine.ts: - Integrate browser TTS support in playback engine - Dynamically prepare browserTTSOptions based on settings - Enable AudioPlayer's browser TTS fallback mechanism 4. components/audio/tts-config-popover.tsx: - Optimize TTS preview functionality with separate browser native TTS handling - Improve voice selection and error handling logic 5. Related files updated: - components/generation/media-popover.tsx: TTS testing optimization in media preview - components/settings/tts-settings.tsx: TTS testing improvements in settings page - components/stage.tsx: Audio playback adaptation in stage component - lib/action/engine.ts: TTS integration in action engine Issues resolved: - Mac Chrome browser TTS voice invocation failure - Classroom silence due to missing Qwen TTS service parameters - Async voices loading issues in browser native TTS - Provided complete browser TTS fallback mechanism for enhanced UX --- components/audio/tts-config-popover.tsx | 25 +++++ components/generation/media-popover.tsx | 85 ++++++++------ components/settings/tts-settings.tsx | 18 +-- components/stage.tsx | 6 + lib/action/engine.ts | 22 +++- lib/audio/tts-providers.ts | 4 +- lib/playback/engine.ts | 22 +++- lib/utils/audio-player.ts | 142 ++++++++++++++++++++++-- 8 files changed, 271 insertions(+), 53 deletions(-) diff --git a/components/audio/tts-config-popover.tsx b/components/audio/tts-config-popover.tsx index bda0905..bb35059 100644 --- a/components/audio/tts-config-popover.tsx +++ b/components/audio/tts-config-popover.tsx @@ -56,12 +56,37 @@ export function TtsConfigPopover() { if (previewing) { audioRef.current?.pause(); audioRef.current = null; + window.speechSynthesis?.cancel(); setPreviewing(false); return; } setPreviewing(true); try { + // Handle browser native TTS separately + if (ttsProviderId === 'browser-native-tts') { + if (!('speechSynthesis' in window)) { + setPreviewing(false); + return; + } + + const utterance = new SpeechSynthesisUtterance('你好,欢迎来到AI课堂!让我们一起学习吧。'); + const voices = window.speechSynthesis.getVoices(); + const selectedVoice = voices.find( + (v) => v.name === ttsVoice || v.lang === ttsVoice, + ); + if (selectedVoice) utterance.voice = selectedVoice; + + utterance.onend = () => { + setPreviewing(false); + }; + utterance.onerror = () => { + setPreviewing(false); + }; + window.speechSynthesis.speak(utterance); + return; + } + const providerConfig = ttsProvidersConfig[ttsProviderId]; const res = await fetch('/api/generate/tts', { method: 'POST', diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index 0a46230..2cbcea5 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -33,7 +33,7 @@ import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; import { TTS_PROVIDERS, getTTSVoices } from '@/lib/audio/constants'; import { ASR_PROVIDERS, getASRSupportedLanguages } from '@/lib/audio/constants'; import type { ImageProviderId, VideoProviderId } from '@/lib/media/types'; -import type { ASRProviderId } from '@/lib/audio/types'; +import type { ASRProviderId, TTSProviderId } from '@/lib/audio/types'; import type { SettingsSection } from '@/lib/types/settings'; interface MediaPopoverProps { @@ -104,6 +104,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { const ttsVoice = useSettingsStore((s) => s.ttsVoice); const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); + const setTTSProvider = useSettingsStore((s) => s.setTTSProvider); const setTTSVoice = useSettingsStore((s) => s.setTTSVoice); const setTTSSpeed = useSettingsStore((s) => s.setTTSSpeed); @@ -170,14 +171,22 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { [videoProvidersConfig], ); - // TTS: flat voice list from current provider, localized - const ttsVoices = useMemo( + // TTS: grouped by provider (only available providers) + const ttsGroups = useMemo( () => - getTTSVoices(ttsProviderId).map((v) => ({ - id: v.id, - name: getVoiceDisplayName(v.name, locale), - })), - [ttsProviderId, locale], + Object.values(TTS_PROVIDERS) + .filter((p) => cfgOk(ttsProvidersConfig, p.id, p.requiresApiKey)) + .map((p) => ({ + groupId: p.id, + groupName: p.name, + groupIcon: p.icon, + available: true, + items: getTTSVoices(p.id).map((v) => ({ + id: v.id, + name: getVoiceDisplayName(v.name, locale), + })), + })), + [ttsProvidersConfig, locale], ); // TTS preview @@ -185,11 +194,37 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { if (previewing) { audioRef.current?.pause(); audioRef.current = null; + window.speechSynthesis?.cancel(); setPreviewing(false); return; } setPreviewing(true); try { + // Handle browser native TTS separately + if (ttsProviderId === 'browser-native-tts') { + if (!('speechSynthesis' in window)) { + setPreviewing(false); + return; + } + + const utterance = new SpeechSynthesisUtterance('你好,欢迎来到AI课堂!让我们一起学习吧。'); + utterance.rate = ttsSpeed; + const voices = window.speechSynthesis.getVoices(); + const selectedVoice = voices.find( + (v) => v.name === ttsVoice || v.lang === ttsVoice, + ); + if (selectedVoice) utterance.voice = selectedVoice; + + utterance.onend = () => { + setPreviewing(false); + }; + utterance.onerror = () => { + setPreviewing(false); + }; + window.speechSynthesis.speak(utterance); + return; + } + const providerConfig = ttsProvidersConfig[ttsProviderId]; const res = await fetch('/api/generate/tts', { method: 'POST', @@ -221,7 +256,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { } catch { setPreviewing(false); } - }, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing]); + }, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing, ttsSpeed]); // ASR: only available providers const asrGroups = useMemo( @@ -348,29 +383,15 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { > {/* Voice select + preview */}
- + { + setTTSProvider(gid as TTSProviderId); + setTTSVoice(iid); + }} + />