From eb5869e59f65308a120f6402498d4db3d4702ce9 Mon Sep 17 00:00:00 2001 From: James Pine Date: Wed, 18 Mar 2026 10:59:59 -0700 Subject: [PATCH 01/12] fix: GUI startup with external server + data refresh on server switch Two fixes for issue #312: 1. GUI stuck on loading screen when backend is already running externally (e.g. via python/uvicorn/Docker): - Rust: add HTTP health check fallback when the process on the port doesn't have 'voicebox' in its name. If /health responds with a valid Voicebox response, reuse the server instead of erroring. - Frontend: when startServer() fails, fall back to polling the health endpoint every 2s instead of permanently blocking. 2. No data refresh when switching server URLs in settings: - serverStore.setServerUrl() now invalidates all React Query caches when the URL actually changes, so profiles/history/models/stories are re-fetched from the new server. - Export queryClient from main.tsx for store-level cache invalidation. Fixes #312 --- app/src/App.tsx | 19 ++++++++++ app/src/main.tsx | 2 +- app/src/stores/serverStore.ts | 22 ++++++++++-- tauri/src-tauri/src/main.rs | 66 ++++++++++++++++++++++++++++++----- 4 files changed, 98 insertions(+), 11 deletions(-) diff --git a/app/src/App.tsx b/app/src/App.tsx index b6964db1..ff913cdf 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -4,6 +4,7 @@ import voiceboxLogo from '@/assets/voicebox-logo.png'; import ShinyText from '@/components/ShinyText'; import { TitleBarDragRegion } from '@/components/TitleBarDragRegion'; import { useAutoUpdater } from '@/hooks/useAutoUpdater'; +import { apiClient } from '@/lib/api/client'; import { TOP_SAFE_AREA_PADDING } from '@/lib/constants/ui'; import { cn } from '@/lib/utils/cn'; import { usePlatform } from '@/platform/PlatformContext'; @@ -122,6 +123,24 @@ function App() { serverStartingRef.current = false; // @ts-expect-error - adding property to window window.__voiceboxServerStartedByApp = false; + + // Fall back to polling: the server may already be running externally + // (e.g. started via python/uvicorn/Docker). Poll the health endpoint + // until it responds, then transition to the main UI. + console.log('Falling back to health-check polling...'); + const pollInterval = setInterval(async () => { + try { + await apiClient.getHealth(); + console.log('External server detected via health check'); + clearInterval(pollInterval); + setServerReady(true); + } catch { + // Server not ready yet, keep polling + } + }, 2000); + + // Stop polling after 2 minutes to avoid polling forever + setTimeout(() => clearInterval(pollInterval), 120_000); }); // Cleanup: stop server on actual unmount (not StrictMode remount) diff --git a/app/src/main.tsx b/app/src/main.tsx index e4a5e482..d6cb9026 100644 --- a/app/src/main.tsx +++ b/app/src/main.tsx @@ -5,7 +5,7 @@ import ReactDOM from 'react-dom/client'; import App from './App'; import './index.css'; -const queryClient = new QueryClient({ +export const queryClient = new QueryClient({ defaultOptions: { queries: { staleTime: 1000 * 60 * 5, // 5 minutes diff --git a/app/src/stores/serverStore.ts b/app/src/stores/serverStore.ts index 8f983049..9f843f46 100644 --- a/app/src/stores/serverStore.ts +++ b/app/src/stores/serverStore.ts @@ -30,11 +30,29 @@ interface ServerStore { setCustomModelsDir: (dir: string | null) => void; } +/** + * Invalidate all React Query caches and reset UI selection state. + * Called when the server URL changes so stale data from the previous + * server is not shown. + */ +function invalidateAllServerData() { + // Lazy import to avoid circular dependency (main.tsx -> serverStore -> main.tsx) + import('@/main').then(({ queryClient }) => { + queryClient.invalidateQueries(); + }); +} + export const useServerStore = create()( persist( - (set) => ({ + (set, get) => ({ serverUrl: 'http://127.0.0.1:17493', - setServerUrl: (url) => set({ serverUrl: url }), + setServerUrl: (url) => { + const prev = get().serverUrl; + set({ serverUrl: url }); + if (url !== prev) { + invalidateAllServerData(); + } + }, isConnected: false, setIsConnected: (connected) => set({ isConnected: connected }), diff --git a/tauri/src-tauri/src/main.rs b/tauri/src-tauri/src/main.rs index 415961f2..cfdb8290 100644 --- a/tauri/src-tauri/src/main.rs +++ b/tauri/src-tauri/src/main.rs @@ -53,6 +53,35 @@ fn find_voicebox_pid_on_port(port: u16) -> Option { None } +/// Check if a Voicebox server is responding on the given port. +/// +/// Sends an HTTP GET to `/health` and returns `true` if the response +/// contains the expected JSON field (`"status"`), confirming it's +/// a Voicebox backend rather than an unrelated service. +#[allow(dead_code)] // Used in platform-specific cfg blocks +fn check_health(port: u16) -> bool { + let url = format!("http://127.0.0.1:{}/health", port); + match reqwest::blocking::Client::builder() + .timeout(std::time::Duration::from_secs(3)) + .build() + { + Ok(client) => match client.get(&url).send() { + Ok(resp) => { + if !resp.status().is_success() { + return false; + } + // Verify the body looks like a Voicebox health response + match resp.text() { + Ok(body) => body.contains("status"), + Err(_) => false, + } + } + Err(_) => false, + }, + Err(_) => false, + } +} + struct ServerState { child: Mutex>, server_pid: Mutex>, @@ -80,7 +109,8 @@ async fn start_server( return Ok(format!("http://127.0.0.1:{}", SERVER_PORT)); } - // Check if a voicebox server is already running on our port (from previous session with keep_running=true) + // Check if a voicebox server is already running on our port (from previous session with keep_running=true, + // or an externally started server e.g. via `python`, `uvicorn`, Docker, etc.) #[cfg(unix)] { use std::process::Command; @@ -101,6 +131,20 @@ async fn start_server( *state.server_pid.lock().unwrap() = Some(pid); return Ok(format!("http://127.0.0.1:{}", SERVER_PORT)); } + } else { + // Process name doesn't contain "voicebox" — could be an external + // Python/uvicorn/Docker server. Verify via HTTP health check. + println!("Port {} in use by '{}' (PID: {}), checking if it's a Voicebox server...", SERVER_PORT, command, pid_str); + if check_health(SERVER_PORT) { + println!("Health check passed — reusing external server on port {}", SERVER_PORT); + return Ok(format!("http://127.0.0.1:{}", SERVER_PORT)); + } + println!("Health check failed — port is occupied by a non-Voicebox process"); + return Err(format!( + "Port {} is already in use by another application ({}). \ + Close it or change the Voicebox server port.", + SERVER_PORT, command + )); } } } @@ -114,18 +158,24 @@ async fn start_server( &format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(), std::time::Duration::from_secs(1), ).is_ok() { - // Port is in use — check if it's a voicebox process + // Port is in use — check if it's a voicebox process by name first if let Some(pid) = find_voicebox_pid_on_port(SERVER_PORT) { println!("Found existing voicebox-server on port {} (PID: {}), reusing it", SERVER_PORT, pid); *state.server_pid.lock().unwrap() = Some(pid); return Ok(format!("http://127.0.0.1:{}", SERVER_PORT)); - } else { - return Err(format!( - "Port {} is already in use by another application. \ - Close the other application or change the Voicebox port.", - SERVER_PORT - )); } + // Process name doesn't match — could be an external Python/Docker server. + // Verify via HTTP health check before giving up. + println!("Port {} in use by unknown process, checking if it's a Voicebox server...", SERVER_PORT); + if check_health(SERVER_PORT) { + println!("Health check passed — reusing external server on port {}", SERVER_PORT); + return Ok(format!("http://127.0.0.1:{}", SERVER_PORT)); + } + return Err(format!( + "Port {} is already in use by another application. \ + Close the other application or change the Voicebox port.", + SERVER_PORT + )); } } From 12ed2d51ce5337b95011474d4391ebdccc4936e9 Mon Sep 17 00:00:00 2001 From: Lior Shahverdi Date: Wed, 18 Mar 2026 15:44:37 -0400 Subject: [PATCH 02/12] Adds a trash icon button alongside the existing retry button for failed generations, giving users a way to clean up failed entries without having to retry them first. --- app/src/components/History/HistoryTable.tsx | 30 ++++++++++++++------- tauri/src-tauri/Cargo.lock | 2 +- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx index e88c7701..914c7fcb 100644 --- a/app/src/components/History/HistoryTable.tsx +++ b/app/src/components/History/HistoryTable.tsx @@ -569,15 +569,27 @@ export function HistoryTable() { )} {isFailed ? ( - + <> + + + ) : ( <> diff --git a/tauri/src-tauri/Cargo.lock b/tauri/src-tauri/Cargo.lock index b133dfc8..194314de 100644 --- a/tauri/src-tauri/Cargo.lock +++ b/tauri/src-tauri/Cargo.lock @@ -5041,7 +5041,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "voicebox" -version = "0.2.3" +version = "0.3.1" dependencies = [ "base64 0.22.1", "core-foundation-sys", From 3584283d8410b4cf2aed80b6248c06e032106e52 Mon Sep 17 00:00:00 2001 From: James Pine Date: Thu, 19 Mar 2026 10:09:48 -0700 Subject: [PATCH 03/12] feat: Kokoro 82M TTS engine + voice profile type system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Kokoro-82M as a new TTS engine — 82M params, CPU realtime, 8 languages, Apache 2.0. Unlike cloning engines, Kokoro uses pre-built voice styles, which required a new profile type system to support non-cloning engines cleanly. Kokoro engine: - New kokoro_backend.py implementing TTSBackend protocol - 50 built-in voices across en/es/fr/hi/it/pt/ja/zh - KPipeline API with language-aware G2P routing via misaki - PyInstaller bundling for misaki, language_tags, espeakng_loader, en_core_web_sm Voice profile type system: - New voice_type column: 'cloned' | 'preset' | 'designed' (future) - Preset profiles store engine + voice ID instead of audio samples - default_engine field on profiles — auto-selects engine on profile pick - Create Voice dialog: toggle between 'Clone from audio' and 'Built-in voice' - Edit dialog shows preset voice info instead of sample list for preset profiles - Engine selector locks to preset engine when preset profile is selected - Profile grid filters by engine — shows Kokoro voices when Kokoro selected - Custom empty state when no preset profiles exist for selected engine Bug fixes: - Fix relative audio paths in DB causing 404s in production builds - config.set_data_dir() now resolves to absolute paths - Startup migration converts existing relative paths to absolute Also updates PROJECT_STATUS.md and tts-engines.mdx developer guide. --- .../Generation/EngineModelSelector.tsx | 70 ++- .../Generation/FloatingGenerateBox.tsx | 64 ++- .../components/Generation/GenerationForm.tsx | 2 +- .../ServerSettings/ModelManagement.tsx | 5 +- .../components/VoiceProfiles/ProfileCard.tsx | 10 + .../components/VoiceProfiles/ProfileForm.tsx | 481 +++++++++++++----- .../components/VoiceProfiles/ProfileList.tsx | 35 +- app/src/lib/api/client.ts | 11 + app/src/lib/api/types.ts | 21 +- app/src/lib/constants/languages.ts | 2 + app/src/lib/hooks/useGenerationForm.ts | 14 +- app/src/stores/uiStore.ts | 7 + backend/backends/__init__.py | 13 + backend/backends/kokoro_backend.py | 288 +++++++++++ backend/build_binary.py | 38 ++ backend/config.py | 6 +- backend/database/migrations.py | 56 ++ backend/database/models.py | 16 +- backend/models.py | 12 +- backend/requirements.txt | 7 + backend/routes/profiles.py | 97 ++++ backend/services/profiles.py | 54 +- backend/voicebox-server.spec | 17 +- docs/content/docs/developer/tts-engines.mdx | 28 + docs/notes/PROJECT_STATUS.md | 239 ++++----- tauri/src-tauri/Cargo.lock | 2 +- 26 files changed, 1303 insertions(+), 292 deletions(-) create mode 100644 backend/backends/kokoro_backend.py diff --git a/app/src/components/Generation/EngineModelSelector.tsx b/app/src/components/Generation/EngineModelSelector.tsx index 4382d3f7..773aa089 100644 --- a/app/src/components/Generation/EngineModelSelector.tsx +++ b/app/src/components/Generation/EngineModelSelector.tsx @@ -7,6 +7,7 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; +import type { VoiceProfileResponse } from '@/lib/api/types'; import { getLanguageOptionsForEngine } from '@/lib/constants/languages'; import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm'; @@ -15,13 +16,14 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm'; * Adding a new engine means adding one entry here. */ const ENGINE_OPTIONS = [ - { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B' }, - { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B' }, - { value: 'luxtts', label: 'LuxTTS' }, - { value: 'chatterbox', label: 'Chatterbox' }, - { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' }, - { value: 'tada:1B', label: 'TADA 1B' }, - { value: 'tada:3B', label: 'TADA 3B Multilingual' }, + { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' }, + { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' }, + { value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' }, + { value: 'chatterbox', label: 'Chatterbox', engine: 'chatterbox' }, + { value: 'chatterbox_turbo', label: 'Chatterbox Turbo', engine: 'chatterbox_turbo' }, + { value: 'tada:1B', label: 'TADA 1B', engine: 'tada' }, + { value: 'tada:3B', label: 'TADA 3B Multilingual', engine: 'tada' }, + { value: 'kokoro', label: 'Kokoro 82M', engine: 'kokoro' }, ] as const; const ENGINE_DESCRIPTIONS: Record = { @@ -30,11 +32,38 @@ const ENGINE_DESCRIPTIONS: Record = { chatterbox: '23 languages, incl. Hebrew', chatterbox_turbo: 'English, [laugh] [cough] tags', tada: 'HumeAI, 700s+ coherent audio', + kokoro: '82M params, CPU realtime, 8 langs', }; /** Engines that only support English and should force language to 'en' on select. */ const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']); +/** Engines that support cloned (reference audio) profiles. */ +const CLONING_ENGINES = new Set(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']); + +/** Engines that are preset-only (no cloning). */ +const PRESET_ONLY_ENGINES = new Set(['kokoro']); + +/** + * Get which engine options are available for the selected profile. + * + * - Preset profiles: locked to their preset engine + * - All other profiles: all engines available + */ +function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) { + if (!selectedProfile) return ENGINE_OPTIONS; + + const voiceType = selectedProfile.voice_type || 'cloned'; + + if (voiceType === 'preset') { + // Preset profiles lock to their specific engine + const presetEngine = selectedProfile.preset_engine; + return ENGINE_OPTIONS.filter((opt) => opt.engine === presetEngine); + } + + return ENGINE_OPTIONS; +} + function getSelectValue(engine: string, modelSize?: string): string { if (engine === 'qwen') return `qwen:${modelSize || '1.7B'}`; if (engine === 'tada') return `tada:${modelSize || '1B'}`; @@ -85,12 +114,21 @@ function handleEngineChange(form: UseFormReturn, value: st interface EngineModelSelectorProps { form: UseFormReturn; compact?: boolean; + selectedProfile?: VoiceProfileResponse | null; } -export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) { +export function EngineModelSelector({ form, compact, selectedProfile }: EngineModelSelectorProps) { const engine = form.watch('engine') || 'qwen'; const modelSize = form.watch('modelSize'); const selectValue = getSelectValue(engine, modelSize); + const availableOptions = getAvailableOptions(selectedProfile); + + // If current engine isn't in available options, auto-switch to first available + const currentEngineAvailable = availableOptions.some((opt) => opt.value === selectValue); + if (!currentEngineAvailable && availableOptions.length > 0) { + // Defer to avoid setting state during render + setTimeout(() => handleEngineChange(form, availableOptions[0].value), 0); + } const itemClass = compact ? 'text-xs text-muted-foreground' : undefined; const triggerClass = compact @@ -105,7 +143,7 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) - {ENGINE_OPTIONS.map((opt) => ( + {availableOptions.map((opt) => ( {opt.label} @@ -119,3 +157,17 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) export function getEngineDescription(engine: string): string { return ENGINE_DESCRIPTIONS[engine] ?? ''; } + +/** + * Check if a profile is compatible with the currently selected engine. + * Useful for UI hints. + */ +export function isProfileCompatibleWithEngine( + profile: VoiceProfileResponse, + engine: string, +): boolean { + const voiceType = profile.voice_type || 'cloned'; + if (voiceType === 'preset') return profile.preset_engine === engine; + if (voiceType === 'cloned') return CLONING_ENGINES.has(engine); + return !PRESET_ONLY_ENGINES.has(engine); // designed — future +} diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx index 96e8f553..ae1cad26 100644 --- a/app/src/components/Generation/FloatingGenerateBox.tsx +++ b/app/src/components/Generation/FloatingGenerateBox.tsx @@ -36,6 +36,7 @@ export function FloatingGenerateBox({ }: FloatingGenerateBoxProps) { const selectedProfileId = useUIStore((state) => state.selectedProfileId); const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId); + const setSelectedEngine = useUIStore((state) => state.setSelectedEngine); const { data: selectedProfile } = useProfile(selectedProfileId || ''); const { data: profiles } = useProfiles(); const [isExpanded, setIsExpanded] = useState(false); @@ -67,7 +68,12 @@ export function FloatingGenerateBox({ } }, getEffectsChain: () => { - if (!selectedPresetId || !effectPresets) return undefined; + if (!selectedPresetId) return undefined; + // Profile's own effects chain (no matching preset) + if (selectedPresetId === '_profile') { + return selectedProfile?.effects_chain ?? undefined; + } + if (!effectPresets) return undefined; const preset = effectPresets.find((p) => p.id === selectedPresetId); return preset?.effects_chain; }, @@ -110,12 +116,56 @@ export function FloatingGenerateBox({ } }, [selectedProfileId, profiles, setSelectedProfileId]); - // Sync generation form language with selected profile's language + // Sync engine selection to global store so ProfileList can filter + const watchedEngine = form.watch('engine'); + useEffect(() => { + if (watchedEngine) { + setSelectedEngine(watchedEngine); + } + }, [watchedEngine, setSelectedEngine]); + + // Sync generation form language, engine, and effects with selected profile useEffect(() => { if (selectedProfile?.language) { form.setValue('language', selectedProfile.language as LanguageCode); } - }, [selectedProfile, form]); + // Auto-switch engine if profile has a default + if (selectedProfile?.default_engine) { + form.setValue( + 'engine', + selectedProfile.default_engine as + | 'qwen' + | 'luxtts' + | 'chatterbox' + | 'chatterbox_turbo' + | 'tada' + | 'kokoro', + ); + } + // Pre-fill effects from profile defaults + if ( + selectedProfile?.effects_chain && + selectedProfile.effects_chain.length > 0 && + effectPresets + ) { + // Try to match against a known preset + const profileChainJson = JSON.stringify(selectedProfile.effects_chain); + const matchingPreset = effectPresets.find( + (p) => JSON.stringify(p.effects_chain) === profileChainJson, + ); + if (matchingPreset) { + setSelectedPresetId(matchingPreset.id); + } else { + // No matching preset — use special value to pass profile chain directly + setSelectedPresetId('_profile'); + } + } else if ( + selectedProfile && + (!selectedProfile.effects_chain || selectedProfile.effects_chain.length === 0) + ) { + setSelectedPresetId(null); + } + }, [selectedProfile, effectPresets, form]); // Auto-resize textarea based on content (only when expanded) useEffect(() => { @@ -358,7 +408,7 @@ export function FloatingGenerateBox({ /> - + @@ -375,6 +425,12 @@ export function FloatingGenerateBox({ No effects + {selectedProfile?.effects_chain && + selectedProfile.effects_chain.length > 0 && ( + + Profile default + + )} {effectPresets?.map((preset) => ( {preset.name} diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx index 225e8dfa..9f7a7cd7 100644 --- a/app/src/components/Generation/GenerationForm.tsx +++ b/app/src/components/Generation/GenerationForm.tsx @@ -118,7 +118,7 @@ export function GenerationForm() {
Model - + {getEngineDescription(form.watch('engine') || 'qwen')} diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx index c415306d..e7eda69f 100644 --- a/app/src/components/ServerSettings/ModelManagement.tsx +++ b/app/src/components/ServerSettings/ModelManagement.tsx @@ -66,6 +66,8 @@ const MODEL_DESCRIPTIONS: Record = { 'HumeAI TADA 1B — English speech-language model built on Llama 3.2 1B. Generates 700s+ of coherent audio with synchronized text-acoustic alignment.', 'tada-3b-ml': 'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.', + kokoro: + 'Kokoro 82M by hexgrad. Tiny 82M-parameter TTS that runs at CPU realtime. Supports 8 languages with pre-built voice styles. Apache 2.0 licensed.', 'whisper-base': 'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.', 'whisper-small': @@ -396,7 +398,8 @@ export function ModelManagement() { m.model_name.startsWith('qwen-tts') || m.model_name.startsWith('luxtts') || m.model_name.startsWith('chatterbox') || - m.model_name.startsWith('tada'), + m.model_name.startsWith('tada') || + m.model_name.startsWith('kokoro'), ) ?? []; const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? []; diff --git a/app/src/components/VoiceProfiles/ProfileCard.tsx b/app/src/components/VoiceProfiles/ProfileCard.tsx index 3675b765..e2a9d4d4 100644 --- a/app/src/components/VoiceProfiles/ProfileCard.tsx +++ b/app/src/components/VoiceProfiles/ProfileCard.tsx @@ -97,6 +97,16 @@ export function ProfileCard({ profile }: ProfileCardProps) { {profile.language} + {profile.voice_type === 'preset' && ( + + {profile.preset_engine} + + )} + {profile.voice_type === 'designed' && ( + + designed + + )} {profile.effects_chain && profile.effects_chain.length > 0 && ( )} diff --git a/app/src/components/VoiceProfiles/ProfileForm.tsx b/app/src/components/VoiceProfiles/ProfileForm.tsx index 13edf6f4..d3f53eac 100644 --- a/app/src/components/VoiceProfiles/ProfileForm.tsx +++ b/app/src/components/VoiceProfiles/ProfileForm.tsx @@ -1,9 +1,11 @@ import { zodResolver } from '@hookform/resolvers/zod'; -import { Edit2, Mic, Monitor, Upload, X } from 'lucide-react'; +import { useQuery } from '@tanstack/react-query'; +import { Edit2, Mic, Monitor, Music, Upload, X } from 'lucide-react'; import { useEffect, useRef, useState } from 'react'; import { useForm } from 'react-hook-form'; import * as z from 'zod'; import { EffectsChainEditor } from '@/components/Effects/EffectsChainEditor'; +import { Badge } from '@/components/ui/badge'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -15,6 +17,7 @@ import { import { Form, FormControl, + FormDescription, FormField, FormItem, FormLabel, @@ -32,7 +35,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { Textarea } from '@/components/ui/textarea'; import { useToast } from '@/components/ui/use-toast'; import { apiClient } from '@/lib/api/client'; -import type { EffectConfig } from '@/lib/api/types'; +import type { EffectConfig, PresetVoice, VoiceType } from '@/lib/api/types'; import { LANGUAGE_CODES, LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useAudioPlayer } from '@/lib/hooks/useAudioPlayer'; import { useAudioRecording } from '@/lib/hooks/useAudioRecording'; @@ -120,16 +123,20 @@ export function ProfileForm() { const deleteAvatar = useDeleteAvatar(); const transcribe = useTranscription(); const { toast } = useToast(); + const [voiceSource, setVoiceSource] = useState<'clone' | 'builtin'>('clone'); const [sampleMode, setSampleMode] = useState<'upload' | 'record' | 'system'>('record'); const [audioDuration, setAudioDuration] = useState(null); const [isValidatingAudio, setIsValidatingAudio] = useState(false); const [avatarPreview, setAvatarPreview] = useState(null); + const [selectedPresetEngine, setSelectedPresetEngine] = useState('kokoro'); + const [selectedPresetVoiceId, setSelectedPresetVoiceId] = useState(''); const avatarInputRef = useRef(null); const { isPlaying, playPause, cleanup: cleanupAudio } = useAudioPlayer(); const isCreating = !editingProfileId; const serverUrl = useServerStore((state) => state.serverUrl); const [profileEffectsChain, setProfileEffectsChain] = useState([]); const [effectsDirty, setEffectsDirty] = useState(false); + const [defaultEngine, setDefaultEngine] = useState(''); const form = useForm({ resolver: zodResolver(profileSchema), @@ -239,6 +246,20 @@ export function ProfileForm() { }, }); + // Fetch available preset voices for the selected engine + const presetEngineToQuery = isCreating + ? selectedPresetEngine + : (editingProfile?.preset_engine ?? ''); + const { data: presetVoicesData } = useQuery({ + queryKey: ['presetVoices', presetEngineToQuery], + queryFn: () => apiClient.listPresetVoices(presetEngineToQuery), + enabled: + !!presetEngineToQuery && + ((voiceSource === 'builtin' && isCreating) || + (!isCreating && editingProfile?.voice_type === 'preset')), + }); + const presetVoices = presetVoicesData?.voices ?? []; + // Show recording errors useEffect(() => { if (recordingError) { @@ -287,6 +308,7 @@ export function ProfileForm() { }); setProfileEffectsChain(editingProfile.effects_chain ?? []); setEffectsDirty(false); + setDefaultEngine(editingProfile.default_engine ?? ''); } else if (profileFormDraft && open) { // Restore from draft when opening in create mode form.reset({ @@ -415,13 +437,14 @@ export function ProfileForm() { async function onSubmit(data: ProfileFormValues) { try { if (editingProfileId) { - // Editing: just update profile + // Editing: update profile await updateProfile.mutateAsync({ profileId: editingProfileId, data: { name: data.name, description: data.description, language: data.language, + default_engine: defaultEngine || undefined, }, }); @@ -464,8 +487,50 @@ export function ProfileForm() { title: 'Voice updated', description: `"${data.name}" has been updated successfully.`, }); + } else if (voiceSource === 'builtin') { + // Creating preset profile from built-in voice + if (!selectedPresetVoiceId) { + toast({ + title: 'No voice selected', + description: 'Please select a built-in voice.', + variant: 'destructive', + }); + return; + } + + const profile = await createProfile.mutateAsync({ + name: data.name, + description: data.description, + language: data.language, + voice_type: 'preset' as VoiceType, + preset_engine: selectedPresetEngine, + preset_voice_id: selectedPresetVoiceId, + default_engine: selectedPresetEngine, + }); + + // Handle avatar upload if provided + if (data.avatarFile) { + try { + await uploadAvatar.mutateAsync({ + profileId: profile.id, + file: data.avatarFile, + }); + } catch (avatarError) { + toast({ + title: 'Avatar upload failed', + description: + avatarError instanceof Error ? avatarError.message : 'Failed to upload avatar', + variant: 'destructive', + }); + } + } + + toast({ + title: 'Profile created', + description: `"${data.name}" has been created with a built-in voice.`, + }); } else { - // Creating: require sample file and reference text + // Creating cloned profile: require sample file and reference text const sampleFile = form.getValues('sampleFile'); const referenceText = form.getValues('referenceText'); @@ -528,6 +593,7 @@ export function ProfileForm() { name: data.name, description: data.description, language: data.language, + default_engine: defaultEngine || undefined, }); // Convert non-WAV uploads to WAV so the backend can always use soundfile. @@ -642,16 +708,16 @@ export function ProfileForm() { return ( - -
+ +
- {editingProfileId ? 'Edit Voice' : 'Clone voice'} + {editingProfileId ? 'Edit Voice' : 'Create Voice'} {editingProfileId ? 'Update your voice profile details and manage samples.' - : 'Create a new voice profile with an audio sample to clone the voice.'} + : 'Create a new voice profile from an audio sample or a built-in voice.'} {isCreating && profileFormDraft && (
@@ -682,143 +748,275 @@ export function ProfileForm() {
-
+
{/* Left column: Sample management */} -
+
{isCreating ? ( <> - { - const newMode = v as 'upload' | 'record' | 'system'; - // Cancel any active recordings when switching modes - if (isRecording && newMode !== 'record') { - cancelRecording(); - } - if (isSystemRecording && newMode !== 'system') { - cancelSystemRecording(); - } - setSampleMode(newMode); - }} - > - - - - Upload - - - - Record - - {platform.metadata.isTauri && isSystemAudioSupported && ( - - - System Audio - - )} - - - - ( - MAX_AUDIO_DURATION_SECONDS - } - fieldName={name} + {/* Voice source selector */} +
+
+ + +
+
+ + {voiceSource === 'builtin' ? ( +
+ + Choose a pre-built voice. These don't require an audio sample. + + + {/* Engine selector */} + + Engine + + + + {/* Voice picker */} + + Voice +
+ {presetVoices.map((voice: PresetVoice) => ( + + ))} +
+
+
+ ) : ( + <> + { + const newMode = v as 'upload' | 'record' | 'system'; + // Cancel any active recordings when switching modes + if (isRecording && newMode !== 'record') { + cancelRecording(); + } + if (isSystemRecording && newMode !== 'system') { + cancelSystemRecording(); + } + setSampleMode(newMode); + }} + > + + + + Upload + + + + Record + + {platform.metadata.isTauri && isSystemAudioSupported && ( + + + System Audio + + )} + + + + ( + MAX_AUDIO_DURATION_SECONDS + } + fieldName={name} + /> + )} + /> + + + + ( + + )} /> + + + {platform.metadata.isTauri && isSystemAudioSupported && ( + + ( + + )} + /> + )} - /> -
+
- ( - + name="referenceText" + render={({ field }) => ( + + Reference Text + +