From 3584283d8410b4cf2aed80b6248c06e032106e52 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Thu, 19 Mar 2026 10:09:48 -0700
Subject: [PATCH 1/4] feat: Kokoro 82M TTS engine + voice profile type system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Kokoro-82M as a new TTS engine — 82M params, CPU realtime, 8 languages,
Apache 2.0. Unlike cloning engines, Kokoro uses pre-built voice styles, which
required a new profile type system to support non-cloning engines cleanly.

Kokoro engine:
- New kokoro_backend.py implementing TTSBackend protocol
- 50 built-in voices across en/es/fr/hi/it/pt/ja/zh
- KPipeline API with language-aware G2P routing via misaki
- PyInstaller bundling for misaki, language_tags, espeakng_loader, en_core_web_sm

Voice profile type system:
- New voice_type column: 'cloned' | 'preset' | 'designed' (future)
- Preset profiles store engine + voice ID instead of audio samples
- default_engine field on profiles — auto-selects engine on profile pick
- Create Voice dialog: toggle between 'Clone from audio' and 'Built-in voice'
- Edit dialog shows preset voice info instead of sample list for preset profiles
- Engine selector locks to preset engine when preset profile is selected
- Profile grid filters by engine — shows Kokoro voices when Kokoro selected
- Custom empty state when no preset profiles exist for selected engine

Bug fixes:
- Fix relative audio paths in DB causing 404s in production builds
- config.set_data_dir() now resolves to absolute paths
- Startup migration converts existing relative paths to absolute

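Also pre-fills generation effects from the selected profile's effects chain
(new 'Profile default' option) and updates PROJECT_STATUS.md and the
tts-engines.mdx developer guide.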
---
 .../Generation/EngineModelSelector.tsx        |  70 ++-
 .../Generation/FloatingGenerateBox.tsx        |  64 ++-
 .../components/Generation/GenerationForm.tsx  |   2 +-
 .../ServerSettings/ModelManagement.tsx        |   5 +-
 .../components/VoiceProfiles/ProfileCard.tsx  |  10 +
 .../components/VoiceProfiles/ProfileForm.tsx  | 481 +++++++++++++-----
 .../components/VoiceProfiles/ProfileList.tsx  |  35 +-
 app/src/lib/api/client.ts                     |  11 +
 app/src/lib/api/types.ts                      |  21 +-
 app/src/lib/constants/languages.ts            |   2 +
 app/src/lib/hooks/useGenerationForm.ts        |  14 +-
 app/src/stores/uiStore.ts                     |   7 +
 backend/backends/__init__.py                  |  13 +
 backend/backends/kokoro_backend.py            | 288 +++++++++++
 backend/build_binary.py                       |  38 ++
 backend/config.py                             |   6 +-
 backend/database/migrations.py                |  56 ++
 backend/database/models.py                    |  16 +-
 backend/models.py                             |  12 +-
 backend/requirements.txt                      |   7 +
 backend/routes/profiles.py                    |  97 ++++
 backend/services/profiles.py                  |  54 +-
 backend/voicebox-server.spec                  |  17 +-
 docs/content/docs/developer/tts-engines.mdx   |  28 +
 docs/notes/PROJECT_STATUS.md                  | 239 ++++-----
 tauri/src-tauri/Cargo.lock                    |   2 +-
 26 files changed, 1303 insertions(+), 292 deletions(-)
 create mode 100644 backend/backends/kokoro_backend.py

diff --git a/app/src/components/Generation/EngineModelSelector.tsx b/app/src/components/Generation/EngineModelSelector.tsx
index 4382d3f7..773aa089 100644
--- a/app/src/components/Generation/EngineModelSelector.tsx
+++ b/app/src/components/Generation/EngineModelSelector.tsx
@@ -7,6 +7,7 @@ import {
   SelectTrigger,
   SelectValue,
 } from '@/components/ui/select';
+import type { VoiceProfileResponse } from '@/lib/api/types';
 import { getLanguageOptionsForEngine } from '@/lib/constants/languages';
 import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
 
@@ -15,13 +16,14 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
  * Adding a new engine means adding one entry here.
  */
 const ENGINE_OPTIONS = [
-  { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B' },
-  { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B' },
-  { value: 'luxtts', label: 'LuxTTS' },
-  { value: 'chatterbox', label: 'Chatterbox' },
-  { value: 'chatterbox_turbo', label: 'Chatterbox Turbo' },
-  { value: 'tada:1B', label: 'TADA 1B' },
-  { value: 'tada:3B', label: 'TADA 3B Multilingual' },
+  { value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' },
+  { value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' },
+  { value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' },
+  { value: 'chatterbox', label: 'Chatterbox', engine: 'chatterbox' },
+  { value: 'chatterbox_turbo', label: 'Chatterbox Turbo', engine: 'chatterbox_turbo' },
+  { value: 'tada:1B', label: 'TADA 1B', engine: 'tada' },
+  { value: 'tada:3B', label: 'TADA 3B Multilingual', engine: 'tada' },
+  { value: 'kokoro', label: 'Kokoro 82M', engine: 'kokoro' },
 ] as const;
 
 const ENGINE_DESCRIPTIONS: Record<string, string> = {
@@ -30,11 +32,38 @@ const ENGINE_DESCRIPTIONS: Record<string, string> = {
   chatterbox: '23 languages, incl. Hebrew',
   chatterbox_turbo: 'English, [laugh] [cough] tags',
   tada: 'HumeAI, 700s+ coherent audio',
+  kokoro: '82M params, CPU realtime, 8 langs',
 };
 
 /** Engines that only support English and should force language to 'en' on select. */
 const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']);
 
+/** Engines that support cloned (reference audio) profiles. */
+const CLONING_ENGINES = new Set(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']);
+
+/** Engines that are preset-only (no cloning). */
+const PRESET_ONLY_ENGINES = new Set(['kokoro']);
+
+/**
+ * Return the engine options that can be selected for the given profile.
+ *
+ * - Preset profiles: only the options of their preset engine
+ * - No profile, or any other profile type: all engines available
+ */
+function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) {
+  if (!selectedProfile) return ENGINE_OPTIONS;
+
+  const voiceType = selectedProfile.voice_type || 'cloned';
+
+  if (voiceType === 'preset') {
+    // Preset profiles lock to their specific engine
+    const presetEngine = selectedProfile.preset_engine;
+    return ENGINE_OPTIONS.filter((opt) => opt.engine === presetEngine);
+  }
+
+  return ENGINE_OPTIONS;
+}
+
 function getSelectValue(engine: string, modelSize?: string): string {
   if (engine === 'qwen') return `qwen:${modelSize || '1.7B'}`;
   if (engine === 'tada') return `tada:${modelSize || '1B'}`;
@@ -85,12 +114,21 @@ function handleEngineChange(form: UseFormReturn<GenerationFormValues>, value: st
 interface EngineModelSelectorProps {
   form: UseFormReturn<GenerationFormValues>;
   compact?: boolean;
+  selectedProfile?: VoiceProfileResponse | null;
 }
 
-export function EngineModelSelector({ form, compact }: EngineModelSelectorProps) {
+export function EngineModelSelector({ form, compact, selectedProfile }: EngineModelSelectorProps) {
   const engine = form.watch('engine') || 'qwen';
   const modelSize = form.watch('modelSize');
   const selectValue = getSelectValue(engine, modelSize);
+  const availableOptions = getAvailableOptions(selectedProfile);
+
+  // If current engine isn't in available options, auto-switch to first available
+  const currentEngineAvailable = availableOptions.some((opt) => opt.value === selectValue);
+  if (!currentEngineAvailable && availableOptions.length > 0) {
+    // Deferred so the form is not updated mid-render. NOTE(review): this still schedules a timer from render; consider useEffect.
+    setTimeout(() => handleEngineChange(form, availableOptions[0].value), 0);
+  }
 
   const itemClass = compact ? 'text-xs text-muted-foreground' : undefined;
   const triggerClass = compact
@@ -105,7 +143,7 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps)
         </SelectTrigger>
       </FormControl>
       <SelectContent>
-        {ENGINE_OPTIONS.map((opt) => (
+        {availableOptions.map((opt) => (
           <SelectItem key={opt.value} value={opt.value} className={itemClass}>
             {opt.label}
           </SelectItem>
@@ -119,3 +157,17 @@ export function EngineModelSelector({ form, compact }: EngineModelSelectorProps)
 export function getEngineDescription(engine: string): string {
   return ENGINE_DESCRIPTIONS[engine] ?? '';
 }
+
+/**
+ * Check whether a profile can be used with the given engine.
+ * `engine` is the bare engine id ('qwen'), not a select value ('qwen:1.7B'). Useful for UI hints.
+ */
+export function isProfileCompatibleWithEngine(
+  profile: VoiceProfileResponse,
+  engine: string,
+): boolean {
+  const voiceType = profile.voice_type || 'cloned';
+  if (voiceType === 'preset') return profile.preset_engine === engine;
+  if (voiceType === 'cloned') return CLONING_ENGINES.has(engine);
+  return !PRESET_ONLY_ENGINES.has(engine); // designed — future
+}
diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx
index 96e8f553..ae1cad26 100644
--- a/app/src/components/Generation/FloatingGenerateBox.tsx
+++ b/app/src/components/Generation/FloatingGenerateBox.tsx
@@ -36,6 +36,7 @@ export function FloatingGenerateBox({
 }: FloatingGenerateBoxProps) {
   const selectedProfileId = useUIStore((state) => state.selectedProfileId);
   const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId);
+  const setSelectedEngine = useUIStore((state) => state.setSelectedEngine);
   const { data: selectedProfile } = useProfile(selectedProfileId || '');
   const { data: profiles } = useProfiles();
   const [isExpanded, setIsExpanded] = useState(false);
@@ -67,7 +68,12 @@ export function FloatingGenerateBox({
       }
     },
     getEffectsChain: () => {
-      if (!selectedPresetId || !effectPresets) return undefined;
+      if (!selectedPresetId) return undefined;
+      // '_profile' is a sentinel: use the selected profile's own effects chain instead of a saved preset
+      if (selectedPresetId === '_profile') {
+        return selectedProfile?.effects_chain ?? undefined;
+      }
+      if (!effectPresets) return undefined;
       const preset = effectPresets.find((p) => p.id === selectedPresetId);
       return preset?.effects_chain;
     },
@@ -110,12 +116,56 @@ export function FloatingGenerateBox({
     }
   }, [selectedProfileId, profiles, setSelectedProfileId]);
 
-  // Sync generation form language with selected profile's language
+  // Sync engine selection to global store so ProfileList can filter
+  const watchedEngine = form.watch('engine');
+  useEffect(() => {
+    if (watchedEngine) {
+      setSelectedEngine(watchedEngine);
+    }
+  }, [watchedEngine, setSelectedEngine]);
+
+  // Sync generation form language, engine, and effects with selected profile
   useEffect(() => {
     if (selectedProfile?.language) {
       form.setValue('language', selectedProfile.language as LanguageCode);
     }
-  }, [selectedProfile, form]);
+    // Auto-switch engine if profile has a default
+    if (selectedProfile?.default_engine) {
+      form.setValue(
+        'engine',
+        selectedProfile.default_engine as
+          | 'qwen'
+          | 'luxtts'
+          | 'chatterbox'
+          | 'chatterbox_turbo'
+          | 'tada'
+          | 'kokoro',
+      );
+    }
+    // Pre-fill effects from profile defaults
+    if (
+      selectedProfile?.effects_chain &&
+      selectedProfile.effects_chain.length > 0 &&
+      effectPresets
+    ) {
+      // Try to match against a known preset
+      const profileChainJson = JSON.stringify(selectedProfile.effects_chain);
+      const matchingPreset = effectPresets.find(
+        (p) => JSON.stringify(p.effects_chain) === profileChainJson,
+      );
+      if (matchingPreset) {
+        setSelectedPresetId(matchingPreset.id);
+      } else {
+        // No matching preset — use special value to pass profile chain directly
+        setSelectedPresetId('_profile');
+      }
+    } else if (
+      selectedProfile &&
+      (!selectedProfile.effects_chain || selectedProfile.effects_chain.length === 0)
+    ) {
+      setSelectedPresetId(null);
+    }
+  }, [selectedProfile, effectPresets, form]);
 
   // Auto-resize textarea based on content (only when expanded)
   useEffect(() => {
@@ -358,7 +408,7 @@ export function FloatingGenerateBox({
                   />
 
                   <FormItem className="flex-1 space-y-0">
-                    <EngineModelSelector form={form} compact />
+                    <EngineModelSelector form={form} compact selectedProfile={selectedProfile} />
                   </FormItem>
 
                   <FormItem className="flex-1 space-y-0">
@@ -375,6 +425,12 @@ export function FloatingGenerateBox({
                         <SelectItem value="none" className="text-xs">
                           No effects
                         </SelectItem>
+                        {selectedProfile?.effects_chain &&
+                          selectedProfile.effects_chain.length > 0 && (
+                            <SelectItem value="_profile" className="text-xs">
+                              Profile default
+                            </SelectItem>
+                          )}
                         {effectPresets?.map((preset) => (
                           <SelectItem key={preset.id} value={preset.id} className="text-xs">
                             {preset.name}
diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx
index 225e8dfa..9f7a7cd7 100644
--- a/app/src/components/Generation/GenerationForm.tsx
+++ b/app/src/components/Generation/GenerationForm.tsx
@@ -118,7 +118,7 @@ export function GenerationForm() {
             <div className="grid gap-4 md:grid-cols-3">
               <FormItem>
                 <FormLabel>Model</FormLabel>
-                <EngineModelSelector form={form} />
+                <EngineModelSelector form={form} selectedProfile={selectedProfile} />
                 <FormDescription>
                   {getEngineDescription(form.watch('engine') || 'qwen')}
                 </FormDescription>
diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx
index c415306d..e7eda69f 100644
--- a/app/src/components/ServerSettings/ModelManagement.tsx
+++ b/app/src/components/ServerSettings/ModelManagement.tsx
@@ -66,6 +66,8 @@ const MODEL_DESCRIPTIONS: Record<string, string> = {
     'HumeAI TADA 1B — English speech-language model built on Llama 3.2 1B. Generates 700s+ of coherent audio with synchronized text-acoustic alignment.',
   'tada-3b-ml':
     'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.',
+  kokoro:
+    'Kokoro 82M by hexgrad. Tiny 82M-parameter TTS that runs at CPU realtime. Supports 8 languages with pre-built voice styles. Apache 2.0 licensed.',
   'whisper-base':
     'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.',
   'whisper-small':
@@ -396,7 +398,8 @@ export function ModelManagement() {
         m.model_name.startsWith('qwen-tts') ||
         m.model_name.startsWith('luxtts') ||
         m.model_name.startsWith('chatterbox') ||
-        m.model_name.startsWith('tada'),
+        m.model_name.startsWith('tada') ||
+        m.model_name.startsWith('kokoro'),
     ) ?? [];
   const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? [];
 
diff --git a/app/src/components/VoiceProfiles/ProfileCard.tsx b/app/src/components/VoiceProfiles/ProfileCard.tsx
index 3675b765..e2a9d4d4 100644
--- a/app/src/components/VoiceProfiles/ProfileCard.tsx
+++ b/app/src/components/VoiceProfiles/ProfileCard.tsx
@@ -97,6 +97,16 @@ export function ProfileCard({ profile }: ProfileCardProps) {
             <Badge variant="outline" className="text-xs h-5 px-1.5 text-muted-foreground">
               {profile.language}
             </Badge>
+            {profile.voice_type === 'preset' && (
+              <Badge variant="secondary" className="text-xs h-5 px-1.5">
+                {profile.preset_engine}
+              </Badge>
+            )}
+            {profile.voice_type === 'designed' && (
+              <Badge variant="secondary" className="text-xs h-5 px-1.5">
+                designed
+              </Badge>
+            )}
             {profile.effects_chain && profile.effects_chain.length > 0 && (
               <Sparkles className="h-3.5 w-3.5 text-accent fill-accent" />
             )}
diff --git a/app/src/components/VoiceProfiles/ProfileForm.tsx b/app/src/components/VoiceProfiles/ProfileForm.tsx
index 13edf6f4..d3f53eac 100644
--- a/app/src/components/VoiceProfiles/ProfileForm.tsx
+++ b/app/src/components/VoiceProfiles/ProfileForm.tsx
@@ -1,9 +1,11 @@
 import { zodResolver } from '@hookform/resolvers/zod';
-import { Edit2, Mic, Monitor, Upload, X } from 'lucide-react';
+import { useQuery } from '@tanstack/react-query';
+import { Edit2, Mic, Monitor, Music, Upload, X } from 'lucide-react';
 import { useEffect, useRef, useState } from 'react';
 import { useForm } from 'react-hook-form';
 import * as z from 'zod';
 import { EffectsChainEditor } from '@/components/Effects/EffectsChainEditor';
+import { Badge } from '@/components/ui/badge';
 import { Button } from '@/components/ui/button';
 import {
   Dialog,
@@ -15,6 +17,7 @@ import {
 import {
   Form,
   FormControl,
+  FormDescription,
   FormField,
   FormItem,
   FormLabel,
@@ -32,7 +35,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
 import { Textarea } from '@/components/ui/textarea';
 import { useToast } from '@/components/ui/use-toast';
 import { apiClient } from '@/lib/api/client';
-import type { EffectConfig } from '@/lib/api/types';
+import type { EffectConfig, PresetVoice, VoiceType } from '@/lib/api/types';
 import { LANGUAGE_CODES, LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages';
 import { useAudioPlayer } from '@/lib/hooks/useAudioPlayer';
 import { useAudioRecording } from '@/lib/hooks/useAudioRecording';
@@ -120,16 +123,20 @@ export function ProfileForm() {
   const deleteAvatar = useDeleteAvatar();
   const transcribe = useTranscription();
   const { toast } = useToast();
+  const [voiceSource, setVoiceSource] = useState<'clone' | 'builtin'>('clone');
   const [sampleMode, setSampleMode] = useState<'upload' | 'record' | 'system'>('record');
   const [audioDuration, setAudioDuration] = useState<number | null>(null);
   const [isValidatingAudio, setIsValidatingAudio] = useState(false);
   const [avatarPreview, setAvatarPreview] = useState<string | null>(null);
+  const [selectedPresetEngine, setSelectedPresetEngine] = useState<string>('kokoro');
+  const [selectedPresetVoiceId, setSelectedPresetVoiceId] = useState<string>('');
   const avatarInputRef = useRef<HTMLInputElement>(null);
   const { isPlaying, playPause, cleanup: cleanupAudio } = useAudioPlayer();
   const isCreating = !editingProfileId;
   const serverUrl = useServerStore((state) => state.serverUrl);
   const [profileEffectsChain, setProfileEffectsChain] = useState<EffectConfig[]>([]);
   const [effectsDirty, setEffectsDirty] = useState(false);
+  const [defaultEngine, setDefaultEngine] = useState<string>('');
 
   const form = useForm<ProfileFormValues>({
     resolver: zodResolver(profileSchema),
@@ -239,6 +246,20 @@ export function ProfileForm() {
     },
   });
 
+  // Fetch preset voices for the engine picked in create mode, or for the edited profile's preset engine
+  const presetEngineToQuery = isCreating
+    ? selectedPresetEngine
+    : (editingProfile?.preset_engine ?? '');
+  const { data: presetVoicesData } = useQuery({
+    queryKey: ['presetVoices', presetEngineToQuery],
+    queryFn: () => apiClient.listPresetVoices(presetEngineToQuery),
+    enabled:
+      !!presetEngineToQuery &&
+      ((voiceSource === 'builtin' && isCreating) ||
+        (!isCreating && editingProfile?.voice_type === 'preset')),
+  });
+  const presetVoices = presetVoicesData?.voices ?? [];
+
   // Show recording errors
   useEffect(() => {
     if (recordingError) {
@@ -287,6 +308,7 @@ export function ProfileForm() {
       });
       setProfileEffectsChain(editingProfile.effects_chain ?? []);
       setEffectsDirty(false);
+      setDefaultEngine(editingProfile.default_engine ?? '');
     } else if (profileFormDraft && open) {
       // Restore from draft when opening in create mode
       form.reset({
@@ -415,13 +437,14 @@ export function ProfileForm() {
   async function onSubmit(data: ProfileFormValues) {
     try {
       if (editingProfileId) {
-        // Editing: just update profile
+        // Editing: update profile
         await updateProfile.mutateAsync({
           profileId: editingProfileId,
           data: {
             name: data.name,
             description: data.description,
             language: data.language,
+            default_engine: defaultEngine || undefined,
           },
         });
 
@@ -464,8 +487,50 @@ export function ProfileForm() {
           title: 'Voice updated',
           description: `"${data.name}" has been updated successfully.`,
         });
+      } else if (voiceSource === 'builtin') {
+        // Creating preset profile from built-in voice
+        if (!selectedPresetVoiceId) {
+          toast({
+            title: 'No voice selected',
+            description: 'Please select a built-in voice.',
+            variant: 'destructive',
+          });
+          return;
+        }
+
+        const profile = await createProfile.mutateAsync({
+          name: data.name,
+          description: data.description,
+          language: data.language,
+          voice_type: 'preset' as VoiceType,
+          preset_engine: selectedPresetEngine,
+          preset_voice_id: selectedPresetVoiceId,
+          default_engine: selectedPresetEngine,
+        });
+
+        // Handle avatar upload if provided
+        if (data.avatarFile) {
+          try {
+            await uploadAvatar.mutateAsync({
+              profileId: profile.id,
+              file: data.avatarFile,
+            });
+          } catch (avatarError) {
+            toast({
+              title: 'Avatar upload failed',
+              description:
+                avatarError instanceof Error ? avatarError.message : 'Failed to upload avatar',
+              variant: 'destructive',
+            });
+          }
+        }
+
+        toast({
+          title: 'Profile created',
+          description: `"${data.name}" has been created with a built-in voice.`,
+        });
       } else {
-        // Creating: require sample file and reference text
+        // Creating cloned profile: require sample file and reference text
         const sampleFile = form.getValues('sampleFile');
         const referenceText = form.getValues('referenceText');
 
@@ -528,6 +593,7 @@ export function ProfileForm() {
           name: data.name,
           description: data.description,
           language: data.language,
+          default_engine: defaultEngine || undefined,
         });
 
         // Convert non-WAV uploads to WAV so the backend can always use soundfile.
@@ -642,16 +708,16 @@ export function ProfileForm() {
 
   return (
     <Dialog open={open} onOpenChange={handleOpenChange}>
-      <DialogContent className="max-w-none w-screen h-screen left-0 top-0 translate-x-0 translate-y-0 rounded-none p-6 overflow-y-auto">
-        <div className="max-w-5xl max-h-[85vh] mx-auto my-auto w-full flex flex-col">
+      <DialogContent className="max-w-none w-screen h-screen left-0 top-0 translate-x-0 translate-y-0 rounded-none p-6 overflow-hidden">
+        <div className="max-w-5xl h-[85vh] mx-auto my-auto w-full flex flex-col overflow-hidden">
           <DialogHeader>
             <DialogTitle className="text-2xl">
-              {editingProfileId ? 'Edit Voice' : 'Clone voice'}
+              {editingProfileId ? 'Edit Voice' : 'Create Voice'}
             </DialogTitle>
             <DialogDescription>
               {editingProfileId
                 ? 'Update your voice profile details and manage samples.'
-                : 'Create a new voice profile with an audio sample to clone the voice.'}
+                : 'Create a new voice profile from an audio sample or a built-in voice.'}
             </DialogDescription>
             {isCreating && profileFormDraft && (
               <div className="flex items-center gap-2 pt-2">
@@ -682,143 +748,275 @@ export function ProfileForm() {
 
           <Form {...form}>
             <form onSubmit={form.handleSubmit(onSubmit)} className="flex-1 min-h-0 flex flex-col">
-              <div className="grid gap-6 grid-cols-2 flex-1 overflow-y-auto min-h-0">
+              <div className="grid gap-6 grid-cols-2 flex-1 min-h-0 overflow-hidden">
                 {/* Left column: Sample management */}
-                <div className="space-y-4 border-r pr-6">
+                <div className="space-y-4 border-r pr-6 overflow-y-auto min-h-0">
                   {isCreating ? (
                     <>
-                      <Tabs
-                        className="pt-4"
-                        value={sampleMode}
-                        onValueChange={(v) => {
-                          const newMode = v as 'upload' | 'record' | 'system';
-                          // Cancel any active recordings when switching modes
-                          if (isRecording && newMode !== 'record') {
-                            cancelRecording();
-                          }
-                          if (isSystemRecording && newMode !== 'system') {
-                            cancelSystemRecording();
-                          }
-                          setSampleMode(newMode);
-                        }}
-                      >
-                        <TabsList
-                          className={`grid w-full ${platform.metadata.isTauri && isSystemAudioSupported ? 'grid-cols-3' : 'grid-cols-2'}`}
-                        >
-                          <TabsTrigger value="upload" className="flex items-center gap-2">
-                            <Upload className="h-4 w-4 shrink-0" />
-                            Upload
-                          </TabsTrigger>
-                          <TabsTrigger value="record" className="flex items-center gap-2">
-                            <Mic className="h-4 w-4 shrink-0" />
-                            Record
-                          </TabsTrigger>
-                          {platform.metadata.isTauri && isSystemAudioSupported && (
-                            <TabsTrigger value="system" className="flex items-center gap-2">
-                              <Monitor className="h-4 w-4 shrink-0" />
-                              System Audio
-                            </TabsTrigger>
-                          )}
-                        </TabsList>
-
-                        <TabsContent value="upload" className="space-y-4">
-                          <FormField
-                            control={form.control}
-                            name="sampleFile"
-                            render={({ field: { onChange, name } }) => (
-                              <AudioSampleUpload
-                                file={selectedFile}
-                                onFileChange={onChange}
-                                onTranscribe={handleTranscribe}
-                                onPlayPause={handlePlayPause}
-                                isPlaying={isPlaying}
-                                isValidating={isValidatingAudio}
-                                isTranscribing={transcribe.isPending}
-                                isDisabled={
-                                  audioDuration !== null &&
-                                  audioDuration > MAX_AUDIO_DURATION_SECONDS
-                                }
-                                fieldName={name}
+                      {/* Voice source selector */}
+                      <div className="flex pt-4 pb-2">
+                        <div className="inline-flex rounded-lg border border-border p-0.5 bg-muted/50">
+                          <button
+                            type="button"
+                            onClick={() => setVoiceSource('clone')}
+                            className={`inline-flex items-center gap-2 px-3 py-1.5 text-sm rounded-md transition-colors ${
+                              voiceSource === 'clone'
+                                ? 'bg-accent text-accent-foreground shadow-sm'
+                                : 'text-muted-foreground hover:text-foreground'
+                            }`}
+                          >
+                            <Mic className="h-3.5 w-3.5" />
+                            Clone from audio
+                          </button>
+                          <button
+                            type="button"
+                            onClick={() => setVoiceSource('builtin')}
+                            className={`inline-flex items-center gap-2 px-3 py-1.5 text-sm rounded-md transition-colors ${
+                              voiceSource === 'builtin'
+                                ? 'bg-accent text-accent-foreground shadow-sm'
+                                : 'text-muted-foreground hover:text-foreground'
+                            }`}
+                          >
+                            <Music className="h-3.5 w-3.5" />
+                            Built-in voice
+                          </button>
+                        </div>
+                      </div>
+
+                      {voiceSource === 'builtin' ? (
+                        <div className="space-y-4">
+                          <FormDescription>
+                            Choose a pre-built voice. These don't require an audio sample.
+                          </FormDescription>
+
+                          {/* Engine selector */}
+                          <FormItem>
+                            <FormLabel>Engine</FormLabel>
+                            <Select
+                              value={selectedPresetEngine}
+                              onValueChange={setSelectedPresetEngine}
+                            >
+                              <FormControl>
+                                <SelectTrigger>
+                                  <SelectValue />
+                                </SelectTrigger>
+                              </FormControl>
+                              <SelectContent>
+                                <SelectItem value="kokoro">Kokoro 82M</SelectItem>
+                              </SelectContent>
+                            </Select>
+                          </FormItem>
+
+                          {/* Voice picker */}
+                          <FormItem>
+                            <FormLabel>Voice</FormLabel>
+                            <div className="grid grid-cols-2 gap-1.5 max-h-[340px] overflow-y-auto pr-1">
+                              {presetVoices.map((voice: PresetVoice) => (
+                                <button
+                                  key={voice.voice_id}
+                                  type="button"
+                                  onClick={() => {
+                                    setSelectedPresetVoiceId(voice.voice_id);
+                                    // Auto-set language from voice
+                                    if (voice.language) {
+                                      form.setValue('language', voice.language as LanguageCode);
+                                    }
+                                  }}
+                                  className={`text-left px-3 py-2 rounded-md border text-sm transition-colors ${
+                                    selectedPresetVoiceId === voice.voice_id
+                                      ? 'border-accent bg-accent/10 text-accent-foreground'
+                                      : 'border-border hover:bg-muted'
+                                  }`}
+                                >
+                                  <div className="font-medium">{voice.name}</div>
+                                  <div className="flex gap-1.5 mt-0.5">
+                                    <Badge variant="outline" className="text-[10px] h-4 px-1">
+                                      {voice.gender}
+                                    </Badge>
+                                    <Badge variant="outline" className="text-[10px] h-4 px-1">
+                                      {voice.language}
+                                    </Badge>
+                                  </div>
+                                </button>
+                              ))}
+                            </div>
+                          </FormItem>
+                        </div>
+                      ) : (
+                        <>
+                          <Tabs
+                            className="pt-0"
+                            value={sampleMode}
+                            onValueChange={(v) => {
+                              const newMode = v as 'upload' | 'record' | 'system';
+                              // Cancel any active recordings when switching modes
+                              if (isRecording && newMode !== 'record') {
+                                cancelRecording();
+                              }
+                              if (isSystemRecording && newMode !== 'system') {
+                                cancelSystemRecording();
+                              }
+                              setSampleMode(newMode);
+                            }}
+                          >
+                            <TabsList
+                              className={`grid w-full ${platform.metadata.isTauri && isSystemAudioSupported ? 'grid-cols-3' : 'grid-cols-2'}`}
+                            >
+                              <TabsTrigger value="upload" className="flex items-center gap-2">
+                                <Upload className="h-4 w-4 shrink-0" />
+                                Upload
+                              </TabsTrigger>
+                              <TabsTrigger value="record" className="flex items-center gap-2">
+                                <Mic className="h-4 w-4 shrink-0" />
+                                Record
+                              </TabsTrigger>
+                              {platform.metadata.isTauri && isSystemAudioSupported && (
+                                <TabsTrigger value="system" className="flex items-center gap-2">
+                                  <Monitor className="h-4 w-4 shrink-0" />
+                                  System Audio
+                                </TabsTrigger>
+                              )}
+                            </TabsList>
+
+                            <TabsContent value="upload" className="space-y-4">
+                              <FormField
+                                control={form.control}
+                                name="sampleFile"
+                                render={({ field: { onChange, name } }) => (
+                                  <AudioSampleUpload
+                                    file={selectedFile}
+                                    onFileChange={onChange}
+                                    onTranscribe={handleTranscribe}
+                                    onPlayPause={handlePlayPause}
+                                    isPlaying={isPlaying}
+                                    isValidating={isValidatingAudio}
+                                    isTranscribing={transcribe.isPending}
+                                    isDisabled={
+                                      audioDuration !== null &&
+                                      audioDuration > MAX_AUDIO_DURATION_SECONDS
+                                    }
+                                    fieldName={name}
+                                  />
+                                )}
+                              />
+                            </TabsContent>
+
+                            <TabsContent value="record" className="space-y-4">
+                              <FormField
+                                control={form.control}
+                                name="sampleFile"
+                                render={() => (
+                                  <AudioSampleRecording
+                                    file={selectedFile}
+                                    isRecording={isRecording}
+                                    duration={duration}
+                                    onStart={startRecording}
+                                    onStop={stopRecording}
+                                    onCancel={handleCancelRecording}
+                                    onTranscribe={handleTranscribe}
+                                    onPlayPause={handlePlayPause}
+                                    isPlaying={isPlaying}
+                                    isTranscribing={transcribe.isPending}
+                                  />
+                                )}
                               />
+                            </TabsContent>
+
+                            {platform.metadata.isTauri && isSystemAudioSupported && (
+                              <TabsContent value="system" className="space-y-4">
+                                <FormField
+                                  control={form.control}
+                                  name="sampleFile"
+                                  render={() => (
+                                    <AudioSampleSystem
+                                      file={selectedFile}
+                                      isRecording={isSystemRecording}
+                                      duration={systemDuration}
+                                      onStart={startSystemRecording}
+                                      onStop={stopSystemRecording}
+                                      onCancel={handleCancelRecording}
+                                      onTranscribe={handleTranscribe}
+                                      onPlayPause={handlePlayPause}
+                                      isPlaying={isPlaying}
+                                      isTranscribing={transcribe.isPending}
+                                    />
+                                  )}
+                                />
+                              </TabsContent>
                             )}
-                          />
-                        </TabsContent>
+                          </Tabs>
 
-                        <TabsContent value="record" className="space-y-4">
                           <FormField
                             control={form.control}
-                            name="sampleFile"
-                            render={() => (
-                              <AudioSampleRecording
-                                file={selectedFile}
-                                isRecording={isRecording}
-                                duration={duration}
-                                onStart={startRecording}
-                                onStop={stopRecording}
-                                onCancel={handleCancelRecording}
-                                onTranscribe={handleTranscribe}
-                                onPlayPause={handlePlayPause}
-                                isPlaying={isPlaying}
-                                isTranscribing={transcribe.isPending}
-                              />
+                            name="referenceText"
+                            render={({ field }) => (
+                              <FormItem>
+                                <FormLabel>Reference Text</FormLabel>
+                                <FormControl>
+                                  <Textarea
+                                    placeholder="Enter the exact text spoken in the audio..."
+                                    className="min-h-[100px]"
+                                    {...field}
+                                  />
+                                </FormControl>
+                                <FormMessage />
+                              </FormItem>
                             )}
                           />
-                        </TabsContent>
-
-                        {platform.metadata.isTauri && isSystemAudioSupported && (
-                          <TabsContent value="system" className="space-y-4">
-                            <FormField
-                              control={form.control}
-                              name="sampleFile"
-                              render={() => (
-                                <AudioSampleSystem
-                                  file={selectedFile}
-                                  isRecording={isSystemRecording}
-                                  duration={systemDuration}
-                                  onStart={startSystemRecording}
-                                  onStop={stopSystemRecording}
-                                  onCancel={handleCancelRecording}
-                                  onTranscribe={handleTranscribe}
-                                  onPlayPause={handlePlayPause}
-                                  isPlaying={isPlaying}
-                                  isTranscribing={transcribe.isPending}
-                                />
-                              )}
-                            />
-                          </TabsContent>
-                        )}
-                      </Tabs>
-
-                      <FormField
-                        control={form.control}
-                        name="referenceText"
-                        render={({ field }) => (
-                          <FormItem>
-                            <FormLabel>Reference Text</FormLabel>
-                            <FormControl>
-                              <Textarea
-                                placeholder="Enter the exact text spoken in the audio..."
-                                className="min-h-[100px]"
-                                {...field}
-                              />
-                            </FormControl>
-                            <FormMessage />
-                          </FormItem>
-                        )}
-                      />
+                        </>
+                      )}
                     </>
                   ) : (
-                    // Show sample list when editing
-                    editingProfileId && (
+                    // Editing mode
+                    editingProfileId &&
+                    editingProfile &&
+                    (editingProfile.voice_type === 'preset' ? (
+                      <div className="space-y-4 pt-4">
+                        <div className="rounded-lg border border-border p-4 space-y-3">
+                          <div className="text-sm font-medium text-muted-foreground">
+                            Built-in Voice
+                          </div>
+                          <div className="flex items-center gap-3">
+                            <div className="text-lg font-semibold">
+                              {presetVoices.find(
+                                (v: PresetVoice) => v.voice_id === editingProfile.preset_voice_id,
+                              )?.name ?? editingProfile.preset_voice_id}
+                            </div>
+                            <Badge variant="secondary" className="text-xs">
+                              {editingProfile.preset_engine}
+                            </Badge>
+                          </div>
+                          {(() => {
+                            const voice = presetVoices.find(
+                              (v: PresetVoice) => v.voice_id === editingProfile.preset_voice_id,
+                            );
+                            return voice ? (
+                              <div className="flex gap-1.5">
+                                <Badge variant="outline" className="text-xs">
+                                  {voice.gender}
+                                </Badge>
+                                <Badge variant="outline" className="text-xs">
+                                  {voice.language}
+                                </Badge>
+                              </div>
+                            ) : null;
+                          })()}
+                        </div>
+                        <p className="text-xs text-muted-foreground">
+                          This profile uses a built-in voice. The voice cannot be changed after
+                          creation.
+                        </p>
+                      </div>
+                    ) : (
                       <div>
                         <SampleList profileId={editingProfileId} />
                       </div>
-                    )
+                    ))
                   )}
                 </div>
 
                 {/* Right column: Profile info */}
-                <div className="space-y-4">
+                <div className="space-y-4 overflow-y-auto min-h-0">
                   {/* Avatar Upload */}
                   <FormField
                     control={form.control}
@@ -924,6 +1122,37 @@ export function ProfileForm() {
                     )}
                   />
 
+                  <FormItem>
+                    <FormLabel>Default Engine</FormLabel>
+                    <Select
+                      value={defaultEngine || '_none'}
+                      onValueChange={(v) => {
+                        setDefaultEngine(v === '_none' ? '' : v);
+                      }}
+                      disabled={
+                        voiceSource === 'builtin' || editingProfile?.voice_type === 'preset'
+                      }
+                    >
+                      <FormControl>
+                        <SelectTrigger>
+                          <SelectValue placeholder="No preference" />
+                        </SelectTrigger>
+                      </FormControl>
+                      <SelectContent>
+                        <SelectItem value="_none">No preference</SelectItem>
+                        <SelectItem value="qwen">Qwen3-TTS</SelectItem>
+                        <SelectItem value="luxtts">LuxTTS</SelectItem>
+                        <SelectItem value="chatterbox">Chatterbox</SelectItem>
+                        <SelectItem value="chatterbox_turbo">Chatterbox Turbo</SelectItem>
+                        <SelectItem value="tada">TADA</SelectItem>
+                        <SelectItem value="kokoro">Kokoro 82M</SelectItem>
+                      </SelectContent>
+                    </Select>
+                    <p className="text-xs text-muted-foreground">
+                      Auto-selects this engine when the profile is chosen.
+                    </p>
+                  </FormItem>
+
                   {editingProfileId && (
                     <div className="space-y-2">
                       <FormLabel>Default Effects</FormLabel>
diff --git a/app/src/components/VoiceProfiles/ProfileList.tsx b/app/src/components/VoiceProfiles/ProfileList.tsx
index 89252433..97dde233 100644
--- a/app/src/components/VoiceProfiles/ProfileList.tsx
+++ b/app/src/components/VoiceProfiles/ProfileList.tsx
@@ -1,4 +1,4 @@
-import { Mic, Sparkles } from 'lucide-react';
+import { Mic, Music, Sparkles } from 'lucide-react';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent } from '@/components/ui/card';
 import { useProfiles } from '@/lib/hooks/useProfiles';
@@ -6,9 +6,18 @@ import { useUIStore } from '@/stores/uiStore';
 import { ProfileCard } from './ProfileCard';
 import { ProfileForm } from './ProfileForm';
 
+/** Engines that use preset (built-in) voices instead of cloned profiles. */
+const PRESET_ENGINES = new Set(['kokoro']);
+
+/** Human-readable engine names for empty state messages. */
+const ENGINE_NAMES: Record<string, string> = {
+  kokoro: 'Kokoro',
+};
+
 export function ProfileList() {
   const { data: profiles, isLoading, error } = useProfiles();
   const setDialogOpen = useUIStore((state) => state.setProfileDialogOpen);
+  const selectedEngine = useUIStore((state) => state.selectedEngine);
 
   if (isLoading) {
     return null;
@@ -23,6 +32,12 @@ export function ProfileList() {
   }
 
   const allProfiles = profiles || [];
+  const isPresetEngine = PRESET_ENGINES.has(selectedEngine);
+
+  // Filter profiles based on selected engine
+  const filteredProfiles = isPresetEngine
+    ? allProfiles.filter((p) => p.voice_type === 'preset' && p.preset_engine === selectedEngine)
+    : allProfiles.filter((p) => p.voice_type !== 'preset');
 
   return (
     <div className="flex flex-col">
@@ -40,9 +55,25 @@ export function ProfileList() {
               </Button>
             </CardContent>
           </Card>
+        ) : filteredProfiles.length === 0 && isPresetEngine ? (
+          <Card>
+            <CardContent className="flex flex-col items-center justify-center py-12">
+              <Music className="h-12 w-12 text-muted-foreground mb-4" />
+              <p className="text-muted-foreground mb-2">
+                No {ENGINE_NAMES[selectedEngine] ?? selectedEngine} voices created yet.
+              </p>
+              <p className="text-sm text-muted-foreground mb-4">
+                The default voice will be used. Create a profile to choose a specific voice.
+              </p>
+              <Button onClick={() => setDialogOpen(true)}>
+                <Sparkles className="mr-2 h-4 w-4" />
+                Create {ENGINE_NAMES[selectedEngine] ?? selectedEngine} Voice
+              </Button>
+            </CardContent>
+          </Card>
         ) : (
           <div className="flex gap-4 overflow-x-auto p-1 pb-1 lg:grid lg:grid-cols-3 lg:auto-rows-auto lg:overflow-x-visible lg:pb-[150px]">
-            {allProfiles.map((profile) => (
+            {filteredProfiles.map((profile) => (
               <div key={profile.id} className="shrink-0 w-[200px] lg:w-auto lg:shrink">
                 <ProfileCard profile={profile} />
               </div>
diff --git a/app/src/lib/api/client.ts b/app/src/lib/api/client.ts
index 98f98182..6849b8c7 100644
--- a/app/src/lib/api/client.ts
+++ b/app/src/lib/api/client.ts
@@ -17,6 +17,7 @@ import type {
   HistoryResponse,
   ModelDownloadRequest,
   ModelStatusListResponse,
+  PresetVoice,
   ProfileSampleResponse,
   StoryCreate,
   StoryDetailResponse,
@@ -97,6 +98,16 @@ class ApiClient {
     return this.request<VoiceProfileResponse>(`/profiles/${profileId}`);
   }
 
+  async listPresetVoices(engine: string): Promise<{ engine: string; voices: PresetVoice[] }> {
+    return this.request<{ engine: string; voices: PresetVoice[] }>(`/profiles/presets/${engine}`);
+  }
+
+  async seedPresetProfiles(
+    engine: string,
+  ): Promise<{ engine: string; created: number; total_available: number }> {
+    return this.request(`/profiles/presets/${engine}/seed`, { method: 'POST' });
+  }
+
   async updateProfile(profileId: string, data: VoiceProfileCreate): Promise<VoiceProfileResponse> {
     return this.request<VoiceProfileResponse>(`/profiles/${profileId}`, {
       method: 'PUT',
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index aa85d001..34e8038c 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -1,10 +1,17 @@
 // API Types matching backend Pydantic models
 import type { LanguageCode } from '@/lib/constants/languages';
 
+export type VoiceType = 'cloned' | 'preset' | 'designed';
+
 export interface VoiceProfileCreate {
   name: string;
   description?: string;
   language: LanguageCode;
+  voice_type?: VoiceType;
+  preset_engine?: string;
+  preset_voice_id?: string;
+  design_prompt?: string;
+  default_engine?: string;
 }
 
 export interface VoiceProfileResponse {
@@ -14,12 +21,24 @@ export interface VoiceProfileResponse {
   language: string;
   avatar_path?: string;
   effects_chain?: EffectConfig[];
+  voice_type: VoiceType;
+  preset_engine?: string;
+  preset_voice_id?: string;
+  design_prompt?: string;
+  default_engine?: string;
   generation_count: number;
   sample_count: number;
   created_at: string;
   updated_at: string;
 }
 
+export interface PresetVoice {
+  voice_id: string;
+  name: string;
+  gender: 'male' | 'female';
+  language: string;
+}
+
 export interface ProfileSampleCreate {
   reference_text: string;
 }
@@ -43,7 +62,7 @@ export interface GenerationRequest {
   language: LanguageCode;
   seed?: number;
   model_size?: '1.7B' | '0.6B' | '1B' | '3B';
-  engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo' | 'tada';
+  engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo' | 'tada' | 'kokoro';
   instruct?: string;
   max_chunk_chars?: number;
   crossfade_ms?: number;
diff --git a/app/src/lib/constants/languages.ts b/app/src/lib/constants/languages.ts
index a0d233a5..1a5c5f26 100644
--- a/app/src/lib/constants/languages.ts
+++ b/app/src/lib/constants/languages.ts
@@ -5,6 +5,7 @@
  * LuxTTS is English-only.
  * Chatterbox Multilingual supports 23 languages.
  * Chatterbox Turbo is English-only.
+ * Kokoro supports 8 languages.
  */
 
 /** All languages that any engine supports. */
@@ -67,6 +68,7 @@ export const ENGINE_LANGUAGES: Record<string, readonly LanguageCode[]> = {
   ],
   chatterbox_turbo: ['en'],
   tada: ['en', 'ar', 'zh', 'de', 'es', 'fr', 'it', 'ja', 'pl', 'pt'],
+  kokoro: ['en', 'es', 'fr', 'hi', 'it', 'pt', 'ja', 'zh'],
 } as const;
 
 /** Helper: get language options for a given engine. */
diff --git a/app/src/lib/hooks/useGenerationForm.ts b/app/src/lib/hooks/useGenerationForm.ts
index 8e73ce07..894d9333 100644
--- a/app/src/lib/hooks/useGenerationForm.ts
+++ b/app/src/lib/hooks/useGenerationForm.ts
@@ -17,7 +17,7 @@ const generationSchema = z.object({
   seed: z.number().int().optional(),
   modelSize: z.enum(['1.7B', '0.6B', '1B', '3B']).optional(),
   instruct: z.string().max(500).optional(),
-  engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']).optional(),
+  engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada', 'kokoro']).optional(),
 });
 
 export type GenerationFormValues = z.infer<typeof generationSchema>;
@@ -83,7 +83,9 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
                 ? data.modelSize === '3B'
                   ? 'tada-3b-ml'
                   : 'tada-1b'
-                : `qwen-tts-${data.modelSize}`;
+                : engine === 'kokoro'
+                  ? 'kokoro'
+                  : `qwen-tts-${data.modelSize}`;
       const displayName =
         engine === 'luxtts'
           ? 'LuxTTS'
@@ -95,9 +97,11 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
                 ? data.modelSize === '3B'
                   ? 'TADA 3B Multilingual'
                   : 'TADA 1B'
-                : data.modelSize === '1.7B'
-                  ? 'Qwen TTS 1.7B'
-                  : 'Qwen TTS 0.6B';
+                : engine === 'kokoro'
+                  ? 'Kokoro 82M'
+                  : data.modelSize === '1.7B'
+                    ? 'Qwen TTS 1.7B'
+                    : 'Qwen TTS 0.6B';
 
       // Check if model needs downloading
       try {
diff --git a/app/src/stores/uiStore.ts b/app/src/stores/uiStore.ts
index f2db88a2..38a089e0 100644
--- a/app/src/stores/uiStore.ts
+++ b/app/src/stores/uiStore.ts
@@ -31,6 +31,10 @@ interface UIStore {
   selectedProfileId: string | null;
   setSelectedProfileId: (id: string | null) => void;
 
+  // Currently selected engine (synced from generation form)
+  selectedEngine: string;
+  setSelectedEngine: (engine: string) => void;
+
   // Selected voice in Voices tab inspector
   selectedVoiceId: string | null;
   setSelectedVoiceId: (id: string | null) => void;
@@ -59,6 +63,9 @@ export const useUIStore = create<UIStore>((set) => ({
   selectedProfileId: null,
   setSelectedProfileId: (id) => set({ selectedProfileId: id }),
 
+  selectedEngine: 'qwen',
+  setSelectedEngine: (engine) => set({ selectedEngine: engine }),
+
   selectedVoiceId: null,
   setSelectedVoiceId: (id) => set({ selectedVoiceId: id }),
 
diff --git a/backend/backends/__init__.py b/backend/backends/__init__.py
index a4f5113a..33ae57d8 100644
--- a/backend/backends/__init__.py
+++ b/backend/backends/__init__.py
@@ -167,6 +167,7 @@ def is_loaded(self) -> bool:
     "chatterbox": "Chatterbox TTS",
     "chatterbox_turbo": "Chatterbox Turbo",
     "tada": "TADA",
+    "kokoro": "Kokoro",
 }
 
 
@@ -278,6 +279,14 @@ def _get_non_qwen_tts_configs() -> list[ModelConfig]:
             size_mb=8000,
             languages=["en", "ar", "zh", "de", "es", "fr", "it", "ja", "pl", "pt"],
         ),
+        ModelConfig(
+            model_name="kokoro",
+            display_name="Kokoro 82M",
+            engine="kokoro",
+            hf_repo_id="hexgrad/Kokoro-82M",
+            size_mb=350,
+            languages=["en", "es", "fr", "hi", "it", "pt", "ja", "zh"],
+        ),
     ]
 
 
@@ -515,6 +524,10 @@ def get_tts_backend_for_engine(engine: str) -> TTSBackend:
             from .hume_backend import HumeTadaBackend
 
             backend = HumeTadaBackend()
+        elif engine == "kokoro":
+            from .kokoro_backend import KokoroTTSBackend
+
+            backend = KokoroTTSBackend()
         else:
             raise ValueError(f"Unknown TTS engine: {engine}. Supported: {list(TTS_ENGINES.keys())}")
 
diff --git a/backend/backends/kokoro_backend.py b/backend/backends/kokoro_backend.py
new file mode 100644
index 00000000..efe91dfc
--- /dev/null
+++ b/backend/backends/kokoro_backend.py
@@ -0,0 +1,288 @@
+"""
+Kokoro TTS backend implementation.
+
+Wraps the Kokoro-82M model for fast, lightweight text-to-speech.
+82M parameters, CPU realtime, 24kHz output, Apache 2.0 license.
+
+Kokoro uses pre-built voice style vectors (not traditional zero-shot cloning
+from arbitrary audio). Voice prompts are stored as deferred references to
+HF-hosted voice .pt files.
+
+Languages supported (via misaki G2P):
+  - American English (a), British English (b)
+  - Spanish (e), French (f), Hindi (h), Italian (i), Portuguese (p)
+  - Japanese (j) — requires misaki[ja]
+  - Chinese (z) — requires misaki[zh]
+"""
+
+import asyncio
+import logging
+import os
+from typing import Optional
+
+import numpy as np
+
+from . import TTSBackend
+from .base import (
+    get_torch_device,
+    combine_voice_prompts as _combine_voice_prompts,
+    model_load_progress,
+)
+
+logger = logging.getLogger(__name__)
+
+# HuggingFace repo id passed to KModel/KPipeline and used for the local cache check
+KOKORO_HF_REPO = "hexgrad/Kokoro-82M"
+KOKORO_SAMPLE_RATE = 24000
+
+# Default voice if none specified
+KOKORO_DEFAULT_VOICE = "af_heart"
+
+# All available Kokoro voices: (voice_id, display_name, gender, ISO language code)
+KOKORO_VOICES = [
+    # American English female
+    ("af_alloy", "Alloy", "female", "en"),
+    ("af_aoede", "Aoede", "female", "en"),
+    ("af_bella", "Bella", "female", "en"),
+    ("af_heart", "Heart", "female", "en"),
+    ("af_jessica", "Jessica", "female", "en"),
+    ("af_kore", "Kore", "female", "en"),
+    ("af_nicole", "Nicole", "female", "en"),
+    ("af_nova", "Nova", "female", "en"),
+    ("af_river", "River", "female", "en"),
+    ("af_sarah", "Sarah", "female", "en"),
+    ("af_sky", "Sky", "female", "en"),
+    # American English male
+    ("am_adam", "Adam", "male", "en"),
+    ("am_echo", "Echo", "male", "en"),
+    ("am_eric", "Eric", "male", "en"),
+    ("am_fenrir", "Fenrir", "male", "en"),
+    ("am_liam", "Liam", "male", "en"),
+    ("am_michael", "Michael", "male", "en"),
+    ("am_onyx", "Onyx", "male", "en"),
+    ("am_puck", "Puck", "male", "en"),
+    ("am_santa", "Santa", "male", "en"),
+    # British English female
+    ("bf_alice", "Alice", "female", "en"),
+    ("bf_emma", "Emma", "female", "en"),
+    ("bf_isabella", "Isabella", "female", "en"),
+    ("bf_lily", "Lily", "female", "en"),
+    # British English male
+    ("bm_daniel", "Daniel", "male", "en"),
+    ("bm_fable", "Fable", "male", "en"),
+    ("bm_george", "George", "male", "en"),
+    ("bm_lewis", "Lewis", "male", "en"),
+    # Spanish
+    ("ef_dora", "Dora", "female", "es"),
+    ("em_alex", "Alex", "male", "es"),
+    ("em_santa", "Santa", "male", "es"),
+    # French
+    ("ff_siwis", "Siwis", "female", "fr"),
+    # Hindi
+    ("hf_alpha", "Alpha", "female", "hi"),
+    ("hf_beta", "Beta", "female", "hi"),
+    ("hm_omega", "Omega", "male", "hi"),
+    ("hm_psi", "Psi", "male", "hi"),
+    # Italian
+    ("if_sara", "Sara", "female", "it"),
+    ("im_nicola", "Nicola", "male", "it"),
+    # Japanese
+    ("jf_alpha", "Alpha", "female", "ja"),
+    ("jf_gongitsune", "Gongitsune", "female", "ja"),
+    ("jf_nezumi", "Nezumi", "female", "ja"),
+    ("jf_tebukuro", "Tebukuro", "female", "ja"),
+    ("jm_kumo", "Kumo", "male", "ja"),
+    # Portuguese
+    ("pf_dora", "Dora", "female", "pt"),
+    ("pm_alex", "Alex", "male", "pt"),
+    ("pm_santa", "Santa", "male", "pt"),
+    # Chinese
+    ("zf_xiaobei", "Xiaobei", "female", "zh"),
+    ("zf_xiaoni", "Xiaoni", "female", "zh"),
+    ("zf_xiaoxiao", "Xiaoxiao", "female", "zh"),
+    ("zf_xiaoyi", "Xiaoyi", "female", "zh"),
+]
+
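+# Map our ISO language codes to Kokoro lang_code characters. NOTE(review): "en" only maps to "a", so "b" (British) is never selected — confirm intended for bf_*/bm_* voices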
+LANG_CODE_MAP = {
+    "en": "a",  # American English
+    "es": "e",
+    "fr": "f",
+    "hi": "h",
+    "it": "i",
+    "pt": "p",
+    "ja": "j",
+    "zh": "z",
+}
+
+
+class KokoroTTSBackend:
+    """Kokoro-82M TTS backend — tiny, fast, CPU-friendly."""
+
+    def __init__(self):
+        self._model = None
+        self._pipelines: dict = {}  # lang_code -> KPipeline
+        self._device: Optional[str] = None
+        self.model_size = "default"
+
+    def _get_device(self) -> str:
+        """Select device. Kokoro supports CUDA and CPU. MPS needs fallback env var."""
+        device = get_torch_device(allow_mps=False)
+        # Kokoro can use MPS but requires PYTORCH_ENABLE_MPS_FALLBACK=1
+        # For now, skip MPS to avoid user confusion — CPU is already realtime
+        return device
+
+    @property
+    def device(self) -> str:
+        if self._device is None:
+            self._device = self._get_device()
+        return self._device
+
+    def is_loaded(self) -> bool:
+        return self._model is not None
+
+    def _get_model_path(self, model_size: str) -> str:
+        return KOKORO_HF_REPO
+
+    def _is_model_cached(self, model_size: str = "default") -> bool:
+        """Check if Kokoro model files are cached locally."""
+        from .base import is_model_cached
+
+        return is_model_cached(
+            KOKORO_HF_REPO,
+            required_files=["config.json", "kokoro-v1_0.pth"],
+        )
+
+    async def load_model(self, model_size: str = "default") -> None:
+        """Load the Kokoro model."""
+        if self._model is not None:
+            return
+        await asyncio.to_thread(self._load_model_sync)
+
+    def _load_model_sync(self):
+        """Synchronous model loading."""
+        model_name = "kokoro"
+        is_cached = self._is_model_cached()
+
+        with model_load_progress(model_name, is_cached):
+            from kokoro import KModel
+
+            device = self.device
+            logger.info(f"Loading Kokoro-82M on {device}...")
+
+            self._model = KModel(repo_id=KOKORO_HF_REPO).to(device).eval()
+
+        logger.info("Kokoro-82M loaded successfully")
+
+    def _get_pipeline(self, lang_code: str):
+        """Get or create the cached KPipeline for an ISO language code (codes not in LANG_CODE_MAP use "a")."""
+        kokoro_lang = LANG_CODE_MAP.get(lang_code, "a")
+
+        if kokoro_lang not in self._pipelines:
+            from kokoro import KPipeline
+
+            # Reuse the already-loaded self._model so each pipeline does not load its own copy
+            self._pipelines[kokoro_lang] = KPipeline(
+                lang_code=kokoro_lang,
+                repo_id=KOKORO_HF_REPO,
+                model=self._model,
+            )
+
+        return self._pipelines[kokoro_lang]
+
+    def unload_model(self) -> None:
+        """Unload model to free memory."""
+        if self._model is not None:
+            del self._model
+            self._model = None
+            self._pipelines.clear()
+
+            import torch
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            logger.info("Kokoro unloaded")
+
+    async def create_voice_prompt(
+        self,
+        audio_path: str,
+        reference_text: str,
+        use_cache: bool = True,
+    ) -> tuple[dict, bool]:
+        """
+        Create voice prompt for Kokoro.
+
+        Kokoro doesn't do traditional voice cloning from arbitrary audio.
+        When called for a cloned profile (fallback), uses the default voice.
+        For preset profiles, the voice_prompt dict is built by the profile
+        service and bypasses this method entirely.
+        """
+        return {
+            "voice_type": "preset",
+            "preset_engine": "kokoro",
+            "preset_voice_id": KOKORO_DEFAULT_VOICE,
+        }, False
+
+    async def combine_voice_prompts(
+        self,
+        audio_paths: list[str],
+        reference_texts: list[str],
+    ) -> tuple[np.ndarray, str]:
+        """Combine voice prompts — uses base implementation for audio concatenation."""
+        return await _combine_voice_prompts(
+            audio_paths, reference_texts, sample_rate=KOKORO_SAMPLE_RATE
+        )
+
+    async def generate(
+        self,
+        text: str,
+        voice_prompt: dict,
+        language: str = "en",
+        seed: Optional[int] = None,
+        instruct: Optional[str] = None,
+    ) -> tuple[np.ndarray, int]:
+        """
+        Generate audio from text using Kokoro.
+
+        Args:
+            text: Text to synthesize
+            voice_prompt: Dict read for "preset_voice_id", then "kokoro_voice"; falls back to KOKORO_DEFAULT_VOICE
+            language: ISO language code used to pick the pipeline (see _get_pipeline)
+            seed: Optional seed applied via torch.manual_seed (and CUDA when available)
+            instruct: Not supported by Kokoro (ignored)
+
+        Returns:
+            Tuple of (float32 audio_array, KOKORO_SAMPLE_RATE); one second of silence if no chunks were produced
+        """
+        await self.load_model()
+
+        voice_name = voice_prompt.get("preset_voice_id") or voice_prompt.get("kokoro_voice") or KOKORO_DEFAULT_VOICE
+
+        def _generate_sync():
+            import torch
+
+            if seed is not None:
+                torch.manual_seed(seed)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed(seed)
+
+            pipeline = self._get_pipeline(language)
+
+            # Collect every chunk the pipeline yields as a numpy array, then concatenate
+            audio_chunks = []
+            for result in pipeline(text, voice=voice_name, speed=1.0):
+                if result.audio is not None:
+                    chunk = result.audio
+                    if isinstance(chunk, torch.Tensor):
+                        chunk = chunk.detach().cpu().numpy()
+                    audio_chunks.append(chunk.squeeze())
+
+            if not audio_chunks:
+                # Return 1 second of silence as fallback
+                return np.zeros(KOKORO_SAMPLE_RATE, dtype=np.float32), KOKORO_SAMPLE_RATE
+
+            audio = np.concatenate(audio_chunks)
+            return audio.astype(np.float32), KOKORO_SAMPLE_RATE
+
+        return await asyncio.to_thread(_generate_sync)
diff --git a/backend/build_binary.py b/backend/build_binary.py
index 7655f331..ca7b21ac 100644
--- a/backend/build_binary.py
+++ b/backend/build_binary.py
@@ -228,6 +228,44 @@ def build_server(cuda=False):
             "torchaudio",
             "--collect-submodules",
             "tada",
+            # Kokoro 82M — lightweight TTS engine using misaki G2P
+            "--hidden-import",
+            "backend.backends.kokoro_backend",
+            "--hidden-import",
+            "kokoro",
+            "--hidden-import",
+            "kokoro.pipeline",
+            "--hidden-import",
+            "kokoro.model",
+            "--hidden-import",
+            "kokoro.istftnet",
+            "--hidden-import",
+            "kokoro.modules",
+            "--hidden-import",
+            "kokoro.custom_stft",
+            # misaki ships G2P data files (dictionaries, phoneme tables)
+            # that must be bundled for espeak/en/ja/zh G2P to work
+            "--collect-all",
+            "misaki",
+            # language_tags ships JSON data files (index.json etc.) loaded at
+            # runtime via: misaki → phonemizer → segments → csvw → language_tags
+            "--collect-all",
+            "language_tags",
+            # espeakng_loader ships the entire espeak-ng-data directory (369 files)
+            # loaded at import time by misaki.espeak via get_data_path()
+            "--collect-all",
+            "espeakng_loader",
+            # spacy en_core_web_sm model — misaki.en tries to spacy.cli.download()
+            # at runtime if not found, which calls pip as a subprocess and crashes
+            # the frozen binary. Bundle the model so spacy.util.is_package() passes.
+            "--collect-all",
+            "en_core_web_sm",
+            "--copy-metadata",
+            "en_core_web_sm",
+            "--hidden-import",
+            "en_core_web_sm",
+            "--hidden-import",
+            "loguru",
         ]
     )
 
diff --git a/backend/config.py b/backend/config.py
index 0eb3cbf7..ecfc3920 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -19,7 +19,7 @@
     logger.info("Model download path set to: %s", _custom_models_dir)
 
 # Default data directory (used in development)
-_data_dir = Path("data")
+_data_dir = Path("data").resolve()
 
 
 def set_data_dir(path: str | Path):
@@ -30,9 +30,9 @@ def set_data_dir(path: str | Path):
         path: Path to the data directory
     """
     global _data_dir
-    _data_dir = Path(path)
+    _data_dir = Path(path).resolve()
     _data_dir.mkdir(parents=True, exist_ok=True)
-    logger.info("Data directory set to: %s", _data_dir.absolute())
+    logger.info("Data directory set to: %s", _data_dir)
 
 
 def get_data_dir() -> Path:
diff --git a/backend/database/migrations.py b/backend/database/migrations.py
index 52757a68..cf805c8d 100644
--- a/backend/database/migrations.py
+++ b/backend/database/migrations.py
@@ -34,6 +34,7 @@ def run_migrations(engine) -> None:
     _migrate_generations(engine, inspector, tables)
     _migrate_effect_presets(engine, inspector, tables)
     _migrate_generation_versions(engine, inspector, tables)
+    _resolve_relative_paths(engine, tables)
 
 
 # -- helpers ---------------------------------------------------------------
@@ -134,6 +135,17 @@ def _migrate_profiles(engine, inspector, tables: set[str]) -> None:
         _add_column(engine, "profiles", "avatar_path VARCHAR", "avatar_path")
     if "effects_chain" not in columns:
         _add_column(engine, "profiles", "effects_chain TEXT", "effects_chain")
+    # Voice type system — v0.3.x
+    if "voice_type" not in columns:
+        _add_column(engine, "profiles", "voice_type VARCHAR DEFAULT 'cloned'", "voice_type")
+    if "preset_engine" not in columns:
+        _add_column(engine, "profiles", "preset_engine VARCHAR", "preset_engine")
+    if "preset_voice_id" not in columns:
+        _add_column(engine, "profiles", "preset_voice_id VARCHAR", "preset_voice_id")
+    if "design_prompt" not in columns:
+        _add_column(engine, "profiles", "design_prompt TEXT", "design_prompt")
+    if "default_engine" not in columns:
+        _add_column(engine, "profiles", "default_engine VARCHAR", "default_engine")
 
 
 def _migrate_generations(engine, inspector, tables: set[str]) -> None:
@@ -168,3 +180,47 @@ def _migrate_generation_versions(engine, inspector, tables: set[str]) -> None:
     columns = _get_columns(inspector, "generation_versions")
     if "source_version_id" not in columns:
         _add_column(engine, "generation_versions", "source_version_id VARCHAR", "source_version_id")
+
+
+def _resolve_relative_paths(engine, tables: set[str]) -> None:
+    """Resolve any relative file paths in the database to absolute paths.
+
+    Earlier versions stored paths relative to CWD (e.g. "data/generations/abc.wav"),
+    which break when the production binary's CWD differs from the data directory.
+    Each relative path is resolved against the current CWD and rewritten as absolute
+    only if the resolved file exists. Idempotent: absolute paths are left untouched.
+    """
+    from pathlib import Path
+
+    path_columns = [
+        ("generations", "audio_path"),
+        ("generation_versions", "audio_path"),
+        ("profile_samples", "audio_path"),
+        ("profiles", "avatar_path"),
+    ]
+
+    total_fixed = 0
+    with engine.connect() as conn:
+        for table, column in path_columns:
+            if table not in tables:
+                continue
+            rows = conn.execute(
+                text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL")
+            ).fetchall()
+            for row_id, path_val in rows:
+                if not path_val:  # skip empty strings (NULLs are already excluded by the query)
+                    continue
+                p = Path(path_val)
+                if p.is_absolute():
+                    continue
+                # Resolve relative to CWD (which is where they were created)
+                resolved = p.resolve()
+                if resolved.exists():  # paths that do not resolve to a real file stay unchanged
+                    conn.execute(
+                        text(f"UPDATE {table} SET {column} = :path WHERE id = :id"),
+                        {"path": str(resolved), "id": row_id},
+                    )
+                    total_fixed += 1
+        if total_fixed > 0:  # commit once, after all tables have been processed
+            conn.commit()
+            logger.info("Resolved %d relative file paths to absolute", total_fixed)
diff --git a/backend/database/models.py b/backend/database/models.py
index 19cefff2..ca03d47e 100644
--- a/backend/database/models.py
+++ b/backend/database/models.py
@@ -10,7 +10,13 @@
 
 
 class VoiceProfile(Base):
-    """Voice profile."""
+    """Voice profile.
+
+    voice_type discriminates three flavours:
+      - "cloned"   — traditional reference-audio profiles (all cloning engines)
+      - "preset"   — engine-specific pre-built voice (e.g. Kokoro voices)
+      - "designed"  — text-described voice (e.g. Qwen CustomVoice, future)
+    """
 
     __tablename__ = "profiles"
 
@@ -20,6 +26,14 @@ class VoiceProfile(Base):
     language = Column(String, default="en")
     avatar_path = Column(String, nullable=True)
     effects_chain = Column(Text, nullable=True)
+
+    # Voice type system — added v0.3.x
+    voice_type = Column(String, default="cloned")  # "cloned" | "preset" | "designed"
+    preset_engine = Column(String, nullable=True)   # e.g. "kokoro" — only for preset
+    preset_voice_id = Column(String, nullable=True)  # e.g. "am_adam" — only for preset
+    design_prompt = Column(Text, nullable=True)      # text description — only for designed
+    default_engine = Column(String, nullable=True)   # auto-selected engine, locked for preset
+
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
 
diff --git a/backend/models.py b/backend/models.py
index 4dd2b368..f568e0fa 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -15,6 +15,11 @@ class VoiceProfileCreate(BaseModel):
     language: str = Field(
         default="en", pattern="^(zh|en|ja|ko|de|fr|ru|pt|es|it|he|ar|da|el|fi|hi|ms|nl|no|pl|sv|sw|tr)$"
     )
+    voice_type: Optional[str] = Field(default="cloned", pattern="^(cloned|preset|designed)$")
+    preset_engine: Optional[str] = Field(None, max_length=50)
+    preset_voice_id: Optional[str] = Field(None, max_length=100)
+    design_prompt: Optional[str] = Field(None, max_length=2000)
+    default_engine: Optional[str] = Field(None, max_length=50)
 
 
 class VoiceProfileResponse(BaseModel):
@@ -26,6 +31,11 @@ class VoiceProfileResponse(BaseModel):
     language: str
     avatar_path: Optional[str] = None
     effects_chain: Optional[List["EffectConfig"]] = None
+    voice_type: str = "cloned"
+    preset_engine: Optional[str] = None
+    preset_voice_id: Optional[str] = None
+    design_prompt: Optional[str] = None
+    default_engine: Optional[str] = None
     generation_count: int = 0
     sample_count: int = 0
     created_at: datetime
@@ -68,7 +78,7 @@ class GenerationRequest(BaseModel):
     seed: Optional[int] = Field(None, ge=0)
     model_size: Optional[str] = Field(default="1.7B", pattern="^(1\\.7B|0\\.6B|1B|3B)$")
     instruct: Optional[str] = Field(None, max_length=500)
-    engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo|tada)$")
+    engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo|tada|kokoro)$")
     max_chunk_chars: int = Field(
         default=800, ge=100, le=5000, description="Max characters per chunk for long text splitting"
     )
diff --git a/backend/requirements.txt b/backend/requirements.txt
index ea4a2dc0..c9c65b0f 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -40,6 +40,13 @@ pyloudnorm
 # provides the only class TADA uses: Snake1d.)
 torchaudio
 
+# Kokoro TTS (lightweight 82M-param engine)
+kokoro>=0.9.4
+misaki[en]>=0.9.4
+# spacy model for misaki English G2P — must be pre-installed or misaki
+# tries spacy.cli.download() at runtime which crashes frozen builds
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+
 # Audio processing
 librosa>=0.10.0
 soundfile>=0.12.0
diff --git a/backend/routes/profiles.py b/backend/routes/profiles.py
index 5b2257e0..92552e79 100644
--- a/backend/routes/profiles.py
+++ b/backend/routes/profiles.py
@@ -1,9 +1,13 @@
 """Voice profile endpoints."""
 
 import io
+import json as _json
+import logging
 import tempfile
+import uuid
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 
 from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
 from fastapi.responses import FileResponse, StreamingResponse
@@ -15,6 +19,8 @@
 from ..services import channels, export_import, profiles
 from ..services.profiles import _profile_to_response
 
+logger = logging.getLogger(__name__)
+
 router = APIRouter()
 
 
@@ -62,6 +68,97 @@ async def import_profile(
         raise HTTPException(status_code=500, detail=str(e))
 
 
+# ── Preset Voice Endpoints ───────────────────────────────────────────
+# These MUST be declared before /profiles/{profile_id} to avoid the
+# wildcard swallowing "presets" as a profile_id.
+
+
+@router.get("/profiles/presets/{engine}")
+async def list_preset_voices(engine: str):
+    """List preset voices for an engine; engines other than "kokoro" get an empty list."""
+    if engine == "kokoro":
+        from ..backends.kokoro_backend import KOKORO_VOICES
+
+        return {
+            "engine": engine,
+            "voices": [
+                {
+                    "voice_id": vid,
+                    "name": name,
+                    "gender": gender,
+                    "language": lang,
+                }
+                for vid, name, gender, lang in KOKORO_VOICES  # 4-tuples: (voice_id, name, gender, language)
+            ],
+        }
+    return {"engine": engine, "voices": []}
+
+
+@router.post("/profiles/presets/{engine}/seed")
+async def seed_preset_profiles_route(
+    engine: str,
+    db: Session = Depends(get_db),
+):
+    """Seed preset voice profiles for an engine (currently only "kokoro").
+
+    Creates a profile for every preset voice that has none yet; a voice whose
+    name is already used by another profile is skipped. Returns the count created.
+    """
+    if engine != "kokoro":
+        raise HTTPException(status_code=400, detail=f"No presets available for engine: {engine}")
+
+    try:
+        from collections import Counter
+        from ..backends.kokoro_backend import KOKORO_VOICES
+
+        # Display names shared across languages (e.g. "Alpha" exists in Hindi and Japanese,
+        # "Dora" in Spanish and Portuguese) get a language suffix so names stay unique.
+        name_counts = Counter(voice[1] for voice in KOKORO_VOICES)
+        lang_labels = {"en": "English", "es": "Spanish", "fr": "French", "hi": "Hindi",
+                       "it": "Italian", "pt": "Portuguese", "ja": "Japanese", "zh": "Chinese"}
+
+        created = 0
+        for voice_id, display_name, gender, lang in KOKORO_VOICES:
+            profile_name = f"{display_name} {lang_labels.get(lang, lang)}" if name_counts[display_name] > 1 else display_name
+
+            # Skip if preset already exists
+            existing = (
+                db.query(DBVoiceProfile)
+                .filter_by(preset_engine="kokoro", preset_voice_id=voice_id)
+                .first()
+            )
+            if existing:
+                continue
+
+            # Skip name collisions
+            if db.query(DBVoiceProfile).filter_by(name=profile_name).first():
+                continue
+
+            profile = DBVoiceProfile(
+                id=str(uuid.uuid4()),
+                name=profile_name,
+                description=f"Kokoro preset voice — {display_name} ({gender})",
+                language=lang,
+                voice_type="preset",
+                preset_engine="kokoro",
+                preset_voice_id=voice_id,
+                default_engine="kokoro",  # same rule as create_profile(): presets default to their own engine
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            )
+            db.add(profile)
+            created += 1
+
+        if created > 0:
+            db.commit()
+            logger.info("Seeded %d Kokoro preset profiles", created)
+
+        return {"engine": engine, "created": created, "total_available": len(KOKORO_VOICES)}
+    except Exception as e:
+        logger.exception("Failed to seed Kokoro profiles: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.get("/profiles/{profile_id}", response_model=models.VoiceProfileResponse)
 async def get_profile(
     profile_id: str,
diff --git a/backend/services/profiles.py b/backend/services/profiles.py
index d20c142d..32fa3bb9 100644
--- a/backend/services/profiles.py
+++ b/backend/services/profiles.py
@@ -2,6 +2,7 @@
 Voice profile management module.
 """
 
+import logging
 from typing import List, Optional
 from datetime import datetime
 import uuid
@@ -10,6 +11,8 @@
 from sqlalchemy.orm import Session
 from sqlalchemy import func, select
 
+logger = logging.getLogger(__name__)
+
 from ..models import (
     VoiceProfileCreate,
     VoiceProfileResponse,
@@ -52,6 +55,11 @@ def _profile_to_response(
         language=profile.language,
         avatar_path=profile.avatar_path,
         effects_chain=effects_chain,
+        voice_type=getattr(profile, "voice_type", None) or "cloned",
+        preset_engine=getattr(profile, "preset_engine", None),
+        preset_voice_id=getattr(profile, "preset_voice_id", None),
+        design_prompt=getattr(profile, "design_prompt", None),
+        default_engine=getattr(profile, "default_engine", None),
         generation_count=generation_count,
         sample_count=sample_count,
         created_at=profile.created_at,
@@ -80,11 +88,22 @@ async def create_profile(
     if existing_profile:
         raise ValueError(f"A profile with the name '{data.name}' already exists. Please choose a different name.")
 
+    # Auto-set default_engine for preset profiles
+    default_engine = data.default_engine
+    voice_type = data.voice_type or "cloned"
+    if voice_type == "preset" and data.preset_engine and not default_engine:
+        default_engine = data.preset_engine
+
     db_profile = DBVoiceProfile(
         id=str(uuid.uuid4()),
         name=data.name,
         description=data.description,
         language=data.language,
+        voice_type=voice_type,
+        preset_engine=data.preset_engine,
+        preset_voice_id=data.preset_voice_id,
+        design_prompt=data.design_prompt,
+        default_engine=default_engine,
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
     )
@@ -265,6 +284,8 @@ async def update_profile(
     profile.name = data.name
     profile.description = data.description
     profile.language = data.language
+    if data.default_engine is not None:
+        profile.default_engine = data.default_engine or None  # empty string → NULL
     profile.updated_at = datetime.utcnow()
 
     db.commit()
@@ -382,19 +403,45 @@ async def create_voice_prompt_for_profile(
     engine: str = "qwen",
 ) -> dict:
     """
-    Create a combined voice prompt from all samples in a profile.
+    Create a voice prompt from a profile.
+
+    For cloned profiles: combines all audio samples into a voice prompt.
+    For preset profiles: returns the engine-specific preset voice reference.
+    For designed profiles: returns the text design prompt (future).
 
     Args:
         profile_id: Profile ID
         db: Database session
         use_cache: Whether to use cached prompts
-        engine: TTS engine to create prompt for ("qwen" or "luxtts")
+        engine: TTS engine to create prompt for
 
     Returns:
         Voice prompt dictionary
     """
     from ..backends import get_tts_backend_for_engine
 
+    profile = db.query(DBVoiceProfile).filter_by(id=profile_id).first()
+    if not profile:
+        raise ValueError(f"Profile not found: {profile_id}")
+
+    voice_type = getattr(profile, "voice_type", None) or "cloned"
+
+    # ── Preset profiles: return engine-specific voice reference ──
+    if voice_type == "preset":
+        return {
+            "voice_type": "preset",
+            "preset_engine": profile.preset_engine,
+            "preset_voice_id": profile.preset_voice_id,
+        }
+
+    # ── Designed profiles: return text description (future) ──
+    if voice_type == "designed":
+        return {
+            "voice_type": "designed",
+            "design_prompt": profile.design_prompt,
+        }
+
+    # ── Cloned profiles: create from audio samples ──
     samples = db.query(DBProfileSample).filter_by(profile_id=profile_id).all()
 
     if not samples:
@@ -524,3 +571,6 @@ async def delete_avatar(
     db.commit()
 
     return True
+
+
+
diff --git a/backend/voicebox-server.spec b/backend/voicebox-server.spec
index b5756c66..1c208ba3 100644
--- a/backend/voicebox-server.spec
+++ b/backend/voicebox-server.spec
@@ -1,13 +1,11 @@
 # -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
 from PyInstaller.utils.hooks import collect_submodules
 from PyInstaller.utils.hooks import collect_all
 from PyInstaller.utils.hooks import copy_metadata
 
 datas = []
 binaries = []
-hiddenimports = ['backend', 'backend.main', 'backend.config', 'backend.database', 'backend.models', 'backend.services.profiles', 'backend.services.history', 'backend.services.tts', 'backend.services.transcribe', 'backend.utils.platform_detect', 'backend.backends', 'backend.backends.pytorch_backend', 'backend.utils.audio', 'backend.utils.cache', 'backend.utils.progress', 'backend.utils.hf_progress', 'backend.services.cuda', 'backend.services.effects', 'backend.utils.effects', 'backend.services.versions', 'pedalboard', 'chatterbox', 'chatterbox.tts_turbo', 'chatterbox.mtl_tts', 'backend.backends.chatterbox_backend', 'backend.backends.chatterbox_turbo_backend', 'backend.backends.luxtts_backend', 'zipvoice', 'zipvoice.luxvoice', 'torch', 'transformers', 'fastapi', 'uvicorn', 'sqlalchemy', 'soundfile', 'qwen_tts', 'qwen_tts.inference', 'qwen_tts.inference.qwen3_tts_model', 'qwen_tts.inference.qwen3_tts_tokenizer', 'qwen_tts.core', 'qwen_tts.cli', 'requests', 'pkg_resources.extern', 'backend.backends.mlx_backend', 'mlx', 'mlx.core', 'mlx.nn', 'mlx_audio', 'mlx_audio.tts', 'mlx_audio.stt']
-datas += collect_data_files('qwen_tts')
+hiddenimports = ['backend', 'backend.main', 'backend.config', 'backend.database', 'backend.models', 'backend.services.profiles', 'backend.services.history', 'backend.services.tts', 'backend.services.transcribe', 'backend.utils.platform_detect', 'backend.backends', 'backend.backends.pytorch_backend', 'backend.utils.audio', 'backend.utils.cache', 'backend.utils.progress', 'backend.utils.hf_progress', 'backend.services.cuda', 'backend.services.effects', 'backend.utils.effects', 'backend.services.versions', 'pedalboard', 'chatterbox', 'chatterbox.tts_turbo', 'chatterbox.mtl_tts', 'backend.backends.chatterbox_backend', 'backend.backends.chatterbox_turbo_backend', 'backend.backends.luxtts_backend', 'zipvoice', 'zipvoice.luxvoice', 'torch', 'transformers', 'fastapi', 'uvicorn', 'sqlalchemy', 'soundfile', 'qwen_tts', 'qwen_tts.inference', 'qwen_tts.inference.qwen3_tts_model', 'qwen_tts.inference.qwen3_tts_tokenizer', 'qwen_tts.core', 'qwen_tts.cli', 'requests', 'pkg_resources.extern', 'backend.backends.hume_backend', 'tada', 'tada.modules', 'tada.modules.tada', 'tada.modules.encoder', 'tada.modules.decoder', 'tada.modules.aligner', 'tada.modules.acoustic_spkr_verf', 'tada.nn', 'tada.nn.vibevoice', 'tada.utils', 'tada.utils.gray_code', 'tada.utils.text', 'backend.utils.dac_shim', 'torchaudio', 'backend.backends.kokoro_backend', 'kokoro', 'kokoro.pipeline', 'kokoro.model', 'kokoro.istftnet', 'kokoro.modules', 'kokoro.custom_stft', 'en_core_web_sm', 'loguru', 'backend.backends.mlx_backend', 'mlx', 'mlx.core', 'mlx.nn', 'mlx_audio', 'mlx_audio.tts', 'mlx_audio.stt']
 datas += copy_metadata('qwen-tts')
 datas += copy_metadata('requests')
 datas += copy_metadata('transformers')
@@ -15,8 +13,9 @@ datas += copy_metadata('huggingface-hub')
 datas += copy_metadata('tokenizers')
 datas += copy_metadata('safetensors')
 datas += copy_metadata('tqdm')
-hiddenimports += collect_submodules('qwen_tts')
+datas += copy_metadata('en_core_web_sm')
 hiddenimports += collect_submodules('jaraco')
+hiddenimports += collect_submodules('tada')
 hiddenimports += collect_submodules('mlx')
 hiddenimports += collect_submodules('mlx_audio')
 tmp_ret = collect_all('zipvoice')
@@ -27,12 +26,22 @@ tmp_ret = collect_all('lazy_loader')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('librosa')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('qwen_tts')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('inflect')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('perth')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('piper_phonemize')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('misaki')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('language_tags')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('espeakng_loader')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
+tmp_ret = collect_all('en_core_web_sm')
+datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('mlx')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('mlx_audio')
diff --git a/docs/content/docs/developer/tts-engines.mdx b/docs/content/docs/developer/tts-engines.mdx
index dc749b02..21f07880 100644
--- a/docs/content/docs/developer/tts-engines.mdx
+++ b/docs/content/docs/developer/tts-engines.mdx
@@ -285,6 +285,34 @@ In `app/src/lib/hooks/useGenerationForm.ts`:
 
 In `app/src/components/ServerSettings/ModelManagement.tsx`:
 - Add description to `MODEL_DESCRIPTIONS` record
+- Add model name to `voiceModels` filter condition
+
+### 3.6 Non-Cloning Engines (Preset Voices)
+
+If your engine uses **pre-built voices** instead of zero-shot cloning from reference audio (e.g. Kokoro), additional integration is needed:
+
+**Backend:**
+- In `kokoro_backend.py` (or your engine), define a `VOICES` list of `(voice_id, display_name, gender, language)` tuples
+- `create_voice_prompt()` should return `{"voice_type": "preset", "preset_engine": "<engine>", "preset_voice_id": "<id>"}`
+- `generate()` should read `voice_prompt.get("preset_voice_id")` to select the voice
+- Seed DB profiles for the engine's voices (e.g. once the model download completes) via the `POST /profiles/presets/<engine>/seed` route in `backend/routes/profiles.py`
+- That route creates one DB profile per preset voice with `voice_type="preset"`, skipping voices that already have a profile
+
+**Frontend:**
+- The `EngineModelSelector` filters options based on `selectedProfile.voice_type`:
+  - `"cloned"` profiles → only cloning engines shown (Kokoro hidden)
+  - `"preset"` profiles → only the preset's engine shown
+- Profile cards show the engine name as a badge for preset profiles
+- When a preset profile is selected, the engine auto-switches
+
+**Profile schema fields for presets:**
+- `voice_type: "preset"` (vs `"cloned"` for traditional profiles)
+- `preset_engine: "<engine>"` — which engine owns this voice
+- `preset_voice_id: "<id>"` — the engine-specific voice identifier
+
+**For future "designed" voices** (text description instead of audio, e.g. Qwen CustomVoice):
+- Use `voice_type: "designed"` with `design_prompt` field
+- `create_voice_prompt_for_profile()` already returns the design prompt for this type
 
 ## Phase 4: Dependencies
 
diff --git a/docs/notes/PROJECT_STATUS.md b/docs/notes/PROJECT_STATUS.md
index 71c06453..dee437f5 100644
--- a/docs/notes/PROJECT_STATUS.md
+++ b/docs/notes/PROJECT_STATUS.md
@@ -1,6 +1,6 @@
 # Voicebox Project Status & Roadmap
 
-> Last updated: 2026-03-13 | Current version: **v0.1.13** | 13.1k stars | ~176 open issues | 25 open PRs
+> Last updated: 2026-03-18 | Current version: **v0.3.0** | 13.4k stars | ~136 open issues | 9 open PRs
 
 ---
 
@@ -100,7 +100,7 @@ POST /generate
 
 ## Current State
 
-### What's Shipped (v0.1.13 + recent merges)
+### What's Shipped (v0.3.0)
 
 **Core TTS:**
 - Qwen3-TTS voice cloning (1.7B and 0.6B models)
@@ -108,30 +108,42 @@ POST /generate
 - Multi-engine TTS architecture with thread-safe backend registry (PR #254)
 - LuxTTS integration — fast, CPU-friendly English TTS (PR #254)
 - Chatterbox Multilingual TTS — 23 languages including Hebrew (PR #257)
-- Instruct parameter UI exists but is non-functional across all backends (see #224, Known Limitations)
+- Chatterbox Turbo — paralinguistic tags, low latency English (PR #258)
 - HumeAI TADA integration — 1B English + 3B Multilingual speech-language model (PR #296)
-- Single flat model dropdown (Qwen 1.7B, Qwen 0.6B, LuxTTS, Chatterbox, Chatterbox Turbo, TADA 1B, TADA 3B)
-- Centralized model config registry (`ModelConfig` dataclass) — no per-engine dispatch maps in `main.py`
+- Chunked TTS generation for long text — engine-agnostic, removes ~500 char limit (PR #266)
+- Async generation queue (PR #269)
+- Post-processing audio effects system (PR #271)
+- Centralized model config registry (`ModelConfig` dataclass) — no per-engine dispatch maps
 - Shared `EngineModelSelector` component — engine/model dropdown defined once, used in both generation forms
 
 **Infrastructure:**
-- CUDA backend swap via binary download and restart (PR #252)
-- GPU acceleration settings UI
+- CUDA backend swap via binary download and restart (PR #252), upgraded to cu128 (PR #316)
+- CUDA backend split into independently versioned server + libs archives (PR #298)
+- Docker + web deployment (PR #161)
+- Backend refactor: modular architecture, style guide, tooling (PR #285)
+- Settings overhaul: routed sub-tabs, server logs, changelog, about page (PR #294)
+- Windows support: CUDA detection, cross-platform justfile, clean server shutdown (PR #272)
 - Voice profiles with multi-sample support
 - Stories editor (multi-track DAW timeline)
 - Whisper transcription (base, small, medium, large variants)
-- Model management UI with inline download progress bars (HFProgressTracker)
+- Model management UI with inline download progress bars + folder migration (PR #268)
 - Download cancel/clear UI with error panel (PR #238)
 - Generation history with caching
 - Streaming generation endpoint (MLX only)
-- Duplicate profile name validation (PR #175)
-- Linux NVIDIA GBM buffer + WebKitGTK microphone fix (PR #210)
+- Audio player freeze fix + UX improvements (PR #293)
+- CORS restriction to known local origins (PR #88)
+
+### Abandoned Integrations
+
+| Model | PR | Reason |
+|-------|----|--------|
+| **CosyVoice2/3** | PR #311 | Output quality too poor. Heavy deps, no PyPI, needed 5+ shims. |
 
 ### What's In-Flight
 
 | Feature | Branch/PR | Status |
 |---------|-----------|--------|
-| Chatterbox Turbo + per-engine language lists | `feat/chatterbox-turbo` / PR #258 | Open, ready for review |
+| Kokoro 82M TTS engine | WIP | In development — 82M CPU-realtime engine, 8 languages |
 
 ### TTS Engine Comparison
 
@@ -144,6 +156,7 @@ POST /generate
 | Chatterbox Turbo | `chatterbox-turbo` | English | ~1.5 GB | Paralinguistic tags ([laugh], [cough]), 350M params, low latency | Partial — inline tags only, no separate instruct param |
 | TADA 1B | `tada-1b` | English | ~4 GB | HumeAI speech-language model, 700s+ coherent audio | None |
 | TADA 3B Multilingual | `tada-3b-ml` | 10 (en, ar, zh, de, es, fr, it, ja, pl, pt) | ~8 GB | Multilingual, text-acoustic dual alignment | None |
+| Kokoro 82M | `kokoro` | 8 (en, es, fr, hi, it, pt, ja, zh) | ~350 MB | 82M params, CPU realtime, Apache 2.0, pre-built voices | None |
 
 ### Multi-Engine Architecture (Shipped)
 
@@ -173,69 +186,41 @@ The singleton TTS backend blocker described in the previous version of this doc
 
 | PR | Title | Merged |
 |----|-------|--------|
-| **#257** | feat: Chatterbox TTS engine with multilingual voice cloning | 2026-03-13 |
-| **#254** | feat: LuxTTS integration — multi-engine TTS support | 2026-03-13 |
-| **#252** | feat: CUDA backend swap via binary download and restart | 2026-03-13 |
-| **#238** | Download cancel/clear UI, fixed model downloading | 2026-03-13 |
-| **#250** | docs: align local API port examples | 2026-03-13 |
-| **#210** | fix: Linux NVIDIA GBM buffer crash | 2026-03-13 |
-| **#175** | Fix #134: duplicate profile name validation | 2026-03-13 |
-
-### In-Flight (Our Work)
+| **#316** | Upgrade CUDA backend from cu126 to cu128, fix GPU settings UI | 2026-03-18 |
+| **#305** | fix: bundle qwen_tts source files in PyInstaller build | 2026-03-17 |
+| **#298** | feat: split CUDA backend into independently versioned server + libs archives | 2026-03-17 |
+| **#296** | Add HumeAI TADA TTS engine (1B English + 3B Multilingual) | 2026-03-17 |
+| **#295** | fix: batch of bug fixes from issue tracker | 2026-03-17 |
+| **#293** | Fix audio player freezing and improve UX | 2026-03-17 |
+| **#294** | Settings overhaul: routed sub-tabs, server logs, changelog, about page | 2026-03-16 |
+| **#288** | Better docs | 2026-03-16 |
+| **#285** | Backend refactor: modular architecture, style guide, tooling | 2026-03-16 |
+| **#274** | Landing page v0.2.0 redesign | 2026-03-15 |
+| **#272** | Windows support: CUDA detection, cross-platform justfile, clean server shutdown | 2026-03-15 |
+| **#271** | Add post-processing audio effects system | 2026-03-14 |
+| **#269** | feat: async generation queue | 2026-03-13 |
+| **#268** | feat: model management improvements and folder migration | 2026-03-13 |
+| **#266** | feat: chunked TTS generation for long text (engine-agnostic) | 2026-03-13 |
+| **#265** | feat: paralinguistic tag autocomplete for Chatterbox Turbo | 2026-03-13 |
+| **#264** | fix: Chatterbox float64 dtype mismatch + model unload button | 2026-03-13 |
+| **#258** | feat: Chatterbox Turbo engine + per-engine language lists | 2026-03-13 |
+| **#230** | docs: fix README grammar | 2026-03-13 |
+| **#161** | feat: Docker + web deployment | 2026-03-13 |
+| **#88** | security: restrict CORS to known local origins | 2026-03-13 |
+
+### Currently Open (9 PRs)
 
 | PR | Title | Status | Notes |
 |----|-------|--------|-------|
-| **#258** | feat: Chatterbox Turbo engine + per-engine language lists | Open | Ready for review. Adds Turbo engine + dynamic language dropdown. |
-
-### Merge-Ready / Near-Ready (Bug Fixes & Small Features)
-
-| PR | Title | Risk | Notes |
-|----|-------|------|-------|
-| **#230** | docs: fix README grammar | None | Docs-only |
-| **#243** | a11y: screen reader and keyboard improvements | Low | Accessibility, no backend changes |
-| **#178** | Fix #168 #140: generation error handling | Low | Error handling improvements |
-| **#152** | Fix: prevent crashes when HuggingFace unreachable | Medium | Monkey-patches HF hub; solves real offline bug (#150, #151) |
-| **#218** | fix: unify qwen tts cache dir on Windows | Low | Windows-specific path fix |
-| **#214** | fix: panic on launch from tokio::spawn | Low | Rust-side Tauri fix |
-| **#88** | security: restrict CORS to known local origins | Low | Security hardening |
-| **#133** | feat: network access toggle | Low | Wires up existing plumbing |
-
-### Significant Feature PRs
-
-| PR | Title | Complexity | Notes |
-|----|-------|-----------|-------|
-| **#253** | Enhance speech tokenizer with 48kHz version | Medium | Qwen tokenizer upgrade |
-| **#97** | fix: pass language parameter to TTS models | Medium | May be partially obsoleted by multi-engine work — needs review |
-| **#99** | feat: chunked TTS with quality selector | Medium | Solves 500-char limit. Addresses #191, #203, #69, #111. |
-| **#154** | feat: Audiobook tab | Medium | Full audiobook workflow. Depends on #99 concepts. |
-| **#91** | fix: CoreAudio device enumeration | Medium | macOS audio device handling |
-
-### Architectural PRs (Need Careful Review)
-
-| PR | Title | Complexity | Notes |
-|----|-------|-----------|-------|
-| **#225** | feat: custom HuggingFace model support | High | Arbitrary HF repo loading. May need rework given multi-engine arch is now shipped. |
-| **#194** | feat: Hebrew + Chatterbox TTS | High | **Superseded** by PR #257 which shipped Chatterbox multilingual (23 langs incl. Hebrew). May be closeable. |
-| **#195** | feat: per-profile LoRA fine-tuning | Very High | Training pipeline, adapter management, 15 new endpoints. Depends on #194 (now superseded). |
-| **#161** | feat: Docker + web deployment | High | 3-stage Dockerfile, SPA serving. Independent of TTS engine work. |
-| **#124** / **#123** | Docker (simpler attempts) | Low-Medium | Overlap with #161 |
-| **#227** | fix: harden input validation & file safety | Medium | Coupled to #225 (custom models) |
-
-### PRs That Need Author Action / Are Stale
-
-| PR | Title | Notes |
-|----|-------|-------|
-| **#237** | fix: bundle qwen_tts source files in PyInstaller | Build system, needs review |
-| **#215** | Update prerequisites with Tauri deps | Branch is `main` — will have conflicts |
-| **#89** | Linux Support | Branch is `main` — will have conflicts. Broad scope. |
-| **#83** | Update download links for v0.1.12 | Outdated (we're on v0.1.13) |
-
-### PRs Likely Superseded
-
-| PR | Superseded By | Notes |
-|----|--------------|-------|
-| **#194** (Hebrew + Chatterbox) | PR #257 (merged) | #257 ships Chatterbox multilingual with 23 languages including Hebrew. #194 took a different approach (route by language). Can likely be closed. |
-| **#33** (External provider binaries) | PR #252 (merged) | #252 shipped CUDA backend swap. #33's broader provider architecture may still have value but needs reassessment. |
+| **#311** | feat: add CosyVoice2/3 TTS engine | **Will close** | Model quality too poor. See Abandoned Integrations. |
+| **#253** | Enhance speech tokenizer with 48kHz version | Community PR | Qwen tokenizer upgrade. Worth reviewing. |
+| **#237** | fix: bundle qwen_tts source files in PyInstaller | Superseded | Our PR #305 shipped this. Can close. |
+| **#227** | fix: harden input validation & file safety | Community PR | Coupled to #225 (custom models). |
+| **#225** | feat: custom HuggingFace model support | Community PR | Needs rework for multi-engine arch. |
+| **#218** | fix: unify qwen tts cache dir on Windows | Community PR | Windows-specific path fix. Still relevant. |
+| **#195** | feat: per-profile LoRA fine-tuning | Draft | Complex. 15 new endpoints. |
+| **#154** | feat: Audiobook tab | Community PR | Chunked generation now shipped (#266). |
+| **#91** | fix: CoreAudio device enumeration | Draft | macOS audio device handling. |
 
 ---
 
@@ -280,7 +265,7 @@ Strong demand for: Hindi (#245), Indonesian (#247), Dutch (#236), Hebrew (#199),
 | #132 | LavaSR (transcription) |
 | #76 | (General model expansion) |
 
-Community also requests: XTTS-v2, Fish Speech, CosyVoice, Kokoro. The multi-engine architecture is now in place, making new model integration significantly easier.
+Community also requests: XTTS-v2, Fish Speech, and Kokoro (integration now in progress). CosyVoice was tried and abandoned (PR #311). The multi-engine architecture is in place, making new model integration straightforward.
 
 ### Long-Form / Chunking (5 issues)
 
@@ -288,7 +273,7 @@ Users hitting the ~500 character practical limit.
 
 **Key issues:** #234 (queue system), #203 (500 char limit), #191 (auto-split), #111, #69
 
-**Fix path:** PR #99 (chunked TTS + quality selector) directly addresses this. PR #154 (Audiobook tab) builds on it.
+**Fix path:** Mostly resolved — PR #266 (engine-agnostic chunked TTS) and PR #269 (async generation queue) are both merged. PR #154 (Audiobook tab) is still open.
 
 ### Feature Requests (23 issues)
 
@@ -326,7 +311,7 @@ Notable requests:
 | `CUDA_BACKEND_SWAP_FINAL.md` | — | **Shipped** (PR #252) | Final implementation plan |
 | `EXTERNAL_PROVIDERS.md` | v0.2.0 | **Not started** | Remote server support |
 | `MLX_AUDIO.md` | — | **Shipped** | MLX backend is live |
-| `DOCKER_DEPLOYMENT.md` | v0.2.0 | **PR exists** (#161) | Waiting on review |
+| `DOCKER_DEPLOYMENT.md` | v0.2.0 | **Shipped** (PR #161) | Docker + web deployment |
 | `OPENAI_SUPPORT.md` | v0.2.0 | **Not started** | OpenAI-compatible API layer |
 | `PR33_CUDA_PROVIDER_REVIEW.md` | — | **Reference** | Analysis of the original provider approach |
 
@@ -334,31 +319,31 @@ Notable requests:
 
 ## New Model Integration — Landscape
 
-### Models Worth Supporting (2026 SOTA — updated March 13)
+### Models Worth Supporting (2026 SOTA — updated March 18)
 
 | Model | Cloning | Speed | Sample Rate | Languages | VRAM | Instruct Support | Integration Ease | Status |
 |-------|---------|-------|-------------|-----------|------|-----------------|-----------------|--------|
 | **Qwen3-TTS** | 10s zero-shot | Medium | 24 kHz | 10 | Medium | None (Base); Yes (CustomVoice variant, predefined speakers only) | **Shipped** | v0.1.13 |
 | **LuxTTS** | 3s zero-shot | 150x RT, CPU ok | 48 kHz | English | <1 GB | None | **Shipped** | PR #254 |
 | **Chatterbox MTL** | 5s zero-shot | Medium | 24 kHz | 23 | Medium | Partial — `exaggeration` float | **Shipped** | PR #257 |
-| **Chatterbox Turbo** | 5s zero-shot | Fast | 24 kHz | English | Low | Partial — inline tags only | **PR #258** | In review |
-| **CosyVoice2-0.5B** | 3-10s zero-shot | Very fast | 24 kHz | Multilingual | Low | **Yes** — `inference_instruct2()`, works with cloning | Ready | Best instruct candidate |
-| **Fish Speech** | 10-30s few-shot | Real-time | 24-44 kHz | 50+ | Medium | **Yes** — inline text descriptions, word-level control | Ready | Multi-engine arch in place |
-| **MOSS-TTS Family** | Zero-shot | — | — | Multilingual | Medium | **Yes** — text prompts for style + timbre design | Needs vetting | Apache 2.0, multi-speaker dialogue |
-| **HumeAI TADA 1B/3B** | Zero-shot | 5× faster than LLM-TTS | 24 kHz | EN (1B), Multilingual (3B) | Medium | Partial — automatic prosody from text context | **Shipped** | PR #296, MIT, 700s+ coherent |
-| **VoxCPM 1.5** | Zero-shot (seconds) | ~0.15 RTF streaming | — | Bilingual (EN/ZH) | Medium | Partial — automatic context-aware prosody | Needs vetting | Apache 2.0, tokenizer-free continuous diffusion |
-| **Kokoro-82M** | 3s instant | CPU realtime | 24 kHz | English | Tiny (82M) | Partial — automatic style inference | Ready | Apache 2.0, multi-engine arch in place |
-| **XTTS-v2** | 6s zero-shot | Mid-GPU | 24 kHz | 17+ | Medium | Partial — style transfer from ref audio only | Ready | Multi-engine arch in place |
-| **Pocket TTS** | Zero-shot + streaming | >1× RT on CPU | — | English | ~100M params, CPU-first | None | Needs vetting | MIT, Kyutai Labs, no GPU required |
-
-#### Notes on New Candidates (March 2026)
-
-- **CosyVoice2-0.5B** — Best candidate for instruct support. `inference_instruct2()` accepts a text instruct parameter for emotions, speed, volume, dialects — and it works alongside voice cloning. This is the closest match to what users expect from our instruct UI. [HF: FunAudioLLM/CosyVoice2-0.5B](https://huggingface.co/FunAudioLLM/CosyVoice2-0.5B)
-- **HumeAI TADA** — Text-Audio Dual Alignment arch. Near-zero hallucinations/drift, free synced transcript. 700+ seconds coherent audio. Best candidate for Stories long-form reliability. Prosody/emotion is automatic from text context, not user-controllable. [HF: HumeAI/tada-1b](https://huggingface.co/HumeAI/tada-1b) | [GitHub: HumeAI/tada](https://github.com/HumeAI/tada)
-- **MOSS-TTS** — Modular suite: flagship cloning, MOSS-TTSD (multi-speaker dialogue), MOSS-VoiceGenerator (create voices from text descriptions). VoiceGenerator unifies timbre design and style control via text prompts, usable as a layer for downstream TTS including cloning. [HF: OpenMOSS-Team/MOSS-VoiceGenerator](https://huggingface.co/OpenMOSS-Team/MOSS-VoiceGenerator) | [GitHub: OpenMOSS/MOSS-TTS](https://github.com/OpenMOSS/MOSS-TTS)
-- **Fish Speech** — Word-level fine-grained control using plain language descriptions inline in the script. Works with cloning. Note: Fish Audio S2 has a restrictive research license (commercial use requires approval), but the open-source Fish Speech model may differ. Needs license clarification. [fish.audio blog](https://fish.audio/blog/fish-audio-s2-fine-grained-ai-voice-control-at-the-word-level)
-- **VoxCPM 1.5** — Tokenizer-free continuous diffusion + autoregressive. No discrete token artifacts. Prosody/emotion is context-aware but automatic, not explicitly controllable via text prompt. Real-time streaming, LoRA fine-tuning. Trained on 1.8M+ hours. [GitHub: OpenBMB/VoxCPM](https://github.com/OpenBMB/VoxCPM)
-- **Pocket TTS** — 100M param CPU-first model from Kyutai Labs (Moshi team). Runs >1× realtime without GPU. No style control. Broadens hardware support significantly. [GitHub: kyutai-labs/pocket-tts](https://github.com/kyutai-labs/pocket-tts)
+| **Chatterbox Turbo** | 5s zero-shot | Fast | 24 kHz | English | Low | Partial — inline tags only | **Shipped** | PR #258 |
+| **HumeAI TADA 1B/3B** | Zero-shot | 5x faster than LLM-TTS | 24 kHz | EN (1B), Multilingual (3B) | Medium | Partial — automatic prosody | **Shipped** | PR #296 |
+| **Kokoro-82M** | Pre-built voices | CPU realtime | 24 kHz | 8 | Tiny (82M) | None | **In progress** | Apache 2.0, pip install, ~350MB |
+| ~~**CosyVoice2-0.5B**~~ | 3-10s zero-shot | Very fast | 24 kHz | Multilingual | Low | Yes — `inference_instruct2()` | **Abandoned** | PR #311 — poor output quality |
+| **Fish Speech** | 10-30s few-shot | Real-time | 24-44 kHz | 50+ | Medium | **Yes** — inline text descriptions, word-level control | Ready | Needs license clarification |
+| **XTTS-v2** | 6s zero-shot | Mid-GPU | 24 kHz | 17+ | Medium | Partial — style transfer from ref audio only | Ready | Mature pip package |
+| **Pocket TTS** | Zero-shot + streaming | >1x RT on CPU | — | English | ~100M params, CPU-first | None | Ready | MIT, Kyutai Labs |
+| **MOSS-TTS Family** | Zero-shot | — | — | Multilingual | Medium | **Yes** — text prompts for style + timbre design | Needs vetting | Apache 2.0 |
+| **VoxCPM 1.5** | Zero-shot (seconds) | ~0.15 RTF streaming | — | Bilingual (EN/ZH) | Medium | Partial — automatic context-aware prosody | Needs vetting | Apache 2.0 |
+
+#### Notes on Candidates (March 2026)
+
+- **CosyVoice2-0.5B** — **Tried and abandoned** (PR #311). Despite having the best instruct API, output quality was poor. No PyPI package, needed 5+ shims, heavy deps. Not worth it.
+- **HumeAI TADA** — **Shipped** (PR #296). 700+ seconds coherent audio. [GitHub: HumeAI/tada](https://github.com/HumeAI/tada)
+- **Kokoro-82M** — **In progress.** 82M params, CPU realtime, Apache 2.0, clean `pip install kokoro`. Uses pre-built voice styles (not zero-shot cloning from arbitrary audio). [GitHub: hexgrad/kokoro](https://github.com/hexgrad/kokoro)
+- **Fish Speech** — Word-level fine-grained control. License needs clarification. [fish.audio blog](https://fish.audio/blog/fish-audio-s2-fine-grained-ai-voice-control-at-the-word-level)
+- **XTTS-v2** — Coqui's multilingual cloning. 17+ languages, pip-installable. [GitHub: coqui-ai/TTS](https://github.com/coqui-ai/TTS)
+- **Pocket TTS** — 100M param CPU-first model from Kyutai Labs. [GitHub: kyutai-labs/pocket-tts](https://github.com/kyutai-labs/pocket-tts)
 - **Watch list:** MioTTS-2.6B (fast LLM-based EN/JP, vLLM compatible), Oolel-Voices (Soynade Research, expressive modular control)
 
 ### Adding a New Engine (Now Straightforward)
@@ -402,49 +387,44 @@ The generation form now uses a flat model dropdown with engine-based routing. Pe
 
 ## Recommended Priorities
 
-### Tier 1 — Ship Now (Low Risk)
+### Tier 1 — Ship Now
 
 | Priority | PR/Item | Impact | Effort |
 |----------|---------|--------|--------|
-| 1 | **#258** — Chatterbox Turbo + per-engine languages | Paralinguistic tags, proper language filtering | Review only |
-| 2 | **#152** — Offline mode crash fix | Fixes #150, #151 | Low |
-| 3 | **#99** — Chunked TTS + quality selector | Removes 500-char limit, addresses 5 issues | Medium |
-| 4 | **#218** — Windows HF cache dir fix | Windows-specific pain | Low |
-| 5 | **#178** — Generation error handling | Error UX | Low |
-| 6 | **#230** — Docs fixes | Zero risk | None |
-| 7 | **#133** — Network access toggle | Wires up existing code | Low |
-| 8 | **#88** — CORS restriction | Security improvement | Low |
-| 9 | **#214** — Tauri window close panic fix | Stability | Low |
-| 10 | Triage GPU issues | Many may be resolved by CUDA swap (#252) | Low |
-| 11 | Close superseded PRs | #194 (superseded by #257), #83 (outdated) | None |
-
-### Tier 2 — Next Release (v0.2.0)
+| 1 | **Kokoro 82M** — finish integration | New engine, CPU-friendly, 8 langs | Low (nearly done) |
+| 2 | Close PR #311 (CosyVoice) and #237 (superseded by #305) | Housekeeping | None |
+| 3 | **#218** — Windows HF cache dir fix | Windows-specific pain | Low |
+| 4 | **#253** — 48kHz speech tokenizer | Quality improvement for Qwen | Medium |
+
+### Tier 2 — Feature Work
 
 | Priority | Item | Impact | Effort |
 |----------|------|--------|--------|
-| 1 | **#253** — 48kHz speech tokenizer | Quality improvement | Medium |
-| 2 | **#161** — Docker deployment | Server/headless users | Medium |
-| 3 | **#154** — Audiobook tab | Long-form users | Medium |
-| 4 | ~~**Model config registry**~~ | ~~Reduce dispatch duplication in main.py~~ | **Done** |
-| 5 | **#225** — Custom HuggingFace models | User-supplied models | High (needs rework for multi-engine) |
+| 1 | **#154** — Audiobook tab | Long-form users. Chunking + queue now shipped. | Medium |
+| 2 | **#225** — Custom HuggingFace models | User-supplied models. Needs rework. | High |
+| 3 | OpenAI-compatible API (plan doc exists) | Low effort once API is stable | Low |
+| 4 | LoRA fine-tuning (PR #195) | Complex, needs rework for multi-engine | Very High |
+| 5 | Streaming for non-MLX engines | Currently MLX-only | Medium |
 
-### Tier 3 — Future (v0.3.0+)
+### Tier 3 — Future Engines
 
 | Priority | Item | Notes |
 |----------|------|-------|
-| 1 | **HumeAI TADA** | Long-form reliability for Stories, synced transcripts. Addresses #234, #203, #191, #111, #69. Needs API vetting. |
-| 2 | **Pocket TTS** (Kyutai) | CPU-first 100M model, broadens hardware support. Kyutai ships clean code. Needs API vetting. |
-| 3 | **MOSS-TTS** | Text-to-voice design (no ref audio) is unique. Multi-speaker dialogue for Stories. Needs thorough API vetting. |
-| 4 | **Kokoro-82M** | 82M params, CPU realtime, Apache 2.0. Easy win. |
-| 5 | ~~**Model config registry refactor**~~ | **Done** — consolidated in `backend/backends/__init__.py` + `EngineModelSelector.tsx` |
-| 6 | XTTS-v2 / Fish Speech / CosyVoice | Multi-engine arch is ready; just needs backend implementation |
-| 7 | **VoxCPM 1.5** | Tokenizer-free streaming, interesting but uncertain integration surface |
-| 8 | OpenAI-compatible API (plan doc exists) | Low effort once API is stable |
-| 9 | LoRA fine-tuning (PR #195) | Complex, needs rework for multi-engine |
-| 10 | External/remote providers | Depends on use case demand |
-| 11 | GGUF support (#226) | Depends on model ecosystem maturity |
-| 12 | Queue system (#234) | Batch generation |
-| 13 | Streaming for non-MLX engines | Currently MLX-only |
+| 1 | **Fish Speech** | 50+ langs, word-level instruct. License TBD. |
+| 2 | **XTTS-v2** | 17+ langs, mature pip package. Best multilingual cloning. |
+| 3 | **Pocket TTS** (Kyutai) | CPU-first 100M model. MIT. |
+| 4 | **MOSS-TTS** | Text-to-voice design. Multi-speaker dialogue for Stories. |
+| 5 | **VoxCPM 1.5** | Tokenizer-free streaming. Uncertain integration surface. |
+
+### ~~Previously Prioritized — Now Done~~
+
+- ~~#258 — Chatterbox Turbo~~ **Merged**
+- ~~#99 — Chunked TTS~~ **Superseded by #266, merged**
+- ~~#88 — CORS restriction~~ **Merged**
+- ~~#161 — Docker deployment~~ **Merged**
+- ~~#234 — Queue system~~ **Addressed by #269, merged**
+- ~~HumeAI TADA~~ **Shipped** (PR #296)
+- Kokoro-82M — **In progress** (now Tier 1, not yet done)
 
 ---
 
@@ -452,13 +432,10 @@ The generation form now uses a flat model dropdown with engine-based routing. Pe
 
 | Branch | PR | Status | Notes |
 |--------|-----|--------|-------|
-| `feat/chatterbox-turbo` | #258 | Open | Chatterbox Turbo + per-engine languages |
+| `feat/cosyvoice-engine` | #311 | Open — closing | CosyVoice2/3 — abandoned, poor quality |
+| `feat/chatterbox-turbo` | #258 | **Merged** | Chatterbox Turbo + per-engine languages |
 | `feat/chatterbox` | #257 | **Merged** | Chatterbox Multilingual |
 | `feat/luxtts` | #254 | **Merged** | LuxTTS + multi-engine arch |
-| `external-provider-binaries` | #33 | Superseded by #252 | Original CUDA provider approach |
-| `feat/dual-server-binaries` | — | No PR | Related to provider split |
-| `fix-multi-sample` | — | No PR | Voice profile multi-sample fix |
-| `fix-dl-notification-...` | — | No PR | Model download UX |
 
 ---
 
diff --git a/tauri/src-tauri/Cargo.lock b/tauri/src-tauri/Cargo.lock
index b133dfc8..194314de 100644
--- a/tauri/src-tauri/Cargo.lock
+++ b/tauri/src-tauri/Cargo.lock
@@ -5041,7 +5041,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
 [[package]]
 name = "voicebox"
-version = "0.2.3"
+version = "0.3.1"
 dependencies = [
  "base64 0.22.1",
  "core-foundation-sys",

From 9e726ad048b7fa7ce94de6186221bbbd55b1a3f5 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Thu, 19 Mar 2026 10:14:33 -0700
Subject: [PATCH 2/4] =?UTF-8?q?fix:=20remove=20engine=20dropdown=20filteri?=
 =?UTF-8?q?ng=20=E2=80=94=20profile=20grid=20handles=20it?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Generation/EngineModelSelector.tsx        | 23 ++++---------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/app/src/components/Generation/EngineModelSelector.tsx b/app/src/components/Generation/EngineModelSelector.tsx
index 773aa089..1dfd4e9b 100644
--- a/app/src/components/Generation/EngineModelSelector.tsx
+++ b/app/src/components/Generation/EngineModelSelector.tsx
@@ -41,26 +41,11 @@ const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']);
 /** Engines that support cloned (reference audio) profiles. */
 const CLONING_ENGINES = new Set(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']);
 
-/** Engines that are preset-only (no cloning). */
-const PRESET_ONLY_ENGINES = new Set(['kokoro']);
-
 /**
- * Get which engine options are available for the selected profile.
- *
- * - Preset profiles: locked to their preset engine
- * - All other profiles: all engines available
+ * All engine options are always available. The profile grid already
+ * filters by engine, so the dropdown doesn't need to restrict options.
  */
-function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) {
-  if (!selectedProfile) return ENGINE_OPTIONS;
-
-  const voiceType = selectedProfile.voice_type || 'cloned';
-
-  if (voiceType === 'preset') {
-    // Preset profiles lock to their specific engine
-    const presetEngine = selectedProfile.preset_engine;
-    return ENGINE_OPTIONS.filter((opt) => opt.engine === presetEngine);
-  }
-
+function getAvailableOptions(_selectedProfile?: VoiceProfileResponse | null) {
   return ENGINE_OPTIONS;
 }
 
@@ -169,5 +154,5 @@ export function isProfileCompatibleWithEngine(
   const voiceType = profile.voice_type || 'cloned';
   if (voiceType === 'preset') return profile.preset_engine === engine;
   if (voiceType === 'cloned') return CLONING_ENGINES.has(engine);
-  return !PRESET_ONLY_ENGINES.has(engine); // designed — future
+  return true; // designed — future
 }

From 0fc2192204755fe33bdc1700c707da6e846c2b61 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Thu, 19 Mar 2026 10:37:11 -0700
Subject: [PATCH 3/4] fix: resolve relative paths using configured data dir,
 not CWD

---
 backend/database/migrations.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/backend/database/migrations.py b/backend/database/migrations.py
index cf805c8d..9c00fa8f 100644
--- a/backend/database/migrations.py
+++ b/backend/database/migrations.py
@@ -189,8 +189,16 @@ def _resolve_relative_paths(engine, tables: set[str]) -> None:
     These break when the production binary's CWD differs from the data directory.
     This migration converts them to absolute paths using the configured data dir.
     Idempotent: absolute paths are left untouched.
+
+    Strategy: paths like "data/generations/abc.wav" are rebased onto the
+    configured data directory.  A leading "data/" component is stripped
+    before prepending get_data_dir(); other relative paths are joined as-is.
+    If the rebased path does not exist, fall back to resolving against CWD.
     """
     from pathlib import Path
+    from ..config import get_data_dir
+
+    data_dir = get_data_dir()
 
     path_columns = [
         ("generations", "audio_path"),
@@ -213,8 +221,20 @@ def _resolve_relative_paths(engine, tables: set[str]) -> None:
                 p = Path(path_val)
                 if p.is_absolute():
                     continue
-                # Resolve relative to CWD (which is where they were created)
-                resolved = p.resolve()
+
+                # Try rebasing: "data/generations/abc.wav" → data_dir / "generations/abc.wav"
+                parts = p.parts
+                if parts and parts[0] == "data":
+                    rebased = data_dir / Path(*parts[1:])
+                else:
+                    rebased = data_dir / p
+
+                if rebased.exists():
+                    resolved = rebased
+                else:
+                    # Fallback: resolve relative to CWD
+                    resolved = p.resolve()
+
                 if resolved.exists():
                     conn.execute(
                         text(f"UPDATE {table} SET {column} = :path WHERE id = :id"),

From d6f48ace3eef07906468358da8427af2aa50f01c Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Thu, 19 Mar 2026 16:11:38 -0700
Subject: [PATCH 4/4] Mirror the regular /generate endpoint behavior more
 closely

---
 backend/routes/generations.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 8541b659..9f985fcc 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -230,7 +230,15 @@ async def stream_speech(
     if not profile:
         raise HTTPException(status_code=404, detail="Profile not found")
 
-    engine = data.engine or "qwen"
+    # Mirror the regular /generate endpoint behavior more closely:
+    # if the caller doesn't specify an engine, prefer the profile's default
+    # engine (or preset engine) before falling back to qwen.
+    engine = (
+        data.engine
+        or getattr(profile, "default_engine", None)
+        or getattr(profile, "preset_engine", None)
+        or "qwen"
+    )
     tts_model = get_tts_backend_for_engine(engine)
     model_size = data.model_size or "1.7B"
 
@@ -263,6 +271,22 @@ async def stream_speech(
         trim_fn=trim_fn,
     )
 
+    effects_chain_config = None
+    if data.effects_chain is not None:
+        effects_chain_config = [e.model_dump() for e in data.effects_chain]
+    elif profile.effects_chain:
+        import json as _json
+
+        try:
+            effects_chain_config = _json.loads(profile.effects_chain)
+        except Exception:
+            effects_chain_config = None
+
+    if effects_chain_config:
+        from ..utils.effects import apply_effects
+
+        audio = apply_effects(audio, sample_rate, effects_chain_config)
+
     if data.normalize:
         from ..utils.audio import normalize_audio