Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions app/src/components/Generation/EngineModelSelector.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
const ENGINE_OPTIONS = [
{ value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' },
{ value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' },
{ value: 'qwen_custom_voice:1.7B', label: 'Qwen CustomVoice 1.7B', engine: 'qwen_custom_voice' },
{ value: 'qwen_custom_voice:0.6B', label: 'Qwen CustomVoice 0.6B', engine: 'qwen_custom_voice' },
{ value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' },
{ value: 'chatterbox', label: 'Chatterbox', engine: 'chatterbox' },
{ value: 'chatterbox_turbo', label: 'Chatterbox Turbo', engine: 'chatterbox_turbo' },
Expand All @@ -29,6 +31,7 @@ const ENGINE_OPTIONS = [

const ENGINE_DESCRIPTIONS: Record<string, string> = {
qwen: 'Multi-language, two sizes',
qwen_custom_voice: '9 preset voices, instruct control',
luxtts: 'Fast, English-focused',
chatterbox: '23 languages, incl. Hebrew',
chatterbox_turbo: 'English, [laugh] [cough] tags',
Expand All @@ -49,12 +52,22 @@ function getAvailableOptions(selectedProfile?: VoiceProfileResponse | null) {

/**
 * Builds the value string used by the engine `<Select>` for a given engine
 * and optional model size.
 *
 * Engines that come in several sizes are encoded as `engine:size`; when no
 * size (or an empty one) is supplied, the engine's default size is used.
 * Every other engine is returned unchanged.
 *
 * @param engine - Engine identifier (e.g. `qwen`, `tada`, `luxtts`).
 * @param modelSize - Optional size suffix for multi-size engines.
 * @returns The select value for the engine/size pair.
 */
function getSelectValue(engine: string, modelSize?: string): string {
  let fallbackSize: string;

  switch (engine) {
    case 'qwen':
    case 'qwen_custom_voice':
      fallbackSize = '1.7B';
      break;
    case 'tada':
      fallbackSize = '1B';
      break;
    default:
      // Single-size engines have no `:size` suffix.
      return engine;
  }

  // `||` (not `??`) on purpose: an empty-string size also falls back.
  return `${engine}:${modelSize || fallbackSize}`;
}

function handleEngineChange(form: UseFormReturn<GenerationFormValues>, value: string) {
if (value.startsWith('qwen:')) {
export function applyEngineSelection(form: UseFormReturn<GenerationFormValues>, value: string) {
if (value.startsWith('qwen_custom_voice:')) {
const [, modelSize] = value.split(':');
form.setValue('engine', 'qwen_custom_voice');
form.setValue('modelSize', modelSize as '1.7B' | '0.6B');
const currentLang = form.getValues('language');
const available = getLanguageOptionsForEngine('qwen_custom_voice');
if (!available.some((l) => l.value === currentLang)) {
form.setValue('language', available[0]?.value ?? 'en');
}
} else if (value.startsWith('qwen:')) {
const [, modelSize] = value.split(':');
form.setValue('engine', 'qwen');
form.setValue('modelSize', modelSize as '1.7B' | '0.6B');
Expand Down Expand Up @@ -110,7 +123,7 @@ export function EngineModelSelector({ form, compact, selectedProfile }: EngineMo

useEffect(() => {
if (!currentEngineAvailable && availableOptions.length > 0) {
handleEngineChange(form, availableOptions[0].value);
applyEngineSelection(form, availableOptions[0].value);
}
}, [availableOptions, currentEngineAvailable, form]);

Expand All @@ -120,7 +133,7 @@ export function EngineModelSelector({ form, compact, selectedProfile }: EngineMo
: undefined;

return (
<Select value={selectValue} onValueChange={(v) => handleEngineChange(form, v)}>
<Select value={selectValue} onValueChange={(v) => applyEngineSelection(form, v)}>
<FormControl>
<SelectTrigger className={triggerClass}>
<SelectValue />
Expand Down
2 changes: 1 addition & 1 deletion app/src/components/Generation/FloatingGenerateBox.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ export function FloatingGenerateBox({
/>

<FormItem className="flex-1 space-y-0">
<EngineModelSelector form={form} compact selectedProfile={selectedProfile} />
<EngineModelSelector form={form} compact />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Restore selectedProfile in EngineModelSelector to keep profile-engine compatibility checks active.

Dropping selectedProfile here bypasses profile-aware engine filtering and auto-correction in the floating flow, so users can keep incompatible engine/profile combinations (behavior now differs from GenerationForm).

🔧 Proposed fix
-                    <EngineModelSelector form={form} compact />
+                    <EngineModelSelector form={form} compact selectedProfile={selectedProfile} />
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
<EngineModelSelector form={form} compact />
<EngineModelSelector form={form} compact selectedProfile={selectedProfile} />
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/src/components/Generation/FloatingGenerateBox.tsx` at line 411, the
floating flow no longer passes the selectedProfile prop to EngineModelSelector,
so profile-aware engine filtering and auto-correction are skipped. Restore the
prop by passing selectedProfile from the surrounding form/state into
EngineModelSelector (the same prop used by GenerationForm) so that
EngineModelSelector resumes its compatibility checks and auto-corrections for
engine/profile combinations.

</FormItem>

<FormItem className="flex-1 space-y-0">
Expand Down
29 changes: 26 additions & 3 deletions app/src/components/Generation/GenerationForm.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { useEffect } from 'react';
import { Loader2, Mic } from 'lucide-react';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
Expand All @@ -19,19 +20,41 @@ import {
SelectValue,
} from '@/components/ui/select';
import { Textarea } from '@/components/ui/textarea';
import { getLanguageOptionsForEngine } from '@/lib/constants/languages';
import { getLanguageOptionsForEngine, type LanguageCode } from '@/lib/constants/languages';
import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
import { useProfile } from '@/lib/hooks/useProfiles';
import { useUIStore } from '@/stores/uiStore';
import { EngineModelSelector, getEngineDescription } from './EngineModelSelector';
import { EngineModelSelector, applyEngineSelection, getEngineDescription } from './EngineModelSelector';
import { ParalinguisticInput } from './ParalinguisticInput';

/**
 * Maps a bare engine identifier to the select value that represents it.
 *
 * Multi-size engines resolve to a fixed `engine:size` value; any other
 * engine identifier is returned as-is.
 *
 * @param engine - Engine identifier (e.g. a profile's preferred engine).
 * @returns The corresponding select value.
 */
function getEngineSelectValue(engine: string): string {
  const sizedEngineValues = new Map<string, string>([
    ['qwen', 'qwen:1.7B'],
    ['qwen_custom_voice', 'qwen_custom_voice:1.7B'],
    ['tada', 'tada:1B'],
  ]);

  return sizedEngineValues.get(engine) ?? engine;
}
Comment on lines +30 to +35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don’t resolve tada to the English-only model by default.

Profiles only persist default_engine: 'tada', but this helper maps that to tada:1B. For any non-English profile, applyEngineSelection(...) then forces language back to 'en', so selecting the profile loses its saved language. Pick tada:3B when the profile language is not English, or persist the preferred TADA size with the profile.

Suggested fix
-function getEngineSelectValue(engine: string): string {
+function getEngineSelectValue(engine: string, profileLanguage?: string): string {
   if (engine === 'qwen') return 'qwen:1.7B';
   if (engine === 'qwen_custom_voice') return 'qwen_custom_voice:1.7B';
-  if (engine === 'tada') return 'tada:1B';
+  if (engine === 'tada') return profileLanguage === 'en' ? 'tada:1B' : 'tada:3B';
   return engine;
 }
@@
-      applyEngineSelection(form, getEngineSelectValue(preferredEngine));
+      applyEngineSelection(
+        form,
+        getEngineSelectValue(preferredEngine, selectedProfile.language),
+      );

Also applies to: 52-55

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/src/components/Generation/GenerationForm.tsx` around lines 30 - 35,
getEngineSelectValue currently maps the bare 'tada' profile value to 'tada:1B',
which causes applyEngineSelection to reset non-English profiles to English.
Update getEngineSelectValue to accept the profile language (e.g., add a second
parameter like language: string | undefined) and return 'tada:1B' only when
language is 'en' or undefined, and 'tada:3B' for any other language. Then
update any callers (such as where applyEngineSelection is used) to pass the
profile's language so the helper can choose the correct TADA size and avoid
overwriting the saved language.


export function GenerationForm() {
const selectedProfileId = useUIStore((state) => state.selectedProfileId);
const { data: selectedProfile } = useProfile(selectedProfileId || '');

const { form, handleSubmit, isPending } = useGenerationForm();

useEffect(() => {
if (!selectedProfile) {
return;
}

if (selectedProfile.language) {
form.setValue('language', selectedProfile.language as LanguageCode);
}

const preferredEngine = selectedProfile.default_engine || selectedProfile.preset_engine;
if (preferredEngine) {
applyEngineSelection(form, getEngineSelectValue(preferredEngine));
}
}, [form, selectedProfile]);

async function onSubmit(data: Parameters<typeof handleSubmit>[0]) {
await handleSubmit(data, selectedProfileId);
}
Expand Down Expand Up @@ -91,7 +114,7 @@ export function GenerationForm() {
)}
/>

{form.watch('engine') === 'qwen' && (
{(form.watch('engine') === 'qwen' || form.watch('engine') === 'qwen_custom_voice') && (
<FormField
control={form.control}
name="instruct"
Expand Down
5 changes: 5 additions & 0 deletions app/src/components/ServerSettings/ModelManagement.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ const MODEL_DESCRIPTIONS: Record<string, string> = {
'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.',
kokoro:
'Kokoro 82M by hexgrad. Tiny 82M-parameter TTS that runs at CPU realtime. Supports 8 languages with pre-built voice styles. Apache 2.0 licensed.',
'qwen-custom-voice-1.7B':
'Qwen3-TTS CustomVoice 1.7B by Alibaba. 9 premium preset voices with instruct-based style control for tone, emotion, and prosody. Supports 10 languages.',
'qwen-custom-voice-0.6B':
'Qwen3-TTS CustomVoice 0.6B by Alibaba. Lightweight version with the same 9 preset voices and instruct control. Faster inference for lower-end hardware.',
'whisper-base':
'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.',
'whisper-small':
Expand Down Expand Up @@ -396,6 +400,7 @@ export function ModelManagement() {
modelStatus?.models.filter(
(m) =>
m.model_name.startsWith('qwen-tts') ||
m.model_name.startsWith('qwen-custom-voice') ||
m.model_name.startsWith('luxtts') ||
m.model_name.startsWith('chatterbox') ||
m.model_name.startsWith('tada') ||
Expand Down
8 changes: 7 additions & 1 deletion app/src/components/VoiceProfiles/ProfileCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ import { useDeleteProfile, useExportProfile } from '@/lib/hooks/useProfiles';
import { cn } from '@/lib/utils/cn';
import { useUIStore } from '@/stores/uiStore';

/**
 * Human-readable display names for preset engine badges.
 *
 * Keyed by a profile's `preset_engine` identifier. Engines missing from this
 * map are rendered with their raw identifier by the badge. Labels match the
 * names used for the same engines in ProfileList and ProfileForm, so a preset
 * engine is never shown under two different names.
 */
const ENGINE_DISPLAY_NAMES: Record<string, string> = {
  kokoro: 'Kokoro',
  qwen_custom_voice: 'Qwen CustomVoice',
};
Comment on lines +20 to +24
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Use the same qwen_custom_voice label across profile surfaces.

ProfileList.tsx renders this engine as "Qwen CustomVoice", but the badge here renders "CustomVoice". The same preset type ends up with two names in adjacent UI. Reuse a shared engine-name map or align this label with the list.

Also applies to: 108-108

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/src/components/VoiceProfiles/ProfileCard.tsx` around lines 20 - 24,
ENGINE_DISPLAY_NAMES in ProfileCard.tsx uses "CustomVoice" for the key
qwen_custom_voice which mismatches the "Qwen CustomVoice" label rendered by
ProfileList.tsx; fix by centralizing the engine display names
(exporting/importing a shared map) or change
ENGINE_DISPLAY_NAMES['qwen_custom_voice'] to "Qwen CustomVoice" so both
ProfileCard and ProfileList use the identical label (update the constant named
ENGINE_DISPLAY_NAMES or replace its usage with the shared map).


/** Props accepted by the ProfileCard component. */
interface ProfileCardProps {
/** Voice profile rendered by the card; its `voice_type` and `preset_engine` drive the badges. */
profile: VoiceProfileResponse;
}
Expand Down Expand Up @@ -99,7 +105,7 @@ export function ProfileCard({ profile }: ProfileCardProps) {
</Badge>
{profile.voice_type === 'preset' && (
<Badge variant="secondary" className="text-xs h-5 px-1.5">
{profile.preset_engine}
{ENGINE_DISPLAY_NAMES[profile.preset_engine ?? ''] ?? profile.preset_engine}
</Badge>
)}
{profile.voice_type === 'designed' && (
Expand Down
13 changes: 12 additions & 1 deletion app/src/components/VoiceProfiles/ProfileForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ import { AudioSampleUpload } from './AudioSampleUpload';
import { SampleList } from './SampleList';

const MAX_AUDIO_DURATION_SECONDS = 30;
const PRESET_ONLY_ENGINES = new Set(['kokoro']);
const PRESET_ONLY_ENGINES = new Set(['kokoro', 'qwen_custom_voice']);
const DEFAULT_ENGINE_OPTIONS = [
{ value: 'qwen', label: 'Qwen3-TTS' },
{ value: 'qwen_custom_voice', label: 'Qwen CustomVoice' },
{ value: 'luxtts', label: 'LuxTTS' },
{ value: 'chatterbox', label: 'Chatterbox' },
{ value: 'chatterbox_turbo', label: 'Chatterbox Turbo' },
Expand Down Expand Up @@ -374,6 +375,15 @@ export function ProfileForm() {
}
}, [availableDefaultEngines, defaultEngine]);

useEffect(() => {
if (!selectedPresetVoiceId) {
return;
}

if (!presetVoices.some((voice: PresetVoice) => voice.voice_id === selectedPresetVoiceId)) {
setSelectedPresetVoiceId('');
}
}, [presetVoices, selectedPresetVoiceId]);
async function handleTranscribe() {
const file = form.getValues('sampleFile');
if (!file) {
Expand Down Expand Up @@ -849,6 +859,7 @@ export function ProfileForm() {
</FormControl>
<SelectContent>
<SelectItem value="kokoro">Kokoro 82M</SelectItem>
<SelectItem value="qwen_custom_voice">Qwen CustomVoice</SelectItem>
</SelectContent>
</Select>
</FormItem>
Expand Down
3 changes: 2 additions & 1 deletion app/src/components/VoiceProfiles/ProfileList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ import { ProfileCard } from './ProfileCard';
import { ProfileForm } from './ProfileForm';

/** Engines that use preset (built-in) voices instead of cloned profiles. */
const PRESET_ENGINES = new Set(['kokoro']);
const PRESET_ENGINES = new Set(['kokoro', 'qwen_custom_voice']);

/**
 * Human-readable engine names for empty state messages.
 *
 * Keyed by engine identifier. Currently covers the same engines that are
 * listed in PRESET_ENGINES.
 * NOTE(review): the lookup site is outside this view; confirm it provides a
 * fallback for engines that are not listed here.
 */
const ENGINE_NAMES: Record<string, string> = {
kokoro: 'Kokoro',
qwen_custom_voice: 'Qwen CustomVoice',
};

export function ProfileList() {
Expand Down
6 changes: 0 additions & 6 deletions app/src/lib/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,6 @@ class ApiClient {
return this.request<{ engine: string; voices: PresetVoice[] }>(`/profiles/presets/${engine}`);
}

async seedPresetProfiles(
engine: string,
): Promise<{ engine: string; created: number; total_available: number }> {
return this.request(`/profiles/presets/${engine}/seed`, { method: 'POST' });
}

async updateProfile(profileId: string, data: VoiceProfileCreate): Promise<VoiceProfileResponse> {
return this.request<VoiceProfileResponse>(`/profiles/${profileId}`, {
method: 'PUT',
Expand Down
9 changes: 8 additions & 1 deletion app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,14 @@ export interface GenerationRequest {
language: LanguageCode;
seed?: number;
model_size?: '1.7B' | '0.6B' | '1B' | '3B';
engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo' | 'tada' | 'kokoro';
engine?:
| 'qwen'
| 'qwen_custom_voice'
| 'luxtts'
| 'chatterbox'
| 'chatterbox_turbo'
| 'tada'
| 'kokoro';
instruct?: string;
max_chunk_chars?: number;
crossfade_ms?: number;
Expand Down
1 change: 1 addition & 0 deletions app/src/lib/constants/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export const ENGINE_LANGUAGES: Record<string, readonly LanguageCode[]> = {
chatterbox_turbo: ['en'],
tada: ['en', 'ar', 'zh', 'de', 'es', 'fr', 'it', 'ja', 'pl', 'pt'],
kokoro: ['en', 'es', 'fr', 'hi', 'it', 'pt', 'ja', 'zh'],
qwen_custom_voice: ['zh', 'en', 'ja', 'ko', 'de', 'fr', 'ru', 'pt', 'es', 'it'],
} as const;

/** Helper: get language options for a given engine. */
Expand Down
32 changes: 25 additions & 7 deletions app/src/lib/hooks/useGenerationForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ const generationSchema = z.object({
seed: z.number().int().optional(),
modelSize: z.enum(['1.7B', '0.6B', '1B', '3B']).optional(),
instruct: z.string().max(500).optional(),
engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada', 'kokoro']).optional(),
engine: z
.enum([
'qwen',
'qwen_custom_voice',
'luxtts',
'chatterbox',
'chatterbox_turbo',
'tada',
'kokoro',
])
.optional(),
});

export type GenerationFormValues = z.infer<typeof generationSchema>;
Expand Down Expand Up @@ -85,7 +95,9 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
: 'tada-1b'
: engine === 'kokoro'
? 'kokoro'
: `qwen-tts-${data.modelSize}`;
: engine === 'qwen_custom_voice'
? `qwen-custom-voice-${data.modelSize}`
: `qwen-tts-${data.modelSize}`;
const displayName =
engine === 'luxtts'
? 'LuxTTS'
Expand All @@ -99,9 +111,13 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
: 'TADA 1B'
: engine === 'kokoro'
? 'Kokoro 82M'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';
: engine === 'qwen_custom_voice'
? data.modelSize === '1.7B'
? 'Qwen CustomVoice 1.7B'
: 'Qwen CustomVoice 0.6B'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';

// Check if model needs downloading
try {
Expand All @@ -116,7 +132,9 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
console.error('Failed to check model status:', error);
}

const hasModelSizes = engine === 'qwen' || engine === 'tada';
const hasModelSizes =
engine === 'qwen' || engine === 'qwen_custom_voice' || engine === 'tada';
const supportsInstruct = engine === 'qwen' || engine === 'qwen_custom_voice';
const effectsChain = options.getEffectsChain?.();
// This now returns immediately with status="generating"
const result = await generation.mutateAsync({
Expand All @@ -126,7 +144,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
seed: data.seed,
model_size: hasModelSizes ? data.modelSize : undefined,
engine,
instruct: engine === 'qwen' ? data.instruct || undefined : undefined,
instruct: supportsInstruct ? data.instruct || undefined : undefined,
max_chunk_chars: maxChunkChars,
crossfade_ms: crossfadeMs,
normalize: normalizeAudio,
Expand Down
Loading