From 70ca7f66cb79b058ac09e3ac2bec215afe294d91 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 06:21:34 -0700
Subject: [PATCH 1/4] feat: chunked TTS generation for long text
 (engine-agnostic)

Text exceeding max_chunk_chars (default 800) is automatically split at
sentence boundaries, generated per-chunk, and concatenated with a 50ms
crossfade.  Works with all engines (Qwen, LuxTTS, Chatterbox, Turbo).

- Abbreviation-aware sentence splitter (Dr., Mr., e.g., decimals)
- CJK sentence-ending punctuation support
- Paralinguistic tag preservation ([laugh], [cough], etc.)
- Per-chunk seed variation to avoid correlated RNG artefacts
- Per-chunk Chatterbox trim (catches hallucination at each boundary)
- max_chunk_chars exposed as per-request param on GenerationRequest
- Text max_length raised to 50,000 characters

Closes #99
---
 backend/main.py              |  47 +++---
 backend/models.py            |   3 +-
 backend/utils/chunked_tts.py | 298 +++++++++++++++++++++++++++++++++++
 3 files changed, 329 insertions(+), 19 deletions(-)
 create mode 100644 backend/utils/chunked_tts.py

diff --git a/backend/main.py b/backend/main.py
index 69fcc869..39e135e5 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -824,18 +824,24 @@ async def download_chatterbox_turbo_background():
             engine=engine,
         )
 
-        audio, sample_rate = await tts_model.generate(
-            data.text,
-            voice_prompt,
-            data.language,
-            data.seed,
-            data.instruct,
-        )
+        from .utils.chunked_tts import generate_chunked
 
-        # Trim trailing silence/hallucination for Chatterbox output
+        # Resolve per-chunk trim function for engines that need it
+        trim_fn = None
         if engine in ("chatterbox", "chatterbox_turbo"):
             from .utils.audio import trim_tts_output
-            audio = trim_tts_output(audio, sample_rate)
+            trim_fn = trim_tts_output
+
+        audio, sample_rate = await generate_chunked(
+            tts_model,
+            data.text,
+            voice_prompt,
+            language=data.language,
+            seed=data.seed,
+            instruct=data.instruct,
+            max_chunk_chars=data.max_chunk_chars,
+            trim_fn=trim_fn,
+        )
 
         # Calculate duration
         duration = len(audio) / sample_rate
@@ -949,18 +955,23 @@ async def stream_speech(
         data.profile_id, db, engine=engine,
     )
 
-    audio, sample_rate = await tts_model.generate(
-        data.text,
-        voice_prompt,
-        data.language,
-        data.seed,
-        data.instruct,
-    )
+    from .utils.chunked_tts import generate_chunked
 
-    # Trim trailing silence/hallucination for Chatterbox output
+    trim_fn = None
     if engine in ("chatterbox", "chatterbox_turbo"):
         from .utils.audio import trim_tts_output
-        audio = trim_tts_output(audio, sample_rate)
+        trim_fn = trim_tts_output
+
+    audio, sample_rate = await generate_chunked(
+        tts_model,
+        data.text,
+        voice_prompt,
+        language=data.language,
+        seed=data.seed,
+        instruct=data.instruct,
+        max_chunk_chars=data.max_chunk_chars,
+        trim_fn=trim_fn,
+    )
 
     wav_bytes = tts.audio_to_wav_bytes(audio, sample_rate)
 
diff --git a/backend/models.py b/backend/models.py
index ebfe70e9..771dfa7b 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -52,12 +52,13 @@ class Config:
 class GenerationRequest(BaseModel):
     """Request model for voice generation."""
     profile_id: str
-    text: str = Field(..., min_length=1, max_length=5000)
+    text: str = Field(..., min_length=1, max_length=50000)
     language: str = Field(default="en", pattern="^(zh|en|ja|ko|de|fr|ru|pt|es|it|he)$")
     seed: Optional[int] = Field(None, ge=0)
     model_size: Optional[str] = Field(default="1.7B", pattern="^(1\\.7B|0\\.6B)$")
     instruct: Optional[str] = Field(None, max_length=500)
     engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo)$")
+    max_chunk_chars: int = Field(default=800, ge=100, le=5000, description="Max characters per chunk for long text splitting")
 
 
 class GenerationResponse(BaseModel):
diff --git a/backend/utils/chunked_tts.py b/backend/utils/chunked_tts.py
new file mode 100644
index 00000000..b9b9f0cc
--- /dev/null
+++ b/backend/utils/chunked_tts.py
@@ -0,0 +1,298 @@
+"""
+Chunked TTS generation utilities.
+
+Splits long text into sentence-boundary chunks, generates audio per-chunk
+via any TTSBackend, and concatenates with crossfade.  All logic is
+engine-agnostic — it wraps the standard ``TTSBackend.generate()`` interface.
+
+Short text (≤ max_chunk_chars) uses the single-shot fast path with zero
+overhead.
+"""
+
+import logging
+import re
+from typing import List, Tuple
+
+import numpy as np
+
+logger = logging.getLogger("voicebox.chunked-tts")
+
+# Default chunk size in characters.  Can be overridden per-request via
+# the ``max_chunk_chars`` field on GenerationRequest.
+DEFAULT_MAX_CHUNK_CHARS = 800
+
+# Common abbreviations that should NOT be treated as sentence endings.
+# Stored lowercase and without the trailing period; lookups lowercase the word.
+_ABBREVIATIONS = frozenset(
+    {
+        "mr",
+        "mrs",
+        "ms",
+        "dr",
+        "prof",
+        "sr",
+        "jr",
+        "st",
+        "ave",
+        "blvd",
+        "inc",
+        "ltd",
+        "corp",
+        "dept",
+        "est",
+        "approx",
+        "vs",
+        "etc",
+        "e.g",
+        "i.e",
+        "a.m",
+        "p.m",
+        "u.s",
+        "u.s.a",
+        "u.k",
+    }
+)
+
+# Paralinguistic tags used by Chatterbox Turbo.  The splitter must never
+# cut inside one of these.
+_PARA_TAG_RE = re.compile(r"\[[^\]]*\]")
+
+
+# ---------------------------------------------------------------------------
+# Text splitting
+# ---------------------------------------------------------------------------
+
+
+def split_text_into_chunks(text: str, max_chars: int = DEFAULT_MAX_CHUNK_CHARS) -> List[str]:
+    """Split *text* at natural boundaries into chunks of at most *max_chars*.
+
+    Priority: sentence-end (``.!?`` not preceded by an abbreviation and not
+    inside brackets) → clause boundary (``;:,—``) → whitespace → hard cut.
+
+    Paralinguistic tags like ``[laugh]`` are treated as atomic and will not
+    be split across chunks.  Returns ``[]`` for blank input and raises
+    ``ValueError`` when *max_chars* is smaller than 1.
+    """
+    if max_chars < 1:
+        # A non-positive limit can never make progress and would loop forever.
+        raise ValueError(f"max_chars must be >= 1, got {max_chars}")
+
+    chunks: List[str] = []
+    # Leading whitespace is dropped on every pass, so chunks never start with it.
+    remaining = text.strip()
+
+    while remaining:
+        if len(remaining) <= max_chars:
+            # Whatever is left fits in one chunk — done.
+            chunks.append(remaining)
+            break
+
+        segment = remaining[:max_chars]
+
+        # Try to split at the last real sentence ending, then fall back to
+        # progressively weaker boundaries within the window.
+        split_pos = _find_last_sentence_end(segment)
+        if split_pos == -1:
+            split_pos = _find_last_clause_boundary(segment)
+        if split_pos == -1:
+            split_pos = segment.rfind(" ")
+        if split_pos == -1:
+            # Absolute fallback: hard cut but avoid splitting inside a tag
+            split_pos = _safe_hard_cut(segment, max_chars)
+
+        # split_pos is inclusive: the boundary character stays with this chunk.
+        chunk = remaining[: split_pos + 1].strip()
+        if chunk:
+            chunks.append(chunk)
+        remaining = remaining[split_pos + 1 :].lstrip()
+
+    return chunks
+
+
+def _find_last_sentence_end(text: str) -> int:
+    """Return the index of the last sentence-ending punctuation in *text*, or -1.
+
+    Skips periods that follow known abbreviations (``Dr.``, ``e.g.``, ``U.S.A.``),
+    periods right after a digit, and any punctuation inside a ``[tag]``.  Also
+    recognises CJK sentence-ending punctuation (``。！？``).
+    """
+    best = -1
+    # ASCII sentence ends
+    for m in re.finditer(r"[.!?](?:\s|$)", text):
+        pos = m.start()
+        char = text[pos]
+        # Skip periods after abbreviations
+        if char == ".":
+            # Walk back over the preceding token, keeping inner dots ("e.g", "u.s.a")
+            word_start = pos - 1
+            while word_start >= 0 and (text[word_start].isalpha() or text[word_start] == "."):
+                word_start -= 1
+            word = text[word_start + 1 : pos].lower().lstrip(".")
+            if word in _ABBREVIATIONS:
+                continue
+            # Skip decimal numbers (digit immediately before the period)
+            if pos > 0 and text[pos - 1].isdigit():
+                continue
+        # Skip if we're inside a bracket tag
+        if _inside_bracket_tag(text, pos):
+            continue
+        best = pos
+    # CJK sentence-ending punctuation
+    for m in re.finditer(r"[\u3002\uff01\uff1f]", text):
+        if m.start() > best and not _inside_bracket_tag(text, m.start()):
+            best = m.start()
+    return best
+
+
+def _find_last_clause_boundary(text: str) -> int:
+    """Locate the final clause separator (``;:,—``) that ends a clause in *text*.
+
+    A separator only counts when followed by whitespace or the end of *text*
+    and when it is not inside a ``[tag]``.  Returns -1 if none qualifies.
+    """
+    hits = (m.start() for m in re.finditer(r"[;:,\u2014](?:\s|$)", text))
+    # Matches arrive left to right, so the final element is the last valid one.
+    candidates = [idx for idx in hits if not _inside_bracket_tag(text, idx)]
+    return candidates[-1] if candidates else -1
+
+
+def _inside_bracket_tag(text: str, pos: int) -> bool:
+    """Tell whether index *pos* lies past the ``[`` of some ``[...]`` tag in *text*.
+
+    The closing ``]`` counts as inside; the opening ``[`` itself does not.
+    """
+    return any(tag.start() < pos < tag.end() for tag in _PARA_TAG_RE.finditer(text))
+
+
+def _safe_hard_cut(segment: str, max_chars: int) -> int:
+    """Return the last index to keep so a hard cut never lands inside a ``[tag]``.
+
+    *segment* is already truncated, so a straddling tag shows up as an unclosed ``[``.
+    """
+    cut = min(max_chars, len(segment)) - 1
+    open_idx = segment.rfind("[", 0, cut + 1)
+    return open_idx - 1 if open_idx > 0 and "]" not in segment[open_idx : cut + 1] else cut
+
+
+# ---------------------------------------------------------------------------
+# Audio concatenation
+# ---------------------------------------------------------------------------
+
+
+def concatenate_audio_chunks(
+    chunks: List[np.ndarray],
+    sample_rate: int,
+    crossfade_ms: int = 50,
+) -> np.ndarray:
+    """Concatenate audio arrays with a short crossfade to eliminate clicks.
+
+    Each chunk is expected to be a 1-D float32 ndarray at *sample_rate* Hz.
+    A single chunk is returned as-is; ``crossfade_ms <= 0`` gives a hard cut.
+    """
+    if not chunks:
+        return np.array([], dtype=np.float32)
+    if len(chunks) == 1:
+        return chunks[0]
+
+    crossfade_samples = max(0, int(sample_rate * crossfade_ms / 1000))
+    # One preallocated buffer avoids re-copying the whole result per chunk.
+    result = np.empty(sum(len(c) for c in chunks), dtype=np.float32)
+    end = len(chunks[0])
+    result[:end] = chunks[0]
+    for chunk in chunks[1:]:
+        overlap = min(crossfade_samples, end, len(chunk))
+        if overlap > 0:
+            fade_out = np.linspace(1.0, 0.0, overlap, dtype=np.float32)
+            fade_in = np.linspace(0.0, 1.0, overlap, dtype=np.float32)
+            tail = result[end - overlap : end]
+            tail[:] = tail * fade_out + chunk[:overlap] * fade_in
+        result[end : end + len(chunk) - overlap] = chunk[overlap:]
+        end += len(chunk) - overlap
+    return result[:end]
+
+
+# ---------------------------------------------------------------------------
+# Engine-agnostic chunked generation
+# ---------------------------------------------------------------------------
+
+
+async def generate_chunked(
+    backend,
+    text: str,
+    voice_prompt: dict,
+    language: str = "en",
+    seed: int | None = None,
+    instruct: str | None = None,
+    max_chunk_chars: int = DEFAULT_MAX_CHUNK_CHARS,
+    trim_fn=None,
+) -> Tuple[np.ndarray, int]:
+    """Generate audio with automatic chunking for long text.
+
+    Text that splits into at most one chunk goes straight to a single
+    ``backend.generate()`` call (plus *trim_fn*, when given).
+
+    For longer text the input is split at natural sentence boundaries,
+    each chunk is generated independently, optionally trimmed (useful for
+    Chatterbox engines that hallucinate trailing noise), and the results
+    are concatenated with a short crossfade.
+
+    Parameters
+    ----------
+    backend : TTSBackend
+        Any backend implementing the ``generate()`` protocol.
+    text : str
+        Input text (may be arbitrarily long).
+    voice_prompt, language, seed, instruct
+        Forwarded to ``backend.generate()`` verbatim.
+    max_chunk_chars : int
+        Maximum characters per chunk (default 800).
+    trim_fn : callable | None
+        Optional ``(audio, sample_rate) -> audio`` post-processing
+        function applied to each chunk before concatenation (e.g.
+        ``trim_tts_output`` for Chatterbox engines).
+
+    Returns
+    -------
+    (audio, sample_rate) : Tuple[np.ndarray, int]
+    """
+    chunks = split_text_into_chunks(text, max_chunk_chars)
+
+    if len(chunks) <= 1:
+        # Zero or one chunk — single-shot fast path on the original text
+        audio, sample_rate = await backend.generate(
+            text, voice_prompt, language, seed, instruct,
+        )
+        if trim_fn is not None:
+            audio = trim_fn(audio, sample_rate)
+        return audio, sample_rate
+
+    # Long text — chunked generation
+    logger.info(
+        "Splitting %d chars into %d chunks (max %d chars each)",
+        len(text), len(chunks), max_chunk_chars,
+    )
+    audio_chunks: List[np.ndarray] = []
+    sample_rate: int | None = None  # taken from the first generated chunk
+
+    for i, chunk_text in enumerate(chunks):
+        logger.info(
+            "Generating chunk %d/%d (%d chars)",
+            i + 1, len(chunks), len(chunk_text),
+        )
+        # Vary the seed per chunk (seed + chunk index) to avoid correlated
+        # RNG artefacts while staying deterministic: the same (text, seed)
+        # pair always yields the same per-chunk seeds.  None stays None.
+        chunk_seed = (seed + i) if seed is not None else None
+
+        chunk_audio, chunk_sr = await backend.generate(
+            chunk_text, voice_prompt, language, chunk_seed, instruct,
+        )
+        if trim_fn is not None:
+            chunk_audio = trim_fn(chunk_audio, chunk_sr)
+
+        audio_chunks.append(np.asarray(chunk_audio, dtype=np.float32))
+        if sample_rate is None:
+            sample_rate = chunk_sr
+
+    audio = concatenate_audio_chunks(audio_chunks, sample_rate)
+    return audio, sample_rate

From 837f8525d89aec2faa4318630b58b889e2eb7d92 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 06:35:39 -0700
Subject: [PATCH 2/4] feat: add auto-chunking limit slider to settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Persisted setting (default 800 chars) controls the maximum chunk size
used when long text is split before generation.  Lower values can improve
quality for long outputs by keeping each chunk well within the model's
context window.

- Slider in Server Connection settings (100–2000 chars, step 50)
- Stored in localStorage via Zustand persist
- Passed as max_chunk_chars on every generation request
- Frontend text limit raised to 50,000 to match backend
---
 .../ServerSettings/ConnectionForm.tsx         | 39 +++++++++++++++----
 app/src/lib/api/types.ts                      |  1 +
 app/src/lib/hooks/useGenerationForm.ts        |  5 ++-
 app/src/stores/serverStore.ts                 |  6 +++
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/app/src/components/ServerSettings/ConnectionForm.tsx b/app/src/components/ServerSettings/ConnectionForm.tsx
index 44eeb4e6..a5521870 100644
--- a/app/src/components/ServerSettings/ConnectionForm.tsx
+++ b/app/src/components/ServerSettings/ConnectionForm.tsx
@@ -4,6 +4,7 @@ import { useForm } from 'react-hook-form';
 import * as z from 'zod';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
+import { Checkbox } from '@/components/ui/checkbox';
 import {
   Form,
   FormControl,
@@ -14,10 +15,10 @@ import {
   FormMessage,
 } from '@/components/ui/form';
 import { Input } from '@/components/ui/input';
-import { Checkbox } from '@/components/ui/checkbox';
+import { Slider } from '@/components/ui/slider';
 import { useToast } from '@/components/ui/use-toast';
-import { useServerStore } from '@/stores/serverStore';
 import { usePlatform } from '@/platform/PlatformContext';
+import { useServerStore } from '@/stores/serverStore';
 
 const connectionSchema = z.object({
   serverUrl: z.string().url('Please enter a valid URL'),
@@ -33,6 +34,8 @@ export function ConnectionForm() {
   const setKeepServerRunningOnClose = useServerStore((state) => state.setKeepServerRunningOnClose);
   const mode = useServerStore((state) => state.mode);
   const setMode = useServerStore((state) => state.setMode);
+  const maxChunkChars = useServerStore((state) => state.maxChunkChars);
+  const setMaxChunkChars = useServerStore((state) => state.setMaxChunkChars);
   const { toast } = useToast();
 
   const form = useForm<ConnectionFormValues>({
@@ -59,11 +62,7 @@ export function ConnectionForm() {
   }
 
   return (
-    <Card
-      role="region"
-      aria-label="Server Connection"
-      tabIndex={0}
-    >
+    <Card role="region" aria-label="Server Connection" tabIndex={0}>
       <CardHeader>
         <CardTitle>Server Connection</CardTitle>
       </CardHeader>
@@ -153,6 +152,32 @@ export function ConnectionForm() {
             </div>
           </div>
         )}
+
+        <div className="mt-6 pt-6 border-t">
+          <div className="space-y-3">
+            <div className="flex items-center justify-between">
+              <label htmlFor="maxChunkChars" className="text-sm font-medium leading-none">
+                Auto-chunking limit
+              </label>
+              <span className="text-sm tabular-nums text-muted-foreground">
+                {maxChunkChars} chars
+              </span>
+            </div>
+            <Slider
+              id="maxChunkChars"
+              value={[maxChunkChars]}
+              onValueChange={([value]) => setMaxChunkChars(value)}
+              min={100}
+              max={2000}
+              step={50}
+              aria-label="Auto-chunking character limit"
+            />
+            <p className="text-sm text-muted-foreground">
+              Long text is split into chunks at sentence boundaries before generating. Lower values
+              can improve quality for long outputs. Default is 800.
+            </p>
+          </div>
+        </div>
       </CardContent>
     </Card>
   );
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index fe5f05a1..d8ad6b13 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -36,6 +36,7 @@ export interface GenerationRequest {
   model_size?: '1.7B' | '0.6B';
   engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo';
   instruct?: string;
+  max_chunk_chars?: number;
 }
 
 export interface GenerationResponse {
diff --git a/app/src/lib/hooks/useGenerationForm.ts b/app/src/lib/hooks/useGenerationForm.ts
index 5a83ce41..e1244d22 100644
--- a/app/src/lib/hooks/useGenerationForm.ts
+++ b/app/src/lib/hooks/useGenerationForm.ts
@@ -9,9 +9,10 @@ import { useGeneration } from '@/lib/hooks/useGeneration';
 import { useModelDownloadToast } from '@/lib/hooks/useModelDownloadToast';
 import { useGenerationStore } from '@/stores/generationStore';
 import { usePlayerStore } from '@/stores/playerStore';
+import { useServerStore } from '@/stores/serverStore';
 
 const generationSchema = z.object({
-  text: z.string().min(1, 'Text is required').max(5000),
+  text: z.string().min(1, 'Text is required').max(50000),
   language: z.enum(LANGUAGE_CODES as [LanguageCode, ...LanguageCode[]]),
   seed: z.number().int().optional(),
   modelSize: z.enum(['1.7B', '0.6B']).optional(),
@@ -31,6 +32,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
   const generation = useGeneration();
   const setAudioWithAutoPlay = usePlayerStore((state) => state.setAudioWithAutoPlay);
   const setIsGenerating = useGenerationStore((state) => state.setIsGenerating);
+  const maxChunkChars = useServerStore((state) => state.maxChunkChars);
   const [downloadingModelName, setDownloadingModelName] = useState<string | null>(null);
   const [downloadingDisplayName, setDownloadingDisplayName] = useState<string | null>(null);
 
@@ -110,6 +112,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         model_size: isQwen ? data.modelSize : undefined,
         engine,
         instruct: isQwen ? data.instruct || undefined : undefined,
+        max_chunk_chars: maxChunkChars,
       });
 
       toast({
diff --git a/app/src/stores/serverStore.ts b/app/src/stores/serverStore.ts
index 36d9f0af..9d4ad89c 100644
--- a/app/src/stores/serverStore.ts
+++ b/app/src/stores/serverStore.ts
@@ -13,6 +13,9 @@ interface ServerStore {
 
   keepServerRunningOnClose: boolean;
   setKeepServerRunningOnClose: (keepRunning: boolean) => void;
+
+  maxChunkChars: number;
+  setMaxChunkChars: (value: number) => void;
 }
 
 export const useServerStore = create<ServerStore>()(
@@ -29,6 +32,9 @@ export const useServerStore = create<ServerStore>()(
 
       keepServerRunningOnClose: false,
       setKeepServerRunningOnClose: (keepRunning) => set({ keepServerRunningOnClose: keepRunning }),
+
+      maxChunkChars: 800,
+      setMaxChunkChars: (value) => set({ maxChunkChars: value }),
     }),
     {
       name: 'voicebox-server',

From 97292ecef732dd64af1a8ee583e6bd0a210dacaf Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 06:48:06 -0700
Subject: [PATCH 3/4] feat: add chunk crossfade slider (0ms = hard cut)

Persisted setting (default 50ms) controls how audio chunks are blended
together.  Set to 0 for a clean hard cut with no overlap.
---
 .../ServerSettings/ConnectionForm.tsx         | 25 +++++++++++++++++++
 app/src/lib/api/types.ts                      |  1 +
 app/src/lib/hooks/useGenerationForm.ts        |  2 ++
 app/src/stores/serverStore.ts                 |  6 +++++
 backend/main.py                               |  2 ++
 backend/models.py                             |  1 +
 backend/utils/chunked_tts.py                  |  8 ++++--
 7 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/app/src/components/ServerSettings/ConnectionForm.tsx b/app/src/components/ServerSettings/ConnectionForm.tsx
index a5521870..c9de46a6 100644
--- a/app/src/components/ServerSettings/ConnectionForm.tsx
+++ b/app/src/components/ServerSettings/ConnectionForm.tsx
@@ -36,6 +36,8 @@ export function ConnectionForm() {
   const setMode = useServerStore((state) => state.setMode);
   const maxChunkChars = useServerStore((state) => state.maxChunkChars);
   const setMaxChunkChars = useServerStore((state) => state.setMaxChunkChars);
+  const crossfadeMs = useServerStore((state) => state.crossfadeMs);
+  const setCrossfadeMs = useServerStore((state) => state.setCrossfadeMs);
   const { toast } = useToast();
 
   const form = useForm<ConnectionFormValues>({
@@ -177,6 +179,29 @@ export function ConnectionForm() {
               can improve quality for long outputs. Default is 800.
             </p>
           </div>
+
+          <div className="space-y-3 mt-4">
+            <div className="flex items-center justify-between">
+              <label htmlFor="crossfadeMs" className="text-sm font-medium leading-none">
+                Chunk crossfade
+              </label>
+              <span className="text-sm tabular-nums text-muted-foreground">
+                {crossfadeMs === 0 ? 'Cut' : `${crossfadeMs}ms`}
+              </span>
+            </div>
+            <Slider
+              id="crossfadeMs"
+              value={[crossfadeMs]}
+              onValueChange={([value]) => setCrossfadeMs(value)}
+              min={0}
+              max={200}
+              step={10}
+              aria-label="Chunk crossfade duration"
+            />
+            <p className="text-sm text-muted-foreground">
+              Blends audio between chunks to smooth transitions. Set to 0 for a hard cut.
+            </p>
+          </div>
         </div>
       </CardContent>
     </Card>
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index d8ad6b13..af5a6c15 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -37,6 +37,7 @@ export interface GenerationRequest {
   engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo';
   instruct?: string;
   max_chunk_chars?: number;
+  crossfade_ms?: number;
 }
 
 export interface GenerationResponse {
diff --git a/app/src/lib/hooks/useGenerationForm.ts b/app/src/lib/hooks/useGenerationForm.ts
index e1244d22..66effd22 100644
--- a/app/src/lib/hooks/useGenerationForm.ts
+++ b/app/src/lib/hooks/useGenerationForm.ts
@@ -33,6 +33,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
   const setAudioWithAutoPlay = usePlayerStore((state) => state.setAudioWithAutoPlay);
   const setIsGenerating = useGenerationStore((state) => state.setIsGenerating);
   const maxChunkChars = useServerStore((state) => state.maxChunkChars);
+  const crossfadeMs = useServerStore((state) => state.crossfadeMs);
   const [downloadingModelName, setDownloadingModelName] = useState<string | null>(null);
   const [downloadingDisplayName, setDownloadingDisplayName] = useState<string | null>(null);
 
@@ -113,6 +114,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         engine,
         instruct: isQwen ? data.instruct || undefined : undefined,
         max_chunk_chars: maxChunkChars,
+        crossfade_ms: crossfadeMs,
       });
 
       toast({
diff --git a/app/src/stores/serverStore.ts b/app/src/stores/serverStore.ts
index 9d4ad89c..1795b61c 100644
--- a/app/src/stores/serverStore.ts
+++ b/app/src/stores/serverStore.ts
@@ -16,6 +16,9 @@ interface ServerStore {
 
   maxChunkChars: number;
   setMaxChunkChars: (value: number) => void;
+
+  crossfadeMs: number;
+  setCrossfadeMs: (value: number) => void;
 }
 
 export const useServerStore = create<ServerStore>()(
@@ -35,6 +38,9 @@ export const useServerStore = create<ServerStore>()(
 
       maxChunkChars: 800,
       setMaxChunkChars: (value) => set({ maxChunkChars: value }),
+
+      crossfadeMs: 50,
+      setCrossfadeMs: (value) => set({ crossfadeMs: value }),
     }),
     {
       name: 'voicebox-server',
diff --git a/backend/main.py b/backend/main.py
index 39e135e5..cb9a2bd3 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -840,6 +840,7 @@ async def download_chatterbox_turbo_background():
             seed=data.seed,
             instruct=data.instruct,
             max_chunk_chars=data.max_chunk_chars,
+            crossfade_ms=data.crossfade_ms,
             trim_fn=trim_fn,
         )
 
@@ -970,6 +971,7 @@ async def stream_speech(
         seed=data.seed,
         instruct=data.instruct,
         max_chunk_chars=data.max_chunk_chars,
+        crossfade_ms=data.crossfade_ms,
         trim_fn=trim_fn,
     )
 
diff --git a/backend/models.py b/backend/models.py
index 771dfa7b..b462b67a 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -59,6 +59,7 @@ class GenerationRequest(BaseModel):
     instruct: Optional[str] = Field(None, max_length=500)
     engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo)$")
     max_chunk_chars: int = Field(default=800, ge=100, le=5000, description="Max characters per chunk for long text splitting")
+    crossfade_ms: int = Field(default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)")
 
 
 class GenerationResponse(BaseModel):
diff --git a/backend/utils/chunked_tts.py b/backend/utils/chunked_tts.py
index b9b9f0cc..53a454c6 100644
--- a/backend/utils/chunked_tts.py
+++ b/backend/utils/chunked_tts.py
@@ -224,6 +224,7 @@ async def generate_chunked(
     seed: int | None = None,
     instruct: str | None = None,
     max_chunk_chars: int = DEFAULT_MAX_CHUNK_CHARS,
+    crossfade_ms: int = 50,
     trim_fn=None,
 ) -> Tuple[np.ndarray, int]:
     """Generate audio with automatic chunking for long text.
@@ -234,7 +235,7 @@ async def generate_chunked(
     For longer text the input is split at natural sentence boundaries,
     each chunk is generated independently, optionally trimmed (useful for
     Chatterbox engines that hallucinate trailing noise), and the results
-    are concatenated with a short crossfade.
+    are concatenated with a crossfade (or hard cut if *crossfade_ms* is 0).
 
     Parameters
     ----------
@@ -246,6 +247,9 @@ async def generate_chunked(
         Forwarded to ``backend.generate()`` verbatim.
     max_chunk_chars : int
         Maximum characters per chunk (default 800).
+    crossfade_ms : int
+        Crossfade duration in milliseconds between chunks.  0 for a hard
+        cut with no overlap (default 50).
     trim_fn : callable | None
         Optional ``(audio, sample_rate) -> audio`` post-processing
         function applied to each chunk before concatenation (e.g.
@@ -294,5 +298,5 @@ async def generate_chunked(
         if sample_rate is None:
             sample_rate = chunk_sr
 
-    audio = concatenate_audio_chunks(audio_chunks, sample_rate)
+    audio = concatenate_audio_chunks(audio_chunks, sample_rate, crossfade_ms=crossfade_ms)
     return audio, sample_rate

From 9aa7080c513fbac03a5792ca4e395c4c8221a69d Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 07:26:46 -0700
Subject: [PATCH 4/4] refactor: restructure server settings and models UI

- Split chunking/crossfade sliders into dedicated GenerationSettings card
- Merge connection status badges into ConnectionForm (remove ServerStatus card)
- 2-column grid layout for the entire settings page
- GPU Acceleration: remove icon, badge, and MLX info card
- Models: merge 'Other Voice Models' into single 'Voice Generation' list
- Model detail: remove 'Downloaded' badge, border above actions, swap
  badges above stats row, match disk size font to stats
---
 .../ServerSettings/ConnectionForm.tsx         | 91 ++++++++-----------
 .../ServerSettings/GenerationSettings.tsx     | 71 +++++++++++++++
 .../ServerSettings/GpuAcceleration.tsx        | 41 ++-------
 .../ServerSettings/ModelManagement.tsx        | 59 ++++++------
 .../ServerSettings/ServerStatus.tsx           | 17 +---
 app/src/components/ServerTab/ServerTab.tsx    | 10 +-
 6 files changed, 150 insertions(+), 139 deletions(-)
 create mode 100644 app/src/components/ServerSettings/GenerationSettings.tsx

diff --git a/app/src/components/ServerSettings/ConnectionForm.tsx b/app/src/components/ServerSettings/ConnectionForm.tsx
index c9de46a6..3b5ad845 100644
--- a/app/src/components/ServerSettings/ConnectionForm.tsx
+++ b/app/src/components/ServerSettings/ConnectionForm.tsx
@@ -1,7 +1,9 @@
 import { zodResolver } from '@hookform/resolvers/zod';
+import { Loader2, XCircle } from 'lucide-react';
 import { useEffect } from 'react';
 import { useForm } from 'react-hook-form';
 import * as z from 'zod';
+import { Badge } from '@/components/ui/badge';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
 import { Checkbox } from '@/components/ui/checkbox';
@@ -15,8 +17,8 @@ import {
   FormMessage,
 } from '@/components/ui/form';
 import { Input } from '@/components/ui/input';
-import { Slider } from '@/components/ui/slider';
 import { useToast } from '@/components/ui/use-toast';
+import { useServerHealth } from '@/lib/hooks/useServer';
 import { usePlatform } from '@/platform/PlatformContext';
 import { useServerStore } from '@/stores/serverStore';
 
@@ -34,11 +36,8 @@ export function ConnectionForm() {
   const setKeepServerRunningOnClose = useServerStore((state) => state.setKeepServerRunningOnClose);
   const mode = useServerStore((state) => state.mode);
   const setMode = useServerStore((state) => state.setMode);
-  const maxChunkChars = useServerStore((state) => state.maxChunkChars);
-  const setMaxChunkChars = useServerStore((state) => state.setMaxChunkChars);
-  const crossfadeMs = useServerStore((state) => state.crossfadeMs);
-  const setCrossfadeMs = useServerStore((state) => state.setCrossfadeMs);
   const { toast } = useToast();
+  const { data: health, isLoading, error: healthError } = useServerHealth();
 
   const form = useForm<ConnectionFormValues>({
     resolver: zodResolver(connectionSchema),
@@ -56,7 +55,7 @@ export function ConnectionForm() {
 
   function onSubmit(data: ConnectionFormValues) {
     setServerUrl(data.serverUrl);
-    form.reset(data); // Reset form state after successful submission
+    form.reset(data);
     toast({
       title: 'Server URL updated',
       description: `Connected to ${data.serverUrl}`,
@@ -90,6 +89,37 @@ export function ConnectionForm() {
           </form>
         </Form>
 
+        {/* Connection status: spinner, error message, or health badges */}
+        <div className="mt-4">
+          {isLoading ? (
+            <div className="flex items-center gap-2">
+              <Loader2 className="h-4 w-4 animate-spin" />
+              <span className="text-sm text-muted-foreground">Checking connection...</span>
+            </div>
+          ) : healthError ? (
+            <div className="flex items-center gap-2">
+              <XCircle className="h-4 w-4 text-destructive" />
+              <span className="text-sm text-destructive">
+                Connection failed: {healthError.message}
+              </span>
+            </div>
+          ) : health ? (
+            <div className="flex flex-wrap gap-2">
+              <Badge
+                variant={health.model_loaded || health.model_downloaded ? 'default' : 'secondary'}
+              >
+                {health.model_loaded || health.model_downloaded ? 'Model Ready' : 'No Model'}
+              </Badge>
+              <Badge variant={health.gpu_available ? 'default' : 'secondary'}>
+                GPU: {health.gpu_available ? 'Available' : 'Not Available'}
+              </Badge>
+              {health.vram_used_mb ? (
+                <Badge variant="outline">VRAM: {health.vram_used_mb.toFixed(0)} MB</Badge>
+              ) : null}
+            </div>
+          ) : null}
+        </div>
+
         <div className="mt-6 pt-6 border-t">
           <div className="flex items-start space-x-3">
             <Checkbox
@@ -154,55 +184,6 @@ export function ConnectionForm() {
             </div>
           </div>
         )}
-
-        <div className="mt-6 pt-6 border-t">
-          <div className="space-y-3">
-            <div className="flex items-center justify-between">
-              <label htmlFor="maxChunkChars" className="text-sm font-medium leading-none">
-                Auto-chunking limit
-              </label>
-              <span className="text-sm tabular-nums text-muted-foreground">
-                {maxChunkChars} chars
-              </span>
-            </div>
-            <Slider
-              id="maxChunkChars"
-              value={[maxChunkChars]}
-              onValueChange={([value]) => setMaxChunkChars(value)}
-              min={100}
-              max={2000}
-              step={50}
-              aria-label="Auto-chunking character limit"
-            />
-            <p className="text-sm text-muted-foreground">
-              Long text is split into chunks at sentence boundaries before generating. Lower values
-              can improve quality for long outputs. Default is 800.
-            </p>
-          </div>
-
-          <div className="space-y-3 mt-4">
-            <div className="flex items-center justify-between">
-              <label htmlFor="crossfadeMs" className="text-sm font-medium leading-none">
-                Chunk crossfade
-              </label>
-              <span className="text-sm tabular-nums text-muted-foreground">
-                {crossfadeMs === 0 ? 'Cut' : `${crossfadeMs}ms`}
-              </span>
-            </div>
-            <Slider
-              id="crossfadeMs"
-              value={[crossfadeMs]}
-              onValueChange={([value]) => setCrossfadeMs(value)}
-              min={0}
-              max={200}
-              step={10}
-              aria-label="Chunk crossfade duration"
-            />
-            <p className="text-sm text-muted-foreground">
-              Blends audio between chunks to smooth transitions. Set to 0 for a hard cut.
-            </p>
-          </div>
-        </div>
       </CardContent>
     </Card>
   );
diff --git a/app/src/components/ServerSettings/GenerationSettings.tsx b/app/src/components/ServerSettings/GenerationSettings.tsx
new file mode 100644
index 00000000..b9b45de3
--- /dev/null
+++ b/app/src/components/ServerSettings/GenerationSettings.tsx
@@ -0,0 +1,71 @@
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card';
+import { Slider } from '@/components/ui/slider';
+import { useServerStore } from '@/stores/serverStore';
+/** Settings card with sliders for the auto-chunking limit and the chunk crossfade. */
+export function GenerationSettings() {
+  const maxChunkChars = useServerStore((state) => state.maxChunkChars); // chars; slider 100-2000
+  const setMaxChunkChars = useServerStore((state) => state.setMaxChunkChars);
+  const crossfadeMs = useServerStore((state) => state.crossfadeMs); // ms; 0 is shown as 'Cut'
+  const setCrossfadeMs = useServerStore((state) => state.setCrossfadeMs);
+
+  return (
+    <Card role="region" aria-label="Generation Settings" tabIndex={0}>
+      <CardHeader>
+        <CardTitle>Generation Settings</CardTitle>
+        <CardDescription>
+          Controls for long text generation. These settings apply to all engines.
+        </CardDescription>
+      </CardHeader>
+      <CardContent>
+        <div className="space-y-6">
+          <div className="space-y-3">
+            <div className="flex items-center justify-between">
+              <label htmlFor="maxChunkChars" className="text-sm font-medium leading-none">
+                Auto-chunking limit
+              </label>
+              <span className="text-sm tabular-nums text-muted-foreground">
+                {maxChunkChars} chars
+              </span>
+            </div>
+            <Slider
+              id="maxChunkChars"
+              value={[maxChunkChars]}
+              onValueChange={([value]) => setMaxChunkChars(value)}
+              min={100}
+              max={2000}
+              step={50}
+              aria-label="Auto-chunking character limit"
+            />
+            <p className="text-sm text-muted-foreground">
+              Long text is split into chunks at sentence boundaries before generating. Lower values
+              can improve quality for long outputs.
+            </p>
+          </div>
+
+          <div className="space-y-3">
+            <div className="flex items-center justify-between">
+              <label htmlFor="crossfadeMs" className="text-sm font-medium leading-none">
+                Chunk crossfade
+              </label>
+              <span className="text-sm tabular-nums text-muted-foreground">
+                {crossfadeMs === 0 ? 'Cut' : `${crossfadeMs}ms`}
+              </span>
+            </div>
+            <Slider
+              id="crossfadeMs"
+              value={[crossfadeMs]}
+              onValueChange={([value]) => setCrossfadeMs(value)}
+              min={0}
+              max={200}
+              step={10}
+              aria-label="Chunk crossfade duration"
+            />
+            <p className="text-sm text-muted-foreground">
+              Blends audio between chunks to smooth transitions. Set to 0 for a hard cut.
+            </p>
+          </div>
+        </div>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/app/src/components/ServerSettings/GpuAcceleration.tsx b/app/src/components/ServerSettings/GpuAcceleration.tsx
index 69824d63..94cc1d6d 100644
--- a/app/src/components/ServerSettings/GpuAcceleration.tsx
+++ b/app/src/components/ServerSettings/GpuAcceleration.tsx
@@ -1,7 +1,6 @@
 import { useQuery, useQueryClient } from '@tanstack/react-query';
-import { AlertCircle, Cpu, Download, Loader2, RotateCw, Trash2, Zap } from 'lucide-react';
+import { AlertCircle, Download, Loader2, RotateCw, Trash2 } from 'lucide-react';
 import { useCallback, useEffect, useRef, useState } from 'react';
-import { Badge } from '@/components/ui/badge';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
 import { Progress } from '@/components/ui/progress';
@@ -216,31 +215,19 @@ export function GpuAcceleration() {
   return (
     <Card>
       <CardHeader>
-        <CardTitle className="flex items-center gap-2">
-          <Zap className="h-4 w-4" />
-          GPU Acceleration
-        </CardTitle>
+        <CardTitle>GPU Acceleration</CardTitle>
       </CardHeader>
       <CardContent className="space-y-4">
         {/* Current status */}
-        <div className="flex items-center justify-between">
-          <div className="space-y-1">
-            <div className="text-sm font-medium">Backend</div>
-            <div className="text-sm text-muted-foreground">
-              {isCurrentlyCuda ? 'CUDA (GPU accelerated)' : 'CPU'}
-            </div>
+        <div className="space-y-1">
+          <div className="text-sm font-medium">Backend</div>
+          <div className="text-sm text-muted-foreground">
+            {isCurrentlyCuda
+              ? 'CUDA (GPU accelerated)'
+              : hasNativeGpu
+                ? `${health.backend_type === 'mlx' ? 'MLX' : 'PyTorch'} (GPU accelerated)`
+                : 'CPU'}
           </div>
-          <Badge variant={isCurrentlyCuda ? 'default' : 'secondary'}>
-            {isCurrentlyCuda ? (
-              <>
-                <Zap className="h-3 w-3 mr-1" /> CUDA
-              </>
-            ) : (
-              <>
-                <Cpu className="h-3 w-3 mr-1" /> CPU
-              </>
-            )}
-          </Badge>
         </div>
 
         {/* GPU info from health */}
@@ -257,14 +244,4 @@ export function GpuAcceleration() {
         )}
 
-        {/* Native GPU detected - no CUDA download needed */}
-        {hasNativeGpu && (
-          <div className="p-3 rounded-lg bg-accent/10 border border-accent/20">
-            <div className="text-sm">
-              Your system uses <strong>{health.gpu_type}</strong> for acceleration. No additional
-              downloads needed.
-            </div>
-          </div>
-        )}
-
         {/* CUDA download section - only show when native GPU is NOT detected (i.e., Windows/Linux NVIDIA users) */}
         {!hasNativeGpu && (
diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx
index 9811f643..37e8b229 100644
--- a/app/src/components/ServerSettings/ModelManagement.tsx
+++ b/app/src/components/ServerSettings/ModelManagement.tsx
@@ -342,17 +342,18 @@ export function ModelManagement() {
     setDetailOpen(true);
   };
 
-  const ttsModels = modelStatus?.models.filter((m) => m.model_name.startsWith('qwen-tts')) ?? [];
-  const otherTtsModels =
+  const voiceModels =
     modelStatus?.models.filter(
-      (m) => m.model_name.startsWith('luxtts') || m.model_name.startsWith('chatterbox'),
+      (m) =>
+        m.model_name.startsWith('qwen-tts') ||
+        m.model_name.startsWith('luxtts') ||
+        m.model_name.startsWith('chatterbox'),
     ) ?? [];
   const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? [];
 
   // Build sections
   const sections: { label: string; models: ModelStatus[] }[] = [
-    { label: 'Voice Generation', models: ttsModels },
-    ...(otherTtsModels.length > 0 ? [{ label: 'Other Voice Models', models: otherTtsModels }] : []),
+    { label: 'Voice Generation', models: voiceModels },
     { label: 'Transcription', models: whisperModels },
   ];
 
@@ -564,12 +565,6 @@ export function ModelManagement() {
                       Loaded
                     </Badge>
                   )}
-                  {freshSelectedModel.downloaded && !freshSelectedModel.loaded && (
-                    <Badge variant="secondary" className="text-xs">
-                      <CircleCheck className="h-3 w-3 mr-1" />
-                      Downloaded
-                    </Badge>
-                  )}
                   {selectedState?.hasError && (
                     <Badge variant="destructive" className="text-xs">
                       <CircleX className="h-3 w-3 mr-1" />
@@ -595,24 +590,6 @@ export function ModelManagement() {
 
                 {hfModelInfo && (
                   <div className="space-y-3">
-                    {/* Stats row */}
-                    <div className="flex items-center gap-4 text-xs text-muted-foreground">
-                      <span className="flex items-center gap-1" title="Downloads">
-                        <Download className="h-3.5 w-3.5" />
-                        {formatDownloads(hfModelInfo.downloads)}
-                      </span>
-                      <span className="flex items-center gap-1" title="Likes">
-                        <Heart className="h-3.5 w-3.5" />
-                        {formatDownloads(hfModelInfo.likes)}
-                      </span>
-                      {license && (
-                        <span className="flex items-center gap-1" title="License">
-                          <Scale className="h-3.5 w-3.5" />
-                          {formatLicense(license)}
-                        </span>
-                      )}
-                    </div>
-
                     {/* Pipeline tag + author */}
                     <div className="flex flex-wrap gap-1.5">
                       {hfModelInfo.pipeline_tag && (
@@ -632,6 +609,24 @@ export function ModelManagement() {
                       )}
                     </div>
 
+                    {/* Stats row */}
+                    <div className="flex items-center gap-4 text-xs text-muted-foreground">
+                      <span className="flex items-center gap-1" title="Downloads">
+                        <Download className="h-3.5 w-3.5" />
+                        {formatDownloads(hfModelInfo.downloads)}
+                      </span>
+                      <span className="flex items-center gap-1" title="Likes">
+                        <Heart className="h-3.5 w-3.5" />
+                        {formatDownloads(hfModelInfo.likes)}
+                      </span>
+                      {license && (
+                        <span className="flex items-center gap-1" title="License">
+                          <Scale className="h-3.5 w-3.5" />
+                          {formatLicense(license)}
+                        </span>
+                      )}
+                    </div>
+
                     {/* Languages */}
                     {hfModelInfo.cardData?.language && hfModelInfo.cardData.language.length > 0 && (
                       <div>
@@ -647,8 +642,8 @@ export function ModelManagement() {
 
                 {/* Disk size */}
                 {freshSelectedModel.downloaded && freshSelectedModel.size_mb && (
-                  <div className="flex items-center gap-2 text-sm text-muted-foreground">
-                    <HardDrive className="h-4 w-4" />
+                  <div className="flex items-center gap-2 text-xs text-muted-foreground">
+                    <HardDrive className="h-3.5 w-3.5" />
                     <span>{formatSize(freshSelectedModel.size_mb)} on disk</span>
                   </div>
                 )}
@@ -661,7 +656,7 @@ export function ModelManagement() {
                 )}
 
                 {/* Actions */}
-                <div className="flex items-center gap-2 pt-2 border-t">
+                <div className="flex items-center gap-2 pt-2">
                   {selectedState?.hasError ? (
                     <>
                       <Button
diff --git a/app/src/components/ServerSettings/ServerStatus.tsx b/app/src/components/ServerSettings/ServerStatus.tsx
index 5bd0b22b..093d3664 100644
--- a/app/src/components/ServerSettings/ServerStatus.tsx
+++ b/app/src/components/ServerSettings/ServerStatus.tsx
@@ -3,18 +3,13 @@ import { Badge } from '@/components/ui/badge';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
 import { useServerHealth } from '@/lib/hooks/useServer';
 import { useServerStore } from '@/stores/serverStore';
-import { ModelProgress } from './ModelProgress';
 
 export function ServerStatus() {
   const { data: health, isLoading, error } = useServerHealth();
   const serverUrl = useServerStore((state) => state.serverUrl);
 
   return (
-    <Card
-      role="region"
-      aria-label="Server Status"
-      tabIndex={0}
-    >
+    <Card role="region" aria-label="Server Status" tabIndex={0}>
       <CardHeader>
         <CardTitle>Server Status</CardTitle>
       </CardHeader>
@@ -24,16 +19,6 @@ export function ServerStatus() {
           <div className="font-mono text-sm">{serverUrl}</div>
         </div>
 
-        {/* Model download progress */}
-        <div className="space-y-2">
-          <ModelProgress modelName="qwen-tts-1.7B" displayName="Qwen TTS 1.7B" />
-          <ModelProgress modelName="qwen-tts-0.6B" displayName="Qwen TTS 0.6B" />
-          <ModelProgress modelName="whisper-base" displayName="Whisper Base" />
-          <ModelProgress modelName="whisper-small" displayName="Whisper Small" />
-          <ModelProgress modelName="whisper-medium" displayName="Whisper Medium" />
-          <ModelProgress modelName="whisper-large" displayName="Whisper Large" />
-        </div>
-
         {isLoading ? (
           <div className="flex items-center gap-2">
             <Loader2 className="h-4 w-4 animate-spin" />
diff --git a/app/src/components/ServerTab/ServerTab.tsx b/app/src/components/ServerTab/ServerTab.tsx
index 1f32ac04..000ec5b7 100644
--- a/app/src/components/ServerTab/ServerTab.tsx
+++ b/app/src/components/ServerTab/ServerTab.tsx
@@ -1,19 +1,19 @@
 import { ConnectionForm } from '@/components/ServerSettings/ConnectionForm';
+import { GenerationSettings } from '@/components/ServerSettings/GenerationSettings';
 import { GpuAcceleration } from '@/components/ServerSettings/GpuAcceleration';
-import { ServerStatus } from '@/components/ServerSettings/ServerStatus';
 import { UpdateStatus } from '@/components/ServerSettings/UpdateStatus';
 import { usePlatform } from '@/platform/PlatformContext';
 
 export function ServerTab() {
   const platform = usePlatform();
   return (
-    <div className="space-y-4 overflow-y-auto flex flex-col">
+    <div className="overflow-y-auto flex flex-col">
       <div className="grid gap-4 md:grid-cols-2">
         <ConnectionForm />
-        <ServerStatus />
+        <GenerationSettings />
+        {platform.metadata.isTauri && <GpuAcceleration />}
+        {platform.metadata.isTauri && <UpdateStatus />}
       </div>
-      {platform.metadata.isTauri && <GpuAcceleration />}
-      {platform.metadata.isTauri && <UpdateStatus />}
       <div className="py-8 text-center text-sm text-muted-foreground">
         Created by{' '}
         <a