From 47ce4cafdfe5d543eb3b028fb0c1121cb7d6a074 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 04:18:34 -0700
Subject: [PATCH 1/5] fix: patch VoiceEncoder.forward to cast float64 mels to
 float32

The previous approach of patching librosa.load didn't work because
melspectrogram itself performs float64 math (numpy dot, signal.lfilter)
regardless of input dtype. The actual mismatch happens when pack()
creates a float64 tensor from the mel arrays and feeds it to the
float32 LSTM in VoiceEncoder.forward().

Fix by monkey-patching VoiceEncoder.forward() to call mels.float()
before the LSTM, ensuring the input always matches the model dtype.
---
 backend/backends/chatterbox_backend.py       | 15 +++++++++++++++
 backend/backends/chatterbox_turbo_backend.py | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/backend/backends/chatterbox_backend.py b/backend/backends/chatterbox_backend.py
index 88f87d4e..826d7c05 100644
--- a/backend/backends/chatterbox_backend.py
+++ b/backend/backends/chatterbox_backend.py
@@ -178,6 +178,21 @@ def _patched_load(*args, **kwargs):
                 progress_manager.mark_complete(model_name)
                 task_manager.complete_download(model_name)
 
+            # Monkey-patch VoiceEncoder.forward to cast input to float32.
+            # The upstream melspectrogram returns float64 numpy arrays when
+            # hp.normalized_mels is False (the default).  pack() preserves
+            # the dtype, so double tensors hit float32 LSTM weights →
+            # "expected m1 and m2 to have the same dtype: float != double".
+            _ve = self.model.ve
+            _orig_ve_forward = _ve.forward.__func__ if hasattr(_ve.forward, '__func__') else _ve.forward
+
+            import types
+
+            def _f32_forward(self_ve, mels):
+                return _orig_ve_forward(self_ve, mels.float())
+
+            _ve.forward = types.MethodType(_f32_forward, _ve)
+
             logger.info("Chatterbox Multilingual TTS loaded successfully")
 
         except ImportError as e:
diff --git a/backend/backends/chatterbox_turbo_backend.py b/backend/backends/chatterbox_turbo_backend.py
index 16bb5d70..6c5bf842 100644
--- a/backend/backends/chatterbox_turbo_backend.py
+++ b/backend/backends/chatterbox_turbo_backend.py
@@ -178,6 +178,21 @@ def _patched_load(*args, **kwargs):
                 progress_manager.mark_complete(model_name)
                 task_manager.complete_download(model_name)
 
+            # Monkey-patch VoiceEncoder.forward to cast input to float32.
+            # The upstream melspectrogram returns float64 numpy arrays when
+            # hp.normalized_mels is False (the default).  pack() preserves
+            # the dtype, so double tensors hit float32 LSTM weights →
+            # "expected m1 and m2 to have the same dtype: float != double".
+            _ve = self.model.ve
+            _orig_ve_forward = _ve.forward.__func__ if hasattr(_ve.forward, '__func__') else _ve.forward
+
+            import types
+
+            def _f32_forward(self_ve, mels):
+                return _orig_ve_forward(self_ve, mels.float())
+
+            _ve.forward = types.MethodType(_f32_forward, _ve)
+
             logger.info("Chatterbox Turbo TTS loaded successfully")
 
         except ImportError as e:

From cac80f6af0fa034310414f54f152735d25b16579 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 04:44:13 -0700
Subject: [PATCH 2/5] feat: add per-model unload endpoint and UI button
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- POST /models/{model_name}/unload — unloads a specific model from
  memory without deleting it from disk; supports all engine types
- Frontend: Unload button in model detail dialog when model is loaded
- Delete button remains disabled while the model is loaded (unload first)
---
 .../ServerSettings/ModelManagement.tsx        | 82 ++++++++++++++-----
 app/src/lib/api/client.ts                     |  6 ++
 backend/main.py                               | 64 ++++++++++++++-
 3 files changed, 131 insertions(+), 21 deletions(-)

diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx
index bcc944b2..9811f643 100644
--- a/app/src/components/ServerSettings/ModelManagement.tsx
+++ b/app/src/components/ServerSettings/ModelManagement.tsx
@@ -13,6 +13,7 @@ import {
   RotateCcw,
   Scale,
   Trash2,
+  Unplug,
   X,
 } from 'lucide-react';
 import { useCallback, useMemo, useState } from 'react';
@@ -300,6 +301,27 @@ export function ModelManagement() {
     },
   });
 
+  const unloadMutation = useMutation({
+    mutationFn: async (modelName: string) => {
+      return await apiClient.unloadModel(modelName);
+    },
+    onSuccess: async (_data, modelName) => {
+      toast({
+        title: 'Model unloaded',
+        description: `${modelName} has been unloaded from memory.`,
+      });
+      await queryClient.invalidateQueries({ queryKey: ['modelStatus'], refetchType: 'all' });
+      await queryClient.refetchQueries({ queryKey: ['modelStatus'] });
+    },
+    onError: (error: Error) => {
+      toast({
+        title: 'Unload failed',
+        description: error.message,
+        variant: 'destructive',
+      });
+    },
+  });
+
   const formatSize = (sizeMb?: number): string => {
     if (!sizeMb) return 'Unknown size';
     if (sizeMb < 1024) return `${sizeMb.toFixed(1)} MB`;
@@ -697,26 +719,46 @@ export function ModelManagement() {
                       </Button>
                     </>
                   ) : freshSelectedModel.downloaded ? (
-                    <Button
-                      size="sm"
-                      onClick={() => {
-                        setModelToDelete({
-                          name: freshSelectedModel.model_name,
-                          displayName: freshSelectedModel.display_name,
-                          sizeMb: freshSelectedModel.size_mb,
-                        });
-                        setDeleteDialogOpen(true);
-                      }}
-                      variant="outline"
-                      disabled={freshSelectedModel.loaded}
-                      title={
-                        freshSelectedModel.loaded ? 'Unload model before deleting' : 'Delete model'
-                      }
-                      className="flex-1"
-                    >
-                      <Trash2 className="h-4 w-4 mr-2" />
-                      {freshSelectedModel.loaded ? 'Unload to Delete' : 'Delete Model'}
-                    </Button>
+                    <div className="flex gap-2 flex-1">
+                      {freshSelectedModel.loaded && (
+                        <Button
+                          size="sm"
+                          onClick={() => unloadMutation.mutate(freshSelectedModel.model_name)}
+                          variant="outline"
+                          disabled={unloadMutation.isPending}
+                          className="flex-1"
+                        >
+                          {unloadMutation.isPending ? (
+                            <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                          ) : (
+                            <Unplug className="h-4 w-4 mr-2" />
+                          )}
+                          {unloadMutation.isPending ? 'Unloading...' : 'Unload'}
+                        </Button>
+                      )}
+                      <Button
+                        size="sm"
+                        onClick={() => {
+                          setModelToDelete({
+                            name: freshSelectedModel.model_name,
+                            displayName: freshSelectedModel.display_name,
+                            sizeMb: freshSelectedModel.size_mb,
+                          });
+                          setDeleteDialogOpen(true);
+                        }}
+                        variant="outline"
+                        disabled={freshSelectedModel.loaded}
+                        title={
+                          freshSelectedModel.loaded
+                            ? 'Unload model before deleting'
+                            : 'Delete model'
+                        }
+                        className="flex-1"
+                      >
+                        <Trash2 className="h-4 w-4 mr-2" />
+                        Delete Model
+                      </Button>
+                    </div>
                   ) : (
                     <Button
                       size="sm"
diff --git a/app/src/lib/api/client.ts b/app/src/lib/api/client.ts
index eb78e440..e522ef48 100644
--- a/app/src/lib/api/client.ts
+++ b/app/src/lib/api/client.ts
@@ -337,6 +337,12 @@ class ApiClient {
     });
   }
 
+  async unloadModel(modelName: string): Promise<{ message: string }> {
+    return this.request<{ message: string }>(`/models/${modelName}/unload`, {
+      method: 'POST',
+    });
+  }
+
   async cancelDownload(modelName: string): Promise<{ message: string }> {
     return this.request<{ message: string }>('/models/download/cancel', {
       method: 'POST',
diff --git a/backend/main.py b/backend/main.py
index c9dc3566..79a1d43d 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1479,7 +1479,7 @@ async def load_model(model_size: str = "1.7B"):
 
 @app.post("/models/unload")
 async def unload_model():
-    """Unload TTS model to free memory."""
+    """Unload the default Qwen TTS model to free memory."""
     try:
         tts.unload_tts_model()
         return {"message": "Model unloaded successfully"}
@@ -1487,6 +1487,68 @@ async def unload_model():
         raise HTTPException(status_code=500, detail=str(e))
 
 
+@app.post("/models/{model_name}/unload")
+async def unload_model_by_name(model_name: str):
+    """Unload a specific model from memory without deleting it from disk."""
+    # Map of model_name -> (model_type, model_size)
+    model_types = {
+        "qwen-tts-1.7B": ("tts", "1.7B"),
+        "qwen-tts-0.6B": ("tts", "0.6B"),
+        "luxtts": ("luxtts", "default"),
+        "chatterbox-tts": ("chatterbox", "default"),
+        "chatterbox-turbo": ("chatterbox_turbo", "default"),
+        "whisper-base": ("whisper", "base"),
+        "whisper-small": ("whisper", "small"),
+        "whisper-medium": ("whisper", "medium"),
+        "whisper-large": ("whisper", "large"),
+        "whisper-turbo": ("whisper", "turbo"),
+    }
+
+    if model_name not in model_types:
+        raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
+
+    model_type, model_size = model_types[model_name]
+
+    try:
+        if model_type == "tts":
+            tts_model = tts.get_tts_model()
+            if tts_model.is_loaded() and tts_model.model_size == model_size:
+                tts.unload_tts_model()
+            else:
+                return {"message": f"Model {model_name} is not loaded"}
+        elif model_type == "luxtts":
+            from .backends import get_tts_backend_for_engine
+            backend = get_tts_backend_for_engine("luxtts")
+            if backend.is_loaded():
+                backend.unload_model()
+            else:
+                return {"message": f"Model {model_name} is not loaded"}
+        elif model_type == "chatterbox":
+            from .backends import get_tts_backend_for_engine
+            backend = get_tts_backend_for_engine("chatterbox")
+            if backend.is_loaded():
+                backend.unload_model()
+            else:
+                return {"message": f"Model {model_name} is not loaded"}
+        elif model_type == "chatterbox_turbo":
+            from .backends import get_tts_backend_for_engine
+            backend = get_tts_backend_for_engine("chatterbox_turbo")
+            if backend.is_loaded():
+                backend.unload_model()
+            else:
+                return {"message": f"Model {model_name} is not loaded"}
+        elif model_type == "whisper":
+            whisper_model = transcribe.get_whisper_model()
+            if whisper_model.is_loaded() and whisper_model.model_size == model_size:
+                transcribe.unload_whisper_model()
+            else:
+                return {"message": f"Model {model_name} is not loaded"}
+
+        return {"message": f"Model {model_name} unloaded successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @app.get("/models/progress/{model_name}")
 async def get_model_progress(model_name: str):
     """Get model download progress via Server-Sent Events."""

From bfd7b815a596c95fabd57a8570be56db994a3d5f Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 04:55:32 -0700
Subject: [PATCH 3/5] =?UTF-8?q?fix:=20patch=20S3Tokenizer.log=5Fmel=5Fspec?=
 =?UTF-8?q?trogram=20for=20float64=E2=86=92float32=20cast?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The actual dtype mismatch was in S3Tokenizer.log_mel_spectrogram, not
VoiceEncoder.forward. librosa.load returns a float64 NumPy array, which
torch.from_numpy preserves as a double tensor. The STFT output (double)
then hits _mel_filters (float32) in a matmul at s3tokenizer.py:163.

Now patching both entry points after model load:
1. S3Tokenizer.log_mel_spectrogram — cast audio to float32 before STFT
2. VoiceEncoder.forward — cast mels to float32 before LSTM

Remove debug traceback logging (no longer needed).
---
 backend/backends/chatterbox_backend.py       | 32 +++++++++++------
 backend/backends/chatterbox_turbo_backend.py | 38 ++++++++++++++------
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/backend/backends/chatterbox_backend.py b/backend/backends/chatterbox_backend.py
index 826d7c05..a440f998 100644
--- a/backend/backends/chatterbox_backend.py
+++ b/backend/backends/chatterbox_backend.py
@@ -178,20 +178,32 @@ def _patched_load(*args, **kwargs):
                 progress_manager.mark_complete(model_name)
                 task_manager.complete_download(model_name)
 
-            # Monkey-patch VoiceEncoder.forward to cast input to float32.
-            # The upstream melspectrogram returns float64 numpy arrays when
-            # hp.normalized_mels is False (the default).  pack() preserves
-            # the dtype, so double tensors hit float32 LSTM weights →
-            # "expected m1 and m2 to have the same dtype: float != double".
-            _ve = self.model.ve
-            _orig_ve_forward = _ve.forward.__func__ if hasattr(_ve.forward, '__func__') else _ve.forward
-
+            # Patch float64 → float32 dtype mismatches in upstream chatterbox.
+            # librosa.load returns float64 numpy; multiple upstream code paths
+            # convert it to a torch tensor via torch.from_numpy() without
+            # casting, then matmul it against float32 model weights.
             import types
 
-            def _f32_forward(self_ve, mels):
+            # Patch S3Tokenizer (used by s3gen.tokenizer)
+            _tokzr = self.model.s3gen.tokenizer
+            _orig_log_mel = _tokzr.log_mel_spectrogram.__func__
+
+            def _f32_log_mel(self_tokzr, audio, padding=0):
+                import torch as _torch
+                if _torch.is_tensor(audio):
+                    audio = audio.float()
+                return _orig_log_mel(self_tokzr, audio, padding)
+
+            _tokzr.log_mel_spectrogram = types.MethodType(_f32_log_mel, _tokzr)
+
+            # Patch VoiceEncoder
+            _ve = self.model.ve
+            _orig_ve_forward = _ve.forward.__func__
+
+            def _f32_ve_forward(self_ve, mels):
                 return _orig_ve_forward(self_ve, mels.float())
 
-            _ve.forward = types.MethodType(_f32_forward, _ve)
+            _ve.forward = types.MethodType(_f32_ve_forward, _ve)
 
             logger.info("Chatterbox Multilingual TTS loaded successfully")
 
diff --git a/backend/backends/chatterbox_turbo_backend.py b/backend/backends/chatterbox_turbo_backend.py
index 6c5bf842..3ce54f38 100644
--- a/backend/backends/chatterbox_turbo_backend.py
+++ b/backend/backends/chatterbox_turbo_backend.py
@@ -178,20 +178,38 @@ def _patched_load(*args, **kwargs):
                 progress_manager.mark_complete(model_name)
                 task_manager.complete_download(model_name)
 
-            # Monkey-patch VoiceEncoder.forward to cast input to float32.
-            # The upstream melspectrogram returns float64 numpy arrays when
-            # hp.normalized_mels is False (the default).  pack() preserves
-            # the dtype, so double tensors hit float32 LSTM weights →
-            # "expected m1 and m2 to have the same dtype: float != double".
-            _ve = self.model.ve
-            _orig_ve_forward = _ve.forward.__func__ if hasattr(_ve.forward, '__func__') else _ve.forward
-
+            # Patch float64 → float32 dtype mismatches in upstream chatterbox.
+            # librosa.load returns float64 numpy; multiple upstream code paths
+            # convert it to a torch tensor via torch.from_numpy() without
+            # casting, then matmul it against float32 model weights.
+            # We patch the two known entry points:
+            #
+            # 1. S3Tokenizer.log_mel_spectrogram — the audio tensor from
+            #    librosa hits _mel_filters (float32) in a matmul.
+            # 2. VoiceEncoder.forward — float64 mel spectrograms hit the
+            #    float32 LSTM weights.
             import types
 
-            def _f32_forward(self_ve, mels):
+            # Patch S3Tokenizer (used by s3gen.tokenizer)
+            _tokzr = self.model.s3gen.tokenizer
+            _orig_log_mel = _tokzr.log_mel_spectrogram.__func__
+
+            def _f32_log_mel(self_tokzr, audio, padding=0):
+                import torch as _torch
+                if _torch.is_tensor(audio):
+                    audio = audio.float()
+                return _orig_log_mel(self_tokzr, audio, padding)
+
+            _tokzr.log_mel_spectrogram = types.MethodType(_f32_log_mel, _tokzr)
+
+            # Patch VoiceEncoder
+            _ve = self.model.ve
+            _orig_ve_forward = _ve.forward.__func__
+
+            def _f32_ve_forward(self_ve, mels):
                 return _orig_ve_forward(self_ve, mels.float())
 
-            _ve.forward = types.MethodType(_f32_forward, _ve)
+            _ve.forward = types.MethodType(_f32_ve_forward, _ve)
 
             logger.info("Chatterbox Turbo TTS loaded successfully")
 

From b42063795788610ade2e7ffeba2c8967ab24b977 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 05:19:23 -0700
Subject: [PATCH 4/5] feat: paralinguistic tag autocomplete for Chatterbox
 Turbo

Type / in the text input when using Chatterbox Turbo to open an
autocomplete dropdown with 9 supported paralinguistic tags ([laugh],
[chuckle], [gasp], [cough], [sigh], [groan], [sniff], [shush],
[clear throat]).

- contentEditable div replaces textarea for Turbo engine only
- Tags render as inline styled badges
- Pasting text with [tag] patterns auto-converts to badges
- Badges serialize back to plain [tag] text for the API
- Dropdown portalled to body, opens above caret to avoid overflow
---
 .../Generation/FloatingGenerateBox.tsx        |  76 ++--
 .../components/Generation/GenerationForm.tsx  |  26 +-
 .../Generation/ParalinguisticInput.tsx        | 410 ++++++++++++++++++
 3 files changed, 480 insertions(+), 32 deletions(-)
 create mode 100644 app/src/components/Generation/ParalinguisticInput.tsx

diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx
index a9814432..a9b830dc 100644
--- a/app/src/components/Generation/FloatingGenerateBox.tsx
+++ b/app/src/components/Generation/FloatingGenerateBox.tsx
@@ -20,6 +20,7 @@ import { useAddStoryItem, useStory } from '@/lib/hooks/useStories';
 import { cn } from '@/lib/utils/cn';
 import { useStoryStore } from '@/stores/storyStore';
 import { useUIStore } from '@/stores/uiStore';
+import { ParalinguisticInput } from './ParalinguisticInput';
 
 interface FloatingGenerateBoxProps {
   isPlayerOpen?: boolean;
@@ -219,34 +220,57 @@ export function FloatingGenerateBox({
                             transition={{ duration: 0.15, ease: 'easeOut' }}
                             style={{ overflow: 'hidden' }}
                           >
-                            <Textarea
-                              {...field}
-                              ref={(node: HTMLTextAreaElement | null) => {
-                                // Store ref for auto-resize (only for active field)
-                                if (!isInstructMode) {
-                                  textareaRef.current = node;
+                            {form.watch('engine') === 'chatterbox_turbo' ? (
+                              <ParalinguisticInput
+                                value={field.value}
+                                onChange={field.onChange}
+                                placeholder={
+                                  isStoriesRoute && currentStory
+                                    ? `Generate speech for "${currentStory.name}"... (type / for effects)`
+                                    : selectedProfile
+                                      ? `Type / for effects like [laugh], [sigh]...`
+                                      : 'Select a voice profile above...'
                                 }
-                                // Forward ref to react-hook-form
-                                if (typeof field.ref === 'function') {
-                                  field.ref(node);
+                                className="px-3 py-2 resize-none bg-transparent border-none focus-visible:ring-0 focus-visible:ring-offset-0 focus:outline-none focus:ring-0 outline-none ring-0 rounded-2xl text-sm w-full"
+                                style={{
+                                  minHeight: isExpanded ? '100px' : '32px',
+                                  maxHeight: '300px',
+                                  overflowY: 'auto',
+                                }}
+                                disabled={!selectedProfileId}
+                                onClick={() => setIsExpanded(true)}
+                                onFocus={() => setIsExpanded(true)}
+                              />
+                            ) : (
+                              <Textarea
+                                {...field}
+                                ref={(node: HTMLTextAreaElement | null) => {
+                                  // Store ref for auto-resize (only for active field)
+                                  if (!isInstructMode) {
+                                    textareaRef.current = node;
+                                  }
+                                  // Forward ref to react-hook-form
+                                  if (typeof field.ref === 'function') {
+                                    field.ref(node);
+                                  }
+                                }}
+                                placeholder={
+                                  isStoriesRoute && currentStory
+                                    ? `Generate speech for "${currentStory.name}"...`
+                                    : selectedProfile
+                                      ? `Generate speech using ${selectedProfile.name}...`
+                                      : 'Select a voice profile above...'
                                 }
-                              }}
-                              placeholder={
-                                isStoriesRoute && currentStory
-                                  ? `Generate speech for "${currentStory.name}"...`
-                                  : selectedProfile
-                                    ? `Generate speech using ${selectedProfile.name}...`
-                                    : 'Select a voice profile above...'
-                              }
-                              className="resize-none bg-transparent border-none focus-visible:ring-0 focus-visible:ring-offset-0 focus:outline-none focus:ring-0 outline-none ring-0 rounded-2xl text-sm placeholder:text-muted-foreground/60 w-full"
-                              style={{
-                                minHeight: isExpanded ? '100px' : '32px',
-                                maxHeight: '300px',
-                              }}
-                              disabled={!selectedProfileId}
-                              onClick={() => setIsExpanded(true)}
-                              onFocus={() => setIsExpanded(true)}
-                            />
+                                className="resize-none bg-transparent border-none focus-visible:ring-0 focus-visible:ring-offset-0 focus:outline-none focus:ring-0 outline-none ring-0 rounded-2xl text-sm placeholder:text-muted-foreground/60 w-full"
+                                style={{
+                                  minHeight: isExpanded ? '100px' : '32px',
+                                  maxHeight: '300px',
+                                }}
+                                disabled={!selectedProfileId}
+                                onClick={() => setIsExpanded(true)}
+                                onFocus={() => setIsExpanded(true)}
+                              />
+                            )}
                           </motion.div>
                         </FormControl>
                         <FormMessage className="text-xs" />
diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx
index a3c96cbc..e326bec4 100644
--- a/app/src/components/Generation/GenerationForm.tsx
+++ b/app/src/components/Generation/GenerationForm.tsx
@@ -23,6 +23,7 @@ import { getLanguageOptionsForEngine } from '@/lib/constants/languages';
 import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
 import { useProfile } from '@/lib/hooks/useProfiles';
 import { useUIStore } from '@/stores/uiStore';
+import { ParalinguisticInput } from './ParalinguisticInput';
 
 export function GenerationForm() {
   const selectedProfileId = useUIStore((state) => state.selectedProfileId);
@@ -64,13 +65,26 @@ export function GenerationForm() {
                 <FormItem>
                   <FormLabel>Text to Speak</FormLabel>
                   <FormControl>
-                    <Textarea
-                      placeholder="Enter the text you want to generate..."
-                      className="min-h-[150px]"
-                      {...field}
-                    />
+                    {form.watch('engine') === 'chatterbox_turbo' ? (
+                      <ParalinguisticInput
+                        value={field.value}
+                        onChange={field.onChange}
+                        placeholder="Enter text... type / for effects like [laugh], [sigh]"
+                        className="min-h-[150px] rounded-md border border-input bg-background px-3 py-2"
+                      />
+                    ) : (
+                      <Textarea
+                        placeholder="Enter the text you want to generate..."
+                        className="min-h-[150px]"
+                        {...field}
+                      />
+                    )}
                   </FormControl>
-                  <FormDescription>Max 5000 characters</FormDescription>
+                  <FormDescription>
+                    {form.watch('engine') === 'chatterbox_turbo'
+                      ? 'Max 5000 characters. Type / to insert sound effects.'
+                      : 'Max 5000 characters'}
+                  </FormDescription>
                   <FormMessage />
                 </FormItem>
               )}
diff --git a/app/src/components/Generation/ParalinguisticInput.tsx b/app/src/components/Generation/ParalinguisticInput.tsx
new file mode 100644
index 00000000..b3200628
--- /dev/null
+++ b/app/src/components/Generation/ParalinguisticInput.tsx
@@ -0,0 +1,410 @@
+/**
+ * ParalinguisticInput — a contentEditable rich text input that renders
+ * Chatterbox Turbo paralinguistic tags (e.g. [laugh]) as inline badges.
+ *
+ * Trigger: typing "/" opens an autocomplete dropdown.
+ * Paste:   pasting text with [tag] patterns auto-converts to badges.
+ * Output:  serializes badges back to plain [tag] text for the API.
+ */
+
+import { AnimatePresence, motion } from 'framer-motion';
+import { forwardRef, useCallback, useEffect, useImperativeHandle, useRef, useState } from 'react';
+import { createPortal } from 'react-dom';
+import { cn } from '@/lib/utils/cn';
+
+// ── Tag definitions ─────────────────────────────────────────────────
+const PARALINGUISTIC_TAGS = [
+  { tag: '[laugh]', label: 'laugh', emoji: '\u{1F602}' },
+  { tag: '[chuckle]', label: 'chuckle', emoji: '\u{1F60F}' },
+  { tag: '[gasp]', label: 'gasp', emoji: '\u{1F62E}' },
+  { tag: '[cough]', label: 'cough', emoji: '\u{1F637}' },
+  { tag: '[sigh]', label: 'sigh', emoji: '\u{1F614}' },
+  { tag: '[groan]', label: 'groan', emoji: '\u{1F629}' },
+  { tag: '[sniff]', label: 'sniff', emoji: '\u{1F443}' },
+  { tag: '[shush]', label: 'shush', emoji: '\u{1F92B}' },
+  { tag: '[clear throat]', label: 'clear throat', emoji: '\u{1F64A}' },
+] as const;
+
+const TAG_REGEX = /\[(laugh|chuckle|gasp|cough|sigh|groan|sniff|shush|clear throat)\]/gi;
+
+// Data attribute used to identify badge spans in the DOM
+const BADGE_ATTR = 'data-ptag';
+
+// ── Helpers ─────────────────────────────────────────────────────────
+
+/** Build an inline badge <span> for a tag. */
+function makeBadgeHTML(tag: string): string {
+  const entry = PARALINGUISTIC_TAGS.find((t) => t.tag.toLowerCase() === tag.toLowerCase());
+  const label = entry?.label ?? tag.replace(/[[\]]/g, '');
+  const emoji = entry?.emoji ?? '';
+  // Non-editable inline badge. Zero-width spaces around it let the
+  // caret sit on either side so the user can type before/after.
+  return `\u200B<span ${BADGE_ATTR}="${tag}" contenteditable="false" class="ptag-badge">${emoji ? `${emoji}\u00A0` : ''}${label}</span>\u200B`;
+}
+
+/** Convert plain text with [tag] patterns into HTML with badge spans. */
+function textToHTML(text: string): string {
+  // Escape HTML entities first
+  const escaped = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+  // Replace tag patterns with badge HTML
+  return escaped.replace(TAG_REGEX, (match) => makeBadgeHTML(match));
+}
+
+/** Serialize the container's child nodes (recursively) back to plain text with [tag] syntax. */
+function htmlToText(container: HTMLElement): string {
+  let result = '';
+  for (const node of container.childNodes) {
+    if (node.nodeType === Node.TEXT_NODE) {
+      // Strip zero-width spaces we added around badges
+      result += (node.textContent ?? '').replace(/\u200B/g, '');
+    } else if (node.nodeType === Node.ELEMENT_NODE) {
+      const el = node as HTMLElement;
+      if (el.hasAttribute(BADGE_ATTR)) {
+        result += el.getAttribute(BADGE_ATTR) ?? '';
+      } else if (el.tagName === 'BR') {
+        result += '\n';
+      } else {
+        // Recurse into any other element (e.g. wrapper spans/divs); no newline is added for it
+        result += htmlToText(el);
+      }
+    }
+  }
+  return result;
+}
+
+/** Get the text from the caret back to the last space/newline/NBSP (or the
+ *  start of the caret's text node) and its Range, to detect the "/" trigger. */
+function getWordBeforeCaret(_container: HTMLElement): { word: string; range: Range | null } {
+  const sel = window.getSelection();
+  if (!sel || sel.rangeCount === 0) return { word: '', range: null };
+  const range = sel.getRangeAt(0).cloneRange();
+  range.collapse(true);
+
+  // Walk backwards from caret through the text node
+  const textNode = range.startContainer;
+  if (textNode.nodeType !== Node.TEXT_NODE) return { word: '', range: null };
+  const text = textNode.textContent ?? '';
+  const offset = range.startOffset;
+
+  let start = offset;
+  while (
+    start > 0 &&
+    text[start - 1] !== ' ' &&
+    text[start - 1] !== '\n' &&
+    text[start - 1] !== '\u00A0'
+  ) {
+    start--;
+  }
+
+  const word = text.slice(start, offset);
+  const wordRange = document.createRange();
+  wordRange.setStart(textNode, start);
+  wordRange.setEnd(textNode, offset);
+
+  return { word, range: wordRange };
+}
+
+// ── Component ───────────────────────────────────────────────────────
+
+export interface ParalinguisticInputProps {
+  value?: string;
+  onChange?: (value: string) => void;
+  placeholder?: string;
+  disabled?: boolean;
+  className?: string;
+  style?: React.CSSProperties;
+  onClick?: () => void;
+  onFocus?: () => void;
+}
+
+export interface ParalinguisticInputRef {
+  focus: () => void;
+  element: HTMLDivElement | null;
+}
+
+export const ParalinguisticInput = forwardRef<ParalinguisticInputRef, ParalinguisticInputProps>(
+  function ParalinguisticInput(
+    { value, onChange, placeholder, disabled, className, style, onClick, onFocus },
+    ref,
+  ) {
+    const editorRef = useRef<HTMLDivElement>(null);
+    const [showMenu, setShowMenu] = useState(false);
+    const [menuFilter, setMenuFilter] = useState('');
+    const [menuIndex, setMenuIndex] = useState(0);
+    const [menuPosition, setMenuPosition] = useState<{ bottom: number; left: number }>({
+      bottom: 0,
+      left: 0,
+    });
+    const triggerRangeRef = useRef<Range | null>(null);
+    const lastSerializedRef = useRef<string>(value ?? '');
+    const isComposingRef = useRef(false);
+
+    useImperativeHandle(ref, () => ({
+      focus: () => editorRef.current?.focus(),
+      element: editorRef.current,
+    }));
+
+    // Filtered tag list for the autocomplete menu
+    const filteredTags = PARALINGUISTIC_TAGS.filter((t) =>
+      t.label.toLowerCase().includes(menuFilter.toLowerCase()),
+    );
+
+    // ── Sync external value → editor ──────────────────────────────
+    useEffect(() => {
+      const el = editorRef.current;
+      if (!el) return;
+      // Only update DOM if the external value differs from what we last emitted
+      if (value !== undefined && value !== lastSerializedRef.current) {
+        lastSerializedRef.current = value;
+        el.innerHTML = value ? textToHTML(value) : '';
+      }
+    }, [value]);
+
+    // ── Emit plain-text value on input ────────────────────────────
+    const emitChange = useCallback(() => {
+      const el = editorRef.current;
+      if (!el || !onChange) return;
+      const text = htmlToText(el);
+      lastSerializedRef.current = text;
+      onChange(text);
+    }, [onChange]);
+
+    // ── Insert a tag badge at the caret ───────────────────────────
+    const insertTag = useCallback(
+      (tag: string) => {
+        const el = editorRef.current;
+        if (!el) return;
+
+        // Delete the /filter text
+        const wordRange = triggerRangeRef.current;
+        if (wordRange) {
+          wordRange.deleteContents();
+        }
+
+        // Insert badge HTML
+        const temp = document.createElement('span');
+        temp.innerHTML = makeBadgeHTML(tag);
+        const frag = document.createDocumentFragment();
+        let lastNode: Node | null = null;
+        while (temp.firstChild) {
+          lastNode = frag.appendChild(temp.firstChild);
+        }
+
+        const sel = window.getSelection();
+        if (sel && sel.rangeCount > 0) {
+          const range = sel.getRangeAt(0);
+          range.deleteContents();
+          range.insertNode(frag);
+
+          // Move caret after the badge
+          if (lastNode) {
+            const newRange = document.createRange();
+            newRange.setStartAfter(lastNode);
+            newRange.collapse(true);
+            sel.removeAllRanges();
+            sel.addRange(newRange);
+          }
+        }
+
+        setShowMenu(false);
+        setMenuFilter('');
+        emitChange();
+        el.focus();
+      },
+      [emitChange],
+    );
+
+    // ── Handle keydown for autocomplete navigation ────────────────
+    const handleKeyDown = useCallback(
+      (e: React.KeyboardEvent) => {
+        if (showMenu) {
+          if (e.key === 'ArrowDown') {
+            e.preventDefault();
+            setMenuIndex((i) => (i + 1) % filteredTags.length);
+          } else if (e.key === 'ArrowUp') {
+            e.preventDefault();
+            setMenuIndex((i) => (i - 1 + filteredTags.length) % filteredTags.length);
+          } else if (e.key === 'Enter' || e.key === 'Tab') {
+            e.preventDefault();
+            if (filteredTags[menuIndex]) {
+              insertTag(filteredTags[menuIndex].tag);
+            }
+          } else if (e.key === 'Escape') {
+            e.preventDefault();
+            setShowMenu(false);
+          }
+        } else {
+          // TODO: this branch is a no-op; Enter is not intercepted (no preventDefault).
+          if (e.key === 'Enter' && !e.shiftKey) {
+            // Browser default contentEditable Enter handling applies here.
+          }
+        }
+      },
+      [showMenu, filteredTags, menuIndex, insertTag],
+    );
+
+    // ── Handle input (check for / trigger) ────────────────────────
+    const handleInput = useCallback(() => {
+      if (isComposingRef.current) return;
+      const el = editorRef.current;
+      if (!el) return;
+
+      const { word, range } = getWordBeforeCaret(el);
+
+      if (word.startsWith('/')) {
+        const filter = word.slice(1); // strip the /
+        setMenuFilter(filter);
+        setMenuIndex(0);
+        triggerRangeRef.current = range;
+
+        // Position the menu above the caret using viewport coords (portalled)
+        const sel = window.getSelection();
+        if (sel && sel.rangeCount > 0) {
+          const rect = sel.getRangeAt(0).getBoundingClientRect();
+          setMenuPosition({
+            bottom: window.innerHeight - rect.top + 4,
+            left: rect.left,
+          });
+        }
+
+        setShowMenu(true);
+      } else {
+        setShowMenu(false);
+      }
+
+      emitChange();
+    }, [emitChange]);
+
+    // ── Handle paste — convert [tag] patterns to badges ───────────
+    const handlePaste = useCallback(
+      (e: React.ClipboardEvent) => {
+        e.preventDefault();
+        const text = e.clipboardData.getData('text/plain');
+        if (!text) return;
+
+        const el = editorRef.current;
+        if (!el) return;
+
+        const html = textToHTML(text);
+
+        // Insert at caret
+        const sel = window.getSelection();
+        if (sel && sel.rangeCount > 0) {
+          const range = sel.getRangeAt(0);
+          range.deleteContents();
+          const temp = document.createElement('div');
+          temp.innerHTML = html;
+          const frag = document.createDocumentFragment();
+          let lastNode: Node | null = null;
+          while (temp.firstChild) {
+            lastNode = frag.appendChild(temp.firstChild);
+          }
+          range.insertNode(frag);
+          if (lastNode) {
+            const newRange = document.createRange();
+            newRange.setStartAfter(lastNode);
+            newRange.collapse(true);
+            sel.removeAllRanges();
+            sel.addRange(newRange);
+          }
+        }
+
+        emitChange();
+      },
+      [emitChange],
+    );
+
+    // ── Show placeholder ──────────────────────────────────────────
+    const isEmpty = !value || value.trim() === '';
+
+    return (
+      <div className="relative">
+        {/* Placeholder */}
+        {isEmpty && placeholder && (
+          <div
+            className="pointer-events-none absolute inset-0 text-sm text-muted-foreground/60 px-3 py-2 select-none"
+            aria-hidden
+          >
+            {placeholder}
+          </div>
+        )}
+
+        {/* Editable area */}
+        <div
+          ref={editorRef}
+          contentEditable={!disabled}
+          suppressContentEditableWarning
+          role="textbox"
+          aria-multiline
+          aria-placeholder={placeholder}
+          aria-disabled={disabled}
+          className={cn(
+            'min-h-[32px] text-sm whitespace-pre-wrap break-words outline-none',
+            '[&_.ptag-badge]:inline-flex [&_.ptag-badge]:items-center [&_.ptag-badge]:rounded-full',
+            '[&_.ptag-badge]:bg-accent/20 [&_.ptag-badge]:text-accent [&_.ptag-badge]:border [&_.ptag-badge]:border-accent/30',
+            '[&_.ptag-badge]:px-2 [&_.ptag-badge]:py-0 [&_.ptag-badge]:text-xs [&_.ptag-badge]:font-medium',
+            '[&_.ptag-badge]:mx-0.5 [&_.ptag-badge]:select-none [&_.ptag-badge]:cursor-default',
+            '[&_.ptag-badge]:align-baseline',
+            disabled && 'opacity-50 cursor-not-allowed',
+            className,
+          )}
+          style={style}
+          onInput={handleInput}
+          onKeyDown={handleKeyDown}
+          onPaste={handlePaste}
+          onClick={onClick}
+          onFocus={onFocus}
+          onCompositionStart={() => {
+            isComposingRef.current = true;
+          }}
+          onCompositionEnd={() => {
+            isComposingRef.current = false;
+            handleInput();
+          }}
+        />
+
+        {/* Autocomplete dropdown — portalled to body, positioned above the caret */}
+        {showMenu &&
+          filteredTags.length > 0 &&
+          createPortal(
+            <AnimatePresence>
+              <motion.div
+                initial={{ opacity: 0, y: 4 }}
+                animate={{ opacity: 1, y: 0 }}
+                exit={{ opacity: 0, y: 4 }}
+                transition={{ duration: 0.12 }}
+                className="fixed z-[9999] min-w-[200px] max-h-[280px] overflow-y-auto rounded-lg border border-border bg-popover shadow-lg"
+                style={{
+                  bottom: menuPosition.bottom,
+                  left: menuPosition.left,
+                }}
+              >
+                {filteredTags.map((t, i) => (
+                  <button
+                    key={t.tag}
+                    type="button"
+                    className={cn(
+                      'flex items-center gap-2 w-full px-3 py-1.5 text-sm text-left transition-colors',
+                      i === menuIndex
+                        ? 'bg-accent/20 text-accent-foreground'
+                        : 'text-popover-foreground hover:bg-muted/50',
+                    )}
+                    onMouseDown={(e) => {
+                      e.preventDefault(); // Keep focus in editor
+                      insertTag(t.tag);
+                    }}
+                    onMouseEnter={() => setMenuIndex(i)}
+                  >
+                    <span className="text-base leading-none">{t.emoji}</span>
+                    <span>{t.label}</span>
+                    <span className="ml-auto text-xs text-muted-foreground font-mono">{t.tag}</span>
+                  </button>
+                ))}
+              </motion.div>
+            </AnimatePresence>,
+            document.body,
+          )}
+      </div>
+    );
+  },
+);

From 139fa38e3f05df86694a4a3de5a9b40121df4796 Mon Sep 17 00:00:00 2001
From: James Pine <ijamespine@me.com>
Date: Fri, 13 Mar 2026 05:52:06 -0700
Subject: [PATCH 5/5] fix: address review feedback for ParalinguisticInput

- Initialize lastSerializedRef to empty string so first-mount hydration
  always runs (fixes initial value not rendering)
- Guard arrow-key menu nav against empty filteredTags (avoids NaN index)
- Drop the textbox role/aria-multiline, set tabIndex to -1, and detach the
  input, keydown, paste, click and focus handlers when disabled
- Add onBlur to close autocomplete dropdown when editor loses focus
- Chain exception with 'from e' in unload endpoint for better tracebacks
---
 .../Generation/ParalinguisticInput.tsx        | 28 +++++++++++++------
 backend/main.py                               |  2 +-
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/app/src/components/Generation/ParalinguisticInput.tsx b/app/src/components/Generation/ParalinguisticInput.tsx
index b3200628..d6c44210 100644
--- a/app/src/components/Generation/ParalinguisticInput.tsx
+++ b/app/src/components/Generation/ParalinguisticInput.tsx
@@ -136,7 +136,7 @@ export const ParalinguisticInput = forwardRef<ParalinguisticInputRef, Paralingui
       left: 0,
     });
     const triggerRangeRef = useRef<Range | null>(null);
-    const lastSerializedRef = useRef<string>(value ?? '');
+    const lastSerializedRef = useRef<string>('');
     const isComposingRef = useRef(false);
 
     useImperativeHandle(ref, () => ({
@@ -218,6 +218,13 @@ export const ParalinguisticInput = forwardRef<ParalinguisticInputRef, Paralingui
     const handleKeyDown = useCallback(
       (e: React.KeyboardEvent) => {
         if (showMenu) {
+          if (filteredTags.length === 0) {
+            if (e.key === 'Escape') {
+              e.preventDefault();
+              setShowMenu(false);
+            }
+            return;
+          }
           if (e.key === 'ArrowDown') {
             e.preventDefault();
             setMenuIndex((i) => (i + 1) % filteredTags.length);
@@ -334,10 +341,11 @@ export const ParalinguisticInput = forwardRef<ParalinguisticInputRef, Paralingui
           ref={editorRef}
           contentEditable={!disabled}
           suppressContentEditableWarning
-          role="textbox"
-          aria-multiline
+          role={disabled ? undefined : 'textbox'}
+          aria-multiline={disabled ? undefined : true}
           aria-placeholder={placeholder}
           aria-disabled={disabled}
+          tabIndex={disabled ? -1 : 0}
           className={cn(
             'min-h-[32px] text-sm whitespace-pre-wrap break-words outline-none',
             '[&_.ptag-badge]:inline-flex [&_.ptag-badge]:items-center [&_.ptag-badge]:rounded-full',
@@ -349,11 +357,15 @@ export const ParalinguisticInput = forwardRef<ParalinguisticInputRef, Paralingui
             className,
           )}
           style={style}
-          onInput={handleInput}
-          onKeyDown={handleKeyDown}
-          onPaste={handlePaste}
-          onClick={onClick}
-          onFocus={onFocus}
+          onInput={!disabled ? handleInput : undefined}
+          onKeyDown={!disabled ? handleKeyDown : undefined}
+          onPaste={!disabled ? handlePaste : undefined}
+          onClick={!disabled ? onClick : undefined}
+          onFocus={!disabled ? onFocus : undefined}
+          onBlur={() => {
+            setShowMenu(false);
+            triggerRangeRef.current = null;
+          }}
           onCompositionStart={() => {
             isComposingRef.current = true;
           }}
diff --git a/backend/main.py b/backend/main.py
index 79a1d43d..93acc157 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1546,7 +1546,7 @@ async def unload_model_by_name(model_name: str):
 
         return {"message": f"Model {model_name} unloaded successfully"}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e)) from e
 
 
 @app.get("/models/progress/{model_name}")