diff --git a/app/src/components/AudioPlayer/AudioPlayer.tsx b/app/src/components/AudioPlayer/AudioPlayer.tsx
index 75e1b4e5..667404f3 100644
--- a/app/src/components/AudioPlayer/AudioPlayer.tsx
+++ b/app/src/components/AudioPlayer/AudioPlayer.tsx
@@ -139,7 +139,11 @@ export function AudioPlayer() {
barRadius: 2,
height: 80,
normalize: true,
- backend: 'WebAudio',
+ // Use MediaElement backend (default). Unlike the WebAudio backend,
+ // MediaElement uses a standard HTML <audio> element for playback which
+ // benefits from the browser/webview's built-in audio session recovery.
+ // This prevents audio loss when another app steals audio output or
+ // the system audio session is interrupted.
interact: true, // Enable interaction (click to seek)
mediaControls: false, // Don't show native controls
});
@@ -189,15 +193,6 @@ export function AudioPlayer() {
const currentVolume = usePlayerStore.getState().volume;
wavesurfer.setVolume(currentVolume);
- // Get the underlying audio element and ensure it's not muted
- // (unless we're using native playback, which will be set later)
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement && !isUsingNativePlaybackRef.current) {
- mediaElement.volume = currentVolume;
- mediaElement.muted = false;
- debug.log('Audio element volume:', mediaElement.volume, 'muted:', mediaElement.muted);
- }
-
// Auto-play when ready - check if we should use native playback
// Get current values from the store and queries at runtime (not captured closure values)
const currentAudioUrl = usePlayerStore.getState().audioUrl;
@@ -264,21 +259,8 @@ export function AudioPlayer() {
debug.log('Should use native playback:', shouldUseNative);
if (!shouldUseNative) {
- debug.log('No custom devices assigned, falling back to WaveSurfer');
- // Reset native playback flag and unmute WaveSurfer
+ debug.log('No custom devices assigned, using standard playback');
isUsingNativePlaybackRef.current = false;
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- const currentVolume = usePlayerStore.getState().volume;
- mediaElement.volume = currentVolume;
- mediaElement.muted = false;
- debug.log(
- 'WaveSurfer unmuted for normal playback - volume:',
- mediaElement.volume,
- 'muted:',
- mediaElement.muted,
- );
- }
} else {
const deviceIds = assignedChannels.flatMap((ch: any) => ch.device_ids);
debug.log('Device IDs to play to:', deviceIds);
@@ -299,19 +281,10 @@ export function AudioPlayer() {
// Mark that we're using native playback
isUsingNativePlaybackRef.current = true;
- // Mute WaveSurfer's audio element to prevent UI audio output
- // Keep WaveSurfer running for visualization
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- mediaElement.volume = 0;
- mediaElement.muted = true;
- debug.log(
- 'WaveSurfer muted for native playback - volume:',
- mediaElement.volume,
- 'muted:',
- mediaElement.muted,
- );
- }
+ // Mute WaveSurfer's audio output — native handles the actual sound
+ // Keep WaveSurfer running for waveform visualization
+ wavesurfer.setVolume(0);
+ wavesurfer.setMuted(true);
// Start WaveSurfer playback for visualization (muted)
wavesurfer.play().catch((error) => {
@@ -334,38 +307,15 @@ export function AudioPlayer() {
'Native playback failed during auto-play, falling back to WaveSurfer:',
error,
);
- // Reset native playback flag and unmute WaveSurfer
isUsingNativePlaybackRef.current = false;
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- const currentVolume = usePlayerStore.getState().volume;
- mediaElement.volume = currentVolume;
- mediaElement.muted = false;
- debug.log(
- 'WaveSurfer unmuted after native playback failure - volume:',
- mediaElement.volume,
- 'muted:',
- mediaElement.muted,
- );
- }
// Fall through to WaveSurfer playback
}
- } else {
- debug.log('Not using native playback, using WaveSurfer');
- // Reset native playback flag and unmute WaveSurfer
- isUsingNativePlaybackRef.current = false;
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- const currentVolume = usePlayerStore.getState().volume;
- mediaElement.volume = currentVolume;
- mediaElement.muted = false;
- debug.log(
- 'WaveSurfer unmuted for normal playback - volume:',
- mediaElement.volume,
- 'muted:',
- mediaElement.muted,
- );
- }
+ }
+
+ // Standard playback path — ensure WaveSurfer is unmuted
+ if (!isUsingNativePlaybackRef.current) {
+ wavesurfer.setMuted(false);
+ wavesurfer.setVolume(usePlayerStore.getState().volume);
}
// Only auto-play if shouldAutoPlay flag is set (user explicitly clicked to play)
@@ -389,28 +339,6 @@ export function AudioPlayer() {
// Handle play/pause
wavesurfer.on('play', () => {
setIsPlaying(true);
- // Ensure audio element volume is set correctly
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- // Double-check: if using native playback, keep WaveSurfer muted
- // Otherwise, ensure it's unmuted
- if (isUsingNativePlaybackRef.current) {
- mediaElement.volume = 0;
- mediaElement.muted = true;
- debug.log('Playing (native mode) - WaveSurfer muted for visualization only');
- } else {
- // Ensure WaveSurfer is unmuted for normal playback
- const currentVolume = usePlayerStore.getState().volume;
- mediaElement.volume = currentVolume;
- mediaElement.muted = false;
- debug.log(
- 'Playing (normal mode) - volume:',
- mediaElement.volume,
- 'muted:',
- mediaElement.muted,
- );
- }
- }
});
wavesurfer.on('pause', () => setIsPlaying(false));
wavesurfer.on('finish', () => {
@@ -492,11 +420,6 @@ export function AudioPlayer() {
if (wavesurferRef.current) {
debug.log('Destroying WaveSurfer instance');
try {
- const mediaElement = wavesurferRef.current.getMediaElement();
- if (mediaElement) {
- mediaElement.pause();
- mediaElement.src = '';
- }
wavesurferRef.current.destroy();
} catch (error) {
debug.error('Error destroying WaveSurfer:', error);
@@ -537,13 +460,10 @@ export function AudioPlayer() {
}
// Reset native playback flag when loading new audio
- // Also unmute WaveSurfer if it was muted
+ // Unmute WaveSurfer if it was muted for native playback
if (isUsingNativePlaybackRef.current) {
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- mediaElement.muted = false;
- mediaElement.volume = usePlayerStore.getState().volume;
- }
+ wavesurfer.setMuted(false);
+ wavesurfer.setVolume(usePlayerStore.getState().volume);
}
isUsingNativePlaybackRef.current = false;
@@ -559,16 +479,7 @@ export function AudioPlayer() {
wavesurfer.pause();
}
- // Stop the media element explicitly
- const mediaElement = wavesurfer.getMediaElement();
- if (mediaElement) {
- debug.log('Stopping media element');
- mediaElement.pause();
- mediaElement.currentTime = 0;
- mediaElement.src = '';
- }
-
- // Use empty() to completely destroy the waveform and media element
+ // Use empty() to completely destroy the waveform and reset media
debug.log('Calling wavesurfer.empty() to destroy audio');
wavesurfer.empty();
} catch (error) {
@@ -623,20 +534,13 @@ export function AudioPlayer() {
// Sync volume
useEffect(() => {
if (wavesurferRef.current) {
- wavesurferRef.current.setVolume(volume);
- // Also ensure the underlying audio element volume is set
- const mediaElement = wavesurferRef.current.getMediaElement();
- if (mediaElement) {
- // If using native playback, keep WaveSurfer muted regardless of volume setting
- if (isUsingNativePlaybackRef.current) {
- mediaElement.volume = 0;
- mediaElement.muted = true;
- debug.log('Volume sync: Using native playback, keeping WaveSurfer muted');
- } else {
- mediaElement.volume = volume;
- mediaElement.muted = volume === 0;
- debug.log('Volume synced:', volume, 'muted:', mediaElement.muted);
- }
+ // If using native playback, keep WaveSurfer muted regardless of volume setting
+ if (isUsingNativePlaybackRef.current) {
+ wavesurferRef.current.setVolume(0);
+ debug.log('Volume sync: Using native playback, keeping WaveSurfer muted');
+ } else {
+ wavesurferRef.current.setVolume(volume);
+ debug.log('Volume synced:', volume);
}
}
}, [volume]);
@@ -757,11 +661,8 @@ export function AudioPlayer() {
isUsingNativePlaybackRef.current = true;
// Mute WaveSurfer and start it for visualization
- const mediaElement = wavesurferRef.current.getMediaElement();
- if (mediaElement) {
- mediaElement.volume = 0;
- mediaElement.muted = true;
- }
+ wavesurferRef.current.setVolume(0);
+ wavesurferRef.current.setMuted(true);
// Start WaveSurfer for visualization (muted)
wavesurferRef.current.play().catch((error) => {
@@ -785,11 +686,8 @@ export function AudioPlayer() {
} else {
// Ensure WaveSurfer is not muted if not using native playback
if (!isUsingNativePlaybackRef.current) {
- const mediaElement = wavesurferRef.current.getMediaElement();
- if (mediaElement) {
- mediaElement.muted = false;
- mediaElement.volume = volume;
- }
+ wavesurferRef.current.setMuted(false);
+ wavesurferRef.current.setVolume(volume);
}
wavesurferRef.current.play().catch((error) => {
diff --git a/app/src/components/EffectsTab/EffectsDetail.tsx b/app/src/components/EffectsTab/EffectsDetail.tsx
index f877c914..60c2bf7f 100644
--- a/app/src/components/EffectsTab/EffectsDetail.tsx
+++ b/app/src/components/EffectsTab/EffectsDetail.tsx
@@ -5,6 +5,14 @@ import { useEffect, useRef, useState } from 'react';
import { EffectsChainEditor } from '@/components/Effects/EffectsChainEditor';
import { GenerationPicker } from '@/components/Effects/GenerationPicker';
import { Button } from '@/components/ui/button';
+import {
+ Dialog,
+ DialogContent,
+ DialogDescription,
+ DialogFooter,
+ DialogHeader,
+ DialogTitle,
+} from '@/components/ui/dialog';
import { Input } from '@/components/ui/input';
import { Label } from '@/components/ui/label';
import { Separator } from '@/components/ui/separator';
@@ -29,6 +37,11 @@ export function EffectsDetail() {
const [saving, setSaving] = useState(false);
const [deleting, setDeleting] = useState(false);
+ // "Save as Custom" dialog state
+ const [saveAsDialogOpen, setSaveAsDialogOpen] = useState(false);
+ const [saveAsName, setSaveAsName] = useState('');
+ const [saveAsDescription, setSaveAsDescription] = useState('');
+
// Preview state
const [previewGenId, setPreviewGenId] = useState(null);
const [previewLoading, setPreviewLoading] = useState(false);
@@ -165,8 +178,38 @@ export function EffectsDetail() {
}
}
- async function handleSaveAsNew() {
- await handleSaveNew();
+ function handleSaveAsNew() {
+ // Open the dialog with a suggested name based on the current preset
+ setSaveAsName(`${name} (Copy)`);
+ setSaveAsDescription(description);
+ setSaveAsDialogOpen(true);
+ }
+
+ async function handleSaveAsConfirm() {
+ if (!saveAsName.trim()) {
+ toast({ title: 'Name required', variant: 'destructive' });
+ return;
+ }
+ setSaving(true);
+ try {
+ const created = await apiClient.createEffectPreset({
+ name: saveAsName.trim(),
+ description: saveAsDescription.trim() || undefined,
+ effects_chain: workingChain,
+ });
+ queryClient.invalidateQueries({ queryKey: ['effect-presets'] });
+ setSaveAsDialogOpen(false);
+ setSelectedPresetId(created.id);
+ toast({ title: 'Preset saved', description: `"${created.name}" has been created.` });
+ } catch (error) {
+ toast({
+ title: 'Failed to save',
+ description: error instanceof Error ? error.message : 'Unknown error',
+ variant: 'destructive',
+ });
+ } finally {
+ setSaving(false);
+ }
}
async function handleDelete() {
@@ -327,6 +370,53 @@ export function EffectsDetail() {
+
+ {/* Save as Custom dialog */}
+
- {tabs.map((tab) => {
+ {tabs.map((tab, index) => {
const Icon = tab.icon;
// For index route, use exact match; for others, use default matching
const isActive =
tab.path === '/' ? matchRoute({ to: '/', exact: true }) : matchRoute({ to: tab.path });
+ // Accent fades as buttons get further from the logo
+ const accentOpacity = Math.max(0.08, 0.5 - index * 0.07);
+
return (
)}
diff --git a/bun.lock b/bun.lock
index b507da48..d6b63ee8 100644
--- a/bun.lock
+++ b/bun.lock
@@ -17,7 +17,7 @@
},
"app": {
"name": "@voicebox/app",
- "version": "0.1.13",
+ "version": "0.2.0",
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/sortable": "^10.0.0",
@@ -72,13 +72,15 @@
},
"landing": {
"name": "@voicebox/landing",
- "version": "0.1.13",
+ "version": "0.2.0",
"dependencies": {
+ "@fontsource/space-grotesk": "^5.2.10",
"@radix-ui/react-separator": "^1.1.8",
"@radix-ui/react-slot": "^1.2.4",
"autoprefixer": "^10.4.17",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
+ "framer-motion": "^12.36.0",
"lucide-react": "^0.316.0",
"next": "^16.1.3",
"postcss": "^8.4.33",
@@ -87,6 +89,7 @@
"tailwind-merge": "^3.4.0",
"tailwindcss": "^3.4.1",
"tailwindcss-animate": "^1.0.7",
+ "wavesurfer.js": "^7.12.2",
},
"devDependencies": {
"@types/node": "^20.11.5",
@@ -97,7 +100,7 @@
},
"tauri": {
"name": "@voicebox/tauri",
- "version": "0.1.13",
+ "version": "0.2.0",
"dependencies": {
"@tauri-apps/api": "^2.0.0",
"@tauri-apps/plugin-dialog": "^2.0.0",
@@ -120,7 +123,7 @@
},
"web": {
"name": "@voicebox/web",
- "version": "0.1.13",
+ "version": "0.2.0",
"dependencies": {
"@tanstack/react-query": "^5.0.0",
"react": "^18.3.0",
@@ -274,6 +277,8 @@
"@floating-ui/utils": ["@floating-ui/utils@0.2.10", "", {}, "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ=="],
+ "@fontsource/space-grotesk": ["@fontsource/space-grotesk@5.2.10", "", {}, "sha512-XNXEbT74OIITPqw2H6HXwPDp85fy43uxfBwFR5PU+9sLnjuLj12KlhVM9nZVN6q6dlKjkuN8JisW/OBxwxgUew=="],
+
"@hookform/resolvers": ["@hookform/resolvers@3.10.0", "", { "peerDependencies": { "react-hook-form": "^7.0.0" } }, "sha512-79Dv+3mDF7i+2ajj7SkypSKHhl1cbln1OGavqrsF7p6mbUv11xpqpacPsGDCTRvCSjEEIez2ef1NveSVL3b0Ag=="],
"@humanwhocodes/config-array": ["@humanwhocodes/config-array@0.13.0", "", { "dependencies": { "@humanwhocodes/object-schema": "^2.0.3", "debug": "^4.3.1", "minimatch": "^3.0.5" } }, "sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw=="],
@@ -1152,12 +1157,16 @@
"@typescript-eslint/typescript-estree/semver": ["semver@7.7.3", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q=="],
+ "@voicebox/landing/framer-motion": ["framer-motion@12.36.0", "", { "dependencies": { "motion-dom": "^12.36.0", "motion-utils": "^12.36.0", "tslib": "^2.4.0" }, "peerDependencies": { "@emotion/is-prop-valid": "*", "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" }, "optionalPeers": ["@emotion/is-prop-valid", "react", "react-dom"] }, "sha512-4PqYHAT7gev0ke0wos+PyrcFxI0HScjm3asgU8nSYa8YzJFuwgIvdj3/s3ZaxLq0bUSboIn19A2WS/MHwLCvfw=="],
+
"@voicebox/landing/lucide-react": ["lucide-react@0.316.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0" } }, "sha512-dTmYX1H4IXsRfVcj/KUxworV6814ApTl7iXaS21AimK2RUEl4j4AfOmqD3VR8phe5V91m4vEJ8tCK4uT1jE5nA=="],
"@voicebox/landing/tailwind-merge": ["tailwind-merge@3.4.0", "", {}, "sha512-uSaO4gnW+b3Y2aWoWfFpX62vn2sR3skfhbjsEnaBI81WD1wBLlHZe5sWf0AqjksNdYTbGBEd0UasQMT3SNV15g=="],
"@voicebox/landing/tailwindcss": ["tailwindcss@3.4.19", "", { "dependencies": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", "chokidar": "^3.6.0", "didyoumean": "^1.2.2", "dlv": "^1.1.3", "fast-glob": "^3.3.2", "glob-parent": "^6.0.2", "is-glob": "^4.0.3", "jiti": "^1.21.7", "lilconfig": "^3.1.3", "micromatch": "^4.0.8", "normalize-path": "^3.0.0", "object-hash": "^3.0.0", "picocolors": "^1.1.1", "postcss": "^8.4.47", "postcss-import": "^15.1.0", "postcss-js": "^4.0.1", "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0", "postcss-nested": "^6.2.0", "postcss-selector-parser": "^6.1.2", "resolve": "^1.22.8", "sucrase": "^3.35.0" }, "bin": { "tailwind": "lib/cli.js", "tailwindcss": "lib/cli.js" } }, "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ=="],
+ "@voicebox/landing/wavesurfer.js": ["wavesurfer.js@7.12.2", "", {}, "sha512-akVYISAHCw2gNw/7n8Pk/zH1Zz91WJyL/2MaNQCLD1XV3A226gKlWoDHWp9UdWqQ3zXnWttDf9ewZQQ3cxbOmQ=="],
+
"chokidar/glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="],
"fast-glob/glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="],
@@ -1169,5 +1178,9 @@
"tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
"@typescript-eslint/typescript-estree/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="],
+
+ "@voicebox/landing/framer-motion/motion-dom": ["motion-dom@12.36.0", "", { "dependencies": { "motion-utils": "^12.36.0" } }, "sha512-Ep1pq8P88rGJ75om8lTCA13zqd7ywPGwCqwuWwin6BKc0hMLkVfcS6qKlRqEo2+t0DwoUcgGJfXwaiFn4AOcQA=="],
+
+ "@voicebox/landing/framer-motion/motion-utils": ["motion-utils@12.36.0", "", {}, "sha512-eHWisygbiwVvf6PZ1vhaHCLamvkSbPIeAYxWUuL3a2PD/TROgE7FvfHWTIH4vMl798QLfMw15nRqIaRDXTlYRg=="],
}
}
diff --git a/docs/RELEASE_v0.2.0.md b/docs/RELEASE_v0.2.0.md
new file mode 100644
index 00000000..9d2d8e7b
--- /dev/null
+++ b/docs/RELEASE_v0.2.0.md
@@ -0,0 +1,163 @@
+# Voicebox v0.2.0 -- Release Notes
+
+## The story
+
+Voicebox v0.1.x shipped as a single-engine voice cloning app built around Qwen3-TTS. It worked, but it was limited: one model family, 10 languages, English-centric emotion, a synchronous generation pipeline that locked the UI, and a hard ceiling on how much text you could generate at once.
+
+v0.2.0 is a ground-up rethink. Voicebox is now a **multi-engine voice cloning platform**. Four TTS engines. 23 languages. Expressive paralinguistic controls. A full post-processing effects pipeline. Unlimited generation length. Asynchronous everything. And it runs on hardware from every major GPU vendor -- NVIDIA, AMD, Intel Arc, Apple Silicon -- plus Docker for headless deployment.
+
+This is the release where Voicebox stops being a proof of concept and starts being a real tool.
+
+---
+
+## Major New Features
+
+### Multi-Engine Architecture
+Voicebox now supports **four TTS engines**, each with different strengths. Switch between them per-generation from a single unified interface:
+
+| Engine | Languages | Strengths |
+|--------|-----------|-----------|
+| **Qwen3-TTS** (0.6B / 1.7B) | 10 | High-quality multilingual cloning, delivery instructions ("speak slowly", "whisper") |
+| **LuxTTS** | English | Lightweight (~1GB VRAM), 48kHz output, 150x realtime on CPU |
+| **Chatterbox Multilingual** | 23 | Broadest language coverage -- Arabic, Danish, Finnish, Greek, Hebrew, Hindi, Malay, Norwegian, Polish, Swahili, Swedish, Turkish and more |
+| **Chatterbox Turbo** | English | Fast 350M model with paralinguistic emotion/sound tags |
+
+### Emotions and Paralinguistic Tags (Chatterbox Turbo)
+Type `/` in the text input to open an autocomplete for **9 expressive tags** that the model synthesizes inline with speech:
+
+`[laugh]` `[chuckle]` `[gasp]` `[cough]` `[sigh]` `[groan]` `[sniff]` `[shush]` `[clear throat]`
+
+Tags render as inline badges in a rich text editor and serialize cleanly to the API. This makes generated speech sound natural and expressive in a way that plain TTS can't.
+
+### 23 Languages via Chatterbox Multilingual
+The Chatterbox Multilingual engine brings zero-shot voice cloning to **23 languages**: Arabic, Chinese, Danish, Dutch, English, Finnish, French, German, Greek, Hebrew, Hindi, Italian, Japanese, Korean, Malay, Norwegian, Polish, Portuguese, Russian, Spanish, Swahili, Swedish, and Turkish. The language dropdown dynamically filters to show only languages supported by the selected engine.
+
+### Unlimited Generation Length (Auto-Chunking)
+Previously, long text would hit model context limits and degrade. Now, text is **automatically split at sentence boundaries** and each chunk is generated independently, then crossfaded back together. This is fully engine-agnostic and works with all four engines.
+
+- **Auto-chunking limit slider** (100-5,000 chars, default 800) -- controls when text gets split
+- **Crossfade slider** (0-200ms, default 50ms) -- blends chunk boundaries smoothly; set it to 0 for a hard cut
+- **Max text length raised to 50,000 characters** -- generate entire scripts, chapters, or articles in one go
+- Smart splitting respects abbreviations (Dr., e.g., a.m.), CJK punctuation, and never breaks inside paralinguistic `[tags]`
+
+### Asynchronous Generation Queue
+Generation is now fully **non-blocking**. Submit a generation and immediately start typing the next one -- no more frozen UI waiting for inference to complete.
+
+- Serial execution queue prevents GPU contention across all backends
+- Real-time SSE status streaming (`generating` -> `completed` / `failed`)
+- Failed generations can be retried without re-entering text
+- Stale generations from crashes are auto-recovered on startup
+- Generating status pill shown inline in the story editor
+
+### Post-Processing Effects Pipeline
+A full audio effects system powered by Spotify's `pedalboard` library. Apply effects after generation, preview them in real time, and build reusable presets -- all without leaving the app.
+
+**8 effects available:**
+
+| Effect | What it does |
+|--------|-------------|
+| **Pitch Shift** | Shift pitch up or down by up to 12 semitones |
+| **Reverb** | Room reverb with configurable size, damping, and wet/dry mix |
+| **Delay** | Echo with adjustable delay time, feedback, and mix |
+| **Chorus / Flanger** | Modulated delay -- short for metallic flanger, longer for lush chorus |
+| **Compressor** | Dynamic range compression with threshold, ratio, attack, and release |
+| **Gain** | Volume adjustment from -40 to +40 dB |
+| **High-Pass Filter** | Remove low frequencies below a configurable cutoff |
+| **Low-Pass Filter** | Remove high frequencies above a configurable cutoff |
+
+**Effects presets** -- Four built-in presets ship out of the box (Robotic, Radio, Echo Chamber, Deep Voice), and you can create unlimited custom presets. Presets are drag-and-drop chains of effects with per-parameter sliders.
+
+**Per-profile default effects** -- Assign an effects chain to a voice profile and it applies automatically to every generation with that voice. Override per-generation from the generate box.
+
+**Live preview** -- Audition any effects chain against an existing generation before committing. The preview streams processed audio without saving anything.
+
+### Generation Versions
+Every generation now supports **multiple versions** with full provenance tracking:
+
+- **Original** -- the clean, unprocessed TTS output (always preserved)
+- **Effects versions** -- apply different effects chains to create new versions from any source version
+- **Takes** -- regenerate with the same text and voice but a new seed for variation
+- **Source tracking** -- each version records which version it was derived from
+- **Version pinning in stories** -- pin a specific version to a track clip in the story editor, independent of the generation's default
+- **Favorites** -- star generations to mark them for quick access
+
+---
+
+## New Platform Support
+
+### Linux (Native)
+Full Linux support with `.deb` and `.rpm` packages. Includes PulseAudio/PipeWire audio capture for voice sample recording.
+
+### AMD ROCm GPU Acceleration
+AMD GPU users now get hardware-accelerated inference via ROCm, with automatic `HSA_OVERRIDE_GFX_VERSION` configuration for GPUs not officially in the ROCm compatibility list (e.g., RX 6600).
+
+### NVIDIA CUDA Backend Swap
+The CPU-only release can download and swap in a CUDA-accelerated backend binary from within the app -- no reinstall required. Handles GitHub's 2GB asset limit by downloading split parts and verifying SHA-256 checksums.
+
+### Intel Arc (XPU) and DirectML
+PyTorch backend also supports Intel Arc GPUs via IPEX/XPU and Windows any-GPU via DirectML.
+
+### Docker + Web Deployment
+Run Voicebox headless as a Docker container with the full web UI:
+```bash
+docker compose up
+```
+3-stage build, non-root runtime, health checks, persistent model cache across rebuilds. Binds to localhost only by default.
+
+---
+
+## Model Management
+- **Per-model unload** -- free GPU memory without deleting downloaded models
+- **Custom models directory** -- set `VOICEBOX_MODELS_DIR` to store models anywhere
+- **Model folder migration** -- move all models to a new location with progress tracking
+- **Whisper Turbo** -- added `openai/whisper-large-v3-turbo` as a transcription model option
+- **Download cancel/clear UI** -- cancel in-progress downloads, VS Code-style problems panel for errors
+
+---
+
+## Security
+- **CORS hardening** -- replaced wildcard `*` with an explicit allowlist of local origins; extensible via `VOICEBOX_CORS_ORIGINS` env var
+- **Network access toggle** -- fully disable outbound network requests for air-gapped deployments
+
+## Accessibility
+- Comprehensive screen reader support (tested with NVDA/Narrator) across all major UI surfaces
+- Keyboard navigation for voice cards, history rows, model management, and story editor
+- State-aware `aria-label` attributes on all interactive controls
+
+## Reliability
+- **Atomic audio saves** -- two-phase write prevents corrupted files on crash/interrupt
+- **Filesystem health endpoint** -- proactive disk space and directory writability checks
+- **Errno-specific error messages** -- clear feedback for permission denied, disk full, missing directory
+
+## UX Polish
+- Responsive layout with horizontal-scroll voice cards on mobile
+- App version shown in sidebar
+- Voice card heights normalized
+- Audio player title hidden at narrow widths to prevent overflow
+
+---
+
+## Installation
+
+| Platform | Download |
+|----------|----------|
+| **macOS (Apple Silicon)** | `Voicebox_0.2.0_aarch64.dmg` |
+| **macOS (Intel)** | `Voicebox_0.2.0_x64.dmg` |
+| **Windows** | `Voicebox_0.2.0_x64_en-US.msi` or `x64-setup.exe` |
+| **Linux** | `.deb` / `.rpm` packages |
+| **Docker** | `docker compose up` |
+
+The app includes automatic updates -- future patches are downloaded and installed for you.
+
+---
+
+## Video Script Beats
+
+For the marketing video, focus on these six beats:
+
+1. **"Four engines, one app"** -- show the engine dropdown switching between Qwen, LuxTTS, Chatterbox, and Turbo
+2. **"23 languages"** -- generate the same voice clone in Arabic, Japanese, Hindi, etc.
+3. **"Make it expressive"** -- type `/laugh` and `/sigh` with Chatterbox Turbo, play back the result
+4. **"Shape your sound"** -- apply the Robotic or Deep Voice preset, preview it live, then build a custom effects chain with drag-and-drop
+5. **"No limits"** -- paste a long script, show it auto-chunk and generate seamlessly
+6. **"Queue and go"** -- fire off multiple generations back-to-back without waiting
diff --git a/landing/package.json b/landing/package.json
index 60acdd13..88464f93 100644
--- a/landing/package.json
+++ b/landing/package.json
@@ -9,11 +9,13 @@
"lint": "next lint"
},
"dependencies": {
+ "@fontsource/space-grotesk": "^5.2.10",
"@radix-ui/react-separator": "^1.1.8",
"@radix-ui/react-slot": "^1.2.4",
"autoprefixer": "^10.4.17",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
+ "framer-motion": "^12.36.0",
"lucide-react": "^0.316.0",
"next": "^16.1.3",
"postcss": "^8.4.33",
@@ -21,7 +23,8 @@
"react-dom": "^18.2.0",
"tailwind-merge": "^3.4.0",
"tailwindcss": "^3.4.1",
- "tailwindcss-animate": "^1.0.7"
+ "tailwindcss-animate": "^1.0.7",
+ "wavesurfer.js": "^7.12.2"
},
"devDependencies": {
"@types/node": "^20.11.5",
diff --git a/landing/public/audio/fireship.webm b/landing/public/audio/fireship.webm
new file mode 100644
index 00000000..0561ea18
Binary files /dev/null and b/landing/public/audio/fireship.webm differ
diff --git a/landing/public/audio/jarvis.webm b/landing/public/audio/jarvis.webm
new file mode 100644
index 00000000..4eefdbe8
Binary files /dev/null and b/landing/public/audio/jarvis.webm differ
diff --git a/landing/public/audio/linus.webm b/landing/public/audio/linus.webm
new file mode 100644
index 00000000..63b7ad03
Binary files /dev/null and b/landing/public/audio/linus.webm differ
diff --git a/landing/public/audio/morganfreeman.webm b/landing/public/audio/morganfreeman.webm
new file mode 100644
index 00000000..eb5cddd4
Binary files /dev/null and b/landing/public/audio/morganfreeman.webm differ
diff --git a/landing/public/audio/samaltman.webm b/landing/public/audio/samaltman.webm
new file mode 100644
index 00000000..7400bece
Binary files /dev/null and b/landing/public/audio/samaltman.webm differ
diff --git a/landing/public/audio/samjackson.webm b/landing/public/audio/samjackson.webm
new file mode 100644
index 00000000..02f43a96
Binary files /dev/null and b/landing/public/audio/samjackson.webm differ
diff --git a/landing/public/voicebox-logo-app.webp b/landing/public/voicebox-logo-app.webp
new file mode 100644
index 00000000..9fedd439
Binary files /dev/null and b/landing/public/voicebox-logo-app.webp differ
diff --git a/landing/src/app/api/stars/route.ts b/landing/src/app/api/stars/route.ts
new file mode 100644
index 00000000..bc8f73c5
--- /dev/null
+++ b/landing/src/app/api/stars/route.ts
@@ -0,0 +1,15 @@
+import { NextResponse } from 'next/server';
+import { getStarCount } from '@/lib/releases';
+
+export const dynamic = 'force-dynamic';
+export const revalidate = 600;
+
+export async function GET() {
+ try {
+ const count = await getStarCount();
+ return NextResponse.json({ count });
+ } catch (error) {
+ console.error('Error fetching star count:', error);
+ return NextResponse.json({ error: 'Failed to fetch star count' }, { status: 500 });
+ }
+}
diff --git a/landing/src/app/globals.css b/landing/src/app/globals.css
index da5b7b99..143203e7 100644
--- a/landing/src/app/globals.css
+++ b/landing/src/app/globals.css
@@ -16,7 +16,7 @@
--secondary-foreground: 0 0% 0%;
--muted: 0 0% 96%;
--muted-foreground: 0 0% 45%;
- --accent: 0 0% 96%;
+ --accent: 43 50% 50%;
--accent-foreground: 0 0% 0%;
--destructive: 0 0% 0%;
--destructive-foreground: 0 0% 100%;
@@ -27,26 +27,52 @@
}
.dark {
- --background: 0 0% 3%;
- --foreground: 0 0% 98%;
- --card: 0 0% 8% / 0.6;
- --card-foreground: 0 0% 98%;
- --popover: 0 0% 8% / 0.8;
- --popover-foreground: 0 0% 98%;
- --primary: 0 0% 98%;
- --primary-foreground: 0 0% 8%;
- --secondary: 0 0% 12% / 0.5;
- --secondary-foreground: 0 0% 98%;
- --muted: 0 0% 12% / 0.4;
- --muted-foreground: 0 0% 65%;
- --accent: 0 0% 15% / 0.5;
- --accent-foreground: 0 0% 98%;
+ /* Surfaces -- slightly warm-tinted darks */
+ --background: 30 4% 4%;
+ --foreground: 30 10% 94%;
+ --card: 30 4% 7%;
+ --card-foreground: 30 10% 94%;
+ --popover: 30 4% 7%;
+ --popover-foreground: 30 10% 94%;
+ --primary: 30 10% 94%;
+ --primary-foreground: 30 4% 7%;
+ --secondary: 30 4% 10%;
+ --secondary-foreground: 30 10% 94%;
+ --muted: 30 3% 12%;
+ --muted-foreground: 30 5% 55%;
+ --accent: 43 50% 45%;
+ --accent-foreground: 30 10% 94%;
--destructive: 0 62% 50%;
- --destructive-foreground: 0 0% 98%;
- --border: 0 0% 15% / 0.5;
- --input: 0 0% 15% / 0.5;
- --ring: 0 0% 98% / 0.2;
- --radius: 1rem;
+ --destructive-foreground: 30 10% 94%;
+ --border: 30 4% 13%;
+ --input: 30 4% 13%;
+ --ring: 30 10% 94% / 0.2;
+ --radius: 0.75rem;
+
+ /* App-specific surface tokens */
+ --app: 30 4% 4%;
+ --app-box: 30 4% 7%;
+ --app-dark-box: 30 4% 5%;
+ --app-darker-box: 30 4% 3%;
+ --app-light-box: 30 4% 14%;
+ --app-line: 30 4% 13%;
+ --app-button: 30 4% 11%;
+ --app-hover: 30 4% 15%;
+ --app-selected: 30 4% 17%;
+
+ /* Text hierarchy */
+ --ink: 30 10% 94%;
+ --ink-dull: 30 5% 55%;
+ --ink-faint: 30 3% 38%;
+
+ /* Accent shades */
+ --accent-faint: 43 45% 55%;
+ --accent-deep: 43 55% 35%;
+ --accent-glow: 43 60% 50%;
+
+ /* Sidebar */
+ --sidebar: 30 4% 3%;
+ --sidebar-line: 30 4% 10%;
}
}
@@ -60,9 +86,9 @@
body {
@apply bg-background text-foreground antialiased;
overflow-x: hidden;
- background-image:
- radial-gradient(at 0% 0%, rgba(255, 255, 255, 0.03) 0px, transparent 50%),
- radial-gradient(at 100% 100%, rgba(255, 255, 255, 0.02) 0px, transparent 50%);
+ font-family:
+ ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
+ "Helvetica Neue", Arial, sans-serif;
}
}
@@ -71,3 +97,42 @@
text-wrap: balance;
}
}
+
+/* Staggered fade-in animation for hero elements */
+@keyframes fadeUp {
+ from {
+ opacity: 0;
+ transform: translateY(16px);
+ }
+ to {
+ opacity: 1;
+ transform: translateY(0);
+ }
+}
+
+.fade-in {
+ opacity: 0;
+ animation: fadeUp 0.6s ease-out forwards;
+}
+
+/* Noise texture overlay for hero glow */
+/* .hero-glow::after {
+ content: "";
+ position: absolute;
+ inset: 0;
+ z-index: 5;
+ pointer-events: none;
+ background: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='2048' height='2048'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='1.5' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E") center / 100% 100% no-repeat;
+ opacity: 0.35;
+ mix-blend-mode: overlay;
+ will-change: transform;
+} */
+
+/* Scrollbar hiding */
+::-webkit-scrollbar {
+ display: none;
+}
+
+* {
+ scrollbar-width: none;
+}
diff --git a/landing/src/app/layout.tsx b/landing/src/app/layout.tsx
index f29ef15e..164b7f71 100644
--- a/landing/src/app/layout.tsx
+++ b/landing/src/app/layout.tsx
@@ -1,17 +1,19 @@
import type { Metadata } from 'next';
-import { Inter } from 'next/font/google';
import './globals.css';
-import { Banner } from '@/components/Banner';
-import { Footer } from '@/components/Footer';
-import { Header } from '@/components/Header';
-
-const inter = Inter({ subsets: ['latin'], variable: '--font-sans' });
export const metadata: Metadata = {
- title: 'Voicebox - Open Source Voice Cloning Desktop App Powered by Qwen3-TTS',
+ title: 'Voicebox - Open Source Voice Cloning Desktop App',
description:
- 'Near-perfect voice cloning powered by Qwen3-TTS. Desktop app for Mac, Windows, and Linux. Multi-sample support, smart caching, local or remote inference.',
- keywords: ['voice cloning', 'TTS', 'Qwen3', 'desktop app', 'AI voice'],
+ 'Near-perfect voice cloning with multiple TTS engines. Desktop app for Mac, Windows, and Linux. Multi-sample support, smart caching, local or remote inference.',
+ keywords: [
+ 'voice cloning',
+ 'TTS',
+ 'multi-engine',
+ 'desktop app',
+ 'AI voice',
+ 'open source',
+ 'text to speech',
+ ],
icons: {
icon: [
{ url: '/favicon.png', type: 'image/png' },
@@ -20,8 +22,8 @@ export const metadata: Metadata = {
apple: [{ url: '/apple-touch-icon.png', sizes: '180x180', type: 'image/png' }],
},
openGraph: {
- title: 'voicebox',
- description: 'Professional voice cloning with Qwen3-TTS',
+ title: 'Voicebox',
+ description: 'Open source voice cloning. Local-first. Free forever.',
type: 'website',
url: 'https://voicebox.sh',
},
@@ -30,15 +32,16 @@ export const metadata: Metadata = {
export default function RootLayout({ children }: { children: React.ReactNode }) {
return (
-
-
-
-
-
- {children}
-
-
-
+
+
+
+
+
+
+
{children}
);
diff --git a/landing/src/app/page.tsx b/landing/src/app/page.tsx
index 4ba9d8c4..e78d1ca3 100644
--- a/landing/src/app/page.tsx
+++ b/landing/src/app/page.tsx
@@ -1,304 +1,327 @@
'use client';
-import { Cloud, Code, Cpu, Github, Shield, Zap } from 'lucide-react';
+import { Github, Globe, Languages, MessageSquare, Zap } from 'lucide-react';
import Image from 'next/image';
import { useEffect, useState } from 'react';
+import { ControlUI } from '@/components/ControlUI';
+import { Features } from '@/components/Features';
+import { Footer } from '@/components/Footer';
+import { Navbar } from '@/components/Navbar';
import { AppleIcon, LinuxIcon, WindowsIcon } from '@/components/PlatformIcons';
-import { Button } from '@/components/ui/button';
-import { Section } from '@/components/ui/section';
+import { VoiceCreator } from '@/components/VoiceCreator';
import { DOWNLOAD_LINKS, GITHUB_REPO } from '@/lib/constants';
import type { DownloadLinks } from '@/lib/releases';
-import { FeatureCard } from '../components/ui/feature-card';
export default function Home() {
const [downloadLinks, setDownloadLinks] = useState
(DOWNLOAD_LINKS);
useEffect(() => {
- // Fetch latest release info
fetch('/api/releases')
.then((res) => {
- if (!res.ok) {
- throw new Error('Failed to fetch releases');
- }
+ if (!res.ok) throw new Error('Failed to fetch releases');
return res.json();
})
.then((data) => {
- if (data.downloadLinks) {
- setDownloadLinks(data.downloadLinks);
- }
+ if (data.downloadLinks) setDownloadLinks(data.downloadLinks);
})
.catch((error) => {
console.error('Failed to fetch release info:', error);
- // Keep fallback links (releases page) on error
});
}, []);
- const features = [
- {
- title: 'Near-Perfect Voice Cloning',
- description:
- "Powered by Alibaba's Qwen3-TTS model for exceptional voice quality and accuracy.",
- icon: ,
- },
- {
- title: 'Stories Editor',
- description:
- 'Create multi-voice narratives with a timeline-based editor. Arrange tracks, trim clips, and mix conversations.',
- icon: ,
- },
- {
- title: 'Multi-Sample Support',
- description:
- 'Combine multiple voice samples for higher quality and more natural-sounding results.',
- icon: ,
- },
-
- {
- title: 'Local or Remote',
- description:
- 'Run GPU inference locally or connect to a remote machine. One-click server setup.',
- icon: ,
- },
- {
- title: 'Audio Transcription',
- description:
- 'Powered by Whisper for accurate speech-to-text. Extract reference text from voice samples automatically.',
- icon: ,
- },
- {
- title: 'Cross-Platform',
- description: 'Available for macOS, Windows, and Linux. No Python installation required.',
- icon: ,
- },
- ];
return (
-
- {/* Hero Section */}
-
-
-
- {/* Left side - Content */}
-
-
-
+ <>
+
+
+ {/* ── Hero Section ─────────────────────────────────────────────── */}
+
+ {/* Background glow */}
+
+
+
+ {/* Logo */}
+
+
+
+
+ {/* Headline */}
+
+
+ Your voice, your machine.
+
+
+
+ {/* Subtitle */}
+
+ Open source voice cloning studio with support for multiple TTS engines. Clone any voice,
+ generate natural speech, and compose multi-voice projects — all running locally.
+
+
+ {/* CTAs */}
+
+
+ {/* Version */}
+
+ Free and open source · macOS, Windows, Linux
+
+
+
+ {/* ── ControlUI mockup ─────────────────────────────────────── */}
+
+
+
+
+
+ {/* ── Features ─────────────────────────────────────────────── */}
+
+
+ {/* ── Voice Creator ────────────────────────────────────────── */}
+
+
+ {/* ── Models ─────────────────────────────────────────────────── */}
+
+
+
+
+ Multi-Engine Architecture
+
+
+ Choose the right model for every job. All models run locally on your hardware —
+ download once, use forever.
+
+
+
+
+ {/* Qwen3-TTS */}
+
+
+
+
Qwen3-TTS
+ by Alibaba
+
+
+
+ 1.7B
+
+
+ 0.6B
+
+
-
- Voicebox
-
-
- Open source voice cloning powered by Qwen3-TTS. Create natural-sounding speech from
- text with near-perfect voice replication.
+
+ High-quality multilingual voice cloning with natural prosody. The only engine with
+ delivery instructions — control tone, pace, and emotion with natural language.
+
+
+
+ 10 languages
+
+
+
+ Delivery instructions
+
+
+
- {/* Mobile: centered screenshot above download buttons */}
-
-
-
+ {/* Chatterbox */}
+
+
+
+
Chatterbox
+ by Resemble AI
+
+ Production-grade voice cloning with the broadest language support. 23 languages with
+ zero-shot cloning and emotion exaggeration control.
+
+
+
+
+ 23 languages
+
+
+
- {/* Download buttons under left content */}
-
-
-
-
-
- macOS (ARM)
-
-
-
-
-
- macOS (Intel)
-
-
-
-
-
- Windows
-
-
-
- e.preventDefault()}
- className="flex items-center w-full relative opacity-50 cursor-not-allowed"
- title="Linux builds coming soon — Currently blocked by GitHub runner disk space limitations."
- aria-label="Linux builds coming soon — Currently blocked by GitHub runner disk space limitations."
- >
-
- Linux
-
-
+ {/* Chatterbox Turbo */}
+
+
+
+ Lightweight and fast. Supports paralinguistic tags — embed [laugh], [sigh], [gasp]
+ and more directly in your text for expressive, natural speech.
+
+
+
+
+ 350M params
+
+
+
+ [laugh] [sigh] tags
+
- {/* Desktop: Large screenshot positioned off-screen */}
-
-
-
+ {/* LuxTTS */}
+
+
+
+
LuxTTS
+ by ZipVoice
+
+
+
+ Ultra-fast, CPU-friendly voice cloning at 48kHz. Exceeds 150x realtime on CPU with
+ ~1GB VRAM. The fastest engine for quick iterations.
+
+
+
+
+ 150x realtime
+
+
+ 48kHz output
+
- {/* Screenshots Section */}
-
-
-
-
-
-
-
-
-
-
-
-
+ {/* ── Download Section ─────────────────────────────────────── */}
+
+
+
+
+ Download Voicebox
+
+
+ Available for macOS, Windows, and Linux. No dependencies required.
+
-
-
- {/* Description Section */}
-
-
-
- What is Voicebox?
-
-
-
- Voicebox is a local-first voice cloning studio with DAW-like features
- for professional voice synthesis. Think of it as a{' '}
- local, free and open-source alternative to ElevenLabs — download
- models, clone voices, and generate speech entirely on your machine.
-
-
- Unlike cloud services that lock your voice data behind subscriptions, Voicebox gives
- you complete privacy, professional tools, and native performance. Download a voice
- model, clone any voice from a few seconds of audio, and compose multi-voice projects
- with studio-grade editing tools.
-
-
- Optimized for performance with Metal acceleration on Mac and{' '}
- CUDA acceleration on Windows/Linux for fast, local inference.
-
-
No Python install required.
+
-
-
- {/* Demo Video Section */}
-
-
-
- See it in action...
-
-
-
- {/** biome-ignore lint/a11y/useMediaCaption: not generating captions for this, ya damn linter */}
-
-
- Your browser does not support the video tag.
-
-
+ {/* GitHub link */}
+
- {/* Features Section */}
-
-
- {features.map((feature) => (
-
- ))}
-
-
-
+ {/* ── Footer ───────────────────────────────────────────────── */}
+
+ >
);
}
diff --git a/landing/src/components/ControlUI.tsx b/landing/src/components/ControlUI.tsx
new file mode 100644
index 00000000..dcef66ac
--- /dev/null
+++ b/landing/src/components/ControlUI.tsx
@@ -0,0 +1,889 @@
+'use client';
+
+import { motion } from 'framer-motion';
+import {
+ AudioLines,
+ Box,
+ Download,
+ Mic,
+ MoreHorizontal,
+ Pencil,
+ Server,
+ Sparkles,
+ Speaker,
+ Star,
+ Trash2,
+ Volume2,
+ Wand2,
+} from 'lucide-react';
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { LandingAudioPlayer, unlockAudioContext } from './LandingAudioPlayer';
+
+// ─── Data ───────────────────────────────────────────────────────────────────
+// Edit this section to customise all the content shown in the ControlUI demo.
+
+interface VoiceProfile {
+ name: string;
+ description: string;
+ language: string;
+ hasEffects: boolean;
+}
+
+/** Voice profiles shown in the grid / scroll strip. Index matters — DEMO_SCRIPT references profiles by index. */
+const PROFILES: VoiceProfile[] = [
+ {
+ name: 'Jarvis',
+ description: 'Dry wit, composed British AI assistant',
+ language: 'en',
+ hasEffects: true,
+ },
+ {
+ name: 'Samuel L. Jackson',
+ description: 'Commanding intensity with sharp, punchy delivery',
+ language: 'en',
+ hasEffects: true,
+ },
+ {
+ name: 'Bob Ross',
+ description: 'Gentle, soothing voice full of quiet encouragement',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Sam Altman',
+ description: 'Measured, thoughtful Silicon Valley cadence',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Morgan Freeman',
+ description: 'Rich, warm baritone with gravitas and calm authority',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Linus Tech Tips',
+ description: 'Enthusiastic, fast-paced tech explainer energy',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Fireship',
+ description: 'Rapid-fire, deadpan tech humor with zero filler',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Scarlett Johansson',
+ description: 'Smooth, low alto with understated warmth',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Dario Amodei',
+ description: 'Calm, precise articulation with academic depth',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'David Attenborough',
+ description: 'Warm, reverent narration with wonder and precision',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Zendaya',
+ description: 'Relaxed, modern delivery with effortless cool',
+ language: 'en',
+ hasEffects: false,
+ },
+ {
+ name: 'Barack Obama',
+ description: 'Measured cadence with rhythmic pauses and gravitas',
+ language: 'en',
+ hasEffects: false,
+ },
+];
+
+/** Each entry is one cycle of the demo animation: select a profile → type text → generate → play audio. */
+interface DemoStep {
+ profileIndex: number;
+ text: string;
+ audioUrl: string;
+ engine: string;
+ duration: string;
+ effect?: string;
+}
+
+const DEMO_SCRIPT: DemoStep[] = [
+ {
+ profileIndex: 0,
+ text: 'Sir, I have completed the analysis. Your code has twelve critical vulnerabilities, your coffee is cold, and frankly your commit messages could use some work.',
+ audioUrl: '/audio/jarvis.webm',
+ engine: 'Qwen 1.7B',
+ duration: '0:10',
+ effect: 'Robot',
+ },
+ {
+ profileIndex: 4,
+ text: "I've narrated penguins, galaxies, and the entire history of mankind. But nothing prepared me for the moment a computer learned to do my job from a five second audio clip.",
+ audioUrl: '/audio/morganfreeman.webm',
+ engine: 'Qwen 1.7B',
+ duration: '0:11',
+ effect: 'Radio',
+ },
+ {
+ profileIndex: 3,
+ text: "Open source? [laugh] What's that?",
+ audioUrl: '/audio/samaltman.webm',
+ engine: 'Chatterbox',
+ duration: '0:03',
+ },
+ {
+ profileIndex: 1,
+ text: "So let me get this straight. You downloaded an app, pressed a button, and now there's two of me? The world was not ready for one",
+ audioUrl: '/audio/samjackson.webm',
+ engine: 'Qwen 1.7B',
+ duration: '0:10',
+ },
+ {
+ profileIndex: 5,
+ text: "So we got this voice cloning software and honestly it's kind of terrifying. Like, my wife could not tell the difference. Voicebox dot s h, link in the description!",
+ audioUrl: '/audio/linus.webm',
+ engine: 'Qwen 1.7B',
+ duration: '0:11',
+ },
+ {
+ profileIndex: 6,
+ text: 'This is Voicebox in one hundred seconds. It clones voices locally, it runs on your GPU, and no, OpenAI cannot hear you. Lets go.',
+ audioUrl: '/audio/fireship.webm',
+ engine: 'Qwen 0.6B',
+ duration: '0:09',
+ },
+];
+
+/** History rows pre-populated on first load. Newest first visually (array index 0 = top row); new generations are prepended. */
+interface Generation {
+ id: number;
+ profileName: string;
+ text: string;
+ language: string;
+ engine: string;
+ duration: string;
+ timeAgo: string;
+ favorited: boolean;
+ versions: number;
+}
+
+const INITIAL_GENERATIONS: Generation[] = [
+ {
+ id: 1,
+ profileName: 'Morgan Freeman',
+ text: 'The neural pathways of human speech contain more complexity than any language model can fully capture, yet we keep pushing the boundaries of what is possible.',
+ language: 'en',
+ engine: 'Qwen 1.7B',
+ duration: '0:08',
+ timeAgo: '2 minutes ago',
+ favorited: true,
+ versions: 3,
+ },
+ {
+ id: 2,
+ profileName: 'Samuel L. Jackson',
+ text: 'In a world increasingly shaped by artificial intelligence, the human voice remains our most powerful tool for connection and storytelling.',
+ language: 'en',
+ engine: 'Qwen 1.7B',
+ duration: '0:07',
+ timeAgo: '15 minutes ago',
+ favorited: false,
+ versions: 1,
+ },
+ {
+ id: 3,
+ profileName: 'Jarvis',
+ text: 'The architecture of modern text-to-speech systems reveals an elegant interplay between transformer models and acoustic feature prediction.',
+ language: 'en',
+ engine: 'Qwen 0.6B',
+ duration: '0:09',
+ timeAgo: '1 hour ago',
+ favorited: false,
+ versions: 2,
+ },
+ {
+ id: 4,
+ profileName: 'Bob Ross',
+ text: 'Welcome to the next chapter. Every great story begins with a single voice, and today that voice can be yours.',
+ language: 'en',
+ engine: 'Chatterbox',
+ duration: '0:06',
+ timeAgo: '3 hours ago',
+ favorited: true,
+ versions: 1,
+ },
+ {
+ id: 5,
+ profileName: 'Linus Tech Tips',
+ text: 'Local inference gives you complete control over your voice data. No cloud, no subscriptions, no compromises.',
+ language: 'en',
+ engine: 'Qwen 1.7B',
+ duration: '0:05',
+ timeAgo: '5 hours ago',
+ favorited: false,
+ versions: 1,
+ },
+];
+
+const SIDEBAR_ITEMS = [
+ { icon: Volume2, label: 'Generate' },
+ { icon: AudioLines, label: 'Stories' },
+ { icon: Mic, label: 'Voices' },
+ { icon: Wand2, label: 'Effects' },
+ { icon: Speaker, label: 'Audio' },
+ { icon: Box, label: 'Models' },
+ { icon: Server, label: 'Server' },
+];
+
+// ─── Phase system ───────────────────────────────────────────────────────────
+
+type Phase = 'idle' | 'selecting' | 'typing' | 'generating' | 'complete' | 'playing';
+
+const PHASE_DURATIONS: Record = {
+ idle: 2500,
+ selecting: 800,
+ typing: 6000,
+ generating: 2800,
+ complete: 1200,
+ playing: 4000,
+};
+
+// ─── Typewriter ─────────────────────────────────────────────────────────────
+
+function TypewriterText({ text, speed }: { text: string; speed?: number }) {
+ // Default: fill the typing phase duration, leaving 500ms buffer at the end
+ const resolvedSpeed =
+ speed ?? Math.max(20, Math.floor((PHASE_DURATIONS.typing - 500) / text.length));
+ const [displayed, setDisplayed] = useState('');
+ const indexRef = useRef(0);
+
+ useEffect(() => {
+ indexRef.current = 0;
+ setDisplayed('');
+ const interval = setInterval(() => {
+ indexRef.current += 1;
+ if (indexRef.current <= text.length) {
+ setDisplayed(text.slice(0, indexRef.current));
+ } else {
+ clearInterval(interval);
+ }
+ }, resolvedSpeed);
+ return () => clearInterval(interval);
+ }, [text, resolvedSpeed]);
+
+ return (
+ <>
+ {displayed}
+
+ >
+ );
+}
+
+// ─── Loading bars (simplified react-loaders replacement) ────────────────────
+
+function LoadingBars({ mode }: { mode: 'idle' | 'generating' | 'playing' }) {
+ const barColor = mode !== 'idle' ? 'bg-accent' : 'bg-muted-foreground/40';
+ return (
+
+ {[0, 1, 2, 3, 4].map((i) => (
+
+ ))}
+
+ );
+}
+
+// ─── Profile Card ───────────────────────────────────────────────────────────
+
+const ProfileCard = ({
+ profile,
+ selected,
+ selecting,
+ cardRef,
+}: {
+ profile: VoiceProfile;
+ selected: boolean;
+ selecting: boolean;
+ cardRef?: React.Ref;
+}) => {
+ return (
+
+ {profile.name}
+
+ {profile.description}
+
+
+
+ {profile.language}
+
+ {profile.hasEffects && }
+
+
+
+ );
+};
+
+// ─── History Row ────────────────────────────────────────────────────────────
+
+function HistoryRow({
+ gen,
+ mode,
+ isNew,
+}: {
+ gen: Generation;
+ mode: 'idle' | 'generating' | 'playing';
+ isNew: boolean;
+}) {
+ return (
+
+
+ {/* Status icon */}
+
+
+
+
+ {/* Meta info */}
+
+
{gen.profileName}
+
+ {gen.language}
+ {gen.engine}
+ {mode !== 'generating' && {gen.duration} }
+
+
+ {mode === 'generating' ? (
+ Generating...
+ ) : (
+ gen.timeAgo
+ )}
+
+
+
+ {/* Transcript */}
+
+
+ {/* Action buttons */}
+
+
+
+
+ {gen.versions > 1 && (
+
+
+
+ )}
+
+
+
+
+
+
+ );
+}
+
+// ─── Floating Generate Box ──────────────────────────────────────────────────
+
+function FloatingGenerateBox({
+ phase,
+ typingText,
+ selectedProfile,
+ engine,
+ effect,
+}: {
+ phase: Phase;
+ typingText: string;
+ selectedProfile: VoiceProfile | null;
+ engine: string;
+ effect?: string;
+}) {
+ const isFocused = phase === 'typing' || phase === 'generating';
+ const isGenerating = phase === 'generating';
+
+ return (
+
+ {/* Text area + generate button */}
+
+
+
+
+ {phase === 'typing' ? (
+
+
+
+ ) : phase === 'generating' ? (
+ {typingText}
+ ) : (
+
+ {selectedProfile
+ ? `Generate speech using ${selectedProfile.name}...`
+ : 'Select a voice profile above...'}
+
+ )}
+
+
+
+
+ {/* Generate button */}
+
+
+
+
+
+ {/* Bottom selectors */}
+
+
+ English
+
+
+ {engine}
+
+
+
+ {effect || 'Effect'}
+
+
+
+ );
+}
+
+// ─── Main ControlUI ─────────────────────────────────────────────────────────
+
+export function ControlUI() {
+ const [phase, setPhase] = useState('idle');
+ const [selectedIndex, setSelectedIndex] = useState(DEMO_SCRIPT[0].profileIndex);
+ const [cycle, setCycle] = useState(0);
+ const [newGenId, setNewGenId] = useState(null);
+ const [generations, setGenerations] = useState([...INITIAL_GENERATIONS]);
+ const [isMuted, setIsMuted] = useState(true);
+ const [isVisible, setIsVisible] = useState(true);
+ const [pageHidden, setPageHidden] = useState(false);
+ const containerRef = useRef(null);
+ const phaseRef = useRef(phase);
+ const mobileCardRefs = useRef>(new Map());
+ const desktopCardRefs = useRef>(new Map());
+ const profileGridRef = useRef(null);
+ const [scrollLeft, setScrollLeft] = useState(0);
+ phaseRef.current = phase;
+
+ const step = DEMO_SCRIPT[cycle % DEMO_SCRIPT.length];
+ const selectedProfile = PROFILES[selectedIndex];
+
+ // Scroll to selected profile card — accounts for generate box overlay on desktop
+ useEffect(() => {
+ const isMobile = window.innerWidth < 768;
+
+ if (isMobile) {
+ const el = mobileCardRefs.current.get(selectedIndex);
+ if (el) el.scrollIntoView({ behavior: 'smooth', block: 'nearest', inline: 'center' });
+ return;
+ }
+
+ // Desktop
+ const el = desktopCardRefs.current.get(selectedIndex);
+ const scrollContainer = profileGridRef.current;
+ if (!el || !scrollContainer) return;
+
+ const containerTop = scrollContainer.getBoundingClientRect().top;
+ const elTop = el.getBoundingClientRect().top;
+ const elRelTop = elTop - containerTop + scrollContainer.scrollTop;
+
+ const rowHeight = 145;
+ const generateBoxHeight = 200;
+ const visibleTop = scrollContainer.scrollTop;
+ const visibleBottom = visibleTop + scrollContainer.clientHeight - generateBoxHeight;
+ const elRelBottom = elRelTop + el.offsetHeight;
+
+ if (elRelTop >= visibleTop && elRelBottom <= visibleBottom) {
+ return;
+ }
+
+ const target = elRelTop - rowHeight;
+ scrollContainer.scrollTo({ top: Math.max(0, target), behavior: 'smooth' });
+ }, [selectedIndex]);
+
+ // Visibility detection
+ useEffect(() => {
+ const observer = new IntersectionObserver(([entry]) => setIsVisible(entry.isIntersecting), {
+ threshold: 0,
+ });
+ if (containerRef.current) observer.observe(containerRef.current);
+
+ const handleVisibility = () => setPageHidden(document.visibilityState !== 'visible');
+ document.addEventListener('visibilitychange', handleVisibility);
+
+ return () => {
+ observer.disconnect();
+ document.removeEventListener('visibilitychange', handleVisibility);
+ };
+ }, []);
+
+ const paused = !isVisible || pageHidden;
+
+ // Phase cycling — `playing` phase is driven by audio finish, not a timeout
+ useEffect(() => {
+ if (paused || phase === 'playing') return;
+
+ const duration = PHASE_DURATIONS[phase];
+ const timer = setTimeout(() => {
+ console.log(
+ '[ControlUI] phase transition',
+ phase,
+ '→ next, cycle:',
+ cycle,
+ 'step profile:',
+ PROFILES[step.profileIndex].name,
+ );
+ switch (phase) {
+ case 'idle': {
+ setSelectedIndex(step.profileIndex);
+ setPhase('selecting');
+ break;
+ }
+ case 'selecting':
+ setPhase('typing');
+ break;
+ case 'typing': {
+ const profile = PROFILES[step.profileIndex];
+ const newGen: Generation = {
+ id: Date.now(),
+ profileName: profile.name,
+ text: step.text,
+ language: profile.language,
+ engine: step.engine,
+ duration: step.duration,
+ timeAgo: 'just now',
+ favorited: false,
+ versions: 1,
+ };
+ setGenerations((prev) => [newGen, ...prev.slice(0, 5)]);
+ setNewGenId(newGen.id);
+ setPhase('generating');
+ break;
+ }
+ case 'generating':
+ setPhase('playing');
+ break;
+ }
+ }, duration);
+
+ return () => clearTimeout(timer);
+ }, [phase, paused, step, cycle]);
+
+ const handleAudioFinish = useCallback(() => {
+ if (phaseRef.current !== 'playing') return;
+ setPhase('idle');
+ setCycle((c) => c + 1);
+ setNewGenId(null);
+ }, []);
+
+ const isGenerating = phase === 'generating';
+
+ return (
+
+ {/* Unmute button with handwritten hint */}
+
+
+ {/* Handwritten hint — absolutely positioned above the button */}
+ {isMuted && (
+
+
+ try me!
+
+ {/* Curved arrow from text down-right toward the button */}
+
+ Arrow
+
+
+
+
+ )}
+
{
+ unlockAudioContext();
+ setIsMuted(!isMuted);
+ }}
+ className="flex items-center gap-2 px-3 py-1.5 rounded-full border border-border bg-card/50 backdrop-blur text-xs text-muted-foreground hover:text-foreground transition-colors"
+ >
+ {isMuted ? (
+ <>
+
+ Unmute
+ >
+ ) : (
+ <>
+
+ Mute
+ >
+ )}
+
+
+
+
+
+
+ {/* ── Sidebar (hidden on mobile) ─────────────────────────── */}
+
+ {/* Logo */}
+
+
+
+
+
+
+ {/* Nav items */}
+
+ {SIDEBAR_ITEMS.map((item, i) => {
+ const Icon = item.icon;
+ const active = i === 0;
+ return (
+
+
+
+ );
+ })}
+
+
+ {/* Version */}
+
v0.2.0
+
+
+ {/* ── Main content ──────────────────────────────────────── */}
+
+ {/* Left: Profiles + Generate box */}
+
+ {/* Gradient fade overlay — sits between header and scroll content */}
+
+
+ {/* Header — floats above everything */}
+
+
Voicebox
+
+
+ Import Voice
+
+
+ Create Voice
+
+
+
+
+ {/* Scrollable profile cards — scrolls behind header + gradient */}
+
+
+ {/* Mobile: horizontal scroll strip with edge fade */}
+
+ {scrollLeft > 0 && (
+
+ )}
+
+
setScrollLeft(e.currentTarget.scrollLeft)}
+ >
+ {PROFILES.map((profile, i) => (
+
{
+ if (el) mobileCardRefs.current.set(i, el);
+ }}
+ >
+
+
+ ))}
+
+
+
+ {/* Desktop: 3-col grid */}
+
+ {PROFILES.map((profile, i) => (
+
{
+ if (el) desktopCardRefs.current.set(i, el);
+ }}
+ />
+ ))}
+
+
+
+
+ {/* Floating generate box — desktop: absolute overlay, mobile: inline */}
+
+
+
+
+
+ {/* Right/Below: History */}
+
+
+
+ {generations.map((gen) => {
+ const isThisNew = gen.id === newGenId;
+ const rowMode: 'idle' | 'generating' | 'playing' =
+ isThisNew && isGenerating
+ ? 'generating'
+ : isThisNew && phase === 'playing'
+ ? 'playing'
+ : 'idle';
+ return ;
+ })}
+
+
+
+
+ {/* Audio player */}
+
{}}
+ />
+
+
+
+
+ );
+}
diff --git a/landing/src/components/Features.tsx b/landing/src/components/Features.tsx
new file mode 100644
index 00000000..ad77aea2
--- /dev/null
+++ b/landing/src/components/Features.tsx
@@ -0,0 +1,855 @@
+'use client';
+
+import { motion } from 'framer-motion';
+import { AudioLines, Cloud, MessageSquareText, Mic, Sparkles, TextCursorInput } from 'lucide-react';
+import { useEffect, useMemo, useRef, useState } from 'react';
+
+// ─── Lazy load wrapper ──────────────────────────────────────────────────────
+
+function LazyLoad({
+ children,
+ className,
+ rootMargin = '200px',
+}: {
+ children: React.ReactNode;
+ className?: string;
+ rootMargin?: string;
+}) {
+ const ref = useRef(null);
+ const [visible, setVisible] = useState(false);
+
+ useEffect(() => {
+ const el = ref.current;
+ if (!el) return;
+ const observer = new IntersectionObserver(
+ ([entry]) => {
+ if (entry.isIntersecting) {
+ setVisible(true);
+ observer.disconnect();
+ }
+ },
+ { rootMargin },
+ );
+ observer.observe(el);
+ return () => observer.disconnect();
+ }, [rootMargin]);
+
+ return (
+
+ {visible ? children : null}
+
+ );
+}
+
+// ─── Animation: Voice Cloning ───────────────────────────────────────────────
+
+function VoiceCloningAnimation() {
+ const [phase, setPhase] = useState(0);
+
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setPhase((p) => (p + 1) % 3);
+ }, 2400);
+ return () => clearInterval(interval);
+ }, []);
+
+ const samples = ['Sample 1', 'Sample 2', 'Sample 3'];
+ const bars = [0.4, 0.7, 0.5, 0.9, 0.3, 0.6, 0.8, 0.4, 0.7, 0.5, 0.3, 0.6];
+
+ return (
+
+
+ {/* Sample pills */}
+
+ {samples.map((s, i) => (
+
+ {s}
+
+ ))}
+
+
+ {/* Waveform visualization */}
+
+ {bars.map((h, i) => (
+
+ ))}
+
+
+ {/* Result label */}
+
+ voice profile ready
+
+
+
+ );
+}
+
+// ─── Mini waveform for clips ────────────────────────────────────────────────
+// Fixed-width dense waveform that overflows — the clip container clips it.
+// This way resizing a clip just reveals/hides bars instead of re-rendering.
+
+const WAVEFORM_BAR_COUNT = 60;
+
+function MiniWaveform({ seed, color }: { seed: number; color: string }) {
+ // Deterministic pseudo-random waveform that looks like real speech audio.
+ // Uses layered noise at different frequencies for natural envelope + detail.
+ const bars = useMemo(() => {
+ // Seeded pseudo-random number generator (deterministic per seed)
+ let s = seed * 9301 + 49297;
+ const rand = () => {
+ s = (s * 16807 + 0) % 2147483647;
+ return s / 2147483647;
+ };
+
+ // Pre-generate random values
+ const r = Array.from({ length: WAVEFORM_BAR_COUNT }, () => rand());
+
+ return Array.from({ length: WAVEFORM_BAR_COUNT }, (_, i) => {
+ const t = i / WAVEFORM_BAR_COUNT;
+
+ // Slow envelope — broad amplitude shape (words / phrases)
+ const envelope =
+ 0.3 +
+ 0.35 *
+ Math.sin(t * Math.PI * (2 + (seed % 3))) *
+ Math.sin(t * Math.PI * (1.3 + seed * 0.7)) +
+ 0.2 * Math.sin(t * Math.PI * (4.7 + seed * 1.3));
+
+ // Medium variation — syllable-level bumps
+ const mid = 0.15 * Math.sin(i * 0.8 + seed * 3.1) * Math.cos(i * 1.3 + seed);
+
+ // High-frequency noise — individual sample jitter
+ const noise = (r[i] - 0.5) * 0.25;
+
+ // Combine and clamp
+ const raw = envelope + mid + noise;
+ return Math.max(0.06, Math.min(1, raw));
+ });
+ }, [seed]);
+
+ return (
+
+ {bars.map((h, i) => (
+
+ ))}
+
+ );
+}
+
+// ─── Animation: Stories Editor ───────────────────────────────────────────────
+
+// Clip shape: id, profile, track, left (px out of 220), width (px), waveform seed
+type DemoClip = { id: string; profile: string; track: number; x: number; w: number; seed: number };
+
+const INITIAL_CLIPS: DemoClip[] = [
+ { id: 'n1', profile: 'Morgan', track: 0, x: 4, w: 70, seed: 1 },
+ { id: 'n2', profile: 'Morgan', track: 0, x: 135, w: 35, seed: 2 },
+ { id: 'a1', profile: 'Scarlett', track: 1, x: 25, w: 40, seed: 3 },
+ { id: 'a2', profile: 'Scarlett', track: 1, x: 120, w: 35, seed: 4 },
+ { id: 'b1', profile: 'Jarvis', track: 2, x: 70, w: 45, seed: 5 },
+];
+
+// Timeline width the clips live inside
+const TL_W = 220;
+// Each action returns a new clips array (or modifies in place)
+type Action = { label: string; apply: (clips: DemoClip[]) => DemoClip[] };
+
+const ACTIONS: Action[] = [
+ // 0 — move Jarvis clip earlier
+ { label: 'Move clip', apply: (c) => c.map((cl) => (cl.id === 'b1' ? { ...cl, x: 55 } : cl)) },
+ // 1 — split Morgan's first clip into two with visible gap
+ {
+ label: 'Split clip',
+ apply: (c) => {
+ // Idempotent: if n1b already exists, the split already happened
+ if (c.some((cl) => cl.id === 'n1b')) return c;
+ const clip = c.find((cl) => cl.id === 'n1');
+ if (!clip) return c;
+ const leftW = 25;
+ const gap = 8;
+ const rightW = clip.w - leftW - gap;
+ return [
+ ...c.filter((cl) => cl.id !== 'n1'),
+ { ...clip, w: leftW, id: 'n1' },
+ {
+ id: 'n1b',
+ profile: clip.profile,
+ track: clip.track,
+ x: clip.x + leftW + gap,
+ w: rightW,
+ seed: 6,
+ },
+ ];
+ },
+ },
+ // 2 — trim Scarlett's second clip shorter
+ { label: 'Trim clip', apply: (c) => c.map((cl) => (cl.id === 'a2' ? { ...cl, w: 25 } : cl)) },
+ // 3 — duplicate Jarvis to track 0
+ {
+ label: 'Duplicate',
+ apply: (c) => {
+ // Idempotent: if b1d already exists, the duplicate already happened
+ if (c.some((cl) => cl.id === 'b1d')) return c;
+ const clip = c.find((cl) => cl.id === 'b1');
+ if (!clip) return c;
+ return [...c, { ...clip, id: 'b1d', track: 0, x: 180, w: 35, seed: 7 }];
+ },
+ },
+ // 4 — reset
+ { label: '', apply: () => INITIAL_CLIPS },
+];
+
+function StoriesAnimation() {
+ const [clips, setClips] = useState(INITIAL_CLIPS);
+ const [actionIndex, setActionIndex] = useState(-1);
+ const [playheadX, setPlayheadX] = useState(0);
+ const [selectedId, setSelectedId] = useState(null);
+ const playheadRef = useRef>(0);
+
+ // Animate the playhead continuously
+ useEffect(() => {
+ let start: number | null = null;
+ const speed = 12; // px per second
+ const animate = (ts: number) => {
+ if (start === null) start = ts;
+ const elapsed = (ts - start) / 1000;
+ setPlayheadX((elapsed * speed) % TL_W);
+ playheadRef.current = requestAnimationFrame(animate);
+ };
+ playheadRef.current = requestAnimationFrame(animate);
+ return () => cancelAnimationFrame(playheadRef.current);
+ }, []);
+
+ // Step through actions
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setActionIndex((prev) => {
+ const next = (prev + 1) % ACTIONS.length;
+ setClips((current) => ACTIONS[next].apply(current));
+ // Highlight the clip being acted on
+ if (next === 0) setSelectedId('b1');
+ else if (next === 1) setSelectedId('n1');
+ else if (next === 2) setSelectedId('a2');
+ else if (next === 3) setSelectedId('b1');
+ else setSelectedId(null);
+ return next;
+ });
+ }, 2600);
+ return () => clearInterval(interval);
+ }, []);
+
+ const trackLabels = ['1', '0', '-1'];
+ const timeMarkers = [0, 2, 4, 6, 8];
+ const accentColor = 'hsl(43 50% 45%)';
+ const accentFg = 'hsl(30 10% 94%)';
+
+ return (
+
+ {/* Toolbar */}
+
+
+
+
0:03 / 0:10
+
+ {actionIndex >= 0 && actionIndex < ACTIONS.length - 1 && (
+
+ {ACTIONS[actionIndex].label}
+
+ )}
+
+
+
+ {/* Timeline */}
+
+ {/* Track labels sidebar */}
+
+
+ {trackLabels.map((label) => (
+
+ {label}
+
+ ))}
+
+
+ {/* Tracks area */}
+
+ {/* Time ruler */}
+
+ {timeMarkers.map((t) => (
+
+ ))}
+
+
+ {/* Track rows + clips — same parent so percentages match */}
+
+ {/* Track rows background */}
+ {trackLabels.map((label, i) => (
+
+ ))}
+
+ {/* Clips */}
+ {clips.map((clip) => {
+ const trackIdx = clip.track;
+ const isSelected = clip.id === selectedId;
+ const clipTop = `calc(${(trackIdx * 100) / 3}% + 2px)`;
+ const clipHeight = `calc(${100 / 3}% - 4px)`;
+ return (
+
+
+ {/* Profile label — scaled to bypass browser min font size */}
+
+
+ {clip.profile}
+
+
+ {/* Waveform — absolutely positioned so it never affects clip width */}
+
+
+
+
+ {/* Trim handles on selected */}
+ {isSelected && (
+ <>
+
+
+ >
+ )}
+
+ );
+ })}
+
+ {/* Playhead */}
+
+
+
+
+
+
+
+ );
+}
+
+// ─── Animation: Effects Pipeline ────────────────────────────────────────────
+
+function EffectsAnimation() {
+ const [activeEffect, setActiveEffect] = useState(0);
+ const effects = [
+ { name: 'Pitch Shift', param: '-3 semitones', color: '#3b82f6' },
+ { name: 'Reverb', param: 'Room 0.7', color: '#8b5cf6' },
+ { name: 'Compressor', param: '-15 dB', color: '#ec4899' },
+ { name: 'Low-Pass', param: '6000 Hz', color: '#14b8a6' },
+ ];
+
+ // Waveform bars — original shape
+ const rawBars = [0.3, 0.6, 0.8, 0.5, 0.9, 0.4, 0.7, 0.3, 0.6, 0.5, 0.8, 0.4, 0.7, 0.9, 0.3];
+
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setActiveEffect((p) => (p + 1) % effects.length);
+ }, 2200);
+ return () => clearInterval(interval);
+ }, [effects.length]);
+
+ return (
+
+ {/* Effects chain */}
+
+ {effects.map((fx, i) => (
+
+
+ {fx.name}
+
+ {i < effects.length - 1 && (
+
+ →
+
+ )}
+
+ ))}
+
+
+ {/* Waveform that morphs as effects are applied */}
+
+ {rawBars.map((h, i) => {
+ // Each effect stage progressively transforms the shape
+ const shifted = activeEffect >= 0 ? h * (0.7 + 0.3 * Math.sin(i * 0.8)) : h;
+ const dampened = activeEffect >= 1 ? shifted * (0.6 + 0.4 * Math.cos(i * 0.3)) : shifted;
+ const compressed = activeEffect >= 2 ? 0.3 + dampened * 0.5 : dampened;
+ const filtered = activeEffect >= 3 ? compressed * (1 - i * 0.03) : compressed;
+ const finalH = Math.max(0.08, Math.min(1, filtered));
+
+ return (
+
+ );
+ })}
+
+
+ {/* Active effect detail */}
+
+ {effects[activeEffect].name}: {effects[activeEffect].param}
+
+
+ );
+}
+
+// ─── Animation: Local or Remote ─────────────────────────────────────────────
+
+function LocalRemoteAnimation() {
+ const [mode, setMode] = useState(0);
+ const modes = ['Local GPU', 'Remote Server'];
+
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setMode((p) => (p + 1) % 2);
+ }, 2800);
+ return () => clearInterval(interval);
+ }, []);
+
+ return (
+
+
+ {/* Toggle */}
+
+ {modes.map((m, i) => (
+
+ {m}
+
+ ))}
+
+
+ {/* Status */}
+
+
+
+ {mode === 0 ? 'Metal acceleration active' : 'Connected to 192.168.1.50'}
+
+
+ {mode === 0 ? 'VRAM: 8.2 / 16.0 GB' : 'Latency: 12ms | CUDA'}
+
+
+
+
+ );
+}
+
+// ─── Animation: Transcription ───────────────────────────────────────────────
+
+function TranscriptionAnimation() {
+ const [charIndex, setCharIndex] = useState(0);
+ const text = 'The quick brown fox jumps over the lazy dog near the riverbank.';
+
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setCharIndex((p) => {
+ if (p >= text.length) return 0;
+ return p + 1;
+ });
+ }, 80);
+ return () => clearInterval(interval);
+ }, [text.length]);
+
+ return (
+
+ {/* Fake waveform */}
+
+ {Array.from({ length: 30 }, (_, i) => {
+ const h = 0.2 + 0.8 * Math.abs(Math.sin(i * 0.5 + charIndex * 0.1));
+ const active = i < (charIndex / text.length) * 30;
+ return (
+
+ );
+ })}
+
+
+ {/* Transcribed text */}
+
+ {text.slice(0, charIndex)}
+ {charIndex < text.length && (
+
+ )}
+
+
+ );
+}
+
+// ─── Animation: Unlimited Length ─────────────────────────────────────────────
+
+function UnlimitedLengthAnimation() {
+ const [phase, setPhase] = useState(0);
+
+ const chunks = [
+ 'The morning sun crept over the mountains, casting long shadows across the valley below.',
+ 'Birds stirred in the canopy, their songs weaving through the cool air like threads of gold.',
+ 'Far below, a river wound its way through ancient stones, carrying whispers of the night.',
+ ];
+
+ useEffect(() => {
+ const interval = setInterval(() => {
+ setPhase((p) => (p + 1) % 4); // 0-2 = processing chunks, 3 = crossfade/done
+ }, 2000);
+ return () => clearInterval(interval);
+ }, []);
+
+ return (
+
+ {/* Chunk pills */}
+
+ {chunks.map((chunk, i) => (
+
+ {/* Status indicator */}
+
+
+ {chunk}
+
+
+ ))}
+
+
+ {/* Crossfade / result bar */}
+
+ {chunks.map((_, i) => (
+
+ ))}
+
+
+ {/* Status text */}
+
+
+ {phase < 3
+ ? `generating chunk ${phase + 1} of ${chunks.length}...`
+ : 'crossfaded & ready'}
+
+
+
+ );
+}
+
+// ─── Feature data ───────────────────────────────────────────────────────────
+
+const FEATURES = [
+ {
+ title: 'Near-Perfect Voice Cloning',
+ description:
+ 'Multiple TTS engines for exceptional voice quality. Clone any voice from a few seconds of audio with natural intonation and emotion.',
+ icon: Mic,
+ animation: VoiceCloningAnimation,
+ },
+ {
+ title: 'Stories Editor',
+ description:
+ 'Create multi-voice narratives with a timeline-based editor. Arrange tracks, trim clips, and mix conversations between characters.',
+ icon: AudioLines,
+ animation: StoriesAnimation,
+ },
+ {
+ title: 'Audio Effects Pipeline',
+ description:
+ 'Apply pitch shift, reverb, delay, compression, and more — then save as presets. Preview effects live and set defaults per voice profile.',
+ icon: Sparkles,
+ animation: EffectsAnimation,
+ },
+ {
+ title: 'Local or Remote',
+ description:
+ 'Run GPU inference locally with Metal, CUDA, ROCm, Intel Arc, or DirectML — or connect to a remote machine. One-click server setup with automatic discovery.',
+ icon: Cloud,
+ animation: LocalRemoteAnimation,
+ },
+ {
+ title: 'Audio Transcription',
+ description:
+ 'Powered by Whisper for accurate speech-to-text. Automatically extract reference text from voice samples.',
+ icon: MessageSquareText,
+ animation: TranscriptionAnimation,
+ },
+ {
+ title: 'Unlimited Generation Length',
+ description:
+ 'Generate up to 50,000 characters in one go. Text is auto-split at sentence boundaries, generated per-chunk, and crossfaded seamlessly.',
+ icon: TextCursorInput,
+ animation: UnlimitedLengthAnimation,
+ },
+];
+
+// ─── Feature Card ───────────────────────────────────────────────────────────
+
+function FeatureCard({ feature }: { feature: (typeof FEATURES)[number] }) {
+ const Icon = feature.icon;
+ const Animation = feature.animation;
+
+ return (
+
+
+
+
+
+
+
+
{feature.title}
+
+
{feature.description}
+
+
+ );
+}
+
+// ─── Features Section ───────────────────────────────────────────────────────
+
+export function Features() {
+ return (
+
+
+
+
+ Professional voice tools, zero compromise
+
+
+ Everything you need to clone voices, generate speech, and produce multi-voice content —
+ running entirely on your machine.
+
+
+
+ {FEATURES.map((feature) => (
+
+ ))}
+
+
+
+ );
+}
diff --git a/landing/src/components/Footer.tsx b/landing/src/components/Footer.tsx
index 9370ac86..9e435a82 100644
--- a/landing/src/components/Footer.tsx
+++ b/landing/src/components/Footer.tsx
@@ -1,22 +1,33 @@
+import Image from 'next/image';
import Link from 'next/link';
-import { Separator } from '@/components/ui/separator';
import { GITHUB_REPO } from '@/lib/constants';
export function Footer() {
return (
-
-
-
-
-
voicebox
-
- Professional voice cloning powered by Qwen3-TTS. Desktop app for Mac, Windows, and
- Linux.
+
+
+
+ {/* Brand */}
+
+
+
+ Voicebox
+
+
+ Open source voice cloning studio. Local-first, free forever.
+
+ {/* Product */}
+
+ {/* Resources */}
+
+ {/* Also by */}
+
-
-
-
© 2026 voicebox. All rights reserved.
+
+
+
+ © {new Date().getFullYear()} Voicebox. Open source under MIT license.
+
diff --git a/landing/src/components/LandingAudioPlayer.tsx b/landing/src/components/LandingAudioPlayer.tsx
new file mode 100644
index 00000000..a92cd443
--- /dev/null
+++ b/landing/src/components/LandingAudioPlayer.tsx
@@ -0,0 +1,296 @@
+'use client';
+
+import { Pause, Play, Repeat, Volume2, VolumeX } from 'lucide-react';
+import { useCallback, useEffect, useRef, useState } from 'react';
+import WaveSurfer from 'wavesurfer.js';
+
+function formatDuration(seconds: number): string {
+ const m = Math.floor(seconds / 60);
+ const s = Math.floor(seconds % 60);
+ return `${m}:${s.toString().padStart(2, '0')}`;
+}
+
+// Shared ref so the unmute button can unlock WaveSurfer's audio on iOS Safari
+// Must call .play() on WaveSurfer's actual media element during a user gesture
+let sharedWaveSurfer: WaveSurfer | null = null;
+let audioUnlocked = false;
+
+export function unlockAudioContext() {
+ if (audioUnlocked) return;
+ audioUnlocked = true;
+
+ // Unlock WaveSurfer's internal audio element
+ if (sharedWaveSurfer) {
+ const media = sharedWaveSurfer.getMediaElement();
+ if (media) {
+ media.muted = true;
+ media
+ .play()
+ .then(() => {
+ media.pause();
+ media.muted = false;
+ media.currentTime = 0;
+ })
+ .catch(() => {});
+ }
+ }
+
+ // Also unlock a standalone AudioContext as fallback
+ try {
+ const ctx = new (
+ window.AudioContext ||
+ (window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext
+ )();
+ const buffer = ctx.createBuffer(1, 1, 22050);
+ const source = ctx.createBufferSource();
+ source.buffer = buffer;
+ source.connect(ctx.destination);
+ source.start(0);
+ } catch {
+ // Silently fail
+ }
+}
+
+interface LandingAudioPlayerProps {
+ audioUrl: string;
+ title: string;
+ playing: boolean;
+ muted: boolean;
+ onFinish: () => void;
+ onClose: () => void;
+}
+
+export function LandingAudioPlayer({
+ audioUrl,
+ title,
+ playing,
+ muted,
+ onFinish,
+ onClose,
+}: LandingAudioPlayerProps) {
+ const waveformRef = useRef
(null);
+ const wavesurferRef = useRef(null);
+ const [isPlaying, setIsPlaying] = useState(false);
+ const [currentTime, setCurrentTime] = useState(0);
+ const [duration, setDuration] = useState(0);
+ const [volume, setVolume] = useState(0.75);
+ const [isLooping, setIsLooping] = useState(false);
+ const [isReady, setIsReady] = useState(false);
+ const onFinishRef = useRef(onFinish);
+ onFinishRef.current = onFinish;
+ const playingRef = useRef(playing);
+ playingRef.current = playing;
+ const mutedRef = useRef(muted);
+ mutedRef.current = muted;
+
+ // Initialize WaveSurfer
+ useEffect(() => {
+ const initWaveSurfer = () => {
+ const container = waveformRef.current;
+ if (!container) {
+ setTimeout(initWaveSurfer, 50);
+ return;
+ }
+
+ const rect = container.getBoundingClientRect();
+ if (rect.width === 0 || rect.height === 0) {
+ setTimeout(initWaveSurfer, 50);
+ return;
+ }
+
+ // Clean up existing instance
+ if (wavesurferRef.current) {
+ wavesurferRef.current.destroy();
+ wavesurferRef.current = null;
+ }
+
+ const root = document.documentElement;
+ const getCSSVar = (varName: string) => {
+ const value = getComputedStyle(root).getPropertyValue(varName).trim();
+ return value ? `hsl(${value})` : '';
+ };
+
+ const ws = WaveSurfer.create({
+ container,
+ waveColor: getCSSVar('--muted'),
+ progressColor: getCSSVar('--accent'),
+ cursorColor: getCSSVar('--accent'),
+ barWidth: 2,
+ barRadius: 2,
+ height: 80,
+ normalize: true,
+ interact: true,
+ mediaControls: false,
+ });
+
+ ws.on('ready', () => {
+ setDuration(ws.getDuration());
+ ws.setVolume(mutedRef.current ? 0 : volume);
+ setIsReady(true);
+ });
+
+ ws.on('play', () => {
+ console.log('[Player] play event');
+ setIsPlaying(true);
+ });
+ ws.on('pause', () => {
+ console.log('[Player] pause event');
+ setIsPlaying(false);
+ });
+
+ ws.on('timeupdate', (time: number) => {
+ setCurrentTime(Math.min(time, ws.getDuration()));
+ });
+
+ let didFinish = false;
+ ws.on('finish', () => {
+ if (didFinish) return;
+ didFinish = true;
+ console.log(
+ '[Player] finish event, currentTime:',
+ ws.getCurrentTime(),
+ 'duration:',
+ ws.getDuration(),
+ );
+ setIsPlaying(false);
+ onFinishRef.current();
+ });
+
+ ws.load(audioUrl);
+ wavesurferRef.current = ws;
+ sharedWaveSurfer = ws;
+ };
+
+ setIsReady(false);
+ setCurrentTime(0);
+ setDuration(0);
+
+ requestAnimationFrame(() => {
+ requestAnimationFrame(() => {
+ setTimeout(initWaveSurfer, 10);
+ });
+ });
+
+ return () => {
+ if (wavesurferRef.current) {
+ wavesurferRef.current.destroy();
+ wavesurferRef.current = null;
+ }
+ };
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [audioUrl]);
+
+ // Respond to external play/stop signals
+ useEffect(() => {
+ const ws = wavesurferRef.current;
+ console.log('[Player] effect', { playing, isReady, hasWs: !!ws });
+ if (!ws || !isReady) return;
+
+ if (playing) {
+ // Resume the AudioContext first (required for iOS Safari after unlock)
+ const backend = ws.getMediaElement();
+ if (backend && 'context' in backend) {
+ const ctx = (backend as unknown as { context: AudioContext }).context;
+ if (ctx?.state === 'suspended') ctx.resume();
+ }
+ ws.play()
+ .then(() => {
+ console.log('[Player] play succeeded');
+ })
+ .catch((e: Error) => {
+ if (e.name === 'NotAllowedError') {
+ console.warn('[Player] Autoplay blocked by browser — waiting for user gesture');
+ } else {
+ console.error('[Player] play failed', e);
+ }
+ });
+ } else {
+ ws.pause();
+ }
+ }, [playing, isReady]);
+
+ // Sync volume and muted state
+ useEffect(() => {
+ if (wavesurferRef.current) {
+ wavesurferRef.current.setVolume(muted ? 0 : volume);
+ }
+ }, [volume, muted]);
+
+ const handlePlayPause = useCallback(() => {
+ if (!wavesurferRef.current) return;
+ wavesurferRef.current.playPause();
+ }, []);
+
+ return (
+
+
+ {/* Waveform — full width row on mobile, inline on desktop */}
+
+
+ {/* Controls row */}
+
+ {/* Play/Pause */}
+
+ {isPlaying ? (
+
+ ) : (
+
+ )}
+
+
+ {/* Time */}
+
+ {formatDuration(currentTime)}
+ /
+ {formatDuration(duration)}
+
+
+ {/* Title */}
+ {title && (
+
+ {title}
+
+ )}
+
+ {/* Loop */}
+
setIsLooping(!isLooping)}
+ className={`h-8 w-8 flex items-center justify-center rounded-sm shrink-0 hover:bg-muted md:order-5 ${
+ isLooping ? 'text-foreground' : 'text-muted-foreground'
+ }`}
+ >
+
+
+
+ {/* Volume */}
+
+ setVolume(volume > 0 ? 0 : 0.75)}
+ className="h-8 w-8 flex items-center justify-center hover:bg-muted rounded-sm"
+ >
+ {volume > 0 ? (
+
+ ) : (
+
+ )}
+
+ setVolume(Number(e.target.value) / 100)}
+ className="flex-1 h-1 appearance-none bg-muted rounded-full accent-foreground cursor-pointer [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:h-3 [&::-webkit-slider-thumb]:w-3 [&::-webkit-slider-thumb]:rounded-full [&::-webkit-slider-thumb]:bg-foreground"
+ />
+
+
+
+
+ );
+}
diff --git a/landing/src/components/Navbar.tsx b/landing/src/components/Navbar.tsx
new file mode 100644
index 00000000..c0b0b186
--- /dev/null
+++ b/landing/src/components/Navbar.tsx
@@ -0,0 +1,88 @@
+'use client';
+
+import { Github } from 'lucide-react';
+import Image from 'next/image';
+import { useEffect, useState } from 'react';
+import { GITHUB_REPO } from '@/lib/constants';
+
+function formatStarCount(count: number): string {
+ if (count >= 1000) {
+ const k = count / 1000;
+ return k % 1 === 0 ? `${k}k` : `${k.toFixed(1)}k`;
+ }
+ return count.toString();
+}
+
+export function Navbar() {
+ const [starCount, setStarCount] = useState(null);
+
+ useEffect(() => {
+ fetch('/api/stars')
+ .then((res) => {
+ if (!res.ok) throw new Error('Failed to fetch stars');
+ return res.json();
+ })
+ .then((data) => {
+ if (typeof data.count === 'number') setStarCount(data.count);
+ })
+ .catch((error) => {
+ console.error('Failed to fetch star count:', error);
+ });
+ }, []);
+
+ return (
+
+
+
+ );
+}
diff --git a/landing/src/components/VoiceCreator.tsx b/landing/src/components/VoiceCreator.tsx
new file mode 100644
index 00000000..86855967
--- /dev/null
+++ b/landing/src/components/VoiceCreator.tsx
@@ -0,0 +1,479 @@
+'use client';
+
+import { AnimatePresence, motion } from 'framer-motion';
+import { Mic, Monitor, Upload } from 'lucide-react';
+import { useEffect, useMemo, useState } from 'react';
+
+// ─── Waveform bars generator ────────────────────────────────────────────────
+
+function generateWaveformBars(count: number, seed: number): number[] {
+ const bars: number[] = [];
+ for (let i = 0; i < count; i++) {
+ const x = i / count;
+ // Speech-like envelope: ramp up, sustain, taper
+ const envelope = Math.sin(x * Math.PI) * 0.8 + 0.2;
+ // Layered pseudo-random noise
+ const n1 = Math.sin(seed * 127.1 + i * 43.7) * 0.5 + 0.5;
+ const n2 = Math.sin(seed * 269.5 + i * 17.3) * 0.3 + 0.5;
+ const n3 = Math.sin(seed * 53.9 + i * 97.1) * 0.2 + 0.5;
+ const noise = (n1 + n2 + n3) / 3;
+ bars.push(envelope * noise);
+ }
+ return bars;
+}
+
+// ─── Animated waveform background ───────────────────────────────────────────
+
+function WaveformBackground({ active }: { active: boolean }) {
+ const bars = useMemo(() => generateWaveformBars(60, 42), []);
+
+ return (
+
+
+ {bars.map((h, i) => {
+ const maxH = 120; // max bar height in px
+ const baseH = 4;
+ const activeH = baseH + h * maxH;
+ const idleH = baseH + h * maxH * 0.25;
+ return (
+
+ );
+ })}
+
+
+ );
+}
+
+// ─── Tab content panels ─────────────────────────────────────────────────────
+
+function UploadPanel() {
+ const [hasFile, setHasFile] = useState(false);
+
+ useEffect(() => {
+ // Simulate file drop after 2s
+ const t1 = setTimeout(() => setHasFile(true), 2000);
+ const t2 = setTimeout(() => setHasFile(false), 5000);
+ return () => {
+ clearTimeout(t1);
+ clearTimeout(t2);
+ };
+ }, []);
+
+ return (
+
+
+ {!hasFile ? (
+
+
+
+ Choose File
+
+
+ Drag and drop an audio file, or click to browse.
+
+ Maximum duration: 30 seconds.
+
+
+ ) : (
+
+
+
+ sample-voice-clip.wav
+
+
+
+ 0:04
+
+
+
+ Transcribe
+
+
+
+ )}
+
+
+ );
+}
+
+function RecordPanel() {
+ const [state, setState] = useState<'idle' | 'recording' | 'done'>('idle');
+ const [elapsed, setElapsed] = useState(0);
+
+ useEffect(() => {
+ const t1 = setTimeout(() => setState('recording'), 1500);
+ const t2 = setTimeout(() => setState('done'), 5500);
+ const t3 = setTimeout(() => {
+ setState('idle');
+ setElapsed(0);
+ }, 8000);
+ return () => {
+ clearTimeout(t1);
+ clearTimeout(t2);
+ clearTimeout(t3);
+ };
+ }, []);
+
+ // Timer
+ useEffect(() => {
+ if (state !== 'recording') return;
+ setElapsed(0);
+ const interval = setInterval(() => setElapsed((e) => e + 1), 1000);
+ return () => clearInterval(interval);
+ }, [state]);
+
+ const formatTime = (s: number) => `${Math.floor(s / 60)}:${(s % 60).toString().padStart(2, '0')}`;
+
+ return (
+
+
+
+
+ {state === 'idle' && (
+
+
+
+ Start Recording
+
+
+ Click to record from your microphone.
+
+ Maximum duration: 30 seconds.
+
+
+ )}
+
+ {state === 'recording' && (
+
+
+
+
{formatTime(elapsed)}
+
+
+ {formatTime(30 - elapsed)} remaining
+
+ )}
+
+ {state === 'done' && (
+
+
+
+ Recording complete
+
+
+
+ 0:04
+
+
+
+ Transcribe
+
+
+
+ )}
+
+
+ );
+}
+
+function SystemPanel() {
+ const [state, setState] = useState<'idle' | 'capturing' | 'done'>('idle');
+ const [elapsed, setElapsed] = useState(0);
+
+ useEffect(() => {
+ const t1 = setTimeout(() => setState('capturing'), 1500);
+ const t2 = setTimeout(() => setState('done'), 5500);
+ const t3 = setTimeout(() => {
+ setState('idle');
+ setElapsed(0);
+ }, 8000);
+ return () => {
+ clearTimeout(t1);
+ clearTimeout(t2);
+ clearTimeout(t3);
+ };
+ }, []);
+
+ useEffect(() => {
+ if (state !== 'capturing') return;
+ setElapsed(0);
+ const interval = setInterval(() => setElapsed((e) => e + 1), 1000);
+ return () => clearInterval(interval);
+ }, [state]);
+
+ const formatTime = (s: number) => `${Math.floor(s / 60)}:${(s % 60).toString().padStart(2, '0')}`;
+
+ return (
+
+
+
+
+ {state === 'idle' && (
+
+
+
+ Start Capture
+
+
+ Capture audio playing on your system.
+
+ Maximum duration: 30 seconds.
+
+
+ )}
+
+ {state === 'capturing' && (
+
+
+
+
{formatTime(elapsed)}
+
+
+ {formatTime(30 - elapsed)} remaining
+
+ )}
+
+ {state === 'done' && (
+
+
+
+ Capture complete
+
+
+
+ 0:04
+
+
+
+ Transcribe
+
+
+
+ )}
+
+
+ );
+}
+
+// ─── Tab selector ───────────────────────────────────────────────────────────
+
+const TABS = [
+ { id: 'upload' as const, label: 'Upload', icon: Upload },
+ { id: 'record' as const, label: 'Microphone', icon: Mic },
+ { id: 'system' as const, label: 'System Audio', icon: Monitor },
+];
+
+type TabId = (typeof TABS)[number]['id'];
+
+// ─── Main section ───────────────────────────────────────────────────────────
+
+export function VoiceCreator() {
+ const [activeTab, setActiveTab] = useState('record');
+ const [cycleKey, setCycleKey] = useState(0);
+
+ // Auto-cycle tabs
+ useEffect(() => {
+ const tabOrder: TabId[] = ['record', 'upload', 'system'];
+ let idx = tabOrder.indexOf(activeTab);
+
+ const interval = setInterval(() => {
+ idx = (idx + 1) % tabOrder.length;
+ setActiveTab(tabOrder[idx]);
+ setCycleKey((k) => k + 1);
+ }, 9000);
+
+ return () => clearInterval(interval);
+ }, [activeTab]);
+
+ return (
+
+
+
+ {/* Left: Copy */}
+
+
+ Clone any voice in seconds
+
+
+ Three ways to capture a voice sample. Upload a clip, record from your microphone, or
+ capture audio playing on your system. Voicebox clones the voice from as little as 3
+ seconds of audio.
+
+
+
+
+
+
+
+
Upload a clip
+
+ Drag and drop any audio file — WAV, MP3, FLAC, or WebM.
+
+
+
+
+
+
+
+
+
Record from microphone
+
+ Live waveform preview while you record. Up to 30 seconds.
+
+
+
+
+
+
+
+
+
System audio capture
+
+ Clone a voice from a YouTube video, podcast, or any app playing audio.
+
+
+
+
+
+
+ {/* Right: Animated UI mock */}
+
+
+ {/* Tab bar */}
+
+ {TABS.map((tab) => {
+ const Icon = tab.icon;
+ const isActive = activeTab === tab.id;
+ return (
+ {
+ setActiveTab(tab.id);
+ setCycleKey((k) => k + 1);
+ }}
+ className={`flex-1 flex items-center justify-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors ${
+ isActive
+ ? 'bg-background text-foreground shadow-sm'
+ : 'text-muted-foreground hover:text-foreground'
+ }`}
+ >
+
+ {tab.label}
+
+ );
+ })}
+
+
+ {/* Panel */}
+
+
+ {activeTab === 'upload' && }
+ {activeTab === 'record' && }
+ {activeTab === 'system' && }
+
+
+
+
+
+
+
+ );
+}
diff --git a/landing/src/lib/releases.ts b/landing/src/lib/releases.ts
index ad19c548..3f985d31 100644
--- a/landing/src/lib/releases.ts
+++ b/landing/src/lib/releases.ts
@@ -19,6 +19,10 @@ let cachedReleaseInfo: ReleaseInfo | null = null;
let cacheTimestamp: number = 0;
const CACHE_DURATION = 1000 * 60 * 10; // 10 minutes
+// Cache for star count
+let cachedStarCount: number | null = null;
+let starCacheTimestamp: number = 0;
+
/**
* Fetches the latest release from GitHub and extracts download links
*/
@@ -92,3 +96,38 @@ export async function getLatestRelease(): Promise {
throw error;
}
}
+
+/**
+ * Fetches the star count for the repo from GitHub
+ */
+export async function getStarCount(): Promise {
+ const now = Date.now();
+ if (cachedStarCount !== null && now - starCacheTimestamp < CACHE_DURATION) {
+ return cachedStarCount;
+ }
+
+ try {
+ const response = await fetch(`${GITHUB_API_BASE}/repos/${GITHUB_REPO}`, {
+ next: { revalidate: 600 },
+ headers: {
+ Accept: 'application/vnd.github.v3+json',
+ },
+ });
+
+ if (!response.ok) {
+ throw new Error(`GitHub API error: ${response.status}`);
+ }
+
+ const repo = await response.json();
+ const count = repo.stargazers_count ?? 0;
+
+ cachedStarCount = count;
+ starCacheTimestamp = now;
+
+ return count;
+ } catch (error) {
+ console.error('Failed to fetch star count:', error);
+ if (cachedStarCount !== null) return cachedStarCount;
+ throw error;
+ }
+}
diff --git a/landing/tailwind.config.js b/landing/tailwind.config.js
index 0a06b883..274291d1 100644
--- a/landing/tailwind.config.js
+++ b/landing/tailwind.config.js
@@ -8,6 +8,9 @@ module.exports = {
],
theme: {
extend: {
+ fontFamily: {
+ sans: ['var(--font-sans)', 'system-ui', 'sans-serif'],
+ },
colors: {
border: 'hsl(var(--border))',
input: 'hsl(var(--input))',
@@ -33,6 +36,9 @@ module.exports = {
accent: {
DEFAULT: 'hsl(var(--accent))',
foreground: 'hsl(var(--accent-foreground))',
+ faint: 'hsl(var(--accent-faint))',
+ deep: 'hsl(var(--accent-deep))',
+ glow: 'hsl(var(--accent-glow))',
},
popover: {
DEFAULT: 'hsl(var(--popover))',
@@ -42,6 +48,27 @@ module.exports = {
DEFAULT: 'hsl(var(--card))',
foreground: 'hsl(var(--card-foreground))',
},
+ // App surface tokens
+ app: {
+ DEFAULT: 'hsl(var(--app))',
+ box: 'hsl(var(--app-box))',
+ darkBox: 'hsl(var(--app-dark-box))',
+ darkerBox: 'hsl(var(--app-darker-box))',
+ lightBox: 'hsl(var(--app-light-box))',
+ line: 'hsl(var(--app-line))',
+ button: 'hsl(var(--app-button))',
+ hover: 'hsl(var(--app-hover))',
+ selected: 'hsl(var(--app-selected))',
+ },
+ ink: {
+ DEFAULT: 'hsl(var(--ink))',
+ dull: 'hsl(var(--ink-dull))',
+ faint: 'hsl(var(--ink-faint))',
+ },
+ sidebar: {
+ DEFAULT: 'hsl(var(--sidebar))',
+ line: 'hsl(var(--sidebar-line))',
+ },
},
borderRadius: {
lg: 'var(--radius)',
diff --git a/tauri/src-tauri/gen/Assets.car b/tauri/src-tauri/gen/Assets.car
index 6f92edd0..8f321854 100644
Binary files a/tauri/src-tauri/gen/Assets.car and b/tauri/src-tauri/gen/Assets.car differ
diff --git a/tauri/src-tauri/gen/voicebox.icns b/tauri/src-tauri/gen/voicebox.icns
index 59661d99..e4492f52 100644
Binary files a/tauri/src-tauri/gen/voicebox.icns and b/tauri/src-tauri/gen/voicebox.icns differ