diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 055a0cbb..adcfbcc5 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0 +current_version = 0.3.1 commit = True tag = True tag_name = v{new_version} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d00df528..1312ca37 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -200,33 +200,37 @@ jobs: run: | python -c "import torch; print(f'CUDA available in build: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')" - - name: Build CUDA server binary + - name: Build CUDA server binary (onedir) shell: bash working-directory: backend run: python build_binary.py --cuda - - name: Split binary for GitHub Releases + - name: Package into server core + CUDA libs archives shell: bash run: | - python scripts/split_binary.py \ - backend/dist/voicebox-server-cuda.exe \ - --output release-assets/ + python scripts/package_cuda.py \ + backend/dist/voicebox-server-cuda/ \ + --output release-assets/ \ + --cuda-libs-version cu126-v1 \ + --torch-compat ">=2.6.0,<2.11.0" - - name: Upload split parts to GitHub Release + - name: Upload archives to GitHub Release if: startsWith(github.ref, 'refs/tags/') - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: files: | - release-assets/voicebox-server-cuda.part*.exe - release-assets/voicebox-server-cuda.sha256 - release-assets/voicebox-server-cuda.manifest + release-assets/voicebox-server-cuda.tar.gz + release-assets/voicebox-server-cuda.tar.gz.sha256 + release-assets/cuda-libs-cu126-v1.tar.gz + release-assets/cuda-libs-cu126-v1.tar.gz.sha256 + release-assets/cuda-libs.json draft: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Upload binary as workflow artifact + - name: Upload onedir as workflow artifact uses: actions/upload-artifact@v4 with: name: voicebox-server-cuda-windows - path: 
backend/dist/voicebox-server-cuda.exe + path: backend/dist/voicebox-server-cuda/ retention-days: 7 diff --git a/.gitignore b/.gitignore index e9755251..130a7aa1 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,12 @@ tauri/src-tauri/gen/Assets.car tauri/src-tauri/gen/voicebox.icns tauri/src-tauri/gen/partial.plist +# PyInstaller +*.spec + +# Windows artifacts +nul + # Temporary tmp/ temp/ diff --git a/app/package.json b/app/package.json index 55a182e5..0ed030fb 100644 --- a/app/package.json +++ b/app/package.json @@ -1,6 +1,6 @@ { "name": "@voicebox/app", - "version": "0.3.0", + "version": "0.3.1", "private": true, "type": "module", "scripts": { diff --git a/backend/__init__.py b/backend/__init__.py index 108ff87a..1a7dfe5d 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -1,3 +1,3 @@ # Backend package -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/backend/build_binary.py b/backend/build_binary.py index 0574894f..c2b4b510 100644 --- a/backend/build_binary.py +++ b/backend/build_binary.py @@ -34,9 +34,15 @@ def build_server(cuda=False): binary_name = "voicebox-server-cuda" if cuda else "voicebox-server" # PyInstaller arguments + # CUDA builds use --onedir so we can split the output into two archives: + # 1. Server core (~200-400MB) — versioned with the app + # 2. CUDA libs (~2GB) — versioned independently (only redownloaded on + # CUDA toolkit / torch major version changes) + # CPU builds remain --onefile for simplicity. + pack_mode = "--onedir" if cuda else "--onefile" args = [ "server.py", # Use server.py as entry point instead of main.py - "--onefile", + pack_mode, "--name", binary_name, ] diff --git a/backend/services/cuda.py b/backend/services/cuda.py index 7e4d9602..6e5420f3 100644 --- a/backend/services/cuda.py +++ b/backend/services/cuda.py @@ -1,16 +1,22 @@ """ -CUDA backend binary download, assembly, and verification. +CUDA backend download, assembly, and verification. 
-Downloads split parts of the CUDA-enabled voicebox-server binary from -GitHub Releases, reassembles them, verifies integrity via SHA-256, -and places the binary in the app's data directory for use on next -backend restart. +Downloads two archives from GitHub Releases: + 1. Server core (voicebox-server-cuda.tar.gz) — the exe + non-NVIDIA deps, + versioned with the app. + 2. CUDA libs (cuda-libs-{version}.tar.gz) — NVIDIA runtime libraries, + versioned independently (only redownloaded on CUDA toolkit bump). + +Both archives are extracted into {data_dir}/backends/cuda/ which forms the +complete PyInstaller --onedir directory structure that torch expects. """ import hashlib +import json import logging import os import sys +import tarfile from pathlib import Path from typing import Optional @@ -24,6 +30,10 @@ PROGRESS_KEY = "cuda-backend" +# The current expected CUDA libs version. Bump this when we change the +# CUDA toolkit version or torch's CUDA dependency changes (e.g. cu126 -> cu128). +CUDA_LIBS_VERSION = "cu126-v1" + def get_backends_dir() -> Path: """Directory where downloaded backend binaries are stored.""" @@ -32,21 +42,46 @@ def get_backends_dir() -> Path: return d -def get_cuda_binary_name() -> str: - """Platform-specific CUDA binary filename.""" +def get_cuda_dir() -> Path: + """Directory where the CUDA backend (onedir) is extracted.""" + d = get_backends_dir() / "cuda" + d.mkdir(parents=True, exist_ok=True) + return d + + +def get_cuda_exe_name() -> str: + """Platform-specific CUDA executable filename.""" if sys.platform == "win32": return "voicebox-server-cuda.exe" return "voicebox-server-cuda" def get_cuda_binary_path() -> Optional[Path]: - """Return path to CUDA binary if it exists.""" - p = get_backends_dir() / get_cuda_binary_name() + """Return path to the CUDA executable if it exists inside the onedir.""" + p = get_cuda_dir() / get_cuda_exe_name() if p.exists(): return p return None +def get_cuda_libs_manifest_path() -> Path: + """Path to the 
cuda-libs.json manifest inside the CUDA dir.""" + return get_cuda_dir() / "cuda-libs.json" + + +def get_installed_cuda_libs_version() -> Optional[str]: + """Read the installed CUDA libs version from cuda-libs.json, or None.""" + manifest_path = get_cuda_libs_manifest_path() + if not manifest_path.exists(): + return None + try: + data = json.loads(manifest_path.read_text()) + return data.get("version") + except Exception as e: + logger.warning(f"Could not read cuda-libs.json: {e}") + return None + + def is_cuda_active() -> bool: """Check if the current process is the CUDA binary. @@ -60,25 +95,151 @@ def get_cuda_status() -> dict: progress_manager = get_progress_manager() cuda_path = get_cuda_binary_path() progress = progress_manager.get_progress(PROGRESS_KEY) + cuda_libs_version = get_installed_cuda_libs_version() return { "available": cuda_path is not None, "active": is_cuda_active(), "binary_path": str(cuda_path) if cuda_path else None, + "cuda_libs_version": cuda_libs_version, "downloading": progress is not None and progress.get("status") == "downloading", "download_progress": progress, } +def _needs_server_download(version: Optional[str] = None) -> bool: + """Check if the server core archive needs to be (re)downloaded.""" + cuda_path = get_cuda_binary_path() + if not cuda_path: + return True + # Check if the binary version matches the expected app version + installed = get_cuda_binary_version() + expected = version or __version__ + if expected.startswith("v"): + expected = expected[1:] + return installed != expected + + +def _needs_cuda_libs_download() -> bool: + """Check if the CUDA libs archive needs to be (re)downloaded.""" + installed = get_installed_cuda_libs_version() + if installed is None: + return True + return installed != CUDA_LIBS_VERSION + + +async def _download_and_extract_archive( + client, + url: str, + sha256_url: Optional[str], + dest_dir: Path, + label: str, + progress_offset: int, + total_size: int, +): + """Download a .tar.gz archive and 
extract it into dest_dir. + + Args: + client: httpx.AsyncClient + url: URL of the .tar.gz archive + sha256_url: URL of the .sha256 checksum file (optional) + dest_dir: Directory to extract into + label: Human-readable label for progress updates + progress_offset: Byte offset for progress reporting (when downloading + multiple archives sequentially) + total_size: Total bytes across all downloads (for progress bar) + """ + progress = get_progress_manager() + temp_path = dest_dir / f".download-{label.replace(' ', '-')}.tmp" + + # Clean up leftover partial download + if temp_path.exists(): + temp_path.unlink() + + # Fetch expected checksum (fail-fast: never extract an unverified archive) + expected_sha = None + if sha256_url: + try: + sha_resp = await client.get(sha256_url) + sha_resp.raise_for_status() + expected_sha = sha_resp.text.strip().split()[0] + logger.info(f"{label}: expected SHA-256: {expected_sha[:16]}...") + except Exception as e: + raise RuntimeError(f"{label}: failed to fetch checksum from {sha256_url}") from e + + # Stream download, verify, and extract — always clean up temp file + downloaded = 0 + try: + async with client.stream("GET", url) as response: + response.raise_for_status() + with open(temp_path, "wb") as f: + async for chunk in response.aiter_bytes(chunk_size=1024 * 1024): + f.write(chunk) + downloaded += len(chunk) + progress.update_progress( + PROGRESS_KEY, + current=progress_offset + downloaded, + total=total_size, + filename=f"Downloading {label}", + status="downloading", + ) + + # Verify integrity + if expected_sha: + progress.update_progress( + PROGRESS_KEY, + current=progress_offset + downloaded, + total=total_size, + filename=f"Verifying {label}...", + status="downloading", + ) + sha256 = hashlib.sha256() + with open(temp_path, "rb") as f: + while True: + data = f.read(1024 * 1024) + if not data: + break + sha256.update(data) + actual = sha256.hexdigest() + if actual != expected_sha: + raise ValueError( + f"{label} integrity check 
failed: expected {expected_sha[:16]}..., got {actual[:16]}..." + ) + logger.info(f"{label}: integrity verified") + + # Extract (use data filter for path traversal protection on Python 3.12+) + progress.update_progress( + PROGRESS_KEY, + current=progress_offset + downloaded, + total=total_size, + filename=f"Extracting {label}...", + status="downloading", + ) + with tarfile.open(temp_path, "r:gz") as tar: + if sys.version_info >= (3, 12): + tar.extractall(path=dest_dir, filter="data") + else: + tar.extractall(path=dest_dir) + + logger.info(f"{label}: extracted to {dest_dir}") + finally: + if temp_path.exists(): + temp_path.unlink() + return downloaded + + async def download_cuda_binary(version: Optional[str] = None): - """Download the CUDA backend binary from GitHub Releases. + """Download the CUDA backend (server core + CUDA libs if needed). + + Downloads both archives from GitHub Releases, extracts them into + {data_dir}/backends/cuda/, and writes the cuda-libs.json manifest. - Downloads split parts listed in a manifest file, concatenates them, - and verifies the SHA-256 checksum for integrity. Atomic write - (temp file -> rename). + Only downloads what's needed: + - Server core: always redownloaded (versioned with app) + - CUDA libs: only if missing or version mismatch Args: - version: Version tag (e.g. "v0.2.0"). Defaults to current app version. + version: Version tag (e.g. "v0.3.0"). Defaults to current app version. 
""" import httpx @@ -86,114 +247,91 @@ async def download_cuda_binary(version: Optional[str] = None): version = f"v{__version__}" progress = get_progress_manager() - binary_name = get_cuda_binary_name() - dest_dir = get_backends_dir() - final_path = dest_dir / binary_name - temp_path = dest_dir / f"{binary_name}.download" + cuda_dir = get_cuda_dir() - # Clean up any leftover partial download - if temp_path.exists(): - temp_path.unlink() + need_server = _needs_server_download(version) + need_libs = _needs_cuda_libs_download() - logger.info(f"Starting CUDA backend download for {version}") + if not need_server and not need_libs: + logger.info("CUDA backend is up to date, nothing to download") + return + + logger.info( + f"Starting CUDA backend download for {version} " + f"(server={'yes' if need_server else 'cached'}, " + f"libs={'yes' if need_libs else 'cached'})" + ) progress.update_progress( - PROGRESS_KEY, current=0, total=0, - filename="Fetching manifest...", status="downloading", + PROGRESS_KEY, + current=0, + total=0, + filename="Preparing download...", + status="downloading", ) base_url = f"{GITHUB_RELEASES_URL}/{version}" - stem = Path(binary_name).stem # voicebox-server-cuda + server_archive = "voicebox-server-cuda.tar.gz" + libs_archive = f"cuda-libs-{CUDA_LIBS_VERSION}.tar.gz" try: async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client: - # Fetch the manifest (list of split part filenames) - manifest_url = f"{base_url}/{stem}.manifest" - manifest_resp = await client.get(manifest_url) - manifest_resp.raise_for_status() - parts = [p.strip() for p in manifest_resp.text.strip().splitlines() if p.strip()] - - if not parts: - raise ValueError("Empty manifest — no split parts found") - - logger.info(f"Found {len(parts)} split parts to download") - - # Fetch expected checksum (optional — for integrity verification) - expected_sha = None - try: - sha_url = f"{base_url}/{stem}.sha256" - sha_resp = await client.get(sha_url) - if 
sha_resp.status_code == 200: - # Format: "sha256hex filename\n" - expected_sha = sha_resp.text.strip().split()[0] - logger.info(f"Expected SHA-256: {expected_sha[:16]}...") - except Exception as e: - logger.warning(f"Could not fetch checksum file — skipping verification: {e}") - - # Get total size across all parts by issuing HEAD requests + # Estimate total download size total_size = 0 - for part_name in parts: + if need_server: try: - head_resp = await client.head(f"{base_url}/{part_name}") - content_length = int(head_resp.headers.get("content-length", 0)) - total_size += content_length + head = await client.head(f"{base_url}/{server_archive}") + total_size += int(head.headers.get("content-length", 0)) + except Exception: + pass + if need_libs: + try: + head = await client.head(f"{base_url}/{libs_archive}") + total_size += int(head.headers.get("content-length", 0)) except Exception: pass - logger.info(f"Total download size: {total_size / 1024 / 1024:.1f} MB") - # Download and concatenate parts - total_downloaded = 0 - with open(temp_path, "wb") as f: - for i, part_name in enumerate(parts): - part_url = f"{base_url}/{part_name}" - logger.info(f"Downloading part {i + 1}/{len(parts)}: {part_name}") - - async with client.stream("GET", part_url) as response: - response.raise_for_status() - async for chunk in response.aiter_bytes(chunk_size=1024 * 1024): - f.write(chunk) - total_downloaded += len(chunk) - progress.update_progress( - PROGRESS_KEY, current=total_downloaded, total=total_size, - filename=f"Downloading CUDA backend ({i + 1}/{len(parts)})", - status="downloading", - ) - - # Verify integrity if checksum was available - if expected_sha: - progress.update_progress( - PROGRESS_KEY, current=total_downloaded, total=total_downloaded, - filename="Verifying integrity...", status="downloading", - ) - sha256 = hashlib.sha256() - with open(temp_path, "rb") as f: - while True: - chunk = f.read(1024 * 1024) - if not chunk: - break - sha256.update(chunk) + 
logger.info(f"Total download size: {total_size / 1024 / 1024:.1f} MB") - actual = sha256.hexdigest() - if actual != expected_sha: - raise ValueError( - f"Integrity check failed: expected {expected_sha[:16]}..., " - f"got {actual[:16]}..." + offset = 0 + + # Download server core + if need_server: + server_downloaded = await _download_and_extract_archive( + client, + url=f"{base_url}/{server_archive}", + sha256_url=f"{base_url}/{server_archive}.sha256", + dest_dir=cuda_dir, + label="CUDA server", + progress_offset=offset, + total_size=total_size, + ) + offset += server_downloaded + + # Make executable on Unix + exe_path = cuda_dir / get_cuda_exe_name() + if sys.platform != "win32" and exe_path.exists(): + exe_path.chmod(0o755) + + # Download CUDA libs + if need_libs: + await _download_and_extract_archive( + client, + url=f"{base_url}/{libs_archive}", + sha256_url=f"{base_url}/{libs_archive}.sha256", + dest_dir=cuda_dir, + label="CUDA libraries", + progress_offset=offset, + total_size=total_size, ) - logger.info(f"Integrity verified: {actual[:16]}...") - - # Atomic move into place (replace handles existing target on all platforms) - temp_path.replace(final_path) - # Make executable on Unix - if sys.platform != "win32": - final_path.chmod(0o755) + # Write local cuda-libs.json manifest + manifest = {"version": CUDA_LIBS_VERSION} + get_cuda_libs_manifest_path().write_text(json.dumps(manifest, indent=2) + "\n") - logger.info(f"CUDA backend downloaded to {final_path}") + logger.info(f"CUDA backend ready at {cuda_dir}") progress.mark_complete(PROGRESS_KEY) except Exception as e: - # Clean up on failure - if temp_path.exists(): - temp_path.unlink() logger.error(f"CUDA backend download failed: {e}") progress.mark_error(PROGRESS_KEY, str(e)) raise @@ -202,15 +340,19 @@ async def download_cuda_binary(version: Optional[str] = None): def get_cuda_binary_version() -> Optional[str]: """Get the version of the installed CUDA binary, or None if not installed.""" import subprocess + 
cuda_path = get_cuda_binary_path() if not cuda_path: return None try: result = subprocess.run( [str(cuda_path), "--version"], - capture_output=True, text=True, timeout=30, + capture_output=True, + text=True, + timeout=30, + cwd=str(cuda_path.parent), # Run from the onedir directory ) - # Output format: "voicebox-server 0.2.0" + # Output format: "voicebox-server 0.3.0" for line in result.stdout.strip().splitlines(): if "voicebox-server" in line: return line.split()[-1] @@ -222,26 +364,29 @@ def get_cuda_binary_version() -> Optional[str]: async def check_and_update_cuda_binary(): """Check if the CUDA binary is outdated and auto-download if so. - Called on server startup. If a CUDA binary exists but its version - doesn't match the current app version, triggers a background download - of the updated CUDA binary. The download progress is visible to the - frontend via the existing SSE progress endpoint. + Called on server startup. Checks both server version and CUDA libs + version. Downloads only what's needed. """ cuda_path = get_cuda_binary_path() if not cuda_path: return # No CUDA binary installed, nothing to update - cuda_version = get_cuda_binary_version() - current_version = __version__ + need_server = _needs_server_download() + need_libs = _needs_cuda_libs_download() - if cuda_version == current_version: - logger.info(f"CUDA binary is up to date (v{current_version})") + if not need_server and not need_libs: + logger.info(f"CUDA binary is up to date (server=v{__version__}, libs={get_installed_cuda_libs_version()})") return - logger.info( - f"CUDA binary version mismatch: binary=v{cuda_version}, app=v{current_version}. " - f"Auto-downloading updated CUDA backend..." 
- ) + reasons = [] + if need_server: + cuda_version = get_cuda_binary_version() + reasons.append(f"server v{cuda_version} != v{__version__}") + if need_libs: + installed_libs = get_installed_cuda_libs_version() + reasons.append(f"libs {installed_libs} != {CUDA_LIBS_VERSION}") + + logger.info(f"CUDA backend needs update ({', '.join(reasons)}). Auto-downloading...") try: await download_cuda_binary() @@ -250,10 +395,12 @@ async def check_and_update_cuda_binary(): async def delete_cuda_binary() -> bool: - """Delete the downloaded CUDA binary. Returns True if deleted.""" - path = get_cuda_binary_path() - if path and path.exists(): - path.unlink() - logger.info(f"Deleted CUDA binary: {path}") + """Delete the downloaded CUDA backend directory. Returns True if deleted.""" + import shutil + + cuda_dir = get_cuda_dir() + if cuda_dir.exists() and any(cuda_dir.iterdir()): + shutil.rmtree(cuda_dir) + logger.info(f"Deleted CUDA backend directory: {cuda_dir}") return True return False diff --git a/docs/content/docs/developer/building.mdx b/docs/content/docs/developer/building.mdx index 0b3593ad..5ea6c747 100644 --- a/docs/content/docs/developer/building.mdx +++ b/docs/content/docs/developer/building.mdx @@ -159,12 +159,14 @@ Tauri looks for `voicebox-server-${PLATFORM}` in `src-tauri/binaries/` and bundl The `build-cuda-windows` job runs separately: -1. Install PyTorch with CUDA 12.1 -2. Build with `build_binary.py --cuda` -3. Split binary with `scripts/split_binary.py` -4. Upload parts as release artifacts - -This binary is downloaded on-demand by users who enable CUDA in settings. +1. Install PyTorch with CUDA 12.6 +2. Build with `build_binary.py --cuda` (produces `--onedir` output) +3. Package with `scripts/package_cuda.py` into two archives: + - `voicebox-server-cuda.tar.gz` — server core (~945 MB) + - `cuda-libs-cu126-v1.tar.gz` — NVIDIA runtime libraries (~1.7 GB, cached independently) +4. 
Upload archives as release artifacts + +This binary is downloaded on-demand by users who enable CUDA in settings. The CUDA libs archive is only re-downloaded when the CUDA toolkit version changes, not on every app update. ## Troubleshooting diff --git a/docs/content/docs/developer/tts-engines.mdx b/docs/content/docs/developer/tts-engines.mdx index 90135a37..dc749b02 100644 --- a/docs/content/docs/developer/tts-engines.mdx +++ b/docs/content/docs/developer/tts-engines.mdx @@ -80,6 +80,15 @@ grep -r 'token=True\|token=os.getenv' . # Float64/Float32 assumptions — librosa returns float64, many models assume float32 grep -r "torch.from_numpy\|\.double()\|float64" . + +# @torch.jit.script — calls inspect.getsource(), crashes in frozen builds +grep -r "@torch.jit.script\|torch.jit.script" . + +# torchaudio.load — requires torchcodec in torchaudio 2.10+, use soundfile.read() instead +grep -r "torchaudio.load\|torchaudio.save" . + +# Gated HuggingFace repos — models that hardcode gated repos as tokenizer/config sources +grep -r "from_pretrained\|tokenizer_name\|AutoTokenizer" . | grep -i "llama\|meta-llama\|gated" ``` ### 0.3 Install and Trace in a Throwaway Venv @@ -270,6 +279,8 @@ In `app/src/lib/hooks/useGenerationForm.ts`: - Add engine-to-model-name mapping - Update payload construction for engine-specific fields +**Watch out for model naming inconsistencies.** The HuggingFace repo name, the model size label, and the API model name don't always follow predictable patterns. For example, TADA's 3B model is named `tada-3b-ml` (not `tada-3b`), because it's a multilingual variant. Always check the actual repo names and build the frontend model name mapping from those, not from assumptions like `{engine}-{size}`. + ### 3.5 Model Management In `app/src/components/ServerSettings/ModelManagement.tsx`: @@ -391,6 +402,7 @@ These are actual production failures from shipping new engines. 
Every one of the | Chatterbox | `FileNotFoundError` for watermark model | `perth` ships pretrained model files (`hparams.yaml`, `.pth.tar`) that PyInstaller doesn't bundle by default | `--collect-all perth` | | All engines | `importlib.metadata` failures | Frozen binary doesn't include package metadata for `huggingface-hub`, `transformers`, etc. | `--copy-metadata` for each affected package | | All engines | Download progress bars stuck at 0% | `huggingface_hub` silently disables tqdm progress bars based on logger level in frozen builds — our progress tracker never receives byte updates | Force-enable tqdm's internal counter in `HFProgressTracker` | +| TADA | `inspect.getsource` error in DAC's `Snake1d` | `@torch.jit.script` calls `inspect.getsource()` which fails without `.py` source files | Wrote a lightweight shim (`dac_shim.py`) reimplementing `Snake1d` without `@torch.jit.script`, registered fake `dac.*` modules in `sys.modules` | | All engines | `NameError: name 'obj' is not defined` on macOS | Python 3.12.0 has a [CPython bug](https://github.com/pyinstaller/pyinstaller/issues/7992) that corrupts bytecode when PyInstaller rewrites code objects | Upgrade to Python 3.12.13+ | | All engines | `resource_tracker` subprocess crash | `multiprocessing` in frozen binaries needs `freeze_support()` called before anything else | Added to `server.py` entry point | @@ -480,6 +492,90 @@ def _get_device(self): return "cpu" # Skip MPS ``` +### Gated HuggingFace repos as hardcoded config sources + +Some models hardcode a gated HuggingFace repo as their tokenizer or config source (e.g., TADA hardcodes `"meta-llama/Llama-3.2-1B"` in both its `AlignerConfig` and `TadaConfig`). This silently fails without HF authentication. 
+ +**Fix:** Download from an ungated mirror and patch the config objects directly: + +```python +# Download tokenizer from ungated mirror +UNGATED_TOKENIZER = "unsloth/Llama-3.2-1B" +tokenizer_path = snapshot_download(UNGATED_TOKENIZER, token=None) + +# Patch the model config to use the local path instead of the gated repo +config = ModelConfig.from_pretrained(model_path) +config.tokenizer_name = tokenizer_path +model = ModelClass.from_pretrained(model_path, config=config) +``` + +**Do NOT monkey-patch `AutoTokenizer.from_pretrained`** — it's a classmethod, and replacing it corrupts the descriptor, which breaks other engines that use different tokenizers (e.g., Qwen uses a Qwen tokenizer via `AutoTokenizer`). Always patch at the config level, not the class method level. + +### `torchaudio.load()` requires `torchcodec` in 2.10+ + +As of `torchaudio>=2.10`, `torchaudio.load()` requires the `torchcodec` package for audio I/O. If your engine or backend code uses `torchaudio.load()`, replace it with `soundfile`: + +```python +# Before (breaks without torchcodec): +import torchaudio +waveform, sr = torchaudio.load("audio.wav") + +# After: +import soundfile as sf +import torch +data, sr = sf.read("audio.wav", dtype="float32") +waveform = torch.from_numpy(data).unsqueeze(0) +``` + +Note: `torchaudio.functional.resample()` and other pure-PyTorch math functions work fine without `torchcodec` — only the I/O functions are affected. + +### `@torch.jit.script` breaks in frozen builds + +`torch.jit.script` calls `inspect.getsource()` to parse the decorated function's source code. In a PyInstaller binary, `.py` source files aren't available, so this crashes at import time. + +**Fix:** Remove or avoid `@torch.jit.script` decorators. If the decorated function comes from an upstream dependency, write a shim that reimplements the function without the decorator (see "Toxic dependency chains" below). 
+ +### Toxic dependency chains — the shim pattern + +Sometimes a model library depends on a package with a massive, hostile transitive dependency tree, but only uses a tiny piece of it. When the dependency chain is unbuildable or would pull in dozens of unwanted packages, the right move is to write a lightweight shim. + +**Example:** TADA depends on `descript-audio-codec` (DAC), which pulls in `descript-audiotools` -> `onnx`, `tensorboard`, `protobuf`, `matplotlib`, `pystoi`, etc. The `onnx` package fails to build from source on macOS. But TADA only uses `Snake1d` from DAC — a 7-line PyTorch module. + +**Solution:** Create a shim at `backend/utils/dac_shim.py` that registers fake modules in `sys.modules`: + +```python +import sys +import types +import torch +from torch import nn + +def snake(x, alpha): + """Snake activation — reimplemented without @torch.jit.script.""" + return x + (1.0 / (alpha + 1e-9)) * torch.sin(alpha * x).pow(2) + +class Snake1d(nn.Module): + def __init__(self, channels): + super().__init__() + self.alpha = nn.Parameter(torch.ones(1, channels, 1)) + def forward(self, x): + return snake(x, self.alpha) + +# Register fake dac.* modules so "from dac.nn.layers import Snake1d" works +_nn = types.ModuleType("dac.nn") +_layers = types.ModuleType("dac.nn.layers") +_layers.Snake1d = Snake1d +_nn.layers = _layers + +for name, mod in [("dac", types.ModuleType("dac")), + ("dac.nn", _nn), ("dac.nn.layers", _layers)]: + sys.modules[name] = mod +``` + +**Key rules for shims:** +- Import the shim **before** importing the model library (so it finds the fake modules first) +- Do NOT use `@torch.jit.script` in the shim (see above) +- Only reimplement what the model actually uses — check the import chain carefully + ## Upcoming Engines Based on the current model landscape, these are candidates for future integration: @@ -490,7 +586,6 @@ Based on the current model landscape, these are candidates for future integratio | **Fish Speech** | 50+ | Medium | Word-level 
control via inline text | Ready | | **Kokoro-82M** | English | 82M | CPU realtime, Apache 2.0 | Ready | | **XTTS-v2** | 17+ | Medium | Zero-shot cloning | Ready | -| **HumeAI TADA** | EN (1B), Multi (3B) | Medium | 700s+ coherent audio, synced transcripts | Shipped | | **MOSS-TTS** | Multilingual | Medium | Text-to-voice design, multi-speaker dialogue | Needs vetting | | **Pocket TTS** | English | ~100M | CPU-first, >1× realtime | Needs vetting | @@ -508,6 +603,10 @@ Use this as a gate between phases. Do not proceed to the next phase until every - [ ] Searched for `torch.load` calls missing `map_location` - [ ] Searched for `torch.from_numpy` without `.float()` cast - [ ] Searched for `token=True` or `token=os.getenv("HF_TOKEN")` in HuggingFace calls +- [ ] Searched for `@torch.jit.script` / `torch.jit.script` (crashes in frozen builds) +- [ ] Searched for `torchaudio.load` / `torchaudio.save` (requires `torchcodec` in 2.10+) +- [ ] Searched for hardcoded gated HuggingFace repo names (e.g., `meta-llama/*`) +- [ ] Evaluated whether any dependency is used minimally enough to shim instead of install - [ ] Tested model loading and generation on CPU in a throwaway venv - [ ] Tested with a clean HuggingFace cache (no pre-downloaded models) - [ ] Produced a written dependency audit documenting all findings diff --git a/docs/plans/CUDA_LIBS_ADDON.md b/docs/plans/CUDA_LIBS_ADDON.md new file mode 100644 index 00000000..28cbe1b6 --- /dev/null +++ b/docs/plans/CUDA_LIBS_ADDON.md @@ -0,0 +1,173 @@ +# CUDA Libs as a Bolt-On Addon + +## Problem + +Every time we bump `__version__` (even for a UI tweak or bugfix), the exact-match version check in both `main.rs:222` and `cuda.py:237` invalidates the user's ~2.4GB CUDA binary, forcing a full redownload. The CUDA binary is the entire server rebuilt with NVIDIA libs included -- there's no separation between app logic and the CUDA runtime. 
+ +## Why This Is Hard With `--onefile` + +The core tension is PyInstaller `--onefile` mode (`build_binary.py:39`). In onefile mode, everything -- Python code, all dependencies, torch, the NVIDIA `.dll`/`.so` files -- gets packed into a single self-extracting archive. There's no concept of "swap out one part." The binary IS the server. + +## Options + +### Option A: Switch to `--onedir` for the CUDA Build (Recommended) + +Instead of `--onefile`, build the CUDA variant as a directory (a folder with the exe + all the shared libs alongside it). Then split the distribution into two archives: + +1. **`voicebox-server-cuda` executable + non-NVIDIA deps** (~200-400MB) -- versioned with the app, redownloaded on every app update. +2. **`cuda-libs-cu126.tar.gz`** (~2GB) -- the `nvidia.*` packages (cublas, cudnn, cuda_runtime, etc.), versioned independently (e.g., `cuda-libs-cu126-v1`). Only redownloaded when we bump the CUDA toolkit version or torch's CUDA dependency changes. + +#### How it would work at runtime + +- Tauri downloads the server binary archive and extracts it to `{data_dir}/backends/cuda/` +- On first CUDA setup (or when cuda-libs version bumps), downloads and extracts the libs archive into the same directory +- The CUDA server exe finds the `.dll`/`.so` files next to it (standard PyInstaller onedir behavior) +- Version check becomes two checks: server version + cuda-libs version + +#### Independent versioning + +Add a `cuda-libs.json` manifest: + +```json +{"version": "cu126-v1", "torch_compat": ">=2.6.0,<2.8.0"} +``` + +The server checks this on startup. The Tauri side checks it before launching. Only bump `cu126-v1` -> `cu126-v2` when we actually change the CUDA toolkit or torch major version. + +#### Build pipeline changes + +The CI `build-cuda-windows` job would build with `--onedir`, then separate the output into two archives. The CUDA libs archive could be built less frequently (only when torch/CUDA version changes) and stored as a pinned release asset. 
+ +#### Download experience + +- First-time CUDA setup: ~2.4GB total (same as today) +- Subsequent app updates: ~200-400MB for the server, CUDA libs stay cached +- CUDA toolkit bump: ~2GB for just the libs + +#### Pros + +- PyInstaller `--onedir` natively produces this structure -- NVIDIA DLLs end up as discrete files in the output directory +- The separation is natural: PyInstaller puts torch's NVIDIA deps in predictable paths (`nvidia/cublas/lib/`, etc.) +- CUDA libs are highly stable -- only rebundle when changing CUDA toolkit version (e.g., cu126 -> cu128) or major torch version +- Server updates become ~200-400MB instead of ~2.4GB +- No library path hacking needed -- torch finds NVIDIA DLLs because they're in the same directory tree + +#### Cons + +- Onedir means a folder with hundreds of files instead of a single exe -- more complex to manage, extract, and clean up +- Need to modify download/assembly logic in `cuda.py` to handle two separate archives +- The Tauri side (`main.rs`) needs to point at an exe inside a directory rather than a standalone binary +- Users who manually manage the file may find the folder structure confusing + +#### TTS engine compatibility + +No issues. The TTS engines are pure Python + torch. They don't care whether NVIDIA libs are inside the binary or sitting next to it -- torch's dynamic loader finds them either way. + +--- + +### Option B: Keep `--onefile` but Externalize CUDA Libs via Library Path + +Keep the server as a single `--onefile` binary (with NVIDIA packages excluded, same as the CPU build). Ship the CUDA libs as a separate download that gets extracted to `{data_dir}/backends/cuda-libs/`. Before launching, set the library search path to include that directory. + +**Important caveat:** The CPU torch wheel (`whl/cpu`) doesn't have CUDA kernels compiled in -- it's a fundamentally different build. So the binary would need to be built with CUDA-compiled torch but with the NVIDIA runtime libraries excluded. 
The runtime libs (cublas, cudnn, etc.) would be provided externally. + +#### How it would work + +- Build ONE "CUDA-ready" server binary with CUDA-compiled torch but NVIDIA runtime packages excluded +- Ship `cuda-libs-cu126-v1.tar.gz` separately (~2GB of `.dll`/`.so` files) +- When launching, Tauri sets `PATH` (Windows) or `LD_LIBRARY_PATH` (Linux) to include the cuda-libs directory + +#### Pros + +- Single server binary for both CPU and CUDA users -- simplifies build pipeline enormously +- True bolt-on CUDA libs with fully independent versioning +- Server updates are always small (~150MB for the onefile binary) + +#### Cons + +- **Fragile on Windows.** PyInstaller `--onefile` extracts to a temp directory at runtime and the internal torch may not find externally-placed NVIDIA libs. DLL resolution on Windows is notoriously unreliable in this scenario. +- `os.add_dll_directory()` only affects `LoadLibraryEx` with `LOAD_LIBRARY_SEARCH_USER_DIRS` flag -- not all DLL loads go through this path +- PyInstaller's onefile bootloader may configure DLL search paths before Python code runs +- Could work on Linux but is fragile on Windows + +--- + +### Option C: Hybrid -- `--onefile` Server + Dynamic CUDA Lib Loading at Runtime + +Build the server as `--onefile` with CUDA-compiled torch but with NVIDIA packages excluded. At startup, before torch initializes CUDA, explicitly load the NVIDIA shared libraries using `ctypes.CDLL` or `os.add_dll_directory()`. 
+
+In `server.py`, before any torch imports:
+
+```python
+cuda_libs_dir = os.environ.get("VOICEBOX_CUDA_LIBS")
+if cuda_libs_dir and os.path.isdir(cuda_libs_dir):
+    if sys.platform == "win32":
+        os.add_dll_directory(cuda_libs_dir)
+        os.environ["PATH"] = cuda_libs_dir + os.pathsep + os.environ.get("PATH", "")
+    else:
+        os.environ["LD_LIBRARY_PATH"] = cuda_libs_dir + ":" + os.environ.get("LD_LIBRARY_PATH", "")
+```
+
+#### Pros
+
+- Single server binary, true bolt-on CUDA libs
+- Clean separation of concerns
+- Independent versioning
+
+#### Cons
+
+- Needs careful testing with each torch version -- CUDA initialization happens deep in C++ extension layer
+- On Windows, `os.add_dll_directory()` may not cover all DLL load paths; on Linux, mutating `LD_LIBRARY_PATH` from inside the already-running process has no effect (the dynamic loader reads it once at exec), so the `.so` files would have to be preloaded explicitly via `ctypes.CDLL` instead
+- PyInstaller's onefile bootloader may have already configured DLL search paths before Python code runs
+- Most complex to get right and maintain
+
+## Recommendation
+
+**Option A (`--onedir` with split archives)** is the most reliable path:
+
+1. **It actually works.** `--onedir` puts all files on disk as regular files. Torch finds NVIDIA DLLs because they're in the same directory tree, exactly as they would be in a normal pip install.
+2. **Natural separation.** PyInstaller's `--onedir` output already separates the NVIDIA `.dll`/`.so` files into `nvidia/` subdirectories. We can split the output directory into "core" and "nvidia-libs" archives after building.
+3. **Independent versioning is straightforward.** A `cuda-libs.json` manifest controls when redownloads are needed.
+4. **Build pipeline simplification.** Build CUDA libs archive less frequently, store as a pinned release asset.
+
+The main cost is managing a directory instead of a single file, but we already have sophisticated download/assembly infrastructure in `cuda.py` with manifests and split parts. Extending that to handle two archives is incremental work.
+
+## Tauri Compatibility (Validated)
+
+Tauri handles PyInstaller `--onedir` with no issues.
The key insight is that we're **not** using a static sidecar for CUDA -- we're downloading and extracting at runtime (the existing `cuda.py` + `main.rs` flow). For runtime-launched processes, Tauri's `tauri::shell::Command` supports arbitrary directories natively. + +### The critical change in `main.rs` + +The only Tauri-side change needed is adding `.current_dir()` when spawning the CUDA backend: + +```rust +let cuda_dir = data_dir.join("backends/cuda"); +let exe_path = cuda_dir.join("voicebox-server-cuda.exe"); + +let mut cmd = app.shell().command(exe_path.to_str().unwrap()); +cmd = cmd.current_dir(&cuda_dir); // PyInstaller finds all DLLs relative to exe +cmd = cmd.args(["--data-dir", &data_dir_str, "--port", &port_str, "--parent-pid", &parent_pid_str]); +``` + +`.current_dir()` tells the PyInstaller bootloader that everything (DLLs, `nvidia/cublas/lib/`, `_internal/`, torch extensions, etc.) lives relative to the exe. Torch finds the NVIDIA libs exactly as it does in a normal `pip install` or dev environment -- no `LD_LIBRARY_PATH` hacks, no `os.add_dll_directory` gymnastics. + +### Community evidence + +- Multiple Tauri users run this exact pattern: Nuitka folders (exe + pythonXX.dll + supporting files), multi-file .NET apps, and PyInstaller onedir backends (GitHub issues #5719, discussion #5206). +- The shell plugin explicitly supports `cwd` in both Rust and JS APIs. +- No reports of torch/CUDA-specific breakage -- the onedir layout is identical to what PyInstaller produces in normal usage. + +### Known gotcha: process termination on Windows + +PyInstaller onedir creates a parent bootloader + child Python process on Windows. `child.kill()` only hits the outer process in some cases (Tauri issue #11686). Mitigation: keep a reference to the parent PID or use `taskkill /F /T` for clean shutdown. This is not a blocker -- our existing `--parent-pid` watchdog mechanism in `server.py` already handles orphan cleanup. + +## Next Steps + +1. 
Prototype: Build the current CUDA binary with `--onedir` and verify torch CUDA works from the output directory +2. Measure the size split: how much is NVIDIA libs vs everything else +3. Design the two-archive download flow and dual version checking +4. Update `cuda.py` for dual-archive extraction (server core + cuda-libs) +5. Update `main.rs`: change launch path to `backends/cuda/` dir + add `.current_dir()` +6. Add `ensure_cuda_structure()` helper in Rust to verify exe + nvidia/ subdirs exist before spawning +7. Update CI pipeline: `build-cuda-windows` produces two archives instead of split parts +8. ~~Update `split_binary.py` or replace with archive-based distribution~~ Done: replaced with `package_cuda.py` diff --git a/justfile b/justfile index fd8bf962..a1d40133 100644 --- a/justfile +++ b/justfile @@ -208,10 +208,11 @@ build-server-cuda: _ensure-venv $env:PATH = "{{ venv_bin }};$env:PATH"; \ & "{{ python }}" backend/build_binary.py --cuda; \ if ($LASTEXITCODE -ne 0) { throw "build_binary.py --cuda failed with exit code $LASTEXITCODE" }; \ - $dest = "$env:APPDATA/com.voicebox.app/backends"; \ + $dest = "$env:APPDATA/sh.voicebox.app/backends/cuda"; \ + if (Test-Path $dest) { Remove-Item -Recurse -Force $dest }; \ New-Item -ItemType Directory -Path $dest -Force | Out-Null; \ - Copy-Item "backend/dist/voicebox-server-cuda.exe" "$dest/voicebox-server-cuda.exe" -Force; \ - Write-Host "Copied CUDA binary to $dest" + Copy-Item "backend/dist/voicebox-server-cuda/*" $dest -Recurse -Force; \ + Write-Host "Copied CUDA backend to $dest" # Build everything locally: CPU server + CUDA server + installable Tauri app [windows] diff --git a/landing/package.json b/landing/package.json index f9aa7e74..316fc7dd 100644 --- a/landing/package.json +++ b/landing/package.json @@ -1,6 +1,6 @@ { "name": "@voicebox/landing", - "version": "0.3.0", + "version": "0.3.1", "description": "Landing page for voicebox.sh", "scripts": { "dev": "bun --bun next dev --turbo", diff --git a/package.json 
b/package.json index d6d94b43..72b229f5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "voicebox", - "version": "0.3.0", + "version": "0.3.1", "private": true, "workspaces": [ "app", diff --git a/scripts/package_cuda.py b/scripts/package_cuda.py new file mode 100644 index 00000000..189f531a --- /dev/null +++ b/scripts/package_cuda.py @@ -0,0 +1,232 @@ +""" +Package the PyInstaller --onedir CUDA build into two archives. + +Takes the PyInstaller --onedir output directory and splits it into: + 1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps) + 2. cuda-libs-cu126.tar.gz — NVIDIA runtime libraries only + 3. cuda-libs.json — version manifest for the CUDA libs + +Usage: + python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ + python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/ + python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu126-v1 +""" + +import argparse +import hashlib +import json +import sys +import tarfile +from pathlib import Path + +# DLL name prefixes that identify NVIDIA CUDA runtime libraries. +# These DLLs may appear in different locations depending on the torch +# and PyInstaller version: +# - nvidia/ subdirectories (older torch with separate nvidia-* packages) +# - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly) +# - Top-level directory (some PyInstaller versions) +NVIDIA_DLL_PREFIXES = ( + "cublas", + "cublaslt", + "cudart", + "cudnn", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolvermg", + "cusparse", + "nvjitlink", + "nvrtc", + "nccl", + "caffe2_nvrtc", +) + +# Files to keep in the server core even if they match NVIDIA prefixes. +# These are small Python modules or stubs, not the large runtime DLLs. 
+NVIDIA_KEEP_IN_CORE = { + "torch/cuda/nccl.py", + "torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py", +} + + +def is_nvidia_file(rel_path: str) -> bool: + """Check if a relative path belongs to the NVIDIA CUDA libs. + + Identifies large NVIDIA runtime DLLs (.dll/.so) regardless of where + PyInstaller placed them. Excludes small Python stubs that happen to + share NVIDIA-related names. + """ + rel_lower = rel_path.lower().replace("\\", "/") + + # Never split out Python source files or small stubs + if rel_lower in NVIDIA_KEEP_IN_CORE: + return False + + # Files under nvidia/ subdirectory tree (older torch layout) + if rel_lower.startswith("nvidia/") or "/nvidia/" in rel_lower: + # Only DLLs/shared objects — not .py, .dist-info, etc. + if rel_lower.endswith((".dll", ".so")): + return True + # Include entire nvidia/ namespace package tree + for part in rel_lower.split("/"): + if part == "nvidia": + return True + + # NVIDIA DLLs anywhere in the tree (e.g. 
_internal/torch/lib/cublas64_12.dll) + name = rel_lower.rsplit("/", 1)[-1] + if name.endswith(".dll") or name.endswith(".so"): + name_no_ext = name.rsplit(".", 1)[0] + for prefix in NVIDIA_DLL_PREFIXES: + if name_no_ext.startswith(prefix): + return True + + return False + + +def sha256_file(path: Path) -> str: + """Compute SHA-256 hex digest of a file.""" + h = hashlib.sha256() + with open(path, "rb") as f: + while True: + chunk = f.read(1024 * 1024) + if not chunk: + break + h.update(chunk) + return h.hexdigest() + + +def package( + onedir_path: Path, + output_dir: Path, + cuda_libs_version: str, + torch_compat: str, +): + output_dir.mkdir(parents=True, exist_ok=True) + + # Collect all files in the onedir output, split into core vs nvidia + core_files = [] + nvidia_files = [] + + for item in sorted(onedir_path.rglob("*")): + if item.is_dir(): + continue + rel = item.relative_to(onedir_path) + rel_str = str(rel) + if is_nvidia_file(rel_str): + nvidia_files.append((rel_str, item)) + else: + core_files.append((rel_str, item)) + + core_size = sum(f.stat().st_size for _, f in core_files) + nvidia_size = sum(f.stat().st_size for _, f in nvidia_files) + + print(f"Input directory: {onedir_path}") + print(f"Core files: {len(core_files)} ({core_size / (1024**2):.1f} MB)") + print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / (1024**2):.1f} MB)") + + if not nvidia_files: + print( + f"ERROR: No NVIDIA files found in {onedir_path}. " + "Refusing to create an empty CUDA libs archive.", + file=sys.stderr, + ) + print( + "Make sure you built with --cuda and the NVIDIA packages are present.", + file=sys.stderr, + ) + sys.exit(1) + + # Create server core archive + # Files are stored relative to the archive root (no parent directory prefix) + # so extracting to backends/cuda/ puts everything at the right level. 
+ server_archive = output_dir / "voicebox-server-cuda.tar.gz" + print(f"\nCreating server core archive: {server_archive.name}") + with tarfile.open(server_archive, "w:gz") as tar: + for rel_str, full_path in core_files: + tar.add(full_path, arcname=rel_str) + server_sha = sha256_file(server_archive) + (output_dir / "voicebox-server-cuda.tar.gz.sha256").write_text( + f"{server_sha} voicebox-server-cuda.tar.gz\n" + ) + print(f" Size: {server_archive.stat().st_size / (1024**2):.1f} MB") + print(f" SHA-256: {server_sha[:16]}...") + + # Create CUDA libs archive + cuda_libs_archive = output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz" + print(f"\nCreating CUDA libs archive: {cuda_libs_archive.name}") + with tarfile.open(cuda_libs_archive, "w:gz") as tar: + for rel_str, full_path in nvidia_files: + tar.add(full_path, arcname=rel_str) + cuda_sha = sha256_file(cuda_libs_archive) + (output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz.sha256").write_text( + f"{cuda_sha} cuda-libs-{cuda_libs_version}.tar.gz\n" + ) + print(f" Size: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB") + print(f" SHA-256: {cuda_sha[:16]}...") + + # Write cuda-libs.json manifest + manifest = { + "version": cuda_libs_version, + "torch_compat": torch_compat, + "archive": cuda_libs_archive.name, + "sha256": cuda_sha, + } + manifest_path = output_dir / "cuda-libs.json" + manifest_path.write_text(json.dumps(manifest, indent=2) + "\n") + print(f"\nManifest: {manifest_path.name}") + print(json.dumps(manifest, indent=2)) + + # Summary + total_input = core_size + nvidia_size + total_output = server_archive.stat().st_size + cuda_libs_archive.stat().st_size + print(f"\nTotal input: {total_input / (1024**3):.2f} GB") + print(f"Total output: {total_output / (1024**3):.2f} GB (compressed)") + print( + f"Server core: {server_archive.stat().st_size / (1024**2):.1f} MB (redownloaded on app update)" + ) + print( + f"CUDA libs: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB (cached until CUDA toolkit 
bump)" + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives" + ) + parser.add_argument( + "input", + type=Path, + help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help="Output directory for archives (default: same as input parent)", + ) + parser.add_argument( + "--cuda-libs-version", + type=str, + default="cu126-v1", + help="Version string for the CUDA libs archive (default: cu126-v1)", + ) + parser.add_argument( + "--torch-compat", + type=str, + default=">=2.6.0,<2.11.0", + help="Torch version compatibility range (default: >=2.6.0,<2.11.0)", + ) + args = parser.parse_args() + + if not args.input.is_dir(): + print(f"Error: {args.input} is not a directory", file=sys.stderr) + print("Expected a PyInstaller --onedir output directory.", file=sys.stderr) + sys.exit(1) + + output_dir = args.output or args.input.parent + package(args.input, output_dir, args.cuda_libs_version, args.torch_compat) + + +if __name__ == "__main__": + main() diff --git a/scripts/split_binary.py b/scripts/split_binary.py deleted file mode 100644 index 0310fbd8..00000000 --- a/scripts/split_binary.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Split a large binary into chunks for GitHub Releases (<2 GB each). - -Usage: - python scripts/split_binary.py backend/dist/voicebox-server-cuda.exe - python scripts/split_binary.py backend/dist/voicebox-server-cuda.exe --chunk-size 1900000000 - python scripts/split_binary.py backend/dist/voicebox-server-cuda.exe --output release-assets/ - -The script produces: - - voicebox-server-cuda.part00.exe, .part01.exe, ... 
(binary chunks) - - voicebox-server-cuda.sha256 (SHA-256 checksum of the complete file) - - voicebox-server-cuda.manifest (ordered list of part filenames) -""" - -import argparse -import hashlib -import sys -from pathlib import Path - - -def split(input_path: Path, chunk_size: int, output_dir: Path): - output_dir.mkdir(parents=True, exist_ok=True) - data = input_path.read_bytes() - total_size = len(data) - - # Write SHA-256 of the complete file - sha256 = hashlib.sha256(data).hexdigest() - checksum_file = output_dir / f"{input_path.stem}.sha256" - checksum_file.write_text(f"{sha256} {input_path.name}\n") - - # Split into chunks - parts = [] - for i in range(0, total_size, chunk_size): - part_index = len(parts) - part_name = f"{input_path.stem}.part{part_index:02d}{input_path.suffix}" - part_path = output_dir / part_name - part_path.write_bytes(data[i:i + chunk_size]) - parts.append(part_name) - - # Write manifest (ordered list of part filenames) - manifest_file = output_dir / f"{input_path.stem}.manifest" - manifest_file.write_text("\n".join(parts) + "\n") - - print(f"Input: {input_path} ({total_size / (1024**3):.2f} GB)") - print(f"Output: {output_dir}/") - print(f"Parts: {len(parts)} (chunk size: {chunk_size / (1024**3):.2f} GB)") - print(f"SHA-256: {sha256}") - print(f"Manifest: {manifest_file.name}") - for p in parts: - size = (output_dir / p).stat().st_size - print(f" {p} ({size / (1024**3):.2f} GB)") - - -def main(): - parser = argparse.ArgumentParser( - description="Split a large binary into chunks for GitHub Releases" - ) - parser.add_argument("input", type=Path, help="Path to the binary file to split") - parser.add_argument( - "--chunk-size", - type=int, - default=1_900_000_000, # 1.9 GB — safely under 2 GB GitHub limit - help="Maximum chunk size in bytes (default: 1.9 GB)", - ) - parser.add_argument( - "--output", - type=Path, - default=None, - help="Output directory (default: same directory as input)", - ) - args = parser.parse_args() - - if not 
args.input.exists(): - print(f"Error: {args.input} does not exist", file=sys.stderr) - sys.exit(1) - - output_dir = args.output or args.input.parent - split(args.input, args.chunk_size, output_dir) - - -if __name__ == "__main__": - main() diff --git a/tauri/package.json b/tauri/package.json index 31eb0790..6f569f6a 100644 --- a/tauri/package.json +++ b/tauri/package.json @@ -1,7 +1,7 @@ { "name": "@voicebox/tauri", "private": true, - "version": "0.3.0", + "version": "0.3.1", "type": "module", "scripts": { "dev": "vite", diff --git a/tauri/src-tauri/Cargo.toml b/tauri/src-tauri/Cargo.toml index 1f707f8e..e587de9f 100644 --- a/tauri/src-tauri/Cargo.toml +++ b/tauri/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "voicebox" -version = "0.3.0" +version = "0.3.1" description = "A production-quality desktop app for Qwen3-TTS voice cloning and generation" authors = ["you"] license = "" diff --git a/tauri/src-tauri/src/main.rs b/tauri/src-tauri/src/main.rs index a881f46e..415961f2 100644 --- a/tauri/src-tauri/src/main.rs +++ b/tauri/src-tauri/src/main.rs @@ -197,22 +197,24 @@ async fn start_server( println!("Data directory: {:?}", data_dir); println!("Remote mode: {}", remote.unwrap_or(false)); - // Check for CUDA backend binary in data directory + // Check for CUDA backend in data directory (onedir layout: backends/cuda/) let cuda_binary = { - let backends_dir = data_dir.join("backends"); + let cuda_dir = data_dir.join("backends").join("cuda"); let cuda_name = if cfg!(windows) { "voicebox-server-cuda.exe" } else { "voicebox-server-cuda" }; - let path = backends_dir.join(cuda_name); - if path.exists() { - println!("Found CUDA backend binary at {:?}", path); + let exe_path = cuda_dir.join(cuda_name); + if exe_path.exists() { + println!("Found CUDA backend at {:?}", cuda_dir); - // Version check: run --version and compare to app version + // Version check: run --version from the onedir directory so + // PyInstaller can find its support files for the fast --version path 
let app_version = app.config().version.clone().unwrap_or_default(); - let version_ok = match std::process::Command::new(&path) + let version_ok = match std::process::Command::new(&exe_path) .arg("--version") + .current_dir(&cuda_dir) .output() { Ok(output) => { @@ -237,7 +239,7 @@ async fn start_server( }; if version_ok { - Some(path) + Some(exe_path) } else { None } @@ -300,10 +302,14 @@ async fn start_server( println!("Custom models directory: {}", dir); } - // If CUDA binary exists, launch it directly instead of the bundled sidecar + // If CUDA binary exists, launch it from the onedir directory. + // .current_dir() is critical: PyInstaller onedir expects all DLLs and + // support files (nvidia/, _internal/, etc.) relative to the exe. let spawn_result = if let Some(ref cuda_path) = cuda_binary { - println!("Launching CUDA backend: {:?}", cuda_path); + let cuda_dir = cuda_path.parent().unwrap(); + println!("Launching CUDA backend: {:?} (cwd: {:?})", cuda_path, cuda_dir); let mut cmd = app.shell().command(cuda_path.to_str().unwrap()); + cmd = cmd.current_dir(cuda_dir); cmd = cmd.args(["--data-dir", &data_dir_str, "--port", &port_str, "--parent-pid", &parent_pid_str]); if is_remote { cmd = cmd.args(["--host", "0.0.0.0"]); diff --git a/tauri/src-tauri/tauri.conf.json b/tauri/src-tauri/tauri.conf.json index c4c5da85..89fc038f 100644 --- a/tauri/src-tauri/tauri.conf.json +++ b/tauri/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "Voicebox", - "version": "0.3.0", + "version": "0.3.1", "identifier": "sh.voicebox.app", "build": { "beforeDevCommand": "bun run dev", diff --git a/web/package.json b/web/package.json index 74247f87..8d59b9cc 100644 --- a/web/package.json +++ b/web/package.json @@ -1,7 +1,7 @@ { "name": "@voicebox/web", "private": true, - "version": "0.3.0", + "version": "0.3.1", "type": "module", "scripts": { "dev": "vite",