diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79fcc0c..faed5b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,7 @@ on: branches: [main] pull_request: branches: [main] + workflow_dispatch: env: CARGO_TERM_COLOR: always diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 693dd80..e5e9ced 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -123,3 +123,97 @@ jobs: files: release/* env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + homebrew: + name: Update Homebrew formula + needs: [release] + runs-on: ubuntu-latest + steps: + - name: Get version + id: version + run: | + TAG="${{ inputs.tag }}" + if [ -z "$TAG" ]; then + TAG="${{ github.event.release.tag_name }}" + fi + VERSION="${TAG#v}" + echo "tag=$TAG" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Download checksums + run: | + gh release download "${{ steps.version.outputs.tag }}" \ + --repo rtk-ai/vox \ + --pattern checksums.txt + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Parse checksums + id: sha + run: | + echo "arm=$(grep aarch64-apple-darwin checksums.txt | awk '{print $1}')" >> $GITHUB_OUTPUT + echo "intel=$(grep x86_64-apple-darwin checksums.txt | awk '{print $1}')" >> $GITHUB_OUTPUT + echo "linux=$(grep x86_64-unknown-linux-gnu checksums.txt | awk '{print $1}')" >> $GITHUB_OUTPUT + + - name: Update formula + run: | + VERSION="${{ steps.version.outputs.version }}" + TAG="${{ steps.version.outputs.tag }}" + cat > vox.rb << 'FORMULA' + class Vox < Formula + desc "Cross-platform TTS CLI — local voice synthesis with three backends" + homepage "https://github.com/rtk-ai/vox" + version "VERSION_PLACEHOLDER" + license "MIT" + + on_macos do + on_intel do + url "https://github.com/rtk-ai/vox/releases/download/TAG_PLACEHOLDER/vox-x86_64-apple-darwin.tar.gz" + sha256 "SHA_INTEL_PLACEHOLDER" + end + on_arm do + url "https://github.com/rtk-ai/vox/releases/download/TAG_PLACEHOLDER/vox-aarch64-apple-darwin.tar.gz" + sha256 "SHA_ARM_PLACEHOLDER" + end + end + + on_linux do + on_intel do + url "https://github.com/rtk-ai/vox/releases/download/TAG_PLACEHOLDER/vox-x86_64-unknown-linux-gnu.tar.gz" + sha256 "SHA_LINUX_PLACEHOLDER" + end + end + + def install + bin.install "vox" + end + + test do + assert_match "vox #{version}", shell_output("#{bin}/vox --version") + end + end + FORMULA + sed -i "s/VERSION_PLACEHOLDER/$VERSION/g" vox.rb + sed -i "s/TAG_PLACEHOLDER/$TAG/g" vox.rb + sed -i "s/SHA_INTEL_PLACEHOLDER/${{ steps.sha.outputs.intel }}/g" vox.rb + sed -i "s/SHA_ARM_PLACEHOLDER/${{ steps.sha.outputs.arm }}/g" vox.rb + sed -i "s/SHA_LINUX_PLACEHOLDER/${{ steps.sha.outputs.linux }}/g" vox.rb + # Remove leading spaces from heredoc + sed -i 's/^ //' vox.rb + + - name: Push to homebrew-tap + run: | + CONTENT=$(base64 -w 0 vox.rb) + SHA=$(gh api repos/rtk-ai/homebrew-tap/contents/Formula/vox.rb --jq '.sha' 2>/dev/null || echo "") + if [ -n "$SHA" ]; then + gh api -X PUT repos/rtk-ai/homebrew-tap/contents/Formula/vox.rb \ + -f message="vox ${{ steps.version.outputs.version }}" \ + -f content="$CONTENT" \ + -f sha="$SHA" + else + gh api -X PUT repos/rtk-ai/homebrew-tap/contents/Formula/vox.rb \ + -f message="vox ${{ steps.version.outputs.version }}" \ + -f content="$CONTENT" + fi + env: + GH_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }} diff --git a/Formula/vox.rb b/Formula/vox.rb deleted file mode 100644 index c3de5c2..0000000 --- a/Formula/vox.rb +++ /dev/null @@ -1,31 +0,0 @@ -# typed: false -# frozen_string_literal: true - -# Homebrew formula for vox -# To install: brew tap rtk-ai/tap && brew install vox -class Vox < Formula - desc "TTS CLI for macOS — local voice synthesis with Qwen and system say" - homepage "https://github.com/rtk-ai/vox" - version "0.0.1" - license "MIT" - - on_macos do - on_intel do - url "https://github.com/rtk-ai/vox/releases/download/v#{version}/vox-x86_64-apple-darwin.tar.gz" - sha256 "PLACEHOLDER_SHA256_INTEL" - end - - on_arm do - url "https://github.com/rtk-ai/vox/releases/download/v#{version}/vox-aarch64-apple-darwin.tar.gz" - sha256 "PLACEHOLDER_SHA256_ARM" - end - end - - def install - bin.install "vox" - end - - test do - assert_match "vox #{version}", shell_output("#{bin}/vox --version") - end -end diff --git a/README.md b/README.md index f552938..28b236f 100644 --- a/README.md +++ b/README.md @@ -1,94 +1,85 @@ # vox -TTS CLI for macOS — local voice synthesis with Qwen and system `say`. +Cross-platform TTS CLI — local voice synthesis with three backends. -## Features - -- **Two backends**: macOS native `say` and [Qwen TTS](https://github.com/ml-explore/mlx-audio) (local, Apple Silicon) -- **Voice cloning**: clone a voice from an audio sample, use it for all speech -- **Voice chat**: have a spoken conversation with Claude (STT + LLM + TTS) -- **Pipeline playback**: multi-sentence text plays without gaps between chunks -- **Preferences**: persist backend, voice, language, rate, style settings +``` + vox + | + +-------------+-------------+ + | | | + say qwen qwen-native + (macOS) (MLX/Python) (pure Rust) + native Apple Silicon cross-platform + CPU/Metal/CUDA + | + rodio + (audio playback) +``` ## Install ```bash -# Quick install (macOS) -curl -fsSL https://raw.githubusercontent.com/rtk-ai/vox/main/install.sh | sh - # From source cargo install --path . -# Or via Homebrew (coming soon) -brew tap rtk-ai/tap && brew install vox +# Quick install (macOS / Linux / WSL) +curl -fsSL https://raw.githubusercontent.com/rtk-ai/vox/main/install.sh | sh ``` -### Requirements +| Platform | Default backend | GPU | +|----------|----------------|-----| +| macOS | `say` | `--features metal` | +| Linux / WSL | `qwen-native` | `--features cuda` | -- macOS (uses `say` and `afplay`) +Linux requires `sudo apt install libasound2-dev`. -For the Qwen backend (local neural TTS, Apple Silicon only): +## Usage with Claude Code ```bash -brew install python3 -pip install mlx-audio +vox init # all integrations (default) +vox init -m mcp # MCP server only +vox init -m cli # CLI hook only +vox init -m skill # slash command only ``` -This pulls in [mlx-audio](https://github.com/ml-explore/mlx-audio) which provides both TTS (`mlx_audio.tts`) and STT (`mlx_audio.stt`). The model `mlx-community/Qwen3-TTS-12Hz-0.6B-Base-bf16` is downloaded automatically on first use (~1.2 GB). +Each mode sets up a different integration: -For voice chat: +| Mode | What it does | +|------|-------------| +| `mcp` | Registers `vox serve` as an MCP server in `~/.claude.json` (Claude Code) and Claude Desktop config. Exposes 8 tools: `vox_speak`, `vox_list_voices`, `vox_clone_*`, `vox_config_*`, `vox_stats`. | +| `cli` | Creates a `CLAUDE.md` in your project with instructions for Claude to call `vox` after significant tasks. Adds a `Stop` hook in `.claude/settings.json` that says "Terminé" after each response. | +| `skill` | Creates a `/speak` slash command in `~/.claude/commands/speak.md`. | +| `all` | Runs all three modes (default). | -```bash -brew install sox # audio recording (rec command) -export ANTHROPIC_API_KEY=sk-ant-... +``` + Claude Code + | + MCP stdio + | + vox serve ──> vox_speak, vox_list_voices, ... ``` -## Usage +Running `vox init` again is safe — it skips files that are already configured. + +## Standalone CLI ```bash -# Speak text (default: say backend) vox "Hello, world." - -# Use Qwen backend -vox -b qwen "Bonjour le monde." - -# Pipe from stdin +vox -b qwen-native "Cross-platform TTS." echo "Hello" | vox - -# List voices vox --list-voices -vox -b qwen --list-voices - -# Set voice and language -vox -b qwen -v Chelsie -l en "Good morning." ``` ### Voice cloning ```bash -# Add a clone from an audio file -vox clone add patrick --audio ~/voice.wav --text "Transcription of the audio" - -# Record a clone from microphone +vox clone add patrick --audio ~/voice.wav --text "Transcription" vox clone record myvoice --duration 10 - -# Use a cloned voice vox -v patrick "This speaks with your voice." - -# List / remove clones vox clone list vox clone remove patrick ``` -### Voice chat - -```bash -# Start a voice conversation with Claude -export ANTHROPIC_API_KEY=sk-ant-... -vox chat -vox chat -v patrick -l fr -``` - ### Preferences ```bash @@ -99,34 +90,30 @@ vox config set voice Chelsie vox config reset ``` -### Stats - -```bash -vox stats -``` - -## AI Integration +### Optional: Qwen backend (macOS) -Set up your project so that Claude Code provides spoken summaries after completing tasks: +Neural TTS via Python/MLX on Apple Silicon: ```bash -cd your-project -vox init +uv pip install mlx-audio ``` -This creates: -- **CLAUDE.md** — instructions for Claude to call `vox` after significant tasks -- **.claude/settings.json** — a `Stop` hook that says "Terminé" after each response +Model downloaded automatically on first use (~1.2 GB). -Running `vox init` again is safe — it skips files that are already configured. +## Data -## Configuration +All state is stored locally in `~/.config/vox/`: + +``` +~/.config/vox/ +├── vox.db # SQLite: preferences, voice clones, usage logs +└── clones/ # audio files for voice clones +``` | Env var | Description | |---------|-------------| -| `VOX_CONFIG_DIR` | Override config directory (default: `~/.config/vox/`) | -| `VOX_DB_PATH` | Override database path (default: `~/.config/vox/vox.db`) | -| `ANTHROPIC_API_KEY` | Required for `vox chat` | +| `VOX_CONFIG_DIR` | Override config directory | +| `VOX_DB_PATH` | Override database path | ## License diff --git a/install.sh b/install.sh index 7e96fba..fd1ecc7 100755 --- a/install.sh +++ b/install.sh @@ -27,10 +27,11 @@ error() { exit 1 } -detect_os() { +detect_platform() { case "$(uname -s)" in Darwin*) OS="darwin";; - *) error "vox requires macOS (uses system 'say' and 'afplay')";; + Linux*) OS="linux";; + *) error "Unsupported OS: $(uname -s). Use WSL on Windows.";; esac } @@ -42,6 +43,18 @@ detect_arch() { esac } +get_target() { + case "$OS" in + darwin) TARGET="${ARCH}-apple-darwin";; + linux) + if [ "$ARCH" != "x86_64" ]; then + error "Linux builds are only available for x86_64 (got: $ARCH)" + fi + TARGET="x86_64-unknown-linux-gnu" + ;; + esac +} + get_latest_version() { VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') if [ -z "$VERSION" ]; then @@ -49,12 +62,8 @@ get_latest_version() { fi } -get_target() { - TARGET="${ARCH}-apple-darwin" -} - install() { - info "Detected: macOS $ARCH" + info "Detected: $OS $ARCH" info "Target: $TARGET" info "Version: $VERSION" @@ -83,6 +92,14 @@ install() { info "Successfully installed ${BINARY_NAME} to ${INSTALL_DIR}/${BINARY_NAME}" } +check_deps() { + if [ "$OS" = "linux" ]; then + if ! ldconfig -p 2>/dev/null | grep -q libasound; then + warn "ALSA not found. Install it: sudo apt install libasound2-dev" + fi + fi +} + verify() { if command -v "$BINARY_NAME" >/dev/null 2>&1; then info "Verification: $($BINARY_NAME --version)" @@ -94,11 +111,12 @@ verify() { main() { info "Installing $BINARY_NAME..." - detect_os + detect_platform detect_arch get_target get_latest_version install + check_deps verify echo "" diff --git a/src/main.rs b/src/main.rs index 4bd142a..f0ca525 100644 --- a/src/main.rs +++ b/src/main.rs @@ -385,7 +385,10 @@ fn handle_init(mode: InitMode) -> Result<()> { if do_mcp { let vox_bin = std::env::current_exe().context("cannot determine vox binary path")?; let vox_bin_str = vox_bin.to_string_lossy().to_string(); - let home = std::env::var("HOME").context("HOME not set")?; + let home = dirs::home_dir() + .context("cannot determine home directory")? + .to_string_lossy() + .to_string(); let mcp_entry = serde_json::json!({ "command": vox_bin_str, @@ -393,22 +396,35 @@ fn handle_init(mode: InitMode) -> Result<()> { "env": {} }); - let code_path = std::path::PathBuf::from(&home).join(".claude.json"); + let home_path = std::path::PathBuf::from(&home); + + let code_path = home_path.join(".claude.json"); let code_status = init::inject_mcp_server(&code_path, "vox", &mcp_entry) .unwrap_or_else(|e| format!("error: {e}")); + println!("[mcp] Claude Code: {code_status}"); + + // Claude Desktop config path is platform-specific + #[cfg(target_os = "macos")] + let desktop_path = + home_path.join("Library/Application Support/Claude/claude_desktop_config.json"); + #[cfg(target_os = "windows")] + let desktop_path = dirs::config_dir() + .map(|d| d.join("Claude/claude_desktop_config.json")) + .unwrap_or_else(|| home_path.join("AppData/Roaming/Claude/claude_desktop_config.json")); + #[cfg(target_os = "linux")] + let desktop_path = home_path.join(".config/Claude/claude_desktop_config.json"); - let desktop_path = std::path::PathBuf::from(&home) - .join("Library/Application Support/Claude/claude_desktop_config.json"); let desktop_status = init::inject_mcp_server(&desktop_path, "vox", &mcp_entry) .unwrap_or_else(|e| format!("error: {e}")); - - println!("[mcp] Claude Code: {code_status}"); println!("[mcp] Claude Desktop: {desktop_status}"); } // --- Skill mode: create /speak slash command --- if do_skill { - let home = std::env::var("HOME").context("HOME not set")?; + let home = dirs::home_dir() + .context("cannot determine home directory")? + .to_string_lossy() + .to_string(); let skills_dir = std::path::PathBuf::from(&home).join(".claude/commands"); std::fs::create_dir_all(&skills_dir).ok();