diff --git a/src/init.rs b/src/init.rs index ac9f2b2..79e0f72 100644 --- a/src/init.rs +++ b/src/init.rs @@ -1,5 +1,5 @@ use std::fs; -use std::path::Path; +use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; use serde_json::Value; @@ -177,3 +177,42 @@ pub fn run_init(project_dir: &Path) -> Result { Ok(result) } + +/// Inject an MCP server entry into a Claude config file. Returns a status string. +pub fn inject_mcp_server(config_path: &PathBuf, name: &str, entry: &Value) -> Result { + let mut config: Value = if config_path.exists() { + let content = fs::read_to_string(config_path) + .with_context(|| format!("cannot read {}", config_path.display()))?; + serde_json::from_str(&content) + .with_context(|| format!("invalid JSON in {}", config_path.display()))? + } else { + if let Some(parent) = config_path.parent() { + fs::create_dir_all(parent).ok(); + } + serde_json::json!({}) + }; + + let mcp_servers = config + .as_object_mut() + .context("config is not a JSON object")? + .entry("mcpServers") + .or_insert_with(|| serde_json::json!({})); + + if let Some(existing) = mcp_servers.get(name) + && existing.get("command").and_then(|v| v.as_str()) + == entry.get("command").and_then(|v| v.as_str()) + { + return Ok("already configured".into()); + } + + mcp_servers + .as_object_mut() + .unwrap() + .insert(name.to_string(), entry.clone()); + + let output = serde_json::to_string_pretty(&config)?; + fs::write(config_path, output) + .with_context(|| format!("cannot write {}", config_path.display()))?; + + Ok("configured".into()) +} diff --git a/src/lib.rs b/src/lib.rs index 7c74f9d..aec2fbb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,5 +7,6 @@ pub mod config; pub mod db; pub mod init; pub mod input; +pub mod mcp; #[cfg(target_os = "macos")] pub mod stt; diff --git a/src/main.rs b/src/main.rs index 35f43d5..4bd142a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,11 @@ use std::time::Instant; use anyhow::{Context, Result}; -use clap::{Parser, Subcommand}; +use 
clap::{Parser, Subcommand, ValueEnum}; use vox::backend::{self, SpeakOptions}; use vox::config::DEFAULT_BACKEND; -use vox::{clone, db, init, input}; +use vox::{clone, db, init, input, mcp}; #[derive(Parser)] #[command(name = "vox", version, about = "Voice Command — read text aloud")] @@ -63,8 +63,14 @@ enum Commands { }, /// Show usage statistics Stats, - /// Set up AI assistant integration (Claude Code) - Init, + /// Set up AI assistant integration (Claude Code + Claude Desktop) + Init { + /// Integration mode: mcp, cli, skill, or all (default: all) + #[arg(short, long, default_value = "all")] + mode: InitMode, + }, + /// Launch MCP server (stdio transport for Claude Code / Claude Desktop) + Serve, /// Start a voice conversation with Claude (macOS only) #[cfg(target_os = "macos")] Chat { @@ -110,6 +116,18 @@ enum CloneAction { }, } +#[derive(Clone, ValueEnum)] +enum InitMode { + /// MCP server plugin (Claude calls vox tools natively) + Mcp, + /// CLAUDE.md instructions + Stop hook (Claude calls vox via Bash) + Cli, + /// Claude Code slash command /speak + Skill, + /// All integration modes + All, +} + #[derive(Subcommand)] enum ConfigAction { /// Show current preferences @@ -132,7 +150,8 @@ fn main() -> Result<()> { Some(Commands::Clone { action }) => handle_clone(action), Some(Commands::Config { action }) => handle_config(action), Some(Commands::Stats) => handle_stats(), - Some(Commands::Init) => handle_init(), + Some(Commands::Init { mode }) => handle_init(mode), + Some(Commands::Serve) => mcp::run_server(), #[cfg(target_os = "macos")] Some(Commands::Chat { voice, lang }) => handle_chat(voice, lang), None => handle_speak(cli), @@ -341,21 +360,79 @@ fn handle_chat(voice: Option, lang: Option) -> Result<()> { chat::run_chat_loop(config) } -fn handle_init() -> Result<()> { - let cwd = std::env::current_dir().context("Failed to get current directory")?; - let result = init::run_init(&cwd)?; +fn handle_init(mode: InitMode) -> Result<()> { + let do_cli = 
matches!(mode, InitMode::Cli | InitMode::All); + let do_mcp = matches!(mode, InitMode::Mcp | InitMode::All); + let do_skill = matches!(mode, InitMode::Skill | InitMode::All); + + // --- CLI mode: CLAUDE.md + Stop hook --- + if do_cli { + let cwd = std::env::current_dir().context("Failed to get current directory")?; + let result = init::run_init(&cwd)?; - if !result.claude_md_written && !result.settings_written { - println!("Already configured — nothing to do."); - } else { if result.claude_md_written { - println!("CLAUDE.md configured with vox instructions."); + println!("[cli] CLAUDE.md configured with vox instructions."); } if result.settings_written { - println!(".claude/settings.json configured with Stop hook."); + println!("[cli] .claude/settings.json configured with Stop hook."); + } + if !result.claude_md_written && !result.settings_written { + println!("[cli] already configured."); } } + // --- MCP mode: configure MCP server --- + if do_mcp { + let vox_bin = std::env::current_exe().context("cannot determine vox binary path")?; + let vox_bin_str = vox_bin.to_string_lossy().to_string(); + let home = std::env::var("HOME").context("HOME not set")?; + + let mcp_entry = serde_json::json!({ + "command": vox_bin_str, + "args": ["serve"], + "env": {} + }); + + let code_path = std::path::PathBuf::from(&home).join(".claude.json"); + let code_status = init::inject_mcp_server(&code_path, "vox", &mcp_entry) + .unwrap_or_else(|e| format!("error: {e}")); + + let desktop_path = std::path::PathBuf::from(&home) + .join("Library/Application Support/Claude/claude_desktop_config.json"); + let desktop_status = init::inject_mcp_server(&desktop_path, "vox", &mcp_entry) + .unwrap_or_else(|e| format!("error: {e}")); + + println!("[mcp] Claude Code: {code_status}"); + println!("[mcp] Claude Desktop: {desktop_status}"); + } + + // --- Skill mode: create /speak slash command --- + if do_skill { + let home = std::env::var("HOME").context("HOME not set")?; + let skills_dir = 
std::path::PathBuf::from(&home).join(".claude/commands"); + std::fs::create_dir_all(&skills_dir).ok(); + + let skill_path = skills_dir.join("speak.md"); + if skill_path.exists() { + println!("[skill] /speak already configured."); + } else { + std::fs::write( + &skill_path, + "Use vox to speak the following text aloud: $ARGUMENTS\n\ + \n\ + Call the vox_speak MCP tool if available, otherwise run:\n\ + ```bash\n\ + vox -b say \"$ARGUMENTS\"\n\ + ```\n", + ) + .context("cannot write skill file")?; + println!("[skill] /speak command created."); + } + } + + println!(); + println!("Restart Claude Code / Claude Desktop to activate."); + Ok(()) } diff --git a/src/mcp.rs b/src/mcp.rs new file mode 100644 index 0000000..45f81de --- /dev/null +++ b/src/mcp.rs @@ -0,0 +1,633 @@ +use std::io::{self, BufRead, Write}; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use serde_json::{Value, json}; + +use crate::backend::{self, SpeakOptions}; +use crate::clone; +use crate::db; + +const SERVER_NAME: &str = "vox"; +const SERVER_VERSION: &str = env!("CARGO_PKG_VERSION"); +const PROTOCOL_VERSION: &str = "2024-11-05"; + +const VOX_INSTRUCTIONS: &str = "\ +You have access to vox, a text-to-speech tool. 
Use it to give spoken feedback to the user.\n\ +\n\ +WHEN TO SPEAK (vox_speak):\n\ +- After completing a significant task (feature, bug fix, refactor): summarize what was done\n\ +- When the user asks you to explain something verbally\n\ +- For important warnings or status updates the user should hear\n\ +\n\ +WHEN NOT TO SPEAK:\n\ +- Trivial operations (formatting, single-line fixes, file reads)\n\ +- When the user is clearly reading the output already\n\ +- Rapid back-and-forth conversation\n\ +\n\ +GUIDELINES:\n\ +- Keep summaries under 2 sentences\n\ +- Use French by default (the user prefers it)\n\ +- Use vox_config_show to check the user's preferred voice/backend before first use\n\ +- For longer explanations, use vox_speak with a concise summary, not the full text"; + +#[derive(Deserialize)] +struct JsonRpcMessage { + id: Option, + method: Option, + params: Option, +} + +#[derive(Serialize)] +struct JsonRpcResponse { + jsonrpc: String, + id: Value, + #[serde(skip_serializing_if = "Option::is_none")] + result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, +} + +impl JsonRpcResponse { + fn ok(id: Value, result: Value) -> Self { + Self { + jsonrpc: "2.0".into(), + id, + result: Some(result), + error: None, + } + } + + fn err(id: Value, code: i64, message: String) -> Self { + Self { + jsonrpc: "2.0".into(), + id, + result: None, + error: Some(json!({"code": code, "message": message})), + } + } +} + +/// Run the vox MCP server on stdio. 
+pub fn run_server() -> Result<()> { + let stdin = io::stdin(); + let mut stdout = io::stdout(); + + for line in stdin.lock().lines() { + let line = match line { + Ok(l) => l, + Err(_) => break, + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + + let msg: JsonRpcMessage = match serde_json::from_str(line) { + Ok(m) => m, + Err(e) => { + let resp = JsonRpcResponse::err(Value::Null, -32700, format!("parse error: {e}")); + write_response(&mut stdout, &resp)?; + continue; + } + }; + + let method = msg.method.as_deref().unwrap_or(""); + + // Notifications have no id + let id = match msg.id { + Some(id) => id, + None => continue, + }; + + let response = match method { + "initialize" => handle_initialize(id), + "ping" => JsonRpcResponse::ok(id, json!({})), + "tools/list" => handle_tools_list(id), + "tools/call" => handle_tools_call(id, &msg.params), + _ => JsonRpcResponse::err(id, -32601, format!("method not found: {method}")), + }; + + write_response(&mut stdout, &response)?; + } + + Ok(()) +} + +fn write_response(stdout: &mut io::Stdout, resp: &JsonRpcResponse) -> Result<()> { + let json = serde_json::to_string(resp)?; + writeln!(stdout, "{json}")?; + stdout.flush()?; + Ok(()) +} + +fn handle_initialize(id: Value) -> JsonRpcResponse { + JsonRpcResponse::ok( + id, + json!({ + "protocolVersion": PROTOCOL_VERSION, + "capabilities": { "tools": {} }, + "serverInfo": { + "name": SERVER_NAME, + "version": SERVER_VERSION + }, + "instructions": VOX_INSTRUCTIONS + }), + ) +} + +fn handle_tools_list(id: Value) -> JsonRpcResponse { + JsonRpcResponse::ok(id, json!({ "tools": tool_definitions() })) +} + +fn handle_tools_call(id: Value, params: &Option) -> JsonRpcResponse { + let params = match params { + Some(p) => p, + None => return JsonRpcResponse::err(id, -32602, "missing params".into()), + }; + + let tool_name = match params.get("name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return JsonRpcResponse::err(id, -32602, "missing tool name".into()), + 
/// Build the JSON array of tool descriptors that `handle_tools_list` returns under `tools`.
///
/// Each descriptor has a `name`, a human-readable `description`, and an `inputSchema` object
/// listing the accepted properties and which of them are required. The names here are the
/// same strings `call_tool` dispatches on, so the two must be kept in sync.
fn tool_definitions() -> Value {
    json!([
        // Text-to-speech; only `text` is required, everything else is optional.
        {
            "name": "vox_speak",
            "description": "Read text aloud using text-to-speech. Supports multiple backends, voices, languages, and styles.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "Text to speak aloud"
                    },
                    "voice": {
                        "type": "string",
                        "description": "Voice name or clone name (optional)"
                    },
                    "lang": {
                        "type": "string",
                        "description": "Language code: en, fr, es, de, it, pt, zh, ja, ko, ru, ar, nl"
                    },
                    "backend": {
                        "type": "string",
                        "description": "TTS backend: say (macOS), qwen (macOS, neural), qwen-native (cross-platform)"
                    },
                    "style": {
                        "type": "string",
                        "description": "Intonation style: calm, energetic, warm, authoritative, cheerful, serious"
                    },
                    "gender": {
                        "type": "string",
                        "description": "Gender hint: feminine, masculine"
                    },
                    "rate": {
                        "type": "integer",
                        "description": "Speech rate in words per minute (say backend only)"
                    }
                },
                "required": ["text"]
            }
        },
        // Voice listing for one backend.
        {
            "name": "vox_list_voices",
            "description": "List available voices for a given TTS backend.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "backend": {
                        "type": "string",
                        "description": "TTS backend: say, qwen, qwen-native (defaults to system default)"
                    }
                }
            }
        },
        // Voice-clone management.
        {
            "name": "vox_clone_list",
            "description": "List all saved voice clones.",
            "inputSchema": { "type": "object", "properties": {} }
        },
        {
            "name": "vox_clone_add",
            "description": "Add a voice clone from an audio file.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Name for the voice clone"
                    },
                    "audio": {
                        "type": "string",
                        "description": "Path to the reference audio file"
                    },
                    "text": {
                        "type": "string",
                        "description": "Transcription of the reference audio (improves quality)"
                    }
                },
                "required": ["name", "audio"]
            }
        },
        {
            "name": "vox_clone_remove",
            "description": "Remove a voice clone by name.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Name of the voice clone to remove"
                    }
                },
                "required": ["name"]
            }
        },
        // Stored preferences.
        {
            "name": "vox_config_show",
            "description": "Show current vox preferences (backend, voice, language, rate, style).",
            "inputSchema": { "type": "object", "properties": {} }
        },
        {
            "name": "vox_config_set",
            "description": "Set a vox preference.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "key": {
                        "type": "string",
                        "description": "Preference key: backend, voice, lang, rate, gender, style, model"
                    },
                    "value": {
                        "type": "string",
                        "description": "Preference value"
                    }
                },
                "required": ["key", "value"]
            }
        },
        // Usage statistics.
        {
            "name": "vox_stats",
            "description": "Show vox usage statistics (total requests, characters spoken, recent history).",
            "inputSchema": { "type": "object", "properties": {} }
        }
    ])
}
vec![ToolContent { + content_type: "text".into(), + text, + }], + is_error: true, + } +} + +fn call_tool(name: &str, args: &Value) -> ToolResult { + match name { + "vox_speak" => tool_speak(args), + "vox_list_voices" => tool_list_voices(args), + "vox_clone_list" => tool_clone_list(), + "vox_clone_add" => tool_clone_add(args), + "vox_clone_remove" => tool_clone_remove(args), + "vox_config_show" => tool_config_show(), + "vox_config_set" => tool_config_set(args), + "vox_stats" => tool_stats(), + _ => tool_err(format!("unknown tool: {name}")), + } +} + +fn tool_speak(args: &Value) -> ToolResult { + let text = match args.get("text").and_then(|v| v.as_str()) { + Some(t) => t, + None => return tool_err("missing required parameter: text".into()), + }; + + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + let prefs = db::get_preferences(&conn).unwrap_or_default(); + + // Merge MCP args > DB preferences > defaults + let backend_name = args + .get("backend") + .and_then(|v| v.as_str()) + .map(String::from) + .or(prefs.backend) + .unwrap_or_else(|| crate::config::DEFAULT_BACKEND.to_string()); + + let mut voice = args + .get("voice") + .and_then(|v| v.as_str()) + .map(String::from) + .or(prefs.voice); + let lang = args + .get("lang") + .and_then(|v| v.as_str()) + .map(String::from) + .or(prefs.lang); + let rate = args + .get("rate") + .and_then(|v| v.as_u64()) + .map(|r| r as u32) + .or(prefs.rate); + let gender = args + .get("gender") + .and_then(|v| v.as_str()) + .map(String::from) + .or(prefs.gender); + let style = args + .get("style") + .and_then(|v| v.as_str()) + .map(String::from) + .or(prefs.style); + + // Resolve voice clone + let mut ref_audio = None; + let mut ref_text = None; + let mut effective_backend = backend_name; + + if let Some(ref voice_name) = voice + && let Ok(Some(vc)) = clone::resolve_voice(&conn, voice_name) + { + ref_audio = Some(vc.ref_audio); + ref_text = vc.ref_text; + if effective_backend 
!= "qwen" && effective_backend != "qwen-native" { + #[cfg(target_os = "macos")] + { + effective_backend = "qwen".to_string(); + } + #[cfg(not(target_os = "macos"))] + { + effective_backend = "qwen-native".to_string(); + } + } + voice = None; + } + + let bk = match backend::get_backend(&effective_backend) { + Ok(b) => b, + Err(e) => return tool_err(format!("backend error: {e}")), + }; + + let opts = SpeakOptions { + voice, + lang: lang.clone(), + rate, + gender, + style, + ref_audio, + ref_text, + model: None, + }; + + let start = std::time::Instant::now(); + if let Err(e) = bk.speak(text, &opts) { + return tool_err(format!("speak error: {e}")); + } + let duration_ms = start.elapsed().as_millis() as u64; + + let _ = db::log_usage( + &conn, + &effective_backend, + opts.voice.as_deref(), + opts.lang.as_deref(), + text.len(), + Some(duration_ms), + ); + + tool_ok(format!( + "Spoken: \"{}\" ({duration_ms}ms, {effective_backend})", + if text.len() > 80 { + format!("{}...", &text[..77]) + } else { + text.to_string() + } + )) +} + +fn tool_list_voices(args: &Value) -> ToolResult { + let backend_name = args + .get("backend") + .and_then(|v| v.as_str()) + .unwrap_or(crate::config::DEFAULT_BACKEND); + + let bk = match backend::get_backend(backend_name) { + Ok(b) => b, + Err(e) => return tool_err(format!("backend error: {e}")), + }; + + match bk.list_voices() { + Ok(voices) => tool_ok(voices.join("\n")), + Err(e) => tool_err(format!("error listing voices: {e}")), + } +} + +fn tool_clone_list() -> ToolResult { + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + match db::list_clones(&conn) { + Ok(clones) => { + if clones.is_empty() { + tool_ok("No voice clones.".into()) + } else { + let lines: Vec = clones + .iter() + .map(|c| { + let text_info = c + .ref_text + .as_deref() + .map(|t| format!(" (text: \"{t}\")")) + .unwrap_or_default(); + format!( + "{}: {}{} [{}]", + c.name, c.ref_audio, text_info, c.created_at + 
) + }) + .collect(); + tool_ok(lines.join("\n")) + } + } + Err(e) => tool_err(format!("error: {e}")), + } +} + +fn tool_clone_add(args: &Value) -> ToolResult { + let name = match args.get("name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return tool_err("missing required parameter: name".into()), + }; + let audio = match args.get("audio").and_then(|v| v.as_str()) { + Some(a) => a, + None => return tool_err("missing required parameter: audio".into()), + }; + let text = args.get("text").and_then(|v| v.as_str()); + + if let Err(e) = clone::validate_audio(audio) { + return tool_err(format!("invalid audio: {e}")); + } + + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + match db::add_clone(&conn, name, audio, text) { + Ok(_) => tool_ok(format!("Voice clone '{name}' added.")), + Err(e) => tool_err(format!("error: {e}")), + } +} + +fn tool_clone_remove(args: &Value) -> ToolResult { + let name = match args.get("name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return tool_err("missing required parameter: name".into()), + }; + + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + match db::remove_clone(&conn, name) { + Ok(true) => tool_ok(format!("Voice clone '{name}' removed.")), + Ok(false) => tool_err(format!("Voice clone '{name}' not found.")), + Err(e) => tool_err(format!("error: {e}")), + } +} + +fn tool_config_show() -> ToolResult { + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + match db::get_preferences(&conn) { + Ok(prefs) => { + let lines = [ + format!( + "backend: {}", + prefs.backend.as_deref().unwrap_or("(default)") + ), + format!("voice: {}", prefs.voice.as_deref().unwrap_or("(default)")), + format!("lang: {}", prefs.lang.as_deref().unwrap_or("(default)")), + format!( + "rate: {}", + prefs + .rate + .map(|r| r.to_string()) + .as_deref() + 
.unwrap_or("(default)") + ), + format!( + "gender: {}", + prefs.gender.as_deref().unwrap_or("(default)") + ), + format!("style: {}", prefs.style.as_deref().unwrap_or("(default)")), + format!("model: {}", prefs.model.as_deref().unwrap_or("(default)")), + ]; + tool_ok(lines.join("\n")) + } + Err(e) => tool_err(format!("error: {e}")), + } +} + +fn tool_config_set(args: &Value) -> ToolResult { + let key = match args.get("key").and_then(|v| v.as_str()) { + Some(k) => k, + None => return tool_err("missing required parameter: key".into()), + }; + let value = match args.get("value").and_then(|v| v.as_str()) { + Some(v) => v, + None => return tool_err("missing required parameter: value".into()), + }; + + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + match db::set_preference(&conn, key, value) { + Ok(_) => tool_ok(format!("{key} = {value}")), + Err(e) => tool_err(format!("error: {e}")), + } +} + +fn tool_stats() -> ToolResult { + let conn = match db::open() { + Ok(c) => c, + Err(e) => return tool_err(format!("database error: {e}")), + }; + + let (count, total_chars) = match db::get_usage_summary(&conn) { + Ok(s) => s, + Err(e) => return tool_err(format!("error: {e}")), + }; + + let mut output = format!("Total requests: {count}\nTotal characters: {total_chars}"); + + if count > 0 + && let Ok(entries) = db::get_usage_stats(&conn) + { + output.push_str("\n\nRecent usage:"); + for e in entries.iter().take(10) { + let voice_str = e.voice.as_deref().unwrap_or("-"); + let lang_str = e.lang.as_deref().unwrap_or("-"); + let dur_str = e + .duration_ms + .map(|d| format!("{d}ms")) + .unwrap_or_else(|| "-".into()); + output.push_str(&format!( + "\n {} | {} | voice={} lang={} | {}chars | {}", + e.timestamp, e.backend, voice_str, lang_str, e.text_len, dur_str + )); + } + } + + tool_ok(output) +} diff --git a/tests/cli_test.rs b/tests/cli_test.rs index 47c30fd..338494b 100644 --- a/tests/cli_test.rs +++ b/tests/cli_test.rs 
@@ -311,7 +311,7 @@ fn test_init_idempotent() { .current_dir(dir.path()) .assert() .success() - .stdout(predicate::str::contains("Already configured")); + .stdout(predicate::str::contains("already configured")); } // --- Help subcommands ---