diff --git a/package-lock.json b/package-lock.json index 20c50c25e..ae282b413 100644 --- a/package-lock.json +++ b/package-lock.json @@ -70,6 +70,7 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1848,6 +1849,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.8.tgz", "integrity": "sha512-3MbSL37jEchWZz2p2mjntRZtPt837ij10ApxKfgmXCTuHWagYg7iA5bqPw6C8BMPfwidlvfPI/fxOc42HLhcyg==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1908,6 +1910,7 @@ "integrity": "sha512-4Z+L8I2OqhZV8qA132M4wNL30ypZGYOQVBfMgxDH/K5UX0PNqTu1c6za9ST5r9+tavvHiTWmBnKzpCJ/GlVFtg==", "dev": true, "license": "BSD-2-Clause", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "7.18.0", "@typescript-eslint/types": "7.18.0", @@ -2114,7 +2117,8 @@ "version": "5.5.0", "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-5.5.0.tgz", "integrity": "sha512-hqJHYaQb5OptNunnyAnkHyM8aCjZ1MEIDTQu1iIbbTD/xops91NB5yq1ZK/dC2JDbVWtF23zUtl9JE2NqwT87A==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/acorn": { "version": "8.15.0", @@ -2122,6 +2126,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2433,6 +2438,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3114,6 +3120,7 @@ "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -5995,6 +6002,7 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -6118,6 +6126,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz", "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -6127,6 +6136,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz", "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -7078,6 +7088,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -7267,6 +7278,7 @@ "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index ddb75cff4..73f8756b8 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -566,13 +566,16 @@ dependencies = [ name = "codex-monitor" version = "0.1.0" dependencies = [ + "block2", "chrono", "cpal", "fix-path-env", "git2", "ignore", "libc", + "objc2", "objc2-app-kit", + "objc2-av-foundation", "objc2-foundation", "portable-pty", "reqwest", @@ -2633,6 +2636,38 @@ dependencies = [ "objc2-quartz-core", ] +[[package]] +name = "objc2-av-foundation" +version = 
"0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478ae33fcac9df0a18db8302387c666b8ef08a3e2d62b510ca4fc278a384b6c0" +dependencies = [ + "bitflags 2.10.0", + "block2", + "dispatch2", + "objc2", + "objc2-avf-audio", + "objc2-core-audio-types", + "objc2-core-foundation", + "objc2-core-graphics", + "objc2-core-image", + "objc2-core-video", + "objc2-foundation", + "objc2-image-io", + "objc2-media-toolbox", + "objc2-quartz-core", +] + +[[package]] +name = "objc2-avf-audio" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13a380031deed8e99db00065c45937da434ca987c034e13b87e4441f9e4090be" +dependencies = [ + "objc2", + "objc2-foundation", +] + [[package]] name = "objc2-cloud-kit" version = "0.3.2" @@ -2644,6 +2679,28 @@ dependencies = [ "objc2-foundation", ] +[[package]] +name = "objc2-core-audio" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1eebcea8b0dbff5f7c8504f3107c68fc061a3eb44932051c8cf8a68d969c3b2" +dependencies = [ + "dispatch2", + "objc2", + "objc2-core-audio-types", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-core-audio-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a89f2ec274a0cf4a32642b2991e8b351a404d290da87bb6a9a9d8632490bd1c" +dependencies = [ + "bitflags 2.10.0", + "objc2", +] + [[package]] name = "objc2-core-data" version = "0.3.2" @@ -2689,6 +2746,21 @@ dependencies = [ "objc2-foundation", ] +[[package]] +name = "objc2-core-media" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ec576860167a15dd9fce7fbee7512beb4e31f532159d3482d1f9c6caedf31d" +dependencies = [ + "bitflags 2.10.0", + "dispatch2", + "objc2", + "objc2-core-audio", + "objc2-core-audio-types", + "objc2-core-foundation", + "objc2-core-video", +] + [[package]] name = "objc2-core-text" version = "0.3.2" @@ -2742,6 +2814,17 @@ dependencies 
= [ "objc2-core-foundation", ] +[[package]] +name = "objc2-image-io" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b0446e98cf4a784cc7a0177715ff317eeaa8463841c616cfc78aa4f953c4ea" +dependencies = [ + "objc2", + "objc2-core-foundation", + "objc2-core-graphics", +] + [[package]] name = "objc2-io-surface" version = "0.3.2" @@ -2763,6 +2846,18 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "objc2-media-toolbox" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd9fdde720df3da7046bb9097811000c1e7ab5cd579fa89d96b27d56781fb30" +dependencies = [ + "objc2", + "objc2-core-audio-types", + "objc2-core-foundation", + "objc2-core-media", +] + [[package]] name = "objc2-osa-kit" version = "0.3.2" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 35548c275..9ff5aeb3b 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -44,5 +44,8 @@ whisper-rs = "0.12" sha2 = "0.10" [target."cfg(target_os = \"macos\")".dependencies] +objc2 = "0.6" objc2-app-kit = { version = "0.3", features = ["NSAppearance", "NSResponder", "NSWindow"] } objc2-foundation = { version = "0.3", features = ["NSString"] } +objc2-av-foundation = { version = "0.3", features = ["AVCaptureDevice", "AVMediaFormat"] } +block2 = "0.6" diff --git a/src-tauri/src/dictation.rs b/src-tauri/src/dictation.rs index b93f8c6c6..55300b2f4 100644 --- a/src-tauri/src/dictation.rs +++ b/src-tauri/src/dictation.rs @@ -16,9 +16,80 @@ use sha2::{Digest, Sha256}; use whisper_rs::get_lang_id; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; +#[cfg(target_os = "macos")] +use objc2_av_foundation::{AVAuthorizationStatus, AVCaptureDevice, AVMediaTypeAudio}; + const DEFAULT_MODEL_ID: &str = "base"; const MAX_CAPTURE_SECONDS: u32 = 120; +/// Checks microphone authorization status on macOS. 
+#[cfg(target_os = "macos")]
+fn check_microphone_authorization() -> Result<AVAuthorizationStatus, String> {
+    // SAFETY (review): this only reads the `AVMediaTypeAudio` static; the
+    // `Option` is checked with `ok_or` instead of being assumed present.
+    let media_type = unsafe { AVMediaTypeAudio }.ok_or("Failed to get audio media type")?;
+    // SAFETY (review): `media_type` was checked to be present just above and is
+    // passed straight through to the status query.
+    let status = unsafe { AVCaptureDevice::authorizationStatusForMediaType(media_type) };
+    Ok(status)
+}
+
+/// Triggers the microphone permission request dialog on macOS.
+///
+/// Call this from synchronous code only: the `RcBlock` built here is not
+/// `Send`, so it must be created and dropped without an `.await` in between.
+///
+/// Returns `Err` when the audio media type constant is unavailable. The
+/// user's answer is not returned; callers re-check the authorization status.
+#[cfg(target_os = "macos")]
+fn trigger_microphone_permission_request() -> Result<(), String> {
+    use block2::RcBlock;
+    use objc2::runtime::Bool;
+
+    // SAFETY (review): this only reads the `AVMediaTypeAudio` static; the
+    // `Option` is checked with `ok_or` instead of being assumed present.
+    let media_type = unsafe { AVMediaTypeAudio }.ok_or("Failed to get audio media type")?;
+
+    // The completion handler is deliberately a no-op: the result is observed
+    // by polling the authorization status separately.
+    let block = RcBlock::new(|_granted: Bool| {});
+
+    // SAFETY (review): `media_type` was checked above and `block` stays alive
+    // for the duration of this call.
+    unsafe {
+        AVCaptureDevice::requestAccessForMediaType_completionHandler(media_type, &block);
+    }
+
+    Ok(())
+}
+
+/// Requests microphone permission on macOS.
+/// Returns Ok(true) if permission was granted, Ok(false) if denied,
+/// or Err with a message if the request failed.
+#[cfg(target_os = "macos")] +async fn request_microphone_permission() -> Result { + let status = check_microphone_authorization()?; + + match status { + AVAuthorizationStatus::Authorized => Ok(true), + AVAuthorizationStatus::Denied | AVAuthorizationStatus::Restricted => Ok(false), + AVAuthorizationStatus::NotDetermined | _ => { + // Trigger the permission request (this shows the system dialog) + // We do this in a sync context to avoid RcBlock Send issues + trigger_microphone_permission_request()?; + + // Poll the authorization status until it changes from NotDetermined + let mut attempts = 0; + loop { + tokio::time::sleep(Duration::from_millis(100)).await; + let new_status = check_microphone_authorization()?; + if new_status != AVAuthorizationStatus::NotDetermined { + return Ok(new_status == AVAuthorizationStatus::Authorized); + } + attempts += 1; + if attempts > 600 { + // 60 seconds timeout + return Err("Microphone permission request timed out.".to_string()); + } + } + } + } +} + +#[cfg(not(target_os = "macos"))] +async fn request_microphone_permission() -> Result { + // On non-macOS platforms, assume permission is granted + // (Linux doesn't have the same permission model) + Ok(true) +} + struct DictationModelInfo { id: &'static str, filename: &'static str, @@ -678,6 +749,22 @@ pub(crate) async fn dictation_start( } } + // Request microphone permission before attempting to capture audio + match request_microphone_permission().await { + Ok(true) => { + // Permission granted, continue + } + Ok(false) => { + let message = "Microphone access was denied. 
Please grant microphone permission in System Settings > Privacy & Security > Microphone.".to_string(); + emit_event(&app, DictationEvent::Error { message: message.clone() }); + return Err(message); + } + Err(error) => { + emit_event(&app, DictationEvent::Error { message: error.clone() }); + return Err(error); + } + } + let audio = Arc::new(Mutex::new(Vec::new())); let (stop_tx, stop_rx) = mpsc::channel(); let stop_tx_thread = stop_tx.clone();