diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83d68cf2d..b393d2f09 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,6 +12,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable + - name: Install CMake + run: command -v cmake >/dev/null 2>&1 || brew install cmake - name: Rust tests run: cargo test working-directory: src-tauri @@ -24,6 +26,8 @@ jobs: node-version: "20" cache: "npm" - uses: dtolnay/rust-toolchain@stable + - name: Install CMake + run: command -v cmake >/dev/null 2>&1 || brew install cmake - name: Install dependencies run: npm ci - name: Typecheck diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5f75cba42..b192abafa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,6 +38,9 @@ jobs: with: workspaces: './src-tauri -> target' + - name: Install CMake + run: command -v cmake >/dev/null 2>&1 || brew install cmake + - name: Install dependencies run: npm ci @@ -178,7 +181,7 @@ jobs: - name: install dependencies (linux only) run: | sudo apt-get update - sudo apt-get install -y libwebkit2gtk-4.1-dev libgtk-3-dev libayatana-appindicator3-dev librsvg2-dev patchelf libfuse2 xdg-utils + sudo apt-get install -y cmake libwebkit2gtk-4.1-dev libgtk-3-dev libayatana-appindicator3-dev librsvg2-dev patchelf libfuse2 xdg-utils - name: setup node uses: actions/setup-node@v4 diff --git a/README.md b/README.md index 76242f3d8..4105d0c71 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,17 @@ CodexMonitor is a macOS Tauri app for orchestrating multiple Codex agents across - Node.js + npm - Rust toolchain (stable) +- CMake (required to build native Whisper bindings) - Codex installed on your system and available as `codex` in `PATH` - Git CLI (used for worktree operations) - GitHub CLI (`gh`) for the Issues panel (optional) If the `codex` binary is not in `PATH`, update the backend to pass a custom path per workspace. 
+If you hit native build errors, run: + +```bash +npm run doctor +``` ## Getting Started diff --git a/package.json b/package.json index 316168bf1..98b5dbebe 100644 --- a/package.json +++ b/package.json @@ -7,9 +7,13 @@ "dev": "vite", "build": "tsc && vite build", "build:appimage": "NO_STRIP=1 tauri build --bundles appimage", + "doctor": "sh scripts/doctor.sh", + "doctor:strict": "sh scripts/doctor.sh --strict", "typecheck": "tsc --noEmit", "preview": "vite preview", - "tauri": "tauri" + "tauri": "tauri", + "tauri:dev": "npm run doctor:strict && tauri dev", + "tauri:build": "npm run doctor:strict && tauri build" }, "dependencies": { "@tauri-apps/api": "^2", diff --git a/scripts/doctor.sh b/scripts/doctor.sh new file mode 100644 index 000000000..f997f6451 --- /dev/null +++ b/scripts/doctor.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env sh +set -u +# Doctor: reports whether the native build prerequisite cmake is on PATH and prints an install hint when it is not. +STRICT=0 +if [ "${1:-}" = "--strict" ]; then + STRICT=1 +fi +# Collect the names of missing tools; an empty string means every check passed. +missing="" +if ! command -v cmake >/dev/null 2>&1; then + missing="cmake" +fi + +if [ -z "$missing" ]; then + echo "Doctor: OK" + exit 0 +fi + +echo "Doctor: missing dependencies: $missing" +# Print the install command that matches the host OS reported by uname. +case "$(uname -s)" in + Darwin) + echo "Install: brew install cmake" + ;; + Linux) + echo "Ubuntu/Debian: sudo apt-get install cmake" + echo "Fedora: sudo dnf install cmake" + echo "Arch: sudo pacman -S cmake" + ;; + MINGW*|MSYS*|CYGWIN*) + echo "Install: choco install cmake" + echo "Or download from: https://cmake.org/download/" + ;; + *) + echo "Install CMake from: https://cmake.org/download/" + ;; +esac +# With --strict a missing dependency fails the caller (exit 1); otherwise the script only reports. +if [ "$STRICT" -eq 1 ]; then + exit 1 +fi + +exit 0 diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 7e43f59ad..6f2831d07 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -32,6 +32,28 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "alsa" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed7572b7ba83a31e20d1b48970ee402d2e3e0537dcfe0a3ff4d6eb7508617d43" +dependencies = [ + "alsa-sys", + 
"bitflags 2.10.0", + "cfg-if", + "libc", +] + +[[package]] +name = "alsa-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527" +dependencies = [ + "libc", + "pkg-config", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -107,7 +129,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix", + "rustix 1.1.3", "slab", "windows-sys 0.61.2", ] @@ -138,7 +160,7 @@ dependencies = [ "cfg-if", "event-listener", "futures-lite", - "rustix", + "rustix 1.1.3", ] [[package]] @@ -164,7 +186,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix", + "rustix 1.1.3", "signal-hook-registry", "slab", "windows-sys 0.61.2", @@ -234,6 +256,47 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn 2.0.114", + "which", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn 2.0.114", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -423,6 +486,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfb" version = "0.7.3" @@ -468,16 +540,39 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.9", +] + +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + [[package]] name = "codex-monitor" version = "0.1.0" dependencies = [ + "cpal", "fix-path-env", "git2", "ignore", "portable-pty", + "reqwest", "serde", "serde_json", + "sha2", "tauri", "tauri-build", "tauri-plugin-dialog", @@ -486,6 +581,7 @@ dependencies = [ "tauri-plugin-updater", "tokio", "uuid", + "whisper-rs", ] [[package]] @@ -563,6 +659,49 @@ dependencies = [ "libc", ] +[[package]] +name = "coreaudio-rs" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace" +dependencies = [ + "bitflags 1.3.2", + "core-foundation-sys", + "coreaudio-sys", +] + +[[package]] +name = "coreaudio-sys" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6" +dependencies = [ + "bindgen 0.72.1", +] + +[[package]] +name = "cpal" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "873dab07c8f743075e57f524c583985fbaf745602acbe916a01539364369a779" 
+dependencies = [ + "alsa", + "core-foundation-sys", + "coreaudio-rs", + "dasp_sample", + "jni", + "js-sys", + "libc", + "mach2", + "ndk 0.8.0", + "ndk-context", + "oboe", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "windows 0.54.0", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -697,6 +836,12 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "dasp_sample" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f" + [[package]] name = "deranged" version = "0.5.5" @@ -856,6 +1001,12 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "embed-resource" version = "3.0.6" @@ -1067,6 +1218,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futf" version = "0.1.5" @@ -1886,6 +2043,24 @@ dependencies = [ "once_cell", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -2008,6 +2183,12 @@ version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libappindicator" version = "0.9.0" @@ -2028,7 +2209,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf" dependencies = [ "gtk-sys", - "libloading", + "libloading 0.7.4", "once_cell", ] @@ -2062,6 +2243,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libredox" version = "0.1.12" @@ -2099,6 +2290,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2138,6 +2335,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44" +dependencies = [ + "libc", +] + [[package]] name = "markup5ever" version = "0.14.1" @@ -2199,6 +2405,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = 
"0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisign-verify" version = "0.2.4" @@ -2247,6 +2459,20 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "ndk" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" +dependencies = [ + "bitflags 2.10.0", + "jni-sys", + "log", + "ndk-sys 0.5.0+25.2.9519653", + "num_enum", + "thiserror 1.0.69", +] + [[package]] name = "ndk" version = "0.9.0" @@ -2256,7 +2482,7 @@ dependencies = [ "bitflags 2.10.0", "jni-sys", "log", - "ndk-sys", + "ndk-sys 0.6.0+11769913", "num_enum", "raw-window-handle", "thiserror 1.0.69", @@ -2268,6 +2494,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" +[[package]] +name = "ndk-sys" +version = "0.5.0+25.2.9519653" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" +dependencies = [ + "jni-sys", +] + [[package]] name = "ndk-sys" version = "0.6.0+11769913" @@ -2303,12 +2538,33 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-derive" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2564,6 +2820,29 @@ dependencies = [ "objc2-security", ] +[[package]] +name = "oboe" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb" +dependencies = [ + "jni", + "ndk 0.8.0", + "ndk-context", + "num-derive", + "num-traits", + "oboe-sys", +] + +[[package]] +name = "oboe-sys" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bb09a4a2b1d668170cfe0a7d5bc103f8999fb316c98099b6a9939c9f2e79d" +dependencies = [ + "cc", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -2895,7 +3174,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -2950,6 +3229,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.114", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -3038,7 +3327,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.17", @@ -3058,7 +3347,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -3370,6 +3659,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -3385,6 +3680,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.3" @@ -3394,7 +3702,7 @@ dependencies = [ "bitflags 2.10.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] @@ -3842,7 +4150,7 @@ checksum = "aac18da81ebbf05109ab275b157c22a653bb3c12cf884450179942f81bcbf6c3" dependencies = [ "bytemuck", "js-sys", - "ndk", + "ndk 0.9.0", "objc2", "objc2-core-foundation", "objc2-core-graphics", @@ -4021,9 +4329,9 @@ dependencies = [ "lazy_static", "libc", "log", - "ndk", + "ndk 0.9.0", "ndk-context", - "ndk-sys", + "ndk-sys 0.6.0+11769913", "objc2", "objc2-app-kit", "objc2-foundation", @@ -4034,7 +4342,7 @@ dependencies = [ "tao-macros", "unicode-segmentation", "url", - "windows", + "windows 0.61.3", "windows-core 0.61.2", "windows-version", "x11-dl", @@ -4116,7 +4424,7 @@ dependencies = [ "webkit2gtk", "webview2-com", "window-vibrancy", - "windows", + "windows 0.61.3", ] [[package]] @@ -4257,7 +4565,7 @@ dependencies = [ "tauri-plugin", "thiserror 2.0.17", "url", - "windows", + "windows 0.61.3", "zbus", ] @@ -4325,7 +4633,7 @@ dependencies = [ "url", "webkit2gtk", "webview2-com", - "windows", + "windows 0.61.3", ] [[package]] @@ -4351,7 +4659,7 @@ dependencies = [ "url", "webkit2gtk", "webview2-com", - "windows", + "windows 0.61.3", "wry", ] @@ -4413,7 +4721,7 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -5141,7 +5449,7 @@ checksum = 
"7130243a7a5b33c54a444e54842e6a9e133de08b5ad7b5861cd8ed9a6a5bc96a" dependencies = [ "webview2-com-macros", "webview2-com-sys", - "windows", + "windows 0.61.3", "windows-core 0.61.2", "windows-implement", "windows-interface", @@ -5165,10 +5473,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "381336cfffd772377d291702245447a5251a2ffa5bad679c99e61bc48bacbf9c" dependencies = [ "thiserror 2.0.17", - "windows", + "windows 0.61.3", "windows-core 0.61.2", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + +[[package]] +name = "whisper-rs" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c597ac8a9d5c4719fee232abc871da184ea50a4fea38d2d00348fd95072b2b0" +dependencies = [ + "whisper-rs-sys", +] + +[[package]] +name = "whisper-rs-sys" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22f00ed0995463eecc34ef89905845f6bf6fd37ea70789fed180520050da8f8" +dependencies = [ + "bindgen 0.69.5", + "cfg-if", + "cmake", + "fs_extra", +] + [[package]] name = "winapi" version = "0.3.9" @@ -5215,6 +5556,16 @@ dependencies = [ "windows-version", ] +[[package]] +name = "windows" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +dependencies = [ + "windows-core 0.54.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows" version = "0.61.3" @@ -5237,6 +5588,16 @@ dependencies = [ "windows-core 0.61.2", ] +[[package]] +name = "windows-core" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +dependencies = [ + 
"windows-result 0.1.2", + "windows-targets 0.52.6", +] + [[package]] name = "windows-core" version = "0.61.2" @@ -5318,6 +5679,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -5673,7 +6043,7 @@ dependencies = [ "jni", "kuchikiki", "libc", - "ndk", + "ndk 0.9.0", "objc2", "objc2-app-kit", "objc2-core-foundation", @@ -5691,7 +6061,7 @@ dependencies = [ "webkit2gtk", "webkit2gtk-sys", "webview2-com", - "windows", + "windows 0.61.3", "windows-core 0.61.2", "windows-version", "x11-dl", @@ -5725,7 +6095,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.3", ] [[package]] @@ -5773,7 +6143,7 @@ dependencies = [ "hex", "libc", "ordered-stream", - "rustix", + "rustix 1.1.3", "serde", "serde_repr", "tracing", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index bbf5c8741..f00e6e18e 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -24,13 +24,17 @@ tauri-plugin-opener = "2" tauri-plugin-process = "2" serde = { version = "1", features = ["derive"] } serde_json = "1" -tokio = { version = "1", features = ["io-util", "net", "process", "rt", "sync", "time"] } +tokio = { version = "1", features = ["fs", "net", "io-util", "process", "rt", "sync", "time"] } uuid = { version = "1", features = ["v4"] } tauri-plugin-dialog = "2" git2 = "0.20.3" fix-path-env = { git = "https://github.com/tauri-apps/fix-path-env-rs" } ignore = "0.4.25" portable-pty = "0.8" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] } +cpal = "0.15" +whisper-rs = "0.12" +sha2 = "0.10" 
[target."cfg(not(any(target_os = \"android\", target_os = \"ios\")))".dependencies] tauri-plugin-updater = "2" diff --git a/src-tauri/Info.plist b/src-tauri/Info.plist new file mode 100644 index 000000000..9d3a6ffdd --- /dev/null +++ b/src-tauri/Info.plist @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>NSMicrophoneUsageDescription</key> + <string>Allow access to the microphone for dictation.</string> +</dict> +</plist> diff --git a/src-tauri/src/dictation.rs b/src-tauri/src/dictation.rs new file mode 100644 index 000000000..b93f8c6c6 --- /dev/null +++ b/src-tauri/src/dictation.rs @@ -0,0 +1,1328 @@ +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; +use std::time::{Duration, Instant}; + +use serde::Serialize; +use tauri::{AppHandle, Emitter, Manager, State}; +use tokio::io::AsyncWriteExt; +use tokio::sync::oneshot; + +use crate::state::AppState; + +use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; +use cpal::{FromSample, Sample, SampleFormat, SizedSample}; +use sha2::{Digest, Sha256}; +use whisper_rs::get_lang_id; +use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; + +const DEFAULT_MODEL_ID: &str = "base"; +const MAX_CAPTURE_SECONDS: u32 = 120; + +struct DictationModelInfo { + id: &'static str, + filename: &'static str, + url: &'static str, + sha256: &'static str, +} + +const MODEL_CATALOG: &[DictationModelInfo] = &[ + DictationModelInfo { + id: "tiny", + filename: "ggml-tiny.bin", + url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", + sha256: "be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21", + }, + DictationModelInfo { + id: "base", + filename: "ggml-base.bin", + url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin", + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe", + }, + DictationModelInfo { + id: "small", + filename: "ggml-small.bin", + url: 
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", + sha256: "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b", + }, + DictationModelInfo { + id: "medium", + filename: "ggml-medium.bin", + url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin", + sha256: "6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208", + }, + DictationModelInfo { + id: "large-v3", + filename: "ggml-large-v3.bin", + url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin", + sha256: "64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2", + }, +]; + +#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub(crate) enum DictationModelState { + Missing, + Downloading, + Ready, + Error, +} + +#[derive(Debug, Serialize, Clone)] +pub(crate) struct DictationDownloadProgress { + #[serde(rename = "downloadedBytes")] + pub(crate) downloaded_bytes: u64, + #[serde(rename = "totalBytes")] + pub(crate) total_bytes: Option<u64>, /* None until the server reports a Content-Length */ +} + +#[derive(Debug, Serialize, Clone)] +pub(crate) struct DictationModelStatus { + pub(crate) state: DictationModelState, + #[serde(rename = "modelId")] + pub(crate) model_id: String, + pub(crate) progress: Option<DictationDownloadProgress>, + pub(crate) error: Option<String>, + pub(crate) path: Option<String>, +} + +#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub(crate) enum DictationSessionState { + Idle, + Listening, + Processing, +} + +#[derive(Debug, Serialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum DictationEvent { + State { state: DictationSessionState }, + Level { value: f32 }, + Transcript { text: String }, + Error { message: String }, + Canceled { message: String }, +} + +pub(crate) struct DictationSessionHandle { + pub(crate) stop: mpsc::Sender<()>, + pub(crate) stopped: oneshot::Receiver<()>, + pub(crate) audio: Arc<Mutex<Vec<f32>>>, /* NOTE(review): sample element type reconstructed as f32 — confirm */ + pub(crate) sample_rate: u32, + pub(crate) model_id: 
String, + pub(crate) preferred_language: Option<String>, /* NOTE(review): payload type reconstructed as String — confirm */ +} + +pub(crate) struct DictationState { + pub(crate) model_status: DictationModelStatus, + pub(crate) download_cancel: Option<Arc<AtomicBool>>, + pub(crate) download_task: Option<tokio::task::JoinHandle<()>>, /* NOTE(review): handle type inferred from tokio::spawn — confirm */ + pub(crate) session_state: DictationSessionState, + pub(crate) session: Option<DictationSessionHandle>, + pub(crate) processing_cancel: Option<Arc<AtomicBool>>, + pub(crate) cached_context: Option<CachedWhisperContext>, +} + +pub(crate) struct CachedWhisperContext { + pub(crate) model_id: String, + pub(crate) context: Arc<WhisperContext>, +} + +impl Default for DictationState { + fn default() -> Self { + Self { + model_status: missing_status(DEFAULT_MODEL_ID), + download_cancel: None, + download_task: None, + session_state: DictationSessionState::Idle, + session: None, + processing_cancel: None, + cached_context: None, + } + } +} + +fn model_dir(app: &AppHandle) -> PathBuf { + app.path() + .app_data_dir() + .unwrap_or_else(|_| std::env::current_dir().unwrap_or_else(|_| ".".into())) + .join("models") + .join("whisper") +} + +fn model_info(model_id: &str) -> Option<&'static DictationModelInfo> { + MODEL_CATALOG.iter().find(|info| info.id == model_id) +} + +fn model_path(app: &AppHandle, model_id: &str) -> Result<PathBuf, String> { + let info = model_info(model_id) + .ok_or_else(|| format!("Unknown dictation model: {model_id}"))?; + Ok(model_dir(app).join(info.filename)) +} + +fn model_temp_path(app: &AppHandle, model_id: &str) -> Result<PathBuf, String> { + let info = model_info(model_id) + .ok_or_else(|| format!("Unknown dictation model: {model_id}"))?; + Ok(model_dir(app).join(format!("{}.partial", info.filename))) +} + +fn missing_status(model_id: &str) -> DictationModelStatus { + DictationModelStatus { + state: DictationModelState::Missing, + model_id: model_id.to_string(), + progress: None, + error: None, + path: None, + } +} + +fn ready_status(model_id: &str, path: &PathBuf) -> DictationModelStatus { + DictationModelStatus { + state: DictationModelState::Ready, + model_id: model_id.to_string(), + progress: None, + error: None, + path: 
Some(path.to_string_lossy().to_string()), + } +} + +fn emit_status(app: &AppHandle, status: &DictationModelStatus) { + let _ = app.emit("dictation-download", status); +} + +fn emit_event(app: &AppHandle, event: DictationEvent) { + let _ = app.emit("dictation-event", event); +} + +async fn clear_processing_cancel( + app: &AppHandle, + cancel_flag: &Arc<AtomicBool>, +) -> bool { + let state_handle = app.state::<AppState>(); + let mut dictation = state_handle.dictation.lock().await; + if dictation + .processing_cancel + .as_ref() + .map_or(false, |flag| Arc::ptr_eq(flag, cancel_flag)) /* only clear the flag this caller installed */ + { + dictation.processing_cancel = None; + return true; + } + false +} + +async fn update_status( + app: &AppHandle, + state: &State<'_, AppState>, + status: DictationModelStatus, +) { + { + let mut dictation = state.dictation.lock().await; + dictation.model_status = status.clone(); + } + emit_status(app, &status); +} + +async fn clear_download_state(state: &State<'_, AppState>) { + let mut dictation = state.dictation.lock().await; + dictation.download_cancel = None; + dictation.download_task = None; +} + +async fn resolve_model_id( + state: &State<'_, AppState>, + model_id: Option<String>, +) -> String { + let candidate = if let Some(model_id) = model_id { + model_id + } else { + let settings = state.app_settings.lock().await; + if settings.dictation_model_id.trim().is_empty() { + DEFAULT_MODEL_ID.to_string() + } else { + settings.dictation_model_id.clone() + } + }; + if model_info(&candidate).is_some() { + candidate + } else { + DEFAULT_MODEL_ID.to_string() + } +} + +async fn refresh_status( + app: &AppHandle, + state: &State<'_, AppState>, + model_id: &str, +) -> DictationModelStatus { + let mut dictation = state.dictation.lock().await; + if dictation.model_status.state == DictationModelState::Downloading + && dictation.model_status.model_id == model_id + { + return dictation.model_status.clone(); + } + + let path = match model_path(app, model_id) { + Ok(path) => path, + Err(error) => { + dictation.model_status = 
DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id.to_string(), + progress: None, + error: Some(error), + path: None, + }; + return dictation.model_status.clone(); + } + }; + + if path.exists() { + dictation.model_status = ready_status(model_id, &path); + } else { + dictation.model_status = missing_status(model_id); + } + dictation.model_status.clone() +} + +#[tauri::command] +pub(crate) async fn dictation_model_status( + app: AppHandle, + state: State<'_, AppState>, + model_id: Option<String>, +) -> Result<DictationModelStatus, String> { /* NOTE(review): error type reconstructed as String — confirm */ + let model_id = resolve_model_id(&state, model_id).await; + Ok(refresh_status(&app, &state, &model_id).await) +} + +#[tauri::command] +pub(crate) async fn dictation_download_model( + app: AppHandle, + state: State<'_, AppState>, + model_id: Option<String>, +) -> Result<DictationModelStatus, String> { /* NOTE(review): error type reconstructed as String — confirm */ + let model_id = resolve_model_id(&state, model_id).await; + let current = refresh_status(&app, &state, &model_id).await; + if current.state == DictationModelState::Ready { + return Ok(current); + } + if current.state == DictationModelState::Downloading + && current.model_id == model_id + { + return Ok(current); + } + + let cancel_flag = Arc::new(AtomicBool::new(false)); + { + let mut dictation = state.dictation.lock().await; + if dictation.model_status.state == DictationModelState::Downloading + && dictation.model_status.model_id != model_id + { + if let Some(flag) = dictation.download_cancel.take() { + flag.store(true, Ordering::SeqCst); + } + if let Some(task) = dictation.download_task.take() { + task.abort(); + } + } + dictation.download_cancel = Some(cancel_flag.clone()); + dictation.model_status = DictationModelStatus { + state: DictationModelState::Downloading, + model_id: model_id.clone(), + progress: Some(DictationDownloadProgress { + downloaded_bytes: 0, + total_bytes: None, + }), + error: None, + path: None, + }; + } + emit_status(&app, &refresh_status(&app, &state, &model_id).await); + + let app_handle = app.clone(); + let model_id_clone = model_id.clone(); + let 
task = tokio::spawn(async move { + let state = app_handle.state::(); + let model_dir = model_dir(&app_handle); + let model_path = match model_path(&app_handle, &model_id_clone) { + Ok(path) => path, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(error), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + let temp_path = match model_temp_path(&app_handle, &model_id_clone) { + Ok(path) => path, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(error), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + + if let Err(error) = tokio::fs::create_dir_all(&model_dir).await { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to create model directory: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + + let (url, expected_sha) = match model_info(&model_id_clone) { + Some(info) => (info.url, info.sha256), + None => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some("Unknown dictation model.".to_string()), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + let client = match reqwest::Client::builder() + .connect_timeout(Duration::from_secs(10)) + .timeout(Duration::from_secs(30 * 60)) + .build() + { + Ok(client) => client, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: 
model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to configure download client: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + let response = match client.get(url).send().await { + Ok(response) => response, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to download model: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + let response = match response.error_for_status() { + Ok(response) => response, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Model download failed: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + + let total = response.content_length(); + let mut downloaded = 0u64; + let mut file = match tokio::fs::File::create(&temp_path).await { + Ok(file) => file, + Err(error) => { + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to write model: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + + let mut response = response; + let mut hasher = Sha256::new(); + let mut last_progress = Instant::now(); + loop { + let cancel = { + let dictation = state.dictation.lock().await; + dictation + .download_cancel + .as_ref() + .map(|flag| flag.load(Ordering::Relaxed)) + .unwrap_or(false) + }; + if cancel { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = missing_status(&model_id_clone); + 
update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + + let chunk = match response.chunk().await { + Ok(Some(chunk)) => chunk, + Ok(None) => break, + Err(error) => { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Model download failed: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + }; + + if let Err(error) = file.write_all(&chunk).await { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to write model: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + downloaded += chunk.len() as u64; + hasher.update(&chunk); + + if last_progress.elapsed() >= Duration::from_millis(150) { + last_progress = Instant::now(); + let status = DictationModelStatus { + state: DictationModelState::Downloading, + model_id: model_id_clone.clone(), + progress: Some(DictationDownloadProgress { + downloaded_bytes: downloaded, + total_bytes: total, + }), + error: None, + path: None, + }; + update_status(&app_handle, &state, status).await; + } + } + + let hash = hasher.finalize(); + let mut hash_hex = String::with_capacity(64); + for byte in hash { + use std::fmt::Write; + let _ = write!(&mut hash_hex, "{:02x}", byte); + } + if hash_hex != expected_sha { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some("Model hash mismatch; download canceled.".to_string()), + path: None, + }; + update_status(&app_handle, &state, 
status).await; + clear_download_state(&state).await; + return; + } + + if let Err(error) = file.flush().await { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to finalize model: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + + if let Err(error) = tokio::fs::rename(&temp_path, &model_path).await { + let _ = tokio::fs::remove_file(&temp_path).await; + let status = DictationModelStatus { + state: DictationModelState::Error, + model_id: model_id_clone.clone(), + progress: None, + error: Some(format!("Failed to move model into place: {error}")), + path: None, + }; + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + return; + } + + let status = ready_status(&model_id_clone, &model_path); + update_status(&app_handle, &state, status).await; + clear_download_state(&state).await; + }); + + { + let mut dictation = state.dictation.lock().await; + dictation.download_task = Some(task); + } + + Ok(refresh_status(&app, &state, &model_id).await) +} + +#[tauri::command] +pub(crate) async fn dictation_cancel_download( + app: AppHandle, + state: State<'_, AppState>, + model_id: Option, +) -> Result { + let model_id = resolve_model_id(&state, model_id).await; + { + let mut dictation = state.dictation.lock().await; + if let Some(flag) = dictation.download_cancel.take() { + flag.store(true, Ordering::Relaxed); + } + if let Some(task) = dictation.download_task.take() { + task.abort(); + } + dictation.model_status = missing_status(&model_id); + } + if let Ok(temp_path) = model_temp_path(&app, &model_id) { + let _ = tokio::fs::remove_file(&temp_path).await; + } + let status = refresh_status(&app, &state, &model_id).await; + emit_status(&app, &status); + Ok(status) +} + +#[tauri::command] +pub(crate) 
async fn dictation_remove_model(
+    app: AppHandle,
+    state: State<'_, AppState>,
+    model_id: Option<String>,
+) -> Result<DictationModelStatus, String> {
+    // Command contract: delete the model file when it exists, drop the cached
+    // Whisper context if it was loaded from that model, mark the model as
+    // missing, then emit and return the refreshed status. Fails only when the
+    // model path cannot be resolved or the file cannot be removed.
+    let model_id = resolve_model_id(&state, model_id).await;
+    let model_path = model_path(&app, &model_id)?;
+    if model_path.exists() {
+        tokio::fs::remove_file(&model_path)
+            .await
+            .map_err(|error| format!("Failed to remove model: {error}"))?;
+    }
+    {
+        let mut dictation = state.dictation.lock().await;
+        if dictation
+            .cached_context
+            .as_ref()
+            .map(|cached| cached.model_id.as_str() == model_id)
+            .unwrap_or(false)
+        {
+            dictation.cached_context = None;
+        }
+        dictation.model_status = missing_status(&model_id);
+    }
+    let status = refresh_status(&app, &state, &model_id).await;
+    emit_status(&app, &status);
+    Ok(status)
+}
+
+/// Tauri command: starts a dictation session (microphone capture).
+///
+/// Requires the resolved model to be `Ready` and the session state to be
+/// `Idle`; otherwise an `Error` event is emitted and the same message is
+/// returned as `Err`. Capture runs on a dedicated OS thread
+/// (`start_capture_thread`); this command waits until that thread reports the
+/// sample rate (or a failure), stores the session handle, switches the state
+/// to `Listening` and emits the matching `State` event.
+#[tauri::command]
+pub(crate) async fn dictation_start(
+    preferred_language: Option<String>,
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    let model_id = resolve_model_id(&state, None).await;
+    let model_status = refresh_status(&app, &state, &model_id).await;
+    if model_status.state != DictationModelState::Ready {
+        let message = "Dictation model is not downloaded yet.".to_string();
+        emit_event(&app, DictationEvent::Error { message: message.clone() });
+        return Err(message);
+    }
+    {
+        let dictation = state.dictation.lock().await;
+        if dictation.session_state != DictationSessionState::Idle {
+            let message = "Dictation is already active.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+    }
+
+    let audio = Arc::new(Mutex::new(Vec::new()));
+    let (stop_tx, stop_rx) = mpsc::channel();
+    // The capture thread gets its own sender so a stream error can stop it.
+    let stop_tx_thread = stop_tx.clone();
+    let (ready_tx, ready_rx) = oneshot::channel();
+    let (stopped_tx, stopped_rx) = oneshot::channel();
+    let app_handle = app.clone();
+    let audio_capture = audio.clone();
+
+    std::thread::spawn(move || {
+        start_capture_thread(
+            app_handle,
+            audio_capture,
+            stop_rx,
+            stop_tx_thread,
+            stopped_tx,
+            ready_tx,
+        );
+    });
+
+    let sample_rate = match ready_rx.await {
+        Ok(Ok(rate)) => rate,
+        Ok(Err(message)) => {
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        Err(_) => {
+            let message = "Failed to start microphone capture.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+    };
+
+    {
+        let mut dictation = state.dictation.lock().await;
+        // The lock was released while the microphone was opening, so another
+        // `dictation_start` may have claimed the session in the meantime.
+        // Shut this capture down instead of overwriting that session.
+        if dictation.session_state != DictationSessionState::Idle {
+            drop(dictation);
+            let _ = stop_tx.send(());
+            let _ = stopped_rx.await;
+            let message = "Dictation is already active.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        dictation.session_state = DictationSessionState::Listening;
+        dictation.session = Some(DictationSessionHandle {
+            stop: stop_tx,
+            stopped: stopped_rx,
+            audio,
+            sample_rate,
+            model_id: model_id.clone(),
+            preferred_language,
+        });
+    }
+
+    emit_event(
+        &app,
+        DictationEvent::State {
+            state: DictationSessionState::Listening,
+        },
+    );
+
+    Ok(DictationSessionState::Listening)
+}
+
+/// Ends a processing run that failed with `message`.
+///
+/// If the run was cancelled in the meantime, only the cancel bookkeeping is
+/// cleared (the same handling as the other cancel checks in `dictation_stop`).
+/// Otherwise an `Error` event is emitted, the cancel bookkeeping is cleared,
+/// and the session returns to `Idle` with a matching `State` event.
+async fn fail_dictation_processing(
+    app: &AppHandle,
+    cancel_flag: &Arc<AtomicBool>,
+    message: String,
+) {
+    if cancel_flag.load(Ordering::Relaxed) {
+        clear_processing_cancel(app, cancel_flag).await;
+        return;
+    }
+    emit_event(app, DictationEvent::Error { message });
+    clear_processing_cancel(app, cancel_flag).await;
+    let state_handle = app.state::<AppState>();
+    let mut dictation = state_handle.dictation.lock().await;
+    dictation.session_state = DictationSessionState::Idle;
+    emit_event(
+        app,
+        DictationEvent::State {
+            state: DictationSessionState::Idle,
+        },
+    );
+}
+
+/// Tauri command: stops listening and transcribes the captured audio.
+///
+/// Only valid while `Listening`. The session is moved to `Processing`, the
+/// capture thread is asked to stop and awaited, and a background task then
+/// loads (or reuses the cached) Whisper context for the session's model and
+/// runs `transcribe_audio` on a blocking thread. A non-empty transcript is
+/// emitted as a `Transcript` event, failures as `Error` events, and the
+/// session finally returns to `Idle`. The command returns `Processing` as
+/// soon as the background task has been spawned.
+#[tauri::command]
+pub(crate) async fn dictation_stop(
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    let cancel_flag = Arc::new(AtomicBool::new(false));
+    let (audio, sample_rate, model_id, preferred_language, stopped, stop_tx) = {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.session_state != DictationSessionState::Listening {
+            let message = "Dictation is not currently listening.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        let session = match dictation.session.take() {
+            Some(session) => session,
+            None => {
+                // Without a session there is nothing to process; go back to
+                // `Idle` rather than leaving the state stuck in `Processing`.
+                dictation.session_state = DictationSessionState::Idle;
+                emit_event(
+                    &app,
+                    DictationEvent::State {
+                        state: DictationSessionState::Idle,
+                    },
+                );
+                return Err("Dictation session is unavailable.".to_string());
+            }
+        };
+        dictation.session_state = DictationSessionState::Processing;
+        dictation.processing_cancel = Some(Arc::clone(&cancel_flag));
+        (
+            session.audio,
+            session.sample_rate,
+            session.model_id,
+            session.preferred_language,
+            session.stopped,
+            session.stop,
+        )
+    };
+
+    emit_event(
+        &app,
+        DictationEvent::State {
+            state: DictationSessionState::Processing,
+        },
+    );
+
+    let app_handle = app.clone();
+    // Ask the capture thread to stop and wait until it has released the mic.
+    let _ = stop_tx.send(());
+    let _ = stopped.await;
+    tokio::spawn(async move {
+        // Move the samples out of the shared buffer. A poisoned lock only
+        // means the audio callback panicked; the data is still usable.
+        let samples = {
+            let mut guard = audio
+                .lock()
+                .unwrap_or_else(|poisoned| poisoned.into_inner());
+            std::mem::take(&mut *guard)
+        };
+        if cancel_flag.load(Ordering::Relaxed) {
+            clear_processing_cancel(&app_handle, &cancel_flag).await;
+            return;
+        }
+
+        let state_handle = app_handle.state::<AppState>();
+        let cached_context = {
+            let dictation = state_handle.dictation.lock().await;
+            dictation
+                .cached_context
+                .as_ref()
+                .filter(|cached| cached.model_id == model_id)
+                .map(|cached| Arc::clone(&cached.context))
+        };
+
+        let context = if let Some(context) = cached_context {
+            context
+        } else {
+            let model_path = match model_path(&app_handle, &model_id) {
+                Ok(path) => path,
+                Err(error) => {
+                    fail_dictation_processing(&app_handle, &cancel_flag, error).await;
+                    return;
+                }
+            };
+            let path = model_path.to_string_lossy().into_owned();
+            // Loading the model is blocking work; keep it off the async runtime.
+            let created = tokio::task::spawn_blocking(move || {
+                WhisperContext::new_with_params(&path, WhisperContextParameters::default())
+            })
+            .await;
+            let context = match created {
+                Ok(Ok(context)) => context,
+                Ok(Err(error)) => {
+                    let message = format!("Failed to load Whisper model: {error}");
+                    fail_dictation_processing(&app_handle, &cancel_flag, message).await;
+                    return;
+                }
+                Err(error) => {
+                    let message = format!("Failed to load Whisper model: {error}");
+                    fail_dictation_processing(&app_handle, &cancel_flag, message).await;
+                    return;
+                }
+            };
+            let context = Arc::new(context);
+            let mut dictation = state_handle.dictation.lock().await;
+            dictation.cached_context = Some(CachedWhisperContext {
+                model_id: model_id.clone(),
+                context: Arc::clone(&context),
+            });
+            context
+        };
+
+        let result = tokio::task::spawn_blocking(move || {
+            transcribe_audio(samples, sample_rate, &context, preferred_language)
+        })
+        .await;
+
+        let outcome = match result {
+            Ok(result) => result,
+            Err(error) => Err(format!("Transcription task failed: {error}")),
+        };
+
+        // A cancel that arrived during transcription discards the result.
+        if cancel_flag.load(Ordering::Relaxed) {
+            clear_processing_cancel(&app_handle, &cancel_flag).await;
+            return;
+        }
+
+        match outcome {
+            Ok(text) => {
+                if !text.trim().is_empty() {
+                    emit_event(
+                        &app_handle,
+                        DictationEvent::Transcript { text },
+                    );
+                }
+            }
+            Err(message) => {
+                emit_event(
+                    &app_handle,
+                    DictationEvent::Error { message },
+                );
+            }
+        }
+
+        clear_processing_cancel(&app_handle, &cancel_flag).await;
+        let state_handle = app_handle.state::<AppState>();
+        let mut dictation = state_handle.dictation.lock().await;
+        dictation.session_state = DictationSessionState::Idle;
+        emit_event(
+            &app_handle,
+            DictationEvent::State {
+                state: DictationSessionState::Idle,
+            },
+        );
+    });
+
+    Ok(DictationSessionState::Processing)
+}
+
+/// Tauri command: cancels the current dictation session.
+///
+/// While `Processing`, the stored cancel flag is raised, the state returns to
+/// `Idle`, and `State` plus `Canceled` events are emitted. While `Listening`,
+/// the capture thread is stopped, the captured audio is discarded, and the
+/// same events are emitted. In any other state an `Error` event is emitted
+/// and its message returned as `Err`.
+#[tauri::command]
+pub(crate) async fn dictation_cancel(
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.session_state == DictationSessionState::Processing {
+            if let Some(flag) = dictation.processing_cancel.take() {
+                flag.store(true, Ordering::Relaxed);
+            }
+            dictation.session_state = DictationSessionState::Idle;
+            emit_event(
+                &app,
+                DictationEvent::State {
+                    state: DictationSessionState::Idle,
+                },
+            );
+            emit_event(
+                &app,
+                DictationEvent::Canceled {
+                    message: "Canceled".to_string(),
+                },
+            );
+            return Ok(DictationSessionState::Idle);
+        }
+    }
+ let (audio, stopped, stop_tx) = { + let mut dictation = state.dictation.lock().await; + if dictation.session_state != DictationSessionState::Listening { + let message = "Dictation is not currently listening.".to_string(); + emit_event(&app, DictationEvent::Error { message: message.clone() }); + return Err(message); + } + dictation.session_state = DictationSessionState::Idle; + let session = dictation + .session + .take() + .ok_or_else(|| "Dictation session is unavailable.".to_string())?; + (session.audio, session.stopped, session.stop) + }; + + let _ = stop_tx.send(()); + let _ = stopped.await; + { + let mut guard = audio.lock().unwrap(); + guard.clear(); + } + + emit_event( + &app, + DictationEvent::State { + state: DictationSessionState::Idle, + }, + ); + emit_event( + &app, + DictationEvent::Canceled { + message: "Canceled".to_string(), + }, + ); + + Ok(DictationSessionState::Idle) +}
+
+/// Body of the microphone capture thread.
+///
+/// Opens the default input device with its default config, builds an input
+/// stream for the device's sample format (`f32`, `i16` or `u16`) and starts
+/// it. Any setup failure is reported through `ready_tx` as `Err(message)`
+/// followed by a `stopped_tx` signal. On success the sample rate is sent
+/// through `ready_tx`, and the thread blocks on `stop_rx` until asked to
+/// stop; it then drops the stream and signals `stopped_tx`.
+///
+/// While capturing, a helper thread emits a `Level` event roughly every 33 ms.
+/// A stream error emits an `Error` event, requests a stop through `stop_tx`,
+/// and resets the session to `Idle` if it was not idle already.
+fn start_capture_thread(
+    app: AppHandle,
+    audio: Arc<Mutex<Vec<f32>>>,
+    stop_rx: mpsc::Receiver<()>,
+    stop_tx: mpsc::Sender<()>,
+    stopped_tx: oneshot::Sender<()>,
+    ready_tx: oneshot::Sender<Result<u32, String>>,
+) {
+    let host = cpal::default_host();
+    let device = host
+        .default_input_device()
+        .ok_or_else(|| "No microphone input device available.".to_string());
+    let device = match device {
+        Ok(device) => device,
+        Err(error) => {
+            let _ = ready_tx.send(Err(error));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+    let config = device
+        .default_input_config()
+        .map_err(|error| format!("Failed to read microphone config: {error}"));
+    let config = match config {
+        Ok(config) => config,
+        Err(error) => {
+            let _ = ready_tx.send(Err(error));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+    let sample_rate = config.sample_rate().0;
+    let sample_format = config.sample_format();
+    let stream_config: cpal::StreamConfig = config.into();
+    let channels = stream_config.channels as usize;
+    // Upper bound on buffered mono samples: MAX_CAPTURE_SECONDS of audio.
+    let max_samples = (sample_rate as usize)
+        .saturating_mul(MAX_CAPTURE_SECONDS as usize)
+        .max(1);
+    let app_handle = app.clone();
+    let audio_capture = audio.clone();
+    let stop_on_error = stop_tx.clone();
+
+    let err_fn = move |error| {
+        emit_event(
+            &app_handle,
+            DictationEvent::Error {
+                message: format!("Microphone error: {error}"),
+            },
+        );
+        let _ = stop_on_error.send(());
+        let state_app = app_handle.clone();
+        // The session state sits behind an async mutex, so reset it from an
+        // async task rather than from the audio error callback itself.
+        tauri::async_runtime::spawn(async move {
+            let state_handle = state_app.state::<AppState>();
+            let should_emit = {
+                let mut dictation = state_handle.dictation.lock().await;
+                if dictation.session_state == DictationSessionState::Idle {
+                    false
+                } else {
+                    dictation.session_state = DictationSessionState::Idle;
+                    dictation.session = None;
+                    true
+                }
+            };
+            if should_emit {
+                emit_event(
+                    &state_app,
+                    DictationEvent::State {
+                        state: DictationSessionState::Idle,
+                    },
+                );
+            }
+        });
+    };
+
+    // Latest input level, stored as the bit pattern of an `f32`.
+    let level_value = Arc::new(AtomicU32::new(0));
+
+    let stream = match sample_format {
+        SampleFormat::F32 => build_stream::<f32>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            audio_capture,
+            level_value.clone(),
+            err_fn,
+        ),
+        SampleFormat::I16 => build_stream::<i16>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            audio_capture,
+            level_value.clone(),
+            err_fn,
+        ),
+        SampleFormat::U16 => build_stream::<u16>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            audio_capture,
+            level_value.clone(),
+            err_fn,
+        ),
+        _ => {
+            let _ = ready_tx.send(Err("Unsupported microphone sample format.".to_string()));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+
+    let stream = match stream {
+        Ok(stream) => stream,
+        Err(error) => {
+            let _ = ready_tx.send(Err(error));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+    if let Err(error) = stream.play() {
+        let _ = ready_tx.send(Err(format!("Failed to start microphone: {error}")));
+        let _ = stopped_tx.send(());
+        return;
+    }
+
+    let running = Arc::new(AtomicBool::new(true));
+    let level_task_app = app.clone();
+    let level_task_value = level_value.clone();
+    let level_task_running = running.clone();
+    // Level meter: forwards the latest level until `running` is cleared.
+    std::thread::spawn(move || {
+        while level_task_running.load(Ordering::Relaxed) {
+            let value = f32::from_bits(level_task_value.load(Ordering::Relaxed));
+            emit_event(&level_task_app, DictationEvent::Level { value });
+            std::thread::sleep(Duration::from_millis(33));
+        }
+    });
+
+    eprintln!(
+        "dictation: capture started (rate={}Hz, channels={}, format={:?})",
+        sample_rate, channels, sample_format
+    );
+    let _ = ready_tx.send(Ok(sample_rate));
+    // Block until a stop is requested (by a command or by `err_fn`).
+    let _ = stop_rx.recv();
+    running.store(false, Ordering::Relaxed);
+    drop(stream);
+    let _ = stopped_tx.send(());
+}
+
+/// Builds a cpal input stream for sample type `T`.
+///
+/// The data callback averages the channels of every frame into one mono
+/// `f32` sample, appends the result to `audio` until it holds `max_samples`
+/// samples (later input is dropped), and stores the input level - the RMS of
+/// the callback's mono samples times 6, clamped to `0.0..=1.0` - into
+/// `level_value` as `f32` bits. `channels` is treated as at least 1.
+///
+/// Returns the stream, or a message if the device rejects the configuration.
+fn build_stream<T>(
+    device: &cpal::Device,
+    config: &cpal::StreamConfig,
+    channels: usize,
+    max_samples: usize,
+    audio: Arc<Mutex<Vec<f32>>>,
+    level_value: Arc<AtomicU32>,
+    err_fn: impl FnMut(cpal::StreamError) + Send + 'static,
+) -> Result<cpal::Stream, String>
+where
+    T: Sample + SizedSample,
+    f32: FromSample<T>,
+{
+    let channels = channels.max(1);
+    // Scratch buffer reused across callbacks to avoid allocating each time.
+    let mut mono_buffer: Vec<f32> = Vec::with_capacity(2048);
+    device
+        .build_input_stream(
+            config,
+            move |data: &[T], _| {
+                if data.is_empty() {
+                    return;
+                }
+                let mut sum = 0.0f32;
+                let mut frames = 0usize;
+                mono_buffer.clear();
+                // `reserve` counts from the current length (0 after `clear`),
+                // so ask for the full frame count, rounding up for a trailing
+                // partial frame. It is a no-op when capacity already suffices.
+                mono_buffer.reserve((data.len() + channels - 1) / channels);
+                for frame in data.chunks(channels) {
+                    let mut frame_sum = 0.0f32;
+                    let mut count = 0usize;
+                    for sample in frame {
+                        let value: f32 = sample.to_sample();
+                        frame_sum += value;
+                        count += 1;
+                    }
+                    if count == 0 {
+                        continue;
+                    }
+                    let mono = frame_sum / count as f32;
+                    mono_buffer.push(mono);
+                    sum += mono * mono;
+                    frames += 1;
+                }
+                if frames == 0 {
+                    return;
+                }
+                // A poisoned lock is skipped: the level is still updated.
+                if let Ok(mut buffer) = audio.lock() {
+                    if buffer.len() < max_samples {
+                        let remaining = max_samples.saturating_sub(buffer.len());
+                        let slice_len = remaining.min(mono_buffer.len());
+                        if slice_len > 0 {
+                            buffer.extend_from_slice(&mono_buffer[..slice_len]);
+                        }
+                    }
+                }
+                let rms = (sum / frames as f32).sqrt();
+                let scaled = (rms * 6.0).clamp(0.0, 1.0);
+                level_value.store(scaled.to_bits(), Ordering::Relaxed);
+            },
+            err_fn,
+            None,
+        )
+        .map_err(|error| format!("Failed to build microphone stream: {error}"))
+}
+
+/// Transcribes mono `samples` recorded at `sample_rate` with Whisper.
+///
+/// The audio is centred on its mean, scaled so its peak sits at 0.6 (gain
+/// capped at 10x) and clamped to `-1.0..=1.0`, then resampled to 16 kHz when
+/// needed. Language handling: when `preferred_language` is known to
+/// `get_lang_id`, language detection runs first and the preferred language is
+/// forced only if its probability is within 0.30 of the most likely language;
+/// in every other case Whisper auto-detects.
+///
+/// Returns the trimmed transcript. Empty input, or a run that yields no text,
+/// gives `Ok` with an empty string; recordings shorter than 0.2 s and Whisper
+/// failures give `Err` with a message.
+fn transcribe_audio(
+    samples: Vec<f32>,
+    sample_rate: u32,
+    context: &WhisperContext,
+    preferred_language: Option<String>,
+) -> Result<String, String> {
+    if samples.is_empty() {
+        return Ok(String::new());
+    }
+    let mut max = 0.0f32;
+    let mut sum = 0.0f32;
+    let mean = samples.iter().copied().sum::<f32>() / samples.len() as f32;
+    let mut normalized = Vec::with_capacity(samples.len());
+    for value in &samples {
+        let centered = value - mean;
+        let abs = centered.abs();
+        if abs > max {
+            max = abs;
+        }
+        sum += centered * centered;
+        normalized.push(centered);
+    }
+    let rms = (sum / samples.len() as f32).sqrt();
+    let duration = samples.len() as f32 / sample_rate as f32;
+    let gain = if max > 0.0 { (0.6 / max).min(10.0) } else { 1.0 };
+    if gain != 1.0 {
+        for value in &mut normalized {
+            *value = (*value * gain).clamp(-1.0, 1.0);
+        }
+    }
+    eprintln!(
+        "dictation: captured {} samples ({:.2}s), max={:.4}, rms={:.4}, gain={:.2}",
+        samples.len(),
+        duration,
+        max,
+        rms,
+        gain
+    );
+    if duration < 0.2 {
+        return Err("Audio too short for transcription.".to_string());
+    }
+    let audio = if sample_rate == 16_000 {
+        normalized
+    } else {
+        resample_audio(&normalized, sample_rate, 16_000)
+    };
+
+    let mut state = context
+        .create_state()
+        .map_err(|error| format!("Failed to initialize Whisper: {error}"))?;
+    let threads = std::thread::available_parallelism()
+        .map(|value| value.get())
+        .unwrap_or(4);
+    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
+    params.set_print_special(false);
+    params.set_print_progress(false);
+    params.set_print_realtime(false);
+    params.set_print_timestamps(false);
+    params.set_no_timestamps(true);
+    params.set_translate(false);
+    params.set_no_context(true);
+    params.set_single_segment(false);
+    let mut forced_language: Option<String> = None;
+    if let Some(preferred) = preferred_language {
+        if let Some(pref_id) = get_lang_id(&preferred) {
+            if state.pcm_to_mel(&audio, threads).is_ok() {
+                if let Ok((_detected, probs)) = state.lang_detect(0, threads) {
+                    let pref_index = pref_id.max(0) as usize;
+                    let pref_prob = probs.get(pref_index).copied().unwrap_or(0.0);
+                    let best_prob = probs
+                        .iter()
+                        .copied()
+                        .fold(0.0_f32, |acc, value| acc.max(value));
+                    if best_prob > 0.0 && (best_prob - pref_prob) <= 0.30 {
+                        forced_language = Some(preferred);
+                    }
+                }
+            }
+        }
+    }
+
+    if let Some(language) = forced_language.as_deref() {
+        // Use the preferred language only when detection is ambiguous.
+        params.set_language(Some(language));
+    } else {
+        // Auto-detect language while still running transcription.
+        params.set_language(Some("auto"));
+    }
+    params.set_n_threads(threads as i32);
+
+    state
+        .full(params, &audio)
+        .map_err(|error| format!("Transcription failed: {error}"))?;
+
+    let segments = state
+        .full_n_segments()
+        .map_err(|error| format!("Failed to read segments: {error}"))?;
+    eprintln!("dictation: whisper segments={}", segments);
+    let mut transcript = String::new();
+    for index in 0..segments {
+        let segment = state
+            .full_get_segment_text(index)
+            .map_err(|error| format!("Failed to read segment: {error}"))?;
+        transcript.push_str(&segment);
+    }
+    let cleaned = transcript.trim().to_string();
+    if cleaned.is_empty() {
+        eprintln!(
+            "dictation: no speech detected (rms={:.4}, max={:.4}, duration={:.2}s, segments={})",
+            rms, max, duration, segments
+        );
+        return Ok(String::new());
+    }
+    Ok(cleaned)
+}
+
+/// Resamples `samples` from `from_rate` to `to_rate` by linear interpolation.
+///
+/// Returns a copy of the input when the rates are equal or the input is
+/// empty, and an empty vector when either rate is zero. The output has
+/// `round(len * to_rate / from_rate)` samples; positions past the last input
+/// sample repeat that sample.
+fn resample_audio(samples: &[f32], from_rate: u32, to_rate: u32) -> Vec<f32> {
+    if from_rate == to_rate || samples.is_empty() {
+        return samples.to_vec();
+    }
+    // A zero source rate would make the ratio infinite and the requested
+    // capacity overflow; a zero target rate has no samples to produce.
+    if from_rate == 0 || to_rate == 0 {
+        return Vec::new();
+    }
+    let ratio = to_rate as f64 / from_rate as f64;
+    let new_len = (samples.len() as f64 * ratio).round() as usize;
+    let mut out = Vec::with_capacity(new_len.max(1));
+    for i in 0..new_len {
+        // Position of output sample `i` on the input time axis.
+        let pos = i as f64 / ratio;
+        let idx = pos.floor() as usize;
+        let frac = pos - idx as f64;
+        let s0 = samples.get(idx).copied().unwrap_or(0.0);
+        let s1 = samples.get(idx + 1).copied().unwrap_or(s0);
+        out.push((s0 as f64 + (s1 as f64 - s0 as f64) * frac) as f32);
+    }
+    out
+}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index b6f9f1ff1..57d8af5d4 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -3,6 +3,7 @@ use tauri::{Manager, WebviewUrl, WebviewWindowBuilder}; mod backend; mod codex; +mod dictation; mod event_sink; mod git; mod prompts; @@ -172,7 +173,14 @@ pub fn run() { terminal::terminal_open, terminal::terminal_write, terminal::terminal_resize, - terminal::terminal_close + terminal::terminal_close, + dictation::dictation_model_status, + dictation::dictation_download_model, + dictation::dictation_cancel_download, + dictation::dictation_remove_model, + dictation::dictation_start, + dictation::dictation_stop, + dictation::dictation_cancel ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src-tauri/src/state.rs b/src-tauri/src/state.rs index b0d6f426a..5465e697f 100644 --- a/src-tauri/src/state.rs +++ b/src-tauri/src/state.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use tauri::{AppHandle, Manager}; use tokio::sync::Mutex; +use crate::dictation::DictationState; use crate::storage::{read_settings, read_workspaces}; use crate::types::{AppSettings, WorkspaceEntry}; @@ -16,6 +17,7 @@ pub(crate) struct AppState { pub(crate) storage_path: PathBuf, pub(crate) settings_path: PathBuf, pub(crate) app_settings: Mutex, + pub(crate) dictation: Mutex, } impl AppState { @@ -35,6 +37,7 @@ impl AppState { storage_path, settings_path, app_settings: Mutex::new(app_settings), + dictation: Mutex::new(DictationState::default()), } } } diff --git a/src-tauri/src/types.rs b/src-tauri/src/types.rs index e35e80701..a9714928e 100644 --- a/src-tauri/src/types.rs +++ b/src-tauri/src/types.rs @@ -142,6 +142,20 @@ pub(crate) struct AppSettings { rename = "experimentalSteerEnabled" )] pub(crate)
experimental_steer_enabled: bool, + #[serde(default = "default_dictation_enabled", rename = "dictationEnabled")] + pub(crate) dictation_enabled: bool, + #[serde( + default = "default_dictation_model_id", + rename = "dictationModelId" + )] + pub(crate) dictation_model_id: String, + #[serde(default, rename = "dictationPreferredLanguage")] + pub(crate) dictation_preferred_language: Option, + #[serde( + default = "default_dictation_hold_key", + rename = "dictationHoldKey" + )] + pub(crate) dictation_hold_key: String, } fn default_access_mode() -> String { @@ -160,6 +174,18 @@ fn default_experimental_steer_enabled() -> bool { false } +fn default_dictation_enabled() -> bool { + false +} + +fn default_dictation_model_id() -> String { + "base".to_string() +} + +fn default_dictation_hold_key() -> String { + "alt".to_string() +} + impl Default for AppSettings { fn default() -> Self { Self { @@ -168,6 +194,10 @@ impl Default for AppSettings { ui_scale: 1.0, notification_sounds_enabled: true, experimental_steer_enabled: false, + dictation_enabled: false, + dictation_model_id: default_dictation_model_id(), + dictation_preferred_language: None, + dictation_hold_key: default_dictation_hold_key(), } } } @@ -184,6 +214,10 @@ mod tests { assert!((settings.ui_scale - 1.0).abs() < f64::EPSILON); assert!(settings.notification_sounds_enabled); assert!(!settings.experimental_steer_enabled); + assert!(!settings.dictation_enabled); + assert_eq!(settings.dictation_model_id, "base"); + assert!(settings.dictation_preferred_language.is_none()); + assert_eq!(settings.dictation_hold_key, "alt"); } #[test] diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 21698e16c..ee80ef0c4 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -46,7 +46,10 @@ "icons/128x128@2x.png", "icons/icon.icns", "icons/icon.ico" - ] + ], + "macOS": { + "infoPlist": "Info.plist" + } }, "plugins": { "updater": { diff --git a/src/App.tsx b/src/App.tsx index ba0baad21..be1a1ae59 
100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -58,6 +58,9 @@ import { import { useAppSettings } from "./features/settings/hooks/useAppSettings"; import { useUpdater } from "./features/update/hooks/useUpdater"; import { useComposerImages } from "./features/composer/hooks/useComposerImages"; +import { useDictationModel } from "./features/dictation/hooks/useDictationModel"; +import { useDictation } from "./features/dictation/hooks/useDictation"; +import { useHoldToDictate } from "./features/dictation/hooks/useHoldToDictate"; import { useQueuedSend } from "./features/threads/hooks/useQueuedSend"; import { useWorktreePrompt } from "./features/workspaces/hooks/useWorktreePrompt"; import { useUiScaleShortcuts } from "./features/layout/hooks/useUiScaleShortcuts"; @@ -91,6 +94,20 @@ function MainApp() { saveSettings, doctor } = useAppSettings(); + const dictationModel = useDictationModel(appSettings.dictationModelId); + const { + state: dictationState, + level: dictationLevel, + transcript: dictationTranscript, + error: dictationError, + hint: dictationHint, + start: startDictation, + stop: stopDictation, + cancel: cancelDictation, + clearTranscript: clearDictationTranscript, + clearError: clearDictationError, + clearHint: clearDictationHint, + } = useDictation(); const { uiScale, scaleShortcutTitle, @@ -152,11 +169,66 @@ function MainApp() { const [composerInsert, setComposerInsert] = useState( null ); + type SettingsSection = "projects" | "display" | "dictation" | "codex" | "experimental"; const [settingsOpen, setSettingsOpen] = useState(false); + const [settingsSection, setSettingsSection] = useState( + null, + ); const [reduceTransparency, setReduceTransparency] = useState(() => { const stored = localStorage.getItem("reduceTransparency"); return stored === "true"; }); + const dictationReady = dictationModel.status?.state === "ready"; + const holdDictationKey = (appSettings.dictationHoldKey ?? 
"").toLowerCase(); + const handleToggleDictation = useCallback(async () => { + if (!appSettings.dictationEnabled || !dictationReady) { + return; + } + try { + if (dictationState === "listening") { + await stopDictation(); + return; + } + if (dictationState === "idle") { + await startDictation(appSettings.dictationPreferredLanguage); + } + } catch { + // Errors are surfaced through dictation events. + } + }, [ + appSettings.dictationEnabled, + appSettings.dictationPreferredLanguage, + dictationReady, + dictationState, + startDictation, + stopDictation, + ]); + + useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key !== "Escape") { + return; + } + if (dictationState !== "listening" && dictationState !== "processing") { + return; + } + event.preventDefault(); + void cancelDictation(); + }; + window.addEventListener("keydown", handleEscape); + return () => window.removeEventListener("keydown", handleEscape); + }, [dictationState, cancelDictation]); + + useHoldToDictate({ + enabled: appSettings.dictationEnabled, + ready: dictationReady, + state: dictationState, + preferredLanguage: appSettings.dictationPreferredLanguage, + holdKey: holdDictationKey, + startDictation, + stopDictation, + cancelDictation, + }); const { debugOpen, setDebugOpen, @@ -593,7 +665,13 @@ function MainApp() { }); } - const handleOpenSettings = () => setSettingsOpen(true); + const handleOpenSettings = useCallback( + (section?: SettingsSection) => { + setSettingsSection(section ?? 
null); + setSettingsOpen(true); + }, + [], + ); const orderValue = (entry: WorkspaceInfo) => typeof entry.settings.sortOrder === "number" @@ -704,7 +782,8 @@ function MainApp() { activeRateLimits, approvals, handleApprovalDecision, - onOpenSettings: handleOpenSettings, + onOpenSettings: () => handleOpenSettings(), + onOpenDictationSettings: () => handleOpenSettings("dictation"), onOpenDebug: handleDebugClick, showDebugButton, onAddWorkspace: handleAddWorkspace, @@ -885,6 +964,18 @@ function MainApp() { prompts, files, textareaRef: composerInputRef, + dictationEnabled: appSettings.dictationEnabled && dictationReady, + dictationState, + dictationLevel, + onToggleDictation: handleToggleDictation, + dictationTranscript, + onDictationTranscriptHandled: (id) => { + clearDictationTranscript(id); + }, + dictationError, + onDismissDictationError: clearDictationError, + dictationHint, + onDismissDictationHint: clearDictationHint, showComposer, plan: activePlan, debugEntries, @@ -1010,7 +1101,10 @@ function MainApp() { {settingsOpen && ( setSettingsOpen(false)} + onClose={() => { + setSettingsOpen(false); + setSettingsSection(null); + }} onMoveWorkspace={handleMoveWorkspace} onDeleteWorkspace={(workspaceId) => { void removeWorkspace(workspaceId); @@ -1028,6 +1122,11 @@ function MainApp() { scaleShortcutTitle={scaleShortcutTitle} scaleShortcutText={scaleShortcutText} onTestNotificationSound={handleTestNotificationSound} + dictationModelStatus={dictationModel.status} + onDownloadDictationModel={dictationModel.download} + onCancelDictationDownload={dictationModel.cancel} + onRemoveDictationModel={dictationModel.remove} + initialSection={settingsSection ?? 
undefined} /> )} diff --git a/src/features/composer/components/Composer.tsx b/src/features/composer/components/Composer.tsx index fc0a3ed25..3b60c8625 100644 --- a/src/features/composer/components/Composer.tsx +++ b/src/features/composer/components/Composer.tsx @@ -1,5 +1,11 @@ import { useCallback, useEffect, useRef, useState } from "react"; -import type { CustomPromptOption, QueuedMessage, ThreadTokenUsage } from "../../../types"; +import type { + CustomPromptOption, + DictationTranscript, + QueuedMessage, + ThreadTokenUsage, +} from "../../../types"; +import { computeDictationInsertion } from "../../../utils/dictation"; import { useComposerAutocompleteState } from "../hooks/useComposerAutocompleteState"; import { ComposerInput } from "./ComposerInput"; import { ComposerMetaBar } from "./ComposerMetaBar"; @@ -40,6 +46,17 @@ type ComposerProps = { insertText?: QueuedMessage | null; onInsertHandled?: (id: string) => void; textareaRef?: React.RefObject; + dictationEnabled?: boolean; + dictationState?: "idle" | "listening" | "processing"; + dictationLevel?: number; + onToggleDictation?: () => void; + onOpenDictationSettings?: () => void; + dictationTranscript?: DictationTranscript | null; + onDictationTranscriptHandled?: (id: string) => void; + dictationError?: string | null; + onDismissDictationError?: () => void; + dictationHint?: string | null; + onDismissDictationHint?: () => void; }; export function Composer({ @@ -77,11 +94,23 @@ export function Composer({ insertText = null, onInsertHandled, textareaRef: externalTextareaRef, + dictationEnabled = false, + dictationState = "idle", + dictationLevel = 0, + onToggleDictation, + onOpenDictationSettings, + dictationTranscript = null, + onDictationTranscriptHandled, + dictationError = null, + onDismissDictationError, + dictationHint = null, + onDismissDictationHint, }: ComposerProps) { const [text, setText] = useState(draftText); const [selectionStart, setSelectionStart] = useState(null); const internalRef = 
useRef(null); const textareaRef = externalTextareaRef ?? internalRef; + const isDictationBusy = dictationState !== "idle"; useEffect(() => { setText((prev) => (prev === draftText ? prev : draftText)); @@ -156,6 +185,44 @@ export function Composer({ onInsertHandled?.(insertText.id); }, [insertText, onInsertHandled, setComposerText]); + useEffect(() => { + if (!dictationTranscript) { + return; + } + const textToInsert = dictationTranscript.text.trim(); + if (!textToInsert) { + onDictationTranscriptHandled?.(dictationTranscript.id); + return; + } + const textarea = textareaRef.current; + const start = textarea?.selectionStart ?? selectionStart ?? text.length; + const end = textarea?.selectionEnd ?? start; + const { nextText, nextCursor } = computeDictationInsertion( + text, + textToInsert, + start, + end, + ); + setComposerText(nextText); + requestAnimationFrame(() => { + if (!textareaRef.current) { + return; + } + textareaRef.current.focus(); + textareaRef.current.setSelectionRange(nextCursor, nextCursor); + handleSelectionChange(nextCursor); + }); + onDictationTranscriptHandled?.(dictationTranscript.id); + }, [ + dictationTranscript, + handleSelectionChange, + onDictationTranscriptHandled, + selectionStart, + setComposerText, + text, + textareaRef, + ]); + return (