diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 83d68cf2d..b393d2f09 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,6 +12,8 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
+      - name: Install CMake
+        run: command -v cmake >/dev/null 2>&1 || brew install cmake
       - name: Rust tests
         run: cargo test
         working-directory: src-tauri
@@ -24,6 +26,8 @@ jobs:
           node-version: "20"
           cache: "npm"
       - uses: dtolnay/rust-toolchain@stable
+      - name: Install CMake
+        run: command -v cmake >/dev/null 2>&1 || brew install cmake
       - name: Install dependencies
         run: npm ci
       - name: Typecheck
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 5f75cba42..b192abafa 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -38,6 +38,9 @@ jobs:
         with:
           workspaces: './src-tauri -> target'
 
+      - name: Install CMake
+        run: command -v cmake >/dev/null 2>&1 || brew install cmake  # no-op if cmake is already on PATH
+
       - name: Install dependencies
         run: npm ci
 
@@ -178,7 +181,7 @@ jobs:
       - name: install dependencies (linux only)
         run: |
           sudo apt-get update
-          sudo apt-get install -y libwebkit2gtk-4.1-dev libgtk-3-dev libayatana-appindicator3-dev librsvg2-dev patchelf libfuse2 xdg-utils
+          sudo apt-get install -y cmake libasound2-dev libwebkit2gtk-4.1-dev libgtk-3-dev libayatana-appindicator3-dev librsvg2-dev patchelf libfuse2 xdg-utils
 
       - name: setup node
         uses: actions/setup-node@v4
diff --git a/README.md b/README.md
index 76242f3d8..4105d0c71 100644
--- a/README.md
+++ b/README.md
@@ -28,11 +28,17 @@ CodexMonitor is a macOS Tauri app for orchestrating multiple Codex agents across
 
 - Node.js + npm
 - Rust toolchain (stable)
+- CMake (required to build native Whisper bindings)
 - Codex installed on your system and available as `codex` in `PATH`
 - Git CLI (used for worktree operations)
 - GitHub CLI (`gh`) for the Issues panel (optional)
 
 If the `codex` binary is not in `PATH`, update the backend to pass a custom path per workspace.
+If the build fails while compiling native dependencies (for example, because CMake is missing), check your setup with:
+
+```bash
+npm run doctor
+```
 
 ## Getting Started
 
diff --git a/package.json b/package.json
index 316168bf1..98b5dbebe 100644
--- a/package.json
+++ b/package.json
@@ -7,9 +7,13 @@
     "dev": "vite",
     "build": "tsc && vite build",
     "build:appimage": "NO_STRIP=1 tauri build --bundles appimage",
+    "doctor": "sh scripts/doctor.sh",
+    "doctor:strict": "sh scripts/doctor.sh --strict",
     "typecheck": "tsc --noEmit",
     "preview": "vite preview",
-    "tauri": "tauri"
+    "tauri": "tauri",
+    "tauri:dev": "npm run doctor:strict && tauri dev",
+    "tauri:build": "npm run doctor:strict && tauri build"
   },
   "dependencies": {
     "@tauri-apps/api": "^2",
diff --git a/scripts/doctor.sh b/scripts/doctor.sh
new file mode 100644
index 000000000..f997f6451
--- /dev/null
+++ b/scripts/doctor.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env sh
+# Preflight check for native build prerequisites (currently only CMake).
+# Usage: doctor.sh [--strict]   (--strict exits 1 when something is missing)
+set -u
+
+# Exit status used when a dependency is missing: 0 (advisory) or 1 (strict).
+fail_status=0
+[ "${1:-}" = "--strict" ] && fail_status=1
+
+# Name of the missing tool; stays empty when cmake is found on PATH.
+absent=""
+command -v cmake >/dev/null 2>&1 || absent="cmake"
+
+if [ -z "$absent" ]; then
+  echo "Doctor: OK"
+  exit 0
+fi
+
+echo "Doctor: missing dependencies: $absent"
+
+# Print an install hint that matches the kernel name reported by uname.
+os_name="$(uname -s)"
+case "$os_name" in
+  Darwin)
+    echo "Install: brew install cmake"
+    ;;
+  Linux)
+    printf '%s\n' \
+      "Ubuntu/Debian: sudo apt-get install cmake" \
+      "Fedora: sudo dnf install cmake" \
+      "Arch: sudo pacman -S cmake"
+    ;;
+  MINGW*|MSYS*|CYGWIN*)
+    printf '%s\n' \
+      "Install: choco install cmake" \
+      "Or download from: https://cmake.org/download/"
+    ;;
+  *)
+    echo "Install CMake from: https://cmake.org/download/"
+    ;;
+esac
+
+exit "$fail_status"
diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index 7e43f59ad..6f2831d07 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -32,6 +32,28 @@ dependencies = [
  "alloc-no-stdlib",
 ]
 
+[[package]]
+name = "alsa"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed7572b7ba83a31e20d1b48970ee402d2e3e0537dcfe0a3ff4d6eb7508617d43"
+dependencies = [
+ "alsa-sys",
+ "bitflags 2.10.0",
+ "cfg-if",
+ "libc",
+]
+
+[[package]]
+name = "alsa-sys"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527"
+dependencies = [
+ "libc",
+ "pkg-config",
+]
+
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -107,7 +129,7 @@ dependencies = [
  "futures-lite",
  "parking",
  "polling",
- "rustix",
+ "rustix 1.1.3",
  "slab",
  "windows-sys 0.61.2",
 ]
@@ -138,7 +160,7 @@ dependencies = [
  "cfg-if",
  "event-listener",
  "futures-lite",
- "rustix",
+ "rustix 1.1.3",
 ]
 
 [[package]]
@@ -164,7 +186,7 @@ dependencies = [
  "cfg-if",
  "futures-core",
  "futures-io",
- "rustix",
+ "rustix 1.1.3",
  "signal-hook-registry",
  "slab",
  "windows-sys 0.61.2",
@@ -234,6 +256,47 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bindgen"
+version = "0.69.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
+dependencies = [
+ "bitflags 2.10.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.12.1",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 1.1.0",
+ "shlex",
+ "syn 2.0.114",
+ "which",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.72.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+dependencies = [
+ "bitflags 2.10.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 2.1.1",
+ "shlex",
+ "syn 2.0.114",
+]
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -423,6 +486,15 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
 
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
 [[package]]
 name = "cfb"
 version = "0.7.3"
@@ -468,16 +540,39 @@ dependencies = [
  "windows-link 0.2.1",
 ]
 
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading 0.8.9",
+]
+
+[[package]]
+name = "cmake"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "codex-monitor"
 version = "0.1.0"
 dependencies = [
+ "cpal",
  "fix-path-env",
  "git2",
  "ignore",
  "portable-pty",
+ "reqwest",
  "serde",
  "serde_json",
+ "sha2",
  "tauri",
  "tauri-build",
  "tauri-plugin-dialog",
@@ -486,6 +581,7 @@ dependencies = [
  "tauri-plugin-updater",
  "tokio",
  "uuid",
+ "whisper-rs",
 ]
 
 [[package]]
@@ -563,6 +659,49 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "coreaudio-rs"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace"
+dependencies = [
+ "bitflags 1.3.2",
+ "core-foundation-sys",
+ "coreaudio-sys",
+]
+
+[[package]]
+name = "coreaudio-sys"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6"
+dependencies = [
+ "bindgen 0.72.1",
+]
+
+[[package]]
+name = "cpal"
+version = "0.15.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "873dab07c8f743075e57f524c583985fbaf745602acbe916a01539364369a779"
+dependencies = [
+ "alsa",
+ "core-foundation-sys",
+ "coreaudio-rs",
+ "dasp_sample",
+ "jni",
+ "js-sys",
+ "libc",
+ "mach2",
+ "ndk 0.8.0",
+ "ndk-context",
+ "oboe",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "windows 0.54.0",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -697,6 +836,12 @@ dependencies = [
  "syn 2.0.114",
 ]
 
+[[package]]
+name = "dasp_sample"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
+
 [[package]]
 name = "deranged"
 version = "0.5.5"
@@ -856,6 +1001,12 @@ version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
 
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "embed-resource"
 version = "3.0.6"
@@ -1067,6 +1218,12 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
 [[package]]
 name = "futf"
 version = "0.1.5"
@@ -1886,6 +2043,24 @@ dependencies = [
  "once_cell",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.17"
@@ -2008,6 +2183,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
 [[package]]
 name = "libappindicator"
 version = "0.9.0"
@@ -2028,7 +2209,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
 dependencies = [
  "gtk-sys",
- "libloading",
+ "libloading 0.7.4",
  "once_cell",
 ]
 
@@ -2062,6 +2243,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link 0.2.1",
+]
+
 [[package]]
 name = "libredox"
 version = "0.1.12"
@@ -2099,6 +2290,12 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.11.0"
@@ -2138,6 +2335,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 
+[[package]]
+name = "mach2"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "markup5ever"
 version = "0.14.1"
@@ -2199,6 +2405,12 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "minisign-verify"
 version = "0.2.4"
@@ -2247,6 +2459,20 @@ dependencies = [
  "windows-sys 0.60.2",
 ]
 
+[[package]]
+name = "ndk"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7"
+dependencies = [
+ "bitflags 2.10.0",
+ "jni-sys",
+ "log",
+ "ndk-sys 0.5.0+25.2.9519653",
+ "num_enum",
+ "thiserror 1.0.69",
+]
+
 [[package]]
 name = "ndk"
 version = "0.9.0"
@@ -2256,7 +2482,7 @@ dependencies = [
  "bitflags 2.10.0",
  "jni-sys",
  "log",
- "ndk-sys",
+ "ndk-sys 0.6.0+11769913",
  "num_enum",
  "raw-window-handle",
  "thiserror 1.0.69",
@@ -2268,6 +2494,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
 
+[[package]]
+name = "ndk-sys"
+version = "0.5.0+25.2.9519653"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691"
+dependencies = [
+ "jni-sys",
+]
+
 [[package]]
 name = "ndk-sys"
 version = "0.6.0+11769913"
@@ -2303,12 +2538,33 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
 
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "num-conv"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
 
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.114",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -2564,6 +2820,29 @@ dependencies = [
  "objc2-security",
 ]
 
+[[package]]
+name = "oboe"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb"
+dependencies = [
+ "jni",
+ "ndk 0.8.0",
+ "ndk-context",
+ "num-derive",
+ "num-traits",
+ "oboe-sys",
+]
+
+[[package]]
+name = "oboe-sys"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8bb09a4a2b1d668170cfe0a7d5bc103f8999fb316c98099b6a9939c9f2e79d"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -2895,7 +3174,7 @@ dependencies = [
  "concurrent-queue",
  "hermit-abi",
  "pin-project-lite",
- "rustix",
+ "rustix 1.1.3",
  "windows-sys 0.61.2",
 ]
 
@@ -2950,6 +3229,16 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
 
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.114",
+]
+
 [[package]]
 name = "proc-macro-crate"
 version = "1.3.1"
@@ -3038,7 +3327,7 @@ dependencies = [
  "pin-project-lite",
  "quinn-proto",
  "quinn-udp",
- "rustc-hash",
+ "rustc-hash 2.1.1",
  "rustls",
  "socket2",
  "thiserror 2.0.17",
@@ -3058,7 +3347,7 @@ dependencies = [
  "lru-slab",
  "rand 0.9.2",
  "ring",
- "rustc-hash",
+ "rustc-hash 2.1.1",
  "rustls",
  "rustls-pki-types",
  "slab",
@@ -3370,6 +3659,12 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "rustc-hash"
 version = "2.1.1"
@@ -3385,6 +3680,19 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "rustix"
+version = "0.38.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
+dependencies = [
+ "bitflags 2.10.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.15",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "rustix"
 version = "1.1.3"
@@ -3394,7 +3702,7 @@ dependencies = [
  "bitflags 2.10.0",
  "errno",
  "libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.11.0",
  "windows-sys 0.61.2",
 ]
 
@@ -3842,7 +4150,7 @@ checksum = "aac18da81ebbf05109ab275b157c22a653bb3c12cf884450179942f81bcbf6c3"
 dependencies = [
  "bytemuck",
  "js-sys",
- "ndk",
+ "ndk 0.9.0",
  "objc2",
  "objc2-core-foundation",
  "objc2-core-graphics",
@@ -4021,9 +4329,9 @@ dependencies = [
  "lazy_static",
  "libc",
  "log",
- "ndk",
+ "ndk 0.9.0",
  "ndk-context",
- "ndk-sys",
+ "ndk-sys 0.6.0+11769913",
  "objc2",
  "objc2-app-kit",
  "objc2-foundation",
@@ -4034,7 +4342,7 @@ dependencies = [
  "tao-macros",
  "unicode-segmentation",
  "url",
- "windows",
+ "windows 0.61.3",
  "windows-core 0.61.2",
  "windows-version",
  "x11-dl",
@@ -4116,7 +4424,7 @@ dependencies = [
  "webkit2gtk",
  "webview2-com",
  "window-vibrancy",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -4257,7 +4565,7 @@ dependencies = [
  "tauri-plugin",
  "thiserror 2.0.17",
  "url",
- "windows",
+ "windows 0.61.3",
  "zbus",
 ]
 
@@ -4325,7 +4633,7 @@ dependencies = [
  "url",
  "webkit2gtk",
  "webview2-com",
- "windows",
+ "windows 0.61.3",
 ]
 
 [[package]]
@@ -4351,7 +4659,7 @@ dependencies = [
  "url",
  "webkit2gtk",
  "webview2-com",
- "windows",
+ "windows 0.61.3",
  "wry",
 ]
 
@@ -4413,7 +4721,7 @@ dependencies = [
  "fastrand",
  "getrandom 0.3.4",
  "once_cell",
- "rustix",
+ "rustix 1.1.3",
  "windows-sys 0.61.2",
 ]
 
@@ -5141,7 +5449,7 @@ checksum = "7130243a7a5b33c54a444e54842e6a9e133de08b5ad7b5861cd8ed9a6a5bc96a"
 dependencies = [
  "webview2-com-macros",
  "webview2-com-sys",
- "windows",
+ "windows 0.61.3",
  "windows-core 0.61.2",
  "windows-implement",
  "windows-interface",
@@ -5165,10 +5473,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "381336cfffd772377d291702245447a5251a2ffa5bad679c99e61bc48bacbf9c"
 dependencies = [
  "thiserror 2.0.17",
- "windows",
+ "windows 0.61.3",
  "windows-core 0.61.2",
 ]
 
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "whisper-rs"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c597ac8a9d5c4719fee232abc871da184ea50a4fea38d2d00348fd95072b2b0"
+dependencies = [
+ "whisper-rs-sys",
+]
+
+[[package]]
+name = "whisper-rs-sys"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d22f00ed0995463eecc34ef89905845f6bf6fd37ea70789fed180520050da8f8"
+dependencies = [
+ "bindgen 0.69.5",
+ "cfg-if",
+ "cmake",
+ "fs_extra",
+]
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -5215,6 +5556,16 @@ dependencies = [
  "windows-version",
 ]
 
+[[package]]
+name = "windows"
+version = "0.54.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
+dependencies = [
+ "windows-core 0.54.0",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows"
 version = "0.61.3"
@@ -5237,6 +5588,16 @@ dependencies = [
  "windows-core 0.61.2",
 ]
 
+[[package]]
+name = "windows-core"
+version = "0.54.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
+dependencies = [
+ "windows-result 0.1.2",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.61.2"
@@ -5318,6 +5679,15 @@ dependencies = [
  "windows-link 0.1.3",
 ]
 
+[[package]]
+name = "windows-result"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-result"
 version = "0.3.4"
@@ -5673,7 +6043,7 @@ dependencies = [
  "jni",
  "kuchikiki",
  "libc",
- "ndk",
+ "ndk 0.9.0",
  "objc2",
  "objc2-app-kit",
  "objc2-core-foundation",
@@ -5691,7 +6061,7 @@ dependencies = [
  "webkit2gtk",
  "webkit2gtk-sys",
  "webview2-com",
- "windows",
+ "windows 0.61.3",
  "windows-core 0.61.2",
  "windows-version",
  "x11-dl",
@@ -5725,7 +6095,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156"
 dependencies = [
  "libc",
- "rustix",
+ "rustix 1.1.3",
 ]
 
 [[package]]
@@ -5773,7 +6143,7 @@ dependencies = [
  "hex",
  "libc",
  "ordered-stream",
- "rustix",
+ "rustix 1.1.3",
  "serde",
  "serde_repr",
  "tracing",
diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
index bbf5c8741..f00e6e18e 100644
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -24,13 +24,17 @@ tauri-plugin-opener = "2"
 tauri-plugin-process = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
-tokio = { version = "1", features = ["io-util", "net", "process", "rt", "sync", "time"] }
+tokio = { version = "1", features = ["fs", "net", "io-util", "process", "rt", "sync", "time"] }
 uuid = { version = "1", features = ["v4"] }
 tauri-plugin-dialog = "2"
 git2 = "0.20.3"
 fix-path-env = { git = "https://github.com/tauri-apps/fix-path-env-rs" }
 ignore = "0.4.25"
 portable-pty = "0.8"
+reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] }
+cpal = "0.15"
+whisper-rs = "0.12"
+sha2 = "0.10"
 
 [target."cfg(not(any(target_os = \"android\", target_os = \"ios\")))".dependencies]
 tauri-plugin-updater = "2"
diff --git a/src-tauri/Info.plist b/src-tauri/Info.plist
new file mode 100644
index 000000000..9d3a6ffdd
--- /dev/null
+++ b/src-tauri/Info.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+  <dict>
+    <key>NSMicrophoneUsageDescription</key>
+    <string>Allow access to the microphone for dictation.</string>
+  </dict>
+</plist>
diff --git a/src-tauri/src/dictation.rs b/src-tauri/src/dictation.rs
new file mode 100644
index 000000000..b93f8c6c6
--- /dev/null
+++ b/src-tauri/src/dictation.rs
@@ -0,0 +1,1328 @@
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
+use std::sync::{mpsc, Arc, Mutex};
+use std::time::{Duration, Instant};
+
+use serde::Serialize;
+use tauri::{AppHandle, Emitter, Manager, State};
+use tokio::io::AsyncWriteExt;
+use tokio::sync::oneshot;
+
+use crate::state::AppState;
+
+use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
+use cpal::{FromSample, Sample, SampleFormat, SizedSample};
+use sha2::{Digest, Sha256};
+use whisper_rs::get_lang_id;
+use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
+
+const DEFAULT_MODEL_ID: &str = "base";
+const MAX_CAPTURE_SECONDS: u32 = 120;
+
+/// Static catalog entry describing one downloadable Whisper model.
+struct DictationModelInfo {
+    /// Short identifier used to look the entry up (e.g. `"base"`).
+    id: &'static str,
+    /// File name the model is stored under inside the model directory.
+    filename: &'static str,
+    /// URL of the model file.
+    url: &'static str,
+    /// Hex-encoded SHA-256 digest of the model file.
+    /// NOTE(review): presumably compared against the downloaded bytes — confirm in the download code.
+    sha256: &'static str,
+}
+
+/// Models that can be selected for dictation: whisper.cpp ggml files hosted on
+/// Hugging Face, listed from `tiny` to `large-v3`. `DEFAULT_MODEL_ID` ("base")
+/// is one of these ids.
+const MODEL_CATALOG: &[DictationModelInfo] = &[
+    DictationModelInfo {
+        id: "tiny",
+        filename: "ggml-tiny.bin",
+        url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin",
+        sha256: "be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21",
+    },
+    DictationModelInfo {
+        id: "base",
+        filename: "ggml-base.bin",
+        url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin",
+        sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe",
+    },
+    DictationModelInfo {
+        id: "small",
+        filename: "ggml-small.bin",
+        url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
+        sha256: "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b",
+    },
+    DictationModelInfo {
+        id: "medium",
+        filename: "ggml-medium.bin",
+        url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin",
+        sha256: "6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208",
+    },
+    DictationModelInfo {
+        id: "large-v3",
+        filename: "ggml-large-v3.bin",
+        url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
+        sha256: "64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2",
+    },
+];
+
+/// Availability of a dictation model; serialized as a lowercase string.
+#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub(crate) enum DictationModelState {
+    /// The model file is not present on disk.
+    Missing,
+    /// A download of the model is in progress.
+    Downloading,
+    /// The model file exists on disk.
+    Ready,
+    /// The status could not be determined (e.g. unknown model id).
+    Error,
+}
+
+/// Byte counters for an in-flight model download.
+///
+/// Serialized with camelCase keys (`downloadedBytes`, `totalBytes`).
+#[derive(Debug, Serialize, Clone)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct DictationDownloadProgress {
+    /// Bytes received so far.
+    pub(crate) downloaded_bytes: u64,
+    /// Total size of the download, when known.
+    pub(crate) total_bytes: Option<u64>,
+}
+
+/// Snapshot of a model's availability, emitted on the `dictation-download`
+/// event and returned by the status commands.
+///
+/// Serialized with camelCase keys; only `model_id` (→ `modelId`) is affected.
+#[derive(Debug, Serialize, Clone)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct DictationModelStatus {
+    /// Current availability of the model.
+    pub(crate) state: DictationModelState,
+    /// Catalog id of the model this status describes.
+    pub(crate) model_id: String,
+    /// Download counters; `None` in the missing/ready/error statuses built in this file.
+    pub(crate) progress: Option<DictationDownloadProgress>,
+    /// Human-readable failure message, set for the `Error` state.
+    pub(crate) error: Option<String>,
+    /// Location of the model file, set for the `Ready` state.
+    pub(crate) path: Option<String>,
+}
+
+/// Phase of a dictation session; serialized as a lowercase string.
+/// NOTE(review): the transitions between phases happen outside this excerpt.
+#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub(crate) enum DictationSessionState {
+    /// No session is active (the initial state).
+    Idle,
+    /// Presumably: audio is being captured — confirm against the session code.
+    Listening,
+    /// Presumably: captured audio is being transcribed — confirm against the session code.
+    Processing,
+}
+
+/// Payload of the `dictation-event` event sent to the frontend.
+///
+/// Serialized as an internally tagged object: the variant name goes into a
+/// `type` field in snake_case (e.g. `{"type":"level","value":0.5}`).
+#[derive(Debug, Serialize, Clone)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub(crate) enum DictationEvent {
+    /// The session moved to a new phase.
+    State { state: DictationSessionState },
+    /// An input level reading (presumably microphone loudness — confirm range at the emitter).
+    Level { value: f32 },
+    /// Transcribed text.
+    Transcript { text: String },
+    /// A failure, with a human-readable message.
+    Error { message: String },
+    /// A cancellation, with a human-readable message.
+    Canceled { message: String },
+}
+
+/// Handles and parameters belonging to one dictation session.
+/// NOTE(review): the producer/consumer of these fields is outside this excerpt;
+/// the per-field notes marked "presumably" should be confirmed there.
+pub(crate) struct DictationSessionHandle {
+    /// Presumably: sending `()` asks the capture side to stop.
+    pub(crate) stop: mpsc::Sender<()>,
+    /// Presumably: resolves once the capture side has actually stopped.
+    pub(crate) stopped: oneshot::Receiver<()>,
+    /// Shared buffer of `f32` audio samples.
+    pub(crate) audio: Arc<Mutex<Vec<f32>>>,
+    /// Sample rate associated with `audio` (presumably in Hz).
+    pub(crate) sample_rate: u32,
+    /// Id of the model chosen for this session.
+    pub(crate) model_id: String,
+    /// Optional language preference for transcription.
+    pub(crate) preferred_language: Option<String>,
+}
+
+/// All mutable dictation state; accessed through the `dictation` async mutex
+/// on `AppState`.
+pub(crate) struct DictationState {
+    /// Last stored model status (written by `update_status` / `refresh_status`).
+    pub(crate) model_status: DictationModelStatus,
+    /// Flag set to `true` to request cancellation of the in-flight download.
+    pub(crate) download_cancel: Option<Arc<AtomicBool>>,
+    /// Join handle of the download task, so it can be aborted.
+    pub(crate) download_task: Option<tokio::task::JoinHandle<()>>,
+    /// Current phase of the dictation session.
+    pub(crate) session_state: DictationSessionState,
+    /// Handle of the active session, if any.
+    pub(crate) session: Option<DictationSessionHandle>,
+    /// Cancel flag of the current processing run; identified by pointer in
+    /// `clear_processing_cancel`.
+    pub(crate) processing_cancel: Option<Arc<AtomicBool>>,
+    /// Whisper context kept for reuse, tagged with the model it was built for.
+    pub(crate) cached_context: Option<CachedWhisperContext>,
+}
+
+/// A Whisper context together with the id of the model it belongs to.
+pub(crate) struct CachedWhisperContext {
+    /// Catalog id of the model associated with `context`.
+    pub(crate) model_id: String,
+    /// Shared handle to the Whisper context.
+    pub(crate) context: Arc<WhisperContext>,
+}
+
+impl Default for DictationState {
+    /// Initial state: the default model marked as missing, an idle session,
+    /// and no download, session, processing run, or cached context.
+    fn default() -> Self {
+        let model_status = missing_status(DEFAULT_MODEL_ID);
+        let session_state = DictationSessionState::Idle;
+        Self {
+            model_status,
+            session_state,
+            session: None,
+            cached_context: None,
+            download_cancel: None,
+            download_task: None,
+            processing_cancel: None,
+        }
+    }
+}
+
+/// Directory that holds Whisper model files: `<base>/models/whisper`.
+///
+/// `<base>` is the app data directory; if that cannot be resolved, the current
+/// working directory is used, and `.` as a last resort.
+fn model_dir(app: &AppHandle) -> PathBuf {
+    let base = match app.path().app_data_dir() {
+        Ok(data_dir) => data_dir,
+        Err(_) => std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+    };
+    base.join("models").join("whisper")
+}
+
+/// Looks up the catalog entry whose id equals `model_id` exactly.
+fn model_info(model_id: &str) -> Option<&'static DictationModelInfo> {
+    for entry in MODEL_CATALOG {
+        if entry.id == model_id {
+            return Some(entry);
+        }
+    }
+    None
+}
+
+/// Final on-disk path of the model file for `model_id`.
+///
+/// Returns an error message when `model_id` is not in the catalog.
+fn model_path(app: &AppHandle, model_id: &str) -> Result<PathBuf, String> {
+    match model_info(model_id) {
+        Some(entry) => Ok(model_dir(app).join(entry.filename)),
+        None => Err(format!("Unknown dictation model: {model_id}")),
+    }
+}
+
+/// Path of the `.partial` sibling of the model file for `model_id`
+/// (`<filename>.partial` inside the model directory).
+///
+/// Returns an error message when `model_id` is not in the catalog.
+fn model_temp_path(app: &AppHandle, model_id: &str) -> Result<PathBuf, String> {
+    match model_info(model_id) {
+        Some(entry) => {
+            let partial_name = format!("{}.partial", entry.filename);
+            Ok(model_dir(app).join(partial_name))
+        }
+        None => Err(format!("Unknown dictation model: {model_id}")),
+    }
+}
+
+/// Builds a `Missing` status for `model_id` with no progress, error, or path.
+fn missing_status(model_id: &str) -> DictationModelStatus {
+    let owned_id = String::from(model_id);
+    DictationModelStatus {
+        model_id: owned_id,
+        state: DictationModelState::Missing,
+        path: None,
+        error: None,
+        progress: None,
+    }
+}
+
+/// Builds a `Ready` status for `model_id` pointing at `path`.
+///
+/// The path is converted lossily, so non-UTF-8 bytes are replaced.
+fn ready_status(model_id: &str, path: &PathBuf) -> DictationModelStatus {
+    let owned_id = String::from(model_id);
+    let location = path.to_string_lossy().into_owned();
+    DictationModelStatus {
+        model_id: owned_id,
+        state: DictationModelState::Ready,
+        path: Some(location),
+        error: None,
+        progress: None,
+    }
+}
+
+/// Sends `status` to the frontend on the `dictation-download` event.
+fn emit_status(app: &AppHandle, status: &DictationModelStatus) {
+    const EVENT_NAME: &str = "dictation-download";
+    // Best-effort notification: an emit failure is deliberately ignored.
+    let _ = app.emit(EVENT_NAME, status);
+}
+
+/// Sends `event` to the frontend on the `dictation-event` event.
+fn emit_event(app: &AppHandle, event: DictationEvent) {
+    const EVENT_NAME: &str = "dictation-event";
+    // Best-effort notification: an emit failure is deliberately ignored.
+    let _ = app.emit(EVENT_NAME, event);
+}
+
+/// Removes the stored processing-cancel flag, but only when it is the very
+/// same `Arc` as `cancel_flag` (pointer equality, not value equality).
+///
+/// Returns `true` if the flag was cleared, `false` if no flag was stored or a
+/// different one is stored.
+async fn clear_processing_cancel(
+    app: &AppHandle,
+    cancel_flag: &Arc<AtomicBool>,
+) -> bool {
+    let app_state = app.state::<AppState>();
+    let mut guard = app_state.dictation.lock().await;
+    let owns_flag = match guard.processing_cancel.as_ref() {
+        Some(current) => Arc::ptr_eq(current, cancel_flag),
+        None => false,
+    };
+    if owns_flag {
+        guard.processing_cancel = None;
+    }
+    owns_flag
+}
+
+/// Stores `status` as the current model status and then broadcasts it on the
+/// `dictation-download` event.
+async fn update_status(
+    app: &AppHandle,
+    state: &State<'_, AppState>,
+    status: DictationModelStatus,
+) {
+    let mut guard = state.dictation.lock().await;
+    guard.model_status = status.clone();
+    // Release the lock before emitting so the event is sent without holding it.
+    drop(guard);
+    emit_status(app, &status);
+}
+
+/// Forgets the download bookkeeping: drops the stored cancel flag and the
+/// stored task handle. Neither the flag nor the task is signalled here.
+async fn clear_download_state(state: &State<'_, AppState>) {
+    let mut guard = state.dictation.lock().await;
+    drop(guard.download_cancel.take());
+    drop(guard.download_task.take());
+}
+
+/// Chooses the model id to operate on.
+///
+/// An explicit `model_id` wins; otherwise the `dictation_model_id` app setting
+/// is used, with a blank setting meaning `DEFAULT_MODEL_ID`. Whatever was
+/// chosen is replaced by `DEFAULT_MODEL_ID` if it is not in the catalog.
+async fn resolve_model_id(
+    state: &State<'_, AppState>,
+    model_id: Option<String>,
+) -> String {
+    let requested = match model_id {
+        Some(explicit) => explicit,
+        None => {
+            let settings = state.app_settings.lock().await;
+            let configured = &settings.dictation_model_id;
+            if configured.trim().is_empty() {
+                DEFAULT_MODEL_ID.to_string()
+            } else {
+                configured.clone()
+            }
+        }
+    };
+    match model_info(&requested) {
+        Some(_) => requested,
+        None => DEFAULT_MODEL_ID.to_string(),
+    }
+}
+
+/// Determines the status of `model_id` from what is on disk and returns it.
+///
+/// - If `model_id` itself is currently downloading, the stored `Downloading`
+///   status is returned unchanged.
+/// - If a *different* model is downloading, the status of `model_id` is
+///   computed and returned but not stored. Storing it would wipe the
+///   `Downloading` marker, and `dictation_download_model` relies on that
+///   marker to cancel the in-flight download before starting another one.
+/// - Otherwise the computed status also replaces the stored one.
+///
+/// An id without a catalog entry yields an `Error` status carrying the message.
+async fn refresh_status(
+    app: &AppHandle,
+    state: &State<'_, AppState>,
+    model_id: &str,
+) -> DictationModelStatus {
+    let mut dictation = state.dictation.lock().await;
+    let download_active =
+        dictation.model_status.state == DictationModelState::Downloading;
+    if download_active && dictation.model_status.model_id == model_id {
+        return dictation.model_status.clone();
+    }
+
+    let status = match model_path(app, model_id) {
+        Ok(path) if path.exists() => ready_status(model_id, &path),
+        Ok(_) => missing_status(model_id),
+        Err(error) => DictationModelStatus {
+            state: DictationModelState::Error,
+            model_id: model_id.to_string(),
+            progress: None,
+            error: Some(error),
+            path: None,
+        },
+    };
+
+    // Never clobber an in-progress download's status with another model's.
+    if !download_active {
+        dictation.model_status = status.clone();
+    }
+    status
+}
+
+/// Tauri command: reports the status of the requested dictation model.
+///
+/// The id is normalized by `resolve_model_id` and the status comes from
+/// `refresh_status`. This command always returns `Ok`.
+#[tauri::command]
+pub(crate) async fn dictation_model_status(
+    app: AppHandle,
+    state: State<'_, AppState>,
+    model_id: Option<String>,
+) -> Result<DictationModelStatus, String> {
+    let resolved = resolve_model_id(&state, model_id).await;
+    let status = refresh_status(&app, &state, &resolved).await;
+    Ok(status)
+}
+
+#[tauri::command]
+pub(crate) async fn dictation_download_model(
+    app: AppHandle,
+    state: State<'_, AppState>,
+    model_id: Option<String>,
+) -> Result<DictationModelStatus, String> {
+    let model_id = resolve_model_id(&state, model_id).await;
+    let current = refresh_status(&app, &state, &model_id).await;
+    if current.state == DictationModelState::Ready {
+        return Ok(current);
+    }
+    if current.state == DictationModelState::Downloading
+        && current.model_id == model_id
+    {
+        return Ok(current);
+    }
+
+    let cancel_flag = Arc::new(AtomicBool::new(false));
+    {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.model_status.state == DictationModelState::Downloading
+            && dictation.model_status.model_id != model_id
+        {
+            if let Some(flag) = dictation.download_cancel.take() {
+                flag.store(true, Ordering::SeqCst);
+            }
+            if let Some(task) = dictation.download_task.take() {
+                task.abort();
+            }
+        }
+        dictation.download_cancel = Some(cancel_flag.clone());
+        dictation.model_status = DictationModelStatus {
+            state: DictationModelState::Downloading,
+            model_id: model_id.clone(),
+            progress: Some(DictationDownloadProgress {
+                downloaded_bytes: 0,
+                total_bytes: None,
+            }),
+            error: None,
+            path: None,
+        };
+    }
+    emit_status(&app, &refresh_status(&app, &state, &model_id).await);
+
+    let app_handle = app.clone();
+    let model_id_clone = model_id.clone();
+    let task = tokio::spawn(async move {
+        let state = app_handle.state::<AppState>();
+        let model_dir = model_dir(&app_handle);
+        let model_path = match model_path(&app_handle, &model_id_clone) {
+            Ok(path) => path,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(error),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+        let temp_path = match model_temp_path(&app_handle, &model_id_clone) {
+            Ok(path) => path,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(error),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+
+        if let Err(error) = tokio::fs::create_dir_all(&model_dir).await {
+            let status = DictationModelStatus {
+                state: DictationModelState::Error,
+                model_id: model_id_clone.clone(),
+                progress: None,
+                error: Some(format!("Failed to create model directory: {error}")),
+                path: None,
+            };
+            update_status(&app_handle, &state, status).await;
+            clear_download_state(&state).await;
+            return;
+        }
+
+        let (url, expected_sha) = match model_info(&model_id_clone) {
+            Some(info) => (info.url, info.sha256),
+            None => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some("Unknown dictation model.".to_string()),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+        let client = match reqwest::Client::builder()
+            .connect_timeout(Duration::from_secs(10))
+            .timeout(Duration::from_secs(30 * 60))
+            .build()
+        {
+            Ok(client) => client,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(format!("Failed to configure download client: {error}")),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+        let response = match client.get(url).send().await {
+            Ok(response) => response,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(format!("Failed to download model: {error}")),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+        let response = match response.error_for_status() {
+            Ok(response) => response,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(format!("Model download failed: {error}")),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+
+        let total = response.content_length();
+        let mut downloaded = 0u64;
+        let mut file = match tokio::fs::File::create(&temp_path).await {
+            Ok(file) => file,
+            Err(error) => {
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(format!("Failed to write model: {error}")),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+        };
+
+        let mut response = response;
+        let mut hasher = Sha256::new();
+        let mut last_progress = Instant::now();
+        loop {
+            let cancel = {
+                let dictation = state.dictation.lock().await;
+                dictation
+                    .download_cancel
+                    .as_ref()
+                    .map(|flag| flag.load(Ordering::Relaxed))
+                    .unwrap_or(false)
+            };
+            if cancel {
+                let _ = tokio::fs::remove_file(&temp_path).await;
+                let status = missing_status(&model_id_clone);
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+
+            let chunk = match response.chunk().await {
+                Ok(Some(chunk)) => chunk,
+                Ok(None) => break,
+                Err(error) => {
+                    let _ = tokio::fs::remove_file(&temp_path).await;
+                    let status = DictationModelStatus {
+                        state: DictationModelState::Error,
+                        model_id: model_id_clone.clone(),
+                        progress: None,
+                        error: Some(format!("Model download failed: {error}")),
+                        path: None,
+                    };
+                    update_status(&app_handle, &state, status).await;
+                    clear_download_state(&state).await;
+                    return;
+                }
+            };
+
+            if let Err(error) = file.write_all(&chunk).await {
+                let _ = tokio::fs::remove_file(&temp_path).await;
+                let status = DictationModelStatus {
+                    state: DictationModelState::Error,
+                    model_id: model_id_clone.clone(),
+                    progress: None,
+                    error: Some(format!("Failed to write model: {error}")),
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+                clear_download_state(&state).await;
+                return;
+            }
+            downloaded += chunk.len() as u64;
+            hasher.update(&chunk);
+
+            if last_progress.elapsed() >= Duration::from_millis(150) {
+                last_progress = Instant::now();
+                let status = DictationModelStatus {
+                    state: DictationModelState::Downloading,
+                    model_id: model_id_clone.clone(),
+                    progress: Some(DictationDownloadProgress {
+                        downloaded_bytes: downloaded,
+                        total_bytes: total,
+                    }),
+                    error: None,
+                    path: None,
+                };
+                update_status(&app_handle, &state, status).await;
+            }
+        }
+
+        let hash = hasher.finalize();
+        let mut hash_hex = String::with_capacity(64);
+        for byte in hash {
+            use std::fmt::Write;
+            let _ = write!(&mut hash_hex, "{:02x}", byte);
+        }
+        if hash_hex != expected_sha {
+            let _ = tokio::fs::remove_file(&temp_path).await;
+            let status = DictationModelStatus {
+                state: DictationModelState::Error,
+                model_id: model_id_clone.clone(),
+                progress: None,
+                error: Some("Model hash mismatch; download canceled.".to_string()),
+                path: None,
+            };
+            update_status(&app_handle, &state, status).await;
+            clear_download_state(&state).await;
+            return;
+        }
+
+        if let Err(error) = file.flush().await {
+            let _ = tokio::fs::remove_file(&temp_path).await;
+            let status = DictationModelStatus {
+                state: DictationModelState::Error,
+                model_id: model_id_clone.clone(),
+                progress: None,
+                error: Some(format!("Failed to finalize model: {error}")),
+                path: None,
+            };
+            update_status(&app_handle, &state, status).await;
+            clear_download_state(&state).await;
+            return;
+        }
+
+        if let Err(error) = tokio::fs::rename(&temp_path, &model_path).await {
+            let _ = tokio::fs::remove_file(&temp_path).await;
+            let status = DictationModelStatus {
+                state: DictationModelState::Error,
+                model_id: model_id_clone.clone(),
+                progress: None,
+                error: Some(format!("Failed to move model into place: {error}")),
+                path: None,
+            };
+            update_status(&app_handle, &state, status).await;
+            clear_download_state(&state).await;
+            return;
+        }
+
+        let status = ready_status(&model_id_clone, &model_path);
+        update_status(&app_handle, &state, status).await;
+        clear_download_state(&state).await;
+    });
+
+    {
+        let mut dictation = state.dictation.lock().await;
+        dictation.download_task = Some(task);
+    }
+
+    Ok(refresh_status(&app, &state, &model_id).await)
+}
+
+/// Tauri command: cancels the model download that is in progress, if any.
+///
+/// The registered cancel flag is set, the download task is aborted, and the
+/// stored status becomes `missing_status` for the resolved model. Partial
+/// temp files are removed on a best-effort basis: for the resolved model
+/// and, when the interrupted download was for a different model, for that
+/// model as well. The refreshed status is emitted and returned.
+#[tauri::command]
+pub(crate) async fn dictation_cancel_download(
+    app: AppHandle,
+    state: State<'_, AppState>,
+    model_id: Option<String>,
+) -> Result<DictationModelStatus, String> {
+    let model_id = resolve_model_id(&state, model_id).await;
+    let interrupted_model = {
+        let mut dictation = state.dictation.lock().await;
+        // Remember which other model was actually downloading, so that its
+        // partial file is cleaned up too.
+        let interrupted = if dictation.model_status.state == DictationModelState::Downloading
+            && dictation.model_status.model_id != model_id
+        {
+            Some(dictation.model_status.model_id.clone())
+        } else {
+            None
+        };
+        if let Some(flag) = dictation.download_cancel.take() {
+            flag.store(true, Ordering::Relaxed);
+        }
+        if let Some(task) = dictation.download_task.take() {
+            task.abort();
+        }
+        dictation.model_status = missing_status(&model_id);
+        interrupted
+    };
+    if let Some(other_model) = &interrupted_model {
+        if let Ok(temp_path) = model_temp_path(&app, other_model) {
+            let _ = tokio::fs::remove_file(&temp_path).await;
+        }
+    }
+    if let Ok(temp_path) = model_temp_path(&app, &model_id) {
+        let _ = tokio::fs::remove_file(&temp_path).await;
+    }
+    let status = refresh_status(&app, &state, &model_id).await;
+    emit_status(&app, &status);
+    Ok(status)
+}
+
+/// Tauri command: deletes the downloaded file of a dictation model.
+///
+/// A file that is already absent is not an error. Any cached Whisper context
+/// for the same model is dropped, the stored status becomes
+/// `missing_status`, and the refreshed status is emitted and returned.
+///
+/// Returns `Err` when the model path cannot be resolved or when the file
+/// exists but cannot be removed.
+#[tauri::command]
+pub(crate) async fn dictation_remove_model(
+    app: AppHandle,
+    state: State<'_, AppState>,
+    model_id: Option<String>,
+) -> Result<DictationModelStatus, String> {
+    let model_id = resolve_model_id(&state, model_id).await;
+    let model_path = model_path(&app, &model_id)?;
+    // Remove directly instead of checking `exists()` first: this avoids a
+    // blocking filesystem call and the race with a concurrent removal.
+    match tokio::fs::remove_file(&model_path).await {
+        Ok(()) => {}
+        Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
+        Err(error) => return Err(format!("Failed to remove model: {error}")),
+    }
+    {
+        let mut dictation = state.dictation.lock().await;
+        let cache_matches = dictation
+            .cached_context
+            .as_ref()
+            .map_or(false, |cached| cached.model_id.as_str() == model_id);
+        if cache_matches {
+            dictation.cached_context = None;
+        }
+        dictation.model_status = missing_status(&model_id);
+    }
+    let status = refresh_status(&app, &state, &model_id).await;
+    emit_status(&app, &status);
+    Ok(status)
+}
+
+/// Tauri command: starts a dictation session by opening microphone capture.
+///
+/// Fails (emitting a `DictationEvent::Error` with the same message) when the
+/// model is not `Ready`, when a session is already active, or when the
+/// capture thread cannot start. On success the session handle is stored,
+/// a `Listening` state event is emitted, and `Listening` is returned.
+///
+/// The session state is checked twice: once up front as a cheap early exit,
+/// and again under the same lock acquisition that registers the session.
+/// If another session was registered while the microphone was starting,
+/// this capture is stopped again rather than overwriting that session.
+#[tauri::command]
+pub(crate) async fn dictation_start(
+    preferred_language: Option<String>,
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    let model_id = resolve_model_id(&state, None).await;
+    let model_status = refresh_status(&app, &state, &model_id).await;
+    if model_status.state != DictationModelState::Ready {
+        let message = "Dictation model is not downloaded yet.".to_string();
+        emit_event(&app, DictationEvent::Error { message: message.clone() });
+        return Err(message);
+    }
+    {
+        let dictation = state.dictation.lock().await;
+        if dictation.session_state != DictationSessionState::Idle {
+            let message = "Dictation is already active.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+    }
+
+    let audio = Arc::new(Mutex::new(Vec::new()));
+    let (stop_tx, stop_rx) = mpsc::channel();
+    let stop_tx_thread = stop_tx.clone();
+    let (ready_tx, ready_rx) = oneshot::channel();
+    let (stopped_tx, stopped_rx) = oneshot::channel();
+    let app_handle = app.clone();
+    let audio_capture = audio.clone();
+
+    std::thread::spawn(move || {
+        start_capture_thread(
+            app_handle,
+            audio_capture,
+            stop_rx,
+            stop_tx_thread,
+            stopped_tx,
+            ready_tx,
+        );
+    });
+
+    let sample_rate = match ready_rx.await {
+        Ok(Ok(rate)) => rate,
+        Ok(Err(message)) => {
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        Err(_) => {
+            let message = "Failed to start microphone capture.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+    };
+
+    let handle = DictationSessionHandle {
+        stop: stop_tx,
+        stopped: stopped_rx,
+        audio,
+        sample_rate,
+        model_id,
+        preferred_language,
+    };
+    // Register the session only if the state is still Idle; otherwise hand
+    // the handle back so the capture that was just started can be shut down.
+    let rejected = {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.session_state == DictationSessionState::Idle {
+            dictation.session_state = DictationSessionState::Listening;
+            dictation.session = Some(handle);
+            None
+        } else {
+            Some(handle)
+        }
+    };
+    if let Some(handle) = rejected {
+        // The capture thread keeps its own sender alive, so it only stops
+        // when it is told to explicitly.
+        let _ = handle.stop.send(());
+        let _ = handle.stopped.await;
+        let message = "Dictation is already active.".to_string();
+        emit_event(&app, DictationEvent::Error { message: message.clone() });
+        return Err(message);
+    }
+
+    emit_event(
+        &app,
+        DictationEvent::State {
+            state: DictationSessionState::Listening,
+        },
+    );
+
+    Ok(DictationSessionState::Listening)
+}
+
+/// Ends a background processing run: if `cancel_flag` is still the flag
+/// registered in the dictation state, the flag is unregistered, the session
+/// state becomes `Idle`, and an `Idle` state event is emitted.
+///
+/// When the flag is no longer registered the state is left untouched, so a
+/// canceled or superseded run cannot disturb whatever happened after it.
+async fn finish_dictation_processing(app: &AppHandle, cancel_flag: &Arc<AtomicBool>) {
+    let state = app.state::<AppState>();
+    let released = {
+        let mut dictation = state.dictation.lock().await;
+        let owns_run = dictation
+            .processing_cancel
+            .as_ref()
+            .map_or(false, |flag| Arc::ptr_eq(flag, cancel_flag));
+        if owns_run {
+            dictation.processing_cancel = None;
+            dictation.session_state = DictationSessionState::Idle;
+        }
+        owns_run
+    };
+    if released {
+        emit_event(
+            app,
+            DictationEvent::State {
+                state: DictationSessionState::Idle,
+            },
+        );
+    }
+}
+
+/// Reports a processing failure (unless the run was canceled) and then ends
+/// the run through `finish_dictation_processing`.
+async fn fail_dictation_processing(
+    app: &AppHandle,
+    cancel_flag: &Arc<AtomicBool>,
+    message: String,
+) {
+    if !cancel_flag.load(Ordering::Relaxed) {
+        emit_event(app, DictationEvent::Error { message });
+    }
+    finish_dictation_processing(app, cancel_flag).await;
+}
+
+/// Tauri command: stops listening and transcribes the captured audio in the
+/// background.
+///
+/// Fails when no session is listening. Otherwise the state becomes
+/// `Processing` (emitted as a state event), capture is stopped, and a
+/// background task:
+/// 1. takes the captured samples,
+/// 2. reuses the cached Whisper context for the session's model or loads and
+///    caches a new one,
+/// 3. runs `transcribe_audio` on a blocking thread,
+/// 4. emits a `Transcript` event for non-blank text or an `Error` event,
+/// 5. returns the state to `Idle`.
+///
+/// Setting the run's cancel flag suppresses the result and error events.
+/// Returns `Processing` as soon as the background task has been spawned.
+#[tauri::command]
+pub(crate) async fn dictation_stop(
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    let cancel_flag = Arc::new(AtomicBool::new(false));
+    let (audio, sample_rate, model_id, preferred_language, stopped, stop_tx) = {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.session_state != DictationSessionState::Listening {
+            let message = "Dictation is not currently listening.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        // Take the session before touching the state, so a missing session
+        // cannot leave the state stuck in `Processing`.
+        let session = dictation
+            .session
+            .take()
+            .ok_or_else(|| "Dictation session is unavailable.".to_string())?;
+        dictation.session_state = DictationSessionState::Processing;
+        dictation.processing_cancel = Some(Arc::clone(&cancel_flag));
+        (
+            session.audio,
+            session.sample_rate,
+            session.model_id,
+            session.preferred_language,
+            session.stopped,
+            session.stop,
+        )
+    };
+
+    emit_event(
+        &app,
+        DictationEvent::State {
+            state: DictationSessionState::Processing,
+        },
+    );
+
+    let app_handle = app.clone();
+    let _ = stop_tx.send(());
+    let _ = stopped.await;
+    tokio::spawn(async move {
+        // Move the samples out instead of copying them; a poisoned mutex
+        // still holds usable audio, so recover it rather than panic.
+        let samples = {
+            let mut guard = audio
+                .lock()
+                .unwrap_or_else(|poisoned| poisoned.into_inner());
+            std::mem::take(&mut *guard)
+        };
+        if cancel_flag.load(Ordering::Relaxed) {
+            clear_processing_cancel(&app_handle, &cancel_flag).await;
+            return;
+        }
+
+        let state_handle = app_handle.state::<AppState>();
+        let cached_context = {
+            let dictation = state_handle.dictation.lock().await;
+            dictation
+                .cached_context
+                .as_ref()
+                .filter(|cached| cached.model_id == model_id)
+                .map(|cached| Arc::clone(&cached.context))
+        };
+
+        let context = match cached_context {
+            Some(context) => context,
+            None => {
+                let model_path = match model_path(&app_handle, &model_id) {
+                    Ok(path) => path,
+                    Err(error) => {
+                        fail_dictation_processing(&app_handle, &cancel_flag, error).await;
+                        return;
+                    }
+                };
+                let path = model_path.to_string_lossy().into_owned();
+                let created = tokio::task::spawn_blocking(move || {
+                    WhisperContext::new_with_params(&path, WhisperContextParameters::default())
+                })
+                .await;
+                // Flatten "task failed" and "load failed" into one message.
+                let loaded = match created {
+                    Ok(Ok(context)) => Ok(context),
+                    Ok(Err(error)) => Err(format!("Failed to load Whisper model: {error}")),
+                    Err(error) => Err(format!("Failed to load Whisper model: {error}")),
+                };
+                let context = match loaded {
+                    Ok(context) => Arc::new(context),
+                    Err(message) => {
+                        fail_dictation_processing(&app_handle, &cancel_flag, message).await;
+                        return;
+                    }
+                };
+                let mut dictation = state_handle.dictation.lock().await;
+                dictation.cached_context = Some(CachedWhisperContext {
+                    model_id: model_id.clone(),
+                    context: Arc::clone(&context),
+                });
+                context
+            }
+        };
+
+        let result = tokio::task::spawn_blocking(move || {
+            transcribe_audio(samples, sample_rate, &context, preferred_language)
+        })
+        .await;
+
+        let outcome = match result {
+            Ok(result) => result,
+            Err(error) => Err(format!("Transcription task failed: {error}")),
+        };
+
+        if cancel_flag.load(Ordering::Relaxed) {
+            clear_processing_cancel(&app_handle, &cancel_flag).await;
+            return;
+        }
+
+        match outcome {
+            Ok(text) => {
+                if !text.trim().is_empty() {
+                    emit_event(
+                        &app_handle,
+                        DictationEvent::Transcript { text },
+                    );
+                }
+            }
+            Err(message) => {
+                emit_event(
+                    &app_handle,
+                    DictationEvent::Error { message },
+                );
+            }
+        }
+
+        finish_dictation_processing(&app_handle, &cancel_flag).await;
+    });
+
+    Ok(DictationSessionState::Processing)
+}
+
+/// Tauri command: cancels the active dictation, whichever phase it is in.
+///
+/// - `Processing`: the registered cancel flag is set and unregistered, and
+///   the state becomes `Idle`.
+/// - `Listening`: capture is stopped and the captured audio is discarded.
+/// - any other state: an `Error` event is emitted and `Err` is returned.
+///
+/// A successful cancel emits an `Idle` state event followed by a `Canceled`
+/// event and returns `Idle`. The state is inspected and changed under a
+/// single lock acquisition.
+#[tauri::command]
+pub(crate) async fn dictation_cancel(
+    app: AppHandle,
+    state: State<'_, AppState>,
+) -> Result<DictationSessionState, String> {
+    let session = {
+        let mut dictation = state.dictation.lock().await;
+        if dictation.session_state == DictationSessionState::Processing {
+            if let Some(flag) = dictation.processing_cancel.take() {
+                flag.store(true, Ordering::Relaxed);
+            }
+            dictation.session_state = DictationSessionState::Idle;
+            emit_event(
+                &app,
+                DictationEvent::State {
+                    state: DictationSessionState::Idle,
+                },
+            );
+            emit_event(
+                &app,
+                DictationEvent::Canceled {
+                    message: "Canceled".to_string(),
+                },
+            );
+            return Ok(DictationSessionState::Idle);
+        }
+        if dictation.session_state != DictationSessionState::Listening {
+            let message = "Dictation is not currently listening.".to_string();
+            emit_event(&app, DictationEvent::Error { message: message.clone() });
+            return Err(message);
+        }
+        dictation.session_state = DictationSessionState::Idle;
+        dictation.session.take()
+    };
+
+    let (audio, stopped, stop_tx) = match session {
+        Some(session) => (session.audio, session.stopped, session.stop),
+        None => {
+            // The state was just switched to Idle, so announce that change
+            // even though there is no session left to shut down.
+            emit_event(
+                &app,
+                DictationEvent::State {
+                    state: DictationSessionState::Idle,
+                },
+            );
+            return Err("Dictation session is unavailable.".to_string());
+        }
+    };
+
+    let _ = stop_tx.send(());
+    let _ = stopped.await;
+    {
+        // The audio is being discarded, so a poisoned mutex is not a reason
+        // to panic.
+        let mut guard = audio
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        guard.clear();
+    }
+
+    emit_event(
+        &app,
+        DictationEvent::State {
+            state: DictationSessionState::Idle,
+        },
+    );
+    emit_event(
+        &app,
+        DictationEvent::Canceled {
+            message: "Canceled".to_string(),
+        },
+    );
+
+    Ok(DictationSessionState::Idle)
+}
+
+/// Body of the microphone capture thread.
+///
+/// Opens the default input device with its default configuration, builds an
+/// input stream that appends mono samples to `audio`, and starts it.
+///
+/// Signalling:
+/// - `ready_tx` receives `Ok(sample_rate)` once the stream is playing, or
+///   `Err(message)` if any setup step fails.
+/// - `stopped_tx` is signalled exactly once, right before this function
+///   returns, on both the failure and the success path.
+/// - The function blocks on `stop_rx` until a stop is requested; the stream
+///   error callback also requests a stop through a clone of `stop_tx`.
+///
+/// While the stream runs, a helper thread emits a `Level` event roughly
+/// every 33 ms with the latest level stored by the stream callback.
+fn start_capture_thread(
+    app: AppHandle,
+    audio: Arc<Mutex<Vec<f32>>>,
+    stop_rx: mpsc::Receiver<()>,
+    stop_tx: mpsc::Sender<()>,
+    stopped_tx: oneshot::Sender<()>,
+    ready_tx: oneshot::Sender<Result<u32, String>>,
+) {
+    let host = cpal::default_host();
+    let device = match host.default_input_device() {
+        Some(device) => device,
+        None => {
+            let _ = ready_tx.send(Err("No microphone input device available.".to_string()));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+    let input_config = match device.default_input_config() {
+        Ok(config) => config,
+        Err(error) => {
+            let message = format!("Failed to read microphone config: {error}");
+            let _ = ready_tx.send(Err(message));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+
+    let sample_rate = input_config.sample_rate().0;
+    let sample_format = input_config.sample_format();
+    let stream_config: cpal::StreamConfig = input_config.into();
+    let channels = stream_config.channels as usize;
+    // Upper bound on the number of mono samples kept for one session.
+    let max_samples = (sample_rate as usize)
+        .saturating_mul(MAX_CAPTURE_SECONDS as usize)
+        .max(1);
+
+    let error_app = app.clone();
+    let error_stop = stop_tx.clone();
+    // Stream error callback: report the error, request a stop, and reset the
+    // session to Idle on the async runtime.
+    let on_stream_error = move |error| {
+        let message = format!("Microphone error: {error}");
+        emit_event(&error_app, DictationEvent::Error { message });
+        let _ = error_stop.send(());
+        let task_app = error_app.clone();
+        tauri::async_runtime::spawn(async move {
+            let app_state = task_app.state::<AppState>();
+            let was_active = {
+                let mut dictation = app_state.dictation.lock().await;
+                let active = dictation.session_state != DictationSessionState::Idle;
+                if active {
+                    dictation.session_state = DictationSessionState::Idle;
+                    dictation.session = None;
+                }
+                active
+            };
+            if was_active {
+                emit_event(
+                    &task_app,
+                    DictationEvent::State {
+                        state: DictationSessionState::Idle,
+                    },
+                );
+            }
+        });
+    };
+
+    // The level is an f32 stored as its bit pattern in an AtomicU32.
+    let level_value = Arc::new(AtomicU32::new(0));
+    let capture_buffer = audio.clone();
+
+    let built = match sample_format {
+        SampleFormat::F32 => build_stream::<f32>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            capture_buffer,
+            level_value.clone(),
+            on_stream_error,
+        ),
+        SampleFormat::I16 => build_stream::<i16>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            capture_buffer,
+            level_value.clone(),
+            on_stream_error,
+        ),
+        SampleFormat::U16 => build_stream::<u16>(
+            &device,
+            &stream_config,
+            channels,
+            max_samples,
+            capture_buffer,
+            level_value.clone(),
+            on_stream_error,
+        ),
+        _ => Err("Unsupported microphone sample format.".to_string()),
+    };
+    let stream = match built {
+        Ok(stream) => stream,
+        Err(message) => {
+            let _ = ready_tx.send(Err(message));
+            let _ = stopped_tx.send(());
+            return;
+        }
+    };
+    if let Err(error) = stream.play() {
+        let message = format!("Failed to start microphone: {error}");
+        let _ = ready_tx.send(Err(message));
+        let _ = stopped_tx.send(());
+        return;
+    }
+
+    let running = Arc::new(AtomicBool::new(true));
+    let meter_app = app.clone();
+    let meter_level = level_value.clone();
+    let meter_running = running.clone();
+    std::thread::spawn(move || {
+        while meter_running.load(Ordering::Relaxed) {
+            let value = f32::from_bits(meter_level.load(Ordering::Relaxed));
+            emit_event(&meter_app, DictationEvent::Level { value });
+            std::thread::sleep(Duration::from_millis(33));
+        }
+    });
+
+    eprintln!(
+        "dictation: capture started (rate={}Hz, channels={}, format={:?})",
+        sample_rate, channels, sample_format
+    );
+    let _ = ready_tx.send(Ok(sample_rate));
+    // Block until a stop is requested.
+    let _ = stop_rx.recv();
+    running.store(false, Ordering::Relaxed);
+    drop(stream);
+    let _ = stopped_tx.send(());
+}
+
+/// Builds a cpal input stream for sample type `T`.
+///
+/// For every callback buffer the stream callback:
+/// - averages each group of `channels` interleaved samples into one mono
+///   `f32` sample (a trailing partial group is averaged over the samples it
+///   has),
+/// - appends the mono samples to `audio` until it holds `max_samples`
+///   samples; anything beyond that is dropped, and so is the whole buffer if
+///   the mutex is poisoned,
+/// - stores the buffer's RMS level, scaled by 6 and clamped to `[0, 1]`,
+///   into `level_value` as an `f32` bit pattern.
+///
+/// `channels` is treated as at least 1. `err_fn` is handed to cpal as the
+/// stream error callback. Returns a user-facing message if the stream
+/// cannot be built.
+fn build_stream<T>(
+    device: &cpal::Device,
+    config: &cpal::StreamConfig,
+    channels: usize,
+    max_samples: usize,
+    audio: Arc<Mutex<Vec<f32>>>,
+    level_value: Arc<AtomicU32>,
+    err_fn: impl FnMut(cpal::StreamError) + Send + 'static,
+) -> Result<cpal::Stream, String>
+where
+    T: Sample + SizedSample,
+    f32: FromSample<T>,
+{
+    let channels = channels.max(1);
+    // Scratch buffer reused across callbacks.
+    let mut mono_buffer: Vec<f32> = Vec::with_capacity(2048);
+    device
+        .build_input_stream(
+            config,
+            move |data: &[T], _| {
+                if data.is_empty() {
+                    return;
+                }
+                mono_buffer.clear();
+                // `Vec::reserve` counts from the current length, which is 0
+                // after `clear`, so request the full frame count (rounded up
+                // for a trailing partial frame).
+                mono_buffer.reserve((data.len() + channels - 1) / channels);
+
+                let mut energy = 0.0f32;
+                // `chunks` never yields an empty slice, so every frame has
+                // at least one sample and the division below is safe.
+                for frame in data.chunks(channels) {
+                    let mut frame_sum = 0.0f32;
+                    for sample in frame {
+                        let value: f32 = sample.to_sample();
+                        frame_sum += value;
+                    }
+                    let mono = frame_sum / frame.len() as f32;
+                    mono_buffer.push(mono);
+                    energy += mono * mono;
+                }
+
+                if let Ok(mut buffer) = audio.lock() {
+                    let remaining = max_samples.saturating_sub(buffer.len());
+                    let slice_len = remaining.min(mono_buffer.len());
+                    if slice_len > 0 {
+                        buffer.extend_from_slice(&mono_buffer[..slice_len]);
+                    }
+                }
+
+                // `data` is non-empty, so `mono_buffer` holds at least one frame.
+                let rms = (energy / mono_buffer.len() as f32).sqrt();
+                let scaled = (rms * 6.0).clamp(0.0, 1.0);
+                level_value.store(scaled.to_bits(), Ordering::Relaxed);
+            },
+            err_fn,
+            None,
+        )
+        .map_err(|error| format!("Failed to build microphone stream: {error}"))
+}
+
+/// Transcribes mono `samples` captured at `sample_rate` Hz with Whisper.
+///
+/// Processing steps:
+/// 1. Subtract the mean from every sample and apply a gain that brings the
+///    peak to 0.6 (gain capped at 10x), clamping the result to `[-1, 1]`.
+/// 2. Reject recordings shorter than 0.2 seconds.
+/// 3. Resample to 16 kHz when the capture rate differs.
+/// 4. If `preferred_language` is a language Whisper knows, run language
+///    detection and force that language only when its probability is within
+///    0.30 of the most likely language; otherwise let Whisper auto-detect.
+/// 5. Run greedy decoding and concatenate the text of all segments.
+///
+/// Returns the trimmed transcript, or an empty string when `samples` is empty
+/// or no text was produced. Returns `Err` with a human-readable message when
+/// the sample rate is zero, the audio is too short, or Whisper fails.
+fn transcribe_audio(
+    samples: Vec<f32>,
+    sample_rate: u32,
+    context: &WhisperContext,
+    preferred_language: Option<String>,
+) -> Result<String, String> {
+    if samples.is_empty() {
+        return Ok(String::new());
+    }
+    // A zero rate would yield an infinite duration (passing the length check)
+    // and an unbounded resampling ratio, so reject it explicitly.
+    if sample_rate == 0 {
+        return Err("Invalid sample rate for transcription.".to_string());
+    }
+    let mut max = 0.0f32;
+    let mut sum = 0.0f32;
+    let mean = samples.iter().copied().sum::<f32>() / samples.len() as f32;
+    let mut normalized = Vec::with_capacity(samples.len());
+    for value in &samples {
+        // Remove the DC offset while tracking peak and energy.
+        let centered = value - mean;
+        let abs = centered.abs();
+        if abs > max {
+            max = abs;
+        }
+        sum += centered * centered;
+        normalized.push(centered);
+    }
+    let rms = (sum / samples.len() as f32).sqrt();
+    let duration = samples.len() as f32 / sample_rate as f32;
+    // Scale so the peak lands at 0.6, but never amplify by more than 10x.
+    let gain = if max > 0.0 { (0.6 / max).min(10.0) } else { 1.0 };
+    if gain != 1.0 {
+        for value in &mut normalized {
+            *value = (*value * gain).clamp(-1.0, 1.0);
+        }
+    }
+    eprintln!(
+        "dictation: captured {} samples ({:.2}s), max={:.4}, rms={:.4}, gain={:.2}",
+        samples.len(),
+        duration,
+        max,
+        rms,
+        gain
+    );
+    if duration < 0.2 {
+        return Err("Audio too short for transcription.".to_string());
+    }
+    let audio = if sample_rate == 16_000 {
+        normalized
+    } else {
+        resample_audio(&normalized, sample_rate, 16_000)
+    };
+
+    let mut state = context
+        .create_state()
+        .map_err(|error| format!("Failed to initialize Whisper: {error}"))?;
+    let threads = std::thread::available_parallelism()
+        .map(|value| value.get())
+        .unwrap_or(4);
+    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
+    params.set_print_special(false);
+    params.set_print_progress(false);
+    params.set_print_realtime(false);
+    params.set_print_timestamps(false);
+    params.set_no_timestamps(true);
+    params.set_translate(false);
+    params.set_no_context(true);
+    params.set_single_segment(false);
+    let mut forced_language: Option<String> = None;
+    // `preferred_language` is owned and not used afterwards, so it is moved
+    // here rather than cloned.
+    if let Some(preferred) = preferred_language {
+        if let Some(pref_id) = get_lang_id(&preferred) {
+            if state.pcm_to_mel(&audio, threads).is_ok() {
+                if let Ok((_detected, probs)) = state.lang_detect(0, threads) {
+                    let pref_index = pref_id.max(0) as usize;
+                    let pref_prob = probs.get(pref_index).copied().unwrap_or(0.0);
+                    let best_prob = probs
+                        .iter()
+                        .copied()
+                        .fold(0.0_f32, |acc, value| acc.max(value));
+                    // Keep the preference only if it is a plausible candidate.
+                    if best_prob > 0.0 && (best_prob - pref_prob) <= 0.30 {
+                        forced_language = Some(preferred);
+                    }
+                }
+            }
+        }
+    }
+
+    if let Some(language) = forced_language.as_deref() {
+        // Use the preferred language only when detection is ambiguous.
+        params.set_language(Some(language));
+    } else {
+        // Auto-detect language while still running transcription.
+        params.set_language(Some("auto"));
+    }
+    params.set_n_threads(threads as i32);
+
+    state
+        .full(params, &audio)
+        .map_err(|error| format!("Transcription failed: {error}"))?;
+
+    let segments = state
+        .full_n_segments()
+        .map_err(|error| format!("Failed to read segments: {error}"))?;
+    eprintln!("dictation: whisper segments={}", segments);
+    let mut transcript = String::new();
+    for index in 0..segments {
+        let segment = state
+            .full_get_segment_text(index)
+            .map_err(|error| format!("Failed to read segment: {error}"))?;
+        transcript.push_str(&segment);
+    }
+    let cleaned = transcript.trim().to_string();
+    if cleaned.is_empty() {
+        eprintln!(
+            "dictation: no speech detected (rms={:.4}, max={:.4}, duration={:.2}s, segments={})",
+            rms, max, duration, segments
+        );
+        return Ok(String::new());
+    }
+    Ok(cleaned)
+}
+
+/// Resamples mono `samples` from `from_rate` to `to_rate` (both in Hz) using
+/// linear interpolation between neighbouring input samples.
+///
+/// Returns a copy of the input when the rates are equal or the input is
+/// empty, and an empty vector when either rate is zero. No low-pass filter is
+/// applied before downsampling.
+fn resample_audio(samples: &[f32], from_rate: u32, to_rate: u32) -> Vec<f32> {
+    if from_rate == to_rate || samples.is_empty() {
+        return samples.to_vec();
+    }
+    // A zero source rate would make the ratio infinite, saturating the output
+    // length to `usize::MAX` and panicking on allocation. A zero target rate
+    // produces no output samples anyway.
+    if from_rate == 0 || to_rate == 0 {
+        return Vec::new();
+    }
+    let ratio = f64::from(to_rate) / f64::from(from_rate);
+    let new_len = (samples.len() as f64 * ratio).round() as usize;
+    (0..new_len)
+        .map(|i| {
+            // Position of output sample `i` on the input time axis.
+            let pos = i as f64 / ratio;
+            let idx = pos.floor() as usize;
+            let frac = pos - idx as f64;
+            let s0 = samples.get(idx).copied().unwrap_or(0.0);
+            // Past the last input sample, hold the final value.
+            let s1 = samples.get(idx + 1).copied().unwrap_or(s0);
+            (f64::from(s0) + (f64::from(s1) - f64::from(s0)) * frac) as f32
+        })
+        .collect()
+}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index b6f9f1ff1..57d8af5d4 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -3,6 +3,7 @@ use tauri::{Manager, WebviewUrl, WebviewWindowBuilder};
 
 mod backend;
 mod codex;
+mod dictation;
 mod event_sink;
 mod git;
 mod prompts;
@@ -172,7 +173,14 @@ pub fn run() {
             terminal::terminal_open,
             terminal::terminal_write,
             terminal::terminal_resize,
-            terminal::terminal_close
+            terminal::terminal_close,
+            dictation::dictation_model_status,
+            dictation::dictation_download_model,
+            dictation::dictation_cancel_download,
+            dictation::dictation_remove_model,
+            dictation::dictation_start,
+            dictation::dictation_stop,
+            dictation::dictation_cancel
         ])
         .run(tauri::generate_context!())
         .expect("error while running tauri application");
diff --git a/src-tauri/src/state.rs b/src-tauri/src/state.rs
index b0d6f426a..5465e697f 100644
--- a/src-tauri/src/state.rs
+++ b/src-tauri/src/state.rs
@@ -5,6 +5,7 @@ use std::sync::Arc;
 use tauri::{AppHandle, Manager};
 use tokio::sync::Mutex;
 
+use crate::dictation::DictationState;
 use crate::storage::{read_settings, read_workspaces};
 use crate::types::{AppSettings, WorkspaceEntry};
 
@@ -16,6 +17,7 @@ pub(crate) struct AppState {
     pub(crate) storage_path: PathBuf,
     pub(crate) settings_path: PathBuf,
     pub(crate) app_settings: Mutex<AppSettings>,
+    pub(crate) dictation: Mutex<DictationState>,
 }
 
 impl AppState {
@@ -35,6 +37,7 @@ impl AppState {
             storage_path,
             settings_path,
             app_settings: Mutex::new(app_settings),
+            dictation: Mutex::new(DictationState::default()),
         }
     }
 }
diff --git a/src-tauri/src/types.rs b/src-tauri/src/types.rs
index e35e80701..a9714928e 100644
--- a/src-tauri/src/types.rs
+++ b/src-tauri/src/types.rs
@@ -142,6 +142,20 @@ pub(crate) struct AppSettings {
         rename = "experimentalSteerEnabled"
     )]
     pub(crate) experimental_steer_enabled: bool,
+    #[serde(default = "default_dictation_enabled", rename = "dictationEnabled")]
+    pub(crate) dictation_enabled: bool,
+    #[serde(
+        default = "default_dictation_model_id",
+        rename = "dictationModelId"
+    )]
+    pub(crate) dictation_model_id: String,
+    #[serde(default, rename = "dictationPreferredLanguage")]
+    pub(crate) dictation_preferred_language: Option<String>,
+    #[serde(
+        default = "default_dictation_hold_key",
+        rename = "dictationHoldKey"
+    )]
+    pub(crate) dictation_hold_key: String,
 }
 
 fn default_access_mode() -> String {
@@ -160,6 +174,18 @@ fn default_experimental_steer_enabled() -> bool {
     false
 }
 
+/// Serde default for `AppSettings::dictation_enabled` (`dictationEnabled`):
+/// dictation is off unless the stored settings say otherwise.
+fn default_dictation_enabled() -> bool {
+    false
+}
+
+/// Serde default for `AppSettings::dictation_model_id` (`dictationModelId`);
+/// also used by `AppSettings::default`.
+fn default_dictation_model_id() -> String {
+    String::from("base")
+}
+
+/// Serde default for `AppSettings::dictation_hold_key` (`dictationHoldKey`);
+/// also used by `AppSettings::default`.
+fn default_dictation_hold_key() -> String {
+    String::from("alt")
+}
+
 impl Default for AppSettings {
     fn default() -> Self {
         Self {
@@ -168,6 +194,10 @@ impl Default for AppSettings {
             ui_scale: 1.0,
             notification_sounds_enabled: true,
             experimental_steer_enabled: false,
+            dictation_enabled: false,
+            dictation_model_id: default_dictation_model_id(),
+            dictation_preferred_language: None,
+            dictation_hold_key: default_dictation_hold_key(),
         }
     }
 }
@@ -184,6 +214,10 @@ mod tests {
         assert!((settings.ui_scale - 1.0).abs() < f64::EPSILON);
         assert!(settings.notification_sounds_enabled);
         assert!(!settings.experimental_steer_enabled);
+        assert!(!settings.dictation_enabled);
+        assert_eq!(settings.dictation_model_id, "base");
+        assert!(settings.dictation_preferred_language.is_none());
+        assert_eq!(settings.dictation_hold_key, "alt");
     }
 
     #[test]
diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json
index 21698e16c..ee80ef0c4 100644
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -46,7 +46,10 @@
       "icons/128x128@2x.png",
       "icons/icon.icns",
       "icons/icon.ico"
-    ]
+    ],
+    "macOS": {
+      "infoPlist": "Info.plist"
+    }
   },
   "plugins": {
     "updater": {
diff --git a/src/App.tsx b/src/App.tsx
index ba0baad21..be1a1ae59 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -58,6 +58,9 @@ import {
 import { useAppSettings } from "./features/settings/hooks/useAppSettings";
 import { useUpdater } from "./features/update/hooks/useUpdater";
 import { useComposerImages } from "./features/composer/hooks/useComposerImages";
+import { useDictationModel } from "./features/dictation/hooks/useDictationModel";
+import { useDictation } from "./features/dictation/hooks/useDictation";
+import { useHoldToDictate } from "./features/dictation/hooks/useHoldToDictate";
 import { useQueuedSend } from "./features/threads/hooks/useQueuedSend";
 import { useWorktreePrompt } from "./features/workspaces/hooks/useWorktreePrompt";
 import { useUiScaleShortcuts } from "./features/layout/hooks/useUiScaleShortcuts";
@@ -91,6 +94,20 @@ function MainApp() {
     saveSettings,
     doctor
   } = useAppSettings();
+  const dictationModel = useDictationModel(appSettings.dictationModelId);
+  const {
+    state: dictationState,
+    level: dictationLevel,
+    transcript: dictationTranscript,
+    error: dictationError,
+    hint: dictationHint,
+    start: startDictation,
+    stop: stopDictation,
+    cancel: cancelDictation,
+    clearTranscript: clearDictationTranscript,
+    clearError: clearDictationError,
+    clearHint: clearDictationHint,
+  } = useDictation();
   const {
     uiScale,
     scaleShortcutTitle,
@@ -152,11 +169,66 @@ function MainApp() {
   const [composerInsert, setComposerInsert] = useState<QueuedMessage | null>(
     null
   );
+  type SettingsSection = "projects" | "display" | "dictation" | "codex" | "experimental";
   const [settingsOpen, setSettingsOpen] = useState(false);
+  const [settingsSection, setSettingsSection] = useState<SettingsSection | null>(
+    null,
+  );
   const [reduceTransparency, setReduceTransparency] = useState(() => {
     const stored = localStorage.getItem("reduceTransparency");
     return stored === "true";
   });
+  const dictationReady = dictationModel.status?.state === "ready";
+  const holdDictationKey = (appSettings.dictationHoldKey ?? "").toLowerCase();
+  const handleToggleDictation = useCallback(async () => {
+    if (!appSettings.dictationEnabled || !dictationReady) {
+      return;
+    }
+    try {
+      if (dictationState === "listening") {
+        await stopDictation();
+        return;
+      }
+      if (dictationState === "idle") {
+        await startDictation(appSettings.dictationPreferredLanguage);
+      }
+    } catch {
+      // Errors are surfaced through dictation events.
+    }
+  }, [
+    appSettings.dictationEnabled,
+    appSettings.dictationPreferredLanguage,
+    dictationReady,
+    dictationState,
+    startDictation,
+    stopDictation,
+  ]);
+
+  useEffect(() => {
+    const handleEscape = (event: KeyboardEvent) => {
+      if (event.key !== "Escape") {
+        return;
+      }
+      if (dictationState !== "listening" && dictationState !== "processing") {
+        return;
+      }
+      event.preventDefault();
+      void cancelDictation();
+    };
+    window.addEventListener("keydown", handleEscape);
+    return () => window.removeEventListener("keydown", handleEscape);
+  }, [dictationState, cancelDictation]);
+
+  useHoldToDictate({
+    enabled: appSettings.dictationEnabled,
+    ready: dictationReady,
+    state: dictationState,
+    preferredLanguage: appSettings.dictationPreferredLanguage,
+    holdKey: holdDictationKey,
+    startDictation,
+    stopDictation,
+    cancelDictation,
+  });
   const {
     debugOpen,
     setDebugOpen,
@@ -593,7 +665,13 @@ function MainApp() {
     });
   }
 
-  const handleOpenSettings = () => setSettingsOpen(true);
+  const handleOpenSettings = useCallback(
+    (section?: SettingsSection) => {
+      setSettingsSection(section ?? null);
+      setSettingsOpen(true);
+    },
+    [],
+  );
 
   const orderValue = (entry: WorkspaceInfo) =>
     typeof entry.settings.sortOrder === "number"
@@ -704,7 +782,8 @@ function MainApp() {
     activeRateLimits,
     approvals,
     handleApprovalDecision,
-    onOpenSettings: handleOpenSettings,
+    onOpenSettings: () => handleOpenSettings(),
+    onOpenDictationSettings: () => handleOpenSettings("dictation"),
     onOpenDebug: handleDebugClick,
     showDebugButton,
     onAddWorkspace: handleAddWorkspace,
@@ -885,6 +964,18 @@ function MainApp() {
     prompts,
     files,
     textareaRef: composerInputRef,
+    dictationEnabled: appSettings.dictationEnabled && dictationReady,
+    dictationState,
+    dictationLevel,
+    onToggleDictation: handleToggleDictation,
+    dictationTranscript,
+    onDictationTranscriptHandled: (id) => {
+      clearDictationTranscript(id);
+    },
+    dictationError,
+    onDismissDictationError: clearDictationError,
+    dictationHint,
+    onDismissDictationHint: clearDictationHint,
     showComposer,
     plan: activePlan,
     debugEntries,
@@ -1010,7 +1101,10 @@ function MainApp() {
       {settingsOpen && (
         <SettingsView
           workspaces={workspaces}
-          onClose={() => setSettingsOpen(false)}
+          onClose={() => {
+            setSettingsOpen(false);
+            setSettingsSection(null);
+          }}
           onMoveWorkspace={handleMoveWorkspace}
           onDeleteWorkspace={(workspaceId) => {
             void removeWorkspace(workspaceId);
@@ -1028,6 +1122,11 @@ function MainApp() {
           scaleShortcutTitle={scaleShortcutTitle}
           scaleShortcutText={scaleShortcutText}
           onTestNotificationSound={handleTestNotificationSound}
+          dictationModelStatus={dictationModel.status}
+          onDownloadDictationModel={dictationModel.download}
+          onCancelDictationDownload={dictationModel.cancel}
+          onRemoveDictationModel={dictationModel.remove}
+          initialSection={settingsSection ?? undefined}
         />
       )}
     </div>
diff --git a/src/features/composer/components/Composer.tsx b/src/features/composer/components/Composer.tsx
index fc0a3ed25..3b60c8625 100644
--- a/src/features/composer/components/Composer.tsx
+++ b/src/features/composer/components/Composer.tsx
@@ -1,5 +1,11 @@
 import { useCallback, useEffect, useRef, useState } from "react";
-import type { CustomPromptOption, QueuedMessage, ThreadTokenUsage } from "../../../types";
+import type {
+  CustomPromptOption,
+  DictationTranscript,
+  QueuedMessage,
+  ThreadTokenUsage,
+} from "../../../types";
+import { computeDictationInsertion } from "../../../utils/dictation";
 import { useComposerAutocompleteState } from "../hooks/useComposerAutocompleteState";
 import { ComposerInput } from "./ComposerInput";
 import { ComposerMetaBar } from "./ComposerMetaBar";
@@ -40,6 +46,17 @@ type ComposerProps = {
   insertText?: QueuedMessage | null;
   onInsertHandled?: (id: string) => void;
   textareaRef?: React.RefObject<HTMLTextAreaElement | null>;
+  dictationEnabled?: boolean;
+  dictationState?: "idle" | "listening" | "processing";
+  dictationLevel?: number;
+  onToggleDictation?: () => void;
+  onOpenDictationSettings?: () => void;
+  dictationTranscript?: DictationTranscript | null;
+  onDictationTranscriptHandled?: (id: string) => void;
+  dictationError?: string | null;
+  onDismissDictationError?: () => void;
+  dictationHint?: string | null;
+  onDismissDictationHint?: () => void;
 };
 
 export function Composer({
@@ -77,11 +94,23 @@ export function Composer({
   insertText = null,
   onInsertHandled,
   textareaRef: externalTextareaRef,
+  dictationEnabled = false,
+  dictationState = "idle",
+  dictationLevel = 0,
+  onToggleDictation,
+  onOpenDictationSettings,
+  dictationTranscript = null,
+  onDictationTranscriptHandled,
+  dictationError = null,
+  onDismissDictationError,
+  dictationHint = null,
+  onDismissDictationHint,
 }: ComposerProps) {
   const [text, setText] = useState(draftText);
   const [selectionStart, setSelectionStart] = useState<number | null>(null);
   const internalRef = useRef<HTMLTextAreaElement | null>(null);
   const textareaRef = externalTextareaRef ?? internalRef;
+  const isDictationBusy = dictationState !== "idle";
 
   useEffect(() => {
     setText((prev) => (prev === draftText ? prev : draftText));
@@ -156,6 +185,44 @@ export function Composer({
     onInsertHandled?.(insertText.id);
   }, [insertText, onInsertHandled, setComposerText]);
 
+  useEffect(() => {
+    if (!dictationTranscript) {
+      return;
+    }
+    const textToInsert = dictationTranscript.text.trim();
+    if (!textToInsert) {
+      onDictationTranscriptHandled?.(dictationTranscript.id);
+      return;
+    }
+    const textarea = textareaRef.current;
+    const start = textarea?.selectionStart ?? selectionStart ?? text.length;
+    const end = textarea?.selectionEnd ?? start;
+    const { nextText, nextCursor } = computeDictationInsertion(
+      text,
+      textToInsert,
+      start,
+      end,
+    );
+    setComposerText(nextText);
+    requestAnimationFrame(() => {
+      if (!textareaRef.current) {
+        return;
+      }
+      textareaRef.current.focus();
+      textareaRef.current.setSelectionRange(nextCursor, nextCursor);
+      handleSelectionChange(nextCursor);
+    });
+    onDictationTranscriptHandled?.(dictationTranscript.id);
+  }, [
+    dictationTranscript,
+    handleSelectionChange,
+    onDictationTranscriptHandled,
+    selectionStart,
+    setComposerText,
+    text,
+    textareaRef,
+  ]);
+
   return (
     <footer className={`composer${disabled ? " is-disabled" : ""}`}>
       <ComposerQueue
@@ -170,6 +237,15 @@ export function Composer({
         canStop={canStop}
         onStop={onStop}
         onSend={handleSend}
+        dictationEnabled={dictationEnabled}
+        dictationState={dictationState}
+        dictationLevel={dictationLevel}
+        onToggleDictation={onToggleDictation}
+        onOpenDictationSettings={onOpenDictationSettings}
+        dictationError={dictationError}
+        onDismissDictationError={onDismissDictationError}
+        dictationHint={dictationHint}
+        onDismissDictationHint={onDismissDictationHint}
         attachments={attachedImages}
         onAddAttachment={onPickImages}
         onAttachImages={onAttachImages}
@@ -211,6 +287,10 @@ export function Composer({
             return;
           }
           if (event.key === "Enter" && !event.shiftKey) {
+            if (isDictationBusy) {
+              event.preventDefault();
+              return;
+            }
             event.preventDefault();
             handleSend();
           }
diff --git a/src/features/composer/components/ComposerInput.tsx b/src/features/composer/components/ComposerInput.tsx
index 11fd1e5c7..db3514d27 100644
--- a/src/features/composer/components/ComposerInput.tsx
+++ b/src/features/composer/components/ComposerInput.tsx
@@ -1,9 +1,10 @@
 import { useEffect, useRef } from "react";
 import type { KeyboardEvent, RefObject } from "react";
 import type { AutocompleteItem } from "../hooks/useComposerAutocomplete";
-import { ImagePlus } from "lucide-react";
+import { ImagePlus, Mic, Square } from "lucide-react";
 import { useComposerImageDrop } from "../hooks/useComposerImageDrop";
 import { ComposerAttachments } from "./ComposerAttachments";
+import { DictationWaveform } from "../../dictation/components/DictationWaveform";
 
 type ComposerInputProps = {
   text: string;
@@ -12,6 +13,15 @@ type ComposerInputProps = {
   canStop: boolean;
   onStop: () => void;
   onSend: () => void;
+  dictationState?: "idle" | "listening" | "processing";
+  dictationLevel?: number;
+  dictationEnabled?: boolean;
+  onToggleDictation?: () => void;
+  onOpenDictationSettings?: () => void;
+  dictationError?: string | null;
+  onDismissDictationError?: () => void;
+  dictationHint?: string | null;
+  onDismissDictationHint?: () => void;
   attachments?: string[];
   onAddAttachment?: () => void;
   onAttachImages?: (paths: string[]) => void;
@@ -34,6 +44,15 @@ export function ComposerInput({
   canStop,
   onStop,
   onSend,
+  dictationState = "idle",
+  dictationLevel = 0,
+  dictationEnabled = false,
+  onToggleDictation,
+  onOpenDictationSettings,
+  dictationError = null,
+  onDismissDictationError,
+  dictationHint = null,
+  onDismissDictationHint,
   attachments = [],
   onAddAttachment,
   onAttachImages,
@@ -110,6 +129,37 @@ export function ComposerInput({
       onSend();
     }
   };
+  const isDictating = dictationState === "listening";
+  const isDictationBusy = dictationState !== "idle";
+  const allowOpenDictationSettings = Boolean(
+    onOpenDictationSettings && !dictationEnabled && !disabled,
+  );
+  const micDisabled =
+    disabled || dictationState === "processing" || !dictationEnabled || !onToggleDictation;
+  const micAriaLabel = allowOpenDictationSettings
+    ? "Open dictation settings"
+    : dictationState === "processing"
+      ? "Dictation processing"
+      : isDictating
+        ? "Stop dictation"
+        : "Start dictation";
+  const micTitle = allowOpenDictationSettings
+    ? "Dictation disabled. Open settings"
+    : dictationState === "processing"
+      ? "Processing dictation"
+      : isDictating
+        ? "Stop dictation"
+        : "Start dictation";
+  const handleMicClick = () => {
+    if (allowOpenDictationSettings) {
+      onOpenDictationSettings?.();
+      return;
+    }
+    if (!onToggleDictation || micDisabled) {
+      return;
+    }
+    onToggleDictation();
+  };
 
   return (
     <div className="composer-input">
@@ -162,6 +212,39 @@ export function ComposerInput({
             onPaste={handlePaste}
           />
         </div>
+        {isDictationBusy && (
+          <DictationWaveform
+            active={isDictating}
+            processing={dictationState === "processing"}
+            level={dictationLevel}
+          />
+        )}
+        {dictationError && (
+          <div className="composer-dictation-error" role="status">
+            <span>{dictationError}</span>
+            <button
+              type="button"
+              className="ghost composer-dictation-error-dismiss"
+              onClick={onDismissDictationError}
+            >
+              Dismiss
+            </button>
+          </div>
+        )}
+        {dictationHint && (
+          <div className="composer-dictation-hint" role="status">
+            <span>{dictationHint}</span>
+            {onDismissDictationHint && (
+              <button
+                type="button"
+                className="ghost composer-dictation-error-dismiss"
+                onClick={onDismissDictationHint}
+              >
+                Dismiss
+              </button>
+            )}
+          </div>
+        )}
         {suggestionsOpen && (
           <div
             className="composer-suggestions popover-surface"
@@ -213,10 +296,27 @@ export function ComposerInput({
           </div>
         )}
       </div>
+      <button
+        className={`composer-action composer-action--mic${
+          isDictationBusy ? " is-active" : ""
+        }${dictationState === "processing" ? " is-processing" : ""}${
+          micDisabled ? " is-disabled" : ""
+        }`}
+        onClick={handleMicClick}
+        disabled={
+          disabled ||
+          dictationState === "processing" ||
+          (!onToggleDictation && !allowOpenDictationSettings)
+        }
+        aria-label={micAriaLabel}
+        title={micTitle}
+      >
+        {isDictating ? <Square aria-hidden /> : <Mic aria-hidden />}
+      </button>
       <button
         className={`composer-action${canStop ? " is-stop" : " is-send"}`}
         onClick={handleActionClick}
-        disabled={disabled}
+        disabled={disabled || isDictationBusy}
         aria-label={canStop ? "Stop" : sendLabel}
       >
         {canStop ? (
diff --git a/src/features/dictation/components/DictationWaveform.tsx b/src/features/dictation/components/DictationWaveform.tsx
new file mode 100644
index 000000000..761d5e27e
--- /dev/null
+++ b/src/features/dictation/components/DictationWaveform.tsx
@@ -0,0 +1,63 @@
+import { useEffect, useMemo, useState } from "react";
+
+/** Props for the composer's dictation waveform. */
+type DictationWaveformProps = {
+  /** While true, each level change appends a bar; when false the bars reset to zero. */
+  active: boolean;
+  /** Adds the `is-processing` class and shows the "Processing..." label. */
+  processing: boolean;
+  /** Input level; clamped to [0, 1] before use (non-finite values become 0). */
+  level: number;
+};
+
+// Number of bars kept in the waveform history.
+const MAX_BARS = 36;
+// Floor applied to a bar's value while active, and the base term of every bar's height.
+const MIN_BAR = 0.08;
+
+/** Clamps `level` into [0, 1]; non-finite input (NaN, ±Infinity) maps to 0. */
+function normalizeLevel(level: number) {
+  return Number.isFinite(level) ? Math.max(0, Math.min(1, level)) : 0;
+}
+
+/**
+ * Scrolling bar visualisation of the dictation input level.
+ *
+ * Keeps a fixed-length history of `MAX_BARS` values. While `active`, every
+ * change of the normalized level drops the oldest bar and appends the new
+ * value (never below `MIN_BAR`); when `active` is false the history is reset
+ * to zeros. Note that a new bar is added only when the normalized level (or
+ * `active`) changes, because those are the effect's dependencies.
+ */
+export function DictationWaveform(props: DictationWaveformProps) {
+  const { active, processing, level } = props;
+  const [bars, setBars] = useState<number[]>(() =>
+    Array.from({ length: MAX_BARS }, () => 0),
+  );
+  const normalized = normalizeLevel(level);
+
+  useEffect(() => {
+    if (active) {
+      const sample = Math.max(MIN_BAR, normalized);
+      // Slide the window: discard the oldest bar and append the latest sample.
+      setBars((history) => [...history.slice(1), sample]);
+    } else {
+      setBars(Array.from({ length: MAX_BARS }, () => 0));
+    }
+  }, [active, normalized]);
+
+  // Convert each stored value into a CSS percentage height.
+  const barHeights = useMemo(
+    () =>
+      bars.map((value) => {
+        const percent = Math.round((MIN_BAR + value * 0.92) * 100);
+        return `${percent}%`;
+      }),
+    [bars],
+  );
+
+  const containerClass = processing
+    ? "composer-waveform is-processing"
+    : "composer-waveform";
+
+  return (
+    <div className={containerClass} aria-hidden>
+      {processing ? (
+        <span className="composer-waveform-label">Processing...</span>
+      ) : null}
+      {barHeights.map((height, index) => (
+        <span
+          key={index}
+          className="composer-waveform-bar"
+          style={{ height }}
+        />
+      ))}
+    </div>
+  );
+}
diff --git a/src/features/dictation/hooks/useDictation.ts b/src/features/dictation/hooks/useDictation.ts
new file mode 100644
index 000000000..9e51f0c79
--- /dev/null
+++ b/src/features/dictation/hooks/useDictation.ts
@@ -0,0 +1,144 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import type {
+  DictationEvent,
+  DictationSessionState,
+  DictationTranscript,
+} from "../../../types";
+import { cancelDictation, startDictation, stopDictation } from "../../../services/tauri";
+import { subscribeDictationEvents } from "../../../services/events";
+
+/** Value returned by `useDictation`. */
+type UseDictationResult = {
+  /** Session state from the latest "state" event; starts as "idle". */
+  state: DictationSessionState;
+  /** Value of the latest "level" event; reset to 0 when the state becomes "idle". */
+  level: number;
+  /** Most recent transcript, tagged with a locally generated id, or null. */
+  transcript: DictationTranscript | null;
+  /** Message of the latest "error" event, or null. */
+  error: string | null;
+  /** Message of the latest "canceled" event; cleared automatically after 2 seconds. */
+  hint: string | null;
+  /** Clears `error` and `hint`, then calls `startDictation`. */
+  start: (preferredLanguage: string | null) => Promise<void>;
+  /** Calls `stopDictation`. */
+  stop: () => Promise<void>;
+  /** Calls `cancelDictation`. */
+  cancel: () => Promise<void>;
+  /** Clears `transcript` only if its id matches `id`. */
+  clearTranscript: (id: string) => void;
+  /** Clears `error`. */
+  clearError: () => void;
+  /** Clears `hint` and cancels its pending auto-clear timer. */
+  clearHint: () => void;
+};
+
+/**
+ * React hook that mirrors dictation events into component state and exposes
+ * start/stop/cancel actions.
+ *
+ * Subscribes to dictation events once on mount and unsubscribes on unmount.
+ * Events update the session state, input level, transcript, error and hint;
+ * a "canceled" hint clears itself after 2 seconds.
+ */
+export function useDictation(): UseDictationResult {
+  const [state, setState] = useState<DictationSessionState>("idle");
+  const [level, setLevel] = useState(0);
+  const [transcript, setTranscript] = useState<DictationTranscript | null>(null);
+  const [error, setError] = useState<string | null>(null);
+  const [hint, setHint] = useState<string | null>(null);
+  const hintTimeoutRef = useRef<number | null>(null);
+
+  useEffect(() => {
+    let mounted = true;
+    let dispose: (() => void) | null = null;
+
+    const onEvent = (event: DictationEvent) => {
+      // Ignore events that arrive after the hook has been torn down.
+      if (!mounted) {
+        return;
+      }
+      switch (event.type) {
+        case "state": {
+          setState(event.state);
+          if (event.state === "idle") {
+            setLevel(0);
+          }
+          break;
+        }
+        case "level": {
+          setLevel(event.value);
+          break;
+        }
+        case "transcript": {
+          // The id lets consumers acknowledge exactly this transcript.
+          const stamp = Date.now();
+          const suffix = Math.random().toString(36).slice(2, 8);
+          setTranscript({ id: `${stamp}-${suffix}`, text: event.text });
+          break;
+        }
+        case "error": {
+          setError(event.message);
+          break;
+        }
+        case "canceled": {
+          setHint(event.message);
+          // Restart the auto-dismiss timer for the new hint.
+          if (hintTimeoutRef.current) {
+            window.clearTimeout(hintTimeoutRef.current);
+          }
+          hintTimeoutRef.current = window.setTimeout(() => {
+            setHint(null);
+            hintTimeoutRef.current = null;
+          }, 2000);
+          break;
+        }
+      }
+    };
+
+    const subscribe = async () => {
+      try {
+        const handler = await subscribeDictationEvents(onEvent);
+        if (mounted) {
+          dispose = handler;
+        } else {
+          // Unmounted while subscribing: release the listener immediately.
+          handler();
+        }
+      } catch {
+        // Ignore dictation event errors.
+      }
+    };
+    void subscribe();
+
+    return () => {
+      mounted = false;
+      if (dispose) {
+        dispose();
+      }
+      if (hintTimeoutRef.current) {
+        window.clearTimeout(hintTimeoutRef.current);
+        hintTimeoutRef.current = null;
+      }
+    };
+  }, []);
+
+  const start = useCallback(async (preferredLanguage: string | null) => {
+    // Drop stale feedback from the previous session before starting.
+    setError(null);
+    setHint(null);
+    await startDictation(preferredLanguage);
+  }, []);
+
+  const stop = useCallback(async () => {
+    await stopDictation();
+  }, []);
+
+  const cancel = useCallback(async () => {
+    await cancelDictation();
+  }, []);
+
+  const clearTranscript = useCallback((id: string) => {
+    // Only clear when the id matches, so a newer transcript is never lost.
+    setTranscript((current) => (current?.id === id ? null : current));
+  }, []);
+
+  const clearError = useCallback(() => {
+    setError(null);
+  }, []);
+
+  const clearHint = useCallback(() => {
+    setHint(null);
+    if (hintTimeoutRef.current) {
+      window.clearTimeout(hintTimeoutRef.current);
+      hintTimeoutRef.current = null;
+    }
+  }, []);
+
+  return {
+    state,
+    level,
+    transcript,
+    error,
+    hint,
+    start,
+    stop,
+    cancel,
+    clearTranscript,
+    clearError,
+    clearHint,
+  };
+}
diff --git a/src/features/dictation/hooks/useDictationModel.ts b/src/features/dictation/hooks/useDictationModel.ts
new file mode 100644
index 000000000..e5189aa1a
--- /dev/null
+++ b/src/features/dictation/hooks/useDictationModel.ts
@@ -0,0 +1,87 @@
+import { useCallback, useEffect, useState } from "react";
+import type { DictationModelStatus } from "../../../types";
+import {
+  cancelDictationDownload,
+  downloadDictationModel,
+  getDictationModelStatus,
+  removeDictationModel,
+} from "../../../services/tauri";
+import { subscribeDictationDownload } from "../../../services/events";
+
+type UseDictationModelResult = {
+  status: DictationModelStatus | null;
+  refresh: () => Promise<void>;
+  download: () => Promise<void>;
+  cancel: () => Promise<void>;
+  remove: () => Promise<void>;
+};
+
+/**
+ * Tracks the status of a dictation model.
+ *
+ * Loads the status whenever `modelId` changes, then follows download events.
+ * `refresh`, `download`, `cancel` and `remove` each call the matching service
+ * function and store the status it resolves to; their rejections propagate.
+ */
+export function useDictationModel(modelId: string | null): UseDictationModelResult {
+  const [status, setStatus] = useState<DictationModelStatus | null>(null);
+
+  const refresh = useCallback(async () => {
+    setStatus(await getDictationModelStatus(modelId));
+  }, [modelId]);
+
+  useEffect(() => {
+    let disposed = false;
+    let stopListening: (() => void) | null = null;
+
+    const loadThenSubscribe = async () => {
+      try {
+        const initial = await getDictationModelStatus(modelId);
+        if (!disposed) {
+          setStatus(initial);
+        }
+      } catch {
+        // Best effort: a failed initial status query is ignored.
+      }
+
+      try {
+        const detach = await subscribeDictationDownload((update) => {
+          // With no model selected, updates for every model are accepted.
+          if (modelId && update.modelId !== modelId) {
+            return;
+          }
+          setStatus(update);
+        });
+        // If cleanup already ran while subscribing, detach right away.
+        if (disposed) {
+          detach();
+        } else {
+          stopListening = detach;
+        }
+      } catch {
+        // Best effort: a failed subscription is ignored.
+      }
+    };
+    void loadThenSubscribe();
+
+    // Stop accepting a late initial status and drop the event subscription.
+    return () => {
+      disposed = true;
+      stopListening?.();
+    };
+  }, [modelId]);
+
+  const download = useCallback(async () => {
+    setStatus(await downloadDictationModel(modelId));
+  }, [modelId]);
+
+  const cancel = useCallback(async () => {
+    setStatus(await cancelDictationDownload(modelId));
+  }, [modelId]);
+
+  const remove = useCallback(async () => {
+    setStatus(await removeDictationModel(modelId));
+  }, [modelId]);
+
+  return { status, refresh, download, cancel, remove };
+}
diff --git a/src/features/dictation/hooks/useHoldToDictate.ts b/src/features/dictation/hooks/useHoldToDictate.ts
new file mode 100644
index 000000000..a8f272466
--- /dev/null
+++ b/src/features/dictation/hooks/useHoldToDictate.ts
@@ -0,0 +1,129 @@
+import { useEffect, useRef } from "react";
+import { matchesHoldKey } from "../../../utils/keys";
+import type { DictationSessionState } from "../../../types";
+
+type UseHoldToDictateArgs = {
+  enabled: boolean;
+  ready: boolean;
+  state: DictationSessionState;
+  preferredLanguage: string | null;
+  holdKey: string;
+  startDictation: (preferredLanguage: string | null) => void | Promise<void>;
+  stopDictation: () => void;
+  cancelDictation: () => void;
+};
+
+const HOLD_STOP_GRACE_MS = 1500;
+
+/** Cancels the grace-period timer held in `timerRef`, if one is armed. */
+function clearStopTimer(timerRef: { current: number | null }) {
+  if (timerRef.current !== null) {
+    window.clearTimeout(timerRef.current);
+    timerRef.current = null;
+  }
+}
+
+/**
+ * Push-to-talk: hold `holdKey` to start dictation, release it to stop. A
+ * release that arrives before "listening" is honored once listening begins,
+ * within HOLD_STOP_GRACE_MS. Window blur cancels a held listening session.
+ */
+export function useHoldToDictate({
+  enabled,
+  ready,
+  state,
+  preferredLanguage,
+  holdKey,
+  startDictation,
+  stopDictation,
+  cancelDictation,
+}: UseHoldToDictateArgs) {
+  const holdDictationActive = useRef(false);
+  const holdDictationStopPending = useRef(false);
+  const holdDictationStopTimeout = useRef<number | null>(null);
+
+  // Unmount-only cleanup: clearing the grace timer on every effect re-run
+  // would strand the pending-stop flag and stop a later session on start.
+  useEffect(() => () => clearStopTimer(holdDictationStopTimeout), []);
+
+  useEffect(() => {
+    const normalizedHoldKey = holdKey.toLowerCase();
+    if (!normalizedHoldKey) {
+      return;
+    }
+
+    const stopNow = () => {
+      holdDictationStopPending.current = false;
+      clearStopTimer(holdDictationStopTimeout);
+      stopDictation();
+    };
+    // The key was released before listening began: honor that release now.
+    if (holdDictationStopPending.current && state === "listening") {
+      stopNow();
+    }
+
+    const handleKeyDown = (event: KeyboardEvent) => {
+      if (!matchesHoldKey(event, normalizedHoldKey) || event.repeat) {
+        return;
+      }
+      if (!enabled || !ready || state !== "idle") {
+        return;
+      }
+      holdDictationActive.current = true;
+      holdDictationStopPending.current = false;
+      clearStopTimer(holdDictationStopTimeout);
+      startDictation(preferredLanguage);
+    };
+
+    const handleKeyUp = (event: KeyboardEvent) => {
+      if (!matchesHoldKey(event, normalizedHoldKey)) {
+        return;
+      }
+      if (!holdDictationActive.current) {
+        return;
+      }
+      holdDictationActive.current = false;
+      if (state === "listening") {
+        stopNow();
+        return;
+      }
+      // Not listening yet: remember the release, but only for the grace period.
+      holdDictationStopPending.current = true;
+      clearStopTimer(holdDictationStopTimeout);
+      holdDictationStopTimeout.current = window.setTimeout(() => {
+        holdDictationStopPending.current = false;
+        holdDictationStopTimeout.current = null;
+      }, HOLD_STOP_GRACE_MS);
+    };
+
+    const handleBlur = () => {
+      if (!holdDictationActive.current) {
+        return;
+      }
+      holdDictationActive.current = false;
+      holdDictationStopPending.current = false;
+      clearStopTimer(holdDictationStopTimeout);
+      if (state === "listening") {
+        cancelDictation();
+      }
+    };
+
+    window.addEventListener("keydown", handleKeyDown);
+    window.addEventListener("keyup", handleKeyUp);
+    window.addEventListener("blur", handleBlur);
+    return () => {
+      window.removeEventListener("keydown", handleKeyDown);
+      window.removeEventListener("keyup", handleKeyUp);
+      window.removeEventListener("blur", handleBlur);
+    };
+  }, [
+    cancelDictation,
+    enabled,
+    holdKey,
+    preferredLanguage,
+    ready,
+    startDictation,
+    state,
+    stopDictation,
+  ]);
+}
diff --git a/src/features/layout/hooks/useLayoutNodes.tsx b/src/features/layout/hooks/useLayoutNodes.tsx
index b0839647f..18760f09e 100644
--- a/src/features/layout/hooks/useLayoutNodes.tsx
+++ b/src/features/layout/hooks/useLayoutNodes.tsx
@@ -24,6 +24,8 @@ import type {
   CustomPromptOption,
   DebugEntry,
   DiffLineReference,
+  DictationSessionState,
+  DictationTranscript,
   GitFileStatus,
   GitHubIssue,
   GitLogEntry,
@@ -72,6 +74,7 @@ type LayoutNodesOptions = {
     decision: "accept" | "decline",
   ) => void;
   onOpenSettings: () => void;
+  onOpenDictationSettings?: () => void;
   onOpenDebug: () => void;
   showDebugButton: boolean;
   onAddWorkspace: () => void;
@@ -184,6 +187,16 @@ type LayoutNodesOptions = {
   prompts: CustomPromptOption[];
   files: string[];
   textareaRef: RefObject<HTMLTextAreaElement | null>;
+  dictationEnabled: boolean;
+  dictationState: DictationSessionState;
+  dictationLevel: number;
+  onToggleDictation: () => void;
+  dictationTranscript: DictationTranscript | null;
+  onDictationTranscriptHandled: (id: string) => void;
+  dictationError: string | null;
+  onDismissDictationError: () => void;
+  dictationHint: string | null;
+  onDismissDictationHint: () => void;
   showComposer: boolean;
   plan: TurnPlan | null;
   debugEntries: DebugEntry[];
@@ -312,6 +325,17 @@ export function useLayoutNodes(options: LayoutNodesOptions): LayoutNodesResult {
       prompts={options.prompts}
       files={options.files}
       textareaRef={options.textareaRef}
+      dictationEnabled={options.dictationEnabled}
+      dictationState={options.dictationState}
+      dictationLevel={options.dictationLevel}
+      onToggleDictation={options.onToggleDictation}
+      onOpenDictationSettings={options.onOpenDictationSettings}
+      dictationTranscript={options.dictationTranscript}
+      onDictationTranscriptHandled={options.onDictationTranscriptHandled}
+      dictationError={options.dictationError}
+      onDismissDictationError={options.onDismissDictationError}
+      dictationHint={options.dictationHint}
+      onDismissDictationHint={options.onDismissDictationHint}
     />
   ) : null;
 
diff --git a/src/features/settings/components/SettingsView.tsx b/src/features/settings/components/SettingsView.tsx
index d810efc89..5bbf347c7 100644
--- a/src/features/settings/components/SettingsView.tsx
+++ b/src/features/settings/components/SettingsView.tsx
@@ -5,16 +5,29 @@ import {
   ChevronUp,
   LayoutGrid,
   SlidersHorizontal,
+  Mic,
   Stethoscope,
   TerminalSquare,
   Trash2,
   X,
   FlaskConical,
 } from "lucide-react";
-import type { AppSettings, CodexDoctorResult, WorkspaceInfo } from "../../../types";
-import {
-  clampUiScale,
-} from "../../../utils/uiScale";
+import type {
+  AppSettings,
+  CodexDoctorResult,
+  DictationModelStatus,
+  WorkspaceInfo,
+} from "../../../types";
+import { formatDownloadSize } from "../../../utils/formatting";
+import { clampUiScale } from "../../../utils/uiScale";
+
+const DICTATION_MODELS = [
+  { id: "tiny", label: "Tiny", size: "75 MB", note: "Fastest, least accurate." },
+  { id: "base", label: "Base", size: "142 MB", note: "Balanced default." },
+  { id: "small", label: "Small", size: "466 MB", note: "Better accuracy." },
+  { id: "medium", label: "Medium", size: "1.5 GB", note: "High accuracy." },
+  { id: "large-v3", label: "Large V3", size: "3.0 GB", note: "Best accuracy, heavy download." },
+];
 
 type SettingsViewProps = {
   workspaces: WorkspaceInfo[];
@@ -30,9 +43,14 @@ type SettingsViewProps = {
   scaleShortcutTitle: string;
   scaleShortcutText: string;
   onTestNotificationSound: () => void;
+  dictationModelStatus?: DictationModelStatus | null;
+  onDownloadDictationModel?: () => void;
+  onCancelDictationDownload?: () => void;
+  onRemoveDictationModel?: () => void;
+  initialSection?: CodexSection;
 };
 
-type SettingsSection = "projects" | "display";
+type SettingsSection = "projects" | "display" | "dictation";
 type CodexSection = SettingsSection | "codex" | "experimental";
 
 function orderValue(workspace: WorkspaceInfo) {
@@ -54,6 +72,11 @@ export function SettingsView({
   scaleShortcutTitle,
   scaleShortcutText,
   onTestNotificationSound,
+  dictationModelStatus,
+  onDownloadDictationModel,
+  onCancelDictationDownload,
+  onRemoveDictationModel,
+  initialSection,
 }: SettingsViewProps) {
   const [activeSection, setActiveSection] = useState<CodexSection>("projects");
   const [codexPathDraft, setCodexPathDraft] = useState(appSettings.codexBin ?? "");
@@ -66,6 +89,15 @@ export function SettingsView({
     result: CodexDoctorResult | null;
   }>({ status: "idle", result: null });
   const [isSavingSettings, setIsSavingSettings] = useState(false);
+  const dictationReady = dictationModelStatus?.state === "ready";
+  const dictationProgress = dictationModelStatus?.progress ?? null;
+  const selectedDictationModel = useMemo(() => {
+    return (
+      DICTATION_MODELS.find(
+        (model) => model.id === appSettings.dictationModelId,
+      ) ?? DICTATION_MODELS[1]
+    );
+  }, [appSettings.dictationModelId]);
 
   const projects = useMemo(() => {
     return workspaces
@@ -99,6 +131,12 @@ export function SettingsView({
     });
   }, [projects]);
 
+  useEffect(() => {
+    if (initialSection) {
+      setActiveSection(initialSection);
+    }
+  }, [initialSection]);
+
   const codexDirty =
     (codexPathDraft.trim() || null) !== (appSettings.codexBin ?? null);
 
@@ -214,6 +252,14 @@ export function SettingsView({
               <SlidersHorizontal aria-hidden />
               Display &amp; Sound
             </button>
+            <button
+              type="button"
+              className={`settings-nav ${activeSection === "dictation" ? "active" : ""}`}
+              onClick={() => setActiveSection("dictation")}
+            >
+              <Mic aria-hidden />
+              Dictation
+            </button>
             <button
               type="button"
               className={`settings-nav ${activeSection === "codex" ? "active" : ""}`}
@@ -383,6 +429,208 @@ export function SettingsView({
                 </div>
               </section>
             )}
+            {activeSection === "dictation" && (
+              <section className="settings-section">
+                <div className="settings-section-title">Dictation</div>
+                <div className="settings-section-subtitle">
+                  Enable microphone dictation with on-device transcription.
+                </div>
+                <div className="settings-toggle-row">
+                  <div>
+                    <div className="settings-toggle-title">Enable dictation</div>
+                    <div className="settings-toggle-subtitle">
+                      Downloads the selected Whisper model on first use.
+                    </div>
+                  </div>
+                  <button
+                    type="button"
+                    className={`settings-toggle ${appSettings.dictationEnabled ? "on" : ""}`}
+                    onClick={() => {
+                      const nextEnabled = !appSettings.dictationEnabled;
+                      void onUpdateAppSettings({
+                        ...appSettings,
+                        dictationEnabled: nextEnabled,
+                      });
+                      if (
+                        !nextEnabled &&
+                        dictationModelStatus?.state === "downloading" &&
+                        onCancelDictationDownload
+                      ) {
+                        onCancelDictationDownload();
+                      }
+                      if (
+                        nextEnabled &&
+                        dictationModelStatus?.state === "missing" &&
+                        onDownloadDictationModel
+                      ) {
+                        onDownloadDictationModel();
+                      }
+                    }}
+                    aria-pressed={appSettings.dictationEnabled}
+                  >
+                    <span className="settings-toggle-knob" />
+                  </button>
+                </div>
+                <div className="settings-field">
+                  <label className="settings-field-label" htmlFor="dictation-model">
+                    Dictation model
+                  </label>
+                  <select
+                    id="dictation-model"
+                    className="settings-select"
+                    value={appSettings.dictationModelId}
+                    onChange={(event) =>
+                      void onUpdateAppSettings({
+                        ...appSettings,
+                        dictationModelId: event.target.value,
+                      })
+                    }
+                  >
+                    {DICTATION_MODELS.map((model) => (
+                      <option key={model.id} value={model.id}>
+                        {model.label} ({model.size})
+                      </option>
+                    ))}
+                  </select>
+                  <div className="settings-help">
+                    {selectedDictationModel.note} Download size: {selectedDictationModel.size}.
+                  </div>
+                </div>
+                <div className="settings-field">
+                  <label className="settings-field-label" htmlFor="dictation-language">
+                    Preferred dictation language
+                  </label>
+                  <select
+                    id="dictation-language"
+                    className="settings-select"
+                    value={appSettings.dictationPreferredLanguage ?? ""}
+                    onChange={(event) =>
+                      void onUpdateAppSettings({
+                        ...appSettings,
+                        dictationPreferredLanguage: event.target.value || null,
+                      })
+                    }
+                  >
+                    <option value="">Auto-detect only</option>
+                    <option value="en">English</option>
+                    <option value="es">Spanish</option>
+                    <option value="fr">French</option>
+                    <option value="de">German</option>
+                    <option value="it">Italian</option>
+                    <option value="pt">Portuguese</option>
+                    <option value="nl">Dutch</option>
+                    <option value="sv">Swedish</option>
+                    <option value="no">Norwegian</option>
+                    <option value="da">Danish</option>
+                    <option value="fi">Finnish</option>
+                    <option value="pl">Polish</option>
+                    <option value="tr">Turkish</option>
+                    <option value="ru">Russian</option>
+                    <option value="uk">Ukrainian</option>
+                    <option value="ja">Japanese</option>
+                    <option value="ko">Korean</option>
+                    <option value="zh">Chinese</option>
+                  </select>
+                  <div className="settings-help">
+                    Auto-detect stays on; this nudges the decoder toward your preference.
+                  </div>
+                </div>
+                <div className="settings-field">
+                  <label className="settings-field-label" htmlFor="dictation-hold-key">
+                    Hold-to-dictate key
+                  </label>
+                  <select
+                    id="dictation-hold-key"
+                    className="settings-select"
+                    value={appSettings.dictationHoldKey ?? ""}
+                    onChange={(event) =>
+                      void onUpdateAppSettings({
+                        ...appSettings,
+                        dictationHoldKey: event.target.value,
+                      })
+                    }
+                  >
+                    <option value="">Off</option>
+                    <option value="alt">Option / Alt</option>
+                    <option value="shift">Shift</option>
+                    <option value="control">Control</option>
+                    <option value="meta">Command / Meta</option>
+                  </select>
+                  <div className="settings-help">
+                    Hold the key to start dictation, release to stop and process.
+                  </div>
+                </div>
+                {dictationModelStatus && (
+                  <div className="settings-field">
+                    <div className="settings-field-label">
+                      Model status ({selectedDictationModel.label})
+                    </div>
+                    <div className="settings-help">
+                      {dictationModelStatus.state === "ready" && "Ready for dictation."}
+                      {dictationModelStatus.state === "missing" && "Model not downloaded yet."}
+                      {dictationModelStatus.state === "downloading" &&
+                        "Downloading model..."}
+                      {dictationModelStatus.state === "error" &&
+                        (dictationModelStatus.error ?? "Download error.")}
+                    </div>
+                    {dictationProgress && (
+                      <div className="settings-download-progress">
+                        <div className="settings-download-bar">
+                          <div
+                            className="settings-download-fill"
+                            style={{
+                              width: dictationProgress.totalBytes
+                                ? `${Math.min(
+                                    100,
+                                    (dictationProgress.downloadedBytes /
+                                      dictationProgress.totalBytes) *
+                                      100,
+                                  )}%`
+                                : "0%",
+                            }}
+                          />
+                        </div>
+                        <div className="settings-download-meta">
+                          {formatDownloadSize(dictationProgress.downloadedBytes)}
+                        </div>
+                      </div>
+                    )}
+                    <div className="settings-field-actions">
+                      {dictationModelStatus.state === "missing" && (
+                        <button
+                          type="button"
+                          className="primary"
+                          onClick={onDownloadDictationModel}
+                          disabled={!onDownloadDictationModel}
+                        >
+                          Download model
+                        </button>
+                      )}
+                      {dictationModelStatus.state === "downloading" && (
+                        <button
+                          type="button"
+                          className="ghost settings-button-compact"
+                          onClick={onCancelDictationDownload}
+                          disabled={!onCancelDictationDownload}
+                        >
+                          Cancel download
+                        </button>
+                      )}
+                      {dictationReady && (
+                        <button
+                          type="button"
+                          className="ghost settings-button-compact"
+                          onClick={onRemoveDictationModel}
+                          disabled={!onRemoveDictationModel}
+                        >
+                          Remove model
+                        </button>
+                      )}
+                    </div>
+                  </div>
+                )}
+              </section>
+            )}
             {activeSection === "codex" && (
               <section className="settings-section">
                 <div className="settings-section-title">Codex</div>
diff --git a/src/features/settings/hooks/useAppSettings.ts b/src/features/settings/hooks/useAppSettings.ts
index e431ea9a5..5dbb6b355 100644
--- a/src/features/settings/hooks/useAppSettings.ts
+++ b/src/features/settings/hooks/useAppSettings.ts
@@ -9,6 +9,10 @@ const defaultSettings: AppSettings = {
   uiScale: UI_SCALE_DEFAULT,
   notificationSoundsEnabled: true,
   experimentalSteerEnabled: false,
+  dictationEnabled: false,
+  dictationModelId: "base",
+  dictationPreferredLanguage: null,
+  dictationHoldKey: "alt",
 };
 
 function normalizeAppSettings(settings: AppSettings): AppSettings {
diff --git a/src/services/events.ts b/src/services/events.ts
index 9ebfafb7d..d00a2b839 100644
--- a/src/services/events.ts
+++ b/src/services/events.ts
@@ -1,5 +1,5 @@
 import { listen } from "@tauri-apps/api/event";
-import type { AppServerEvent } from "../types";
+import type { AppServerEvent, DictationEvent, DictationModelStatus } from "../types";
 
 export type Unsubscribe = () => void;
 
@@ -17,6 +17,22 @@ export async function subscribeAppServerEvents(
   });
 }
 
+/** Subscribes `onEvent` to the payload of every "dictation-download" event. */
+export async function subscribeDictationDownload(
+  onEvent: (event: DictationModelStatus) => void,
+): Promise<Unsubscribe> {
+  const eventName = "dictation-download";
+  return listen<DictationModelStatus>(eventName, (e) => onEvent(e.payload));
+}
+
+/** Subscribes `onEvent` to the payload of every "dictation-event" event. */
+export async function subscribeDictationEvents(
+  onEvent: (event: DictationEvent) => void,
+): Promise<Unsubscribe> {
+  const eventName = "dictation-event";
+  return listen<DictationEvent>(eventName, (e) => onEvent(e.payload));
+}
+
 export async function subscribeTerminalOutput(
   onEvent: (event: TerminalOutputEvent) => void,
 ): Promise<Unsubscribe> {
diff --git a/src/services/tauri.ts b/src/services/tauri.ts
index e587ee6bd..ef661525b 100644
--- a/src/services/tauri.ts
+++ b/src/services/tauri.ts
@@ -3,6 +3,8 @@ import { open } from "@tauri-apps/plugin-dialog";
 import type {
   AppSettings,
   CodexDoctorResult,
+  DictationModelStatus,
+  DictationSessionState,
   WorkspaceInfo,
   WorkspaceSettings,
 } from "../types";
@@ -223,6 +225,60 @@ export async function createGitBranch(workspaceId: string, name: string) {
   return invoke("create_git_branch", { workspaceId, name });
 }
 
+function withModelId(modelId?: string | null) {
+  return modelId ? { modelId } : {}; // omit the key for null, undefined or ""
+}
+
+/** Invokes the "dictation_model_status" command for the given model. */
+export async function getDictationModelStatus(
+  modelId?: string | null,
+): Promise<DictationModelStatus> {
+  // withModelId leaves the key out when no model id is supplied.
+  const args = withModelId(modelId);
+  return invoke<DictationModelStatus>("dictation_model_status", args);
+}
+
+/** Invokes the "dictation_download_model" command for the given model. */
+export async function downloadDictationModel(
+  modelId?: string | null,
+): Promise<DictationModelStatus> {
+  // withModelId leaves the key out when no model id is supplied.
+  const args = withModelId(modelId);
+  return invoke<DictationModelStatus>("dictation_download_model", args);
+}
+
+/** Invokes the "dictation_cancel_download" command for the given model. */
+export async function cancelDictationDownload(
+  modelId?: string | null,
+): Promise<DictationModelStatus> {
+  // withModelId leaves the key out when no model id is supplied.
+  const args = withModelId(modelId);
+  return invoke<DictationModelStatus>("dictation_cancel_download", args);
+}
+
+/** Invokes the "dictation_remove_model" command for the given model. */
+export async function removeDictationModel(
+  modelId?: string | null,
+): Promise<DictationModelStatus> {
+  // withModelId leaves the key out when no model id is supplied.
+  const args = withModelId(modelId);
+  return invoke<DictationModelStatus>("dictation_remove_model", args);
+}
+
+export async function startDictation(
+  preferredLanguage: string | null,
+): Promise<DictationSessionState> {
+  return invoke("dictation_start", { preferredLanguage }); // null is sent as-is
+}
+
+export async function stopDictation(): Promise<DictationSessionState> {
+  return invoke("dictation_stop"); // invoked without arguments
+}
+
+export async function cancelDictation(): Promise<DictationSessionState> {
+  return invoke("dictation_cancel"); // invoked without arguments
+}
+
 export async function openTerminalSession(
   workspaceId: string,
   terminalId: string,
diff --git a/src/styles/composer.css b/src/styles/composer.css
index bc58f211a..987e50acd 100644
--- a/src/styles/composer.css
+++ b/src/styles/composer.css
@@ -66,7 +66,7 @@
 
 .composer-input {
   display: grid;
-  grid-template-columns: 1fr auto;
+  grid-template-columns: 1fr auto auto;
   gap: 12px;
   align-items: center;
 }
@@ -197,6 +197,95 @@
   justify-content: center;
 }
 
+.composer-action--mic.is-active {
+  border-color: rgba(120, 235, 190, 0.6);
+  background: rgba(120, 235, 190, 0.12);
+}
+
+.composer-action--mic.is-active:hover {
+  background: rgba(120, 235, 190, 0.18);
+}
+
+.composer-action--mic.is-processing {
+  border-color: rgba(160, 200, 255, 0.6);
+  background: rgba(160, 200, 255, 0.12);
+}
+
+.composer-waveform {
+  margin-top: 10px;
+  padding: 6px 8px;
+  border-radius: 10px;
+  border: 1px solid var(--border-muted);
+  background: var(--surface-card);
+  display: flex;
+  align-items: flex-end;
+  gap: 3px;
+  height: 40px;
+  position: relative;
+}
+
+.composer-waveform.is-processing {
+  background: linear-gradient(
+    90deg,
+    rgba(100, 200, 255, 0.15),
+    rgba(120, 235, 190, 0.15)
+  );
+}
+
+.composer-waveform-bar {
+  flex: 1;
+  min-width: 2px;
+  border-radius: 999px;
+  background: rgba(180, 220, 255, 0.7);
+  transition: height 0.12s ease;
+}
+
+.composer-waveform-label {
+  position: absolute;
+  inset: 0;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 11px;
+  letter-spacing: 0.02em;
+  text-transform: uppercase;
+  color: var(--text-subtle);
+  pointer-events: none;
+}
+
+.composer-dictation-error {
+  margin-top: 8px;
+  padding: 8px 10px;
+  border-radius: 10px;
+  border: 1px solid rgba(255, 120, 120, 0.4);
+  background: rgba(255, 120, 120, 0.08);
+  color: var(--text-strong);
+  font-size: 12px;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+}
+
+.composer-dictation-error-dismiss {
+  font-size: 11px;
+  padding: 4px 8px;
+}
+
+.composer-dictation-hint {
+  margin-top: 8px;
+  padding: 8px 10px;
+  border-radius: 10px;
+  border: 1px solid rgba(120, 190, 255, 0.4);
+  background: rgba(120, 190, 255, 0.12);
+  color: var(--text-strong);
+  font-size: 12px;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+}
+
 .composer-action.is-stop {
   border-color: rgba(255, 107, 107, 0.6);
   background: rgba(255, 107, 107, 0.12);
@@ -242,6 +331,14 @@
   border-color: var(--border-subtle);
 }
 
+.composer-action.is-disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+  background: var(--surface-control-disabled);
+  color: var(--text-fainter);
+  border-color: var(--border-subtle);
+}
+
 .composer-action.is-stop:disabled {
   opacity: 0.4;
   cursor: not-allowed;
diff --git a/src/styles/settings.css b/src/styles/settings.css
index c36bb945c..ae662c9d2 100644
--- a/src/styles/settings.css
+++ b/src/styles/settings.css
@@ -144,6 +144,30 @@
   color: var(--text-subtle);
 }
 
+.settings-download-progress {
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+}
+
+.settings-download-bar {
+  height: 6px;
+  border-radius: 999px;
+  background: var(--surface-control);
+  border: 1px solid var(--border-muted);
+  overflow: hidden;
+}
+
+.settings-download-fill {
+  height: 100%;
+  background: linear-gradient(90deg, rgba(100, 200, 255, 0.7), rgba(120, 235, 190, 0.8));
+}
+
+.settings-download-meta {
+  font-size: 11px;
+  color: var(--text-subtle);
+}
+
 .settings-input {
   flex: 1;
   min-width: 0;
@@ -346,6 +370,10 @@
   margin-top: 12px;
 }
 
+.settings-toggle-row + .settings-field {
+  margin-top: 12px;
+}
+
 .settings-sound-actions {
   display: flex;
   gap: 10px;
diff --git a/src/types.ts b/src/types.ts
index f725411d4..51807c7da 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -68,6 +68,10 @@ export type AppSettings = {
   uiScale: number;
   notificationSoundsEnabled: boolean;
   experimentalSteerEnabled: boolean;
+  dictationEnabled: boolean;
+  dictationModelId: string;
+  dictationPreferredLanguage: string | null;
+  dictationHoldKey: string | null;
 };
 
 export type CodexDoctorResult = {
@@ -231,3 +235,32 @@ export type DebugEntry = {
 };
 
 export type TerminalStatus = "idle" | "connecting" | "ready" | "error";
+
+export type DictationModelState = "missing" | "downloading" | "ready" | "error";
+
+export type DictationDownloadProgress = {
+  totalBytes?: number | null;
+  downloadedBytes: number;
+};
+
+export type DictationModelStatus = {
+  state: DictationModelState;
+  modelId: string;
+  progress?: DictationDownloadProgress | null;
+  error?: string | null;
+  path?: string | null;
+};
+
+export type DictationSessionState = "idle" | "listening" | "processing";
+
+export type DictationEvent =
+  | { type: "state"; state: DictationSessionState }
+  | { type: "level"; value: number }
+  | { type: "transcript"; text: string }
+  | { type: "error"; message: string }
+  | { type: "canceled"; message: string };
+
+export type DictationTranscript = {
+  id: string;
+  text: string;
+};
diff --git a/src/utils/dictation.ts b/src/utils/dictation.ts
new file mode 100644
index 000000000..a544743fb
--- /dev/null
+++ b/src/utils/dictation.ts
@@ -0,0 +1,35 @@
+export type DictationInsertionResult = { // return value of computeDictationInsertion
+  nextText: string; // the text after the transcript has been spliced in
+  nextCursor: number; // offset just past the inserted text (including any added spaces)
+};
+
+/**
+ * Replaces `currentText[start, end)` with `transcriptText`, adding a space on either
+ * side where the insert would otherwise fuse with an adjacent ASCII letter or digit.
+ * Offsets are clamped to the text and ordered so `nextCursor` always lies in `nextText`.
+ *
+ * @param currentText - Text being edited; `transcriptText` is the text spliced into it.
+ * @param start - Selection start offset; `end` is the exclusive selection end offset.
+ * @returns The edited text plus the caret offset just after the inserted text.
+ */
+export function computeDictationInsertion(
+  currentText: string,
+  transcriptText: string,
+  start: number,
+  end: number,
+): DictationInsertionResult {
+  // NaN falls back to 0; everything else is truncated and clamped into the text.
+  const clamp = (offset: number) =>
+    Math.min(Math.max(Math.trunc(offset) || 0, 0), currentText.length);
+  const from = Math.min(clamp(start), clamp(end));
+  const to = Math.max(clamp(start), clamp(end));
+  // charAt() yields "" beyond either edge, which never counts as a word character.
+  const isWordChar = (value: string) => /[A-Za-z0-9]/.test(value);
+  const fuses = (left: string, right: string) => isWordChar(left) && isWordChar(right);
+  const prefix = fuses(currentText.charAt(from - 1), transcriptText.charAt(0)) ? " " : "";
+  const lastChar = transcriptText.charAt(transcriptText.length - 1);
+  const suffix = fuses(lastChar, currentText.charAt(to)) ? " " : "";
+  const insertText = `${prefix}${transcriptText}${suffix}`;
+  const nextText = `${currentText.slice(0, from)}${insertText}${currentText.slice(to)}`;
+  return { nextText, nextCursor: from + insertText.length };
+}
diff --git a/src/utils/formatting.ts b/src/utils/formatting.ts
new file mode 100644
index 000000000..feef58f40
--- /dev/null
+++ b/src/utils/formatting.ts
@@ -0,0 +1,13 @@
+/** Formats a byte count as "N MB"/"N GB" (binary units); 1 decimal below 10, else none. */
+export function formatDownloadSize(bytes: number | null | undefined) {
+  if (!bytes || bytes <= 0) {
+    return "0 MB"; // null, undefined, NaN, zero and negative sizes all read as empty
+  }
+  // Pick digits from the *rounded* text so 9.96 renders as "10", never "10.0".
+  const render = (value: number) =>
+    Number(value.toFixed(1)) >= 10 ? value.toFixed(0) : value.toFixed(1);
+  const mb = bytes / 1024 ** 2;
+  // Switch to GB once MB would round up to 1024, avoiding a "1024 MB" label.
+  const inGb = Math.round(mb) >= 1024;
+  return inGb ? `${render(mb / 1024)} GB` : `${render(mb)} MB`;
+}
diff --git a/src/utils/keys.ts b/src/utils/keys.ts
new file mode 100644
index 000000000..3aa7657f9
--- /dev/null
+++ b/src/utils/keys.ts
@@ -0,0 +1,14 @@
+/** Reports whether `event.key` is the modifier that the `holdKey` name refers to. */
+export function matchesHoldKey(event: KeyboardEvent, holdKey: string) {
+  // Supported hold-key names, each paired with the `KeyboardEvent.key` value
+  // that this function compares the event against.
+  const keyByHoldKey = new Map<string, string>([
+    ["alt", "Alt"],
+    ["shift", "Shift"],
+    ["control", "Control"],
+    ["meta", "Meta"],
+  ]);
+  const expectedKey = keyByHoldKey.get(holdKey);
+  // Any other name (the lookup is case-sensitive) matches no event at all.
+  return expectedKey !== undefined && event.key === expectedKey;
+}