diff --git a/DEV-LOG.md b/DEV-LOG.md index 18700fca6..5edde06b3 100644 --- a/DEV-LOG.md +++ b/DEV-LOG.md @@ -1,5 +1,35 @@ # DEV-LOG +## Enable Voice Mode / VOICE_MODE (2026-04-03) + +恢复 `/voice` 语音输入功能。`src/` 下所有 voice 相关源码已与官方一致(0 行差异),问题出在:① `VOICE_MODE` 编译开关未开,命令不显示;② `audio-capture-napi` 是 SoX 子进程 stub(Windows 不支持),缺少官方原生 `.node` 二进制。 + +**新增文件:** + +| 文件 | 说明 | +|------|------| +| `vendor/audio-capture/{platform}/audio-capture.node` | 6 个平台的原生音频二进制(cpal,来自参考项目) | +| `vendor/audio-capture-src/index.ts` | 原生模块加载器(按 `${arch}-${platform}` 动态 require `.node`) | + +**修改文件:** + +| 文件 | 变更 | +|------|------| +| `packages/audio-capture-napi/src/index.ts` | SoX 子进程 stub → 原生 `.node` 加载器(含 `process.cwd()` workspace 路径 fallback) | +| `scripts/dev.ts` | `DEFAULT_FEATURES` 加 `"VOICE_MODE"` | +| `build.ts` | `DEFAULT_BUILD_FEATURES` 加 `"VOICE_MODE"` | +| `docs/features/voice-mode.md` | 追加恢复计划章节(第八节) | + +**验证结果:** + +- `isNativeAudioAvailable()` → `true`(Windows x64 原生 `.node` 加载成功) +- `feature('VOICE_MODE')` → `ENABLED` +- `bun run build` → voice 代码编入产物 + +**运行时前置条件:** claude.ai OAuth 登录 + 麦克风权限 + +--- + ## Enable Remote Control / BRIDGE_MODE (2026-04-03) **PR**: [claude-code-best/claude-code#60](https://github.com/claude-code-best/claude-code/pull/60) diff --git a/build.ts b/build.ts index 11c4a2481..f871b1329 100644 --- a/build.ts +++ b/build.ts @@ -10,7 +10,7 @@ rmSync(outdir, { recursive: true, force: true }); // Default features that match the official CLI build. // Additional features can be enabled via FEATURE_<NAME>=1 env vars. 
-const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE"]; +const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE", "VOICE_MODE"]; // Collect FEATURE_* env vars → Bun.build features const envFeatures = Object.keys(process.env) diff --git a/packages/audio-capture-napi/src/index.ts b/packages/audio-capture-napi/src/index.ts index a06c36b79..d511f924d 100644 --- a/packages/audio-capture-napi/src/index.ts +++ b/packages/audio-capture-napi/src/index.ts @@ -1,151 +1,152 @@ -// audio-capture-napi: cross-platform audio capture using SoX (rec) on macOS -// and arecord (ALSA) on Linux. Replaces the original cpal-based native module. -import { type ChildProcess, spawn, spawnSync } from 'child_process' - -// ─── State ─────────────────────────────────────────────────────────── - -let recordingProcess: ChildProcess | null = null -let availabilityCache: boolean | null = null - -// ─── Helpers ───────────────────────────────────────────────────────── - -function commandExists(cmd: string): boolean { - const result = spawnSync(cmd, ['--version'], { - stdio: 'ignore', - timeout: 3000, - }) - return result.error === undefined +type AudioCaptureNapi = { + startRecording( + onData: (data: Buffer) => void, + onEnd: () => void, + ): boolean + stopRecording(): void + isRecording(): boolean + startPlayback(sampleRate: number, channels: number): boolean + writePlaybackData(data: Buffer): void + stopPlayback(): void + isPlaying(): boolean + // TCC microphone authorization status (macOS only): + // 0 = notDetermined, 1 = restricted, 2 = denied, 3 = authorized. + // Linux: always returns 3 (authorized) — no system-level microphone permission API. + // Windows: returns 3 (authorized) if registry key absent or allowed, + // 2 (denied) if microphone access is explicitly denied. 
+ microphoneAuthorizationStatus?(): number } -// ─── Public API ────────────────────────────────────────────────────── +let cachedModule: AudioCaptureNapi | null = null +let loadAttempted = false -/** - * Check whether a supported audio recording command is available. - * Returns true if `rec` (SoX) is found on macOS, or `arecord` (ALSA) on Linux. - * Windows is not supported and always returns false. - */ -export function isNativeAudioAvailable(): boolean { - if (availabilityCache !== null) { - return availabilityCache +function loadModule(): AudioCaptureNapi | null { + if (loadAttempted) { + return cachedModule } + loadAttempted = true - if (process.platform === 'win32') { - availabilityCache = false - return false + // Supported platforms: macOS (darwin), Linux, Windows (win32) + const platform = process.platform + if (platform !== 'darwin' && platform !== 'linux' && platform !== 'win32') { + return null } - if (process.platform === 'darwin') { - // macOS: use SoX rec - availabilityCache = commandExists('rec') - return availabilityCache + // Candidate 1: native-embed path (bun compile). AUDIO_CAPTURE_NODE_PATH is + // defined at build time in build-with-plugins.ts for native builds only — the + // define resolves it to the static literal "../../audio-capture.node" so bun + // compile can rewrite it to /$bunfs/root/audio-capture.node. MUST stay a + // direct require(env var) — bun cannot analyze require(variable) from a loop. + if (process.env.AUDIO_CAPTURE_NODE_PATH) { + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + cachedModule = require( + process.env.AUDIO_CAPTURE_NODE_PATH, + ) as AudioCaptureNapi + return cachedModule + } catch { + // fall through to runtime fallbacks below + } } - if (process.platform === 'linux') { - // Linux: prefer arecord, fall back to rec - availabilityCache = commandExists('arecord') || commandExists('rec') - return availabilityCache + // Candidates 2-4: npm-install, dev/source, and workspace layouts. 
+ // In bundled output, require() resolves relative to cli.js at the package root. + // In dev, it resolves relative to this file. When loaded from a workspace + // package (packages/audio-capture-napi/src/), we need an absolute path fallback. + const platformDir = `${process.arch}-${platform}` + const fallbacks = [ + `./vendor/audio-capture/${platformDir}/audio-capture.node`, + `../audio-capture/${platformDir}/audio-capture.node`, + `${process.cwd()}/vendor/audio-capture/${platformDir}/audio-capture.node`, + ] + for (const p of fallbacks) { + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + cachedModule = require(p) as AudioCaptureNapi + return cachedModule + } catch { + // try next + } } - - availabilityCache = false - return false + return null } -/** - * Check whether a recording is currently in progress. - */ -export function isNativeRecordingActive(): boolean { - return recordingProcess !== null && !recordingProcess.killed -} - -/** - * Stop the active recording process, if any. - */ -export function stopNativeRecording(): void { - if (recordingProcess) { - const proc = recordingProcess - recordingProcess = null - if (!proc.killed) { - proc.kill('SIGTERM') - } - } +export function isNativeAudioAvailable(): boolean { + return loadModule() !== null } -/** - * Start recording audio. Raw PCM data (16kHz, 16-bit signed, mono) is - * streamed via the onData callback. onEnd is called when recording stops - * (either from silence detection or process termination). - * - * Returns true if recording started successfully, false otherwise. 
- */ export function startNativeRecording( onData: (data: Buffer) => void, onEnd: () => void, ): boolean { - // Don't start if already recording - if (isNativeRecordingActive()) { - stopNativeRecording() - } - - if (!isNativeAudioAvailable()) { + const mod = loadModule() + if (!mod) { return false } + return mod.startRecording(onData, onEnd) +} - let child: ChildProcess +export function stopNativeRecording(): void { + const mod = loadModule() + if (!mod) { + return + } + mod.stopRecording() +} - if (process.platform === 'darwin' || (process.platform === 'linux' && commandExists('rec'))) { - // Use SoX rec: output raw PCM 16kHz 16-bit signed mono to stdout - child = spawn( - 'rec', - [ - '-q', // quiet - '--buffer', - '1024', // small buffer for low latency - '-t', 'raw', // raw PCM output - '-r', '16000', // 16kHz sample rate - '-e', 'signed', // signed integer encoding - '-b', '16', // 16-bit - '-c', '1', // mono - '-', // output to stdout - ], - { stdio: ['pipe', 'pipe', 'pipe'] }, - ) - } else if (process.platform === 'linux' && commandExists('arecord')) { - // Use arecord: output raw PCM 16kHz 16-bit signed LE mono to stdout - child = spawn( - 'arecord', - [ - '-f', 'S16_LE', // signed 16-bit little-endian - '-r', '16000', // 16kHz sample rate - '-c', '1', // mono - '-t', 'raw', // raw PCM, no header - '-q', // quiet - '-', // output to stdout - ], - { stdio: ['pipe', 'pipe', 'pipe'] }, - ) - } else { +export function isNativeRecordingActive(): boolean { + const mod = loadModule() + if (!mod) { return false } + return mod.isRecording() +} - recordingProcess = child - - child.stdout?.on('data', (chunk: Buffer) => { - onData(chunk) - }) +export function startNativePlayback( + sampleRate: number, + channels: number, +): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.startPlayback(sampleRate, channels) +} - // Consume stderr to prevent backpressure - child.stderr?.on('data', () => {}) +export function 
writeNativePlaybackData(data: Buffer): void { + const mod = loadModule() + if (!mod) { + return + } + mod.writePlaybackData(data) +} - child.on('close', () => { - recordingProcess = null - onEnd() - }) +export function stopNativePlayback(): void { + const mod = loadModule() + if (!mod) { + return + } + mod.stopPlayback() +} - child.on('error', () => { - recordingProcess = null - onEnd() - }) +export function isNativePlaying(): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.isPlaying() +} - return true +// Returns the microphone authorization status. +// On macOS, returns the TCC status: 0=notDetermined, 1=restricted, 2=denied, 3=authorized. +// On Linux, always returns 3 (authorized) — no system-level mic permission API. +// On Windows, returns 3 (authorized) if registry key absent or allowed, 2 (denied) if explicitly denied. +// Returns 0 (notDetermined) if the native module is unavailable. +export function microphoneAuthorizationStatus(): number { + const mod = loadModule() + if (!mod || !mod.microphoneAuthorizationStatus) { + return 0 + } + return mod.microphoneAuthorizationStatus() } diff --git a/scripts/dev.ts b/scripts/dev.ts index 437508988..f8d20a031 100644 --- a/scripts/dev.ts +++ b/scripts/dev.ts @@ -15,7 +15,7 @@ const defineArgs = Object.entries(defines).flatMap(([k, v]) => [ // Bun --feature flags: enable feature() gates at runtime. // Default features enabled in dev mode. -const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE"]; +const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE", "VOICE_MODE"]; // Any env var matching FEATURE_<NAME>=1 will also enable that feature. // e.g. 
FEATURE_PROACTIVE=1 bun run dev diff --git a/vendor/audio-capture-src/index.ts b/vendor/audio-capture-src/index.ts new file mode 100644 index 000000000..4cac2037c --- /dev/null +++ b/vendor/audio-capture-src/index.ts @@ -0,0 +1,151 @@ + +type AudioCaptureNapi = { + startRecording( + onData: (data: Buffer) => void, + onEnd: () => void, + ): boolean + stopRecording(): void + isRecording(): boolean + startPlayback(sampleRate: number, channels: number): boolean + writePlaybackData(data: Buffer): void + stopPlayback(): void + isPlaying(): boolean + // TCC microphone authorization status (macOS only): + // 0 = notDetermined, 1 = restricted, 2 = denied, 3 = authorized. + // Linux: always returns 3 (authorized) — no system-level microphone permission API. + // Windows: returns 3 (authorized) if registry key absent or allowed, + // 2 (denied) if microphone access is explicitly denied. + microphoneAuthorizationStatus?(): number +} + +let cachedModule: AudioCaptureNapi | null = null +let loadAttempted = false + +function loadModule(): AudioCaptureNapi | null { + if (loadAttempted) { + return cachedModule + } + loadAttempted = true + + // Supported platforms: macOS (darwin), Linux, Windows (win32) + const platform = process.platform + if (platform !== 'darwin' && platform !== 'linux' && platform !== 'win32') { + return null + } + + // Candidate 1: native-embed path (bun compile). AUDIO_CAPTURE_NODE_PATH is + // defined at build time in build-with-plugins.ts for native builds only — the + // define resolves it to the static literal "../../audio-capture.node" so bun + // compile can rewrite it to /$bunfs/root/audio-capture.node. MUST stay a + // direct require(env var) — bun cannot analyze require(variable) from a loop. 
+ if (process.env.AUDIO_CAPTURE_NODE_PATH) { + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + cachedModule = require( + process.env.AUDIO_CAPTURE_NODE_PATH, + ) as AudioCaptureNapi + return cachedModule + } catch { + // fall through to runtime fallbacks below + } + } + + // Candidates 2/3: npm-install and dev/source layouts. Dynamic require is + // fine here — in bundled output (node --target build) require() resolves at + // runtime relative to cli.js at the package root; in dev it resolves + // relative to this file (vendor/audio-capture-src/index.ts). + const platformDir = `${process.arch}-${platform}` + const fallbacks = [ + `./vendor/audio-capture/${platformDir}/audio-capture.node`, + `../audio-capture/${platformDir}/audio-capture.node`, + ] + for (const p of fallbacks) { + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + cachedModule = require(p) as AudioCaptureNapi + return cachedModule + } catch { + // try next + } + } + return null +} + +export function isNativeAudioAvailable(): boolean { + return loadModule() !== null +} + +export function startNativeRecording( + onData: (data: Buffer) => void, + onEnd: () => void, +): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.startRecording(onData, onEnd) +} + +export function stopNativeRecording(): void { + const mod = loadModule() + if (!mod) { + return + } + mod.stopRecording() +} + +export function isNativeRecordingActive(): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.isRecording() +} + +export function startNativePlayback( + sampleRate: number, + channels: number, +): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.startPlayback(sampleRate, channels) +} + +export function writeNativePlaybackData(data: Buffer): void { + const mod = loadModule() + if (!mod) { + return + } + mod.writePlaybackData(data) +} + +export function stopNativePlayback(): void 
{ + const mod = loadModule() + if (!mod) { + return + } + mod.stopPlayback() +} + +export function isNativePlaying(): boolean { + const mod = loadModule() + if (!mod) { + return false + } + return mod.isPlaying() +} + +// Returns the microphone authorization status. +// On macOS, returns the TCC status: 0=notDetermined, 1=restricted, 2=denied, 3=authorized. +// On Linux, always returns 3 (authorized) — no system-level mic permission API. +// On Windows, returns 3 (authorized) if registry key absent or allowed, 2 (denied) if explicitly denied. +// Returns 0 (notDetermined) if the native module is unavailable. +export function microphoneAuthorizationStatus(): number { + const mod = loadModule() + if (!mod || !mod.microphoneAuthorizationStatus) { + return 0 + } + return mod.microphoneAuthorizationStatus() +} diff --git a/vendor/audio-capture/arm64-darwin/audio-capture.node b/vendor/audio-capture/arm64-darwin/audio-capture.node new file mode 100644 index 000000000..a60bd3c5c Binary files /dev/null and b/vendor/audio-capture/arm64-darwin/audio-capture.node differ diff --git a/vendor/audio-capture/arm64-linux/audio-capture.node b/vendor/audio-capture/arm64-linux/audio-capture.node new file mode 100644 index 000000000..34f862ac3 Binary files /dev/null and b/vendor/audio-capture/arm64-linux/audio-capture.node differ diff --git a/vendor/audio-capture/arm64-win32/audio-capture.node b/vendor/audio-capture/arm64-win32/audio-capture.node new file mode 100644 index 000000000..43b0fbbc7 Binary files /dev/null and b/vendor/audio-capture/arm64-win32/audio-capture.node differ diff --git a/vendor/audio-capture/x64-darwin/audio-capture.node b/vendor/audio-capture/x64-darwin/audio-capture.node new file mode 100644 index 000000000..2b4f4c5f3 Binary files /dev/null and b/vendor/audio-capture/x64-darwin/audio-capture.node differ diff --git a/vendor/audio-capture/x64-linux/audio-capture.node b/vendor/audio-capture/x64-linux/audio-capture.node new file mode 100644 index 000000000..3f57d8550 
Binary files /dev/null and b/vendor/audio-capture/x64-linux/audio-capture.node differ diff --git a/vendor/audio-capture/x64-win32/audio-capture.node b/vendor/audio-capture/x64-win32/audio-capture.node new file mode 100644 index 000000000..559527664 Binary files /dev/null and b/vendor/audio-capture/x64-win32/audio-capture.node differ