diff --git a/src/components/SettingsPage.tsx b/src/components/SettingsPage.tsx index 9178d5bc..9e1fe89e 100644 --- a/src/components/SettingsPage.tsx +++ b/src/components/SettingsPage.tsx @@ -26,6 +26,7 @@ import { RotateCw, BookOpen, Copy, + Zap, } from "lucide-react"; import { useAuth } from "../hooks/useAuth"; import { NEON_AUTH_URL, signOut } from "../lib/neonAuth"; @@ -176,6 +177,8 @@ interface TranscriptionSectionProps { setCustomTranscriptionApiKey: (key: string) => void; cloudTranscriptionBaseUrl?: string; setCloudTranscriptionBaseUrl: (url: string) => void; + dictationMode: "normal" | "fast"; + setDictationMode: (mode: string) => void; toast: (opts: { title: string; description: string; @@ -211,6 +214,8 @@ function TranscriptionSection({ setCustomTranscriptionApiKey, cloudTranscriptionBaseUrl, setCloudTranscriptionBaseUrl, + dictationMode, + setDictationMode, toast, }: TranscriptionSectionProps) { const { t } = useTranslation(); @@ -350,6 +355,57 @@ function TranscriptionSection({ )} + {/* Dictation mode selector — visible for all cloud modes, signed in or not */} + {!useLocalWhisper && ( + + + + {t("settingsPage.transcription.dictationMode.title")} + + + { + setDictationMode("normal"); + logger.info("Dictation mode changed", { mode: "batch" }, "audio"); + }} + className={`flex-1 flex items-center gap-2 px-3 py-2 rounded-md border text-xs font-medium transition-colors cursor-pointer ${ + dictationMode === "normal" + ? "border-primary bg-primary/10 dark:bg-primary/15 text-primary" + : "border-border bg-background text-muted-foreground hover:bg-muted/60" + }`} + > + + + {t("settingsPage.transcription.dictationMode.normal")} + + {t("settingsPage.transcription.dictationMode.normalDescription")} + + + + { + setDictationMode("fast"); + logger.info("Dictation mode changed", { mode: "streaming" }, "streaming"); + }} + className={`flex-1 flex items-center gap-2 px-3 py-2 rounded-md border text-xs font-medium transition-colors cursor-pointer ${ + dictationMode === "fast" + ? "border-primary bg-primary/10 dark:bg-primary/15 text-primary" + : "border-border bg-background text-muted-foreground hover:bg-muted/60" + }`} + > + + + {t("settingsPage.transcription.dictationMode.fast")} + + {t("settingsPage.transcription.dictationMode.fastDescription")} + + + + + + + )} + {/* Custom Setup model picker — shown when Custom Setup is active or not signed in */} {(isCustomMode || !isSignedIn) && ( ); diff --git a/src/helpers/audioManager.js b/src/helpers/audioManager.js index 4d49b91f..2f24db78 100644 --- a/src/helpers/audioManager.js +++ b/src/helpers/audioManager.js @@ -324,6 +324,13 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor); ); } + const useStreaming = this.shouldUseStreaming(); + logger.info( + "Transcription mode", + { mode: useStreaming ? "streaming" : "batch" }, + useStreaming ? "streaming" : "audio" + ); + // Mix in system audio if enabled let recordingStream = micStream; if (this.systemAudioEnabled) { @@ -2003,6 +2010,7 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor); shouldUseStreaming(isSignedInOverride) { const s = getSettings(); + if (s.dictationMode === "normal") return false; if (s.useLocalWhisper) return false; if (REALTIME_MODELS.has(s.cloudTranscriptionModel)) { diff --git a/src/hooks/useSettings.ts b/src/hooks/useSettings.ts index 56d0c62f..a91070b0 100644 --- a/src/hooks/useSettings.ts +++ b/src/hooks/useSettings.ts @@ -20,6 +20,7 @@ export interface TranscriptionSettings { cloudTranscriptionMode: string; customDictionary: string[]; assemblyAiStreaming: boolean; + dictationMode: "normal" | "fast"; } export interface ReasoningSettings { @@ -179,6 +180,8 @@ function useSettingsInternal() { customDictionary: store.customDictionary, assemblyAiStreaming: store.assemblyAiStreaming, setAssemblyAiStreaming: store.setAssemblyAiStreaming, + dictationMode: store.dictationMode, + setDictationMode: store.setDictationMode, useReasoningModel: store.useReasoningModel, reasoningModel: store.reasoningModel, reasoningProvider: store.reasoningProvider, diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json index 3bb3a6dd..6667ff9b 100644 --- a/src/locales/de/translation.json +++ b/src/locales/de/translation.json @@ -1319,6 +1319,13 @@ "description": "Konfigurieren Sie Ihren eigenen Anbieter und API-Schlüssel.", "title": "Zu benutzerdefinierter Einrichtung gewechselt" } + }, + "dictationMode": { + "title": "Diktiermodus", + "normal": "Normal", + "normalDescription": "Beste Genauigkeit, braucht aber einen Moment zur Verarbeitung nach der Aufnahme.", + "fast": "Schnell", + "fastDescription": "WebSocket-Streaming über OpenAI Realtime API. Live-Transkription während Sie sprechen." } } }, diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json index 9cfa4475..f55e08f0 100644 --- a/src/locales/en/translation.json +++ b/src/locales/en/translation.json @@ -1389,6 +1389,13 @@ "description": "Configure your own provider and API key.", "title": "Switched to Custom Setup" } + }, + "dictationMode": { + "title": "Dictation Mode", + "normal": "Normal", + "normalDescription": "Best accuracy, but takes a moment to process after recording.", + "fast": "Fast", + "fastDescription": "WebSocket streaming via OpenAI Realtime API. Live transcription as you speak." } } }, diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json index 65c8f091..41e22639 100644 --- a/src/locales/es/translation.json +++ b/src/locales/es/translation.json @@ -1319,6 +1319,13 @@ "description": "Configura tu propio proveedor y clave API.", "title": "Cambiado a configuración personalizada" } + }, + "dictationMode": { + "title": "Modo de dictado", + "normal": "Normal", + "normalDescription": "Máxima precisión, pero tarda un momento en procesar tras la grabación.", + "fast": "Rápido", + "fastDescription": "Streaming WebSocket a través de OpenAI Realtime API. Transcripción en tiempo real." } } }, diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json index 50b0ef89..698ca097 100644 --- a/src/locales/fr/translation.json +++ b/src/locales/fr/translation.json @@ -1319,6 +1319,13 @@ "description": "Configurez votre propre fournisseur et clé API.", "title": "Basculé vers la configuration personnalisée" } + }, + "dictationMode": { + "title": "Mode de dictée", + "normal": "Normal", + "normalDescription": "Meilleure précision, mais prend un moment pour traiter après l'enregistrement.", + "fast": "Rapide", + "fastDescription": "Streaming WebSocket via OpenAI Realtime API. Transcription en temps réel." } } }, diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json index 5577e040..b6fbc462 100644 --- a/src/locales/it/translation.json +++ b/src/locales/it/translation.json @@ -1319,6 +1319,13 @@ "description": "Configura il tuo provider e la tua chiave API.", "title": "Passato a configurazione personalizzata" } + }, + "dictationMode": { + "title": "Modalità dettatura", + "normal": "Normale", + "normalDescription": "Massima accuratezza, ma richiede un momento per elaborare dopo la registrazione.", + "fast": "Veloce", + "fastDescription": "Streaming WebSocket tramite OpenAI Realtime API. Trascrizione in tempo reale." } } }, diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json index 6584d4fe..1b964f3d 100644 --- a/src/locales/ja/translation.json +++ b/src/locales/ja/translation.json @@ -1319,6 +1319,13 @@ "description": "独自のプロバイダーと API キーを設定してください。", "title": "カスタム設定に切り替えました" } + }, + "dictationMode": { + "title": "ディクテーションモード", + "normal": "通常", + "normalDescription": "最高の精度ですが、録音後に処理に少し時間がかかります。", + "fast": "高速", + "fastDescription": "OpenAI Realtime API経由のWebSocketストリーミング。リアルタイム文字起こし。" } } }, diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json index d49d8e62..72fbdbff 100644 --- a/src/locales/pt/translation.json +++ b/src/locales/pt/translation.json @@ -1291,6 +1291,13 @@ "description": "Configure seu próprio provedor e chave API.", "title": "Mudou para configuração personalizada" } + }, + "dictationMode": { + "title": "Modo de ditado", + "normal": "Normal", + "normalDescription": "Melhor precisão, mas leva um momento para processar após a gravação.", + "fast": "Rápido", + "fastDescription": "Streaming WebSocket via OpenAI Realtime API. Transcrição em tempo real." } } }, diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json index 0f98f327..430fffb3 100644 --- a/src/locales/ru/translation.json +++ b/src/locales/ru/translation.json @@ -1319,6 +1319,13 @@ "description": "Настройте собственного провайдера и API-ключ.", "title": "Переключено на пользовательскую настройку" } + }, + "dictationMode": { + "title": "Режим диктовки", + "normal": "Обычный", + "normalDescription": "Лучшая точность, но требуется момент на обработку после записи.", + "fast": "Быстрый", + "fastDescription": "WebSocket-стриминг через OpenAI Realtime API. Транскрипция в реальном времени." } } }, diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json index b12e1c62..2757b379 100644 --- a/src/locales/zh-CN/translation.json +++ b/src/locales/zh-CN/translation.json @@ -1319,6 +1319,13 @@ "description": "请配置你的服务商和 API Key。", "title": "已切换到自定义设置" } + }, + "dictationMode": { + "title": "听写模式", + "normal": "普通", + "normalDescription": "最佳准确度,但录音后需要片刻处理。", + "fast": "快速", + "fastDescription": "通过 OpenAI Realtime API 进行 WebSocket 流式传输。实时转录。" } } }, diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json index 80c45443..d5ef1540 100644 --- a/src/locales/zh-TW/translation.json +++ b/src/locales/zh-TW/translation.json @@ -1319,6 +1319,13 @@ "description": "設定你自己的供應商和 API Key。", "title": "已切換至自訂設定" } + }, + "dictationMode": { + "title": "聽寫模式", + "normal": "一般", + "normalDescription": "最佳準確度,但錄音後需要片刻處理。", + "fast": "快速", + "fastDescription": "透過 OpenAI Realtime API 進行 WebSocket 串流。即時轉錄。" } } }, diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index bfb84313..87a34418 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -121,6 +121,7 @@ export interface SettingsState setCloudReasoningBaseUrl: (value: string) => void; setCustomDictionary: (words: string[]) => void; setAssemblyAiStreaming: (value: boolean) => void; + setDictationMode: (value: string) => void; setUseReasoningModel: (value: boolean) => void; setReasoningModel: (value: string) => void; setReasoningProvider: (value: string) => void; @@ -242,6 +243,9 @@ export const useSettingsStore = create()((set, get) => ({ cloudReasoningBaseUrl: readString("cloudReasoningBaseUrl", API_ENDPOINTS.OPENAI_BASE), customDictionary: readStringArray("customDictionary", []), assemblyAiStreaming: readBoolean("assemblyAiStreaming", true), + dictationMode: (readString("dictationMode", "normal") === "fast" ? "fast" : "normal") as + | "normal" + | "fast", useReasoningModel: readBoolean("useReasoningModel", true), reasoningModel: readString("reasoningModel", ""), @@ -330,6 +334,7 @@ export const useSettingsStore = create()((set, get) => ({ setCloudReasoningMode: createStringSetter("cloudReasoningMode"), setCloudReasoningBaseUrl: createStringSetter("cloudReasoningBaseUrl"), setAssemblyAiStreaming: createBooleanSetter("assemblyAiStreaming"), + setDictationMode: createStringSetter("dictationMode"), setUseReasoningModel: createBooleanSetter("useReasoningModel"), setReasoningModel: createStringSetter("reasoningModel"), setReasoningProvider: createStringSetter("reasoningProvider"), @@ -524,6 +529,7 @@ export const useSettingsStore = create()((set, get) => ({ if (settings.customDictionary !== undefined) s.setCustomDictionary(settings.customDictionary); if (settings.assemblyAiStreaming !== undefined) s.setAssemblyAiStreaming(settings.assemblyAiStreaming); + if (settings.dictationMode !== undefined) s.setDictationMode(settings.dictationMode); }, updateReasoningSettings: (settings: Partial) => {
+ {t("settingsPage.transcription.dictationMode.title")} +