From 4de7c5f8f4ab09d2c368c6e3435d9ab88f6c42f9 Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 16:30:34 +0100 Subject: [PATCH 1/9] feat(soniox): add Soniox real-time streaming STT provider Add Soniox as a fourth cloud streaming provider alongside Deepgram, AssemblyAI, and OpenAI Realtime. Includes WebSocket streaming core with cold-start buffering, full Electron IPC pipeline, settings UI with API key management, onboarding validation, and BYOK detection. --- preload.js | 28 ++ src/components/OnboardingFlow.tsx | 6 + src/components/SettingsPage.tsx | 10 + src/components/TranscriptionModelPicker.tsx | 136 +++++++- src/helpers/audioManager.js | 22 +- src/helpers/environment.js | 9 + src/helpers/ipcHandlers.js | 94 ++++++ src/helpers/sonioxStreaming.js | 339 ++++++++++++++++++++ src/hooks/useSettings.ts | 3 + src/locales/de/translation.json | 3 +- src/locales/en/translation.json | 3 +- src/locales/es/translation.json | 3 +- src/locales/fr/translation.json | 3 +- src/locales/it/translation.json | 3 +- src/locales/ja/translation.json | 3 +- src/locales/pt/translation.json | 3 +- src/locales/ru/translation.json | 3 +- src/locales/zh-CN/translation.json | 3 +- src/locales/zh-TW/translation.json | 3 +- src/models/modelRegistryData.json | 13 + src/stores/settingsStore.ts | 13 + src/types/electron.ts | 26 ++ src/utils/byokDetection.ts | 1 + src/utils/providerIcons.ts | 2 + 24 files changed, 706 insertions(+), 26 deletions(-) create mode 100644 src/helpers/sonioxStreaming.js diff --git a/preload.js b/preload.js index a17e5b70..349b5471 100644 --- a/preload.js +++ b/preload.js @@ -299,6 +299,10 @@ contextBridge.exposeInMainWorld("electronAPI", { saveMistralKey: (key) => ipcRenderer.invoke("save-mistral-key", key), proxyMistralTranscription: (data) => ipcRenderer.invoke("proxy-mistral-transcription", data), + // Soniox API + getSonioxKey: () => ipcRenderer.invoke("get-soniox-key"), + saveSonioxKey: (key) => ipcRenderer.invoke("save-soniox-key", key), + // Custom endpoint API keys getCustomTranscriptionKey: () => ipcRenderer.invoke("get-custom-transcription-key"), saveCustomTranscriptionKey: (key) => ipcRenderer.invoke("save-custom-transcription-key", key), @@ -493,6 +497,30 @@ contextBridge.exposeInMainWorld("electronAPI", { (callback) => (_event, data) => callback(data) ), + // Soniox streaming + sonioxStreamingWarmup: (options) => ipcRenderer.invoke("soniox-streaming-warmup", options), + sonioxStreamingStart: (options) => ipcRenderer.invoke("soniox-streaming-start", options), + sonioxStreamingSend: (audioBuffer) => ipcRenderer.send("soniox-streaming-send", audioBuffer), + sonioxStreamingFinalize: () => ipcRenderer.send("soniox-streaming-finalize"), + sonioxStreamingStop: () => ipcRenderer.invoke("soniox-streaming-stop"), + sonioxStreamingStatus: () => ipcRenderer.invoke("soniox-streaming-status"), + onSonioxPartialTranscript: registerListener( + "soniox-streaming-partial", + (callback) => (_event, text) => callback(text) + ), + onSonioxFinalTranscript: registerListener( + "soniox-streaming-final", + (callback) => (_event, text) => callback(text) + ), + onSonioxError: registerListener( + "soniox-streaming-error", + (callback) => (_event, error) => callback(error) + ), + onSonioxSessionEnd: registerListener( + "soniox-streaming-session-end", + (callback) => (_event, data) => callback(data) + ), + // Usage limit events (for showing UpgradePrompt in ControlPanel) notifyLimitReached: (data) => ipcRenderer.send("limit-reached", data), onLimitReached: registerListener("limit-reached", (callback) => (_event, data) => callback(data)), diff --git a/src/components/OnboardingFlow.tsx b/src/components/OnboardingFlow.tsx index c250c003..b03918b3 100644 --- a/src/components/OnboardingFlow.tsx +++ b/src/components/OnboardingFlow.tsx @@ -83,6 +83,8 @@ export default function OnboardingFlow({ onComplete }: OnboardingFlowProps) { openaiApiKey, groqApiKey, mistralApiKey, + sonioxApiKey, + setSonioxApiKey, customTranscriptionApiKey, setCustomTranscriptionApiKey, dictationKey, @@ -502,6 +504,8 @@ export default function OnboardingFlow({ onComplete }: OnboardingFlowProps) { setGroqApiKey={setGroqApiKey} mistralApiKey={mistralApiKey} setMistralApiKey={setMistralApiKey} + sonioxApiKey={sonioxApiKey} + setSonioxApiKey={setSonioxApiKey} customTranscriptionApiKey={customTranscriptionApiKey} setCustomTranscriptionApiKey={setCustomTranscriptionApiKey} cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl} @@ -725,6 +729,8 @@ export default function OnboardingFlow({ onComplete }: OnboardingFlowProps) { return groqApiKey.trim().length > 0; } else if (cloudTranscriptionProvider === "mistral") { return mistralApiKey.trim().length > 0; + } else if (cloudTranscriptionProvider === "soniox") { + return sonioxApiKey.trim().length > 0; } else if (cloudTranscriptionProvider === "custom") { // Custom can work without API key for local endpoints return true; diff --git a/src/components/SettingsPage.tsx b/src/components/SettingsPage.tsx index 9178d5bc..909192c1 100644 --- a/src/components/SettingsPage.tsx +++ b/src/components/SettingsPage.tsx @@ -172,6 +172,8 @@ interface TranscriptionSectionProps { setGroqApiKey: (key: string) => void; mistralApiKey: string; setMistralApiKey: (key: string) => void; + sonioxApiKey: string; + setSonioxApiKey: (key: string) => void; customTranscriptionApiKey: string; setCustomTranscriptionApiKey: (key: string) => void; cloudTranscriptionBaseUrl?: string; @@ -207,6 +209,8 @@ function TranscriptionSection({ setGroqApiKey, mistralApiKey, setMistralApiKey, + sonioxApiKey, + setSonioxApiKey, customTranscriptionApiKey, setCustomTranscriptionApiKey, cloudTranscriptionBaseUrl, @@ -383,6 +387,8 @@ function TranscriptionSection({ setGroqApiKey={setGroqApiKey} mistralApiKey={mistralApiKey} setMistralApiKey={setMistralApiKey} + sonioxApiKey={sonioxApiKey} + setSonioxApiKey={setSonioxApiKey} customTranscriptionApiKey={customTranscriptionApiKey} setCustomTranscriptionApiKey={setCustomTranscriptionApiKey} cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl} @@ -676,6 +682,8 @@ export default function SettingsPage({ activeSection = "general" }: SettingsPage setGeminiApiKey, setGroqApiKey, setMistralApiKey, + sonioxApiKey, + setSonioxApiKey, customTranscriptionApiKey, setCustomTranscriptionApiKey, customReasoningApiKey, @@ -2670,6 +2678,8 @@ EOF`, setGroqApiKey={setGroqApiKey} mistralApiKey={mistralApiKey} setMistralApiKey={setMistralApiKey} + sonioxApiKey={sonioxApiKey} + setSonioxApiKey={setSonioxApiKey} customTranscriptionApiKey={customTranscriptionApiKey} setCustomTranscriptionApiKey={setCustomTranscriptionApiKey} cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl} diff --git a/src/components/TranscriptionModelPicker.tsx b/src/components/TranscriptionModelPicker.tsx index c7c103eb..e811db3f 100644 --- a/src/components/TranscriptionModelPicker.tsx +++ b/src/components/TranscriptionModelPicker.tsx @@ -199,6 +199,8 @@ interface TranscriptionModelPickerProps { setMistralApiKey: (key: string) => void; customTranscriptionApiKey?: string; setCustomTranscriptionApiKey?: (key: string) => void; + sonioxApiKey?: string; + setSonioxApiKey?: (key: string) => void; cloudTranscriptionBaseUrl?: string; setCloudTranscriptionBaseUrl?: (url: string) => void; className?: string; @@ -209,6 +211,7 @@ const CLOUD_PROVIDER_TABS = [ { id: "openai", name: "OpenAI" }, { id: "groq", name: "Groq", recommended: true }, { id: "mistral", name: "Mistral" }, + { id: "soniox", name: "Soniox" }, { id: "custom", name: "Custom" }, ]; @@ -274,6 +277,8 @@ export default function TranscriptionModelPicker({ setMistralApiKey, customTranscriptionApiKey = "", setCustomTranscriptionApiKey, + sonioxApiKey = "", + setSonioxApiKey, cloudTranscriptionBaseUrl = "", setCloudTranscriptionBaseUrl, className = "", @@ -393,11 +398,22 @@ export default function TranscriptionModelPicker({ } } } - } else if (selectedCloudProvider !== "custom" && !selectedCloudModel) { - const provider = cloudProviders.find((p) => p.id === selectedCloudProvider); + } else if ( + selectedCloudProvider !== "custom" && + selectedCloudProvider !== "soniox" && + !selectedCloudModel + ) { + const provider = cloudProviders.find( + (p) => p.id === selectedCloudProvider + ); if (provider?.models?.length) { onCloudModelSelect(provider.models[0].id); } + } else if ( + selectedCloudProvider === "soniox" && + selectedCloudModel !== "stt-rt-v4" + ) { + onCloudModelSelect("stt-rt-v4"); } }, [ cloudProviders, @@ -527,13 +543,20 @@ export default function TranscriptionModelPicker({ const handleCloudProviderChange = useCallback( (providerId: string) => { onCloudProviderSelect(providerId); - const provider = cloudProviders.find((p) => p.id === providerId); + const provider = cloudProviders.find( + (p) => p.id === providerId + ); if (providerId === "custom") { onCloudModelSelect("whisper-1"); return; } + if (providerId === "soniox") { + onCloudModelSelect("stt-rt-v4"); + return; + } + if (provider) { setCloudTranscriptionBaseUrl?.(provider.baseUrl); if (provider.models?.length) { @@ -541,7 +564,12 @@ export default function TranscriptionModelPicker({ } } }, - [cloudProviders, onCloudProviderSelect, onCloudModelSelect, setCloudTranscriptionBaseUrl] + [ + cloudProviders, + onCloudProviderSelect, + onCloudModelSelect, + setCloudTranscriptionBaseUrl, + ] ); const handleLocalProviderChange = useCallback( @@ -837,7 +865,11 @@ export default function TranscriptionModelPicker({ setCloudTranscriptionBaseUrl?.(e.target.value)} + onChange={(e) => + setCloudTranscriptionBaseUrl?.( + e.target.value + ) + } onBlur={handleBaseUrlBlur} placeholder="https://your-api.example.com/v1" className="h-8 text-sm" @@ -846,7 +878,10 @@ export default function TranscriptionModelPicker({ {})} + setApiKey={ + setCustomTranscriptionApiKey || + (() => {}) + } label={t("transcription.apiKeyOptional")} helpText="" /> @@ -857,12 +892,74 @@ export default function TranscriptionModelPicker({ onCloudModelSelect(e.target.value)} + onChange={(e) => + onCloudModelSelect(e.target.value) + } placeholder="whisper-1" className="h-8 text-sm" /> + ) : selectedCloudProvider === "soniox" ? ( +
+
+
+ + +
+ {})} + placeholder={t( + "transcription.sonioxApiKeyPlaceholder", + { + defaultValue: + "Enter your Soniox API key", + } + )} + label="" + helpText="" + /> +
+ +
+ +
+ + + stt-rt-v4 + + + {t( + "transcription.sonioxModelDesc", + { + defaultValue: + "Real-time streaming STT", + } + )} + +
+
+
) : (
@@ -877,7 +974,8 @@ export default function TranscriptionModelPicker({ groq: "https://console.groq.com/keys", mistral: "https://console.mistral.ai/api-keys", openai: "https://platform.openai.com/api-keys", - }[selectedCloudProvider] || "https://platform.openai.com/api-keys" + }[selectedCloudProvider] || + "https://platform.openai.com/api-keys" )} className="text-xs text-primary/70 hover:text-primary transition-colors cursor-pointer" > @@ -886,14 +984,20 @@ export default function TranscriptionModelPicker({
- + window.electronAPI.onDictationRealtimeError(cb), onSessionEnd: (cb) => window.electronAPI.onDictationRealtimeSessionEnd(cb), }, + soniox: { + warmup: (opts) => window.electronAPI.sonioxStreamingWarmup(opts), + start: (opts) => window.electronAPI.sonioxStreamingStart(opts), + send: (buf) => window.electronAPI.sonioxStreamingSend(buf), + finalize: () => window.electronAPI.sonioxStreamingFinalize(), + stop: () => window.electronAPI.sonioxStreamingStop(), + status: () => window.electronAPI.sonioxStreamingStatus(), + onPartial: (cb) => window.electronAPI.onSonioxPartialTranscript(cb), + onFinal: (cb) => window.electronAPI.onSonioxFinalTranscript(cb), + onError: (cb) => window.electronAPI.onSonioxError(cb), + onSessionEnd: (cb) => window.electronAPI.onSonioxSessionEnd(cb), + }, }; class AudioManager { @@ -221,7 +233,10 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor); } getStreamingProvider() { - const { cloudTranscriptionModel } = getSettings(); + const { cloudTranscriptionProvider, cloudTranscriptionModel } = getSettings(); + if (cloudTranscriptionProvider === "soniox") { + return STREAMING_PROVIDERS.soniox; + } if (REALTIME_MODELS.has(cloudTranscriptionModel)) { return STREAMING_PROVIDERS["openai-realtime"]; } @@ -2005,6 +2020,11 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor); const s = getSettings(); if (s.useLocalWhisper) return false; + // Soniox is always streaming (BYOK only) + if (s.cloudTranscriptionProvider === "soniox" && s.sonioxApiKey) { + return true; + } + if (REALTIME_MODELS.has(s.cloudTranscriptionModel)) { if (s.cloudTranscriptionMode === "byok") return !!s.openaiApiKey; if (s.cloudTranscriptionMode === "openwhispr") return !!(isSignedInOverride ?? s.isSignedIn); diff --git a/src/helpers/environment.js b/src/helpers/environment.js index d11f24f8..590a97f5 100644 --- a/src/helpers/environment.js +++ b/src/helpers/environment.js @@ -10,6 +10,7 @@ const PERSISTED_KEYS = [ "GEMINI_API_KEY", "GROQ_API_KEY", "MISTRAL_API_KEY", + "SONIOX_API_KEY", "CUSTOM_TRANSCRIPTION_API_KEY", "CUSTOM_REASONING_API_KEY", "LOCAL_TRANSCRIPTION_PROVIDER", @@ -107,6 +108,14 @@ class EnvironmentManager { return this._saveKey("MISTRAL_API_KEY", key); } + getSonioxKey() { + return this._getKey("SONIOX_API_KEY"); + } + + saveSonioxKey(key) { + return this._saveKey("SONIOX_API_KEY", key); + } + getCustomTranscriptionKey() { return this._getKey("CUSTOM_TRANSCRIPTION_API_KEY"); } diff --git a/src/helpers/ipcHandlers.js b/src/helpers/ipcHandlers.js index e872c521..a36a199c 100644 --- a/src/helpers/ipcHandlers.js +++ b/src/helpers/ipcHandlers.js @@ -10,6 +10,7 @@ const AssemblyAiStreaming = require("./assemblyAiStreaming"); const { i18nMain, changeLanguage } = require("./i18nMain"); const DeepgramStreaming = require("./deepgramStreaming"); const OpenAIRealtimeStreaming = require("./openaiRealtimeStreaming"); +const SonioxStreaming = require("./sonioxStreaming"); const AudioStorageManager = require("./audioStorage"); const MISTRAL_TRANSCRIPTION_URL = "https://api.mistral.ai/v1/audio/transcriptions"; @@ -108,6 +109,7 @@ class IPCHandlers { this.deepgramStreaming = null; this.openaiRealtimeStreaming = null; this._dictationStreaming = null; + this._sonioxStreaming = null; this._autoLearnEnabled = true; // Default on, synced from renderer this._autoLearnDebounceTimer = null; this._autoLearnLatestData = null; @@ -1424,6 +1426,14 @@ class IPCHandlers { return this.environmentManager.saveMistralKey(key); }); + ipcMain.handle("get-soniox-key", async () => { + return this.environmentManager.getSonioxKey(); + }); + + ipcMain.handle("save-soniox-key", async (event, key) => { + return this.environmentManager.saveSonioxKey(key); + }); + ipcMain.handle( "proxy-mistral-transcription", async (event, { audioBuffer, model, language, contextBias }) => { @@ -2422,6 +2432,90 @@ class IPCHandlers { return { success: true, text: result.text || "" }; }); + // --- Soniox streaming --- + ipcMain.handle("soniox-streaming-warmup", async (event, options = {}) => { + try { + const apiKey = options.apiKey || this.environmentManager.getSonioxKey(); + if (!apiKey) { + return { success: false, error: "Soniox API key not configured", code: "NO_API" }; + } + + if (this._sonioxStreaming?.isConnected) { + await this._sonioxStreaming.disconnect(); + } + this._sonioxStreaming = new SonioxStreaming(); + this._sonioxStreaming.onPartialTranscript = (text) => + event.sender.send("soniox-streaming-partial", text); + this._sonioxStreaming.onFinalTranscript = (text) => + event.sender.send("soniox-streaming-final", text); + this._sonioxStreaming.onError = (err) => + event.sender.send("soniox-streaming-error", err.message); + this._sonioxStreaming.onSessionEnd = (data) => + event.sender.send("soniox-streaming-session-end", data || {}); + + await this._sonioxStreaming.connect({ + apiKey, + model: options.model || "stt-rt-v4", + language: options.language, + }); + return { success: true }; + } catch (err) { + return { success: false, error: err.message }; + } + }); + + ipcMain.handle("soniox-streaming-start", async (event, options = {}) => { + try { + if (!this._sonioxStreaming?.isConnected) { + const apiKey = options.apiKey || this.environmentManager.getSonioxKey(); + if (!apiKey) { + return { success: false, error: "Soniox API key not configured", code: "NO_API" }; + } + + // Cold start: create new connection + this._sonioxStreaming = new SonioxStreaming(); + this._sonioxStreaming.onPartialTranscript = (text) => + event.sender.send("soniox-streaming-partial", text); + this._sonioxStreaming.onFinalTranscript = (text) => + event.sender.send("soniox-streaming-final", text); + this._sonioxStreaming.onError = (err) => + event.sender.send("soniox-streaming-error", err.message); + this._sonioxStreaming.onSessionEnd = (data) => + event.sender.send("soniox-streaming-session-end", data || {}); + + await this._sonioxStreaming.connect({ + apiKey, + model: options.model || "stt-rt-v4", + language: options.language, + }); + } + return { success: true }; + } catch (err) { + return { success: false, error: err.message }; + } + }); + + ipcMain.on("soniox-streaming-send", (_event, audioBuffer) => { + this._sonioxStreaming?.sendAudio(Buffer.from(audioBuffer)); + }); + + ipcMain.on("soniox-streaming-finalize", () => { + this._sonioxStreaming?.finalize(); + }); + + ipcMain.handle("soniox-streaming-stop", async () => { + if (!this._sonioxStreaming) { + return { success: true, text: "" }; + } + const result = await this._sonioxStreaming.disconnect().catch(() => ({ text: "" })); + this._sonioxStreaming = null; + return { success: true, text: result.text || "" }; + }); + + ipcMain.handle("soniox-streaming-status", async () => { + return { connected: !!this._sonioxStreaming?.isConnected }; + }); + ipcMain.handle("update-transcription-text", async (_event, id, text, rawText) => { try { this.databaseManager.updateTranscriptionText(id, text, rawText); diff --git a/src/helpers/sonioxStreaming.js b/src/helpers/sonioxStreaming.js new file mode 100644 index 00000000..6174723d --- /dev/null +++ b/src/helpers/sonioxStreaming.js @@ -0,0 +1,339 @@ +const WebSocket = require("ws"); +const debugLogger = require("./debugLogger"); + +const WEBSOCKET_TIMEOUT_MS = 15000; +const DISCONNECT_TIMEOUT_MS = 3000; +const KEEPALIVE_INTERVAL_MS = 5000; +const COLD_START_BUFFER_MAX = 3 * 16000 * 2; // 3 seconds of 16-bit PCM at 16kHz +const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket"; + +class SonioxStreaming { + constructor() { + this.ws = null; + this.isConnected = false; + this.finalTokens = []; + this.currentNonFinalText = ""; + this.coldStartBuffer = []; + this.coldStartBufferSize = 0; + this.onPartialTranscript = null; + this.onFinalTranscript = null; + this.onError = null; + this.onSessionEnd = null; + this.pendingResolve = null; + this.pendingReject = null; + this.connectionTimeout = null; + this.keepAliveInterval = null; + this.isDisconnecting = false; + this.audioBytesSent = 0; + this.closeResolve = null; + } + + getFullTranscript() { + return this.finalTokens.map((t) => t.text).join(""); + } + + async connect(options = {}) { + const { apiKey, model, language } = options; + if (!apiKey) throw new Error("Soniox API key is required"); + + if (this.isConnected) { + debugLogger.debug("Soniox already connected"); + return; + } + + this.finalTokens = []; + this.currentNonFinalText = ""; + this.audioBytesSent = 0; + this.coldStartBuffer = []; + this.coldStartBufferSize = 0; + + debugLogger.debug("Soniox connecting", { model: model || "stt-rt-v4", language }); + + const languageHints = + !language || language === "auto" ? [] : [language]; + + const configMessage = { + api_key: apiKey, + model: model || "stt-rt-v4", + audio_format: "pcm_s16le", + sample_rate: 16000, + num_channels: 1, + language_hints: languageHints, + }; + + return new Promise((resolve, reject) => { + this.pendingResolve = resolve; + this.pendingReject = reject; + + this.connectionTimeout = setTimeout(() => { + this.cleanup(); + reject(new Error("Soniox WebSocket connection timeout")); + }, WEBSOCKET_TIMEOUT_MS); + + this.ws = new WebSocket(SONIOX_WS_URL); + + this.ws.on("open", () => { + debugLogger.debug("Soniox WebSocket opened, sending config"); + this.ws.send(JSON.stringify(configMessage)); + this.startKeepAlive(); + this.flushColdStartBuffer(); + + clearTimeout(this.connectionTimeout); + this.isConnected = true; + this.pendingResolve(); + this.pendingResolve = null; + this.pendingReject = null; + }); + + this.ws.on("message", (data) => { + this.handleMessage(data); + }); + + this.ws.on("error", (error) => { + debugLogger.error("Soniox WebSocket error", { error: error.message }); + this.cleanup(); + if (this.pendingReject) { + this.pendingReject(error); + this.pendingReject = null; + this.pendingResolve = null; + } + this.onError?.(error); + }); + + this.ws.on("close", (code, reason) => { + const wasActive = this.isConnected; + debugLogger.debug("Soniox WebSocket closed", { + code, + reason: reason?.toString(), + wasActive, + }); + if (this.pendingReject) { + this.pendingReject(new Error(`WebSocket closed before ready (code: ${code})`)); + this.pendingReject = null; + this.pendingResolve = null; + } + if (this.closeResolve) { + this.closeResolve({ text: this.getFullTranscript() }); + } + this.cleanup(); + if (wasActive && !this.isDisconnecting) { + this.onSessionEnd?.({ text: this.getFullTranscript() }); + } + }); + }); + } + + handleMessage(data) { + try { + const res = JSON.parse(data.toString()); + + if (res.error_code) { + debugLogger.error("Soniox error response", { + code: res.error_code, + message: res.error_message, + }); + this.onError?.(new Error(`Soniox error ${res.error_code}: ${res.error_message}`)); + return; + } + + if (res.finished) { + debugLogger.debug("Soniox session finished", { + finalTokens: this.finalTokens.length, + textLength: this.getFullTranscript().length, + }); + this.onSessionEnd?.({ text: this.getFullTranscript() }); + return; + } + + let nonFinalTexts = []; + let newFinalTokens = false; + for (const token of res.tokens || []) { + if (token.text === "") continue; + if (token.is_final) { + this.finalTokens.push(token); + newFinalTokens = true; + } else { + nonFinalTexts.push(token.text); + } + } + + const finalText = this.finalTokens.map((t) => t.text).join(""); + this.currentNonFinalText = nonFinalTexts.join(""); + + this.onPartialTranscript?.(finalText + this.currentNonFinalText); + + if (newFinalTokens) { + this.onFinalTranscript?.(finalText); + } + } catch (err) { + debugLogger.error("Soniox message parse error", { error: err.message }); + } + } + + flushColdStartBuffer() { + if (this.coldStartBuffer.length === 0) return; + + debugLogger.debug("Soniox flushing cold-start buffer", { + chunks: this.coldStartBuffer.length, + bytes: this.coldStartBufferSize, + }); + for (const buf of this.coldStartBuffer) { + this.ws.send(buf); + this.audioBytesSent += buf.length; + } + this.coldStartBuffer = []; + this.coldStartBufferSize = 0; + } + + sendAudio(pcmBuffer) { + if (!this.ws) return false; + + if ( + this.ws.readyState === WebSocket.CONNECTING && + this.coldStartBufferSize < COLD_START_BUFFER_MAX + ) { + const copy = Buffer.from(pcmBuffer); + this.coldStartBuffer.push(copy); + this.coldStartBufferSize += copy.length; + return false; + } + + if (this.ws.readyState !== WebSocket.OPEN) return false; + + this.flushColdStartBuffer(); + this.ws.send(pcmBuffer); + this.audioBytesSent += pcmBuffer.length; + return true; + } + + finalize() { + if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return false; + + this.ws.send(JSON.stringify({ type: "finalize" })); + debugLogger.debug("Soniox finalize sent"); + return true; + } + + startKeepAlive() { + this.stopKeepAlive(); + this.keepAliveInterval = setInterval(() => { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + try { + this.ws.send(JSON.stringify({ type: "keepalive" })); + } catch (err) { + debugLogger.debug("Soniox keep-alive failed", { error: err.message }); + this.stopKeepAlive(); + } + } else { + this.stopKeepAlive(); + } + }, KEEPALIVE_INTERVAL_MS); + } + + stopKeepAlive() { + if (this.keepAliveInterval) { + clearInterval(this.keepAliveInterval); + this.keepAliveInterval = null; + } + } + + async disconnect() { + debugLogger.debug("Soniox disconnect", { + audioBytesSent: this.audioBytesSent, + finalTokens: this.finalTokens.length, + textLength: this.getFullTranscript().length, + }); + + if (!this.ws) return { text: this.getFullTranscript() }; + + this.isDisconnecting = true; + + if (this.ws.readyState === WebSocket.OPEN && this.audioBytesSent > 0) { + await this.drainFinalTokens(); + await this.drainSessionEnd(); + } + + if (this.ws) { + this.ws.close(); + } + + const result = { text: this.getFullTranscript() }; + this.cleanup(); + this.isDisconnecting = false; + return result; + } + + drainFinalTokens() { + return new Promise((resolve) => { + const prevOnFinal = this.onFinalTranscript; + + const tid = setTimeout(() => { + debugLogger.debug("Soniox finalize timeout, using accumulated text"); + this.onFinalTranscript = prevOnFinal; + resolve(); + }, DISCONNECT_TIMEOUT_MS); + + this.onFinalTranscript = (text) => { + clearTimeout(tid); + this.onFinalTranscript = prevOnFinal; + prevOnFinal?.(text); + resolve(); + }; + + try { + this.ws.send(JSON.stringify({ type: "finalize" })); + } catch { + clearTimeout(tid); + this.onFinalTranscript = prevOnFinal; + resolve(); + } + }); + } + + drainSessionEnd() { + return new Promise((resolve) => { + const prevOnSessionEnd = this.onSessionEnd; + + const tid = setTimeout(() => { + debugLogger.debug("Soniox session end timeout, closing"); + this.onSessionEnd = prevOnSessionEnd; + resolve(); + }, DISCONNECT_TIMEOUT_MS); + + this.onSessionEnd = (result) => { + clearTimeout(tid); + this.onSessionEnd = prevOnSessionEnd; + prevOnSessionEnd?.(result); + resolve(); + }; + + try { + this.ws.send(""); + } catch { + clearTimeout(tid); + this.onSessionEnd = prevOnSessionEnd; + resolve(); + } + }); + } + + cleanup() { + this.stopKeepAlive(); + clearTimeout(this.connectionTimeout); + this.connectionTimeout = null; + + if (this.ws) { + try { + this.ws.close(); + } catch (err) { + // ignore + } + this.ws = null; + } + + this.isConnected = false; + this.closeResolve = null; + } +} + +module.exports = SonioxStreaming; diff --git a/src/hooks/useSettings.ts b/src/hooks/useSettings.ts index 56d0c62f..03f03c85 100644 --- a/src/hooks/useSettings.ts +++ b/src/hooks/useSettings.ts @@ -48,6 +48,7 @@ export interface ApiKeySettings { mistralApiKey: string; customTranscriptionApiKey: string; customReasoningApiKey: string; + sonioxApiKey: string; } export interface PrivacySettings { @@ -213,6 +214,8 @@ function useSettingsInternal() { setGeminiApiKey: store.setGeminiApiKey, setGroqApiKey: store.setGroqApiKey, setMistralApiKey: store.setMistralApiKey, + sonioxApiKey: store.sonioxApiKey, + setSonioxApiKey: store.setSonioxApiKey, customTranscriptionApiKey: store.customTranscriptionApiKey, setCustomTranscriptionApiKey: store.setCustomTranscriptionApiKey, customReasoningApiKey: store.customReasoningApiKey, diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json index 3bb3a6dd..655e6c71 100644 --- a/src/locales/de/translation.json +++ b/src/locales/de/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "Originales Whisper-Modell", "groq_whisper_large_v3": "Hochpräzise Spracherkennung", "groq_whisper_large_v3_turbo": "216x Echtzeitgeschwindigkeit", - "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription" + "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning", diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json index 9cfa4475..4a48d27b 100644 --- a/src/locales/en/translation.json +++ b/src/locales/en/translation.json @@ -1411,7 +1411,8 @@ "openai_whisper_1": "Original Whisper model", "groq_whisper_large_v3": "High accuracy speech recognition", "groq_whisper_large_v3_turbo": "216x real-time speed", - "mistral_voxtral_mini_latest": "Fast multilingual transcription" + "mistral_voxtral_mini_latest": "Fast multilingual transcription", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Latest flagship reasoning model", diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json index 65c8f091..3cf3bddf 100644 --- a/src/locales/es/translation.json +++ b/src/locales/es/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "Modelo Whisper original", "groq_whisper_large_v3": "Reconocimiento de voz de alta precisión", "groq_whisper_large_v3_turbo": "Velocidad 216x en tiempo real", - "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida" + "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente", diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json index 50b0ef89..3fdc0c0d 100644 --- a/src/locales/fr/translation.json +++ b/src/locales/fr/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "Modèle Whisper original", "groq_whisper_large_v3": "Reconnaissance vocale haute précision", "groq_whisper_large_v3_turbo": "Vitesse 216x en temps réel", - "mistral_voxtral_mini_latest": "Transcription multilingue rapide" + "mistral_voxtral_mini_latest": "Transcription multilingue rapide", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement", diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json index 5577e040..1123958b 100644 --- a/src/locales/it/translation.json +++ b/src/locales/it/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "Modello Whisper originale", "groq_whisper_large_v3_turbo": "Velocità 216x in tempo reale", "groq_whisper_large_v3": "Modello Large v3, veloce e preciso", - "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce" + "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento", diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json index 6584d4fe..185b240a 100644 --- a/src/locales/ja/translation.json +++ b/src/locales/ja/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "オリジナル Whisper モデル", "groq_whisper_large_v3_turbo": "リアルタイムの 216 倍速", "groq_whisper_large_v3": "Large v3 モデル、高速かつ高精度", - "mistral_voxtral_mini_latest": "高速多言語文字起こし" + "mistral_voxtral_mini_latest": "高速多言語文字起こし", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "最新のフラッグシップ推論モデル", diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json index d49d8e62..f3c113c3 100644 --- a/src/locales/pt/translation.json +++ b/src/locales/pt/translation.json @@ -1313,7 +1313,8 @@ "openai_whisper_1": "Modelo Whisper original", "groq_whisper_large_v3_turbo": "Velocidade 216x em tempo real", "groq_whisper_large_v3": "Modelo Large v3, rápido e preciso", - "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida" + "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Modelo principal de raciocínio mais recente", diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json index 0f98f327..9f8af60a 100644 --- a/src/locales/ru/translation.json +++ b/src/locales/ru/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "Оригинальная модель Whisper", "groq_whisper_large_v3_turbo": "Скорость в 216 раз быстрее реального времени", "groq_whisper_large_v3": "Модель Large v3, быстрая и точная", - "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция" + "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением", diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json index b12e1c62..b290917f 100644 --- a/src/locales/zh-CN/translation.json +++ b/src/locales/zh-CN/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "原始 Whisper 模型", "groq_whisper_large_v3_turbo": "216 倍实时速度", "groq_whisper_large_v3": "Large v3 模型,快速且精准", - "mistral_voxtral_mini_latest": "快速多语言转录" + "mistral_voxtral_mini_latest": "快速多语言转录", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "最新旗舰推理模型", diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json index 80c45443..78dc8099 100644 --- a/src/locales/zh-TW/translation.json +++ b/src/locales/zh-TW/translation.json @@ -1341,7 +1341,8 @@ "openai_whisper_1": "原始 Whisper 模型", "groq_whisper_large_v3_turbo": "216 倍即時速度", "groq_whisper_large_v3": "Large v3 模型,快速且精準", - "mistral_voxtral_mini_latest": "快速多語言轉錄" + "mistral_voxtral_mini_latest": "快速多語言轉錄", + "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" }, "cloud": { "openai_gpt_5_2": "最新旗艦推理模型", diff --git a/src/models/modelRegistryData.json b/src/models/modelRegistryData.json index 96fc97b1..b68d3c6c 100644 --- a/src/models/modelRegistryData.json +++ b/src/models/modelRegistryData.json @@ -159,6 +159,19 @@ "descriptionKey": "models.descriptions.transcription.mistral_voxtral_mini_latest" } ] + }, + { + "id": "soniox", + "name": "Soniox", + "baseUrl": "wss://stt-rt.soniox.com", + "models": [ + { + "id": "stt-rt-v4", + "name": "Soniox STT RT v4", + "description": "Real-time streaming speech-to-text with high accuracy", + "descriptionKey": "models.descriptions.transcription.soniox_stt_rt_v4" + } + ] } ], "cloudProviders": [ diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index bfb84313..6b5e37d4 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -131,6 +131,7 @@ export interface SettingsState setGeminiApiKey: (key: string) => void; setGroqApiKey: (key: string) => void; setMistralApiKey: (key: string) => void; + setSonioxApiKey: (key: string) => void; setCustomTranscriptionApiKey: (key: string) => void; setCustomReasoningApiKey: (key: string) => void; @@ -252,6 +253,7 @@ export const useSettingsStore = create()((set, get) => ({ geminiApiKey: readString("geminiApiKey", ""), groqApiKey: readString("groqApiKey", ""), mistralApiKey: readString("mistralApiKey", ""), + sonioxApiKey: readString("sonioxApiKey", ""), customTranscriptionApiKey: readString("customTranscriptionApiKey", ""), customReasoningApiKey: readString("customReasoningApiKey", ""), @@ -392,6 +394,12 @@ export const useSettingsStore = create()((set, get) => ({ window.electronAPI?.saveMistralKey?.(key); invalidateApiKeyCaches("mistral"); }, + setSonioxApiKey: (key: string) => { + if (isBrowser) localStorage.setItem("sonioxApiKey", key); + set({ sonioxApiKey: key }); + window.electronAPI?.saveSonioxKey?.(key); + invalidateApiKeyCaches(); + }, setCustomTranscriptionApiKey: (key: string) => { if (isBrowser) localStorage.setItem("customTranscriptionApiKey", key); set({ customTranscriptionApiKey: key }); @@ -546,6 +554,7 @@ export const useSettingsStore = create()((set, get) => ({ if (keys.geminiApiKey !== undefined) s.setGeminiApiKey(keys.geminiApiKey); if (keys.groqApiKey !== undefined) s.setGroqApiKey(keys.groqApiKey); if (keys.mistralApiKey !== undefined) s.setMistralApiKey(keys.mistralApiKey); + if (keys.sonioxApiKey !== undefined) s.setSonioxApiKey(keys.sonioxApiKey); if (keys.customTranscriptionApiKey !== undefined) s.setCustomTranscriptionApiKey(keys.customTranscriptionApiKey); if (keys.customReasoningApiKey !== undefined) @@ -632,6 +641,10 @@ export async function initializeSettings(): Promise { const envKey = await window.electronAPI.getMistralKey?.(); if (envKey) createStringSetter("mistralApiKey")(envKey); } + if (!state.sonioxApiKey) { + const envKey = await window.electronAPI.getSonioxKey?.(); + if (envKey) createStringSetter("sonioxApiKey")(envKey); + } if (!state.customTranscriptionApiKey) { const envKey = await window.electronAPI.getCustomTranscriptionKey?.(); if (envKey) createStringSetter("customTranscriptionApiKey")(envKey); diff --git a/src/types/electron.ts b/src/types/electron.ts index 4acdfd90..59db3c0e 100644 --- a/src/types/electron.ts +++ b/src/types/electron.ts @@ -677,6 +677,10 @@ declare global { // Mistral API key management getMistralKey: () => Promise; saveMistralKey: (key: string) => Promise; + + // Soniox API key management + getSonioxKey?: () => Promise; + saveSonioxKey?: (key: string) => Promise; proxyMistralTranscription: (data: { audioBuffer: ArrayBuffer; model?: string; @@ -1146,6 +1150,28 @@ declare global { onDictationRealtimeError?: (callback: (error: string) => void) => () => void; onDictationRealtimeSessionEnd?: (callback: (data: { text: string }) => void) => () => void; + // Soniox streaming + sonioxStreamingWarmup?: (options?: { + apiKey?: string; + model?: string; + language?: string; + }) => Promise<{ success: boolean; error?: string }>; + sonioxStreamingStart?: (options?: { + apiKey?: string; + model?: string; + language?: string; + }) => Promise<{ success: boolean; error?: string }>; + sonioxStreamingSend?: (audioBuffer: ArrayBuffer) => void; + sonioxStreamingFinalize?: () => void; + sonioxStreamingStop?: () => Promise<{ success: boolean; text?: string }>; + sonioxStreamingStatus?: () => Promise<{ connected: boolean }>; + onSonioxPartialTranscript?: (callback: (text: string) => void) => () => void; + onSonioxFinalTranscript?: (callback: (text: string) => void) => () => void; + onSonioxError?: (callback: (error: string) => void) => () => void; + onSonioxSessionEnd?: ( + callback: (data: { text?: string }) => void + ) => () => void; + // Desktop audio capture getDesktopSources?: (types: string[]) => Promise>; diff --git a/src/utils/byokDetection.ts b/src/utils/byokDetection.ts index a7b989d5..bce4e74e 100644 --- a/src/utils/byokDetection.ts +++ b/src/utils/byokDetection.ts @@ -3,5 +3,6 @@ export const hasStoredByokKey = () => localStorage.getItem("openaiApiKey") || localStorage.getItem("groqApiKey") || localStorage.getItem("mistralApiKey") || + localStorage.getItem("sonioxApiKey") || localStorage.getItem("customTranscriptionApiKey") ); diff --git a/src/utils/providerIcons.ts b/src/utils/providerIcons.ts index 3be07a38..fb89f408 100644 --- a/src/utils/providerIcons.ts +++ b/src/utils/providerIcons.ts @@ -22,6 +22,8 @@ export const PROVIDER_ICONS: Record = { "openai-oss": openaiOssIcon, gemma: gemmaIcon, }; +// Soniox has no icon asset yet; getProviderIcon returns +// undefined, and ProviderIcon.tsx falls back to a Brain icon. export function getProviderIcon(provider: string): string | undefined { return PROVIDER_ICONS[provider]; From 69e5e529fd5453e05696cee71b2ecc522aad989a Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 17:55:14 +0100 Subject: [PATCH 2/9] fix(soniox): unify UI with other providers, add icon and translations - Remove Soniox-specific render branch in TranscriptionModelPicker, use same ModelCardList + API key maps as OpenAI/Groq/Mistral - Replace hardcoded "stt-rt-v4" in UI with registry-based model selection - Add Soniox "S" icon SVG (from official wordmark) - Translate soniox_stt_rt_v4 model description in 9 locale files --- src/assets/icons/providers/soniox.svg | 1 + src/components/TranscriptionModelPicker.tsx | 76 +++------------------ src/locales/de/translation.json | 2 +- src/locales/es/translation.json | 2 +- src/locales/fr/translation.json | 2 +- src/locales/it/translation.json | 2 +- src/locales/ja/translation.json | 2 +- src/locales/pt/translation.json | 2 +- src/locales/ru/translation.json | 2 +- src/locales/zh-CN/translation.json | 2 +- src/locales/zh-TW/translation.json | 2 +- src/utils/providerIcons.ts | 4 +- 12 files changed, 21 insertions(+), 78 deletions(-) create mode 100644 src/assets/icons/providers/soniox.svg diff --git a/src/assets/icons/providers/soniox.svg b/src/assets/icons/providers/soniox.svg new file mode 100644 index 00000000..2df2916e --- /dev/null +++ b/src/assets/icons/providers/soniox.svg @@ -0,0 +1 @@ + diff --git a/src/components/TranscriptionModelPicker.tsx b/src/components/TranscriptionModelPicker.tsx index e811db3f..a453f67d 100644 --- a/src/components/TranscriptionModelPicker.tsx +++ b/src/components/TranscriptionModelPicker.tsx @@ -400,7 +400,6 @@ export default function TranscriptionModelPicker({ } } else if ( selectedCloudProvider !== "custom" && - selectedCloudProvider !== "soniox" && !selectedCloudModel ) { const provider = cloudProviders.find( @@ -409,11 +408,6 @@ export default function TranscriptionModelPicker({ if (provider?.models?.length) { onCloudModelSelect(provider.models[0].id); } - } else if ( - selectedCloudProvider === "soniox" && - selectedCloudModel !== "stt-rt-v4" - ) { - onCloudModelSelect("stt-rt-v4"); } }, [ cloudProviders, @@ -553,7 +547,12 @@ export default function TranscriptionModelPicker({ } if (providerId === "soniox") { - onCloudModelSelect("stt-rt-v4"); + const sonioxProvider = cloudProviders.find( + (p) => p.id === "soniox" + ); + if (sonioxProvider?.models?.length) { + onCloudModelSelect(sonioxProvider.models[0].id); + } return; } @@ -900,66 +899,6 @@ export default function TranscriptionModelPicker({ />
- ) : selectedCloudProvider === "soniox" ? ( -
-
-
- - -
- {})} - placeholder={t( - "transcription.sonioxApiKeyPlaceholder", - { - defaultValue: - "Enter your Soniox API key", - } - )} - label="" - helpText="" - /> -
- -
- -
- - - stt-rt-v4 - - - {t( - "transcription.sonioxModelDesc", - { - defaultValue: - "Real-time streaming STT", - } - )} - -
-
-
) : (
@@ -974,6 +913,7 @@ export default function TranscriptionModelPicker({ groq: "https://console.groq.com/keys", mistral: "https://console.mistral.ai/api-keys", openai: "https://platform.openai.com/api-keys", + soniox: "https://console.soniox.com/", }[selectedCloudProvider] || "https://platform.openai.com/api-keys" )} @@ -988,6 +928,7 @@ export default function TranscriptionModelPicker({ groq: groqApiKey, mistral: mistralApiKey, openai: openaiApiKey, + soniox: sonioxApiKey, }[selectedCloudProvider] || openaiApiKey } @@ -996,6 +937,7 @@ export default function TranscriptionModelPicker({ groq: setGroqApiKey, mistral: setMistralApiKey, openai: setOpenaiApiKey, + soniox: setSonioxApiKey, }[selectedCloudProvider] || setOpenaiApiKey } diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json index 655e6c71..9f6f2df4 100644 --- a/src/locales/de/translation.json +++ b/src/locales/de/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3": "Hochpräzise Spracherkennung", "groq_whisper_large_v3_turbo": "216x Echtzeitgeschwindigkeit", "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Echtzeit-Spracherkennung mit hoher Genauigkeit" }, "cloud": { "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning", diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json index 3cf3bddf..ee19244c 100644 --- a/src/locales/es/translation.json +++ b/src/locales/es/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3": "Reconocimiento de voz de alta precisión", "groq_whisper_large_v3_turbo": "Velocidad 216x en tiempo real", "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Transcripción de voz en tiempo real con alta precisión" }, "cloud": { "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente", diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json index 3fdc0c0d..713c688c 100644 --- a/src/locales/fr/translation.json +++ b/src/locales/fr/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3": "Reconnaissance vocale haute précision", "groq_whisper_large_v3_turbo": "Vitesse 216x en temps réel", "mistral_voxtral_mini_latest": "Transcription multilingue rapide", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Transcription vocale en temps réel avec une grande précision" }, "cloud": { "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement", diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json index 1123958b..2918306a 100644 --- a/src/locales/it/translation.json +++ b/src/locales/it/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3_turbo": "Velocità 216x in tempo reale", "groq_whisper_large_v3": "Modello Large v3, veloce e preciso", "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Trascrizione vocale in tempo reale con elevata precisione" }, "cloud": { "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento", diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json index 185b240a..f46fd7e3 100644 --- a/src/locales/ja/translation.json +++ b/src/locales/ja/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3_turbo": "リアルタイムの 216 倍速", "groq_whisper_large_v3": "Large v3 モデル、高速かつ高精度", "mistral_voxtral_mini_latest": "高速多言語文字起こし", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "高精度リアルタイムストリーミング音声認識" }, "cloud": { "openai_gpt_5_2": "最新のフラッグシップ推論モデル", diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json index f3c113c3..d226d6a6 100644 --- a/src/locales/pt/translation.json +++ b/src/locales/pt/translation.json @@ -1314,7 +1314,7 @@ "groq_whisper_large_v3_turbo": "Velocidade 216x em tempo real", "groq_whisper_large_v3": "Modelo Large v3, rápido e preciso", "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Transcrição de voz em tempo real com alta precisão" }, "cloud": { "openai_gpt_5_2": "Modelo principal de raciocínio mais recente", diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json index 9f8af60a..0afb414a 100644 --- a/src/locales/ru/translation.json +++ b/src/locales/ru/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3_turbo": "Скорость в 216 раз быстрее реального времени", "groq_whisper_large_v3": "Модель Large v3, быстрая и точная", "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "Распознавание речи в реальном времени с высокой точностью" }, "cloud": { "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением", diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json index b290917f..cebb2f8c 100644 --- a/src/locales/zh-CN/translation.json +++ b/src/locales/zh-CN/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3_turbo": "216 倍实时速度", "groq_whisper_large_v3": "Large v3 模型,快速且精准", "mistral_voxtral_mini_latest": "快速多语言转录", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "高精度实时流式语音转文字" }, "cloud": { "openai_gpt_5_2": "最新旗舰推理模型", diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json index 78dc8099..0f244912 100644 --- a/src/locales/zh-TW/translation.json +++ b/src/locales/zh-TW/translation.json @@ -1342,7 +1342,7 @@ "groq_whisper_large_v3_turbo": "216 倍即時速度", "groq_whisper_large_v3": "Large v3 模型,快速且精準", "mistral_voxtral_mini_latest": "快速多語言轉錄", - "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy" + "soniox_stt_rt_v4": "高精度即時串流語音轉文字" }, "cloud": { "openai_gpt_5_2": "最新旗艦推理模型", diff --git a/src/utils/providerIcons.ts b/src/utils/providerIcons.ts index fb89f408..da742a0c 100644 --- a/src/utils/providerIcons.ts +++ b/src/utils/providerIcons.ts @@ -8,6 +8,7 @@ import groqIcon from "@/assets/icons/providers/groq.svg"; import nvidiaIcon from "@/assets/icons/providers/nvidia.svg"; import openaiOssIcon from "@/assets/icons/providers/openai-oss.svg"; import gemmaIcon from "@/assets/icons/providers/gemma.svg"; +import sonioxIcon from "@/assets/icons/providers/soniox.svg"; export const PROVIDER_ICONS: Record = { openai: openaiIcon, @@ -21,9 +22,8 @@ export const PROVIDER_ICONS: Record = { nvidia: nvidiaIcon, "openai-oss": openaiOssIcon, gemma: gemmaIcon, + soniox: sonioxIcon, }; -// Soniox has no icon asset yet; getProviderIcon returns -// undefined, and ProviderIcon.tsx falls back to a Brain icon. export function getProviderIcon(provider: string): string | undefined { return PROVIDER_ICONS[provider]; From 74598f93baea99735bfd152b86dc1a1c25faa994 Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 20:13:34 +0100 Subject: [PATCH 3/9] fix(soniox): skip duplicate finalize in disconnect when already sent When audioManager calls finalize() before disconnect(), the server has already received it. Sending it again in drainFinalTokens() caused a 3s timeout waiting for a response that would never come. Track finalize state with _finalizeSent flag and skip the redundant call. --- src/helpers/sonioxStreaming.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/helpers/sonioxStreaming.js b/src/helpers/sonioxStreaming.js index 6174723d..500fb372 100644 --- a/src/helpers/sonioxStreaming.js +++ b/src/helpers/sonioxStreaming.js @@ -26,6 +26,7 @@ class SonioxStreaming { this.isDisconnecting = false; this.audioBytesSent = 0; this.closeResolve = null; + this._finalizeSent = false; } getFullTranscript() { @@ -46,6 +47,7 @@ class SonioxStreaming { this.audioBytesSent = 0; this.coldStartBuffer = []; this.coldStartBufferSize = 0; + this._finalizeSent = false; debugLogger.debug("Soniox connecting", { model: model || "stt-rt-v4", language }); @@ -209,6 +211,7 @@ class SonioxStreaming { finalize() { if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return false; + this._finalizeSent = true; this.ws.send(JSON.stringify({ type: "finalize" })); debugLogger.debug("Soniox finalize sent"); return true; @@ -249,7 +252,9 @@ class SonioxStreaming { this.isDisconnecting = true; if (this.ws.readyState === WebSocket.OPEN && this.audioBytesSent > 0) { - await this.drainFinalTokens(); + if (!this._finalizeSent) { + await this.drainFinalTokens(); + } await this.drainSessionEnd(); } From 98b81eef0d6f5a7bcccd7075ed9c6fe52bcfcc26 Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 20:29:29 +0100 Subject: [PATCH 4/9] fix(soniox): make warmup a no-op for cold-start-only design Soniox connects in ~250ms, no benefit from keeping an idle WebSocket between dictation sessions. Avoids unnecessary Soniox session usage and potential idle timeout issues. --- src/helpers/ipcHandlers.js | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/helpers/ipcHandlers.js b/src/helpers/ipcHandlers.js index a36a199c..14aafe71 100644 --- a/src/helpers/ipcHandlers.js +++ b/src/helpers/ipcHandlers.js @@ -2433,35 +2433,9 @@ class IPCHandlers { }); // --- Soniox streaming --- - ipcMain.handle("soniox-streaming-warmup", async (event, options = {}) => { - try { - const apiKey = options.apiKey || this.environmentManager.getSonioxKey(); - if (!apiKey) { - return { success: false, error: "Soniox API key not configured", code: "NO_API" }; - } - - if (this._sonioxStreaming?.isConnected) { - await this._sonioxStreaming.disconnect(); - } - this._sonioxStreaming = new SonioxStreaming(); - this._sonioxStreaming.onPartialTranscript = (text) => - event.sender.send("soniox-streaming-partial", text); - this._sonioxStreaming.onFinalTranscript = (text) => - event.sender.send("soniox-streaming-final", text); - this._sonioxStreaming.onError = (err) => - event.sender.send("soniox-streaming-error", err.message); - this._sonioxStreaming.onSessionEnd = (data) => - event.sender.send("soniox-streaming-session-end", data || {}); - - await this._sonioxStreaming.connect({ - apiKey, - model: options.model || "stt-rt-v4", - language: options.language, - }); - return { success: true }; - } catch (err) { - return { success: false, error: err.message }; - } + // Soniox cold-starts fast (~250ms), no warmup needed. + ipcMain.handle("soniox-streaming-warmup", async () => { + return { success: true }; }); ipcMain.handle("soniox-streaming-start", async (event, options = {}) => { From 936e7208d54b9d09e1e6c906a8da5419341fe9b7 Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 20:44:24 +0100 Subject: [PATCH 5/9] refactor(soniox): remove dead code and redundant special-case - Remove closeResolve (never assigned, close handler check unreachable) - Use getFullTranscript() instead of inline .map().join() duplicate - Remove soniox special-case in handleCloudProviderChange (generic path handles it) --- src/components/TranscriptionModelPicker.tsx | 10 ---------- src/helpers/sonioxStreaming.js | 7 +------ 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/components/TranscriptionModelPicker.tsx b/src/components/TranscriptionModelPicker.tsx index a453f67d..e6befc66 100644 --- a/src/components/TranscriptionModelPicker.tsx +++ b/src/components/TranscriptionModelPicker.tsx @@ -546,16 +546,6 @@ export default function TranscriptionModelPicker({ return; } - if (providerId === "soniox") { - const sonioxProvider = cloudProviders.find( - (p) => p.id === "soniox" - ); - if (sonioxProvider?.models?.length) { - onCloudModelSelect(sonioxProvider.models[0].id); - } - return; - } - if (provider) { setCloudTranscriptionBaseUrl?.(provider.baseUrl); if (provider.models?.length) { diff --git a/src/helpers/sonioxStreaming.js b/src/helpers/sonioxStreaming.js index 500fb372..f08e6b86 100644 --- a/src/helpers/sonioxStreaming.js +++ b/src/helpers/sonioxStreaming.js @@ -25,7 +25,6 @@ class SonioxStreaming { this.keepAliveInterval = null; this.isDisconnecting = false; this.audioBytesSent = 0; - this.closeResolve = null; this._finalizeSent = false; } @@ -114,9 +113,6 @@ class SonioxStreaming { this.pendingReject = null; this.pendingResolve = null; } - if (this.closeResolve) { - this.closeResolve({ text: this.getFullTranscript() }); - } this.cleanup(); if (wasActive && !this.isDisconnecting) { this.onSessionEnd?.({ text: this.getFullTranscript() }); @@ -159,7 +155,7 @@ class SonioxStreaming { } } - const finalText = this.finalTokens.map((t) => t.text).join(""); + const finalText = this.getFullTranscript(); this.currentNonFinalText = nonFinalTexts.join(""); this.onPartialTranscript?.(finalText + this.currentNonFinalText); @@ -337,7 +333,6 @@ class SonioxStreaming { } this.isConnected = false; - this.closeResolve = null; } } From fdd73847dd8757c7e1bda1ac50cff0133bcfccfe Mon Sep 17 00:00:00 2001 From: Haz Date: Tue, 10 Mar 2026 21:25:20 +0100 Subject: [PATCH 6/9] feat(soniox): add secondary language hints for mixed-language speech MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Soniox supports multi-language transcription via language_hints array. Add a secondary language selector in the Soniox provider tab so users can hint a second language (e.g. Polish + English) for code-switching. - New sonioxSecondaryLanguage setting in store/hook - LanguageSelector dropdown in Soniox tab (inline layout) - Disabled when primary language is auto (no bias needed) - Language codes normalized to base form (en-US → en) - i18n keys added for all 10 locales --- src/components/SettingsPage.tsx | 10 +++++++ src/components/TranscriptionModelPicker.tsx | 30 +++++++++++++++++++++ src/helpers/audioManager.js | 2 ++ src/helpers/ipcHandlers.js | 1 + src/helpers/sonioxStreaming.js | 9 ++++--- src/hooks/useSettings.ts | 3 +++ src/locales/de/translation.json | 4 ++- src/locales/en/translation.json | 4 ++- src/locales/es/translation.json | 4 ++- src/locales/fr/translation.json | 4 ++- src/locales/it/translation.json | 4 ++- src/locales/ja/translation.json | 4 ++- src/locales/pt/translation.json | 4 ++- src/locales/ru/translation.json | 4 ++- src/locales/zh-CN/translation.json | 4 ++- src/locales/zh-TW/translation.json | 4 ++- src/stores/settingsStore.ts | 3 +++ 17 files changed, 84 insertions(+), 14 deletions(-) diff --git a/src/components/SettingsPage.tsx b/src/components/SettingsPage.tsx index 909192c1..f2a5129f 100644 --- a/src/components/SettingsPage.tsx +++ b/src/components/SettingsPage.tsx @@ -174,6 +174,8 @@ interface TranscriptionSectionProps { setMistralApiKey: (key: string) => void; sonioxApiKey: string; setSonioxApiKey: (key: string) => void; + sonioxSecondaryLanguage: string; + setSonioxSecondaryLanguage: (lang: string) => void; customTranscriptionApiKey: string; setCustomTranscriptionApiKey: (key: string) => void; cloudTranscriptionBaseUrl?: string; @@ -211,6 +213,8 @@ function TranscriptionSection({ setMistralApiKey, sonioxApiKey, setSonioxApiKey, + sonioxSecondaryLanguage, + setSonioxSecondaryLanguage, customTranscriptionApiKey, setCustomTranscriptionApiKey, cloudTranscriptionBaseUrl, @@ -389,6 +393,8 @@ function TranscriptionSection({ setMistralApiKey={setMistralApiKey} sonioxApiKey={sonioxApiKey} setSonioxApiKey={setSonioxApiKey} + sonioxSecondaryLanguage={sonioxSecondaryLanguage} + setSonioxSecondaryLanguage={setSonioxSecondaryLanguage} customTranscriptionApiKey={customTranscriptionApiKey} setCustomTranscriptionApiKey={setCustomTranscriptionApiKey} cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl} @@ -684,6 +690,8 @@ export default function SettingsPage({ activeSection = "general" }: SettingsPage setMistralApiKey, sonioxApiKey, setSonioxApiKey, + sonioxSecondaryLanguage, + setSonioxSecondaryLanguage, customTranscriptionApiKey, setCustomTranscriptionApiKey, customReasoningApiKey, @@ -2680,6 +2688,8 @@ EOF`, setMistralApiKey={setMistralApiKey} sonioxApiKey={sonioxApiKey} setSonioxApiKey={setSonioxApiKey} + sonioxSecondaryLanguage={sonioxSecondaryLanguage} + setSonioxSecondaryLanguage={setSonioxSecondaryLanguage} customTranscriptionApiKey={customTranscriptionApiKey} setCustomTranscriptionApiKey={setCustomTranscriptionApiKey} cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl} diff --git a/src/components/TranscriptionModelPicker.tsx b/src/components/TranscriptionModelPicker.tsx index e6befc66..cdee8ffe 100644 --- a/src/components/TranscriptionModelPicker.tsx +++ b/src/components/TranscriptionModelPicker.tsx @@ -8,6 +8,8 @@ import { ProviderTabs } from "./ui/ProviderTabs"; import ModelCardList from "./ui/ModelCardList"; import { DownloadProgressBar } from "./ui/DownloadProgressBar"; import ApiKeyInput from "./ui/ApiKeyInput"; +import LanguageSelector, { type LanguageOption } from "./ui/LanguageSelector"; +import languageRegistry from "../config/languageRegistry.json"; import { ConfirmDialog } from "./ui/dialog"; import { useDialogs } from "../hooks/useDialogs"; import { useModelDownload, type DownloadProgress } from "../hooks/useModelDownload"; @@ -26,6 +28,7 @@ import { getProviderIcon, isMonochromeProvider } from "../utils/providerIcons"; import { API_ENDPOINTS, normalizeBaseUrl } from "../config/constants"; import { createExternalLinkHandler } from "../utils/externalLinks"; import { getCachedPlatform } from "../utils/platform"; +import { useSettingsStore } from "../stores/settingsStore"; import type { CudaWhisperStatus } from "../types/electron"; import logger from "../utils/logger"; @@ -201,12 +204,18 @@ interface TranscriptionModelPickerProps { setCustomTranscriptionApiKey?: (key: string) => void; sonioxApiKey?: string; setSonioxApiKey?: (key: string) => void; + sonioxSecondaryLanguage?: string; + setSonioxSecondaryLanguage?: (lang: string) => void; cloudTranscriptionBaseUrl?: string; setCloudTranscriptionBaseUrl?: (url: string) => void; className?: string; variant?: "onboarding" | "settings"; } +const SECONDARY_LANGUAGE_OPTIONS: LanguageOption[] = languageRegistry.languages + .filter((l) => l.code !== "auto") + .map(({ code, label, flag }) => ({ value: code, label, flag })); + const CLOUD_PROVIDER_TABS = [ { id: "openai", name: "OpenAI" }, { id: "groq", name: "Groq", recommended: true }, @@ -279,12 +288,16 @@ export default function TranscriptionModelPicker({ setCustomTranscriptionApiKey, sonioxApiKey = "", setSonioxApiKey, + sonioxSecondaryLanguage = "", + setSonioxSecondaryLanguage, cloudTranscriptionBaseUrl = "", setCloudTranscriptionBaseUrl, className = "", variant = "settings", }: TranscriptionModelPickerProps) { const { t } = useTranslation(); + const preferredLanguage = useSettingsStore((s) => s.preferredLanguage); + const isAutoLanguage = !preferredLanguage || preferredLanguage === "auto"; const [localModels, setLocalModels] = useState([]); const [parakeetModels, setParakeetModels] = useState([]); const [internalLocalProvider, setInternalLocalProvider] = useState(selectedLocalProvider); @@ -936,6 +949,23 @@ export default function TranscriptionModelPicker({ />
+ {selectedCloudProvider === "soniox" && setSonioxSecondaryLanguage && ( +
+ + setSonioxSecondaryLanguage(value === "none" ? "" : value)} + options={[ + { value: "none", label: t("common.none"), flag: "" }, + ...SECONDARY_LANGUAGE_OPTIONS, + ]} + className="min-w-32" + /> +
+ )} +