{})}
+ setApiKey={
+ setCustomTranscriptionApiKey ||
+ (() => {})
+ }
label={t("transcription.apiKeyOptional")}
helpText=""
/>
@@ -857,7 +894,9 @@ export default function TranscriptionModelPicker({
onCloudModelSelect(e.target.value)}
+ onChange={(e) =>
+ onCloudModelSelect(e.target.value)
+ }
placeholder="whisper-1"
className="h-8 text-sm"
/>
@@ -877,7 +916,9 @@ export default function TranscriptionModelPicker({
groq: "https://console.groq.com/keys",
mistral: "https://console.mistral.ai/api-keys",
openai: "https://platform.openai.com/api-keys",
- }[selectedCloudProvider] || "https://platform.openai.com/api-keys"
+ soniox: "https://console.soniox.com/",
+ }[selectedCloudProvider] ||
+ "https://platform.openai.com/api-keys"
)}
className="text-xs text-primary/70 hover:text-primary transition-colors cursor-pointer"
>
@@ -886,22 +927,49 @@ export default function TranscriptionModelPicker({
+ {selectedCloudProvider === "soniox" && setSonioxSecondaryLanguage && (
+
+
+ setSonioxSecondaryLanguage(value === "none" ? "" : value)}
+ options={[
+ { value: "none", label: t("common.none"), flag: "" },
+ ...SECONDARY_LANGUAGE_OPTIONS,
+ ]}
+ className="min-w-32"
+ />
+
+ )}
+
-
+
window.electronAPI.onDictationRealtimeError(cb),
onSessionEnd: (cb) => window.electronAPI.onDictationRealtimeSessionEnd(cb),
},
+ soniox: {
+ warmup: (opts) => window.electronAPI.sonioxStreamingWarmup(opts),
+ start: (opts) => window.electronAPI.sonioxStreamingStart(opts),
+ send: (buf) => window.electronAPI.sonioxStreamingSend(buf),
+ finalize: () => window.electronAPI.sonioxStreamingFinalize(),
+ stop: () => window.electronAPI.sonioxStreamingStop(),
+ status: () => window.electronAPI.sonioxStreamingStatus(),
+ onPartial: (cb) => window.electronAPI.onSonioxPartialTranscript(cb),
+ onFinal: (cb) => window.electronAPI.onSonioxFinalTranscript(cb),
+ onError: (cb) => window.electronAPI.onSonioxError(cb),
+ onSessionEnd: (cb) => window.electronAPI.onSonioxSessionEnd(cb),
+ },
};
class AudioManager {
@@ -221,7 +233,10 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
}
getStreamingProvider() {
- const { cloudTranscriptionModel } = getSettings();
+ const { cloudTranscriptionProvider, cloudTranscriptionModel } = getSettings();
+ if (cloudTranscriptionProvider === "soniox") {
+ return STREAMING_PROVIDERS.soniox;
+ }
if (REALTIME_MODELS.has(cloudTranscriptionModel)) {
return STREAMING_PROVIDERS["openai-realtime"];
}
@@ -2005,6 +2020,11 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
const s = getSettings();
if (s.useLocalWhisper) return false;
+ // Soniox is always streaming (BYOK only)
+ if (s.cloudTranscriptionProvider === "soniox" && s.sonioxApiKey) {
+ return true;
+ }
+
if (REALTIME_MODELS.has(s.cloudTranscriptionModel)) {
if (s.cloudTranscriptionMode === "byok") return !!s.openaiApiKey;
if (s.cloudTranscriptionMode === "openwhispr") return !!(isSignedInOverride ?? s.isSignedIn);
@@ -2271,12 +2291,14 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
const result = await withSessionRefresh(async () => {
const {
preferredLanguage: preferredLang,
+ sonioxSecondaryLanguage,
cloudTranscriptionModel,
cloudTranscriptionMode,
} = getSettings();
const res = await provider.start({
sampleRate: 16000,
language: preferredLang && preferredLang !== "auto" ? preferredLang : undefined,
+ secondaryLanguage: preferredLang && preferredLang !== "auto" ? (sonioxSecondaryLanguage || undefined) : undefined,
keyterms: this.getKeyterms(),
model: cloudTranscriptionModel,
mode: cloudTranscriptionMode === "byok" ? "byok" : "openwhispr",
@@ -2577,6 +2599,7 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
}
}
+ finalText = finalText.trim();
if (finalText) {
const tBeforePaste = performance.now();
const clientTotalMs = Math.round(tBeforePaste - t0);
diff --git a/src/helpers/environment.js b/src/helpers/environment.js
index d11f24f8..590a97f5 100644
--- a/src/helpers/environment.js
+++ b/src/helpers/environment.js
@@ -10,6 +10,7 @@ const PERSISTED_KEYS = [
"GEMINI_API_KEY",
"GROQ_API_KEY",
"MISTRAL_API_KEY",
+ "SONIOX_API_KEY",
"CUSTOM_TRANSCRIPTION_API_KEY",
"CUSTOM_REASONING_API_KEY",
"LOCAL_TRANSCRIPTION_PROVIDER",
@@ -107,6 +108,14 @@ class EnvironmentManager {
return this._saveKey("MISTRAL_API_KEY", key);
}
+ getSonioxKey() { // Returns whatever this._getKey yields for the persisted "SONIOX_API_KEY" entry.
+ return this._getKey("SONIOX_API_KEY");
+ }
+
+ saveSonioxKey(key) { // Stores `key` under "SONIOX_API_KEY" via this._saveKey and returns its result.
+ return this._saveKey("SONIOX_API_KEY", key);
+ }
+
getCustomTranscriptionKey() {
return this._getKey("CUSTOM_TRANSCRIPTION_API_KEY");
}
diff --git a/src/helpers/ipcHandlers.js b/src/helpers/ipcHandlers.js
index e872c521..d54940e1 100644
--- a/src/helpers/ipcHandlers.js
+++ b/src/helpers/ipcHandlers.js
@@ -10,6 +10,7 @@ const AssemblyAiStreaming = require("./assemblyAiStreaming");
const { i18nMain, changeLanguage } = require("./i18nMain");
const DeepgramStreaming = require("./deepgramStreaming");
const OpenAIRealtimeStreaming = require("./openaiRealtimeStreaming");
+const SonioxStreaming = require("./sonioxStreaming");
const AudioStorageManager = require("./audioStorage");
const MISTRAL_TRANSCRIPTION_URL = "https://api.mistral.ai/v1/audio/transcriptions";
@@ -108,6 +109,7 @@ class IPCHandlers {
this.deepgramStreaming = null;
this.openaiRealtimeStreaming = null;
this._dictationStreaming = null;
+ this._sonioxStreaming = null;
this._autoLearnEnabled = true; // Default on, synced from renderer
this._autoLearnDebounceTimer = null;
this._autoLearnLatestData = null;
@@ -146,6 +148,34 @@ class IPCHandlers {
}
}
+ cleanupAllStreaming() {
+ const backends = [
+ { name: "deepgram", instance: this.deepgramStreaming },
+ { name: "openai-realtime", instance: this.openaiRealtimeStreaming },
+ { name: "soniox", instance: this._sonioxStreaming },
+ { name: "dictation", instance: this._dictationStreaming },
+ { name: "assemblyai", instance: this.assemblyAiStreaming },
+ ];
+ for (const { name, instance } of backends) {
+ if (!instance) continue;
+ try {
+ if (typeof instance.cleanupAll === "function") {
+ instance.cleanupAll();
+ } else {
+ instance.cleanup();
+ }
+ debugLogger.debug(`Cleaned up ${name} streaming`);
+ } catch (err) {
+ debugLogger.debug(`Error cleaning up ${name} streaming`, { error: err.message });
+ }
+ }
+ this.deepgramStreaming = null;
+ this.openaiRealtimeStreaming = null;
+ this._sonioxStreaming = null;
+ this._dictationStreaming = null;
+ this.assemblyAiStreaming = null;
+ }
+
_setupAudioCleanup() {
const DEFAULT_RETENTION_DAYS = 30;
const SIX_HOURS_MS = 6 * 60 * 60 * 1000;
@@ -1424,6 +1454,14 @@ class IPCHandlers {
return this.environmentManager.saveMistralKey(key);
});
+ ipcMain.handle("get-soniox-key", async () => { // Returns the result of environmentManager.getSonioxKey() to the caller.
+ return this.environmentManager.getSonioxKey();
+ });
+
+ ipcMain.handle("save-soniox-key", async (event, key) => { // Passes `key` to environmentManager.saveSonioxKey() and returns its result.
+ return this.environmentManager.saveSonioxKey(key);
+ });
+
ipcMain.handle(
"proxy-mistral-transcription",
async (event, { audioBuffer, model, language, contextBias }) => {
@@ -2277,12 +2315,18 @@ class IPCHandlers {
};
const setupDictationCallbacks = (streaming, event) => {
- streaming.onPartialTranscript = (text) =>
- event.sender.send("dictation-realtime-partial", text);
- streaming.onFinalTranscript = (text) => event.sender.send("dictation-realtime-final", text);
- streaming.onError = (err) => event.sender.send("dictation-realtime-error", err.message);
- streaming.onSessionEnd = (data) =>
- event.sender.send("dictation-realtime-session-end", data || {});
+ streaming.onPartialTranscript = (text) => { // Each callback forwards to its "dictation-realtime-*" channel only while event.sender is not destroyed.
+ if (!event.sender.isDestroyed()) event.sender.send("dictation-realtime-partial", text);
+ };
+ streaming.onFinalTranscript = (text) => {
+ if (!event.sender.isDestroyed()) event.sender.send("dictation-realtime-final", text);
+ };
+ streaming.onError = (err) => {
+ if (!event.sender.isDestroyed()) event.sender.send("dictation-realtime-error", err.message);
+ };
+ streaming.onSessionEnd = (data) => {
+ if (!event.sender.isDestroyed()) event.sender.send("dictation-realtime-session-end", data || {});
+ };
};
const connectDictationStreaming = async (event, options) => {
@@ -2422,6 +2466,69 @@ class IPCHandlers {
return { success: true, text: result.text || "" };
});
+ // --- Soniox streaming ---
+ // Soniox cold-starts fast (~250ms), no warmup needed.
+ ipcMain.handle("soniox-streaming-warmup", async () => {
+ return { success: true };
+ });
+
+ ipcMain.handle("soniox-streaming-start", async (event, options = {}) => {
+ try {
+ if (!this._sonioxStreaming?.isConnected) {
+ const apiKey = options.apiKey || this.environmentManager.getSonioxKey();
+ if (!apiKey) {
+ return { success: false, error: "Soniox API key not configured", code: "NO_API" };
+ }
+
+ // Cold start: create new connection
+ this._sonioxStreaming = new SonioxStreaming();
+ this._sonioxStreaming.onPartialTranscript = (text) => {
+ if (!event.sender.isDestroyed()) event.sender.send("soniox-streaming-partial", text);
+ };
+ this._sonioxStreaming.onFinalTranscript = (text) => {
+ if (!event.sender.isDestroyed()) event.sender.send("soniox-streaming-final", text);
+ };
+ this._sonioxStreaming.onError = (err) => {
+ if (!event.sender.isDestroyed()) event.sender.send("soniox-streaming-error", err.message);
+ };
+ this._sonioxStreaming.onSessionEnd = (data) => {
+ if (!event.sender.isDestroyed()) event.sender.send("soniox-streaming-session-end", data || {});
+ };
+
+ await this._sonioxStreaming.connect({
+ apiKey,
+ model: options.model || "stt-rt-v4",
+ language: options.language,
+ secondaryLanguage: options.secondaryLanguage,
+ });
+ }
+ return { success: true };
+ } catch (err) {
+ return { success: false, error: err.message };
+ }
+ });
+
+ ipcMain.on("soniox-streaming-send", (_event, audioBuffer) => {
+ this._sonioxStreaming?.sendAudio(Buffer.from(audioBuffer));
+ });
+
+ ipcMain.on("soniox-streaming-finalize", () => {
+ this._sonioxStreaming?.finalize();
+ });
+
+ ipcMain.handle("soniox-streaming-stop", async () => {
+ if (!this._sonioxStreaming) {
+ return { success: true, text: "" };
+ }
+ const result = await this._sonioxStreaming.disconnect().catch(() => ({ text: "" }));
+ this._sonioxStreaming = null;
+ return { success: true, text: result.text || "" };
+ });
+
+ ipcMain.handle("soniox-streaming-status", async () => {
+ return { connected: !!this._sonioxStreaming?.isConnected };
+ });
+
ipcMain.handle("update-transcription-text", async (_event, id, text, rawText) => {
try {
this.databaseManager.updateTranscriptionText(id, text, rawText);
diff --git a/src/helpers/sonioxStreaming.js b/src/helpers/sonioxStreaming.js
new file mode 100644
index 00000000..381d7cda
--- /dev/null
+++ b/src/helpers/sonioxStreaming.js
@@ -0,0 +1,375 @@
+const WebSocket = require("ws");
+const debugLogger = require("./debugLogger");
+
+const WEBSOCKET_TIMEOUT_MS = 15000;
+const DISCONNECT_TIMEOUT_MS = 3000;
+const KEEPALIVE_INTERVAL_MS = 5000;
+const KEEPALIVE_IDLE_LIMIT_MS = 30000; // Stop keepalive if no audio sent for 30s
+const COLD_START_BUFFER_MAX = 3 * 16000 * 2; // 3 seconds of 16-bit PCM at 16kHz
+const SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
+
+// Filler words / hesitations to strip from assembled text.
+// Soniox uses sub-word (BPE) tokenization, so fillers must be removed from the
+// joined text rather than individual tokens.
+const FILLER_WORD = "(?:uh+|um+|yyy+|eee+|mmm+|hmm+)";
+const FILLER_RE = new RegExp(`\\s*,?\\s*\\b${FILLER_WORD}\\b[,.]?\\s*`, "gi");
+const LEADING_FILLER_RE = new RegExp(`^\\s*,?\\s*\\b${FILLER_WORD}\\b`, "i");
+const POST_SENTENCE_CAP_RE = /([.!?]\s+)(\p{Ll})/gu;
+
+function removeFillers(text) {
+ const hadLeadingFiller = LEADING_FILLER_RE.test(text);
+ let result = text.replace(FILLER_RE, " ");
+ result = result.replace(/ +/g, " ").trim();
+ result = result.replace(POST_SENTENCE_CAP_RE, (_, punct, letter) =>
+ punct + letter.toUpperCase()
+ );
+ if (hadLeadingFiller) {
+ result = result.replace(/^\p{Ll}/u, (c) => c.toUpperCase());
+ }
+ return result;
+}
+
+class SonioxStreaming {
+ constructor() {
+ this.ws = null;
+ this.isConnected = false;
+ this.finalTokens = [];
+ this.currentNonFinalText = "";
+ this.coldStartBuffer = [];
+ this.coldStartBufferSize = 0;
+ this.onPartialTranscript = null;
+ this.onFinalTranscript = null;
+ this.onError = null;
+ this.onSessionEnd = null;
+ this.pendingResolve = null;
+ this.pendingReject = null;
+ this.connectionTimeout = null;
+ this.keepAliveInterval = null;
+ this.isDisconnecting = false;
+ this.audioBytesSent = 0;
+ this._finalizeSent = false;
+ this._lastAudioSentAt = 0;
+ }
+
+ getFullTranscript() {
+ return removeFillers(this.finalTokens.map((t) => t.text).join(""));
+ }
+
+ async connect(options = {}) {
+ const { apiKey, model, language, secondaryLanguage } = options;
+ if (!apiKey) throw new Error("Soniox API key is required");
+
+ if (this.isConnected) {
+ debugLogger.debug("Soniox already connected");
+ return;
+ }
+
+ this.finalTokens = [];
+ this.currentNonFinalText = "";
+ this.audioBytesSent = 0;
+ this.coldStartBuffer = [];
+ this.coldStartBufferSize = 0;
+ this._finalizeSent = false;
+
+ const toBase = (l) => l && l !== "auto" ? l.split("-")[0] : null;
+ const languageHints =
+ [toBase(language), toBase(secondaryLanguage)].filter(Boolean);
+
+ debugLogger.debug("Soniox connecting", { model: model || "stt-rt-v4", languageHints });
+
+ const configMessage = {
+ api_key: apiKey,
+ model: model || "stt-rt-v4",
+ audio_format: "pcm_s16le",
+ sample_rate: 16000,
+ num_channels: 1,
+ language_hints: languageHints,
+ };
+
+ return new Promise((resolve, reject) => {
+ this.pendingResolve = resolve;
+ this.pendingReject = reject;
+
+ this.connectionTimeout = setTimeout(() => {
+ this.cleanup();
+ reject(new Error("Soniox WebSocket connection timeout"));
+ }, WEBSOCKET_TIMEOUT_MS);
+
+ this.ws = new WebSocket(SONIOX_WS_URL);
+
+ this.ws.on("open", () => {
+ debugLogger.debug("Soniox WebSocket opened, sending config");
+ this.ws.send(JSON.stringify(configMessage));
+ this.startKeepAlive();
+ this.flushColdStartBuffer();
+
+ clearTimeout(this.connectionTimeout);
+ this.isConnected = true;
+ this.pendingResolve();
+ this.pendingResolve = null;
+ this.pendingReject = null;
+ });
+
+ this.ws.on("message", (data) => {
+ this.handleMessage(data);
+ });
+
+ this.ws.on("error", (error) => {
+ debugLogger.error("Soniox WebSocket error", { error: error.message });
+ this.cleanup();
+ if (this.pendingReject) {
+ this.pendingReject(error);
+ this.pendingReject = null;
+ this.pendingResolve = null;
+ }
+ this.onError?.(error);
+ });
+
+ this.ws.on("close", (code, reason) => {
+ const wasActive = this.isConnected;
+ debugLogger.debug("Soniox WebSocket closed", {
+ code,
+ reason: reason?.toString(),
+ wasActive,
+ });
+ if (this.pendingReject) {
+ this.pendingReject(new Error(`WebSocket closed before ready (code: ${code})`));
+ this.pendingReject = null;
+ this.pendingResolve = null;
+ }
+ this.cleanup();
+ if (wasActive && !this.isDisconnecting) {
+ this.onSessionEnd?.({ text: this.getFullTranscript() });
+ }
+ });
+ });
+ }
+
+ handleMessage(data) {
+ try {
+ const res = JSON.parse(data.toString());
+
+ if (res.error_code) {
+ debugLogger.error("Soniox error response", {
+ code: res.error_code,
+ message: res.error_message,
+ });
+ this.onError?.(new Error(`Soniox error ${res.error_code}: ${res.error_message}`));
+ return;
+ }
+
+ if (res.finished) {
+ debugLogger.debug("Soniox session finished", {
+ finalTokens: this.finalTokens.length,
+ textLength: this.getFullTranscript().length,
+ });
+ this.onSessionEnd?.({ text: this.getFullTranscript() });
+ return;
+ }
+
+ let nonFinalTexts = [];
+ let newFinalTokens = false;
+ for (const token of res.tokens || []) {
+ if (token.text === "") continue;
+ if (!token.text || !token.text.trim() || token.text === "\ufffd") continue;
+ if (token.is_final) {
+ this.finalTokens.push(token);
+ newFinalTokens = true;
+ } else {
+ nonFinalTexts.push(token.text);
+ }
+ }
+
+ const rawFinal = this.finalTokens.map((t) => t.text).join("");
+ this.currentNonFinalText = nonFinalTexts.join("");
+
+ this.onPartialTranscript?.(
+ removeFillers(rawFinal + this.currentNonFinalText)
+ );
+
+ if (newFinalTokens) {
+ this.onFinalTranscript?.(removeFillers(rawFinal));
+ }
+ } catch (err) {
+ debugLogger.error("Soniox message parse error", { error: err.message });
+ }
+ }
+
+ flushColdStartBuffer() {
+ if (this.coldStartBuffer.length === 0) return;
+
+ debugLogger.debug("Soniox flushing cold-start buffer", {
+ chunks: this.coldStartBuffer.length,
+ bytes: this.coldStartBufferSize,
+ });
+ for (const buf of this.coldStartBuffer) {
+ this.ws.send(buf);
+ this.audioBytesSent += buf.length;
+ }
+ this.coldStartBuffer = [];
+ this.coldStartBufferSize = 0;
+ }
+
+ sendAudio(pcmBuffer) {
+ if (!this.ws) return false;
+
+ if (
+ this.ws.readyState === WebSocket.CONNECTING &&
+ this.coldStartBufferSize < COLD_START_BUFFER_MAX
+ ) {
+ const copy = Buffer.from(pcmBuffer);
+ this.coldStartBuffer.push(copy);
+ this.coldStartBufferSize += copy.length;
+ return false;
+ }
+
+ if (this.ws.readyState !== WebSocket.OPEN) return false;
+
+ this.flushColdStartBuffer();
+ this.ws.send(pcmBuffer);
+ this.audioBytesSent += pcmBuffer.length;
+ this._lastAudioSentAt = Date.now();
+ return true;
+ }
+
+ finalize() {
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return false;
+
+ this._finalizeSent = true;
+ this.ws.send(JSON.stringify({ type: "finalize" }));
+ debugLogger.debug("Soniox finalize sent");
+ return true;
+ }
+
+ startKeepAlive() {
+ this.stopKeepAlive();
+ this._lastAudioSentAt = Date.now();
+ this.keepAliveInterval = setInterval(() => {
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
+ this.stopKeepAlive();
+ return;
+ }
+ if (Date.now() - this._lastAudioSentAt > KEEPALIVE_IDLE_LIMIT_MS) {
+ debugLogger.debug("Soniox idle timeout, closing connection");
+ this.cleanup();
+ this.onSessionEnd?.({ text: this.getFullTranscript() });
+ return;
+ }
+ try {
+ this.ws.send(JSON.stringify({ type: "keepalive" }));
+ } catch (err) {
+ debugLogger.debug("Soniox keep-alive failed", { error: err.message });
+ this.stopKeepAlive();
+ }
+ }, KEEPALIVE_INTERVAL_MS);
+ }
+
+ stopKeepAlive() {
+ if (this.keepAliveInterval) {
+ clearInterval(this.keepAliveInterval);
+ this.keepAliveInterval = null;
+ }
+ }
+
+ async disconnect() {
+ debugLogger.debug("Soniox disconnect", {
+ audioBytesSent: this.audioBytesSent,
+ finalTokens: this.finalTokens.length,
+ textLength: this.getFullTranscript().length,
+ });
+
+ if (!this.ws) return { text: this.getFullTranscript() };
+
+ this.isDisconnecting = true;
+
+ if (this.ws.readyState === WebSocket.OPEN && this.audioBytesSent > 0) {
+ if (!this._finalizeSent) {
+ await this.drainFinalTokens();
+ }
+ await this.drainSessionEnd();
+ }
+
+ if (this.ws) {
+ this.ws.close();
+ }
+
+ const result = { text: this.getFullTranscript() };
+ this.cleanup();
+ this.isDisconnecting = false;
+ return result;
+ }
+
+ drainFinalTokens() {
+ return new Promise((resolve) => {
+ const prevOnFinal = this.onFinalTranscript;
+
+ const tid = setTimeout(() => {
+ debugLogger.debug("Soniox finalize timeout, using accumulated text");
+ this.onFinalTranscript = prevOnFinal;
+ resolve();
+ }, DISCONNECT_TIMEOUT_MS);
+
+ this.onFinalTranscript = (text) => {
+ clearTimeout(tid);
+ this.onFinalTranscript = prevOnFinal;
+ prevOnFinal?.(text);
+ resolve();
+ };
+
+ try {
+ this.ws.send(JSON.stringify({ type: "finalize" }));
+ } catch {
+ clearTimeout(tid);
+ this.onFinalTranscript = prevOnFinal;
+ resolve();
+ }
+ });
+ }
+
+ drainSessionEnd() {
+ return new Promise((resolve) => {
+ const prevOnSessionEnd = this.onSessionEnd;
+
+ const tid = setTimeout(() => {
+ debugLogger.debug("Soniox session end timeout, closing");
+ this.onSessionEnd = prevOnSessionEnd;
+ resolve();
+ }, DISCONNECT_TIMEOUT_MS);
+
+ this.onSessionEnd = (result) => {
+ clearTimeout(tid);
+ this.onSessionEnd = prevOnSessionEnd;
+ prevOnSessionEnd?.(result);
+ resolve();
+ };
+
+ try {
+ this.ws.send("");
+ } catch {
+ clearTimeout(tid);
+ this.onSessionEnd = prevOnSessionEnd;
+ resolve();
+ }
+ });
+ }
+
+ cleanup() {
+ this.stopKeepAlive();
+ clearTimeout(this.connectionTimeout);
+ this.connectionTimeout = null;
+
+ if (this.ws) {
+ try {
+ this.ws.close();
+ } catch (err) {
+ // ignore
+ }
+ this.ws = null;
+ }
+
+ this.isConnected = false;
+ }
+}
+
+module.exports = SonioxStreaming;
+module.exports.removeFillers = removeFillers;
diff --git a/src/hooks/useSettings.ts b/src/hooks/useSettings.ts
index 56d0c62f..07c255db 100644
--- a/src/hooks/useSettings.ts
+++ b/src/hooks/useSettings.ts
@@ -14,6 +14,7 @@ export interface TranscriptionSettings {
allowLocalFallback: boolean;
fallbackWhisperModel: string;
preferredLanguage: string;
+ sonioxSecondaryLanguage: string;
cloudTranscriptionProvider: string;
cloudTranscriptionModel: string;
cloudTranscriptionBaseUrl?: string;
@@ -48,6 +49,7 @@ export interface ApiKeySettings {
mistralApiKey: string;
customTranscriptionApiKey: string;
customReasoningApiKey: string;
+ sonioxApiKey: string;
}
export interface PrivacySettings {
@@ -170,6 +172,8 @@ function useSettingsInternal() {
allowLocalFallback: store.allowLocalFallback,
fallbackWhisperModel: store.fallbackWhisperModel,
preferredLanguage: store.preferredLanguage,
+ sonioxSecondaryLanguage: store.sonioxSecondaryLanguage,
+ setSonioxSecondaryLanguage: store.setSonioxSecondaryLanguage,
cloudTranscriptionProvider: store.cloudTranscriptionProvider,
cloudTranscriptionModel: store.cloudTranscriptionModel,
cloudTranscriptionBaseUrl: store.cloudTranscriptionBaseUrl,
@@ -213,6 +217,8 @@ function useSettingsInternal() {
setGeminiApiKey: store.setGeminiApiKey,
setGroqApiKey: store.setGroqApiKey,
setMistralApiKey: store.setMistralApiKey,
+ sonioxApiKey: store.sonioxApiKey,
+ setSonioxApiKey: store.setSonioxApiKey,
customTranscriptionApiKey: store.customTranscriptionApiKey,
setCustomTranscriptionApiKey: store.setCustomTranscriptionApiKey,
customReasoningApiKey: store.customReasoningApiKey,
diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json
index 3bb3a6dd..e9176f48 100644
--- a/src/locales/de/translation.json
+++ b/src/locales/de/translation.json
@@ -131,7 +131,9 @@
"private": "Privat",
"tap": "Tippen",
"close": "Schließen",
- "dismiss": "Verwerfen"
+ "dismiss": "Verwerfen",
+ "none": "Keine",
+ "secondaryLanguage": "Zweitsprache"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "Originales Whisper-Modell",
"groq_whisper_large_v3": "Hochpräzise Spracherkennung",
"groq_whisper_large_v3_turbo": "216x Echtzeitgeschwindigkeit",
- "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription"
+ "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription",
+ "soniox_stt_rt_v4": "Echtzeit-Spracherkennung mit hoher Genauigkeit"
},
"cloud": {
"openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning",
diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json
index 9cfa4475..e69b3ac1 100644
--- a/src/locales/en/translation.json
+++ b/src/locales/en/translation.json
@@ -131,7 +131,9 @@
"private": "Private",
"tap": "Tap",
"close": "Close",
- "dismiss": "Dismiss"
+ "dismiss": "Dismiss",
+ "none": "None",
+ "secondaryLanguage": "Secondary language"
},
"onboarding": {
"steps": {
@@ -1411,7 +1413,8 @@
"openai_whisper_1": "Original Whisper model",
"groq_whisper_large_v3": "High accuracy speech recognition",
"groq_whisper_large_v3_turbo": "216x real-time speed",
- "mistral_voxtral_mini_latest": "Fast multilingual transcription"
+ "mistral_voxtral_mini_latest": "Fast multilingual transcription",
+ "soniox_stt_rt_v4": "Real-time streaming speech-to-text with high accuracy"
},
"cloud": {
"openai_gpt_5_2": "Latest flagship reasoning model",
diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json
index 65c8f091..f991a894 100644
--- a/src/locales/es/translation.json
+++ b/src/locales/es/translation.json
@@ -131,7 +131,9 @@
"private": "Privado",
"tap": "Pulsar",
"close": "Cerrar",
- "dismiss": "Descartar"
+ "dismiss": "Descartar",
+ "none": "Ninguno",
+ "secondaryLanguage": "Idioma secundario"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "Modelo Whisper original",
"groq_whisper_large_v3": "Reconocimiento de voz de alta precisión",
"groq_whisper_large_v3_turbo": "Velocidad 216x en tiempo real",
- "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida"
+ "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida",
+ "soniox_stt_rt_v4": "Transcripción de voz en tiempo real con alta precisión"
},
"cloud": {
"openai_gpt_5_2": "Modelo insignia de razonamiento más reciente",
diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json
index 50b0ef89..6c95989f 100644
--- a/src/locales/fr/translation.json
+++ b/src/locales/fr/translation.json
@@ -131,7 +131,9 @@
"private": "Privé",
"tap": "Appui",
"close": "Fermer",
- "dismiss": "Ignorer"
+ "dismiss": "Ignorer",
+ "none": "Aucun",
+ "secondaryLanguage": "Langue secondaire"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "Modèle Whisper original",
"groq_whisper_large_v3": "Reconnaissance vocale haute précision",
"groq_whisper_large_v3_turbo": "Vitesse 216x en temps réel",
- "mistral_voxtral_mini_latest": "Transcription multilingue rapide"
+ "mistral_voxtral_mini_latest": "Transcription multilingue rapide",
+ "soniox_stt_rt_v4": "Transcription vocale en temps réel avec une grande précision"
},
"cloud": {
"openai_gpt_5_2": "Dernier modèle phare pour le raisonnement",
diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json
index 5577e040..fb904976 100644
--- a/src/locales/it/translation.json
+++ b/src/locales/it/translation.json
@@ -131,7 +131,9 @@
"private": "Privato",
"tap": "Tocca",
"close": "Chiudi",
- "dismiss": "Ignora"
+ "dismiss": "Ignora",
+ "none": "Nessuno",
+ "secondaryLanguage": "Lingua secondaria"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "Modello Whisper originale",
"groq_whisper_large_v3_turbo": "Velocità 216x in tempo reale",
"groq_whisper_large_v3": "Modello Large v3, veloce e preciso",
- "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce"
+ "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce",
+ "soniox_stt_rt_v4": "Trascrizione vocale in tempo reale con elevata precisione"
},
"cloud": {
"openai_gpt_5_2": "Ultimo modello di punta per il ragionamento",
diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json
index 6584d4fe..ddc87729 100644
--- a/src/locales/ja/translation.json
+++ b/src/locales/ja/translation.json
@@ -131,7 +131,9 @@
"private": "プライベート",
"tap": "タップ",
"close": "閉じる",
- "dismiss": "閉じる"
+ "dismiss": "閉じる",
+ "none": "なし",
+ "secondaryLanguage": "補助言語"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "オリジナル Whisper モデル",
"groq_whisper_large_v3_turbo": "リアルタイムの 216 倍速",
"groq_whisper_large_v3": "Large v3 モデル、高速かつ高精度",
- "mistral_voxtral_mini_latest": "高速多言語文字起こし"
+ "mistral_voxtral_mini_latest": "高速多言語文字起こし",
+ "soniox_stt_rt_v4": "高精度リアルタイムストリーミング音声認識"
},
"cloud": {
"openai_gpt_5_2": "最新のフラッグシップ推論モデル",
diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json
index d49d8e62..c745dbec 100644
--- a/src/locales/pt/translation.json
+++ b/src/locales/pt/translation.json
@@ -103,7 +103,9 @@
"private": "Privado",
"tap": "Toque",
"close": "Fechar",
- "dismiss": "Dispensar"
+ "dismiss": "Dispensar",
+ "none": "Nenhum",
+ "secondaryLanguage": "Idioma secundário"
},
"onboarding": {
"steps": {
@@ -1313,7 +1315,8 @@
"openai_whisper_1": "Modelo Whisper original",
"groq_whisper_large_v3_turbo": "Velocidade 216x em tempo real",
"groq_whisper_large_v3": "Modelo Large v3, rápido e preciso",
- "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida"
+ "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida",
+ "soniox_stt_rt_v4": "Transcrição de voz em tempo real com alta precisão"
},
"cloud": {
"openai_gpt_5_2": "Modelo principal de raciocínio mais recente",
diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json
index 0f98f327..27c8fa88 100644
--- a/src/locales/ru/translation.json
+++ b/src/locales/ru/translation.json
@@ -131,7 +131,9 @@
"private": "Приватный",
"tap": "Нажатие",
"close": "Закрыть",
- "dismiss": "Скрыть"
+ "dismiss": "Скрыть",
+ "none": "Нет",
+ "secondaryLanguage": "Второй язык"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "Оригинальная модель Whisper",
"groq_whisper_large_v3_turbo": "Скорость в 216 раз быстрее реального времени",
"groq_whisper_large_v3": "Модель Large v3, быстрая и точная",
- "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция"
+ "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция",
+ "soniox_stt_rt_v4": "Распознавание речи в реальном времени с высокой точностью"
},
"cloud": {
"openai_gpt_5_2": "Новейшая флагманская модель с рассуждением",
diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json
index b12e1c62..c550ef13 100644
--- a/src/locales/zh-CN/translation.json
+++ b/src/locales/zh-CN/translation.json
@@ -131,7 +131,9 @@
"private": "隐私",
"tap": "点按",
"close": "关闭",
- "dismiss": "忽略"
+ "dismiss": "忽略",
+ "none": "无",
+ "secondaryLanguage": "辅助语言"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "原始 Whisper 模型",
"groq_whisper_large_v3_turbo": "216 倍实时速度",
"groq_whisper_large_v3": "Large v3 模型,快速且精准",
- "mistral_voxtral_mini_latest": "快速多语言转录"
+ "mistral_voxtral_mini_latest": "快速多语言转录",
+ "soniox_stt_rt_v4": "高精度实时流式语音转文字"
},
"cloud": {
"openai_gpt_5_2": "最新旗舰推理模型",
diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json
index 80c45443..993add9a 100644
--- a/src/locales/zh-TW/translation.json
+++ b/src/locales/zh-TW/translation.json
@@ -131,7 +131,9 @@
"private": "私密",
"tap": "點按",
"close": "關閉",
- "dismiss": "忽略"
+ "dismiss": "忽略",
+ "none": "無",
+ "secondaryLanguage": "輔助語言"
},
"onboarding": {
"steps": {
@@ -1341,7 +1343,8 @@
"openai_whisper_1": "原始 Whisper 模型",
"groq_whisper_large_v3_turbo": "216 倍即時速度",
"groq_whisper_large_v3": "Large v3 模型,快速且精準",
- "mistral_voxtral_mini_latest": "快速多語言轉錄"
+ "mistral_voxtral_mini_latest": "快速多語言轉錄",
+ "soniox_stt_rt_v4": "高精度即時串流語音轉文字"
},
"cloud": {
"openai_gpt_5_2": "最新旗艦推理模型",
diff --git a/src/models/modelRegistryData.json b/src/models/modelRegistryData.json
index 96fc97b1..b68d3c6c 100644
--- a/src/models/modelRegistryData.json
+++ b/src/models/modelRegistryData.json
@@ -159,6 +159,19 @@
"descriptionKey": "models.descriptions.transcription.mistral_voxtral_mini_latest"
}
]
+ },
+ {
+ "id": "soniox",
+ "name": "Soniox",
+ "baseUrl": "wss://stt-rt.soniox.com",
+ "models": [
+ {
+ "id": "stt-rt-v4",
+ "name": "Soniox STT RT v4",
+ "description": "Real-time streaming speech-to-text with high accuracy",
+ "descriptionKey": "models.descriptions.transcription.soniox_stt_rt_v4"
+ }
+ ]
}
],
"cloudProviders": [
diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts
index bfb84313..bd9b9b79 100644
--- a/src/stores/settingsStore.ts
+++ b/src/stores/settingsStore.ts
@@ -113,6 +113,7 @@ export interface SettingsState
setAllowLocalFallback: (value: boolean) => void;
setFallbackWhisperModel: (value: string) => void;
setPreferredLanguage: (value: string) => void;
+ setSonioxSecondaryLanguage: (value: string) => void;
setCloudTranscriptionProvider: (value: string) => void;
setCloudTranscriptionModel: (value: string) => void;
setCloudTranscriptionBaseUrl: (value: string) => void;
@@ -131,6 +132,7 @@ export interface SettingsState
setGeminiApiKey: (key: string) => void;
setGroqApiKey: (key: string) => void;
setMistralApiKey: (key: string) => void;
+ setSonioxApiKey: (key: string) => void;
setCustomTranscriptionApiKey: (key: string) => void;
setCustomReasoningApiKey: (key: string) => void;
@@ -228,6 +230,7 @@ export const useSettingsStore = create()((set, get) => ({
allowLocalFallback: readBoolean("allowLocalFallback", false),
fallbackWhisperModel: readString("fallbackWhisperModel", "base"),
preferredLanguage: readString("preferredLanguage", "auto"),
+ sonioxSecondaryLanguage: readString("sonioxSecondaryLanguage", ""),
cloudTranscriptionProvider: readString("cloudTranscriptionProvider", "openai"),
cloudTranscriptionModel: readString("cloudTranscriptionModel", "gpt-4o-mini-transcribe"),
cloudTranscriptionBaseUrl: readString(
@@ -252,6 +255,7 @@ export const useSettingsStore = create()((set, get) => ({
geminiApiKey: readString("geminiApiKey", ""),
groqApiKey: readString("groqApiKey", ""),
mistralApiKey: readString("mistralApiKey", ""),
+ sonioxApiKey: readString("sonioxApiKey", ""),
customTranscriptionApiKey: readString("customTranscriptionApiKey", ""),
customReasoningApiKey: readString("customReasoningApiKey", ""),
@@ -323,6 +327,7 @@ export const useSettingsStore = create()((set, get) => ({
setAllowLocalFallback: createBooleanSetter("allowLocalFallback"),
setFallbackWhisperModel: createStringSetter("fallbackWhisperModel"),
setPreferredLanguage: createStringSetter("preferredLanguage"),
+ setSonioxSecondaryLanguage: createStringSetter("sonioxSecondaryLanguage"),
setCloudTranscriptionProvider: createStringSetter("cloudTranscriptionProvider"),
setCloudTranscriptionModel: createStringSetter("cloudTranscriptionModel"),
setCloudTranscriptionBaseUrl: createStringSetter("cloudTranscriptionBaseUrl"),
@@ -392,6 +397,12 @@ export const useSettingsStore = create()((set, get) => ({
window.electronAPI?.saveMistralKey?.(key);
invalidateApiKeyCaches("mistral");
},
+ setSonioxApiKey: (key: string) => { // Records the Soniox key in localStorage, in store state, and via window.electronAPI.saveSonioxKey when that bridge exists
+ if (isBrowser) localStorage.setItem("sonioxApiKey", key); // only when isBrowser is truthy; same key name that the initial state passes to readString
+ set({ sonioxApiKey: key });
+ window.electronAPI?.saveSonioxKey?.(key); // optional call because saveSonioxKey is declared optional; the Promise it is typed to return is not awaited here
+ invalidateApiKeyCaches(); // NOTE(review): no provider argument, whereas the Mistral setter passes "mistral" — confirm this is intended
+ },
setCustomTranscriptionApiKey: (key: string) => {
if (isBrowser) localStorage.setItem("customTranscriptionApiKey", key);
set({ customTranscriptionApiKey: key });
@@ -546,6 +557,7 @@ export const useSettingsStore = create()((set, get) => ({
if (keys.geminiApiKey !== undefined) s.setGeminiApiKey(keys.geminiApiKey);
if (keys.groqApiKey !== undefined) s.setGroqApiKey(keys.groqApiKey);
if (keys.mistralApiKey !== undefined) s.setMistralApiKey(keys.mistralApiKey);
+ if (keys.sonioxApiKey !== undefined) s.setSonioxApiKey(keys.sonioxApiKey);
if (keys.customTranscriptionApiKey !== undefined)
s.setCustomTranscriptionApiKey(keys.customTranscriptionApiKey);
if (keys.customReasoningApiKey !== undefined)
@@ -632,6 +644,10 @@ export async function initializeSettings(): Promise {
const envKey = await window.electronAPI.getMistralKey?.();
if (envKey) createStringSetter("mistralApiKey")(envKey);
}
+ if (!state.sonioxApiKey) {
+ const envKey = await window.electronAPI.getSonioxKey?.();
+ if (envKey) createStringSetter("sonioxApiKey")(envKey);
+ }
if (!state.customTranscriptionApiKey) {
const envKey = await window.electronAPI.getCustomTranscriptionKey?.();
if (envKey) createStringSetter("customTranscriptionApiKey")(envKey);
diff --git a/src/types/electron.ts b/src/types/electron.ts
index 4acdfd90..59db3c0e 100644
--- a/src/types/electron.ts
+++ b/src/types/electron.ts
@@ -677,6 +677,10 @@ declare global {
// Mistral API key management
getMistralKey: () => Promise;
saveMistralKey: (key: string) => Promise;
+
+ // Soniox API key management (both members are declared optional, unlike the Mistral pair above; settingsStore calls them with ?.)
+ getSonioxKey?: () => Promise;
+ saveSonioxKey?: (key: string) => Promise;
proxyMistralTranscription: (data: {
audioBuffer: ArrayBuffer;
model?: string;
@@ -1146,6 +1150,28 @@ declare global {
onDictationRealtimeError?: (callback: (error: string) => void) => () => void;
onDictationRealtimeSessionEnd?: (callback: (data: { text: string }) => void) => () => void;
+ // Soniox streaming session API; every member below is declared optional
+ sonioxStreamingWarmup?: (options?: { // same option shape and result shape as sonioxStreamingStart below
+ apiKey?: string;
+ model?: string;
+ language?: string; // NOTE(review): only one language field; nothing here carries the sonioxSecondaryLanguage setting — confirm how it is passed
+ }) => Promise<{ success: boolean; error?: string }>;
+ sonioxStreamingStart?: (options?: {
+ apiKey?: string;
+ model?: string;
+ language?: string;
+ }) => Promise<{ success: boolean; error?: string }>;
+ sonioxStreamingSend?: (audioBuffer: ArrayBuffer) => void; // typed as returning void: no result is reported for a sent buffer
+ sonioxStreamingFinalize?: () => void;
+ sonioxStreamingStop?: () => Promise<{ success: boolean; text?: string }>; // text is optional in the resolved value
+ sonioxStreamingStatus?: () => Promise<{ connected: boolean }>;
+ onSonioxPartialTranscript?: (callback: (text: string) => void) => () => void; // returns a () => void; presumably an unsubscribe handle — confirm against the preload script
+ onSonioxFinalTranscript?: (callback: (text: string) => void) => () => void;
+ onSonioxError?: (callback: (error: string) => void) => () => void;
+ onSonioxSessionEnd?: (
+ callback: (data: { text?: string }) => void // text is optional here, while onDictationRealtimeSessionEnd above declares it as required
+ ) => () => void;
+
// Desktop audio capture
getDesktopSources?: (types: string[]) => Promise>;
diff --git a/src/utils/byokDetection.ts b/src/utils/byokDetection.ts
index a7b989d5..bce4e74e 100644
--- a/src/utils/byokDetection.ts
+++ b/src/utils/byokDetection.ts
@@ -3,5 +3,6 @@ export const hasStoredByokKey = () =>
localStorage.getItem("openaiApiKey") ||
localStorage.getItem("groqApiKey") ||
localStorage.getItem("mistralApiKey") ||
+ localStorage.getItem("sonioxApiKey") ||
localStorage.getItem("customTranscriptionApiKey")
);
diff --git a/src/utils/providerIcons.ts b/src/utils/providerIcons.ts
index 3be07a38..da742a0c 100644
--- a/src/utils/providerIcons.ts
+++ b/src/utils/providerIcons.ts
@@ -8,6 +8,7 @@ import groqIcon from "@/assets/icons/providers/groq.svg";
import nvidiaIcon from "@/assets/icons/providers/nvidia.svg";
import openaiOssIcon from "@/assets/icons/providers/openai-oss.svg";
import gemmaIcon from "@/assets/icons/providers/gemma.svg";
+import sonioxIcon from "@/assets/icons/providers/soniox.svg";
export const PROVIDER_ICONS: Record = {
openai: openaiIcon,
@@ -21,6 +22,7 @@ export const PROVIDER_ICONS: Record = {
nvidia: nvidiaIcon,
"openai-oss": openaiOssIcon,
gemma: gemmaIcon,
+ soniox: sonioxIcon,
};
export function getProviderIcon(provider: string): string | undefined {
diff --git a/tests/helpers/sonioxStreaming.test.js b/tests/helpers/sonioxStreaming.test.js
new file mode 100644
index 00000000..49ef70d2
--- /dev/null
+++ b/tests/helpers/sonioxStreaming.test.js
@@ -0,0 +1,154 @@
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+const { removeFillers } = require("../../src/helpers/sonioxStreaming");
+
+describe("removeFillers", () => { // Specification-by-example for removeFillers (imported from src/helpers/sonioxStreaming): each case pins one input/output pair
+ it("passes through normal text unchanged", () => {
+ assert.equal(removeFillers("Hello world."), "Hello world.");
+ });
+
+ it("removes filler mid-sentence", () => {
+ assert.equal(removeFillers("I uh think so"), "I think so");
+ });
+
+ it("removes filler with trailing comma mid-sentence", () => {
+ assert.equal(removeFillers("I, um, think so"), "I think so");
+ });
+
+ it("removes filler after period and capitalizes next word", () => {
+ assert.equal(
+ removeFillers("done. Yyy, let me check"),
+ "done. Let me check"
+ );
+ });
+
+ it("removes filler after question mark and capitalizes", () => {
+ assert.equal(
+ removeFillers("right? Mmm, or maybe not"),
+ "right? Or maybe not"
+ );
+ });
+
+ it("removes filler after exclamation mark and capitalizes", () => {
+ assert.equal(
+ removeFillers("wow! Um, that was great"),
+ "wow! That was great"
+ );
+ });
+
+ it("removes standalone filler sentence (Hmm.)", () => {
+ assert.equal(
+ removeFillers("really? Hmm. Maybe so."),
+ "really? Maybe so."
+ );
+ });
+
+ it("removes multiple fillers in one text", () => {
+ assert.equal(
+ removeFillers("OK so let's try. Yyy, does it work? Mmm, or not? Eee, let me check again."),
+ "OK so let's try. Does it work? Or not? Let me check again."
+ );
+ });
+
+ it("removes filler at start of text and capitalizes", () => {
+ assert.equal(removeFillers("Uh, so anyway"), "So anyway");
+ });
+
+ it("removes filler at end of text", () => {
+ assert.equal(removeFillers("That's all um"), "That's all");
+ });
+
+ it("removes consecutive fillers", () => {
+ assert.equal(removeFillers("Well uh um ok"), "Well ok");
+ });
+
+ it("handles text with only fillers", () => {
+ assert.equal(removeFillers("Uh um mmm"), "");
+ });
+
+ it("handles empty string", () => {
+ assert.equal(removeFillers(""), "");
+ });
+
+ it("is case-insensitive", () => {
+ assert.equal(removeFillers("So UH yeah"), "So yeah");
+ assert.equal(removeFillers("So UHH yeah"), "So yeah");
+ assert.equal(removeFillers("So YYY yeah"), "So yeah");
+ });
+
+ it("handles filler variations with repeated letters", () => {
+ assert.equal(removeFillers("So uhhh yeah"), "So yeah");
+ assert.equal(removeFillers("So ummm yeah"), "So yeah");
+ assert.equal(removeFillers("So hmmm yeah"), "So yeah");
+ assert.equal(removeFillers("So eeeee yeah"), "So yeah");
+ assert.equal(removeFillers("So yyyy yeah"), "So yeah");
+ });
+
+ // False-positive protection: real words, genuine exclamations ("Oh", "Ah") and short tokens ("ee", "Hm") must NOT be removed
+
+ it("preserves real words containing filler substrings", () => {
+ assert.equal(removeFillers("The umbrella is here."), "The umbrella is here.");
+ assert.equal(removeFillers("She is human."), "She is human.");
+ assert.equal(removeFillers("Check the ohms."), "Check the ohms.");
+ assert.equal(removeFillers("It is yummy."), "It is yummy.");
+ assert.equal(removeFillers("Hot summer day."), "Hot summer day.");
+ });
+
+ it("preserves 'Oh' as a real exclamation", () => {
+ assert.equal(removeFillers("Oh really?"), "Oh really?");
+ assert.equal(removeFillers("Oh, that is nice."), "Oh, that is nice.");
+ assert.equal(removeFillers("oh no!"), "oh no!");
+ assert.equal(removeFillers("Hello. Oh, nice!"), "Hello. Oh, nice!");
+ });
+
+ it("preserves 'Ah' as a real exclamation", () => {
+ assert.equal(removeFillers("Ah, I see."), "Ah, I see.");
+ assert.equal(removeFillers("done. Ah, great."), "done. Ah, great.");
+ });
+
+ it("preserves short tokens like 'ee' and 'hm'", () => {
+ assert.equal(removeFillers("I see ee in the code"), "I see ee in the code");
+ assert.equal(removeFillers("Hm, interesting."), "Hm, interesting.");
+ });
+
+ // Unicode capitalization: a letter following a filler removed at a sentence boundary is upper-cased, including non-ASCII letters; mid-sentence removals leave case alone
+
+ it("capitalizes Unicode letters after filler at sentence boundary", () => {
+ assert.equal(
+ removeFillers("tak. Yyy, ćwiczenie drugie. Eee, ósmy punkt. Mmm, świetnie"),
+ "tak. Ćwiczenie drugie. Ósmy punkt. Świetnie"
+ );
+ });
+
+ it("capitalizes accented Latin letters after filler", () => {
+ assert.equal(
+ removeFillers("bien. Um, él sabe"),
+ "bien. Él sabe"
+ );
+ });
+
+ it("capitalizes Cyrillic letters after filler", () => {
+ assert.equal(
+ removeFillers("done. Uhh, это работает"),
+ "done. Это работает"
+ );
+ });
+
+ it("does not capitalize mid-sentence after filler removal", () => {
+ assert.equal(removeFillers("I uh think so"), "I think so");
+ assert.equal(removeFillers("let's um try"), "let's try");
+ });
+
+ it("does not capitalize first letter when no leading filler was removed", () => {
+ assert.equal(removeFillers("iPhone is great"), "iPhone is great");
+ });
+
+ // Realistic Soniox output: a longer Polish transcript mixing a comma-attached filler ("Yyy,") with standalone filler sentences ("Hmm.")
+
+ it("handles realistic Soniox output with sub-word assembled fillers", () => {
+ assert.equal(
+ removeFillers("No dobra, to robimy test. Yyy, w takim razie: czy to będzie działać? Hmm. A może jednak nie będzie działać? Hmm. Ciekawe, czy to zadziała."),
+ "No dobra, to robimy test. W takim razie: czy to będzie działać? A może jednak nie będzie działać? Ciekawe, czy to zadziała."
+ );
+ });
+});