diff --git a/build.gradle b/build.gradle index 79c62096fc..7b3c86eeed 100644 --- a/build.gradle +++ b/build.gradle @@ -328,6 +328,10 @@ android { res.srcDirs = ['java/unstable/res', translationsWithoutEngValues('translations/devbuild')] } + test { + java.srcDirs = ['java/test'] + } + androidTest { res.srcDirs = ['tests/res'] java.srcDirs = ['tests/src'] diff --git a/java/res/values/strings-uix.xml b/java/res/values/strings-uix.xml index 6627fea9f4..5a33ae51c0 100644 --- a/java/res/values/strings-uix.xml +++ b/java/res/values/strings-uix.xml @@ -538,6 +538,26 @@ Models To change the models, visit Languages & Models menu + + Dictation Commands + Replace spoken phrases like \"new line\" or \"dollar sign\" with symbols and formatting + Formatting + New line, new paragraph, tab, numeral, no space on/off + Capitalization + Caps on/off, all caps + Punctuation & Brackets + Quotes, brackets, parentheses, dash, ellipsis + Symbols + Ampersand, asterisk, at sign, hashtag, etc. + Math Symbols + Equal, plus, minus, greater than, less than + Currency Symbols + Dollar, euro, pound, yen, cent + Emoticons + Smiley, frowny, winky face + Intellectual Property Marks + Copyright, registered, trademark + Text Prediction Transformer LM diff --git a/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt index 8eb4d8e18e..8d2cbe06d0 100644 --- a/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt +++ b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt @@ -67,4 +67,50 @@ val LANGUAGE_TOGGLES = SettingsKey( val USE_PERSONAL_DICT = SettingsKey( key = booleanPreferencesKey("use_personal_dict_voice_input"), default = true +) + +// Dictation command settings +val DICTATION_COMMANDS_ENABLED = SettingsKey( + key = booleanPreferencesKey("dictation_commands_enabled"), + default = true +) + +val DICTATION_FORMATTING = SettingsKey( + key = booleanPreferencesKey("dictation_formatting"), + default = 
true +) + +val DICTATION_CAPITALIZATION = SettingsKey( + key = booleanPreferencesKey("dictation_capitalization"), + default = true +) + +val DICTATION_PUNCTUATION = SettingsKey( + key = booleanPreferencesKey("dictation_punctuation"), + default = true +) + +val DICTATION_SYMBOLS = SettingsKey( + key = booleanPreferencesKey("dictation_symbols"), + default = true +) + +val DICTATION_MATH = SettingsKey( + key = booleanPreferencesKey("dictation_math"), + default = true +) + +val DICTATION_CURRENCY = SettingsKey( + key = booleanPreferencesKey("dictation_currency"), + default = true +) + +val DICTATION_EMOTICONS = SettingsKey( + key = booleanPreferencesKey("dictation_emoticons"), + default = true +) + +val DICTATION_IP_MARKS = SettingsKey( + key = booleanPreferencesKey("dictation_ip_marks"), + default = true ) \ No newline at end of file diff --git a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt index 4bfbbca400..8e1b03240a 100644 --- a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt +++ b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt @@ -25,11 +25,20 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.yield import org.futo.inputmethod.latin.R -import org.futo.inputmethod.latin.uix.AUDIO_FOCUS import org.futo.inputmethod.latin.uix.Action import org.futo.inputmethod.latin.uix.ActionWindow +import org.futo.inputmethod.latin.uix.AUDIO_FOCUS import org.futo.inputmethod.latin.uix.CAN_EXPAND_SPACE import org.futo.inputmethod.latin.uix.CloseResult +import org.futo.inputmethod.latin.uix.DICTATION_CAPITALIZATION +import org.futo.inputmethod.latin.uix.DICTATION_COMMANDS_ENABLED +import org.futo.inputmethod.latin.uix.DICTATION_CURRENCY +import org.futo.inputmethod.latin.uix.DICTATION_EMOTICONS +import org.futo.inputmethod.latin.uix.DICTATION_FORMATTING +import 
org.futo.inputmethod.latin.uix.DICTATION_IP_MARKS +import org.futo.inputmethod.latin.uix.DICTATION_MATH +import org.futo.inputmethod.latin.uix.DICTATION_PUNCTUATION +import org.futo.inputmethod.latin.uix.DICTATION_SYMBOLS import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.ENABLE_SOUND import org.futo.inputmethod.latin.uix.KeyboardManagerForAction @@ -42,6 +51,8 @@ import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS import org.futo.inputmethod.latin.uix.getSetting import org.futo.inputmethod.latin.uix.setSetting import org.futo.inputmethod.latin.uix.settings.SettingsActivity +import org.futo.inputmethod.latin.uix.utils.DictationCommandProcessor +import org.futo.inputmethod.latin.uix.utils.DictationSettings import org.futo.inputmethod.latin.uix.utils.ModelOutputSanitizer import org.futo.inputmethod.latin.xlm.UserDictionaryObserver import org.futo.inputmethod.updates.openURI @@ -118,6 +129,7 @@ private class VoiceInputActionWindow( val context = manager.getContext() private var shouldPlaySounds: Boolean = false + private var dictationSettings: DictationSettings = DictationSettings() private fun loadSettings(): RecognizerViewSettings { val enableSound = context.getSetting(ENABLE_SOUND) val verboseFeedback = false//context.getSetting(VERBOSE_PROGRESS) @@ -139,6 +151,18 @@ private class VoiceInputActionWindow( shouldPlaySounds = enableSound + dictationSettings = DictationSettings( + enabled = context.getSetting(DICTATION_COMMANDS_ENABLED), + formatting = context.getSetting(DICTATION_FORMATTING), + capitalization = context.getSetting(DICTATION_CAPITALIZATION), + punctuation = context.getSetting(DICTATION_PUNCTUATION), + symbols = context.getSetting(DICTATION_SYMBOLS), + math = context.getSetting(DICTATION_MATH), + currency = context.getSetting(DICTATION_CURRENCY), + emoticons = context.getSetting(DICTATION_EMOTICONS), + ipMarks = context.getSetting(DICTATION_IP_MARKS) + ) + return RecognizerViewSettings( 
shouldShowInlinePartialResult = false, shouldShowVerboseFeedback = verboseFeedback, @@ -263,8 +287,10 @@ private class VoiceInputActionWindow( wasFinished = true manager.getLifecycleScope().launch(Dispatchers.Main) { + // Sanitize first so dictation formatting chars aren't mangled by trim() val sanitized = ModelOutputSanitizer.sanitize(result, inputTransaction.textContext) - inputTransaction.commit(sanitized) + val processed = DictationCommandProcessor.process(sanitized, dictationSettings) + inputTransaction.commit(processed) manager.announce(result) manager.closeActionWindow() } @@ -273,7 +299,8 @@ private class VoiceInputActionWindow( override fun partialResult(result: String) { manager.getLifecycleScope().launch(Dispatchers.Main) { val sanitized = ModelOutputSanitizer.sanitize(result, inputTransaction.textContext) - inputTransaction.updatePartial(sanitized) + val processed = DictationCommandProcessor.process(sanitized, dictationSettings) + inputTransaction.updatePartial(processed) } } diff --git a/java/src/org/futo/inputmethod/latin/uix/settings/SettingsNavigator.kt b/java/src/org/futo/inputmethod/latin/uix/settings/SettingsNavigator.kt index d0394742f0..e35b4f6f52 100644 --- a/java/src/org/futo/inputmethod/latin/uix/settings/SettingsNavigator.kt +++ b/java/src/org/futo/inputmethod/latin/uix/settings/SettingsNavigator.kt @@ -55,6 +55,7 @@ import org.futo.inputmethod.latin.uix.settings.pages.SelectLanguageScreen import org.futo.inputmethod.latin.uix.settings.pages.SelectLayoutsScreen import org.futo.inputmethod.latin.uix.settings.pages.themes.ThemeScreen import org.futo.inputmethod.latin.uix.settings.pages.TypingSettingsMenu +import org.futo.inputmethod.latin.uix.settings.pages.DictationCommandsMenu import org.futo.inputmethod.latin.uix.settings.pages.VoiceInputMenu import org.futo.inputmethod.latin.uix.settings.pages.addModelManagerNavigation import org.futo.inputmethod.latin.uix.settings.pages.buggyeditors.BuggyTextEditVariations @@ -86,6 +87,7 @@ val 
SettingsMenus = listOf( PredictiveTextMenu, BlacklistScreenLite, VoiceInputMenu, + DictationCommandsMenu, ActionsScreen, HelpMenu, MiscMenu, diff --git a/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt b/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt index 3d132d0e51..8dc61d5f99 100644 --- a/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt +++ b/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt @@ -5,6 +5,15 @@ import androidx.compose.runtime.Composable import org.futo.inputmethod.latin.R import org.futo.inputmethod.latin.uix.AUDIO_FOCUS import org.futo.inputmethod.latin.uix.CAN_EXPAND_SPACE +import org.futo.inputmethod.latin.uix.DICTATION_CAPITALIZATION +import org.futo.inputmethod.latin.uix.DICTATION_COMMANDS_ENABLED +import org.futo.inputmethod.latin.uix.DICTATION_CURRENCY +import org.futo.inputmethod.latin.uix.DICTATION_EMOTICONS +import org.futo.inputmethod.latin.uix.DICTATION_FORMATTING +import org.futo.inputmethod.latin.uix.DICTATION_IP_MARKS +import org.futo.inputmethod.latin.uix.DICTATION_MATH +import org.futo.inputmethod.latin.uix.DICTATION_PUNCTUATION +import org.futo.inputmethod.latin.uix.DICTATION_SYMBOLS import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.ENABLE_SOUND import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH @@ -22,6 +31,66 @@ private val visibilityCheckNotSystemVoiceInput = @Composable { useDataStoreValue(USE_SYSTEM_VOICE_INPUT) == false } +val DictationCommandsMenu = UserSettingsMenu( + title = R.string.dictation_commands_title, + navPath = "dictationCommands", registerNavPath = true, + settings = listOf( + userSettingToggleDataStore( + title = R.string.dictation_commands_title, + subtitle = R.string.dictation_commands_subtitle, + setting = DICTATION_COMMANDS_ENABLED + ), + + userSettingToggleDataStore( + title = R.string.dictation_formatting_title, + subtitle = R.string.dictation_formatting_subtitle, + setting = 
DICTATION_FORMATTING + ), + + userSettingToggleDataStore( + title = R.string.dictation_capitalization_title, + subtitle = R.string.dictation_capitalization_subtitle, + setting = DICTATION_CAPITALIZATION + ), + + userSettingToggleDataStore( + title = R.string.dictation_punctuation_title, + subtitle = R.string.dictation_punctuation_subtitle, + setting = DICTATION_PUNCTUATION + ), + + userSettingToggleDataStore( + title = R.string.dictation_symbols_title, + subtitle = R.string.dictation_symbols_subtitle, + setting = DICTATION_SYMBOLS + ), + + userSettingToggleDataStore( + title = R.string.dictation_math_title, + subtitle = R.string.dictation_math_subtitle, + setting = DICTATION_MATH + ), + + userSettingToggleDataStore( + title = R.string.dictation_currency_title, + subtitle = R.string.dictation_currency_subtitle, + setting = DICTATION_CURRENCY + ), + + userSettingToggleDataStore( + title = R.string.dictation_emoticons_title, + subtitle = R.string.dictation_emoticons_subtitle, + setting = DICTATION_EMOTICONS + ), + + userSettingToggleDataStore( + title = R.string.dictation_ip_marks_title, + subtitle = R.string.dictation_ip_marks_subtitle, + setting = DICTATION_IP_MARKS + ) + ) +) + val VoiceInputMenu = UserSettingsMenu( title = R.string.voice_input_settings_title, navPath = "voiceInput", registerNavPath = true, @@ -82,6 +151,13 @@ val VoiceInputMenu = UserSettingsMenu( setting = USE_VAD_AUTOSTOP ).copy(visibilityCheck = visibilityCheckNotSystemVoiceInput), + userSettingNavigationItem( + title = R.string.dictation_commands_title, + subtitle = R.string.dictation_commands_subtitle, + style = NavigationItemStyle.Misc, + navigateTo = "dictationCommands" + ).copy(visibilityCheck = visibilityCheckNotSystemVoiceInput), + userSettingNavigationItem( title = R.string.voice_input_settings_change_models, subtitle = R.string.voice_input_settings_change_models_subtitle, diff --git a/java/src/org/futo/inputmethod/latin/uix/utils/DictationCommandProcessor.kt 
package org.futo.inputmethod.latin.uix.utils

/**
 * Switches controlling which dictation command categories
 * [DictationCommandProcessor] recognises.
 *
 * [enabled] is the master switch: when it is false, [DictationCommandProcessor.process]
 * returns its input unchanged regardless of the other flags. Every flag defaults to true.
 */
data class DictationSettings(
    val enabled: Boolean = true,
    val formatting: Boolean = true,
    val capitalization: Boolean = true,
    val punctuation: Boolean = true,
    val symbols: Boolean = true,
    val math: Boolean = true,
    val currency: Boolean = true,
    val emoticons: Boolean = true,
    val ipMarks: Boolean = true
)

/**
 * Dictation command processor for FUTO Keyboard voice input.
 *
 * Intercepts Whisper transcription output and replaces spoken command phrases
 * (e.g., "new line", "caps on", "dollar sign") with the corresponding characters
 * or formatting. Runs after [ModelOutputSanitizer] in the voice input pipeline.
 *
 * Each command category can be independently toggled via [DictationSettings].
 */
object DictationCommandProcessor {

    /** How a replacement attaches to the text around it. */
    private enum class Spacing { NORMAL, NO_SPACE_BEFORE, NO_SPACE_AFTER, NO_SPACE_EITHER }

    private data class Replacement(val text: String, val spacing: Spacing = Spacing.NORMAL)

    // -- Formatting commands --
    private val formattingCommands = mapOf(
        "new line" to Replacement("\n", Spacing.NO_SPACE_EITHER),
        "new paragraph" to Replacement("\n\n", Spacing.NO_SPACE_EITHER),
        "tab key" to Replacement("\t", Spacing.NO_SPACE_EITHER)
    )

    // -- Punctuation & bracket commands --
    private val punctuationCommands = mapOf(
        "apostrophe" to Replacement("'", Spacing.NO_SPACE_BEFORE),
        "open square bracket" to Replacement("[", Spacing.NO_SPACE_AFTER),
        "close square bracket" to Replacement("]", Spacing.NO_SPACE_BEFORE),
        "open parenthesis" to Replacement("(", Spacing.NO_SPACE_AFTER),
        "close parenthesis" to Replacement(")", Spacing.NO_SPACE_BEFORE),
        "open brace" to Replacement("{", Spacing.NO_SPACE_AFTER),
        "close brace" to Replacement("}", Spacing.NO_SPACE_BEFORE),
        "open angle bracket" to Replacement("<", Spacing.NO_SPACE_AFTER),
        "close angle bracket" to Replacement(">", Spacing.NO_SPACE_BEFORE),
        "dash" to Replacement("\u2013", Spacing.NO_SPACE_BEFORE), // en dash
        "ellipsis" to Replacement("\u2026", Spacing.NO_SPACE_BEFORE), // horizontal ellipsis
        "hyphen" to Replacement("-", Spacing.NO_SPACE_BEFORE),
        "quote" to Replacement("\u201C", Spacing.NO_SPACE_AFTER), // left double smart quote
        "begin quote" to Replacement("\u201C", Spacing.NO_SPACE_AFTER),
        "end quote" to Replacement("\u201D", Spacing.NO_SPACE_BEFORE), // right double smart quote
        "begin single quote" to Replacement("\u2018", Spacing.NO_SPACE_AFTER), // left single smart quote
        "end single quote" to Replacement("\u2019", Spacing.NO_SPACE_BEFORE), // right single smart quote
        "period" to Replacement(".", Spacing.NO_SPACE_BEFORE),
        "point" to Replacement(".", Spacing.NO_SPACE_BEFORE),
        "dot" to Replacement(".", Spacing.NO_SPACE_BEFORE),
        "full stop" to Replacement(".", Spacing.NO_SPACE_BEFORE),
        "comma" to Replacement(",", Spacing.NO_SPACE_BEFORE),
        "exclamation mark" to Replacement("!", Spacing.NO_SPACE_BEFORE),
        "exclamation point" to Replacement("!", Spacing.NO_SPACE_BEFORE),
        "exclamation" to Replacement("!", Spacing.NO_SPACE_BEFORE),
        "question mark" to Replacement("?", Spacing.NO_SPACE_BEFORE),
        "colon" to Replacement(":", Spacing.NO_SPACE_BEFORE),
        "semicolon" to Replacement(";", Spacing.NO_SPACE_BEFORE)
    )

    // -- Typography symbol commands --
    private val symbolCommands = mapOf(
        "ampersand" to Replacement("&"),
        "asterisk" to Replacement("*"),
        "at sign" to Replacement("@"),
        "backslash" to Replacement("\\"),
        "forward slash" to Replacement("/"),
        "caret" to Replacement("^"),
        "center dot" to Replacement("\u00B7"), // middle dot
        "large center dot" to Replacement("\u25CF"), // black circle
        "degree sign" to Replacement("\u00B0"), // degree sign
        "hashtag" to Replacement("#"),
        "pound sign" to Replacement("#"),
        "percent sign" to Replacement("%"),
        "underscore" to Replacement("_"),
        "vertical bar" to Replacement("|")
    )

    // -- Math symbol commands --
    private val mathCommands = mapOf(
        "equal sign" to Replacement("="),
        "greater than sign" to Replacement(">"),
        "less than sign" to Replacement("<"),
        "minus sign" to Replacement("-"),
        "multiplication sign" to Replacement("\u00D7"), // multiplication sign
        "plus sign" to Replacement("+")
    )

    // -- Currency symbol commands --
    private val currencyCommands = mapOf(
        "dollar sign" to Replacement("$"),
        "cent sign" to Replacement("\u00A2", Spacing.NO_SPACE_BEFORE), // cent sign
        "pound sterling sign" to Replacement("\u00A3"), // pound sterling
        "euro sign" to Replacement("\u20AC"), // euro
        "yen sign" to Replacement("\u00A5") // yen
    )

    // -- Emoticon commands --
    private val emoticonCommands = mapOf(
        "smiley face" to Replacement(":-)"),
        "frowny face" to Replacement(":-("),
        "winky face" to Replacement(";-)"),
        "cross-eyed laughing face" to Replacement("XD")
    )

    // -- Intellectual property mark commands --
    private val ipMarkCommands = mapOf(
        "copyright sign" to Replacement("\u00A9"), // copyright
        "registered sign" to Replacement("\u00AE"), // registered
        "trademark sign" to Replacement("\u2122") // trademark
    )

    // -- Number word to digit mappings (single words only; no compound numbers) --
    private val numberWords = mapOf(
        "zero" to 0, "one" to 1, "two" to 2, "three" to 3, "four" to 4,
        "five" to 5, "six" to 6, "seven" to 7, "eight" to 8, "nine" to 9,
        "ten" to 10, "eleven" to 11, "twelve" to 12, "thirteen" to 13,
        "fourteen" to 14, "fifteen" to 15, "sixteen" to 16, "seventeen" to 17,
        "eighteen" to 18, "nineteen" to 19, "twenty" to 20, "thirty" to 30,
        "forty" to 40, "fifty" to 50, "sixty" to 60, "seventy" to 70,
        "eighty" to 80, "ninety" to 90, "hundred" to 100, "thousand" to 1000
    )

    // -- Roman numeral mappings (zero intentionally has no entry) --
    private val romanNumerals = mapOf(
        1 to "I", 2 to "II", 3 to "III", 4 to "IV", 5 to "V",
        6 to "VI", 7 to "VII", 8 to "VIII", 9 to "IX", 10 to "X",
        11 to "XI", 12 to "XII", 13 to "XIII", 14 to "XIV", 15 to "XV",
        16 to "XVI", 17 to "XVII", 18 to "XVIII", 19 to "XIX", 20 to "XX",
        30 to "XXX", 40 to "XL", 50 to "L", 60 to "LX", 70 to "LXX",
        80 to "LXXX", 90 to "XC", 100 to "C", 1000 to "M"
    )

    private enum class CapsMode { NONE, TITLE_CASE, ALL_CAPS }

    /** Punctuation the speech model may attach to the end of a word. */
    private val whisperPunct = charArrayOf('.', ',', '!', '?', ';', ':')

    private fun String.stripTrailingPunct(): String = this.trimEnd(*whisperPunct)

    private fun stripTrailingPunct(sb: StringBuilder) {
        while (sb.isNotEmpty() && sb.last() in whisperPunct) {
            sb.deleteCharAt(sb.length - 1)
        }
    }

    /** Form used for command lookup: trailing punctuation removed, lowercased. */
    private fun normalize(word: String): String = word.stripTrailingPunct().lowercase()

    /** Joins [length] normalized words starting at [startIndex] with single spaces. */
    private fun phraseAt(words: List<String>, startIndex: Int, length: Int): String =
        words.subList(startIndex, startIndex + length).joinToString(" ") { normalize(it) }

    /**
     * Converts a spoken number word to digits, or to a Roman numeral when [toRoman] is set.
     *
     * Returns [word] unchanged when it is not a known number word (or has no Roman form).
     * Trailing punctuation on the word is kept, so "three." becomes "3." rather than "3".
     */
    private fun convertNumberWord(word: String, toRoman: Boolean): String {
        val bare = word.stripTrailingPunct()
        val value = numberWords[bare.lowercase()] ?: return word
        val converted = if (toRoman) {
            romanNumerals[value] ?: return word
        } else {
            value.toString()
        }
        return converted + word.substring(bare.length)
    }

    /**
     * Replaces spoken command phrases with their corresponding characters or formatting.
     *
     * Returns [text] unchanged if [DictationSettings.enabled] is false or [text] is blank.
     * Leading and trailing spaces of [text] are re-attached to the result unless the
     * result starts/ends with a newline or tab produced on that side.
     *
     * @param text transcription text, split into words on single spaces
     * @param settings which command categories are active
     * @return the text with recognised commands applied
     */
    @JvmStatic
    fun process(text: String, settings: DictationSettings): String {
        if (!settings.enabled || text.isBlank()) return text

        val leadingSpace = text.takeWhile { it == ' ' }
        val trailingSpace = text.takeLastWhile { it == ' ' }
        // Trim spaces only: a bare trim() would also discard leading/trailing newlines
        // and tabs, which are never restored below.
        val content = text.trim(' ')

        val activeCommands = buildActiveCommands(settings)

        val words = content.split(" ")
        val result = StringBuilder()
        var capsMode = CapsMode.NONE
        var allCapsNextWord = false
        var noSpaceMode = false
        var numeralNextWord = false
        var romanNumeralNextWord = false
        var suppressNextSpace = false
        var lastWasCommand = false
        var i = 0

        while (i < words.size) {
            val match = tryMatchCommand(words, i, activeCommands, settings)

            if (match != null) {
                // Any command other than the numeral ones cancels a pending numeral request.
                if (match.type != CommandType.NUMERAL &&
                    match.type != CommandType.ROMAN_NUMERAL) {
                    numeralNextWord = false
                    romanNumeralNextWord = false
                }

                when (match.type) {
                    CommandType.CAPS_ON -> capsMode = CapsMode.TITLE_CASE
                    CommandType.CAPS_OFF -> capsMode = CapsMode.NONE
                    CommandType.ALL_CAPS_NEXT -> allCapsNextWord = true
                    CommandType.ALL_CAPS_ON -> capsMode = CapsMode.ALL_CAPS
                    CommandType.ALL_CAPS_OFF -> capsMode = CapsMode.NONE
                    CommandType.NO_SPACE_ON -> noSpaceMode = true
                    CommandType.NO_SPACE_OFF -> noSpaceMode = false
                    CommandType.NUMERAL -> numeralNextWord = true
                    CommandType.ROMAN_NUMERAL -> romanNumeralNextWord = true
                    CommandType.REPLACEMENT -> {
                        val spacing = match.spacing
                        val skipBefore = noSpaceMode ||
                            spacing == Spacing.NO_SPACE_BEFORE ||
                            spacing == Spacing.NO_SPACE_EITHER ||
                            suppressNextSpace
                        // When attaching to a preceding plain word, drop the punctuation the
                        // model put on it ("sentence. Exclamation." -> "sentence!").
                        if (skipBefore && result.isNotEmpty() && !lastWasCommand) {
                            stripTrailingPunct(result)
                        }
                        if (result.isNotEmpty() && !skipBefore) {
                            result.append(" ")
                        }
                        result.append(match.replacement)
                        suppressNextSpace = spacing == Spacing.NO_SPACE_AFTER ||
                            spacing == Spacing.NO_SPACE_EITHER
                        lastWasCommand = true
                    }
                }

                i += match.wordsConsumed
                continue
            }

            var word = words[i]

            if (numeralNextWord && settings.formatting) {
                word = convertNumberWord(word, toRoman = false)
                numeralNextWord = false
            } else if (romanNumeralNextWord && settings.formatting) {
                word = convertNumberWord(word, toRoman = true)
                romanNumeralNextWord = false
            }

            word = when (capsMode) {
                CapsMode.TITLE_CASE -> {
                    allCapsNextWord = false
                    word.replaceFirstChar { it.uppercaseChar() }
                }
                CapsMode.ALL_CAPS -> {
                    allCapsNextWord = false
                    word.uppercase()
                }
                CapsMode.NONE -> {
                    if (allCapsNextWord) {
                        allCapsNextWord = false
                        word.uppercase()
                    } else {
                        word
                    }
                }
            }

            if (result.isNotEmpty() && !noSpaceMode && !suppressNextSpace) {
                result.append(" ")
            }
            suppressNextSpace = false
            lastWasCommand = false
            result.append(word)
            i++
        }

        val body = result.toString()

        // Do not pad a newline/tab at either edge with the original surrounding spaces.
        val prefix = if (leadingSpace.isNotEmpty() && body.isNotEmpty() &&
            body[0] != '\n' && body[0] != '\t') leadingSpace else ""
        val suffix = if (trailingSpace.isNotEmpty() && body.isNotEmpty() &&
            body.last() != '\n' && body.last() != '\t') trailingSpace else ""

        return prefix + body + suffix
    }

    private data class CommandMatch(
        val type: CommandType,
        val replacement: String,
        val spacing: Spacing,
        val wordsConsumed: Int
    )

    private enum class CommandType {
        REPLACEMENT,
        CAPS_ON, CAPS_OFF,
        ALL_CAPS_NEXT, ALL_CAPS_ON, ALL_CAPS_OFF,
        NO_SPACE_ON, NO_SPACE_OFF,
        NUMERAL, ROMAN_NUMERAL
    }

    /** Merges the replacement tables of every category enabled in [settings]. */
    private fun buildActiveCommands(settings: DictationSettings): Map<String, Replacement> {
        val commands = mutableMapOf<String, Replacement>()
        if (settings.formatting) commands.putAll(formattingCommands)
        if (settings.punctuation) commands.putAll(punctuationCommands)
        if (settings.symbols) commands.putAll(symbolCommands)
        if (settings.math) commands.putAll(mathCommands)
        if (settings.currency) commands.putAll(currencyCommands)
        if (settings.emoticons) commands.putAll(emoticonCommands)
        if (settings.ipMarks) commands.putAll(ipMarkCommands)
        return commands
    }

    /**
     * Tries to match a command starting at [startIndex].
     *
     * Stateful commands (caps / spacing / numeral) are checked first, then replacement
     * phrases from longest (4 words) to shortest (1 word). Returns null when nothing matches.
     */
    private fun tryMatchCommand(
        words: List<String>,
        startIndex: Int,
        activeCommands: Map<String, Replacement>,
        settings: DictationSettings
    ): CommandMatch? {
        if (settings.capitalization) {
            matchStatefulCommand(words, startIndex)?.let { return it }
        }
        if (settings.formatting) {
            matchFormattingStatefulCommand(words, startIndex)?.let { return it }
        }

        for (length in minOf(4, words.size - startIndex) downTo 1) {
            val replacement = activeCommands[phraseAt(words, startIndex, length)]
            if (replacement != null) {
                return CommandMatch(CommandType.REPLACEMENT, replacement.text, replacement.spacing, length)
            }
        }

        return null
    }

    /** Matches the capitalization commands ("caps on/off", "all caps", "all caps on/off"). */
    private fun matchStatefulCommand(words: List<String>, startIndex: Int): CommandMatch? {
        val remaining = words.size - startIndex

        if (remaining >= 3) {
            when (phraseAt(words, startIndex, 3)) {
                "all caps on" -> return CommandMatch(CommandType.ALL_CAPS_ON, "", Spacing.NORMAL, 3)
                "all caps off" -> return CommandMatch(CommandType.ALL_CAPS_OFF, "", Spacing.NORMAL, 3)
            }
        }

        if (remaining >= 2) {
            when (phraseAt(words, startIndex, 2)) {
                "caps on" -> return CommandMatch(CommandType.CAPS_ON, "", Spacing.NORMAL, 2)
                "caps off" -> return CommandMatch(CommandType.CAPS_OFF, "", Spacing.NORMAL, 2)
                "all caps" -> return CommandMatch(CommandType.ALL_CAPS_NEXT, "", Spacing.NORMAL, 2)
            }
        }

        return null
    }

    /**
     * Matches the formatting state commands ("no space on/off", "numeral", "roman numeral").
     *
     * The numeral commands only match when at least one more word follows them, since
     * they act on the next word.
     */
    private fun matchFormattingStatefulCommand(words: List<String>, startIndex: Int): CommandMatch? {
        val remaining = words.size - startIndex

        if (remaining >= 3) {
            when (phraseAt(words, startIndex, 3)) {
                "no space on" -> return CommandMatch(CommandType.NO_SPACE_ON, "", Spacing.NORMAL, 3)
                "no space off" -> return CommandMatch(CommandType.NO_SPACE_OFF, "", Spacing.NORMAL, 3)
            }
        }

        if (remaining >= 2 && normalize(words[startIndex]) == "numeral") {
            return CommandMatch(CommandType.NUMERAL, "", Spacing.NORMAL, 1)
        }

        if (remaining >= 3 && phraseAt(words, startIndex, 2) == "roman numeral") {
            return CommandMatch(CommandType.ROMAN_NUMERAL, "", Spacing.NORMAL, 2)
        }

        return null
    }
}
0000000000..78ddd7c2d1 --- /dev/null +++ b/java/test/org/futo/inputmethod/latin/uix/utils/DictationCommandProcessorTest.kt @@ -0,0 +1,648 @@ +package org.futo.inputmethod.latin.uix.utils + +import org.junit.Assert.assertEquals +import org.junit.Test + +class DictationCommandProcessorTest { + + private val allEnabled = DictationSettings() + private val allDisabled = DictationSettings(enabled = false) + + private fun process(text: String, settings: DictationSettings = allEnabled): String { + return DictationCommandProcessor.process(text, settings) + } + + // -- Master toggle -- + + @Test + fun testMasterToggleOff_returnsUnchanged() { + assertEquals("hello new line world", process("hello new line world", allDisabled)) + } + + @Test + fun testEmptyInput_returnsEmpty() { + assertEquals("", process("")) + assertEquals(" ", process(" ")) + } + + @Test + fun testPlainText_passesThrough() { + assertEquals("hello world", process("hello world")) + } + + // -- Formatting commands -- + + @Test + fun testNewLine() { + assertEquals("hello\nworld", process("hello new line world")) + } + + @Test + fun testNewParagraph() { + assertEquals("hello\n\nworld", process("hello new paragraph world")) + } + + @Test + fun testTabKey() { + assertEquals("hello\tworld", process("hello tab key world")) + } + + @Test + fun testNewLineAtStart() { + assertEquals("\nhello", process("new line hello")) + } + + @Test + fun testNewLineAtEnd() { + assertEquals("hello\n", process("hello new line")) + } + + // -- Capitalization commands -- + + @Test + fun testCapsOn_titleCase() { + assertEquals("Hello World", process("caps on hello world")) + } + + @Test + fun testCapsOnOff() { + assertEquals("Hello World back to normal", process("caps on hello world caps off back to normal")) + } + + @Test + fun testAllCaps_singleWord() { + assertEquals("HELLO world", process("all caps hello world")) + } + + @Test + fun testAllCapsOn_multipleWords() { + assertEquals("HELLO WORLD", process("all caps on hello world")) + } + 
+    @Test
+    fun testAllCapsOnOff() {
+        assertEquals("HELLO WORLD back to normal", process("all caps on hello world all caps off back to normal"))
+    }
+
+    @Test
+    fun testCapsOnDoesNotAffectNextSentenceAfterOff() {
+        assertEquals("Big small", process("caps on big caps off small"))
+    }
+
+    // -- Spacing commands --
+
+    @Test
+    fun testNoSpaceOn() {
+        assertEquals("helloworld", process("no space on hello world"))
+    }
+
+    @Test
+    fun testNoSpaceOnOff() {
+        assertEquals("helloworld back to normal", process("no space on hello world no space off back to normal"))
+    }
+
+    // -- Numeral commands --
+
+    @Test
+    fun testNumeral() {
+        assertEquals("5", process("numeral five"))
+    }
+
+    @Test
+    fun testNumeralInContext() {
+        assertEquals("I have 3 cats", process("I have numeral three cats"))
+    }
+
+    @Test
+    fun testRomanNumeral() {
+        // "caps on ... caps off" title-cases "chapter"; "roman numeral five" is expected as "V"
+        assertEquals("Chapter V", process("caps on chapter caps off roman numeral five"))
+    }
+
+    @Test
+    fun testNumeralUnknownWord_passesThrough() {
+        // The keyword "numeral" is dropped and the unrecognised word is emitted unchanged
+        assertEquals("banana", process("numeral banana"))
+    }
+
+    // -- Punctuation commands --
+
+    @Test
+    fun testApostrophe() {
+        // "no space on" glues the apostrophe and the following letter onto "don"
+        assertEquals("don't", process("don no space on apostrophe t"))
+    }
+
+    @Test
+    fun testBrackets() {
+        assertEquals("hello [world]", process("hello open square bracket world close square bracket"))
+    }
+
+    @Test
+    fun testParentheses() {
+        assertEquals("hello (world)", process("hello open parenthesis world close parenthesis"))
+    }
+
+    @Test
+    fun testBraces() {
+        assertEquals("hello {world}", process("hello open brace world close brace"))
+    }
+
+    @Test
+    fun testAngleBrackets() {
+        // Expected value contains literal '<' and '>' characters, mirroring the other bracket pairs above
+        assertEquals("hello <world>", process("hello open angle bracket world close angle bracket"))
+    }
+
+    @Test
+    fun testClosingBracketsNoSpaceBefore() {
+        // Closing brackets/parens should attach to the preceding word
+        assertEquals("(hello)", process("open parenthesis hello close parenthesis"))
+        assertEquals("[hello]", process("open square bracket hello close square bracket"))
+        assertEquals("{hello}", process("open brace hello close brace"))
+        assertEquals("<hello>", process("open angle bracket hello close angle bracket"))
+    }
+
+    @Test
+    fun testSmartQuotes() {
+        // end quote attaches to preceding word (no space before)
+        assertEquals("he said \u201Chello\u201D", process("he said begin quote hello end quote"))
+    }
+
+    @Test
+    fun testSmartSingleQuotes() {
+        assertEquals("he said \u2018hello\u2019", process("he said begin single quote hello end single quote"))
+    }
+
+    @Test
+    fun testDash() {
+        assertEquals("hello\u2013world", process("hello no space on dash world"))
+    }
+
+    @Test
+    fun testEllipsis() {
+        assertEquals("hello\u2026", process("hello ellipsis"))
+    }
+
+    @Test
+    fun testHyphen() {
+        assertEquals("well-known", process("well no space on hyphen known"))
+    }
+
+    @Test
+    fun testPeriodFallback() {
+        assertEquals("hello.", process("hello period"))
+    }
+
+    @Test
+    fun testCommaFallback() {
+        assertEquals("hello,", process("hello comma"))
+    }
+
+    @Test
+    fun testQuestionMarkFallback() {
+        assertEquals("hello?", process("hello question mark"))
+    }
+
+    @Test
+    fun testExclamationMarkFallback() {
+        assertEquals("hello!", process("hello exclamation mark"))
+    }
+
+    @Test
+    fun testExclamationPoint() {
+        assertEquals("hello!", process("hello exclamation point"))
+    }
+
+    @Test
+    fun testExclamationAlone() {
+        assertEquals("hello!", process("hello exclamation"))
+    }
+
+    @Test
+    fun testWhisperExclamationWithPeriodAndCaps() {
+        // Whisper outputs "this is a sentence. Exclamation." for spoken "exclamation point"
+        assertEquals("this is a sentence!", process("this is a sentence. Exclamation."))
+    }
+
+    @Test
+    fun testWhisperExclamationTrailingPeriod() {
+        // Whisper outputs "another sentence spoken exclamation."
+        assertEquals("another sentence spoken!", process("another sentence spoken exclamation."))
+    }
+
+    @Test
+    fun testColonFallback() {
+        assertEquals("hello:", process("hello colon"))
+    }
+
+    @Test
+    fun testSemicolonFallback() {
+        assertEquals("hello;", process("hello semicolon"))
+    }
+
+    // -- Symbol commands --
+
+    @Test
+    fun testAmpersand() {
+        assertEquals("rock & roll", process("rock ampersand roll"))
+    }
+
+    @Test
+    fun testAsterisk() {
+        assertEquals("hello * world", process("hello asterisk world"))
+    }
+
+    @Test
+    fun testAtSign() {
+        assertEquals("user @ domain", process("user at sign domain"))
+    }
+
+    @Test
+    fun testBackslash() {
+        assertEquals("path \\ file", process("path backslash file"))
+    }
+
+    @Test
+    fun testForwardSlash() {
+        assertEquals("path / file", process("path forward slash file"))
+    }
+
+    @Test
+    fun testHashtag() {
+        assertEquals("# trending", process("hashtag trending"))
+    }
+
+    @Test
+    fun testPercentSign() {
+        assertEquals("100 %", process("100 percent sign"))
+    }
+
+    @Test
+    fun testUnderscore() {
+        assertEquals("snake _ case", process("snake underscore case"))
+    }
+
+    @Test
+    fun testVerticalBar() {
+        assertEquals("a | b", process("a vertical bar b"))
+    }
+
+    @Test
+    fun testDegreeSign() {
+        assertEquals("72 \u00B0", process("72 degree sign"))
+    }
+
+    @Test
+    fun testCaret() {
+        assertEquals("x ^", process("x caret"))
+    }
+
+    // -- Math commands --
+
+    @Test
+    fun testEqualSign() {
+        assertEquals("x = 5", process("x equal sign 5"))
+    }
+
+    @Test
+    fun testPlusSign() {
+        assertEquals("2 + 3", process("2 plus sign 3"))
+    }
+
+    @Test
+    fun testMinusSign() {
+        assertEquals("5 - 2", process("5 minus sign 2"))
+    }
+
+    @Test
+    fun testMultiplicationSign() {
+        assertEquals("3 \u00D7 4", process("3 multiplication sign 4"))
+    }
+
+    @Test
+    fun testGreaterThanSign() {
+        assertEquals("5 > 3", process("5 greater than sign 3"))
+    }
+
+    @Test
+    fun testLessThanSign() {
+        assertEquals("3 < 5", process("3 less than sign 5"))
+    }
+
+    // -- Currency commands --
+
+    @Test
+    fun testDollarSign() {
+        assertEquals("$ 100", process("dollar sign 100"))
+    }
+
+    @Test
+    fun testCentSign() {
+        // Unlike the other currency symbols in this section, the cent sign is expected
+        // to attach directly to the preceding number (no space before it)
+        assertEquals("50\u00A2", process("50 cent sign"))
+    }
+
+    @Test
+    fun testPoundSterlingSign() {
+        assertEquals("\u00A3 50", process("pound sterling sign 50"))
+    }
+
+    @Test
+    fun testEuroSign() {
+        assertEquals("\u20AC 100", process("euro sign 100"))
+    }
+
+    @Test
+    fun testYenSign() {
+        assertEquals("\u00A5 1000", process("yen sign 1000"))
+    }
+
+    // -- Emoticon commands --
+
+    @Test
+    fun testSmileyFace() {
+        assertEquals("hello :-)", process("hello smiley face"))
+    }
+
+    @Test
+    fun testFrownyFace() {
+        assertEquals(":-(", process("frowny face"))
+    }
+
+    @Test
+    fun testWinkyFace() {
+        assertEquals(";-)", process("winky face"))
+    }
+
+    @Test
+    fun testCrossEyedLaughingFace() {
+        assertEquals("XD", process("cross-eyed laughing face"))
+    }
+
+    // -- IP mark commands --
+
+    @Test
+    fun testCopyrightSign() {
+        assertEquals("\u00A9 2024", process("copyright sign 2024"))
+    }
+
+    @Test
+    fun testRegisteredSign() {
+        assertEquals("Brand \u00AE", process("Brand registered sign"))
+    }
+
+    @Test
+    fun testTrademarkSign() {
+        assertEquals("Name \u2122", process("Name trademark sign"))
+    }
+
+    // -- Category toggle tests --
+    // Each test switches off exactly one DictationSettings category and expects the
+    // spoken phrase belonging to that category to come back untouched.
+
+    @Test
+    fun testFormattingDisabled_passesThrough() {
+        val settings = DictationSettings(formatting = false)
+        assertEquals("hello new line world", process("hello new line world", settings))
+    }
+
+    @Test
+    fun testCapitalizationDisabled_passesThrough() {
+        val settings = DictationSettings(capitalization = false)
+        assertEquals("hello caps on world", process("hello caps on world", settings))
+    }
+
+    @Test
+    fun testPunctuationDisabled_passesThrough() {
+        val settings = DictationSettings(punctuation = false)
+        assertEquals("hello open parenthesis world", process("hello open parenthesis world", settings))
+    }
+
+    @Test
+    fun testSymbolsDisabled_passesThrough() {
+        val settings = DictationSettings(symbols = false)
+        assertEquals("hello ampersand world", process("hello ampersand world", settings))
+    }
+
+    @Test
+    fun testMathDisabled_passesThrough() {
+        val settings = DictationSettings(math = false)
+        assertEquals("hello equal sign world", process("hello equal sign world", settings))
+    }
+
+    @Test
+    fun testCurrencyDisabled_passesThrough() {
+        val settings = DictationSettings(currency = false)
+        assertEquals("hello dollar sign world", process("hello dollar sign world", settings))
+    }
+
+    @Test
+    fun testEmoticonsDisabled_passesThrough() {
+        val settings = DictationSettings(emoticons = false)
+        assertEquals("hello smiley face world", process("hello smiley face world", settings))
+    }
+
+    @Test
+    fun testIpMarksDisabled_passesThrough() {
+        val settings = DictationSettings(ipMarks = false)
+        assertEquals("hello copyright sign world", process("hello copyright sign world", settings))
+    }
+
+    // -- Mixed / complex scenarios --
+
+    @Test
+    fun testMixedCommands() {
+        assertEquals(
+            "Dear Sir,\nI have $ 100.",
+            process("caps on dear sir caps off comma new line I have dollar sign 100 period")
+        )
+    }
+
+    @Test
+    fun testAllCapsWithSymbols() {
+        // "all caps" without on/off upper-cases only the next word
+        assertEquals("HELLO @ world", process("all caps hello at sign world"))
+    }
+
+    @Test
+    fun testNoSpaceWithSymbols() {
+        assertEquals("user@domain", process("no space on user at sign domain"))
+    }
+
+    @Test
+    fun testMultipleNewLines() {
+        assertEquals("a\nb\nc", process("a new line b new line c"))
+    }
+
+    @Test
+    fun testCapsModeResetByOff() {
+        assertEquals("Hello world test", process("caps on hello caps off world test"))
+    }
+
+    @Test
+    fun testNumeralFollowedByCommand() {
+        assertEquals("5\n", process("numeral five new line"))
+    }
+
+    @Test
+    fun testCaseInsensitiveMatching() {
+        assertEquals("hello\nworld", process("hello New Line world"))
+    }
+
+    @Test
+    fun testSingleWordInput() {
+        assertEquals("hello", process("hello"))
+    }
+
+    @Test
+    fun testCommandOnly() {
+        assertEquals("\n", process("new line"))
+    }
+
+    @Test
+    fun testConsecutiveCommands() {
+        assertEquals("\n\n", process("new line new line"))
+    }
+
+    @Test
+    fun testFormattingDoesNotAddExtraSpaceAfterNewline() {
+        // After a newline, the next word should not have a leading space
+        assertEquals("hello\nworld", process("hello new line world"))
+    }
+
+    // -- Additional punctuation aliases --
+
+    @Test
+    fun testPointFallback() {
+        assertEquals("hello.", process("hello point"))
+    }
+
+    @Test
+    fun testDotFallback() {
+        assertEquals("hello.", process("hello dot"))
+    }
+
+    @Test
+    fun testFullStopFallback() {
+        assertEquals("hello.", process("hello full stop"))
+    }
+
+    // -- Additional symbol coverage --
+
+    @Test
+    fun testCenterDot() {
+        assertEquals("hello \u00B7 world", process("hello center dot world"))
+    }
+
+    @Test
+    fun testLargeCenterDot() {
+        assertEquals("hello \u25CF world", process("hello large center dot world"))
+    }
+
+    @Test
+    fun testPoundSign() {
+        // "pound sign" is expected as '#', the same character "hashtag" produces
+        assertEquals("# 5", process("pound sign 5"))
+    }
+
+    // -- Edge cases for dangling state flags --
+
+    @Test
+    fun testNumeralAtEnd_passesThrough() {
+        // "numeral" as last word should pass through (no following word to convert)
+        assertEquals("numeral", process("numeral"))
+    }
+
+    @Test
+    fun testRomanNumeralAtEnd_passesThrough() {
+        // "roman numeral" as last words should pass through
+        assertEquals("roman numeral", process("roman numeral"))
+    }
+
+    @Test
+    fun testNumeralFollowedByCommand_resetsFlag() {
+        // "numeral" then a command — the numeral flag should not leak to later words
+        assertEquals("\nfive", process("numeral new line five"))
+    }
+
+    @Test
+    fun testAllCapsNextConsumedInTitleCaseMode() {
+        // "all caps" then "caps on" — the allCapsNextWord flag should be consumed, not leak
+        assertEquals("Hello World", process("all caps caps on hello world"))
+    }
+
+    // -- Punctuation attaches to preceding word --
+
+    @Test
+    fun testPunctuationNoSpaceBefore() {
+        assertEquals("hello. world", process("hello period world"))
+        assertEquals("hello, world", process("hello comma world"))
+        assertEquals("hello! world", process("hello exclamation mark world"))
+        assertEquals("hello? world", process("hello question mark world"))
+        assertEquals("hello: world", process("hello colon world"))
+        assertEquals("hello; world", process("hello semicolon world"))
+    }
+
+    @Test
+    fun testEllipsisNoSpaceBefore() {
+        assertEquals("hello\u2026 world", process("hello ellipsis world"))
+    }
+
+    @Test
+    fun testDashNoSpaceBefore() {
+        assertEquals("hello\u2013 world", process("hello dash world"))
+    }
+
+    // -- Whisper auto-punctuation tolerance --
+
+    @Test
+    fun testWhisperCommaOnCommandWord() {
+        // Whisper may output "hello, new line, world" — commas should not break matching
+        assertEquals("hello\nworld", process("hello, new line, world"))
+    }
+
+    @Test
+    fun testWhisperPeriodOnCommandWord() {
+        // Whisper may output "hello. New line. World." (lower-cased here; mixed-case
+        // matching is covered by testCaseInsensitiveMatching)
+        assertEquals("hello\nworld.", process("hello. new line. world."))
+    }
+
+    @Test
+    fun testWhisperPunctOnSingleWordCommand() {
+        // "ampersand," should still match ampersand
+        assertEquals("hello & world", process("hello ampersand, world"))
+    }
+
+    @Test
+    fun testWhisperPunctOnMultiWordCommand() {
+        // "question mark" has NO_SPACE_BEFORE — attaches to preceding word
+        assertEquals("hello? world", process("hello question mark, world"))
+    }
+
+    @Test
+    fun testWhisperPunctBeforeNewLine() {
+        // Comma before "new line" should be stripped from output
+        assertEquals("hello\nworld", process("hello, new line world"))
+    }
+
+    @Test
+    fun testWhisperPunctOnStatefulCommands() {
+        assertEquals("Hello World", process("caps on, hello world"))
+    }
+
+    // -- Cursor-context whitespace preservation --
+
+    @Test
+    fun testLeadingSpacePreserved() {
+        // Sanitizer adds leading space for cursor context — should be preserved
+        assertEquals(" hello world", process(" hello world"))
+    }
+
+    @Test
+    fun testLeadingSpaceSuppressedBeforeNewline() {
+        // Leading space makes no sense before a newline
+        assertEquals("\nhello", process(" new line hello"))
+    }
+
+    @Test
+    fun testTrailingSpacePreserved() {
+        assertEquals("hello world ", process("hello world "))
+    }
+
+    @Test
+    fun testTrailingSpaceSuppressedAfterNewline() {
+        assertEquals("hello\n", process("hello new line "))
+    }
+}