From 0ff56ad9f00880073650daae0cb7701ce6c3f752 Mon Sep 17 00:00:00 2001 From: subha0319 Date: Fri, 21 Nov 2025 22:47:42 +0530 Subject: [PATCH 1/3] feat: add language auto-detection logic to filter irrelevant suggestions --- .../latin/DictionaryFacilitatorImpl.kt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt index 5b2f238f6e..58903e2597 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt @@ -499,6 +499,27 @@ class DictionaryFacilitatorImpl : DictionaryFacilitator { ) waitForOtherDicts?.await() + // Filter out suggestions from other languages if we have an exact match in one language + val typedWord = composedData.mTypedWord + if (typedWord.isNotEmpty()) { + val groupsWithExactMatch = ArrayList() + for (i in suggestionsArray.indices) { + val suggestions = suggestionsArray[i] ?: continue + // Check if any suggestion in this group matches the typed word exactly (case-insensitive) + if (suggestions.any { it.word.equals(typedWord, ignoreCase = true) }) { + groupsWithExactMatch.add(i) + } + } + // If exact matches exist in some languages but not all, discard the results from the non-matching languages + if (groupsWithExactMatch.isNotEmpty() && groupsWithExactMatch.size < suggestionsArray.size) { + for (i in suggestionsArray.indices) { + if (!groupsWithExactMatch.contains(i)) { + suggestionsArray[i] = null + } + } + } + } + suggestionsArray.forEach { if (it == null) return@forEach suggestionResults.addAll(it) From 87c2a90e2a2d9ead168b0ac69a985a6440cd6d5d Mon Sep 17 00:00:00 2001 From: subha0319 Date: Fri, 21 Nov 2025 22:47:42 +0530 Subject: [PATCH 2/3] feat: add language auto-detection logic to filter irrelevant suggestions --- .gitignore | 1 + .../latin/DictionaryFacilitatorImpl.kt | 51 
+++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/.gitignore b/.gitignore index 955e1c4887..8425390ff6 100755 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ app/.cxx app/.attach_* fastlane/Appfile tools/*.txt +app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt index 5b2f238f6e..b15bd797c1 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt @@ -499,6 +499,57 @@ class DictionaryFacilitatorImpl : DictionaryFacilitator { ) waitForOtherDicts?.await() + var indicesToKeep: ArrayList? = null + + // Strategy A: Context Detection (The "Previous Word" Approach) + // If the previous word exists in Language X but NOT in Language Y, lock to Language X. + val prevWord = ngramContext.getNthPrevWord(1)?.toString() + if (!prevWord.isNullOrEmpty() && !ngramContext.isNthPrevWordBeginningOfSentence(1)) { + val validContextIndices = ArrayList() + for (i in dictionaryGroups.indices) { + val group = dictionaryGroups[i] + // Check main dictionary of this group for the previous word + val mainDict = group.getDict(Dictionary.TYPE_MAIN) + // We check if the word is valid in this language + if (mainDict?.isValidWord(prevWord) == true) { + validContextIndices.add(i) + } + } + // If we found a subset of languages that match the context (and not ALL languages), use them. + if (validContextIndices.isNotEmpty() && validContextIndices.size < dictionaryGroups.size) { + indicesToKeep = validContextIndices + } + } + + // Strategy B: Current Word Exact Match (Fallback) + // Only runs if Context Strategy failed (e.g. start of sentence, or prev word is in all dicts). 
+ if (indicesToKeep == null) { + val typedWord = composedData.mTypedWord + if (typedWord.isNotEmpty()) { + val exactMatchIndices = ArrayList() + for (i in suggestionsArray.indices) { + val suggestions = suggestionsArray[i] ?: continue + // Check if this language offers the exact typed word as a suggestion + if (suggestions.any { it.word.equals(typedWord, ignoreCase = true) }) { + exactMatchIndices.add(i) + } + } + // If only a subset of languages match the typed word exactly, lock to them. + if (exactMatchIndices.isNotEmpty() && exactMatchIndices.size < suggestionsArray.size) { + indicesToKeep = exactMatchIndices + } + } + } + + // Apply Filter: Nullify suggestions from languages that didn't pass the checks + if (indicesToKeep != null) { + for (i in suggestionsArray.indices) { + if (!indicesToKeep.contains(i)) { + suggestionsArray[i] = null + } + } + } + suggestionsArray.forEach { if (it == null) return@forEach suggestionResults.addAll(it) From 4c9acc50a27124f39c58314b68935272a90ef74d Mon Sep 17 00:00:00 2001 From: subha0319 Date: Tue, 25 Nov 2025 01:13:19 +0530 Subject: [PATCH 3/3] feat: improve language auto-detection to reduce noise --- .gitignore | 1 - .../latin/DictionaryFacilitatorImpl.kt | 2 +- .../latin/DictionaryFacilitatorImplTest.kt | 237 ++++++++++++++++++ 3 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt diff --git a/.gitignore b/.gitignore index 8425390ff6..955e1c4887 100755 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,3 @@ app/.cxx app/.attach_* fastlane/Appfile tools/*.txt -app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt index b15bd797c1..4d6793fb5d 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt +++ 
b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt
@@ -511,7 +511,7 @@ class DictionaryFacilitatorImpl : DictionaryFacilitator {
                 // Check main dictionary of this group for the previous word
                 val mainDict = group.getDict(Dictionary.TYPE_MAIN)
                 // We check if the word is valid in this language
-                if (mainDict?.isValidWord(prevWord) == true) {
+                if (mainDict?.isValidWord(prevWord) == true || mainDict?.isValidWord(prevWord.lowercase()) == true) {
                     validContextIndices.add(i)
                 }
             }
diff --git a/app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt b/app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt
new file mode 100644
index 0000000000..6ddfc00821
--- /dev/null
+++ b/app/src/test/java/helium314/keyboard/latin/DictionaryFacilitatorImplTest.kt
@@ -0,0 +1,243 @@
+package helium314.keyboard.latin
+
+import android.content.Context
+import com.android.inputmethod.keyboard.ProximityInfo
+import helium314.keyboard.keyboard.Keyboard
+import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo
+import helium314.keyboard.latin.common.ComposedData
+import helium314.keyboard.latin.common.InputPointers
+import helium314.keyboard.latin.dictionary.Dictionary
+import helium314.keyboard.latin.settings.SettingsValuesForSuggestion
+import helium314.keyboard.latin.utils.SuggestionResults
+import org.junit.Assert.assertEquals
+import org.junit.Assert.assertTrue
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.mockito.Mockito
+import org.robolectric.RobolectricTestRunner
+import java.util.ArrayList
+import java.util.Locale
+
+/**
+ * Exercises the language filtering applied in [DictionaryFacilitatorImpl.getSuggestionResults]
+ * with an English and an Italian [StubDictionary] injected as the main dictionaries.
+ */
+@RunWith(RobolectricTestRunner::class)
+class DictionaryFacilitatorImplTest {
+
+    private val localeEn = Locale.ENGLISH
+    private val localeIt = Locale.ITALIAN
+
+    @Test
+    fun `Strategy A - Previous word how (English) locks context to English`() {
+        val prevWord = "how"
+        val typedWord = "are"
+        val italianNoise = "area"
+
+        val facilitator = setupFacilitator(
+            dictEn = listOf("how", "are"),
+            dictIt = listOf("are", "area")
+        )
+
+        val results = getSuggestions(facilitator, typedWord, contextOf(prevWord))
+
+        assertTrue("Should contain English 'are'", results.any { it.word == "are" })
+        assertTrue("Should filter out Italian noise 'area'", results.none { it.word == italianNoise })
+    }
+
+    @Test
+    fun `Strategy A - Next Word Prediction Hello to how`() {
+        // User typed "Hello " (Space pressed).
+        // Current Input: "" (Empty).
+        // Context: "Hello".
+        // Expectation: English dict contains "Hello", so it locks to English and shows "how".
+
+        val prevWord = "Hello"
+        val typedWord = "" // Empty because space was pressed
+        val nextWordSuggestion = "how"
+        val italianNoise = "come"
+
+        val facilitator = setupFacilitator(
+            dictEn = listOf("Hello", "how"),
+            dictIt = listOf("come") // Italian does NOT have Hello
+        )
+
+        val results = getSuggestions(facilitator, typedWord, contextOf(prevWord))
+
+        assertTrue("Should suggest 'how' based on context 'Hello'",
+            results.any { it.word == nextWordSuggestion })
+        assertTrue("Should filter out Italian noise 'come'", results.none { it.word == italianNoise })
+    }
+
+    @Test
+    fun `Strategy A - Case Insensitive Context Capitalized The matches lowercase the`() {
+        val prevWord = "The"
+        val typedWord = "end"
+        val italianNoise = "endo"
+
+        // The stub's isValidWord is case-sensitive, so "The" is only accepted through the
+        // lowercase retry in the previous-word check.
+        val facilitator = setupFacilitator(
+            dictEn = listOf("the", "end"),
+            dictIt = listOf("endo")
+        )
+
+        val results = getSuggestions(facilitator, typedWord, contextOf(prevWord))
+
+        assertTrue("Should match 'The' to 'the' and allow English suggestions",
+            results.any { it.word == "end" })
+        assertTrue("Should filter out Italian noise", results.none { it.word == italianNoise })
+    }
+
+    @Test
+    fun `Edge Case - Dictionary Overlap The Hello Problem`() {
+        // User types "Hel". Matches start of "Hello" (English) and "Helio" (Italian).
+        // BUT "Hello" exists in BOTH dictionaries (Simulating bad dict).
+        // Expectation: NO filtering.
+
+        val typedWord = "Hel"
+        val englishWord = "Hello"
+        val italianNoise = "Helio"
+
+        val facilitator = setupFacilitator(
+            dictEn = listOf("Hello"),
+            dictIt = listOf("Hello", "Helio")
+        )
+
+        val results = getSuggestions(facilitator, typedWord, NgramContext.EMPTY_PREV_WORDS_INFO)
+
+        assertTrue("Should contain English 'Hello'", results.any { it.word == englishWord })
+        assertTrue("Should preserve Italian 'Helio' because 'Hello' is ambiguous",
+            results.any { it.word == italianNoise })
+    }
+
+    @Test
+    fun `Edge Case - No Match The Give Problem`() {
+        // User types "Gi".
+        // English has "Givenchy". Italian has "Gia".
+        // "Gi" is NOT an exact match for either.
+        // Expectation: NO filtering.
+        val typedWord = "Gi"
+        val englishApprox = "Givenchy"
+        val italianApprox = "Gia"
+
+        val facilitator = setupFacilitator(
+            dictEn = listOf("Givenchy"),
+            dictIt = listOf("Gia")
+        )
+
+        val results = getSuggestions(facilitator, typedWord, NgramContext.EMPTY_PREV_WORDS_INFO)
+
+        assertTrue("Should show English approx", results.any { it.word == englishApprox })
+        assertTrue("Should show Italian approx", results.any { it.word == italianApprox })
+    }
+
+    @Test
+    fun `Simulate Firefox - Main Dict fails Noise is hidden`() {
+        // Strict Filtering Test
+        // Context: "the" (English).
+        // English Dict: Returns NOTHING for input "G" (Simulating failure).
+        // Italian Dict: Returns "Già" (Noise).
+        // Expectation: Empty Result (Strict filtering hides Italian).
+
+        val prevWord = "the"
+        val typedWord = "G"
+
+        val facilitator = setupFacilitator(
+            dictEn = listOf("the"),
+            dictIt = listOf("Già")
+        )
+
+        val results = getSuggestions(facilitator, typedWord, contextOf(prevWord))
+
+        assertEquals("Should be EMPTY. English failed, Italian filtered out.",
+            0, results.size)
+    }
+
+    // --- Helper Methods ---
+
+    /** Builds an [NgramContext] whose only previous word is [prevWord]. */
+    private fun contextOf(prevWord: String): NgramContext =
+        NgramContext(NgramContext.WordInfo(prevWord))
+
+    /**
+     * Calls [DictionaryFacilitatorImpl.getSuggestionResults] for [typedWord] with mocked keyboard,
+     * proximity info and settings, session id 0 and typing input style.
+     */
+    private fun getSuggestions(
+        facilitator: DictionaryFacilitatorImpl,
+        typedWord: String,
+        ngramContext: NgramContext
+    ): SuggestionResults {
+        val composedData = ComposedData(InputPointers(1), false, typedWord)
+        val keyboard = Mockito.mock(Keyboard::class.java)
+        val proximityInfo = Mockito.mock(ProximityInfo::class.java)
+        Mockito.`when`(keyboard.proximityInfo).thenReturn(proximityInfo)
+        val settings = Mockito.mock(SettingsValuesForSuggestion::class.java)
+
+        return facilitator.getSuggestionResults(
+            composedData, ngramContext, keyboard, settings, 0, SuggestedWords.INPUT_STYLE_TYPING
+        )
+    }
+
+    /**
+     * Creates a facilitator whose `dictionaryGroups` field holds one English and one Italian
+     * group, each backed by a [StubDictionary] with the given word list. Both the
+     * `DictionaryGroup` constructor and the field are reached through reflection.
+     */
+    private fun setupFacilitator(dictEn: List<String>, dictIt: List<String>): DictionaryFacilitatorImpl {
+        val facilitator = DictionaryFacilitatorImpl()
+
+        val stubEn = StubDictionary("main_en", localeEn, dictEn)
+        val stubIt = StubDictionary("main_it", localeIt, dictIt)
+
+        val dictGroupClass = Class.forName("helium314.keyboard.latin.DictionaryGroup")
+        val dictGroupConstructor = dictGroupClass.getDeclaredConstructor(
+            Locale::class.java, Dictionary::class.java, Map::class.java, Context::class.java
+        )
+        dictGroupConstructor.isAccessible = true
+
+        // Type arguments are erased at runtime, so any Map satisfies the reflective call.
+        val noSubDicts = emptyMap<String, Any>()
+        val groupEn = dictGroupConstructor.newInstance(localeEn, stubEn, noSubDicts, null)
+        val groupIt = dictGroupConstructor.newInstance(localeIt, stubIt, noSubDicts, null)
+
+        val groupsField = facilitator.javaClass.getDeclaredField("dictionaryGroups")
+        groupsField.isAccessible = true
+        groupsField.set(facilitator, listOf(groupEn, groupIt))
+
+        return facilitator
+    }
+}
+
+/**
+ * In-memory [Dictionary] for tests: suggests every word that starts with the typed word
+ * (ignoring case), or every word when nothing is typed. Word validity is an exact,
+ * case-sensitive lookup so that case handling in the code under test is actually exercised.
+ */
+class StubDictionary(type: String, locale: Locale, private val wordList: List<String>) : Dictionary(type, locale) {
+    override fun getSuggestions(
+        composedData: ComposedData?, ngramContext: NgramContext?, proximityInfoHandle: Long,
+        settingsValuesForSuggestion: SettingsValuesForSuggestion?, sessionId: Int, weightForLocale: Float,
+        inOutWeightOfLangModelVsSpatialModel: FloatArray?
+    ): ArrayList<SuggestedWordInfo> {
+        val list = ArrayList<SuggestedWordInfo>()
+        val typedWord = composedData?.mTypedWord ?: ""
+
+        wordList.forEach { word ->
+            // If typedWord is empty (Next Word Prediction) OR word starts with typedWord
+            if (typedWord.isEmpty() || word.startsWith(typedWord, ignoreCase = true)) {
+                list.add(SuggestedWordInfo(word, "", 1000, SuggestedWordInfo.KIND_TYPED, this, 0, 0))
+            }
+        }
+        return list
+    }
+
+    override fun isValidWord(word: String?): Boolean {
+        return word != null && word in wordList
+    }
+
+    override fun isInDictionary(word: String?): Boolean {
+        return isValidWord(word)
+    }
+}