From 66ee63b0082af173ff5aeeba817d01142e9b61a5 Mon Sep 17 00:00:00 2001
From: tenextractor <139619642+tenextractor@users.noreply.github.com>
Date: Tue, 17 Feb 2026 18:35:31 +0530
Subject: [PATCH 1/2] add Vietnamese Telex and VNI combiners

---
 .../event/combiners/vietnamese/Common.kt      |  98 +++++++
 .../event/combiners/vietnamese/Telex.kt       | 253 ++++++++++++++++++
 .../event/combiners/vietnamese/ToneMark.kt    | 148 ++++++++++
 .../event/combiners/vietnamese/VNI.kt         | 133 +++++++++
 .../event/combiners/vietnamese/VNICombiner.kt |  58 ++++
 .../combiners/vietnamese/VietTelexCombiner.kt |  43 +++
 .../inputmethod/v2keyboard/CombinerKind.kt    |   4 +
 7 files changed, 737 insertions(+)
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/Common.kt
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/ToneMark.kt
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
 create mode 100644 java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt

diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/Common.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Common.kt
new file mode 100644
index 0000000000..eb5e4def13
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Common.kt
@@ -0,0 +1,98 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+/** Tone-mark placement and diacritic tables shared by the Telex and VNI converters. */
+object Common {
+    /** Returns the index in [outputWithoutTone] of the letter that should carry the tone mark.
+     * Adapted from get_tone_mark_placement() in vi-rs/src/editing.rs.
+     *
+     * [firstVowelIndex] and [vowelCount] delimit the vowel cluster. If that range is empty or
+     * does not fit inside the text, [firstVowelIndex] clamped into the text is returned, so
+     * indexing a non-empty text with the result is always safe. Otherwise, in order:
+     * 1. A single vowel letter always takes the mark.
+     * 2. The first `ơ`, `ê` or `â` in the vowel cluster takes the mark.
+     * 3. If exactly one letter of the cluster is not a plain [VOWELS] letter, it takes the mark.
+     * 4. If the cluster contains `oa`, `oe`, `oo`, `uy`, `uo` or `ie`, the cluster's second letter does.
+     * 5. A two-letter cluster that ends the text takes the mark on its first letter.
+     * 6. Otherwise the second letter of the cluster takes the mark.
+     */
+    fun getToneMarkPosition(
+        outputWithoutTone: CharSequence,
+        firstVowelIndex: Int,
+        vowelCount: Int
+    ): Int {
+        val clusterEnd = firstVowelIndex + vowelCount
+        // A VNI buffer such as "b1" has a tone key but no vowel; without this guard the
+        // fall-through result would point past the end of the text.
+        if (vowelCount <= 0 || firstVowelIndex < 0 || clusterEnd > outputWithoutTone.length)
+            return firstVowelIndex.coerceIn(0, maxOf(outputWithoutTone.length - 1, 0))
+
+        // If there's only one vowel, then it's guaranteed that the tone mark will go there
+        if (vowelCount == 1) return firstVowelIndex
+
+        for (i in firstVowelIndex ..< clusterEnd) {
+            when (outputWithoutTone[i]) {
+                'ơ', 'Ơ', 'ê', 'Ê', 'â', 'Â' -> return i
+            }
+        }
+
+        val vowel = outputWithoutTone.slice(firstVowelIndex ..< clusterEnd)
+
+        // If there is only one vowel with a diacritic (circumflex, breve, horn, etc.), it should
+        // get the tone mark
+        val vowelsWithDiacritics = vowel.withIndex().filter { it.value !in VOWELS }
+        if (vowelsWithDiacritics.size == 1)
+            return firstVowelIndex + vowelsWithDiacritics[0].index
+
+        // Special vowels require the tone mark to be placed on the second character
+        if (SPECIAL_VOWEL_PAIRS.any { vowel.contains(it, ignoreCase = true) })
+            return firstVowelIndex + 1
+
+        // If a syllable ends with a 2 character vowel, put it on the first character
+        if (clusterEnd == outputWithoutTone.length && vowelCount == 2)
+            return firstVowelIndex
+
+        // Else, put tone mark on second vowel
+        return firstVowelIndex + 1
+    }
+
+    /** Vowel pairs that move the tone mark to the cluster's second letter; built once, not per call. */
+    private val SPECIAL_VOWEL_PAIRS = setOf("oa", "oe", "oo", "uy", "uo", "ie")
+
+    val CONSONANTS = setOf(
+        'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z')
+
+    val VOWELS = setOf('a', 'e', 'i', 'o', 'u', 'y', 'A', 'E', 'I', 'O', 'U', 'Y')
+
+    /** A map of characters without accent to character with circumflex accent */
+    val CIRCUMFLEX_MAP = mapOf(
+        'a' to 'â',
+        'e' to 'ê',
+        'o' to 'ô',
+        // uppercase
+        'A' to 'Â',
+        'E' to 'Ê',
+        'O' to 'Ô',
+    )
+
+    /** A map of characters without accent to character with dyet (D WITH STROKE) accent */
+    val STROKE_MAP = mapOf(
+        'd' to 'đ',
+        'D' to 'Đ',
+    )
+
+    /** A map of characters without accent to character with horn accent */
+    val HORN_MAP = mapOf(
+        'u' to 'ư',
+        'o' to 'ơ',
+        // uppercase
+        'U' to 'Ư',
+        'O' to 'Ơ',
+    )
+
+    /** A map of characters without accent to character with breve accent */
+    val BREVE_MAP = mapOf(
+        'a' to 'ă',
+        // uppercase
+        'A' to 'Ă',
+    )
+}
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
new file mode 100644
index 0000000000..3c7685f986
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
@@ -0,0 +1,253 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+object Telex {
+    val TONES = mapOf(
+        'f' to ToneMark.GRAVE,
+        'j' to ToneMark.DOT,
+        'r' to ToneMark.HOOK,
+        's' to ToneMark.ACUTE,
+        'x' to ToneMark.TILDE
+    )
+
+    /** These are the modifiers that should only be active if they come after the first vowel letter.
+     * For example, `sao` should not output any tone marks, but `aso` should output `áo`.
+     */
+    val AFTER_VOWEL_MODIFIERS = setOf('f', 'j', 'r', 's', 'w', 'x')
+
+    /** Convert a string that represents a Vietnamese syllable written in the Telex convention ([input])
+     * to a syllable written in Vietnamese orthography.
+     * Example: input = "vietej", output = "việt"
+    */
+    public fun telexToVietnamese(input: String): String {
+
+        // STAGE 1: calculate modifierIndices, firstVowelIndex, startedFinal and lowercaseVowel
+        // Example:
+        //   Input: "vietej"
+        //   Output:
+        //     modifierIndices: { 'e': [2, 4], 'j': [5], the rest are empty lists }
+        //     firstVowelIndex: 1
+        //     startedFinal: true
+        //     lowercaseVowel: "ie"
+        val lowercaseInput = input.lowercase()
+        var startedVowel = false
+        var startedFinal = false
+        var firstVowelIndex = -1
+
+        val lowercaseVowel = StringBuilder()
+
+        /** Map of 'modifier' characters that can add a diacritic or tone mark,
+         * to lists of indices of occurrences of these characters
+         */
+        val modifierIndices: Map<Char, MutableList<Int>> = mapOf(
+            'a' to mutableListOf(),
+            'd' to mutableListOf(),
+            'e' to mutableListOf(),
+            'f' to mutableListOf(),
+            'j' to mutableListOf(),
+            'o' to mutableListOf(),
+            'r' to mutableListOf(),
+            's' to mutableListOf(),
+            'w' to mutableListOf(),
+            'x' to mutableListOf(),
+        )
+
+        for ((index, ch) in lowercaseInput.withIndex()) {
+
+            if (!startedVowel) {
+                if (Common.VOWELS.contains(ch)) {
+                    // TODO: this code needs to be refined further
+                    // if a syllable has a weird initial (like 'cl' in 'clown') that we are sure does not belong to Vietnamese,
+                    // then stop the conversion process and just output the input as it is
+                    // if (!(index in 0..3)) return input
+                    // if (index in 2..3)
+                    //     if (!INITIALS.contains(lowercaseInput.slice(0..<index)))
+                    //         return input
+
+                    firstVowelIndex = index
+                    startedVowel = true
+                }
+            }
+
+            if (startedVowel && !startedFinal && !AFTER_VOWEL_MODIFIERS.contains(ch)) {
+                if (Common.CONSONANTS.contains(ch)) {
+                    startedFinal = true
+                } else {
+                    lowercaseVowel.append(ch)
+                }
+            }
+
+            if (AFTER_VOWEL_MODIFIERS.contains(ch)) {
+                if (startedVowel) modifierIndices[ch]!!.add(index)
+            } else if (modifierIndices.containsKey(ch)) {
+                modifierIndices[ch]!!.add(index)
+            }
+        }
+
+
+        // STAGE 1.5: apply a correction to firstVowelIndex
+        // If the input contains more than one 'd' before the vowel starts
+        // (example: "ddi" > "đi", "dddi" > "ddi"), one of the characters will be deleted
+        // and therefore the firstVowelIndex needs to be corrected to account for this
+        if (modifierIndices['d']!!.size > 1 && modifierIndices['d']!!.last() < firstVowelIndex)
+            firstVowelIndex--
+        
+        // apply correction to lowercaseVowel:
+        // "gi" (unless there is no other vowel letter) and "qu" should be considered consonants
+        if (lowercaseVowel.length > 1 && (lowercaseInput.slice(0..<2) == "gi" || lowercaseInput.slice(0..<2) == "qu"))
+            lowercaseVowel.deleteAt(0)
+
+
+        // STAGE 2: use modifierIndices to apply diacritics (except tone marks) to the syllable
+        // Example:
+        //   Input: "vietej" with its modifierIndices and firstVowelIndex as detailed in Stage 1
+        //   Output:
+        //     outputWithoutTone: "viêt"
+        //     tone: ToneMark.DOT
+        //     vowelCount: 2
+        val output = StringBuilder()
+        var tone: ToneMark? = null
+        var doNotOutputNextChar = false // this handles the "uwow" edge case
+        var vowelCount = 0
+        var wHasBeenUsed = false
+
+        for ((index, ch) in input.withIndex()) {
+            if (doNotOutputNextChar) {
+                doNotOutputNextChar = false
+                continue
+            }
+
+            val lowercaseCh = lowercaseInput[index]
+
+            when (lowercaseCh) {
+                'a', 'd', 'e', 'o' -> {
+                    // handle letters that can be doubled
+
+                    val thisModifierIndices = modifierIndices[lowercaseCh]!!
+
+                    // if there is a string such as `ddi` (output: `đi`) or `dddi` (output: ddi),
+                    // the last `d` (or any modifier that can be doubled) needs to be omitted from the output
+                    if (thisModifierIndices.size >= 2 && index == thisModifierIndices.last()) continue
+
+                    // if there is a string such as `ddi` (output: `đi`),
+                    // a diacritic needs to be applied to the first `d`
+                    if (thisModifierIndices.size == 2 && index == thisModifierIndices[0]) {
+                        if (lowercaseCh == 'd') {
+                            output.append(Common.STROKE_MAP[ch])
+                        } else if (lowercaseCh == 'o' && lowercaseVowel.contentEquals("oeo")) {
+                            // handle "oeo" edge case (should output "oeo", not "ôe"):
+                            // remove the second 'o''s index from modifierIndices so that it will be outputted
+                            modifierIndices['o']!!.removeAt(modifierIndices['o']!!.lastIndex)
+                            output.append(ch)
+                        } else {
+                            output.append(Common.CIRCUMFLEX_MAP[ch])
+                            vowelCount++
+                        }
+
+                        continue // after outputting the character with diacritic,
+                        // suppress outputting the original character
+                    }
+
+                    val wIndices = modifierIndices['w']!!
+
+                    if (wIndices.size == 1 && lowercaseCh == 'a' && !wHasBeenUsed) {
+                        output.append(Common.BREVE_MAP[ch])
+                        wHasBeenUsed = true
+                        vowelCount++
+                        continue
+                    }
+
+                    if (wIndices.size == 1 && lowercaseCh == 'o'
+                    && !lowercaseVowel.contentEquals("oa")
+                    // ↑ add edge case for "oaw" (should output "oă", not "ơă" or "ơa")
+                    && !(firstVowelIndex != 0 && lowercaseVowel.contentEquals("ou"))
+                    // ↑ add edge case: any initial consonant + vowel "ou" with modifier 'w' + no final
+                    // should output "oư" and not "ơư"
+                    ) {
+                        output.append(Common.HORN_MAP[ch])
+                        wHasBeenUsed = true
+                        vowelCount++
+                        continue
+                    }
+                }
+
+                // handling tones
+                'f', 'j', 'r', 's', 'x' -> {
+                    val thisModifierIndices = modifierIndices[lowercaseCh]!!
+
+                    if (thisModifierIndices.size == 1)
+                        tone = TONES[lowercaseCh]!!
+
+                    if (thisModifierIndices.size >= 1 && index == thisModifierIndices.last()) continue
+                }
+
+                'u' -> {
+                    // edge case for `uwow` > `ươ`:
+                    // the first instance of
+                    if (lowercaseInput.length >= index + 4) {
+                        if (lowercaseInput.slice(index..<index+4) == "uwow" && modifierIndices['w']!!.size == 2) {
+                            modifierIndices['w']!!.removeAt(0)
+                            doNotOutputNextChar = true
+                        }
+                    }
+
+                    // Check if "uo" with modifier 'w' should output "uơ" instead of "ươ"
+                    // This only applies when:
+                    // * There is an initial consonant, i.e. the syllable does not start with a vowel
+                    // * The vowel is only "uo", nothing else
+                    // * There is no final consonant
+                    // For example: "huow" -> "huơ" (uowIsNotUwow=true), but "uow" -> "ươ" (uowIsNotUwow=false)
+                    var uowIsNotUwow = false
+                    if ((firstVowelIndex > 0) && !startedFinal && !doNotOutputNextChar
+                        && modifierIndices['w']!!.size == 1 && lowercaseVowel.contentEquals("uo")) {
+                         uowIsNotUwow = true
+                    }
+
+                    if (modifierIndices['w']!!.size == 1 && !wHasBeenUsed && !(lowercaseInput[0] == 'q' && index == 1) && !uowIsNotUwow) {
+                        output.append(Common.HORN_MAP[ch])
+                        vowelCount++
+                        wHasBeenUsed = true
+                        continue
+                    }
+                }
+
+                'w' -> {
+                    if (modifierIndices['w']!!.size >= 1 && index == modifierIndices['w']!!.last() &&
+                        lowercaseVowel.any { it == 'a' || it == 'o' || it == 'u'}) continue
+                }
+            }
+
+            output.append(ch) // default behavior: just output the character from input as it is
+            if (Common.VOWELS.contains(lowercaseCh)) vowelCount++
+        }
+
+        // STAGE 3: apply a tone mark (if any)
+        if (tone == null) return output.toString()
+
+        // edge case: "gija" should output "gịa"
+        if (lowercaseInput == "gija") {
+            output[1] = tone.map[output[1]] ?: output[1]
+            return output.toString()
+        }
+
+        // apply corrections to vowelCount and firstVowelIndex:
+        // 'gi' (if there is another vowel after it) and 'qu' should be considered as consonants
+        // There is no Vietnamese word which consists of the initial 'qu' without another vowel letter,
+        // but for the sake of better error/edge case handling the correction will only be applied
+        // if there is another vowel letter.
+        if (vowelCount > 1 && (lowercaseInput.slice(0..<2) == "gi" || lowercaseInput.slice(0..<2) == "qu")) {
+                vowelCount--
+                firstVowelIndex++
+        }
+
+        // if there has been some error applying the correction, just output without the tone mark
+        if (vowelCount <= 0 || firstVowelIndex < 0 || firstVowelIndex + vowelCount - 1 >= output.length)
+            return output.toString()
+
+        // add tone mark
+        val toneMarkPosition = Common.getToneMarkPosition(output, firstVowelIndex, vowelCount)
+        output[toneMarkPosition] = tone.map[output[toneMarkPosition]] ?:
+            output[toneMarkPosition]
+
+        return output.toString()
+    }
+}
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/ToneMark.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/ToneMark.kt
new file mode 100644
index 0000000000..2e572b4ada
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/ToneMark.kt
@@ -0,0 +1,148 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+/** Vietnamese tone marks.
+ *
+ * Represents the five tone marks used in the Vietnamese writing system.
+ */
+enum class ToneMark(val map: Map<Char, Char>) {
+    /** Dấu sắc (acute accent) - rising tone */
+    ACUTE(mapOf(
+        'a' to 'á',
+        'â' to 'ấ',
+        'ă' to 'ắ',
+        'e' to 'é',
+        'ê' to 'ế',
+        'i' to 'í',
+        'o' to 'ó',
+        'ô' to 'ố',
+        'ơ' to 'ớ',
+        'u' to 'ú',
+        'ư' to 'ứ',
+        'y' to 'ý',
+        // uppercase
+        'A' to 'Á',
+        'Â' to 'Ấ',
+        'Ă' to 'Ắ',
+        'E' to 'É',
+        'Ê' to 'Ế',
+        'I' to 'Í',
+        'O' to 'Ó',
+        'Ô' to 'Ố',
+        'Ơ' to 'Ớ',
+        'U' to 'Ú',
+        'Ư' to 'Ứ',
+        'Y' to 'Ý',
+    )),
+    /** Dấu huyền (grave accent) - falling tone */
+    GRAVE(mapOf(
+        'a' to 'à',
+        'â' to 'ầ',
+        'ă' to 'ằ',
+        'e' to 'è',
+        'ê' to 'ề',
+        'i' to 'ì',
+        'o' to 'ò',
+        'ô' to 'ồ',
+        'ơ' to 'ờ',
+        'u' to 'ù',
+        'ư' to 'ừ',
+        'y' to 'ỳ',
+        // uppercase
+        'A' to 'À',
+        'Â' to 'Ầ',
+        'Ă' to 'Ằ',
+        'E' to 'È',
+        'Ê' to 'Ề',
+        'I' to 'Ì',
+        'O' to 'Ò',
+        'Ô' to 'Ồ',
+        'Ơ' to 'Ờ',
+        'U' to 'Ù',
+        'Ư' to 'Ừ',
+        'Y' to 'Ỳ',
+    )),
+    /** Dấu hỏi (hook above) - dipping tone */
+    HOOK(mapOf(
+        'a' to 'ả',
+        'â' to 'ẩ',
+        'ă' to 'ẳ',
+        'e' to 'ẻ',
+        'ê' to 'ể',
+        'i' to 'ỉ',
+        'o' to 'ỏ',
+        'ô' to 'ổ',
+        'ơ' to 'ở',
+        'u' to 'ủ',
+        'ư' to 'ử',
+        'y' to 'ỷ',
+        // uppercase
+        'A' to 'Ả',
+        'Ă' to 'Ẳ',
+        'Â' to 'Ẩ',
+        'E' to 'Ẻ',
+        'Ê' to 'Ể',
+        'O' to 'Ỏ',
+        'Ô' to 'Ổ',
+        'Ơ' to 'Ở',
+        'I' to 'Ỉ',
+        'U' to 'Ủ',
+        'Ư' to 'Ử',
+        'Y' to 'Ỷ',
+    )),
+    /** Dấu ngã (tilde) - creaky rising tone */
+    TILDE(mapOf(
+        'a' to 'ã',
+        'ă' to 'ẵ',
+        'â' to 'ẫ',
+        'e' to 'ẽ',
+        'ê' to 'ễ',
+        'o' to 'õ',
+        'ô' to 'ỗ',
+        'ơ' to 'ỡ',
+        'i' to 'ĩ',
+        'u' to 'ũ',
+        'ư' to 'ữ',
+        'y' to 'ỹ',
+        // uppercase
+        'A' to 'Ã',
+        'Ă' to 'Ẵ',
+        'Â' to 'Ẫ',
+        'E' to 'Ẽ',
+        'Ê' to 'Ễ',
+        'O' to 'Õ',
+        'Ô' to 'Ỗ',
+        'Ơ' to 'Ỡ',
+        'I' to 'Ĩ',
+        'U' to 'Ũ',
+        'Ư' to 'Ữ',
+        'Y' to 'Ỹ',
+    )),
+    /** Dấu nặng (dot below) - creaky falling tone */
+    DOT(mapOf(
+        'a' to 'ạ',
+        'ă' to 'ặ',
+        'â' to 'ậ',
+        'e' to 'ẹ',
+        'ê' to 'ệ',
+        'o' to 'ọ',
+        'ô' to 'ộ',
+        'ơ' to 'ợ',
+        'i' to 'ị',
+        'u' to 'ụ',
+        'ư' to 'ự',
+        'y' to 'ỵ',
+        // uppercase
+        'A' to 'Ạ',
+        'Ă' to 'Ặ',
+        'Â' to 'Ậ',
+        'E' to 'Ẹ',
+        'Ê' to 'Ệ',
+        'O' to 'Ọ',
+        'Ô' to 'Ộ',
+        'Ơ' to 'Ợ',
+        'I' to 'Ị',
+        'U' to 'Ụ',
+        'Ư' to 'Ự',
+        'Y' to 'Ỵ',
+    )),
+}
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
new file mode 100644
index 0000000000..e5ff613180
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
@@ -0,0 +1,133 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+object VNI {
+    val TONES = mapOf(
+        '1' to ToneMark.ACUTE,
+        '2' to ToneMark.GRAVE,
+        '3' to ToneMark.HOOK,
+        '4' to ToneMark.TILDE,
+        '5' to ToneMark.DOT
+    )
+
+    fun VNIToVietnamese(input: String): String {
+        val lowercaseInput = input.lowercase()
+
+        val modifierExists = MutableList(10) { false }
+
+        val lowercaseInitial = StringBuilder()
+        val lowercaseVowel = StringBuilder()
+
+        var hasLetters = false
+
+        var startedVowel = false
+        var startedFinal = false
+
+        var tone: ToneMark? = null
+
+        // STAGE 1: build modifierIndices and lowercaseVowel
+        for ((index, ch) in lowercaseInput.withIndex()) {
+            //if (ch.isAsciiDigit()) modifierIndices[ch.digitToInt()].add(index)
+            if (ch.isLetter()) hasLetters = true
+
+                // update firstModifierIndex
+                if (ch.isDigit() && !modifierExists[ch.digitToInt()])
+                    modifierExists[ch.digitToInt()] = true
+
+                    if (!startedVowel && Common.CONSONANTS.contains(ch)) lowercaseInitial.append(ch)
+
+                        if (!startedFinal && Common.VOWELS.contains(ch)) {
+                            if (!startedVowel) startedVowel = true
+                                lowercaseVowel.append(ch)
+                        }
+
+                        if (startedVowel && Common.CONSONANTS.contains(ch))
+                            startedFinal = true
+
+                            when (ch) {
+                                '1', '2', '3', '4', '5' -> tone = TONES[ch]!!
+                            }
+        }
+
+        // apply correction to lowercaseInitial and lowercaseVowel
+        var giQuCorrectionApplied = false
+        if (lowercaseVowel.length > 1 && (lowercaseInitial.contentEquals("q") && lowercaseVowel[0] == 'u' ||
+            lowercaseInitial.contentEquals("g") && lowercaseVowel[0] == 'i'
+        )) {
+            giQuCorrectionApplied = true
+            lowercaseInitial.append(lowercaseVowel[0])
+            lowercaseVowel.deleteAt(0)
+        }
+
+        if (!hasLetters) return input
+
+            // STAGE 2: remove numbers and add diacritics
+            val output = StringBuilder()
+
+            /** Tracks if an 'u' has been converted to 'ư'.
+             * This variable is checked to ensure that only the first 'u' is converted to 'ư' when there are multiple 'u's.
+             * For example, "uou7" should output "ươu", not "ươư"; "uu7" should output "ưu", not "ưư".*/
+            var uHornOutputted = false
+
+            for ((index, ch) in lowercaseInput.withIndex()) {
+                when (ch) {
+                    // handle numbers
+                    '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' -> continue
+
+                    // handle modifiable characters
+                    'a' -> {
+                        if (modifierExists[8]) {
+                            output.append(Common.BREVE_MAP[input[index]])
+                            continue
+                        }
+
+                        if (modifierExists[6]) {
+                            output.append(Common.CIRCUMFLEX_MAP[input[index]])
+                            continue
+                        }
+                    }
+                    'd' -> if (modifierExists[9]) {
+                        output.append(Common.STROKE_MAP[input[index]])
+                        continue
+                    }
+                    'e', 'o' -> {
+                        if (modifierExists[6]) {
+                            output.append(Common.CIRCUMFLEX_MAP[input[index]])
+                            continue
+                        }
+
+                        if (ch == 'o' && modifierExists[7] &&
+                            !(output.length != 0 && lowercaseVowel.contentEquals("ou") && !startedFinal)) {
+                            output.append(Common.HORN_MAP[input[index]])
+                            continue
+                            }
+                    }
+
+                    'u' -> if (modifierExists[7] &&
+                    !uHornOutputted &&
+                    !(output.getOrNull(0)?.lowercaseChar() == 'q' && output.length == 1) &&
+                    !(output.length != 0 && lowercaseVowel.contentEquals("uo") && !startedFinal)) {
+                        output.append(Common.HORN_MAP[input[index]])
+                        uHornOutputted = true
+                        continue
+                    }
+                }
+
+                //default behavior: output the char in input
+                output.append(input[index])
+            }
+
+            // STAGE 3: add tone mark
+            if (tone == null) return output.toString()
+
+                //edge case for gi5a > gịa
+                if (lowercaseInput == "gi5a") {
+                    output[1] = tone.map[output[1]] ?: output[1]
+                    return output.toString()
+                }
+
+                val toneMarkPosition = Common.getToneMarkPosition(output, lowercaseInitial.length, lowercaseVowel.length)
+                output[toneMarkPosition] = tone.map[output[toneMarkPosition]] ?: output[toneMarkPosition]
+
+                return output.toString()
+    }
+}
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
new file mode 100644
index 0000000000..973e68d202
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
@@ -0,0 +1,58 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+import android.text.TextUtils
+import org.futo.inputmethod.event.Combiner
+import org.futo.inputmethod.event.Event
+import org.futo.inputmethod.latin.common.Constants
+import java.util.ArrayList
+
+class VNICombiner: Combiner {
+    private val buffer = StringBuilder() // holds a single Vietnamese word/syllable
+
+    override fun processEvent(
+        previousEvents: ArrayList<Event?>?,
+        event: Event?
+    ): Event {
+        if (event == null) return Event.createNotHandledEvent()
+        if (event.eventType != Event.EVENT_TYPE_INPUT_KEYPRESS) return event
+
+        val keypress = event.mCodePoint.toChar()
+
+        // The normal ASCII digits are left untouched by the combiner and always result in digits
+        // being committed to the output. On the other hand, fullwidth digits are intercepted by
+        // this combiner, converted into ASCII digits, and sent to the VNI converter.
+        // This lets the user explicitly enter numbers that will not get converted into diacritics.
+        // For example, if ASCII '1' (U+0031 DIGIT ONE) is given to this combiner, it will always
+        // output an ASCII '1' (U+0031).
+        // But if a fullwidth '１' (U+FF11 FULLWIDTH DIGIT ONE) is given to this combiner, it will be
+        // converted to an ASCII '1' (U+0031) and given to the VNI converter, where it might result
+        // in an acute accent being placed over a letter.
+        // So, the input sequence [V][i][e][t][U+FF15][U+FF16] will result in the output "Việt"
+        if (keypress.code in 0xFF10..0xFF19) {
+            buffer.append((keypress.code - 0xFEE0).toChar())
+            return Event.createConsumedEvent(event)
+        }
+
+        if (!(keypress in 'A'..'Z' || keypress in 'a'..'z')) {
+            if (!TextUtils.isEmpty(buffer)) {
+                if (event.mKeyCode == Constants.CODE_DELETE) {
+                    buffer.setLength(buffer.length - 1)
+                    return Event.createConsumedEvent(event)
+                }
+            }
+
+            if(!event.isFunctionalKeyEvent) return Event.createResetEvent(event)
+            return event
+        }
+
+        buffer.append(keypress)
+        return Event.createConsumedEvent(event)
+    }
+
+    override fun getCombiningStateFeedback(): CharSequence? =
+        VNI.VNIToVietnamese(buffer.toString())
+
+    override fun reset() {
+        buffer.clear()
+    }
+}
\ No newline at end of file
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt
new file mode 100644
index 0000000000..59841c534b
--- /dev/null
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt
@@ -0,0 +1,43 @@
+package org.futo.inputmethod.event.combiners.vietnamese
+
+import android.text.TextUtils
+import org.futo.inputmethod.event.Combiner
+import org.futo.inputmethod.event.Event
+import org.futo.inputmethod.latin.common.Constants
+import java.util.ArrayList
+
+class VietTelexCombiner: Combiner {
+    private val buffer = StringBuilder() // holds a single Vietnamese word/syllable
+
+    override fun processEvent(
+        previousEvents: ArrayList<Event?>?,
+        event: Event?
+    ): Event {
+        if (event == null) return Event.createNotHandledEvent()
+        if (event.eventType != Event.EVENT_TYPE_INPUT_KEYPRESS) return event
+
+        val keypress = event.mCodePoint.toChar()
+
+        if (!(keypress in 'A'..'Z' || keypress in 'a'..'z')) {
+            if (!TextUtils.isEmpty(buffer)) {
+                if (event.mKeyCode == Constants.CODE_DELETE) {
+                    buffer.setLength(buffer.length - 1)
+                    return Event.createConsumedEvent(event)
+                }
+            }
+
+            if(!event.isFunctionalKeyEvent) return Event.createResetEvent(event)
+            return event
+        }
+
+        buffer.append(keypress)
+        return Event.createConsumedEvent(event)
+    }
+
+    override fun getCombiningStateFeedback(): CharSequence? =
+        Telex.telexToVietnamese(buffer.toString())
+
+    override fun reset() {
+        buffer.clear()
+    }
+}
\ No newline at end of file
diff --git a/java/src/org/futo/inputmethod/v2keyboard/CombinerKind.kt b/java/src/org/futo/inputmethod/v2keyboard/CombinerKind.kt
index bf3bb765c4..5bff16fcc9 100644
--- a/java/src/org/futo/inputmethod/v2keyboard/CombinerKind.kt
+++ b/java/src/org/futo/inputmethod/v2keyboard/CombinerKind.kt
@@ -5,6 +5,8 @@ import org.futo.inputmethod.event.DeadKeyCombiner
 import org.futo.inputmethod.event.combiners.NFCNormalizingCombiner
 import org.futo.inputmethod.event.combiners.DeadKeyPreCombiner
 import org.futo.inputmethod.event.combiners.KoreanCombiner
+import org.futo.inputmethod.event.combiners.vietnamese.VNICombiner
+import org.futo.inputmethod.event.combiners.vietnamese.VietTelexCombiner
 import org.futo.inputmethod.event.combiners.wylie.WylieCombiner
 
 enum class CombinerKind(val factory: () -> Combiner) {
@@ -13,5 +15,7 @@ enum class CombinerKind(val factory: () -> Combiner) {
     NFCNormalize({ NFCNormalizingCombiner() }),
     Korean({ KoreanCombiner() }),
     KoreanCombineInitials({ KoreanCombiner(combineInitials = true) }),
+    VietTelex( { VietTelexCombiner() }),
+    VNI( { VNICombiner() }),
     Wylie({ WylieCombiner() }),
 }
\ No newline at end of file

From c4e3f0da82d5e3f677352be0b38e1d5391a6075f Mon Sep 17 00:00:00 2001
From: tenextractor <139619642+tenextractor@users.noreply.github.com>
Date: Tue, 17 Feb 2026 20:14:47 +0530
Subject: [PATCH 2/2] fix index out of bounds error

---
 .../event/combiners/vietnamese/Telex.kt       |  23 +--
 .../event/combiners/vietnamese/VNI.kt         | 145 +++++++++---------
 .../event/combiners/vietnamese/VNICombiner.kt |   6 +-
 .../combiners/vietnamese/VietTelexCombiner.kt |   6 +-
 4 files changed, 99 insertions(+), 81 deletions(-)

diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
index 3c7685f986..32c5152e93 100644
--- a/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/Telex.kt
@@ -17,7 +17,7 @@ object Telex {
     /** Convert a string that represents a Vietnamese syllable written in the Telex convention ([input])
      * to a syllable written in Vietnamese orthography.
      * Example: input = "vietej", output = "việt"
-    */
+     */
     public fun telexToVietnamese(input: String): String {
 
         // STAGE 1: calculate modifierIndices, firstVowelIndex, startedFinal and lowercaseVowel
@@ -90,7 +90,7 @@ object Telex {
         // and therefore the firstVowelIndex needs to be corrected to account for this
         if (modifierIndices['d']!!.size > 1 && modifierIndices['d']!!.last() < firstVowelIndex)
             firstVowelIndex--
-        
+
         // apply correction to lowercaseVowel:
         // "gi" (unless there is no other vowel letter) and "qu" should be considered consonants
         if (lowercaseVowel.length > 1 && (lowercaseInput.slice(0..<2) == "gi" || lowercaseInput.slice(0..<2) == "qu"))
@@ -136,7 +136,7 @@ object Telex {
                         } else if (lowercaseCh == 'o' && lowercaseVowel.contentEquals("oeo")) {
                             // handle "oeo" edge case (should output "oeo", not "ôe"):
                             // remove the second 'o''s index from modifierIndices so that it will be outputted
-                            modifierIndices['o']!!.removeAt(modifierIndices['o']!!.lastIndex)
+                            modifierIndices['o']!!.removeLast()
                             output.append(ch)
                         } else {
                             output.append(Common.CIRCUMFLEX_MAP[ch])
@@ -157,9 +157,9 @@ object Telex {
                     }
 
                     if (wIndices.size == 1 && lowercaseCh == 'o'
-                    && !lowercaseVowel.contentEquals("oa")
-                    // ↑ add edge case for "oaw" (should output "oă", not "ơă" or "ơa")
-                    && !(firstVowelIndex != 0 && lowercaseVowel.contentEquals("ou"))
+                        && !lowercaseVowel.contentEquals("oa")
+                        // ↑ add edge case for "oaw" (should output "oă", not "ơă" or "ơa")
+                        && !(firstVowelIndex != 0 && lowercaseVowel.contentEquals("ou"))
                     // ↑ add edge case: any initial consonant + vowel "ou" with modifier 'w' + no final
                     // should output "oư" and not "ơư"
                     ) {
@@ -199,7 +199,7 @@ object Telex {
                     var uowIsNotUwow = false
                     if ((firstVowelIndex > 0) && !startedFinal && !doNotOutputNextChar
                         && modifierIndices['w']!!.size == 1 && lowercaseVowel.contentEquals("uo")) {
-                         uowIsNotUwow = true
+                        uowIsNotUwow = true
                     }
 
                     if (modifierIndices['w']!!.size == 1 && !wHasBeenUsed && !(lowercaseInput[0] == 'q' && index == 1) && !uowIsNotUwow) {
@@ -235,8 +235,8 @@ object Telex {
         // but for the sake of better error/edge case handling the correction will only be applied
         // if there is another vowel letter.
         if (vowelCount > 1 && (lowercaseInput.slice(0..<2) == "gi" || lowercaseInput.slice(0..<2) == "qu")) {
-                vowelCount--
-                firstVowelIndex++
+            vowelCount--
+            firstVowelIndex++
         }
 
         // if there has been some error applying the correction, just output without the tone mark
@@ -245,8 +245,11 @@ object Telex {
 
         // add tone mark
         val toneMarkPosition = Common.getToneMarkPosition(output, firstVowelIndex, vowelCount)
+        // avoid index out of bounds error
+        if (toneMarkPosition !in 0..<output.length)
+            return output.toString()
         output[toneMarkPosition] = tone.map[output[toneMarkPosition]] ?:
-            output[toneMarkPosition]
+                output[toneMarkPosition]
 
         return output.toString()
     }
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
index e5ff613180..aee629ea53 100644
--- a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNI.kt
@@ -29,30 +29,30 @@ object VNI {
             //if (ch.isAsciiDigit()) modifierIndices[ch.digitToInt()].add(index)
             if (ch.isLetter()) hasLetters = true
 
-                // update firstModifierIndex
-                if (ch.isDigit() && !modifierExists[ch.digitToInt()])
-                    modifierExists[ch.digitToInt()] = true
+            // update firstModifierIndex
+            if (ch.isDigit() && !modifierExists[ch.digitToInt()])
+                modifierExists[ch.digitToInt()] = true
 
-                    if (!startedVowel && Common.CONSONANTS.contains(ch)) lowercaseInitial.append(ch)
+            if (!startedVowel && Common.CONSONANTS.contains(ch)) lowercaseInitial.append(ch)
 
-                        if (!startedFinal && Common.VOWELS.contains(ch)) {
-                            if (!startedVowel) startedVowel = true
-                                lowercaseVowel.append(ch)
-                        }
+            if (!startedFinal && Common.VOWELS.contains(ch)) {
+                if (!startedVowel) startedVowel = true
+                lowercaseVowel.append(ch)
+            }
 
-                        if (startedVowel && Common.CONSONANTS.contains(ch))
-                            startedFinal = true
+            if (startedVowel && Common.CONSONANTS.contains(ch))
+                startedFinal = true
 
-                            when (ch) {
-                                '1', '2', '3', '4', '5' -> tone = TONES[ch]!!
-                            }
+            when (ch) {
+                '1', '2', '3', '4', '5' -> tone = TONES[ch]!!
+            }
         }
 
         // apply correction to lowercaseInitial and lowercaseVowel
         var giQuCorrectionApplied = false
         if (lowercaseVowel.length > 1 && (lowercaseInitial.contentEquals("q") && lowercaseVowel[0] == 'u' ||
-            lowercaseInitial.contentEquals("g") && lowercaseVowel[0] == 'i'
-        )) {
+                    lowercaseInitial.contentEquals("g") && lowercaseVowel[0] == 'i'
+                    )) {
             giQuCorrectionApplied = true
             lowercaseInitial.append(lowercaseVowel[0])
             lowercaseVowel.deleteAt(0)
@@ -60,74 +60,81 @@ object VNI {
 
         if (!hasLetters) return input
 
-            // STAGE 2: remove numbers and add diacritics
-            val output = StringBuilder()
-
-            /** Tracks if an 'u' has been converted to 'ư'.
-             * This variable is checked to ensure that only the first 'u' is converted to 'ư' when there are multiple 'u's.
-             * For example, "uou7" should output "ươu", not "ươư"; "uu7" should output "ưu", not "ưư".*/
-            var uHornOutputted = false
-
-            for ((index, ch) in lowercaseInput.withIndex()) {
-                when (ch) {
-                    // handle numbers
-                    '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' -> continue
-
-                    // handle modifiable characters
-                    'a' -> {
-                        if (modifierExists[8]) {
-                            output.append(Common.BREVE_MAP[input[index]])
-                            continue
-                        }
-
-                        if (modifierExists[6]) {
-                            output.append(Common.CIRCUMFLEX_MAP[input[index]])
-                            continue
-                        }
+        // STAGE 2: remove numbers and add diacritics
+        val output = StringBuilder()
+
+        /** Tracks if an 'u' has been converted to 'ư'.
+         * This variable is checked to ensure that only the first 'u' is converted to 'ư' when there are multiple 'u's.
+         * For example, "uou7" should output "ươu", not "ươư"; "uu7" should output "ưu", not "ưư".*/
+        var uHornOutputted = false
+
+        for ((index, ch) in lowercaseInput.withIndex()) {
+            when (ch) {
+                // handle numbers
+                '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' -> continue
+
+                // handle modifiable characters
+                'a' -> {
+                    if (modifierExists[8]) {
+                        output.append(Common.BREVE_MAP[input[index]])
+                        continue
                     }
-                    'd' -> if (modifierExists[9]) {
-                        output.append(Common.STROKE_MAP[input[index]])
+
+                    if (modifierExists[6]) {
+                        output.append(Common.CIRCUMFLEX_MAP[input[index]])
                         continue
                     }
-                    'e', 'o' -> {
-                        if (modifierExists[6]) {
-                            output.append(Common.CIRCUMFLEX_MAP[input[index]])
-                            continue
-                        }
-
-                        if (ch == 'o' && modifierExists[7] &&
-                            !(output.length != 0 && lowercaseVowel.contentEquals("ou") && !startedFinal)) {
-                            output.append(Common.HORN_MAP[input[index]])
-                            continue
-                            }
+                }
+                'd' -> if (modifierExists[9]) {
+                    output.append(Common.STROKE_MAP[input[index]])
+                    continue
+                }
+                'e', 'o' -> {
+                    if (modifierExists[6]) {
+                        output.append(Common.CIRCUMFLEX_MAP[input[index]])
+                        continue
                     }
 
-                    'u' -> if (modifierExists[7] &&
-                    !uHornOutputted &&
-                    !(output.getOrNull(0)?.lowercaseChar() == 'q' && output.length == 1) &&
-                    !(output.length != 0 && lowercaseVowel.contentEquals("uo") && !startedFinal)) {
+                    if (ch == 'o' && modifierExists[7] &&
+                        !(output.length != 0 && lowercaseVowel.contentEquals("ou") && !startedFinal)) {
                         output.append(Common.HORN_MAP[input[index]])
-                        uHornOutputted = true
                         continue
                     }
                 }
 
-                //default behavior: output the char in input
-                output.append(input[index])
+                'u' -> if (modifierExists[7] &&
+                    !uHornOutputted &&
+                    !(output.getOrNull(0)?.lowercaseChar() == 'q' && output.length == 1) &&
+                    !(output.length != 0 && lowercaseVowel.contentEquals("uo") && !startedFinal)) {
+                    output.append(Common.HORN_MAP[input[index]])
+                    uHornOutputted = true
+                    continue
+                }
             }
 
-            // STAGE 3: add tone mark
-            if (tone == null) return output.toString()
+            //default behavior: output the char in input
+            output.append(input[index])
+        }
 
-                //edge case for gi5a > gịa
-                if (lowercaseInput == "gi5a") {
-                    output[1] = tone.map[output[1]] ?: output[1]
-                    return output.toString()
-                }
+        // STAGE 3: add tone mark
+        if (tone == null) return output.toString()
+
+        //edge case for gi5a > gịa
+        if (lowercaseInput == "gi5a") {
+            output[1] = tone.map[output[1]] ?: output[1]
+            return output.toString()
+        }
+
+        // handle errors
+        if (lowercaseVowel.isEmpty() || lowercaseInitial.length + lowercaseVowel.length - 1 >= output.length)
+            return output.toString()
 
-                val toneMarkPosition = Common.getToneMarkPosition(output, lowercaseInitial.length, lowercaseVowel.length)
-                output[toneMarkPosition] = tone.map[output[toneMarkPosition]] ?: output[toneMarkPosition]
+        val toneMarkPosition = Common.getToneMarkPosition(output, lowercaseInitial.length, lowercaseVowel.length)
+        // avoid index out of bounds error
+        if (toneMarkPosition !in 0..<output.length)
+            return output.toString()
+        output[toneMarkPosition] = tone.map[output[toneMarkPosition]] ?: output[toneMarkPosition]
 
-                return output.toString()
+        return output.toString()
     }
 }
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
index 973e68d202..d0d594268d 100644
--- a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VNICombiner.kt
@@ -50,7 +50,11 @@ class VNICombiner: Combiner {
     }
 
     override fun getCombiningStateFeedback(): CharSequence? =
-        VNI.VNIToVietnamese(buffer.toString())
+        try{
+            VNI.VNIToVietnamese(buffer.toString())
+        } catch(e: Exception) {
+            buffer // conversion threw: fall back to the raw buffered keystrokes
+        }
 
     override fun reset() {
         buffer.clear()
diff --git a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt
index 59841c534b..7d9fb66aed 100644
--- a/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt
+++ b/java/src/org/futo/inputmethod/event/combiners/vietnamese/VietTelexCombiner.kt
@@ -35,7 +35,11 @@ class VietTelexCombiner: Combiner {
     }
 
     override fun getCombiningStateFeedback(): CharSequence? =
-        Telex.telexToVietnamese(buffer.toString())
+        try {
+            Telex.telexToVietnamese(buffer.toString())
+        } catch (e: Exception) {
+            buffer // conversion threw: fall back to the raw buffered keystrokes
+        }
 
     override fun reset() {
         buffer.clear()