From 07dd6a9333ed1a74c7fa7da632a85547637e6d90 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Mon, 21 Jul 2025 15:01:10 +0100 Subject: [PATCH 01/21] More math --- Compose | 72 ++++++++++++++++++++++++++++++++++++++-- Compose.md | 16 ++++----- src/xcompose/__init__.py | 5 +-- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/Compose b/Compose index 2ee3347..9ac9528 100644 --- a/Compose +++ b/Compose @@ -35,7 +35,7 @@ include "%L" : " " U200A # HAIR SPACE : " " U200A # HAIR SPACE <0> : "​" U200B # ZERO WIDTH SPACE - : " " U205F # MEDIUM MATHEMATICAL SPACE + : " " U205F # MEDIUM MATHEMATICAL SPACE : " " U3000 # IDEOGRAPHIC SPACE : " " U202F # NARROW NO-BREAK SPACE : "␣" U2423 # OPEN BOX @@ -2636,6 +2636,7 @@ include "%L" <7> : "𜳷" U1CCF7 # OUTLINED DIGIT SEVEN <8> : "𜳸" U1CCF8 # OUTLINED DIGIT EIGHT <9> : "𜳹" U1CCF9 # OUTLINED DIGIT NINE + : "⨟" U2A1F # Z NOTATION SCHEMA COMPOSITION : "Ꝺ" UA779 # LATIN CAPITAL LETTER INSULAR D : "Ꝼ" UA77B # LATIN CAPITAL LETTER INSULAR F @@ -14863,6 +14864,7 @@ include "%L" : "Ɑ" U2C6D # LATIN CAPITAL LETTER ALPHA (alternatively AH) : "Ꞵ" UA7B4 # LATIN CAPITAL LETTER BETA (alternatively BH) : "Ɣ" U0194 # LATIN CAPITAL LETTER GAMMA (alternatively GH) + : "Ɛ" U0190 # LATIN CAPITAL LETTER OPEN EꞫ (alternatively EH) : "Ɩ" U0196 # LATIN CAPITAL LETTER IOTA : "Ꟛ" UA7DA # LATIN CAPITAL LETTER LAMBDA : "Ʊ" U01B1 # LATIN CAPITAL LETTER UPSILON (alternatively UH) @@ -14874,6 +14876,7 @@ include "%L" : "ꞵ" UA7B5 # LATIN SMALL LETTER BETA : "ɣ" U0263 # LATIN SMALL LETTER GAMMA (alternatively gh) : "ẟ" U1E9F # LATIN SMALL LETTER DELTA + : "ɛ" U025B # LATIN SMALL LETTER OPEN E (alternatively eh) : "ɩ" U0269 # LATIN SMALL LETTER IOTA : "ꟛ" UA7DB # LATIN SMALL LETTER LAMBDA : "ʊ" U028A # LATIN SMALL LETTER UPSILON (alternatively uh) @@ -23915,12 +23918,15 @@ include "Logograms" : "⊥" U22A5 # UP TACK : "∧" U2227 # LOGICAL AND : "∨" U2228 # LOGICAL OR + : "⩑" U2A51 # LOGICAL AND WITH DOT ABOVE + : "⩒" U2A52 # LOGICAL OR WITH DOT ABOVE : "⌐" U2310 # REVERSED NOT SIGN (conflicts with ¬, use -,) : "⌙" U2319 # TURNED NOT SIGN : "⫬" U2AEC # DOUBLE STROKE NOT SIGN : "⫭" U2AED # REVERSED DOUBLE STROKE NOT SIGN : "∣" U2223 # DIVIDES : "∤" U2224 # DOES NOT DIVIDE + : "⫮" U2AEE # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH : "∷" U2237 # PROPORTION (AS) : "⋮" U22EE # VERTICAL ELLIPSIS : "⋯" U22EF # MIDLINE HORIZONTAL ELLIPSIS (not ideal but -. is taken by ė̄) @@ -23929,6 +23935,7 @@ include "Logograms" : "∻" U223B # HOMOTHETIC : "∻" U223B # HOMOTHETIC : "≃" U2243 # ASYMPTOTICALLY EQUAL TO + : "≂" U2242 # MINUS TILDE : "≄" U2244 # NOT ASYMPTOTICALLY EQUAL TO : "≅" U2245 # APPROXIMATELY EQUAL TO (ISOMORPHIC) : "≁" U2241 # NOT TILDE @@ -23938,7 +23945,10 @@ include "Logograms" : "≉" U2249 # NOT ALMOST EQUAL TO <3> : "≋" U224B # TRIPLE TILDE : "≔" U2254 # COLON EQUALS + : "⧴" U29F4 # RULE-DELAYED + : "⩳" U2A73 # EQUALS SIGN ABOVE TILDE OPERATOR <2> : "⩴" U2A74 # DOUBLE COLON EQUAL + : "⩷" U2A77 # EQUALS SIGN WITH TWO DOTS ABOVE AND TWO DOTS BELOW : "≕" U2255 # EQUALS COLON : "≛" U225B # STAR EQUALS : "≜" U225C # DELTA EQUAL TO @@ -23952,6 +23962,7 @@ include "Logograms" : "⩱" U2A71 # EQUALS SIGN ABOVE PLUS SIGN <8> <8> : "⯹" U2BF9 # EQUALS SIGN WITH INFINITY BELOW : "⩯" U2A6F # ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT + <4> : "⩸" U2A78 # EQUIVALENT WITH FOUR DOTS ABOVE <2> : "⩵" U2A75 # TWO CONSECUTIVE EQUALS SIGNS <3> : "⩶" U2A76 # THREE CONSECUTIVE EQUALS SIGNS <2> : "≪" U226A # MUCH LESS-THAN @@ -23983,10 +23994,13 @@ include "Logograms" : "≗" U2257 # RING EQUAL TO : "≙" U2259 # ESTIMATES : "≚" U225A # EQUIANGULAR TO + : "⩮" U2A6E # EQUALS WITH ASTERISK : "≦" U2266 # LESS-THAN OVER EQUAL TO : "≧" U2267 # GREATER-THAN OVER EQUAL TO : "∸" U2238 # DOT MINUS + : "⨩" U2A29 # MINUS SIGN WITH COMMA ABOVE : "∹" U2239 # EXCESS (really should swap with ÷ but that's much more useful) + : "⨣" U2A23 # PLUS SIGN WITH CIRCUMFLEX ACCENT ABOVE : "⩪" U2A6A # TILDE OPERATOR WITH DOT ABOVE : "⪪" U2AAA # SMALLER THAN : "⪬" U2AAC # SMALLER THAN OR EQUAL TO @@ -23994,6 +24008,8 @@ include "Logograms" : "⪭" U2AAD # LARGER THAN OR EQUAL TO : "≢" U2262 # NOT IDENTICAL TO + : "⩧" U2A67 # IDENTICAL WITH DOT ABOVE + : "⩭" U2A6D # CONGRUENT WITH DOT ABOVE : "≨" U2268 # LESS-THAN BUT NOT EQUAL TO : "⋦" U22E6 # LESS-THAN BUT NOT EQUIVALENT TO : "⪉" U2A89 # LESS-THAN AND NOT APPROXIMATE @@ -24018,7 +24034,6 @@ include "Logograms" : "≊" U224A # ALMOST EQUAL OR EQUAL TO : "⩬" U2A6C # SIMILAR MINUS SIMILAR : "⩰" U2A70 # APPROXIMATELY EQUAL OR EQUAL TO - : "⩳" U2A73 # EQUALS SIGN ABOVE TILDE OPERATOR : "⋜" U22DC # EQUAL TO OR LESS-THAN : "⋝" U22DD # EQUAL TO OR GREATER-THAN : "≶" U2276 # LESS-THAN OR GREATER-THAN @@ -24027,6 +24042,14 @@ include "Logograms" : "⋛" U22DB # GREATER-THAN EQUAL TO OR LESS-THAN : "⥶" U2976 # LESS-THAN ABOVE LEFTWARDS ARROW : "⥸" U2978 # GREATER-THAN ABOVE RIGHTWARDS ARROW + : "⩻" U2A7B # LESS-THAN WITH QUESTION MARK ABOVE + : "⩼" U2A7C # GREATER-THAN WITH QUESTION MARK ABOVE + : "⩿" U2A7F # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE + : "⪁" U2A81 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE + : "⪃" U2A83 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT + : "⪀" U2A80 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE + : "⪂" U2A82 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE + : "⪄" U2A84 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT : "⪅" U2A85 # LESS-THAN OR APPROXIMATE : "⪆" U2A86 # GREATER-THAN OR APPROXIMATE : "⪋" U2A8B # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN @@ -24092,6 +24115,31 @@ include "Logograms" : "⟎" U27CE # SQUARED LOGICAL AND : "⟏" U27CF # SQUARED LOGICAL OR : "⊻" U22BB # XOR (alternatively xor) + : "⨭" U2A2D # PLUS SIGN IN LEFT HALF CIRCLE + : "⨮" U2A2E # PLUS SIGN IN RIGHT HALF CIRCLE + : "⨴" U2A34 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE + : "⨵" U2A35 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE + + : "∲" U2232 # CLOCKWISE CONTOUR INTEGRAL + : "∳" U2233 # ANTICLOCKWISE CONTOUR INTEGRAL + : "∱" U2231 # CLOCKWISE INTEGRAL + : "⨑" U2A11 # ANTICLOCKWISE INTEGRATION + : "⨗" U2A17 # INTEGRAL WITH LEFTWARDS ARROW WITH HOOK + : "⨍" U2A0D # FINITE PART INTEGRAL + : "⨘" U2A18 # INTEGRAL WITH TIMES SIGN + : "⨎" U2A0E # INTEGRAL WITH DOUBLE STROKE + : "⨏" U2A0F # INTEGRAL AVERAGE WITH SLASH + : "⨖" U2A16 # QUATERNION INTEGRAL OPERATOR (conflicts with ⌷, use ][) + : "⨙" U2A19 # INTEGRAL WITH INTERSECTION + : "⨚" U2A1A # INTEGRAL WITH UNION + : "⨕" U2A15 # INTEGRAL AROUND A POINT OPERATOR (use oint for ∮) + : "⨐" U2A10 # CIRCULATION FUNCTION + : "⨒" U2A12 # LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE + : "⨓" U2A13 # LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE + : "⨔" U2A14 # LINE INTEGRATION NOT INCLUDING THE POLE + : "⨛" U2A1B # INTEGRAL WITH OVERBAR + : "⨜" U2A1C # INTEGRAL WITH UNDERBAR + : "⨊" U2A0A # MODULO TWO SUM # LaTeX-inspired names @@ -24135,6 +24183,7 @@ include "Logograms"

: "∩" U2229 # SET INTERSECTION (conflicts with ǎ, use va) (use hat for 👒, bbcap for 🧢)

: "⋒" U22D2 # DOUBLE INTERSECTION

: "⋂" U22C2 # N-ARY INTERSECTION + <2>

: "⩋" U2A4B # INTERSECTION BESIDE AND JOINED WITH INTERSECTION : "℄" U2104 # CENTRE LINE SYMBOL

: "∁" U2201 # COMPLEMENT (conflicts with ǒ, use vo)

: "∐" U2210 # N-ARY COPRODUCT (conflicts with ǒ, use vo) @@ -24145,6 +24194,7 @@ include "Logograms"

: "∪" U222A # SET UNION (conflicts with ǔ, use vu)

: "⋓" U22D3 # DOUBLE UNION

: "⋃" U22C3 # N-ARY UNION + <2>

: "⩊" U2A4A # UNION BESIDE AND JOINED WITH UNION : "⌀" U2300 # DIAMETER SIGN (overrides di) : "⋄" U22C4 # DIAMOND OPERATOR (conflicts with ⌀, use diameter) (use gem for 💎) : "⫾" U2AFE # WHITE VERTICAL BAR (conflicts with ⌀, use diameter) (DIJKSTRA'S CHOICE) @@ -24159,6 +24209,7 @@ include "Logograms" : "∫" U222B # INTEGRAL : "∈" U2208 # ELEMENT OF (alternatively isin) : "∈" U2208 # ELEMENT OF + : "⨝" U2A1D # JOIN : "∧" U2227 # LOGICAL AND (alternatively /\) : "⩓" U2A53 # DOUBLE LOGICAL AND : "⋀" U22C0 # N-ARY LOGICAL AND @@ -24259,6 +24310,10 @@ include "Logograms" : "≀" U2240 # WREATH PRODUCT : "⊻" U22BB # XOR (conflicts with ¤, use ox) +

: "⩇" U2A47 # INTERSECTION ABOVE UNION +

: "⩉" U2A49 # INTERSECTION ABOVE BAR ABOVE UNION +

: "⩆" U2A46 # UNION ABOVE INTERSECTION +

: "⩈" U2A48 # UNION ABOVE BAR ABOVE INTERSECTION

: "⫓" U2AD3 # SUBSET ABOVE SUPERSET

: "⫔" U2AD4 # SUPERSET ABOVE SUBSET (use supsub for ⫗) : "⫕" U2AD5 # SUBSET ABOVE SUBSET @@ -26253,6 +26308,9 @@ include "Logograms" : "𜲰𜲱" U1CCB0 U1CCB1 # FROWNING FACE (BOTTOM)

: "𜳆𜳇" U1CCC6 U1CCC7 # CHESS BISHOP (TOP)

: "𜳈𜳉" U1CCC8 U1CCC9 # CHESS BISHOP (BOTTOM) + : "⌠" U2320 # TOP HALF INTEGRAL + : "⎮" U23AE # INTEGRAL EXTENSION + : "⌡" U2321 # BOTTOM HALF INTEGRAL : "𜲺𜲻" U1CCBA U1CCBB # CHESS KING (TOP) : "𜲼𜲽" U1CCBC U1CCBD # CHESS KING (BOTTOM) : "𜳊𜳋" U1CCCA U1CCCB # CHESS KNIGHT (TOP) @@ -27321,6 +27379,16 @@ include "Logograms" : "⯮" U2BEE # RIGHTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS : "⯯" U2BEF # DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS +# Mathematical operators that best fit here + + : "⊶" U22B6 # ORIGINAL OF + : "⊷" U22B7 # IMAGE OF + : "⊸" U22B8 # MULTIMAP (alternatively just --o) + : "⟜" U27DC # LEFT MULTIMAP + : "⧟" U29DF # DOUBLE-ENDED MULTIMAP + : "⫯" U2AEF # VERTICAL LINE WITH CIRCLE ABOVE + : "⫰" U2AF0 # VERTICAL LINE WITH CIRCLE BELOW + ########################## # Box drawing characters # ########################## diff --git a/Compose.md b/Compose.md index c42d01f..dcdbd79 100644 --- a/Compose.md +++ b/Compose.md @@ -19,16 +19,16 @@ The file aims for memorability and consistency. As a result, a small number of t * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2734) +### Latin script characters (2735) * **International Phonetic Alphabet** (128): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (513): Ƿ Ȝ ␢ ȵ etc -* **Mathematical alphanumerics** (792): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc +* **Mathematical alphanumerics** (793): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc * **Superscripts and subscripts** (309): ᵃ ᴬ ₐ ᴀ ◌ͣ etc * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32885) +### Non-Latin script characters (32887) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1989): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3191): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc @@ -37,7 +37,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Cyrillic**, Glagolitic & Old Permic (565): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1308): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (207): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc -* **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1132): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc +* **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (866): 「レディット」 etc * **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (485): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc @@ -50,10 +50,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (4874) +### Symbols (4933) * **Emoji** (1515): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc -* **Sprites** (147): 🗫 🯅 ㋡ etc -* **Math and science** (402): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. +* **Sprites** (150): 🗫 🯅 ㋡ etc +* **Math and science** (451): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. * **APL** (107): ⍟ ⍫ ⍉ etc * **Technical** (106): ⏻ ⎙ ⌘ etc * **Numerals** (338): 𝍸𝍷 𝍵 Ⅻ ↁ etc @@ -62,6 +62,6 @@ The file aims for memorability and consistency. As a result, a small number of t * **Astrology** (81): ♈ 🐉 🌒 ☿ ♇ etc * **I Ching** (166): ䷇ ☰☷☲☵ etc * **Hieroglyphs** (300): 𓁖 𓁹 𓃠 etc -* **Arrows** (312): ↦ ↺ ⇄ ⇼ ⏎ ⇬ etc +* **Arrows** (319): ↦ ↺ ⇄ ⇼ ⏎ ⇬ etc * **Geometric shapes** (240): ⬛ ⬚ 🟣 ◐ ◭ ◈ ✶ etc * **Box drawing** (665): ╞╦╕ etc diff --git a/src/xcompose/__init__.py b/src/xcompose/__init__.py index eea019c..57af303 100644 --- a/src/xcompose/__init__.py +++ b/src/xcompose/__init__.py @@ -21,7 +21,7 @@ KEYSYMS: set[str] = set() -def read_keysms(file: Path | None = None) -> None: +def read_keysyms(file: Path | None = None) -> None: """Populate keysym mappings. If the file is unspecified, then default to then value of $KEYSYMDEF, then to KEYSYM_DEF_DEFAULT_PATH, then to the packaged resource.""" @@ -211,6 +211,7 @@ def add( comment: str | None = None, ): """Utility function to simplify calling add independently.""" + if not KEYSYMS: read_keysyms() add_fn( args=argparse.Namespace( value=value, @@ -528,7 +529,7 @@ def main() -> None: if args.modifier_key == ANY_KEY: args.modifier_key = None - read_keysms(args.keysymdef) + read_keysyms(args.keysymdef) args.func(args) From 816654777f1427451c9025518f533ccdd6d0c613 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Tue, 22 Jul 2025 14:32:09 +0100 Subject: [PATCH 02/21] More math --- Compose | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- Compose.md | 8 +++--- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/Compose b/Compose index 9ac9528..14ead37 100644 --- a/Compose +++ b/Compose @@ -2636,6 +2636,7 @@ include "%L" <7> : "𜳷" U1CCF7 # OUTLINED DIGIT SEVEN <8> : "𜳸" U1CCF8 # OUTLINED DIGIT EIGHT <9> : "𜳹" U1CCF9 # OUTLINED DIGIT NINE + : "⦂" U2982 # Z NOTATION TYPE COLON : "⨟" U2A1F # Z NOTATION SCHEMA COMPOSITION : "Ꝺ" UA779 # LATIN CAPITAL LETTER INSULAR D @@ -23978,6 +23979,7 @@ include "Logograms" : "⋈" U22C8 # BOWTIE (JOIN) : "⋉" U22C9 # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT : "⋊" U22CA # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT + : "⧖" U29D6 # WHITE HOURGLASS : "⋖" U22D6 # LESS-THAN WITH DOT : "⋗" U22D7 # GREATER-THAN WITH DOT : "⊸" U22B8 # MULTIMAP @@ -23988,6 +23990,11 @@ include "Logograms" : "⪕" U2A95 # SLANTED EQUAL TO OR LESS-THAN (conflicts with useless \) : "⪖" U2A96 # SLANTED EQUAL TO OR GREATER-THAN : "⪥" U2AA5 # GREATER-THAN BESIDE LESS-THAN (conflicts with ⋄, use diamond) + : "⪤" U2AA4 # GREATER-THAN OVERLAPPING LESS-THAN + : "⪦" U2AA6 # LESS-THAN CLOSED BY CURVE + : "⪧" U2AA7 # GREATER-THAN CLOSED BY CURVE + : "⪨" U2AA8 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL + : "⪩" U2AA9 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL : "≐" U2250 # APPROACHES THE LIMIT : "≑" U2251 # GEOMETRICALLY EQUAL TO @@ -24004,8 +24011,14 @@ include "Logograms" : "⩪" U2A6A # TILDE OPERATOR WITH DOT ABOVE : "⪪" U2AAA # SMALLER THAN : "⪬" U2AAC # SMALLER THAN OR EQUAL TO + : "⩹" U2A79 # LESS-THAN WITH CIRCLE INSIDE + : "⦓" U2993 # LEFT ARC LESS-THAN BRACKET + : "⦖" U2996 # DOUBLE RIGHT ARC LESS-THAN BRACKET : "⪫" U2AAB # LARGER THAN : "⪭" U2AAD # LARGER THAN OR EQUAL TO + : "⩺" U2A7A # GREATER-THAN WITH CIRCLE INSIDE + : "⦔" U2994 # RIGHT ARC GREATER-THAN BRACKET + : "⦕" U2995 # DOUBLE LEFT ARC GREATER-THAN BRACKET : "≢" U2262 # NOT IDENTICAL TO : "⩧" U2A67 # IDENTICAL WITH DOT ABOVE @@ -24120,6 +24133,24 @@ include "Logograms" : "⨴" U2A34 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE : "⨵" U2A35 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE +

: "⩀" U2A40 # INTERSECTION WITH DOT +

: "⩃" U2A43 # INTERSECTION WITH OVERBAR +

: "⩄" U2A44 # INTERSECTION WITH LOGICAL AND +

: "⩄" U2A44 # INTERSECTION WITH LOGICAL AND +

: "⊌" U228C # MULTISET +

: "⊍" U228D # MULTISET MULTIPLICATION +

: "⊎" U228E # MULTISET UNION +

: "⩁" U2A41 # UNION WITH MINUS SIGN +

: "⩂" U2A42 # UNION WITH OVERBAR +

: "⩅" U2A45 # UNION WITH LOGICAL OR +

: "⩅" U2A45 # UNION WITH LOGICAL OR + : "⋵" U22F5 # ELEMENT OF WITH DOT ABOVE + : "⋶" U22F6 # ELEMENT OF WITH OVERBAR + : "⋲" U22F2 # ELEMENT OF WITH LONG HORIZONTAL STROKE + : "⋸" U22F8 # ELEMENT OF WITH UNDERBAR + : "⋹" U22F9 # ELEMENT OF WITH TWO HORIZONTAL STROKES + : "⋳" U22F3 # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE + : "∲" U2232 # CLOCKWISE CONTOUR INTEGRAL : "∳" U2233 # ANTICLOCKWISE CONTOUR INTEGRAL : "∱" U2231 # CLOCKWISE INTEGRAL @@ -24178,6 +24209,7 @@ include "Logograms" : "∠" U2220 # ANGLE : "∶" U2236 # RATIO (AS) : "≬" U226C # BETWEEN (conflicts with ĕ, use ue) + : "⋈" U22C8 # BOWTIE (JOIN) (conflicts with ŏ, use uo) (alternatively |X|)

: "≏" U224F # DIFFERENCE BETWEEN (conflicts with ŭ, use uu)

: "≎" U224E # GEOMETRICALLY EQUIVALENT TO

: "∩" U2229 # SET INTERSECTION (conflicts with ǎ, use va) (use hat for 👒, bbcap for 🧢) @@ -24199,9 +24231,15 @@ include "Logograms" : "⋄" U22C4 # DIAMOND OPERATOR (conflicts with ⌀, use diameter) (use gem for 💎) : "⫾" U2AFE # WHITE VERTICAL BAR (conflicts with ⌀, use diameter) (DIJKSTRA'S CHOICE) : "⫿" U2AFF # N-ARY WHITE VERTICAL BAR (DIJKSTRA'S CHOICE) + : "⫙" U2AD9 # ELEMENT OF OPENING DOWNWARDS : "≍" U224D # EQUIVALENT TO (use =_ for ≡) : "∃" U2203 # THERE EXISTS + : "⧓" U29D3 # BLACK BOWTIE + : "⧗" U29D7 # BLACK HOURGLASS : "∀" U2200 # FOR ALL + : "⫸" U2AF8 # TRIPLE NESTED GREATER-THAN + : "⪢" U2AA2 # DOUBLE NESTED GREATER-THAN + : "⧖" U29D6 # WHITE HOURGLASS (use done for ⌛ and wait for ⏳) : "⇔" U21D4 # LEFT RIGHT DOUBLE ARROW : "⨌" U2A0C # QUADRUPLE INTEGRAL OPERATOR : "∭" U222D # TRIPLE INTEGRAL @@ -24213,9 +24251,16 @@ include "Logograms" : "∧" U2227 # LOGICAL AND (alternatively /\) : "⩓" U2A53 # DOUBLE LOGICAL AND : "⋀" U22C0 # N-ARY LOGICAL AND + : "∋" U220B # CONTAINS AS MEMBER (alternatively ni␣) + : "⧑" U29D1 # BOWTIE WITH LEFT HALF BLACK + : "⧔" U29D4 # TIMES WITH LEFT HALF BLACK + : "⫷" U2AF7 # TRIPLE NESTED LESS-THAN + : "⟌" U27CC # LONG DIVISION : "∨" U2228 # LOGICAL OR (alternatively \/) : "⩔" U2A54 # DOUBLE LOGICAL OR : "⋁" U22C1 # N-ARY LOGICAL OR + : "⪡" U2AA1 # DOUBLE NESTED LESS-THAN + : "⋉" U22C9 # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT (conflicts with useless <) (alternatively |x) : "∡" U2221 # MEASURED ANGLE (not ideal) : "⊼" U22BC # NAND : "≭" U226D # NOT EQUIVALENT TO (use /⎄=_ for ≢) @@ -24246,14 +24291,20 @@ include "Logograms" : "⨂" U2A02 # N-ARY CIRCLED TIMES OPERATOR

: "⅊" U214A # PROPERTY LINE

: "≺" U227A # PRECEDES (alternatively -< based shortcuts) +

: "⪻" U2ABB # DOUBLE PRECEDES

: "≼" U227C # PRECEDES OR EQUAL TO

: "≾" U227E # PRECEDES OR EQUIVALENT TO

: "∏" U220F # N-ARY PRODUCT

: "∝" U221D # PROPORTIONAL TO : "∎" U220E # END OF PROOF : "∟" U221F # RIGHT ANGLE + : "⧒" U29D2 # BOWTIE WITH RIGHT HALF BLACK + : "⧕" U29D5 # TIMES WITH RIGHT HALF BLACK + : "⋊" U22CA # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT (alternatively x|) : "∿" U223F # SINE WAVE : "⨋" U2A0B # SUMMATION WITH INTEGRAL + : "∊" U220A # SMALL ELEMENT OF (conflicts with ℠, use SM) + : "∍" U220D # SMALL CONTAINS AS MEMBER (conflicts with ℠, use SM)

: "∢" U2222 # SPHERICAL ANGLE

: "⊓" U2293 # SQUARE CAP

: "⩎" U2A4E # DOUBLE SQUARE INTERSECTION @@ -24283,6 +24334,7 @@ include "Logograms" : "⫋" U2ACB # SUBSET OF ABOVE NOT EQUAL TO : "⟈" U27C8 # REVERSE SOLIDUS PRECEDING SUBSET : "≻" U227B # SUCCEEDS (alternatively >- based shortcuts) + : "⪼" U2ABC # DOUBLE SUCCEEDS : "≽" U227D # SUCCEEDS OR EQUAL TO : "≿" U227F # SUCCEEDS OR EQUIVALENT TO : "∑" U2211 # N-ARY SUMMATION @@ -24305,7 +24357,10 @@ include "Logograms"

: "⟉" U27C9 # SUPERSET PRECEDING SOLIDUS : "×" U00D7 # MULTIPLICATION SIGN (alternatively xx) : "⨉" U2A09 # N-ARY TIMES OPERATOR + : "⧿" U29FF # MINY (conflicts with ™, use TM) +

: "⧾" U29FE # TINY : "⨃" U2A03 # N-ARY UNION OPERATOR WITH DOT +

: "⟒" U27D2 # ELEMENT OF OPENING UPWARDS

: "⨄" U2A04 # N-ARY UNION OPERATOR WITH PLUS : "≀" U2240 # WREATH PRODUCT : "⊻" U22BB # XOR (conflicts with ¤, use ox) @@ -24319,6 +24374,22 @@ include "Logograms" : "⫕" U2AD5 # SUBSET ABOVE SUBSET

: "⫖" U2AD6 # SUPERSET ABOVE SUPERSET +# Z notation (start with z) + + : "⦂" U2982 # Z NOTATION TYPE COLON + : "⦇" U2987 # Z NOTATION LEFT IMAGE BRACKET + : "⦈" U2988 # Z NOTATION RIGHT IMAGE BRACKET + : "⦉" U2989 # Z NOTATION LEFT BINDING BRACKET (conflicts with ž, use vz) + : "⦊" U298A # Z NOTATION RIGHT BINDING BRACKET + : "⋿" U22FF # Z NOTATION BAG MEMBERSHIP + : "⩤" U2A64 # Z NOTATION DOMAIN ANTIRESTRICTION +

: "⨠" U2A20 # Z NOTATION SCHEMA PIPING +

: "⨡" U2A21 # Z NOTATION SCHEMA PROJECTION +

: "⨾" U2A3E # Z NOTATION RELATIONAL COMPOSITION + : "⩥" U2A65 # Z NOTATION RANGE ANTIRESTRICTION +

: "⨟" U2A1F # Z NOTATION SCHEMA COMPOSITION +

: "⦁" U2981 # Z NOTATION SPOT + # Logic symbols (start with &) : "¬" U00AC # NOT SIGN (alternatively -,) @@ -24961,7 +25032,7 @@ include "Logograms"

: "✋" U270B # RAISED HAND (STOP) : "💪" U1F4AA # FLEXED BICEPS (STRONG) : "💦" U1F4A6 # SPLASHING SWEAT SYMBOL - : "🤏" U1F90F # PINCHING HAND (TINY) + : "🤏" U1F90F # PINCHING HAND (TINY) (use tplus for ⧾) : "👅" U1F445 # TONGUE : "🦷" U1F9B7 # TOOTH : "✍️" U270D UFE0F # WRITING HAND EMOJI @@ -29496,7 +29567,6 @@ include "Logograms" : "⌅" U2305 # PROJECTIVE : "⎊" U238A # CIRCLED TRIANGLE DOWN : "⎉" U2389 # CIRCLED HORIZONTAL BAR WITH NOTCH - : "⎄" U2384 # COMPOSITION SYMBOL # keysyms (useful for editing this file, though other \ shortcuts take precedence) diff --git a/Compose.md b/Compose.md index dcdbd79..69acb02 100644 --- a/Compose.md +++ b/Compose.md @@ -19,10 +19,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2735) +### Latin script characters (2736) * **International Phonetic Alphabet** (128): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (513): Ƿ Ȝ ␢ ȵ etc -* **Mathematical alphanumerics** (793): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc +* **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc * **Superscripts and subscripts** (309): ᵃ ᴬ ₐ ᴀ ◌ͣ etc * **Multigraphs and ligatures** (389): ʣ ㏈ etc @@ -50,10 +50,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (4933) +### Symbols (4990) * **Emoji** (1515): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc * **Sprites** (150): 🗫 🯅 ㋡ etc -* **Math and science** (451): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. +* **Math and science** (508): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. * **APL** (107): ⍟ ⍫ ⍉ etc * **Technical** (106): ⏻ ⎙ ⌘ etc * **Numerals** (338): 𝍸𝍷 𝍵 Ⅻ ↁ etc From dbde70c5e0d67ddfcb62a2faadcf8d39080f04f8 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Wed, 23 Jul 2025 11:37:29 +0100 Subject: [PATCH 03/21] Brackets --- Compose | 29 +++++++++++++++++++++++++++-- Compose.md | 8 ++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Compose b/Compose index 14ead37..09319b5 100644 --- a/Compose +++ b/Compose @@ -73,22 +73,43 @@ include "%L" : "⎹" U23B9 # RIGHT VERTICAL BOX LINE : "⟅" U27C5 # LEFT S-SHAPED BAG DELIMITER : "⟆" U27C6 # RIGHT S-SHAPED BAG DELIMITER + <0> : "⟦" U27E6 # MATHEMATICAL LEFT WHITE SQUARE BRACKET : "⟦" U27E6 # MATHEMATICAL LEFT WHITE SQUARE BRACKET + <0> : "⟧" U27E7 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET : "⟧" U27E7 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET : "⟨" U27E8 # MATHEMATICAL LEFT ANGLE BRACKET : "⟩" U27E9 # MATHEMATICAL RIGHT ANGLE BRACKET : "⟪" U27EA # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET : "⟫" U27EB # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET - : "⟬" U27EC # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET - : "⟭" U27ED # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET + <0> : "⟬" U27EC # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET + <0> : "⟭" U27ED # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET + : "⟮" U27EE # MATHEMATICAL LEFT FLATTENED PARENTHESIS + : "⟯" U27EF # MATHEMATICAL RIGHT FLATTENED PARENTHESIS + <0> : "⦃" U2983 # LEFT WHITE CURLY BRACKET : "⦃" U2983 # LEFT WHITE CURLY BRACKET + <0> : "⦄" U2984 # RIGHT WHITE CURLY BRACKET : "⦄" U2984 # RIGHT WHITE CURLY BRACKET + <0> : "⦅" U2985 # LEFT WHITE PARENTHESIS + <0> : "⦆" U2986 # RIGHT WHITE PARENTHESIS + : "⦍" U298D # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER + : "⦎" U298E # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + : "⦏" U298F # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + : "⦐" U2990 # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER + <1> : "⦗" U2997 # LEFT BLACK TORTOISE SHELL BRACKET + <1> : "⦘" U2998 # RIGHT BLACK TORTOISE SHELL BRACKET + : "⧼" U29FC # LEFT-POINTING CURVED ANGLE BRACKET + : "⧽" U29FD # RIGHT-POINTING CURVED ANGLE BRACKET : "⫽" U2AFD # DOUBLE SOLIDUS OPERATOR : "⫽" U2AFD # DOUBLE SOLIDUS OPERATOR : "⫽" U2AFD # DOUBLE SOLIDUS OPERATOR (conflicts with useless \) : "⸨" U2E28 # LEFT DOUBLE PARENTHESIS (conflicts with useless [) : "⸩" U2E29 # RIGHT DOUBLE PARENTHESIS (conflicts with useless ]) + : "⦋" U298B # LEFT SQUARE BRACKET WITH UNDERBAR + : "⦌" U298C # RIGHT SQUARE BRACKET WITH UNDERBAR + : "⦑" U2991 # LEFT ANGLE BRACKET WITH DOT + : "⦒" U2992 # RIGHT ANGLE BRACKET WITH DOT + ########### # Bullets # ########### @@ -23886,6 +23907,8 @@ include "Logograms" : "⨰" U2A30 # MULTIPLICATION SIGN WITH DOT ABOVE : "⨱" U2A31 # MULTIPLICATION SIGN WITH UNDERBAR : "⋅" U22C5 # DOT OPERATOR + : "∗" U2217 # ASTERISK OPERATOR + : "∙" U2219 # BULLET OPERATOR : "⨯" U2A2F # VECTOR OR CROSS PRODUCT : "⋇" U22C7 # DIVISION TIMES <3> : "∛" U221B # CUBE ROOT @@ -24114,6 +24137,8 @@ include "Logograms" : "⪹" U2AB9 # PRECEDES ABOVE NOT ALMOST EQUAL TO : "⋞" U22DE # EQUAL TO OR PRECEDES <2> : "⪻" U2ABB # DOUBLE PRECEDES + : "⋎" U22CE # CURLY LOGICAL OR (conflicts with ↓, use |v) + : "⋏" U22CF # CURLY LOGICAL AND (conflicts with ↑, use ^|) : "⊕" U2295 # CIRCLED PLUS : "⊖" U2296 # CIRCLED MINUS (conflicts with useless {) diff --git a/Compose.md b/Compose.md index 69acb02..c10f107 100644 --- a/Compose.md +++ b/Compose.md @@ -9,10 +9,10 @@ The file aims for memorability and consistency. As a result, a small number of t ## Table of contents and examples -### Common script characters (587) +### Common script characters (603) * **Spaces** (15): NBSP MMSP ZWSP etc * **Dashes** (12): – — ⁓ ⸻ etc -* **Brackets** (24): ⟨ ⟦ ⸨ ⌈ ⫽ etc +* **Brackets** (40): ⟨ ⟦ ⸨ ⌈ ⫽ etc * **Bullets** (11): • ‣ ⁃ ◉ etc * **General punctuation** (60): ⁁ ⁂ ⸎ etc * **Currency symbols** (20): ₱ ₿ ₪ etc @@ -50,10 +50,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (4990) +### Symbols (4994) * **Emoji** (1515): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc * **Sprites** (150): 🗫 🯅 ㋡ etc -* **Math and science** (508): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. +* **Math and science** (512): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. * **APL** (107): ⍟ ⍫ ⍉ etc * **Technical** (106): ⏻ ⎙ ⌘ etc * **Numerals** (338): 𝍸𝍷 𝍵 Ⅻ ↁ etc From 6f5699bb2c049775ebbdb76ea58d793ca09da8dc Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Wed, 23 Jul 2025 13:52:39 +0100 Subject: [PATCH 04/21] More --- Compose | 21 ++++++++++++++++++++- Compose.md | 18 +++++++++--------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/Compose b/Compose index 09319b5..795c10c 100644 --- a/Compose +++ b/Compose @@ -809,6 +809,7 @@ include "%L" : "ɘ" U0258 # LATIN SMALL LETTER REVERSED E (conflicts with ě, use ve) : "ʕ" U0295 # LATIN LETTER PHARYNGEAL VOICED FRICATIVE : "ʢ" U02A2 # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE + : "ʖ" U0296 # LATIN LETTER INVERTED GLOTTAL STOP : "ƾ" U01BE # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE : "ɖ" U0256 # LATIN SMALL LETTER D WITH TAIL @@ -3338,6 +3339,10 @@ include "%L" : "ʧ" U02A7 # LATIN SMALL LETTER TESH DIGRAPH : "㎂" U3382 # SQUARE MU A : "㎌" U338C # SQUARE MU F + : "㎍" U338D # SQUARE MU G + : "㎕" U3395 # SQUARE MU L + : "㎛" U339B # SQUARE MU M + : "㎲" U33B2 # SQUARE MU S : "㎶" U33B6 # SQUARE MU V : "㎼" U33BC # SQUARE MU W : "ſt" UFB05 # LATIN SMALL LIGATURE LONG S T @@ -3927,6 +3932,7 @@ include "%L" : "ٓ" U0653 # ARABIC MADDAH ABOVE (conflicts with ą, use ,a) <2> : "ٔ" U0654 # ARABIC HAMZA ABOVE (conflicts with ą, use ,a) <2> : "ٕ" U0655 # ARABIC HAMZA BELOW (conflicts with ą, use ,a) + <2> : "ٟ" U065F # ARABIC WAVY HAMZA BELOW (conflicts with ą, use ,a) : "ً" U064B # ARABIC FATHATAN (conflicts with ą, use ,a) : "ٍ" U064D # ARABIC KASRATAN (conflicts with ą, use ,a) : "ٍ" U064D # ARABIC KASRATAN (conflicts with ą, use ,a) @@ -3966,8 +3972,10 @@ include "%L" : "آ" U0622 # ARABIC LETTER ALEF WITH MADDA ABOVE <2> : "أ" U0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE + <2> : "ٲ" U0672 # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE <2> : "إ" U0625 # ARABIC LETTER ALEF WITH HAMZA BELOW <2> : "ݳ" U0773 # ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE + <2> : "ٳ" U0673 # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW <3> : "ݴ" U0774 # ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE : "ࡺ" U087A # ARABIC LETTER ALEF WITH DOT ABOVE @@ -7684,6 +7692,7 @@ include "%L" : "ਃ" U0A03 # GURMUKHI SIGN VISARGA (conflicts) : "਼" U0A3C # GURMUKHI SIGN NUKTA (conflicts) : "੍" U0A4D # GURMUKHI SIGN VIRAMA (conflicts) + : "ੰ" U0A70 # GURMUKHI TIPPI (conflicts) # Numerals @@ -10995,6 +11004,10 @@ include "%L" : "һ" U04BB # CYRILLIC SMALL LETTER SHHA (conflict) : "Ꚕ" UA694 # CYRILLIC CAPITAL LETTER HWE : "ꚕ" UA695 # CYRILLIC SMALL LETTER HWE + <1> : "Ӏ" U04C0 # CYRILLIC LETTER PALOCHKA + <1> : "ӏ" U04CF # CYRILLIC SMALL LETTER PALOCHKA + : "ҡ" U04A1 # CYRILLIC SMALL LETTER BASHKIR KA + : "Ҡ" U04A0 # CYRILLIC CAPITAL LETTER BASHKIR KA : "Ԕ" U0514 # CYRILLIC CAPITAL LETTER LHA : "Ԕ" U0514 # CYRILLIC CAPITAL LETTER LHA : "ԕ" U0515 # CYRILLIC SMALL LETTER LHA @@ -11219,7 +11232,8 @@ include "%L" : "ꚝ" UA69D # MODIFIER LETTER CYRILLIC SOFT SIGN (conflicts) : "𞁈" U1E048 # MODIFIER LETTER CYRILLIC SMALL E (conflict) : "𞁉" U1E049 # MODIFIER LETTER CYRILLIC SMALL YU (conflict) - : "𞁌" U1E04C # MODIFIER LETTER CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I + : "𞁌" U1E04C # MODIFIER LETTER CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I + <1> : "𞁐" U1E050 # MODIFIER LETTER CYRILLIC SMALL PALOCHKA : "𞁍" U1E04D # MODIFIER LETTER CYRILLIC SMALL JE : "𞁫" U1E06B # MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER (conflict) : "𞁭" U1E06D # MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE (conflict) @@ -14939,6 +14953,7 @@ include "%L" : "ლ" U10DA # GEORGIAN LETTER LAS : "მ" U10DB # GEORGIAN LETTER MAN : "ნ" U10DC # GEORGIAN LETTER NAR + : "ჼ" U10FC # MODIFIER LETTER GEORGIAN NAR : "ო" U10DD # GEORGIAN LETTER ON

: "პ" U10DE # GEORGIAN LETTER PAR : "ჟ" U10DF # GEORGIAN LETTER ZHAR (ž) @@ -17063,6 +17078,8 @@ include "HangulSyllables" <3>

: "ퟺ" UD7FA # HANGUL JONGSEONG PHIEUPH-SIOS <3>

: "ퟻ" UD7FB # HANGUL JONGSEONG PHIEUPH-THIEUTH + : "ㅥ" U3165 # HANGUL LETTER SSANGNIEUN + : "ㅦ" U3166 # HANGUL LETTER NIEUN-TIKEUT : "ㅧ" U3167 # HANGUL LETTER NIEUN-SIOS : "ㅩ" U3169 # HANGUL LETTER RIEUL-KIYEOK-SIOS : "ㅪ" U316A # HANGUL LETTER RIEUL-TIKEUT @@ -20932,6 +20949,8 @@ include "HangulSyllables" # Other signs + : "་" U0F0B # TIBETAN MARK INTERSYLLABIC TSHEG + : "༌" U0F0C # TIBETAN MARK DELIMITER TSHEG BSTAR : "།" U0F0D # TIBETAN MARK SHAD <2> : "༎" U0F0E # TIBETAN MARK NYIS SHAD : "༏" U0F0F # TIBETAN MARK TSHEG SHAD diff --git a/Compose.md b/Compose.md index c10f107..2c6debd 100644 --- a/Compose.md +++ b/Compose.md @@ -19,8 +19,8 @@ The file aims for memorability and consistency. As a result, a small number of t * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2736) -* **International Phonetic Alphabet** (128): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc +### Latin script characters (2737) +* **International Phonetic Alphabet** (129): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (513): Ƿ Ȝ ␢ ȵ etc * **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc @@ -28,25 +28,25 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32887) -* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1989): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc +### Non-Latin script characters (32901) +* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3191): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc -* **Cyrillic**, Glagolitic & Old Permic (565): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc +* **Cyrillic**, Glagolitic & Old Permic (570): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1308): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc -* **Georgian** & Caucasian Albanian (207): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc +* **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (866): 「レディット」 etc * **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (485): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc -* **Korean** (11736): 레딧 etc +* **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (382): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (502): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2430): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2432): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From 8aa1b9343792f5e8145a09c8b8f308642b3efbb7 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Thu, 24 Jul 2025 10:28:15 +0100 Subject: [PATCH 05/21] Tones --- Compose | 22 ++++++++++++++++++++++ Compose.md | 10 +++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Compose b/Compose index 795c10c..a95d0df 100644 --- a/Compose +++ b/Compose @@ -904,6 +904,17 @@ include "%L" <2> : "꜕" UA715 # MODIFIER LETTER LOW LEFT-STEM TONE BAR <1> : "꜖" UA716 # MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR + <5> : "꜈" UA708 # MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR + <4> : "꜉" UA709 # MODIFIER LETTER HIGH DOTTED TONE BAR + <3> : "꜊" UA70A # MODIFIER LETTER MID DOTTED TONE BAR + <2> : "꜋" UA70B # MODIFIER LETTER LOW DOTTED TONE BAR + <1> : "꜌" UA70C # MODIFIER LETTER EXTRA-LOW DOTTED TONE BAR + <5> : "꜍" UA70D # MODIFIER LETTER EXTRA-HIGH DOTTED LEFT-STEM TONE BAR + <4> : "꜎" UA70E # MODIFIER LETTER HIGH DOTTED LEFT-STEM TONE BAR + <3> : "꜏" UA70F # MODIFIER LETTER MID DOTTED LEFT-STEM TONE BAR + <2> : "꜐" UA710 # MODIFIER LETTER LOW DOTTED LEFT-STEM TONE BAR + <1> : "꜑" UA711 # MODIFIER LETTER EXTRA-LOW DOTTED LEFT-STEM TONE BAR + ######################## # Latin script letters # ######################## @@ -11067,6 +11078,8 @@ include "%L" : "ꙟ" UA65F # CYRILLIC SMALL LETTER YN : "Ꙁ" UA640 # CYRILLIC CAPITAL LETTER ZEMLYA : "ꙁ" UA641 # CYRILLIC SMALL LETTER ZEMLYA + : "Ꙑ" UA650 # CYRILLIC CAPITAL LETTER YERU WITH BACK YER + : "ꙑ" UA651 # CYRILLIC SMALL LETTER YERU WITH BACK YER # diacritics @@ -11241,6 +11254,7 @@ include "%L" : "𞁋" U1E04B # MODIFIER LETTER CYRILLIC SMALL SCHWA : "𞁎" U1E04E # MODIFIER LETTER CYRILLIC SMALL BARRED O (conflicts) : "𞁏" U1E04F # MODIFIER LETTER CYRILLIC SMALL STRAIGHT U + : "𞁬" U1E06C # MODIFIER LETTER CYRILLIC SMALL YERU WITH BACK YER : "𞁑" U1E051 # CYRILLIC SUBSCRIPT SMALL LETTER A (conflicts) : "𞁒" U1E052 # CYRILLIC SUBSCRIPT SMALL LETTER BE (conflicts) @@ -20700,6 +20714,14 @@ include "HangulSyllables"

: "𖾑" U16F91 # MIAO TONE ABOVE

: "𖾒" U16F92 # MIAO TONE BELOW +

<2> : "𖾓" U16F93 # MIAO LETTER TONE-2 +

<3> : "𖾔" U16F94 # MIAO LETTER TONE-3 +

<4> : "𖾕" U16F95 # MIAO LETTER TONE-4 +

<5> : "𖾖" U16F96 # MIAO LETTER TONE-5 +

<6> : "𖾗" U16F97 # MIAO LETTER TONE-6 +

<7> : "𖾘" U16F98 # MIAO LETTER TONE-7 +

<8> : "𖾙" U16F99 # MIAO LETTER TONE-8 + # (P = ʼPhags-Pa) follows ISO 15919 like other Brahmic scripts

: "ꡀ" UA840 # PHAGS-PA LETTER KA diff --git a/Compose.md b/Compose.md index 2c6debd..c8dd0cf 100644 --- a/Compose.md +++ b/Compose.md @@ -19,8 +19,8 @@ The file aims for memorability and consistency. As a result, a small number of t * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2737) -* **International Phonetic Alphabet** (129): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc +### Latin script characters (2747) +* **International Phonetic Alphabet** (139): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (513): Ƿ Ȝ ␢ ȵ etc * **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc @@ -28,13 +28,13 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32901) +### Non-Latin script characters (32911) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc -* **Cyrillic**, Glagolitic & Old Permic (570): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc +* **Cyrillic**, Glagolitic & Old Permic (573): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1308): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc @@ -46,7 +46,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (382): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (502): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2432): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2439): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From 34a71329fc90d8324d7d6a49bb9731856cb8c88e Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Thu, 24 Jul 2025 13:27:37 +0100 Subject: [PATCH 06/21] Geometric shapes --- Compose | 93 +++++++++++++++++++++++++++++++++++++++++++++++------- Compose.md | 16 +++++----- 2 files changed, 89 insertions(+), 20 deletions(-) diff --git a/Compose b/Compose index a95d0df..10a3eaf 100644 --- a/Compose +++ b/Compose @@ -915,6 +915,11 @@ include "%L" <2> : "꜐" UA710 # MODIFIER LETTER LOW DOTTED LEFT-STEM TONE BAR <1> : "꜑" UA711 # MODIFIER LETTER EXTRA-LOW DOTTED LEFT-STEM TONE BAR + : "ꜗ" UA717 # MODIFIER LETTER DOT VERTICAL BAR + : "ꜘ" UA718 # MODIFIER LETTER DOT SLASH + : "ꜙ" UA719 # MODIFIER LETTER DOT HORIZONTAL BAR + : "ꜚ" UA71A # MODIFIER LETTER LOWER RIGHT CORNER ANGLE + ######################## # Latin script letters # ######################## @@ -1030,6 +1035,7 @@ include "%L" : "ʽ" U02BD # MODIFIER LETTER REVERSED COMMA : "ᴥ" U1D25 # LATIN LETTER AIN : "ꞏ" UA78F # LATIN LETTER SINOLOGICAL DOT (conflicts with ·, use .^) + : "꞉" UA789 # MODIFIER LETTER COLON (not ideal but :␣ is taken) # (Zhuang tone letters) @@ -14555,7 +14561,7 @@ include "%L" : "ϱ" U03F1 # GREEK RHO SYMBOL : "ϕ" U03D5 # GREEK PHI SYMBOL

: "ϕ" U03D5 # GREEK PHI SYMBOL -# additional Greek letter variants not represented in the alphanumerics block +# additional Greek letter symbols not represented in the alphanumerics block : "ϒ" U03D2 # GREEK UPSILON WITH HOOK SYMBOL : "ϒ" U03D2 # GREEK UPSILON WITH HOOK SYMBOL : "ϓ" U03D3 # GREEK UPSILON WITH ACUTE AND HOOK SYMBOL @@ -24440,6 +24446,12 @@ include "Logograms" : "⫕" U2AD5 # SUBSET ABOVE SUBSET

: "⫖" U2AD6 # SUPERSET ABOVE SUPERSET + : "C₂H₅OH" # ETHANOL (ALCOHOL) + : "C₈H₁₀N₄O₂" # CAFFEINE (conflicts with ă, use ua) +

: "C₁₇H₁₉NO₃" # MORPHINE +

: "C₁₈H₂₇NO₃" # CAPSAICIN (SPICE) (use spicy for 🌶️) + : "C₂₁H₃₀O₂" # THC (WEED) + # Z notation (start with z) : "⦂" U2982 # Z NOTATION TYPE COLON @@ -25135,6 +25147,7 @@ include "Logograms" : "🧀" U1F9C0 # CHEESE WEDGE (conflicts with ȟ, use vh) : "🍒" U1F352 # CHERRIES (conflicts with ȟ, use vh) : "🌰" U1F330 # CHESTNUT (conflicts with ȟ, use vh) + : "🌶️" U1F336 UFE0F # HOT PEPPER EMOJI (CHILLI) (conflicts with ȟ, use vh)

: "🍟" U1F35F # FRENCH FRIES (CHIPS) (conflicts with ȟ, use vh) : "🍫" U1F36B # CHOCOLATE BAR (conflicts with ȟ, use vh)

: "🥢" U1F962 # CHOPSTICKS (conflicts with ȟ, use vh) @@ -25989,6 +26002,7 @@ include "Logograms" : "🚷" U1F6B7 # NO PEDESTRIANS <1> <8> : "🔞" U1F51E # NO ONE UNDER EIGHTEEN SYMBOL (alternatively NSFW) <0> : "🚫" U1F6AB # NO ENTRY SIGN + <0> : "🛇" U1F6C7 # PROHIBITED SIGN # Shapes (see geometric shapes section for many more) @@ -26356,6 +26370,8 @@ include "Logograms" <0> : "🗰" U1F5F0 # MOOD BUBBLE : "🗱" U1F5F1 # LIGHTNING MOOD BUBBLE + : "☥" U2625 # ANKH + # Legacy computing sprites and related pictographs : "☻" U263B # BLACK SMILING FACE @@ -26906,12 +26922,6 @@ include "Logograms" : "▧" U25A7 # SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL (conflicts) : "▨" U25A8 # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL (conflicts) : "▩" U25A9 # SQUARE WITH DIAGONAL CROSSHATCH FILL (conflicts) - <0> : "🞎" U1F78E # LIGHT WHITE SQUARE (conflicts) - <1> : "🞏" U1F78F # MEDIUM WHITE SQUARE (conflicts) - <2> : "🞐" U1F790 # BOLD WHITE SQUARE (conflicts) - <3> : "🞑" U1F791 # HEAVY WHITE SQUARE (conflicts) - <4> : "🞒" U1F792 # VERY HEAVY WHITE SQUARE (conflicts) - <5> : "🞓" U1F793 # EXTREMELY HEAVY WHITE SQUARE (conflicts) : "⃞" U20DE # COMBINING ENCLOSING SQUARE (conflicts) : "⛋" U26CB # WHITE DIAMOND IN SQUARE (conflicts) @@ -26988,11 +26998,6 @@ include "Logograms" : "◍" U25CD # CIRCLE WITH VERTICAL FILL : "⦵" U29B5 # CIRCLE WITH HORIZONTAL BAR : "⦻" U29BB # CIRCLE WITH SUPERIMPOSED X - <1> : "🞅" U1F785 # MEDIUM BOLD WHITE CIRCLE - <2> : "🞆" U1F786 # BOLD WHITE CIRCLE - <3> : "🞇" U1F787 # HEAVY WHITE CIRCLE - <4> : "🞈" U1F788 # VERY HEAVY WHITE CIRCLE - <5> : "🞉" U1F789 # EXTREMELY HEAVY WHITE CIRCLE : "⃝" U20DD # COMBINING ENCLOSING CIRCLE : "⃠" U20E0 # COMBINING ENCLOSING CIRCLE BACKSLASH @@ -27123,14 +27128,78 @@ include "Logograms" <5> : "⯫" U2BEB # STAR WITH RIGHT HALF BLACK <5> : "⛤" U26E4 # PENTAGRAM <5> : "⛧" U26E7 # INVERTED PENTAGRAM + <5> : "✯" U272F # PINWHEEL STAR <6> <1> : "✶" U2736 # SIX POINTED BLACK STAR <6> : "✡" U2721 # STAR OF DAVID (alternatively *6) + <6> : "🟍" U1F7CD # SIX POINTED PINWHEEL STAR <8> <1> : "✴" U2734 # EIGHT POINTED BLACK STAR <8> : "❂" U2742 # CIRCLED OPEN CENTRE EIGHT POINTED STAR <8> : "۞" U06DE # ARABIC START OF RUB EL HIZB + <8> : "✵" U2735 # EIGHT POINTED PINWHEEL STAR <9> <0> : "🟙" U1F7D9 # NINE POINTED WHITE STAR <1> <2> <1> : "✹" U2739 # TWELVE POINTED BLACK STAR +# Weighted shapes + + <1> : "🞎" U1F78E # LIGHT WHITE SQUARE (conflicts) + <2> : "🞏" U1F78F # MEDIUM WHITE SQUARE (conflicts) + <3> : "🞐" U1F790 # BOLD WHITE SQUARE (conflicts) + <4> : "🞑" U1F791 # HEAVY WHITE SQUARE (conflicts) + <5> : "🞒" U1F792 # VERY HEAVY WHITE SQUARE (conflicts) + <6> : "🞓" U1F793 # EXTREMELY HEAVY WHITE SQUARE (conflicts) + <2> : "🞅" U1F785 # MEDIUM BOLD WHITE CIRCLE + <3> : "🞆" U1F786 # BOLD WHITE CIRCLE + <4> : "🞇" U1F787 # HEAVY WHITE CIRCLE + <5> : "🞈" U1F788 # VERY HEAVY WHITE CIRCLE + <6> : "🞉" U1F789 # EXTREMELY HEAVY WHITE CIRCLE + + <0> : "🞡" U1F7A1 # THIN GREEK CROSS + <1> : "🞢" U1F7A2 # LIGHT GREEK CROSS + <2> : "🞣" U1F7A3 # MEDIUM GREEK CROSS + <3> : "🞤" U1F7A4 # BOLD GREEK CROSS + <4> : "🞥" U1F7A5 # VERY BOLD GREEK CROSS + <5> : "🞦" U1F7A6 # VERY HEAVY GREEK CROSS + <6> : "🞧" U1F7A7 # EXTREMELY HEAVY GREEK CROSS + <0> : "🞨" U1F7A8 # THIN SALTIRE + <1> : "🞩" U1F7A9 # LIGHT SALTIRE + <2> : "🞪" U1F7AA # MEDIUM SALTIRE + <3> : "🞫" U1F7AB # BOLD SALTIRE + <4> : "🞬" U1F7AC # HEAVY SALTIRE + <5> : "🞭" U1F7AD # VERY HEAVY SALTIRE + <6> : "🞮" U1F7AE # EXTREMELY HEAVY SALTIRE + + <5> <1> : "🞯" U1F7AF # LIGHT FIVE SPOKED ASTERISK + <5> <2> : "🞰" U1F7B0 # MEDIUM FIVE SPOKED ASTERISK + <5> <3> : "🞱" U1F7B1 # BOLD FIVE SPOKED ASTERISK + <5> <4> : "🞲" U1F7B2 # HEAVY FIVE SPOKED ASTERISK + <5> <5> : "🞳" U1F7B3 # VERY HEAVY FIVE SPOKED ASTERISK + <5> <6> : "🞴" U1F7B4 # EXTREMELY HEAVY FIVE SPOKED ASTERISK + <6> <1> : "🞵" U1F7B5 # LIGHT SIX SPOKED ASTERISK + <6> <2> : "🞶" U1F7B6 # MEDIUM SIX SPOKED ASTERISK + <6> <3> : "🞷" U1F7B7 # BOLD SIX SPOKED ASTERISK + <6> <4> : "🞸" U1F7B8 # HEAVY SIX SPOKED ASTERISK + <6> <5> : "🞹" U1F7B9 # VERY HEAVY SIX SPOKED ASTERISK + <6> <6> : "🞺" U1F7BA # EXTREMELY HEAVY SIX SPOKED ASTERISK + <8> <1> : "🞻" U1F7BB # LIGHT EIGHT SPOKED ASTERISK + <8> <2> : "🞼" U1F7BC # MEDIUM EIGHT SPOKED ASTERISK + <8> <3> : "🞽" U1F7BD # BOLD EIGHT SPOKED ASTERISK + <8> <4> : "🞾" U1F7BE # HEAVY EIGHT SPOKED ASTERISK + <8> <5> : "🞿" U1F7BF # VERY HEAVY EIGHT SPOKED ASTERISK + + <3> <1> : "🟀" U1F7C0 # LIGHT THREE POINTED BLACK STAR + <3> <2> : "🟁" U1F7C1 # MEDIUM THREE POINTED BLACK STAR + <4> <1> : "🟄" U1F7C4 # LIGHT FOUR POINTED BLACK STAR + <4> <2> : "🟅" U1F7C5 # MEDIUM FOUR POINTED BLACK STAR + <5> <1> : "🟉" U1F7C9 # LIGHT FIVE POINTED BLACK STAR + <5> <4> : "🟊" U1F7CA # HEAVY FIVE POINTED BLACK STAR + <6> <2> : "🟋" U1F7CB # MEDIUM SIX POINTED BLACK STAR + <6> <4> : "🟌" U1F7CC # HEAVY SIX POINTED BLACK STAR + <8> <2> : "🟎" U1F7CE # MEDIUM EIGHT POINTED BLACK STAR + <8> <4> : "🟏" U1F7CF # HEAVY EIGHT POINTED BLACK STAR + <8> <5> : "🟐" U1F7D0 # VERY HEAVY EIGHT POINTED BLACK STAR + <1> <2> <1> : "🟒" U1F7D2 # LIGHT TWELVE POINTED BLACK STAR + <1> <2> <4> : "🟓" U1F7D3 # HEAVY TWELVE POINTED BLACK STAR + ########## # Arrows # ########## diff --git a/Compose.md b/Compose.md index c8dd0cf..63484cf 100644 --- a/Compose.md +++ b/Compose.md @@ -19,9 +19,9 @@ The file aims for memorability and consistency. As a result, a small number of t * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2747) -* **International Phonetic Alphabet** (139): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc -* **Latin script letters** (513): Ƿ Ȝ ␢ ȵ etc +### Latin script characters (2752) +* **International Phonetic Alphabet** (143): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc +* **Latin script letters** (514): Ƿ Ȝ ␢ ȵ etc * **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc * **Superscripts and subscripts** (309): ᵃ ᴬ ₐ ᴀ ◌ͣ etc @@ -50,10 +50,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (4994) -* **Emoji** (1515): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc -* **Sprites** (150): 🗫 🯅 ㋡ etc -* **Math and science** (512): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. +### Symbols (5048) +* **Emoji** (1516): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc +* **Sprites** (151): 🗫 🯅 ㋡ etc +* **Math and science** (517): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. * **APL** (107): ⍟ ⍫ ⍉ etc * **Technical** (106): ⏻ ⎙ ⌘ etc * **Numerals** (338): 𝍸𝍷 𝍵 Ⅻ ↁ etc @@ -63,5 +63,5 @@ The file aims for memorability and consistency. As a result, a small number of t * **I Ching** (166): ䷇ ☰☷☲☵ etc * **Hieroglyphs** (300): 𓁖 𓁹 𓃠 etc * **Arrows** (319): ↦ ↺ ⇄ ⇼ ⏎ ⇬ etc -* **Geometric shapes** (240): ⬛ ⬚ 🟣 ◐ ◭ ◈ ✶ etc +* **Geometric shapes** (287): ⬛ ⬚ 🟣 ◐ ◭ ◈ ✶ etc * **Box drawing** (665): ╞╦╕ etc From 3b7637139bd83389b737671055bdb1a4de18794d Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sat, 26 Jul 2025 21:52:13 +0100 Subject: [PATCH 07/21] More Batak --- Compose | 72 +++++++++++++++++++++++++++++++++++++++++------------- Compose.md | 10 ++++---- 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/Compose b/Compose index 10a3eaf..be3f0b0 100644 --- a/Compose +++ b/Compose @@ -3944,7 +3944,9 @@ include "%L" : "ُ" U064F # ARABIC DAMMA (conflicts with ą, use ,a) : "ِ" U0650 # ARABIC KASRA (conflicts with ą, use ,a) : "ِ" U0650 # ARABIC KASRA (conflicts with ą, use ,a) + : "ّ" U0651 # ARABIC SHADDA (conflicts with ą, use ,a) : "ّ" U0651 # ARABIC SHADDA (conflicts with ą, use ,a) + <0> : "ْ" U0652 # ARABIC SUKUN (conflicts with ą, use ,a) : "ْ" U0652 # ARABIC SUKUN (conflicts with ą, use ,a) : "ٓ" U0653 # ARABIC MADDAH ABOVE (conflicts with ą, use ,a) <2> : "ٔ" U0654 # ARABIC HAMZA ABOVE (conflicts with ą, use ,a) @@ -11218,6 +11220,11 @@ include "%L" : "Ҹ" U04B8 # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE : "ҹ" U04B9 # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE + : "Ꙩ" UA668 # CYRILLIC CAPITAL LETTER MONOCULAR O + : "ꙩ" UA669 # CYRILLIC SMALL LETTER MONOCULAR O + : "Ꙫ" UA66A # CYRILLIC CAPITAL LETTER BINOCULAR O + : "ꙫ" UA66B # CYRILLIC SMALL LETTER BINOCULAR O + # (superscripts and subscripts) : "𞀰" U1E030 # MODIFIER LETTER CYRILLIC SMALL A (conflict) @@ -17525,42 +17532,72 @@ include "HangulSyllables" <2> : "᭟" U1B5F # BALINESE CARIK PAREREN (DOUBLE DANDA) : "᭠" U1B60 # BALINESE PAMENENG -# (BT = Batak) +# (BT = Batak) minority forms suffixed by K(aro), M(andailing), P(akpak), T(oba), S(imalungun) : "ᯀ" U1BC0 # BATAK LETTER A - : "ᯂ" U1BC2 # BATAK LETTER HA - : "ᯅ" U1BC5 # BATAK LETTER BA -

: "ᯇ" U1BC7 # BATAK LETTER PA + : "ᯁ" U1BC1 # BATAK LETTER SIMALUNGUN A + : "ᯀ" U1BC0 # BATAK LETTER A + : "ᯄ" U1BC4 # BATAK LETTER MANDAILING HA +

: "ᯀ" U1BC0 # BATAK LETTER A + : "ᯂ" U1BC2 # BATAK LETTER HA + : "ᯃ" U1BC3 # BATAK LETTER SIMALUNGUN HA + : "ᯂ" U1BC2 # BATAK LETTER HA + : "ᯂ᯦" U1BC2 U1BE6 # BATAK LETTER HA BATAK SIGN TOMPI + : "ᯃ" U1BC3 # BATAK LETTER SIMALUNGUN HA + : "ᯅ" U1BC5 # BATAK LETTER BA + : "ᯆ" U1BC6 # BATAK LETTER KARO BA +

: "ᯇ" U1BC7 # BATAK LETTER PA +

: "ᯈ" U1BC8 # BATAK LETTER SIMALUNGUN PA : "ᯉ" U1BC9 # BATAK LETTER NA - : "ᯋ" U1BCB # BATAK LETTER WA - : "ᯎ" U1BCE # BATAK LETTER GA + : "ᯊ" U1BCA # BATAK LETTER MANDAILING NA + : "ᯋ" U1BCB # BATAK LETTER WA +

: "ᯍ" U1BCD # BATAK LETTER PAKPAK WA + : "ᯌ" U1BCC # BATAK LETTER SIMALUNGUN WA + : "ᯎ" U1BCE # BATAK LETTER GA + : "ᯏ" U1BCF # BATAK LETTER SIMALUNGUN GA : "ᯐ" U1BD0 # BATAK LETTER JA : "ᯑ" U1BD1 # BATAK LETTER DA - : "ᯒ" U1BD2 # BATAK LETTER RA + : "ᯒ" U1BD2 # BATAK LETTER RA + : "ᯓ" U1BD3 # BATAK LETTER SIMALUNGUN RA : "ᯔ" U1BD4 # BATAK LETTER MA - : "ᯖ" U1BD6 # BATAK LETTER SOUTHERN TA - : "ᯗ" U1BD7 # BATAK LETTER NORTHERN TA - : "ᯘ" U1BD8 # BATAK LETTER SA - : "ᯛ" U1BDB # BATAK LETTER YA + : "ᯕ" U1BD5 # BATAK LETTER SIMALUNGUN MA + : "ᯖ" U1BD6 # BATAK LETTER SOUTHERN TA + : "ᯗ" U1BD7 # BATAK LETTER NORTHERN TA +

: "ᯗ" U1BD7 # BATAK LETTER NORTHERN TA + : "ᯘ" U1BD8 # BATAK LETTER SA + : "ᯚ" U1BDA # BATAK LETTER MANDAILING SA + : "ᯙ" U1BD9 # BATAK LETTER SIMALUNGUN SA + : "ᯛ" U1BDB # BATAK LETTER YA + : "ᯜ" U1BDC # BATAK LETTER SIMALUNGUN YA : "ᯝ" U1BDD # BATAK LETTER NGA - : "ᯞ" U1BDE # BATAK LETTER LA + : "ᯞ" U1BDE # BATAK LETTER LA + : "ᯟ" U1BDF # BATAK LETTER SIMALUNGUN LA : "ᯠ" U1BE0 # BATAK LETTER NYA - : "ᯡ" U1BE1 # BATAK LETTER CA + : "ᯡ" U1BE1 # BATAK LETTER CA + : "ᯚ᯦" U1BDA U1BE6 # BATAK LETTER MANDAILING SA BATAK SIGN TOMPI +

: "ᯘ" U1BD8 # BATAK LETTER SA : "ᯢ" U1BE2 # BATAK LETTER NDA : "ᯣ" U1BE3 # BATAK LETTER MBA : "ᯤ" U1BE4 # BATAK LETTER I : "ᯥ" U1BE5 # BATAK LETTER U : "᯦" U1BE6 # BATAK SIGN TOMPI - : "ᯧ" U1BE7 # BATAK VOWEL SIGN E - : "ᯩ" U1BE9 # BATAK VOWEL SIGN EE + : "ᯩ" U1BE9 # BATAK VOWEL SIGN EE + : "ᯧ" U1BE7 # BATAK VOWEL SIGN E +

: "ᯨ" U1BE8 # BATAK VOWEL SIGN PAKPAK E : "ᯪ" U1BEA # BATAK VOWEL SIGN I - : "ᯬ" U1BEC # BATAK VOWEL SIGN O + : "ᯫ" U1BEB # BATAK VOWEL SIGN KARO I + : "ᯫ" U1BEB # BATAK VOWEL SIGN KARO I + : "ᯬ" U1BEC # BATAK VOWEL SIGN O + : "ᯭ" U1BED # BATAK VOWEL SIGN KARO O : "ᯮ" U1BEE # BATAK VOWEL SIGN U + : "ᯬ" U1BEC # BATAK VOWEL SIGN O + : "ᯯ" U1BEF # BATAK VOWEL SIGN U FOR SIMALUNGUN SA : "ᯰ" U1BF0 # BATAK CONSONANT SIGN NG : "ᯱ" U1BF1 # BATAK CONSONANT SIGN H <0> : "᯲" U1BF2 # BATAK PANGOLAT (VIRAMA) - <2> <0> : "᯳" U1BF3 # BATAK PANONGONAN (VIRAMA) + <0> : "᯳" U1BF3 # BATAK PANONGONAN (VIRAMA) + <0> : "᯳" U1BF3 # BATAK PANONGONAN (VIRAMA) # (h = Hanunoo) @@ -29815,6 +29852,7 @@ include "Logograms" : "🖧" U1F5A7 # THREE NETWORKED COMPUTERS : "🖩" U1F5A9 # POCKET CALCULATOR + : "🖳" U1F5B3 # OLD PERSONAL COMPUTER (DESKTOP)

: "🖬" U1F5AC # SOFT SHELL FLOPPY DISK

: "🖭" U1F5AD # TAPE CARTRIDGE : "⌨" U2328 # KEYBOARD diff --git a/Compose.md b/Compose.md index 63484cf..cacc770 100644 --- a/Compose.md +++ b/Compose.md @@ -28,19 +28,19 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32911) +### Non-Latin script characters (32936) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc -* **Cyrillic**, Glagolitic & Old Permic (573): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc +* **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1308): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (866): 「レディット」 etc -* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (485): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc +* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (506): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (382): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc @@ -50,12 +50,12 @@ The file aims for memorability and consistency. As a result, a small number of t * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (5048) +### Symbols (5049) * **Emoji** (1516): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc * **Sprites** (151): 🗫 🯅 ㋡ etc * **Math and science** (517): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. * **APL** (107): ⍟ ⍫ ⍉ etc -* **Technical** (106): ⏻ ⎙ ⌘ etc +* **Technical** (107): ⏻ ⎙ ⌘ etc * **Numerals** (338): 𝍸𝍷 𝍵 Ⅻ ↁ etc * **Music** (137): 𝄞 𝅗𝅨𝅥 𝅃𝅥𝅮 𝆍𝆑𝆎 etc * **Games** (358): ♞c6 🩡 🂽 🁖 🀄︎ etc From 6f010d1bdddf8b0b0f72f005edb9280149996cbe Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sat, 26 Jul 2025 22:06:08 +0100 Subject: [PATCH 08/21] More Vai --- Compose | 20 ++++++++++++++++++++ Compose.md | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Compose b/Compose index be3f0b0..32cef76 100644 --- a/Compose +++ b/Compose @@ -23778,6 +23778,26 @@ include "HangulSyllables" <8> : "꘨" UA628 # VAI DIGIT EIGHT <9> : "꘩" UA629 # VAI DIGIT NINE + : "ꘓ" UA613 # VAI SYMBOL FEENG (THING) + : "ꘔ" UA614 # VAI SYMBOL KEENG (FOOT) + : "ꘕ" UA615 # VAI SYMBOL TING (ISLE) + : "ꘖ" UA616 # VAI SYMBOL NII (COW) + : "ꘗ" UA617 # VAI SYMBOL BANG (END) + : "ꘘ" UA618 # VAI SYMBOL FAA (DIE) + : "ꘙ" UA619 # VAI SYMBOL TAA (GO) + : "ꘚ" UA61A # VAI SYMBOL DANG (HEAR) + : "ꘛ" UA61B # VAI SYMBOL DOONG (ENTER) + : "ꘜ" UA61C # VAI SYMBOL KUNG (CAN) + : "ꘝ" UA61D # VAI SYMBOL TONG (NAME) + : "ꘞ" UA61E # VAI SYMBOL DO-O (SMALL) + : "ꘟ" UA61F # VAI SYMBOL JONG (SLAVE) + + : "ꘐ" UA610 # VAI SYLLABLE NDOLE FA + : "ꘑ" UA611 # VAI SYLLABLE NDOLE KA + : "ꘒ" UA612 # VAI SYLLABLE NDOLE SOO + : "ꘪ" UA62A # VAI SYLLABLE NDOLE MA + : "ꘫ" UA62B # VAI SYLLABLE NDOLE DO + ########################################## # (}) Logograms and undeciphered scripts # ########################################## diff --git a/Compose.md b/Compose.md index cacc770..b385b65 100644 --- a/Compose.md +++ b/Compose.md @@ -28,14 +28,14 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32936) +### Non-Latin script characters (32954) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc -* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1308): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc +* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1326): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc From 7c9384ede988331f361b12a387fd8ccffdcff502 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sat, 26 Jul 2025 22:16:31 +0100 Subject: [PATCH 09/21] Coptic symbols --- Compose | 7 +++++++ Compose.md | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Compose b/Compose index 32cef76..9050b7d 100644 --- a/Compose +++ b/Compose @@ -14361,6 +14361,13 @@ include "%L"

: "ⳤ" U2CE4 # COPTIC SYMBOL KAI

: "⳾" U2CFE # COPTIC FULL STOP +

: "⳥" U2CE5 # COPTIC SYMBOL MI RO +

: "⳦" U2CE6 # COPTIC SYMBOL PI RO +

: "⳧" U2CE7 # COPTIC SYMBOL STAUROS +

: "⳨" U2CE8 # COPTIC SYMBOL TAU RO +

: "⳩" U2CE9 # COPTIC SYMBOL KHI RO +

: "⳪" U2CEA # COPTIC SYMBOL SHIMA SIMA +

<1> <2> : "⳽" U2CFD # COPTIC FRACTION ONE HALF

<1> : "𐋡" U102E1 # COPTIC EPACT DIGIT ONE

<2> : "𐋢" U102E2 # COPTIC EPACT DIGIT TWO diff --git a/Compose.md b/Compose.md index b385b65..e4bac20 100644 --- a/Compose.md +++ b/Compose.md @@ -28,7 +28,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (389): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32954) +### Non-Latin script characters (32960) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc @@ -37,7 +37,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1326): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc -* **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1138): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc +* **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (866): 「レディット」 etc * **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (506): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc From 0e8fc766ecd3f7fff0c3331763770777c16f2fbd Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sat, 26 Jul 2025 23:23:21 +0100 Subject: [PATCH 10/21] Afghani --- Compose | 5 ++++- Compose.md | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Compose b/Compose index 9050b7d..44274d4 100644 --- a/Compose +++ b/Compose @@ -729,6 +729,7 @@ include "%L" : "⃀" U20C0 # SOM SIGN : "⃀" U20C0 # SOM SIGN + : "؋" U060B # AFGHANI SIGN : "₪" U20AA # NEW SHEQEL SIGN : "﷼" UFDFC # RIAL SIGN : "៛" U17DB # KHMER CURRENCY SYMBOL RIEL @@ -3062,6 +3063,7 @@ include "%L" : "㎗" U3397 # SQUARE DL : "㍷" U3377 # SQUARE DM : "Ⰸ" U2C08 # GLAGOLITIC CAPITAL LETTER ZEMLJA (visually similar) +

: "₯" U20AF # DRACHMA SIGN : "DZ" U01F1 # LATIN CAPITAL LETTER DZ : "Dz" U01F2 # LATIN CAPITAL LETTER D WITH SMALL LETTER Z : "ʣ" U02A3 # LATIN SMALL LETTER DZ DIGRAPH @@ -3252,6 +3254,7 @@ include "%L"

: "㏚" U33DA # SQUARE PR

: "㎰" U33B0 # SQUARE PS

: "㉐" U3250 # PARTNERSHIP SIGN +

: "₧" U20A7 # PESETA SIGN

: "㎴" U33B4 # SQUARE PV

: "㎺" U33BA # SQUARE PW @@ -7039,7 +7042,7 @@ include "%L" : "্" U09CD # BENGALI SIGN VIRAMA <2> : "ঽ" U09BD # BENGALI SIGN AVAGRAHA - : "৳" U09F3 # BENGALI RUPEE SIGN + : "৲" U09F2 # BENGALI RUPEE MARK (use Tk for ৳) # Numerals diff --git a/Compose.md b/Compose.md index e4bac20..110205a 100644 --- a/Compose.md +++ b/Compose.md @@ -9,23 +9,23 @@ The file aims for memorability and consistency. As a result, a small number of t ## Table of contents and examples -### Common script characters (603) +### Common script characters (604) * **Spaces** (15): NBSP MMSP ZWSP etc * **Dashes** (12): – — ⁓ ⸻ etc * **Brackets** (40): ⟨ ⟦ ⸨ ⌈ ⫽ etc * **Bullets** (11): • ‣ ⁃ ◉ etc * **General punctuation** (60): ⁁ ⁂ ⸎ etc -* **Currency symbols** (20): ₱ ₿ ₪ etc +* **Currency symbols** (21): ₱ ₿ ₪ etc * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2752) +### Latin script characters (2754) * **International Phonetic Alphabet** (143): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (514): Ƿ Ȝ ␢ ȵ etc * **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc * **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc * **Superscripts and subscripts** (309): ᵃ ᴬ ₐ ᴀ ◌ͣ etc -* **Multigraphs and ligatures** (389): ʣ ㏈ etc +* **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc ### Non-Latin script characters (32960) From d522a5238c4d6f40f3432ef78b49aaf32f0f3e3c Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 09:58:16 +0100 Subject: [PATCH 11/21] Move punctuation before numerals --- Compose | 172 ++++++++++++++++++++++++++++------------------------- Compose.md | 12 ++-- 2 files changed, 98 insertions(+), 86 deletions(-) diff --git a/Compose b/Compose index 44274d4..1d472f0 100644 --- a/Compose +++ b/Compose @@ -717,8 +717,11 @@ include "%L" : "₳" U20B3 # AUSTRAL SIGN : "₴" U20B4 # HRYVNIA SIGN : "₴" U20B4 # HRYVNIA SIGN + : "₶" U20B6 # LIVRE TOURNOIS SIGN (conflicts with useless <) + : "₷" U20B7 # SPESMILO SIGN : "₸" U20B8 # TENGE SIGN : "₺" U20BA # TURKISH LIRA SIGN + : "₻" U20BB # NORDIC MARK SIGN : "₼" U20BC # MANAT SIGN : "₼" U20BC # MANAT SIGN

: "₽" U20BD # RUBLE SIGN @@ -2709,8 +2712,11 @@ include "%L" # (normal circled alphanumerics are defined as in the system config, with no prefix) + : "🅭" U1F16D # CIRCLED CC : "🄭" U1F12D # CIRCLED CD : "🄮" U1F12E # CIRCLED WZ + <0> : "🄍" U1F10D # CIRCLED ZERO WITH SLASH + <0> : "🄍" U1F10D # CIRCLED ZERO WITH SLASH : "🄫" U1F12B # CIRCLED ITALIC LATIN CAPITAL LETTER C : "🄬" U1F12C # CIRCLED ITALIC LATIN CAPITAL LETTER R @@ -6846,6 +6852,12 @@ include "%L" : "ॐ" U0950 # DEVANAGARI OM : "௹" U0BF9 # TAMIL RUPEE SIGN +# Punctuation + + : "।" U0964 # DEVANAGARI DANDA + <2> : "॥" U0965 # DEVANAGARI DOUBLE DANDA + : "॰" U0970 # DEVANAGARI ABBREVIATION SIGN + # Numerals <0> : "०" U0966 # DEVANAGARI DIGIT ZERO @@ -6870,12 +6882,6 @@ include "%L" <8> : "꣨" UA8E8 # COMBINING DEVANAGARI DIGIT EIGHT <9> : "꣩" UA8E9 # COMBINING DEVANAGARI DIGIT NINE -# Punctuation - - : "।" U0964 # DEVANAGARI DANDA - <2> : "॥" U0965 # DEVANAGARI DOUBLE DANDA - : "॰" U0970 # DEVANAGARI ABBREVIATION SIGN - # (A = Ahom) # Consonants @@ -6937,6 +6943,13 @@ include "%L" <0> : "𑜫" U1172B # AHOM SIGN KILLER (VIRAMA) (conflicts) : "𑜫" U1172B # AHOM SIGN KILLER (VIRAMA) (conflicts) +# Punctuation + + : "𑜼" U1173C # AHOM SIGN SMALL SECTION (DANDA) (conflicts) + <2> : "𑜽" U1173D # AHOM SIGN SECTION (DOUBLE DANDA) (conflicts) + : "𑜾" U1173E # AHOM SIGN RULAI (conflicts) + : "𑜿" U1173F # AHOM SYMBOL VI (conflicts) + # Numbers <0> : "𑜰" U11730 # AHOM DIGIT ZERO (conflicts) @@ -6952,13 +6965,6 @@ include "%L" <1> <0> : "𑜺" U1173A # AHOM NUMBER TEN (conflicts) <2> <0> : "𑜻" U1173B # AHOM NUMBER TWENTY (conflicts) -# Punctuation - - : "𑜼" U1173C # AHOM SIGN SMALL SECTION (DANDA) (conflicts) - <2> : "𑜽" U1173D # AHOM SIGN SECTION (DOUBLE DANDA) (conflicts) - : "𑜾" U1173E # AHOM SIGN RULAI (conflicts) - : "𑜿" U1173F # AHOM SYMBOL VI (conflicts) - # (b = Bengali) # Consonants @@ -7368,6 +7374,13 @@ include "%L" : "𑄳" U11133 # CHAKMA VIRAMA : "𑄴" U11134 # CHAKMA MAAYYAA +# Punctuation + + : "𑅁" U11141 # CHAKMA DANDA + <2> : "𑅂" U11142 # CHAKMA DOUBLE DANDA + : "𑅃" U11143 # CHAKMA QUESTION MARK + : "𑅀" U11140 # CHAKMA SECTION MARK + # Numerals <0> : "𑄶" U11136 # CHAKMA DIGIT ZERO @@ -7381,13 +7394,6 @@ include "%L" <8> : "𑄾" U1113E # CHAKMA DIGIT EIGHT <9> : "𑄿" U1113F # CHAKMA DIGIT NINE -# Punctuation - - : "𑅁" U11141 # CHAKMA DANDA - <2> : "𑅂" U11142 # CHAKMA DOUBLE DANDA - : "𑅃" U11143 # CHAKMA QUESTION MARK - : "𑅀" U11140 # CHAKMA SECTION MARK - # (D = Dogra) : "𑠀" U11800 # DOGRA LETTER A @@ -7621,6 +7627,10 @@ include "%L" : "ૐ" U0AD0 # GUJARATI OM (conflicts) : "૱" U0AF1 # GUJARATI RUPEE SIGN (conflicts) +# Punctuation + + : "૰" U0AF0 # GUJARATI ABBREVIATION SIGN (conflicts) + # Numerals <0> : "૦" U0AE6 # GUJARATI DIGIT ZERO (conflicts) @@ -7634,10 +7644,6 @@ include "%L" <8> : "૮" U0AEE # GUJARATI DIGIT EIGHT (conflicts) <9> : "૯" U0AEF # GUJARATI DIGIT NINE (conflicts) -# Punctuation - - : "૰" U0AF0 # GUJARATI ABBREVIATION SIGN (conflicts) - # (G = Gurmukhi) # Consonants @@ -7716,6 +7722,10 @@ include "%L" : "੍" U0A4D # GURMUKHI SIGN VIRAMA (conflicts) : "ੰ" U0A70 # GURMUKHI TIPPI (conflicts) +# Punctuation + + : "੶" U0A76 # GURMUKHI ABBREVIATION SIGN (conflicts) + # Numerals <0> : "੦" U0A66 # GURMUKHI DIGIT ZERO (conflicts) @@ -7729,10 +7739,6 @@ include "%L" <8> : "੮" U0A6E # GURMUKHI DIGIT EIGHT (conflicts) <9> : "੯" U0A6F # GURMUKHI DIGIT NINE (conflicts) -# Punctuation - - : "੶" U0A76 # GURMUKHI ABBREVIATION SIGN (conflicts) - # (GG = Gujala Gondi) # Consonants @@ -8253,6 +8259,11 @@ include "%L" : "𐨺" U10A3A # KHAROSHTHI SIGN DOT BELOW : "𐨿" U10A3F # KHAROSHTHI VIRAMA +# Punctuation + + : "𐩖" U10A56 # KHAROSHTHI PUNCTUATION DANDA + <2> : "𐩗" U10A57 # KHAROSHTHI PUNCTUATION DOUBLE DANDA + # Numerals <1> : "𐩀" U10A40 # KHAROSHTHI DIGIT ONE @@ -8265,11 +8276,6 @@ include "%L" <1> <0> <0> <0> : "𐩇" U10A47 # KHAROSHTHI NUMBER ONE THOUSAND <1> <2> : "𐩈" U10A48 # KHAROSHTHI FRACTION ONE HALF -# Punctuation - - : "𐩖" U10A56 # KHAROSHTHI PUNCTUATION DANDA - <2> : "𐩗" U10A57 # KHAROSHTHI PUNCTUATION DOUBLE DANDA - # (KJ = Khojki) # Consonants @@ -8408,6 +8414,11 @@ include "%L" : "𖵂" U16D42 # KIRAT RAI SIGN VISARGA : "𖵫" U16D6B # KIRAT RAI SIGN VIRAMA +# Punctuation + + : "𖵮" U16D6E # KIRAT RAI DANDA + <2> : "𖵯" U16D6F # KIRAT RAI DOUBLE DANDA + # Numerals <0> : "𖵰" U16D70 # KIRAT RAI DIGIT ZERO @@ -8421,11 +8432,6 @@ include "%L" <8> : "𖵸" U16D78 # KIRAT RAI DIGIT EIGHT <9> : "𖵹" U16D79 # KIRAT RAI DIGIT NINE -# Punctuation - - : "𖵮" U16D6E # KIRAT RAI DANDA - <2> : "𖵯" U16D6F # KIRAT RAI DOUBLE DANDA - # (KT = Kaithi) # Consonants @@ -8585,6 +8591,11 @@ include "%L" : "᤻" U193B # LIMBU SIGN SA-I : "᥀" U1940 # LIMBU SIGN LOO +# Punctuation + + : "᥄" U1944 # LIMBU EXCLAMATION MARK + : "᥅" U1945 # LIMBU QUESTION MARK + # Numerals <0> : "᥆" U1946 # LIMBU DIGIT ZERO @@ -8598,11 +8609,6 @@ include "%L" <8> : "᥎" U194E # LIMBU DIGIT EIGHT <9> : "᥏" U194F # LIMBU DIGIT NINE -# Punctuation - - : "᥄" U1944 # LIMBU EXCLAMATION MARK - : "᥅" U1945 # LIMBU QUESTION MARK - # (LP = Lepcha)

: "ᰀ" U1C00 # LEPCHA LETTER KA @@ -9144,6 +9150,12 @@ include "%L" : "ꯩ" UABE9 # MEETEI MAYEK VOWEL SIGN CHEINAP : "ꯪ" UABEA # MEETEI MAYEK VOWEL SIGN NUNG +# Punctuation + + <2> : "꯫" UABEB # MEETEI MAYEK CHEIKHEI + : "꯬" UABEC # MEETEI MAYEK LUM IYEK + : "꯭" UABED # MEETEI MAYEK APUN IYEK + # Numerals <0> : "꯰" UABF0 # MEETEI MAYEK DIGIT ZERO @@ -9157,12 +9169,6 @@ include "%L" <8> : "꯸" UABF8 # MEETEI MAYEK DIGIT EIGHT <9> : "꯹" UABF9 # MEETEI MAYEK DIGIT NINE -# Punctuation - - <2> : "꯫" UABEB # MEETEI MAYEK CHEIKHEI - : "꯬" UABEC # MEETEI MAYEK LUM IYEK - : "꯭" UABED # MEETEI MAYEK APUN IYEK - # (MR = Marchen) # Consonants @@ -9331,6 +9337,14 @@ include "%L" : "𑑈" U11448 # NEWA SIGN FINAL ANUSVARA : "𑑉" U11449 # NEWA OM +# Punctuation + + : "𑑋" U1144B # NEWA DANDA + <2> : "𑑌" U1144C # NEWA DOUBLE DANDA + : "𑑍" U1144D # NEWA COMMA + : "𑑏" U1144F # NEWA ABBREVIATION SIGN + : "𑑚" U1145A # NEWA DOUBLE COMMA + # Numerals <0> : "𑑐" U11450 # NEWA DIGIT ZERO @@ -9344,14 +9358,6 @@ include "%L" <8> : "𑑘" U11458 # NEWA DIGIT EIGHT <9> : "𑑙" U11459 # NEWA DIGIT NINE -# Punctuation - - : "𑑋" U1144B # NEWA DANDA - <2> : "𑑌" U1144C # NEWA DOUBLE DANDA - : "𑑍" U1144D # NEWA COMMA - : "𑑏" U1144F # NEWA ABBREVIATION SIGN - : "𑑚" U1145A # NEWA DOUBLE COMMA - # (N = Nandinagari) # Consonants @@ -9639,6 +9645,10 @@ include "%L" : "ඃ" U0D83 # SINHALA SIGN VISARGAYA

: "්" U0DCA # SINHALA SIGN AL-LAKUNA +# Punctuation + + : "෴" U0DF4 # SINHALA PUNCTUATION KUNDDALIYA + # Numerals <0> : "෦" U0DE6 # SINHALA LITH DIGIT ZERO @@ -9843,6 +9853,13 @@ include "%L" : "𑇊" U111CA # SHARADA SIGN NUKTA : "𑇏" U111CF # SHARADA SIGN INVERTED CANDRABINDU +# Punctuation + + : "𑇅" U111C5 # SHARADA DANDA + <2> : "𑇆" U111C6 # SHARADA DOUBLE DANDA + : "𑇇" U111C7 # SHARADA ABBREVIATION SIGN + : "𑇈" U111C8 # SHARADA SEPARATOR + # Numerals <0> : "𑇐" U111D0 # SHARADA DIGIT ZERO @@ -9856,13 +9873,6 @@ include "%L" <8> : "𑇘" U111D8 # SHARADA DIGIT EIGHT <9> : "𑇙" U111D9 # SHARADA DIGIT NINE -# Punctuation - - : "𑇅" U111C5 # SHARADA DANDA - <2> : "𑇆" U111C6 # SHARADA DOUBLE DANDA - : "𑇇" U111C7 # SHARADA ABBREVIATION SIGN - : "𑇈" U111C8 # SHARADA SEPARATOR - # (SN = Sylheti Nagri) # Consonants @@ -10006,6 +10016,11 @@ include "%L" : "꣄" UA8C4 # SAURASHTRA SIGN VIRAMA : "ꣅ" UA8C5 # SAURASHTRA SIGN CANDRABINDU +# Punctuation + + : "꣎" UA8CE # SAURASHTRA DANDA + <2> : "꣏" UA8CF # SAURASHTRA DOUBLE DANDA + # Numerals <0> : "꣐" UA8D0 # SAURASHTRA DIGIT ZERO @@ -10019,11 +10034,6 @@ include "%L" <8> : "꣘" UA8D8 # SAURASHTRA DIGIT EIGHT <9> : "꣙" UA8D9 # SAURASHTRA DIGIT NINE -# Punctuation - - : "꣎" UA8CE # SAURASHTRA DANDA - <2> : "꣏" UA8CF # SAURASHTRA DOUBLE DANDA - # (t = Tamil) # Consonants @@ -17356,6 +17366,12 @@ include "HangulSyllables" <0> : "𑽁" U11F41 # KAWI SIGN KILLER : "𑽂" U11F42 # KAWI CONJOINER +# Punctuation + + : "𑽃" U11F43 # KAWI DANDA + <2> : "𑽄" U11F44 # KAWI DOUBLE DANDA + : "𑽅" U11F45 # KAWI PUNCTUATION SECTION MARKER + # Numerals <0> : "𑽐" U11F50 # KAWI DIGIT ZERO @@ -17369,12 +17385,6 @@ include "HangulSyllables" <8> : "𑽘" U11F58 # KAWI DIGIT EIGHT <9> : "𑽙" U11F59 # KAWI DIGIT NINE -# Punctuation - - : "𑽃" U11F43 # KAWI DANDA - <2> : "𑽄" U11F44 # KAWI DOUBLE DANDA - : "𑽅" U11F45 # KAWI PUNCTUATION SECTION MARKER - # (b = Babayin) : "ᜃ" U1703 # TAGALOG LETTER KA @@ -19713,6 +19723,11 @@ include "HangulSyllables" <4> : "᩸" U1A78 # TAI THAM SIGN KHUEN TONE-4 <5> : "᩹" U1A79 # TAI THAM SIGN KHUEN TONE-5 +# Punctuation + + : "᪨" U1AA8 # TAI THAM SIGN KAAN + <2> : "᪩" U1AA9 # TAI THAM SIGN KAANKUU + # Numerals <0> : "᪀" U1A80 # TAI THAM HORA DIGIT ZERO @@ -19737,11 +19752,6 @@ include "HangulSyllables" <8> : "᪘" U1A98 # TAI THAM THAM DIGIT EIGHT <9> : "᪙" U1A99 # TAI THAM THAM DIGIT NINE -# Punctuation - - : "᪨" U1AA8 # TAI THAM SIGN KAAN - <2> : "᪩" U1AA9 # TAI THAM SIGN KAANKUU - # (v = Tai Viet) : "ꪀ" UAA80 # TAI VIET LETTER LOW KO @@ -26070,6 +26080,8 @@ include "Logograms" <1> <8> : "🔞" U1F51E # NO ONE UNDER EIGHTEEN SYMBOL (alternatively NSFW) <0> : "🚫" U1F6AB # NO ENTRY SIGN <0> : "🛇" U1F6C7 # PROHIBITED SIGN + : "🄏" U1F10F # CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH + : "🅮" U1F16E # CIRCLED C WITH OVERLAID BACKSLASH # Shapes (see geometric shapes section for many more) diff --git a/Compose.md b/Compose.md index 110205a..636e6cd 100644 --- a/Compose.md +++ b/Compose.md @@ -9,29 +9,29 @@ The file aims for memorability and consistency. As a result, a small number of t ## Table of contents and examples -### Common script characters (604) +### Common script characters (607) * **Spaces** (15): NBSP MMSP ZWSP etc * **Dashes** (12): – — ⁓ ⸻ etc * **Brackets** (40): ⟨ ⟦ ⸨ ⌈ ⫽ etc * **Bullets** (11): • ‣ ⁃ ◉ etc * **General punctuation** (60): ⁁ ⁂ ⸎ etc -* **Currency symbols** (21): ₱ ₿ ₪ etc +* **Currency symbols** (24): ₱ ₿ ₪ etc * **Combining diacritics** (174): ń n̊ n̫ m͡n Zǎ̺̣͆̚l⃪ğ̶̍ö̱̰̥̂̃ etc * **Control characters** (271): RLI PDI ZWJ VS16 etc -### Latin script characters (2754) +### Latin script characters (2756) * **International Phonetic Alphabet** (143): ⫽ˈɹɛ.dɪt⫽ [aɪ̯ pʰiː eɪ̯] etc * **Latin script letters** (514): Ƿ Ȝ ␢ ȵ etc * **Mathematical alphanumerics** (794): 𝐀 𝐴 𝑨 A 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 𜳖 etc -* **Enclosed alphanumerics** (288): ⓼ 🅛 🆛 ⒜ ⒓ etc +* **Enclosed alphanumerics** (290): ⓼ 🅛 🆛 ⒜ ⒓ etc * **Superscripts and subscripts** (309): ᵃ ᴬ ₐ ᴀ ◌ͣ etc * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32960) +### Non-Latin script characters (32961) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3192): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3193): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc From fa8851cf11682e29f5c18f2cb4a8674bf86ff642 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 10:15:12 +0100 Subject: [PATCH 12/21] Gaps --- Compose | 11 ++++++++++- Compose.md | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Compose b/Compose index 1d472f0..3776655 100644 --- a/Compose +++ b/Compose @@ -9247,6 +9247,7 @@ include "%L" # Other signs : "𑱱" U11C71 # MARCHEN MARK SHAD + : "𑱰" U11C70 # MARCHEN HEAD MARK : "𑲵" U11CB5 # MARCHEN SIGN ANUSVARA : "𑲶" U11CB6 # MARCHEN SIGN CANDRABINDU @@ -20648,6 +20649,7 @@ include "HangulSyllables" <0> : "᧐" U19D0 # NEW TAI LUE DIGIT ZERO <1> : "᧑" U19D1 # NEW TAI LUE DIGIT ONE + <1> : "᧚" U19DA # NEW TAI LUE THAM DIGIT ONE <2> : "᧒" U19D2 # NEW TAI LUE DIGIT TWO <3> : "᧓" U19D3 # NEW TAI LUE DIGIT THREE <4> : "᧔" U19D4 # NEW TAI LUE DIGIT FOUR @@ -20837,6 +20839,8 @@ include "HangulSyllables"

: "ꡱ" UA871 # PHAGS-PA SUBJOINED LETTER RA

: "ꡳ" UA873 # PHAGS-PA LETTER CANDRABINDU +

: "꡴" UA874 # PHAGS-PA SINGLE HEAD MARK +

<2> : "꡵" UA875 # PHAGS-PA DOUBLE HEAD MARK # (s = Soyombo) @@ -22405,9 +22409,14 @@ include "HangulSyllables" : "𑨽" U11A3D # ZANABAZAR SQUARE CLUSTER-FINAL LETTER LA : "𑨾" U11A3E # ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA + : "𑩇" U11A47 # ZANABAZAR SQUARE SUBJOINER + : "𑩂" U11A42 # ZANABAZAR SQUARE MARK SHAD <2> : "𑩃" U11A43 # ZANABAZAR SQUARE MARK DOUBLE SHAD - : "𑩇" U11A47 # ZANABAZAR SQUARE SUBJOINER + : "𑨿" U11A3F # ZANABAZAR SQUARE INITIAL HEAD MARK + : "𑩀" U11A40 # ZANABAZAR SQUARE CLOSING HEAD MARK + : "𑩅" U11A45 # ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK + : "𑩆" U11A46 # ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK ######################################## # (Z) Ge'ez (and other nearby scripts) # diff --git a/Compose.md b/Compose.md index 636e6cd..7a62f9f 100644 --- a/Compose.md +++ b/Compose.md @@ -28,10 +28,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32961) +### Non-Latin script characters (32969) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3193): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3194): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc @@ -46,7 +46,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (382): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (502): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2439): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2446): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From ec8206e36a2049a4dfc643c0caeebc8806c32d74 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 11:11:24 +0100 Subject: [PATCH 13/21] More gaps --- Compose | 50 +++++++++++++++++++++++++++++++++++++------------- Compose.md | 12 ++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/Compose b/Compose index 3776655..fea4e8a 100644 --- a/Compose +++ b/Compose @@ -8396,23 +8396,26 @@ include "%L" : "𖵡" U16D61 # KIRAT RAI LETTER SHA : "𖵢" U16D62 # KIRAT RAI LETTER HA -# Dependent vowels - - : "𖵣" U16D63 # KIRAT RAI VOWEL SIGN AA - : "𖵤" U16D64 # KIRAT RAI VOWEL SIGN I - : "𖵥" U16D65 # KIRAT RAI VOWEL SIGN U - : "𖵦" U16D66 # KIRAT RAI VOWEL SIGN UE - : "𖵧" U16D67 # KIRAT RAI VOWEL SIGN E - : "𖵨" U16D68 # KIRAT RAI VOWEL SIGN AI - : "𖵩" U16D69 # KIRAT RAI VOWEL SIGN O - : "𖵪" U16D6A # KIRAT RAI VOWEL SIGN AU - <0> : "𖵫" U16D6B # KIRAT RAI SIGN VIRAMA +# Modifying vowels (note that these aren't combining characters) + + : "𖵣" U16D63 # KIRAT RAI VOWEL SIGN AA + : "𖵤" U16D64 # KIRAT RAI VOWEL SIGN I + : "𖵥" U16D65 # KIRAT RAI VOWEL SIGN U + : "𖵦" U16D66 # KIRAT RAI VOWEL SIGN UE + : "𖵧" U16D67 # KIRAT RAI VOWEL SIGN E + : "𖵨" U16D68 # KIRAT RAI VOWEL SIGN AI + : "𖵩" U16D69 # KIRAT RAI VOWEL SIGN O + : "𖵪" U16D6A # KIRAT RAI VOWEL SIGN AU + <0> : "𖵫" U16D6B # KIRAT RAI SIGN VIRAMA # Other signs : "𖵀" U16D40 # KIRAT RAI SIGN ANUSVARA - : "𖵂" U16D42 # KIRAT RAI SIGN VISARGA - : "𖵫" U16D6B # KIRAT RAI SIGN VIRAMA + : "𖵁" U16D41 # KIRAT RAI SIGN TONPI + : "𖵂" U16D42 # KIRAT RAI SIGN VISARGA + : "𖵫" U16D6B # KIRAT RAI SIGN VIRAMA + : "𖵬" U16D6C # KIRAT RAI SIGN SAAT + : "𖵭" U16D6D # KIRAT RAI SIGN YUPI (for 𖵀 use bKR.[Return]) # Punctuation @@ -8956,6 +8959,9 @@ include "%L" : "𑴯" U11D2F # MASARAM GONDI LETTER JNYA : "𑴰" U11D30 # MASARAM GONDI LETTER TRA + : "𑵆" U11D46 # MASARAM GONDI REPHA + : "𑵇" U11D47 # MASARAM GONDI RA-KARA + # Independent vowels : "𑴀" U11D00 # MASARAM GONDI LETTER A @@ -9514,8 +9520,10 @@ include "%L" : "ୄ" U0B44 # ORIYA VOWEL SIGN VOCALIC RR (conflicts) : "େ" U0B47 # ORIYA VOWEL SIGN E (conflicts) : "ୈ" U0B48 # ORIYA VOWEL SIGN AI (conflicts) + : "ୖ" U0B56 # ORIYA AI LENGTH MARK (conflicts) : "ୋ" U0B4B # ORIYA VOWEL SIGN O (conflicts) : "ୌ" U0B4C # ORIYA VOWEL SIGN AU (conflicts) + : "ୗ" U0B57 # ORIYA AU LENGTH MARK (conflicts) : "ୢ" U0B62 # ORIYA VOWEL SIGN VOCALIC L (conflicts) : "ୣ" U0B63 # ORIYA VOWEL SIGN VOCALIC LL (conflicts) <0> : "୍" U0B4D # ORIYA SIGN VIRAMA (conflicts) @@ -9528,8 +9536,10 @@ include "%L" <8> : "ଃ" U0B03 # ORIYA SIGN VISARGA (conflicts) : "଼" U0B3C # ORIYA SIGN NUKTA (conflicts) : "୍" U0B4D # ORIYA SIGN VIRAMA (conflicts) + : "୕" U0B55 # ORIYA SIGN OVERLINE (conflicts) : "ଽ" U0B3D # ORIYA SIGN AVAGRAHA (conflicts) +

: "୰" U0B70 # ORIYA ISSHAR (conflicts) # Numerals @@ -17620,6 +17630,11 @@ include "HangulSyllables" <0> : "᯳" U1BF3 # BATAK PANONGONAN (VIRAMA) <0> : "᯳" U1BF3 # BATAK PANONGONAN (VIRAMA) + : "᯼" U1BFC # BATAK SYMBOL BINDU NA METEK + : "᯽" U1BFD # BATAK SYMBOL BINDU PINARBORAS + : "᯾" U1BFE # BATAK SYMBOL BINDU JUDUL (TITLE) + : "᯿" U1BFF # BATAK SYMBOL BINDU PANGOLAT (TRAILING) + # (h = Hanunoo) : "ᜣ" U1723 # HANUNOO LETTER KA @@ -18221,6 +18236,7 @@ include "HangulSyllables" : "𐲕" U10C95 # OLD HUNGARIAN CAPITAL LETTER UNK

: "𐲡" U10CA1 # OLD HUNGARIAN CAPITAL LETTER EMP : "𐲧" U10CA7 # OLD HUNGARIAN CAPITAL LETTER ENT +

: "𐲱" U10CB1 # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN : "𐲲" U10CB2 # OLD HUNGARIAN CAPITAL LETTER US : "𐳃" U10CC3 # OLD HUNGARIAN SMALL LETTER AMB : "𐳅" U10CC5 # OLD HUNGARIAN SMALL LETTER ENC @@ -18228,6 +18244,7 @@ include "HangulSyllables" : "𐳕" U10CD5 # OLD HUNGARIAN SMALL LETTER UNK

: "𐳡" U10CE1 # OLD HUNGARIAN SMALL LETTER EMP : "𐳧" U10CE7 # OLD HUNGARIAN SMALL LETTER ENT +

: "𐳱" U10CF1 # OLD HUNGARIAN SMALL LETTER ENT-SHAPED SIGN : "𐳲" U10CF2 # OLD HUNGARIAN SMALL LETTER US # (o = Ogham) @@ -19829,6 +19846,9 @@ include "HangulSyllables" : "ꫛ" UAADB # TAI VIET SYMBOL KON : "ꫜ" UAADC # TAI VIET SYMBOL NUENG + : "ꫝ" UAADD # TAI VIET SYMBOL SAM + : "꫞" UAADE # TAI VIET SYMBOL HO HOI + : "꫟" UAADF # TAI VIET SYMBOL KOI KOI ################################################## # (z) Zhuyin/Bopomofo (and other nearby scripts) # @@ -23076,6 +23096,8 @@ include "HangulSyllables" : "𐵡" U10D61 # GARAY CAPITAL LETTER NA

: "𐵢" U10D62 # GARAY CAPITAL LETTER PA : "𐵣" U10D63 # GARAY CAPITAL LETTER HA + <2> : "𐵤" U10D64 # GARAY CAPITAL LETTER OLD KA + <2> : "𐵥" U10D65 # GARAY CAPITAL LETTER OLD NA : "𐵪" U10D6A # GARAY CONSONANT GEMINATION MARK : "𐵫" U10D6B # GARAY COMBINING DOT ABOVE @@ -23104,6 +23126,8 @@ include "HangulSyllables" : "𐶁" U10D81 # GARAY SMALL LETTER NA

: "𐶂" U10D82 # GARAY SMALL LETTER PA : "𐶃" U10D83 # GARAY SMALL LETTER HA + <2> : "𐶄" U10D84 # GARAY SMALL LETTER OLD KA + <2> : "𐶅" U10D85 # GARAY SMALL LETTER OLD NA : "𐶎" U10D8E # GARAY PLUS SIGN : "𐶏" U10D8F # GARAY MINUS SIGN diff --git a/Compose.md b/Compose.md index 7a62f9f..46cf960 100644 --- a/Compose.md +++ b/Compose.md @@ -28,24 +28,24 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32969) +### Non-Latin script characters (32991) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3194): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3203): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc -* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1326): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc +* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1330): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (866): 「レディット」 etc -* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (506): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc +* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (510): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc -* **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (382): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc +* **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (384): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc -* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (502): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc +* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (505): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc * **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2446): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From 47263a628fec29ef005c0231443d5320c45e2e08 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 11:42:09 +0100 Subject: [PATCH 14/21] More gaps --- Compose | 29 ++++++++++++++++++++++++++--- Compose.md | 8 ++++---- src/xcompose/__init__.py | 3 ++- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/Compose b/Compose index fea4e8a..4f6e116 100644 --- a/Compose +++ b/Compose @@ -6621,6 +6621,7 @@ include "%L" : "𑀲" U11032 # BRAHMI LETTER SA : "𑀳" U11033 # BRAHMI LETTER HA : "𑀴" U11034 # BRAHMI LETTER LLA + <2> : "𑁵" U11075 # BRAHMI LETTER OLD TAMIL LLA : "𑀵" U11035 # BRAHMI LETTER OLD TAMIL LLLA : "𑀶" U11036 # BRAHMI LETTER OLD TAMIL RRA : "𑀷" U11037 # BRAHMI LETTER OLD TAMIL NNNA @@ -6647,6 +6648,7 @@ include "%L" # Dependent vowels : "𑀸" U11038 # BRAHMI VOWEL SIGN AA + : "𑀹" U11039 # BRAHMI VOWEL SIGN BHATTIPROLU AA : "𑀺" U1103A # BRAHMI VOWEL SIGN I : "𑀻" U1103B # BRAHMI VOWEL SIGN II : "𑀼" U1103C # BRAHMI VOWEL SIGN U @@ -6670,6 +6672,7 @@ include "%L" : "𑀂" U11002 # BRAHMI SIGN VISARGA : "𑁆" U11046 # BRAHMI VIRAMA : "𑁰" U11070 # BRAHMI SIGN OLD TAMIL VIRAMA + : "𑁿" U1107F # BRAHMI NUMBER JOINER : "𑀃" U11003 # BRAHMI SIGN JIHVAMULIYA : "𑀄" U11004 # BRAHMI SIGN UPADHMANIYA @@ -6681,6 +6684,8 @@ include "%L" : "𑁉" U11049 # BRAHMI PUNCTUATION DOT : "𑁊" U1104A # BRAHMI PUNCTUATION DOUBLE DOT : "𑁋" U1104B # BRAHMI PUNCTUATION LINE +

: "𑁌" U1104C # BRAHMI PUNCTUATION CRESCENT BAR + : "𑁍" U1104D # BRAHMI PUNCTUATION LOTUS # Numbers @@ -18317,13 +18322,17 @@ include "HangulSyllables" # N = Norse (= Short Twig Younger Futhark) # D = Danish (= Long Branch Younger Futhark) # M = Medieval +# F = Frans Casket +# T = Tolkienian(!) # ␣ = majority form : "ᛅ" U16C5 # RUNIC LETTER LONG-BRANCH-AR AE : "ᚪ" U16AA # RUNIC LETTER AC A : "ᚨ" U16A8 # RUNIC LETTER ANSUZ A : "ᛆ" U16C6 # RUNIC LETTER SHORT-TWIG-AR A - : "ᚫ" U16AB # RUNIC LETTER AESC + : "ᛷ" U16F7 # RUNIC LETTER FRANKS CASKET AC + : "ᚫ" U16AB # RUNIC LETTER AESC + : "ᛸ" U16F8 # RUNIC LETTER FRANKS CASKET AESC : "ᛓ" U16D3 # RUNIC LETTER SHORT-TWIG-BJARKAN B : "ᛒ" U16D2 # RUNIC LETTER BERKANAN BEORC BJARKAN B : "ᚳ" U16B3 # RUNIC LETTER CEN @@ -18336,6 +18345,7 @@ include "HangulSyllables" : "ᛖ" U16D6 # RUNIC LETTER EHWAZ EH E : "ᛖ" U16D6 # RUNIC LETTER EHWAZ EH E : "ᛂ" U16C2 # RUNIC LETTER E + : "ᛶ" U16F6 # RUNIC LETTER FRANKS CASKET EH : "ᛠ" U16E0 # RUNIC LETTER EAR : "ᚠ" U16A0 # RUNIC LETTER FEHU FEOH FE F : "ᚸ" U16B8 # RUNIC LETTER GAR @@ -18345,13 +18355,15 @@ include "HangulSyllables" <2> : "ᚻ" U16BB # RUNIC LETTER HAEGL H : "ᚼ" U16BC # RUNIC LETTER LONG-BRANCH-HAGALL H : "ᚽ" U16BD # RUNIC LETTER SHORT-TWIG-HAGALL H - : "ᛁ" U16C1 # RUNIC LETTER ISAZ IS ISS I + : "ᛁ" U16C1 # RUNIC LETTER ISAZ IS ISS I + : "ᛵ" U16F5 # RUNIC LETTER FRANKS CASKET IS <1> : "ᛄ" U16C4 # RUNIC LETTER GER <2> : "ᛡ" U16E1 # RUNIC LETTER IOR : "ᛃ" U16C3 # RUNIC LETTER JERAN J : "ᛣ" U16E3 # RUNIC LETTER CALC : "ᚲ" U16B2 # RUNIC LETTER KAUNA : "ᚴ" U16B4 # RUNIC LETTER KAUN K + : "ᛱ" U16F1 # RUNIC LETTER K : "ᛚ" U16DA # RUNIC LETTER LAUKAZ LAGU LOGR L : "ᛘ" U16D8 # RUNIC LETTER LONG-BRANCH-MADR M : "ᛗ" U16D7 # RUNIC LETTER MANNAZ MAN M @@ -18367,9 +18379,11 @@ include "HangulSyllables" : "ᛟ" U16DF # RUNIC LETTER OTHALAN ETHEL O : "ᚮ" U16AE # RUNIC LETTER O : "ᚭ" U16AD # RUNIC LETTER SHORT-TWIG-OSS O + : "ᛴ" U16F4 # RUNIC LETTER FRANKS CASKET OS : "ᛟ" U16DF # RUNIC LETTER OTHALAN ETHEL O : "ᚯ" U16AF # RUNIC LETTER OE : "ᚰ" U16B0 # RUNIC LETTER ON + : "ᛳ" U16F3 # RUNIC LETTER OO

: "ᛈ" U16C8 # RUNIC LETTER PERTHO PEORTH P

: "ᛈ" U16C8 # RUNIC LETTER PERTHO PEORTH P

: "ᛕ" U16D5 # RUNIC LETTER OPEN-P @@ -18382,6 +18396,7 @@ include "HangulSyllables" : "ᛊ" U16CA # RUNIC LETTER SOWILO S : "ᛌ" U16CC # RUNIC LETTER SHORT-TWIG-SOL S : "ᛥ" U16E5 # RUNIC LETTER STAN + : "ᛲ" U16F2 # RUNIC LETTER SH : "ᛐ" U16D0 # RUNIC LETTER SHORT-TWIG-TYR T : "ᛏ" U16CF # RUNIC LETTER TIWAZ TIR TYR T : "ᚦ" U16A6 # RUNIC LETTER THURISAZ THURS THORN @@ -18402,6 +18417,10 @@ include "HangulSyllables" : "ᛧ" U16E7 # RUNIC LETTER SHORT-TWIG-YR : "ᛇ" U16C7 # RUNIC LETTER IWAZ EOH + <1> <6> : "ᛮ" U16EE # RUNIC ARLAUG SYMBOL + <1> <7> : "ᛯ" U16EF # RUNIC TVIMADUR SYMBOL + <1> <8> : "ᛰ" U16F0 # RUNIC BELGTHOR SYMBOL + # (t = Old Turkic) " used to indicate back vowel consonants : "𐰀" U10C00 # OLD TURKIC LETTER ORKHON A @@ -22374,6 +22393,7 @@ include "HangulSyllables" : "𑨇" U11A07 # ZANABAZAR SQUARE VOWEL SIGN AI : "𑨈" U11A08 # ZANABAZAR SQUARE VOWEL SIGN AU : "𑨊" U11A0A # ZANABAZAR SQUARE VOWEL LENGTH MARK + : "𑨉" U11A09 # ZANABAZAR SQUARE VOWEL SIGN REVERSED I <0> : "𑨴" U11A34 # ZANABAZAR SQUARE SIGN VIRAMA : "𑨋" U11A0B # ZANABAZAR SQUARE LETTER KA @@ -22418,8 +22438,9 @@ include "HangulSyllables" : "𑨲" U11A32 # ZANABAZAR SQUARE LETTER KSSA : "𑨳" U11A33 # ZANABAZAR SQUARE FINAL CONSONANT MARK - : "𑨴" U11A34 # ZANABAZAR SQUARE SIGN VIRAMA : "𑨵" U11A35 # ZANABAZAR SQUARE SIGN CANDRABINDU + : "𑨶" U11A36 # ZANABAZAR SQUARE SIGN CANDRABINDU WITH ORNAMENT + : "𑨷" U11A37 # ZANABAZAR SQUARE SIGN CANDRA WITH ORNAMENT : "𑨸" U11A38 # ZANABAZAR SQUARE SIGN ANUSVARA : "𑨹" U11A39 # ZANABAZAR SQUARE SIGN VISARGA @@ -22431,8 +22452,10 @@ include "HangulSyllables" : "𑩇" U11A47 # ZANABAZAR SQUARE SUBJOINER + : "𑩁" U11A41 # ZANABAZAR SQUARE MARK TSHEG : "𑩂" U11A42 # ZANABAZAR SQUARE MARK SHAD <2> : "𑩃" U11A43 # ZANABAZAR SQUARE MARK DOUBLE SHAD + : "𑩄" U11A44 # ZANABAZAR SQUARE MARK LONG TSHEG : "𑨿" U11A3F # ZANABAZAR SQUARE INITIAL HEAD MARK : "𑩀" U11A40 # ZANABAZAR SQUARE CLOSING HEAD MARK : "𑩅" U11A45 # ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK diff --git a/Compose.md b/Compose.md index 46cf960..66a92eb 100644 --- a/Compose.md +++ b/Compose.md @@ -28,10 +28,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (32991) +### Non-Latin script characters (33012) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3203): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3208): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc @@ -43,10 +43,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (510): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc -* **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (384): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc +* **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (505): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2446): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2451): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc diff --git a/src/xcompose/__init__.py b/src/xcompose/__init__.py index 57af303..b9fb070 100644 --- a/src/xcompose/__init__.py +++ b/src/xcompose/__init__.py @@ -211,7 +211,8 @@ def add( comment: str | None = None, ): """Utility function to simplify calling add independently.""" - if not KEYSYMS: read_keysyms() + if not KEYSYMS: + read_keysyms() add_fn( args=argparse.Namespace( value=value, From d31ea3bf0147dd69803067dc3b97e42bef27ddbd Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 19:57:12 +0100 Subject: [PATCH 15/21] More gaps --- Compose | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- Compose.md | 10 +++++----- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/Compose b/Compose index 4f6e116..9a34bb9 100644 --- a/Compose +++ b/Compose @@ -7678,7 +7678,8 @@ include "%L" : "ਬ" U0A2C # GURMUKHI LETTER BA (conflicts) : "ਭ" U0A2D # GURMUKHI LETTER BHA (conflicts) : "ਮ" U0A2E # GURMUKHI LETTER MA (conflicts) - : "ਯ" U0A2F # GURMUKHI LETTER YA (conflicts) + : "ਯ" U0A2F # GURMUKHI LETTER YA (conflicts) + : "ੵ" U0A75 # GURMUKHI SIGN YAKASH (conflicts) : "ਰ" U0A30 # GURMUKHI LETTER RA (conflicts) : "ਲ" U0A32 # GURMUKHI LETTER LA (conflicts) : "ਲ਼" U0A33 # GURMUKHI LETTER LLA (conflicts) @@ -7705,6 +7706,9 @@ include "%L" : "ਓ" U0A13 # GURMUKHI LETTER OO (conflicts) : "ਔ" U0A14 # GURMUKHI LETTER AU (conflicts) + : "ੲ" U0A72 # GURMUKHI IRI (conflicts) + : "ੳ" U0A73 # GURMUKHI URA (conflicts) + # Dependent vowels : "ਾ" U0A3E # GURMUKHI VOWEL SIGN AA (conflicts) @@ -7725,7 +7729,13 @@ include "%L" : "ਃ" U0A03 # GURMUKHI SIGN VISARGA (conflicts) : "਼" U0A3C # GURMUKHI SIGN NUKTA (conflicts) : "੍" U0A4D # GURMUKHI SIGN VIRAMA (conflicts) + : "ੑ" U0A51 # GURMUKHI SIGN UDAAT (conflicts) (HIGH TONE) : "ੰ" U0A70 # GURMUKHI TIPPI (conflicts) + : "ੱ" U0A71 # GURMUKHI ADDAK (conflicts) + : "ੱ" U0A71 # GURMUKHI ADDAK (conflicts) (GEMINATION PREFIX) + + : "ੴ" U0A74 # GURMUKHI EK ONKAR (conflicts) + : "ੴ" U0A74 # GURMUKHI EK ONKAR (conflicts) # Punctuation @@ -8266,8 +8276,15 @@ include "%L" # Punctuation + : "𐩐" U10A50 # KHAROSHTHI PUNCTUATION DOT + : "𐩑" U10A51 # KHAROSHTHI PUNCTUATION SMALL CIRCLE + : "𐩒" U10A52 # KHAROSHTHI PUNCTUATION CIRCLE + : "𐩓" U10A53 # KHAROSHTHI PUNCTUATION CRESCENT BAR + : "𐩔" U10A54 # KHAROSHTHI PUNCTUATION MANGALAM + : "𐩕" U10A55 # KHAROSHTHI PUNCTUATION LOTUS : "𐩖" U10A56 # KHAROSHTHI PUNCTUATION DANDA <2> : "𐩗" U10A57 # KHAROSHTHI PUNCTUATION DOUBLE DANDA + : "𐩘" U10A58 # KHAROSHTHI PUNCTUATION LINES # Numerals @@ -16452,6 +16469,22 @@ include "%L" : "〔" U3014 # LEFT TORTOISE SHELL BRACKET : "〕" U3015 # RIGHT TORTOISE SHELL BRACKET +# Tones used for writing Taiwanese + + <2> : "𚿰" U1AFF0 # KATAKANA LETTER MINNAN TONE-2 + <3> : "𚿱" U1AFF1 # KATAKANA LETTER MINNAN TONE-3 + <4> : "𚿲" U1AFF2 # KATAKANA LETTER MINNAN TONE-4 + <5> : "𚿳" U1AFF3 # KATAKANA LETTER MINNAN TONE-5 + <7> : "𚿵" U1AFF5 # KATAKANA LETTER MINNAN TONE-7 + <8> : "𚿶" U1AFF6 # KATAKANA LETTER MINNAN TONE-8 + <1> : "𚿷" U1AFF7 # KATAKANA LETTER MINNAN NASALIZED TONE-1 + <2> : "𚿸" U1AFF8 # KATAKANA LETTER MINNAN NASALIZED TONE-2 + <3> : "𚿹" U1AFF9 # KATAKANA LETTER MINNAN NASALIZED TONE-3 + <4> : "𚿺" U1AFFA # KATAKANA LETTER MINNAN NASALIZED TONE-4 + <5> : "𚿻" U1AFFB # KATAKANA LETTER MINNAN NASALIZED TONE-5 + <7> : "𚿽" U1AFFD # KATAKANA LETTER MINNAN NASALIZED TONE-7 + <8> : "𚿾" U1AFFE # KATAKANA LETTER MINNAN NASALIZED TONE-8 + # (f = fullwidth forms) : "!" UFF01 # FULLWIDTH EXCLAMATION MARK @@ -20865,6 +20898,12 @@ include "HangulSyllables"

: "ꡫ" UA86B # PHAGS-PA LETTER DDA

: "ꡬ" UA86C # PHAGS-PA LETTER NNA +

<2> : "ꡭ" UA86D # PHAGS-PA LETTER ALTERNATE YA +

<2> : "ꡮ" UA86E # PHAGS-PA LETTER VOICELESS SHA +

<2> : "ꡯ" UA86F # PHAGS-PA LETTER VOICED HA +

<2> : "ꡰ" UA870 # PHAGS-PA LETTER ASPIRATED FA +

<2> : "ꡲ" UA872 # PHAGS-PA SUPERFIXED LETTER RA +

: "ꡝ" UA85D # PHAGS-PA LETTER A

: "ꡞ" UA85E # PHAGS-PA LETTER I

: "ꡟ" UA85F # PHAGS-PA LETTER U @@ -20880,6 +20919,8 @@ include "HangulSyllables"

: "ꡳ" UA873 # PHAGS-PA LETTER CANDRABINDU

: "꡴" UA874 # PHAGS-PA SINGLE HEAD MARK

<2> : "꡵" UA875 # PHAGS-PA DOUBLE HEAD MARK +

: "꡶" UA876 # PHAGS-PA MARK SHAD +

<2> : "꡷" UA877 # PHAGS-PA MARK DOUBLE SHAD # (s = Soyombo) @@ -20965,6 +21006,12 @@ include "HangulSyllables" : "𑪚" U11A9A # SOYOMBO MARK TSHEG : "𑪛" U11A9B # SOYOMBO MARK SHAD <2> : "𑪜" U11A9C # SOYOMBO MARK DOUBLE SHAD + : "𑪝" U11A9D # SOYOMBO MARK PLUTA + <3> : "𑪞" U11A9E # SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME + <2> : "𑪟" U11A9F # SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME + <1> : "𑪠" U11AA0 # SOYOMBO HEAD MARK WITH MOON AND SUN + <1> : "𑪡" U11AA1 # SOYOMBO TERMINAL MARK-1 + <2> : "𑪢" U11AA2 # SOYOMBO TERMINAL MARK-2 # (t = Tibetan) follows ISO 15919 like other Brahmic scripts diff --git a/Compose.md b/Compose.md index 66a92eb..e4db13c 100644 --- a/Compose.md +++ b/Compose.md @@ -28,10 +28,10 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33012) +### Non-Latin script characters (33051) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3208): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3221): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc @@ -39,18 +39,18 @@ The file aims for memorability and consistency. As a result, a small number of t * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc -* **Japanese** (mostly kana) (866): 「レディット」 etc +* **Japanese** (mostly kana) (879): 「レディット」 etc * **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (510): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (505): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2451): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2464): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc -### Symbols (5049) +### Symbols (5051) * **Emoji** (1516): 😉 👌🏾 🇳🇿 🫡 👉🏼 💔 🤣 🤦🏽‍♀️ 🏳️‍⚧️ ✨ etc * **Sprites** (151): 🗫 🯅 ㋡ etc * **Math and science** (517): ρ(∂v⃗/∂t + (v⃗·∇)v) ∫πeⁱᶿ dθ etc. From 786c3c764ac426a418e694f3c2cacf801a192be5 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Sun, 27 Jul 2025 20:24:58 +0100 Subject: [PATCH 16/21] More gaps --- Compose | 17 +++++++++++++++++ Compose.md | 8 ++++---- README.md | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Compose b/Compose index 9a34bb9..fdd8425 100644 --- a/Compose +++ b/Compose @@ -17397,6 +17397,7 @@ include "HangulSyllables" # Dependent vowels : "𑼴" U11F34 # KAWI VOWEL SIGN AA + <2> : "𑼵" U11F35 # KAWI VOWEL SIGN ALTERNATE AA : "𑼶" U11F36 # KAWI VOWEL SIGN I : "𑼷" U11F37 # KAWI VOWEL SIGN II : "𑼸" U11F38 # KAWI VOWEL SIGN U @@ -17414,12 +17415,23 @@ include "HangulSyllables" : "𑽚" U11F5A # KAWI SIGN NUKTA <0> : "𑽁" U11F41 # KAWI SIGN KILLER : "𑽂" U11F42 # KAWI CONJOINER + : "𑼂" U11F02 # KAWI SIGN REPHA # Punctuation : "𑽃" U11F43 # KAWI DANDA <2> : "𑽄" U11F44 # KAWI DOUBLE DANDA : "𑽅" U11F45 # KAWI PUNCTUATION SECTION MARKER + <2> : "𑽆" U11F46 # KAWI PUNCTUATION ALTERNATE SECTION MARKER + : "𑽇" U11F47 # KAWI PUNCTUATION FLOWER + : "𑽈" U11F48 # KAWI PUNCTUATION SPACE FILLER + <1> : "𑽉" U11F49 # KAWI PUNCTUATION DOT + <2> : "𑽊" U11F4A # KAWI PUNCTUATION DOUBLE DOT + <3> : "𑽋" U11F4B # KAWI PUNCTUATION TRIPLE DOT + : "𑽌" U11F4C # KAWI PUNCTUATION CIRCLE + : "𑽍" U11F4D # KAWI PUNCTUATION FILLED CIRCLE + : "𑽎" U11F4E # KAWI PUNCTUATION SPIRAL + : "𑽏" U11F4F # KAWI PUNCTUATION CLOSING SPIRAL # Numerals @@ -23578,12 +23590,17 @@ include "HangulSyllables" : "ߝ" U07DD # NKO LETTER FA : "ߞ" U07DE # NKO LETTER KA : "ߟ" U07DF # NKO LETTER LA + : "ߠ" U07E0 # NKO LETTER NA WOLOSO : "ߡ" U07E1 # NKO LETTER MA : "ߢ" U07E2 # NKO LETTER NYA : "ߣ" U07E3 # NKO LETTER NA : "ߤ" U07E4 # NKO LETTER HA : "ߥ" U07E5 # NKO LETTER WA : "ߦ" U07E6 # NKO LETTER YA + : "ߧ" U07E7 # NKO LETTER NYA WOLOSO + <2> : "ߨ" U07E8 # NKO LETTER JONA JA + <2> : "ߩ" U07E9 # NKO LETTER JONA CHA + <2> : "ߪ" U07EA # NKO LETTER JONA RA : "߫" U07EB # NKO COMBINING SHORT HIGH TONE : "߬" U07EC # NKO COMBINING SHORT LOW TONE diff --git a/Compose.md b/Compose.md index e4db13c..c0e62ac 100644 --- a/Compose.md +++ b/Compose.md @@ -3,7 +3,7 @@ > Link: https://github.com/Udzu/xcompose/blob/master/Compose -This repo also hosts my personal .XCompose file, which contains over 42 000 memorable compose key sequences. To use them, copy all or parts of the config into your ~/.XCompose file. You can browse the sequences either directly in the config or using the [xcompose](https://github.com/Udzu/xcompose/) utility. Note that the shortcuts for Hangul syllables and logograms are in separate files. +This repo also hosts my personal .XCompose file, which contains over 43 000 memorable compose key sequences. To use them, copy all or parts of the config into your ~/.XCompose file. You can browse the sequences either directly in the config or using the [xcompose](https://github.com/Udzu/xcompose/) utility. Note that the shortcuts for Hangul syllables and logograms are in separate files. The file aims for memorability and consistency. As a result, a small number of the rules here conflict with the default system rules, with workarounds given in the comments (e.g. \'s usefulness for subscripting often interferes with its system use for macrons, for which \ works equally well as an alternative). @@ -28,19 +28,19 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33051) +### Non-Latin script characters (33068) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3221): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc -* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1330): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc +* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1335): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (879): 「レディット」 etc -* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (510): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc +* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (522): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc diff --git a/README.md b/README.md index 006def0..2c2285b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Simple utilities to help configure X11 compose key sequences. -> The repo also hosts a very extensive [.XCompose file](Compose) with over 42 000 new sequences; [more details here](Compose.md). +> The repo also hosts a very extensive [.XCompose file](Compose) with over 43 000 new sequences; [more details here](Compose.md). ## Installation From 12851ff661b0eb2229bcffe0a93c07fe4110fa40 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Mon, 28 Jul 2025 14:25:04 +0100 Subject: [PATCH 17/21] More gaps --- Compose | 102 ++++++++++++++++++++++++++++++++++++++++++++++------- Compose.md | 10 +++--- 2 files changed, 94 insertions(+), 18 deletions(-) diff --git a/Compose b/Compose index fdd8425..005befa 100644 --- a/Compose +++ b/Compose @@ -6023,7 +6023,9 @@ include "%L" : "ܑ" U0711 # SYRIAC LETTER SUPERSCRIPT ALAPH : "ܒ" U0712 # SYRIAC LETTER BETH : "ܓ" U0713 # SYRIAC LETTER GAMAL + <2> : "ܔ" U0714 # SYRIAC LETTER GAMAL GARSHUNI : "ܕ" U0715 # SYRIAC LETTER DALATH + <2> : "ܖ" U0716 # SYRIAC LETTER DOTLESS DALATH RISH : "ܗ" U0717 # SYRIAC LETTER HE : "ܘ" U0718 # SYRIAC LETTER WAW : "ܙ" U0719 # SYRIAC LETTER ZAIN @@ -6033,6 +6035,8 @@ include "%L" : "ܚ" U071A # SYRIAC LETTER HETH (ḥ) : "ܛ" U071B # SYRIAC LETTER TETH (ṭ) : "ܛ" U071B # SYRIAC LETTER TETH + <2> : "ܜ" U071C # SYRIAC LETTER TETH GARSHUNI (ṭ) + <2> : "ܜ" U071C # SYRIAC LETTER TETH GARSHUNI : "ܝ" U071D # SYRIAC LETTER YUDH : "ܞ" U071E # SYRIAC LETTER YUDH HE : "ܟ" U071F # SYRIAC LETTER KAPH @@ -6048,6 +6052,7 @@ include "%L" : "ܨ" U0728 # SYRIAC LETTER SADHE (ṣ) : "ܩ" U0729 # SYRIAC LETTER QAPH : "ܪ" U072A # SYRIAC LETTER RISH + <2> : "ܖ" U0716 # SYRIAC LETTER DOTLESS DALATH RISH : "ܫ" U072B # SYRIAC LETTER SHIN : "ܬ" U072C # SYRIAC LETTER TAW : "ܭ" U072D # SYRIAC LETTER PERSIAN BHETH @@ -6102,6 +6107,20 @@ include "%L" : "܍" U070D # SYRIAC HARKLEAN ASTERISCUS : "܏" U070F # SYRIAC ABBREVIATION MARK +# Malayalam letters (using equivalent abbreviations) + + : "ࡠ" U0860 # SYRIAC LETTER MALAYALAM NGA + : "ࡡ" U0861 # SYRIAC LETTER MALAYALAM JA + : "ࡢ" U0862 # SYRIAC LETTER MALAYALAM NYA + : "ࡣ" U0863 # SYRIAC LETTER MALAYALAM TTA + : "ࡤ" U0864 # SYRIAC LETTER MALAYALAM NNA + : "ࡥ" U0865 # SYRIAC LETTER MALAYALAM NNNA + : "ࡦ" U0866 # SYRIAC LETTER MALAYALAM BHA + : "ࡧ" U0867 # SYRIAC LETTER MALAYALAM RA + : "ࡨ" U0868 # SYRIAC LETTER MALAYALAM LLA + : "ࡩ" U0869 # SYRIAC LETTER MALAYALAM LLLA + : "ࡪ" U086A # SYRIAC LETTER MALAYALAM SSA + # (t = Neo-Tifinagh) : "ⴰ" U2D30 # TIFINAGH LETTER YA @@ -7649,6 +7668,16 @@ include "%L" <8> : "૮" U0AEE # GUJARATI DIGIT EIGHT (conflicts) <9> : "૯" U0AEF # GUJARATI DIGIT NINE (conflicts) +# Arabic transliteration + + <0> : "ૺ" U0AFA # GUJARATI SIGN SUKUN (conflicts) + : "ૻ" U0AFB # GUJARATI SIGN SHADDA (conflicts) + : "ૻ" U0AFB # GUJARATI SIGN SHADDA (conflicts) + : "ૼ" U0AFC # GUJARATI SIGN MADDAH (conflicts) + : "૽" U0AFD # GUJARATI SIGN THREE-DOT NUKTA ABOVE (conflicts) + : "૾" U0AFE # GUJARATI SIGN CIRCLE NUKTA ABOVE (conflicts) + : "૿" U0AFF # GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE (conflicts) + # (G = Gurmukhi) # Consonants @@ -8056,7 +8085,7 @@ include "%L" : "ಸ" U0CB8 # KANNADA LETTER SA : "ಹ" U0CB9 # KANNADA LETTER HA : "ೞ" U0CDE # KANNADA LETTER FA - <2> : "ೝ" U0CDD # KANNADA LETTER NAKAARA POLLU + <0> : "ೝ" U0CDD # KANNADA LETTER NAKAARA POLLU # Independent vowels @@ -8760,12 +8789,27 @@ include "%L" : "ഹ" U0D39 # MALAYALAM LETTER HA : "ഺ" U0D3A # MALAYALAM LETTER TTTA + : "ൎ" U0D4E # MALAYALAM LETTER DOT REPH + +# Chillu consonants + + <0> : "ൔ" U0D54 # MALAYALAM LETTER CHILLU M + <0> : "ൕ" U0D55 # MALAYALAM LETTER CHILLU Y + <0> : "ൖ" U0D56 # MALAYALAM LETTER CHILLU LLL + <0> : "ൺ" U0D7A # MALAYALAM LETTER CHILLU NN + <0> : "ൻ" U0D7B # MALAYALAM LETTER CHILLU N + <0> : "ർ" U0D7C # MALAYALAM LETTER CHILLU RR + <0> : "ൽ" U0D7D # MALAYALAM LETTER CHILLU L + <0> : "ൾ" U0D7E # MALAYALAM LETTER CHILLU LL + <0> : "ൿ" U0D7F # MALAYALAM LETTER CHILLU K + # Independent vowels : "അ" U0D05 # MALAYALAM LETTER A : "ആ" U0D06 # MALAYALAM LETTER AA : "ഇ" U0D07 # MALAYALAM LETTER I : "ഈ" U0D08 # MALAYALAM LETTER II + <2> : "ൟ" U0D5F # MALAYALAM LETTER ARCHAIC II : "ഉ" U0D09 # MALAYALAM LETTER U : "ഊ" U0D0A # MALAYALAM LETTER UU : "ഋ" U0D0B # MALAYALAM LETTER VOCALIC R @@ -8807,9 +8851,12 @@ include "%L" : "്" U0D4D # MALAYALAM SIGN VIRAMA : "഻" U0D3B # MALAYALAM SIGN VERTICAL BAR VIRAMA <0> <0> : "഼" U0D3C # MALAYALAM SIGN CIRCULAR VIRAMA + : "ൗ" U0D57 # MALAYALAM AU LENGTH MARK : "ഄ" U0D04 # MALAYALAM LETTER VEDIC ANUSVARA : "ഽ" U0D3D # MALAYALAM SIGN AVAGRAHA +

: "൏" U0D4F # MALAYALAM SIGN PARA + : "൹" U0D79 # MALAYALAM DATE MARK # Numerals @@ -10214,6 +10261,8 @@ include "%L" : "హ" U0C39 # TELUGU LETTER HA : "ౘ" U0C58 # TELUGU LETTER TSA : "ౙ" U0C59 # TELUGU LETTER DZA + : "ౚ" U0C5A # TELUGU LETTER RRRA + <0> : "ౝ" U0C5D # TELUGU LETTER NAKAARA POLLU # Independent vowels @@ -10262,8 +10311,11 @@ include "%L" : "ఄ" U0C04 # TELUGU SIGN COMBINING ANUSVARA ABOVE : "఼" U0C3C # TELUGU SIGN NUKTA : "్" U0C4D # TELUGU SIGN VIRAMA + : "ౕ" U0C55 # TELUGU LENGTH MARK + : "ౖ" U0C56 # TELUGU AI LENGTH MARK : "ఽ" U0C3D # TELUGU SIGN AVAGRAHA + : "౷" U0C77 # TELUGU SIGN SIDDHAM # Numerals @@ -10278,6 +10330,15 @@ include "%L" <8> : "౮" U0C6E # TELUGU DIGIT EIGHT <9> : "౯" U0C6F # TELUGU DIGIT NINE + <0> <4> : "౸" U0C78 # TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR + <1> <4> : "౹" U0C79 # TELUGU FRACTION DIGIT ONE FOR ODD POWERS OF FOUR + <2> <4> : "౺" U0C7A # TELUGU FRACTION DIGIT TWO FOR ODD POWERS OF FOUR + <3> <4> : "౻" U0C7B # TELUGU FRACTION DIGIT THREE FOR ODD POWERS OF FOUR + <1> <1> <6> : "౼" U0C7C # TELUGU FRACTION DIGIT ONE FOR EVEN POWERS OF FOUR + <2> <1> <6> : "౽" U0C7D # TELUGU FRACTION DIGIT TWO FOR EVEN POWERS OF FOUR + <3> <1> <6> : "౾" U0C7E # TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR + : "౿" U0C7F # TELUGU SIGN TUUMU + # (TK = Takri) # Consonants @@ -20752,6 +20813,7 @@ include "HangulSyllables"

: "𖼁" U16F01 # MIAO LETTER BA

: "𖼃" U16F03 # MIAO LETTER PLA

: "𖼄" U16F04 # MIAO LETTER MA +

<2> : "𖼆" U16F06 # MIAO LETTER ARCHAIC MA

: "𖼅" U16F05 # MIAO LETTER MHA

: "𖼇" U16F07 # MIAO LETTER FA

: "𖼈" U16F08 # MIAO LETTER VA @@ -20761,6 +20823,7 @@ include "HangulSyllables"

: "𖼎" U16F0E # MIAO LETTER TTA

: "𖼏" U16F0F # MIAO LETTER DDA

: "𖼐" U16F10 # MIAO LETTER NA +

<2> : "𖼓" U16F13 # MIAO LETTER ARCHAIC NA

: "𖼑" U16F11 # MIAO LETTER NHA

: "𖼔" U16F14 # MIAO LETTER NNA

: "𖼕" U16F15 # MIAO LETTER NNHA @@ -20777,6 +20840,7 @@ include "HangulSyllables"

: "𖼡" U16F21 # MIAO LETTER QA

: "𖼢" U16F22 # MIAO LETTER QGA

: "𖼣" U16F23 # MIAO LETTER NGA +

<2> : "𖼥" U16F25 # MIAO LETTER ARCHAIC NGA

: "𖼤" U16F24 # MIAO LETTER NGHA

: "𖼦" U16F26 # MIAO LETTER HA

: "𖼧" U16F27 # MIAO LETTER XA @@ -20858,18 +20922,25 @@ include "HangulSyllables"

: "𖾆" U16F86 # MIAO VOWEL SIGN IONG

: "𖾇" U16F87 # MIAO VOWEL SIGN UI -

: "𖾏" U16F8F # MIAO TONE RIGHT -

: "𖾐" U16F90 # MIAO TONE TOP RIGHT -

: "𖾑" U16F91 # MIAO TONE ABOVE -

: "𖾒" U16F92 # MIAO TONE BELOW - -

<2> : "𖾓" U16F93 # MIAO LETTER TONE-2 -

<3> : "𖾔" U16F94 # MIAO LETTER TONE-3 -

<4> : "𖾕" U16F95 # MIAO LETTER TONE-4 -

<5> : "𖾖" U16F96 # MIAO LETTER TONE-5 -

<6> : "𖾗" U16F97 # MIAO LETTER TONE-6 -

<7> : "𖾘" U16F98 # MIAO LETTER TONE-7 -

<8> : "𖾙" U16F99 # MIAO LETTER TONE-8 +

: "𖾏" U16F8F # MIAO TONE RIGHT +

: "𖾐" U16F90 # MIAO TONE TOP RIGHT +

: "𖾑" U16F91 # MIAO TONE ABOVE +

: "𖾒" U16F92 # MIAO TONE BELOW + +

<2> : "𖾓" U16F93 # MIAO LETTER TONE-2 +

<3> : "𖾔" U16F94 # MIAO LETTER TONE-3 +

<4> : "𖾕" U16F95 # MIAO LETTER TONE-4 +

<5> : "𖾖" U16F96 # MIAO LETTER TONE-5 +

<6> : "𖾗" U16F97 # MIAO LETTER TONE-6 +

<7> : "𖾘" U16F98 # MIAO LETTER TONE-7 +

<8> : "𖾙" U16F99 # MIAO LETTER TONE-8 + +

<1> : "𖾚" U16F9A # MIAO LETTER REFORMED TONE-1 +

<2> : "𖾛" U16F9B # MIAO LETTER REFORMED TONE-2 +

<4> : "𖾜" U16F9C # MIAO LETTER REFORMED TONE-4 +

<5> : "𖾝" U16F9D # MIAO LETTER REFORMED TONE-5 +

<6> : "𖾞" U16F9E # MIAO LETTER REFORMED TONE-6 +

<8> : "𖾟" U16F9F # MIAO LETTER REFORMED TONE-8 # (P = ʼPhags-Pa) follows ISO 15919 like other Brahmic scripts @@ -23619,6 +23690,11 @@ include "HangulSyllables"

: "߷" U07F7 # NKO SYMBOL GBAKURUNEN : "؟" U061F # ARABIC QUESTION MARK : "؛" U061B # ARABIC SEMICOLON + <9> <2> : "߶" U07F6 # NKO SYMBOL OO DENNEN + : "ߺ" U07FA # NKO LAJANYALAN + : "߽" U07FD # NKO DANTAYALAN + : "߾" U07FE # NKO DOROME SIGN (đ) + : "߿" U07FF # NKO TAMAN SIGN (ŧ) <0> : "߀" U07C0 # NKO DIGIT ZERO <1> : "߁" U07C1 # NKO DIGIT ONE diff --git a/Compose.md b/Compose.md index c0e62ac..125b8b7 100644 --- a/Compose.md +++ b/Compose.md @@ -28,14 +28,14 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33068) -* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (1992): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc +### Non-Latin script characters (33129) +* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2006): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3221): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3254): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc -* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1335): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc +* **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1340): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc @@ -46,7 +46,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc * **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (505): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2464): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2473): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From 9329998890b3ef58700b5f648a017b541dd5ddc7 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Tue, 29 Jul 2025 11:24:54 +0100 Subject: [PATCH 18/21] More gaps --- Compose | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++---- Compose.md | 8 +++---- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/Compose b/Compose index 005befa..3460df1 100644 --- a/Compose +++ b/Compose @@ -9887,6 +9887,9 @@ include "%L" : "𑆱" U111B1 # SHARADA LETTER SA : "𑆲" U111B2 # SHARADA LETTER HA + : "𑇂" U111C2 # SHARADA SIGN JIHVAMULIYA + : "𑇃" U111C3 # SHARADA SIGN UPADHMANIYA + # Independent vowels : "𑆃" U11183 # SHARADA LETTER A @@ -9930,7 +9933,10 @@ include "%L" : "𑆂" U11182 # SHARADA SIGN VISARGA : "𑇁" U111C1 # SHARADA SIGN AVAGRAHA : "𑇄" U111C4 # SHARADA OM + : "𑇉" U111C9 # SHARADA SANDHI MARK : "𑇊" U111CA # SHARADA SIGN NUKTA + : "𑇋" U111CB # SHARADA VOWEL MODIFIER MARK + : "𑇌" U111CC # SHARADA EXTRA SHORT VOWEL MARK : "𑇏" U111CF # SHARADA SIGN INVERTED CANDRABINDU # Punctuation @@ -9939,6 +9945,14 @@ include "%L" <2> : "𑇆" U111C6 # SHARADA DOUBLE DANDA : "𑇇" U111C7 # SHARADA ABBREVIATION SIGN : "𑇈" U111C8 # SHARADA SEPARATOR + : "𑇍" U111CD # SHARADA SUTRA MARK + : "𑇚" U111DA # SHARADA EKAM + : "𑇛" U111DB # SHARADA SIGN SIDDHAM + : "𑇜" U111DC # SHARADA HEADSTROKE + : "𑇜" U111DC # SHARADA HEADSTROKE + : "𑇝" U111DD # SHARADA CONTINUATION SIGN + : "𑇞" U111DE # SHARADA SECTION MARK-1 + <2> : "𑇟" U111DF # SHARADA SECTION MARK-2 # Numerals @@ -11122,8 +11136,10 @@ include "%L" : "ꙅ" UA645 # CYRILLIC SMALL LETTER REVERSED DZE : "Ꚃ" UA682 # CYRILLIC CAPITAL LETTER DZWE : "ꚃ" UA683 # CYRILLIC SMALL LETTER DZWE - : "Ꚉ" UA688 # CYRILLIC CAPITAL LETTER DZZE - : "ꚉ" UA689 # CYRILLIC SMALL LETTER DZZE + : "Ꚉ" UA688 # CYRILLIC CAPITAL LETTER DZZE + : "ꚉ" UA689 # CYRILLIC SMALL LETTER DZZE + : "Ԫ" U052A # CYRILLIC CAPITAL LETTER DZZHE + : "ԫ" U052B # CYRILLIC SMALL LETTER DZZHE : "Ә" U04D8 # CYRILLIC CAPITAL LETTER SCHWA : "ә" U04D9 # CYRILLIC SMALL LETTER SCHWA : "Һ" U04BA # CYRILLIC CAPITAL LETTER SHHA (conflict) @@ -11138,10 +11154,18 @@ include "%L" : "Ԕ" U0514 # CYRILLIC CAPITAL LETTER LHA : "ԕ" U0515 # CYRILLIC SMALL LETTER LHA : "ԕ" U0515 # CYRILLIC SMALL LETTER LHA + : "Ҥ" U04A4 # CYRILLIC CAPITAL LIGATURE EN GHE + : "ҥ" U04A5 # CYRILLIC SMALL LIGATURE EN GHE : "Ԗ" U0516 # CYRILLIC CAPITAL LETTER RHA : "Ԗ" U0516 # CYRILLIC CAPITAL LETTER RHA : "ԗ" U0517 # CYRILLIC SMALL LETTER RHA : "ԗ" U0517 # CYRILLIC SMALL LETTER RHA + : "Ꚗ" UA696 # CYRILLIC CAPITAL LETTER SHWE + : "ꚗ" UA697 # CYRILLIC SMALL LETTER SHWE + : "Ҵ" U04B4 # CYRILLIC CAPITAL LIGATURE TE TSE + : "ҵ" U04B5 # CYRILLIC SMALL LIGATURE TE TSE + : "Ꚓ" UA692 # CYRILLIC CAPITAL LETTER TCHE + : "ꚓ" UA693 # CYRILLIC SMALL LETTER TCHE : "Ᲊ" U1C89 # CYRILLIC CAPITAL LETTER TJE : "ᲊ" U1C8A # CYRILLIC SMALL LETTER TJE : "Ꙡ" UA660 # CYRILLIC CAPITAL LETTER REVERSED TSE @@ -11154,6 +11178,23 @@ include "%L" : "ꙕ" UA655 # CYRILLIC SMALL LETTER REVERSED YU (conflicts) : "Ԑ" U0510 # CYRILLIC CAPITAL LETTER REVERSED ZE (conflicts) : "ԑ" U0511 # CYRILLIC SMALL LETTER REVERSED ZE (conflicts) + : "Ꚅ" UA684 # CYRILLIC CAPITAL LETTER ZHWE + : "ꚅ" UA685 # CYRILLIC SMALL LETTER ZHWE + + : "Ԃ" U0502 # CYRILLIC CAPITAL LETTER KOMI DJE + : "ԃ" U0503 # CYRILLIC SMALL LETTER KOMI DJE + : "Ԅ" U0504 # CYRILLIC CAPITAL LETTER KOMI ZJE + : "ԅ" U0505 # CYRILLIC SMALL LETTER KOMI ZJE + : "Ԇ" U0506 # CYRILLIC CAPITAL LETTER KOMI DZJE + : "ԇ" U0507 # CYRILLIC SMALL LETTER KOMI DZJE + : "Ԉ" U0508 # CYRILLIC CAPITAL LETTER KOMI LJE + : "ԉ" U0509 # CYRILLIC SMALL LETTER KOMI LJE + : "Ԋ" U050A # CYRILLIC CAPITAL LETTER KOMI NJE + : "ԋ" U050B # CYRILLIC SMALL LETTER KOMI NJE + : "Ԍ" U050C # CYRILLIC CAPITAL LETTER KOMI SJE + : "ԍ" U050D # CYRILLIC SMALL LETTER KOMI SJE + : "Ԏ" U050E # CYRILLIC CAPITAL LETTER KOMI TJE + : "ԏ" U050F # CYRILLIC SMALL LETTER KOMI TJE # archaic letters @@ -11173,12 +11214,16 @@ include "%L" : "ѥ" U0465 # CYRILLIC SMALL LETTER IOTIFIED E : "Ѭ" U046C # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS : "ѭ" U046D # CYRILLIC SMALL LETTER IOTIFIED BIG YUS - : "Ѯ" U046E # CYRILLIC CAPITAL LETTER KSI - : "ѯ" U046F # CYRILLIC SMALL LETTER KSI + : "Ѯ" U046E # CYRILLIC CAPITAL LETTER KSI + : "ѯ" U046F # CYRILLIC SMALL LETTER KSI : "Ѫ" U046A # CYRILLIC CAPITAL LETTER BIG YUS : "ѫ" U046B # CYRILLIC SMALL LETTER BIG YUS + : "Ꚙ" UA698 # CYRILLIC CAPITAL LETTER DOUBLE O + : "ꚙ" UA699 # CYRILLIC SMALL LETTER DOUBLE O : "Ѹ" U0478 # CYRILLIC CAPITAL LETTER UK : "ѹ" U0479 # CYRILLIC SMALL LETTER UK + : "Ꚛ" UA69A # CYRILLIC CAPITAL LETTER CROSSED O + : "ꚛ" UA69B # CYRILLIC SMALL LETTER CROSSED O

: "Ѱ" U0470 # CYRILLIC CAPITAL LETTER PSI

: "ѱ" U0471 # CYRILLIC SMALL LETTER PSI : "Ҁ" U0480 # CYRILLIC CAPITAL LETTER KOPPA (conflicts) @@ -17990,6 +18035,15 @@ include "HangulSyllables" <8> : "᮸" U1BB8 # SUNDANESE DIGIT EIGHT <9> : "᮹" U1BB9 # SUNDANESE DIGIT NINE + : "᳀" U1CC0 # SUNDANESE PUNCTUATION BINDU SURYA + : "᳁" U1CC1 # SUNDANESE PUNCTUATION BINDU PANGLONG + : "᳂" U1CC2 # SUNDANESE PUNCTUATION BINDU PURNAMA + : "᳃" U1CC3 # SUNDANESE PUNCTUATION BINDU CAKRA + : "᳄" U1CC4 # SUNDANESE PUNCTUATION BINDU LEU SATANGA + : "᳅" U1CC5 # SUNDANESE PUNCTUATION BINDU KA SATANGA + : "᳆" U1CC6 # SUNDANESE PUNCTUATION BINDU DA SATANGA + : "᳇" U1CC7 # SUNDANESE PUNCTUATION BINDU BA SATANGA + # (t = Tagbanwa) : "ᝣ" U1763 # TAGBANWA LETTER KA diff --git a/Compose.md b/Compose.md index 125b8b7..653c589 100644 --- a/Compose.md +++ b/Compose.md @@ -28,19 +28,19 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33129) +### Non-Latin script characters (33179) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2006): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3254): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3266): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc -* **Cyrillic**, Glagolitic & Old Permic (577): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc +* **Cyrillic**, Glagolitic & Old Permic (607): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1340): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (879): 「レディット」 etc -* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (522): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc +* **Kawi Scripts**: Balinese, Batak, Baybayin, Buginese, Buhid, Hanunoo, Javanese, Old Kawi, Makasar, Rejang, Sundanese & Tagbanwa (530): ᬭᬾᬤᬶᬢ᭄, ᯒᯧᯑᯪᯖ᯲, ᜍᜒᜇᜒᜆ᜔, ᨑᨙᨉᨗ, ᝍᝒᝇᝒ, ᜭᜲᜧᜲᜦ᜴, ꦫꦺꦢꦶꦠ, 𑼬𑼾𑼤𑼶𑼢, 𑻭𑻵𑻧𑻳, ꤽꥉꤴꥇꤳ, ᮛᮨᮓᮤᮒ᮪, ᝮᝲᝧᝲ, etc * **Korean** (11738): 레딧 etc * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc From 8f7df8b0df8f48396190b6e3f4682475394cb13c Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Tue, 29 Jul 2025 21:47:35 +0100 Subject: [PATCH 19/21] More gaps --- Compose | 87 ++++++++++++++++++++++++++++++++++++++++++++++-------- Compose.md | 8 ++--- 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/Compose b/Compose index 3460df1..dd0be8e 100644 --- a/Compose +++ b/Compose @@ -1,7 +1,7 @@ # -*- coding: utf-8; mode: conf -*- -# Some additional Compose key sequences, focusing on maths, linguistics, emoji and general -# text input. To use them, copy all or parts of this config into your ~/.XCompose file. +# A comprehensive collection of Compose key sequences, covering a significant proportion +# of Unicode. To use this, copy all or parts of this config into your ~/.XCompose file. # For full Hangul support also copy the HangulSyllables file, which is included from here. # Similarly, copy the Logogram file for logographic and undeciphered scripts. # @@ -12,6 +12,9 @@ # # For a simple command line tool to help manage your compose configuration, check out # https://github.com/Udzu/xcompose +# +# For a helpful guide to various scripts, see https://r12a.github.io/scripts/index.html, +# though much of this file was written before becoming aware of that. include "%L" @@ -6693,8 +6696,9 @@ include "%L" : "𑁰" U11070 # BRAHMI SIGN OLD TAMIL VIRAMA : "𑁿" U1107F # BRAHMI NUMBER JOINER - : "𑀃" U11003 # BRAHMI SIGN JIHVAMULIYA - : "𑀄" U11004 # BRAHMI SIGN UPADHMANIYA + : "𑀃" U11003 # BRAHMI SIGN JIHVAMULIYA + : "𑀄" U11004 # BRAHMI SIGN UPADHMANIYA + : "𑀄" U11004 # BRAHMI SIGN UPADHMANIYA # Punctuation @@ -7009,6 +7013,7 @@ include "%L" : "ঢ" U09A2 # BENGALI LETTER DDHA : "ণ" U09A3 # BENGALI LETTER NNA : "ত" U09A4 # BENGALI LETTER TA + <0> : "ৎ" U09CE # BENGALI LETTER KHANDA TA : "থ" U09A5 # BENGALI LETTER THA : "দ" U09A6 # BENGALI LETTER DA : "ধ" U09A7 # BENGALI LETTER DHA @@ -7028,6 +7033,8 @@ include "%L" : "ড়" U09DC # BENGALI LETTER RRA : "ঢ়" U09DD # BENGALI LETTER RHA : "য়" U09DF # BENGALI LETTER YYA + : "ৰ" U09F0 # BENGALI LETTER RA WITH MIDDLE DIAGONAL + : "ৱ" U09F1 # BENGALI LETTER RA WITH LOWER DIAGONAL # Independent vowels @@ -7062,6 +7069,7 @@ include "%L" : "ৢ" U09E2 # BENGALI VOWEL SIGN VOCALIC L : "ৣ" U09E3 # BENGALI VOWEL SIGN VOCALIC LL <0> : "্" U09CD # BENGALI SIGN VIRAMA + : "ৗ" U09D7 # BENGALI AU LENGTH MARK # Other signs @@ -7070,9 +7078,15 @@ include "%L" : "ঃ" U0983 # BENGALI SIGN VISARGA : "়" U09BC # BENGALI SIGN NUKTA : "্" U09CD # BENGALI SIGN VIRAMA + : "৾" U09FE # BENGALI SANDHI MARK - <2> : "ঽ" U09BD # BENGALI SIGN AVAGRAHA + : "ঀ" U0980 # BENGALI ANJI + : "ঽ" U09BD # BENGALI SIGN AVAGRAHA : "৲" U09F2 # BENGALI RUPEE MARK (use Tk for ৳) + : "৺" U09FA # BENGALI ISSHAR + : "৻" U09FB # BENGALI GANDA MARK + : "ৼ" U09FC # BENGALI LETTER VEDIC ANUSVARA + : "৽" U09FD # BENGALI ABBREVIATION SIGN # Numerals @@ -7087,6 +7101,13 @@ include "%L" <8> : "৮" U09EE # BENGALI DIGIT EIGHT <9> : "৯" U09EF # BENGALI DIGIT NINE + <1> : "৴" U09F4 # BENGALI CURRENCY NUMERATOR ONE + <2> : "৵" U09F5 # BENGALI CURRENCY NUMERATOR TWO + <3> : "৶" U09F6 # BENGALI CURRENCY NUMERATOR THREE + <4> : "৷" U09F7 # BENGALI CURRENCY NUMERATOR FOUR + <1> : "৸" U09F8 # BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR + <1> <6> : "৹" U09F9 # BENGALI CURRENCY DENOMINATOR SIXTEEN + # (B = Burmese) - not covered by ISO 15919 but similar enough to fit # Consonants @@ -8084,7 +8105,7 @@ include "%L" : "ಷ" U0CB7 # KANNADA LETTER SSA : "ಸ" U0CB8 # KANNADA LETTER SA : "ಹ" U0CB9 # KANNADA LETTER HA - : "ೞ" U0CDE # KANNADA LETTER FA + : "ೞ" U0CDE # KANNADA LETTER FA <0> : "ೝ" U0CDD # KANNADA LETTER NAKAARA POLLU # Independent vowels @@ -8139,8 +8160,8 @@ include "%L" : "಄" U0C84 # KANNADA SIGN SIDDHAM : "ಽ" U0CBD # KANNADA SIGN AVAGRAHA - : "ೱ" U0CF1 # KANNADA SIGN JIHVAMULIYA - : "ೲ" U0CF2 # KANNADA SIGN UPADHMANIYA + : "ೱ" U0CF1 # KANNADA SIGN JIHVAMULIYA + : "ೲ" U0CF2 # KANNADA SIGN UPADHMANIYA # Numerals @@ -9368,6 +9389,9 @@ include "%L" : "𑐳" U11433 # NEWA LETTER SA : "𑐴" U11434 # NEWA LETTER HA + : "𑑠" U11460 # NEWA SIGN JIHVAMULIYA + : "𑑡" U11461 # NEWA SIGN UPADHMANIYA + # Independent vowels : "𑐀" U11400 # NEWA LETTER A @@ -9412,14 +9436,21 @@ include "%L" : "𑑇" U11447 # NEWA SIGN AVAGRAHA : "𑑈" U11448 # NEWA SIGN FINAL ANUSVARA : "𑑉" U11449 # NEWA OM + <2> : "𑑊" U1144A # NEWA SIDDHI (not ideal) + : "𑑟" U1145F # NEWA LETTER VEDIC ANUSVARA # Punctuation : "𑑋" U1144B # NEWA DANDA <2> : "𑑌" U1144C # NEWA DOUBLE DANDA : "𑑍" U1144D # NEWA COMMA + : "𑑎" U1144E # NEWA GAP FILLER + : "𑑎" U1144E # NEWA GAP FILLER : "𑑏" U1144F # NEWA ABBREVIATION SIGN : "𑑚" U1145A # NEWA DOUBLE COMMA + : "𑑛" U1145B # NEWA PLACEHOLDER MARK + : "𑑝" U1145D # NEWA INSERTION SIGN + : "𑑞" U1145E # NEWA SANDHI MARK # Numerals @@ -10613,6 +10644,7 @@ include "%L" : "𑏅" U113C5 # TULU-TIGALARI VOWEL SIGN AI : "𑏇" U113C7 # TULU-TIGALARI VOWEL SIGN OO : "𑏈" U113C8 # TULU-TIGALARI VOWEL SIGN AU + : "𑏉" U113C9 # TULU-TIGALARI AU LENGTH MARK # Other signs @@ -12457,6 +12489,7 @@ include "%L" : "𞄪" U1E12A # NYIAKENG PUACHUE HMONG LETTER E : "𞄫" U1E12B # NYIAKENG PUACHUE HMONG LETTER EE : "𞄬" U1E12C # NYIAKENG PUACHUE HMONG LETTER W + : "𞄽" U1E13D # NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER : "𞄰" U1E130 # NYIAKENG PUACHUE HMONG TONE-B : "𞄱" U1E131 # NYIAKENG PUACHUE HMONG TONE-M @@ -12477,6 +12510,16 @@ include "%L" <8> : "𞅈" U1E148 # NYIAKENG PUACHUE HMONG DIGIT EIGHT <9> : "𞅉" U1E149 # NYIAKENG PUACHUE HMONG DIGIT NINE + : "𞄷" U1E137 # NYIAKENG PUACHUE HMONG SIGN FOR PERSON + : "𞄸" U1E138 # NYIAKENG PUACHUE HMONG SIGN FOR THING +

: "𞄹" U1E139 # NYIAKENG PUACHUE HMONG SIGN FOR LOCATION + : "𞄺" U1E13A # NYIAKENG PUACHUE HMONG SIGN FOR ANIMAL + : "𞄻" U1E13B # NYIAKENG PUACHUE HMONG SIGN FOR INVERTEBRATE + + : "𞄼" U1E13C # NYIAKENG PUACHUE HMONG SIGN XW XW + : "𞅎" U1E14E # NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ + : "𞅏" U1E14F # NYIAKENG PUACHUE HMONG CIRCLED CA + # (N = Nag Mundari/Mundari Bani) : "𞓐" U1E4D0 # NAG MUNDARI LETTER O @@ -18051,6 +18094,7 @@ include "HangulSyllables" : "ᝥ" U1765 # TAGBANWA LETTER NGA : "ᝦ" U1766 # TAGBANWA LETTER TA : "ᝧ" U1767 # TAGBANWA LETTER DA + : "ᝧ" U1767 # TAGBANWA LETTER DA : "ᝨ" U1768 # TAGBANWA LETTER NA

: "ᝩ" U1769 # TAGBANWA LETTER PA : "ᝪ" U176A # TAGBANWA LETTER BA @@ -19907,10 +19951,13 @@ include "HangulSyllables" : "ᩯ" U1A6F # TAI THAM VOWEL SIGN AE : "ᩰ" U1A70 # TAI THAM VOWEL SIGN OO : "ᩱ" U1A71 # TAI THAM VOWEL SIGN AI + <2> : "ᩲ" U1A72 # TAI THAM VOWEL SIGN THAM AI : "ᩳ" U1A73 # TAI THAM VOWEL SIGN OA ABOVE : "ᩴ" U1A74 # TAI THAM SIGN MAI KANG (ANUSVARA) : "ᩴ" U1A74 # TAI THAM SIGN MAI KANG (ANUSVARA) <0> : "᩺" U1A7A # TAI THAM SIGN RA HAAM + <0> : "᩼" U1A7C # TAI THAM SIGN KHUEN-LUE KARAN + : "᩿" U1A7F # TAI THAM COMBINING CRYPTOGRAMMIC DOT # Tones @@ -19922,8 +19969,18 @@ include "HangulSyllables" # Punctuation - : "᪨" U1AA8 # TAI THAM SIGN KAAN - <2> : "᪩" U1AA9 # TAI THAM SIGN KAANKUU + : "᪨" U1AA8 # TAI THAM SIGN KAAN + <2> : "᪩" U1AA9 # TAI THAM SIGN KAANKUU + : "᪪" U1AAA # TAI THAM SIGN SATKAAN + <2> : "᪫" U1AAB # TAI THAM SIGN SATKAANKUU + : "᪣" U1AA3 # TAI THAM SIGN KEOW + : "᪤" U1AA4 # TAI THAM SIGN HOY + : "᪥" U1AA5 # TAI THAM SIGN DOKMAI + : "᪭" U1AAD # TAI THAM SIGN CAANG + : "᪦" U1AA6 # TAI THAM SIGN REVERSED ROTATED RANA + <2> : "᪬" U1AAC # TAI THAM SIGN HANG + : "ᪧ" U1AA7 # TAI THAM SIGN MAI YAMOK + <2> : "᩻" U1A7B # TAI THAM SIGN MAI SAM # Numerals @@ -19949,6 +20006,12 @@ include "HangulSyllables" <8> : "᪘" U1A98 # TAI THAM THAM DIGIT EIGHT <9> : "᪙" U1A99 # TAI THAM THAM DIGIT NINE +# Logograms + + : "᪠" U1AA0 # TAI THAM SIGN WIANG (CITY) + : "᪡" U1AA1 # TAI THAM SIGN WIANGWAAK (VILLAGE) + : "᪢" U1AA2 # TAI THAM SIGN SAWAN (HEAVEN) + # (v = Tai Viet) : "ꪀ" UAA80 # TAI VIET LETTER LOW KO @@ -21115,8 +21178,8 @@ include "HangulSyllables" : "𑪂" U11A82 # SOYOMBO LETTER HA : "𑪃" U11A83 # SOYOMBO LETTER KSSA - : "𑪄" U11A84 # SOYOMBO SIGN JIHVAMULIYA - : "𑪅" U11A85 # SOYOMBO SIGN UPADHMANIYA + : "𑪄" U11A84 # SOYOMBO SIGN JIHVAMULIYA + : "𑪅" U11A85 # SOYOMBO SIGN UPADHMANIYA : "𑪆" U11A86 # SOYOMBO CLUSTER-INITIAL LETTER RA : "𑪇" U11A87 # SOYOMBO CLUSTER-INITIAL LETTER LA diff --git a/Compose.md b/Compose.md index 653c589..da096e3 100644 --- a/Compose.md +++ b/Compose.md @@ -28,12 +28,12 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33179) +### Non-Latin script characters (33229) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2006): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3266): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3291): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc -* **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1787): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc +* **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1796): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (607): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1340): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc @@ -45,7 +45,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc -* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (505): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc +* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (521): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc * **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2473): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc From db07bc076a1c90421446df850a3faa1b263c5069 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Tue, 29 Jul 2025 22:22:00 +0100 Subject: [PATCH 20/21] Georgian --- Compose | 23 +++++++++++++++++++++++ Compose.md | 4 ++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Compose b/Compose index dd0be8e..13a18d9 100644 --- a/Compose +++ b/Compose @@ -15209,6 +15209,14 @@ include "%L" : "ჴ" U10F4 # GEORGIAN LETTER HAR (ẖ) : "ჵ" U10F5 # GEORGIAN LETTER HOE (ō) : "ჶ" U10F6 # GEORGIAN LETTER FI + : "ჹ" U10F9 # GEORGIAN LETTER TURNED GAN + : "ჺ" U10FA # GEORGIAN LETTER AIN (`) + : "ჷ" U10F7 # GEORGIAN LETTER YN (ɯ) + : "ჷ" U10F7 # GEORGIAN LETTER YN (ɨ) + : "ჹ" U10F9 # GEORGIAN LETTER TURNED GAN (ɣ) + : "ჽ" U10FD # GEORGIAN LETTER AEN (ə) + : "ჾ" U10FE # GEORGIAN LETTER HARD SIGN (ˠ) + : "ჿ" U10FF # GEORGIAN LETTER LABIAL SIGN (ʷ) : "Ა" U1C90 # GEORGIAN MTAVRULI CAPITAL LETTER AN : "Ბ" U1C91 # GEORGIAN MTAVRULI CAPITAL LETTER BAN @@ -15249,6 +15257,15 @@ include "%L" : "Ჴ" U1CB4 # GEORGIAN MTAVRULI CAPITAL LETTER HAR (ẖ) : "Ჵ" U1CB5 # GEORGIAN MTAVRULI CAPITAL LETTER HOE (ō) : "Ჶ" U1CB6 # GEORGIAN MTAVRULI CAPITAL LETTER FI + : "Ჸ" U1CB8 # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI (') + : "Ჹ" U1CB9 # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN + : "Ჺ" U1CBA # GEORGIAN MTAVRULI CAPITAL LETTER AIN (`) + : "Ჷ" U1CB7 # GEORGIAN MTAVRULI CAPITAL LETTER YN (ɯ) + : "Ჷ" U1CB7 # GEORGIAN MTAVRULI CAPITAL LETTER YN (ɨ) + : "Ჹ" U1CB9 # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN (ɣ) + : "Ჽ" U1CBD # GEORGIAN MTAVRULI CAPITAL LETTER AEN (ə) + : "Ჾ" U1CBE # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN (ˠ) + : "Ჿ" U1CBF # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN (ʷ) # (k = Khutsuri/Asomtavruli/Nuskhuri) @@ -15290,6 +15307,9 @@ include "%L" : "Ⴣ" U10C3 # GEORGIAN CAPITAL LETTER WE : "Ⴤ" U10C4 # GEORGIAN CAPITAL LETTER HAR (ẖ) : "Ⴥ" U10C5 # GEORGIAN CAPITAL LETTER HOE (ō) + : "Ⴧ" U10C7 # GEORGIAN CAPITAL LETTER YN (ɯ) + : "Ⴧ" U10C7 # GEORGIAN CAPITAL LETTER YN (ɨ) + : "Ⴭ" U10CD # GEORGIAN CAPITAL LETTER AEN (ə) : "ⴀ" U2D00 # GEORGIAN SMALL LETTER AN : "ⴁ" U2D01 # GEORGIAN SMALL LETTER BAN @@ -15329,6 +15349,9 @@ include "%L" : "ⴣ" U2D23 # GEORGIAN SMALL LETTER WE : "ⴤ" U2D24 # GEORGIAN SMALL LETTER HAR (ẖ) : "ⴥ" U2D25 # GEORGIAN SMALL LETTER HOE (ō) + : "ⴧ" U2D27 # GEORGIAN SMALL LETTER YN (ɯ) + : "ⴧ" U2D27 # GEORGIAN SMALL LETTER YN (ɨ) + : "ⴭ" U2D2D # GEORGIAN SMALL LETTER AEN (ə) # (a = Caucasian Albanian) https://aiearmeniennes.org/artsakh/Alb-script/ diff --git a/Compose.md b/Compose.md index da096e3..c536dfa 100644 --- a/Compose.md +++ b/Compose.md @@ -28,7 +28,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33229) +### Non-Latin script characters (33246) * **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2006): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc * **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3291): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc @@ -36,7 +36,7 @@ The file aims for memorability and consistency. As a result, a small number of t * **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1796): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (607): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1340): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc -* **Georgian** & Caucasian Albanian (208): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc +* **Georgian** & Caucasian Albanian (225): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc * **Greek**, Coptic, Cypriot, Elbasan, Gothic, Linear B, Lycian, Todhri & Vithkuqi (1144): Ρέντιτ, Ⲣⲉⲇⲇⲓⲧ, ⁧𐠤𐠯𐠮⁩, 𐔙𐔇𐔄𐔍𐔝, 𐍂𐌴𐌳𐌳𐌹𐍄, 𐀩𐀇𐀵, 𐊕𐊁𐊅𐊅𐊆𐊗, 𐗝𐗉𐗆𐗒𐗢, 𐖊𐖞𐖜𐖜𐖥𐖵 etc * **Hebrew**, Aramaic, Elymaic, Lydian, Palmyrene, Phoenician, Samaritan & Ugaritic (350): ⁧רֶדִיט⁩, ⁧𐡓𐡃𐡉𐡕⁩, ⁧𐿳𐿣𐿵⁩, ⁧𐤭𐤤𐤣𐤣𐤦𐤯⁩, ⁧𐡴𐡣𐡩𐡶⁩, ⁧𐤓𐤃𐤕⁩, ⁧ࠓࠝࠃࠪࠕ⁩, 𐎗𐎄𐎚 etc * **Japanese** (mostly kana) (879): 「レディット」 etc From 4e6a7fe22081b3a6ea7299b4e4db8cc979830db4 Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Wed, 30 Jul 2025 17:29:35 +0100 Subject: [PATCH 21/21] More scripts --- Compose | 158 ++++++++++++++++++++++++++++++++++++++++++++++++----- Compose.md | 12 ++-- 2 files changed, 151 insertions(+), 19 deletions(-) diff --git a/Compose b/Compose index 13a18d9..7346764 100644 --- a/Compose +++ b/Compose @@ -12,9 +12,6 @@ # # For a simple command line tool to help manage your compose configuration, check out # https://github.com/Udzu/xcompose -# -# For a helpful guide to various scripts, see https://r12a.github.io/scripts/index.html, -# though much of this file was written before becoming aware of that. include "%L" @@ -6171,6 +6168,27 @@ include "%L" : "ⵁ" U2D41 # TIFINAGH LETTER BERBER ACADEMY YAH : "ⵞ" U2D5E # TIFINAGH LETTER YACH +# Tuareg variants + <2> : "ⴸ" U2D38 # TIFINAGH LETTER YADH + <2> : "ⴶ" U2D36 # TIFINAGH LETTER YAJ + <2> : "ⴾ" U2D3E # TIFINAGH LETTER TUAREG YAK + <2> : "ⵂ" U2D42 # TIFINAGH LETTER TUAREG YAH + <2> : "ⵆ" U2D46 # TIFINAGH LETTER TUAREG YAKH + <2> : "ⵈ" U2D48 # TIFINAGH LETTER TUAREG YAQ + <2> : "ⵋ" U2D4B # TIFINAGH LETTER AHAGGAR YAZH + <3> : "ⵌ" U2D4C # TIFINAGH LETTER TUAREG YAZH + : "ⵐ" U2D50 # TIFINAGH LETTER TUAREG YAGN + : "ⵐ" U2D50 # TIFINAGH LETTER TUAREG YAGN + : "ⵑ" U2D51 # TIFINAGH LETTER TUAREG YANG + : "ⵑ" U2D51 # TIFINAGH LETTER TUAREG YANG + <2> : "ⵗ" U2D57 # TIFINAGH LETTER TUAREG YAGH + <3> : "ⵘ" U2D58 # TIFINAGH LETTER AYER YAGH + <2> : "ⵤ" U2D64 # TIFINAGH LETTER TAWELLEMET YAZ + <2> : "ⵧ" U2D67 # TIFINAGH LETTER YO + + : "⵰" U2D70 # TIFINAGH SEPARATOR MARK + : "⵿" U2D7F # TIFINAGH CONSONANT JOINER + # (T = Thaana) : "ހ" U0780 # THAANA LETTER HAA @@ -7569,7 +7587,7 @@ include "%L" : "𑥃" U11943 # DIVES AKURU SIGN NUKTA <2> : "𑥄" U11944 # DIVES AKURU DOUBLE DANDA : "𑥅" U11945 # DIVES AKURU GAP FILLER - : "𑥆" U11946 # DIVES AKURU END OF TEXT MARK + : "𑥆" U11946 # DIVES AKURU END OF TEXT MARK <0> : "𑥐" U11950 # DIVES AKURU DIGIT ZERO <1> : "𑥑" U11951 # DIVES AKURU DIGIT ONE @@ -9851,6 +9869,11 @@ include "%L" : "𑖌" U1158C # SIDDHAM LETTER O : "𑖍" U1158D # SIDDHAM LETTER AU + <2> : "𑗙" U115D9 # SIDDHAM LETTER TWO-CIRCLE ALTERNATE I + <3> : "𑗘" U115D8 # SIDDHAM LETTER THREE-CIRCLE ALTERNATE I + <2> : "𑗚" U115DA # SIDDHAM LETTER TWO-CIRCLE ALTERNATE II + <2> : "𑗛" U115DB # SIDDHAM LETTER ALTERNATE U + # Dependent vowels : "𑖯" U115AF # SIDDHAM VOWEL SIGN AA @@ -9866,6 +9889,9 @@ include "%L" : "𑖻" U115BB # SIDDHAM VOWEL SIGN AU <0> : "𑖿" U115BF # SIDDHAM SIGN VIRAMA + <2> : "𑗜" U115DC # SIDDHAM VOWEL SIGN ALTERNATE U + <2> : "𑗝" U115DD # SIDDHAM VOWEL SIGN ALTERNATE UU + # Other signs : "𑖼" U115BC # SIDDHAM SIGN CANDRABINDU @@ -9876,8 +9902,33 @@ include "%L" # Punctuation + : "𑗁" U115C1 # SIDDHAM SIGN SIDDHAM : "𑗂" U115C2 # SIDDHAM DANDA <2> : "𑗃" U115C3 # SIDDHAM DOUBLE DANDA + : "𑗄" U115C4 # SIDDHAM SEPARATOR DOT + : "𑗅" U115C5 # SIDDHAM SEPARATOR BAR + <1> : "𑗆" U115C6 # SIDDHAM REPETITION MARK-1 + <2> : "𑗇" U115C7 # SIDDHAM REPETITION MARK-2 + <3> : "𑗈" U115C8 # SIDDHAM REPETITION MARK-3 + : "𑗉" U115C9 # SIDDHAM END OF TEXT MARK + +# Siddham marks + + : "𑗊" U115CA # SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS + : "𑗋" U115CB # SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS + : "𑗌" U115CC # SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS + <2> : "𑗍" U115CD # SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS + <3> : "𑗎" U115CE # SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS + : "𑗏" U115CF # SIDDHAM SECTION MARK DOUBLE RING + : "𑗐" U115D0 # SIDDHAM SECTION MARK DOUBLE RING WITH RAYS + <2> : "𑗑" U115D1 # SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS + <3> : "𑗒" U115D2 # SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS + <4> : "𑗓" U115D3 # SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS + <7> : "𑗔" U115D4 # SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS + : "𑗕" U115D5 # SIDDHAM SECTION MARK WITH CIRCLES AND RAYS + <2> : "𑗖" U115D6 # SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES + <4> : "𑗗" U115D7 # SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES + # (SH = Sharada) @@ -10610,6 +10661,7 @@ include "%L" : "𑎳" U113B3 # TULU-TIGALARI LETTER LLA : "𑎴" U113B4 # TULU-TIGALARI LETTER RRA : "𑎵" U113B5 # TULU-TIGALARI LETTER LLLA + : "𑏑" U113D1 # TULU-TIGALARI REPHA # Independent vowels @@ -10645,13 +10697,24 @@ include "%L" : "𑏇" U113C7 # TULU-TIGALARI VOWEL SIGN OO : "𑏈" U113C8 # TULU-TIGALARI VOWEL SIGN AU : "𑏉" U113C9 # TULU-TIGALARI AU LENGTH MARK + <0> : "𑏎" U113CE # TULU-TIGALARI SIGN VIRAMA + <0> : "𑏏" U113CF # TULU-TIGALARI SIGN LOOPED VIRAMA # Other signs + : "𑏊" U113CA # TULU-TIGALARI SIGN CANDRA ANUNASIKA : "𑏌" U113CC # TULU-TIGALARI SIGN ANUSVARA : "𑏍" U113CD # TULU-TIGALARI SIGN VISARGA : "𑏎" U113CE # TULU-TIGALARI SIGN VIRAMA + : "𑏏" U113CF # TULU-TIGALARI SIGN LOOPED VIRAMA : "𑏐" U113D0 # TULU-TIGALARI CONJOINER + : "𑏡" U113E1 # TULU-TIGALARI VEDIC TONE SVARITA + : "𑏢" U113E2 # TULU-TIGALARI VEDIC TONE ANUDATTA + + : "𑏒" U113D2 # TULU-TIGALARI GEMINATION MARK + : "𑏓" U113D3 # TULU-TIGALARI SIGN PLUTA + : "𑏗" U113D7 # TULU-TIGALARI SIGN OM PUSHPIKA + : "𑏘" U113D8 # TULU-TIGALARI SIGN SHRII PUSHPIKA # Punctuation @@ -11814,7 +11877,7 @@ include "%L" # (Du/DU = Sloan-Duployan) - based on 1934 edition -# TODO: check that this is correct, especially forms with combining diacritics +# TODO: check that this is correct, especially forms with combining diacritics, # and add affixes and layout characters # consonants @@ -11894,8 +11957,6 @@ include "%L" : "𛲝" U1BC9D # DUPLOYAN THICK LETTER SELECTOR -# TODO: affixes - # (h = Cherokee) (not the best abbreviation, but think Ch = Cherokee) : "Ꭰ" U13A0 # CHEROKEE LETTER A @@ -13008,9 +13069,12 @@ include "%L"

: "𖬽" U16B3D # PAHAWH HMONG SIGN XYEEM RHO

: "𖬾" U16B3E # PAHAWH HMONG SIGN XYEEM TOV

: "𖬿" U16B3F # PAHAWH HMONG SIGN XYEEM FAIB -

: "𖭂" U16B42 # PAHAWH HMONG SIGN VOS NRUA -

: "𖭃" U16B43 # PAHAWH HMONG SIGN IB YAM +

: "𖭂" U16B42 # PAHAWH HMONG SIGN VOS NRUA (REDUPLICATION) +

: "𖭃" U16B43 # PAHAWH HMONG SIGN IB YAM (DITTO) +

: "𖭀" U16B40 # PAHAWH HMONG SIGN VOS SEEV +

<1> : "𖭁" U16B41 # PAHAWH HMONG SIGN MEEJ SUAB

: "𖭄" U16B44 # PAHAWH HMONG SIGN XAUS +

: "𖭅" U16B45 # PAHAWH HMONG SIGN CIM TSOV ROG # numerals @@ -13034,8 +13098,10 @@ include "%L" # logographs +

: "𖭣" U16B63 # PAHAWH HMONG SIGN VOS LUB

: "𖭤" U16B64 # PAHAWH HMONG SIGN XYOO

: "𖭦" U16B66 # PAHAWH HMONG SIGN THIRD-STAGE HLI +

<2> : "𖭥" U16B65 # PAHAWH HMONG SIGN HLI

: "𖭧" U16B67 # PAHAWH HMONG SIGN ZWJ THAJ

: "𖭨" U16B68 # PAHAWH HMONG SIGN HNUB

: "𖭩" U16B69 # PAHAWH HMONG SIGN NQIG @@ -13054,6 +13120,26 @@ include "%L"

: "𖭶" U16B76 # PAHAWH HMONG SIGN CIM PUB DAWB

: "𖭷" U16B77 # PAHAWH HMONG SIGN CIM NRES TOS +

: "𖭽" U16B7D # PAHAWH HMONG CLAN SIGN TSHEEJ +

: "𖭾" U16B7E # PAHAWH HMONG CLAN SIGN YEEG +

: "𖭿" U16B7F # PAHAWH HMONG CLAN SIGN LIS +

: "𖮀" U16B80 # PAHAWH HMONG CLAN SIGN LAUJ +

: "𖮁" U16B81 # PAHAWH HMONG CLAN SIGN XYOOJ +

: "𖮂" U16B82 # PAHAWH HMONG CLAN SIGN KOO +

: "𖮃" U16B83 # PAHAWH HMONG CLAN SIGN HAWJ +

: "𖮄" U16B84 # PAHAWH HMONG CLAN SIGN MUAS +

: "𖮅" U16B85 # PAHAWH HMONG CLAN SIGN THOJ +

: "𖮆" U16B86 # PAHAWH HMONG CLAN SIGN TSAB +

: "𖮇" U16B87 # PAHAWH HMONG CLAN SIGN PHAB +

: "𖮈" U16B88 # PAHAWH HMONG CLAN SIGN KHAB +

: "𖮉" U16B89 # PAHAWH HMONG CLAN SIGN HAM +

: "𖮊" U16B8A # PAHAWH HMONG CLAN SIGN VAJ +

: "𖮋" U16B8B # PAHAWH HMONG CLAN SIGN FAJ +

: "𖮌" U16B8C # PAHAWH HMONG CLAN SIGN YAJ +

: "𖮍" U16B8D # PAHAWH HMONG CLAN SIGN TSWB +

: "𖮎" U16B8E # PAHAWH HMONG CLAN SIGN KWM +

: "𖮏" U16B8F # PAHAWH HMONG CLAN SIGN VWJ + # (R = Cree Syllabics) (not the best abbreviation, but think CR = CREE) # differences between East and West Cree orthographies are signified by a E/W suffix @@ -19855,8 +19941,9 @@ include "HangulSyllables" <2> : "້" U0EC9 # LAO TONE MAI THO <6> <2> : "໊" U0ECA # LAO TONE MAI TI : "໋" U0ECB # LAO TONE MAI CATAWA - : "໌" U0ECC # LAO CANCELLATION MARK + : "໌" U0ECC # LAO CANCELLATION MARK : "ໍ" U0ECD # LAO NIGGAHITA + : "໎" U0ECE # LAO YAMAKKAN # Punctuation @@ -19876,7 +19963,29 @@ include "HangulSyllables" <8> : "໘" U0ED8 # LAO DIGIT EIGHT <9> : "໙" U0ED9 # LAO DIGIT NINE -# (t = Tai Tham) +# Other languages + + : "ຨ" U0EA8 # LAO LETTER SANSKRIT SHA + : "ຨ" U0EA8 # LAO LETTER SANSKRIT SHA + : "ຩ" U0EA9 # LAO LETTER SANSKRIT SSA + : "ຩ" U0EA9 # LAO LETTER SANSKRIT SSA + +

: "ຆ" U0E86 # LAO LETTER PALI GHA +

: "ຉ" U0E89 # LAO LETTER PALI CHA +

: "ຌ" U0E8C # LAO LETTER PALI JHA +

: "ຎ" U0E8E # LAO LETTER PALI NYA +

: "ຏ" U0E8F # LAO LETTER PALI TTA +

: "ຐ" U0E90 # LAO LETTER PALI TTHA +

: "ຑ" U0E91 # LAO LETTER PALI DDA +

: "ຒ" U0E92 # LAO LETTER PALI DDHA +

: "ຓ" U0E93 # LAO LETTER PALI NNA +

: "ຘ" U0E98 # LAO LETTER PALI DHA +

: "ຠ" U0EA0 # LAO LETTER PALI BHA +

: "ຬ" U0EAC # LAO LETTER PALI LLA + : "ໞ" U0EDE # LAO LETTER KHMU GO + : "ໟ" U0EDF # LAO LETTER KHMU NYO + +# (t = Tai Tham) cf https://r12a.github.io/scripts/lana/index.html # Consonants (based on corresponding Thai transliteration) @@ -20950,7 +21059,7 @@ include "HangulSyllables" # (p = Pollard/Miao)

: "𖼀" U16F00 # MIAO LETTER PA -

: "𖼁" U16F01 # MIAO LETTER BA +

: "𖼁" U16F01 # MIAO LETTER BA

: "𖼃" U16F03 # MIAO LETTER PLA

: "𖼄" U16F04 # MIAO LETTER MA

<2> : "𖼆" U16F06 # MIAO LETTER ARCHAIC MA @@ -20982,7 +21091,7 @@ include "HangulSyllables"

: "𖼣" U16F23 # MIAO LETTER NGA

<2> : "𖼥" U16F25 # MIAO LETTER ARCHAIC NGA

: "𖼤" U16F24 # MIAO LETTER NGHA -

: "𖼦" U16F26 # MIAO LETTER HA +

: "𖼦" U16F26 # MIAO LETTER HA

: "𖼧" U16F27 # MIAO LETTER XA

: "𖼨" U16F28 # MIAO LETTER GHA

: "𖼩" U16F29 # MIAO LETTER GHHA @@ -20991,6 +21100,7 @@ include "HangulSyllables"

: "𖼬" U16F2C # MIAO LETTER NYA

: "𖼭" U16F2D # MIAO LETTER NYHA

: "𖼮" U16F2E # MIAO LETTER TSHA +

<2> : "𖼲" U16F32 # MIAO LETTER REFORMED TSHA

: "𖼯" U16F2F # MIAO LETTER DZHA

: "𖼳" U16F33 # MIAO LETTER SHA

: "𖼴" U16F34 # MIAO LETTER SSA @@ -21006,10 +21116,30 @@ include "HangulSyllables"

: "𖽀" U16F40 # MIAO LETTER ZZYA

: "𖽁" U16F41 # MIAO LETTER ZZSYA

: "𖽂" U16F42 # MIAO LETTER WA +

: "𖽃" U16F43 # MIAO LETTER AH +

: "𖽄" U16F44 # MIAO LETTER HHA +# Yi alternatives +

: "𖼂" U16F02 # MIAO LETTER YI PA +

: "𖼌" U16F0C # MIAO LETTER YI TTA +

: "𖼍" U16F0D # MIAO LETTER YI TA +

: "𖼒" U16F12 # MIAO LETTER YI NNA +

: "𖼠" U16F20 # MIAO LETTER YI KA +

: "𖼰" U16F30 # MIAO LETTER YI TSHA +

: "𖼱" U16F31 # MIAO LETTER YI DZHA +

: "𖼹" U16F39 # MIAO LETTER YI TSA +# Xiaohua Miao & Bai Yi (Q: what sounds do these make?) +

: "𖽅" U16F45 # MIAO LETTER BRI +

: "𖽆" U16F46 # MIAO LETTER SYI +

: "𖽇" U16F47 # MIAO LETTER DZYI +

: "𖽈" U16F48 # MIAO LETTER TE +

: "𖽉" U16F49 # MIAO LETTER TSE +

: "𖽊" U16F4A # MIAO LETTER RTE

: "𖽏" U16F4F # MIAO SIGN CONSONANT MODIFIER BAR (NUKTA)

: "𖽐" U16F50 # MIAO LETTER NASALIZATION

: "𖽑" U16F51 # MIAO SIGN ASPIRATION +

: "𖽒" U16F52 # MIAO SIGN REFORMED VOICING +

: "𖽓" U16F53 # MIAO SIGN REFORMED ASPIRATION

: "𖽔" U16F54 # MIAO VOWEL SIGN A

: "𖽕" U16F55 # MIAO VOWEL SIGN AA @@ -21045,7 +21175,9 @@ include "HangulSyllables"

: "𖽳" U16F73 # MIAO VOWEL SIGN AE

: "𖽴" U16F74 # MIAO VOWEL SIGN AEE

: "𖽵" U16F75 # MIAO VOWEL SIGN ERR +

: "𖽶" U16F76 # MIAO VOWEL SIGN ROUNDED ERR

: "𖽷" U16F77 # MIAO VOWEL SIGN ER +

: "𖽸" U16F78 # MIAO VOWEL SIGN ROUNDED ER

: "𖽹" U16F79 # MIAO VOWEL SIGN AI

: "𖽺" U16F7A # MIAO VOWEL SIGN EI

: "𖽻" U16F7B # MIAO VOWEL SIGN AU diff --git a/Compose.md b/Compose.md index c536dfa..408a271 100644 --- a/Compose.md +++ b/Compose.md @@ -28,12 +28,12 @@ The file aims for memorability and consistency. As a result, a small number of t * **Multigraphs and ligatures** (391): ʣ ㏈ etc * **Letter games** (315): ʇᴉppǝɹ ɟibbɘר டωᓀᓀ·–⟝ etc -### Non-Latin script characters (33246) -* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2006): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc +### Non-Latin script characters (33360) +* **Arabic**, Old North Arabian, Old South Arabian, Avestan, Chorasmian, Hatran, Mandaic, Manichaean, Nabatean, Old Persian, Osmanya, Inscriptional Pahlavi, Psalter Pahlavi, Inscriptional Parthian, Hanifi Rohingya, Old Sogdian, Sogdian, Syriac, Neo-Tifinagh, Thaana, Old Uyghur & Yezidi (2022): ⁧رِيدِتْ⁩, ⁧𐪇𐪕𐪉⁩, ⁧𐬭𐬈𐬛𐬌𐬙⁩, ⁧𐩧𐩵𐩩⁩, ⁧𐿂𐾴𐾺𐿄⁩, ⁧𐣣𐣣𐣩𐣵⁩, ⁧ࡓࡏࡃࡉࡕ⁩, ⁧𐫡𐫅𐫏𐫤⁩, ⁧𐢛𐢅𐢍𐢞⁩, 𐎼𐎡𐎮𐎡𐎫, 𐒇𐒗𐒆𐒘𐒂, ⁧𐭥𐭣𐭩𐭲⁩, ⁧𐮅𐮃𐮈𐮑⁩, ⁧𐴌𐴠𐴊𐴞𐴃⁩, ⁧𐭓𐭃𐭉𐭕⁩, ⁧𐼘𐼘𐼊𐼚⁩, ⁧𐽀𐼲𐼷𐽂⁩, ⁧ܪܕ݁ܝܬ݁⁩, ⵔⴻⴷⴷⵉⵜ, ⁧ރެދިތް⁩, ⁧𐽾𐽲𐽶𐾀⁩, ⁧𐺎𐺩𐺋𐺨𐺕⁩ etc * **Armenian** (95): Րեդդիտ etc -* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3291): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc +* **Brahmic scripts**: Ahom, Bengali, Bhaiksuki, Brahmi, Burmese, Chakma, Devanagari, Dives Akuru, Dogra, Grantha, Gujala Gondi, Gujarati, Gurmukhi, Gurung Khema, Kaithi, Kannada, Kawi, Kharoshthi, Khojki, Khudawadi, Kirat Rai, Lepcha, Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meitei, Modi, Multani, Nandinagari, Newa, Odia, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Sylheti Nagri, Takri, Tamil, Telugu, Tigalari, Tirhuta (3327): 𑜍𑜦𑜔𑜢𑜌𑜫, রেডিট, 𑰨𑰸𑰠𑰰𑰞, 𑀭𑁂𑀟𑀺𑀝, ရေဍိဋ္, 𑄢𑄬𑄘𑄨𑄖𑄳, रेडिट, 𑤧𑤵𑤞𑤱𑤜𑤽, 𑠤𑠳𑠜𑠭𑠚, 𑌰𑍇𑌧𑌿𑌥, 𑶈𑶐𑵹𑶋𑵴𑶗, રેડિટ, ਰੇਡਿਟ, 𖄛𖄣𖄓𖄟𖄑, 𑂩𑂵𑂡𑂱𑂟, ರೆಡಿತ್, ⁧𐨪𐨅𐨢𐨁𐨠⁩, 𑈦𑈰𑈝𑈭𑈚, 𑋙𑋥𑋐𑋡𑋎, 𖵝𖵧𖵕𖵤𖵓, ᰛᰬᰌᰧᰳ, ᤖᤧᤍᤡᤳ, 𑅭𑅓𑅦𑅑𑅟, രെഡിട്, 𑲊𑲳𑱼𑲱𑱻, 𑴦𑴺𑴞𑴲𑴜, ꯔꯦꯗꯤꯠ, 𑘨𑘹𑘠𑘱𑘞, 𑊢𑊃𑊙𑊁𑊗, 𑧈𑧚𑧀𑧒𑦾, 𑐬𑐾𑐢𑐶𑐠, ରେଡିଟ, ꢬꢾꢞꢶꢜ, 𑆫𑆼𑆣𑆴𑆡, 𑖨𑖸𑖠𑖰𑖞, රෙඩිට්, ꠞꠦꠗꠤꠕ, 𑚤𑚲𑚜𑚮𑚚, ரெடிட், రెడిట్, 𑎬𑏂𑎤𑎹𑎢, 𑒩𑒺𑒡𑒱𑒟 etc * **Braille**, UEB (272): ⠗⠫⠙⠊⠞ etc -* **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1796): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc +* **ConScripts**: Cherokee, Cree, Deseret, Inuktitut, Kayah Li, Mefeidrin, Mundari Bani, Mru, Nyiakeng Puachue Hmong, Ol Chiki, Old Onal, Osage, Pahawh Hmong, Pau Cin Hau, Shavian, Sloan-Duployan, Sora Sompeng, Sunuwar, Tangsa, Toto, Wancho & Warang Citi (1820): ᎴᏗᏛ, ᕃᑎᑦ, 𐐡𐐇𐐔𐐆𐐓, ᕃᑎᑦ, ꤚꤢꤧꤘꤤꤒ, 𖹜𖹯𖹹𖹹𖹫𖹨, 𞓣𞓤𞓡𞓚𞓝, 𖩓𖩘𖩅𖩊𖩀, 𞄣𞄪𞄏𞄦𞄃, ᱨᱮᱫᱫᱤᱛ, 𞗧𞗨𞗠𞗜𞗝, 𐒴𐓟𐓵𐓣𐓰, 𖬡𖬉𖬰𖬞𖬰𖬃𖬰𖬧𖬵, 𑫒𑫖𑫄𑫗𑫎, 𐑮𐑧𐑛𐑦𐑑, 𛰋𛱌𛰍, 𑃝𑃣𑃔𑃤𑃑, 𑯄𑯂𑯀𑯃𑯁, 𖪲𖪔𖪱𖪏𖪰, 𞊟𞊦𞊓𞊡𞊒, 𞋗𞋛𞋄𞋜𞋋, 𑢼𑣈𑣔𑣂𑣕 etc * **Cyrillic**, Glagolitic & Old Permic (607): Ре́ддит, Ⱃⰵδδιτ, 𐍠𐍔𐍓𐍓𐍙𐍢 etc * **Geʽez**, Adlam, Bamum, Bassa Vah, Garay, Mende Kikakui, Meroitic Cursive & Hieroglyphic, NKo & Vai (1340): ሬዲተ, ⁧𞤈𞤫𞤣𞤭𞤼⁩, ꚥꛤꛤ꛱, 𖫦𖫬𖫗𖫭𖫡, ⁧𐵞𐵩𐵺𐵋𐵽⁩, ⁧𞠺𞠾𞡊⁩, ⁧𐦫𐦪𐦷𐦢𐦴⁩, ⁧𐦎𐦁𐦝𐦂𐦘⁩, ⁧ߙߍߘߘߌߕ⁩, ꗸꔹꗋ etc * **Georgian** & Caucasian Albanian (225): რედდიტ, ႰႤႣႣႨႲ, 𐕙𐔴𐔳𐔳𐔼𐔸 etc @@ -45,8 +45,8 @@ The file aims for memorability and consistency. As a result, a small number of t * **International Morse Code** (76): ·-· · -·· -·· ·· - etc * **Old Italic**, Carian, Ogham, Old Hungarian, Runic & Old Turkic (395): 𐌓𐌄𐌃𐌃𐌉𐌕, 𐊥𐊺𐊢𐊢𐊹𐊭, ᚏᚓᚇᚔᚈ, ⁧𐲢𐳉𐳇𐳇𐳐𐳦⁩, ᚱᛖᛞᛞᛁᛏ, ⁧𐰺𐰅𐰑𐰃𐱃⁩ etc * **Sutton SignWriting** (667): 𝧿𝨾𝡇𝪜𝪡𝦈𝪪 etc -* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (521): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc -* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2473): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc +* **Thai**, Cham, Khmer, Lao, Tai Tham & Tai Viet (538): เรดดิต, ꨣꨮꨖꨪꩅ, រេទិត, ເຣັດິຕ, ᩁᩮᨯᩥᨲ, ꪧꪵꪒꪲꪒ etc +* **Zhuyin**, Fraser, Mongolian, New Tai Lue, ʼPhags-pa, Pollard, Soyombo, Tai LeTibetan, Yi, Zanabazar Square (2494): ㄏㄨㄥˊㄉㄧˊ, ꓡꓯꓓꓲꓔ, ᠷᠡᠳᠢᠲ, ᦜᦵᦡᦲᧆ, ꡘꡠꡊꡞꡈ, 𖼖𖽝𖼋𖽡𖼊, 𑩼𑩔𑩩𑩑𑪍, ᥘᥦᥖᥤᥖ, རེཌིཊ྄, ꏒꄶ, 𑨫𑨄𑨜𑨁𑨚𑨴 etc * **Logograms/undeciphered**: Anatolian Hieroglyphs, Cuneiform, Cypro-Minoan, Egyptian Hieroglyphs, Linear A&B (4691): 𔐅 𒆛 𒿌 𓀁 𐙞 etc * Language names (150): 🇯🇵 日本語 🇮🇷 ⁧فارس⁩ 🇻🇳 Tiếng Việt etc