Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions unicodetools/data/linkification/dev/LinkEmail.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# LinkEmail.txt
# Date: 2025-12-24, 00:04:19 GMT
# Date: 2025-12-24, 02:37:15 GMT
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -121,10 +121,12 @@
0526..0527 # 6.0 [2] (Ԧ..ԧ) CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
0528..052F # 7.0 [8] (Ԩ..ԯ) CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK..CYRILLIC SMALL LETTER EL WITH DESCENDER
0531..0556 # 1.1 [38] (Ա..Ֆ) ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0558 # 18.0 (U+0558) MODIFIER LETTER ARMENIAN SMALL EH
0559 # 1.1 (ՙ) ARMENIAN MODIFIER LETTER LEFT HALF RING
0560 # 11.0 (ՠ) ARMENIAN SMALL LETTER TURNED AYB
0561..0587 # 1.1 [39] (ա..և) ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
0588 # 11.0 (ֈ) ARMENIAN SMALL LETTER YI WITH STROKE
058B..058C # 18.0 [2] (U+058B..U+058C) MODIFIER LETTER ARMENIAN SMALL INI..MODIFIER LETTER ARMENIAN SMALL YI
0591..05A1 # 2.0 [17] (֑..֡) HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER
05A2 # 4.1 (֢) HEBREW ACCENT ATNAH HAFUKH
05A3..05AF # 2.0 [13] (֣..֯) HEBREW ACCENT MUNAH..HEBREW MARK MASORA CIRCLE
Expand All @@ -136,6 +138,7 @@
05C4 # 2.0 (ׄ) HEBREW MARK UPPER DOT
05C5 # 4.1 (ׅ) HEBREW MARK LOWER DOT
05C7 # 4.1 (ׇ) HEBREW POINT QAMATS QATAN
05C8 # 18.0 (U+05C8) HEBREW POINT SHEVA NA MUDGASH
05D0..05EA # 1.1 [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV
05EF # 11.0 (ׯ) HEBREW YOD TRIANGLE
05F0..05F2 # 1.1 [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
Expand Down Expand Up @@ -220,6 +223,7 @@
097E..097F # 5.0 [2] (ॾ..ॿ) DEVANAGARI LETTER DDDA..DEVANAGARI LETTER BBA
0980 # 7.0 (ঀ) BENGALI ANJI
0981..0983 # 1.1 [3] (ঁ..ঃ) BENGALI SIGN CANDRABINDU..BENGALI SIGN VISARGA
0984 # 18.0 (U+0984) BENGALI SIGN COMBINING ANUSVARA ABOVE
0985..098C # 1.1 [8] (অ..ঌ) BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 # 1.1 [2] (এ..ঐ) BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 # 1.1 [22] (ও..ন) BENGALI LETTER O..BENGALI LETTER NA
Expand All @@ -238,6 +242,7 @@
09E6..09F1 # 1.1 [12] (০..ৱ) BENGALI DIGIT ZERO..BENGALI LETTER RA WITH LOWER DIAGONAL
09FC # 10.0 (ৼ) BENGALI LETTER VEDIC ANUSVARA
09FE # 11.0 (৾) BENGALI SANDHI MARK
09FF # 18.0 (U+09FF) BENGALI LETTER SANSKRIT BA
0A01 # 4.0 (ਁ) GURMUKHI SIGN ADAK BINDI
0A02 # 1.1 (ਂ) GURMUKHI SIGN BINDI
0A03 # 4.0 (ਃ) GURMUKHI SIGN VISARGA
Expand Down Expand Up @@ -287,6 +292,7 @@
0B44 # 5.1 (ୄ) ORIYA VOWEL SIGN VOCALIC RR
0B47..0B48 # 1.1 [2] (େ..ୈ) ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4D # 1.1 [3] (ୋ..୍) ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA
0B53..0B54 # 18.0 [2] (U+0B53..U+0B54) ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE
0B55 # 13.0 (୕) ORIYA SIGN OVERLINE
0B56..0B57 # 1.1 [2] (ୖ..ୗ) ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK
0B5C..0B5D # 1.1 [2] (ଡ଼..ଢ଼) ORIYA LETTER RRA..ORIYA LETTER RHA
Expand Down Expand Up @@ -544,6 +550,7 @@
1810..1819 # 3.0 [10] (᠐..᠙) MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1877 # 3.0 [88] (ᠠ..ᡷ) MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA
1878 # 11.0 (ᡸ) MONGOLIAN LETTER CHA WITH TWO DOTS
1879 # 18.0 (U+1879) MONGOLIAN LETTER ALTERNATE UE
1880..18A9 # 3.0 [42] (ᢀ..ᢩ) MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI DAGALGA
18AA # 5.1 (ᢪ) MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 # 5.2 [70] (ᢰ..ᣵ) CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
Expand All @@ -568,7 +575,9 @@
1ABF..1AC0 # 13.0 [2] (ᪿ..ᫀ) COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1AC1..1ACE # 14.0 [14] (᫁..ᫎ) COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING LATIN SMALL LETTER INSULAR T
1ACF..1ADD # 17.0 [15] (᫏..᫝) COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW
1ADE..1ADF # 18.0 [2] (U+1ADE..U+1ADF) COMBINING GRAVE-DOT..COMBINING DOT-ACUTE
1AE0..1AEB # 17.0 [12] (᫠..᫫) COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
1AEC..1AF0 # 18.0 [5] (U+1AEC..U+1AF0) COMBINING CARON-ACUTE..COMBINING DOUBLE COMMA ABOVE
1B00..1B4B # 5.0 [76] (ᬀ..ᭋ) BALINESE SIGN ULU RICEM..BALINESE LETTER ASYURA SASAK
1B4C # 14.0 (ᭌ) BALINESE LETTER ARCHAIC JNYA
1B50..1B59 # 5.0 [10] (᭐..᭙) BALINESE DIGIT ZERO..BALINESE DIGIT NINE
Expand Down Expand Up @@ -631,8 +640,10 @@
2054 # 4.0 (⁔) INVERTED UNDERTIE
2071 # 3.2 (ⁱ) SUPERSCRIPT LATIN SMALL LETTER I
207F # 1.1 (ⁿ) SUPERSCRIPT LATIN SMALL LETTER N
208F # 18.0 (U+208F) MODIFIER LETTER HIGH AND LOW VERTICAL LINE
2090..2094 # 4.1 [5] (ₐ..ₔ) LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2095..209C # 6.0 [8] (ₕ..ₜ) LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T
209D..209F # 18.0 [3] (U+209D..U+209F) LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z
20D0..20DC # 1.1 [13] (⃐..⃜) COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20E1 # 1.1 (⃡) COMBINING LEFT RIGHT ARROW ABOVE
20E5..20EA # 3.2 [6] (⃥..⃪) COMBINING REVERSE SOLIDUS OVERLAY..COMBINING LEFTWARDS ARROW OVERLAY
Expand Down Expand Up @@ -764,6 +775,8 @@ A7D3 # 14.0 (ꟓ) LATIN SMALL LETTER DOUBLE THORN
A7D4 # 17.0 (꟔) LATIN CAPITAL LETTER DOUBLE WYNN
A7D5..A7D9 # 14.0 [5] (ꟕ..ꟙ) LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
A7DA..A7DC # 16.0 [3] (Ꟛ..Ƛ) LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE
A7DD # 18.0 (U+A7DD) LATIN CAPITAL LETTER CLOSED OMEGA
A7E2 # 18.0 (U+A7E2) LATIN CAPITAL LETTER R WITH LONG LEG
A7F1 # 17.0 (꟱) MODIFIER LETTER CAPITAL S
A7F2..A7F4 # 14.0 [3] (ꟲ..ꟴ) MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
A7F5..A7F6 # 13.0 [2] (Ꟶ..ꟶ) LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
Expand Down Expand Up @@ -808,6 +821,7 @@ AB60..AB63 # 8.0 [4] (ꭠ..ꭣ) LATIN SMALL LETTER SAKHA YAT..LATIN SMALL
AB64..AB65 # 7.0 [2] (ꭤ..ꭥ) LATIN SMALL LETTER INVERTED ALPHA..GREEK LETTER SMALL CAPITAL OMEGA
AB66..AB67 # 12.0 [2] (ꭦ..ꭧ) LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB68..AB69 # 13.0 [2] (ꭨ..ꭩ) LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE..MODIFIER LETTER SMALL TURNED W
AB6C..AB6D # 18.0 [2] (U+AB6C..U+AB6D) LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING
AB70..ABBF # 8.0 [80] (ꭰ..ꮿ) CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
ABC0..ABEA # 5.2 [43] (ꯀ..ꯪ) MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL SIGN NUNG
ABEC..ABED # 5.2 [2] (꯬..꯭) MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYEK
Expand Down Expand Up @@ -903,6 +917,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
10780..10785 # 14.0 [6] (𐞀..𐞅) MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
10787..107B0 # 14.0 [42] (𐞇..𐞰) MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
107B2..107BA # 14.0 [9] (𐞲..𐞺) MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
107BB..107BF # 18.0 [5] (U+107BB..U+107BF) MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR
10800..10805 # 4.0 [6] (𐠀..𐠅) CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10808 # 4.0 (𐠈) CYPRIOT SYLLABLE JO
1080A..10835 # 4.0 [44] (𐠊..𐠵) CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
Expand Down Expand Up @@ -948,6 +963,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
10EB0..10EB1 # 13.0 [2] (𐺰..𐺱) YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10EC2..10EC4 # 16.0 [3] (𐻂..𐻄) ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10EC5..10EC7 # 17.0 [3] (𐻅..𐻇) ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW
10ED9..10EEE # 18.0 [22] (U+10ED9..U+10EEE) ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH
10EF9 # 18.0 (U+10EF9) ARABIC MARK CROWN
10EFA..10EFB # 17.0 [2] (𐻺..𐻻) ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON
10EFC # 16.0 (𐻼) ARABIC COMBINING ALEF OVERLAY
10EFD..10EFF # 15.0 [3] (𐻽..𐻿) ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
Expand Down Expand Up @@ -1061,6 +1078,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
11A9D # 11.0 (𑪝) SOYOMBO MARK PLUTA
11AB0..11ABF # 14.0 [16] (𑪰..𑪿) CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
11AC0..11AF8 # 7.0 [57] (𑫀..𑫸) PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
11B0A # 18.0 (U+11B0A) DEVANAGARI LETTER ALTERNATE DDDA
11B60..11B67 # 17.0 [8] (𑭠..𑭧) SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O
11BC0..11BE0 # 16.0 [33] (𑯀..𑯠) SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11BF0..11BF9 # 16.0 [10] (𑯰..𑯹) SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
Expand Down Expand Up @@ -1098,7 +1116,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
12399 # 8.0 (𒎙) CUNEIFORM SIGN U U
12400..12462 # 5.0 [99] (𒐀..𒑢) CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
12463..1246E # 7.0 [12] (𒑣..𒑮) CUNEIFORM NUMERIC SIGN ONE QUARTER GUR..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
1246F # 18.0 (U+1246F) CUNEIFORM NUMERIC SIGN SEVEN ASH TENU
12475..1247F # 18.0 [11] (U+12475..U+1247F) CUNEIFORM NUMERIC SIGN EIGHT ASH TENU..CUNEIFORM NUMERIC SIGN ASH TIMES NINE DISH TENU
12480..12543 # 8.0 [196] (𒒀..𒕃) CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
12550..12686 # 18.0 [311] (U+12550..U+12686) CUNEIFORM NUMERIC SIGN ONE N01..CUNEIFORM NUMERIC SIGN ONE N36 FLAT
12F90..12FF0 # 14.0 [97] (𒾐..𒿰) CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
13000..1342E # 5.2 [1071] (𓀀..𓐮) EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1342F # 15.0 (𓐯) EGYPTIAN HIEROGLYPH V011D
Expand All @@ -1120,6 +1141,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
16B7D..16B8F # 7.0 [19] (𖭽..𖮏) PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
16D40..16D6C # 16.0 [45] (𖵀..𖵬) KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN SAAT
16D70..16D79 # 16.0 [10] (𖵰..𖵹) KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
16D80..16D9D # 18.0 [30] (U+16D80..U+16D9D) CHISOI LETTER A..CHISOI SIGN SISO
16DA0..16DA9 # 18.0 [10] (U+16DA0..U+16DA9) CHISOI DIGIT ZERO..CHISOI DIGIT NINE
16E40..16E7F # 11.0 [64] (𖹀..𖹿) MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
16EA0..16EB8 # 17.0 [25] (𖺠..𖺸) BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY
16EBB..16ED3 # 17.0 [25] (𖺻..𖻓) BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY
Expand All @@ -1141,20 +1164,26 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
187F8..187FF # 17.0 [8] (𘟸..𘟿) TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF
18800..18AF2 # 9.0 [755] (𘠀..𘫲) TANGUT COMPONENT-001..TANGUT COMPONENT-755
18AF3..18CD5 # 13.0 [483] (𘫳..𘳕) TANGUT COMPONENT-756..KHITAN SMALL SCRIPT CHARACTER-18CD5
18CD6..18CDA # 18.0 [5] (U+18CD6..U+18CDA) KHITAN SMALL SCRIPT CHARACTER-18CD6..KHITAN SMALL SCRIPT CHARACTER-18CDA
18CFF # 16.0 (𘳿) KHITAN SMALL SCRIPT CHARACTER-18CFF
18D00..18D08 # 13.0 [9] (𘴀..𘴈) TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
18D09..18D1E # 17.0 [22] (𘴉..𘴞) TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E
18D1F..18D20 # 18.0 [2] (U+18D1F..U+18D20) TANGUT IDEOGRAPH-18D1F..TANGUT IDEOGRAPH-18D20
18D80..18DF2 # 17.0 [115] (𘶀..𘷲) TANGUT COMPONENT-769..TANGUT COMPONENT-883
18E00..19191 # 18.0 [914] (U+18E00..U+19191) JURCHEN CHARACTER-18E00..JURCHEN CHARACTER-19191
191A0..191D2 # 18.0 [51] (U+191A0..U+191D2) JURCHEN RADICAL-01..JURCHEN RADICAL-51
1AFF0..1AFF3 # 14.0 [4] (𚿰..𚿳) KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
1AFF5..1AFFB # 14.0 [7] (𚿵..𚿻) KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
1AFFD..1AFFE # 14.0 [2] (𚿽..𚿾) KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
1B000..1B001 # 6.0 [2] (𛀀..𛀁) KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
1B002..1B11E # 10.0 [285] (𛀂..𛄞) HENTAIGANA LETTER A-1..HENTAIGANA LETTER N-MU-MO-2
1B11F..1B122 # 14.0 [4] (𛄟..𛄢) HIRAGANA LETTER ARCHAIC WU..KATAKANA LETTER ARCHAIC WU
1B123..1B128 # 18.0 [6] (U+1B123..U+1B128) HIRAGANA DIGRAPH KOTO..KATAKANA LETTER ALTERNATE WI
1B132 # 15.0 (𛄲) HIRAGANA LETTER SMALL KO
1B150..1B152 # 12.0 [3] (𛅐..𛅒) HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
1B155 # 15.0 (𛅕) KATAKANA LETTER SMALL KO
1B164..1B167 # 12.0 [4] (𛅤..𛅧) KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
1B168 # 18.0 (U+1B168) KATAKANA LETTER SMALL ARCHAIC YE
1B170..1B2FB # 10.0 [396] (𛅰..𛋻) NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
1BC00..1BC6A # 7.0 [107] (𛰀..𛱪) DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C # 7.0 [13] (𛱰..𛱼) DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
Expand All @@ -1164,12 +1193,17 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
1CCF0..1CCF9 # 16.0 [10] (𜳰..𜳹) OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
1CF00..1CF2D # 14.0 [46] (𜼀..𜼭) ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 # 14.0 [23] (𜼰..𜽆) ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1D127..1D128 # 18.0 [2] (U+1D127..U+1D128) MUSICAL SYMBOL COMBINING STRESS..MUSICAL SYMBOL COMBINING UNSTRESS
1D165..1D169 # 3.1 [5] (𝅥..𝅩) MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 # 3.1 [6] (𝅭..𝅲) MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 # 3.1 [8] (𝅻..𝆂) MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B # 3.1 [7] (𝆅..𝆋) MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD # 3.1 [4] (𝆪..𝆭) MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 # 4.1 [3] (𝉂..𝉄) COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D250..1D252 # 18.0 [3] (U+1D250..U+1D252) MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8
1D25B..1D25C # 18.0 [2] (U+1D25B..U+1D25C) MUSICAL SYMBOL COMBINING TREMOLO-4..MUSICAL SYMBOL COMBINING TREMOLO-5
1D25F # 18.0 (U+1D25F) MUSICAL SYMBOL COMBINING BUZZ ROLL STEM
1D280..1D281 # 18.0 [2] (U+1D280..U+1D281) MUSICAL SYMBOL COMBINING STEM BOW BEHIND BRIDGE..MUSICAL SYMBOL COMBINING STEM BOW ON TOP OF BRIDGE
1D400..1D454 # 3.1 [85] (𝐀..𝑔) MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C # 3.1 [71] (𝑖..𝒜) MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F # 3.1 [2] (𝒞..𝒟) MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
Expand Down Expand Up @@ -1212,7 +1246,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
1DA9B..1DA9F # 8.0 [5] (𝪛..𝪟) SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF # 8.0 [15] (𝪡..𝪯) SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
1DF00..1DF1E # 14.0 [31] (𝼀..𝼞) LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER S WITH CURL
1DF1F..1DF24 # 18.0 [6] (U+1DF1F..U+1DF24) LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH
1DF25..1DF2A # 15.0 [6] (𝼥..𝼪) LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
1DF2B..1DF81 # 18.0 [87] (U+1DF2B..U+1DF81) LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN CAPITAL LETTER E WITH BENT TOPBAR
1DFCD..1DFFF # 18.0 [51] (U+1DFCD..U+1DFFF) MODIFIER LETTER SMALL TURNED R WITH MID-HEIGHT LEFT HOOK..MODIFIER LETTER SMALL T WITH HOOK AND RETROFLEX HOOK
1E000..1E006 # 9.0 [7] (𞀀..𞀆) COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 # 9.0 [17] (𞀈..𞀘) COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 # 9.0 [7] (𞀛..𞀡) COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
Expand Down Expand Up @@ -1282,6 +1319,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
2B739 # 15.0 (𫜹) CJK UNIFIED IDEOGRAPH-2B739
2B73A..2B73F # 17.0 [6] (𫜺..𫜿) CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73F
2B740..2B81D # 6.0 [222] (𫝀..𫠝) CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B81E # 18.0 (U+2B81E) CJK UNIFIED IDEOGRAPH-2B81E
2B820..2CEA1 # 8.0 [5762] (𫠠..𬺡) CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEA2..2CEAD # 17.0 [12] (𬺢..𬺭) CJK UNIFIED IDEOGRAPH-2CEA2..CJK UNIFIED IDEOGRAPH-2CEAD
2CEB0..2EBE0 # 10.0 [7473] (𬺰..𮯠) CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
Expand All @@ -1290,6 +1328,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
30000..3134A # 13.0 [4939] (𰀀..𱍊) CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF # 15.0 [4192] (𱍐..𲎯) CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
323B0..33479 # 17.0 [4298] (𲎰..𳑹) CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
3D000..3FC3F # 18.0 [11328] (U+3D000..U+3FC3F) SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
E0100..E01EF # 4.0 [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256

# Total code points: 149241
# Total code points: 162119
Original file line number Diff line number Diff line change
Expand Up @@ -1804,6 +1804,10 @@ public String _getValue(String string) {
/**
 * Returns the shared {@code YESNO} list as this property's available values.
 *
 * @param result not used by this implementation: nothing is added to it and it is not returned
 * @return the {@code YESNO} list
 */
protected List<String> _getAvailableValues(List<String> result) {
return YESNO;
}

/**
 * Always reports {@code false} for this property.
 *
 * <p>NOTE(review): presumably this tells callers that unassigned code points cannot be assumed
 * to share a single value for this property; confirm against the contract of the method this
 * is meant to override.
 *
 * @return {@code false}, unconditionally
 */
public boolean hasUniformUnassigned() {
return false;
}
}

// private static class StringTransformProperty extends SimpleProperty {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParsePosition;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.EnumSet;
Expand Down Expand Up @@ -197,8 +198,11 @@ private LinkTermination(String uset) {

/**
 * Frozen set of characters that are removed from {@code validEmailLocalPart}: space, semicolon,
 * colon, double quote, parentheses, square brackets, at sign, backslash, and angle brackets.
 */
static final UnicodeSet EMAIL_EXCLUDES =
new UnicodeSet("[\\u0020 ; \\: \" ( ) \\[ \\] @ \\\\ < >]").freeze();
static final UnicodeSet validEmailLocalPart =
new UnicodeSet("[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]")
/**
 * Frozen set of code points accepted in an email local part.
 *
 * <p>Built from a pattern that combines XID_Continue with the Basic Latin block and subtracts
 * general category Cc, after which every member of {@code EMAIL_EXCLUDES} is removed.
 *
 * <p>The pattern is parsed from position 0 with a {@code VersionedSymbolTable} frozen at
 * {@code Settings.LATEST_VERSION_INFO}. NOTE(review): presumably this pins the property lookups
 * to the latest Unicode version data known to the tools rather than to the library's built-in
 * data; confirm against {@code VersionedSymbolTable}.
 */
public static final UnicodeSet validEmailLocalPart =
new UnicodeSet(
"[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]",
new ParsePosition(0),
VersionedSymbolTable.frozenAt(Settings.LATEST_VERSION_INFO))
.removeAll(EMAIL_EXCLUDES)
.freeze();
public static final UnicodeProperty LinkEmail =
Expand Down
Loading