diff --git a/unicodetools/data/linkification/dev/LinkEmail.txt b/unicodetools/data/linkification/dev/LinkEmail.txt index d5a8eafda..b4a520a7c 100644 --- a/unicodetools/data/linkification/dev/LinkEmail.txt +++ b/unicodetools/data/linkification/dev/LinkEmail.txt @@ -1,5 +1,5 @@ # LinkEmail.txt -# Date: 2025-12-24, 00:04:19 GMT +# Date: 2025-12-24, 02:37:15 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -121,10 +121,12 @@ 0526..0527 # 6.0 [2] (Ԧ..ԧ) CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0528..052F # 7.0 [8] (Ԩ..ԯ) CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK..CYRILLIC SMALL LETTER EL WITH DESCENDER 0531..0556 # 1.1 [38] (Ա..Ֆ) ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0558 # 18.0 (U+0558) MODIFIER LETTER ARMENIAN SMALL EH 0559 # 1.1 (ՙ) ARMENIAN MODIFIER LETTER LEFT HALF RING 0560 # 11.0 (ՠ) ARMENIAN SMALL LETTER TURNED AYB 0561..0587 # 1.1 [39] (ա..և) ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0588 # 11.0 (ֈ) ARMENIAN SMALL LETTER YI WITH STROKE +058B..058C # 18.0 [2] (U+058B..U+058C) MODIFIER LETTER ARMENIAN SMALL INI..MODIFIER LETTER ARMENIAN SMALL YI 0591..05A1 # 2.0 [17] (֑..֡) HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER 05A2 # 4.1 (֢) HEBREW ACCENT ATNAH HAFUKH 05A3..05AF # 2.0 [13] (֣..֯) HEBREW ACCENT MUNAH..HEBREW MARK MASORA CIRCLE @@ -136,6 +138,7 @@ 05C4 # 2.0 (ׄ) HEBREW MARK UPPER DOT 05C5 # 4.1 (ׅ) HEBREW MARK LOWER DOT 05C7 # 4.1 (ׇ) HEBREW POINT QAMATS QATAN +05C8 # 18.0 (U+05C8) HEBREW POINT SHEVA NA MUDGASH 05D0..05EA # 1.1 [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF # 11.0 (ׯ) HEBREW YOD TRIANGLE 05F0..05F2 # 1.1 [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD @@ -220,6 +223,7 @@ 097E..097F # 5.0 [2] (ॾ..ॿ) DEVANAGARI LETTER DDDA..DEVANAGARI LETTER BBA 0980 # 7.0 (ঀ) BENGALI ANJI 0981..0983 # 1.1 [3] (ঁ..ঃ) BENGALI SIGN CANDRABINDU..BENGALI SIGN VISARGA +0984 # 18.0 (U+0984) BENGALI SIGN COMBINING ANUSVARA ABOVE 0985..098C # 1.1 [8] (অ..ঌ) BENGALI LETTER A..BENGALI LETTER VOCALIC L 098F..0990 # 1.1 [2] (এ..ঐ) BENGALI LETTER E..BENGALI LETTER AI 0993..09A8 # 1.1 [22] (ও..ন) BENGALI LETTER O..BENGALI LETTER NA @@ -238,6 +242,7 @@ 09E6..09F1 # 1.1 [12] (০..ৱ) BENGALI DIGIT ZERO..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC # 10.0 (ৼ) BENGALI LETTER VEDIC ANUSVARA 09FE # 11.0 (৾) BENGALI SANDHI MARK +09FF # 18.0 (U+09FF) BENGALI LETTER SANSKRIT BA 0A01 # 4.0 (ਁ) GURMUKHI SIGN ADAK BINDI 0A02 # 1.1 (ਂ) GURMUKHI SIGN BINDI 0A03 # 4.0 (ਃ) GURMUKHI SIGN VISARGA @@ -287,6 +292,7 @@ 0B44 # 5.1 (ୄ) ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 # 1.1 [2] (େ..ୈ) ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4D # 1.1 [3] (ୋ..୍) ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA +0B53..0B54 # 18.0 [2] (U+0B53..U+0B54) ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE 0B55 # 13.0 (୕) ORIYA SIGN OVERLINE 0B56..0B57 # 1.1 [2] (ୖ..ୗ) ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK 0B5C..0B5D # 1.1 [2] (ଡ଼..ଢ଼) ORIYA LETTER RRA..ORIYA LETTER RHA @@ -544,6 +550,7 @@ 1810..1819 # 3.0 [10] (᠐..᠙) MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE 1820..1877 # 3.0 [88] (ᠠ..ᡷ) MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA 1878 # 11.0 (ᡸ) MONGOLIAN LETTER CHA WITH TWO DOTS +1879 # 18.0 (U+1879) MONGOLIAN LETTER ALTERNATE UE 1880..18A9 # 3.0 [42] (ᢀ..ᢩ) MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI DAGALGA 18AA # 5.1 (ᢪ) MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 # 5.2 [70] (ᢰ..ᣵ) CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S @@ -568,7 +575,9 @@ 1ABF..1AC0 # 13.0 [2] (ᪿ..ᫀ) COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC1..1ACE # 14.0 [14] (᫁..ᫎ) COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING LATIN SMALL LETTER INSULAR T 1ACF..1ADD # 17.0 [15] (᫏..᫝) COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1ADE..1ADF # 18.0 [2] (U+1ADE..U+1ADF) COMBINING GRAVE-DOT..COMBINING DOT-ACUTE 1AE0..1AEB # 17.0 [12] (᫠..᫫) COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE +1AEC..1AF0 # 18.0 [5] (U+1AEC..U+1AF0) COMBINING CARON-ACUTE..COMBINING DOUBLE COMMA ABOVE 1B00..1B4B # 5.0 [76] (ᬀ..ᭋ) BALINESE SIGN ULU RICEM..BALINESE LETTER ASYURA SASAK 1B4C # 14.0 (ᭌ) BALINESE LETTER ARCHAIC JNYA 1B50..1B59 # 5.0 [10] (᭐..᭙) BALINESE DIGIT ZERO..BALINESE DIGIT NINE @@ -631,8 +640,10 @@ 2054 # 4.0 (⁔) INVERTED UNDERTIE 2071 # 3.2 (ⁱ) SUPERSCRIPT LATIN SMALL LETTER I 207F # 1.1 (ⁿ) SUPERSCRIPT LATIN SMALL LETTER N +208F # 18.0 (U+208F) MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..2094 # 4.1 [5] (ₐ..ₔ) LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA 2095..209C # 6.0 [8] (ₕ..ₜ) LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T +209D..209F # 18.0 [3] (U+209D..U+209F) LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z 20D0..20DC # 1.1 [13] (⃐..⃜) COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 # 1.1 (⃡) COMBINING LEFT RIGHT ARROW ABOVE 20E5..20EA # 3.2 [6] (⃥..⃪) COMBINING REVERSE SOLIDUS OVERLAY..COMBINING LEFTWARDS ARROW OVERLAY @@ -764,6 +775,8 @@ A7D3 # 14.0 (ꟓ) LATIN SMALL LETTER DOUBLE THORN A7D4 # 17.0 (꟔) LATIN CAPITAL LETTER DOUBLE WYNN A7D5..A7D9 # 14.0 [5] (ꟕ..ꟙ) LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S A7DA..A7DC # 16.0 [3] (Ꟛ..Ƛ) LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7DD # 18.0 (U+A7DD) LATIN CAPITAL LETTER CLOSED OMEGA +A7E2 # 18.0 (U+A7E2) LATIN CAPITAL LETTER R WITH LONG LEG A7F1 # 17.0 (꟱) MODIFIER LETTER CAPITAL S A7F2..A7F4 # 14.0 [3] (ꟲ..ꟴ) MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 # 13.0 [2] (Ꟶ..ꟶ) LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H @@ -808,6 +821,7 @@ AB60..AB63 # 8.0 [4] (ꭠ..ꭣ) LATIN SMALL LETTER SAKHA YAT..LATIN SMALL AB64..AB65 # 7.0 [2] (ꭤ..ꭥ) LATIN SMALL LETTER INVERTED ALPHA..GREEK LETTER SMALL CAPITAL OMEGA AB66..AB67 # 12.0 [2] (ꭦ..ꭧ) LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK AB68..AB69 # 13.0 [2] (ꭨ..ꭩ) LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE..MODIFIER LETTER SMALL TURNED W +AB6C..AB6D # 18.0 [2] (U+AB6C..U+AB6D) LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF # 8.0 [80] (ꭰ..ꮿ) CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABEA # 5.2 [43] (ꯀ..ꯪ) MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL SIGN NUNG ABEC..ABED # 5.2 [2] (꯬..꯭) MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYEK @@ -903,6 +917,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10780..10785 # 14.0 [6] (𐞀..𐞅) MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 # 14.0 [42] (𐞇..𐞰) MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA # 14.0 [9] (𐞲..𐞺) MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107BB..107BF # 18.0 [5] (U+107BB..U+107BF) MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 # 4.0 [6] (𐠀..𐠅) CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 # 4.0 (𐠈) CYPRIOT SYLLABLE JO 1080A..10835 # 4.0 [44] (𐠊..𐠵) CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -948,6 +963,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EB0..10EB1 # 13.0 [2] (𐺰..𐺱) YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 # 16.0 [3] (𐻂..𐻄) ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5..10EC7 # 17.0 [3] (𐻅..𐻇) ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED9..10EEE # 18.0 [22] (U+10ED9..U+10EEE) ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 # 18.0 (U+10EF9) ARABIC MARK CROWN 10EFA..10EFB # 17.0 [2] (𐻺..𐻻) ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 10EFC # 16.0 (𐻼) ARABIC COMBINING ALEF OVERLAY 10EFD..10EFF # 15.0 [3] (𐻽..𐻿) ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA @@ -1061,6 +1078,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11A9D # 11.0 (𑪝) SOYOMBO MARK PLUTA 11AB0..11ABF # 14.0 [16] (𑪰..𑪿) CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 # 7.0 [57] (𑫀..𑫸) PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11B0A # 18.0 (U+11B0A) DEVANAGARI LETTER ALTERNATE DDDA 11B60..11B67 # 17.0 [8] (𑭠..𑭧) SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 # 16.0 [33] (𑯀..𑯠) SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BF0..11BF9 # 16.0 [10] (𑯰..𑯹) SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -1098,7 +1116,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 12399 # 8.0 (𒎙) CUNEIFORM SIGN U U 12400..12462 # 5.0 [99] (𒐀..𒑢) CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12463..1246E # 7.0 [12] (𒑣..𒑮) CUNEIFORM NUMERIC SIGN ONE QUARTER GUR..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +1246F # 18.0 (U+1246F) CUNEIFORM NUMERIC SIGN SEVEN ASH TENU +12475..1247F # 18.0 [11] (U+12475..U+1247F) CUNEIFORM NUMERIC SIGN EIGHT ASH TENU..CUNEIFORM NUMERIC SIGN ASH TIMES NINE DISH TENU 12480..12543 # 8.0 [196] (𒒀..𒕃) CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12550..12686 # 18.0 [311] (U+12550..U+12686) CUNEIFORM NUMERIC SIGN ONE N01..CUNEIFORM NUMERIC SIGN ONE N36 FLAT 12F90..12FF0 # 14.0 [97] (𒾐..𒿰) CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 13000..1342E # 5.2 [1071] (𓀀..𓐮) EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 1342F # 15.0 (𓐯) EGYPTIAN HIEROGLYPH V011D @@ -1120,6 +1141,8 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16B7D..16B8F # 7.0 [19] (𖭽..𖮏) PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16D40..16D6C # 16.0 [45] (𖵀..𖵬) KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN SAAT 16D70..16D79 # 16.0 [10] (𖵰..𖵹) KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D9D # 18.0 [30] (U+16D80..U+16D9D) CHISOI LETTER A..CHISOI SIGN SISO +16DA0..16DA9 # 18.0 [10] (U+16DA0..U+16DA9) CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F # 11.0 [64] (𖹀..𖹿) MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16EA0..16EB8 # 17.0 [25] (𖺠..𖺸) BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 # 17.0 [25] (𖺻..𖻓) BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY @@ -1141,20 +1164,26 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 187F8..187FF # 17.0 [8] (𘟸..𘟿) TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18800..18AF2 # 9.0 [755] (𘠀..𘫲) TANGUT COMPONENT-001..TANGUT COMPONENT-755 18AF3..18CD5 # 13.0 [483] (𘫳..𘳕) TANGUT COMPONENT-756..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CD6..18CDA # 18.0 [5] (U+18CD6..U+18CDA) KHITAN SMALL SCRIPT CHARACTER-18CD6..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF # 16.0 (𘳿) KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D08 # 13.0 [9] (𘴀..𘴈) TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 18D09..18D1E # 17.0 [22] (𘴉..𘴞) TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E +18D1F..18D20 # 18.0 [2] (U+18D1F..U+18D20) TANGUT IDEOGRAPH-18D1F..TANGUT IDEOGRAPH-18D20 18D80..18DF2 # 17.0 [115] (𘶀..𘷲) TANGUT COMPONENT-769..TANGUT COMPONENT-883 +18E00..19191 # 18.0 [914] (U+18E00..U+19191) JURCHEN CHARACTER-18E00..JURCHEN CHARACTER-19191 +191A0..191D2 # 18.0 [51] (U+191A0..U+191D2) JURCHEN RADICAL-01..JURCHEN RADICAL-51 1AFF0..1AFF3 # 14.0 [4] (𚿰..𚿳) KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB # 14.0 [7] (𚿵..𚿻) KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE # 14.0 [2] (𚿽..𚿾) KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000..1B001 # 6.0 [2] (𛀀..𛀁) KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1B002..1B11E # 10.0 [285] (𛀂..𛄞) HENTAIGANA LETTER A-1..HENTAIGANA LETTER N-MU-MO-2 1B11F..1B122 # 14.0 [4] (𛄟..𛄢) HIRAGANA LETTER ARCHAIC WU..KATAKANA LETTER ARCHAIC WU +1B123..1B128 # 18.0 [6] (U+1B123..U+1B128) HIRAGANA DIGRAPH KOTO..KATAKANA LETTER ALTERNATE WI 1B132 # 15.0 (𛄲) HIRAGANA LETTER SMALL KO 1B150..1B152 # 12.0 [3] (𛅐..𛅒) HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 # 15.0 (𛅕) KATAKANA LETTER SMALL KO 1B164..1B167 # 12.0 [4] (𛅤..𛅧) KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B168 # 18.0 (U+1B168) KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB # 10.0 [396] (𛅰..𛋻) NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A # 7.0 [107] (𛰀..𛱪) DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C # 7.0 [13] (𛱰..𛱼) DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -1164,12 +1193,17 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1CCF0..1CCF9 # 16.0 [10] (𜳰..𜳹) OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CF00..1CF2D # 14.0 [46] (𜼀..𜼭) ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 # 14.0 [23] (𜼰..𜽆) ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D127..1D128 # 18.0 [2] (U+1D127..U+1D128) MUSICAL SYMBOL COMBINING STRESS..MUSICAL SYMBOL COMBINING UNSTRESS 1D165..1D169 # 3.1 [5] (𝅥..𝅩) MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 # 3.1 [6] (𝅭..𝅲) MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 # 3.1 [8] (𝅻..𝆂) MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B # 3.1 [7] (𝆅..𝆋) MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD # 3.1 [4] (𝆪..𝆭) MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 # 4.1 [3] (𝉂..𝉄) COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 # 18.0 [3] (U+1D250..U+1D252) MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D25B..1D25C # 18.0 [2] (U+1D25B..U+1D25C) MUSICAL SYMBOL COMBINING TREMOLO-4..MUSICAL SYMBOL COMBINING TREMOLO-5 +1D25F # 18.0 (U+1D25F) MUSICAL SYMBOL COMBINING BUZZ ROLL STEM +1D280..1D281 # 18.0 [2] (U+1D280..U+1D281) MUSICAL SYMBOL COMBINING STEM BOW BEHIND BRIDGE..MUSICAL SYMBOL COMBINING STEM BOW ON TOP OF BRIDGE 1D400..1D454 # 3.1 [85] (𝐀..𝑔) MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C # 3.1 [71] (𝑖..𝒜) MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F # 3.1 [2] (𝒞..𝒟) MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -1212,7 +1246,10 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DA9B..1DA9F # 8.0 [5] (𝪛..𝪟) SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF # 8.0 [15] (𝪡..𝪯) SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 1DF00..1DF1E # 14.0 [31] (𝼀..𝼞) LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER S WITH CURL +1DF1F..1DF24 # 18.0 [6] (U+1DF1F..U+1DF24) LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH 1DF25..1DF2A # 15.0 [6] (𝼥..𝼪) LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF2B..1DF81 # 18.0 [87] (U+1DF2B..U+1DF81) LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN CAPITAL LETTER E WITH BENT TOPBAR +1DFCD..1DFFF # 18.0 [51] (U+1DFCD..U+1DFFF) MODIFIER LETTER SMALL TURNED R WITH MID-HEIGHT LEFT HOOK..MODIFIER LETTER SMALL T WITH HOOK AND RETROFLEX HOOK 1E000..1E006 # 9.0 [7] (𞀀..𞀆) COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 # 9.0 [17] (𞀈..𞀘) COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 # 9.0 [7] (𞀛..𞀡) COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1282,6 +1319,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 2B739 # 15.0 (𫜹) CJK UNIFIED IDEOGRAPH-2B739 2B73A..2B73F # 17.0 [6] (𫜺..𫜿) CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73F 2B740..2B81D # 6.0 [222] (𫝀..𫠝) CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B81E # 18.0 (U+2B81E) CJK UNIFIED IDEOGRAPH-2B81E 2B820..2CEA1 # 8.0 [5762] (𫠠..𬺡) CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEA2..2CEAD # 17.0 [12] (𬺢..𬺭) CJK UNIFIED IDEOGRAPH-2CEA2..CJK UNIFIED IDEOGRAPH-2CEAD 2CEB0..2EBE0 # 10.0 [7473] (𬺰..𮯠) CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 @@ -1290,6 +1328,7 @@ FFDA..FFDC # 1.1 [3] (ᅳ..ᅵ) HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 30000..3134A # 13.0 [4939] (𰀀..𱍊) CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF # 15.0 [4192] (𱍐..𲎯) CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF 323B0..33479 # 17.0 [4298] (𲎰..𳑹) CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 +3D000..3FC3F # 18.0 [11328] (U+3D000..U+3FC3F) SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F E0100..E01EF # 4.0 [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149241 +# Total code points: 162119 diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index c3a3a8651..9afe7e87c 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -1804,6 +1804,10 @@ public String _getValue(String string) { protected List _getAvailableValues(List result) { return YESNO; } + + public boolean hasUniformUnassigned() { + return false; + } } // private static class StringTransformProperty extends SimpleProperty { diff --git a/unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java b/unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java index 58dbb9c09..7901fff2a 100644 --- a/unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java +++ b/unicodetools/src/main/java/org/unicode/utilities/LinkUtilities.java @@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.text.ParsePosition; import java.util.Comparator; import java.util.EnumMap; import java.util.EnumSet; @@ -197,8 +198,11 @@ private LinkTermination(String uset) { static final UnicodeSet EMAIL_EXCLUDES = new UnicodeSet("[\\u0020 ; \\: \" ( ) \\[ \\] @ \\\\ < >]").freeze(); - static final UnicodeSet validEmailLocalPart = - new UnicodeSet("[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]") + public static final UnicodeSet validEmailLocalPart = + new UnicodeSet( + "[\\p{XID_Continue}\\p{block=basic_latin}-\\p{Cc}]", + new ParsePosition(0), + VersionedSymbolTable.frozenAt(Settings.LATEST_VERSION_INFO)) .removeAll(EMAIL_EXCLUDES) .freeze(); public static final UnicodeProperty LinkEmail =