diff --git a/CindyScriptPygments.py b/CindyScriptPygments.py index 82219c6..aae3943 100644 --- a/CindyScriptPygments.py +++ b/CindyScriptPygments.py @@ -13,20 +13,20 @@ u = str uchr = chr else: - u = unicode - uchr = unichr + u = str + uchr = chr def decompressUnicodeRanges(d, s, h): - utf16 = len(u"\U00012345") > 1 + utf16 = len("\U00012345") > 1 j = 0 - res = u"(?:[" if utf16 else u"[" + res = "(?:[" if utf16 else "[" n = len(s) i = 0 while i < n: c = ord(s[i]) if (c >= 0xe800): if utf16: - res += u"]|" + uchr(c - 0x1000) + u"[" + res += "]|" + uchr(c - 0x1000) + "[" j = 0xdc00 else: j = ((c - 0xe800) << 10) + 0x10000 @@ -37,12 +37,12 @@ def decompressUnicodeRanges(d, s, h): j += d[ord(s[i]) - 32] if j != fst: if j != fst + 1: - res += u"-" + res += "-" res += uchr(j) i += 1 if utf16: - h = u''.join(c if c == '-' else uchr(ord(c) - 0x1000) for c in h) - res += u"]|[" + h + eval(r'u"][\udc00-\udfff])"') + h = ''.join(c if c == '-' else uchr(ord(c) - 0x1000) for c in h) + res += "]|[" + h + eval(r'u"][\udc00-\udfff])"') else: n = len(h) i = 0 @@ -50,13 +50,13 @@ def decompressUnicodeRanges(d, s, h): c = ord(h[i]) j = (c - 0xe800) << 10 res += uchr(j + 0x10000) - if i + 1 < n and h[i + 1] == u"-": + if i + 1 < n and h[i + 1] == "-": i += 2 c = ord(h[i]) j = (c - 0xe800) << 10 - res += u"-" + uchr(j + 0x103ff) + res += "-" + uchr(j + 0x103ff) i += 1 - res += u"]" + res += "]" return res # Regular expression matching letters (Unicode 8.0.0 category L) @@ -71,27 +71,27 @@ def decompressUnicodeRanges(d, s, h): 107, 108, 116, 122, 130, 132, 134, 138, 160, 165, 185, 195, 196, 255, 268, 277, 310, 332, 339, 362, 365, 390, 449, 457, 470, 512, 513, 541, 568, 582, 619, 673, 726, 768, 820, 921, 991, 1164, 2684, 6581, 8453, 11171, 20949 -], (u"T4(4I!)!'!&/ 0 \x96')2$+! !\x83$ %## !(! ! ; u \x86.\x88 =#!+YoA& 87C" - u"% } !5%+%) #!*! <0a,!4B1%'!&-'!1!$!33`HsG$!;!+.52'(#%#- & !$#$!*!9% 2%" - u"H''%#- & % % %B# !H *+ - & % $$!;!5%3!,(#%#- & % $$!X% 5!:! '$ #$% " - u"! %$%$ $)W!G( / 2$!> &%C( / . $$!L! %5%;( D#!*!* 4'&*$W + !#&]F %" - u"6&]% !#% !#!(# & ! !#% # %1!#$ !/#L!S( @?$\x817-!*''#$!$%+ ',6!:= !&!" - u"#7 \x90 ##& ! ##D ##B ##& ! ##9 K ##^Y2*V#'$\x9d#5 4&p((+, #2*2*2, 5OC" - u"!'!Uy.D !&_)0J<#$,E'4Q/1Pv!|8:&K<9%)E>@7 )@\x80# #$%1gT\x8e#'#=#'#( ! !" - u" ! 0#P & !$ &$##'',& &d!9!*,b!'!#. !$$(! ! ! # 1##&$'!G%\xa58 8 \x84(" - u"#$%6= !&!#Q+!*/1& & & & & & & &t!\x97%E$&%'V( z #&D${:Aj2\x99\xa6q\xa9" - u"E\xa4UZ#\x8d$2)%-8*0#_J+#b#M#(S1 # /0O2[R'$! !6>)/A?+8?G)-):9*\ue803!`KJ9J\ue804#PriL3>@7M$!6F2#/! !C* 3V& ! # " - u"9 .+8N(#%#- & % $$!;!6$\ue805eF-% !\x8987#=F-!k7x4\ue806\x87RB!\x95K" - u"\ue808!\xa2\ue809e\x8a\ue80d!8\ue811!\x9c\ue81a!\x9b+0c<;F*#BH&:\ue81b" - u"\xa0U,!l,\ue82c!%\ue82f!\"&,$++.\ue835!w m %#!#%## ) ! & S ##( & > # $ " - u"!$& \x91#3 3 0 3 0 3 0 3 0 3 (\ue83a!\x8b\ue83b\x98# A % !#! . # ! !(!'" - u"! ! ! % !#! ! ! ! ! % !## & # # ! . 5& $ 5\ue869!\x9f7\x8c\ue86d!" - u"\xa1,g\ue86e!<#\xa3\ue873!\x9e\ue87e!\x9a" -), u"\ue80c\ue840-\ue868\ue86a-\ue86c\ue86f-\ue872") +], ("T4(4I!)!'!&/ 0 \x96')2$+! !\x83$ %## !(! ! ; u \x86.\x88 =#!+YoA& 87C" + "% } !5%+%) #!*! <0a,!4B1%'!&-'!1!$!33`HsG$!;!+.52'(#%#- & !$#$!*!9% 2%" + "H''%#- & % % %B# !H *+ - & % $$!;!5%3!,(#%#- & % $$!X% 5!:! '$ #$% " + "! %$%$ $)W!G( / 2$!> &%C( / . $$!L! %5%;( D#!*!* 4'&*$W + !#&]F %" + "6&]% !#% !#!(# & ! !#% # %1!#$ !/#L!S( @?$\x817-!*''#$!$%+ ',6!:= !&!" + "#7 \x90 ##& ! ##D ##B ##& ! ##9 K ##^Y2*V#'$\x9d#5 4&p((+, #2*2*2, 5OC" + "!'!Uy.D !&_)0J<#$,E'4Q/1Pv!|8:&K<9%)E>@7 )@\x80# #$%1gT\x8e#'#=#'#( ! !" + " ! 0#P & !$ &$##'',& &d!9!*,b!'!#. !$$(! ! ! # 1##&$'!G%\xa58 8 \x84(" + "#$%6= !&!#Q+!*/1& & & & & & & &t!\x97%E$&%'V( z #&D${:Aj2\x99\xa6q\xa9" + "E\xa4UZ#\x8d$2)%-8*0#_J+#b#M#(S1 # /0O2[R'$! !6>)/A?+8?G)-):9*\ue803!`KJ9J\ue804#PriL3>@7M$!6F2#/! !C* 3V& ! # " + "9 .+8N(#%#- & % $$!;!6$\ue805eF-% !\x8987#=F-!k7x4\ue806\x87RB!\x95K" + "\ue808!\xa2\ue809e\x8a\ue80d!8\ue811!\x9c\ue81a!\x9b+0c<;F*#BH&:\ue81b" + "\xa0U,!l,\ue82c!%\ue82f!\"&,$++.\ue835!w m %#!#%## ) ! & S ##( & > # $ " + "!$& \x91#3 3 0 3 0 3 0 3 0 3 (\ue83a!\x8b\ue83b\x98# A % !#! . # ! !(!'" + "! ! ! % !#! ! ! ! ! % !## & # # ! . 5& $ 5\ue869!\x9f7\x8c\ue86d!" + "\xa1,g\ue86e!<#\xa3\ue873!\x9e\ue87e!\x9a" +), "\ue80c\ue840-\ue868\ue86a-\ue86c\ue86f-\ue872") reName = (u(r"(?:#(?:[ \t]*[1-9])?|(?:'|L)(?:[ \t]*(?:[0-9']|L))*)") .replace(u("L"), unicodeLetters)) @@ -100,77 +100,77 @@ def decompressUnicodeRanges(d, s, h): # These must be sorted by decreasing string length, for longest match first operators = [ - u"~!=", - u"~<=", - u"~>=", - u":=_", - u"::=", - u"!=", - u"++", - u"--", - u"->", - u"..", - u":=", - u":>", - u"<:", - u"<=", - u"<>", - u"==", - u">=", - u"~<", - u"~=", - u"~>", - u"~~", - u"!", - u"%", - u"&", - u"*", - u"+", - u"-", - u".", - u"/", - u":", - u";", - u"<", - u"=", - u">", - u"^", - u"_", - u"|", - u"\u00ac", # ¬ - u"\u00b0", # ° - u"\u00b7", # · - u"\u00d7", # × - u"\u00f7", # ÷ - u"\u2062", # invisible times - u"\u2192", # → - u"\u2208", # ∈ - u"\u2209", # ∉ - u"\u2212", # − - u"\u2215", # ∕ - u"\u2216", # ∖ - u"\u221a", # √ - u"\u2227", # ∧ - u"\u2228", # ∨ - u"\u2229", # ∩ - u"\u222a", # ∪ - u"\u2236", # ∶ - u"\u2248", # ≈ - u"\u2249", # ≉ - u"\u225f", # ≟ - u"\u2260", # ≠ - u"\u2264", # ≤ - u"\u2265", # ≥ - u"\u2266", # ≦ - u"\u2267", # ≧ - u"\u22c5", # ⋅ - u"\u2a85", # ⪅ - u"\u2a86", # ⪆ - u"\u2a89", # ⪉ - u"\u2a8a", # ⪊ + "~!=", + "~<=", + "~>=", + ":=_", + "::=", + "!=", + "++", + "--", + "->", + "..", + ":=", + ":>", + "<:", + "<=", + "<>", + "==", + ">=", + "~<", + "~=", + "~>", + "~~", + "!", + "%", + "&", + "*", + "+", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "^", + "_", + "|", + "\u00ac", # ¬ + "\u00b0", # ° + "\u00b7", # · + "\u00d7", # × + "\u00f7", # ÷ + "\u2062", # invisible times + "\u2192", # → + "\u2208", # ∈ + "\u2209", # ∉ + "\u2212", # − + "\u2215", # ∕ + "\u2216", # ∖ + "\u221a", # √ + "\u2227", # ∧ + "\u2228", # ∨ + "\u2229", # ∩ + "\u222a", # ∪ + "\u2236", # ∶ + "\u2248", # ≈ + "\u2249", # ≉ + "\u225f", # ≟ + "\u2260", # ≠ + "\u2264", # ≤ + "\u2265", # ≥ + "\u2266", # ≦ + "\u2267", # ≧ + "\u22c5", # ⋅ + "\u2a85", # ⪅ + "\u2a86", # ⪆ + "\u2a89", # ⪉ + "\u2a8a", # ⪊ ] -reOps = u"|".join(map(re.escape, operators)) +reOps = "|".join(map(re.escape, operators)) class CindyScriptLexer(RegexLexer): name = "CindyScript" @@ -179,22 +179,22 @@ class CindyScriptLexer(RegexLexer): mimetypes = ["text/x-cindyscript"] tokens = { 'root': [ - (u'[ \t\n]+', Token.Text.Whitespace), - (u'//.*', Token.Comment.Single), - (u'/\\*', Token.Comment.Multiline, 'mlc'), + ('[ \t\n]+', Token.Text.Whitespace), + ('//.*', Token.Comment.Single), + ('/\\*', Token.Comment.Multiline, 'mlc'), (reNumber, Token.Number), (reOps, Token.Operator), (u(r'\,|\[|\]|\(|\)|\{|\}'), Token.Punctuation), - (u'(?:[₊₋][ \t]*)?[₀₁₂₃₄₅₆₇₈₉](?:[ \t]*[₀₁₂₃₄₅₆₇₈₉])*', Token.Number), - (u'(?:[⁺⁻][ \t]*)?[⁰¹²³⁴⁵⁶⁷⁸⁹](?:[ \t]*[⁰¹²³⁴⁵⁶⁷⁸⁹])*', Token.Number), - (reName + u'(?=\s*\()', Token.Name.Function), + ('(?:[₊₋][ \t]*)?[₀₁₂₃₄₅₆₇₈₉](?:[ \t]*[₀₁₂₃₄₅₆₇₈₉])*', Token.Number), + ('(?:[⁺⁻][ \t]*)?[⁰¹²³⁴⁵⁶⁷⁸⁹](?:[ \t]*[⁰¹²³⁴⁵⁶⁷⁸⁹])*', Token.Number), + (reName + '(?=\s*\()', Token.Name.Function), (reName, Token.Name.Variable), - (u'"[^"]*"', Token.String.Double), + ('"[^"]*"', Token.String.Double), ], 'mlc': [ - (u'/\\*', Token.Comment.Multiline, '#push'), - (u'\\*/', Token.Comment.Multiline, '#pop'), - (u'(?:[^*/]|\\*(?!/)|/(?!\\*))+', Token.Comment.Multiline), + ('/\\*', Token.Comment.Multiline, '#push'), + ('\\*/', Token.Comment.Multiline, '#pop'), + ('(?:[^*/]|\\*(?!/)|/(?!\\*))+', Token.Comment.Multiline), ], } @@ -209,14 +209,14 @@ class CindyJsHtmlLexer(RegexLexer): flags = re.DOTALL tokens = { 'root': [ - (u'(.*<\\s*script[^>]*\\s' - u'type\\s*=\\s*["\']text/x-cindyscript[^a-z][^>]*>)' - u'(.*?)' - u'(<\\s*/\\s*script\\s*>)', + ('(.*<\\s*script[^>]*\\s' + 'type\\s*=\\s*["\']text/x-cindyscript[^a-z][^>]*>)' + '(.*?)' + '(<\\s*/\\s*script\\s*>)', bygroups(using(HtmlLexer), using(CindyScriptLexer), using(HtmlLexer)) ), - (u'.+', using(HtmlLexer)) + ('.+', using(HtmlLexer)) ] } diff --git a/tests/LexerBase.py b/tests/LexerBase.py index aa09eca..e31dfdf 100644 --- a/tests/LexerBase.py +++ b/tests/LexerBase.py @@ -4,7 +4,7 @@ if sys.version_info[0] >= 3: u = str else: - u = unicode + u = str class LexerBase(unittest.TestCase): @@ -14,7 +14,7 @@ def lex(self, string, expected = None): if expected is not None: if len(expected) != len(toks): print() - print(' | '.join(v for t, v in toks)) + print((' | '.join(v for t, v in toks))) for exp, act in zip(expected, toks): if act is None: pass diff --git a/tests/TestHtml.py b/tests/TestHtml.py index 56764db..1104109 100644 --- a/tests/TestHtml.py +++ b/tests/TestHtml.py @@ -9,41 +9,41 @@ class TestCindyJsHtmlLexer(LexerBase): lexerClass = C.CindyJsHtmlLexer def test_JSandCS(self): - self.lex(u''' + self.lex('''
''', [ - u'<', u'html', u'>', - u'<', u'body', u' ', u'data-foo', u'=', u' ', u'"bar"', u'>', - u'<', (T.Name.Tag, u'script'), u' ', u'type', u'=', - (T.String, u'"text/javascript"'), u'>', u'', u'\n', - (T.Keyword, u'if'), - (T.Punctuation, u'('), - (T.Keyword, u'false'), - (T.Punctuation, u')'), - (T.Name, u'foo'), - (T.Punctuation, u'('), - (T.Punctuation, u')'), - (T.Punctuation, u';'), - (T.Text, u'\n'), - u'<', u'/', (T.Name.Tag, u'script'), u'>', - u'<', u'script', u' ', u'id', u'=', u'"xy"', - u' ', (T.Name.Attribute, u'type'), u'=', - (T.String, u'"text/x-cindyscript"'), u'>', u'\n', - (T.Name.Function, u'if'), - (T.Punctuation, u'('), - (T.Name.Variable, u'false'), - (T.Punctuation, u','), - (T.Name.Function, u'foo'), - (T.Punctuation, u'('), - (T.Punctuation, u')'), - (T.Punctuation, u')'), - (T.Text, u'\n'), - u'<', u'/', (T.Name.Tag, u'script'), u'>', - u'<', u'/', u'body', u'>', - u'<', u'/', (T.Name.Tag, u'html'), u'>', - u'\n', + '<', 'html', '>', + '<', 'body', ' ', 'data-foo', '=', ' ', '"bar"', '>', + '<', (T.Name.Tag, 'script'), ' ', 'type', '=', + (T.String, '"text/javascript"'), '>', '', '\n', + (T.Keyword, 'if'), + (T.Punctuation, '('), + (T.Keyword, 'false'), + (T.Punctuation, ')'), + (T.Name, 'foo'), + (T.Punctuation, '('), + (T.Punctuation, ')'), + (T.Punctuation, ';'), + (T.Text, '\n'), + '<', '/', (T.Name.Tag, 'script'), '>', + '<', 'script', ' ', 'id', '=', '"xy"', + ' ', (T.Name.Attribute, 'type'), '=', + (T.String, '"text/x-cindyscript"'), '>', '\n', + (T.Name.Function, 'if'), + (T.Punctuation, '('), + (T.Name.Variable, 'false'), + (T.Punctuation, ','), + (T.Name.Function, 'foo'), + (T.Punctuation, '('), + (T.Punctuation, ')'), + (T.Punctuation, ')'), + (T.Text, '\n'), + '<', '/', (T.Name.Tag, 'script'), '>', + '<', '/', 'body', '>', + '<', '/', (T.Name.Tag, 'html'), '>', + '\n', ]) diff --git a/tests/TestLexer.py b/tests/TestLexer.py index fd7f80e..a5f2669 100644 --- a/tests/TestLexer.py +++ b/tests/TestLexer.py @@ -9,55 +9,55 @@ class TestCindyScriptLexer(LexerBase): lexerClass = C.CindyScriptLexer def test_string(self): - self.lex(u'some+"f//o/*o\\"+bar', [ - u'some', u'+', + self.lex('some+"f//o/*o\\"+bar', [ + 'some', '+', (T.String.Double, '"f//o/*o\\"'), - u'+', u'bar', u'\n' + '+', 'bar', '\n' ]) def test_multiLineComment(self): - self.lex(u'f/*(x)//y+\ng*/(z)', [ - u'f', - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'(x)//y+\ng'), - (T.Comment.Multiline, u'*/'), - u'(', u'z', u')', u'\n' + self.lex('f/*(x)//y+\ng*/(z)', [ + 'f', + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, '(x)//y+\ng'), + (T.Comment.Multiline, '*/'), + '(', 'z', ')', '\n' ]) def test_nestedMultiLineComment(self): - self.lex(u'a/*b/*c///*d*//e/**//*/ */*/***/f', [ - u'a', - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'b'), - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'c//'), - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'd'), - (T.Comment.Multiline, u'*/'), - (T.Comment.Multiline, u'/e'), - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'*/'), - (T.Comment.Multiline, u'/*'), - (T.Comment.Multiline, u'/ '), - (T.Comment.Multiline, u'*/'), - (T.Comment.Multiline, u'*/'), - (T.Comment.Multiline, u'**'), - (T.Comment.Multiline, u'*/'), - u'f', u'\n' + self.lex('a/*b/*c///*d*//e/**//*/ */*/***/f', [ + 'a', + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, 'b'), + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, 'c//'), + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, 'd'), + (T.Comment.Multiline, '*/'), + (T.Comment.Multiline, '/e'), + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, '*/'), + (T.Comment.Multiline, '/*'), + (T.Comment.Multiline, '/ '), + (T.Comment.Multiline, '*/'), + (T.Comment.Multiline, '*/'), + (T.Comment.Multiline, '**'), + (T.Comment.Multiline, '*/'), + 'f', '\n' ]) def test_singleDot(self): - self.lex(u'1.*2', [ - (T.Number, u'1.'), - (T.Operator, u'*'), - (T.Number, u'2'), - u'\n' + self.lex('1.*2', [ + (T.Number, '1.'), + (T.Operator, '*'), + (T.Number, '2'), + '\n' ]) def test_doubleDot(self): - self.lex(u'1..2', [ - (T.Number, u'1'), - (T.Operator, u'..'), - (T.Number, u'2'), - u'\n' + self.lex('1..2', [ + (T.Number, '1'), + (T.Operator, '..'), + (T.Number, '2'), + '\n' ]) diff --git a/tests/TestRegex.py b/tests/TestRegex.py index b26f546..d30f8cc 100644 --- a/tests/TestRegex.py +++ b/tests/TestRegex.py @@ -7,7 +7,7 @@ if sys.version_info[0] >= 3: u = str else: - u = unicode + u = str class TestRegex(unittest.TestCase): @@ -27,52 +27,52 @@ def shouldNotMatch(self, string): def test_reOps(self): self.re = re.compile(C.reOps) - self.shouldNotMatch(u'') - self.shouldMatch(u'+') - self.shouldNotMatch(u'(') - self.shouldMatch(u'|') - self.shouldMatch(u';') - self.shouldMatch(u'\u2260') - self.shouldMatch(u'~>=') + self.shouldNotMatch('') + self.shouldMatch('+') + self.shouldNotMatch('(') + self.shouldMatch('|') + self.shouldMatch(';') + self.shouldMatch('\u2260') + self.shouldMatch('~>=') def test_reNumber(self): self.re = re.compile(C.reNumber) - self.shouldMatch(u'1.2') - self.shouldMatch(u'1..2', 1) - self.shouldMatch(u'1 . . 2', 1) - self.shouldMatch(u'1 . * 2', 4) - self.shouldMatch(u'1 . 2') - self.shouldMatch(u'. 3') - self.shouldMatch(u'. 1 e + 2') - self.shouldMatch(u'3 . e 4') + self.shouldMatch('1.2') + self.shouldMatch('1..2', 1) + self.shouldMatch('1 . . 2', 1) + self.shouldMatch('1 . * 2', 4) + self.shouldMatch('1 . 2') + self.shouldMatch('. 3') + self.shouldMatch('. 1 e + 2') + self.shouldMatch('3 . e 4') def test_unicodeLetters(self): self.re = re.compile(C.unicodeLetters) - self.shouldMatch(u'a') - self.shouldNotMatch(u'1') - self.shouldNotMatch(u'.') - self.shouldMatch(u'\u00F6') - self.shouldNotMatch(u'\u00F7') - self.shouldMatch(u'\u00F8') - self.shouldMatch(u'\u02E4') - self.shouldNotMatch(u'\u02E5') - self.shouldMatch(u'\uFFDC') - self.shouldNotMatch(u'\uFFDE') - self.shouldMatch(u'\U00010000') - self.shouldNotMatch(u'\U0001000C') - self.shouldMatch(u'\U0002CEA0') - self.shouldMatch(u'\U0002CEA1') - self.shouldNotMatch(u'\U0002CEA2') - self.shouldMatch(u'\U00013210') - self.shouldNotMatch(u'\U00013579') + self.shouldMatch('a') + self.shouldNotMatch('1') + self.shouldNotMatch('.') + self.shouldMatch('\u00F6') + self.shouldNotMatch('\u00F7') + self.shouldMatch('\u00F8') + self.shouldMatch('\u02E4') + self.shouldNotMatch('\u02E5') + self.shouldMatch('\uFFDC') + self.shouldNotMatch('\uFFDE') + self.shouldMatch('\U00010000') + self.shouldNotMatch('\U0001000C') + self.shouldMatch('\U0002CEA0') + self.shouldMatch('\U0002CEA1') + self.shouldNotMatch('\U0002CEA2') + self.shouldMatch('\U00013210') + self.shouldNotMatch('\U00013579') def test_unicodeDecompression(self): # We could have pasted the fully decompressed regexp into the # source, but we prefer to keep the source small and have the # expanded version just in the unit tests. - bmp = u'A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u08A0-\u08B4\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0980\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5\u1CF6\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FD5\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA69D\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uA9E0-\uA9E4\uA9E6-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB65\uAB70-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC' - if len(u'\U00012345') > 1: - expected = u'(?:[' + bmp + ']' + eval("""( + bmp = 'A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u08A0-\u08B4\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0980\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5\u1CF6\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FD5\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA69D\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uA9E0-\uA9E4\uA9E6-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB65\uAB70-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC' + if len('\U00012345') > 1: + expected = '(?:[' + bmp + ']' + eval("""( u'|\uD800[\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1F\uDF30-\uDF40\uDF42-\uDF49\uDF50-\uDF75\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF]' u'|\uD801[\uDC00-\uDC9D\uDD00-\uDD27\uDD30-\uDD63\uDE00-\uDF36\uDF40-\uDF55\uDF60-\uDF67]' u'|\uD802[\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDC60-\uDC76\uDC80-\uDC9E\uDCE0-\uDCF2\uDCF4\uDCF5\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDE80-\uDE9C\uDEC0-\uDEC7\uDEC9-\uDEE4\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72\uDF80-\uDF91]' @@ -101,32 +101,32 @@ def test_unicodeDecompression(self): )""") else: expected = ( - u'[' + bmp + - u'\U00010000-\U0001000B\U0001000D-\U00010026\U00010028-\U0001003A\U0001003C\U0001003D\U0001003F-\U0001004D\U00010050-\U0001005D\U00010080-\U000100FA\U00010280-\U0001029C\U000102A0-\U000102D0\U00010300-\U0001031F\U00010330-\U00010340\U00010342-\U00010349\U00010350-\U00010375\U00010380-\U0001039D\U000103A0-\U000103C3\U000103C8-\U000103CF' - u'\U00010400-\U0001049D\U00010500-\U00010527\U00010530-\U00010563\U00010600-\U00010736\U00010740-\U00010755\U00010760-\U00010767' - u'\U00010800-\U00010805\U00010808\U0001080A-\U00010835\U00010837\U00010838\U0001083C\U0001083F-\U00010855\U00010860-\U00010876\U00010880-\U0001089E\U000108E0-\U000108F2\U000108F4\U000108F5\U00010900-\U00010915\U00010920-\U00010939\U00010980-\U000109B7\U000109BE\U000109BF\U00010A00\U00010A10-\U00010A13\U00010A15-\U00010A17\U00010A19-\U00010A33\U00010A60-\U00010A7C\U00010A80-\U00010A9C\U00010AC0-\U00010AC7\U00010AC9-\U00010AE4\U00010B00-\U00010B35\U00010B40-\U00010B55\U00010B60-\U00010B72\U00010B80-\U00010B91' - u'\U00010C00-\U00010C48\U00010C80-\U00010CB2\U00010CC0-\U00010CF2' - u'\U00011003-\U00011037\U00011083-\U000110AF\U000110D0-\U000110E8\U00011103-\U00011126\U00011150-\U00011172\U00011176\U00011183-\U000111B2\U000111C1-\U000111C4\U000111DA\U000111DC\U00011200-\U00011211\U00011213-\U0001122B\U00011280-\U00011286\U00011288\U0001128A-\U0001128D\U0001128F-\U0001129D\U0001129F-\U000112A8\U000112B0-\U000112DE\U00011305-\U0001130C\U0001130F\U00011310\U00011313-\U00011328\U0001132A-\U00011330\U00011332\U00011333\U00011335-\U00011339\U0001133D\U00011350\U0001135D-\U00011361' - u'\U00011480-\U000114AF\U000114C4\U000114C5\U000114C7\U00011580-\U000115AE\U000115D8-\U000115DB\U00011600-\U0001162F\U00011644\U00011680-\U000116AA\U00011700-\U00011719' - u'\U000118A0-\U000118DF\U000118FF\U00011AC0-\U00011AF8' - u'\U00012000-\U00012399' - u'\U00012480-\U00012543' - u'\U00013400-\U0001342E' - u'\U00014400-\U00014646' - u'\U00016800-\U00016A38\U00016A40-\U00016A5E\U00016AD0-\U00016AED\U00016B00-\U00016B2F\U00016B40-\U00016B43\U00016B63-\U00016B77\U00016B7D-\U00016B8F' - u'\U00016F00-\U00016F44\U00016F50\U00016F93-\U00016F9F' - u'\U0001B000\U0001B001' - u'\U0001BC00-\U0001BC6A\U0001BC70-\U0001BC7C\U0001BC80-\U0001BC88\U0001BC90-\U0001BC99' - u'\U0001D400-\U0001D454\U0001D456-\U0001D49C\U0001D49E\U0001D49F\U0001D4A2\U0001D4A5\U0001D4A6\U0001D4A9-\U0001D4AC\U0001D4AE-\U0001D4B9\U0001D4BB\U0001D4BD-\U0001D4C3\U0001D4C5-\U0001D505\U0001D507-\U0001D50A\U0001D50D-\U0001D514\U0001D516-\U0001D51C\U0001D51E-\U0001D539\U0001D53B-\U0001D53E\U0001D540-\U0001D544\U0001D546\U0001D54A-\U0001D550\U0001D552-\U0001D6A5\U0001D6A8-\U0001D6C0\U0001D6C2-\U0001D6DA\U0001D6DC-\U0001D6FA\U0001D6FC-\U0001D714\U0001D716-\U0001D734\U0001D736-\U0001D74E\U0001D750-\U0001D76E\U0001D770-\U0001D788\U0001D78A-\U0001D7A8\U0001D7AA-\U0001D7C2\U0001D7C4-\U0001D7CB' - u'\U0001E800-\U0001E8C4' - u'\U0001EE00-\U0001EE03\U0001EE05-\U0001EE1F\U0001EE21\U0001EE22\U0001EE24\U0001EE27\U0001EE29-\U0001EE32\U0001EE34-\U0001EE37\U0001EE39\U0001EE3B\U0001EE42\U0001EE47\U0001EE49\U0001EE4B\U0001EE4D-\U0001EE4F\U0001EE51\U0001EE52\U0001EE54\U0001EE57\U0001EE59\U0001EE5B\U0001EE5D\U0001EE5F\U0001EE61\U0001EE62\U0001EE64\U0001EE67-\U0001EE6A\U0001EE6C-\U0001EE72\U0001EE74-\U0001EE77\U0001EE79-\U0001EE7C\U0001EE7E\U0001EE80-\U0001EE89\U0001EE8B-\U0001EE9B\U0001EEA1-\U0001EEA3\U0001EEA5-\U0001EEA9\U0001EEAB-\U0001EEBB' - u'\U0002A400-\U0002A6D6\U0002A700-\U0002A7FF' - u'\U0002B400-\U0002B734\U0002B740-\U0002B7FF' - u'\U0002B800-\U0002B81D\U0002B820-\U0002BBFF' - u'\U0002CC00-\U0002CEA1' - u'\U0002F800-\U0002FA1D' - u'\U00013000-\U000133FF\U00020000-\U0002A3FF\U0002A800-\U0002B3FF\U0002BC00-\U0002CBFF' - u']') + '[' + bmp + + '\U00010000-\U0001000B\U0001000D-\U00010026\U00010028-\U0001003A\U0001003C\U0001003D\U0001003F-\U0001004D\U00010050-\U0001005D\U00010080-\U000100FA\U00010280-\U0001029C\U000102A0-\U000102D0\U00010300-\U0001031F\U00010330-\U00010340\U00010342-\U00010349\U00010350-\U00010375\U00010380-\U0001039D\U000103A0-\U000103C3\U000103C8-\U000103CF' + '\U00010400-\U0001049D\U00010500-\U00010527\U00010530-\U00010563\U00010600-\U00010736\U00010740-\U00010755\U00010760-\U00010767' + '\U00010800-\U00010805\U00010808\U0001080A-\U00010835\U00010837\U00010838\U0001083C\U0001083F-\U00010855\U00010860-\U00010876\U00010880-\U0001089E\U000108E0-\U000108F2\U000108F4\U000108F5\U00010900-\U00010915\U00010920-\U00010939\U00010980-\U000109B7\U000109BE\U000109BF\U00010A00\U00010A10-\U00010A13\U00010A15-\U00010A17\U00010A19-\U00010A33\U00010A60-\U00010A7C\U00010A80-\U00010A9C\U00010AC0-\U00010AC7\U00010AC9-\U00010AE4\U00010B00-\U00010B35\U00010B40-\U00010B55\U00010B60-\U00010B72\U00010B80-\U00010B91' + '\U00010C00-\U00010C48\U00010C80-\U00010CB2\U00010CC0-\U00010CF2' + '\U00011003-\U00011037\U00011083-\U000110AF\U000110D0-\U000110E8\U00011103-\U00011126\U00011150-\U00011172\U00011176\U00011183-\U000111B2\U000111C1-\U000111C4\U000111DA\U000111DC\U00011200-\U00011211\U00011213-\U0001122B\U00011280-\U00011286\U00011288\U0001128A-\U0001128D\U0001128F-\U0001129D\U0001129F-\U000112A8\U000112B0-\U000112DE\U00011305-\U0001130C\U0001130F\U00011310\U00011313-\U00011328\U0001132A-\U00011330\U00011332\U00011333\U00011335-\U00011339\U0001133D\U00011350\U0001135D-\U00011361' + '\U00011480-\U000114AF\U000114C4\U000114C5\U000114C7\U00011580-\U000115AE\U000115D8-\U000115DB\U00011600-\U0001162F\U00011644\U00011680-\U000116AA\U00011700-\U00011719' + '\U000118A0-\U000118DF\U000118FF\U00011AC0-\U00011AF8' + '\U00012000-\U00012399' + '\U00012480-\U00012543' + '\U00013400-\U0001342E' + '\U00014400-\U00014646' + '\U00016800-\U00016A38\U00016A40-\U00016A5E\U00016AD0-\U00016AED\U00016B00-\U00016B2F\U00016B40-\U00016B43\U00016B63-\U00016B77\U00016B7D-\U00016B8F' + '\U00016F00-\U00016F44\U00016F50\U00016F93-\U00016F9F' + '\U0001B000\U0001B001' + '\U0001BC00-\U0001BC6A\U0001BC70-\U0001BC7C\U0001BC80-\U0001BC88\U0001BC90-\U0001BC99' + '\U0001D400-\U0001D454\U0001D456-\U0001D49C\U0001D49E\U0001D49F\U0001D4A2\U0001D4A5\U0001D4A6\U0001D4A9-\U0001D4AC\U0001D4AE-\U0001D4B9\U0001D4BB\U0001D4BD-\U0001D4C3\U0001D4C5-\U0001D505\U0001D507-\U0001D50A\U0001D50D-\U0001D514\U0001D516-\U0001D51C\U0001D51E-\U0001D539\U0001D53B-\U0001D53E\U0001D540-\U0001D544\U0001D546\U0001D54A-\U0001D550\U0001D552-\U0001D6A5\U0001D6A8-\U0001D6C0\U0001D6C2-\U0001D6DA\U0001D6DC-\U0001D6FA\U0001D6FC-\U0001D714\U0001D716-\U0001D734\U0001D736-\U0001D74E\U0001D750-\U0001D76E\U0001D770-\U0001D788\U0001D78A-\U0001D7A8\U0001D7AA-\U0001D7C2\U0001D7C4-\U0001D7CB' + '\U0001E800-\U0001E8C4' + '\U0001EE00-\U0001EE03\U0001EE05-\U0001EE1F\U0001EE21\U0001EE22\U0001EE24\U0001EE27\U0001EE29-\U0001EE32\U0001EE34-\U0001EE37\U0001EE39\U0001EE3B\U0001EE42\U0001EE47\U0001EE49\U0001EE4B\U0001EE4D-\U0001EE4F\U0001EE51\U0001EE52\U0001EE54\U0001EE57\U0001EE59\U0001EE5B\U0001EE5D\U0001EE5F\U0001EE61\U0001EE62\U0001EE64\U0001EE67-\U0001EE6A\U0001EE6C-\U0001EE72\U0001EE74-\U0001EE77\U0001EE79-\U0001EE7C\U0001EE7E\U0001EE80-\U0001EE89\U0001EE8B-\U0001EE9B\U0001EEA1-\U0001EEA3\U0001EEA5-\U0001EEA9\U0001EEAB-\U0001EEBB' + '\U0002A400-\U0002A6D6\U0002A700-\U0002A7FF' + '\U0002B400-\U0002B734\U0002B740-\U0002B7FF' + '\U0002B800-\U0002B81D\U0002B820-\U0002BBFF' + '\U0002CC00-\U0002CEA1' + '\U0002F800-\U0002FA1D' + '\U00013000-\U000133FF\U00020000-\U0002A3FF\U0002A800-\U0002B3FF\U0002BC00-\U0002CBFF' + ']') actual = C.unicodeLetters if expected == actual: return None @@ -137,10 +137,10 @@ def test_unicodeDecompression(self): i += 1 e = expected[i - 10 : i + 10] a = actual[i - 10 : i + 10] - self.assertEqual(e, a, u"'{0}{1}{2}' != '{0}{3}{4}'".format( - u'' if i - 10 <= 0 else u'...', - e, u'' if i + 10 >= len(expected) else u'...', - a, u'' if i + 10 >= len(actual) else u'...', + self.assertEqual(e, a, "'{0}{1}{2}' != '{0}{3}{4}'".format( + '' if i - 10 <= 0 else '...', + e, '' if i + 10 >= len(expected) else '...', + a, '' if i + 10 >= len(actual) else '...', ))