From be14e786297221c80062f13b5443f65aeeaec250 Mon Sep 17 00:00:00 2001 From: stkao05 Date: Tue, 31 Mar 2015 00:58:57 +0800 Subject: [PATCH] Fix unescape() error when an invalid HTML entity is given --- html2text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html2text.py b/html2text.py index 17528901..499a35d4 100755 --- a/html2text.py +++ b/html2text.py @@ -710,7 +710,7 @@ def replaceEntities(self, s): return self.charref(s[1:]) else: return self.entityref(s) - r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") + r_unescape = re.compile(r"&((?:#[0-9]+|#[xX][0-9a-fA-F]+|\w{1,8}));") def unescape(self, s): return self.r_unescape.sub(self.replaceEntities, s)