diff --git a/setup.py b/setup.py
index 0a563d3..8944ff0 100755
--- a/setup.py
+++ b/setup.py
@@ -51,4 +51,5 @@ def read(filename):
     irclogsearch = irclog2html.irclogsearch:main
     """,
     zip_safe=False,
+    install_requires=['chardet'],
 )
diff --git a/src/irclog2html/irclog2html.py b/src/irclog2html/irclog2html.py
index a40cb4e..13919f9 100755
--- a/src/irclog2html/irclog2html.py
+++ b/src/irclog2html/irclog2html.py
@@ -11,7 +11,7 @@
 was written by Jeff Waugh and is available at www.perkypants.org
 """
 
-# Copyright (c) 2005--2013, Marius Gedminas
+# Copyright (c) 2005--2013, Marius Gedminas
 # Copyright (c) 2000, Jeffrey W. Waugh
 
 # Python port:
@@ -54,6 +54,7 @@
 import shlex
 import shutil
 import sys
+import chardet
 
 try:
     from urllib import quote
@@ -138,10 +139,17 @@ def decode(self, s):
         if isinstance(s, unicode):
             # Accept input that's already Unicode, for convenience
             return s
         try:
             return s.decode('UTF-8')
         except UnicodeError:
-            return s.decode('cp1252', 'replace')
+            # Not valid UTF-8: let chardet guess.  It reports None when it
+            # cannot tell, so default to cp1252 as before.
+            charset = chardet.detect(s)['encoding'] or 'cp1252'
+        try:
+            return s.decode(charset, 'replace')
+        except LookupError:
+            # chardet named an encoding Python has no codec for
+            return s.decode('cp1252', 'replace')
 
     def __iter__(self):
         for line in self.infile: