# Patch summary (preamble text; everything from the first "diff --git" on is unchanged).
#
# goose/network.py
#   * Adds `import zlib`.
#   * In get_html(): when the response's 'Content-Encoding' header equals
#     'gzip', the body is returned via
#     zlib.decompress(self.result.read(), zlib.MAX_WBITS | 16);
#     otherwise the raw self.result.read() is returned as before.
#     Per the zlib docs, adding 16 to wbits selects gzip header/trailer handling.
#   * NOTE(review): the header comparison is an exact, case-sensitive match on
#     'gzip'; any other Content-Encoding value falls through to the raw read.
#
# tests/extractors/base.py
#   * Adds `import mimetools`.
#   * In response(): the second argument to urllib2.addinfourl is now an empty
#     mimetools.Message built from StringIO('') instead of `data`.
#   * NOTE(review): presumably needed because get_html() now calls
#     .info().get(...) on the response, which requires a headers object rather
#     than the body string -- confirm against urllib2.addinfourl.info().
diff --git a/goose/network.py b/goose/network.py index 666a7d61..740c8b04 100644 --- a/goose/network.py +++ b/goose/network.py @@ -21,6 +21,7 @@ limitations under the License. """ import urllib2 +import zlib class HtmlFetcher(object): @@ -56,5 +57,7 @@ def get_html(self, url): # read the result content if self.result is not None: + if self.result.info().get('Content-Encoding') == 'gzip': + return zlib.decompress(self.result.read(), zlib.MAX_WBITS | 16) return self.result.read() return None diff --git a/tests/extractors/base.py b/tests/extractors/base.py index e19d20e0..94279547 100644 --- a/tests/extractors/base.py +++ b/tests/extractors/base.py @@ -25,6 +25,7 @@ import urllib2 import unittest import socket +import mimetools from StringIO import StringIO @@ -53,7 +54,8 @@ def content(self): def response(self, req): data = self.content(req) url = req.get_full_url() - resp = urllib2.addinfourl(StringIO(data), data, url) + headers = mimetools.Message(StringIO('')) + resp = urllib2.addinfourl(StringIO(data), headers, url) resp.code = self.code resp.msg = self.msg return resp