From 3977993b8bc8a846df1a5e5fe792d6f242009b03 Mon Sep 17 00:00:00 2001
From: chris
Date: Tue, 11 Aug 2015 12:42:37 +0200
Subject: [PATCH 1/2] add gzip deflation to HtmlFetcher

---
 goose/network.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/goose/network.py b/goose/network.py
index 666a7d61..740c8b04 100644
--- a/goose/network.py
+++ b/goose/network.py
@@ -21,6 +21,7 @@
 limitations under the License.
 """
 import urllib2
+import zlib
 
 
 class HtmlFetcher(object):
@@ -56,5 +57,7 @@ def get_html(self, url):
 
         # read the result content
         if self.result is not None:
+            if self.result.info().get('Content-Encoding') == 'gzip':
+                return zlib.decompress(self.result.read(), zlib.MAX_WBITS | 16)
             return self.result.read()
         return None

From b975ca0c498f56f8b4b04731f4ac089f644c393f Mon Sep 17 00:00:00 2001
From: chris
Date: Wed, 12 Aug 2015 16:47:06 +0200
Subject: [PATCH 2/2] give mock response Message object headers, according to
 spec

---
 tests/extractors/base.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/extractors/base.py b/tests/extractors/base.py
index e19d20e0..94279547 100644
--- a/tests/extractors/base.py
+++ b/tests/extractors/base.py
@@ -25,6 +25,7 @@
 import urllib2
 import unittest
 import socket
+import mimetools
 
 from StringIO import StringIO
 
@@ -53,7 +54,8 @@ def content(self):
     def response(self, req):
         data = self.content(req)
         url = req.get_full_url()
-        resp = urllib2.addinfourl(StringIO(data), data, url)
+        headers = mimetools.Message(StringIO(''))
+        resp = urllib2.addinfourl(StringIO(data), headers, url)
         resp.code = self.code
         resp.msg = self.msg
         return resp