diff --git a/.travis.yml b/.travis.yml index 2f2c722e..0eefe3a2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,11 @@ language: python python: - 2.6 - 2.7 + - 3.4 + - 3.5 install: - - pip install -r requirements.txt --use-mirrors + - pip install jieba - python setup.py install script: python setup.py test diff --git a/README.rst b/README.rst index 5dc8ab0b..bf12c3dd 100644 --- a/README.rst +++ b/README.rst @@ -180,7 +180,7 @@ class. Goose in Korean ----------------- +--------------- In order to use Goose in Korean you have to use the StopWordsKorean class. @@ -197,24 +197,6 @@ class. 14년째 세계 각국의 통신·안전·전파 규격 시험과 인증 한 우물만 파고 있는 이 회사 박채규 대표가 만나기로 한 주인공이다. 그는 전기전자·무선통신·자동차 전장품 분야에 - -Known issues ------------- - -- There are some issues with unicode URLs. -- Cookie handling : Some websites need cookie handling. At the moment the only work around is to use the raw_html extraction. For instance: - - >>> import urllib2 - >>> import goose - >>> url = "http://www.nytimes.com/2013/08/18/world/middleeast/pressure-by-us-failed-to-sway-egypts-leaders.html?hp" - >>> opener = urllib2.build_opener(urllib2.HTTPCookieProcessor()) - >>> response = opener.open(url) - >>> raw_html = response.read() - >>> g = goose.Goose() - >>> a = g.extract(raw_html=raw_html) - >>> a.cleaned_text - u'CAIRO \u2014 For a moment, at least, American and European diplomats trying to defuse the volatile standoff in Egypt thought they had a breakthrough.\n\nAs t' - TODO ---- diff --git a/goose/__init__.py b/goose/__init__.py index 409b5732..d1cd6da8 100644 --- a/goose/__init__.py +++ b/goose/__init__.py @@ -21,7 +21,6 @@ limitations under the License. """ import os -import platform from tempfile import mkstemp from goose.version import version_info, __version__ @@ -64,9 +63,12 @@ def crawl(self, crawl_candiate): try: crawler = Crawler(self.config) article = crawler.crawl(crawl_candiate) - except (UnicodeDecodeError, ValueError): - self.config.parser_class = parsers[0] - return self.crawl(crawl_candiate) + except (UnicodeDecodeError, ValueError) as e: + if parsers: + self.config.parser_class = parsers[0] + return self.crawl(crawl_candiate) + else: + raise e return article def initialize(self): diff --git a/goose/cleaners.py b/goose/cleaners.py index c1384ee0..2ad975d0 100644 --- a/goose/cleaners.py +++ b/goose/cleaners.py @@ -20,6 +20,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" +from __future__ import unicode_literals + from goose.utils import ReplaceSequence @@ -48,7 +50,7 @@ def __init__(self, config, article): "|communitypromo|runaroundLeft|subscribe|vcard|articleheadings" "|date|^print$|popup|author-dropdown|tools|socialtools|byline" "|konafilter|KonaFilter|breadcrumbs|^fn$|wp-caption-text" - "|legende|ajoutVideo|timestamp|js_replies" + "|legende|ajoutVideo|timestamp|js_replies|disclaim" ) self.regexp_namespace = "http://exslt.org/regular-expressions" self.nauthy_ids_re = "//*[re:test(@id, '%s', 'i')]" % self.remove_nodes_re @@ -66,8 +68,7 @@ def __init__(self, config, article): .append("\t")\ .append("^\\s+$") - def clean(self): - doc_to_clean = self.article.doc + def clean(self, doc_to_clean): doc_to_clean = self.clean_body_classes(doc_to_clean) doc_to_clean = self.clean_article_tags(doc_to_clean) doc_to_clean = self.clean_em_tags(doc_to_clean) diff --git a/goose/configuration.py b/goose/configuration.py index fcfa5b9a..7d83a34f 100644 --- a/goose/configuration.py +++ b/goose/configuration.py @@ -22,6 +22,9 @@ """ import os import tempfile + +import six + from goose.text import StopWords from goose.parsers import Parser from goose.parsers import ParserSoup @@ -30,10 +33,20 @@ HTTP_DEFAULT_TIMEOUT = 30 AVAILABLE_PARSERS = { - 'lxml': Parser, - 'soup': ParserSoup, + 'lxml': Parser } +if six.PY2: + AVAILABLE_PARSERS['soup'] = ParserSoup + +KNOWN_ARTICLE_CONTENT_PATTERNS = [ + {'attr': 'class', 'value': 'short-story'}, + {'attr': 'itemprop', 'value': 'articleBody'}, + {'attr': 'class', 'value': 'post-content'}, + {'attr': 'class', 'value': 'g-content'}, + {'tag': 'article'}, +] + class Configuration(object): @@ -99,6 +112,12 @@ def __init__(self): # http timeout self.http_timeout = HTTP_DEFAULT_TIMEOUT + # known context patterns. Goose at first will search context at dom nodes, qualifying these patterns + self.known_context_patterns = KNOWN_ARTICLE_CONTENT_PATTERNS + + # Strict mode. 
Generate exceptions on errors instead of swallowing them + self.strict = True + def get_parser(self): return AVAILABLE_PARSERS[self.parser_class] diff --git a/goose/crawler.py b/goose/crawler.py index 34daf048..e5713b57 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -39,7 +39,7 @@ from goose.cleaners import StandardDocumentCleaner from goose.outputformatters import StandardOutputFormatter -from goose.network import HtmlFetcher +from goose.network import NetworkFetcher class CrawlCandidate(object): @@ -99,11 +99,13 @@ def __init__(self, config): # title extractor self.title_extractor = self.get_title_extractor() + # html fetcher + self.fetcher = NetworkFetcher(self.config) + # image extrator self.image_extractor = self.get_image_extractor() - # html fetcher - self.htmlfetcher = HtmlFetcher(self.config) + # TODO : log prefix self.logPrefix = "crawler:" @@ -161,7 +163,10 @@ def crawl(self, crawl_candidate): self.article.doc = article_body # before we do any calcs on the body itself let's clean up the document - self.article.doc = self.cleaner.clean() + if not isinstance(self.article.doc, list): + self.article.doc = [self.cleaner.clean(self.article.doc)] + else: + self.article.doc = list(map(lambda doc1: self.cleaner.clean(deepcopy(doc1)), self.article.doc)) # big stuff self.article.top_node = self.extractor.calculate_best_node() @@ -212,11 +217,7 @@ def get_html(self, crawl_candidate, parsing_candidate): return crawl_candidate.raw_html # fetch HTML - html = self.htmlfetcher.get_html(parsing_candidate.url) - self.article.additional_data.update({ - 'request': self.htmlfetcher.request, - 'result': self.htmlfetcher.result, - }) + html = self.fetcher.fetch(parsing_candidate.url) return html def get_metas_extractor(self): @@ -244,7 +245,7 @@ def get_title_extractor(self): return TitleExtractor(self.config, self.article) def get_image_extractor(self): - return ImageExtractor(self.config, self.article) + return ImageExtractor(self.fetcher, self.config, self.article) def get_video_extractor(self): return VideoExtractor(self.config, self.article) diff --git a/goose/exceptions.py b/goose/exceptions.py new file mode 100644 index 00000000..b75f3183 --- /dev/null +++ b/goose/exceptions.py @@ -0,0 +1,3 @@ +from .network import NetworkError + +__all__ = ['NetworkError'] diff --git a/goose/extractors/content.py b/goose/extractors/content.py index e0703d55..433ed0c9 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -25,13 +25,6 @@ from goose.extractors import BaseExtractor -KNOWN_ARTICLE_CONTENT_TAGS = [ - {'attr': 'itemprop', 'value': 'articleBody'}, - {'attr': 'class', 'value': 'post-content'}, - {'tag': 'article'}, -] - - class ContentExtractor(BaseExtractor): def get_language(self): @@ -47,16 +40,17 @@ def get_language(self): return self.config.target_language def get_known_article_tags(self): - for item in KNOWN_ARTICLE_CONTENT_TAGS: - nodes = self.parser.getElementsByTag( - self.article.doc, - **item) - if len(nodes): - return nodes[0] + nodes = [] + for item in self.config.known_context_patterns: + nodes.extend(self.parser.getElementsByTag( + self.article.doc, + **item)) + if len(nodes): + return nodes return None def is_articlebody(self, node): - for item in KNOWN_ARTICLE_CONTENT_TAGS: + for item in self.config.known_context_patterns: # attribute if "attr" in item and "value" in item: if self.parser.getAttribute(node, item['attr']) == item['value']: @@ -260,7 +254,7 @@ def update_score(self, node, addToScore): if score_string: current_score = int(score_string) - 
new_score = current_score + addToScore + new_score = current_score + int(addToScore) self.parser.setAttribute(node, "gravityScore", str(new_score)) def update_node_count(self, node, add_to_count): @@ -315,16 +309,17 @@ def get_node_gravity_score(self, node): return None return int(grvScoreString) - def nodes_to_check(self, doc): + def nodes_to_check(self, docs): """\ returns a list of nodes we want to search on like paragraphs and tables """ nodes_to_check = [] - for tag in ['p', 'pre', 'td']: - items = self.parser.getElementsByTag(doc, tag=tag) - nodes_to_check += items + for doc in docs: + for tag in ['p', 'pre', 'td']: + items = self.parser.getElementsByTag(doc, tag=tag) + nodes_to_check += items return nodes_to_check def is_table_and_no_para_exist(self, e): diff --git a/goose/extractors/images.py b/goose/extractors/images.py index 3af44f5f..ebaf6935 100644 --- a/goose/extractors/images.py +++ b/goose/extractors/images.py @@ -23,7 +23,7 @@ import re import os -from urlparse import urlparse, urljoin +from six.moves.urllib.parse import urlparse, urljoin from goose.extractors import BaseExtractor from goose.image import Image @@ -48,9 +48,10 @@ def __init__(self, node, parent_depth, sibling_depth): class ImageExtractor(BaseExtractor): - def __init__(self, config, article): + def __init__(self, fetcher, config, article): super(ImageExtractor, self).__init__(config, article) + self.fetcher = fetcher self.custom_site_mapping = {} self.load_customesite_mapping() @@ -333,9 +334,7 @@ def get_local_image(self, src): """\ returns the bytes of the image file on disk """ - local_image = ImageUtils.store_image(None, - self.link_hash, src, self.config) - return local_image + return ImageUtils.store_image(self.fetcher, self.link_hash, src, self.config) def get_clean_domain(self): if self.article.domain: diff --git a/goose/extractors/metas.py b/goose/extractors/metas.py index 95acadd5..5a65aa16 100644 --- a/goose/extractors/metas.py +++ b/goose/extractors/metas.py @@ -22,8 +22,8 @@ """ import re -from urlparse import urljoin -from urlparse import urlparse + +from six.moves.urllib.parse import urlparse, urljoin from goose.extractors import BaseExtractor diff --git a/goose/image.py b/goose/image.py index 351e3396..58ddd021 100644 --- a/goose/image.py +++ b/goose/image.py @@ -46,7 +46,7 @@ def __init__(self): self.extraction_type = "NA" # stores how many bytes this image is. - self.bytes = long(0) + self.bytes = 0 def get_src(self): return self.src @@ -87,7 +87,7 @@ def set_mime_type(self, mime_type): class LocallyStoredImage(object): def __init__(self, src='', local_filename='', - link_hash='', bytes=long(0), file_extension='', height=0, width=0): + link_hash='', bytes=0, file_extension='', height=0, width=0): self.src = src self.local_filename = local_filename self.link_hash = link_hash diff --git a/goose/network.py b/goose/network.py index 666a7d61..2aca4873 100644 --- a/goose/network.py +++ b/goose/network.py @@ -20,41 +20,41 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -import urllib2 +import six +import requests -class HtmlFetcher(object): +class NetworkError(RuntimeError): + def __init__(self, status_code, reason): + self.reason = reason + self.status_code = status_code + + +class NetworkFetcher(object): def __init__(self, config): self.config = config - # set header - self.headers = {'User-agent': self.config.browser_user_agent} + self._connection = requests.Session() + self._connection.headers['User-agent'] = self.config.browser_user_agent + + self._url = None def get_url(self): - # if we have a result - # get the final_url - if self.result is not None: - return self.result.geturl() - return None + return self._url - def get_html(self, url): + def fetch(self, url): # utf-8 encode unicode url - if isinstance(url, unicode): + if isinstance(url, six.text_type) and six.PY2: url = url.encode('utf-8') - # set request - self.request = urllib2.Request( - url, - headers=self.headers) - # do request - try: - self.result = urllib2.urlopen( - self.request, - timeout=self.config.http_timeout) - except Exception: - self.result = None - - # read the result content - if self.result is not None: - return self.result.read() - return None + response = self._connection.get(url, timeout=self.config.http_timeout) + if response.ok: + self._url = response.url + text = response.content + else: + self._url = None + text = None + if self.config.strict: + raise NetworkError(response.status_code, response.reason) + + return text diff --git a/goose/outputformatters.py b/goose/outputformatters.py index 1f8ba4bd..d2cb5019 100644 --- a/goose/outputformatters.py +++ b/goose/outputformatters.py @@ -20,7 +20,8 @@ See the License for the specific language governing permissions and limitations under the License. """ -from HTMLParser import HTMLParser +from six.moves.html_parser import HTMLParser + from goose.text import innerTrim @@ -66,12 +67,14 @@ def get_formatted_text(self): self.remove_fewwords_paragraphs() return self.convert_to_text() + _text_parser = HTMLParser() + def convert_to_text(self): txts = [] for node in list(self.get_top_node()): txt = self.parser.getText(node) if txt: - txt = HTMLParser().unescape(txt) + txt = self._text_parser.unescape(txt) txt_lis = innerTrim(txt).split(r'\n') txts.extend(txt_lis) return '\n\n'.join(txts) diff --git a/goose/parsers.py b/goose/parsers.py index a43e9b47..fab3eb31 100644 --- a/goose/parsers.py +++ b/goose/parsers.py @@ -21,11 +21,12 @@ limitations under the License. 
""" import lxml.html -from lxml.html import soupparser + +import six + from lxml import etree from copy import deepcopy -from goose.text import innerTrim -from goose.text import encodeValue +from goose.text import innerTrim, encodeValue, get_encodings_from_content, smart_str class Parser(object): @@ -50,13 +51,20 @@ def css_select(self, node, selector): @classmethod def fromstring(self, html): - html = encodeValue(html) - self.doc = lxml.html.fromstring(html) + encoding = get_encodings_from_content(html) + encoding = encoding and encoding[0] or None + if not encoding: + html = encodeValue(html) + self.doc = lxml.html.fromstring(html) + else: + html = smart_str(html, encoding=encoding) + parser = lxml.html.HTMLParser(encoding=encoding) + self.doc = lxml.html.fromstring(html, parser=parser) return self.doc @classmethod def nodeToString(self, node): - return etree.tostring(node) + return etree.tostring(node, encoding=six.text_type) @classmethod def replaceTag(self, node, tag): @@ -239,6 +247,7 @@ class ParserSoup(Parser): @classmethod def fromstring(self, html): + from lxml.html import soupparser html = encodeValue(html) self.doc = soupparser.fromstring(html) return self.doc diff --git a/goose/text.py b/goose/text.py index 3ef63d6b..3d67f5fb 100644 --- a/goose/text.py +++ b/goose/text.py @@ -23,16 +23,54 @@ import os import re import string + +import six + from goose.utils import FileHelper from goose.utils.encoding import smart_unicode from goose.utils.encoding import smart_str from goose.utils.encoding import DjangoUnicodeDecodeError +SPACE_SYMBOLS = re.compile(r'[\s\xa0\t]') TABSSPACE = re.compile(r'[\s\t]+') +def get_encodings_from_content(content): + """ + Code from: + https://github.com/sigmavirus24/requests-toolbelt/blob/master/requests_toolbelt/utils/deprecated.py + Return encodings from given content string. + :param content: string to extract encodings from. 
+ """ + if isinstance(content, six.binary_type) and six.PY3: + find_charset = re.compile( + br']', flags=re.I + ).findall + + find_pragma = re.compile( + br']', flags=re.I + ).findall + + find_xml = re.compile( + br'^<\?xml.*?encoding=["\']*(.+?)["\'>]' + ).findall + else: + find_charset = re.compile( + r']', flags=re.I + ).findall + + find_pragma = re.compile( + r']', flags=re.I + ).findall + + find_xml = re.compile( + r'^<\?xml.*?encoding=["\']*(.+?)["\'>]' + ).findall + return find_charset(content) + find_pragma(content) + find_xml(content) + + def innerTrim(value): - if isinstance(value, (unicode, str)): + if isinstance(value, (six.text_type, six.string_types)): # remove tab and white space value = re.sub(TABSSPACE, ' ', value) value = ''.join(value.splitlines()) @@ -87,7 +125,6 @@ def set_word_count(self, cnt): class StopWords(object): PUNCTUATION = re.compile("[^\\p{Ll}\\p{Lu}\\p{Lt}\\p{Lo}\\p{Nd}\\p{Pc}\\s]") - TRANS_TABLE = string.maketrans('', '') _cached_stop_words = {} def __init__(self, language='en'): @@ -106,12 +143,13 @@ def __init__(self, language='en'): def remove_punctuation(self, content): # code taken form # http://stackoverflow.com/questions/265960/best-way-to-strip-punctuation-from-a-string-in-python - if isinstance(content, unicode): - content = content.encode('utf-8') - return content.translate(self.TRANS_TABLE, string.punctuation) + if not isinstance(content, six.text_type): + content = content.decode('utf-8') + tbl = dict.fromkeys(ord(x) for x in string.punctuation) + return content.translate(tbl) def candiate_words(self, stripped_input): - return stripped_input.split(' ') + return re.split(SPACE_SYMBOLS, stripped_input) def get_stopword_count(self, content): if not content: diff --git a/goose/utils/__init__.py b/goose/utils/__init__.py index 5a1de7d4..41cf9c95 100644 --- a/goose/utils/__init__.py +++ b/goose/utils/__init__.py @@ -26,7 +26,13 @@ import os import goose import codecs -import urlparse + +import six + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse class BuildURL(object): @@ -89,7 +95,7 @@ def __init__(self, urlString, link_hash): class RawHelper(object): @classmethod def get_parsing_candidate(self, url, raw_html): - if isinstance(raw_html, unicode): + if isinstance(raw_html, six.text_type): raw_html = raw_html.encode('utf-8') link_hash = '%s.%s' % (hashlib.md5(raw_html).hexdigest(), time.time()) return ParsingCandidate(url, link_hash) @@ -101,7 +107,8 @@ def get_parsing_candidate(self, url_to_crawl): # replace shebang is urls final_url = url_to_crawl.replace('#!', '?_escaped_fragment_=') \ if '#!' in url_to_crawl else url_to_crawl - link_hash = '%s.%s' % (hashlib.md5(final_url).hexdigest(), time.time()) + url = final_url.encode("utf-8") if isinstance(final_url, six.text_type) else final_url + link_hash = '%s.%s' % (hashlib.md5(url).hexdigest(), time.time()) return ParsingCandidate(final_url, link_hash) diff --git a/goose/utils/encoding.py b/goose/utils/encoding.py index 4dc23ca7..f94f476e 100644 --- a/goose/utils/encoding.py +++ b/goose/utils/encoding.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- -import types import datetime + +import six + from decimal import Decimal @@ -45,8 +47,8 @@ def is_protected_type(obj): force_unicode(strings_only=True). 
""" return isinstance(obj, ( - types.NoneType, - int, long, + type(None), + six.integer_types, datetime.datetime, datetime.date, datetime.time, float, Decimal) ) @@ -62,17 +64,17 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): # Handle the common case first, saves 30-40% in performance when s # is an instance of unicode. This function gets called often in that # setting. - if isinstance(s, unicode): + if isinstance(s, six.text_type): return s if strings_only and is_protected_type(s): return s try: - if not isinstance(s, basestring,): + if not isinstance(s, six.string_types,): if hasattr(s, '__unicode__'): - s = unicode(s) + s = s.__unicode__() else: try: - s = unicode(str(s), encoding, errors) + s = six.text_type(s, encoding, errors) except UnicodeEncodeError: if not isinstance(s, Exception): raise @@ -84,12 +86,12 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): # output should be. s = u' '.join([force_unicode(arg, encoding, strings_only, errors) for arg in s]) - elif not isinstance(s, unicode): + elif not isinstance(s, six.text_type): # Note: We use .decode() here, instead of unicode(s, encoding, # errors), so that if s is a SafeString, it ends up being a # SafeUnicode at the end. s = s.decode(encoding, errors) - except UnicodeDecodeError, e: + except UnicodeDecodeError as e: if not isinstance(s, Exception): raise DjangoUnicodeDecodeError(s, *e.args) else: @@ -109,13 +111,17 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): If strings_only is True, don't convert (some) non-string-like objects. """ - if strings_only and isinstance(s, (types.NoneType, int)): + if strings_only and isinstance(s, (type(None), int)): return s # if isinstance(s, Promise): # return unicode(s).encode(encoding, errors) - if not isinstance(s, basestring): + if isinstance(s, six.text_type): + return s.encode(encoding, errors) + elif not isinstance(s, six.binary_type): try: - return str(s) + if six.PY2: + return str(s) + return str(s).encode(encoding, errors) except UnicodeEncodeError: if isinstance(s, Exception): # An Exception subclass containing non-ASCII data that doesn't @@ -123,10 +129,6 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): # further exception. 
return ' '.join([smart_str(arg, encoding, strings_only, errors) for arg in s]) - return unicode(s).encode(encoding, errors) - elif isinstance(s, unicode): - return s.encode(encoding, errors) - elif s and encoding != 'utf-8': - return s.decode('utf-8', errors).encode(encoding, errors) + return six.text_type(s).encode(encoding, errors) else: return s diff --git a/goose/utils/images.py b/goose/utils/images.py index 388d5c85..9c12a1f8 100644 --- a/goose/utils/images.py +++ b/goose/utils/images.py @@ -22,8 +22,9 @@ """ import hashlib import os -import urllib2 + from PIL import Image + from goose.utils.encoding import smart_str from goose.image import ImageDetails from goose.image import LocallyStoredImage @@ -35,9 +36,9 @@ class ImageUtils(object): def get_image_dimensions(self, identify_program, path): image_details = ImageDetails() try: - image = Image.open(path) - image_details.set_mime_type(image.format) - width, height = image.size + with Image.open(path) as image: + image_details.set_mime_type(image.format) + width, height = image.size image_details.set_width(width) image_details.set_height(height) except IOError: @@ -115,9 +116,6 @@ def clean_src_string(self, src): @classmethod def fetch(self, http_client, src): try: - req = urllib2.Request(src) - f = urllib2.urlopen(req) - data = f.read() - return data + return http_client.fetch(src) except Exception: return None diff --git a/goose/version.py b/goose/version.py index fedcbb6d..4f2a84c1 100644 --- a/goose/version.py +++ b/goose/version.py @@ -21,5 +21,5 @@ limitations under the License. """ -version_info = (1, 0, 25) +version_info = (1, 0, 29) __version__ = ".".join(map(str, version_info)) diff --git a/goose/video.py b/goose/video.py index 8509bba0..0691ac96 100644 --- a/goose/video.py +++ b/goose/video.py @@ -21,6 +21,7 @@ limitations under the License. 
""" + class Video(object): """\ Video object diff --git a/requirements.txt b/requirements.txt index 7e6a6c09..8d153935 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ +requests Pillow lxml cssselect jieba -beautifulsoup +beautifulsoup # Only on python2 nltk +six diff --git a/setup.py b/setup.py index ebad2547..c4d1fabf 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,8 @@ """ import os +import sys + from setuptools import setup, find_packages from imp import load_source @@ -40,6 +42,9 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Topic :: Internet', 'Topic :: Utilities', 'Topic :: Software Development :: Libraries :: Python Modules'] @@ -53,19 +58,27 @@ except Exception: long_description = description +requirements = ['Pillow', 'lxml', 'cssselect', 'jieba', 'nltk', 'six', 'requests'] +test_requirements = ['requests_mock'] +if sys.version_info[0] == 2: + requirements.append('beautifulsoup') + if sys.version_info[1] < 7: + test_requirements.append('unittest2') + setup(name='goose-extractor', - version=version.__version__, - description=description, - long_description=long_description, - keywords='scrapping, extractor, web scrapping', - classifiers=CLASSIFIERS, - author='Xavier Grangier', - author_email='grangier@gmail.com', - url='https://github.com/grangier/python-goose', - license='Apache', - packages=find_packages(), - include_package_data=True, - zip_safe=False, - install_requires=['Pillow', 'lxml', 'cssselect', 'jieba', 'beautifulsoup', 'nltk'], - test_suite="tests" -) + version=version.__version__, + description=description, + long_description=long_description, + keywords='scrapping, extractor, web scrapping', + classifiers=CLASSIFIERS, + author='Xavier Grangier', + author_email='grangier@gmail.com', + url='https://github.com/grangier/python-goose', + license='Apache', + packages=find_packages(), + include_package_data=True, + zip_safe=False, + install_requires=requirements, + test_suite="tests", + tests_require=test_requirements + ) diff --git a/tests/extractors/authors.py b/tests/extractors/authors.py index 709040c1..a21d362e 100644 --- a/tests/extractors/authors.py +++ b/tests/extractors/authors.py @@ -21,12 +21,26 @@ limitations under the License. 
""" -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestArticleAuthor(TestExtractionBase): def test_author_schema(self): article = self.getArticle() - fields = ['authors'] - self.runArticleAssertions(article=article, fields=fields) + field = 'authors' + + # Do not call self.runArticleAssertions because need to sort results, + # because set not save ordering, so test failed; + + expected_value = self.data['expected'][field] + result_value = getattr(article, field, None) + + expected_value.sort() + result_value.sort() + + # default assertion + msg = u"Error %s \nexpected: %s\nresult: %s" % (field, expected_value, result_value) + self.assertEqual(expected_value, result_value, msg=msg) diff --git a/tests/extractors/base.py b/tests/extractors/base.py index e19d20e0..cdf6cb32 100644 --- a/tests/extractors/base.py +++ b/tests/extractors/base.py @@ -22,11 +22,14 @@ """ import os import json -import urllib2 import unittest import socket +import requests_mock -from StringIO import StringIO +try: + import urllib2 +except ImportError: + import urllib.request as urllib2 from goose import Goose from goose.utils import FileHelper @@ -37,7 +40,7 @@ # Response -class MockResponse(): +class MockResponse: """\ Base mock response class """ @@ -47,45 +50,8 @@ class MockResponse(): def __init__(self, cls): self.cls = cls - def content(self): - return "response" - - def response(self, req): - data = self.content(req) - url = req.get_full_url() - resp = urllib2.addinfourl(StringIO(data), data, url) - resp.code = self.code - resp.msg = self.msg - return resp - - -class MockHTTPHandler(urllib2.HTTPHandler, urllib2.HTTPSHandler): - """\ - Mocked HTTPHandler in order to query APIs locally - """ - cls = None - - def https_open(self, req): - return self.http_open(req) - - def http_open(self, req): - r = self.cls.callback(self.cls) - return r.response(req) - - @staticmethod - def patch(cls): - opener = urllib2.build_opener(MockHTTPHandler) - urllib2.install_opener(opener) - # dirty ! 
- for h in opener.handlers: - if isinstance(h, MockHTTPHandler): - h.cls = cls - return [h for h in opener.handlers if isinstance(h, MockHTTPHandler)][0] - - @staticmethod - def unpatch(): - # urllib2 - urllib2._opener = None + def contents(self): + pass class BaseMockTests(unittest.TestCase): @@ -98,10 +64,8 @@ def setUp(self): # patch DNS self.original_getaddrinfo = socket.getaddrinfo socket.getaddrinfo = self.new_getaddrinfo - MockHTTPHandler.patch(self) def tearDown(self): - MockHTTPHandler.unpatch() # DNS socket.getaddrinfo = self.original_getaddrinfo @@ -113,7 +77,7 @@ def _get_current_testname(self): class MockResponseExtractors(MockResponse): - def content(self, req): + def contents(self): test, suite, module, cls, func = self.cls.id().split('.') path = os.path.join( os.path.dirname(CURRENT_PATH), @@ -123,7 +87,7 @@ def content(self, req): "%s.html" % func) path = os.path.abspath(path) content = FileHelper.loadResourceFile(path) - return content + yield self.cls.data['url'], content.encode('utf-8') class TestExtractionBase(BaseMockTests): @@ -132,6 +96,14 @@ class TestExtractionBase(BaseMockTests): """ callback = MockResponseExtractors + def setUp(self): + # patch DNS + self.original_getaddrinfo = socket.getaddrinfo + socket.getaddrinfo = self.new_getaddrinfo + + def tearDown(self): + socket.getaddrinfo = self.original_getaddrinfo + def getRawHtml(self): test, suite, module, cls, func = self.id().split('.') path = os.path.join( @@ -203,8 +175,12 @@ def runArticleAssertions(self, article, fields): self.assertEqual(expected_value, result_value, msg=msg) def extract(self, instance): - article = instance.extract(url=self.data['url']) - return article + article_url = self.data['url'] + with requests_mock.mock() as m: + for url, content in self.callback(self).contents(): + m.get(url, content=content) + article = instance.extract(url=article_url) + return article def getConfig(self): config = Configuration() diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 30dc2754..854c4bd1 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -20,7 +20,9 @@ See the License for the specific language governing permissions and limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase from goose.text import StopWordsChinese from goose.text import StopWordsArabic diff --git a/tests/extractors/images.py b/tests/extractors/images.py index e47a1dde..9c089fe2 100644 --- a/tests/extractors/images.py +++ b/tests/extractors/images.py @@ -20,13 +20,15 @@ See the License for the specific language governing permissions and limitations under the License. 
""" +from __future__ import absolute_import + import os import json import hashlib import unittest -from base import MockResponse -from base import TestExtractionBase +from .base import MockResponse +from .base import TestExtractionBase from goose.configuration import Configuration from goose.image import Image @@ -40,8 +42,8 @@ class MockResponseImage(MockResponse): - def image_content(self, req): - md5_hash = hashlib.md5(req.get_full_url()).hexdigest() + def image_content(self, url): + md5_hash = hashlib.md5(url.encode('utf-8')).hexdigest() current_test = self.cls._get_current_testname() path = os.path.join( os.path.dirname(CURRENT_PATH), @@ -51,12 +53,15 @@ def image_content(self, req): current_test, md5_hash) path = os.path.abspath(path) - f = open(path, 'rb') - content = f.read() - f.close() - return content - - def html_content(self, req): + try: + f = open(path, 'rb') + content = f.read() + f.close() + return content + except Exception: + return None + + def html_content(self): current_test = self.cls._get_current_testname() path = os.path.join( os.path.dirname(CURRENT_PATH), @@ -66,12 +71,14 @@ def html_content(self, req): current_test, "%s.html" % current_test) path = os.path.abspath(path) - return FileHelper.loadResourceFile(path) - - def content(self, req): - if self.cls.data['url'] == req.get_full_url(): - return self.html_content(req) - return self.image_content(req) + return FileHelper.loadResourceFile(path).encode('utf-8') + + def contents(self): + yield self.cls.data['url'], self.html_content() + img_url = self.cls.data['expected']['top_image']['src'] + if img_url: + yield img_url, self.image_content(img_url) + # self.image_content() class ImageExtractionTests(TestExtractionBase): diff --git a/tests/extractors/links.py b/tests/extractors/links.py index 8539465e..ea15a459 100644 --- a/tests/extractors/links.py +++ b/tests/extractors/links.py @@ -21,7 +21,9 @@ limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestArticleLinks(TestExtractionBase): diff --git a/tests/extractors/metas.py b/tests/extractors/metas.py index fd45915a..a4eef74c 100644 --- a/tests/extractors/metas.py +++ b/tests/extractors/metas.py @@ -21,7 +21,9 @@ limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestMetas(TestExtractionBase): diff --git a/tests/extractors/opengraph.py b/tests/extractors/opengraph.py index 415a784c..a0616227 100644 --- a/tests/extractors/opengraph.py +++ b/tests/extractors/opengraph.py @@ -21,7 +21,9 @@ limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestOpenGraph(TestExtractionBase): diff --git a/tests/extractors/publishdate.py b/tests/extractors/publishdate.py index 8d2a13b9..355250d5 100644 --- a/tests/extractors/publishdate.py +++ b/tests/extractors/publishdate.py @@ -21,7 +21,9 @@ limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestPublishDate(TestExtractionBase): diff --git a/tests/extractors/tags.py b/tests/extractors/tags.py index 22b17129..2f5562ba 100644 --- a/tests/extractors/tags.py +++ b/tests/extractors/tags.py @@ -21,7 +21,9 @@ limitations under the License. 
""" -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestArticleTags(TestExtractionBase): diff --git a/tests/extractors/title.py b/tests/extractors/title.py index 09170205..c6f7813c 100644 --- a/tests/extractors/title.py +++ b/tests/extractors/title.py @@ -21,7 +21,9 @@ limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class TestTitle(TestExtractionBase): diff --git a/tests/extractors/tweets.py b/tests/extractors/tweets.py index 50300f43..3f72a604 100644 --- a/tests/extractors/tweets.py +++ b/tests/extractors/tweets.py @@ -20,8 +20,9 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import -from base import TestExtractionBase +from .base import TestExtractionBase class TestArticleTweet(TestExtractionBase): diff --git a/tests/extractors/videos.py b/tests/extractors/videos.py index 10be15ff..0350c8c3 100644 --- a/tests/extractors/videos.py +++ b/tests/extractors/videos.py @@ -20,7 +20,9 @@ See the License for the specific language governing permissions and limitations under the License. """ -from base import TestExtractionBase +from __future__ import absolute_import + +from .base import TestExtractionBase class ImageExtractionTests(TestExtractionBase): diff --git a/tests/parsers.py b/tests/parsers.py index 6614368d..6e5e1986 100644 --- a/tests/parsers.py +++ b/tests/parsers.py @@ -21,7 +21,12 @@ limitations under the License. """ import os -import unittest +try: + import unittest2 as unittest # Need to support skipIf in python 2.6 +except ImportError: + import unittest + +import six from goose.utils import FileHelper from goose.parsers import Parser @@ -254,11 +259,28 @@ def test_delAttribute(self): # remove an unexistant attribute self.parser.delAttribute(div, attr="bla") + def test_encoding(self): + """ + If pass unicode string to lxml.html.fromstring with encoding set in document will receive: + "ValueError: Unicode strings with encoding declaration are not supported. + Please use bytes input or XML fragments without declaration." + Test for this case. + """ + html = u""" + + """ + html += u'' + html += u'

Я рядочок

' + html += u'' + self.parser.fromstring(html) + class TestParser(ParserBase): pass class TestParserSoup(ParserBase): + + @unittest.skipIf(six.PY3, "supported only in python2") def setUp(self): self.parser = ParserSoup
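
Taken together, the network and configuration changes alter how errors surface: fetching now goes through a requests.Session, and with the new strict flag (default True) a failed fetch raises NetworkError from the new goose.exceptions module instead of silently returning None. A minimal usage sketch of the post-patch API follows, in the doctest style the README already uses; the URL and the appended 'entry-content' pattern are illustrative assumptions, not values from the patch, and it assumes Goose() accepts a Configuration instance as in the existing constructor:

    >>> from goose import Goose
    >>> from goose.configuration import Configuration
    >>> from goose.exceptions import NetworkError
    >>> config = Configuration()
    >>> config.strict = True  # raise NetworkError on HTTP failures instead of returning None
    >>> # site-specific hint consumed by ContentExtractor.get_known_article_tags()
    >>> config.known_context_patterns.append({'attr': 'class', 'value': 'entry-content'})
    >>> g = Goose(config)
    >>> try:
    ...     article = g.extract(url='http://example.com/article.html')
    ...     print(article.cleaned_text)
    ... except NetworkError as err:
    ...     print('fetch failed: %s %s' % (err.status_code, err.reason))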
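
The charset-aware parse path in Parser.fromstring is driven by the new get_encodings_from_content helper, whose meta-tag patterns are reconstructed above from the cited requests-toolbelt source. A small sketch of that detection step, assuming those reconstructed patterns; the HTML byte string is an invented example:

    >>> from goose.text import get_encodings_from_content
    >>> html = b'<html><head><meta charset="windows-1251"></head><body></body></html>'
    >>> get_encodings_from_content(html)  # bytes input on Python 3
    [b'windows-1251']
    >>> # Parser.fromstring takes the first hit (or None) and parses the
    >>> # document with an lxml.html.HTMLParser built for that encoding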