diff --git a/goose/__init__.py b/goose/__init__.py
index 409b5732..dc8104d0 100644
--- a/goose/__init__.py
+++ b/goose/__init__.py
@@ -60,13 +60,34 @@ def shutdown_network(self):
 
     def crawl(self, crawl_candiate):
+        """Crawl ``crawl_candiate``, trying each available parser in turn.
+
+        The parser currently set in ``self.config.parser_class`` is tried
+        first, then each remaining entry of ``self.config.available_parsers``.
+        A ``UnicodeDecodeError`` or ``ValueError`` raised by an attempt moves
+        on to the next parser; any other exception propagates to the caller.
+
+        ``self.config.parser_class`` is left set to the last parser that was
+        tried.  Returns the result of the first attempt that succeeds, or
+        ``None`` if every parser failed.
+        """
         parsers = list(self.config.available_parsers)
-        parsers.remove(self.config.parser_class)
-        try:
-            crawler = Crawler(self.config)
-            article = crawler.crawl(crawl_candiate)
-        except (UnicodeDecodeError, ValueError):
-            self.config.parser_class = parsers[0]
-            return self.crawl(crawl_candiate)
+        # Put the configured parser at the front so it gets the first attempt
+        # and is not attempted a second time later in the loop.
+        preferred = self.config.parser_class
+        if preferred in parsers:
+            parsers.remove(preferred)
+        parsers.insert(0, preferred)
+        article = None
+        for parser in parsers:
+            # Select the parser *before* the attempt; switching only after a
+            # failure would leave the last parser in the list untried.
+            self.config.parser_class = parser
+            try:
+                crawler = Crawler(self.config)
+                article = crawler.crawl(crawl_candiate)
+                break
+            except (UnicodeDecodeError, ValueError):
+                continue
         return article
 
     def initialize(self):