From f593b6269bc23034451e2f979d968c54a05638fc Mon Sep 17 00:00:00 2001
From: Brian Cain
Date: Wed, 17 May 2017 07:03:25 -0500
Subject: [PATCH] Fix #191: infinite recursion on some pages

Changed to iterative algorithm
---
 goose/__init__.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/goose/__init__.py b/goose/__init__.py
index 409b5732..dc8104d0 100644
--- a/goose/__init__.py
+++ b/goose/__init__.py
@@ -60,13 +60,16 @@ def shutdown_network(self):
 
     def crawl(self, crawl_candiate):
         parsers = list(self.config.available_parsers)
-        parsers.remove(self.config.parser_class)
-        try:
-            crawler = Crawler(self.config)
-            article = crawler.crawl(crawl_candiate)
-        except (UnicodeDecodeError, ValueError):
-            self.config.parser_class = parsers[0]
-            return self.crawl(crawl_candiate)
+        article = None
+        for parser in parsers:
+            try:
+                crawler = Crawler(self.config)
+                article = crawler.crawl(crawl_candiate)
+
+                break
+            except (UnicodeDecodeError, ValueError):
+                self.config.parser_class = parser
+                continue
         return article
 
     def initialize(self):