# Review notes (free text ahead of the first "diff --git" header).
#
# solvers.py, build_urls: the question text is now cleaned in two steps
# before being passed to build_queries. First ' NOT ' and ' NEVER ' are
# replaced by a single space (as before). Then re.sub removes every
# character that is not an ASCII letter, an ASCII digit, a curly double
# quote, a question mark, or a space.
#
# utils.py, get_raw_words: the strip pattern changes from [^\w ] to
# [^A-Za-z0-9 ]. Under Python 3 str semantics \w also matches the
# underscore and non-ASCII letters/digits, so those characters are now
# removed as well.
#
# NOTE(review): this patch was received flattened onto one line. Line
# breaks, blank context lines and indentation below were reconstructed
# from the hunk counts (-19,7 +19,9 and -34,6 +34,6) - confirm against
# the repository before applying.
# NOTE(review): .replace(' ', ' ') is reproduced as received and, as
# written, replaces a space with a space (no effect). Presumably the
# first argument was two spaces before flattening - confirm.
diff --git a/solvers.py b/solvers.py
index 25112a8..63076e7 100644
--- a/solvers.py
+++ b/solvers.py
@@ -19,7 +19,9 @@ def build_queries(question_text, answers):
 
     def build_urls(self, question_text, answers):
         """ build URLs with search queries """
-        queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers)
+        parsed_question_text = question_text.replace(' NOT ', ' ').replace(' NEVER ', ' ')
+        parsed_question_text = re.sub(r'[^A-Za-z0-9\“\”\? ]', '', parsed_question_text).replace(' ', ' ')
+        queries = self.build_queries(parsed_question_text, answers)
         return [self.service_url.format(quote_plus(query)) for query in queries]
 
     @staticmethod
diff --git a/utils.py b/utils.py
index 073eb87..0600c98 100644
--- a/utils.py
+++ b/utils.py
@@ -34,6 +34,6 @@ def get_significant_words(question_words):
 
 def get_raw_words(data):
     """ Extract raw words from data """
-    data = re.sub(r'[^\w ]', '', data).replace(' and ', ' ').strip()
+    data = re.sub(r'[^A-Za-z0-9 ]', '', data).replace(' and ', ' ').strip()
     words = data.replace(' ', ' ').lower()
     return words