diff --git a/hqtrivia_bot.py b/hqtrivia_bot.py index baf8a84..9c1482e 100755 --- a/hqtrivia_bot.py +++ b/hqtrivia_bot.py @@ -12,7 +12,7 @@ from requests_cache import CachedSession from requests_futures.sessions import FuturesSession from websocket import WebSocketApp, WebSocketException, WebSocketTimeoutException -from solvers import GoogleAnswerWordsSolver, GoogleResultsCountSolver +from solvers import GoogleAnswerWordsSolver, WolframAlphaAnswerWordsSolver, GoogleResultsCountSolver from utils import Colours @@ -25,7 +25,8 @@ def __init__(self): self.current_game = '' self.solvers = [ GoogleAnswerWordsSolver(), - GoogleResultsCountSolver() + GoogleResultsCountSolver(), + WolframAlphaAnswerWordsSolver() ] self.headers = { 'User-Agent': 'hq-viewer/1.2.4 (iPhone; iOS 11.1.1; Scale/3.00)', @@ -123,7 +124,7 @@ def prediction_time(self, data): confidence = {'A': 0, 'B': 0, 'C': 0} for solver in self.solvers: responses[solver] = solver.fetch_responses( - solver.build_urls(data.get('question'), data.get('answers')), session + solver.build_urls(data.get('question'), data.get('answers'), data.get('category')), session ) for solver, responses in responses.items(): (prediction, confidence) = solver.run( @@ -360,7 +361,8 @@ def cache(self, command): session = CachedSession('db/cache', allowable_codes=(200, 302, 304)) solvers = [ GoogleAnswerWordsSolver(), - GoogleResultsCountSolver() + GoogleResultsCountSolver(), + WolframAlphaAnswerWordsSolver() ] print('Running cache %s' % command) if command == 'prune': @@ -382,7 +384,7 @@ def cache_prune(session, solvers): for filename in sorted(glob('games/*.json')): game = load(open(filename)) for turn in game.get('questions'): - urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'))) + urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category'))) stale_entries = [] for key, (resp, _) in session.cache.responses.items(): if resp.url not in urls and not any(step.url in urls for step in 
resp.history): @@ -400,7 +402,7 @@ def cache_refresh(session, solvers): for filename in sorted(glob('games/*.json')): game = load(open(filename)) for turn in game.get('questions'): - urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'))) + urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category'))) cache_misses = [ url for url in urls if not session.cache.create_key( session.prepare_request(Request('GET', url)) @@ -442,7 +444,7 @@ def cache_export(session, solvers): urls = [] for solver in solvers: for turn in game.get('questions'): - urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'))) + urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category'))) url_keys = [session.cache.create_key(session.prepare_request(Request('GET', url))) for url in urls] conn = connect(':memory:') cur = conn.cursor() diff --git a/solvers.py b/solvers.py index 25112a8..14499bd 100644 --- a/solvers.py +++ b/solvers.py @@ -1,7 +1,7 @@ """ Solvers for the HQ Trivia bot project """ import re import sys -from urllib.parse import quote_plus +from urllib.parse import quote_plus, unquote_plus from bs4 import BeautifulSoup from utils import Colours, get_raw_words, get_significant_words @@ -13,13 +13,13 @@ class BaseSolver(object): service_url = None @staticmethod - def build_queries(question_text, answers): + def build_queries(question_text, answers, category): """ build queries with question text and answers """ raise NotImplementedError() - def build_urls(self, question_text, answers): + def build_urls(self, question_text, answers, category): """ build URLs with search queries """ - queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers) + queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers, category) return [self.service_url.format(quote_plus(query)) for query in queries] @staticmethod @@ -73,7 +73,7 
@@ class GoogleAnswerWordsSolver(BaseSolver): service_url = 'https://www.google.co.uk/search?pws=0&q={}' @staticmethod - def build_queries(question_text, answers): + def build_queries(question_text, answers, category): """ build queries with question text and answers """ return [question_text] @@ -112,7 +112,7 @@ class GoogleResultsCountSolver(BaseSolver): service_url = 'https://www.google.co.uk/search?pws=0&q={}' @staticmethod - def build_queries(question_text, answers): + def build_queries(question_text, answers, category): """ build queries with question text and answers """ return ['%s "%s"' % (question_text, answer) for answer in answers.values()] @@ -129,3 +129,34 @@ def get_answer_matches(self, response, index, answers, matches): chr(65 + index), Colours.BOLD.value, matches[chr(65 + index)], Colours.ENDC.value )) return matches + + +class WolframAlphaAnswerWordsSolver(BaseSolver): + """ Solver that asks Wolfram Alpha one 'Is <answer> ...' question per answer and counts answer words in the reply """ + + weight = 100 + service_url = 'https://api.wolframalpha.com/v1/result?appid=4H762W-PQ7735Q7T6&timeout=2&i={}' + + @staticmethod + def build_queries(question_text, answers, category): + """ build one 'Is <answer> ...' query per answer for 'Which of these' questions in the Geography/Literature categories; return [] for anything else """ + if 'Which of these' in question_text and (category or '').strip() in ('Geography', 'Literature'): + pattern = re.compile(r'Which of these( [^ ]*)( is)?( NOT)?') + return [pattern.sub(lambda match: 'Is {}{}'.format(answer, match.group(1)), question_text) for answer in answers.values()] + return [] + + @staticmethod + def get_answer_matches(response, _index, answers, matches): + """ add to matches, for every answer, how often its raw words occur in the Wolfram Alpha reply, then return matches """ + result = BeautifulSoup(response.text, "html5lib").text + print('{}: {}{}{}'.format(unquote_plus(response.url.split('&i=')[1]), + Colours.BOLD.value, result, Colours.ENDC.value)) + if result != 'Wolfram|Alpha did not understand your input': + results_words = get_raw_words(result) + for index, answer in answers.items(): + answer_words = get_raw_words(answer) + matches[index] += 
results_words.count(answer_words) + for index, count in matches.items(): + print('{}: {} '.format(index, Colours.BOLD.value + str(count) + Colours.ENDC.value), end='', flush=True) + print('\n') + return matches