# Source: hangman/hangman_api_user.py at main · hbyecoding/hangman · GitHub
# %% [markdown]
# # Trexquant Interview Project (The Hangman Game)
#
# * Copyright Trexquant Investment LP. All Rights Reserved.
# * Redistribution of this question without written consent from Trexquant is prohibited

# %% [markdown]
# ## Instruction:
# For this coding test, your mission is to write an algorithm that plays the game of Hangman through our API server.
#
# When a user plays Hangman, the server first selects a secret word at random from a list. The server then returns a row of underscores (space separated)—one for each letter in the secret word—and asks the user to guess a letter. If the user guesses a letter that is in the word, the word is redisplayed with all instances of that letter shown in the correct positions, along with any letters correctly guessed on previous turns. If the letter does not appear in the word, the user is charged with an incorrect guess. The user keeps guessing letters until either (1) the user has correctly guessed all the letters in the word
# or (2) the user has made six incorrect guesses.
#
# You are required to write a "guess" function that takes current word (with underscores) as input and returns a guess letter. You will use the API codes below to play 1,000 Hangman games. You have the opportunity to practice before you want to start recording your game results.
#
# Your algorithm is permitted to use a training set of approximately 250,000 dictionary words. Your algorithm will be tested on an entirely disjoint set of 250,000 dictionary words. Please note that this means the words that you will ultimately be tested on do NOT appear in the dictionary that you are given. You are not permitted to use any dictionary other than the training dictionary we provided. This requirement will be strictly enforced by code review.
#
# You are provided with a basic, working algorithm. This algorithm will match the provided masked string (e.g. a _ _ l e) to all possible words in the dictionary, tabulate the frequency of letters appearing in these possible words, and then guess the letter with the highest frequency of appearance that has not already been guessed. If there are no remaining words that match then it will default back to the character frequency distribution of the entire dictionary.
#
# This benchmark strategy is successful approximately 18% of the time. Your task is to design an algorithm that significantly outperforms this benchmark.

# %%
import json
import requests
import random
import string
import secrets
import time
import re
import collections

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# %%
class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        full_dictionary_location = "/Users/hbye/quantT/kunwang/hangman/words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()

        self.current_dictionary = []
        # Define common letter combinations (you can expand this list)
        self.common_combinations = {
            'th', 'he', 'in', 'er', 'an', 're', 'ed', 'on', 'es', 'st',
            'nd', 'at', 'or', 'nt', 'is', 'ar', 'te', 'en', 'al', 'to',
            'ing', 'ion', 'ati', 'ent', 'and', 'tha', 'str', 'all', 'out',
            'tch', 'igh', 'ough', 'tion', 'sion', 'ence', 'ance', 'ment', 'able', 'ible'
        }
        # Pre-calculate combination frequencies from the full dictionary for efficiency
        self.combination_frequencies = self._precompute_combination_frequencies()

    @staticmethod
    def determine_hangman_url():
        """Time the candidate servers and return the hangman endpoint of the fastest.

        Each candidate receives one untimed request followed by ten timed
        requests whose durations are summed; the candidate with the smallest
        total wins.

        Returns:
            The chosen base link with ``'/trexsim/hangman'`` appended.
        """
        links = ['https://trexsim.com']

        # Total time spent on the timed requests, per candidate link.
        data = {link: 0 for link in links}

        for link in links:

            # Untimed first request (presumably a warm-up so that connection
            # setup is not measured -- confirm).
            requests.get(link)

            for _ in range(10):
                s = time.time()
                requests.get(link)
                # Accumulate: plain assignment kept only the last sample.
                data[link] += time.time() - s

        # The first link with the smallest total wins, as a stable sort would pick.
        link = min(data, key=data.get)
        return link + '/trexsim/hangman'

    def _precompute_combination_frequencies(self):
        """
        Precomputes frequencies of common combinations from the full dictionary.
        This helps in quickly evaluating which combinations are most likely.
        """
        combination_counts = collections.Counter()
        for word in self.full_dictionary:
            for combo in self.common_combinations:
                if combo in word:
                    combination_counts[combo] += 1 # Count if the combination exists in the word
        return combination_counts


    def guess(self, word):
        # 1. 预处理
        clean_word = word[::2].replace("_", ".")
        len_word = len(clean_word)
        current_dictionary = self.current_dictionary
        new_dictionary = []

        # 2. 筛选可能的单词
        for dict_word in current_dictionary:
            if len(dict_word) != len_word:
                continue
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)
        self.current_dictionary = new_dictionary

        # If no words match, fall back to the original dictionary
        if not self.current_dictionary:
            self.current_dictionary = self.full_dictionary
            # Re-filter based on the full dictionary if the filtered dictionary became empty
            for dict_word in self.full_dictionary:
                if len(dict_word) != len_word:
                    continue
                if re.match(clean_word, dict_word):
                    new_dictionary.append(dict_word)
            self.current_dictionary = new_dictionary


        # --- NEW: Prioritize guessing based on common letter combinations ---
        best_combo_letter = None
        best_combo_score = -1

        # Calculate combination scores based on the current (filtered) dictionary
        current_combo_scores = collections.Counter()
        for dict_word in self.current_dictionary:
            for combo in self.common_combinations:
                # Check if the combination can fit into the unknown parts of the current word
                # and if its letters haven't been guessed yet.
                combo_len = len(combo)
                for i in range(len_word - combo_len + 1):
                    word_slice = clean_word[i : i + combo_len]
                    match_possible = True
                    for j in range(combo_len):
                        if word_slice[j] != '.' and word_slice[j] != combo[j]:
                            match_possible = False
                            break
                        if combo[j] in self.guessed_letters and clean_word[i+j] == '.': # If combo letter already guessed AND the slot is unknown
                            match_possible = False
                            break
                    if match_possible:
                        # If a match is possible, consider the letters that are currently unknown
                        # but part of the combo. We want to guess *those* letters.
                        for k in range(combo_len):
                            if clean_word[i+k] == '.' and combo[k] not in self.guessed_letters:
                                current_combo_scores[combo[k]] += self.combination_frequencies[combo] # Use precomputed frequency as a weight

        # Now, from the combination scores, pick the best letter to guess
        if current_combo_scores:
            for letter, score in current_combo_scores.most_common():
                if letter not in self.guessed_letters:
                    return letter # Guess the most frequent letter that is part of a high-scoring combination

        # --- END NEW ---


        # 3. 统计未猜测位置的字母频率（按位置统计）
        position_counters = [collections.Counter() for _ in range(len_word)]
        for w in new_dictionary:
            for i, ch in enumerate(w):
                if clean_word[i] == '.' and ch not in self.guessed_letters:
                    position_counters[i][ch] += 1

        # 4. 统计所有未猜过字母的总频率
        total_counter = collections.Counter()
        import pdb; pdb.set_trace() # Keep this for debugging if needed, but remove for production
        for w in new_dictionary:
            for ch in set(w):  # 用set避免重复计数
                if ch not in self.guessed_letters:
                    total_counter[ch] += 1

        # 5. 优先猜元音
        vowels = ['e', 'a', 'o', 'i', 'u']
        for v in vowels:
            if v not in self.guessed_letters and total_counter[v] > 0:
                return v

        # 6. 如果元音都猜过了，猜出现频率最高的字母（按位置优先）
        best_letter = None
        best_score = -1
        for i, counter in enumerate(position_counters):
            if not counter:
                continue
            letter, score = counter.most_common(1)[0]
            if score > best_score and letter not in self.guessed_letters:
                best_letter = letter
                best_score = score
        if best_letter:
            return best_letter

        # 7. 如果还没有，猜总频率最高的字母
        for letter, _ in total_counter.most_common():
            if letter not in self.guessed_letters:
                return letter

        # 8. 如果没有可能单词，回退到全字典统计
        for letter, _ in self.full_dictionary_common_letter_sorted:
            if letter not in self.guessed_letters:
                return letter

        # 9. 实在没有就返回一个不会出错的字母
        for ch in 'abcdefghijklmnopqrstuvwxyz':
            if ch not in self.guessed_letters:
                return ch
        return 'e'  # 兜底

    ##########################################################
    # You'll likely not need to modify any of the code below #
    ##########################################################

    def build_dictionary(self, dictionary_file_location):
        """Load the word list stored at ``dictionary_file_location``.

        Args:
            dictionary_file_location: Path of a text file with one word per line.

        Returns:
            A list with one entry per line of the file, line endings removed.
        """
        # The context manager closes the file even if reading raises.
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

    def start_game(self, practice=True, verbose=True):
        # reset guessed letters to empty set and current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary

        response = self.request("/new_game", {"practice":practice})
        if response.get('status')=="approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains>0:
                # get guessed letter from user code
                guess_letter = self.guess(word)

                # append guessed letter to guessed letters field in hangman object
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))

                try:
                    res = self.request("/guess_letter", {"request":"guess_letter", "game_id":game_id, "letter":guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e

                if verbose:
                    print("Sever response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status=="success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
                elif status=="failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status=="ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status=="success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(
            self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"

        # Add `access_token` to post_args or args if it has not already been
        # included.
        if self.access_token:
            # If post_args exists, we assume that args either does not exists
            # or it does not need `access_token`.
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        time.sleep(0.2)

        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

class HangmanAPIError(Exception):
    """Exception carrying an error payload from the hangman API.

    Attributes set from ``result``:
        result: The payload exactly as passed in.
        type: ``result["error_code"]`` when available, else the nested
            ``error.type`` when the nested error supplies the message, else ``""``.
        code: The nested ``error.code`` when the nested error supplies the
            message, else None.
        message: The first available of ``error_description``,
            ``error.message``, ``error_msg``; otherwise ``result`` itself.
    """

    def __init__(self, result):
        absent = object()  # marks a lookup that failed

        def lookup(container, key):
            # Subscript that treats a missing key or a non-subscriptable
            # container as "absent".
            try:
                return container[key]
            except (KeyError, TypeError):
                return absent

        self.result = result
        self.code = None

        error_type = lookup(result, "error_code")
        self.type = "" if error_type is absent else error_type

        message = lookup(result, "error_description")
        if message is absent:
            details = lookup(result, "error")
            if details is not absent:
                message = lookup(details, "message")
            if message is not absent:
                # The nested error object supplied the message; take its
                # code and, when no type is known yet, its type as well.
                self.code = details.get("code")
                if not self.type:
                    self.type = details.get("type", "")
            else:
                message = lookup(result, "error_msg")
                if message is absent:
                    message = result
        self.message = message

        super(HangmanAPIError, self).__init__(self.message)

# %% [markdown]
# # API Usage Examples

# %% [markdown]
# ## To start a new game:
# 1. Make sure you have implemented your own "guess" method.
# 2. Use the access_token that we sent you to create your HangmanAPI object.
# 3. Start a game by calling "start_game" method.
# 4. If you wish to test your function without being recorded, set "practice" parameter to 1.
# 5. Note: You have a rate limit of 20 new games per minute. DO NOT start more than 20 new games within one minute.

# %%
import os

# Prefer a token supplied through the HANGMAN_ACCESS_TOKEN environment variable so the
# credential need not live in the source; the previously hard-coded token is the fallback.
api = HangmanAPI(
    access_token=os.environ.get("HANGMAN_ACCESS_TOKEN", "18965b3ab8184fc94104e4a7fb6c50"),
    timeout=2000,
)


# %% [markdown]
# ## Playing practice games:
# You can use the command below to play up to 100,000 practice games.

# %%
# Play one practice game, then report the account's practice statistics.
api.start_game(practice=1,verbose=True)
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
# Report 0 instead of dividing by zero when no practice run has been recorded.
practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs else 0.0
print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))


# %% [markdown]
# ## Playing recorded games:
# Please finalize your code prior to running the cell below. Once this code has executed successfully, your submission is finalized. Our system will not allow you to run any additional games.
#
# Please note that it is expected that after you successfully run this block of code that subsequent runs will result in the error message "Your account has been deactivated".
#
# Once you've run this section of the code your submission is complete. Please send us your source code via email.

# %%
# Final recorded run: 1000 games, paced by a half-second pause after each one.
for game_index in range(1000):
    print('Playing ', game_index, ' th game')
    # Uncomment the following line to execute your final runs. Do not do this until you are satisfied with your submission
    #api.start_game(practice=0,verbose=False)

    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

# %%


# %% [markdown]
# ## To check your game statistics
# 1. Simply use "my_status" method.
# 2. Returns your total number of games, and number of wins.

# %%
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
# No recorded game exists until the final run has been executed; report 0
# instead of raising ZeroDivisionError in that case.
success_rate = total_recorded_successes/total_recorded_runs if total_recorded_runs else 0.0
print('overall success rate = %.3f' % success_rate)

# %%