-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathauto_play_genetics.py
More file actions
245 lines (214 loc) · 9.49 KB
/
auto_play_genetics.py
File metadata and controls
245 lines (214 loc) · 9.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
"""
This module implements methods that let AIs automatically
play a specified number of games to determine which
one is the best. In contrast to auto_play_games.py,
this module works on given AI instances rather than
creating the instances itself. The limitation is that
different AIs cannot be combined into one player,
but differently parametrized instances of the
same AI can be compared against each other.
last edited: 11.04.2023
author: Sebastian Jost
"""
import tkinter as tk
import multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from memory_profiler import profile
from program_files.game_state import Game_State
from program_files.wizard_ais.ai_base_class import Wizard_Base_Ai
from program_files.helper_functions import get_hands
# @profile
class Genetic_Auto_Play():
    """
    Let a fixed set of AI instances play many games against each other and
    score every instance by the lower bound of the confidence interval of
    its mean final score.

    Signature annotations that name project or tkinter types are written as
    strings so that they are not evaluated when the class is defined.
    """

    def __init__(self,
            n_players: int,
            ai_instances: "list[Wizard_Base_Ai]",
            max_rounds: int = 20,
            confidence_level: float = 0.95,
            limit_choices: bool = False,  # not implemented
            ):
        """
        initialize auto-play setup

        inputs:
        -------
            n_players (int): number of players in the game
            ai_instances (list[Wizard_Base_Ai]): AI instances to be used in the games.
                Only the first `n_players` entries are ever seated.
            max_rounds (int): maximum number of rounds to be played per game
            confidence_level (float): two-sided confidence level for player scores,
                strictly between 0 and 1 (score = lower bound of confidence interval)
            limit_choices (bool): stored and passed on to `play_round`, which does
                not use it (not implemented)

        raises:
        -------
            ValueError: if `n_players` < 1, fewer than `n_players` AI instances are
                given, or `confidence_level` is not strictly between 0 and 1
        """
        if n_players < 1:
            raise ValueError(f"n_players must be at least 1, got {n_players}")
        if len(ai_instances) < n_players:
            raise ValueError(
                f"need at least {n_players} AI instances, got {len(ai_instances)}")
        if not 0.0 < confidence_level < 1.0:
            raise ValueError(
                f"confidence_level must be strictly between 0 and 1, got {confidence_level}")
        self.n_players: int = n_players
        self.limit_choices: bool = limit_choices
        # `n_rounds` is an EXCLUSIVE upper bound: `play_game` iterates
        # range(1, n_rounds). The cap 60 // n_players presumably reflects a
        # 60-card deck -- TODO confirm against `get_hands`.
        self.n_rounds: int = min(max_rounds, 60 // self.n_players) + 1
        self.ai_instances: list[Wizard_Base_Ai] = ai_instances
        # Stored exactly as given. The two-sided split of the tail probability
        # happens once, in `_lower_confidence_bound`; halving it here as well
        # would shrink the z-score from ~1.96 to ~0.64 for a 0.95 level.
        self.confidence_level: float = confidence_level
        self.games_played = 0

    def _lower_confidence_bound(self, scores: np.ndarray) -> np.ndarray:
        """
        compute the lower bound of the two-sided confidence interval of the
        mean score of each AI, using a normal approximation

        inputs:
        -------
            scores (np.ndarray): array with one row per game and one column per AI

        returns:
        --------
            (np.ndarray): mean - z * std / sqrt(n_games) for each column
        """
        n_games: int = scores.shape[0]
        avg_scores: np.ndarray = scores.mean(axis=0)
        standard_deviations: np.ndarray = scores.std(axis=0)
        z_score: float = stats.norm.ppf(1 - (1 - self.confidence_level) / 2)
        return avg_scores - standard_deviations / np.sqrt(n_games) * z_score

    def auto_play_single_threaded(self, n_games: int) -> np.ndarray:
        """
        automatically play `n_games` with the set AIs, one game after the other
        in the calling process.

        inputs:
        -------
            n_games (int): number of games to be played (at least 1)

        returns:
        --------
            (np.ndarray): score for each AI (indexed like `self.ai_instances`)
                as lower bound of the confidence interval of its mean final score

        raises:
        -------
            ValueError: if `n_games` < 1
        """
        if n_games < 1:
            raise ValueError(f"n_games must be at least 1, got {n_games}")
        # `play_record_game` already maps seat results back to AI indices, so
        # every column of `scores` belongs to exactly one AI instance.
        scores: np.ndarray = np.array(
            [self.play_record_game() for _ in range(n_games)], dtype=float)
        return self._lower_confidence_bound(scores)

    def play_record_game(self, *_) -> np.ndarray:
        """
        play a single game with randomly shuffled seating and return the final
        scores ordered like `self.ai_instances`. Positional arguments are
        ignored so the method can be used directly with `Pool.map`.

        returns:
        --------
            (np.ndarray): final scores, entry i belongs to `self.ai_instances[i]`
        """
        # seat_order[s] is the index of the AI sitting at seat s
        seat_order: np.ndarray = np.random.permutation(self.n_players)
        seated_ais: list[Wizard_Base_Ai] = [self.ai_instances[i] for i in seat_order]
        seat_scores: np.ndarray = np.asarray(self.play_game(seated_ais))
        # Write into a fresh array: this neither mutates the array owned by
        # the game object nor relies on self-overlapping fancy assignment.
        ai_scores: np.ndarray = np.empty_like(seat_scores)
        ai_scores[seat_order] = seat_scores
        return ai_scores

    def auto_play_multi_threaded(self,
            n_games: int,
            process_pool: "mp.pool.Pool",
            ) -> np.ndarray:
        """
        automatically play `n_games` with the set AIs, distributing the games
        over the workers of `process_pool`.

        inputs:
        -------
            n_games (int): number of games to be played (at least 1)
            process_pool (multiprocessing pool): pool whose `map` method is used
                to run `play_record_game` once per game

        returns:
        --------
            (np.ndarray): score for each AI (indexed like `self.ai_instances`)
                as lower bound of the confidence interval of its mean final score

        raises:
        -------
            ValueError: if `n_games` < 1
        """
        if n_games < 1:
            raise ValueError(f"n_games must be at least 1, got {n_games}")
        # NOTE(review): workers created via fork may inherit the parent's NumPy
        # global RNG state and then draw identical random sequences. Consider
        # reseeding in the pool initializer -- confirm which RNG `get_hands` uses.
        result_list: list[np.ndarray] = process_pool.map(self.play_record_game, range(n_games))
        scores: np.ndarray = np.array(result_list, dtype=float)
        return self._lower_confidence_bound(scores)

    def play_game(self, ai_instances: "list[Wizard_Base_Ai]"):
        """
        play one game with the rules set in `self`

        inputs:
        -------
            ai_instances (list[Wizard_Base_Ai]): AIs in seating order

        returns:
        --------
            `game.players_total_points` of the finished game (in seating order)
        """
        game = Game_State(n_players=self.n_players, verbosity=0)
        for round_nbr in range(1, self.n_rounds):
            self.play_round(round_nbr, game, self.limit_choices, ai_instances)
        return game.players_total_points

    def play_round(self, round_nbr: int, game: "Game_State", limit_choices: bool, ai_instances: "list[Wizard_Base_Ai]"):
        """
        play the given round on `game`: deal hands, determine trump, collect
        one bid per player and play tricks until none are left.

        inputs:
        -------
            round_nbr (int): number of the round, passed to `get_hands`
            game (Game_State): game object that is advanced in place
            limit_choices (bool): currently unused (not implemented)
            ai_instances (list[Wizard_Base_Ai]): AIs in seating order
        """
        # generate hands and determine trump
        hands, trump_card = get_hands(game.n_players, round_nbr)
        if trump_card is None:
            trump_color = -1
        elif trump_card.value != 14:
            trump_color = trump_card.color
        else:  # trump card is a wizard -> player who "gave cards" determines trump
            chooser = game.round_starting_player
            trump_color = ai_instances[chooser].get_trump_color_choice(
                hands=hands,
                active_player=chooser,
                game_state=game)
        game.start_round(hands, trump_card, trump_color)
        # handle player predictions: every player bids with their OWN AI,
        # starting with the round's starting player and going around the table
        predictions = np.zeros(game.n_players, dtype=np.int8)
        player_index = game.round_starting_player
        for _ in range(game.n_players):
            predictions[player_index] = ai_instances[player_index].get_prediction(
                player_index=player_index,
                game_state=game)
            player_index = (player_index + 1) % game.n_players
        game.set_predictions(predictions)
        # play tricks of the round
        while game.tricks_to_be_played > 0:
            self.play_trick(game, ai_instances)

    def play_trick(self, game: "Game_State", ai_instances: "list[Wizard_Base_Ai]"):
        """
        play one trick and advance the game object accordingly: each of the
        `game.n_players` turns asks the AI of `game.trick_active_player` for an
        action and performs it on `game`.
        """
        game.start_trick()
        for _ in range(game.n_players):
            action = ai_instances[game.trick_active_player].get_trick_action(
                game_state=game)
            game.perform_action(action)

    def get_player_labels(self):
        """
        generate a label for each player containing the player number and the
        parameter values of the corresponding AI instance

        Returns:
            list: list of multiline strings containing player information.
                The list has at least 6 entries; entries beyond `n_players`
                are empty strings.
        """
        # keep the historical minimum length of 6, but grow for larger games
        player_labels = [""] * max(6, self.n_players)
        for i in range(self.n_players):
            ai = self.ai_instances[i]
            player_labels[i] = (
                f"Player {i+1}\n"
                f"trump: {ai.color_number_weight}, {ai.color_sum_weight}\n"
                f"bids: {ai.min_value_for_win}, {ai.min_trump_value_for_win}"
                f", {ai.round_factor}, {ai.jester_factor}, {ai.prediction_factor}\n"
                f"trick: {ai.trump_value_increase}, {ai.wizard_value}"
                f", {ai.n_cards_factor}, {ai.remaining_cards_factor}"
            )
        return player_labels

    def plot_results(self, tkinter_embedded: "tk.Frame" = None, highlight_final_value=True):
        """
        plot the win ratios and scores stored in `self.win_ratios` and
        `self.scores`. This class never assigns these attributes itself; they
        must be set by the caller before plotting.

        inputs:
        -------
            tkinter_embedded (tkinter.Frame): a tkinter frame where the plot is to be shown.
                Embedding is not implemented: if a frame is given, nothing is plotted.
                If none is given, the plot is shown in a separate window created by matplotlib.
            highlight_final_value (bool): whether to draw a dashed horizontal line
                at the last value of every curve

        raises:
        -------
            AttributeError: if `self.win_ratios` or `self.scores` has not been set
        """
        player_labels = self.get_player_labels()
        colors = ["#22dd22", "#00aaaa", "#5588ff", "#bb00bb", "#dd2222", "#ff8800"]
        if tkinter_embedded is not None:
            return
        missing = [name for name in ("win_ratios", "scores") if not hasattr(self, name)]
        if missing:
            raise AttributeError(
                f"cannot plot results: attribute(s) {', '.join(missing)} have not been set")
        fig, axes = plt.subplots(2, sharex=True)
        ax1, ax2 = axes
        for i in range(self.n_players):
            color = colors[i % len(colors)]  # reuse colors for more than 6 players
            for axis, series in ((ax1, self.win_ratios), (ax2, self.scores)):
                axis.plot(series[:, i], label=player_labels[i], color=color, alpha=0.5)
                if highlight_final_value:
                    axis.hlines(
                        (series[-1, i],),
                        xmin=0,
                        xmax=self.games_played,
                        linestyle="--",
                        color=color)
        ax1.set_ylabel("win ratio")
        ax2.set_xlabel("game number")
        ax2.set_ylabel("average score")
        ax1.grid(color="#dddddd")
        ax2.grid(color="#dddddd")
        ax2.legend(loc="center left", bbox_to_anchor=(1.02, 1.02))
        plt.show()