-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpong_twoPlayers.py
More file actions
321 lines (269 loc) · 10.5 KB
/
pong_twoPlayers.py
File metadata and controls
321 lines (269 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import math
import random
# state = (ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y)
# action = (l_action, r_action); each entry is one of the strings
# 'Up', 'Down' or 'Nothing' (these are the values action_state checks for)
# Often modified constants
# Number of training rallies between two progress reports
REPORT_TRAIL = 10000
# Table Size: number of grid cells used when discretizing positions
HEIGHT = 12
WIDTH = 12
# x coordinates of the left and right edges of the table
LEFT_BOUND = 0
RIGHT_BOUND = 1
# Left paddle values (Use the undefeated wall as competitor):
# a height of 1 spans the whole table and a step of 0 means it never moves
LP_HEIGHT = 1
LP_STEP = 0
# Right paddle values: paddle height and distance moved per 'Up'/'Down' action
RP_HEIGHT = 0.2
RP_STEP = 0.04
# Velocity values: magnitudes of the discretized x / y velocity
DIS_V_X = 1
DIS_V_Y = 1
# |velocity_y| below this bound is discretized to 0
V_Y_UP_BOUND = 0.015
def is_bounced(prev_state, curr_state):
    """Detect a reversal of the ball's horizontal direction.

    Only index 2 (``velocity_x``) of each state is inspected.

    Args:
        prev_state: state before the step.
        curr_state: state after the step.

    Returns:
        True when ``velocity_x`` went from negative to positive, or from
        non-negative to negative; False otherwise.
    """
    vx_before = prev_state[2]
    vx_after = curr_state[2]
    # Ball was travelling left: a bounce means it now travels right.
    if vx_before < 0:
        return vx_after > 0
    # Ball was travelling right (or had no x speed): a bounce means it now
    # travels left.
    return vx_after < 0
def reward_state(prev_state, curr_state):
    """Compute the reward for moving from ``prev_state`` to ``curr_state``.

    Args:
        prev_state: state tuple before the step.
        curr_state: state tuple after the step, laid out as
            (ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y).

    Returns:
        0 when the ball has just bounced and is now travelling left;
        -1 when the ball is beyond RIGHT_BOUND and outside the right paddle;
        1 when the ball is beyond LEFT_BOUND and outside the left paddle;
        0 in every other case.
    """
    ball_x, ball_y, velocity_x, _, l_paddle_y, r_paddle_y = curr_state

    # A bounce that sends the ball leftwards is neutral.
    if is_bounced(prev_state, curr_state) and velocity_x < 0:
        return 0

    # Ball left the table on the right and the right paddle did not cover it.
    if ball_x > RIGHT_BOUND:
        outside_right_paddle = (
            r_paddle_y > ball_y or ball_y > r_paddle_y + RP_HEIGHT
        )
        if outside_right_paddle:
            return -1

    # Same check mirrored for the left paddle.
    if ball_x < LEFT_BOUND:
        outside_left_paddle = (
            l_paddle_y > ball_y or ball_y > l_paddle_y + LP_HEIGHT
        )
        if outside_left_paddle:
            return 1

    return 0
def action_state(curr_state, action):
    """Advance the game by one step and return the resulting state.

    Args:
        curr_state: (ball_x, ball_y, velocity_x, velocity_y,
            l_paddle_y, r_paddle_y).
        action: ``(l_action, r_action)``; each is 'Up', 'Down' or 'Nothing'.
            Any other value puts that paddle at y = 0.

    Returns:
        The next state tuple, in the same layout as ``curr_state``.
    """
    ball_x, ball_y, vx, vy, l_paddle_y, r_paddle_y = curr_state
    l_action, r_action = action

    def _moved(paddle_y, command, step, height):
        # 'Up' decreases y (clamped at 0); 'Down' increases it (clamped so
        # the paddle stays on the table).
        if command == 'Up':
            return max(0, paddle_y - step)
        if command == 'Down':
            return min(1 - height, paddle_y + step)
        if command == 'Nothing':
            return paddle_y
        return 0

    l_paddle_y_new = _moved(l_paddle_y, l_action, LP_STEP, LP_HEIGHT)
    r_paddle_y_new = _moved(r_paddle_y, r_action, RP_STEP, RP_HEIGHT)

    # Move the ball.
    next_x = ball_x + vx
    next_y = ball_y + vy
    # The paddle checks below use the x position snapped down to the grid.
    cell_x = math.floor(next_x * WIDTH) / WIDTH

    # Reflect off the y = 1 and y = 0 walls.
    if next_y > 1:
        next_y = 2 - next_y
        vy = -vy
    if next_y < 0:
        next_y = -next_y
        vy = -vy

    # Right side: the random perturbations are drawn whenever the ball is
    # past the edge, but only applied if the paddle covers the ball.
    if cell_x > 1:
        jitter_x = random.uniform(-0.015, 0.015)
        jitter_y = random.uniform(-0.03, 0.03)
        if r_paddle_y_new <= next_y <= r_paddle_y_new + RP_HEIGHT:
            next_x = 2 - next_x
            # keep the leftward x speed at 0.03 or faster
            vx = min(-0.03, -vx + jitter_x)
            vy += jitter_y

    # Left side, mirrored.
    if cell_x < 0:
        jitter_x = random.uniform(-0.015, 0.015)
        jitter_y = random.uniform(-0.03, 0.03)
        if l_paddle_y_new <= next_y <= l_paddle_y_new + LP_HEIGHT:
            next_x = -next_x
            vx = max(0.03, -vx + jitter_x)
            vy += jitter_y

    return (next_x, next_y, vx, vy, l_paddle_y_new, r_paddle_y_new)
def terminate_state(state):
    """Check whether ``state`` ends the rally.

    Args:
        state: (ball_x, ball_y, velocity_x, velocity_y,
            l_paddle_y, r_paddle_y).

    Returns:
        True when the ball is past RIGHT_BOUND and still moving right, or
        past LEFT_BOUND and still moving left; False otherwise.
    """
    # Only the ball's x position and x velocity matter here.
    ball_x, _, velocity_x, _, _, _ = state
    escaping_right = ball_x > RIGHT_BOUND and velocity_x > 0
    escaping_left = ball_x < LEFT_BOUND and velocity_x < 0
    return escaping_right or escaping_left
# we need to convert the continuous game state into discrete
def to_discrete(curr_state):
    """Map a continuous state tuple onto a coarse, discrete one.

    Args:
        curr_state: (ball_x, ball_y, velocity_x, velocity_y,
            l_paddle_y, r_paddle_y).

    Returns:
        A tuple in the same layout where the ball position is snapped down
        to the WIDTH x HEIGHT grid, velocity_x becomes +/-DIS_V_X,
        velocity_y becomes 0 or +/-DIS_V_Y, and the right paddle position
        is snapped down to a multiple of (1 - RP_HEIGHT) / HEIGHT.
    """
    ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y = curr_state
    # snap the ball position down to the nearest grid line
    ball_x = math.floor(WIDTH * ball_x) / WIDTH
    ball_y = math.floor(HEIGHT * ball_y) / HEIGHT
    # set the speed to discretized speed (defaults: moving in +x and +y)
    vx_new = DIS_V_X
    vy_new = DIS_V_Y
    # discretize ball speed
    # change x speed direction
    if velocity_x < 0:
        vx_new = -DIS_V_X
    # change y speed direction to 0 if in bound
    if abs(velocity_y) < V_Y_UP_BOUND:
        vy_new = 0
    elif velocity_y < 0:
        vy_new = -DIS_V_Y
    # discretize paddle
    r_paddle_y_new = math.floor(r_paddle_y*HEIGHT/(1-RP_HEIGHT)) * ((1-RP_HEIGHT)/HEIGHT)
    # for part 1
    # LP_HEIGHT == 1 is handled separately, which avoids dividing by
    # (1 - LP_HEIGHT) == 0 in the other branch
    if LP_HEIGHT == 1:
        l_paddle_y_new = l_paddle_y
    else:
        l_paddle_y_new = math.floor(l_paddle_y*HEIGHT/(1-LP_HEIGHT)) / HEIGHT
    # NOTE(review): this assignment discards the value computed just above,
    # so the left paddle entry of the result is always 0. It is placed at
    # function level here; it may instead belong inside the else branch --
    # TODO confirm the intended nesting.
    l_paddle_y_new = 0
    return (ball_x, ball_y, vx_new, vy_new, l_paddle_y_new, r_paddle_y_new)
def random_speed():
    """Draw a random starting velocity for the ball.

    Returns:
        ``(velocity_x, velocity_y)``. ``velocity_x`` is 0.03 plus a positive
        jitter, or -0.03 plus a non-positive jitter, so its magnitude lies
        in [0.03, 0.045]. ``velocity_y`` is uniform in [-0.03, 0.03].
    """
    # x jitter is drawn first, then the y velocity
    jitter_x = random.uniform(-0.015, 0.015)
    speed_y = random.uniform(-0.03, 0.03)
    # the sign of the jitter decides which way the ball is served
    base_x = 0.03 if jitter_x > 0 else -0.03
    return base_x + jitter_x, speed_y
def Qlearning(QLearn_Dict, action_counter, state, prev_state, prev_action):
    """Apply one Q-learning update and choose the right paddle's next action.

    Args:
        QLearn_Dict: Q-table; maps a discretized state to a dict of action
            values keyed by 'Up', 'Nothing' and 'Down'. Updated in place.
        action_counter: visit counts with the same layout as the Q-table.
            Updated in place.
        state: current continuous state.
        prev_state: continuous state the previous action was taken from.
        prev_action: action taken in ``prev_state``.

    Returns:
        'End' when the discretized ``prev_state`` is terminal; otherwise the
        action picked by ``exploration`` for the discretized ``state``.
    """
    Q_state = to_discrete(state)
    Q_prev_state = to_discrete(prev_state)

    # The rally was already over: record the end marker and stop.
    if terminate_state(Q_prev_state):
        QLearn_Dict['End State'] = -1
        return 'End'

    # Learning rate decays with the number of times this pair was tried.
    action_counter[Q_prev_state][prev_action] += 1
    decay = 50
    learning_rate = decay / (decay + action_counter[Q_prev_state][prev_action])
    discount = 0.9

    # First visit of the successor state: create zeroed entries for it.
    if Q_state not in QLearn_Dict:
        QLearn_Dict[Q_state] = {'Up': 0, 'Nothing': 0, 'Down': 0}
        action_counter[Q_state] = {'Up': 0, 'Nothing': 0, 'Down': 0}

    old_value = QLearn_Dict[Q_prev_state][prev_action]
    target = reward_state(Q_prev_state, Q_state) + discount * getMaxUtil(QLearn_Dict, Q_state)
    QLearn_Dict[Q_prev_state][prev_action] = (1 - learning_rate) * old_value + learning_rate * target

    return exploration(QLearn_Dict[Q_state], action_counter[Q_state])
def exploration(Q_action_set, counter_set):
    """Pick the next action using a count-based exploration rule.

    This is the modified exploration strategy discussed in the lecture
    slides.

    Args:
        Q_action_set: dict mapping each action to its Q-value.
        counter_set: dict mapping each action to how often it was tried.

    Returns:
        The least-tried action while its count is 10 or lower; otherwise
        the action with the highest Q-value. Ties go to the first key in
        dict order.
    """
    try_limit = 10
    least_tried = min(counter_set, key=counter_set.get)
    # Every action has been tried more than try_limit times: exploit.
    if counter_set[least_tried] > try_limit:
        return max(Q_action_set, key=Q_action_set.get)
    # Otherwise keep exploring the action with the fewest tries.
    return least_tried
def getMaxUtil(QLearn_Dict, Q_state):
    """Return the best utility reachable from ``Q_state``.

    Args:
        QLearn_Dict: Q-table mapping discretized states to action values.
        Q_state: discretized state to evaluate.

    Returns:
        -1 for a terminal state; otherwise the largest of the state's
        'Up', 'Nothing' and 'Down' values.
    """
    if terminate_state(Q_state):
        return -1
    return max(QLearn_Dict[Q_state][name] for name in ('Up', 'Nothing', 'Down'))
def simulated_training(trainsession, Qlearn_Dict, action_counter):
    """Train the right paddle over ``trainsession`` simulated rallies.

    Each rally starts with the ball at the centre of the table with a random
    velocity and runs until ``Qlearning`` returns 'End'. Prints 'Initiated'
    once, and every REPORT_TRAIL rallies prints the average number of right
    paddle bounces over that window.

    Args:
        trainsession: number of rallies to simulate.
        Qlearn_Dict: Q-table, updated in place. Existing entries are kept,
            so training can be resumed on an already populated table.
        action_counter: visit counts matching the Q-table, updated in place.

    Returns:
        The string 'Done'.
    """
    # Initialize game
    u, v = random_speed()
    ini_state = (0.5, 0.5, u, v, 0.5 - 0.5 * LP_HEIGHT, 0.5 - 0.5 * RP_HEIGHT)
    prev_state = ini_state
    Q_ini_state = to_discrete(ini_state)
    # Create entries for the start state only if they are missing, so that
    # values learned in an earlier run are not reset to zero.
    Qlearn_Dict.setdefault(Q_ini_state, {'Up': 0, 'Nothing': 0, 'Down': 0})
    action_counter.setdefault(Q_ini_state, {'Up': 0, 'Nothing': 0, 'Down': 0})
    R_action = 'Up'
    action = (l_paddle_action(prev_state), R_action)
    state = action_state(prev_state, action)
    sum_bounce_r = 0
    print('Initiated')
    for i in range(trainsession):
        # right paddle bounces in the current rally
        rally_bounce_r = 0
        while True:
            R_action = Qlearning(Qlearn_Dict, action_counter, state, prev_state, R_action)
            if R_action == 'End':
                # rally over: add its bounces to the reporting window
                sum_bounce_r += rally_bounce_r
                break
            prev_state = state
            action = (l_paddle_action(prev_state), R_action)
            state = action_state(prev_state, action)
            # a bounce that leaves the ball moving left came off the right paddle
            if is_bounced(prev_state, state) and state[2] < 0:
                rally_bounce_r += 1
        if (i+1) % REPORT_TRAIL == 0:
            print("\nAverage bounces for right paddle (per 10000) after %d trails: " % (i+1), sum_bounce_r/REPORT_TRAIL)
            sum_bounce_r = 0
        # Set up the next rally.
        u, v = random_speed()
        ini_state = (0.5, 0.5, u, v, 0.5 - 0.5 * LP_HEIGHT, 0.5 - 0.5 * RP_HEIGHT)
        Q_ini_state = to_discrete(ini_state)
        Qlearn_Dict.setdefault(Q_ini_state, {'Up': 0, 'Nothing': 0, 'Down': 0})
        action_counter.setdefault(Q_ini_state, {'Up': 0, 'Nothing': 0, 'Down': 0})
        prev_state = ini_state
        R_action = exploration(Qlearn_Dict[Q_ini_state], action_counter[Q_ini_state])
        action = (l_paddle_action(prev_state), R_action)
        state = action_state(prev_state, action)
    return 'Done'
def update_pos(prev_state, prev_action, state, Qlearning_dict, action_counter):
    """Run one learning step and return the data the game window needs.

    Modified for 2 players: the left paddle's action comes from
    ``l_paddle_action`` and the right paddle's from ``Qlearning``.

    Args:
        prev_state: state before the most recent step.
        prev_action: right paddle action taken in ``prev_state``.
        state: current state.
        Qlearning_dict: Q-table, updated in place by ``Qlearning``.
        action_counter: visit counts, updated in place by ``Qlearning``.

    Returns:
        ``(state, prev_state, 'End')`` when the rally is over; otherwise
        ``(next_state, state, r_action)``.
    """
    r_action = Qlearning(Qlearning_dict, action_counter, state, prev_state, prev_action)
    if r_action != 'End':
        joint_action = (l_paddle_action(state), r_action)
        next_state = action_state(state, joint_action)
        return (next_state, state, r_action)
    # rally finished: hand the inputs back unchanged with the end marker
    return state, prev_state, 'End'
def l_paddle_action(curr_state):
    """Return the left paddle's action for ``curr_state``.

    Updated for 2.2 (hardcoded left paddle motion): the state is currently
    ignored and the action is always 'Up'.

    Args:
        curr_state: current game state (unused).

    Returns:
        The string 'Up'.
    """
    # A ball-tracking policy used to live here and is disabled: 'Down' when
    # the ball is below 3/4 of the paddle, 'Nothing' when it is between 1/4
    # and 3/4 of the paddle, and 'Up' otherwise.
    return 'Up'