forked from MrCaiting/Pong
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdumb_player.py
More file actions
246 lines (207 loc) · 7.82 KB
/
dumb_player.py
File metadata and controls
246 lines (207 loc) · 7.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import math
import random
# state = (ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y)
# action = ([l_up, l_down, l_stay], [r_up, r_down, r_stay])
# Table Size: number of discretization cells per axis used by to_discrete()
HEIGHT = 12
WIDTH = 12
# Continuous x-coordinates of the left and right walls of the table
LEFT_BOUND = 0
RIGHT_BOUND = 1
# Left paddle values (Regular competitor: same size of paddle with half speed)
LP_HEIGHT = 0.2
LP_STEP = 0.02
# Right paddle values (the Q-learning player's paddle)
RP_HEIGHT = 0.2
RP_STEP = 0.04
# Discretized velocity magnitudes assigned by to_discrete()
DIS_V_X = 1
DIS_V_Y = 1
# Vertical speeds with magnitude below this threshold discretize to 0
V_Y_UP_BOUND = 0.015
# helper function to detect bounce
# helper function to detect bounce
def is_bounced(prev_state, curr_state):
    """Return True when the ball's horizontal direction flipped between states.

    A sign change of velocity_x (state index 2) between prev_state and
    curr_state indicates the ball bounced off a paddle.
    """
    prev_vx = prev_state[2]
    curr_vx = curr_state[2]
    if prev_vx < 0:
        # Ball was travelling left; a bounce sends it rightward.
        return curr_vx > 0
    # Ball was travelling right (or stationary); a bounce sends it leftward.
    return curr_vx < 0
# declare reward state
# declare reward state
def reward_state(prev_state, curr_state):
    """Score the transition prev_state -> curr_state for the right player.

    Returns -1 when the ball passes the right bound outside the right
    paddle (right player misses), +1 when it passes the left bound outside
    the left paddle (left player misses), and 0 otherwise — including a
    successful bounce that sent the ball travelling leftward.
    """
    ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y = curr_state
    # A bounce that left the ball moving left means the right paddle
    # returned it successfully: neutral reward.
    if is_bounced(prev_state, curr_state) and velocity_x < 0:
        return 0
    # Ball crossed the right edge while the right paddle was out of position.
    if ball_x > RIGHT_BOUND:
        if not (r_paddle_y <= ball_y <= r_paddle_y + RP_HEIGHT):
            return -1
    # Ball crossed the left edge while the left paddle was out of position.
    if ball_x < LEFT_BOUND:
        if not (l_paddle_y <= ball_y <= l_paddle_y + LP_HEIGHT):
            return 1
    return 0
# declare action state
# declare action state
def action_state(curr_state, action):
    """Advance the simulation by one time step.

    Applies the (left, right) paddle actions, moves the ball, bounces it
    off the top/bottom walls, and — when it crosses a side covered by that
    side's paddle — reflects it with a randomized velocity perturbation.
    Returns the new continuous state tuple.
    """
    # get curr state info
    ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y = curr_state
    # get action command: each of l_action / r_action is 'Up', 'Down' or 'Nothing'
    l_action, r_action = action
    # update left paddle position ('Up' decreases y; clamp to stay on the table)
    if l_action == 'Up':
        l_paddle_y_new = max(0, l_paddle_y - LP_STEP)
    elif l_action == 'Down':
        l_paddle_y_new = min(1 - LP_HEIGHT, l_paddle_y + LP_STEP)
    elif l_action == 'Nothing':
        l_paddle_y_new = l_paddle_y
    else:
        # NOTE(review): an unrecognized action snaps the paddle to 0 — confirm intended
        l_paddle_y_new = 0
    # update right paddle position (same scheme, double the step size)
    if r_action == 'Up':
        r_paddle_y_new = max(0, r_paddle_y - RP_STEP)
    elif r_action == 'Down':
        r_paddle_y_new = min(1 - RP_HEIGHT, r_paddle_y + RP_STEP)
    elif r_action == 'Nothing':
        r_paddle_y_new = r_paddle_y
    else:
        r_paddle_y_new = 0
    # update new ball position
    ball_x_new = ball_x + velocity_x
    ball_y_new = ball_y + velocity_y
    # we need to discretize the ball position in order to prevent out-of-bound situation
    dis_ball_x = math.floor(ball_x_new * WIDTH) / WIDTH
    # bounce off the bottom/top walls:
    # reflect the position back into [0, 1] and flip the y velocity
    if ball_y_new > 1:
        ball_y_new = 2 - ball_y_new
        velocity_y = -velocity_y
    if ball_y_new < 0:
        ball_y_new = -ball_y_new
        velocity_y = -velocity_y
    # ball reached the right side (checked with the discretized x value)
    if dis_ball_x > 1:
        U = random.uniform(-0.015, 0.015)
        V = random.uniform(-0.03, 0.03)
        # only bounce if the right paddle covers the ball's y position
        if r_paddle_y_new <= ball_y_new and ball_y_new <= (r_paddle_y_new + RP_HEIGHT):
            ball_x_new = 2 - ball_x_new
            # reverse x with a random perturbation; min() keeps the new
            # leftward speed magnitude at least 0.03
            velocity_x = min(-0.03, -velocity_x + U)
            velocity_y += V
    # ball reached the left side: mirror of the right-paddle case
    if dis_ball_x < 0:
        U = random.uniform(-0.015, 0.015)
        V = random.uniform(-0.03, 0.03)
        if l_paddle_y_new <= ball_y_new and ball_y_new <= (l_paddle_y_new + LP_HEIGHT):
            ball_x_new = -ball_x_new
            # max() keeps the new rightward speed magnitude at least 0.03
            velocity_x = max(0.03, -velocity_x + U)
            velocity_y += V
    return (ball_x_new, ball_y_new, velocity_x, velocity_y, l_paddle_y_new, r_paddle_y_new)
def terminate_state(state):
    """Return True when the episode is over.

    The game ends once the ball has crossed either side bound while still
    travelling outward, i.e. the paddle on that side missed it.
    """
    ball_x, _, velocity_x, _, _, _ = state
    # Right player missed: past the right bound and still heading right.
    missed_right = ball_x > RIGHT_BOUND and velocity_x > 0
    # Left player missed: past the left bound and still heading left.
    missed_left = ball_x < LEFT_BOUND and velocity_x < 0
    return missed_right or missed_left
# we need to convert the continuous game state into discrete
# we need to convert the continuous game state into discrete
def to_discrete(curr_state):
    """Quantize a continuous game state onto the discrete learning grid.

    The ball position is snapped to a WIDTH x HEIGHT grid, the velocity is
    reduced to its sign (+/-DIS_V_X; +/-DIS_V_Y, or 0 when |velocity_y| is
    below V_Y_UP_BOUND), and the paddle positions are snapped to their own
    grids. Returns the discretized 6-tuple.
    """
    ball_x, ball_y, velocity_x, velocity_y, l_paddle_y, r_paddle_y = curr_state
    # Snap the ball position onto the grid.
    ball_x = math.floor(WIDTH * ball_x) / WIDTH
    ball_y = math.floor(HEIGHT * ball_y) / HEIGHT
    # Default discretized speeds point in the positive direction.
    vx_new = DIS_V_X
    vy_new = DIS_V_Y
    # Keep only the direction of the x speed.
    if velocity_x < 0:
        vx_new = -DIS_V_X
    # Treat slow vertical motion as zero, otherwise keep the sign.
    if abs(velocity_y) < V_Y_UP_BOUND:
        vy_new = 0
    elif velocity_y < 0:
        vy_new = -DIS_V_Y
    # Snap the right paddle onto its grid.
    r_paddle_y_new = math.floor(r_paddle_y*HEIGHT/(1-RP_HEIGHT)) * ((1-RP_HEIGHT)/HEIGHT)
    # for part 1 (full-height left wall) the position is kept as-is
    if LP_HEIGHT == 1:
        l_paddle_y_new = l_paddle_y
    else:
        # FIX: the original computed a grid position here and immediately
        # overwrote it with 0 (a dead store); only the 0 assignment had any
        # effect, so the clobbered computation has been removed. The dumb
        # left player is driven by the hard-coded policy, not the Q-table.
        l_paddle_y_new = 0
    return (ball_x, ball_y, vx_new, vy_new, l_paddle_y_new, r_paddle_y_new)
def random_speed():
    """Return a fresh (vx, vy) velocity pair for a serve.

    The horizontal component keeps the sign of a uniform offset in
    (-0.015, 0.015) and has magnitude at least 0.03; the vertical
    component is uniform in (-0.03, 0.03).
    """
    offset_x = random.uniform(-0.015, 0.015)
    offset_y = random.uniform(-0.03, 0.03)
    # Push the horizontal speed away from zero in the offset's direction.
    u = 0.03 + offset_x if offset_x > 0 else -0.03 + offset_x
    return u, offset_y
def Qlearning(QLearn_Dict, action_counter, state, prev_state, prev_action):
    """Run one Q-learning update and choose the right paddle's next action.

    Mutates QLearn_Dict (the Q-table, keyed by discretized state) and
    action_counter (visit counts N(s, a)) in place. Returns the chosen
    action ('Up'/'Down'/'Nothing'), or 'End' when prev_state is terminal.
    """
    # Discretize both states so they can index the Q-table.
    Q_state = to_discrete(state)
    Q_prev_state = to_discrete(prev_state)
    if terminate_state(Q_prev_state):
        # Episode over: collapse the previous state into a single terminal
        # entry with utility -1 and signal the caller to stop.
        Q_prev_state = 'End State'
        QLearn_Dict[Q_prev_state] = -1
        best_action = 'End'
    else:
        # Count the visit so the learning rate can decay with experience.
        action_counter[Q_prev_state][prev_action] += 1
        c = 50
        # Decaying learning rate: alpha = c / (c + N(s, a)).
        alpha = c / (c + action_counter[Q_prev_state][prev_action])
        gamma = 0.9
        # Lazily create Q-table and counter rows for newly seen states.
        if Q_state not in QLearn_Dict:
            QLearn_Dict[Q_state] = {'Up': 0, 'Nothing': 0, 'Down': 0}
            action_counter[Q_state] = {'Up': 0, 'Nothing': 0, 'Down': 0}
        Q_prev_val = QLearn_Dict[Q_prev_state][prev_action]
        # Standard Q-learning update:
        # Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(R(s,s') + gamma*max_a' Q(s',a'))
        QLearn_Dict[Q_prev_state][prev_action] = (1 - alpha) * Q_prev_val + alpha * (
            reward_state(Q_prev_state, Q_state) + gamma * getMaxUtil(QLearn_Dict, Q_state))
        # Pick the next action via the exploration strategy.
        best_action = exploration(QLearn_Dict[Q_state], action_counter[Q_state])
    return best_action
# Exploration function uses the modified strategy discussed in the lecture slides
# Exploration function uses the modified strategy discussed in the lecture slides
def exploration(Q_action_set, counter_set):
    """Pick the next action using the visit-count exploration strategy.

    While some action has been tried no more than `threshold` times,
    return the least-tried action; once all actions are sufficiently
    explored, exploit by returning the highest-valued one.
    """
    threshold = 10
    least_tried = min(counter_set, key=counter_set.get)
    if counter_set[least_tried] > threshold:
        # Fully explored: exploit the best known Q-value.
        return max(Q_action_set, key=Q_action_set.get)
    # Still exploring: revisit the least-tried action.
    return least_tried
def getMaxUtil(QLearn_Dict, Q_state):
    """Return the best available Q-value for Q_state (-1 for terminal states)."""
    if terminate_state(Q_state):
        return -1
    entry = QLearn_Dict[Q_state]
    return max(entry['Up'], entry['Nothing'], entry['Down'])
# modified for 2 players
# return position to the game window for update
def update_pos(prev_state, prev_action, state, Qlearning_dict, action_counter):
    """One game tick: learn, choose both paddles' moves, and advance the state.

    Returns (next_state, state, right_action); when the episode has ended,
    right_action is 'End' and (state, prev_state) are returned unchanged.
    """
    # Let the Q-learner update its table and pick the right paddle's move.
    r_action = Qlearning(Qlearning_dict, action_counter, state, prev_state, prev_action)
    if r_action == 'End':
        return state, prev_state, 'End'
    # The left paddle follows its hard-coded ball-tracking policy.
    combined_action = (l_paddle_action(state), r_action)
    return (action_state(state, combined_action), state, r_action)
# Updated for 2.2 left paddle hardcoded motion
# define the movement of left paddle
def l_paddle_action(curr_state):
    """Hard-coded left-paddle policy that tracks the ball.

    Keeps the ball within the middle half of the paddle: moves 'Down' when
    the ball is below the 3/4 mark, 'Up' when it is above the 1/4 mark,
    and 'Nothing' in between.
    """
    ball_y = curr_state[1]
    pad_top = curr_state[4]
    lower_mark = pad_top + LP_HEIGHT * (1/4)
    upper_mark = pad_top + LP_HEIGHT * (3/4)
    if ball_y > upper_mark:
        return 'Down'
    if ball_y < lower_mark:
        return 'Up'
    return 'Nothing'