-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
executable file
·55 lines (37 loc) · 1.61 KB
/
test.py
File metadata and controls
executable file
·55 lines (37 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import torch
import random
def test(net, env, actions, parameters, image_processor, device,
         episodes=1, epsilon=0.1):
    """Play evaluation episodes with a mostly greedy policy and report scores.

    On every step a random action is taken with probability ``epsilon``;
    otherwise the action with the largest Q-value predicted by ``net`` is
    taken. Each frame is rendered and the episode score is printed when the
    environment reports ``done``.

    Args:
        net: Callable Q-network. It is called with the stacked state viewed
            as a tensor of shape ``(1, 4, 110, 84)`` and must return one
            Q-value per entry of ``actions``.
        env: Environment exposing ``reset()``, ``step(action)`` returning a
            4-tuple ``(next_state, reward, done, info)``, ``render()`` and
            ``close()``.
        actions: Indexable collection of actions accepted by ``env.step``.
        parameters: Unused here; kept so existing callers keep working.
        image_processor: Object exposing ``stack_frame(frame, is_new_episode)``
            that returns the stacked state fed to ``net``.
        device: Torch device (or device string) the state is moved to before
            inference.
        episodes: Number of episodes to play.
        epsilon: Probability of taking a random action on any given step.

    Returns:
        A list with the total reward of each completed episode.
    """
    total_test_rewards = []
    try:
        for episode in range(episodes):
            total_rewards = 0
            state = image_processor.stack_frame(env.reset(), True)
            print("****************************************************")
            print("EPISODE ", episode)
            while True:
                # Explore only with LOW probability: the draw must fall
                # below epsilon for a random action to be taken.
                if np.random.rand() < epsilon:
                    choice = random.randrange(len(actions))
                else:
                    # Pure inference: no autograd graph is needed.
                    with torch.no_grad():
                        batch = torch.Tensor(state).to(device)
                        Qs = net(batch.view(1, 4, 110, 84))
                    # The biggest Q-value marks the best action.
                    choice = int(np.argmax(Qs.detach().cpu().numpy()))
                action = actions[choice]

                next_state, reward, done, _ = env.step(action)
                env.render()
                total_rewards += reward
                if done:
                    print("Score", total_rewards)
                    total_test_rewards.append(total_rewards)
                    break
                state = image_processor.stack_frame(next_state, False)
    finally:
        # Release the environment even if an episode fails part-way.
        env.close()
    return total_test_rewards


# The name matches pytest's "test*" discovery prefix; opt out so the function
# is not collected as a test case when imported into a test module.
test.__test__ = False