forked from miyosuda/async_deep_reinforce
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy patha3c_display.py
More file actions
102 lines (83 loc) · 3.08 KB
/
a3c_display.py
File metadata and controls
102 lines (83 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import random
import os
import pickle
from game_state import GameState
from game_ac_network import GameACFFNetwork, GameACLSTMNetwork
from a3c_training_thread import A3CTrainingThread
from rmsprop_applier import RMSPropApplier
import options
options = options.options
def choose_action(pi_values, num_experiments=None):
    """Sample an action index from a policy's probability vector.

    Runs ``num_experiments`` multinomial trials over ``pi_values`` and
    returns the index of the action that was drawn most often (the lowest
    index wins ties, as with ``argmax``).

    Args:
        pi_values: 1-D sequence of action probabilities. It is not
            modified.
        num_experiments: Number of multinomial trials. Defaults to
            ``options.num_experiments`` when omitted.

    Returns:
        The index of the selected action.
    """
    if num_experiments is None:
        num_experiments = options.num_experiments

    # Work on a float64 copy so the caller's array is left untouched.
    # Renormalising in float64 keeps the sum within the tolerance that
    # np.random.multinomial enforces, even when float32 rounding pushed the
    # input slightly above 1. Clipping keeps every entry non-negative, so an
    # exact-zero probability can no longer trigger a ValueError.
    probs = np.array(pi_values, dtype=np.float64)
    np.clip(probs, 0.0, None, out=probs)
    probs /= probs.sum()

    action_samples = np.random.multinomial(num_experiments, probs)
    return action_samples.argmax(0)
# The display tool runs its network on the CPU.
device = "/cpu:0"

# Build the LSTM or the feed-forward actor-critic network, as selected by
# options.use_lstm.
# NOTE(review): the -1 handed to the LSTM network is presumably a thread
# index placeholder -- confirm against GameACLSTMNetwork.
global_network = (
    GameACLSTMNetwork(options.action_size, -1, device)
    if options.use_lstm
    else GameACFFNetwork(options.action_size, device)
)
# Create the session and give every variable an initial value; a checkpoint
# restore below overwrites those values when one is found.
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

saver = tf.train.Saver()
checkpoint = tf.train.get_checkpoint_state(options.checkpoint_dir)

# Pseudo-count state used when no saved state is loaded.
psc_info = {"psc_n": 0, "psc_vcount": None}

if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(sess, checkpoint.model_checkpoint_path)
    print("checkpoint loaded:", checkpoint.model_checkpoint_path)

    # The global step is the suffix after the LAST hyphen of the checkpoint
    # path. Splitting from the right keeps this correct when the directory
    # or file prefix itself contains hyphens.
    global_t = int(checkpoint.model_checkpoint_path.rsplit("-", 1)[-1])
    print(">>> global step set: ", global_t)

    # Load the pseudo-count state stored for the same global step, if any.
    if options.psc_use:
        psc_fname = os.path.join(options.checkpoint_dir, "psc." + str(global_t))
        if os.path.exists(psc_fname):
            # NOTE: pickle.load can execute arbitrary code; only point
            # options.checkpoint_dir at directories you trust.
            with open(psc_fname, "rb") as f:
                psc_info = pickle.load(f)
            print("psc_info loaded:", psc_fname)
        else:
            print("psc_info does not exist and not loaded:", psc_fname)
# Create the single game instance that the display loop plays.
# NOTE(review): the leading 0 is presumably a random seed -- confirm against
# GameState.
game_state = GameState(
    0,
    options,
    display=options.display,
    no_op_max=30,
    thread_index=0,
)

# Pass the pseudo-count state to the game when that feature is enabled.
if options.psc_use:
    game_state.psc_set_psc_info(psc_info)

# With gym, the recording directory is set once here, before any episode.
record_dir = options.record_screen_dir
if record_dir is not None and options.use_gym:
    game_state.set_record_screen_dir(record_dir)
# Per-episode recording directories are used only when gym is not in use and
# a recording directory was requested.
per_episode_recording = (not options.use_gym) and (
    options.record_screen_dir is not None
)

for episode in range(options.num_episode_record):
    episode_record_dir = None
    if per_episode_recording:
        # Directory name: the ROM name up to its first dot, plus the
        # zero-padded episode number.
        rom_stem = options.rom.split(".")[0]
        episode_record_dir = os.path.join(
            options.record_screen_dir, rom_stem + "-e{:03d}".format(episode)
        )
        os.makedirs(episode_record_dir)
        game_state.set_record_screen_dir(episode_record_dir)

    steps = 0
    reward = 0
    finished = False
    while not finished:
        # Sample an action from the policy for the current state and play it.
        policy = global_network.run_policy(sess, game_state.s_t)
        game_state.process(choose_action(policy))

        # Accumulate and report the score whenever it changes.
        if game_state.reward != 0:
            reward += game_state.reward
            print("SCORE=", reward)

        steps += 1
        # Stop on a terminal state, or when the play time is too long.
        finished = game_state.terminal or steps > options.max_play_steps
        if finished:
            game_state.reset()
            print("Game finised with score=", reward)
        else:
            game_state.update()

    # Tag the finished episode's directory with its final score and length.
    if per_episode_recording:
        os.rename(
            episode_record_dir,
            episode_record_dir + "-r{:04d}-s{:04d}".format(reward, steps),
        )

# With gym, the recorder is closed once, after all episodes.
if options.use_gym:
    game_state.close_record_screen_dir()