-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtrain.py
More file actions
123 lines (91 loc) · 3.52 KB
/
train.py
File metadata and controls
123 lines (91 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import numpy as np
import yaml
from collections import deque
from unityagents import UnityEnvironment
from maddpg.multi_agent import MultiAgent
from utils.utils import save_agent
def train(environment,
          train_config,
          agent_config,
          print_every,
          solving_score,
          random_seed=None):
    """Run a full MADDPG training loop on a Unity environment.

    Args:
        environment: an already-opened ``UnityEnvironment``.
        train_config: dict with 'n_episodes', 'max_t', 'ou_noise_start'
            and 'ou_noise_decay_rate' (plus agent training hyperparameters).
        agent_config: dict of per-agent hyperparameters forwarded to MultiAgent.
        print_every: window size (in episodes) for the running average score,
            also the interval at which a progress line is kept on screen.
        solving_score: average windowed score at which the environment is
            considered solved.
        random_seed: optional seed forwarded to MultiAgent.

    Returns:
        (multi_agent, all_train_scores, solve_epi) where ``solve_epi`` is the
        episode index at which the environment was first solved (0 if never).
    """
    n_episodes = train_config['n_episodes']
    max_t = train_config['max_t']
    noise_level = train_config['ou_noise_start']
    noise_decay = train_config['ou_noise_decay_rate']

    # Use the environment's default (first) brain for both sizing and stepping.
    env = environment
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]

    # Observation size accounts for stacked frames; one agent per env slot.
    obs_size = (brain.vector_observation_space_size
                * brain.num_stacked_vector_observations)
    multi_agent = MultiAgent(
        agent_count=len(env_info.agents),
        observation_size=obs_size,
        action_size=brain.vector_action_space_size,
        train_config=train_config,
        agent_config=agent_config,
        seed=random_seed
    )

    all_train_scores = []
    solve_epi = 0
    score_window = deque(maxlen=print_every)

    for i_episode in range(1, n_episodes + 1):
        episode_scores = train_episode(env, multi_agent, brain_name,
                                       max_t, noise_level)
        # Tennis-style scoring: an episode's score is the best agent's score.
        best_score = np.max(episode_scores)
        noise_level *= noise_decay

        score_window.append(best_score)
        all_train_scores.append(best_score)
        window_mean = np.mean(score_window)

        print('\rEpisode {}\tAverage Training Score: {:.3f}'
              .format(i_episode, window_mean), end='')
        if i_episode % print_every == 0:
            # Re-print without the carriage return so the line persists.
            print('\rEpisode {}\tAverage Training Score: {:.3f}'
                  .format(i_episode, window_mean))
        if window_mean >= solving_score and solve_epi == 0:
            print('\nEnvironment solved in {:d} episodes!'
                  '\tAverage Training Score: {:.3f}'
                  .format(i_episode, window_mean))
            solve_epi = i_episode
    return multi_agent, all_train_scores, solve_epi
def train_episode(env, multi_agent, brain_name, max_t, ou_noise):
    """Play one training episode and let the agents learn from each step.

    Args:
        env: the Unity environment.
        multi_agent: the MultiAgent wrapper that acts, learns, and resets.
        brain_name: name of the brain to read observations/rewards from.
        max_t: maximum number of timesteps before the episode is cut off.
        ou_noise: current Ornstein-Uhlenbeck noise scale for exploration.

    Returns:
        np.ndarray of per-agent cumulative rewards for the episode.
    """
    env_info = env.reset(train_mode=True)[brain_name]
    observations = env_info.vector_observations
    multi_agent.reset()
    scores = np.zeros(multi_agent.agent_count)

    for _ in range(max_t):
        actions = multi_agent.act(observations, noise=ou_noise)
        step_info = env.step(actions)[brain_name]

        next_observations = step_info.vector_observations
        rewards = np.asarray(step_info.rewards)
        dones = np.asarray(step_info.local_done)

        # Store the transition and (possibly) trigger a learning update.
        multi_agent.step(observations, actions, rewards,
                         next_observations, dones)

        scores += rewards
        observations = next_observations
        # Stop as soon as any agent signals episode termination.
        if dones.any():
            break
    return scores
if __name__ == '__main__':
    # Load the experiment configuration. safe_load avoids arbitrary object
    # construction (yaml.load without a Loader is deprecated and unsafe).
    yaml_path = 'examples/tennis/config.yaml'
    with open(yaml_path, 'r') as f:
        cfg = yaml.safe_load(f)

    env_filepath = cfg['train_env_filepath']
    train_config = cfg['train_config']
    agent_config = cfg['agent_config']
    solving_score = cfg['solving_score']
    checkpoint_save_path = cfg['model_filepath']

    env = UnityEnvironment(file_name=env_filepath)
    multi_agent, _, _ = train(
        environment=env,
        train_config=train_config,
        agent_config=agent_config,
        print_every=100,
        # Use the configured threshold instead of a hard-coded 0.5, which
        # silently ignored the 'solving_score' value read from the config.
        solving_score=solving_score,
        random_seed=0
    )
    env.close()
    save_agent(multi_agent, checkpoint_save_path)