Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nasim/agents/bruteforce_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def run_bruteforce_agent(env, step_limit=1e6, verbose=True):
act = next(act_iter)
cycle_complete = True

- _, rew, done, env_step_limit_reached, _ = env.step(act)
+ _, rew, done, env_step_limit_reached, _ = env.step(int(act))
total_reward += rew

if cycle_complete and verbose:
Expand Down
9 changes: 4 additions & 5 deletions nasim/agents/ql_replay_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,10 +247,9 @@ def run_train_episode(self, step_limit):
episode_return = 0

while not done and not env_step_limit_reached and steps < step_limit:
- a = self.get_egreedy_action(o, self.get_epsilon())
-
+ a = self.get_egreedy_action(o[0], self.get_epsilon())
next_o, r, done, env_step_limit_reached, _ = self.env.step(a)
- self.replay.store(o, a, next_o, r, done)
+ self.replay.store(o[0], a, next_o, r, done)
self.steps_done += 1
mean_td_error, mean_v = self.optimize()
self.logger.add_scalar(
Expand Down Expand Up @@ -287,7 +286,7 @@ def run_eval_episode(self,
input("Initial state. Press enter to continue..")

while not done and not env_step_limit_reached:
- a = self.get_egreedy_action(o, eval_epsilon)
+ a = self.get_egreedy_action(o[0], eval_epsilon)
next_o, r, done, env_step_limit_reached, _ = env.step(a)
o = next_o
episode_return += r
Expand All @@ -296,7 +295,7 @@ def run_eval_episode(self,
print("\n" + line_break)
print(f"Step {steps}")
print(line_break)
- print(f"Action Performed = {env.action_space.get_action(a)}")
+ print(f"Action Performed = {env.action_space.get_action(int(a))}")
env.render(render_mode)
print(f"Reward = {r}")
print(f"Done = {done}")
Expand Down
2 changes: 1 addition & 1 deletion nasim/agents/random_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run_random_agent(env, step_limit=1e6, verbose=True):

while not done and not env_step_limit_reached and t < step_limit:
a = env.action_space.sample()
- _, r, done, env_step_limit_reached, _ = env.step(a)
+ _, r, done, env_step_limit_reached, _ = env.step(int(a))
total_reward += r
if (t+1) % 100 == 0 and verbose:
print(f"{t}: {total_reward}")
Expand Down