From 496420ca5ebb2ed7b69d1e58cfa37aff985c04a0 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:36:16 -0700 Subject: [PATCH 1/7] bug fix to TestEnv and added option for custom reward functions --- flow/envs/test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flow/envs/test.py b/flow/envs/test.py index bfc8b241..54b6c6c0 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -29,17 +29,20 @@ class and returns a real number. @property def action_space(self): - return Box(low=0, high=0, shape=0, dtype=np.float32) + return Box(low=0, high=0, shape=(0,), dtype=np.float32) @property def observation_space(self): - return Box(low=0, high=0, shape=0, dtype=np.float32) + return Box(low=0, high=0, shape=(0,), dtype=np.float32) def _apply_rl_actions(self, rl_actions): return def compute_reward(self, state, rl_actions, **kwargs): - return 0 + if "reward_fn" in self.env_params.additional_params: + return self.env_params.additional_params["reward_fn"](self) + else: + return 0 def get_state(self, **kwargs): return np.array([]) From 5f1ff176de70903cf2411f61db5d3990c93aa45f Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:36:55 -0700 Subject: [PATCH 2/7] self.sorted_ids instantiated in __init__ and cleaned some comments --- flow/envs/base_env.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index db7d3ecf..a29ef705 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -124,6 +124,11 @@ def __init__(self, env_params, sumo_params, scenario): if not hasattr(self.env_params, "evaluate"): self.env_params.evaluate = False + # list of sorted ids (defaults to regular list of vehicle ids if the + # "sort_vehicles" attribute in env_params is set to False) + self.sorted_ids = deepcopy(self.vehicles.get_ids()) + self.sorted_extra_data = None + self.start_sumo() self.setup_initial_state() @@ -521,8 +526,7 @@ def reset(self): 
self.initial_state = deepcopy(initial_state) - # # clear all vehicles from the network and the vehicles class - + # clear all vehicles from the network and the vehicles class for veh_id in self.traci_connection.vehicle.getIDList(): try: self.traci_connection.vehicle.remove(veh_id) @@ -532,8 +536,8 @@ def reset(self): print("Error during start: {}".format(traceback.format_exc())) pass - # clear all vehicles from the network and the vehicles class - # FIXME (ev, ak) this is weird and shouldn't be necessary + # clear collided vehicles that were not deleted in the first round of + # removals (this may be a sumo bug and seems to happen stochastically) for veh_id in list(self.vehicles.get_ids()): self.vehicles.remove(veh_id) try: From 994fc888006d4cbead60f23829f65b0399fa889d Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:53:52 -0700 Subject: [PATCH 3/7] instantiate prev_last_lc in __init__ --- flow/envs/base_env.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index a29ef705..b12d0800 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -129,6 +129,13 @@ def __init__(self, env_params, sumo_params, scenario): self.sorted_ids = deepcopy(self.vehicles.get_ids()) self.sorted_extra_data = None + # keeps track of the last time a lane change occurred before the + # current time step. This is meant to ensure that lane changes by RL + # vehicles do not occur in quick succession. 
+ self.prev_last_lc = dict() + for veh_id in self.vehicles.get_ids(): + self.prev_last_lc[veh_id] = -float("inf") + self.start_sumo() self.setup_initial_state() @@ -578,7 +585,7 @@ def reset(self): # update the colors of vehicles self.update_vehicle_colors() - self.prev_last_lc = dict() + self.prev_last_lc.clear() for veh_id in self.vehicles.get_ids(): # re-initialize the vehicles class with the states of the vehicles # at the start of a rollout From b1279e02b03b280b99b1855d80261c78871ee480 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:12:48 -0700 Subject: [PATCH 4/7] bug fix to what is deemed visible (was added a '' string sometimes) --- flow/envs/loop/lane_changing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index 59ff6d7f..a5925ca7 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -127,6 +127,8 @@ def _apply_rl_actions(self, actions): direction[non_lane_changing_veh] = \ np.array([0] * sum(non_lane_changing_veh)) + print(direction) + self.apply_acceleration(sorted_rl_ids, acc=acceleration) self.apply_lane_change(sorted_rl_ids, direction=direction) @@ -210,14 +212,13 @@ def get_state(self): lane_headways[j] /= max_length vel_in_front[j] = self.vehicles.get_speed(lane_leader) \ / max_speed + self.visible.extend([lane_leader]) for j, lane_follower in enumerate(lane_followers): if lane_follower != '': lane_headways[j] /= max_length vel_behind[j] = self.vehicles.get_speed(lane_follower) \ / max_speed - - self.visible.extend(lane_leaders) - self.visible.extend(lane_followers) + self.visible.extend([lane_follower]) # add the headways, tailways, and speed for all lane leaders # and followers From 8e2ef241547220e0b921e5dbda516101c4dd2a91 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:45:26 -0700 Subject: [PATCH 5/7] bug fix to the value of newly departed vehicles --- flow/core/vehicles.py 
| 2 +- flow/envs/loop/lane_changing.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/flow/core/vehicles.py b/flow/core/vehicles.py index ee1f2dd0..55f62c76 100755 --- a/flow/core/vehicles.py +++ b/flow/core/vehicles.py @@ -437,7 +437,7 @@ def _add_departed(self, veh_id, veh_type, env): self.set_absolute_position(veh_id, 0) # set the "last_lc" parameter of the vehicle - self.set_state(veh_id, "last_lc", env.time_counter) + self.set_state(veh_id, "last_lc", -float("inf")) # specify the initial speed self.__vehicles[veh_id]["initial_speed"] = \ diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index a5925ca7..a6e538ca 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -127,8 +127,6 @@ def _apply_rl_actions(self, actions): direction[non_lane_changing_veh] = \ np.array([0] * sum(non_lane_changing_veh)) - print(direction) - self.apply_acceleration(sorted_rl_ids, acc=acceleration) self.apply_lane_change(sorted_rl_ids, direction=direction) From 62417c74b21f49ff0b569130aa2de20c65c32878 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:46:15 -0700 Subject: [PATCH 6/7] tests to ensure that all environments continue to run as they have in the past --- tests/fast_tests/test_environments.py | 383 ++++++++++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 tests/fast_tests/test_environments.py diff --git a/tests/fast_tests/test_environments.py b/tests/fast_tests/test_environments.py new file mode 100644 index 00000000..936496f2 --- /dev/null +++ b/tests/fast_tests/test_environments.py @@ -0,0 +1,383 @@ +import unittest +import os +import numpy as np + +from flow.core.vehicles import Vehicles +from flow.core.params import NetParams, EnvParams, SumoParams, InitialConfig +from flow.controllers import RLController, IDMController +from flow.core.rewards import desired_velocity + +from flow.scenarios import LoopScenario, CircleGenerator +from 
flow.scenarios.loop.loop_scenario import ADDITIONAL_NET_PARAMS \ + as LOOP_PARAMS + +from flow.envs import TestEnv, AccelEnv, LaneChangeAccelEnv, \ + LaneChangeAccelPOEnv, WaveAttenuationEnv, WaveAttenuationPOEnv +from flow.envs.loop.loop_accel import ADDITIONAL_ENV_PARAMS as ACCELENV_PARAMS +from flow.envs.loop.lane_changing import ADDITIONAL_ENV_PARAMS as LCENV_PARAMS +from flow.envs.loop.wave_attenuation import ADDITIONAL_ENV_PARAMS as WAV_PARAMS + +os.environ["TEST_FLAG"] = "True" + + +class TestTestEnv(unittest.TestCase): + + """Tests the TestEnv environment in flow/envs/test.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("test") + net_params = NetParams(additional_params=LOOP_PARAMS) + env_params = EnvParams() + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = TestEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_obs_space(self): + self.assertEqual(self.env.observation_space.shape[0], 0) + self.assertEqual(len(self.env.observation_space.high), 0) + self.assertEqual(len(self.env.observation_space.low), 0) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], 0) + self.assertEqual(len(self.env.action_space.high), 0) + self.assertEqual(len(self.env.action_space.low), 0) + + def test_get_state(self): + self.assertEqual(len(self.env.get_state()), 0) + + def test_compute_reward(self): + # test the default + self.assertEqual(self.env.compute_reward([], []), 0) + + # test if the "reward_fn" parameter is defined + def reward_fn(*_): + return 1 + self.env.env_params.additional_params["reward_fn"] = reward_fn + self.assertEqual(self.env.compute_reward([], []), 1) + + +class TestAccelEnv(unittest.TestCase): + + """Tests the AccelEnv environment in flow/envs/loop/loop_accel.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", 
acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + net_params = NetParams(additional_params=LOOP_PARAMS) + env_params = EnvParams(additional_params=ACCELENV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = AccelEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + self.env.vehicles.num_rl_vehicles) + self.assertEqual(self.env.action_space.high, + self.env.env_params.additional_params["max_accel"]) + self.assertEqual(self.env.action_space.low, + -self.env.env_params.additional_params["max_decel"]) + + def test_get_state(self): + expected_state = np.array([[self.env.vehicles.get_speed(veh_id) + / self.env.scenario.max_speed, + self.env.get_x_by_id(veh_id) / + self.env.scenario.length] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + def test_compute_reward(self): + rew = self.env.compute_reward([], [], fail=False) + self.assertEqual(rew, desired_velocity(self.env)) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + + +class TestLaneChangeAccelEnv(unittest.TestCase): + + """Tests the LaneChangeAccelEnv env in flow/envs/loop/lane_changing.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + loop_params["lanes"] = 2 + net_params = NetParams(additional_params=loop_params) + 
env_params = EnvParams(additional_params=LCENV_PARAMS) + sumo_params = SumoParams() + initial_config = InitialConfig(lanes_distribution=1) + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params, + initial_config=initial_config) + + self.env = LaneChangeAccelEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + 2 * self.env.vehicles.num_rl_vehicles) + self.assertTrue( + (self.env.action_space.high == + np.array([self.env.env_params.additional_params["max_accel"], 1])) + .all()) + self.assertTrue( + (self.env.action_space.low == + np.array([-self.env.env_params.additional_params["max_decel"], + -1])).all()) + + def test_get_state(self): + # normalizers + max_speed = self.env.scenario.max_speed + length = self.env.scenario.length + max_lanes = max(self.env.scenario.num_lanes(edge) + for edge in self.env.scenario.get_edge_list()) + + expected = np.array([[self.env.vehicles.get_speed(veh_id) / max_speed, + self.env.get_x_by_id(veh_id) / length, + self.env.vehicles.get_lane(veh_id) / max_lanes] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected).all()) + + def test_compute_reward(self): + rew = self.env.compute_reward([], [], fail=False) + self.assertEqual(rew, desired_velocity(self.env)) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1, 1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + self.assertEqual(self.env.vehicles.get_lane("rl_0"), 1) + + +class TestLaneChangeAccelPOEnv(unittest.TestCase): + + """Tests the LaneChangeAccelPOEnv env in flow/envs/loop/lane_changing.py. 
+ Note that some tests are skipped here because they are covered by its parent + class: LaneChangeAccelEnv""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + loop_params["lanes"] = 2 + net_params = NetParams(additional_params=loop_params) + env_params = EnvParams(additional_params=LCENV_PARAMS) + sumo_params = SumoParams() + initial_config = InitialConfig(lanes_distribution=1) + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params, + initial_config=initial_config) + + self.env = LaneChangeAccelPOEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.step([]) + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_leader( + self.env.vehicles.get_rl_ids())) + + def test_obs_space(self): + self.assertEqual(self.env.observation_space.shape[0], + 4 * self.env.vehicles.num_rl_vehicles * + self.env.num_lanes + + self.env.vehicles.num_rl_vehicles) + self.assertTrue((np.array(self.env.observation_space.high) == 1).all()) + self.assertTrue((np.array(self.env.observation_space.low) == 0).all()) + + +class TestWaveAttenuationEnv(unittest.TestCase): + + """Tests WaveAttenuationEnv in flow/envs/loop/wave_attenuation.py. 
Note + that, besides the reward function and the reset method, it acts in a very + similar manner as AccelEnv.""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + # picking a number outside the ring range to test the reset in a later + # portion of the class + loop_params["length"] = 2000 + net_params = NetParams(additional_params=LOOP_PARAMS) + + env_params = EnvParams(additional_params=WAV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = WaveAttenuationEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + self.env.vehicles.num_rl_vehicles) + self.assertEqual(self.env.action_space.high, + self.env.env_params.additional_params["max_accel"]) + self.assertEqual(self.env.action_space.low, + -self.env.env_params.additional_params["max_decel"]) + + def test_get_state(self): + expected_state = np.array([[self.env.vehicles.get_speed(veh_id) + / self.env.scenario.max_speed, + self.env.get_x_by_id(veh_id) / + self.env.scenario.length] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + def test_compute_reward(self): + # explicitly copied over the reward here to make sure we never lose it + # (this is the only reward that has managed to solve for the partially + # observable ring with varying lengths, at least when using policy + # gradient) + vel = np.array([self.env.vehicles.get_speed(veh_id) + for veh_id in self.env.vehicles.get_ids()]) + 
eta_2 = 4. + reward = eta_2 * np.mean(vel) / 20 + eta = 8 # 0.25 + rl_actions = np.array([1]) + accel_threshold = 0 + np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: + reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) + expected_rew = float(reward) + + rew = self.env.compute_reward([], rl_actions=rl_actions, fail=False) + self.assertEqual(rew, expected_rew) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + + def test_reset(self): + """Tests that the length of the ring road scenario during a reset is + set between the ring_length range. For this reason, we start with a + very large ring in this problem.""" + self.env.reset() + self.assertGreaterEqual(self.env.scenario.length, + self.env.env_params.additional_params[ + "ring_length"][0]) + self.assertLessEqual(self.env.scenario.length, + self.env.env_params.additional_params[ + "ring_length"][1]) + + +class TestWaveAttenuationPOEnv(unittest.TestCase): + + """Tests WaveAttenuationPOEnv in flow/envs/loop/wave_attenuation.py. 
Note + that some tests are skipped here because they are covered by its parent class: + TestWaveAttenuationEnv.""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + # picking a number outside the ring range to test the reset in a later + # portion of the class + loop_params["length"] = 2000 + net_params = NetParams(additional_params=LOOP_PARAMS) + + env_params = EnvParams(additional_params=WAV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = WaveAttenuationPOEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_leader( + self.env.vehicles.get_rl_ids())) + + def test_get_state(self): + rl_id = self.env.vehicles.get_rl_ids()[0] + lead_id = self.env.vehicles.get_leader(rl_id) or rl_id + max_speed = 15. + max_length = self.env.env_params.additional_params["ring_length"][1] + + expected_state = np.array([ + self.env.vehicles.get_speed(rl_id) / max_speed, + (self.env.vehicles.get_speed(lead_id) - self.env.vehicles. 
+ get_speed(rl_id)) / max_speed, + self.env.vehicles.get_headway(rl_id) / max_length + ]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + +if __name__ == '__main__': + unittest.main() From 1ba38b76c02d9938b8d2b430053918acd0714319 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 20 Aug 2018 18:13:55 -0700 Subject: [PATCH 7/7] PR fixes --- flow/envs/base_env.py | 3 +-- flow/envs/loop/wave_attenuation.py | 2 +- tests/fast_tests/test_environments.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index b12d0800..582c3a97 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -126,8 +126,7 @@ def __init__(self, env_params, sumo_params, scenario): # list of sorted ids (defaults to regular list of vehicle ids if the # "sort_vehicles" attribute in env_params is set to False) - self.sorted_ids = deepcopy(self.vehicles.get_ids()) - self.sorted_extra_data = None + self.sorted_ids, self.sorted_extra_data = self.sort_by_position() # keeps track of the last time a lane change occurred before the # current time step. 
This is meant to ensure that lane changes by RL diff --git a/flow/envs/loop/wave_attenuation.py b/flow/envs/loop/wave_attenuation.py index ef749a04..53953081 100644 --- a/flow/envs/loop/wave_attenuation.py +++ b/flow/envs/loop/wave_attenuation.py @@ -91,7 +91,7 @@ def compute_reward(self, state, rl_actions, **kwargs): eta = 8 # 0.25 rl_actions = np.array(rl_actions) accel_threshold = 0 - np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) diff --git a/tests/fast_tests/test_environments.py b/tests/fast_tests/test_environments.py index 936496f2..6e6750d3 100644 --- a/tests/fast_tests/test_environments.py +++ b/tests/fast_tests/test_environments.py @@ -301,7 +301,7 @@ def test_compute_reward(self): eta = 8 # 0.25 rl_actions = np.array([1]) accel_threshold = 0 - np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) expected_rew = float(reward)