From 496420ca5ebb2ed7b69d1e58cfa37aff985c04a0 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:36:16 -0700 Subject: [PATCH 1/7] bug fix to TestEnv and added option for custom reward functions --- flow/envs/test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flow/envs/test.py b/flow/envs/test.py index bfc8b241..54b6c6c0 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -29,17 +29,20 @@ class and returns a real number. @property def action_space(self): - return Box(low=0, high=0, shape=0, dtype=np.float32) + return Box(low=0, high=0, shape=(0,), dtype=np.float32) @property def observation_space(self): - return Box(low=0, high=0, shape=0, dtype=np.float32) + return Box(low=0, high=0, shape=(0,), dtype=np.float32) def _apply_rl_actions(self, rl_actions): return def compute_reward(self, state, rl_actions, **kwargs): - return 0 + if "reward_fn" in self.env_params.additional_params: + return self.env_params.additional_params["reward_fn"](self) + else: + return 0 def get_state(self, **kwargs): return np.array([]) From 5f1ff176de70903cf2411f61db5d3990c93aa45f Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:36:55 -0700 Subject: [PATCH 2/7] self.sorted_ids instantiated in __init__ and cleaned some comments --- flow/envs/base_env.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index db7d3ecf..a29ef705 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -124,6 +124,11 @@ def __init__(self, env_params, sumo_params, scenario): if not hasattr(self.env_params, "evaluate"): self.env_params.evaluate = False + # list of sorted ids (defaults to regular list of vehicle ids if the + # "sort_vehicles" attribute in env_params is set to False) + self.sorted_ids = deepcopy(self.vehicles.get_ids()) + self.sorted_extra_data = None + self.start_sumo() self.setup_initial_state() @@ -521,8 +526,7 @@ def reset(self): 
self.initial_state = deepcopy(initial_state) - # # clear all vehicles from the network and the vehicles class - + # clear all vehicles from the network and the vehicles class for veh_id in self.traci_connection.vehicle.getIDList(): try: self.traci_connection.vehicle.remove(veh_id) @@ -532,8 +536,8 @@ def reset(self): print("Error during start: {}".format(traceback.format_exc())) pass - # clear all vehicles from the network and the vehicles class - # FIXME (ev, ak) this is weird and shouldn't be necessary + # clear collided vehicles that were not deleted in the first round of + # removals (this may be a sumo bug and seems to happen stochastically) for veh_id in list(self.vehicles.get_ids()): self.vehicles.remove(veh_id) try: From 994fc888006d4cbead60f23829f65b0399fa889d Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 14:53:52 -0700 Subject: [PATCH 3/7] instantiate prev_last_lc in __init__ --- flow/envs/base_env.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index a29ef705..b12d0800 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -129,6 +129,13 @@ def __init__(self, env_params, sumo_params, scenario): self.sorted_ids = deepcopy(self.vehicles.get_ids()) self.sorted_extra_data = None + # keeps track of the last time a lane change occurred before the + # current time step. This is meant to ensure that lane changes by RL + # vehicles do not occur in quick succession. 
+ self.prev_last_lc = dict() + for veh_id in self.vehicles.get_ids(): + self.prev_last_lc[veh_id] = -float("inf") + self.start_sumo() self.setup_initial_state() @@ -578,7 +585,7 @@ def reset(self): # update the colors of vehicles self.update_vehicle_colors() - self.prev_last_lc = dict() + self.prev_last_lc.clear() for veh_id in self.vehicles.get_ids(): # re-initialize the vehicles class with the states of the vehicles # at the start of a rollout From b1279e02b03b280b99b1855d80261c78871ee480 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:12:48 -0700 Subject: [PATCH 4/7] bug fix to what is deemed visible (was added a '' string sometimes) --- flow/envs/loop/lane_changing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index 59ff6d7f..a5925ca7 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -127,6 +127,8 @@ def _apply_rl_actions(self, actions): direction[non_lane_changing_veh] = \ np.array([0] * sum(non_lane_changing_veh)) + print(direction) + self.apply_acceleration(sorted_rl_ids, acc=acceleration) self.apply_lane_change(sorted_rl_ids, direction=direction) @@ -210,14 +212,13 @@ def get_state(self): lane_headways[j] /= max_length vel_in_front[j] = self.vehicles.get_speed(lane_leader) \ / max_speed + self.visible.extend([lane_leader]) for j, lane_follower in enumerate(lane_followers): if lane_follower != '': lane_headways[j] /= max_length vel_behind[j] = self.vehicles.get_speed(lane_follower) \ / max_speed - - self.visible.extend(lane_leaders) - self.visible.extend(lane_followers) + self.visible.extend([lane_follower]) # add the headways, tailways, and speed for all lane leaders # and followers From 8e2ef241547220e0b921e5dbda516101c4dd2a91 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:45:26 -0700 Subject: [PATCH 5/7] bug fix to the value of newly departed vehicles --- flow/core/vehicles.py 
| 2 +- flow/envs/loop/lane_changing.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/flow/core/vehicles.py b/flow/core/vehicles.py index ee1f2dd0..55f62c76 100755 --- a/flow/core/vehicles.py +++ b/flow/core/vehicles.py @@ -437,7 +437,7 @@ def _add_departed(self, veh_id, veh_type, env): self.set_absolute_position(veh_id, 0) # set the "last_lc" parameter of the vehicle - self.set_state(veh_id, "last_lc", env.time_counter) + self.set_state(veh_id, "last_lc", -float("inf")) # specify the initial speed self.__vehicles[veh_id]["initial_speed"] = \ diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index a5925ca7..a6e538ca 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -127,8 +127,6 @@ def _apply_rl_actions(self, actions): direction[non_lane_changing_veh] = \ np.array([0] * sum(non_lane_changing_veh)) - print(direction) - self.apply_acceleration(sorted_rl_ids, acc=acceleration) self.apply_lane_change(sorted_rl_ids, direction=direction) From 62417c74b21f49ff0b569130aa2de20c65c32878 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Tue, 14 Aug 2018 15:46:15 -0700 Subject: [PATCH 6/7] tests to ensure that all environments continue to run as they have in the past --- tests/fast_tests/test_environments.py | 383 ++++++++++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 tests/fast_tests/test_environments.py diff --git a/tests/fast_tests/test_environments.py b/tests/fast_tests/test_environments.py new file mode 100644 index 00000000..936496f2 --- /dev/null +++ b/tests/fast_tests/test_environments.py @@ -0,0 +1,383 @@ +import unittest +import os +import numpy as np + +from flow.core.vehicles import Vehicles +from flow.core.params import NetParams, EnvParams, SumoParams, InitialConfig +from flow.controllers import RLController, IDMController +from flow.core.rewards import desired_velocity + +from flow.scenarios import LoopScenario, CircleGenerator +from 
flow.scenarios.loop.loop_scenario import ADDITIONAL_NET_PARAMS \ + as LOOP_PARAMS + +from flow.envs import TestEnv, AccelEnv, LaneChangeAccelEnv, \ + LaneChangeAccelPOEnv, WaveAttenuationEnv, WaveAttenuationPOEnv +from flow.envs.loop.loop_accel import ADDITIONAL_ENV_PARAMS as ACCELENV_PARAMS +from flow.envs.loop.lane_changing import ADDITIONAL_ENV_PARAMS as LCENV_PARAMS +from flow.envs.loop.wave_attenuation import ADDITIONAL_ENV_PARAMS as WAV_PARAMS + +os.environ["TEST_FLAG"] = "True" + + +class TestTestEnv(unittest.TestCase): + + """Tests the TestEnv environment in flow/envs/test.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("test") + net_params = NetParams(additional_params=LOOP_PARAMS) + env_params = EnvParams() + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = TestEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_obs_space(self): + self.assertEqual(self.env.observation_space.shape[0], 0) + self.assertEqual(len(self.env.observation_space.high), 0) + self.assertEqual(len(self.env.observation_space.low), 0) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], 0) + self.assertEqual(len(self.env.action_space.high), 0) + self.assertEqual(len(self.env.action_space.low), 0) + + def test_get_state(self): + self.assertEqual(len(self.env.get_state()), 0) + + def test_compute_reward(self): + # test the default + self.assertEqual(self.env.compute_reward([], []), 0) + + # test if the "reward_fn" parameter is defined + def reward_fn(*_): + return 1 + self.env.env_params.additional_params["reward_fn"] = reward_fn + self.assertEqual(self.env.compute_reward([], []), 1) + + +class TestAccelEnv(unittest.TestCase): + + """Tests the AccelEnv environment in flow/envs/loop/loop_accel.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", 
acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + net_params = NetParams(additional_params=LOOP_PARAMS) + env_params = EnvParams(additional_params=ACCELENV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = AccelEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + self.env.vehicles.num_rl_vehicles) + self.assertEqual(self.env.action_space.high, + self.env.env_params.additional_params["max_accel"]) + self.assertEqual(self.env.action_space.low, + -self.env.env_params.additional_params["max_decel"]) + + def test_get_state(self): + expected_state = np.array([[self.env.vehicles.get_speed(veh_id) + / self.env.scenario.max_speed, + self.env.get_x_by_id(veh_id) / + self.env.scenario.length] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + def test_compute_reward(self): + rew = self.env.compute_reward([], [], fail=False) + self.assertEqual(rew, desired_velocity(self.env)) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + + +class TestLaneChangeAccelEnv(unittest.TestCase): + + """Tests the LaneChangeAccelEnv env in flow/envs/loop/lane_changing.py""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + loop_params["lanes"] = 2 + net_params = NetParams(additional_params=loop_params) + 
env_params = EnvParams(additional_params=LCENV_PARAMS) + sumo_params = SumoParams() + initial_config = InitialConfig(lanes_distribution=1) + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params, + initial_config=initial_config) + + self.env = LaneChangeAccelEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + 2 * self.env.vehicles.num_rl_vehicles) + self.assertTrue( + (self.env.action_space.high == + np.array([self.env.env_params.additional_params["max_accel"], 1])) + .all()) + self.assertTrue( + (self.env.action_space.low == + np.array([-self.env.env_params.additional_params["max_decel"], + -1])).all()) + + def test_get_state(self): + # normalizers + max_speed = self.env.scenario.max_speed + length = self.env.scenario.length + max_lanes = max(self.env.scenario.num_lanes(edge) + for edge in self.env.scenario.get_edge_list()) + + expected = np.array([[self.env.vehicles.get_speed(veh_id) / max_speed, + self.env.get_x_by_id(veh_id) / length, + self.env.vehicles.get_lane(veh_id) / max_lanes] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected).all()) + + def test_compute_reward(self): + rew = self.env.compute_reward([], [], fail=False) + self.assertEqual(rew, desired_velocity(self.env)) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1, 1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + self.assertEqual(self.env.vehicles.get_lane("rl_0"), 1) + + +class TestLaneChangeAccelPOEnv(unittest.TestCase): + + """Tests the LaneChangeAccelPOEnv env in flow/envs/loop/lane_changing.py. 
+ Note that some tests are skipped here because they are covered by its parent + class: LaneChangeAccelEnv""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + loop_params["lanes"] = 2 + net_params = NetParams(additional_params=loop_params) + env_params = EnvParams(additional_params=LCENV_PARAMS) + sumo_params = SumoParams() + initial_config = InitialConfig(lanes_distribution=1) + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params, + initial_config=initial_config) + + self.env = LaneChangeAccelPOEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.step([]) + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_leader( + self.env.vehicles.get_rl_ids())) + + def test_obs_space(self): + self.assertEqual(self.env.observation_space.shape[0], + 4 * self.env.vehicles.num_rl_vehicles * + self.env.num_lanes + + self.env.vehicles.num_rl_vehicles) + self.assertTrue((np.array(self.env.observation_space.high) == 1).all()) + self.assertTrue((np.array(self.env.observation_space.low) == 0).all()) + + +class TestWaveAttenuationEnv(unittest.TestCase): + + """Tests WaveAttenuationEnv in flow/envs/loop/wave_attenuation.py. 
Note + that, besides the reward function and the reset method, it acts in a very + similar manner as AccelEnv.""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + # picking a number outside the ring range to test the reset in a later + # portion of the class + loop_params["length"] = 2000 + net_params = NetParams(additional_params=LOOP_PARAMS) + + env_params = EnvParams(additional_params=WAV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = WaveAttenuationEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_human_ids()) + + def test_action_space(self): + self.assertEqual(self.env.action_space.shape[0], + self.env.vehicles.num_rl_vehicles) + self.assertEqual(self.env.action_space.high, + self.env.env_params.additional_params["max_accel"]) + self.assertEqual(self.env.action_space.low, + -self.env.env_params.additional_params["max_decel"]) + + def test_get_state(self): + expected_state = np.array([[self.env.vehicles.get_speed(veh_id) + / self.env.scenario.max_speed, + self.env.get_x_by_id(veh_id) / + self.env.scenario.length] + for veh_id in self.env.sorted_ids]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + def test_compute_reward(self): + # explicitly copied over the reward here to make sure we never lose it + # (this is the only reward that has managed to solve for the partially + # observable ring with varying lengths, at least when using policy + # gradient) + vel = np.array([self.env.vehicles.get_speed(veh_id) + for veh_id in self.env.vehicles.get_ids()]) + 
eta_2 = 4. + reward = eta_2 * np.mean(vel) / 20 + eta = 8 # 0.25 + rl_actions = np.array([1]) + accel_threshold = 0 + np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: + reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) + expected_rew = float(reward) + + rew = self.env.compute_reward([], rl_actions=rl_actions, fail=False) + self.assertEqual(rew, expected_rew) + + def test_apply_rl_actions(self): + self.env.step(rl_actions=[1]) + self.assertAlmostEqual(self.env.vehicles.get_speed("rl_0"), 0.1, 2) + + def test_reset(self): + """Tests that the length of the ring road scenario during a reset is + set between the ring_length range. For this reason, we start with a + very large ring in this problem.""" + self.env.reset() + self.assertGreaterEqual(self.env.scenario.length, + self.env.env_params.additional_params[ + "ring_length"][0]) + self.assertLessEqual(self.env.scenario.length, + self.env.env_params.additional_params[ + "ring_length"][1]) + + +class TestWaveAttenuationPOEnv(unittest.TestCase): + + """Tests WaveAttenuationPOEnv in flow/envs/loop/wave_attenuation.py. 
Note + that some tests are skipped here because they are covered by its parent class: + TestWaveAttenuationEnv.""" + + def setUp(self): + vehicles = Vehicles() + vehicles.add("rl", acceleration_controller=(RLController, {})) + vehicles.add("human", acceleration_controller=(IDMController, {})) + + loop_params = LOOP_PARAMS.copy() + # picking a number outside the ring range to test the reset in a later + # portion of the class + loop_params["length"] = 2000 + net_params = NetParams(additional_params=LOOP_PARAMS) + + env_params = EnvParams(additional_params=WAV_PARAMS) + sumo_params = SumoParams() + + scenario = LoopScenario("test_loop", + generator_class=CircleGenerator, + vehicles=vehicles, + net_params=net_params) + + self.env = WaveAttenuationPOEnv(env_params, sumo_params, scenario) + + def tearDown(self): + self.env.terminate() + self.env = None + + def test_observed_ids(self): + self.env.additional_command() + self.assertListEqual(self.env.vehicles.get_observed_ids(), + self.env.vehicles.get_leader( + self.env.vehicles.get_rl_ids())) + + def test_get_state(self): + rl_id = self.env.vehicles.get_rl_ids()[0] + lead_id = self.env.vehicles.get_leader(rl_id) or rl_id + max_speed = 15. + max_length = self.env.env_params.additional_params["ring_length"][1] + + expected_state = np.array([ + self.env.vehicles.get_speed(rl_id) / max_speed, + (self.env.vehicles.get_speed(lead_id) - self.env.vehicles. 
+ get_speed(rl_id)) / max_speed, + self.env.vehicles.get_headway(rl_id) / max_length + ]) + + self.assertTrue((self.env.get_state() == expected_state).all()) + + +if __name__ == '__main__': + unittest.main() From 1ba38b76c02d9938b8d2b430053918acd0714319 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 20 Aug 2018 18:13:55 -0700 Subject: [PATCH 7/7] PR fixes --- flow/envs/base_env.py | 3 +-- flow/envs/loop/wave_attenuation.py | 2 +- tests/fast_tests/test_environments.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index b12d0800..582c3a97 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -126,8 +126,7 @@ def __init__(self, env_params, sumo_params, scenario): # list of sorted ids (defaults to regular list of vehicle ids if the # "sort_vehicles" attribute in env_params is set to False) - self.sorted_ids = deepcopy(self.vehicles.get_ids()) - self.sorted_extra_data = None + self.sorted_ids, self.sorted_extra_data = self.sort_by_position() # keeps track of the last time a lane change occurred before the # current time step. 
This is meant to ensure that lane changes by RL diff --git a/flow/envs/loop/wave_attenuation.py b/flow/envs/loop/wave_attenuation.py index ef749a04..53953081 100644 --- a/flow/envs/loop/wave_attenuation.py +++ b/flow/envs/loop/wave_attenuation.py @@ -91,7 +91,7 @@ def compute_reward(self, state, rl_actions, **kwargs): eta = 8 # 0.25 rl_actions = np.array(rl_actions) accel_threshold = 0 - np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) diff --git a/tests/fast_tests/test_environments.py b/tests/fast_tests/test_environments.py index 936496f2..6e6750d3 100644 --- a/tests/fast_tests/test_environments.py +++ b/tests/fast_tests/test_environments.py @@ -301,7 +301,7 @@ def test_compute_reward(self): eta = 8 # 0.25 rl_actions = np.array([1]) accel_threshold = 0 - np.tanh(np.mean(np.abs(rl_actions))) + if np.mean(np.abs(rl_actions)) > accel_threshold: reward += eta * (accel_threshold - np.mean(np.abs(rl_actions))) expected_rew = float(reward)