berkeleyflow · AboudyKreidieh · Aug 14, 2018 · Aug 14, 2018 · Aug 14, 2018 · Aug 14, 2018
@@ -436,7 +436,7 @@ def _add_departed(self, veh_id, veh_type, env):
         self.set_absolute_position(veh_id, 0)
 
         # set the "last_lc" parameter of the vehicle
-        self.set_state(veh_id, "last_lc", env.time_counter)
+        self.set_state(veh_id, "last_lc", -float("inf"))
 
         # specify the initial speed
         self.__vehicles[veh_id]["initial_speed"] = \

@@ -125,6 +125,17 @@ def __init__(self, env_params, sumo_params, scenario):
         if not hasattr(self.env_params, "evaluate"):
             self.env_params.evaluate = False
 
+        # list of vehicle ids sorted by position (defaults to the regular list
+        # of vehicle ids if "sort_vehicles" in env_params is set to False)
+        self.sorted_ids, self.sorted_extra_data = self.sort_by_position()
+
+        # keeps track of the last time a lane change occurred before the
+        # current time step. This is meant to ensure that lane changes by RL
+        # vehicles do not occur in quick succession.
+        self.prev_last_lc = dict()
+        for veh_id in self.vehicles.get_ids():
+            self.prev_last_lc[veh_id] = -float("inf")
+
         self.start_sumo()
         self.setup_initial_state()
 
@@ -523,8 +534,7 @@ def reset(self):
 
             self.initial_state = deepcopy(initial_state)
 
-        # # clear all vehicles from the network and the vehicles class
-
+        # clear all vehicles from the network and the vehicles class
         for veh_id in self.traci_connection.vehicle.getIDList():
             try:
                 self.traci_connection.vehicle.remove(veh_id)
@@ -534,8 +544,8 @@ def reset(self):
                 print("Error during start: {}".format(traceback.format_exc()))
                 pass
 
-        # clear all vehicles from the network and the vehicles class
-        # FIXME (ev, ak) this is weird and shouldn't be necessary
+        # clear collided vehicles that were not deleted in the first round of
+        # removals (this may be a SUMO bug and seems to happen stochastically)
         for veh_id in list(self.vehicles.get_ids()):
             self.vehicles.remove(veh_id)
             try:
@@ -577,7 +587,7 @@ def reset(self):
         # update the colors of vehicles
         self.update_vehicle_colors()
 
-        self.prev_last_lc = dict()
+        self.prev_last_lc.clear()
         for veh_id in self.vehicles.get_ids():
             # re-initialize the vehicles class with the states of the vehicles
             # at the start of a rollout

@@ -210,14 +210,13 @@ def get_state(self):
                     lane_headways[j] /= max_length
                     vel_in_front[j] = self.vehicles.get_speed(lane_leader) \
                         / max_speed
+                    self.visible.extend([lane_leader])
             for j, lane_follower in enumerate(lane_followers):
                 if lane_follower != '':
                     lane_headways[j] /= max_length
                     vel_behind[j] = self.vehicles.get_speed(lane_follower) \
                         / max_speed
-
-            self.visible.extend(lane_leaders)
-            self.visible.extend(lane_followers)
+                    self.visible.extend([lane_follower])
 
             # add the headways, tailways, and speed for all lane leaders
             # and followers

@@ -91,7 +91,7 @@ def compute_reward(self, state, rl_actions, **kwargs):
         eta = 8  # 0.25
         rl_actions = np.array(rl_actions)
         accel_threshold = 0
-        np.tanh(np.mean(np.abs(rl_actions)))
+
         if np.mean(np.abs(rl_actions)) > accel_threshold:
             reward += eta * (accel_threshold - np.mean(np.abs(rl_actions)))
 

@@ -29,17 +29,20 @@ class and returns a real number.
 
     @property
     def action_space(self):
-        return Box(low=0, high=0, shape=0, dtype=np.float32)
+        return Box(low=0, high=0, shape=(0,), dtype=np.float32)
 
     @property
     def observation_space(self):
-        return Box(low=0, high=0, shape=0, dtype=np.float32)
+        return Box(low=0, high=0, shape=(0,), dtype=np.float32)
 
     def _apply_rl_actions(self, rl_actions):
         return
 
     def compute_reward(self, state, rl_actions, **kwargs):
-        return 0
+        if "reward_fn" in self.env_params.additional_params:
+            return self.env_params.additional_params["reward_fn"](self)
+        else:
+            return 0
 
     def get_state(self, **kwargs):
         return np.array([])