diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py
new file mode 100644
index 0000000..571d566
--- /dev/null
+++ b/mainComputerControl/DeepRunner.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+import random
+import traceback
+import numpy as np
+from collections import deque
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.optimizers import Adam
+from keras import backend as K
+
+# Custom Robotic Chaser RL environment
+import environment
+
+EPISODES = 5000
+
+
+class DQNAgent:
+    def __init__(self, state_size, action_size):
+        self.state_size = state_size
+        self.action_size = action_size
+        self.memory = deque(maxlen=2000)
+        self.gamma = 0.95  # discount rate
+        self.epsilon = 1.0  # exploration rate
+        self.epsilon_min = 0.01
+        self.epsilon_decay = 0.99
+        self.learning_rate = 0.001
+        self.model = self._build_model()
+        self.target_model = self._build_model()
+        self.update_target_model()
+
+    def _huber_loss(self, target, prediction):
+        # sqrt(1+error^2)-1
+        error = prediction - target
+        return K.mean(K.sqrt(1+K.square(error))-1, axis=-1)
+
+    def _build_model(self):
+        # Neural Net for Deep-Q learning Model
+        model = Sequential()
+        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
+        model.add(Dense(24, activation='relu'))
+        model.add(Dense(self.action_size, activation='linear'))
+        model.compile(loss=self._huber_loss,
+                      optimizer=Adam(lr=self.learning_rate))
+        return model
+
+    def update_target_model(self):
+        # copy weights from model to target_model
+        self.target_model.set_weights(self.model.get_weights())
+
+    def remember(self, state, action, reward, next_state, done):
+        self.memory.append((state, action, reward, next_state, done))
+
+    def act(self, state):
+        if np.random.rand() <= self.epsilon:
+            return random.randrange(self.action_size)
+        act_values = self.model.predict(state)
+        return np.argmax(act_values[0])  # returns action
+
+    def replay(self, batch_size):
+        minibatch = random.sample(self.memory, batch_size)
+        for state, action, reward, next_state, done in minibatch:
+            target = self.model.predict(state)
+            if done:
+                target[0][action] = reward
+            else:
+                a = self.model.predict(next_state)[0]
+                t = self.target_model.predict(next_state)[0]
+                target[0][action] = reward + self.gamma * t[np.argmax(a)]
+            self.model.fit(state, target, epochs=1, verbose=0)
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
+
+    def load(self, name):
+        self.model.load_weights(name)
+
+    def save(self, name):
+        self.model.save_weights(name)
+
+# A function which determines the reward for a given system state
+def getReward(next_state):
+    # Get the important distances
+    pos_runner = np.array(next_state[0:2])
+    pos_center = np.array((0.5, 0.5))  # arena centre (currently unused)
+    pos_chaser = np.array(next_state[3:5])
+    dist_from_chaser = np.linalg.norm(pos_chaser-pos_runner)
+    # Pseudo-Coulomb style reward based on the runner-chaser separation:
+    # the squared distance is used directly (no inverse) and scaled by 10,
+    # so the runner earns more reward the farther it stays from the chaser
+    distance_reward = 10*dist_from_chaser**2
+    return (distance_reward)
+
+if __name__ == "__main__":
+    env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1")  # Our time step is slightly greater than the time it takes to send a single frame
+    state_size = 6  # 6 element state array - RunnerX, RunnerY, RunnerHeading, ChaserX, ChaserY, ChaserHeading
+    action_size = 21  # 21 discrete actions, each mapped to a heading correction of (action - 11) degrees
+    agent = DQNAgent(state_size, action_size)
+    done = False
+    batch_size = 10
+    try:
+        env.reset([0.2, 0.2], [0.6, 0.6])
+        for e in range(EPISODES):
+            state = env.getSystemState([0, 0, 0, 1, 1, 360])
+            state = np.reshape(state, [1, state_size])
+            reward = 0
+            for time in range(3000):  # Max time of roughly 30 seconds
+                action = agent.act(state)
+                array_state = state[0].tolist()
+                next_state = env.step(array_state, heading_correction=(action-11))
+                array_next_state = next_state.tolist()
+                done = False
+                if env.pursuer.navigator.has_arrived() or array_state[0] >= 0.9 or array_state[1] >= 0.9 or array_state[0] <= 0.1 or array_state[1] <= 0.1:
+                    print env.pursuer.pos, env.pursuer.target_pos
+                    done = True
+                if done:
+                    reward += -10
+                else:
+                    reward += getReward(next_state)
+                next_state = np.reshape(next_state, [1, state_size])
+                agent.remember(state, action, reward, next_state, done)
+                state = next_state
+                if done or time == 2999:
+                    agent.update_target_model()
+                    env.reset([0.2, 0.2], [0.6, 0.6])
+                    print("episode: {}/{}, Time Survived: {}, e: {:.2}"
+                          .format(e, EPISODES, time, agent.epsilon))
+                    break
+                if len(agent.memory) > batch_size:
+                    agent.replay(batch_size)
+                    agent.save("./running-agent-ddqn.h5")
+    except (KeyboardInterrupt, SystemExit):
+        for i in range(100):
+            env.pursuer.stop()
+            env.evader.stop()
+        env.pursuer.end()
+        env.evader.end()
+    except Exception as e:
+        for i in range(100):
+            env.pursuer.stop()
+            env.evader.stop()
+        env.pursuer.end()
+        env.evader.end()
+        traceback.print_exc()
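For orientation, a standalone sketch (not part of the change set) of what DeepRunner.py's reward function and action-to-heading-correction mapping evaluate to, assuming the state layout produced by environment.getSystemState; the sample coordinates are invented:

    import numpy as np

    # state layout: runner x, y, heading, chaser x, y, heading (runner = evader, chaser = pursuer)
    sample_state = [0.2, 0.2, 90.0, 0.6, 0.6, 270.0]
    separation = np.linalg.norm(np.array(sample_state[3:5]) - np.array(sample_state[0:2]))
    print("reward = %.2f" % (10 * separation ** 2))  # ~3.20; the reward grows with runner-chaser separation

    for action in (0, 11, 20):  # 21 discrete actions
        print("action %d -> heading correction %d deg" % (action, action - 11))  # corrections span -11..+9 degrees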
diff --git a/mainComputerControl/class_test.py b/mainComputerControl/class_test.py
index a800958..4317b93 100644
--- a/mainComputerControl/class_test.py
+++ b/mainComputerControl/class_test.py
@@ -1,18 +1,15 @@
-class A:
-    def __init__(self):
-        self.number = 0
-
-class B:
-    def __init__(self, instance_of_a):
-        self.instance_of_a = instance_of_a
+from environment import Environment
 
 if __name__ == '__main__':
-    instance_of_a = A()
-    instance_of_b = B(instance_of_a)
-
-    instance_of_b.instance_of_a.number = 5
-
-    if (instance_of_a.number == instance_of_b.instance_of_a.number):
-        print "Passed by reference"
-    else:
-        print "Passed value :("
+    time_step = 0.3
+    env = Environment(time_step, 'COM5', 'COM9')
+
+    while True:
+        a = raw_input('ENTER to reset')
+        env.reset([0.8, 0.8], [0.2, 0.8])
+        a = raw_input('ENTER to reset')
+        env.reset([0.8, 0.2], [0.8, 0.8])
+        a = raw_input('ENTER to reset')
+        env.reset([0.2, 0.2], [0.8, 0.2])
+        a = raw_input('ENTER to reset')
+        env.reset([0.2, 0.8], [0.2, 0.2])
diff --git a/mainComputerControl/controller.py b/mainComputerControl/controller.py
index 9d3631f..bda44c8 100644
--- a/mainComputerControl/controller.py
+++ b/mainComputerControl/controller.py
@@ -63,7 +63,7 @@ def get_error(self):
         if( (self.error) < -180):
             self.error = (360 + self.target_heading - self.current_heading )
 
-        print self.error
+        #print self.error
 
     def get_motor_input_pivot(self):
         # update integrator term
@@ -120,7 +120,7 @@ def command_motors_pivot(self):
             self.xBee.send_command(2)
 
         # send motor speed
-        print "Pivot Throttle: ", self.motor_input_pivot
+        #print "Pivot Throttle: ", self.motor_input_pivot
         self.xBee.send_command(abs(self.motor_input_pivot))
 
     def command_motors_forward(self):
diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py
index 0e9f029..8c0f583 100644
--- a/mainComputerControl/environment.py
+++ b/mainComputerControl/environment.py
@@ -4,8 +4,10 @@
 from rover import Rover
 from tracker import Tracker
 
+import numpy as np
+
 class Environment():
-    def __init__(self, system_time_step):
+    def __init__(self, system_time_step, usb_pursuer, usb_evader):
         # system
         self.system_tracker = Tracker()
         self.timer = time.time()
@@ -17,27 +19,58 @@ def __init__(self, system_time_step):
         self.pursuer = Rover(tracker=self.system_tracker,
                              my_id=self.pursuer_id,
                              target_id=self.evader_id,
+                             forward_speed=100,
+                             proportional_gain=2.5,
                              time_step=self.system_time_step,
-                             comm_port='COM5')
+                             reversed=True,
+                             comm_port=usb_pursuer)
 
         self.evader = Rover(tracker=self.system_tracker,
                             my_id=self.evader_id,
                             target_id=None,
                             time_step=self.system_time_step,
                             max_pivot_input=100,
-                            forward_speed=100,
+                            forward_speed=130,
                             pivot_threshold=30,
                             proportional_gain=5,
                             integrator_gain=0,
-                            reversed=True,
-                            comm_port='COM9')
+                            reversed=False,
+                            comm_port=usb_evader)
+
+        self.setThresh(0.18)
+
+    def getSystemState(self, prev_state):
+        self.system_tracker.update()
 
-    def step(self, target_heading=None, target_position=None):
+        # return the system state
+        try:
+            evader_x = self.system_tracker.get_pos(self.evader_id)[0]
+            evader_y = self.system_tracker.get_pos(self.evader_id)[1]
+            evader_heading = self.system_tracker.get_heading(self.evader_id)
+        except:
+            evader_x = prev_state[0]
+            evader_y = prev_state[1]
+            evader_heading = prev_state[2]
+        try:
+            pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0]
+            pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1]
+            pursuer_heading = self.system_tracker.get_heading(self.pursuer_id)
+        except:
+            pursuer_x = prev_state[3]
+            pursuer_y = prev_state[4]
+            pursuer_heading = prev_state[5]
+        # Round Positions to the Centimeter and directions to full degrees
+        plain_state = (np.round(evader_x, 2), np.round(evader_y, 2), np.round(evader_heading), np.round(pursuer_x, 2), np.round(pursuer_y, 2), np.round(pursuer_heading))
+        system_state_array = np.array(plain_state)
+        return system_state_array
+
+
+    def step(self, prev_state, target_heading=None, target_position=None, heading_correction=None):
         # update environmate state
         self.system_tracker.update()
 
-        if ((target_heading==None) and (target_position == None)):
-            return None
+        if ((target_heading==None) and (target_position == None) and (heading_correction == None)):
+            pass
 
         # update system state
         elif (target_heading):
@@ -49,26 +82,29 @@ def step(self, target_heading=None, target_position=None):
             self.pursuer.update_state()
             self.evader.update_state(target_pos=target_position)
 
+        elif(heading_correction != None):
+            self.pursuer.update_state()
+            self.evader.update_state(heading_correction=heading_correction)
+
         # update actions
         self.pursuer.update_action()
         self.evader.update_action()
 
         # wait for there to be information to report and update
         time.sleep(self.system_time_step)
-        self.system_tracker.update()
+        system_state = self.getSystemState(prev_state)
 
-        # return the system state
-        evader_x = self.system_tracker.get_pos(self.evader_id)[0]
-        evader_y = self.system_tracker.get_pos(self.evader_id)[1]
-        evader_heading = self.system_tracker.get_heading(self.evader_id)
-        pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0]
-        pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1]
-        pursuer_heading = self.system_tracker.get_heading(self.pursuer_id)
+        return system_state
 
-        return (evader_x, evader_y, evader_heading, pursuer_x, pursuer_y, pursuer_heading)
 
-    def reset(self):
+    def setThresh(self, thresh):
+        self.pursuer.navigator.arrival_distance = thresh
+        self.evader.navigator.arrival_distance = thresh
+
+
+    def reset(self, pursuer_target, evader_target):
         # generate a random starting position
+        """
         pursuer_x = random.randrange(20, 800, 1)/1000.0
         pursuer_y = random.randrange(20, 800, 1)/1000.0
         evader_x = random.randrange(20, 800, 1)/1000.0
@@ -76,16 +112,36 @@ def reset(self):
         pursuer_target = [pursuer_x, pursuer_y]
         evader_target = [evader_x, evader_y]
+        """
 
-        # tell robots to go to their reset locations
+        print pursuer_target, evader_target
         self.pursuer.update_state(target_pos=pursuer_target)
         self.evader.update_state(target_pos=evader_target)
-
+        self.setThresh(0.05)
         # wait until they have arrived
-        while ((not self.pursuer.navigator.has_arrived()) and
+        while ((not self.pursuer.navigator.has_arrived()) or
               (not self.evader.navigator.has_arrived())):
+            # keep moving toward the targets
             self.system_tracker.update()
-            self.pursuer.update_action()
-            self.evader.update_action()
-            time.sleep(0.001)
+            if time.time() - self.timer >= self.system_time_step:
+                self.timer = time.time()
+                # tell robots to go to their reset locations
+                self.pursuer.update_state(target_pos=pursuer_target)
+                self.evader.update_state(target_pos=evader_target)
+            # stop whoever has arrived
+            if (self.pursuer.navigator.has_arrived()):
+                self.pursuer.coast()
+            else:
+                self.pursuer.update_action()
+
+            if (self.evader.navigator.has_arrived()):
+                self.evader.coast()
+            else:
+                self.evader.update_action()
+
+
+        for i in range(100):
+            self.evader.coast()
+            self.pursuer.coast()
+        self.setThresh(0.18)
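A minimal usage sketch of the reworked Environment interface, mirroring how DeepRunner.py drives it (not part of the change set); the serial port names are placeholders for whatever the two radios enumerate as:

    from environment import Environment

    env = Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1")  # time step, pursuer port, evader port
    env.reset([0.2, 0.2], [0.6, 0.6])                        # drive both rovers to their start positions

    state = env.getSystemState([0, 0, 0, 1, 1, 360])         # prev_state is the fallback if a fiducial is lost
    for _ in range(10):
        state = env.step(state.tolist(), heading_correction=5)  # nudge the evader's desired heading by +5 degrees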
diff --git a/mainComputerControl/main_chase.py b/mainComputerControl/main_chase.py
index 42d7666..f269010 100644
--- a/mainComputerControl/main_chase.py
+++ b/mainComputerControl/main_chase.py
@@ -11,10 +11,11 @@
 
 # tricicle -- default conotroller is for this guy
 pursuer = Rover(tracker=system_tracker,
-                my_id=0,
-                target_id=2,
+                my_id=2,
+                target_id=0,
                 time_step=system_time_step,
-                comm_port='COM5')
+                reversed=True,
+                comm_port='COM9')
 
 while(True):
     try:
diff --git a/mainComputerControl/navigator.py b/mainComputerControl/navigator.py
index b2a8d97..25b7768 100644
--- a/mainComputerControl/navigator.py
+++ b/mainComputerControl/navigator.py
@@ -43,5 +43,7 @@ def has_arrived(self):
 
         if (self.my_pos and self.target_pos):
             if (self.distance_to_target < self.arrival_distance):
+                #print "Arrived"
                 return True
+        #print "Not Arrived"
         return False
diff --git a/mainComputerControl/navigator.pyc b/mainComputerControl/navigator.pyc
index c08a462..5c60ebd 100644
Binary files a/mainComputerControl/navigator.pyc and b/mainComputerControl/navigator.pyc differ
diff --git a/mainComputerControl/rover.py b/mainComputerControl/rover.py
index ac931da..068b2cc 100644
--- a/mainComputerControl/rover.py
+++ b/mainComputerControl/rover.py
@@ -36,17 +36,39 @@ def __init__(self,
         self.target_pos = [0, 0]
         self.desired_heading = 0
 
-    def update_state(self, target_pos=None, desired_heading=None):
+    def update_state(self,
+                     target_pos=None,
+                     desired_heading=None,
+                     heading_correction=None):
+        # update my state parameters
         self.current_heading = self.tracker.get_heading(self.id)
         self.pos = self.tracker.get_pos(self.id)
 
-        # update my target's state parmeters
+        # update my target state parameters
         if (target_pos):
             self.target_pos = target_pos
+            if (self.pos and self.target_pos):
+                self.desired_heading = self.navigator \
+                    .get_target_heading(self.pos, self.target_pos)
+            else:
+                self.desired_heading = None
         elif (desired_heading):
             self.desired_heading = desired_heading
             self.target_pos = None
+        elif (heading_correction != None):
+            # make sure desired_heading is not None
+            if (self.desired_heading == None):
+                self.desired_heading = self.tracker.get_heading(self.id)
+
+            # the tracker could still return None...
+            if (self.desired_heading == None):
+                return
+
+            # calculate the new heading!
+            new_heading = (self.desired_heading+heading_correction)%360
+            self.desired_heading = new_heading
+            self.target_pos = None
         else:
             self.target_pos = self.tracker.get_pos(self.target_id)
             if (self.pos and self.target_pos):
@@ -70,5 +92,11 @@ def update_action(self):
             # no new data (fiducial is not in view)
             self.controller.coast()
 
-    def end(self):
+    def stop(self):
         self.controller.stop()
+
+    def coast(self):
+        self.controller.coast()
+
+    def end(self):
+        self.controller.close()
diff --git a/reacTIVision-windows-64bit/calibration.grid.bak b/reacTIVision-windows-64bit/calibration.grid.bak
new file mode 100644
index 0000000..4af39ba
--- /dev/null
+++ b/reacTIVision-windows-64bit/calibration.grid.bak
@@ -0,0 +1,80 @@
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
diff --git a/reacTIVision-windows-64bit/camera.xml b/reacTIVision-windows-64bit/camera.xml
index 130e6f6..d96b17b 100644
--- a/reacTIVision-windows-64bit/camera.xml
+++ b/reacTIVision-windows-64bit/camera.xml
@@ -1,8 +1,8 @@
-
+
 
-
+
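Finally, a small teardown sketch (not part of the diff) of the Rover lifecycle split introduced in rover.py above: stop() commands the motors to stop, coast() lets them coast, and end() now closes the underlying controller. The shutdown helper name is ours; the diff's exception handlers in DeepRunner.py inline this sequence, and the repeat count follows that pattern, presumably to ride out dropped radio frames:

    def shutdown(env, repeats=100):
        # hypothetical helper; repeatedly command both rovers to stop in case individual frames are dropped
        for _ in range(repeats):
            env.pursuer.stop()
            env.evader.stop()
        # then release the serial links
        env.pursuer.end()
        env.evader.end()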