From ef437d6b02e747d0e06d5e8d7e393f6e7041457b Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 04:11:01 +0400 Subject: [PATCH 01/12] Deep Runner for testing @dbcarelli --- mainComputerControl/DeepRunner.py | 130 ++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 mainComputerControl/DeepRunner.py diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py new file mode 100644 index 0000000..6e3fa1d --- /dev/null +++ b/mainComputerControl/DeepRunner.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +import random +import numpy as np +from collections import deque +from keras.models import Sequential +from keras.layers import Dense +from keras.optimizers import Adam +from keras import backend as K + +# Custom Robotic Chaser RL environment +import environment + +EPISODES = 5000 + + +class DQNAgent: + def __init__(self, state_size, action_size): + self.state_size = state_size + self.action_size = action_size + self.memory = deque(maxlen=2000) + self.gamma = 0.95 # discount rate + self.epsilon = 1.0 # exploration rate + self.epsilon_min = 0.01 + self.epsilon_decay = 0.99 + self.learning_rate = 0.001 + self.model = self._build_model() + self.target_model = self._build_model() + self.update_target_model() + + def _huber_loss(self, target, prediction): + # sqrt(1+error^2)-1 + error = prediction - target + return K.mean(K.sqrt(1+K.square(error))-1, axis=-1) + + def _build_model(self): + # Neural Net for Deep-Q learning Model + model = Sequential() + model.add(Dense(24, input_dim=self.state_size, activation='relu')) + model.add(Dense(24, activation='relu')) + model.add(Dense(self.action_size, activation='linear')) + model.compile(loss=self._huber_loss, + optimizer=Adam(lr=self.learning_rate)) + return model + + def update_target_model(self): + # copy weights from model to target_model + self.target_model.set_weights(self.model.get_weights()) + + def remember(self, state, action, reward, next_state, done): + self.memory.append((state, action, reward, next_state, done)) + + def act(self, state): + if np.random.rand() <= self.epsilon: + return random.randrange(self.action_size) + act_values = self.model.predict(state) + return np.argmax(act_values[0]) # returns action + + def replay(self, batch_size): + minibatch = random.sample(self.memory, batch_size) + for state, action, reward, next_state, done in minibatch: + target = self.model.predict(state) + if done: + target[0][action] = reward + else: + a = self.model.predict(next_state)[0] + t = self.target_model.predict(next_state)[0] + target[0][action] = reward + self.gamma * t[np.argmax(a)] + self.model.fit(state, target, epochs=1, verbose=0) + if self.epsilon > self.epsilon_min: + self.epsilon *= self.epsilon_decay + + def load(self, name): + self.model.load_weights(name) + + def save(self, name): + self.model.save_weights(name) + +# A Function Which Determines the +def getReward(next_state): + # Get the important distances + pos_runner = np.array(next_state[0:2]) + pos_center = np.array((0.5, 0.5)) + pos_chaser = np.array(next_state[3:5]) + dist_from_center = np.linalg.norm(pos_center-pos_runner) + dist_from_chaser = np.linalg.norm(pos_chaser-pos_runner) + # Perform our pseudo-Couloumbs law + # Avoid blowing up massive rewards with constant 1 in the denominator and shrink the function significantly in the X direction + central_reward = 2/(1+(dist_from_center*50)**2) + # Again using distance squared, but inverting to reward greater distances rather than closer + distance_reward = 
10*dist_from_chaser**2 + return central_reward + distance_reward + +if __name__ == "__main__": + env = environment.Environment(0.04) # Our time step is slightly greater than the time it takes to send a single frame + state_size = 6 # 6 length array - ChaserX, ChaserY, RunnerX, RunnerY, CurrentRunnerHeading, CurrentChaserHeading + action_size = 4 # 4 actions - turn left, turn right, continue straight, do nothing + agent = DQNAgent(state_size, action_size) + done = False + batch_size = 32 + + for e in range(EPISODES): + state = env.reset() + state = np.reshape(state, [1, state_size]) + for time in range(900): # Max time of roughly 30 seconds + direct_action = agent.act(state) + current_heading_in_360 = state[2] + 180 + if direct_action != 3: # 3 action is do nothing + if direct_action != 2: # 2 is subtract from heading + # Either go straight or right + action = (current_heading_in_360 + (5*direct_action)) % 360 + else: + # Go Left + action = (current_heading_in_360 - 5) % 360 + else: + # Do Nothing + action = None + next_state = env.step(target_heading=action) + done = bool(next_state[0] == next_state[3] and next_state[1] == next_state[4]) + reward = getReward(next_state) if not done else -10 + next_state = np.reshape(next_state, [1, state_size]) + agent.remember(state, action, reward, next_state, done) + state = next_state + if done: + agent.update_target_model() + print("episode: {}/{}, score: {}, e: {:.2}" + .format(e, EPISODES, time, agent.epsilon)) + break + if len(agent.memory) > batch_size: + agent.replay(batch_size) + #agent.save("./running-agent-ddqn.h5") From f1458277c705f75027bd2c2fe3ec152e64bb7ef1 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 04:19:42 +0400 Subject: [PATCH 02/12] Fixing a few logical errors --- mainComputerControl/DeepRunner.py | 17 +++++++++-------- mainComputerControl/environment.py | 29 +++++++++++++++++------------ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 6e3fa1d..f83daa3 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -99,22 +99,23 @@ def getReward(next_state): batch_size = 32 for e in range(EPISODES): - state = env.reset() + env.reset() + state = env.getSystemState() state = np.reshape(state, [1, state_size]) for time in range(900): # Max time of roughly 30 seconds - direct_action = agent.act(state) + action = agent.act(state) current_heading_in_360 = state[2] + 180 - if direct_action != 3: # 3 action is do nothing - if direct_action != 2: # 2 is subtract from heading + if action != 3: # 3 action is do nothing + if action != 2: # 2 is subtract from heading # Either go straight or right - action = (current_heading_in_360 + (5*direct_action)) % 360 + direct_action = (current_heading_in_360 + (5*action)) % 360 else: # Go Left - action = (current_heading_in_360 - 5) % 360 + direct_action = (current_heading_in_360 - 5) % 360 else: # Do Nothing - action = None - next_state = env.step(target_heading=action) + direct_action = None + next_state = env.step(target_heading=direct_action) done = bool(next_state[0] == next_state[3] and next_state[1] == next_state[4]) reward = getReward(next_state) if not done else -10 next_state = np.reshape(next_state, [1, state_size]) diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index 0e9f029..c388b3a 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -32,6 +32,20 @@ def __init__(self, 
system_time_step): reversed=True, comm_port='COM9') + def getSystemState(self): + self.system_tracker.update() + + # return the system state + evader_x = self.system_tracker.get_pos(self.evader_id)[0] + evader_y = self.system_tracker.get_pos(self.evader_id)[1] + evader_heading = self.system_tracker.get_heading(self.evader_id) + pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0] + pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1] + pursuer_heading = self.system_tracker.get_heading(self.pursuer_id) + + return (evader_x, evader_y, evader_heading, pursuer_x, pursuer_y, pursuer_heading) + + def step(self, target_heading=None, target_position=None): # update environmate state self.system_tracker.update() @@ -55,18 +69,9 @@ def step(self, target_heading=None, target_position=None): # wait for there to be information to report and update time.sleep(self.system_time_step) - self.system_tracker.update() - - # return the system state - evader_x = self.system_tracker.get_pos(self.evader_id)[0] - evader_y = self.system_tracker.get_pos(self.evader_id)[1] - evader_heading = self.system_tracker.get_heading(self.evader_id) - pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0] - pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1] - pursuer_heading = self.system_tracker.get_heading(self.pursuer_id) - - return (evader_x, evader_y, evader_heading, pursuer_x, pursuer_y, pursuer_heading) - + return getSystemState() + + def reset(self): # generate a random starting position pursuer_x = random.randrange(20, 800, 1)/1000.0 From 81f3102ffdcbd21a23a68d31e508e82bf98dc0f0 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 04:28:18 +0400 Subject: [PATCH 03/12] Make our real data more discrete and numpy friendly --- mainComputerControl/environment.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index c388b3a..053ac9b 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -4,6 +4,8 @@ from rover import Rover from tracker import Tracker +import numpy as np + class Environment(): def __init__(self, system_time_step): # system @@ -43,7 +45,9 @@ def getSystemState(self): pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1] pursuer_heading = self.system_tracker.get_heading(self.pursuer_id) - return (evader_x, evader_y, evader_heading, pursuer_x, pursuer_y, pursuer_heading) + # Round Positions to the Centimeter and directions to full degrees + system_state_array = np.array((np.round(evader_x, 2), np.round(evader_y, 2), np.round(evader_heading), np.round(pursuer_x, 2), np.round(pursuer_y), np.round(pursuer_heading))) + return system_state_array def step(self, target_heading=None, target_position=None): From f625bf6c65001cac4b853bd83e863edcc23202a1 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 04:37:23 +0400 Subject: [PATCH 04/12] Moved Reset so that the simulation would fully stop at the end of each round This will hopefully minimize non-recored motor use for our real world setting --- mainComputerControl/DeepRunner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index f83daa3..0e5ea44 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -99,10 +99,9 @@ def getReward(next_state): batch_size = 32 for e in range(EPISODES): - env.reset() state = env.getSystemState() state = np.reshape(state, [1, 
state_size]) - for time in range(900): # Max time of roughly 30 seconds + for time in range(1000): # Max time of roughly 30 seconds action = agent.act(state) current_heading_in_360 = state[2] + 180 if action != 3: # 3 action is do nothing @@ -121,9 +120,10 @@ def getReward(next_state): next_state = np.reshape(next_state, [1, state_size]) agent.remember(state, action, reward, next_state, done) state = next_state - if done: + if done or time == 999: + env.reset() agent.update_target_model() - print("episode: {}/{}, score: {}, e: {:.2}" + print("episode: {}/{}, Time Survived: {}, e: {:.2}" .format(e, EPISODES, time, agent.epsilon)) break if len(agent.memory) > batch_size: From a3247248679ed93602ecae40614a4e75a9dd854b Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 12:12:40 +0400 Subject: [PATCH 05/12] Working with Linux and Beginnings of RL --- mainComputerControl/DeepRunner.py | 74 ++++++++++++++++++------------ mainComputerControl/environment.py | 42 +++++++++++------ mainComputerControl/main_chase.py | 3 +- 3 files changed, 74 insertions(+), 45 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 0e5ea44..1ac3755 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import random +import traceback import numpy as np from collections import deque from keras.models import Sequential @@ -99,33 +100,48 @@ def getReward(next_state): batch_size = 32 for e in range(EPISODES): - state = env.getSystemState() - state = np.reshape(state, [1, state_size]) - for time in range(1000): # Max time of roughly 30 seconds - action = agent.act(state) - current_heading_in_360 = state[2] + 180 - if action != 3: # 3 action is do nothing - if action != 2: # 2 is subtract from heading - # Either go straight or right - direct_action = (current_heading_in_360 + (5*action)) % 360 + try: + state = env.getSystemState([0, 0, 0, 1, 1, 360]) + state = np.reshape(state, [1, state_size]) + for time in range(1000): # Max time of roughly 30 seconds + action = agent.act(state) + array_state = state[0].tolist() + current_heading_in_360 = array_state[2] + if action != 3: # 3 action is do nothing + if action != 2: # 2 is subtract from heading + # Either go straight or right + direct_action = (current_heading_in_360 + (5*action)) % 360 + else: + # Go Left + direct_action = (current_heading_in_360 - 5) % 360 else: - # Go Left - direct_action = (current_heading_in_360 - 5) % 360 - else: - # Do Nothing - direct_action = None - next_state = env.step(target_heading=direct_action) - done = bool(next_state[0] == next_state[3] and next_state[1] == next_state[4]) - reward = getReward(next_state) if not done else -10 - next_state = np.reshape(next_state, [1, state_size]) - agent.remember(state, action, reward, next_state, done) - state = next_state - if done or time == 999: - env.reset() - agent.update_target_model() - print("episode: {}/{}, Time Survived: {}, e: {:.2}" - .format(e, EPISODES, time, agent.epsilon)) - break - if len(agent.memory) > batch_size: - agent.replay(batch_size) - #agent.save("./running-agent-ddqn.h5") + # Do Nothing + direct_action = None + next_state = env.step(array_state, target_heading=direct_action) + array_next_state = next_state.tolist() + done = False + if next_state[0] == next_state[3] and next_state[1] == next_state[4]: + done = True + reward = getReward(next_state) if not done else -10 + next_state = np.reshape(next_state, [1, state_size]) + agent.remember(state, action, 
reward, next_state, done) + state = next_state + if done or time == 999: + agent.update_target_model() + env.reset() + state = env.getSystemState() + print("episode: {}/{}, Time Survived: {}, e: {:.2}" + .format(e, EPISODES, time, agent.epsilon)) + break + if len(agent.memory) > batch_size: + agent.replay(batch_size) + #agent.save("./running-agent-ddqn.h5") + except (KeyboardInterrupt, SystemExit): + env.pursuer.end() + env.evader.end() + break + except Exception as e: + env.pursuer.end() + env.evader.end() + traceback.print_exc() + break diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index 053ac9b..ef17163 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -20,7 +20,8 @@ def __init__(self, system_time_step): my_id=self.pursuer_id, target_id=self.evader_id, time_step=self.system_time_step, - comm_port='COM5') + reversed=True, + comm_port='/dev/ttyUSB0') self.evader = Rover(tracker=self.system_tracker, my_id=self.evader_id, @@ -31,31 +32,40 @@ def __init__(self, system_time_step): pivot_threshold=30, proportional_gain=5, integrator_gain=0, - reversed=True, - comm_port='COM9') + comm_port='/dev/ttyUSB1') - def getSystemState(self): + def getSystemState(self, prev_state): self.system_tracker.update() # return the system state - evader_x = self.system_tracker.get_pos(self.evader_id)[0] - evader_y = self.system_tracker.get_pos(self.evader_id)[1] - evader_heading = self.system_tracker.get_heading(self.evader_id) - pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0] - pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1] - pursuer_heading = self.system_tracker.get_heading(self.pursuer_id) - + try: + evader_x = self.system_tracker.get_pos(self.evader_id)[0] + evader_y = self.system_tracker.get_pos(self.evader_id)[1] + evader_heading = self.system_tracker.get_heading(self.evader_id) + except: + evader_x = prev_state[0] + evader_y = prev_state[1] + evader_heading = prev_state[2] + try: + pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0] + pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1] + pursuer_heading = self.system_tracker.get_heading(self.pursuer_id) + except: + pursuer_x = prev_state[3] + pursuer_y = prev_state[4] + pursuer_heading = prev_state[5] # Round Positions to the Centimeter and directions to full degrees - system_state_array = np.array((np.round(evader_x, 2), np.round(evader_y, 2), np.round(evader_heading), np.round(pursuer_x, 2), np.round(pursuer_y), np.round(pursuer_heading))) + plain_state = (np.round(evader_x, 2), np.round(evader_y, 2), np.round(evader_heading), np.round(pursuer_x, 2), np.round(pursuer_y), np.round(pursuer_heading)) + system_state_array = np.array(plain_state) return system_state_array - def step(self, target_heading=None, target_position=None): + def step(self, prev_state, target_heading=None, target_position=None): # update environmate state self.system_tracker.update() if ((target_heading==None) and (target_position == None)): - return None + pass # update system state elif (target_heading): @@ -73,7 +83,9 @@ def step(self, target_heading=None, target_position=None): # wait for there to be information to report and update time.sleep(self.system_time_step) - return getSystemState() + system_state = self.getSystemState(prev_state) + + return system_state def reset(self): diff --git a/mainComputerControl/main_chase.py b/mainComputerControl/main_chase.py index 42d7666..7fd45a1 100644 --- a/mainComputerControl/main_chase.py +++ 
b/mainComputerControl/main_chase.py @@ -14,7 +14,8 @@ my_id=0, target_id=2, time_step=system_time_step, - comm_port='COM5') + reversed=True, + comm_port='/dev/ttyUSB0') while(True): try: From 08193413d452a2d32349ad8eca8db9ec679a6e2f Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 20:21:38 +0400 Subject: [PATCH 06/12] Changes to ease debugging and increase turning --- mainComputerControl/DeepRunner.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 1ac3755..9988e19 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -98,9 +98,9 @@ def getReward(next_state): agent = DQNAgent(state_size, action_size) done = False batch_size = 32 - - for e in range(EPISODES): - try: + try: + env.reset() + for e in range(EPISODES): state = env.getSystemState([0, 0, 0, 1, 1, 360]) state = np.reshape(state, [1, state_size]) for time in range(1000): # Max time of roughly 30 seconds @@ -110,10 +110,10 @@ def getReward(next_state): if action != 3: # 3 action is do nothing if action != 2: # 2 is subtract from heading # Either go straight or right - direct_action = (current_heading_in_360 + (5*action)) % 360 + direct_action = (current_heading_in_360 + (20*action)) % 360 else: # Go Left - direct_action = (current_heading_in_360 - 5) % 360 + direct_action = (current_heading_in_360 - 20) % 360 else: # Do Nothing direct_action = None @@ -136,12 +136,10 @@ def getReward(next_state): if len(agent.memory) > batch_size: agent.replay(batch_size) #agent.save("./running-agent-ddqn.h5") - except (KeyboardInterrupt, SystemExit): - env.pursuer.end() - env.evader.end() - break - except Exception as e: - env.pursuer.end() - env.evader.end() - traceback.print_exc() - break + except (KeyboardInterrupt, SystemExit): + env.pursuer.end() + env.evader.end() + except Exception as e: + env.pursuer.end() + env.evader.end() + traceback.print_exc() From 7945ff9f045bd758e283abf34c0db5d635ebde87 Mon Sep 17 00:00:00 2001 From: dbcarelli Date: Mon, 30 Apr 2018 20:23:22 +0400 Subject: [PATCH 07/12] Fix reset --- mainComputerControl/class_test.py | 29 +++---- mainComputerControl/environment.py | 40 ++++++--- mainComputerControl/main_chase.py | 6 +- mainComputerControl/navigator.py | 2 + mainComputerControl/navigator.pyc | Bin 1783 -> 1825 bytes mainComputerControl/rover.py | 13 ++- .../calibration.grid.bak | 80 ++++++++++++++++++ reacTIVision-windows-64bit/camera.xml | 4 +- 8 files changed, 141 insertions(+), 33 deletions(-) create mode 100644 reacTIVision-windows-64bit/calibration.grid.bak diff --git a/mainComputerControl/class_test.py b/mainComputerControl/class_test.py index a800958..4317b93 100644 --- a/mainComputerControl/class_test.py +++ b/mainComputerControl/class_test.py @@ -1,18 +1,15 @@ -class A: - def __init__(self): - self.number = 0 - -class B: - def __init__(self, instance_of_a): - self.instance_of_a = instance_of_a +from environment import Environment if __name__ == '__main__': - instance_of_a = A() - instance_of_b = B(instance_of_a) - - instance_of_b.instance_of_a.number = 5 - - if (instance_of_a.number == instance_of_b.instance_of_a.number): - print "Passed by reference" - else: - print "Passed value :(" + time_step = 0.3 + env = Environment(time_step, 'COM5', 'COM9') + + while True: + a = raw_input('ENTER to reset') + env.reset([0.8, 0.8], [0.2, 0.8]) + a = raw_input('ENTER to reset') + env.reset([0.8, 0.2], [0.8, 0.8]) + a = raw_input('ENTER to reset') + 
env.reset([0.2, 0.2], [0.8, 0.2]) + a = raw_input('ENTER to reset') + env.reset([0.2, 0.8], [0.2, 0.2]) diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index ef17163..d1eaa25 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -7,7 +7,7 @@ import numpy as np class Environment(): - def __init__(self, system_time_step): + def __init__(self, system_time_step, usb_pursuer, usb_evader): # system self.system_tracker = Tracker() self.timer = time.time() @@ -20,8 +20,7 @@ def __init__(self, system_time_step): my_id=self.pursuer_id, target_id=self.evader_id, time_step=self.system_time_step, - reversed=True, - comm_port='/dev/ttyUSB0') + comm_port=usb_pursuer) self.evader = Rover(tracker=self.system_tracker, my_id=self.evader_id, @@ -32,11 +31,12 @@ def __init__(self, system_time_step): pivot_threshold=30, proportional_gain=5, integrator_gain=0, - comm_port='/dev/ttyUSB1') + reversed=True, + comm_port=usb_evader) def getSystemState(self, prev_state): self.system_tracker.update() - + # return the system state try: evader_x = self.system_tracker.get_pos(self.evader_id)[0] @@ -59,7 +59,7 @@ def getSystemState(self, prev_state): system_state_array = np.array(plain_state) return system_state_array - + def step(self, prev_state, target_heading=None, target_position=None): # update environmate state self.system_tracker.update() @@ -86,10 +86,11 @@ def step(self, prev_state, target_heading=None, target_position=None): system_state = self.getSystemState(prev_state) return system_state - - - def reset(self): + + + def reset(self, pursuer_target, evader_target): # generate a random starting position + """ pursuer_x = random.randrange(20, 800, 1)/1000.0 pursuer_y = random.randrange(20, 800, 1)/1000.0 evader_x = random.randrange(20, 800, 1)/1000.0 @@ -97,16 +98,33 @@ def reset(self): pursuer_target = [pursuer_x, pursuer_y] evader_target = [evader_x, evader_y] + """ - # tell robots to go to their reset locations + print pursuer_target, evader_target self.pursuer.update_state(target_pos=pursuer_target) self.evader.update_state(target_pos=evader_target) # wait until they have arrived - while ((not self.pursuer.navigator.has_arrived()) and + while ((not self.pursuer.navigator.has_arrived()) or (not self.evader.navigator.has_arrived())): + + # stop whoever has arrived + if (self.pursuer.navigator.has_arrived()): + self.pursuer.coast() + print "Pursuer Arrived" + if (self.evader.navigator.has_arrived()): + self.evader.coast() + print "Evader Arrived" + # keep moving toward the targets self.system_tracker.update() + # tell robots to go to their reset locations + self.pursuer.update_state(target_pos=pursuer_target) + self.evader.update_state(target_pos=evader_target) self.pursuer.update_action() self.evader.update_action() time.sleep(0.001) + + for i in range(100): + self.evader.coast() + self.pursuer.coast() diff --git a/mainComputerControl/main_chase.py b/mainComputerControl/main_chase.py index 7fd45a1..f269010 100644 --- a/mainComputerControl/main_chase.py +++ b/mainComputerControl/main_chase.py @@ -11,11 +11,11 @@ # tricicle -- default conotroller is for this guy pursuer = Rover(tracker=system_tracker, - my_id=0, - target_id=2, + my_id=2, + target_id=0, time_step=system_time_step, reversed=True, - comm_port='/dev/ttyUSB0') + comm_port='COM9') while(True): try: diff --git a/mainComputerControl/navigator.py b/mainComputerControl/navigator.py index b2a8d97..0308485 100644 --- a/mainComputerControl/navigator.py +++ 
b/mainComputerControl/navigator.py @@ -43,5 +43,7 @@ def has_arrived(self): if (self.my_pos and self.target_pos): if (self.distance_to_target < self.arrival_distance): + print "Arrived" return True + print "Not Arrived" return False diff --git a/mainComputerControl/navigator.pyc b/mainComputerControl/navigator.pyc index c08a46212daa50008666399c4a35a920d1baaf38..5c60ebddbf5cd53b8954b8b5b5068690d571fc71 100644 GIT binary patch delta 145 zcmey)yO58a`7XHM2-$ugP1P{Y8G#mGdpfCf2V^L9NS!zl#H<0a@U!nlzPrkx(SX2lo#=t1V c=*q;&Bm|^c8QB?GC$C`DWfa+bja88m02}!r<^TWy delta 116 zcmZ3;_nnuW`7Gk#jUk%>BvQi85S+)r zP{P3wtii&-z~Gk&Bp5Q8Km;R@nEZ?7upl2uj8TZum5Gx{2uQO}zQn4_D7=YHkr4pp CfEFVF diff --git a/mainComputerControl/rover.py b/mainComputerControl/rover.py index ac931da..a87532a 100644 --- a/mainComputerControl/rover.py +++ b/mainComputerControl/rover.py @@ -44,6 +44,11 @@ def update_state(self, target_pos=None, desired_heading=None): # update my target's state parmeters if (target_pos): self.target_pos = target_pos + if (self.pos and self.target_pos): + self.desired_heading = self.navigator \ + .get_target_heading(self.pos, self.target_pos) + else: + self.desired_heading = None elif (desired_heading): self.desired_heading = desired_heading self.target_pos = None @@ -70,5 +75,11 @@ def update_action(self): # no new data (fiducial is not in view) self.controller.coast() - def end(self): + def stop(self): self.controller.stop() + + def coast(self): + self.controller.coast() + + def end(self): + self.controller.close() diff --git a/reacTIVision-windows-64bit/calibration.grid.bak b/reacTIVision-windows-64bit/calibration.grid.bak new file mode 100644 index 0000000..4af39ba --- /dev/null +++ b/reacTIVision-windows-64bit/calibration.grid.bak @@ -0,0 +1,80 @@ +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 diff --git a/reacTIVision-windows-64bit/camera.xml b/reacTIVision-windows-64bit/camera.xml index 130e6f6..d96b17b 100644 --- a/reacTIVision-windows-64bit/camera.xml +++ b/reacTIVision-windows-64bit/camera.xml @@ -1,8 +1,8 @@ - + - + From 1d1b45e5ea7e5ce277af612b990ced5b1d220442 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Apr 2018 23:42:48 +0400 Subject: [PATCH 08/12] Build that chooses angle directly rather than left and right --- mainComputerControl/DeepRunner.py | 39 +++++++++++++----------------- mainComputerControl/controller.py | 4 +-- mainComputerControl/environment.py | 19 +++++++++++---- mainComputerControl/navigator.py | 4 +-- 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 9988e19..44d797e 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -92,54 +92,49 @@ def getReward(next_state): return central_reward + distance_reward if __name__ == "__main__": - env = environment.Environment(0.04) # Our time step is slightly greater than the time it takes to send a single frame + env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1") # Our time step is slightly greater than the time it takes to send a single frame state_size = 6 # 6 length array - ChaserX, ChaserY, RunnerX, RunnerY, CurrentRunnerHeading, CurrentChaserHeading 
- action_size = 4 # 4 actions - turn left, turn right, continue straight, do nothing + action_size = 360 # 4 actions - turn left, turn right, continue straight, do nothing agent = DQNAgent(state_size, action_size) done = False - batch_size = 32 + batch_size = 10 try: - env.reset() + env.reset([0.2, 0.2], [0.8, 0.8]) for e in range(EPISODES): state = env.getSystemState([0, 0, 0, 1, 1, 360]) state = np.reshape(state, [1, state_size]) - for time in range(1000): # Max time of roughly 30 seconds + for time in range(3000): # Max time of roughly 30 seconds action = agent.act(state) array_state = state[0].tolist() - current_heading_in_360 = array_state[2] - if action != 3: # 3 action is do nothing - if action != 2: # 2 is subtract from heading - # Either go straight or right - direct_action = (current_heading_in_360 + (20*action)) % 360 - else: - # Go Left - direct_action = (current_heading_in_360 - 20) % 360 - else: - # Do Nothing - direct_action = None - next_state = env.step(array_state, target_heading=direct_action) + next_state = env.step(array_state, target_heading=action) array_next_state = next_state.tolist() done = False - if next_state[0] == next_state[3] and next_state[1] == next_state[4]: + if env.pursuer.navigator.has_arrived(): + print env.pursuer.pos, env.pursuer.target_pos done = True reward = getReward(next_state) if not done else -10 next_state = np.reshape(next_state, [1, state_size]) agent.remember(state, action, reward, next_state, done) state = next_state - if done or time == 999: + if done or time == 2999: agent.update_target_model() - env.reset() - state = env.getSystemState() + env.reset([0.2, 0.2], [0.8, 0.8]) print("episode: {}/{}, Time Survived: {}, e: {:.2}" .format(e, EPISODES, time, agent.epsilon)) break if len(agent.memory) > batch_size: agent.replay(batch_size) - #agent.save("./running-agent-ddqn.h5") + agent.save("./running-agent-ddqn.h5") except (KeyboardInterrupt, SystemExit): + for i in range(100): + env.pursuer.stop() + env.evader.stop() env.pursuer.end() env.evader.end() except Exception as e: + for i in range(100): + env.pursuer.stop() + env.evader.stop() env.pursuer.end() env.evader.end() traceback.print_exc() diff --git a/mainComputerControl/controller.py b/mainComputerControl/controller.py index 9d3631f..bda44c8 100644 --- a/mainComputerControl/controller.py +++ b/mainComputerControl/controller.py @@ -63,7 +63,7 @@ def get_error(self): if( (self.error) < -180): self.error = (360 + self.target_heading - self.current_heading ) - print self.error + #print self.error def get_motor_input_pivot(self): # update integrator term @@ -120,7 +120,7 @@ def command_motors_pivot(self): self.xBee.send_command(2) # send motor speed - print "Pivot Throttle: ", self.motor_input_pivot + #print "Pivot Throttle: ", self.motor_input_pivot self.xBee.send_command(abs(self.motor_input_pivot)) def command_motors_forward(self): diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index d1eaa25..ac675f2 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -20,6 +20,7 @@ def __init__(self, system_time_step, usb_pursuer, usb_evader): my_id=self.pursuer_id, target_id=self.evader_id, time_step=self.system_time_step, + reversed=True, comm_port=usb_pursuer) self.evader = Rover(tracker=self.system_tracker, @@ -27,13 +28,15 @@ def __init__(self, system_time_step, usb_pursuer, usb_evader): target_id=None, time_step=self.system_time_step, max_pivot_input=100, - forward_speed=100, + forward_speed=130, pivot_threshold=30, 
proportional_gain=5, integrator_gain=0, - reversed=True, + reversed=False, comm_port=usb_evader) + self.setThresh(0.18) + def getSystemState(self, prev_state): self.system_tracker.update() @@ -88,6 +91,11 @@ def step(self, prev_state, target_heading=None, target_position=None): return system_state + def setThresh(self, thresh): + self.pursuer.navigator.arrival_distance = thresh + self.evader.navigator.arrival_distance = thresh + + def reset(self, pursuer_target, evader_target): # generate a random starting position """ @@ -103,7 +111,7 @@ def reset(self, pursuer_target, evader_target): print pursuer_target, evader_target self.pursuer.update_state(target_pos=pursuer_target) self.evader.update_state(target_pos=evader_target) - + self.setThresh(0.05) # wait until they have arrived while ((not self.pursuer.navigator.has_arrived()) or (not self.evader.navigator.has_arrived())): @@ -111,10 +119,10 @@ def reset(self, pursuer_target, evader_target): # stop whoever has arrived if (self.pursuer.navigator.has_arrived()): self.pursuer.coast() - print "Pursuer Arrived" + #print "Pursuer Arrived" if (self.evader.navigator.has_arrived()): self.evader.coast() - print "Evader Arrived" + #print "Evader Arrived" # keep moving toward the targets self.system_tracker.update() @@ -128,3 +136,4 @@ def reset(self, pursuer_target, evader_target): for i in range(100): self.evader.coast() self.pursuer.coast() + self.setThresh(0.18) diff --git a/mainComputerControl/navigator.py b/mainComputerControl/navigator.py index 0308485..25b7768 100644 --- a/mainComputerControl/navigator.py +++ b/mainComputerControl/navigator.py @@ -43,7 +43,7 @@ def has_arrived(self): if (self.my_pos and self.target_pos): if (self.distance_to_target < self.arrival_distance): - print "Arrived" + #print "Arrived" return True - print "Not Arrived" + #print "Not Arrived" return False From 4caa687f473b96a9e61c8cd73b5f39254ce28b80 Mon Sep 17 00:00:00 2001 From: dbcarelli Date: Tue, 1 May 2018 00:06:02 +0400 Subject: [PATCH 09/12] Rover.update_state takes in a heading_correction argument for smooth trajectory updating --- mainComputerControl/rover.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mainComputerControl/rover.py b/mainComputerControl/rover.py index a87532a..5fff4dc 100644 --- a/mainComputerControl/rover.py +++ b/mainComputerControl/rover.py @@ -36,12 +36,16 @@ def __init__(self, self.target_pos = [0, 0] self.desired_heading = 0 - def update_state(self, target_pos=None, desired_heading=None): + def update_state(self, + target_pos=None, + desired_heading=None, + heading_correction=None): + # update my state parameters self.current_heading = self.tracker.get_heading(self.id) self.pos = self.tracker.get_pos(self.id) - # update my target's state parmeters + # update my target state parmeters if (target_pos): self.target_pos = target_pos if (self.pos and self.target_pos): @@ -52,6 +56,10 @@ def update_state(self, target_pos=None, desired_heading=None): elif (desired_heading): self.desired_heading = desired_heading self.target_pos = None + elif (heading_correction): + new_heading = (self.desired_heading+heading_correction)%360 + self.desired_heading = new_heading + self.target_pos = None else: self.target_pos = self.tracker.get_pos(self.target_id) if (self.pos and self.target_pos): From 5450133893f00bff484dea22285c17eeff127691 Mon Sep 17 00:00:00 2001 From: dbcarelli Date: Tue, 1 May 2018 00:16:52 +0400 Subject: [PATCH 10/12] Handle unexpected types --- mainComputerControl/rover.py | 9 +++++++++ 1 file changed, 9 
insertions(+) diff --git a/mainComputerControl/rover.py b/mainComputerControl/rover.py index 5fff4dc..068b2cc 100644 --- a/mainComputerControl/rover.py +++ b/mainComputerControl/rover.py @@ -57,6 +57,15 @@ def update_state(self, self.desired_heading = desired_heading self.target_pos = None elif (heading_correction): + # make sure desired_heading is not None + if (self.desired_heading == None): + self.desired_heading = self.tracker.get_heading(self.id) + + # the tracker could still return None... + if (self.desired_heading == None): + return + + # calculate the new heading! new_heading = (self.desired_heading+heading_correction)%360 self.desired_heading = new_heading self.target_pos = None From 574fc6eae1933e3ff9440e1fd1ad5fc0015efbe9 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 1 May 2018 03:16:47 +0400 Subject: [PATCH 11/12] Smooth turning learner --- mainComputerControl/DeepRunner.py | 10 +++++----- mainComputerControl/environment.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 44d797e..012661a 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -94,7 +94,7 @@ def getReward(next_state): if __name__ == "__main__": env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1") # Our time step is slightly greater than the time it takes to send a single frame state_size = 6 # 6 length array - ChaserX, ChaserY, RunnerX, RunnerY, CurrentRunnerHeading, CurrentChaserHeading - action_size = 360 # 4 actions - turn left, turn right, continue straight, do nothing + action_size = 21 # 4 actions - turn left, turn right, continue straight, do nothing agent = DQNAgent(state_size, action_size) done = False batch_size = 10 @@ -106,7 +106,7 @@ def getReward(next_state): for time in range(3000): # Max time of roughly 30 seconds action = agent.act(state) array_state = state[0].tolist() - next_state = env.step(array_state, target_heading=action) + next_state = env.step(array_state, heading_correction=(action-11)) array_next_state = next_state.tolist() done = False if env.pursuer.navigator.has_arrived(): @@ -122,9 +122,9 @@ def getReward(next_state): print("episode: {}/{}, Time Survived: {}, e: {:.2}" .format(e, EPISODES, time, agent.epsilon)) break - if len(agent.memory) > batch_size: - agent.replay(batch_size) - agent.save("./running-agent-ddqn.h5") + if len(agent.memory) > batch_size: + agent.replay(batch_size) + agent.save("./running-agent-ddqn.h5") except (KeyboardInterrupt, SystemExit): for i in range(100): env.pursuer.stop() diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index ac675f2..3d5219b 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -63,11 +63,12 @@ def getSystemState(self, prev_state): return system_state_array - def step(self, prev_state, target_heading=None, target_position=None): + def step(self, prev_state, target_heading=None, target_position=None, heading_correction=None): # update environmate state self.system_tracker.update() - if ((target_heading==None) and (target_position == None)): + print heading_correction + if ((target_heading==None) and (target_position == None) and (heading_correction == None)): pass # update system state @@ -80,6 +81,11 @@ def step(self, prev_state, target_heading=None, target_position=None): self.pursuer.update_state() self.evader.update_state(target_pos=target_position) + elif(heading_correction != None): + print 'hello' + 
self.pursuer.update_state() + self.evader.update_state(heading_correction=heading_correction) + # update actions self.pursuer.update_action() self.evader.update_action() From 3748d2a4ca8bb811d13de4ae571416f117c2f210 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 1 May 2018 14:57:25 +0400 Subject: [PATCH 12/12] Learned how to not hit walls --- mainComputerControl/DeepRunner.py | 22 +++++++++--------- mainComputerControl/environment.py | 36 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/mainComputerControl/DeepRunner.py b/mainComputerControl/DeepRunner.py index 012661a..571d566 100644 --- a/mainComputerControl/DeepRunner.py +++ b/mainComputerControl/DeepRunner.py @@ -82,14 +82,12 @@ def getReward(next_state): pos_runner = np.array(next_state[0:2]) pos_center = np.array((0.5, 0.5)) pos_chaser = np.array(next_state[3:5]) - dist_from_center = np.linalg.norm(pos_center-pos_runner) dist_from_chaser = np.linalg.norm(pos_chaser-pos_runner) # Perform our pseudo-Couloumbs law # Avoid blowing up massive rewards with constant 1 in the denominator and shrink the function significantly in the X direction - central_reward = 2/(1+(dist_from_center*50)**2) # Again using distance squared, but inverting to reward greater distances rather than closer distance_reward = 10*dist_from_chaser**2 - return central_reward + distance_reward + return (distance_reward) if __name__ == "__main__": env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1") # Our time step is slightly greater than the time it takes to send a single frame @@ -99,32 +97,36 @@ def getReward(next_state): done = False batch_size = 10 try: - env.reset([0.2, 0.2], [0.8, 0.8]) + env.reset([0.2, 0.2], [0.6, 0.6]) for e in range(EPISODES): state = env.getSystemState([0, 0, 0, 1, 1, 360]) state = np.reshape(state, [1, state_size]) + reward = 0 for time in range(3000): # Max time of roughly 30 seconds action = agent.act(state) array_state = state[0].tolist() next_state = env.step(array_state, heading_correction=(action-11)) array_next_state = next_state.tolist() done = False - if env.pursuer.navigator.has_arrived(): + if env.pursuer.navigator.has_arrived() or array_state[0] >= 0.9 or array_state[1] >= 0.9 or array_state[0] <= 0.1 or array_state[1] <= 0.1: print env.pursuer.pos, env.pursuer.target_pos done = True - reward = getReward(next_state) if not done else -10 + if done: + reward += -10 + else: + reward += getReward(next_state) next_state = np.reshape(next_state, [1, state_size]) agent.remember(state, action, reward, next_state, done) state = next_state if done or time == 2999: agent.update_target_model() - env.reset([0.2, 0.2], [0.8, 0.8]) + env.reset([0.2, 0.2], [0.6, 0.6]) print("episode: {}/{}, Time Survived: {}, e: {:.2}" .format(e, EPISODES, time, agent.epsilon)) break - if len(agent.memory) > batch_size: - agent.replay(batch_size) - agent.save("./running-agent-ddqn.h5") + if len(agent.memory) > batch_size: + agent.replay(batch_size) + agent.save("./running-agent-ddqn.h5") except (KeyboardInterrupt, SystemExit): for i in range(100): env.pursuer.stop() diff --git a/mainComputerControl/environment.py b/mainComputerControl/environment.py index 3d5219b..8c0f583 100644 --- a/mainComputerControl/environment.py +++ b/mainComputerControl/environment.py @@ -19,6 +19,8 @@ def __init__(self, system_time_step, usb_pursuer, usb_evader): self.pursuer = Rover(tracker=self.system_tracker, my_id=self.pursuer_id, target_id=self.evader_id, + forward_speed=100, + proportional_gain=2.5, 
time_step=self.system_time_step, reversed=True, comm_port=usb_pursuer) @@ -67,7 +69,6 @@ def step(self, prev_state, target_heading=None, target_position=None, heading_co # update environmate state self.system_tracker.update() - print heading_correction if ((target_heading==None) and (target_position == None) and (heading_correction == None)): pass @@ -82,7 +83,6 @@ def step(self, prev_state, target_heading=None, target_position=None, heading_co self.evader.update_state(target_pos=target_position) elif(heading_correction != None): - print 'hello' self.pursuer.update_state() self.evader.update_state(heading_correction=heading_correction) @@ -122,23 +122,25 @@ def reset(self, pursuer_target, evader_target): while ((not self.pursuer.navigator.has_arrived()) or (not self.evader.navigator.has_arrived())): - # stop whoever has arrived - if (self.pursuer.navigator.has_arrived()): - self.pursuer.coast() - #print "Pursuer Arrived" - if (self.evader.navigator.has_arrived()): - self.evader.coast() - #print "Evader Arrived" - # keep moving toward the targets self.system_tracker.update() - # tell robots to go to their reset locations - self.pursuer.update_state(target_pos=pursuer_target) - self.evader.update_state(target_pos=evader_target) - self.pursuer.update_action() - self.evader.update_action() - time.sleep(0.001) - + if self.timer >= self.system_time_step: + self.timer = time.time() + # tell robots to go to their reset locations + self.pursuer.update_state(target_pos=pursuer_target) + self.evader.update_state(target_pos=evader_target) + # stop whoever has arrived + if (self.pursuer.navigator.has_arrived()): + self.pursuer.coast() + else: + self.pursuer.update_action() + + if (self.evader.navigator.has_arrived()): + self.evader.coast() + else: + self.evader.update_action() + + for i in range(100): self.evader.coast() self.pursuer.coast()
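
Editor's note (not part of the patch series): the final state of DeepRunner.py above trains the runner and periodically writes its weights with agent.save("./running-agent-ddqn.h5"). Below is a minimal, hypothetical sketch of how those trained weights might be rolled out greedily on the same hardware, assuming the interfaces as they stand after PATCH 12 -- Environment(time_step, usb_pursuer, usb_evader), getSystemState(prev_state), step(prev_state, heading_correction=...), and the action-to-heading mapping (action - 11) used in the training loop. The serial ports, reset targets, and weights path are the same placeholder values DeepRunner.py uses; none of this code appears in the patches themselves.

# Hypothetical greedy rollout of a trained runner (illustration only; not in the patches).
import numpy as np

import environment
from DeepRunner import DQNAgent

if __name__ == "__main__":
    env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1")
    state_size, action_size = 6, 21           # matches the final training script
    agent = DQNAgent(state_size, action_size)
    agent.load("./running-agent-ddqn.h5")      # weights written by agent.save() during training
    agent.epsilon = 0.0                        # disable exploration: (almost) always take the argmax action

    try:
        env.reset([0.2, 0.2], [0.6, 0.6])      # same reset targets as the training loop
        state = env.getSystemState([0, 0, 0, 1, 1, 360])
        state = np.reshape(state, [1, state_size])
        for t in range(3000):
            action = agent.act(state)                          # index in [0, 20]
            array_state = state[0].tolist()
            # same mapping as training: discrete action -> heading correction in degrees
            next_state = env.step(array_state, heading_correction=(action - 11))
            state = np.reshape(next_state, [1, state_size])
            if env.pursuer.navigator.has_arrived():            # runner was caught: end the rollout
                break
    finally:
        # mirror the cleanup in DeepRunner.py's exception handlers
        for i in range(100):
            env.pursuer.stop()
            env.evader.stop()
        env.pursuer.end()
        env.evader.end()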