142 changes: 142 additions & 0 deletions mainComputerControl/DeepRunner.py
@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
import random
import traceback
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import backend as K

# Custom Robotic Chaser RL environment
import environment

EPISODES = 5000


class DQNAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.95 # discount rate
self.epsilon = 1.0 # exploration rate
self.epsilon_min = 0.01
self.epsilon_decay = 0.99
self.learning_rate = 0.001
self.model = self._build_model()
self.target_model = self._build_model()
self.update_target_model()

def _huber_loss(self, target, prediction):
# pseudo-Huber loss: sqrt(1 + error^2) - 1
error = prediction - target
return K.mean(K.sqrt(1+K.square(error))-1, axis=-1)

def _build_model(self):
# Neural net for the Deep Q-learning model
model = Sequential()
model.add(Dense(24, input_dim=self.state_size, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(self.action_size, activation='linear'))
model.compile(loss=self._huber_loss,
optimizer=Adam(lr=self.learning_rate))
return model

def update_target_model(self):
# copy weights from model to target_model
self.target_model.set_weights(self.model.get_weights())

def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))

def act(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
act_values = self.model.predict(state)
return np.argmax(act_values[0]) # returns action

def replay(self, batch_size):
minibatch = random.sample(self.memory, batch_size)
for state, action, reward, next_state, done in minibatch:
target = self.model.predict(state)
if done:
target[0][action] = reward
else:
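# Double DQN target: the online model picks the best next action, the target model evaluates it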
a = self.model.predict(next_state)[0]
t = self.target_model.predict(next_state)[0]
target[0][action] = reward + self.gamma * t[np.argmax(a)]
self.model.fit(state, target, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay

def load(self, name):
self.model.load_weights(name)

def save(self, name):
self.model.save_weights(name)

# A function which determines the runner's reward from its distance to the chaser
def getReward(next_state):
# Get the important distances
pos_runner = np.array(next_state[0:2])
pos_center = np.array((0.5, 0.5))
pos_chaser = np.array(next_state[3:5])
dist_from_chaser = np.linalg.norm(pos_chaser-pos_runner)
# Pseudo-Coulomb's law, inverted: rather than penalising proximity with an inverse-square term,
# the reward grows with the square of the distance, so greater separation from the chaser earns more
distance_reward = 10*dist_from_chaser**2
return distance_reward

if __name__ == "__main__":
env = environment.Environment(0.01, "/dev/ttyUSB0", "/dev/ttyUSB1") # Our time step is slightly greater than the time it takes to send a single frame
state_size = 6 # 6-element state: RunnerX, RunnerY, RunnerHeading, ChaserX, ChaserY, ChaserHeading
action_size = 21 # 21 discrete heading-correction actions
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 10
try:
env.reset([0.2, 0.2], [0.6, 0.6])
for e in range(EPISODES):
state = env.getSystemState([0, 0, 0, 1, 1, 360])
state = np.reshape(state, [1, state_size])
reward = 0
for time in range(3000): # Max time of roughly 30 seconds
action = agent.act(state)
array_state = state[0].tolist()
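# map the discrete action index (0-20) to a heading correction of (action - 11) for the runner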
next_state = env.step(array_state, heading_correction=(action-11))
array_next_state = next_state.tolist()
done = False
if env.pursuer.navigator.has_arrived() or array_state[0] >= 0.9 or array_state[1] >= 0.9 or array_state[0] <= 0.1 or array_state[1] <= 0.1:
print env.pursuer.pos, env.pursuer.target_pos
done = True
if done:
reward += -10
else:
reward += getReward(next_state)
next_state = np.reshape(next_state, [1, state_size])
agent.remember(state, action, reward, next_state, done)
state = next_state
if done or time == 2999:
agent.update_target_model()
env.reset([0.2, 0.2], [0.6, 0.6])
print("episode: {}/{}, Time Survived: {}, e: {:.2}"
.format(e, EPISODES, time, agent.epsilon))
break
if len(agent.memory) > batch_size:
agent.replay(batch_size)
agent.save("./running-agent-ddqn.h5")
except (KeyboardInterrupt, SystemExit):
for i in range(100):
env.pursuer.stop()
env.evader.stop()
env.pursuer.end()
env.evader.end()
except Exception as e:
for i in range(100):
env.pursuer.stop()
env.evader.stop()
env.pursuer.end()
env.evader.end()
traceback.print_exc()
29 changes: 13 additions & 16 deletions mainComputerControl/class_test.py
@@ -1,18 +1,15 @@
class A:
def __init__(self):
self.number = 0

class B:
def __init__(self, instance_of_a):
self.instance_of_a = instance_of_a
from environment import Environment

if __name__ == '__main__':
instance_of_a = A()
instance_of_b = B(instance_of_a)

instance_of_b.instance_of_a.number = 5

if (instance_of_a.number == instance_of_b.instance_of_a.number):
print "Passed by reference"
else:
print "Passed value :("
time_step = 0.3
env = Environment(time_step, 'COM5', 'COM9')

while True:
a = raw_input('ENTER to reset')
env.reset([0.8, 0.8], [0.2, 0.8])
a = raw_input('ENTER to reset')
env.reset([0.8, 0.2], [0.8, 0.8])
a = raw_input('ENTER to reset')
env.reset([0.2, 0.2], [0.8, 0.2])
a = raw_input('ENTER to reset')
env.reset([0.2, 0.8], [0.2, 0.2])
4 changes: 2 additions & 2 deletions mainComputerControl/controller.py
@@ -63,7 +63,7 @@ def get_error(self):
if( (self.error) < -180):
self.error = (360 + self.target_heading - self.current_heading )

print self.error
#print self.error

def get_motor_input_pivot(self):
# update integrator term
@@ -120,7 +120,7 @@ def command_motors_pivot(self):
self.xBee.send_command(2)

# send motor speed
print "Pivot Throttle: ", self.motor_input_pivot
#print "Pivot Throttle: ", self.motor_input_pivot
self.xBee.send_command(abs(self.motor_input_pivot))

def command_motors_forward(self):
104 changes: 80 additions & 24 deletions mainComputerControl/environment.py
@@ -4,8 +4,10 @@
from rover import Rover
from tracker import Tracker

import numpy as np

class Environment():
def __init__(self, system_time_step):
def __init__(self, system_time_step, usb_pursuer, usb_evader):
# system
self.system_tracker = Tracker()
self.timer = time.time()
@@ -17,27 +19,58 @@ def __init__(self, system_time_step):
self.pursuer = Rover(tracker=self.system_tracker,
my_id=self.pursuer_id,
target_id=self.evader_id,
forward_speed=100,
proportional_gain=2.5,
time_step=self.system_time_step,
comm_port='COM5')
reversed=True,
comm_port=usb_pursuer)

self.evader = Rover(tracker=self.system_tracker,
my_id=self.evader_id,
target_id=None,
time_step=self.system_time_step,
max_pivot_input=100,
forward_speed=100,
forward_speed=130,
pivot_threshold=30,
proportional_gain=5,
integrator_gain=0,
reversed=True,
comm_port='COM9')
reversed=False,
comm_port=usb_evader)

self.setThresh(0.18)

def getSystemState(self, prev_state):
self.system_tracker.update()

def step(self, target_heading=None, target_position=None):
# return the system state
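# fall back to the previous state for any marker the tracker fails to locate this frame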
try:
evader_x = self.system_tracker.get_pos(self.evader_id)[0]
evader_y = self.system_tracker.get_pos(self.evader_id)[1]
evader_heading = self.system_tracker.get_heading(self.evader_id)
except:
evader_x = prev_state[0]
evader_y = prev_state[1]
evader_heading = prev_state[2]
try:
pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0]
pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1]
pursuer_heading = self.system_tracker.get_heading(self.pursuer_id)
except:
pursuer_x = prev_state[3]
pursuer_y = prev_state[4]
pursuer_heading = prev_state[5]
# Round Positions to the Centimeter and directions to full degrees
plain_state = (np.round(evader_x, 2), np.round(evader_y, 2), np.round(evader_heading), np.round(pursuer_x, 2), np.round(pursuer_y, 2), np.round(pursuer_heading))
system_state_array = np.array(plain_state)
return system_state_array


def step(self, prev_state, target_heading=None, target_position=None, heading_correction=None):
# update environment state
self.system_tracker.update()

if ((target_heading==None) and (target_position == None)):
return None
if ((target_heading==None) and (target_position == None) and (heading_correction == None)):
pass

# update system state
elif (target_heading):
Expand All @@ -49,43 +82,66 @@ def step(self, target_heading=None, target_position=None):
self.pursuer.update_state()
self.evader.update_state(target_pos=target_position)

elif(heading_correction != None):
self.pursuer.update_state()
self.evader.update_state(heading_correction=heading_correction)

# update actions
self.pursuer.update_action()
self.evader.update_action()

# wait for there to be information to report and update
time.sleep(self.system_time_step)
self.system_tracker.update()
system_state = self.getSystemState(prev_state)

# return the system state
evader_x = self.system_tracker.get_pos(self.evader_id)[0]
evader_y = self.system_tracker.get_pos(self.evader_id)[1]
evader_heading = self.system_tracker.get_heading(self.evader_id)
pursuer_x = self.system_tracker.get_pos(self.pursuer_id)[0]
pursuer_y = self.system_tracker.get_pos(self.pursuer_id)[1]
pursuer_heading = self.system_tracker.get_heading(self.pursuer_id)
return system_state

return (evader_x, evader_y, evader_heading, pursuer_x, pursuer_y, pursuer_heading)

def reset(self):
def setThresh(self, thresh):
self.pursuer.navigator.arrival_distance = thresh
self.evader.navigator.arrival_distance = thresh


def reset(self, pursuer_target, evader_target):
# generate a random starting position
"""
pursuer_x = random.randrange(20, 800, 1)/1000.0
pursuer_y = random.randrange(20, 800, 1)/1000.0
evader_x = random.randrange(20, 800, 1)/1000.0
evader_y = random.randrange(20, 800, 1)/1000.0

pursuer_target = [pursuer_x, pursuer_y]
evader_target = [evader_x, evader_y]
"""

# tell robots to go to their reset locations
print pursuer_target, evader_target
self.pursuer.update_state(target_pos=pursuer_target)
self.evader.update_state(target_pos=evader_target)

self.setThresh(0.05)
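# tighten the arrival threshold for the reset maneuver; it is restored to 0.18 once both robots arrive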
# wait until they have arrived
while ((not self.pursuer.navigator.has_arrived()) and
while ((not self.pursuer.navigator.has_arrived()) or
(not self.evader.navigator.has_arrived())):

# keep moving toward the targets
self.system_tracker.update()
self.pursuer.update_action()
self.evader.update_action()
time.sleep(0.001)
if time.time() - self.timer >= self.system_time_step:
self.timer = time.time()
# tell robots to go to their reset locations
self.pursuer.update_state(target_pos=pursuer_target)
self.evader.update_state(target_pos=evader_target)
# stop whoever has arrived
if (self.pursuer.navigator.has_arrived()):
self.pursuer.coast()
else:
self.pursuer.update_action()

if (self.evader.navigator.has_arrived()):
self.evader.coast()
else:
self.evader.update_action()


for i in range(100):
self.evader.coast()
self.pursuer.coast()
self.setThresh(0.18)
7 changes: 4 additions & 3 deletions mainComputerControl/main_chase.py
@@ -11,10 +11,11 @@

# tricycle -- default controller is for this guy
pursuer = Rover(tracker=system_tracker,
my_id=0,
target_id=2,
my_id=2,
target_id=0,
time_step=system_time_step,
comm_port='COM5')
reversed=True,
comm_port='COM9')

while(True):
try:
2 changes: 2 additions & 0 deletions mainComputerControl/navigator.py
@@ -43,5 +43,7 @@ def has_arrived(self):

if (self.my_pos and self.target_pos):
if (self.distance_to_target < self.arrival_distance):
#print "Arrived"
return True
#print "Not Arrived"
return False
Binary file modified mainComputerControl/navigator.pyc
Binary file not shown.