45 changes: 32 additions & 13 deletions NeuronalNetwork/environment/environment_fitness.py
@@ -2,8 +2,7 @@

import math
from .environment_node_data import NodeData, Mode


from math import sqrt
class FitnessData:
"""
Class for calculating the fitness function reward from the current simulation state. Possible reward calculation
@@ -148,35 +147,55 @@ def calculate_reward(self, robot_x: float, robot_y: float, robot_orientation: fl
:return: Reward, done.
"""
done = False
reward = -1
reward = -1.0

#distance_start_to_end = self.__distance_start_to_end()
distance_start_to_end = self._distance_start_to_end()

distance_robot_to_end = self._distance_robot_to_end(robot_x, robot_y)
distance_robot_to_start = self._distance_robot_to_start(robot_x, robot_y)

distance_between_last_step = self._distance_between_last_step(robot_x, robot_y)

distance_robot_to_end_last = self._distance_robot_to_end(self._robot_x_last, self._robot_y_last)
distance_robot_to_end_diff = distance_robot_to_end_last - distance_robot_to_end

distance_robot_to_end_diff_abs = abs(distance_robot_to_end_diff)*5

diff_rotation_to_end_last = math.fabs(self.angle_difference_from_robot_to_end(self._robot_x_last, self._robot_y_last,self._robot_orientation_last))
diff_rotation_to_end = math.fabs(self.angle_difference_from_robot_to_end(robot_x, robot_y,robot_orientation))

diff_rotations = math.fabs(diff_rotation_to_end - diff_rotation_to_end_last)*5

if diff_rotation_to_end > diff_rotation_to_end_last:
diff_rotations *= -2.0
else:
diff_rotations *= 1.3

if distance_robot_to_end > sqrt(distance_between_last_step**2 + distance_robot_to_end_last**2):

distance_robot_to_end_diff_abs *= -2.0


if distance_robot_to_end_diff < 0:
distance_robot_to_end_diff *= 2
else:
distance_robot_to_end_diff *= 1.5

reward += distance_robot_to_end_diff
distance_robot_to_end_diff_abs *= 1.3

reward += distance_robot_to_end_diff_abs
reward += diff_rotations

#reward = distance_between_last_step + (1 - distance_robot_to_end / distance_start_to_end) + distance_between_last_step

# reward += 10 * max((10 - self._distance_robot_to_end(robot_x, robot_y)) / 10, 0)
# reward += distance_robot_to_start
# reward += distance_between_last_step
# reward -= distance_robot_to_end
#reward += distance_robot_to_start * 0.3
#reward -= distance_start_to_end * 0.1
#reward += distance_between_last_step
#reward -= distance_robot_to_end * 0.1

reward += ((math.pi - math.fabs(self._difference_two_angles(robot_orientation, self._orientation_robot_to_end(robot_x, robot_y)))) / math.pi) * 1
#reward += max((5 - self._distance_robot_to_end(robot_x, robot_y)) / 5, 0)
#reward = 0

if env_done:
reward = -10 #- distance_robot_to_end / distance_start_to_end * 100
reward = -20 #- distance_robot_to_end / distance_start_to_end * 100
done = True
elif distance_robot_to_end < self._node_data.get_node_end().radius():
reward = 10
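Taken together, the new shaping terms reward a step that reduces the distance and the heading error to the goal and penalize one that increases them, on top of the flat -1.0 per step and the -20 / +10 terminal rewards. A condensed, standalone sketch of the added logic (the argument names are placeholders for the class helpers called above):

import math

def shaping_terms(d_end, d_end_last, step_len, heading_err, heading_err_last):
    # Sketch of the added reward shaping; constants mirror the diff above.
    reward = -1.0  # flat per-step penalty

    # Distance term: scaled absolute change in distance to the goal.
    dist_term = abs(d_end_last - d_end) * 5
    if d_end > math.sqrt(step_len ** 2 + d_end_last ** 2):
        dist_term *= -2.0  # new distance exceeds the hypotenuse of the last step: moving away
    else:
        dist_term *= 1.3   # otherwise counted as progress toward the goal

    # Rotation term: scaled absolute change in heading error toward the goal.
    rot_term = abs(heading_err - heading_err_last) * 5
    if heading_err > heading_err_last:
        rot_term *= -2.0   # turned away from the goal
    else:
        rot_term *= 1.3    # turned toward the goal

    return reward + dist_term + rot_term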
51 changes: 32 additions & 19 deletions NeuronalNetwork/ga3c/Config.py
@@ -33,21 +33,34 @@ class Config:
# Environment configuration

# Path of the world
PATH_TO_WORLD = ["../Simulation2d/world/roblab"]
PATH_TO_WORLD = ["../Simulation2d/world/square"]
# Use this for multiple Environments in parallel
# PATH_TO_WORLD = ["../Simulation2d/world/room", "../Simulation2d/world/four_rooms"]
#train 1
#PATH_TO_WORLD = ["../Simulation2d/world/square","../Simulation2d/world/square", "../Simulation2d/world/square"]
#train_2
PATH_TO_WORLD = ["../Simulation2d/world/room","../Simulation2d/world/room"]
#PATH_TO_WORLD = ["../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms"]
#train_3
#PATH_TO_WORLD = ["../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms","../Simulation2d/world/four_rooms"]

#train_4
NETWORK_DIR =""

NETWORK_DIR =""

PATH_TO_WORLD = ["../Simulation2d/world/roblab"]
#train_5
#PATH_TO_WORLD = ["../Simulation2d/world/room"]
# Mode
MODE=Mode.ALL_RANDOM
# Terminate the simulation
TERMINATE_AT_END=False
# Cluster size of the lidar
CLUSTER_SIZE=1
CLUSTER_SIZE=18
# use observation rotation vector
USE_OBSERVATION_ROTATION=True
# Observation rotation vector size
OBSERVATION_ROTATION_SIZE=128

OBSERVATION_ROTATION_SIZE=16

# Visualize for training
VISUALIZE = True
@@ -56,9 +69,9 @@
# Enable to train
TRAIN_MODELS = True
# Load old models. Throws if the model doesn't exist
LOAD_CHECKPOINT = False
LOAD_CHECKPOINT =True
# If 0, the latest checkpoint is loaded
LOAD_EPISODE = 0
LOAD_EPISODE = 0

#########################################################################
# Number of agents, predictors, trainers and other system settings
@@ -67,7 +80,7 @@
# Number of Agents
AGENTS = 32#32
# Number of Predictors
PREDICTORS = 2 #2
PREDICTORS = 2#2
# Number of Trainers
TRAINERS = 2 #2

@@ -83,7 +96,7 @@
# Algorithm parameters

# Max step iteration -> if reached, the environment is done. 0 for endless.
MAX_STEP_ITERATION = 300
MAX_STEP_ITERATION = 1000

# Discount factor
DISCOUNT = 0.99
@@ -96,16 +109,16 @@
REWARD_MAX = 1

# Max size of the queue
MAX_QUEUE_SIZE = 100 #100
PREDICTION_BATCH_SIZE = 128 #128
MAX_QUEUE_SIZE = 200 #100
PREDICTION_BATCH_SIZE = 256 #128

# Input of the DNN
STACKED_FRAMES = 4
OBSERVATION_SIZE=1081+OBSERVATION_ROTATION_SIZE
STACKED_FRAMES = 1
OBSERVATION_SIZE=60

# Total number of episodes and annealing frequency
EPISODES = 400000
ANNEALING_EPISODE_COUNT = 400000
EPISODES = 100000
ANNEALING_EPISODE_COUNT = 100000

# Entropy regularization hyper-parameter
BETA_START = 0.01
@@ -129,20 +142,20 @@
# Epsilon (regularize policy lag in GA3C)
LOG_EPSILON = 1e-6
# Training min batch size - increasing the batch size increases the stability of the algorithm, but makes learning slower
TRAINING_MIN_BATCH_SIZE = 32 #0
TRAINING_MIN_BATCH_SIZE = 128 #0

#########################################################################
# Log and save

# Enable TensorBoard
TENSORBOARD = False
TENSORBOARD = True
# Update TensorBoard every X training steps
TENSORBOARD_UPDATE_FREQUENCY = 1000

# Enable to save models every SAVE_FREQUENCY episodes
SAVE_MODELS = True
# Save every SAVE_FREQUENCY episodes
SAVE_FREQUENCY = 1000
SAVE_FREQUENCY = 200

# Print stats every PRINT_STATS_FREQUENCY episodes
PRINT_STATS_FREQUENCY = 1
@@ -152,7 +165,7 @@ class Config:
# Results filename
RESULTS_FILENAME = 'results.txt'
# Network checkpoint name
NETWORK_NAME = 'network'
NETWORK_NAME = 'network60x60'

#########################################################################
# More experimental parameters here
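These observation settings change the network input from four stacked scans of 1081 beams (plus a 128-value rotation vector) to a single 60x60 grid map plus a 16-value rotation vector, built by the new GridMap class further down. A rough shape check under the new values, assuming the two-channel state/reward map that GridMap produces:

import numpy as np

OBSERVATION_SIZE = 60            # grid-map side length
OBSERVATION_ROTATION_SIZE = 16   # rotation encoding length
STACKED_FRAMES = 1               # no frame stacking with the map input

grid_map = np.zeros((OBSERVATION_SIZE, OBSERVATION_SIZE, 2))   # state map + reward map
rotation = np.zeros((OBSERVATION_ROTATION_SIZE, 1))            # goal-direction encoding
print(grid_map.shape, rotation.shape)                          # (60, 60, 2) (16, 1)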
10 changes: 7 additions & 3 deletions NeuronalNetwork/ga3c/Environment.py
@@ -37,6 +37,7 @@
from .GameManager import GameManager
from action_mapper import ACTION_SIZE

from .Grid_map import GridMap

class Environment:
def __init__(self, id=-1):
@@ -52,10 +53,13 @@ def __init__(self, id=-1):
def _get_current_state(self):
if not self.frame_q.full():
return None # frame queue is not full yet.
x_ = np.array(self.frame_q.queue)
_maps = np.array([i[0] for i in self.frame_q.queue])
_rotations = np.array([i[1] for i in self.frame_q.queue])
_maps = np.reshape(_maps,(60,60,2))
_rotations = np.reshape(_rotations,(16,1))
#x_ = np.transpose(x_, [1, 2, 0]) # move channels
x_ = np.transpose(x_, [1, 0]) # move channels
return x_
#x_ = np.transpose(x_, [1, 0]) # move channels
return np.array([_maps, _rotations])

def _update_frame_q(self, frame):
if self.frame_q.full():
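Each queued frame is now a (maps, rotation) pair instead of a flat scan, so _get_current_state returns the two parts separately rather than a stacked frame tensor. A small sketch of the reshaping, assuming a single queued frame with the 60x60 maps and 16-value rotation from Config:

import numpy as np

frame = ([np.zeros((60, 60)), np.zeros((60, 60))], np.zeros(16))  # (maps, rotation) pair
queue = [frame]

maps = np.reshape(np.array([f[0] for f in queue]), (60, 60, 2))
rotations = np.reshape(np.array([f[1] for f in queue]), (16, 1))
state = [maps, rotations]
print(maps.shape, rotations.shape)  # (60, 60, 2) (16, 1)

Note that wrapping the two differently shaped arrays in a single np.array call, as the return statement above does, is only accepted by recent NumPy releases with dtype=object.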
25 changes: 22 additions & 3 deletions NeuronalNetwork/ga3c/GameManager.py
@@ -26,6 +26,8 @@

from action_mapper import map_action
from environment.environment import Environment
import numpy as np
from .Grid_map import GridMap

from .Config import Config

@@ -55,22 +57,39 @@ def __init__(self, id):

def reset(self):
observation, _, _, _ = self.env.reset()
return observation
input_laser,rotation = self.process_observation(observation)
map = GridMap(input_laser)
obs = np.array([[map.States_map,map.Reward_map],[rotation]])
return obs


def step(self, action):
self._update_display()
if action is None:
observation, reward, done, info = self.env.step(0, 0, 20)

input_laser, rotation = self.process_observation(observation)
map = GridMap(input_laser)
obs = np.array([[map.States_map, map.Reward_map], [rotation]])
reward = 0
done = False
else:

linear, angular = map_action(action)
observation, reward, done, info = self.env.step(linear, angular,20)
return observation, reward, done, info
input_laser, rotation = self.process_observation(observation)
map = GridMap(input_laser)
obs = np.array([[map.States_map, map.Reward_map], [rotation]])

return obs, reward, done, info

def _update_display(self):
if self.visualize:
self.env.visualize()

def observation_size(self):
return self.env.observation_size()
return self.env.observation_size()
def process_observation(self,observation):
laser_scan = np.array(observation[:Config.OBSERVATION_SIZE])
orientation = np.array(observation[Config.OBSERVATION_SIZE:])
return laser_scan, orientation
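The manager now splits each raw observation into a laser part and a rotation part, rasterizes the laser into a GridMap, and hands the state/reward maps plus the rotation vector to the agent. A sketch of that flow with a synthetic observation (slice sizes taken from Config; GridMap is the new class added below):

import numpy as np
from Grid_map import GridMap  # NeuronalNetwork/ga3c/Grid_map.py

OBSERVATION_SIZE = 60            # laser slice length (Config.OBSERVATION_SIZE)
OBSERVATION_ROTATION_SIZE = 16   # rotation slice length

observation = np.random.rand(OBSERVATION_SIZE + OBSERVATION_ROTATION_SIZE)
input_laser = observation[:OBSERVATION_SIZE]
rotation = observation[OBSERVATION_SIZE:]

grid = GridMap(input_laser)      # builds States_map and Reward_map
obs = np.array([[grid.States_map, grid.Reward_map], [rotation]], dtype=object)  # dtype=object added here; the diff omits it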
72 changes: 72 additions & 0 deletions NeuronalNetwork/ga3c/Grid_map.py
@@ -0,0 +1,72 @@
import numpy as np


class GridMap:

def __init__(self, input_laser):
self.state = self.normalise(input_laser)
self.height = self.state.size
self.witdh=self.state.size
self.States_map= self.S_map()
self.Reward_map= self.R_map()


def S_map(self):
state = np.reshape(self.state, (self.witdh))
States_map = np.zeros((1,self.height))
for s_ in state:
if int(((s_*200)* self.height)/100)<self.height:
zeros = int(((s_ * 200) * self.height) / 100)
else:
zeros = int(((s_ * 100) * self.height) / 100)
if (self.height - zeros) > 0 :
ones = int(self.height - zeros)
else:
ones = 0
col = np.zeros((zeros), dtype=float)
col = np.append(col, np.ones((ones), dtype=float))
col = np.reshape(col,(1,self.height))
States_map=np.append(States_map,col,axis=0)
States_map= np.rot90(np.reshape(np.delete(States_map, 0, 0), (self.witdh, self.height)))
return States_map
def R_map(self):
Reward_map=np.zeros((1,self.witdh))
States_map= np.rot90(self.States_map,1)

#States_map=np.transpose(self.States_map)
for s in States_map:
ones= np.count_nonzero(s)-1
zeros = 0
if (ones) <0 :
ones = 0
zeros-=1

zeros += np.count_nonzero(s-1)
collisions = np.ones((ones),dtype=float) * (-1)
free = np.ones((zeros),dtype=float) * (-0.04)

if zeros==self.height:
ziel=zeros*1
elif zeros< self.height and zeros >= ((self.height)/2):
ziel = zeros * 0.5
else:
ziel= ones * (-1)
R_col = np.array(np.append(np.append(collisions,ziel ),free))
R_col = np.reshape(R_col,(1,self.witdh))
Reward_map=np.append(Reward_map,R_col,axis=0)
Reward_map = np.delete(Reward_map,0,0)
Reward_map = np.rot90(Reward_map,-1)

return Reward_map
def normalise(self,s):
s = np.array ([s])
shape = s.shape
max_min = np.amax(s) - np.amin(s)
min = np.amin(s)
if not max_min==0:
s = (s - min) / max_min
#s = (s + 1) / 2
else:
s = (s - min)
#s = (s + 1) / 2
return np.reshape(s, shape)
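GridMap converts a 1-D laser scan into two square maps: States_map fills, for each beam, the cells beyond the normalized range reading with ones, and Reward_map marks those blocked cells with -1, free cells with -0.04, and one goal cell per column whose value depends on how much of the column is free. A quick usage sketch, assuming the module is importable and a 60-beam scan as configured above:

import numpy as np
from Grid_map import GridMap  # NeuronalNetwork/ga3c/Grid_map.py

scan = np.random.uniform(0.1, 5.0, size=60)   # synthetic laser ranges, one per beam
grid = GridMap(scan)

print(grid.States_map.shape)        # (60, 60) binary occupancy-style map
print(grid.Reward_map.shape)        # (60, 60) per-cell reward map
print(np.unique(grid.Reward_map))   # roughly {-1.0, -0.04} plus the per-column goal values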