diff --git a/DumbGame/DumbGame0.py b/DumbGame/DumbGame0.py
new file mode 100644
index 0000000..828668d
--- /dev/null
+++ b/DumbGame/DumbGame0.py
@@ -0,0 +1,93 @@
+import random
+import sys
+
+class Game:
+
+    def __init__(self):
+        print("New Game. Guess the correct order of the numbers 0-4 with a partner. 8 strikes and you're out.")
+        self.over = False
+        self.n_strikes = 8
+        self.numbers = list(range(0, 5))
+        random.shuffle(self.numbers)
+        self.acting_player_index = 0
+        self.score = 0
+        self.state = self.GetState()
+
+    def GetActingPlayerIndex(self):
+        return self.acting_player_index
+
+    def GetNextPlayerIndex(self):
+        return (self.acting_player_index + 1) % 2
+
+    def GetState(self, player_idx=None):
+        s = []
+        s.append(self.n_strikes)
+        s.append(self.acting_player_index)
+        s.append(self.score)
+        # conceal the solution from the acting player
+        if player_idx is not None:
+            s.append("")
+        else:
+            s.append(self.numbers)
+        return s
+
+    def NumberIsPlayable(self, guessed_number):
+        return self.numbers[self.score] == guessed_number
+
+    def CheckGameOver(self):
+        if self.n_strikes == 0:
+            return True
+        if self.score == len(self.numbers):
+            return True
+        return False
+
+    def NextTurn(self):
+        start_state = self.GetState()
+        print(f"Player {self.acting_player_index + 1}'s turn to act.")
+        print("Here's what they know about the game:")
+        print(self.GetState(self.acting_player_index))
+        self.Action()
+        #self.GetActingPlayer().Act()
+        self.over = self.CheckGameOver()
+        self.acting_player_index = self.GetNextPlayerIndex()
+        end_state = self.GetState()
+        assert start_state != end_state
+        return end_state
+
+    def Action(self):
+        while True:
+            try:
+                guessed_number = int(input("Guess a number 0-4> "))
+                assert guessed_number in range(0, 5)
+                break
+            except ValueError:  # not an int
+                continue
+            except AssertionError:  # not 0-4
+                continue
+        if self.NumberIsPlayable(guessed_number):
+            print("Correct!")
+            self.score += 1
+        else:
+            self.n_strikes -= 1
+            print(f"Wrong. {self.n_strikes} strikes remaining.")
+
+
+if __name__ == "__main__":
+
+    game = Game()
+    print(game.GetState())
+
+    while not game.over:
+        try:
+            new_state = game.NextTurn()
+            print(game.GetState())
+        except AssertionError:
+            print("Error: game state did not change when a turn was taken.")
+            sys.exit(1)
+
+    print("Game finished.")
+
+    if game.score == len(game.numbers):
+        print("Fireworks! You Win!")
+    else:
+        print("Too bad, you lose with a score of", game.score)
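# A minimal smoke test of the state handling above -- a sketch, not part of the patch,
# assuming it is run from the DumbGame/ directory so that DumbGame0.py is importable.
from DumbGame0 import Game

game = Game()
full_state = game.GetState()               # includes the shuffled solution
player_view = game.GetState(player_idx=0)  # solution concealed from the acting player
assert full_state[:3] == player_view[:3]   # strikes, acting player and score are public
assert player_view[3] == "" and full_state[3] == game.numbers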
diff --git a/DumbGame/DumbGame1.py b/DumbGame/DumbGame1.py
new file mode 100644
index 0000000..f6d8e27
--- /dev/null
+++ b/DumbGame/DumbGame1.py
@@ -0,0 +1,62 @@
+################################################################################
+# Dumb game to be played by machines
+# Guess the correct order of the numbers 0-4, which are shuffled.
+# Keep guessing until you get the whole sequence.
+# Penalized -1 for every wrong guess.
+################################################################################
+
+from gym import Env
+from gym.spaces import Discrete, Box
+import random
+import numpy as np
+
+
+class DumbGameEnv(Env):
+    def __init__(self):
+        self.n_numbers = 5
+        self.answer = list(range(self.n_numbers))
+        random.shuffle(self.answer)
+        self.state = 0
+        self.n_guesses = 0
+        self.action_space = Discrete(5)
+        # state counts correct guesses so far, so it ranges over 0..n_numbers
+        self.observation_space = Box(low=np.array([0], dtype=np.float32),
+                                     high=np.array([self.n_numbers], dtype=np.float32))
+
+    def step(self, action):
+        reward = 0
+        self.n_guesses += 1
+        if action == self.answer[self.state]:
+            self.state += 1
+            reward = 1
+        else:
+            reward = -1
+
+        # done when the whole sequence is guessed, or after 50 guesses
+        done = self.state == self.n_numbers or self.n_guesses >= 50
+
+        info = {}
+
+        # Return step information
+        return self.state, reward, done, info
+
+    def render(self):
+        pass
+
+    def reset(self):
+        random.shuffle(self.answer)
+        self.state = 0
+        self.n_guesses = 0
+        return self.state
+
+if __name__ == "__main__":
+    env = DumbGameEnv()
+    #print(env.observation_space.sample())  # a float in [0, n_numbers]
+    #print(env.action_space.sample())  # 0-4
+    episodes = 10
+    for episode in range(1, episodes+1):
+        state = env.reset()
+        done = False
+        score = 0
+        n_guesses = 0
+        while not done:
+            n_guesses += 1
+            action = env.action_space.sample()
+            n_state, reward, done, info = env.step(action)
+            score += reward
+        print(f'Episode:{episode} Score:{score} NGuesses:{n_guesses}')
diff --git a/RL_test_0/OpenAICustonEnvironmentReinforcementLearning.py b/RL_test_0/OpenAICustonEnvironmentReinforcementLearning.py
new file mode 100644
index 0000000..2b7f653
--- /dev/null
+++ b/RL_test_0/OpenAICustonEnvironmentReinforcementLearning.py
@@ -0,0 +1,143 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# # 0. Install Dependencies
+
+# # 1. Test Random Environment with OpenAI Gym
+
+from gym import Env
+from gym.spaces import Discrete, Box
+import numpy as np
+import random
+
+
+class ShowerEnv(Env):
+    def __init__(self):
+        # Actions we can take: down, stay, up
+        self.action_space = Discrete(3)
+        # Temperature array
+        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
+        # Set start temp
+        self.state = 38 + random.randint(-3, 3)
+        # Set shower length
+        self.shower_length = 60
+
+    def step(self, action):
+        # Apply action
+        # 0 - 1 = -1 temperature
+        # 1 - 1 =  0
+        # 2 - 1 = +1 temperature
+        self.state += action - 1
+        # Reduce shower length by 1 second
+        self.shower_length -= 1
+
+        # Calculate reward
+        if self.state >= 37 and self.state <= 39:
+            reward = 1
+        else:
+            reward = -1
+
+        # Check if shower is done
+        if self.shower_length <= 0:
+            done = True
+        else:
+            done = False
+
+        # Apply temperature noise
+        #self.state += random.randint(-1,1)
+        # Set placeholder for info
+        info = {}
+
+        # Return step information
+        return self.state, reward, done, info
+
+    def render(self):
+        # Implement viz
+        pass
+
+    def reset(self):
+        # Reset shower temperature
+        self.state = 38 + random.randint(-3, 3)
+        # Reset shower time
+        self.shower_length = 60
+        return self.state
+
+
+env = ShowerEnv()
+
+env.observation_space.sample()
+
+episodes = 10
+for episode in range(1, episodes+1):
+    state = env.reset()
+    done = False
+    score = 0
+
+    while not done:
+        #env.render()
+        action = env.action_space.sample()
+        n_state, reward, done, info = env.step(action)
+        score += reward
+    print('Episode:{} Score:{}'.format(episode, score))
+
+# # 2. Create a Deep Learning Model with Keras
+
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Flatten
+from tensorflow.keras.optimizers import Adam
+
+states = env.observation_space.shape
+actions = env.action_space.n
+
+actions
+
+
+def build_model(states, actions):
+    model = Sequential()
+    model.add(Dense(24, activation='relu', input_shape=states))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(actions, activation='linear'))
+    return model
+
+
+# only needed when re-running the notebook with an old model in memory
+try:
+    del model
+except NameError:
+    pass
+
+model = build_model(states, actions)
+
+model.summary()
+
+# # 3. Build Agent with Keras-RL
+
+from rl.agents import DQNAgent
+from rl.policy import BoltzmannQPolicy
+from rl.memory import SequentialMemory
+
+
+def build_agent(model, actions):
+    policy = BoltzmannQPolicy()
+    memory = SequentialMemory(limit=50000, window_length=1)
+    dqn = DQNAgent(model=model, memory=memory, policy=policy,
+                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
+    return dqn
+
+
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
+
+scores = dqn.test(env, nb_episodes=100, visualize=False)
+print(np.mean(scores.history['episode_reward']))
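# A hand-written baseline for the ShowerEnv above -- a sketch, assuming the ShowerEnv
# class from the notebook is already defined in the session.  It simply nudges the
# temperature toward the 37-39 comfort band, which bounds what a trained agent
# should be able to reach.
env = ShowerEnv()
state = env.reset()
done, score = False, 0
while not done:
    if state < 37:
        action = 2      # heat up by 1
    elif state > 39:
        action = 0      # cool down by 1
    else:
        action = 1      # hold temperature
    state, reward, done, info = env.step(action)
    score += reward
print('heuristic score:', score)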
diff --git a/RL_test_1/DumbGame.py b/RL_test_1/DumbGame.py
new file mode 100644
index 0000000..f6d8e27
--- /dev/null
+++ b/RL_test_1/DumbGame.py
@@ -0,0 +1,62 @@
+################################################################################
+# Dumb game to be played by machines
+# Guess the correct order of the numbers 0-4, which are shuffled.
+# Keep guessing until you get the whole sequence.
+# Penalized -1 for every wrong guess.
+################################################################################
+
+from gym import Env
+from gym.spaces import Discrete, Box
+import random
+import numpy as np
+
+
+class DumbGameEnv(Env):
+    def __init__(self):
+        self.n_numbers = 5
+        self.answer = list(range(self.n_numbers))
+        random.shuffle(self.answer)
+        self.state = 0
+        self.n_guesses = 0
+        self.action_space = Discrete(5)
+        # state counts correct guesses so far, so it ranges over 0..n_numbers
+        self.observation_space = Box(low=np.array([0], dtype=np.float32),
+                                     high=np.array([self.n_numbers], dtype=np.float32))
+
+    def step(self, action):
+        reward = 0
+        self.n_guesses += 1
+        if action == self.answer[self.state]:
+            self.state += 1
+            reward = 1
+        else:
+            reward = -1
+
+        # done when the whole sequence is guessed, or after 50 guesses
+        done = self.state == self.n_numbers or self.n_guesses >= 50
+
+        info = {}
+
+        # Return step information
+        return self.state, reward, done, info
+
+    def render(self):
+        pass
+
+    def reset(self):
+        random.shuffle(self.answer)
+        self.state = 0
+        self.n_guesses = 0
+        return self.state
+
+if __name__ == "__main__":
+    env = DumbGameEnv()
+    #print(env.observation_space.sample())  # a float in [0, n_numbers]
+    #print(env.action_space.sample())  # 0-4
+    episodes = 10
+    for episode in range(1, episodes+1):
+        state = env.reset()
+        done = False
+        score = 0
+        n_guesses = 0
+        while not done:
+            n_guesses += 1
+            action = env.action_space.sample()
+            n_state, reward, done, info = env.step(action)
+            score += reward
+        print(f'Episode:{episode} Score:{score} NGuesses:{n_guesses}')
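# A quick sanity check on what random play scores in DumbGameEnv -- a sketch, assuming
# it is run from the RL_test_1/ directory so that DumbGame.py above is importable.
from DumbGame import DumbGameEnv
import numpy as np

env = DumbGameEnv()
scores = []
for _ in range(200):
    state, done, score = env.reset(), False, 0
    while not done:
        state, reward, done, _ = env.step(env.action_space.sample())
        score += reward
    scores.append(score)
print('random baseline, mean score:', np.mean(scores))
# Each position takes about five random tries (one +1 and roughly four -1s), so the
# mean should land near 5 - 20 = -15; a trained agent needs to beat that.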
diff --git a/RL_test_1/OpenAICustomEnvironmentReinforcementLearning.py b/RL_test_1/OpenAICustomEnvironmentReinforcementLearning.py
new file mode 100644
index 0000000..d87e19e
--- /dev/null
+++ b/RL_test_1/OpenAICustomEnvironmentReinforcementLearning.py
@@ -0,0 +1,169 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# # 0. Install Dependencies
+
+# # 1. Test Random Environment with OpenAI Gym
+
+from gym import Env
+from gym.spaces import Discrete, Box
+import numpy as np
+import random
+
+
+class ShowerEnv(Env):
+    def __init__(self):
+        # Actions we can take: temperature changes from -2 to +2
+        self.action_space = Discrete(5)
+        # Temperature, as a discrete observation (0-99)
+        self.observation_space = Discrete(100)
+        # Set start temp
+        self.state = 38 + random.randint(-3, 3)
+        # Set shower length
+        self.shower_length = 60
+
+        self.answer = list(range(3))
+        random.shuffle(self.answer)
+
+    def step(self, action):
+        # Apply action: 0 -> -2, 1 -> -1, 2 -> 0, 3 -> +1, 4 -> +2 temperature
+        self.state += action - 2
+        # Reduce shower length by 1 second
+        self.shower_length -= 1
+
+        # Calculate reward
+        if self.state >= 37 and self.state <= 39:
+            reward = 1
+        else:
+            reward = -1
+
+        # Check if shower is done
+        if self.shower_length <= 0:
+            done = True
+        else:
+            done = False
+
+        # Apply temperature noise
+        #self.state += random.randint(-1,1)
+        # Set placeholder for info
+        info = {}
+
+        # Return step information
+        return self.state, reward, done, info
+
+    def render(self):
+        # Implement viz
+        pass
+
+    def reset(self):
+        # Reset shower temperature
+        self.state = 38 + random.randint(-3, 3)
+        # Reset shower time
+        self.shower_length = 60
+        random.shuffle(self.answer)
+        return self.state
+
+
+env = ShowerEnv()
+
+env.observation_space.sample()
+
+assert not env.observation_space.contains(1.5)
+
+episodes = 10
+for episode in range(1, episodes+1):
+    state = env.reset()
+    done = False
+    score = 0
+
+    while not done:
+        #env.render()
+        action = env.action_space.sample()
+        n_state, reward, done, info = env.step(action)
+        score += reward
+    print('Episode:{} Score:{}'.format(episode, score))
+
+# # 2. Create a Deep Learning Model with Keras
+
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Flatten
+from tensorflow.keras.optimizers import Adam
+
+#states = env.observation_space.shape
+states = (1,)
+actions = env.action_space.n
+
+actions
+
+states
+
+
+def build_model(states, actions):
+    model = Sequential()
+    model.add(Dense(24, activation='relu', input_shape=states))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(actions, activation='linear'))
+    return model
+
+
+# only needed when re-running the notebook with an old model in memory
+try:
+    del model
+except NameError:
+    pass
+
+model = build_model(states, actions)
+
+model.summary()
+
+# # 3. Build Agent with Keras-RL
+
+from rl.agents import DQNAgent
+from rl.policy import BoltzmannQPolicy
+from rl.memory import SequentialMemory
+
+
+def build_agent(model, actions):
+    policy = BoltzmannQPolicy()
+    memory = SequentialMemory(limit=50000, window_length=1)
+    dqn = DQNAgent(model=model, memory=memory, policy=policy,
+                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
+    return dqn
+
+
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
+
+scores = dqn.test(env, nb_episodes=100, visualize=False)
+print(np.mean(scores.history['episode_reward']))
+
+dqn.save_weights('dqn_weights_discrete.h5f', overwrite=True)
+
+del model
+del dqn
+del env
+
+env = ShowerEnv()
+actions = env.action_space.n
+states = (1,)
+model = build_model(states, actions)
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+
+dqn.load_weights('dqn_weights_discrete.h5f')
+
+scores = dqn.test(env, nb_episodes=100, visualize=False)
+print(np.mean(scores.history['episode_reward']))
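# With Discrete(5) actions mapped to temperature changes of action - 2, the policy has
# finer control than the 3-action version.  A hand-written baseline for comparison --
# a sketch, assuming the ShowerEnv defined in the notebook above is in scope.
def heuristic_action(temp):
    # choose the delta that moves temp closest to the middle of the 37-39 band
    target = 38
    deltas = [-2, -1, 0, 1, 2]
    return min(range(5), key=lambda a: abs(temp + deltas[a] - target))

env = ShowerEnv()
state, done, score = env.reset(), False, 0
while not done:
    state, reward, done, _ = env.step(heuristic_action(state))
    score += reward
print('heuristic score:', score)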
diff --git a/RL_test_1/dqn_weights_box.h5f.data-00000-of-00001 b/RL_test_1/dqn_weights_box.h5f.data-00000-of-00001
new file mode 100644
index 0000000..1a86847
Binary files /dev/null and b/RL_test_1/dqn_weights_box.h5f.data-00000-of-00001 differ
diff --git a/RL_test_1/dqn_weights_box.h5f.index b/RL_test_1/dqn_weights_box.h5f.index
new file mode 100644
index 0000000..518e851
Binary files /dev/null and b/RL_test_1/dqn_weights_box.h5f.index differ
diff --git a/RL_test_1/dqn_weights_discrete.h5f.data-00000-of-00001 b/RL_test_1/dqn_weights_discrete.h5f.data-00000-of-00001
new file mode 100644
index 0000000..4687923
Binary files /dev/null and b/RL_test_1/dqn_weights_discrete.h5f.data-00000-of-00001 differ
diff --git a/RL_test_1/dqn_weights_discrete.h5f.index b/RL_test_1/dqn_weights_discrete.h5f.index
new file mode 100644
index 0000000..3c646ef
Binary files /dev/null and b/RL_test_1/dqn_weights_discrete.h5f.index differ
diff --git a/RL_test_2/DeepReinforcementLearning.py b/RL_test_2/DeepReinforcementLearning.py
new file mode 100644
index 0000000..358559f
--- /dev/null
+++ b/RL_test_2/DeepReinforcementLearning.py
@@ -0,0 +1,115 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# # 0. Install Dependencies
+
+# !pip install tensorflow==2.3.0
+# !pip install gym
+# !pip install keras
+# !pip install keras-rl2
+
+# # 1. Test Random Environment with OpenAI Gym
+
+import gym
+import random
+
+env = gym.make('CartPole-v0')
+states = env.observation_space.shape[0]
+actions = env.action_space.n
+
+actions
+
+episodes = 10
+for episode in range(1, episodes+1):
+    state = env.reset()
+    done = False
+    score = 0
+
+    while not done:
+        env.render()
+        action = random.choice([0, 1])
+        n_state, reward, done, info = env.step(action)
+        score += reward
+    print('Episode:{} Score:{}'.format(episode, score))
+
+# # 2. Create a Deep Learning Model with Keras
+
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Flatten
+from tensorflow.keras.optimizers import Adam
+
+
+def build_model(states, actions):
+    model = Sequential()
+    model.add(Flatten(input_shape=(1, states)))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(actions, activation='linear'))
+    return model
+
+
+# +
+# only needed when re-running the notebook with an old model in memory
+try:
+    del model
+except NameError:
+    pass
+
+model = build_model(states, actions)
+# -
+
+model.summary()
+
+# # 3. Build Agent with Keras-RL
+
+from rl.agents import DQNAgent
+from rl.policy import BoltzmannQPolicy
+from rl.memory import SequentialMemory
+
+
+def build_agent(model, actions):
+    policy = BoltzmannQPolicy()
+    memory = SequentialMemory(limit=50000, window_length=1)
+    dqn = DQNAgent(model=model, memory=memory, policy=policy,
+                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
+    return dqn
+
+
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
+
+scores = dqn.test(env, nb_episodes=5, visualize=False)
+print(np.mean(scores.history['episode_reward']))
+
+_ = dqn.test(env, nb_episodes=15, visualize=True)
+
+# # 4. Reloading Agent from Memory
+
+dqn.save_weights('dqn_weights.h5f', overwrite=True)
+
+del model
+del dqn
+del env
+
+env = gym.make('CartPole-v0')
+actions = env.action_space.n
+states = env.observation_space.shape[0]
+model = build_model(states, actions)
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+
+dqn.load_weights('dqn_weights.h5f')
+
+_ = dqn.test(env, nb_episodes=5, visualize=True)
+
+
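# The Flatten(input_shape=(1, states)) in build_model above matches what keras-rl feeds
# the network: with SequentialMemory(window_length=1), each observation arrives as a
# (1, states) window that Flatten collapses back to a flat vector.  A quick shape check
# -- a sketch, assuming only that TensorFlow/Keras is installed.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

m = Sequential([Flatten(input_shape=(1, 4)),
                Dense(24, activation='relu'),
                Dense(2, activation='linear')])
print(m.output_shape)                        # (None, 2): one Q-value per action
print(m.predict(np.zeros((1, 1, 4))).shape)  # a single CartPole observation window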
diff --git a/RL_test_2/dqn_weights.h5f.data-00000-of-00001 b/RL_test_2/dqn_weights.h5f.data-00000-of-00001
new file mode 100644
index 0000000..ed0d663
Binary files /dev/null and b/RL_test_2/dqn_weights.h5f.data-00000-of-00001 differ
diff --git a/RL_test_2/dqn_weights.h5f.index b/RL_test_2/dqn_weights.h5f.index
new file mode 100644
index 0000000..68e1419
Binary files /dev/null and b/RL_test_2/dqn_weights.h5f.index differ
diff --git a/RL_test_3/Untitled.py b/RL_test_3/Untitled.py
new file mode 100644
index 0000000..a861436
--- /dev/null
+++ b/RL_test_3/Untitled.py
@@ -0,0 +1,10 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+# ---
diff --git a/RL_test_4/hanabi_ml_2.py b/RL_test_4/hanabi_ml_2.py
new file mode 100644
index 0000000..9003bc8
--- /dev/null
+++ b/RL_test_4/hanabi_ml_2.py
@@ -0,0 +1,68 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+from DumbGame import DumbGameEnv
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Flatten
+from tensorflow.keras.optimizers import Adam
+
+env = DumbGameEnv()
+states = env.observation_space.shape
+actions = env.action_space.n
+print(f"States:{states} Actions:{actions}")
+
+
+def build_model(states, actions):
+    model = Sequential()
+    model.add(Dense(24, activation='relu', input_shape=states))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(actions, activation='linear'))
+    return model
+
+
+# +
+from rl.agents import DQNAgent
+from rl.policy import BoltzmannQPolicy
+from rl.memory import SequentialMemory
+
+def build_agent(model, actions):
+    policy = BoltzmannQPolicy()
+    memory = SequentialMemory(limit=20000, window_length=1)
+    dqn = DQNAgent(model=model, memory=memory, policy=policy,
+                   nb_actions=actions, nb_steps_warmup=20, target_model_update=0.1)
+    return dqn
+
+
+# -
+
+model = build_model(states, actions)
+model.summary()
+
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=0.1))  #, metrics=['mae'])
+history = dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)
+
+print(history.params)
+print(history.history)
+
+scores = dqn.test(env, nb_episodes=1, visualize=False)
+
+# +
+#print(np.mean(scores.history['episode_reward']))
+#dqn.get_config()
+#scores = dqn.test(env, nb_episodes=1, visualize=False, verbose=1)
+#test(self, env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=True, nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None, verbose=1)
+#print(np.mean(scores.history['episode_reward']))
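# The History object returned by dqn.fit above logs per-episode statistics; a small
# plotting sketch, assuming the history object is still in scope, matplotlib is
# available, and 'episode_reward' is among the recorded keys (it falls back to
# printing the keys if not).
import matplotlib.pyplot as plt

rewards = history.history.get('episode_reward')
if rewards is None:
    print('recorded keys:', list(history.history.keys()))
else:
    plt.plot(rewards)
    plt.xlabel('episode')
    plt.ylabel('episode reward')
    plt.title('DQN training on DumbGameEnv')
    plt.show()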
diff --git a/RL_test_5/DeepReinforcementLearning.py b/RL_test_5/DeepReinforcementLearning.py
new file mode 100644
index 0000000..e7c06b8
--- /dev/null
+++ b/RL_test_5/DeepReinforcementLearning.py
@@ -0,0 +1,96 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.14.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# # 0. Install Dependencies
+
+# # 1. Test Random Environment with OpenAI Gym
+
+from DumbGame import DumbGameEnv
+env = DumbGameEnv()
+states = env.observation_space.shape
+actions = env.action_space.n
+print(f"States:{states} Actions:{actions}")
+
+# # 2. Create a Deep Learning Model with Keras
+
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Flatten
+from tensorflow.keras.optimizers import Adam
+
+states = env.observation_space.shape
+actions = env.action_space.n
+
+actions
+
+
+def build_model(states, actions):
+    model = Sequential()
+    model.add(Dense(24, activation='relu', input_shape=states))
+    model.add(Dense(24, activation='relu'))
+    model.add(Dense(actions, activation='linear'))
+    return model
+
+
+# only needed when re-running the notebook with an old model in memory
+try:
+    del model
+except NameError:
+    pass
+
+model = build_model(states, actions)
+
+model.summary()
+
+# # 3. Build Agent with Keras-RL
+
+from rl.agents import DQNAgent
+from rl.policy import BoltzmannQPolicy
+from rl.memory import SequentialMemory
+
+
+def build_agent(model, actions):
+    policy = BoltzmannQPolicy()
+    memory = SequentialMemory(limit=50000, window_length=1)
+    dqn = DQNAgent(model=model, memory=memory, policy=policy,
+                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
+    return dqn
+
+
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
+
+scores = dqn.test(env, nb_episodes=100, visualize=False)
+print(np.mean(scores.history['episode_reward']))
+
+_ = dqn.test(env, nb_episodes=15, visualize=True)
+
+# # 4. Reloading Agent from Memory
+
+dqn.save_weights('dqn_weights.h5f', overwrite=True)
+
+del model
+del dqn
+del env
+
+# recreate the DumbGame environment and a matching model before reloading the weights
+env = DumbGameEnv()
+actions = env.action_space.n
+states = env.observation_space.shape
+model = build_model(states, actions)
+dqn = build_agent(model, actions)
+dqn.compile(Adam(lr=1e-3), metrics=['mae'])
+
+dqn.load_weights('dqn_weights.h5f')
+
+_ = dqn.test(env, nb_episodes=5, visualize=True)
+
+
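# After reloading, a quick comparison against the random baseline measured for
# DumbGame.py -- a sketch, assuming the dqn and env objects recreated above are still
# in scope.
import numpy as np

trained = dqn.test(env, nb_episodes=100, visualize=False)
mean_reward = np.mean(trained.history['episode_reward'])
print('reloaded agent, mean episode reward:', mean_reward)
# Random play averages roughly -15 on this environment (about 25 guesses for the
# 5 correct answers), so anything well above that indicates the weights loaded
# correctly and the agent actually learned something.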