Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions DumbGame/DumbGame0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import random

class Game:
    """Two players cooperatively guess the shuffled order of the numbers 0-4.

    A wrong guess costs a strike; the game is lost after 7 strikes and won
    when the whole sequence has been guessed in order.
    """

    def __init__(self):
        self.over = False
        # fix: greeting promised 7 strikes but the code allowed 8.
        self.n_strikes = 7
        # fix: greeting said "0-5" but the sequence is 0..4; the greeting is
        # now derived from the actual attributes so they cannot drift apart.
        self.numbers = list(range(5))
        random.shuffle(self.numbers)
        self.acting_player_index = 0
        self.score = 0  # how many numbers have been guessed in order so far
        print(f"New Game. Guess the correct order of numbers "
              f"0-{len(self.numbers) - 1} with a partner. "
              f"{self.n_strikes} strikes and you're out.")
        self.state = self.GetState()

    def GetActingPlayerIndex(self):
        """Return the index (0 or 1) of the player whose turn it is."""
        return self.acting_player_index

    def GetNextPlayerIndex(self):
        """Return the index of the other player (two-player rotation)."""
        return (self.acting_player_index + 1) % 2

    def GetScore(self):
        """Return the number of correct guesses made so far.

        fix: the __main__ driver calls this method but it was never defined.
        """
        return self.score

    def GetState(self, player_idx=None):
        """Return the visible game state as a list.

        The list is [n_strikes, acting_player_index, score, solution], where
        the solution is replaced by "" when player_idx is given (so a player
        cannot see the answer).
        """
        s = [self.n_strikes, self.acting_player_index, self.score]
        if player_idx is not None:
            # fix: was `self.state.append("")`, which mutated the stale
            # snapshot from __init__ instead of the list being built here.
            s.append("")
        else:
            s.append(self.numbers)
        return s

    def NumberIsPlayable(self, guessed_number):
        """True if guessed_number is the next number in the hidden sequence."""
        return self.numbers[self.score] == guessed_number

    def CheckGameOver(self):
        """The game ends on 0 strikes (loss) or a complete sequence (win)."""
        return self.n_strikes == 0 or self.score == len(self.numbers)

    def NextTurn(self):
        """Run one player's turn and return the resulting state."""
        start_state = self.GetState()
        print(f"Player {self.acting_player_index + 1}'s turn to act.")
        print("Here's what they know about the game:")
        print(self.GetState(self.acting_player_index))
        self.Action()
        self.over = self.CheckGameOver()
        self.acting_player_index = self.GetNextPlayerIndex()
        end_state = self.GetState()
        # Every action changes either the score or the strike count.
        assert start_state != end_state
        return end_state

    def Action(self):
        """Prompt for one guess and apply it (score up or strike down)."""
        # fix: the prompt said 0-9 and accepted 0-8, but only 0..4 can ever
        # be correct; out-of-range input now re-prompts instead of striking.
        valid = range(len(self.numbers))
        while True:
            try:
                guessed_number = int(input(f"Guess a number 0-{len(self.numbers) - 1}> "))
            except ValueError:  # not an int
                continue
            if guessed_number in valid:  # fix: explicit check, not assert
                break
        if self.NumberIsPlayable(guessed_number):
            print("Correct!")
            self.score += 1
        else:
            self.n_strikes -= 1
            print(f"Wrong. {self.n_strikes} strikes remaining.")



if __name__ == "__main__":

    # Play a full game on the console, alternating between two players.
    game = Game()
    print(game.GetState())

    while not game.over:
        try:
            game.NextTurn()
            print(game.GetState())
        except AssertionError:
            print("Error: game state did not change when a turn was taken.")
            # fix: was sys.exit(1) but `sys` was never imported (NameError);
            # SystemExit needs no import and exits with the same status.
            raise SystemExit(1)

    print("Game finished.")

    if game.score == len(game.numbers):
        print("Fireworks! You Win!")
    else:
        # fix: Game defines no GetScore() in this file's original class,
        # so read the score attribute directly.
        print("Too bad, you lose with a score of", game.score)
62 changes: 62 additions & 0 deletions DumbGame/DumbGame1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
################################################################################
# Dumb game to be played by machines
# Guess the correct order of the numbers 1-5 which are shuffled.
# Keep guessing until you get the whole sequence.
# Penalized -1 for every wrong guess.
################################################################################

from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np


class DumbGameEnv(Env):
    """Gym environment: guess the shuffled order of the numbers 0-4.

    The observation is the index of the next position to guess (0..5).
    Reward is +1 for a correct guess (which advances the position) and
    -1 for a wrong one; an episode ends once the whole sequence is done.
    """

    def __init__(self):
        self.n_numbers = 5
        self.answer = list(range(self.n_numbers))
        random.shuffle(self.answer)
        self.state = 0      # next position in the sequence to guess
        self.n_guesses = 0  # fix: was only initialized in reset()
        # One action per candidate number.
        self.action_space = Discrete(self.n_numbers)
        # fix: the state ranges over 0..n_numbers, not 0..2.
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([self.n_numbers], dtype=np.float32))

    def step(self, action):
        """Apply one guess; return (state, reward, done, info)."""
        if action == self.answer[self.state]:
            self.state += 1
            reward = 1
        else:
            reward = -1

        # fix: dropped the dead `self.state < -50` clause -- state only
        # ever counts up from 0, so that condition could never be true.
        done = self.state == self.n_numbers

        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self):
        # No visualization for this toy game.
        pass

    def reset(self):
        """Start a new episode: reshuffle the answer and return state 0."""
        random.shuffle(self.answer)
        self.state = 0
        self.n_guesses = 0
        return self.state

if __name__ == "__main__":
    env = DumbGameEnv()
    # env.observation_space.sample() -> a float within the Box bounds
    # env.action_space.sample()      -> an int in 0-4
    n_episodes = 10
    for episode in range(1, n_episodes + 1):
        env.reset()
        done = False
        score, n_guesses = 0, 0
        while not done:
            n_guesses += 1
            _, reward, done, _ = env.step(env.action_space.sample())
            score += reward
        print(f'Episode:{episode} Score:{score} NGuesses:{n_guesses}')
143 changes: 143 additions & 0 deletions RL_test_0/OpenAICustonEnvironmentReinforcementLearning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:light
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.14.1
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---

# # 0. Install Dependencies

# # 1. Test Random Environment with OpenAI Gym

from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random


class ShowerEnv(Env):
    """Toy environment: keep the shower temperature in the 37-39 band.

    Actions: 0 = cooler, 1 = hold, 2 = warmer.  Each step is one second
    of a 60-second shower; reward is +1 inside the comfort band and -1
    outside it.
    """

    def __init__(self):
        # Three discrete actions: lower, keep, raise the temperature.
        self.action_space = Discrete(3)
        # Observable temperature range.
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        # Start near 38 degrees with a little random jitter.
        self.state = 38 + random.randint(-3, 3)
        # Seconds of shower time remaining.
        self.shower_length = 60

    def step(self, action):
        """Apply one action; return (state, reward, done, info)."""
        # Map action {0, 1, 2} onto a temperature delta {-1, 0, +1}.
        self.state += action - 1
        # One second of shower elapses per step.
        self.shower_length -= 1

        # +1 while inside the 37-39 comfort band, -1 otherwise.
        reward = 1 if 37 <= self.state <= 39 else -1

        # The episode ends when the shower time runs out.
        done = self.shower_length <= 0

        # Apply temperature noise
        #self.state += random.randint(-1,1)
        return self.state, reward, done, {}

    def render(self):
        # No visualization implemented.
        pass

    def reset(self):
        """Start a new episode: fresh start temperature, full 60 seconds."""
        self.state = 38 + random.randint(-3, 3)
        self.shower_length = 60
        return self.state



env = ShowerEnv()

# Draw one sample observation just to inspect the space.
env.observation_space.sample()

# Roll out a few episodes with a uniformly random policy as a baseline.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        # env.render()
        n_state, reward, done, info = env.step(env.action_space.sample())
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))

# # 2. Create a Deep Learning Model with Keras

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Network input shape = observation shape; output size = number of actions.
states = env.observation_space.shape
actions = env.action_space.n

# Bare expression: in the notebook this cell just displays the value.
actions


def build_model(states, actions):
    """Return an uncompiled dense Q-network mapping states to action values."""
    layers = [
        Dense(24, activation='relu', input_shape=states),  # hidden layer 1
        Dense(24, activation='relu'),                      # hidden layer 2
        Dense(actions, activation='linear'),               # one Q-value per action
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model


# Drop any model left over from a previous notebook run so build_model
# starts from scratch.
# fix: a bare `del model` raises NameError on the first execution, when
# no `model` exists yet.
try:
    del model
except NameError:
    pass

model = build_model(states, actions)

model.summary()

# # 3. Build Agent with Keras-RL

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


def build_agent(model, actions):
    """Wrap *model* in a DQN agent with Boltzmann exploration."""
    return DQNAgent(
        model=model,
        memory=SequentialMemory(limit=50000, window_length=1),
        policy=BoltzmannQPolicy(),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )


dqn = build_agent(model, actions)
# fix: `lr` is a deprecated alias removed in modern Keras; the optimizer
# argument is `learning_rate`.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

# Evaluate the trained agent and report the mean episode reward.
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))
62 changes: 62 additions & 0 deletions RL_test_1/DumbGame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
################################################################################
# Dumb game to be played by machines
# Guess the correct order of the numbers 1-5 which are shuffled.
# Keep guessing until you get the whole sequence.
# Penalized -1 for every wrong guess.
################################################################################

from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np


class DumbGameEnv(Env):
    """Gym environment: guess the shuffled order of the numbers 0-4.

    The observation is the index of the next position to guess (0..5).
    Reward is +1 for a correct guess (which advances the position) and
    -1 for a wrong one; an episode ends once the whole sequence is done.
    """

    def __init__(self):
        self.n_numbers = 5
        self.answer = list(range(self.n_numbers))
        random.shuffle(self.answer)
        self.state = 0      # next position in the sequence to guess
        self.n_guesses = 0  # fix: was only initialized in reset()
        # One action per candidate number.
        self.action_space = Discrete(self.n_numbers)
        # fix: the state ranges over 0..n_numbers, not 0..2.
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([self.n_numbers], dtype=np.float32))

    def step(self, action):
        """Apply one guess; return (state, reward, done, info)."""
        if action == self.answer[self.state]:
            self.state += 1
            reward = 1
        else:
            reward = -1

        # fix: dropped the dead `self.state < -50` clause -- state only
        # ever counts up from 0, so that condition could never be true.
        done = self.state == self.n_numbers

        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self):
        # No visualization for this toy game.
        pass

    def reset(self):
        """Start a new episode: reshuffle the answer and return state 0."""
        random.shuffle(self.answer)
        self.state = 0
        self.n_guesses = 0
        return self.state

if __name__ == "__main__":
    env = DumbGameEnv()
    # env.observation_space.sample() -> a float within the Box bounds
    # env.action_space.sample()      -> an int in 0-4
    n_episodes = 10
    for episode in range(1, n_episodes + 1):
        env.reset()
        done = False
        score, n_guesses = 0, 0
        while not done:
            n_guesses += 1
            _, reward, done, _ = env.step(env.action_space.sample())
            score += reward
        print(f'Episode:{episode} Score:{score} NGuesses:{n_guesses}')
Loading