diff --git a/project-boilerplates/reinforcement-learning/.gitignore b/project-boilerplates/reinforcement-learning/.gitignore index 707e04f..5f0f6f3 100644 --- a/project-boilerplates/reinforcement-learning/.gitignore +++ b/project-boilerplates/reinforcement-learning/.gitignore @@ -9,3 +9,4 @@ __pycache__/ .env .direnv/ .vscode/ +.python-version diff --git a/project-boilerplates/reinforcement-learning/README.md b/project-boilerplates/reinforcement-learning/README.md index 2b9b25c..5c7c3c7 100644 --- a/project-boilerplates/reinforcement-learning/README.md +++ b/project-boilerplates/reinforcement-learning/README.md @@ -1,6 +1,6 @@ This is a **boilerplate** repo for a reinforcement learning (RL) project. -This directory provides an example repository structure for RL projects using pytorch. This template provides a generic agent using the [deep Q-learning](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) algorithm as well as an agent playing random actions for baseline performance. The DQN architecture is in itw own class and is hot-swappable with other potential architectures. A sample environment using [OpenAi's gym](https://github.com/openai/gym) and a generic control loop is also provided. +This directory provides an example repository structure for RL projects using PyTorch or TensorFlow. This template provides a generic agent using the [deep Q-learning](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) algorithm as well as an agent playing random actions for baseline performance. The DQN architecture is in its own class and is hot-swappable with other potential architectures. A sample environment using [OpenAI's gym](https://github.com/openai/gym) and a generic control loop is also provided. Note that since RL projects are rarely data-centric, and data has to be generated on-the-fly, requirements are likely to differ from standard ML projects. 
diff --git a/project-boilerplates/reinforcement-learning/requirements.txt b/project-boilerplates/reinforcement-learning/requirements.txt index 3080386..683d363 100644 --- a/project-boilerplates/reinforcement-learning/requirements.txt +++ b/project-boilerplates/reinforcement-learning/requirements.txt @@ -1,6 +1,8 @@ # Update as needed torch -#tensorflow +tensorflow-macos; sys_platform == 'darwin' and 'ARM' in platform_version +tensorflow; sys_platform == 'darwin' and 'ARM' not in platform_version +tensorflow; sys_platform != 'darwin' gymnasium gymnasium[box2d] tqdm diff --git a/project-boilerplates/reinforcement-learning/rl_boilerplate/agent.py b/project-boilerplates/reinforcement-learning/rl_boilerplate/agent.py index 733ac35..29b04a4 100644 --- a/project-boilerplates/reinforcement-learning/rl_boilerplate/agent.py +++ b/project-boilerplates/reinforcement-learning/rl_boilerplate/agent.py @@ -120,18 +120,17 @@ def set(self, obs_old, act, rwd, obs_new): # Compute the loss loss = tf.square(exp - out) - print(loss) + # Perform a backward propagation. grads = tape.gradient(loss, self.net.trainable_variables) self.opt.apply_gradients(zip(grads, self.net.trainable_variables)) def get(self, obs_new, act_space): """ - Run an epsilon-greedy policy for next actino selection. + Run an epsilon-greedy policy for next action selection. 
""" # Return random action with probability epsilon if random.uniform(0, 1) < CFG.epsilon: return act_space.sample() # Else, return action with highest value - with torch.no_grad(): - return tf.argmax(self.net(obs_new.reshape(1, -1)), axis=1).numpy()[0] + return tf.argmax(self.net(obs_new.reshape(1, -1)), axis=1).numpy()[0] diff --git a/project-boilerplates/reinforcement-learning/rl_boilerplate/config.py b/project-boilerplates/reinforcement-learning/rl_boilerplate/config.py index 4870629..b88a4cc 100644 --- a/project-boilerplates/reinforcement-learning/rl_boilerplate/config.py +++ b/project-boilerplates/reinforcement-learning/rl_boilerplate/config.py @@ -9,7 +9,7 @@ class Configuration: """ - This configuration class is extremely flexible due to a two-step init process. We only instantiate a single instance of it (at the bottom if this file) so that all modules can import this singleton at load time. The second initialization (which happens in main.py) allows the user to input custom parameters of the config class at execution time. + This configuration class is extremely flexible due to a two-step init process. We only instantiate a single instance of it (at the bottom if this file) so that all modules can import this singleton at load time. (As python always cache module imports, the import actually only happens once). Then, the second initialization happens in main.py and allows the user to input custom parameters of the config class at execution time - and change them as the please during execution. """ def __init__(self): @@ -18,7 +18,7 @@ def __init__(self): """ self.alpha = 0.2 self.gamma = 0.98 - self.epsilon = 1.0 + self.epsilon = None self.rnd_seed = None self.agt_type = None @@ -30,7 +30,7 @@ def init(self, agt_type, **kwargs): # Mandatory arguments go here. In our case it is useless. 
self.agt_type = agt_type - # We set default values for arguments we have to define + # We set default values here for arguments we want to predefine self.rnd_seed = random.randint(0, 1000) self.epsilon = 0.05 diff --git a/project-boilerplates/reinforcement-learning/rl_boilerplate/environment.py b/project-boilerplates/reinforcement-learning/rl_boilerplate/environment.py index 7c5e489..1892a60 100644 --- a/project-boilerplates/reinforcement-learning/rl_boilerplate/environment.py +++ b/project-boilerplates/reinforcement-learning/rl_boilerplate/environment.py @@ -17,7 +17,7 @@ def get_env(): return gym.make("LunarLander-v2", render_mode="human") -def run_env(env, agt, run_number): +def run_env(env, agt): """ Run a given environment with a given agent. """ @@ -27,8 +27,7 @@ def run_env(env, agt, run_number): # We get the action space. act_space = env.action_space - print(f"Run number: {run_number + 1}") - for _ in range(1000): + for _ in tqdm(range(1000)): # We can visually render the learning environment. We disable it for performance. env.render() diff --git a/project-boilerplates/reinforcement-learning/rl_boilerplate/main.py b/project-boilerplates/reinforcement-learning/rl_boilerplate/main.py index 5319af6..a42f4f1 100644 --- a/project-boilerplates/reinforcement-learning/rl_boilerplate/main.py +++ b/project-boilerplates/reinforcement-learning/rl_boilerplate/main.py @@ -1,14 +1,23 @@ from rl_boilerplate import agent, environment - -from config import CFG +from rl_boilerplate.config import CFG +from tqdm import tqdm # We initialize our configuration class -CFG.init("", rnd_seed=22) +print("initialize config:") +CFG.init("", epsilon=0.8) +print(CFG.__dict__) -# We create an agent. State and action spaces are hardcoded here. -agt = agent.DQNAgent_tf(8, 4) +# We create an agent. 
+#agt = agent.DQNAgent_tf(8, 4) +agt = agent.DQNAgent_pt(8, 4) # Run a learning process -for i in range(1000): +for i in tqdm(range(1, 1000)): + print(f"\n 💫 Run number: {i}\n ") env = environment.get_env() - environment.run_env(env, agt, i) + environment.run_env(env, agt) + + if i % 10 == 0: + print("Reduce exploration rate:") + CFG.init("", epsilon=CFG.epsilon * 0.5) + print(CFG.__dict__)